# pyimage.py

import os
import re
import urllib
import urllib.request

import PIL
import PIL.Image
import pytesseract
import requests
from PIL import ImageFilter
from pytesseract import image_to_string

# One accumulator per axis. Each list collects (first, second) percentage
# tuples appended by the parsing code further down the file.
MatSpi = []  # "Mat…" vs "Spi…"
EgoAlt = []  # "Ego…" vs "Alt…"
IdePra = []  # "Ide…" vs "Pra…"
HedAsc = []  # "Hed…" vs "Asc…"
NihMor = []  # "Nih…" vs "Mor…"
RatRom = []  # "Rat…" vs "Rom…"
SkeAbs = []  # "Ske…" vs "Abs…"

# Maps the three-letter prefix of each axis's FIRST name to its accumulator.
#
# NOTE: this used to be spelled `"Mat" or "Spi": MatSpi`. In Python that key
# is the constant expression `"Mat" or "Spi"`, which evaluates to "Mat", so
# the second-name prefixes ("Spi", "Alt", ...) have never been keys of this
# dict. The literal below states the real contents explicitly. The reporting
# loop at the end of the file iterates this dict once per entry, so there
# must be exactly one key per axis.
lookup = {
    "Mat": MatSpi,
    "Ego": EgoAlt,
    "Ide": IdePra,
    "Hed": HedAsc,
    "Nih": NihMor,
    "Rat": RatRom,
    "Ske": SkeAbs,
}
# When True, OCR is re-run even for images that already have a cached .txt.
reloadimages = False
# Thread page whose image attachments are scraped.
url = "https://boards.4chan.org/pol/thread/195261420"
# A timeout keeps a stalled connection from hanging the script forever.
page = requests.get(url, timeout=30)
# Working directory (with trailing separator) for downloaded images and the
# cached OCR text. os.path.join yields the same '...\\etc\\' string as before
# on Windows and a valid path on other platforms.
wd = os.path.join(os.getcwd(), 'etc', '')
# Create the directory on a first run; os.listdir() below raises otherwise.
os.makedirs(wd, exist_ok=True)
# Every href of an <a class="fileThumb"> element in the page source.
imgUrls = re.findall('a class="fileThumb" href="(.*?)"', page.content.decode('utf-8'))
storage = os.listdir(wd)
# Lines of the processed-image log written by earlier runs. The log does not
# exist before the first run, which is not an error.
try:
    with open(wd+"textfn.txt", 'r') as names:
        storednames = names.readlines()
except FileNotFoundError:
    storednames = []
# Re-open the log for appending. It is deliberately left open here: the OCR
# loop further down writes to it and closes it.
names = open(wd+"textfn.txt", 'a')
# Download every scraped attachment that is not already in the working
# directory. (urllib.request is imported explicitly at the top of the file.)
#
# A set gives O(1) membership tests and lets files fetched during this run be
# recorded, so an href that appears twice is only downloaded once.
already_stored = set(storage)
for href in imgUrls:
    # The scheme is prepended here, so hrefs are expected to be scheme-less.
    image_url = "https:" + href
    # The last URL segment is the file name; compare it directly against the
    # directory listing instead of re-deriving it from the full local path.
    basename = image_url.split('/')[-1]
    if basename in already_stored:
        #print("skipped",filename)
        continue
    filename = wd + basename
    urllib.request.urlretrieve(image_url, filename=filename)
    already_stored.add(basename)
    #print("downloaded",filename)
print("finished fetching")

# Refresh the listing so files downloaded above are included.
storage = os.listdir(wd)
textlist = []   # OCR text, one entry per processed image
results = []    # filled by the parsing step further down
# Set of file names for O(1) "is there a cached .txt?" checks.
cached_names = set(storage)
# The preprocessed image is saved for debugging in the same directory that is
# scanned for inputs, so it must be skipped or the next run would OCR it too.
# NOTE(review): a debug image left behind under another name by an earlier
# version of this script should be deleted by hand for the same reason.
debug_name = "ocr_debug_preview.png"
try:
    for i in storage:
        stem, ext = os.path.splitext(i)
        if ext not in (".jpg", ".png") or i == debug_name:
            continue
        textf = stem + ".txt"
        if textf not in cached_names or reloadimages:
            print("Writing", i, "to file")
            # Smooth, trace contours, convert to greyscale and trace contours
            # again before handing the image to tesseract.
            with PIL.Image.open(wd+i) as source:
                im = (
                    source.convert("RGB")
                    .filter(ImageFilter.SMOOTH)
                    .filter(ImageFilter.SMOOTH_MORE)
                    .filter(ImageFilter.CONTOUR)
                    .convert("L")
                    .filter(ImageFilter.CONTOUR)
                )
            im.save(wd+debug_name)
            text = image_to_string(im)
            textlist.append(text)
            # Cache the OCR result next to the image so later runs skip OCR.
            with open(wd+textf, 'w') as temp:
                temp.write(text)
            names.write(i+"\n")
        else:
            # A cached .txt exists and no reload was requested: reuse it.
            with open(wd+textf, 'r') as temp:
                textlist.append(temp.read())
finally:
    # The log handle was opened earlier in the file; close it even if OCR or
    # file access fails part-way through.
    names.close()

def _to_percent(raw):
    """Convert a captured ``"<digits>%"`` token to a float.

    Returns ``None`` when the token carries no usable number: nothing before
    the ``%``, something ``float()`` rejects (the pattern can capture a lone
    ``"."``), or a value above 100.
    """
    try:
        value = float(raw[:-1])  # drop the trailing '%'
    except ValueError:
        return None
    return value if value <= 100.0 else None


image_count = len(textlist)
print("finished parsing", image_count, "images for text")

# Captures: first name, second name, first percentage, second percentage.
# Raw string: same pattern as before, without invalid escape sequences.
score_pattern = re.compile(r'(\w*) vs (\w*)(?:\n.*?)*.*?(\d*\.?\d?\%).*?(\d*\.?\d?\%)\n')
for text in textlist:
    results.append(score_pattern.findall(text))

# `lookup` is keyed only by the prefix of each axis's first name. This maps
# the prefix of the second name back to that key, so a match can still be
# filed when only its second name is recognisable.
second_to_first = {
    "Spi": "Mat",
    "Alt": "Ego",
    "Pra": "Ide",
    "Asc": "Hed",
    "Mor": "Nih",
    "Rom": "Rat",
    "Abs": "Ske",
}

for matches in results:
    for first_name, second_name, first_raw, second_raw in matches:
        m = _to_percent(first_raw)
        n = _to_percent(second_raw)
        # `None` (not truthiness) marks a missing value, so a legitimate 0.0
        # is validated like any other number.
        if m is None and n is None:
            continue
        if m is None:
            # The two sides of an axis add up to 100; infer the missing one.
            m = 100.0 - n
        elif n is None:
            n = 100.0 - m
        elif abs(m + n - 100.0) > 1e-6:
            # Both values were read but do not add up: treat as an OCR error.
            # A tolerance is used because float sums are not exact.
            continue
        # Try the first name, then fall back to the second.
        for name in (first_name, second_name):
            prefix = name[:3]
            prefix = second_to_first.get(prefix, prefix)
            if prefix in lookup:
                lookup[prefix].append((m, n))
                break

# Print the mean of both sides of every axis.
# Iterating items() yields the label together with its list; looking the
# label up again by value matches by equality and can return the wrong axis
# when two lists compare equal.
for axis, pairs in lookup.items():
    count = len(pairs)
    if not count:
        # Nothing was parsed for this axis; averaging would divide by zero.
        print("No values found for", axis)
        continue
    first_avg = sum(pair[0] for pair in pairs) / count
    second_avg = sum(pair[1] for pair in pairs) / count
    print("Average value of", axis, float("{0:.1f}".format(first_avg)), "vs", float("{0:.1f}".format(second_avg)), "over", count, "values")