Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- import requests
- import os
- import PIL
- from PIL import ImageFilter
- import pytesseract
- from pytesseract import image_to_string
- import urllib
# Scrapes the images of one thread, OCRs them, extracts "<word> vs <word>"
# percentage pairs from the recognised text and prints the per-axis averages.

# One accumulator list per axis; every collected entry is a (first, second)
# tuple of percentages that add up to 100.
MatSpi = []
EgoAlt = []
IdePra = []
HedAsc = []
NihMor = []
RatRom = []
SkeAbs = []

# (first prefix, second prefix, accumulator) for every axis, in report order.
AXES = (
    ("Mat", "Spi", MatSpi),
    ("Ego", "Alt", EgoAlt),
    ("Ide", "Pra", IdePra),
    ("Hed", "Asc", HedAsc),
    ("Nih", "Mor", NihMor),
    ("Rat", "Rom", RatRom),
    ("Ske", "Abs", SkeAbs),
)

# Both prefixes of an axis map to the SAME list.  Note that a dict key written
# as `"Mat" or "Spi"` evaluates to just "Mat", which would leave the second
# prefix unregistered; the mapping is therefore built explicitly.
lookup = {
    prefix: bucket
    for first, second, bucket in AXES
    for prefix in (first, second)
}

# When True, images are OCR'd again even if a cached .txt already exists.
reloadimages = False
url = "https://boards.4chan.org/pol/thread/195261420"
# The trailing "" keeps a trailing path separator, so `wd + name` still works.
wd = os.path.join(os.getcwd(), "etc", "")

# Captures: first word, second word, first "NN.N%" token, second "NN.N%" token.
MATCHUP_RE = re.compile(
    r'(\w*) vs (\w*)(?:\n.*?)*.*?(\d*\.?\d?\%).*?(\d*\.?\d?\%)\n'
)


def fetch_images(thread_url, directory):
    """Download the files linked by `fileThumb` anchors on *thread_url*.

    Files whose base name already exists in *directory* are skipped.
    """
    # `import urllib` alone does not load the `request` submodule.
    import urllib.request

    page = requests.get(thread_url, timeout=30)
    hrefs = re.findall(
        'a class="fileThumb" href="(.*?)"', page.content.decode('utf-8')
    )
    already_present = set(os.listdir(directory))
    for href in hrefs:
        # The page uses protocol-relative links ("//host/path").
        full_url = "https:" + href
        basename = full_url.split('/')[-1]
        if basename in already_present:
            continue
        urllib.request.urlretrieve(
            full_url, filename=os.path.join(directory, basename)
        )


def ocr_image(path):
    """Return the text pytesseract recognises in the pre-filtered image."""
    from PIL import Image

    with Image.open(path) as source:
        image = source.convert("RGB")
    for step in (
        ImageFilter.SMOOTH,
        ImageFilter.SMOOTH_MORE,
        ImageFilter.CONTOUR,
    ):
        image = image.filter(step)
    image = image.convert("L").filter(ImageFilter.CONTOUR)
    # No debug copy of the filtered image is written into the data directory:
    # a stray .png there would be OCR'd as an input on the next run.
    return image_to_string(image)


def load_texts(directory, reload_images=False):
    """Return the OCR text of every .jpg/.png file in *directory*.

    The text of each image is cached next to it as ``<stem>.txt``; the cache
    is reused unless *reload_images* is true.  Names of freshly OCR'd images
    are appended to ``textfn.txt`` in the same directory.
    """
    entries = os.listdir(directory)
    present = set(entries)
    texts = []
    # Append mode creates the log on first use instead of failing.
    log_path = os.path.join(directory, "textfn.txt")
    with open(log_path, "a", encoding="utf-8") as log:
        for entry in entries:
            stem, ext = os.path.splitext(entry)
            if ext not in (".jpg", ".png"):
                continue
            cache_name = stem + ".txt"
            cache_path = os.path.join(directory, cache_name)
            if reload_images or cache_name not in present:
                print("Writing", entry, "to file")
                text = ocr_image(os.path.join(directory, entry))
                # UTF-8 so OCR output outside the locale code page can be
                # stored without an encoding error.
                with open(cache_path, "w", encoding="utf-8") as cache:
                    cache.write(text)
                log.write(entry + "\n")
            else:
                # errors="replace" tolerates caches written with another
                # encoding by an earlier run.
                with open(cache_path, encoding="utf-8",
                          errors="replace") as cache:
                    text = cache.read()
            texts.append(text)
    return texts


def parse_matchups(text):
    """Return all (word, word, 'NN%', 'NN%') tuples found in *text*."""
    return MATCHUP_RE.findall(text)


def parse_percentage(token):
    """Convert a captured token such as ``'45.5%'`` to a float.

    Returns None when the token holds no parseable number (e.g. ``'%'`` or
    ``'.%'``) or a value above 100, i.e. when the reading is unusable.
    """
    try:
        value = float(token[:-1])
    except ValueError:
        return None
    return None if value > 100.0 else value


def normalise_pair(first_token, second_token):
    """Return a validated ``(first, second)`` percentage pair or None.

    A single unusable side is reconstructed as ``100 - other``.  The pair is
    rejected when both sides are unusable or when they do not add up to 100.
    A legitimate 0% reading is kept.
    """
    first = parse_percentage(first_token)
    second = parse_percentage(second_token)
    if first is None and second is None:
        return None
    if first is None:
        first = 100.0 - second
    if second is None:
        second = 100.0 - first
    # Tolerance instead of `!=`: float addition of one-decimal values may be
    # off by a rounding error.
    if abs(first + second - 100.0) > 1e-6:
        return None
    return first, second


def record_matchups(matches):
    """Append every valid pair in *matches* to the list of its axis.

    The axis is chosen by the three-letter prefix of the first captured word,
    falling back to the prefix of the second word.  Matches whose words fit
    no axis are ignored.
    """
    for first_word, second_word, first_token, second_token in matches:
        pair = normalise_pair(first_token, second_token)
        if pair is None:
            continue
        for prefix in (first_word[:3], second_word[:3]):
            if prefix in lookup:
                lookup[prefix].append(pair)
                break


def summarise(bucket):
    """Return ``(avg_first, avg_second, count)`` rounded to one decimal.

    Returns None for an empty *bucket* so callers never divide by zero.
    """
    count = len(bucket)
    if not count:
        return None
    total_first = sum(pair[0] for pair in bucket)
    total_second = sum(pair[1] for pair in bucket)
    return (
        float("{0:.1f}".format(total_first / count)),
        float("{0:.1f}".format(total_second / count)),
        count,
    )


def report_averages():
    """Print one summary line per axis, labelled by its first prefix."""
    for first_prefix, _second_prefix, bucket in AXES:
        summary = summarise(bucket)
        if summary is None:
            print("No values found for", first_prefix)
            continue
        avg_first, avg_second, count = summary
        print("Average value of", first_prefix, avg_first, "vs", avg_second,
              "over", count, "values")


def main():
    """Fetch, OCR, parse and report, in that order."""
    os.makedirs(wd, exist_ok=True)
    fetch_images(url, wd)
    print("finished fetching")
    texts = load_texts(wd, reload_images=reloadimages)
    print("finished parsing", len(texts), "images for text")
    for text in texts:
        record_matchups(parse_matchups(text))
    report_averages()


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement