Advertisement
unsortedIndex

pyimage.py

Dec 1st, 2018
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.94 KB | None | 0 0
import os
import re
import urllib
import urllib.request

import PIL
import pytesseract
import requests
from PIL import Image
from PIL import ImageFilter
from pytesseract import image_to_string
  9.  
  10. MatSpi = []
  11. EgoAlt = []
  12. IdePra = []
  13. HedAsc = []
  14. NihMor = []
  15. RatRom = []
  16. SkeAbs = []
  17. lookup = {"Mat" or "Spi": MatSpi, "Ego" or "Alt": EgoAlt, "Ide" or "Pra": IdePra, "Hed" or "Asc": HedAsc, "Nih" or "Mor": NihMor, "Rat" or "Rom": RatRom, "Ske" or "Abs" : SkeAbs}
  18. reloadimages = False
  19. url = "https://boards.4chan.org/pol/thread/195261420"
  20. page = requests.get(url)
  21. wd = os.getcwd()+'\\etc\\'
  22. imgUrls = re.findall('a class="fileThumb" href="(.*?)"', page.content.decode('utf-8'))
  23. storage = os.listdir(wd)
  24. names = open(wd+"textfn.txt",'r+')
  25. storednames = names.readlines()
  26. names.close()
  27. names = open(wd+"textfn.txt",'a')
  28. for i in imgUrls:
  29.     i = "https:"+i
  30.     filename = wd+i.split('/')[-1]
  31.     if filename.split('\\')[-1] in storage:
  32.         #print("skipped",filename)
  33.         continue
  34.     urllib.request.urlretrieve(i, filename=filename)
  35.     #print("downloaded",filename)
  36. print("finished fetching")
  37.  
  38. storage = os.listdir(wd)
  39. textlist = []
  40. results = []
  41. for i in storage:
  42.     ext = i.split('.')[-1]
  43.     textf = i[:-3]+"txt"
  44.     if (ext == "jpg" or ext == "png"):
  45.         if textf not in storage or reloadimages:
  46.             print("Writing", i, "to file")
  47.             im = PIL.Image.open(wd+i).convert("RGB").filter(ImageFilter.SMOOTH).filter(ImageFilter.SMOOTH_MORE).filter(ImageFilter.CONTOUR).convert("L").filter(ImageFilter.CONTOUR)
  48.             im.save(wd+"niggers.png")
  49.             textlist.append(image_to_string(im))
  50.             temp = open(wd+i[:-3]+"txt",'w+')
  51.             temp.write(textlist[-1])
  52.             temp.close()
  53.             names.write(i+"\n")
  54.         elif textf in storage:
  55.             temp = open(wd+i[:-3]+"txt",'r')
  56.             textlist.append(temp.read())
  57.             temp.close()
  58. names.close()
  59.  
  60. l = len(textlist)
  61. print("finished parsing", l, "images for text")
  62. for i in textlist:
  63.     k = re.findall('(\w*) vs (\w*)(?:\n.*?)*.*?(\d*\.?\d?\%).*?(\d*\.?\d?\%)\n',i)
  64.     results.append(k)
  65. for i in results:
  66.     for j in i:
  67.         m,n = j[2][:-1], j[3][:-1]
  68.         if m and m != '': m = float(m)
  69.         if n and m != '': n = float(n)
  70.         if m and m > 100.0 or m == '':
  71.             m = -1
  72.         if n and n > 100.0 or n == '':
  73.             n = -1
  74.         if m and n:
  75.             if m == -1 and n == -1: continue
  76.             if m == -1: m = 100.0 - n
  77.             if n == -1: n = 100.0 - m
  78.             if m+n != 100: continue
  79.         if j[0][:3] in lookup:
  80.             lookup[j[0][:3]].append((m,n))
  81.         elif j[1][:3] in lookup:
  82.             lookup[j[1][:3]].append((m,n))
  83.  
  84. for key in lookup.values():
  85.     sm0, sm1, l = sum(i[0] for i in key), sum(i[1] for i in key), len(key)
  86.     print ("Average value of", list(lookup.keys())[list(lookup.values()).index(key)], float("{0:.1f}".format(sm0/l)), "vs", float("{0:.1f}".format(sm1/l)), "over", l, "values")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement