Guest User

XKCD Crap Counter

a guest
May 3rd, 2022
107
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.21 KB | None | 0 0
import http.client
import urllib
import urllib.request

import requests
  3.  
  4. header= {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) '
  5.     'AppleWebKit/537.11 (KHTML, like Gecko) '
  6.     'Chrome/23.0.1271.64 Safari/537.11',
  7.     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  8.     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
  9.     'Accept-Encoding': 'none',
  10.     'Accept-Language': 'en-US,en;q=0.8',
  11.     'Connection': 'keep-alive'}
  12.  
  13.  
  14. url = "https://www.explainxkcd.com/{0}"
  15.  
  16. crapped_comics = []
  17. for i in range(1,2613):
  18.     print(url.format(i))
  19.    
  20.     try:
  21.         req = urllib.request.Request(url=url.format(i), headers=header)
  22.         html = str(urllib.request.urlopen(req).read())
  23.     except:
  24.         print("Skipping.")
  25.        
  26.     crap_number = len(html.split("crap"))
  27.  
  28.     if crap_number > 10:
  29.         print(f"This comic is crapped ({crap_number} craps found).")
  30.         crapped_comics.append(f"Comic number: {i} | url=url.format(i)\n")
  31.  
  32.     else:
  33.         print("Comic likely not crapped.")
  34.  
  35. with open("crapped_comics.txt", "w") as cc_txt:
  36.     for line in crapped_comics:
  37.         cc_txt.write(line)
  38.  
  39.  
  40.  
  41. print(f"{len(crapped_comics)} found, corresponding to {100*round(len(crapped_comics)/2613., 3)} percent.")
Add Comment
Please, Sign In to add comment