Advertisement
Guest User

Untitled

a guest
Nov 15th, 2018
109
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.91 KB | None | 0 0
  1. import urllib.request, json, re, html
  2.  
  3. def cleanhtml(raw_html):
  4.     cleanr = re.compile('<br>')
  5.     cleantext = re.sub(cleanr, '\n', raw_html)
  6.     cleanr = re.compile('<.*?>')
  7.     cleantext = re.sub(cleanr, '', cleantext)
  8.     return cleantext
  9.  
  10. def cleanreplies(raw_string):
  11.     cleanr = re.compile('>>[0-9]*')
  12.     cleantext = re.sub(cleanr, '', raw_string)
  13.     return cleantext
  14.  
  15. def htmlue(raw_string):
  16.     return html.unescape(raw_string)
  17.  
  18. #https://2ch.hk/b/res/186629832.json
  19. #https://2ch.hk/un/res/616310.json
  20. with urllib.request.urlopen("https://2ch.hk/b/res/186629832.json") as url:
  21.     data = json.loads(url.read().decode())
  22.     count = 0
  23.     print(data["threads"][0]["posts"][1])
  24.     for x in data["threads"][0]["posts"]:
  25.         #print(cleanreplies(cleanhtml(x["comment"])))
  26.         count += 1
  27.         print('#' + str(count) + ' ' + cleanreplies(htmlue(cleanhtml(x["comment"]))))
  28.         print('')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement