Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import urllib.request, json, re, html
def cleanhtml(raw_html):
    """Convert an HTML fragment to plain text.

    Line-break tags are turned into newline characters and every other
    tag is removed.  Character entities such as ``&gt;`` are left as-is.

    Args:
        raw_html: The HTML fragment as a string.

    Returns:
        The fragment with ``<br>`` variants replaced by ``"\\n"`` and all
        remaining ``<...>`` tags stripped.
    """
    # Accept every common spelling of a line break (<br>, <br/>, <br />,
    # <BR>) so that none of them is swallowed by the generic tag stripper
    # below, which would silently join adjacent lines.
    with_breaks = re.sub(r'<br\s*/?>', '\n', raw_html, flags=re.IGNORECASE)
    # Non-greedy match so each tag is removed individually.
    return re.sub(r'<.*?>', '', with_breaks)
def cleanreplies(raw_string):
    """Remove ``>>12345``-style post references from a string.

    Args:
        raw_string: Text that may contain ``>>`` followed by a post number.

    Returns:
        The text with every ``>>`` + digits reference removed.  A bare
        ``>>`` that is not followed by a digit is left untouched.
    """
    # Require at least one digit: with ``*`` the pattern also matched a
    # lone ">>", deleting it from ordinary text.
    return re.sub(r'>>[0-9]+', '', raw_string)
def htmlue(raw_string):
    """Return *raw_string* with HTML character references decoded.

    Delegates to ``html.unescape``.
    """
    decoded = html.unescape(raw_string)
    return decoded
# Example thread endpoints:
#   https://2ch.hk/b/res/186629832.json
#   https://2ch.hk/un/res/616310.json
THREAD_URL = "https://2ch.hk/b/res/186629832.json"

# Download and parse the thread JSON.  The timeout keeps an unresponsive
# server from hanging the script forever.
with urllib.request.urlopen(THREAD_URL, timeout=30) as url:
    data = json.loads(url.read().decode())

posts = data["threads"][0]["posts"]

# Debug dump of the raw second post; guarded so a thread that contains
# only one post does not raise IndexError.
if len(posts) > 1:
    print(posts[1])

# Print every post as "#<n> <plain text>", numbered from 1, followed by a
# blank line.  Tags are stripped first, then entities are decoded (so
# "&gt;&gt;123" becomes ">>123"), and finally reply references are removed.
for count, x in enumerate(posts, start=1):
    print('#' + str(count) + ' ' + cleanreplies(htmlue(cleanhtml(x["comment"]))))
    print('')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement