Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def safe_requests(url, timeout=10):
    """Fetch *url* with an HTTP GET, reporting failures instead of raising.

    Any ``requests`` error (connection problem, timeout, HTTP error status)
    is printed rather than propagated.

    Args:
        url: Address to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The ``requests.Response`` when one was received. On success its
        ``encoding`` is set to the detected ``apparent_encoding``. When the
        server answered with an error status the response is still returned,
        with its encoding left untouched. ``None`` when no response was
        obtained at all.
    """
    import requests

    # Bind the name up front so the final return is valid even when the
    # request fails before a response object exists.
    response = None
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        # Use the encoding detected from the body so ``.text`` decodes with it
        # instead of whatever the headers declared.
        response.encoding = response.apparent_encoding
    except requests.exceptions.RequestException as e:
        print("error: ", e)
    return response
def tt_to_words(tt, max_words=5):
    """Extract search keywords from one thread-listing line.

    The line is reduced to its title, short bracketed tags and ``&...;``
    entities are blanked out, and runs of two or more kanji or two or more
    katakana characters are collected as keywords. The cleaned title is
    printed as a trace before the keywords are extracted.

    Args:
        tt: One line of text, e.g. ``"123.dat<>TITLE [456] (78)"``.
        max_words: Maximum number of keywords to return.

    Returns:
        A list with at most *max_words* keyword strings, in order of
        appearance. Empty when nothing matches.
    """
    import re

    # Round-trip through Shift_JIS, silently dropping any character that the
    # encoding cannot represent (emoji, for instance).
    s = tt.encode('shift-jis', errors='ignore').decode('shift-jis')
    # Reduce "<digits>.dat<>TITLE [digits] (digits)" to just TITLE.
    s = re.sub(r'\d+\.dat<>(.+)\s+\[\d+?\]\s+\(\d+\)', r'\1', s)
    # Blank out 【..】 tags holding at most two characters, and &...; entities.
    s = re.sub(r'(?:【.{0,2}】)|(?:&.+?;)', ' ', s)
    print(s)
    # Kanji runs (U+4E00-U+9FFF) or katakana runs (U+30A0-U+30FF, which
    # already includes the long-vowel mark U+30FC), each two or more long.
    words = re.findall(r'(?:[\u4E00-\u9FFF]{2,})|(?:[\u30A0-\u30FF]{2,})', s)
    return words[:max_words]
import sys
from urllib.parse import quote

# Thread listings (subject.txt) of the "poverty" board on two hosts.
url3ch = "http://fad.3chan.jp/poverty/subject.txt"  # not used below
url5ch = "http://greta.5ch.net/poverty/subject.txt"

res = safe_requests(url5ch)
# Stop early when nothing usable came back, rather than failing on ``.text``
# or parsing an error page as if it were the listing.
if res is None or not res.ok:
    sys.exit("error: could not download " + url5ch)

# Split on the byte level: bytes.splitlines() only breaks on \n, \r and \r\n,
# whereas str.splitlines() would also break on other Unicode separators.
threads = res.text.encode("utf-8").splitlines()
if len(threads) < 2:
    sys.exit("error: thread listing has fewer than two lines")

# NOTE(review): the second line is used, not the first — presumably the first
# entry is not a regular thread; confirm.
tt = threads[1].decode("utf-8")
words = tt_to_words(tt)

# Build a find.5ch.net search URL with each keyword percent-encoded and the
# keywords joined by '+'.
q = 'https://find.5ch.net/search?q=' + '+'.join(map(quote, words))
print(tt)
print(words)
print(q)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement