Advertisement
Guest User

Untitled

a guest
Jun 14th, 2022
24
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.02 KB | None | 0 0
  1. def safe_requests(url):
  2. import requests
  3. try:
  4. response = requests.get(url)
  5. response.raise_for_status()
  6. response.encoding = response.apparent_encoding
  7. except requests.exceptions.RequestException as e:
  8. print("error: ", e)
  9.  
  10. return response
  11.  
  12. def tt_to_words(tt):
  13. import re
  14. s = tt.encode('shift-jis', errors='ignore').decode('shift-jis')
  15. s = re.sub('\d+.dat<>(.+)\s+\[\d+?\]\s+\(\d+\)','\\1', s)
  16. s = re.sub('(?:【.{0,2}】)|(?:&.+?;)', ' ', s)
  17. print(s)
  18. words = re.findall('(?:[\u4E00-\u9FFF]{2,})|(?:[\u30A0-\u30FFー]{2,})', s)
  19. words = words[0:5]
  20. return words
  21.  
  22. url3ch = "http://fad.3chan.jp/poverty/subject.txt"
  23. url5ch = "http://greta.5ch.net/poverty/subject.txt"
  24.  
  25. res = safe_requests(url5ch)
  26. threads = res.text.encode("utf-8").splitlines()
  27. tt = threads[1].decode("utf-8")
  28.  
  29. words = tt_to_words(tt)
  30.  
  31. from urllib.parse import quote
  32. q = 'https://find.5ch.net/search?q=' + '+'.join(map(quote, words))
  33.  
  34. print(tt)
  35. print(words)
  36. print(q)
  37.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement