Advertisement
Guest User

Untitled

a guest
Feb 21st, 2021
17
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.60 KB | None | 0 0
import re

import enchant
import requests

  5. dictionary = enchant.Dict("ru_RU")
  6.  
  7. with open("stop_world.txt") as f:
  8.  
  9. f = f.read()
  10.  
  11. for i in range(45,50):
  12. r = requests.get(f"https://rk.gov.ru/ru/article/show/{i}")
  13.  
  14.  
  15. if r.status_code == requests.codes.ok:
  16. r = ' '.join(r.text.split())
  17. text = re.sub('<[^<]+?>', '', r)
  18. text = re.sub('[^а-яёА-ЯЁ]', ' ', text).split()
  19.  
  20. l = [x for x in text if len(x)>4 and x.istitle() == False]
  21. ll = []
  22.  
  23. for i in l:
  24. if dictionary.check(i) == False:
  25. ll.append(i)
  26. s = ' '.join(set(ll))
  27.  
  28. for i in f:
  29. if i not in f:
  30. print(i)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement