Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import enchant
- import requests
- import re
# URL template for the article pages that are spell-checked; "{}" is the id.
ARTICLE_URL = "https://rk.gov.ru/ru/article/show/{}"
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10

_TAG_RE = re.compile('<[^<]+?>')
_NON_CYRILLIC_RE = re.compile('[^а-яёА-ЯЁ]')
_CYRILLIC_WORD_RE = re.compile('[а-яёА-ЯЁ]+')


def load_stop_words(path="stop_world.txt", encoding="utf-8"):
    """Return the set of Cyrillic words found in the stop-word file.

    The file is tokenised with the same Cyrillic-letters-only rule used for
    the article text, so any separator (newlines, commas, spaces) works and
    lookups are whole-word matches rather than substring matches.

    NOTE(review): the file is assumed to be UTF-8; pass ``encoding`` if the
    list was saved in another code page.
    """
    with open(path, encoding=encoding) as stop_file:
        return set(_CYRILLIC_WORD_RE.findall(stop_file.read()))


def candidate_words(html):
    """Extract the words worth spell-checking from an HTML page.

    Tags and every non-Cyrillic character are turned into spaces, then only
    words longer than four letters that are not title-cased are kept.

    Tags are replaced with a space (not removed outright) so that words in
    adjacent elements such as ``</li><li>`` are not glued into one token.
    """
    collapsed = ' '.join(html.split())
    without_tags = _TAG_RE.sub(' ', collapsed)
    words = _NON_CYRILLIC_RE.sub(' ', without_tags).split()
    return [word for word in words if len(word) > 4 and not word.istitle()]


def unknown_words(words, is_known, stop_words):
    """Return the sorted, de-duplicated words that fail the spell check.

    ``is_known`` is a callable returning True for correctly spelled words
    (``main`` passes ``dictionary.check``). Words present in ``stop_words``
    are never reported and are not passed to ``is_known``.
    """
    return sorted(
        {word for word in words if word not in stop_words and not is_known(word)}
    )


def main(article_ids=range(45, 50)):
    """Fetch each article and print its unknown words, one per line.

    Articles whose response status is not OK are skipped. Network errors
    and timeouts raised by ``requests`` are not caught here.
    """
    dictionary = enchant.Dict("ru_RU")
    stop_words = load_stop_words()
    for article_id in article_ids:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(
            ARTICLE_URL.format(article_id), timeout=REQUEST_TIMEOUT
        )
        if response.status_code != requests.codes.ok:
            continue
        words = candidate_words(response.text)
        for word in unknown_words(words, dictionary.check, stop_words):
            print(word)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement