Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import csv
import re
from urllib.parse import urlsplit

import enchant
import requests
# Spell-check crawler: walks numbered article pages downwards from START_LINK,
# collects lowercase Cyrillic words that the Russian dictionary rejects and
# that are not listed in the stop-word file, and writes them to a CSV report
# (one row per page: the page URL followed by its unknown words).

# Page to start from; the trailing number is the highest article id to check.
# To ask the user instead, read the link with input() before calling main().
START_LINK = "https://rk.gov.ru/ru/article/show/10531"
STOP_WORDS_PATH = "stop.txt"
DICTIONARY_TAG = "ru_RU"
# Words shorter than this are ignored.
MIN_WORD_LENGTH = 5
# Seconds to wait for a page before giving up on it.
REQUEST_TIMEOUT = 10

TAG_RE = re.compile(r"<[^<]+?>")
CYRILLIC_WORD_RE = re.compile(r"[а-яёА-ЯЁ]+")
TRAILING_ID_RE = re.compile(r"([0-9]+)/?$")


def split_link(link: str) -> tuple:
    """Split *link* into ``(prefix, article_id)``.

    Only the run of digits at the very end of the link (optionally followed
    by a single ``/``) is treated as the article id, so digits elsewhere in
    the host or path do not corrupt the result.

    Raises:
        ValueError: if the link does not end with a number.
    """
    link = link.strip()
    match = TRAILING_ID_RE.search(link)
    if match is None:
        raise ValueError(f"link does not end with a numeric id: {link!r}")
    return link[:match.start(1)], int(match.group(1))


def csv_name_for(link: str) -> str:
    """Return the report file name derived from the host of *link*.

    The first two dot-separated labels of the host are joined with ``_``
    (``https://rk.gov.ru/...`` -> ``rk_gov.csv``). The host is taken from the
    parsed URL, so the scheme length and the URL path never leak into the
    file name.

    Raises:
        ValueError: if no host can be parsed from the link.
    """
    host = urlsplit(link).hostname or ""
    labels = [label for label in host.split(".") if label]
    if not labels:
        raise ValueError(f"cannot determine host of link: {link!r}")
    return "_".join(labels[:2]) + ".csv"


def load_stop_words(path: str) -> set:
    """Read whitespace-separated words that must never be reported."""
    # Explicit encoding so the result does not depend on the platform locale;
    # adjust if the stop-word file is stored in another encoding.
    with open(path, encoding="utf-8") as stop_file:
        return set(stop_file.read().split())


def extract_candidate_words(html: str) -> set:
    """Return the distinct lowercase Cyrillic words of *html* worth checking.

    Tags are replaced with a space (not removed outright) so that words
    separated only by markup are not glued into one bogus word. Words that
    are shorter than MIN_WORD_LENGTH or not entirely lowercase are skipped.
    """
    text = TAG_RE.sub(" ", html)
    return {
        word
        for word in CYRILLIC_WORD_RE.findall(text)
        if len(word) >= MIN_WORD_LENGTH and word.islower()
    }


def find_unknown_words(html: str, is_known, stop_words) -> set:
    """Return candidate words of *html* rejected by *is_known*.

    *is_known* is a callable taking a word and returning a truthy value when
    the word is spelled correctly; words present in *stop_words* are dropped
    before it is consulted.
    """
    candidates = extract_candidate_words(html).difference(stop_words)
    return {word for word in candidates if not is_known(word)}


def iter_unknown_words(prefix: str, last_id: int, fetch, is_known, stop_words):
    """Yield ``(url, sorted_words)`` for every page that has unknown words.

    Pages ``prefix + str(n)`` are visited for n from *last_id* down to 1
    inclusive. *fetch* is a callable taking a URL and returning the page HTML,
    or None when the page should be skipped.
    """
    for article_id in range(last_id, 0, -1):
        url = f"{prefix}{article_id}"
        html = fetch(url)
        if html is None:
            continue
        unknown = find_unknown_words(html, is_known, stop_words)
        if unknown:
            yield url, sorted(unknown)


def fetch_page(url: str):
    """Download *url*; return its text, or None on any failure.

    A network error or a non-OK status only skips this page, so one bad
    request does not abort the whole crawl.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as error:
        print(f"{url}: {error}")
        return None
    if response.status_code != requests.codes.ok:
        return None
    return response.text


def main() -> None:
    """Crawl the articles below START_LINK and write the CSV report."""
    dictionary = enchant.Dict(DICTIONARY_TAG)
    stop_words = load_stop_words(STOP_WORDS_PATH)
    prefix, last_id = split_link(START_LINK)

    # Rows are written as soon as they are found, so an interrupted run
    # still leaves the results gathered so far in the file.
    report_name = csv_name_for(START_LINK)
    with open(report_name, "w", newline="", encoding="utf-8") as csv_file:
        writer = csv.writer(csv_file, delimiter=";")
        pages = iter_unknown_words(
            prefix, last_id, fetch_page, dictionary.check, stop_words
        )
        for url, words in pages:
            print(url)
            writer.writerow([url, *words])


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment