Guest User

Untitled

a guest
Feb 23rd, 2021
11
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.45 KB | None | 0 0
  1. import enchant
  2. import requests
  3. import re
  4. import csv
  5.  
  6. dictionary = enchant.Dict("ru_RU")
  7.  
  8. with open("stop.txt") as f:
  9.  
  10. f = set(f.read().split())
  11.  
  12. lll = []
  13.  
  14. #link = input("Введите ссылку: ")
  15. link = "https://rk.gov.ru/ru/article/show/10531"
  16. number = int(''.join(x for x in list(link) if x.isdigit()))
  17. req_link = link[:-len(str(number))]
  18.  
  19. for i in range(number,1,-1):
  20. r = requests.get(f"{req_link}{i}")
  21.  
  22.  
  23. if r.status_code == requests.codes.ok:
  24.  
  25. r = ' '.join(r.text.split())
  26. text = re.sub('<[^<]+?>', '', r)
  27. text = re.sub('[^а-яёА-ЯЁ]', ' ', text).split()
  28.  
  29. l = [x for x in text if len(x)>4 and x.islower()]
  30.  
  31. ll = [x for x in l if dictionary.check(x) == False]
  32.  
  33. s = set(' '.join(set(ll)).split())
  34. print(type(s))
  35. print(type(f))
  36. sf = s-f
  37. if sf != set():
  38. #sf.add(f"{req_link}{i}")
  39. d = {f"{req_link}{i}":sf}
  40. #lll.append(d.items())
  41. #print(sf)
  42. #print(*d.items())
  43.  
  44.  
  45. a = []
  46. for item in lll:
  47. a.extend(item)
  48.  
  49. print(d.keys())
  50. count = 0
  51. f = ""
  52.  
  53. for x in list(link[8:]):
  54. if x == ".":
  55. count += 1
  56. if count == 2:
  57. break
  58. f += x
  59.  
  60. f = f.replace(".","_") + ".csv"
  61.  
  62. with open(f,"w",newline = "") as csv_file:
  63.  
  64. writer = csv.writer(csv_file,delimiter=";")
  65. writer.writerow(d.keys())
  66.  
  67. #with open("all.txt","a") as f:
  68.  
  69. # print(*a,file=f,sep="\n")
  70.  
  71. #with open(f"{f.replace('.','_')}","a") as f:
  72.  
  73. # print(*a,file=f,sep="\n")
  74.  
  75.  
Advertisement
Add Comment
Please, Sign In to add comment