Advertisement
Guest User

Untitled

a guest
Oct 15th, 2018
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.94 KB | None | 0 0
  # -*- coding: utf-8 -*-
"""Extract the text between two Russian markers from HTML stored in JSON lines.

Reads ``orders.json`` (one JSON object per line), looks through each object's
``fields`` for the first HTML value whose lower-cased body text contains the
markers 'установил:' ("established:") and 'решил:' ("decided:"), and stores the
text between them keyed by the zero-based line index.  The collected mapping is
written to ``cases_parsed_lemmatized.json``.

NOTE(review): the original paste lost its indentation; the nesting of ``break``
(inside ``if res:``) and of the line counter (per input line) is reconstructed
and should be confirmed against the author's intent.
"""
import json
import re

from bs4 import BeautifulSoup as Soup
from pymystem3 import Mystem

# Lemmatizer instance. It is created here but never used below.
# NOTE(review): the output file name says "lemmatized", yet no lemmatization
# step is performed in this script - confirm whether one is missing.
m = Mystem(entire_input=False)

# Greedy capture with DOTALL: spans newlines and runs up to the LAST 'решил:'.
patt = re.compile('установил:(.*)решил:', re.DOTALL)

results = {}

with open('orders.json', 'r', encoding='utf8') as f:
    # enumerate() replaces the manual counter; n is the zero-based line index.
    for n, line in enumerate(f):
        if not line.strip():
            # A blank line is not a JSON document; skip it instead of crashing.
            continue
        element = json.loads(line)
        for field in element['fields']:
            text = field['value']
            # Only string values that look like an HTML document are parsed.
            if not isinstance(text, str) or '<HTML>' not in text:
                continue
            # Name the parser explicitly so the result does not depend on
            # which optional parser libraries happen to be installed.
            soup = Soup(text, 'html.parser')
            # Fall back to the whole document when there is no <body> tag.
            root = soup.body if soup.body is not None else soup
            raw = root.get_text().lower()
            res = patt.findall(raw)
            if res:
                results[n] = res[0]
                print(n)
                # First matching field wins for this input line.
                break

# The context manager guarantees the output is flushed and closed.
with open('cases_parsed_lemmatized.json', 'w', encoding='utf-8') as out:
    json.dump(results, out, ensure_ascii=False)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement