Advertisement
Guest User

Untitled

a guest
Apr 21st, 2019
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.14 KB | None | 0 0
  1. import re
  2. import pymorphy2
  3. import pandas as pd
  4.  
  5.  
  6. class ItsAlive:
  7.  
  8. def reader(self):
  9. op = open("data.txt", "r", encoding='utf-8')
  10. text = op.read().split('\n')
  11. op.close()
  12. return text
  13.  
  14. def edit_line(self, str_file, bad_keys):
  15. dict_file = eval(str_file)
  16. new_str_file = ''
  17.  
  18. for word in dict_file:
  19. if word not in bad_keys:
  20. new_str_file += dict_file[word] + ' '
  21.  
  22. return new_str_file
  23.  
  24. def work(self):
  25. morph = pymorphy2.MorphAnalyzer()
  26. files = self.reader()
  27. bad_keys = ["url", "rating_1", "rating_2", "rating_3", "rating_4", "rating_5", "review_date", "recom_number"]
  28. i = 0
  29.  
  30. while i < 4:
  31. file = self.edit_line(files[i], bad_keys)
  32. file = re.sub(r'[!-@]', "", file)
  33. file = file.split()
  34. fileLenght = len(file)
  35. df = pd.DataFrame({"Word", "quantity", "tf", "tf-idf"})
  36. print(df)
  37. #
  38. # for w in file:
  39. # w = morph.parse(w)[0].normal_form
  40.  
  41. i += 1
  42.  
  43.  
  44. a = ItsAlive()
  45. a.work()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement