Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- import pymorphy2
- import pandas as pd
class ItsAlive:
    """Prototype pipeline that turns dict-literal records into word tokens.

    Each line of the input file is evaluated as a Python dict literal; the
    values of the keys that are not excluded are concatenated, stripped of
    punctuation and digits, and split into whitespace-separated tokens.
    """

    def reader(self, path="data.txt"):
        """Return the contents of *path* split on newline characters.

        The file is decoded as UTF-8. Because the text is split (not read
        line by line), a file ending in a newline yields a trailing empty
        string as the last element.
        """
        # The context manager closes the handle even if read() raises.
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read().split("\n")

    def edit_line(self, str_file, bad_keys):
        """Join the values of one record, skipping the keys in *bad_keys*.

        Args:
            str_file: Source text of a dict literal mapping keys to strings.
            bad_keys: Container of keys whose values must be left out.

        Returns:
            The kept values in dict order, each followed by a single space
            (so a non-empty result always ends with a space).

        Raises:
            TypeError: If a kept value is not a string.
        """
        # NOTE(review): eval() executes arbitrary code found in the input.
        # If data.txt can come from an untrusted source, switch to
        # ast.literal_eval(), which accepts literals only.
        dict_file = eval(str_file)
        return "".join(
            value + " "
            for key, value in dict_file.items()
            if key not in bad_keys
        )

    def work(self, limit=4):
        """Tokenize the first *limit* records and print an empty stats table.

        Args:
            limit: Maximum number of leading lines of the input to process.
                Files shorter than *limit* are processed in full instead of
                raising IndexError; blank lines are skipped.
        """
        # Not used yet: kept for the planned lemmatization step (see TODO).
        morph = pymorphy2.MorphAnalyzer()
        bad_keys = [
            "url",
            "rating_1",
            "rating_2",
            "rating_3",
            "rating_4",
            "rating_5",
            "review_date",
            "recom_number",
        ]
        for line in self.reader()[:limit]:
            if not line.strip():
                # split('\n') leaves '' after a trailing newline; eval('')
                # would raise SyntaxError.
                continue
            text = self.edit_line(line, bad_keys)
            # The range '!'..'@' covers ASCII punctuation and the digits 0-9.
            text = re.sub(r'[!-@]', "", text)
            tokens = text.split()
            # A set literal is rejected by pandas ("Set type is unordered");
            # the four names are presumably meant as column headers.
            df = pd.DataFrame(columns=["Word", "quantity", "tf", "tf-idf"])
            print(df)
            # TODO: lemmatize each token with morph.parse(w)[0].normal_form
            # and fill the table from `tokens`.
# Script entry: build the pipeline object and run it immediately.
a = ItsAlive()
a.work()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement