Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class Save(Data):
    """Load the goods of one group and build a normalised text description.

    The pipeline is: ``get_all_goods`` (SQL read) -> ``data_preprocessing``
    (concatenate ``p_1`` and ``p_2`` into ``desc``) -> ``data_final``
    (clean, tokenise and lemmatise ``desc``).

    NOTE(review): ``self.hndl`` is not defined in this class; it is presumably
    the database connection created by ``Data.__init__`` -- confirm.
    """

    # Tokens that contain a dot (e.g. "file.txt", "3.5") are dropped entirely.
    _DOTTED_TOKEN_RE = re.compile(r"\w*\.\w*")
    # Digit groups joined by an underscore (e.g. "12_34") are dropped entirely.
    _UNDERSCORE_NUMBER_RE = re.compile(r"\d*_\d*")
    # Delimiters between words. The hyphen is escaped on purpose: written as
    # ",-," inside a character class it forms a one-character range and the
    # hyphen itself is never treated as a delimiter.
    _DELIMITER_RE = re.compile(r'[;,.\s:\-+()=/«»@!?"_*]')

    # Shared analyzer, created on first use (see _get_morph).
    _morph = None

    def __init__(self, server, database, username, driver, group=None):
        """Forward the connection settings to ``Data`` and remember ``group``.

        Args:
            server, database, username, driver: passed unchanged to
                ``Data.__init__``.
            group: value matched against the ``[group]`` column when reading
                goods.
        """
        super().__init__(server, database, username, driver)
        self.group = group

    def get_all_goods(self):
        """Return a DataFrame with columns ``p_1`` and ``p_2`` for ``self.group``.

        The group value is sent as a bound parameter instead of being pasted
        into the SQL text, so quotes inside it cannot alter the statement.

        NOTE(review): ``?`` is the qmark placeholder used by ODBC drivers such
        as pyodbc -- confirm that ``self.hndl`` is ODBC-backed.
        """
        query = "SELECT [p_1], [p_2] FROM [table] WHERE [group] = ?"
        # The previous query compared against the value rendered as a quoted
        # string literal, so it is bound as text to keep the same comparison.
        return pd.read_sql_query(query, self.hndl, params=(str(self.group),))

    def data_preprocessing(self):
        """Return the goods table with an extra ``desc`` column.

        ``desc`` is ``p_1`` and ``p_2`` joined by a single space. NULL values
        are treated as empty text instead of raising ``TypeError``.
        """
        data_prepared = self.get_all_goods()
        parts = data_prepared[['p_1', 'p_2']].fillna('').astype(str)
        data_prepared['desc'] = parts['p_1'] + ' ' + parts['p_2']
        return data_prepared

    @staticmethod
    def data_cleaning(str):
        """Split a text into words, dropping noise tokens.

        Dotted tokens and underscore-joined digit groups are blanked out, the
        rest is split on punctuation and whitespace, and fragments of one
        character or less are discarded.

        Args:
            str: the text to clean. The name shadows the builtin but is kept
                so existing keyword callers continue to work.

        Returns:
            list of the remaining word strings, in their original order.
        """
        text = str
        text = Save._DOTTED_TOKEN_RE.sub(' ', text)
        text = Save._UNDERSCORE_NUMBER_RE.sub(' ', text)
        return [word for word in Save._DELIMITER_RE.split(text) if len(word) > 1]

    @classmethod
    def _get_morph(cls):
        """Return the shared ``MorphAnalyzer``, creating it on first use."""
        # One analyzer is reused for every row instead of building a new one
        # on each data_morphy call.
        if Save._morph is None:
            Save._morph = pymorphy2.MorphAnalyzer()
        return Save._morph

    def data_morphy(self, text):
        """Return ``text`` as space-joined normal forms without duplicates.

        Each cleaned token is replaced by the normal form of its first parse
        result; repeated normal forms are removed, keeping first-seen order.
        """
        morph = self._get_morph()
        lemmas = [
            morph.parse(token)[0].normal_form
            for token in self.data_cleaning(text)
        ]
        # dict.fromkeys removes duplicates while preserving insertion order.
        return ' '.join(dict.fromkeys(lemmas))

    def data_final(self):
        """Return the preprocessed table with ``desc`` lemmatised row by row."""
        data_final = self.data_preprocessing()
        data_final['desc'] = data_final['desc'].apply(self.data_morphy)
        return data_final
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement