Advertisement
Guest User

Untitled

a guest
Jul 19th, 2019
200
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.50 KB | None | 0 0
  1. class Save(Data):
  2. def __init__(self, server, database, username, driver, group=None):
  3. super().__init__(server, database, username, driver)
  4. self.group = group
  5.  
  6. def get_all_goods(self):
  7. goods_table = pd.read_sql_query(f''' SELECT [p_1], [p_2]
  8. FROM [table] WHERE [group] = '{self.group}' ''',
  9. self.hndl)
  10. return goods_table
  11.  
  12. def data_preprocessing(self):
  13. data_prepared = self.get_all_goods()
  14. data_prepared['desc'] = data_prepared[['p_1', 'p_2']].apply(lambda x: ' '.join(x), axis=1)
  15. return data_prepared
  16.  
  17. @staticmethod
  18. def data_cleaning(str):
  19. words = []
  20. str = re.sub(r"(w*(.w*))", ' ', str)
  21. str = re.sub(r"d*_d*", ' ', str)
  22. for i in re.split('[;,.,n,s,:,-,+,(,),=,/,«,»,@,!,?,",_,*]',str):
  23. if len(i) > 1:
  24. words.append(i)
  25. return words
  26.  
  27. def data_morphy(self, text):
  28. morph = pymorphy2.MorphAnalyzer()
  29. tokens = [morph.parse(token)[0].normal_form for token in self.data_cleaning(text) if token != ' ']
  30. tokens = list(dict.fromkeys(tokens))
  31. text = ' '.join(tokens)
  32. return text
  33.  
  34. def data_final(self):
  35. data_final = self.data_preprocessing()
  36. data_final['desc'] = data_final['desc'].apply(lambda x: self.data_morphy(x))
  37. return data_final
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement