Guest User

Untitled

a guest
Oct 14th, 2017
25
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.38 KB | None | 0 0
  1. from POS.POSTagger import POSTagger as PPOSTagger
  2.  
  3. def cleanup_string(text):
  4.     elim_char_string = "qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM12345678910\/\n\t\r.,:-<>!@#$%^&*+=?..\"\')([]«»"
  5.     text = text.translate(str.maketrans(elim_char_string, ' ' * len(elim_char_string)))
  6.     text = text.replace("  ",'')
  7.     text = text.rstrip()
  8.  
  9.     return text
  10.  
  11.  
  12. def process_text_with_perpos_tagger(input_string):
  13.     global tagger,perpos_tagger
  14.     try:
  15.         input_string.strip()
  16.         print("input_string")
  17.         print(input_string)
  18.         elim_tag_list = ['N','Ne','P','Pe','RES','RESe','DET','DETe','CONJ','CONJe','CL','POSTP','PRO','PROe','NUM','NUMe','PUNC ']
  19.  
  20.         tokenized_data = perpos_tagger.parse(input_string.split())
  21.         print(tokenized_data)
  22.         reduced_list = []
  23.         i = 0
  24.         j = 0
  25.         while(i<len(tokenized_data)-j):
  26.             d = tokenized_data[i][1]
  27.             if d not in elim_tag_list:
  28.                 reduced_list.append(tokenized_data[i][0])
  29.                 j+=1
  30.             i+=1
  31.  
  32.  
  33.         if len(reduced_list)>0:
  34.             reduced_string = " ".join(reduced_list)
  35.             reduced_string = cleanup_string(reduced_string)
  36.             print("reduced string")
  37.             print(reduced_string)
  38.             return reduced_string
  39.         else:
  40.             return None
  41.     except:
  42.         return None
Add Comment
Please, Sign In to add comment