Advertisement
Guest User

Untitled

a guest
Jul 21st, 2019
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.85 KB | None | 0 0
  1. def summary_cleaner(text):
  2.    newString = re.sub('"','', text)
  3.    newString = ' '.join([contraction_mapping[t] if t in contraction_mapping else t for t in newString.split(" ")])    
  4.    newString = re.sub(r"'s\b","",newString)
  5.    newString = re.sub("[^a-zA-Z]", " ", newString)
  6.    newString = newString.lower()
  7.    tokens=newString.split()
  8.    newString=''
  9.    for i in tokens:
  10.        if len(i)>1:                                 
  11.            newString=newString+i+' '  
  12.    return newString
  13.  
  14. #Call the above function
  15. cleaned_summary = []
  16. for t in data['Summary']:
  17.    cleaned_summary.append(summary_cleaner(t))
  18.  
  19. data['cleaned_text']=cleaned_text
  20. data['cleaned_summary']=cleaned_summary
  21. data['cleaned_summary'].replace('', np.nan, inplace=True)
  22. data.dropna(axis=0,inplace=True)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement