Advertisement
Guest User

Untitled

a guest
Dec 5th, 2019
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.62 KB | None | 0 0
  1. stop_words = set(stopwords.words('english'))
  2. for i in range(len(df.words)) :
  3.     s = df.words[i]
  4.     # table = s.maketrans('', '', string.punctuation)
  5.     # s = s.translate(table)
  6.     # df.words[i] = s
  7.     sentence = s.lower()  # Converting to lowercase
  8.     cleanr = re.compile('<.*?>')
  9.     sentence = re.sub(cleanr, ' ', sentence)  # Removing HTML tags
  10.     sentence = re.sub(r'[?|!|\'|"|#]', r'', sentence)
  11.     sentence = re.sub(r'[.|,|)|(|\|/]', r' ', sentence)  # Removing Punctuations
  12.  
  13.     tokens = word_tokenize(sentence)
  14.     result = [i for i in tokens if not i in stop_words]
  15.  
  16.     df.words[i] = ' '.join(result)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement