Advertisement
Guest User

Untitled

a guest
Aug 17th, 2019
127
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.68 KB | None | 0 0
  # Substitutions applied, in this order, to every post in ns['isi'].
  # Order matters: links, mentions and hashtags are removed while they are
  # still intact, i.e. before the punctuation/digit step can break them up
  # (e.g. "#2019goals" or "@user123").
  _POST_CLEANUP_STEPS = (
      # Website links.
      (re.compile(r'http[s]?(?:[a-z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-f][0-9a-f]))+'), ""),
      # Account mentions (@name).
      (re.compile(r'@\w+'), ""),
      # Hashtags (#tag, at least two word characters long).
      (re.compile(r"#+\w+[\w'\-]*\w+"), ""),
      # Punctuation and digits become a space so neighbouring words stay apart.
      (re.compile(r'[-.,"? !:;()/|0-9]'), " "),
      # Emoticon characters. ':', '(', ')' and '|' have already been blanked
      # by the previous step, so in practice this drops '^', '+' and '$'.
      (re.compile(r'[()^:|+$]'), ""),
      # The markers RT, RW and CC, matched as whole tokens only so that words
      # which merely contain those letters (e.g. "ACCOUNT") are left alone.
      (re.compile(r'\b(?:RT|RW|CC)\b'), ""),
      # Collapse any run of newlines, spaces and square brackets to one space.
      (re.compile(r'[\n \[\]]+'), " "),
  )


  def _clean_post(text):
      """Return *text* with links, mentions, hashtags, punctuation, digits
      and RT/RW/CC markers removed.

      Whitespace runs are collapsed to a single space and leading/trailing
      whitespace is stripped, so a post with nothing left after cleaning
      comes back as the empty string.
      """
      for pattern, replacement in _POST_CLEANUP_STEPS:
          text = pattern.sub(replacement, text)
      return text.strip()


  # Cleaned posts, in input order; posts that end up empty are dropped.
  post_punctuation = [
      cleaned for cleaned in map(_clean_post, ns['isi']) if cleaned
  ]
  # Bare expression: shows the result when run as the last line of a
  # notebook cell; it has no effect in a plain script.
  post_punctuation
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement