Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Patterns are compiled once, outside the loop, and applied in the order
# used by _clean_post below.

# URLs: "http"/"https" followed by a run of URL characters.
_URL_RE = re.compile(
    r'http[s]?(?:[a-z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-f][0-9a-f]))+'
)
# Account mentions such as "@user_name1".
_MENTION_RE = re.compile(r'@\w+')
# Hashtags of at least two word characters; may contain "'" or "-" inside.
_HASHTAG_RE = re.compile(r'#+\w[\w\'\-]*\w')
# Retweet / carbon-copy markers. The word boundaries keep the pattern from
# eating the inside of ordinary words (e.g. "SMART", "ACCOUNT").
_RETWEET_RE = re.compile(r'\b(?:RT|RW|CC)\b')
# Punctuation marks and digits, each replaced by a space.
_PUNCT_DIGIT_RE = re.compile(r'[-.,"?!:;()/|0-9]')
# Symbols left over from emoticons / operators; removed without a space.
_SYMBOL_RE = re.compile(r'[\^+$]')
# A literal "[]" pair (written escaped to avoid the nested-set FutureWarning).
_EMPTY_BRACKETS_RE = re.compile(r'\[\]')
_WHITESPACE_RE = re.compile(r'\s+')


def _clean_post(text):
    """Return *text* with URLs, mentions, hashtags, markers and punctuation removed.

    Mentions and hashtags are removed *before* digits and punctuation are
    blanked out; otherwise a tag such as "#2020trend" would lose its digits
    first and no longer be recognised as a hashtag.

    The result has runs of whitespace collapsed to a single space and no
    leading or trailing whitespace; it is an empty string when nothing
    meaningful is left.
    """
    text = _URL_RE.sub("", text)
    text = _MENTION_RE.sub("", text)
    text = _HASHTAG_RE.sub("", text)
    text = _RETWEET_RE.sub("", text)
    text = _PUNCT_DIGIT_RE.sub(" ", text)
    text = _SYMBOL_RE.sub("", text)
    text = _EMPTY_BRACKETS_RE.sub(" ", text)
    return _WHITESPACE_RE.sub(" ", text).strip()


# Cleaned posts, in input order; posts that are empty after cleaning
# (including whitespace-only ones) are dropped.
post_punctuation = [
    cleaned for cleaned in map(_clean_post, ns['isi']) if cleaned
]
# Bare expression kept from the original; presumably there to display the
# result as the last line of a notebook cell.
post_punctuation
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement