Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import csv
- import numpy as np
- import re
# Load every row of the CSV into memory as a list of lists of strings.
# newline='' follows the csv module's guidance so that line breaks embedded
# in quoted fields are handled by the parser instead of the file object.
# NOTE(review): no encoding is given, so the platform default is used --
# confirm that matches the file.
with open(r"D:\Users\sentiment analysis\sentiment\sentiment3.csv", newline='') as f:
    reader = csv.reader(f)
    # csv.reader is lazy: it must be drained while the file is still open.
    data = list(reader)
# Deletion table for the characters removed by replace_chars: < " & '
# NOTE(review): these look like HTML entities (&lt; &quot; &amp; &#39;) that
# were decoded when the script was pasted -- confirm against the original.
_UNWANTED_CHARS = str.maketrans('', '', '<"&\'')


def replace_chars(s):
    """Return *s* with every ``<``, ``"``, ``&`` and ``'`` character removed.

    Args:
        s: The string to clean.

    Returns:
        A new string without the four characters above; all other
        characters are kept in their original order.
    """
    # One translate pass replaces the four chained .replace() calls.
    return s.translate(_UNWANTED_CHARS)
def recursively_apply(l, f):
    """Apply *f* to every string inside the (possibly nested) list *l*.

    The list is modified in place: each string element is replaced by
    ``f(element)``, each nested list is processed recursively, and
    elements of any other type are left untouched.

    Args:
        l: A list whose elements may be strings, lists, or other objects.
        f: A callable taking one string and returning its replacement.

    Returns:
        The same list object *l*, after modification.
    """
    for n, i in enumerate(l):
        if isinstance(i, list):
            # Nested lists are mutated in place as well; the assignment
            # simply stores the same object back.
            l[n] = recursively_apply(i, f)
        elif isinstance(i, str):
            l[n] = f(i)
    return l
# Strip the unwanted characters from every field of every row (in place).
data = recursively_apply(data, replace_chars)

# Column labels for the DataFrame.
# NOTE(review): this assumes every CSV row has exactly six fields, in this
# order -- pandas raises if the row width does not match.
headers = ["target", "id", "date", "flag", 'user', "text"]
data = pd.DataFrame(data, columns=headers)
def remove_pattern(input_txt, pattern):
    """Return *input_txt* with every match of the regex *pattern* removed.

    Args:
        input_txt: The text to clean.
        pattern: A regular expression (string or compiled pattern).

    Returns:
        The text with all non-overlapping matches deleted.
    """
    # Substitute with the pattern itself. Re-using each matched string as a
    # new, unescaped regex (as a findall + sub loop would) misfires when a
    # match contains metacharacters, and lets a short match such as "@a"
    # chew the front off a longer one such as "@ab".
    return re.sub(pattern, '', input_txt)
# Build the cleaned 'tweet' column from the raw 'text' column.

# 1. Drop @mentions: an "@" followed by any run of word characters.
#    Series.apply keeps the result a pandas Series aligned on the same index.
data['tweet'] = data['text'].apply(remove_pattern, args=(r"@[\w]*",))

# 2. Remove links: first anything starting with "http", then anything
#    starting with "www", each up to the next whitespace.
data['tweet'] = (
    data['tweet']
    .replace(r'http\S+', '', regex=True)
    .replace(r'www\S+', '', regex=True)
)

# 3. Keep only whitespace-separated tokens longer than three characters,
#    re-joined with single spaces.
data['tweet'] = data['tweet'].apply(
    lambda x: ' '.join([w for w in x.split() if len(w) > 3])
)

# 4. Strip everything that is neither a word character nor whitespace.
#    regex=True must be explicit: recent pandas versions treat the pattern
#    as a literal string by default, which would leave punctuation in place.
data['tweet'] = data['tweet'].str.replace(r'[^\w\s]', '', regex=True)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement