Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Split the term-count matrix stored in 'DTMNegatif.csv' into train / validate /
# test CSV files, then build the Naive Bayes training inputs from the train
# split: a feature-selected matrix and a per-term frequency table.
# NOTE(review): 'DTM' presumably means document-term matrix (rows = documents,
# columns = terms); the code only relies on the columns being numeric.
import pandas as pd


def split_dataset(df, train_end=0.6, validate_end=0.8, random_state=None):
    """Shuffle the rows of *df* and cut them into three consecutive pieces.

    Args:
        df: DataFrame whose rows are to be distributed.
        train_end: Cumulative fraction of rows at which the train piece ends.
        validate_end: Cumulative fraction of rows at which the validate piece
            ends; the remaining rows form the test piece.
        random_state: Forwarded to ``DataFrame.sample``; ``None`` keeps the
            shuffle non-deterministic.

    Returns:
        A ``(train, validate, test)`` tuple of DataFrames.
    """
    shuffled = df.sample(frac=1, random_state=random_state)
    n_rows = len(df)
    first_cut = int(train_end * n_rows)
    second_cut = int(validate_end * n_rows)
    # Positional slicing yields the same pieces as np.split(shuffled, [a, b])
    # without going through the deprecated DataFrame.swapaxes.
    return (
        shuffled.iloc[:first_cut],
        shuffled.iloc[first_cut:second_cut],
        shuffled.iloc[second_cut:],
    )


def select_features(counts, min_frequency=2):
    """Keep the columns of *counts* whose column sum is at least *min_frequency*.

    Column sums are computed once, from the untouched input, so the appended
    'Total' row can never be counted a second time.

    Args:
        counts: DataFrame of numeric per-row counts, one column per term.
            It is not modified.
        min_frequency: Columns whose sum is below this value are dropped.

    Returns:
        A ``(selected, frequency_table)`` tuple. ``selected`` is a copy of the
        kept columns with an extra 'Total' row holding each column sum (the
        row is omitted when no column is kept). ``frequency_table`` has one
        row per kept column with the columns 'Kata' (the column name) and
        'Frekuensi' (its sum).
    """
    totals = counts.sum()
    kept_totals = totals[totals >= min_frequency]

    selected = counts.loc[:, kept_totals.index].copy()
    if not kept_totals.empty:
        # Assigning a row to a frame without columns is not meaningful, hence
        # the guard.
        selected.loc['Total'] = kept_totals

    frequency_table = pd.DataFrame({
        'Kata': list(kept_totals.index),
        'Frekuensi': list(kept_totals),
    })
    return selected, frequency_table


def main():
    """Run the split and the feature selection on the CSV files in the cwd."""
    dtm = pd.read_csv('DTMNegatif.csv', index_col=0)
    train, validate, test = split_dataset(dtm)
    print(train)
    train.to_csv("trainNegatif.csv", sep=',')
    validate.to_csv("validateNegatif.csv", sep=',')
    test.to_csv("testNegatif.csv", sep=',')

    # Training data for Naive Bayes: re-read the train split exactly as it was
    # written to disk, then select features and tabulate term frequencies.
    train_counts = pd.read_csv('trainNegatif.csv', index_col=0)
    selected, frequency_table = select_features(train_counts)
    selected.to_csv("SeleksiFiturNegatif.csv", sep=',')
    frequency_table.to_csv("tabelFrekuensiNegatif.csv", sep=',')


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement