Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Load the dataset, split it into train/test sets, plot the label
# distribution of both splits, and turn the raw text into uni-/bi-gram
# count features.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

# Forward slashes resolve on Windows as well as POSIX systems; the previous
# backslash literal relied on the invalid escape sequence "\d" and only
# pointed at the intended file on Windows.
DATASET_PATH = './datasets/df_generalized.csv'

# Fixed seed so the split (and everything derived from it) is reproducible.
RANDOM_SEED = 42

df = pd.read_csv(DATASET_PATH)

# Split the dataset into train (80%) and test (20%) parts.
x_train, x_test, y_train, y_test = train_test_split(
    df['text'],
    df['type'],
    test_size=0.2,
    random_state=RANDOM_SEED,
)

# Overlay the label histograms of both splits; the legend identifies which
# is which. Original author's observation: the number of fake and real
# samples is balanced in both splits.
y_train.plot(kind='hist', bins=80, label='train')
y_test.plot(kind='hist', bins=80, label='test')
plt.legend()
plt.show()

print(x_train.shape)
print(y_train.shape)

# In[7]:

# Transform the text into bag-of-words count vectors over unigrams and
# bigrams (this is a count representation, not word2vec embeddings).
#
# min_df / max_df are interpreted as absolute document counts when given as
# ints and as proportions when given as floats. The former integer
# `max_df=1` therefore kept only terms occurring in a single document, which
# throws away every term shared between documents (and hence nearly all
# overlap between train and test). `max_df=1.0` keeps terms regardless of how
# many documents contain them. `min_df=1` keeps the same vocabulary as the
# old integer 0 while staying inside the range scikit-learn validates.
cv = CountVectorizer(min_df=1, max_df=1.0, ngram_range=(1, 2))
cv_train = cv.fit_transform(x_train)  # learn the vocabulary on train only
cv_test = cv.transform(x_test)        # reuse that vocabulary for test
print('train 2-vector', cv_train.shape)
- # In[8]:
- #
- #
- # #build a fully connected (dense) neural network model
- # from keras.models import Sequential
- # from keras.layers import Dense
- # model=Sequential()
- # model.add(Dense(units=100,activation='relu',input_dim = cv_train.shape[1]))
- # model.add(Dense(units=50,activation='relu'))
- # model.add(Dense(units=25,activation='relu'))
- # model.add(Dense(units=10,activation='relu'))
- # model.add(Dense(units=1,activation='sigmoid'))
- #
- #
- # # In[9]:
- #
- #
- # print(cv_train.toarray())
- #
- #
- # # In[10]:
- #
- #
- # #Training process
- # model.compile(optimizer = 'adam' , loss = 'binary_crossentropy' , metrics = ['accuracy'])
- # model.fit(cv_train.toarray(),y_train , epochs = 5)
- #
- #
- # # In[11]:
- #
- #
- # #test
- # from sklearn.metrics import classification_report,confusion_matrix,accuracy_score
- # pre=model.predict(cv_test)
- # for i in range(len(pre)):
- # if(pre[i]>0.5):
- # pre[i]=1
- # else:
- # pre[i]=0
- # accuracy_score(pre,y_test)
- #
- #
- # # In[13]:
- #
- #
- # #Confusion matrix test
- # matrix=confusion_matrix(y_test,pre)
- # accuracy=(matrix[0][0]+matrix[1][1])/(matrix[0][0]+matrix[0][1]+matrix[1][0]+matrix[1][1])
- # print('The accuracy is ',accuracy)
- # print(cv_train)
- #
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement