Advertisement
mskf

Untitled

Aug 7th, 2020
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.95 KB | None | 0 0
  1. import pandas as pd
  2. import matplotlib.pyplot as plt
  3.  
  4. df=pd.read_csv('.\datasets\df_generalized.csv')
  5.  
  6.  
  7. #split the dataset to train and test
  8. from sklearn.model_selection import train_test_split
  9. import matplotlib.pyplot as plt
  10. x_train,x_test,y_train,y_test=train_test_split(df.text,df.type,test_size = 0.2)
  11. y_train.plot(kind='hist',bins=80)
  12. y_test.plot(kind='hist',bins=80)
  13. #blue is train, orange is test ,we can see that the number of fake and real are balanced
  14. plt.show()
  15. print(x_train.shape)
  16. print(y_train.shape)
  17.  
  18. # In[7]:
  19.  
  20.  
  21. #trainsfrom text to word2vector
  22. from sklearn.feature_extraction.text import CountVectorizer
  23. cv=CountVectorizer(min_df=0,max_df=1,ngram_range=(1,2))
  24. cv_train=cv.fit_transform(x_train,y=y_train)
  25. cv_test=cv.transform(x_test)
  26. print('train 2-vector',cv_train.shape)
  27.  
  28.  
  29. # In[8]:
  30. #
  31. #
  32. # #build a fully connected (dense) neural network model
  33. # from keras.models import Sequential
  34. # from keras.layers import Dense
  35. # model=Sequential()
  36. # model.add(Dense(units=100,activation='relu',input_dim = cv_train.shape[1]))
  37. # model.add(Dense(units=50,activation='relu'))
  38. # model.add(Dense(units=25,activation='relu'))
  39. # model.add(Dense(units=10,activation='relu'))
  40. # model.add(Dense(units=1,activation='sigmoid'))
  41. #
  42. #
  43. # # In[9]:
  44. #
  45. #
  46. # print(cv_train.toarray())
  47. #
  48. #
  49. # # In[10]:
  50. #
  51. #
  52. # #Training process
  53. # model.compile(optimizer = 'adam' , loss = 'binary_crossentropy' , metrics = ['accuracy'])
  54. # model.fit(cv_train.toarray(),y_train , epochs = 5)
  55. #
  56. #
  57. # # In[11]:
  58. #
  59. #
  60. # #test
  61. # from sklearn.metrics import classification_report,confusion_matrix,accuracy_score
  62. # pre=model.predict(cv_test)
  63. # for i in range(len(pre)):
  64. # if(pre[i]>0.5):
  65. # pre[i]=1
  66. # else:
  67. # pre[i]=0
  68. # accuracy_score(pre,y_test)
  69. #
  70. #
  71. # # In[13]:
  72. #
  73. #
  74. # #Confusion matrix test
  75. # matrix=confusion_matrix(y_test,pre)
  76. # accuracy=(matrix[0][0]+matrix[1][1])/(matrix[0][0]+matrix[0][1]+matrix[1][0]+matrix[1][1])
  77. # print('The accuracy is ',accuracy)
  78. # print(cv_train)
  79. #
  80.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement