Advertisement
Guest User

Untitled

a guest
Aug 22nd, 2019
252
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.38 KB | None | 0 0
  1. from flask import Flask, request, render_template,jsonify,Response,redirect,url_for,session
  2. import os
  3. from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
  4. from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
  5. import pandas as pd
  6. pd.set_option('display.max_colwidth', 3000)
  7. pd.set_option('display.max_rows', 10)
  8.  
  9. import nltk
  10. from nltk.tokenize import sent_tokenize, word_tokenize
  11. from nltk.stem.snowball import SnowballStemmer
  12. from nltk.stem import PorterStemmer
  13. from nltk.corpus import stopwords
  14. import json
  15.  
  16. import re
  17. from sklearn.neighbors import KNeighborsClassifier
  18.  
  19. from sklearn import svm
  20. from sklearn.svm import SVC
  21. from sklearn import model_selection
  22. from sklearn.feature_extraction.text import TfidfVectorizer
  23. from sklearn.preprocessing import LabelEncoder
  24. from sklearn.preprocessing import OneHotEncoder
  25. from sklearn.preprocessing import Normalizer
  26. import numpy as np
  27. from ast import literal_eval
  28. from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
  29.  
  30. app = Flask(__name__)
  31. app.secret_key = "super secret key"
  32.  
  33. #Set Random seed
  34. np.random.seed(400)
  35. #import data csv
  36. trainData = pd.read_csv('dataset.csv',encoding="ISO-8859-1")
  37. test_pd = pd.DataFrame(trainData) #makes this into a panda data frame
  38.  
  39. content = test_pd['Content']
  40.  
  41. #CASE FOLDING & REMOVE REGEX
  42. def preprocess(text):
  43. clean_data = []
  44. for x in (text[:]): #this is Df_pd for Df_np (text[:])
  45. new_text = re.sub('<.*?>', '', x) # remove HTML tags
  46. new_text = re.sub(r'[^\w\s]', '', new_text) # remove punc.
  47. new_text = re.sub(r'\d+','',new_text)# remove numbers
  48. new_text = new_text.lower() # lower case, .upper() for upper
  49. if new_text != '':
  50. clean_data.append(new_text)
  51. return clean_data
  52.  
  53.  
  54. caseFolding = preprocess(content)
  55. test_pd['case_folding']=caseFolding
  56. case_folding = test_pd['case_folding']
  57. #print(case_folding)
  58. #json = case_folding.to_json(orient='index')
  59. #print(json)
  60. #TOKENIZATION
  61. def identify_tokens(row):
  62. review = row['case_folding']
  63. tokens = word_tokenize(review)
  64. # taken only words (not punctuation)
  65. token_words = [w for w in tokens if w.isalpha()]
  66. return token_words
  67. test_pd['token'] = test_pd.apply(identify_tokens,axis=1)
  68. tokens=test_pd['token']
  69.  
  70. #REMOVE STOPWORDS
  71. stops = set(stopwords.words("indonesian"))
  72. #stops = pd.read_csv('D:\Tugas\Skripsi\Data\stopwordbahasa.csv',encoding="ISO-8859-1")
  73. def remove_stops(row):
  74. my_list = row['token']
  75. meaningful_words = [w for w in my_list if not w in stops]
  76. return (meaningful_words)
  77. test_pd['stopwords'] = test_pd.apply(remove_stops, axis=1)
  78. stopword=test_pd['stopwords']
  79.  
  80. #STEMMING
  81. def stem_list(row):
  82. factory = StemmerFactory()
  83. stemmer = factory.create_stemmer()
  84. my_list = row['stopwords']
  85. stemmed_list = [stemmer.stem(word) for word in my_list]
  86. return (stemmed_list)
  87. test_pd['stemming'] = test_pd.apply(stem_list, axis=1)
  88. stem=test_pd['stemming']
  89. test_pd['final']=stem.astype(str) #array list ke string
  90. text_final = test_pd['final'] #final column
  91.  
  92. Train_X, Test_X, Train_Y, Test_Y = model_selection.train_test_split(text_final,test_pd['Label'],test_size=0.3)
  93.  
  94. Tfidf_vect = TfidfVectorizer(max_features=5000)
  95. Tfidf_vect.fit(text_final)
  96.  
  97. Train_X_Tfidf = Tfidf_vect.transform(Train_X)
  98. Test_X_Tfidf = Tfidf_vect.transform(Test_X)
  99.  
  100. modelknn = KNeighborsClassifier(n_neighbors=2)
  101. modelknn.fit(Train_X_Tfidf,Train_Y)
  102. predictions_KNN = modelknn.predict(Test_X_Tfidf)
  103. print("KNN Accuracy Score -> ",round(accuracy_score(predictions_KNN, Test_Y)*100))
  104.  
  105. seriesA = pd.Series(Test_X,name='testing').reset_index()
  106. seriesB = pd.Series(predictions_KNN,name='predict').reset_index()
  107. gab = pd.concat([seriesA,seriesB],axis=1)
  108.  
  109.  
  110. @app.route("/")
  111. @app.route("/index")
  112. def index():
  113.  
  114. return render_template('index.html')
  115.  
  116. @app.route('/loginCheck',methods=['POST', 'GET'])
  117. def loginCheck():
  118. username= request.form['username']
  119. password= request.form['password']
  120. if (username == "Admin" and password == "P@ssw0rd"):
  121. session['admin'] = True
  122. else:
  123. return redirect(url_for('login'))
  124.  
  125. return redirect(url_for('dashboard'))
  126.  
  127. @app.route('/login')
  128. def login():
  129. if not session.get('admin'):
  130. return render_template('login.html')
  131. else:
  132. return redirect(url_for('dashboard'))
  133.  
  134.  
  135. @app.route('/logout')
  136. def logout():
  137. session.pop('admin', None)
  138. return redirect(url_for('login'))
  139.  
  140. @app.route("/admin/dashboard")
  141. def dashboard():
  142. if not session.get('admin'):
  143. return redirect(url_for('login'))
  144. else:
  145. trainNew = test_pd
  146. trainNew = trainNew.drop("Label", axis=1)
  147. trainNew = trainNew.drop("case_folding", axis=1)
  148. trainNew = trainNew.drop("token", axis=1)
  149. trainNew = trainNew.drop("stemming", axis=1)
  150. trainNew = trainNew.drop("final", axis=1)
  151. trainNew = trainNew.drop("stopwords", axis=1)
  152.  
  153. return render_template('admin/dashboard.html')
  154.  
  155.  
  156. @app.route("/admin/case")
  157. def case():
  158. if not session.get('admin'):
  159. return redirect(url_for('login'))
  160. else:
  161. trainNew = test_pd
  162. trainNew = trainNew.drop("Label", axis=1)
  163. #trainNew = trainNew.drop("case_folding", axis=1)
  164. trainNew = trainNew.drop("token", axis=1)
  165. trainNew = trainNew.drop("stemming", axis=1)
  166. trainNew = trainNew.drop("final", axis=1)
  167. trainNew = trainNew.drop("stopwords", axis=1)
  168.  
  169. return render_template('admin/casefolding.html', tables=[trainNew.to_html(classes='table table-striped table-bordered table-hover',header='true',justify='justify',table_id='tabel')])
  170.  
  171. @app.route("/admin/token")
  172. def token():
  173. if not session.get('admin'):
  174. return redirect(url_for('login'))
  175. else:
  176. trainNew = test_pd
  177. trainNew = trainNew.drop("Label", axis=1)
  178. trainNew = trainNew.drop("case_folding", axis=1)
  179. #trainNew = trainNew.drop("token", axis=1)
  180. trainNew = trainNew.drop("stemming", axis=1)
  181. trainNew = trainNew.drop("final", axis=1)
  182. trainNew = trainNew.drop("stopwords", axis=1)
  183. return render_template('admin/token.html', tables=[trainNew.to_html(classes='table table-striped table-bordered table-hover',header='true',justify='justify',table_id='tabel')])
  184.  
  185. @app.route("/admin/stopword")
  186. def stopword():
  187. if not session.get('admin'):
  188. return redirect(url_for('login'))
  189. else:
  190. trainNew = test_pd
  191. trainNew = trainNew.drop("Label", axis=1)
  192. trainNew = trainNew.drop("case_folding", axis=1)
  193. trainNew = trainNew.drop("token", axis=1)
  194. trainNew = trainNew.drop("stemming", axis=1)
  195. trainNew = trainNew.drop("final", axis=1)
  196. #trainNew = trainNew.drop("stopwords", axis=1)
  197.  
  198. return render_template('admin/stopword.html', tables=[trainNew.to_html(classes='table table-striped table-bordered table-hover',header='true',justify='justify',table_id='tabel')])
  199.  
  200. @app.route("/admin/stemming")
  201. def stemming():
  202. if not session.get('admin'):
  203. return redirect(url_for('login'))
  204. else:
  205. trainNew = test_pd
  206. trainNew = trainNew.drop("Label", axis=1)
  207. trainNew = trainNew.drop("case_folding", axis=1)
  208. trainNew = trainNew.drop("token", axis=1)
  209. #trainNew = trainNew.drop("stemming", axis=1)
  210. trainNew = trainNew.drop("final", axis=1)
  211. trainNew = trainNew.drop("stopwords", axis=1)
  212.  
  213. return render_template('admin/stemming.html', tables=[trainNew.to_html(classes='table table-striped table-bordered table-hover',header='true',justify='justify',table_id='tabel')])
  214.  
  215. @app.route("/hasil")
  216. def hasil():
  217. trainNew = test_pd
  218. trainNew = gab
  219. trainNew = trainNew.drop("index", axis=1)
  220. label = test_pd['Label']
  221. pos = label[label=='pos']
  222. neg = label[label=='neg']
  223. jml_pos = pos.count()
  224. jml_neg = pos.count()
  225. jml_label = label.count()
  226. persen_pos = str(round((jml_pos/jml_label)*100,2))
  227. persen_neg = str(round((jml_neg/jml_label)*100,2))
  228.  
  229. return render_template('hasil.html', tables=[trainNew.to_html(classes='table table-striped table-bordered table-hover',header='true',justify='justify',table_id='tabel')])
  230.  
  231.  
  232. #@app.route('/sentimenttest')
  233. #def sentimentTest():
  234. # return render_template('sentimenttest.html')
  235.  
  236. #@app.route('/checksentiment', methods=['POST'])
  237. #def checkSentiment():
  238. # text = request.form['text']
  239. # review_vector = Tfidf_vect.transform([text])
  240. # hasil = modelknn.predict(review_vector)
  241. # if hasil==['neg'] :
  242. # hasil = 'Negative'
  243. # else :
  244. # hasil = 'Positif'
  245. # return render_template('sentimenthasil.html',hasil=hasil,text=text)
  246. #
  247. if __name__ == '__main__':
  248. app.run(debug=True)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement