Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from flask import Flask, request, render_template,jsonify,Response,redirect,url_for,session
- import os
- from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
- from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
- import pandas as pd
- pd.set_option('display.max_colwidth', 3000)
- pd.set_option('display.max_rows', 10)
- import nltk
- from nltk.tokenize import sent_tokenize, word_tokenize
- from nltk.stem.snowball import SnowballStemmer
- from nltk.stem import PorterStemmer
- from nltk.corpus import stopwords
- import json
- import re
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn import svm
- from sklearn.svm import SVC
- from sklearn import model_selection
- from sklearn.feature_extraction.text import TfidfVectorizer
- from sklearn.preprocessing import LabelEncoder
- from sklearn.preprocessing import OneHotEncoder
- from sklearn.preprocessing import Normalizer
- import numpy as np
- from ast import literal_eval
- from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
# Flask application object. The secret key signs the session cookie in which
# the admin login flag is stored.
app = Flask(__name__)
# Prefer a key supplied through the environment so the real secret does not
# have to live in the source file; the literal fallback keeps existing setups
# working unchanged.
# NOTE(review): the fallback value is public and should not be relied on
# outside local development.
app.secret_key = os.environ.get("FLASK_SECRET_KEY", "super secret key")

# Seed NumPy's global random generator so steps that draw from it repeat
# between runs.
np.random.seed(400)

# Load the dataset; the 'Content' column holds the raw text of each document.
trainData = pd.read_csv('dataset.csv', encoding="ISO-8859-1")
# Working frame: each preprocessing stage below adds one column to it.
test_pd = pd.DataFrame(trainData)
content = test_pd['Content']
- #CASE FOLDING & REMOVE REGEX
# Patterns are compiled once at import time instead of being looked up again
# for every document.
_HTML_TAG_RE = re.compile(r'<.*?>')
_PUNCTUATION_RE = re.compile(r'[^\w\s]')
_DIGITS_RE = re.compile(r'\d+')


def preprocess(text):
    """Case-fold each document and strip HTML tags, punctuation and digits.

    Args:
        text: Iterable of raw documents (for example a list or a pandas
            Series of strings).

    Returns:
        A list holding exactly one cleaned, lower-cased string per input
        item, in input order. Items that are not strings (such as the NaN
        pandas uses for an empty CSV cell) and items that clean down to
        nothing are returned as ``''``, so the result always has the same
        length as the input and can be assigned back as a DataFrame column.
    """
    clean_data = []
    for raw in text:
        if not isinstance(raw, str):
            # Missing values cannot be passed to the regexes; keep an empty
            # placeholder so positions still line up with the input.
            clean_data.append('')
            continue
        cleaned = _HTML_TAG_RE.sub('', raw)          # remove HTML tags
        cleaned = _PUNCTUATION_RE.sub('', cleaned)   # remove punctuation
        cleaned = _DIGITS_RE.sub('', cleaned)        # remove numbers
        clean_data.append(cleaned.lower())
    return clean_data
# Stage 1 - case folding: clean every raw document and keep the result in its
# own column next to the original text.
caseFolding = preprocess(content)
test_pd['case_folding'] = caseFolding
case_folding = test_pd['case_folding']
- #TOKENIZATION
def identify_tokens(row):
    """Tokenise one row's case-folded text, keeping alphabetic tokens only.

    Args:
        row: Row object exposing the cleaned text under ``'case_folding'``.

    Returns:
        List of the tokens produced by ``word_tokenize`` for which
        ``str.isalpha()`` is true (punctuation-only tokens are discarded).
    """
    words = []
    for candidate in word_tokenize(row['case_folding']):
        if candidate.isalpha():
            words.append(candidate)
    return words
# Stage 2 - tokenisation: one token list per row.
test_pd['token'] = test_pd.apply(identify_tokens, axis=1)
tokens = test_pd['token']

# Stage 3 setup - Indonesian stop-word list from NLTK, held as a set so the
# per-token membership test is cheap.
# (A locally stored stopwordbahasa.csv was previously tried as an alternative
# source; it is not used.)
stops = set(stopwords.words("indonesian"))
def remove_stops(row):
    """Filter the stop words out of one row's token list.

    Args:
        row: Row object exposing the token list under ``'token'``.

    Returns:
        List of the tokens, in their original order, that are not members of
        the module-level ``stops`` set.
    """
    kept = []
    for word in row['token']:
        if word in stops:
            continue
        kept.append(word)
    return kept
# Stage 3 - stop-word removal: one filtered token list per row.
test_pd['stopwords'] = test_pd.apply(remove_stops, axis=1)
# NOTE: this module-level name is rebound further down by the /admin/stopword
# view function of the same name.
stopword = test_pd['stopwords']
- #STEMMING
# Shared stemmer instance, created lazily by _get_stemmer().
_STEMMER = None


def _get_stemmer():
    """Return the shared Sastrawi stemmer, building it on first use."""
    global _STEMMER
    if _STEMMER is None:
        _STEMMER = StemmerFactory().create_stemmer()
    return _STEMMER


def stem_list(row):
    """Stem every token in one row's stop-word-filtered list.

    The stemmer is built once and reused: this function is applied to every
    row of the DataFrame, and constructing a new factory and stemmer per row
    repeats the same setup work each time without changing the result.

    Args:
        row: Row object exposing the filtered token list under
            ``'stopwords'``.

    Returns:
        List with the stemmed form of each token, in the original order.
    """
    stemmer = _get_stemmer()
    return [stemmer.stem(word) for word in row['stopwords']]
# Stage 4 - stemming: one list of stemmed tokens per row.
test_pd['stemming'] = test_pd.apply(stem_list, axis=1)
stem = test_pd['stemming']

# The vectoriser works on strings, so each token list is stored in its string
# form in the 'final' column.
test_pd['final'] = stem.astype(str)
text_final = test_pd['final']

# Hold out 30% of the rows for evaluation.
Train_X, Test_X, Train_Y, Test_Y = model_selection.train_test_split(
    text_final, test_pd['Label'], test_size=0.3
)

# Learn the vocabulary and IDF weights from the training split only. Fitting
# on the whole corpus would let statistics of the held-out documents leak into
# the features and make the reported accuracy optimistic.
Tfidf_vect = TfidfVectorizer(max_features=5000)
Tfidf_vect.fit(Train_X)
Train_X_Tfidf = Tfidf_vect.transform(Train_X)
Test_X_Tfidf = Tfidf_vect.transform(Test_X)

# Train the k-nearest-neighbours classifier and score it on the held-out split.
modelknn = KNeighborsClassifier(n_neighbors=2)
modelknn.fit(Train_X_Tfidf, Train_Y)
predictions_KNN = modelknn.predict(Test_X_Tfidf)
# accuracy_score is documented as (y_true, y_pred); pass them in that order.
print("KNN Accuracy Score -> ", round(accuracy_score(Test_Y, predictions_KNN) * 100))

# Side-by-side table of each held-out document and its predicted label.
# reset_index() gives both frames a fresh 0..n-1 index so concat pairs them by
# position; it also leaves an 'index' column in each, which the /hasil view
# drops before rendering.
seriesA = pd.Series(Test_X, name='testing').reset_index()
seriesB = pd.Series(predictions_KNN, name='predict').reset_index()
gab = pd.concat([seriesA, seriesB], axis=1)
@app.route("/")
@app.route("/index")
def index():
    """Serve the landing page for both ``/`` and ``/index``."""
    landing_page = render_template('index.html')
    return landing_page
@app.route('/loginCheck', methods=['POST', 'GET'])
def loginCheck():
    """Validate the submitted admin credentials and start the admin session.

    Returns:
        A redirect to the dashboard when the credentials match, otherwise a
        redirect back to the login page.
    """
    # The route also accepts GET, where no form data is present. Indexing
    # request.form directly would abort such a request (or a POST missing a
    # field) with 400 Bad Request; .get() yields None instead, which simply
    # fails the comparison below.
    username = request.form.get('username')
    password = request.form.get('password')

    # NOTE(review): the credentials are hard-coded in plain text; consider
    # moving them to configuration and storing only a password hash.
    if username == "Admin" and password == "P@ssw0rd":
        session['admin'] = True
        return redirect(url_for('dashboard'))
    return redirect(url_for('login'))
@app.route('/login')
def login():
    """Show the login form, or skip to the dashboard when already logged in."""
    if session.get('admin'):
        return redirect(url_for('dashboard'))
    return render_template('login.html')
@app.route('/logout')
def logout():
    """Clear the admin flag from the session and return to the login page."""
    # The None default makes this a no-op when nobody is logged in.
    session.pop('admin', None)
    login_url = url_for('login')
    return redirect(login_url)
@app.route("/admin/dashboard")
def dashboard():
    """Render the admin dashboard; anonymous visitors are sent to the login page.

    The template is rendered without any table data, so the trimmed copy of
    the dataset that used to be built here (and then discarded) is no longer
    computed.
    """
    if not session.get('admin'):
        return redirect(url_for('login'))
    return render_template('admin/dashboard.html')
@app.route("/admin/case")
def case():
    """Admin page showing the case-folding stage of the pipeline.

    Renders the dataset as an HTML table with the label column and the
    columns of all other stages removed, leaving 'case_folding' visible.
    Anonymous visitors are redirected to the login page.
    """
    if not session.get('admin'):
        return redirect(url_for('login'))

    hidden_columns = ["Label", "token", "stemming", "final", "stopwords"]
    stage_view = test_pd.drop(columns=hidden_columns)
    table_html = stage_view.to_html(
        classes='table table-striped table-bordered table-hover',
        header='true',
        justify='justify',
        table_id='tabel',
    )
    return render_template('admin/casefolding.html', tables=[table_html])
@app.route("/admin/token")
def token():
    """Admin page showing the tokenisation stage of the pipeline.

    Renders the dataset as an HTML table with the label column and the
    columns of all other stages removed, leaving 'token' visible.
    Anonymous visitors are redirected to the login page.
    """
    if not session.get('admin'):
        return redirect(url_for('login'))

    hidden_columns = ["Label", "case_folding", "stemming", "final", "stopwords"]
    stage_view = test_pd.drop(columns=hidden_columns)
    table_html = stage_view.to_html(
        classes='table table-striped table-bordered table-hover',
        header='true',
        justify='justify',
        table_id='tabel',
    )
    return render_template('admin/token.html', tables=[table_html])
@app.route("/admin/stopword")
def stopword():
    """Admin page showing the stop-word removal stage of the pipeline.

    Renders the dataset as an HTML table with the label column and the
    columns of all other stages removed, leaving 'stopwords' visible.
    Anonymous visitors are redirected to the login page.
    """
    if not session.get('admin'):
        return redirect(url_for('login'))

    hidden_columns = ["Label", "case_folding", "token", "stemming", "final"]
    stage_view = test_pd.drop(columns=hidden_columns)
    table_html = stage_view.to_html(
        classes='table table-striped table-bordered table-hover',
        header='true',
        justify='justify',
        table_id='tabel',
    )
    return render_template('admin/stopword.html', tables=[table_html])
@app.route("/admin/stemming")
def stemming():
    """Admin page showing the stemming stage of the pipeline.

    Renders the dataset as an HTML table with the label column and the
    columns of all other stages removed, leaving 'stemming' visible.
    Anonymous visitors are redirected to the login page.
    """
    if not session.get('admin'):
        return redirect(url_for('login'))

    hidden_columns = ["Label", "case_folding", "token", "final", "stopwords"]
    stage_view = test_pd.drop(columns=hidden_columns)
    table_html = stage_view.to_html(
        classes='table table-striped table-bordered table-hover',
        header='true',
        justify='justify',
        table_id='tabel',
    )
    return render_template('admin/stemming.html', tables=[table_html])
@app.route("/hasil")
def hasil():
    """Render the results page: held-out documents with their predicted labels.

    Besides the table, the share of 'pos' and 'neg' labels in the dataset is
    passed to the template as ``persen_pos`` and ``persen_neg`` (strings,
    percentages rounded to two decimals).
    """
    # `gab` carries an 'index' column from each reset_index() call; dropping
    # by label removes both.
    result_table = gab.drop("index", axis=1)

    label = test_pd['Label']
    jml_label = label.count()
    jml_pos = label[label == 'pos'].count()
    # Previously counted the 'pos' rows a second time, so the negative share
    # always equalled the positive share.
    jml_neg = label[label == 'neg'].count()

    if jml_label:
        persen_pos = str(round((jml_pos / jml_label) * 100, 2))
        persen_neg = str(round((jml_neg / jml_label) * 100, 2))
    else:
        # No labelled rows: avoid dividing by zero.
        persen_pos = persen_neg = "0.0"

    table_html = result_table.to_html(
        classes='table table-striped table-bordered table-hover',
        header='true',
        justify='justify',
        table_id='tabel',
    )
    # The percentages were computed but never handed to the template; extra
    # context variables are ignored by a template that does not use them.
    return render_template(
        'hasil.html',
        tables=[table_html],
        persen_pos=persen_pos,
        persen_neg=persen_neg,
    )
- #@app.route('/sentimenttest')
- #def sentimentTest():
- # return render_template('sentimenttest.html')
- #@app.route('/checksentiment', methods=['POST'])
- #def checkSentiment():
- # text = request.form['text']
- # review_vector = Tfidf_vect.transform([text])
- # hasil = modelknn.predict(review_vector)
- # if hasil==['neg'] :
- # hasil = 'Negative'
- # else :
- # hasil = 'Positif'
- # return render_template('sentimenthasil.html',hasil=hasil,text=text)
- #
if __name__ == '__main__':
    # Debug mode stays on by default, as before, but can now be switched off
    # without editing the source by setting FLASK_DEBUG to 0, false or no.
    # NOTE(review): the debugger must not be reachable on a deployed instance.
    debug_enabled = os.environ.get("FLASK_DEBUG", "1").lower() not in {"0", "false", "no"}
    app.run(debug=debug_enabled)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement