Advertisement
Guest User

Untitled

a guest
Mar 27th, 2017
53
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.60 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. """
  3. Spyder Editor
  4.  
  5. This is a temporary script file.
  6. """
  7. # Importing the libraries
  8. import numpy as np
  9. import matplotlib.pyplot as plt
  10. import pandas as pd
  11.  
  12. # Importing the dataset
  13. dataset = pd.read_csv('OpeningLines2.txt', delimiter = '\t')
  14. dataset.head()
  15.  
  16. # EDA
  17. dataset.describe()
  18. dataset.groupby('Review').describe()
  19. dataset['length'] = dataset['Opening'].apply(len)
  20. dataset.head()
  21.  
  22. # Cleaning the texts
  23. import re
  24. import nltk
  25. nltk.download('stopwords')
  26. from nltk.corpus import stopwords
  27. from nltk.stem.porter import PorterStemmer
  28. corpus = []
  29. for i in range(0, 167):
  30. opening = re.sub('[^a-zA-Z]', ' ', dataset['Opening'][i])
  31. opening = opening.lower()
  32. opening = opening.split()
  33. ps = PorterStemmer()
  34. opening = [ps.stem(word) for word in opening if not word in set(stopwords.words('english'))]
  35. opening = ' '.join(opening)
  36. corpus.append(opening)
  37.  
  38. # Creating the Bag of Words model
  39. from sklearn.feature_extraction.text import CountVectorizer
  40. cv = CountVectorizer(max_features = 1000)
  41. X = cv.fit_transform(corpus).toarray()
  42. y = dataset.iloc[:, 2].values
  43.  
  44. # Splitting the dataset into the Training set and Test set
  45. from sklearn.cross_validation import train_test_split
  46. X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)
  47.  
  48. # Fitting Naive Bayes to the Training set
  49. from sklearn.naive_bayes import GaussianNB
  50. classifier = GaussianNB()
  51. classifier.fit(X_train, y_train)
  52.  
  53. # Predicting the Test set results
  54. y_pred = classifier.predict(X_test)
  55.  
  56. # Making the Confusion Matrix
  57. from sklearn.metrics import confusion_matrix
  58. cm = confusion_matrix(y_test, y_pred)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement