Advertisement
Guest User

Untitled

a guest
Mar 27th, 2017
47
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.52 KB | None | 0 0
  # Bag-of-words + Gaussian Naive Bayes classifier for the rows of OpeningLines2.txt.
#
# Pipeline: load the tab-separated file, normalise and stem the text in the
# 'Opening' column, turn it into word-count features, then train and evaluate
# a Gaussian Naive Bayes model on an 80/20 train/test split.

# Importing the libraries
import re

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import confusion_matrix
# `sklearn.cross_validation` was removed from scikit-learn; `train_test_split`
# lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# Importing the dataset
dataset = pd.read_csv('OpeningLines2.txt', delimiter='\t')
dataset.head()

# EDA
# NOTE: these bare expressions only display something when the script is run
# interactively (cell by cell); as a plain script their results are discarded.
dataset.describe()
dataset.groupby('Review').describe()
dataset['length'] = dataset['Opening'].apply(len)
dataset.head()

# Cleaning the texts
nltk.download('stopwords')

# Built once, outside the loop: both are invariant across rows, and a set
# gives O(1) membership tests for the stop-word filter.
ps = PorterStemmer()
stop_words = set(stopwords.words('english'))

corpus = []
# Iterate over every row instead of a hard-coded row count, so the corpus
# always has exactly one entry per dataset row (and therefore per label).
for raw_opening in dataset['Opening']:
    # Replace everything that is not an ASCII letter with a space, lowercase,
    # and split on whitespace.
    words = re.sub(r'[^a-zA-Z]', ' ', raw_opening).lower().split()
    # Drop English stop words and stem what is left.
    opening = ' '.join(ps.stem(word) for word in words if word not in stop_words)
    corpus.append(opening)

# Creating the Bag of Words model
# Keep at most the 1000 most frequent terms as features.
cv = CountVectorizer(max_features=1000)
X = cv.fit_transform(corpus).toarray()
# NOTE(review): the target is taken by position (third column). The 'length'
# column was appended to `dataset` above, so if the input file has only two
# columns this selects 'length' rather than 'Review' -- confirm the file's
# column layout, or select the label column by name.
y = dataset.iloc[:, 2].values

# Splitting the dataset into the Training set and Test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=0
)

# Fitting Naive Bayes to the Training set
classifier = GaussianNB()
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement