Guest User

Untitled

a guest
May 24th, 2018
184
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.16 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. import time
  5. import csv
  6. import string
  7. from sklearn.cross_validation import train_test_split
  8. from sklearn.feature_extraction.text import CountVectorizer
  9. from sklearn.naive_bayes import MultinomialNB
  10.  
  11. # Importing dataset
  12. data = pd.read_csv("test.csv", quotechar='"', delimiter=',',quoting=csv.QUOTE_ALL, skipinitialspace=True,error_bad_lines=False)
  13. df2 = data.set_index("name", drop = False)
  14.  
  15.  
  16.  
  17. df2['sentiment'] = df2['rating'].apply(lambda rating : +1 if rating > 3 else -1)
  18.  
  19.  
  20. train, test = train_test_split(df2, test_size=0.2)
  21.  
  22.  
  23. count_vect = CountVectorizer()
  24. X_train_counts = count_vect.fit_transform(train)
  25. test_matrix = count_vect.transform(test)
  26.  
  27. clf = MultinomialNB().fit(X_train_tfidf, twenty_train.target)
  28.  
  29. clf = MultinomialNB().fit(X_train_tfidf, twenty_train.target)
  30.  
  31. X : {array-like, sparse matrix}, shape = [n_samples, n_features]
  32. Training vectors, where n_samples is the number of samples and n_features is
  33. the number of features.
  34.  
  35. y : array-like, shape = [n_samples]
  36. Target values.
  37.  
  38. from sklearn.datasets import fetch_20newsgroups
  39. from sklearn.feature_extraction.text import TfidfVectorizer
  40. from sklearn.naive_bayes import MultinomialNB
  41. from sklearn import metrics
  42.  
  43. newsgroups_train = fetch_20newsgroups(subset='train')
  44. categories = ['alt.atheism', 'talk.religion.misc',
  45. 'comp.graphics', 'sci.space']
  46.  
  47. newsgroups_train = fetch_20newsgroups(subset='train',
  48. categories=categories)
  49. vectorizer = TfidfVectorizer()
  50. # the following will be the training data
  51. vectors = vectorizer.fit_transform(newsgroups_train.data)
  52. vectors.shape
  53.  
  54. newsgroups_test = fetch_20newsgroups(subset='test',
  55. categories=categories)
  56. # this is the test data
  57. vectors_test = vectorizer.transform(newsgroups_test.data)
  58.  
  59. clf = MultinomialNB(alpha=.01)
  60.  
  61. # the fitting is done using the TRAINING data
  62. # Check the shapes before fitting
  63. vectors.shape
  64. #(2034, 34118)
  65. newsgroups_train.target.shape
  66. #(2034,)
  67.  
  68. # fit the model using the TRAINING data
  69. clf.fit(vectors, newsgroups_train.target)
  70.  
  71. # the PREDICTION is done using the TEST data
  72. pred = clf.predict(vectors_test)
Add Comment
Please, Sign In to add comment