Advertisement
Guest User

Untitled

a guest
Jun 18th, 2019
114
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.03 KB | None | 0 0
  1. from nltk.tokenize import word_tokenize as wtok, sent_tokenize as stok #tokenizing sentence
  2. from nltk.corpus import stopwords as sw, wordnet #filtering sentence
  3. from nltk.stem import PorterStemmer as ps, WordNetLemmatizer as wnl #stem
  4. from nltk.probability import FreqDist #stem filter to basic
  5. from nltk.chunk import ne_chunk #show frequency of words
  6. from nltk.tag import pos_tag #detail tagging
  7. from nltk.classify import NaiveBayesClassifier as nbc, accuracy #training and testing data
  8. import pickle
  9.  
  10. from nltk.corpus import movie_reviews
  11.  
  12. negative_review = []
  13. positive_review = []
  14.  
  15. y = 0
  16.  
  17. s_words = sw.words('english')
  18.  
  19. #Get data from nltk.corpus, movie_reviews
  20. for x in movie_reviews.categories():
  21. for i in movie_reviews.fileids(x):
  22. if x == 'neg':
  23. negative_review += [word for word in movie_reviews.words(i) if word not in s_words]
  24. else:
  25. positive_review += [word for word in movie_reviews.words(i) if word not in s_words]
  26.  
  27. if y == 10:
  28. break
  29. else:
  30. y += 1
  31.  
  32. negative_review = negative_review[:5000]
  33. positive_review = positive_review[:5000]
  34.  
  35. #extract data
  36.  
  37. def extract(word, list_of_words, category):
  38. if word not in list_of_words:
  39. return ({word: False}, category)
  40. else:
  41. return({word: True}, category)
  42.  
  43. #filter data with stem and lemmatizer
  44. stemmer = ps()
  45. lemmatizer = wnl()
  46.  
  47. negative_review = [stemmer.stem(w) for w in negative_review]
  48. positive_review = [stemmer.stem(w) for w in positive_review]
  49.  
  50. negative_review = [lemmatizer.lemmatize(w) for w in negative_review]
  51. positive_review = [lemmatizer.lemmatize(w) for w in positive_review]
  52.  
  53. negative_review = [extract(w, positive_review, 'negative') for w in negative_review]
  54. positive_review = [extract(w, negative_review, 'positive') for w in positive_review]
  55.  
  56. idx = int(.8 * 5000)
  57.  
  58. #Prepping the data
  59. train_data = negative_review[:idx] + positive_review[:idx]
  60. test_data = negative_review[idx:] + positive_review[idx:]
  61.  
  62. print(len(train_data))
  63. print(len(test_data))
  64.  
  65. ##Train the model
  66. model = nbc.train(train_data)
  67.  
  68. ##Check accuracy
  69. acc = accuracy(model, test_data) * 100
  70. print(acc)
  71.  
  72. words_input = input('input words: ')
  73.  
  74. neg = 0
  75. pos = 0
  76. words = wtok(words_input)
  77.  
  78. def extract_input(w):
  79. if w not in negative_review:
  80. return {w:True}
  81. return {w:False}
  82.  
  83. for w in words:
  84. w = stemmer.stem(w)
  85. w = lemmatizer.lemmatize(w)
  86.  
  87. res = model.classify(extract_input(w))
  88.  
  89. if res == 'positive':
  90. pos += 1
  91.  
  92. if pos > len(words) / 2:
  93. print('Review is positive')
  94. else:
  95. print('Review is negative')
  96.  
  97. w = words[0]
  98. for syn in wordnet.synsets(w):
  99. for s in syn.lemmas():
  100. print(s.name())
  101.  
  102. for a in s.antonyms():
  103. print('\t' + a.name())
  104.  
  105.  
  106. #save data
  107. save_data = open('data.pickle', 'wb')#wb = write byte
  108. pickle.dump(model, save_data)
  109. save_data.close()
  110.  
  111. #load data
  112. load_data = open('data.pickle', 'rb')#rb = read byte
  113. model = pickle.load(load_data)
  114. load_data.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement