Advertisement
Guest User

Untitled

a guest
Jun 18th, 2019
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.23 KB | None | 0 0
  1. #IMPORTANT
  2. #IMPORT FIRST EVERY SINGLE LIBRARY YOU NEED !!
  3.  
  4. from nltk.tokenize import sent_tokenize as st, word_tokenize as wt #tokenizing sentence
  5. from nltk.corpus import stopwords as sw, wordnet #filtering sentence
  6. from nltk.stem import PorterStemmer as ps, WordNetLemmatizer as wnl #stem filter to basic
  7. from nltk.probability import FreqDist #show freq of words
  8. from nltk.chunk import ne_chunk #drawing tree
  9. from nltk.tag import pos_tag #detail tag
  10. from nltk.classify import NaiveBayesClassifier as nbc, accuracy #training and testing data
  11.  
  12. #import data from nltk data
  13. from nltk.corpus import movie_reviews
  14. import pickle
  15.  
  16. neg_review = []
  17. pos_review = []
  18.  
  19. stop_words = sw.words('english')
  20.  
  21. for x in movie_reviews.categories():
  22.  
  23. y = 0
  24.  
  25. for i in movie_reviews.fileids(x):
  26. if x == 'neg':
  27. neg_review += [word for word in movie_reviews.words(i) if word not in stop_words]
  28. else:
  29. pos_review += [word for word in movie_reviews.words(i) if word not in stop_words]
  30.  
  31. if y == 10:
  32. break
  33. else:
  34. y += 1
  35.  
  36. neg_review = neg_review[:5000] # shift + alt + down/up
  37. pos_review = pos_review[:5000]
  38.  
  39. #FILTERING USING STEM AND LEMMATIZE
  40.  
  41. def extract(word, list_of_words, category):
  42. if word not in list_of_words:
  43. return ({word: False}, category)
  44. else:
  45. return ({word: True}, category)
  46.  
  47. stemmer = ps()
  48. lemmatizer = wnl()
  49.  
  50. neg_review = [stemmer.stem(w) for w in neg_review]
  51. pos_review = [stemmer.stem(w) for w in pos_review]
  52.  
  53. # [ key (for key in [list]) (condition if/else) ]
  54.  
  55. neg_review = [lemmatizer.lemmatize(w) for w in neg_review]
  56.  
  57. pos_review = [lemmatizer.lemmatize(w) for w in pos_review]
  58.  
  59. #EXTRACTING FEATURE (IMPORTANT)
  60.  
  61. neg_review = [ extract(w, pos_review, 'negative') for w in neg_review ]
  62.  
  63. pos_review = [ extract(w, neg_review, 'positive') for w in pos_review ]
  64.  
  65. #CREATE TRAIN DATA AND TEST DATA
  66.  
  67. idx = int(.8 * 5000)
  68. train_data = neg_review[:idx] + pos_review[:idx]
  69. test_data = neg_review[idx:] + pos_review[idx:]
  70.  
  71. #TRAINING DATA TO CREATE A MODEL
  72.  
  73. model = nbc.train(train_data)
  74.  
  75. #CHECK ACCURACY
  76.  
  77. # acc = accuracy(model, test_data)
  78. # print(acc * 100)
  79.  
  80. # model.show_most_informative_features()
  81.  
  82. words_input = input('input words: ')
  83.  
  84. pos = 0
  85. words = wt(words_input)
  86.  
  87. def extract_input(w):
  88. if w not in neg_review:
  89. return {w: True}
  90. return {w: False}
  91.  
  92. for w in words:
  93. w = stemmer.stem(w)
  94. w = lemmatizer.lemmatize(w)
  95.  
  96. res = model.classify(extract_input(w))
  97.  
  98. if res == 'positive':
  99. pos += 1
  100.  
  101. if pos > len(words) / 2:
  102. print('Review is Positive !')
  103. else:
  104. print('Review is Negative !')
  105.  
  106. w = words[0]
  107. for syn in wordnet.synsets(w):
  108. for s in syn.lemmas():
  109. print(s.name())
  110.  
  111. for a in s.antonyms():
  112. print('\t' + a.name())
  113.  
  114.  
  115. #SAVE DATA USING PICKLE
  116. save_data = open('data.pickle', 'wb') #wb = write byte
  117. pickle.dump(model, save_data)
  118. save_data.close()
  119.  
  120. #LOAD DATA USING PICKLE
  121. load_data = open('data.pickle', 'rb') #rb = read byte
  122. model = pickle.load(load_data)
  123. load_data.close()
  124.  
  125. #freqdist, pos_tag, ne_chunk
  126.  
  127. #input
  128.  
  129. #classify input using model
  130.  
  131. #show detail synonym and antonym
  132. #show frequency, pos_tag, and tree words
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement