Advertisement
Guest User

Untitled

a guest
Nov 21st, 2017
61
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.96 KB | None | 0 0
  1. import csv
  2. import string
  3.  
  4. news = open('C:/Users/Fishers/Desktop/RedditNews.csv')
  5. words_list = list()
  6. csv_news = csv.reader(news)
  7. with open('DIM_WORD.csv', 'w', newline='') as myfile:
  8. for row in csv_news:
  9. line = row[1]
  10. lower_words = [w.lower() for w in line.split()]
  11. lower_words = [''.join(c for c in s if c not in string.punctuation) for s in lower_words]
  12. words_list.extend(lower_words)
  13.  
  14. # Make the list unique
  15. # print (words_list)
  16. unique_words = list(set(words_list))
  17. # Remove words that are "filler words" (in, the, a, on, if, an)
  18. exclude_list = ['in', 'the', 'a', 'on', 'if', 'an']
  19. print('The excluded words are: ', ', '.join(exclude_list))
  20. final_words = [x for x in unique_words if x not in exclude_list]
  21. for word in unique_words:
  22. wr = csv.writer(myfile)
  23. wr.writerow([word])
  24.  
  25. print('Parsing Complete. There are ', str(len(final_words)), ' total unique words.')
  26. news.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement