document.write('
Data hosted with ♥ by Pastebin.com - Download Raw - See Original
  1. # program to make a simple wordcloud in python
  2. from nltk.corpus import state_union
  3. from nltk.corpus import stopwords
  4. from nltk.tokenize import word_tokenize
  5. from collections import Counter
  6. import string
  7. from os import path
  8. from wordcloud import WordCloud
  9.  
  10. # method to remove punctuation from sentences
  11. # method takes string input and returns string without punctuation as output
  12. def remove_punctuation(text):
  13.     punctuations = list(string.punctuation)
  14.     text = [i for i in text if i not in punctuations]
  15.     text = "".join(text)
  16.     return text
  17.  
  18.  
  19. # method that removes stopwords from the text
  20. # method that takes input as text and returns a string without stopwords
  21. def remove_stopwords(text):
  22.     text = text.lower()
  23.     text = remove_punctuation(text)
  24.     words = word_tokenize(text)
  25.     stop_words = stopwords.words("english")
  26.     words = [w for w in words if w not in stop_words]
  27.     text = " ".join(words)
  28.     return text
  29.  
  30.  
  31. # main function
  32.  
  33. # setting the path for the wordcloud image to be saved
  34. d = path.dirname(__file__)
  35.  
  36. # let us get some text from the nltk corpora
  37. complete_text = state_union.raw("2005-GWBush.txt")
  38. complete_text += state_union.raw("2002-GWBush.txt")
  39. complete_text += state_union.raw("2003-GWBush.txt")
  40. complete_text += state_union.raw("2004-GWBush.txt")
  41.  
  42. # removing stopwords and punctuations from the text
  43. complete_text = remove_stopwords(complete_text)
  44.  
  45. # creating wordcloud
  46. print("Creating Wordcloud")
  47. wc = WordCloud(background_color="black", max_words=100, max_font_size=300, width=1920, height=1080)
  48. # generate word cloud
  49. wc.generate_from_frequencies(Counter(complete_text.split(" ")))
  50. wc.to_file(d + "\\\\simple_wordcloud.png")
  51. print("Wordcloud created and saved at location " + d + "/simple_wordcloud.png")
');