Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # program to make a simple wordcloud in python
- from nltk.corpus import state_union
- from nltk.corpus import stopwords
- from nltk.tokenize import word_tokenize
- from collections import Counter
- import string
- from os import path
- from wordcloud import WordCloud
def remove_punctuation(text):
    """Return ``text`` with every ASCII punctuation character removed.

    The characters dropped are exactly those listed in
    ``string.punctuation``; letters, digits, whitespace and any other
    characters are kept unchanged and in their original order.

    Args:
        text: The string to clean.

    Returns:
        A new string without punctuation characters.
    """
    # A deletion table lets str.translate strip everything in a single pass
    # instead of testing each character against a list of punctuation marks.
    return text.translate(str.maketrans("", "", string.punctuation))
def remove_stopwords(text):
    """Return ``text`` lower-cased, without punctuation or English stopwords.

    The text is lower-cased, stripped of punctuation with
    ``remove_punctuation``, tokenized with NLTK's ``word_tokenize`` and
    filtered against NLTK's English stopword list. The remaining tokens are
    joined back together with single spaces.

    Args:
        text: The string to clean.

    Returns:
        A single string of the remaining tokens separated by spaces.
    """
    cleaned = remove_punctuation(text.lower())
    # Build the lookup once as a set: the list returned by stopwords.words()
    # would otherwise be scanned from the start for every token.
    stop_words = set(stopwords.words("english"))
    kept = [w for w in word_tokenize(cleaned) if w not in stop_words]
    return " ".join(kept)
# main script
# Save the image next to this script. abspath() keeps the directory non-empty
# even when the script is launched by bare file name, and path.join() picks
# the correct separator for the current platform.
d = path.dirname(path.abspath(__file__))
output_path = path.join(d, "simple_wordcloud.png")

# let us get some text from the nltk corpora
complete_text = "".join(
    state_union.raw(fileid)
    for fileid in (
        "2005-GWBush.txt",
        "2002-GWBush.txt",
        "2003-GWBush.txt",
        "2004-GWBush.txt",
    )
)

# removing stopwords and punctuations from the text
complete_text = remove_stopwords(complete_text)

# creating wordcloud
print("Creating Wordcloud")
wc = WordCloud(
    background_color="black",
    max_words=100,
    max_font_size=300,
    width=1920,
    height=1080,
)

# generate word cloud from word frequencies; split() without an argument
# never produces empty-string tokens
wc.generate_from_frequencies(Counter(complete_text.split()))
wc.to_file(output_path)

# report the path that was actually written
print("Wordcloud created and saved at location " + output_path)
Add Comment
Please, Sign In to add comment