# program to make a simple wordcloud in python
from nltk.corpus import state_union
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from collections import Counter
import string
from os import path
from wordcloud import WordCloud
# Remove punctuation from a string.
# Takes a string and returns it with every punctuation character removed.
def remove_punctuation(text):
    """Return *text* with every ASCII punctuation character removed.

    The characters removed are exactly those in ``string.punctuation``;
    all other characters (letters, digits, whitespace) are kept in order.

    Args:
        text: The string to clean.

    Returns:
        A new string without punctuation characters.
    """
    # A deletion table lets str.translate strip all punctuation in one
    # pass instead of testing each character against a list.
    deletion_table = str.maketrans("", "", string.punctuation)
    return text.translate(deletion_table)
# Remove English stopwords from a text.
# Takes a string, lower-cases it, strips punctuation, and returns the
# remaining non-stopword tokens joined by single spaces.
def remove_stopwords(text):
    """Return *text* lower-cased, without punctuation or English stopwords.

    The text is lower-cased, passed through ``remove_punctuation``,
    tokenized with NLTK's ``word_tokenize`` and filtered against NLTK's
    English stopword list.

    Args:
        text: The string to clean.

    Returns:
        The remaining tokens joined by single spaces.
    """
    text = remove_punctuation(text.lower())
    # A set gives constant-time membership tests; the stopword list would
    # otherwise be scanned linearly once per token.
    stop_words = set(stopwords.words("english"))
    words = [w for w in word_tokenize(text) if w not in stop_words]
    return " ".join(words)
# main script
# the wordcloud image is saved next to this script; abspath guards against
# dirname(__file__) being an empty string when the script is run by a
# relative name
d = path.dirname(path.abspath(__file__))
# path.join picks the right separator for the current platform
output_file = path.join(d, "simple_wordcloud.png")
# let us get some text from the nltk corpora
speech_files = [
    "2005-GWBush.txt",
    "2002-GWBush.txt",
    "2003-GWBush.txt",
    "2004-GWBush.txt",
]
# join with a newline so the last word of one speech cannot fuse with the
# first word of the next
complete_text = "\n".join(state_union.raw(name) for name in speech_files)
# removing stopwords and punctuations from the text
complete_text = remove_stopwords(complete_text)
# creating wordcloud
print("Creating Wordcloud")
wc = WordCloud(
    background_color="black",
    max_words=100,
    max_font_size=300,
    width=1920,
    height=1080,
)
# generate word cloud from word frequencies; split() without an argument
# never yields empty-string tokens
wc.generate_from_frequencies(Counter(complete_text.split()))
wc.to_file(output_file)
# report the exact path that was written
print("Wordcloud created and saved at location " + output_file)