# program to make a masked wordcloud in python
from nltk.corpus import state_union
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from collections import Counter
import string
from os import path
from PIL import Image
from wordcloud import WordCloud
import numpy as np
# function to remove punctuation from text
# takes a string as input and returns the same string with all punctuation characters removed
def remove_punctuation(text):
    """Return ``text`` with every ASCII punctuation character removed.

    The characters removed are exactly those in ``string.punctuation``;
    letters, digits, whitespace and non-ASCII symbols are left untouched.

    Args:
        text: The string to clean.

    Returns:
        A new string containing the characters of ``text`` in their original
        order, minus any punctuation.
    """
    # A translation table whose third argument lists characters to delete
    # strips them all in one pass, instead of testing each character against
    # a list and re-joining the survivors.
    return text.translate(str.maketrans("", "", string.punctuation))
# function that removes stopwords from the text
# takes text as input, lowercases it, strips punctuation and returns a string without English stopwords
def remove_stopwords(text):
    """Return ``text`` lowercased, stripped of punctuation and English stopwords.

    The text is lowercased, passed through ``remove_punctuation``, tokenized
    with NLTK's ``word_tokenize``, and every token found in NLTK's English
    stopword list is dropped.

    Args:
        text: The string to clean.

    Returns:
        The remaining tokens joined by single spaces.
    """
    cleaned = remove_punctuation(text.lower())
    # Build the stopword collection once as a set so each membership test is
    # a hash lookup rather than a scan over the whole stopword list.
    stop_words = set(stopwords.words("english"))
    return " ".join(w for w in word_tokenize(cleaned) if w not in stop_words)
# main script
# let us get some text from the nltk corpora
# Collect the raw text of four State of the Union addresses from the NLTK
# corpus. Joining with a newline guarantees that the last word of one speech
# can never fuse with the first word of the next.
speech_files = (
    "2005-GWBush.txt",
    "2002-GWBush.txt",
    "2003-GWBush.txt",
    "2004-GWBush.txt",
)
complete_text = "\n".join(state_union.raw(name) for name in speech_files)
# removing stopwords and punctuation from the text
complete_text = remove_stopwords(complete_text)
# Directory containing this script: the mask image is read from here and the
# word cloud is written here. abspath() avoids an empty directory string when
# the script is launched with a bare relative filename.
d = path.dirname(path.abspath(__file__))
output_path = path.join(d, "masked_wordcloud.png")
# Load the mask image into a numpy array. The icon is converted to RGBA so it
# always carries an alpha band, which paste() needs when the icon is used as
# its own transparency mask; the context manager closes the file handle.
with Image.open(path.join(d, "presentation.png")) as source_icon:
    icon = source_icon.convert("RGBA")
# Composite the icon onto a white canvas so transparent regions become white.
mask = Image.new("RGB", icon.size, (255, 255, 255))
mask.paste(icon, icon)
mask = np.array(mask)
# create a wordcloud object
print("Creating Wordcloud")
wc = WordCloud(background_color="black", max_words=100, mask=mask, max_font_size=300)
# Generate the word cloud from a dictionary mapping each word to its number
# of occurrences. split() with no argument never yields empty-string tokens.
wc.generate_from_frequencies(Counter(complete_text.split()))
# Optional recolouring, left disabled because grey_color_func is not defined here:
# wc.recolor(color_func=grey_color_func)
# Save next to the script so the file really is where the message says it is.
wc.to_file(output_path)
print("Wordcloud created and saved at location " + output_path)