# program to make a simple wordcloud in python
from nltk.corpus import state_union
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from collections import Counter
import string
from os import path
from wordcloud import WordCloud

# method to remove punctuation from sentences
# method takes string input and returns string without punctuation as output
def remove_punctuation(text):
    """Return ``text`` with every ASCII punctuation character removed.

    The characters removed are exactly those in ``string.punctuation``;
    all other characters (letters, digits, whitespace, non-ASCII symbols)
    are kept in their original order.

    Args:
        text: The string to clean.

    Returns:
        A new string without any ``string.punctuation`` characters.
    """
    # A translation table with a "delete" set strips all punctuation in a
    # single pass, instead of testing each character against a list.
    return text.translate(str.maketrans("", "", string.punctuation))


# method that lowercases the text and removes punctuation and English stopwords
# takes a string as input and returns the remaining words joined by single spaces
def remove_stopwords(text):
    """Lowercase ``text``, strip punctuation and drop English stopwords.

    Args:
        text: The raw string to clean.

    Returns:
        The remaining tokens joined by single spaces.
    """
    # NOTE(review): punctuation is removed before tokenizing, so apostrophes
    # in contractions are already gone when tokens are compared against the
    # stopword list -- confirm this ordering is intended.
    cleaned = remove_punctuation(text.lower())
    # Build a set once so each membership test is O(1) rather than a scan
    # of the whole stopword list for every token.
    stop_words = set(stopwords.words("english"))
    kept = [token for token in word_tokenize(cleaned) if token not in stop_words]
    return " ".join(kept)


# main function

# Directory that contains this script; the wordcloud image is saved there.
# abspath is applied first because dirname(__file__) can be an empty string
# when the script is launched through a bare relative filename.
d = path.dirname(path.abspath(__file__))
# Build the output location with path.join so the correct separator is used
# on every platform (the image lands inside `d` on Windows and POSIX alike).
output_path = path.join(d, "simple_wordcloud.png")

# let us get some text from the nltk corpora
complete_text = state_union.raw("2005-GWBush.txt")
complete_text += state_union.raw("2002-GWBush.txt")
complete_text += state_union.raw("2003-GWBush.txt")
complete_text += state_union.raw("2004-GWBush.txt")

# removing stopwords and punctuations from the text
complete_text = remove_stopwords(complete_text)

# creating wordcloud
print("Creating Wordcloud")
wc = WordCloud(background_color="black", max_words=100, max_font_size=300, width=1920, height=1080)
# generate word cloud from per-word occurrence counts;
# split() without arguments never yields an empty-string "word"
wc.generate_from_frequencies(Counter(complete_text.split()))
wc.to_file(output_path)
# report the exact path that was written
print("Wordcloud created and saved at location " + output_path)