m2skills

maskedwc python

Sep 3rd, 2017
212
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.06 KB | None | 0 0
  1. # program to make a masked wordcloud in python
  2. from nltk.corpus import state_union
  3. from nltk.corpus import stopwords
  4. from nltk.tokenize import word_tokenize
  5. from collections import Counter
  6. import string
  7. from os import path
  8. from PIL import Image
  9. from wordcloud import WordCloud
  10. import numpy as np
  11.  
  12.  
  13. # method to remove punctuation from sentences
  14. # method takes string input and returns string without punctuation as output
  15. def remove_punctuation(text):
  16.     punctuations = list(string.punctuation)
  17.     text = [i for i in text if i not in punctuations]
  18.     text = "".join(text)
  19.     return text
  20.  
  21.  
  22. # method that removes stopwords from the text
  23. # method that takes input as text and returns a string without stopwords
  24. def remove_stopwords(text):
  25.     text = text.lower()
  26.     text = remove_punctuation(text)
  27.     words = word_tokenize(text)
  28.     stop_words = stopwords.words("english")
  29.     words = [w for w in words if w not in stop_words]
  30.     text = " ".join(words)
  31.     return text
  32.  
  33.  
  34. # main function
  35.  
  36. # let us get some text from the nltk corpora
  37. complete_text = state_union.raw("2005-GWBush.txt")
  38. complete_text += state_union.raw("2002-GWBush.txt")
  39. complete_text += state_union.raw("2003-GWBush.txt")
  40. complete_text += state_union.raw("2004-GWBush.txt")
  41.  
  42. # removing stopwords and punctuations from the text
  43. complete_text = remove_stopwords(complete_text)
  44.  
  45. # setting the path for the wordcloud image to be saved
  46. d = path.dirname(__file__)
  47.  
  48. # loading the mask image into a numpy array
  49. icon = Image.open(path.join(d, "presentation.png"))
  50. mask = Image.new("RGB", icon.size, (255, 255, 255))
  51. mask.paste(icon, icon)
  52. mask = np.array(mask)
  53.  
  54. # create a wordcloud object
  55. print("Creating Wordcloud")
  56. wc = WordCloud(background_color="black", max_words=100, mask=mask, max_font_size=300)
  57. # generate word cloud by creating a dictionary of words in the text along with the occurences
  58. wc.generate_from_frequencies(Counter(complete_text.split(" ")))
  59. # wc.recolor(color_func=grey_color_func)
  60. wc.to_file("masked_wordcloud.png")
  61. print("Wordcloud created and saved at location " + d + "/masked_wordcloud.png")
Add Comment
Please, Sign In to add comment