Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from bs4 import BeautifulSoup
- from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
- from more_words import words
- from PIL import Image
- import numpy as np
# HTML file to read the messages from, and the two author names whose
# messages are collected (the names appear redacted in this copy).
msg_file_name = "175.html"
u1 = u"A***** B*****"
u2 = u"H***** B*******"

# Parse inside a context manager so the file handle is released as soon as
# BeautifulSoup has consumed the markup, instead of staying open for the
# rest of the script.
with open(msg_file_name) as msg_file:
    soup = BeautifulSoup(msg_file, "html.parser")

u1_msgs = []
u2_msgs = []

# Each <p> is treated as one message body; its author is read from the
# <span> of the closest preceding <div class="message">.
for msg in soup.find_all("p"):
    header = msg.find_previous("div", class_="message")
    if header is None or header.span is None:
        # No author can be determined for this paragraph; skip it rather
        # than fail with AttributeError on the missing element.
        continue
    name = header.span.text
    if name == u1:
        u1_msgs.append(msg.text)
    elif name == u2:
        u2_msgs.append(msg.text)
# Words to exclude: wordcloud's STOPWORDS plus every entry of `words`
# imported from more_words.
stopwords = set(STOPWORDS).union(words)

# The same image array is passed to WordCloud as the mask and to
# ImageColorGenerator as the source for recolouring.
mask = np.array(Image.open("example.jpg"))
image_colors = ImageColorGenerator(mask)

wc = WordCloud(background_color="white", mask=mask, stopwords=stopwords)

# Generate, recolour and save one cloud per author, reusing the single
# WordCloud instance for both.
for messages, out_path in ((u1_msgs, "u1.png"), (u2_msgs, "u2.png")):
    wc.generate(" ".join(messages))
    wc.recolor(color_func=image_colors)
    wc.to_file(out_path)
Add Comment
Please, Sign In to add comment