Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import pickle
import re
import urllib.request

from bs4 import BeautifulSoup
class Dialog:
    """One spoken line taken from an episode transcript.

    Instances are plain attribute holders; the script stores them in a list
    and pickles that list to disk.

    Args:
        speaker: Text found before the first colon of the transcript line.
        text: Text found after that colon.
        season: Season the line belongs to.
        episode: Episode the line belongs to.
    """

    def __init__(self, speaker, text, season, episode):
        self.speaker = speaker
        self.text = text
        self.season = season
        self.episode = episode

    def __repr__(self):
        # Makes cached entries readable when inspecting the transcript list.
        return "Dialog(speaker={!r}, text={!r}, season={!r}, episode={!r})".format(
            self.speaker, self.text, self.season, self.episode
        )
def update_episode_transcript(url, season, episode):
    """Fetch one transcript page and append its dialog lines to TRANSCRIPT.

    Every ``<dd>`` element whose text contains a colon is split at the first
    colon: the part before it is stored as the speaker and the part after it,
    with newlines removed, as the spoken text. Elements without a colon are
    ignored. Once the page has been processed, the whole TRANSCRIPT list is
    pickled to ``TRANSCRIPT.p``.

    Args:
        url: Address of the transcript page to download.
        season: Season value recorded on every Dialog created here.
        episode: Episode value recorded on every Dialog created here.
    """
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")
    for entry in soup.find_all("dd"):
        # partition() splits at the first colon only, so colons inside the
        # spoken text stay part of the text.
        speaker, colon, speech = entry.text.partition(":")
        if not colon:
            continue
        TRANSCRIPT.append(Dialog(speaker, speech.replace("\n", ""), season, episode))
    # Save after every episode; the context manager closes the file handle
    # even if pickling fails.
    with open("TRANSCRIPT.p", "wb") as cache_file:
        pickle.dump(TRANSCRIPT, cache_file)
# Load the transcript list cached by a previous run, if there is one.
# NOTE: pickle can execute arbitrary code while loading; TRANSCRIPT.p must
# only ever be a file this script wrote itself.
try:
    with open("TRANSCRIPT.p", "rb") as cache_file:
        TRANSCRIPT = pickle.load(cache_file)
except Exception:
    # Deliberately broad: a missing, truncated or otherwise unreadable cache
    # simply means starting from an empty transcript.
    TRANSCRIPT = []
# Episodes already present in the loaded cache. Fetching them again would
# append the same dialog a second time and inflate every count on each run.
cached_episodes = {(entry.season, entry.episode) for entry in TRANSCRIPT}

# Walk the transcript category page of seasons 1 through 7.
for season in range(1, 8):
    print("UPDATING SEASON " + str(season) + " TRANSCRIPTS")
    category_url = "http://mlp.wikia.com/wiki/Category:Season_" + str(season) + "_transcripts"
    with urllib.request.urlopen(category_url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")
    for link in soup.find_all("a"):
        if not link.text.startswith("Transcripts/"):
            continue
        # The episode title is the link text without the "Transcripts/" prefix.
        episode = link.text[len("Transcripts/"):]
        href = link.get("href")
        if href is None or (season, episode) in cached_episodes:
            continue
        update_episode_transcript("http://mlp.wikia.com" + href, season, episode)
        cached_episodes.add((season, episode))
        print("Updated character transcripts for episode: " + episode)

# Final save; the context manager makes sure the file handle is closed.
with open("TRANSCRIPT.p", "wb") as cache_file:
    pickle.dump(TRANSCRIPT, cache_file)
# The six characters whose dialog is analysed, in the order they are reported.
_MANE_SIX_NAMES = (
    "Twilight Sparkle",
    "Pinkie Pie",
    "Applejack",
    "Fluttershy",
    "Rainbow Dash",
    "Rarity",
)

# MANE_SIX[speaker][mentioned] counts the lines in which `speaker` mentions
# `mentioned`. Each speaker gets an independent dictionary of zeroed counters.
MANE_SIX = {speaker: dict.fromkeys(_MANE_SIX_NAMES, 0) for speaker in _MANE_SIX_NAMES}

# Nicknames searched for in dialog text to detect a mention of each character.
NAMES = {
    "Twilight Sparkle": ["twilight"],
    "Pinkie Pie": ["pinkie"],
    "Applejack": ["applejack", "AJ", "A.J. "],
    "Fluttershy": ["fluttershy"],
    "Rainbow Dash": ["dash", "dashie"],
    "Rarity": ["rarity"],
}
# One case-insensitive pattern per character, matching any of its nicknames
# as a whole word. Matching ignores case so that nicknames written with
# capitals ("AJ", "A.J. ") can be found, and the word boundaries keep short
# nicknames from matching inside unrelated words. Surrounding whitespace in
# a nickname is dropped because the boundary check replaces it.
_MENTION_PATTERNS = {
    name: re.compile(
        r"(?<!\w)(?:"
        + "|".join(re.escape(nickname.strip()) for nickname in nicknames)
        + r")(?!\w)",
        re.IGNORECASE,
    )
    for name, nicknames in NAMES.items()
    if nicknames  # an empty alternation would match every line
}

# Tally, for every line spoken by one of the tracked characters, which
# characters it mentions. A line counts at most once per mentioned character,
# even when several of that character's nicknames appear in it.
for line in TRANSCRIPT:
    if line.speaker not in MANE_SIX:
        continue
    for name, pattern in _MENTION_PATTERNS.items():
        if pattern.search(line.text):
            MANE_SIX[line.speaker][name] += 1
# Print, for each character, how often she mentions every other character
# and finally how often she mentions herself.
for pony, counts in MANE_SIX.items():
    print(pony.upper() + ":")
    for other_pony in MANE_SIX:
        if pony == other_pony:
            # Self-mentions are reported separately after the others.
            continue
        mention_total = str(counts[other_pony])
        print(" Mentions " + other_pony.upper() + " " + mention_total + " times.")
    print(" Mentions herself " + str(counts[pony]) + " times.")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement