Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Sample poems keyed by title; each value is the raw text to be digested.
# Every value is written one sentence per line using implicit concatenation.
corpuses = {
    "The Wasteland": (
        "I will show you fear in a handful of dust. "
        "In the mountains there you feel free. "
        "A crowd flowed over London Bridge."
    ),
    "The Road Not Taken": (
        "Two roads diverged in a yellow wood. "
        "I kept the first for another day. "
        "I doubted if I should ever come back."
    ),
    "Trees": (
        "I think that I shall never see a poem lovely as a tree. "
        "A tree that looks at God all day and lifts her leafy arms to pray."
    ),
}
def digest(corpus):
    """Digest one corpus into a ``{word: {sentence: frequency}}`` mapping.

    The corpus is split into sentences on ``"."``. Each sentence is stripped
    of surrounding whitespace and split into whitespace-separated,
    lower-cased words. For every word the result records how many times it
    occurs in each sentence that contains it.

    Args:
        corpus: The text to digest.

    Returns:
        A dict mapping each lower-cased word to a dict that maps the
        stripped sentence text to the word's number of occurrences in that
        sentence.
    """
    from collections import Counter

    result = {}
    for raw_sentence in corpus.split("."):
        # Key on the stripped sentence so keys do not carry the blank that
        # follows the previous sentence's full stop.
        sentence = raw_sentence.strip()
        # Counter tallies all words in a single pass. An empty piece (such
        # as the text after the final full stop) has no words, so it adds
        # nothing to the result.
        counts = Counter(word.lower() for word in sentence.split())
        for word, frequency in counts.items():
            result.setdefault(word, {})[sentence] = frequency
    return result
def combine_corpuses(corpuses):
    """Merge digested corpuses into one ``{word: {sentence: frequency}}``.

    Args:
        corpuses: An iterable of dicts of the form
            ``{word: {sentence: frequency}}``. It is traversed once, so a
            one-shot iterator is accepted.

    Returns:
        A new dict containing every word found in any input, in order of
        first appearance. Each word maps to a new dict holding the
        sentence/frequency pairs gathered from all inputs. If the same
        sentence occurs under a word in several inputs, the value from the
        later input wins. The input dicts are not modified.
    """
    result = {}
    for corpus in corpuses:
        for word, by_sentence in corpus.items():
            # setdefault creates a fresh inner dict per word, so the merged
            # result never aliases (or mutates) the callers' dicts.
            result.setdefault(word, {}).update(by_sentence)
    return result
# Digest each poem on its own, merge the per-poem digests into one mapping,
# and print the merged result. Titles are not needed here, only the texts.
digested_corpuses = list(map(digest, corpuses.values()))
single_digest = combine_corpuses(digested_corpuses)
print(single_digest)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement