Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from __future__ import print_function

import io
import itertools
# When True, the per-book report also prints the context window of every match
# (the flag is passed as `printq` to find_vicinity).
MORE_OUTPUT = True
def read_in(filepath):
    """Return the whitespace-separated words of a text file, lower-cased.

    The file is decoded as UTF-8 text so that the returned words are text
    strings and can be matched against text prefixes such as ``"dice"``.
    Reading the file as raw bytes would make ``word.startswith("dice")``
    raise ``TypeError`` on Python 3.

    Args:
        filepath: Path of the text file to read.

    Returns:
        A list with the file's words in reading order, each lower-cased.
    """
    # io.open accepts `encoding` on both Python 2 and 3; undecodable bytes are
    # replaced instead of aborting the whole run on one bad character.
    with io.open(filepath, encoding="utf-8", errors="replace") as handle:
        return handle.read().lower().split()
def find_in(wlist, findword):
    """Locate every word of *wlist* that starts with *findword*.

    Matching is by prefix (``str.startswith``), not by substring, so
    ``"dic"`` matches ``"dice"`` but not ``"undice"``.

    Args:
        wlist: Sequence of words to search.
        findword: Prefix to look for.

    Returns:
        A list of ``(word, index)`` tuples, one per matching word, in the
        order the words appear in *wlist*.
    """
    return [
        (word, wordnumber)
        for wordnumber, word in enumerate(wlist)
        if word.startswith(findword)
    ]
def _print_window(window):
    """Print the words of *window* on one line, then end the line and add a blank one."""
    for word in window:
        print(word, end=' ')
    print("\n")


def find_vicinity(wlist, findlist, vicinityword, vicinity, printq = False):
    """Find words starting with *vicinityword* near previously found words.

    For every ``(word, index)`` entry of *findlist*, the window
    ``wlist[index - vicinity : index + vicinity + 1]`` is inspected. The
    window start is clamped to 0: a negative slice start would be counted
    from the end of the list, which silently yields an empty window (or a
    wrong reported position) for hits near the beginning of the text.

    Args:
        wlist: Sequence of words that was searched.
        findlist: ``(word, index)`` tuples as returned by ``find_in``.
        vicinityword: Prefix to look for inside each window. The empty
            string means "no filter": *findlist* itself is returned.
        vicinity: Number of words inspected on each side of a found word.
        printq: When true, print the window of every reported match (for an
            empty *vicinityword*: the window of every entry of *findlist*).

    Returns:
        *findlist* unchanged when *vicinityword* is empty. Otherwise a list
        of ``(word, index, vicinityword, match_index)`` tuples, one for every
        word in a window that starts with *vicinityword*; a found word with
        several such neighbours is therefore reported several times.
    """
    if vicinityword == "":
        if printq:
            for _, wordnumber in findlist:
                start = max(0, wordnumber - vicinity)
                _print_window(wlist[start:wordnumber + vicinity + 1])
        return findlist

    rlist = []
    for origword, wordnumber in findlist:
        start = max(0, wordnumber - vicinity)
        window = wlist[start:wordnumber + vicinity + 1]
        for offset, word in enumerate(window):
            if not word.startswith(vicinityword):
                continue
            if printq:
                # The whole window is printed once per matching neighbour.
                _print_window(window)
            rlist.append((origword, wordnumber, vicinityword, start + offset))
    return rlist
# Text files to analyse; the report lists them in this order.
books = [
    "New Spring - Robert Jordan.txt",
    "The Eye of the World - Robert Jordan.txt",
    "The Great Hunt - Robert Jordan.txt",
    "The Dragon Reborn - Robert Jordan.txt",
    "The Shadow Rising - Robert Jordan.txt",
    "The Fires of Heaven - Robert Jordan.txt",
    "Lord of Chaos - Robert Jordan.txt",
    "A Crown of Swords - Robert Jordan.txt",
    "The Path of Daggers - Robert Jordan.txt",
    "Winter's Heart - Robert Jordan.txt",
    "Crossroads of Twilight - Robert Jordan.txt",
    "Knife of Dreams - Robert Jordan.txt",
    "The Gathering Storm - Robert Jordan.txt",
    "Towers of Midnight - Robert Jordan.txt",
    "A Memory of Light - Robert Jordan.txt",
]
# Prefix of the words whose occurrences are counted.
word1 = "dice"
# Prefix searched for in the vicinity of every word1 occurrence.
word2 = "roll"
# Number of words inspected on each side of a word1 occurrence.
VICINITY = 30

# Word list of every book, in the same order as `books`.
bookscontent = [read_in(book) for book in books]
# Per book: (word, index) for every word starting with word1.
books_tug = [find_in(content, word1) for content in bookscontent]
# Per book: the word1 occurrences (an empty vicinity word applies no filter).
books_tugs = [
    find_vicinity(bc, bt, "", VICINITY)
    for bc, bt in zip(bookscontent, books_tug)
]
# Per book: one entry for every word2 match near a word1 occurrence.
books_tugs_tent = [
    find_vicinity(bc, bt, word2, VICINITY)
    for bc, bt in zip(bookscontent, books_tug)
]

# Totals are computed once and reused by the summary and the table below.
total_word1 = sum(len(tugs) for tugs in books_tugs)
total_pairs = sum(len(tents) for tents in books_tugs_tent)
total_words = sum(len(content) for content in bookscontent)

label_word1 = "'" + word1 + "'"
label_pairs = "'" + word2 + "'+'" + word1 + "'"

print("Total " + label_word1, total_word1)
print("Total " + label_pairs, total_pairs)
print("Total words", total_words)
# float() keeps true division on Python 2; an empty corpus reports 0.0 instead
# of raising ZeroDivisionError.
print(label_pairs + " per word", float(total_pairs) / total_words if total_words else 0.0)
print()

# Per-book details. find_vicinity is called again here because, with
# MORE_OUTPUT set, it prints the context windows before the count line.
# NOTE(review): the flattened source does not show whether the two trailing
# blank-line prints belong to the loop body; they are kept inside it.
for book, bc, bt in zip(books, bookscontent, books_tug):
    print("############################")
    print(book)
    print()
    occurrences = find_vicinity(bc, bt, "", VICINITY, MORE_OUTPUT)
    print(label_word1, len(occurrences))
    print("#####")
    print()
    pairs = find_vicinity(bc, bt, word2, VICINITY, MORE_OUTPUT)
    print(label_pairs, len(pairs))
    print("#####")
    print()
    print()

# Summary as a Markdown table, reusing the results computed above.
print("book | " + label_word1 + " | " + label_pairs)
print(":--|:--|:--")
for book, tugs, tents in zip(books, books_tugs, books_tugs_tent):
    print(book, "|", len(tugs), "|", len(tents))
print("total |", total_word1, "|", total_pairs)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement