Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from __future__ import print_function
- import os
# Passed as the `printq` argument of find_vicinity in the per-book report
# loops: when True, the surrounding words of every counted occurrence are
# printed in addition to the counts.
- MORE_OUTPUT = True
def read_in(filepath):
    """Read a text file and return its whitespace-separated words, lower-cased.

    The file is read in binary mode. On Python 3 the raw bytes are then
    decoded to text (UTF-8, undecodable bytes replaced) so that the returned
    words can be searched with ordinary ``str`` patterns such as ``"tug"``;
    without this step every ``str in bytes`` test at the call sites raises
    ``TypeError``. On Python 2 the content is already ``str`` and is left
    untouched.

    filepath -- path of the file to read.
    Returns a list of lower-cased words in file order.
    Raises IOError/OSError if the file cannot be opened.
    """
    with open(filepath, "rb") as handle:
        content = handle.read()
    # Python 2: bytes is str, nothing to do. Python 3: bytes -> text.
    if not isinstance(content, str):
        content = content.decode("utf-8", "replace")
    return [word.lower() for word in content.split()]
def find_in(wlist, findword):
    """Locate every word of ``wlist`` that contains ``findword`` as a substring.

    wlist    -- sequence of words to scan.
    findword -- substring to look for inside each word.
    Returns a list of ``(word, index)`` tuples in list order, where ``index``
    is the position of the matching word in ``wlist``.
    """
    return [
        (entry, position)
        for position, entry in enumerate(wlist)
        if findword in entry
    ]
def _vicinity_window(wlist, wordnumber, vicinity):
    """Return ``(start, words)`` for the window of ``vicinity`` words on each side of ``wordnumber``."""
    # Clamp at 0: a negative slice start would wrap around to the END of the
    # list and yield an empty or unrelated window for hits near the beginning.
    start = max(0, wordnumber - vicinity)
    return start, wlist[start : wordnumber + vicinity + 1]


def _print_window(window):
    """Print the words of ``window`` on one line, followed by a blank line."""
    for word in window:
        print(word, end = ' ')
    print("\n")


def find_vicinity(wlist, findlist, vicinityword, vicinity, printq = False):
    """Filter occurrences by whether another word appears close to them.

    wlist        -- the full list of words of a text.
    findlist     -- ``(word, index)`` tuples, as produced by ``find_in``.
    vicinityword -- substring to look for in the words surrounding each
                    occurrence. The empty string disables filtering.
    vicinity     -- number of words to inspect on each side of an occurrence.
    printq       -- when true, also print the window of surrounding words for
                    every reported occurrence.

    Returns ``findlist`` itself when ``vicinityword`` is ``""``. Otherwise
    returns a new list of ``(word, index, vicinityword, match_index)`` tuples,
    one per window word that contains ``vicinityword`` (so an occurrence with
    several matching neighbours is listed several times). ``match_index`` is
    the position of the matching neighbour in ``wlist``.
    """
    if vicinityword == "":
        if printq:
            for origword, wordnumber in findlist:
                _print_window(_vicinity_window(wlist, wordnumber, vicinity)[1])
        return findlist

    rlist = []
    for origword, wordnumber in findlist:
        start, window = _vicinity_window(wlist, wordnumber, vicinity)
        for offset, word in enumerate(window):
            if vicinityword in word:
                if printq:
                    _print_window(window)
                # start + offset is the absolute index of the neighbour.
                rlist.append((origword, wordnumber, vicinityword, start + offset))
    return rlist
def _print_report(books, bookscontent, verb, verb_past, noun, vicinity = 30):
    """Print how often ``verb`` occurs, and how often ``noun`` is near it.

    books        -- file names, used as per-book headings.
    bookscontent -- one word list per book, in the same order as ``books``.
    verb         -- substring counted in every book (e.g. "tug").
    verb_past    -- past form of ``verb``, used only in the printed labels.
    noun         -- substring that must appear within ``vicinity`` words of a
                    ``verb`` occurrence to be counted (e.g. "braid").
    vicinity     -- number of words inspected on each side of an occurrence.

    Prints overall totals first, then one section per book. When
    ``MORE_OUTPUT`` is true the per-book sections also print the surrounding
    words of every counted occurrence (done by ``find_vicinity``).
    """
    hits = [find_in(words, verb) for words in bookscontent]
    near = [
        find_vicinity(words, found, noun, vicinity)
        for words, found in zip(bookscontent, hits)
    ]

    # find_vicinity with an empty vicinity word returns the hit list itself,
    # so the plain totals can be taken from `hits` directly.
    total_hits = sum(len(found) for found in hits)
    total_near = sum(len(found) for found in near)
    total_words = sum(len(words) for words in bookscontent)
    # Avoid a ZeroDivisionError when there are no words at all.
    ratio = float(total_near) / total_words if total_words else 0.0

    print("Total {0}s".format(verb), total_hits)
    print("Total {0}s {1}".format(noun, verb_past), total_near)
    print("Total words", total_words)
    print("{0}s {1} per word".format(noun.capitalize(), verb_past), ratio)
    print()

    for book, words, found in zip(books, bookscontent, hits):
        print("############################")
        print(book)
        print()
        # The find_vicinity calls are kept here for their optional printing.
        print("{0}s:".format(verb),
              len(find_vicinity(words, found, "", vicinity, MORE_OUTPUT)))
        print("#####")
        print()
        print("{0}s {1}:".format(noun, verb_past),
              len(find_vicinity(words, found, noun, vicinity, MORE_OUTPUT)))
        print("#####")
        print()
        print()


# The Wheel of Time: braids tugged and skirts smoothed.
books = [
    "New Spring - Robert Jordan.txt",
    "The Eye of the World - Robert Jordan.txt",
    "The Great Hunt - Robert Jordan.txt",
    "The Dragon Reborn - Robert Jordan.txt",
    "The Shadow Rising - Robert Jordan.txt",
    "The Fires of Heaven - Robert Jordan.txt",
    "Lord of Chaos - Robert Jordan.txt",
    "A Crown of Swords - Robert Jordan.txt",
    "The Path of Daggers - Robert Jordan.txt",
    "Winter's Heart - Robert Jordan.txt",
    "Crossroads of Twilight - Robert Jordan.txt",
    "Knife of Dreams - Robert Jordan.txt",
    "The Gathering Storm - Robert Jordan.txt",
    "Towers of Midnight - Robert Jordan.txt",
    "A Memory of Light - Robert Jordan.txt",
]
bookscontent = [read_in(book) for book in books]
_print_report(books, bookscontent, "tug", "tugged", "braid")
_print_report(books, bookscontent, "smooth", "smoothed", "skirt")

# The First Law: gums licked.
books = [
    "The Blade Itself - Joe Abercrombie.txt",
    "Before They Are Hanged - Joe Abercrombie.txt",
    "Last Argument Of Kings - Joe Abercrombie.txt",
]
bookscontent = [read_in(book) for book in books]
_print_report(books, bookscontent, "lick", "licked", "gum")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement