Advertisement
Guest User

Untitled

a guest
Mar 22nd, 2017
2,620
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.21 KB | None | 0 0
  1. from __future__ import print_function
  2. import os
  3.  
  4. MORE_OUTPUT = True
  5.  
  6. def read_in(filepath):
  7.     with open(filepath, "rb") as file:
  8.         content = file.read()
  9.     return [c.lower() for c in content.split()]
  10.    
  11. def find_in(wlist, findword):
  12.     rlist = []
  13.     for wordnumber, word in enumerate(wlist):
  14.         if findword in word:
  15.             rlist.append((word, wordnumber))
  16.     return rlist
  17.  
  18.  
  19. #vicinityword == "" returns original list and print all occurrences
  20. def find_vicinity(wlist, findlist, vicinityword, vicinity, printq = False):
  21.     rlist = []
  22.     if vicinityword == "":
  23.         if printq:
  24.             for origword, wordnumber in findlist:
  25.                 for word in wlist[wordnumber - vicinity : wordnumber + vicinity + 1]:
  26.                     print(word, end = ' ')
  27.                 print("\n")
  28.         return findlist
  29.     for origword, wordnumber in findlist:
  30.         for offset, word in enumerate(wlist[wordnumber - vicinity : wordnumber + vicinity + 1]):
  31.             if printq:
  32.                 if vicinityword in word:
  33.                     for vword in wlist[wordnumber - vicinity : wordnumber + vicinity + 1]:
  34.                         print(vword, end = ' ')
  35.                     print("\n")
  36.             if vicinityword in word:
  37.                 rlist.append((origword, wordnumber, vicinityword, wordnumber - vicinity + offset))
  38.     return rlist
  39.  
  40. books = ["New Spring - Robert Jordan.txt", "The Eye of the World - Robert Jordan.txt", "The Great Hunt - Robert Jordan.txt", "The Dragon Reborn - Robert Jordan.txt", "The Shadow Rising - Robert Jordan.txt", "The Fires of Heaven - Robert Jordan.txt", "Lord of Chaos - Robert Jordan.txt", "A Crown of Swords - Robert Jordan.txt", "The Path of Daggers - Robert Jordan.txt", "Winter's Heart - Robert Jordan.txt", "Crossroads of Twilight - Robert Jordan.txt", "Knife of Dreams - Robert Jordan.txt", "The Gathering Storm - Robert Jordan.txt", "Towers of Midnight - Robert Jordan.txt", "A Memory of Light - Robert Jordan.txt"]
  41.  
  42. bookscontent = [read_in(book) for book in books]
  43. books_tug = [find_in(book, "tug") for book in bookscontent]
  44. books_tugs = [find_vicinity(bc, bt, "", 30) for book, bc, bt in zip(books, bookscontent, books_tug)]
  45. books_braids_tugged = [find_vicinity(bc, bt, "braid", 30) for book, bc, bt in zip(books, bookscontent, books_tug)]
  46.  
  47. print("Total tugs", sum([len(tugs) for tugs in books_tugs]))
  48. print("Total braids tugged", sum([len(tugs) for tugs in books_braids_tugged]))
  49. print("Total words", sum([len(book) for book in bookscontent]))
  50. print("Braids tugged per word", float(sum([len(tugs) for tugs in books_braids_tugged]))/sum([len(book) for book in bookscontent]))
  51.  
  52. print()
  53.  
  54. for book, bc, bt in zip(books, bookscontent, books_tug):
  55.     print("############################")
  56.     print(book)
  57.     print()
  58.     print("tugs:", len(find_vicinity(bc, bt, "", 30, MORE_OUTPUT)))
  59.     print("#####")
  60.     print()
  61.     print("braids tugged:", len(find_vicinity(bc, bt, "braid", 30, MORE_OUTPUT)))
  62.     print("#####")
  63.     print()
  64.  
  65. print()
  66.    
  67. books_smooth = [find_in(book, "smooth") for book in bookscontent]
  68. books_smooths = [find_vicinity(bc, bt, "", 30) for book, bc, bt in zip(books, bookscontent, books_smooth)]
  69. books_skirts_smoothed = [find_vicinity(bc, bt, "skirt", 30) for book, bc, bt in zip(books, bookscontent, books_smooth)]
  70.  
  71. print("Total smooths", sum([len(smooths) for smooths in books_smooths]))
  72. print("Total skirts smoothed", sum([len(smooths) for smooths in books_skirts_smoothed]))
  73. print("Total words", sum([len(book) for book in bookscontent]))
  74. print("Skirts smoothed per word", float(sum([len(smooths) for smooths in books_skirts_smoothed]))/sum([len(book) for book in bookscontent]))
  75. print()
  76.  
  77.  
  78. for book, bc, bt in zip(books, bookscontent, books_smooth):
  79.     print("############################")
  80.     print(book)
  81.     print()
  82.     print("smooths:", len(find_vicinity(bc, bt, "", 30, MORE_OUTPUT)))
  83.     print("#####")
  84.     print()
  85.     print("skirts smoothed:", len(find_vicinity(bc, bt, "skirt", 30, MORE_OUTPUT)))
  86.     print("#####")
  87.     print()
  88.  
  89. print()
  90.  
  91. books = ["The Blade Itself - Joe Abercrombie.txt", "Before They Are Hanged - Joe Abercrombie.txt", "Last Argument Of Kings - Joe Abercrombie.txt"]
  92. bookscontent = [read_in(book) for book in books]
  93. books_lick = [find_in(book, "lick") for book in bookscontent]
  94. books_licks = [find_vicinity(bc, bt, "", 30) for book, bc, bt in zip(books, bookscontent, books_lick)]
  95. books_licks_licked = [find_vicinity(bc, bt, "gum", 30) for book, bc, bt in zip(books, bookscontent, books_lick)]
  96.  
  97. print("Total licks", sum([len(licks) for licks in books_licks]))
  98. print("Total gumss licked", sum([len(licks) for licks in books_licks_licked]))
  99. print("Total words", sum([len(book) for book in bookscontent]))
  100. print("Gums licked per word", float(sum([len(licks) for licks in books_licks_licked]))/sum([len(book) for book in bookscontent]))
  101. print()
  102.  
  103. for book, bc, bt in zip(books, bookscontent, books_licks):
  104.     print("############################")
  105.     print(book)
  106.     print()
  107.     print("licks:", len(find_vicinity(bc, bt, "", 30, MORE_OUTPUT)))
  108.     print("#####")
  109.     print()
  110.     print("gums licked:", len(find_vicinity(bc, bt, "gum", 30, MORE_OUTPUT)))
  111.     print("#####")
  112.     print()
  113.    
  114.    
  115.    
  116. print()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement