Advertisement
Guest User

Untitled

a guest
Mar 24th, 2017
840
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.51 KB | None | 0 0
  1. from __future__ import print_function
  2. import itertools
  3.  
# Verbosity switch for the per-book report: it is passed as the ``printq``
# argument of find_vicinity, so when True every match is printed together
# with its surrounding words.
MORE_OUTPUT = True
  5.  
  6. def read_in(filepath):
  7.     with open(filepath, "rb") as file:
  8.         content = file.read()
  9.     return [c.lower() for c in content.split()]
  10.    
  11. def find_in(wlist, findword):
  12.     rlist = []
  13.     for wordnumber, word in enumerate(wlist):
  14.         if word.startswith(findword): #if findword in word:
  15.             rlist.append((word, wordnumber))
  16.     return rlist
  17.  
  18.  
  19. #vicinityword == "" returns original list and print all occurrences
  20. def find_vicinity(wlist, findlist, vicinityword, vicinity, printq = False):
  21.     rlist = []
  22.     if vicinityword == "":
  23.         if printq:
  24.             for origword, wordnumber in findlist:
  25.                 for word in wlist[wordnumber - vicinity : wordnumber + vicinity + 1]:
  26.                     print(word, end = ' ')
  27.                 print("\n")
  28.         return findlist
  29.     for origword, wordnumber in findlist:
  30.         for offset, word in enumerate(wlist[wordnumber - vicinity : wordnumber + vicinity + 1]):
  31.             if printq:
  32.                 if word.startswith(vicinityword): #if vicinityword in word:
  33.                     for vword in wlist[wordnumber - vicinity : wordnumber + vicinity + 1]:
  34.                         print(vword, end = ' ')
  35.                     print("\n")
  36.             if word.startswith(vicinityword): #if vicinityword in word:
  37.                 rlist.append((origword, wordnumber, vicinityword, wordnumber - vicinity + offset))
  38.     return rlist
  39.  
  40.  
  41.  
  42. books = ["New Spring - Robert Jordan.txt", "The Eye of the World - Robert Jordan.txt", "The Great Hunt - Robert Jordan.txt", "The Dragon Reborn - Robert Jordan.txt", "The Shadow Rising - Robert Jordan.txt", "The Fires of Heaven - Robert Jordan.txt", "Lord of Chaos - Robert Jordan.txt", "A Crown of Swords - Robert Jordan.txt", "The Path of Daggers - Robert Jordan.txt", "Winter's Heart - Robert Jordan.txt", "Crossroads of Twilight - Robert Jordan.txt", "Knife of Dreams - Robert Jordan.txt", "The Gathering Storm - Robert Jordan.txt", "Towers of Midnight - Robert Jordan.txt", "A Memory of Light - Robert Jordan.txt"]
  43.  
  44.  
  45. word1 = "dice"
  46. word2 = "roll"
  47.  
  48. bookscontent = [read_in(book) for book in books]
  49. books_tug = [find_in(book, word1) for book in bookscontent]
  50. books_tugs = [find_vicinity(bc, bt, "", 30) for book, bc, bt in zip(books, bookscontent, books_tug)]
  51. books_tugs_tent = [find_vicinity(bc, bt, word2, 30) for book, bc, bt in zip(books, bookscontent, books_tug)]
  52.  
  53. print("Total '"+word1+"'", sum([len(tugs) for tugs in books_tugs]))
  54. print("Total '"+word2+"'+'"+word1+"'", sum([len(tugs) for tugs in books_tugs_tent]))
  55. print("Total words", sum([len(book) for book in bookscontent]))
  56. print("'"+word2+"'+'"+word1+"' per word", float(sum([len(tugs) for tugs in books_tugs_tent]))/sum([len(book) for book in bookscontent]))
  57.  
  58.  
  59. print()
  60.  
  61. for book, bc, bt in zip(books, bookscontent, books_tug):
  62.     print("############################")
  63.     print(book)
  64.     print()
  65.     print("'"+word1+"'", len(find_vicinity(bc, bt, "", 30, MORE_OUTPUT)))
  66.     print("#####")
  67.     print()
  68.     print("'"+word2+"'+'"+word1+"'", len(find_vicinity(bc, bt, word2, 30, MORE_OUTPUT)))
  69.     print("#####")
  70.     print()
  71.  
  72. print()
  73. print("book | '"+word1+ "' | '" + word2+"'+'"+word1+"'")
  74. print(":--|:--|:--")
  75.  
  76. for book, bc, bt in zip(books, bookscontent, books_tug):
  77.     print(book, "|", len(find_vicinity(bc, bt, "", 30)), "|", len(find_vicinity(bc, bt, word2, 30)))
  78. print("total |",sum([len(tugs) for tugs in books_tugs]), "|", sum([len(tugs) for tugs in books_tugs_tent]))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement