Guest User

Untitled

a guest
Jan 19th, 2018
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.98 KB | None | 0 0
  1. import string
  2. import os
  3. from bs4 import BeautifulSoup as bs
  4. from os import listdir
  5. from os.path import isfile, join
  6. import tkinter as tk
  7. from tkinter import *
  8. from operator import itemgetter
  9.  
  10.  
  11. class Retrieve(object):
  12. def __init__(self):
  13. self.root = tk.Tk()
  14. self.root.title("Simple Retrieve!!")
  15.  
  16. frame = tk.Frame(self.root)
  17. frame.pack(expand=True, padx=10, pady=10)
  18.  
  19. label = tk.Label(frame, text="Enter Keyword: ")
  20. label.grid(row=0, column=0)
  21.  
  22. self.entry = tk.Entry(frame, width=50)
  23. self.entry.grid(row=0, column=1)
  24.  
  25. button = tk.Button(frame, text="Search", command= self.search, width=15)
  26. button.grid(row=1, column=0, columnspan=2)
  27.  
  28. self.root.bind_all("<Return>", self.search)
  29.  
  30. def search(self):
  31. keyword = self.entry.get()
  32. mypath = "/Users/Tsu-AngChou/MasterProject/Practice/try_test/"
  33. files = listdir(mypath)
  34. # -------------- Remove all punctuation --------------
  35. translator = str.maketrans("","",string.punctuation)
  36. # ------------------------------------------------------
  37. # -----------------------------------------------------
  38. for f in files:
  39. fullpath = join(mypath, f)
  40. # -----------------------------------------------------
  41. # ------------- Remove .DS_Store file -------------
  42. if f == '.DS_Store':
  43. os.remove(f)
  44. elif isfile(fullpath):
  45.  
  46. # print(f)
  47. # -----------------------------------------------------
  48. # ---------------- deal with content ----------------
  49. for html_cont in range(1):
  50. response = open(f,'r',encoding='utf-8')
  51. html_cont = response.read()
  52. soup = bs(html_cont, 'html.parser')
  53. regular_string = soup.get_text()
  54. new_string = regular_string.translate(translator).split()
  55. new_list = [item[:14] for item in new_string]
  56. a = dict.fromkeys(new_list, f)
  57. # -----------------------------------------------------
  58. # ----------------- Frequency -----------------
  59. wordfreq = []
  60. c = new_list
  61.  
  62. for w in c:
  63. wordfreq.append(c.count(w))
  64. fre = dict(zip(c,wordfreq))
  65.  
  66. # -----------------------------------------------------
  67. # ------------------ Position ------------------
  68. sentence= new_list
  69. keyword1= keyword
  70. words = sentence
  71.  
  72. if keyword in fre:
  73. print(keyword,"Filename:",a[keyword],"--", "Frequency:" ,fre[keyword])
  74. for (i, subword) in enumerate(words):
  75. if (subword == keyword1):
  76. print("Position:",i+1)
  77.  
  78. # ------------------------------------------------------
  79. return a
  80.  
  81.  
  82.  
  83.  
  84. app = Retrieve()
  85. tk.mainloop()
Add Comment
Please, Sign In to add comment