Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import string
- import os
- from bs4 import BeautifulSoup as bs
- from os import listdir
- from os.path import isfile, join
- import tkinter as tk
- from tkinter import *
- from operator import itemgetter
class Retrieve(object):
    """Tk window that looks up a keyword in the HTML files of one directory.

    The window holds a single entry field and a "Search" button.  A search
    can be started either by clicking the button or by pressing Return.
    Results are printed to standard output.
    """

    # Directory scanned when the constructor is not given ``search_dir``.
    DEFAULT_SEARCH_DIR = "/Users/Tsu-AngChou/MasterProject/Practice/try_test/"

    # Every extracted word is cut to this many characters before comparison,
    # so a keyword longer than this can never match.
    MAX_WORD_LENGTH = 14

    def __init__(self, search_dir=None):
        """Build the window and wire up the search triggers.

        Args:
            search_dir: Directory whose files are searched.  Defaults to
                ``DEFAULT_SEARCH_DIR`` when omitted or ``None``.
        """
        if search_dir is None:
            search_dir = self.DEFAULT_SEARCH_DIR
        self.search_dir = search_dir

        self.root = tk.Tk()
        self.root.title("Simple Retrieve!!")

        frame = tk.Frame(self.root)
        frame.pack(expand=True, padx=10, pady=10)

        label = tk.Label(frame, text="Enter Keyword: ")
        label.grid(row=0, column=0)

        self.entry = tk.Entry(frame, width=50)
        self.entry.grid(row=0, column=1)

        button = tk.Button(frame, text="Search", command=self.search, width=15)
        button.grid(row=1, column=0, columnspan=2)

        # Tk calls bound handlers with an event object, whereas a button
        # ``command`` is called with no arguments; ``search`` accepts both.
        self.root.bind_all("<Return>", self.search)

    def search(self, event=None):
        """Print where the entered keyword occurs in each file.

        For every regular file in ``self.search_dir`` the file is parsed as
        HTML, its text is stripped of ASCII punctuation and split on
        whitespace, and each word is truncated to ``MAX_WORD_LENGTH``
        characters.  If the keyword equals any of those words, the file
        name, the number of occurrences and every 1-based word position are
        printed.

        Args:
            event: Event object supplied by Tk when the search is started
                through the ``<Return>`` binding; unused.

        Returns:
            A dict mapping each word of the last file processed to that
            file's name, or an empty dict if no regular file was processed.
        """
        keyword = self.entry.get()
        mypath = self.search_dir

        # Translation table that deletes every ASCII punctuation character.
        translator = str.maketrans("", "", string.punctuation)

        word_to_file = {}
        for f in listdir(mypath):
            # Finder metadata is not a document: ignore it.  It is skipped
            # rather than deleted so that searching never modifies the
            # directory.
            if f == '.DS_Store':
                continue

            fullpath = join(mypath, f)
            if not isfile(fullpath):
                continue

            # Open via the full path so the lookup does not depend on the
            # current working directory; ``with`` closes the handle.
            with open(fullpath, 'r', encoding='utf-8') as response:
                html_cont = response.read()

            regular_string = bs(html_cont, 'html.parser').get_text()
            words = [
                item[:self.MAX_WORD_LENGTH]
                for item in regular_string.translate(translator).split()
            ]
            word_to_file = dict.fromkeys(words, f)

            # One pass yields both the positions and, via their number, the
            # frequency of the keyword.
            positions = [
                index
                for index, word in enumerate(words, start=1)
                if word == keyword
            ]
            if positions:
                print(keyword, "Filename:", f, "--",
                      "Frequency:", len(positions))
                for position in positions:
                    print("Position:", position)

        return word_to_file
# Create the search window, then hand control to Tk's event loop; this call
# blocks until the window is closed.
app = Retrieve()
app.root.mainloop()
Add Comment
Please, Sign In to add comment