Najeebsk

INDEX-EXT-SCRAP.py

Jan 8th, 2024
import tkinter as tk
from tkinter import ttk
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

class IndexOfScraperApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Najeeb Shah Khan Index Of URL Scraper By Extension")

        # Input widgets: directory-listing URL, file extension, and a scrape button.
        self.url_label = ttk.Label(root, text="Enter Index Of URL:")
        self.url_entry = ttk.Entry(root, width=50)
        self.extension_label = ttk.Label(root, text="Enter File Extension:")
        self.extension_entry = ttk.Entry(root, width=10)
        self.scrape_button = ttk.Button(root, text="Scrape URLs", command=self.scrape_urls)
        # Read-only text widget that shows the scraped links.
        self.result_text = tk.Text(root, height=36, width=124, wrap="none", state=tk.DISABLED)

        self.url_label.grid(row=0, column=0, pady=5)
        self.url_entry.grid(row=0, column=1, pady=5)
        self.extension_label.grid(row=0, column=2, pady=5)
        self.extension_entry.grid(row=0, column=3, pady=5)
        self.scrape_button.grid(row=0, column=4, pady=5)
        self.result_text.grid(row=1, column=0, columnspan=5, pady=10)

    def scrape_urls(self):
        url = self.url_entry.get()
        extension = self.extension_entry.get()

        if not url or not extension:
            return

        # Fetch the directory listing; report any network/HTTP error in the text box.
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            self.display_result(f"Error: {e}")
            return

        # Collect every <a href> that ends with the requested extension.
        soup = BeautifulSoup(response.content, 'html.parser')
        links = [a['href'] for a in soup.find_all('a', href=True) if a['href'].endswith(f'.{extension}')]

        if links:
            # Resolve relative hrefs against the page URL so the output is directly usable.
            absolute_links = [urljoin(url, link) for link in links]
            result_text = "\n".join(absolute_links)
            self.display_result(result_text)
        else:
            self.display_result(f"No {extension} links found in the directory.")

    def display_result(self, text):
        # Temporarily enable the widget, replace its contents, then lock it again.
        self.result_text.config(state=tk.NORMAL)
        self.result_text.delete(1.0, tk.END)
        self.result_text.insert(tk.END, text)
        self.result_text.config(state=tk.DISABLED)

if __name__ == "__main__":
    root = tk.Tk()
    app = IndexOfScraperApp(root)
    root.mainloop()
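
Usage note: the GUI above is a thin wrapper around the link-extraction logic in scrape_urls. A minimal command-line sketch of the same idea, assuming the same requests/BeautifulSoup dependencies and using placeholder values for the listing URL and extension, could look like this:

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

url = "https://example.com/files/"   # placeholder "Index of" URL
extension = "pdf"                    # placeholder extension, without the leading dot

response = requests.get(url, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.content, "html.parser")
hrefs = [a["href"] for a in soup.find_all("a", href=True) if a["href"].endswith(f".{extension}")]

# urljoin resolves relative listing entries (e.g. "report.pdf") against the page URL.
for link in hrefs:
    print(urljoin(url, link))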