Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- from bs4 import BeautifulSoup
- from urllib.parse import quote
- import tkinter as tk
- from tkinter import ttk, scrolledtext
- from ddgs import DDGS
# Browser-like User-Agent so the search engines serve a normal HTML page
# instead of a bot/consent interstitial.
HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
def scrape_google(query, max_results=10):
    """Scrape up to *max_results* outbound links from Google's HTML results page."""
    search_url = "https://www.google.com/search?q=" + quote(query)
    return scrape_search_results(search_url, 'a', 'href', max_results)
def scrape_bing(query, max_results=10):
    """Scrape up to *max_results* outbound links from Bing's HTML results page.

    Bug fix: the original called scrape_search_results(url, 'li', 'a', ...),
    which made the scraper iterate <li> tags and read their 'a' *attribute* —
    always None — so Bing silently returned an empty list every time. Result
    URLs live in the 'href' attribute of <a> tags, same as the other engines.
    """
    url = f"https://www.bing.com/search?q={quote(query)}"
    return scrape_search_results(url, 'a', 'href', max_results)
def scrape_yahoo(query, max_results=10):
    """Scrape up to *max_results* outbound links from Yahoo's HTML results page."""
    search_url = "https://search.yahoo.com/search?p=" + quote(query)
    return scrape_search_results(search_url, 'a', 'href', max_results)
def scrape_duckduckgo(query, max_results=10):
    """Fetch up to *max_results* result URLs via the ddgs client.

    Unlike the HTML scrapers, this uses the DDGS API wrapper. On any
    failure the list contains a single human-readable error string.
    """
    collected = []
    try:
        with DDGS() as client:
            for hit in client.text(query, max_results=max_results):
                if "href" in hit:
                    collected.append(hit["href"])
    except Exception as exc:
        # Best-effort: surface the error in the results pane rather than crash.
        collected.append(f"DuckDuckGo error: {exc}")
    return collected
def scrape_search_results(search_url, tag, attr, max_results):
    """Fetch *search_url* and collect up to *max_results* outbound result links.

    Parameters:
        search_url: full results-page URL to fetch.
        tag: HTML tag name to scan (e.g. 'a').
        attr: attribute on that tag holding the link URL (e.g. 'href').
        max_results: cap on the number of links returned.

    Returns a list of URLs; on any failure, a one-item list with an error
    message (the GUI prints whatever comes back, so errors stay visible).

    Bug fix: Google wraps results as relative "/url?q=<real>&..." links.
    The original validated the raw href *before* unwrapping, and since
    "/url?q=..." does not start with "http" it never passed is_valid_url(),
    making the unwrap branch unreachable — Google always returned zero
    links. Unwrap first, then validate the real target URL.
    """
    try:
        response = requests.get(search_url, headers=HEADERS, timeout=5)
        soup = BeautifulSoup(response.text, 'html.parser')
        links = []
        for link in soup.find_all(tag):
            href = link.get(attr)
            if not href:
                continue
            # Unwrap Google's redirect links before validating.
            if href.startswith("/url?q="):
                href = href.split("/url?q=")[-1].split("&")[0]
            if is_valid_url(href):
                links.append(href)
                if len(links) >= max_results:
                    break
        return links
    except Exception as e:
        return [f"Error scraping {search_url}: {e}"]
def is_valid_url(url):
    """Return True for absolute http(s) URLs that don't point back at a search engine."""
    if not url.startswith("http"):
        return False
    blocked_domains = ("google.com", "bing.com", "yahoo.com")
    return not any(domain in url for domain in blocked_domains)
def perform_search():
    """Run the entry-box query against every engine and list the links in the output pane."""
    query = entry.get()
    max_results = int(result_count.get())
    output.delete(1.0, tk.END)
    output.insert(tk.END, f"\nSearching for: {query}\n\n")
    engines = (
        ("Google", scrape_google),
        ("Bing", scrape_bing),
        ("Yahoo", scrape_yahoo),
        ("DuckDuckGo", scrape_duckduckgo),
    )
    for engine_name, scraper in engines:
        output.insert(tk.END, f"--- {engine_name} Results ---\n")
        for rank, url in enumerate(scraper(query, max_results), start=1):
            output.insert(tk.END, f"{rank}. {url}\n")
        output.insert(tk.END, "\n")
# GUI Setup: a single top row (query entry, per-engine result count, Search
# button) above a scrollable output pane. Widget creation order matters:
# perform_search reads the module-level `entry`, `result_count`, and `output`
# names bound below, so they must exist before the button can be clicked.
window = tk.Tk()
window.title("Multi-Engine Web Search Scraper")
window.geometry("700x500")

# Top control bar, laid out with grid inside its own frame.
frame = ttk.Frame(window)
frame.pack(pady=10)

label = ttk.Label(frame, text="Enter Search Query:")
label.grid(column=0, row=0, padx=5)

entry = ttk.Entry(frame, width=50)
entry.grid(column=1, row=0, padx=5)

result_label = ttk.Label(frame, text="Results per engine:")
result_label.grid(column=2, row=0, padx=5)

# Spinbox bounds match perform_search's int() parse; default is 5 results.
result_count = ttk.Spinbox(frame, from_=1, to=20, width=5)
result_count.set(5)
result_count.grid(column=3, row=0, padx=5)

search_button = ttk.Button(frame, text="Search", command=perform_search)
search_button.grid(column=4, row=0, padx=5)

# Read-write results pane; perform_search clears and repopulates it.
output = scrolledtext.ScrolledText(window, wrap=tk.WORD, width=80, height=25)
output.pack(pady=10)

# Blocks here until the window is closed.
window.mainloop()
Advertisement
Add Comment
Please, Sign In to add comment