xosski

Multi-engine URL scraper (Google, Bing, Yahoo via HTML scraping; DuckDuckGo via the ddgs API) with a tkinter GUI

Jan 3rd, 2026
4
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.25 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3. from urllib.parse import quote
  4. import tkinter as tk
  5. from tkinter import ttk, scrolledtext
  6. from ddgs import DDGS
  7.  
# Desktop-browser User-Agent sent with every scrape request so the search
# engines serve a normal HTML results page instead of blocking/altering the
# response for the default python-requests UA.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
}
  11.  
  12. def scrape_google(query, max_results=10):
  13. url = f"https://www.google.com/search?q={quote(query)}"
  14. return scrape_search_results(url, 'a', 'href', max_results)
  15.  
  16. def scrape_bing(query, max_results=10):
  17. url = f"https://www.bing.com/search?q={quote(query)}"
  18. return scrape_search_results(url, 'li', 'a', max_results)
  19.  
  20. def scrape_yahoo(query, max_results=10):
  21. url = f"https://search.yahoo.com/search?p={quote(query)}"
  22. return scrape_search_results(url, 'a', 'href', max_results)
  23.  
  24. def scrape_duckduckgo(query, max_results=10):
  25. results = []
  26. try:
  27. with DDGS() as ddgs:
  28. for r in ddgs.text(query, max_results=max_results):
  29. if "href" in r:
  30. results.append(r["href"])
  31. except Exception as e:
  32. results.append(f"DuckDuckGo error: {e}")
  33. return results
  34.  
  35. def scrape_search_results(search_url, tag, attr, max_results):
  36. try:
  37. response = requests.get(search_url, headers=HEADERS, timeout=5)
  38. soup = BeautifulSoup(response.text, 'html.parser')
  39. links = []
  40. for link in soup.find_all(tag):
  41. href = link.get(attr)
  42. if href and is_valid_url(href):
  43. if href.startswith("/url?q="):
  44. href = href.split("/url?q=")[-1].split("&")[0]
  45. links.append(href)
  46. if len(links) >= max_results:
  47. break
  48. return links
  49. except Exception as e:
  50. return [f"Error scraping {search_url}: {e}"]
  51.  
  52. def is_valid_url(url):
  53. return url.startswith("http") and all(engine not in url for engine in ["google.com", "bing.com", "yahoo.com"])
  54.  
  55. def perform_search():
  56. query = entry.get()
  57. max_results = int(result_count.get())
  58. output.delete(1.0, tk.END)
  59.  
  60. output.insert(tk.END, f"\nSearching for: {query}\n\n")
  61.  
  62. for engine, func in {
  63. "Google": scrape_google,
  64. "Bing": scrape_bing,
  65. "Yahoo": scrape_yahoo,
  66. "DuckDuckGo": scrape_duckduckgo
  67. }.items():
  68. output.insert(tk.END, f"--- {engine} Results ---\n")
  69. urls = func(query, max_results)
  70. for i, url in enumerate(urls, 1):
  71. output.insert(tk.END, f"{i}. {url}\n")
  72. output.insert(tk.END, "\n")
  73.  
# GUI Setup — flat script section: builds the window, wires the Search
# button to perform_search(), then blocks in the Tk event loop.
window = tk.Tk()
window.title("Multi-Engine Web Search Scraper")
window.geometry("700x500")

# Top row container: query entry, per-engine result count, Search button.
frame = ttk.Frame(window)
frame.pack(pady=10)

label = ttk.Label(frame, text="Enter Search Query:")
label.grid(column=0, row=0, padx=5)

# Query text; read by perform_search() via entry.get().
entry = ttk.Entry(frame, width=50)
entry.grid(column=1, row=0, padx=5)

result_label = ttk.Label(frame, text="Results per engine:")
result_label.grid(column=2, row=0, padx=5)

# Per-engine result cap (1-20, default 5); perform_search() parses it with int().
result_count = ttk.Spinbox(frame, from_=1, to=20, width=5)
result_count.set(5)
result_count.grid(column=3, row=0, padx=5)

search_button = ttk.Button(frame, text="Search", command=perform_search)
search_button.grid(column=4, row=0, padx=5)

# Scrollable results pane; written/cleared by perform_search().
output = scrolledtext.ScrolledText(window, wrap=tk.WORD, width=80, height=25)
output.pack(pady=10)

# Enters the Tk main loop; nothing after this line runs until the window closes.
window.mainloop()
  102.  
Advertisement
Add Comment
Please, Sign In to add comment