Advertisement
Guest User

Untitled

a guest
Mar 24th, 2017
66
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.73 KB | None | 0 0
  1. from urllib.request import urlopen
  2. from bs4 import BeautifulSoup
  3. import urllib.request
  4. import re
  5.  
  6. import numpy as np
  7. count=0
  8. query=input("query>>")
  9. query=query.strip().split()
  10. query="+".join(query)
  11.  
  12. html = "https://www.google.co.in/search?site=&source=hp&q="+query+"&gws_rd=ssl"
  13. req = urllib.request.Request(html, headers={'User-Agent': 'Mozilla/5.0'})
  14.  
  15. soup = BeautifulSoup(urlopen(req).read(),"html.parser")
  16.  
  17. #Regex
  18. reg=re.compile(".*&sa=")
  19.  
  20. links = []
  21. #Parsing web urls
  22. for item in soup.find_all('h3', attrs={'class' : 'r'}):
  23.     line = (reg.match(item.a['href'][7:]).group())
  24.     links.append(line[:-4])
  25.  
  26. #print(links)
  27.  
  28. with open("Output.txt", "w") as text_file:
  29.     print("Links: {}".format(links), file=text_file)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement