Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Collect the links on Google's home page that point back to Google.

The script downloads the start page, extracts every ``<a href=...>`` whose
target is an absolute Google URL, prints the matches, and then fetches each
matched URL in turn.

This is a Python 2 script: it relies on BeautifulSoup 3
(``from BeautifulSoup import ...``) and ``urllib.urlopen``.
"""
import re
import subprocess
import time
import urllib

import httplib2
from BeautifulSoup import BeautifulSoup, SoupStrainer

# A scheme is mandatory: httplib2 rejects relative URIs, and Python 2's
# urllib.urlopen treats a scheme-less string as a local file path.
START_URL = "http://www.google.com"

# Absolute hrefs always begin with a scheme, so a bare "google.com" prefix
# can never match; compare against scheme + host instead.
# NOTE: this is a plain prefix test, so a host such as "google.com.example"
# would also match. Parse the URL and compare hostnames if that matters.
GOOGLE_PREFIXES = (
    "http://google.com",
    "https://google.com",
    "http://www.google.com",
    "https://www.google.com",
)

lis = []

http = httplib2.Http()
# httplib2 returns (response headers, body), in that order.
headers, body = http.request(START_URL)

# SoupStrainer('a') restricts parsing to anchor tags. The result is a parse
# tree of tags, not a dict, so walk the tags and read each one's attributes.
soup = BeautifulSoup(body, parseOnlyThese=SoupStrainer('a'))

for anchor in soup.findAll('a'):
    href = anchor.get('href')
    if href is None:
        # Anchor without an href (e.g. a named anchor): nothing to follow.
        continue
    if href.startswith(GOOGLE_PREFIXES):
        print(href)
        lis.append(href)
print(lis)

for m in lis:
    page = urllib.urlopen(m)
    try:
        # Body of the most recently fetched link, kept for further processing.
        data = page.read()
    finally:
        # Always release the connection, even if the read fails.
        page.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement