Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import http.client
import urllib.request as url

# Marker for an anchor whose target starts with "h" (i.e. http/https links);
# relative links such as href="/about" are deliberately not matched.
_LINK_MARKER = '<a href="h'
# Length of '<a href="' -- the link text starts right after the opening quote.
_HREF_PREFIX_LEN = len('<a href="')
_TITLE_OPEN = '<title>'
_TITLE_CLOSE = '</title>'

# Everything a failed download or decode is expected to raise:
# URLError/HTTPError/socket errors are OSError subclasses, a malformed URL or
# a non-UTF-8 page raises ValueError (UnicodeDecodeError is a ValueError), and
# a broken HTTP exchange raises HTTPException.
_FETCH_ERRORS = (OSError, ValueError, http.client.HTTPException)

# Pages to visit, in discovery order; the first entry is the start URL.
site_list = []


def _open_page(address):
    """Download *address* to a temporary file and return it opened as UTF-8 text."""
    path, _headers = url.urlretrieve(address)
    return open(path, encoding="utf-8")


def _links_in(line):
    """Yield every http(s) link target found in one line of HTML."""
    pos = line.find(_LINK_MARKER)
    while pos != -1:
        start = pos + _HREF_PREFIX_LEN
        end = line.find('"', start)
        if end == -1:
            # No closing quote on this line: the attribute is truncated, so
            # there is no trustworthy link text to report.
            return
        yield line[start:end]
        pos = line.find(_LINK_MARKER, end)


def _title_in(line):
    """Return the text following ``<title>`` on *line*, or None if absent."""
    start = line.find(_TITLE_OPEN)
    if start == -1:
        return None
    start += len(_TITLE_OPEN)
    end = line.find(_TITLE_CLOSE, start)
    if end == -1:
        # The closing tag is on a later line; report what this line holds.
        return line[start:].rstrip("\n")
    return line[start:end]


def findIt(html):
    """Collect the http(s) links in *html* into ``site_list``.

    Args:
        html: An iterable of text lines, e.g. an open text file.

    Returns:
        The module-level ``site_list``, extended in place with every link
        that was not already present (first-seen order is kept).
    """
    seen = set(site_list)  # O(1) duplicate checks; the list keeps the order
    for line in html:
        for link in _links_in(line):
            if link not in seen:
                seen.add(link)
                site_list.append(link)
    return site_list


def main():
    """Ask for a start URL, gather its links, and print each page's title."""
    site = input("Enter a URL: ")
    site_list.append(site)
    try:
        try:
            with _open_page(site) as html:
                findIt(html)
        except _FETCH_ERRORS:
            print(site + " not reachable")

        for j in site_list:
            try:
                with _open_page(j) as html:
                    for line in html:
                        title = _title_in(line)
                        if title is not None:
                            print(j, '\n', title)
            except _FETCH_ERRORS:
                print(j, " not reachable")
    finally:
        # urlretrieve leaves one temporary file per download behind.
        url.urlcleanup()


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement