Advertisement
Guest User

Untitled

a guest
Nov 20th, 2019
159
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.25 KB | None | 0 0
  1. import urllib.request as url
  2. site_list = []
  3.  
  4.  
  5. site = input("Enter a URL: ")
  6. site_list.append(site)
  7.  
  8. me, de = url.urlretrieve(site)
  9.  
  10. html = open(me, encoding = "utf-8")
  11.  
  12. def findIt(html):
  13. try:
  14. me, de = url.urlretrieve(site)
  15. html = open(me, encoding="utf-8")
  16.  
  17. for line in html:
  18. if '<a href="h' in line:
  19. starts = line.find('<a href="h')
  20. starts = starts + 9
  21. ends = line.find('"', starts)
  22. link = line[starts:ends]
  23. if link not in site_list:
  24. site_list.append(link)
  25. return site_list
  26. except:
  27. return print(site + " not reachable")
  28.  
  29.  
  30. #print(len(site_list)
  31. links = findIt(html)
  32. #print(findIt(html))
  33. #print(findtag(html))
  34.  
  35. for i in range(len(site_list)):
  36. try:
  37. j = site_list[i]
  38. me, de = url.urlretrieve(j)
  39. html = open(me, encoding="utf-8")
  40.  
  41. for line in html:
  42. if '<title>' in line:
  43. starts = line.find('<title>')
  44. starts = starts + 7
  45. ends = line.find('</title>')
  46. link = line[starts:ends]
  47. print(j , '\n' ,link)
  48. except:
  49. print(j , " not reachable")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement