Advertisement
Guest User

Untitled

a guest
May 31st, 2016
53
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.76 KB | None | 0 0
import urllib.request
from html.parser import HTMLParser
  2.  
  3.  
  4. def getLinks(links):
  5. page = urllib.request.urlopen(links)
  6. numlinks = 0
  7.  
  8. line = page.readline().decode('utf-8')
  9. while '< a href =' not in line:
  10. line = page.readline().decode('utf-8')
  11.  
  12. line = page.readline().decode('utf-8')
  13. while '< a href =' not in line:
  14. numlinks = numlinks + 1
  15. line = page.readline().decode('utf-8')
  16.  
  17. line = page.readline().decode('utf-8')
  18. while '> </a>' not in line:
  19. line = page.readline().decode('utf-8')
  20.  
  21. line = page.readline().deode('utf-8')
  22. while line != "" and '> </a>' not in line:
  23. print (line[:-1])
  24. line = page.readline().decode('utf-8')
  25.  
  26. print("number of links =",numlinks)
  27.  
  28. page.close()
  29.  
  30. print(getLinks('https://www.yahoo.com/'))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement