Advertisement
Guest User

Untitled

a guest
Aug 31st, 2016
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.43 KB | None | 0 0
  1. import urllib2
  2.  
  # Fetch a page and print its raw HTML.
  # "WEBSITE" is a placeholder: replace it with a full URL (scheme included)
  # before running.
  website = "WEBSITE"
  openwebsite = urllib2.urlopen(website)
  try:
      # Read from the response object opened above. The original read from
      # an undefined name (getwebsite), which raised NameError.
      html = openwebsite.read()
  finally:
      # Release the connection even if the read fails.
      openwebsite.close()

  # A single-argument print(...) emits the same text as the print statement.
  print(html)
  8.  
  9. from BeautifulSoup import BeautifulSoup
  10. import urllib2
  11. import re
  12.  
  # Print the href attribute of every anchor tag on the page.
  html_page = urllib2.urlopen("http://www.yourwebsite.com")
  soup = BeautifulSoup(html_page)
  for link in soup.findAll('a'):
      # link.get returns None when the anchor has no href, so "None" is
      # printed for such anchors.
      print(link.get('href'))

  # Variant: only anchors whose href starts with "http://". The original
  # discarded this result; keep it in a name so it can be used.
  http_links = soup.findAll('a', attrs={'href': re.compile("^http://")})
  19.  
  20. from HTMLParser import HTMLParser
  21.  
  class MyHTMLParser(HTMLParser):
      """HTML parser that prints the href attribute of every anchor tag."""

      def handle_starttag(self, tag, attrs):
          """Print ``href = <value>`` for each href found on an ``a`` tag.

          Called by HTMLParser for every start tag. ``tag`` is the tag name
          and ``attrs`` is the list of ``(name, value)`` attribute pairs.
          Start tags other than ``a`` are ignored.
          """
          # Only parse the 'anchor' tag.
          if tag != "a":
              return
          # Check the list of defined attributes.
          for name, value in attrs:
              # If href is defined, print it. The single formatted argument
              # gives the same "href = value" text as the original
              # three-item print statement.
              if name == "href":
                  print("%s = %s" % (name, value))
  32.  
  33.  
  # Create the parser and feed it the HTML text to scan.
  # NOTE(review): your_html_string is not defined anywhere in this snippet;
  # it must be bound to the HTML text before these lines run.
  parser = MyHTMLParser()
  parser.feed(your_html_string)
  36.  
  37. import BeautifulSoup
  # Parse previously fetched markup and print the href of every anchor.
  # NOTE(review): html is not assigned in this snippet; presumably it holds
  # the page text read earlier -- confirm before running.
  soup = BeautifulSoup.BeautifulSoup(html)
  for link in soup.findAll("a"):
      # link.get returns None when the anchor has no href, so "None" is
      # printed for such anchors.
      print(link.get("href"))
  41.  
  42. import urllib
  # Scan the raw page text for href="..." values without an HTML parser and
  # collect the ones that start with "http" or "www".
  test = urllib.urlopen("http://www.google.com").read()
  sane = 0
  needlestack = []
  while sane == 0:
      curpos = test.find("href")
      if curpos >= 0:
          # Drop everything before this "href".
          test = test[curpos:]
          # Skip up to and including the opening double quote of the value.
          curpos = test.find('"')
          if curpos < 0:
              # No double quote follows. Without this check the text never
              # shrinks and the same "href" would be found forever.
              sane = 1
              continue
          test = test[curpos + 1:]
          # The value runs up to the closing double quote.
          curpos = test.find('"')
          if curpos < 0:
              # Unterminated value: nothing reliable left to extract.
              sane = 1
              continue
          needle = test[0:curpos]
          # startswith takes a tuple of prefixes. The original
          # ("http" or "www") evaluated to just "http", so "www" links
          # were silently dropped.
          if needle.startswith(("http", "www")):
              needlestack.append(needle)
          # Continue after the closing quote so that an "href" occurring
          # inside the value itself is not scanned again.
          test = test[curpos + 1:]
      else:
          # No more "href" occurrences: stop scanning.
          sane = 1
  for item in needlestack:
      print(item)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement