daily pastebin goal
78%
SHARE
TWEET

Python Web Scraping

wtgeographer Jul 6th, 2016 (edited) 234 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #-------------------------------------------------------------------------------
  2. # Name:        HTML Web Scraping
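  3. # Purpose:     Fetch the WTAMU VHAC hours page and print every table row,
  3. #              showing rows that match a "secondhead" element in upper case under a banner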
  4. #
  5. # Author:      wtgeo_000
  6. #
  7. # Created:     07/10/2016
  8. # Copyright:   (c) wtgeo_000 2016
  9. # Licence:     <your licence>
  10. #-------------------------------------------------------------------------------
  11. # Import required modules
  12. from bs4 import BeautifulSoup
  13. import urllib2
  14. import csv
  15. import os
  16.  
  17. os.chdir(r'C:\Users\wtgeo_000\Desktop')
  18. url="http://www.wtamu.edu/student-life/vhac-hours.aspx"
  19. html = urllib2.urlopen(url).read()
  20. soup=BeautifulSoup(html, 'html.parser')
  21.  
  22. #Create lists for iteration
  23. rows = soup.findChildren('tr')
  24. secondhead = soup.findAll(attrs={"class" : "secondhead"})
  25.  
  26. secondheadlst = []
  27. for i in secondhead:
  28.     secondheadlst.append(i.text)
  29.  
  30. for row in rows:
  31.     if row.text.strip() in secondheadlst:
  32.         print "########################################################"
  33.         print row.text.strip().upper()
  34.     else:
  35.         print row.text.strip()
  36.     print " "
RAW Paste Data
Top