Advertisement
sergioMITM

html table to csv via python

Jan 23rd, 2018
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.88 KB | None | 0 0
  1. # HTML table to CSV. Referenced on my blog at http://sergiomitm.com
  2. from bs4 import BeautifulSoup
  3. import urllib2
  4. import csv, argparse
  5.  
  6. def parse_args():
  7.     parser = argparse.ArgumentParser()
  8.     parser.add_argument("url", help="url of web page with table in it")
  9.     parser.add_argument("classid", help="css class name to identify the table")
  10.     return parser.parse_args()
  11.  
  12. def main():
  13.     args = parse_args()
  14.  
  15.     url = args.url
  16.     html = urllib2.urlopen(url).read()
  17.     soup = BeautifulSoup(html)
  18.     table = soup.select_one("table."+args.classid)
  19.     headers = [th.text.encode("utf-8") for th in table.select("tr th")]
  20.  
  21.     with open("out.csv", "w") as f:
  22.         wr = csv.writer(f)
  23.         wr.writerow(headers)
  24.         wr.writerows([[td.text.encode("utf-8") for td in row.find_all("td")] for row in table.select("tr + tr")])
  25.  
  26. if __name__ == "__main__":
  27.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement