Guest User

bs4_request_8

a guest
Mar 27th, 2020
86
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import csv
  2. from pprint import pprint
  3.  
  4. from bs4 import BeautifulSoup
  5. import requests
  6.  
  7. url = "http://www.nse.com.ng/Issuers-section/corporate-disclosures/corporate-actions/closure-of-register"
  8. soup = BeautifulSoup(requests.get(url).content, "html.parser")
  9.  
  10. rows = soup.select("table.ms-rteTable-default tr")
  11. headers = [header.get_text(strip=True).encode("utf-8") for header in rows[0].find_all("td")]
  12.  
  13. data = [dict(zip(headers, [cell.get_text(strip=True).encode("utf-8") for cell in row.find_all("td")]))
  14. for row in rows[1:]]
  15.  
  16. # see what the data looks like at this point
  17. pprint(data)
  18.  
  19. with open('sara.csv', 'wb') as csvfile:
  20. spamwriter = csv.DictWriter(csvfile, headers, delimiter='\t', quotechar="\n")
  21.  
  22. for row in data:
  23. spamwriter.writerow(row)
  24.  
  25.  
  26.  
  27. output:
  28.  
  29. Traceback (most recent call last):
  30. File "/home/martin/dev/python/england.py", line 11, in <module>
  31. headers = [header.get_text(strip=True).encode("utf-8") for header in rows[0].find_all("td")]
  32. IndexError: list index out of range
  33. [Finished in 0.709s]
RAW Paste Data