Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Export the first 'wikitable' on a Portuguese Wikipedia page to Excel.

The script downloads the page at ``SOURCE_URL``, reads the first
``<table class="wikitable">`` it contains, collects the non-empty text of
every ``<td>`` cell row by row, prints the resulting DataFrame and writes
it to ``OUTPUT_FILE``.
"""
import requests
from bs4 import BeautifulSoup
import pandas as pd

SOURCE_URL = 'https://pt.wikipedia.org/wiki/Unidades_federativas_do_Brasil'
OUTPUT_FILE = "ufbrl_wikipedia.xlsx"
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30
# Labels applied to the scraped values, in the order they appear in each row.
COLUMNS = ["UF", "Abreviação", "Capital", "Área (km2)", "População (2014)", "Densidade (2005)", "PIB (2015)", "% total 2015", "PIB per capita", "IDH (2010)", "Alfabetização (2016)", "Mortalidade infantil (2016)", "Expectativa de vida (2016)"]

response = requests.get(SOURCE_URL, timeout=REQUEST_TIMEOUT)
# Fail loudly on 4xx/5xx rather than parsing an error page as if it were data.
response.raise_for_status()
website_url = response.text
soup = BeautifulSoup(website_url, "lxml")

table = soup.find('table', {'class': 'wikitable'})
if table is None:
    raise RuntimeError(
        f"No <table class='wikitable'> found at {SOURCE_URL}; "
        "the page layout may have changed."
    )
table_rows = table.find_all('tr')

res = []
for tr in table_rows:
    # Cells whose stripped text is empty are dropped, so the remaining values
    # in that row shift left. Rows left with no values at all (for example
    # rows made only of <th> cells) are skipped entirely.
    cell_texts = (td.text.strip() for td in tr.find_all('td'))
    row = [text for text in cell_texts if text]
    if row:
        res.append(row)

df = pd.DataFrame(res, columns=COLUMNS)
print(df)
# The `encoding` keyword was an unused argument of to_excel and has been
# removed from pandas, so it is no longer passed.
df.to_excel(OUTPUT_FILE)
Advertisement
Add Comment
Please, Sign In to add comment