Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- from bs4 import BeautifulSoup as soup
- import csv
- import datetime
# Local wall-clock time captured once at start-up; used only to build the
# name of the output file.
date = datetime.datetime.today()

# Name of the CSV file this run writes, stamped with the start time, e.g.
# 'covid19singapore_2020-3-7(9-5).csv'. Components are not zero-padded.
output_f = 'covid19singapore_{year}-{month}-{day}({hour}-{minute}).csv'.format(
    year=date.year,
    month=date.month,
    day=date.day,
    hour=date.hour,
    minute=date.minute,
)
# Template record for one case. Its keys, in this order, are what the script
# later uses as the CSV column names; every value starts out as an empty
# string.
data_dict = {
    'Case': '',
    'Patient': '',
    'Age': '',
    'Gender': '',
    'Nationality': '',
    'Status': '',
    'Infection_Source': '',
    'Country_of_Origin': '',
    'Symptomatic_to_Confirmation': '',
    'Days_to_Recover': '',
    'Symptomatic_At': '',
    'Confirmed_At': '',
    'Recovered_At': '',
    'Displayed_Symptoms': '',
    'Details': '',
    'Sources': '',
    'URL': '',
}
def get_tree(url, timeout=30):
    """Download *url* and return its HTML parsed into a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        A BeautifulSoup object built from the response text with the
        built-in ``'html.parser'``.

    Raises:
        requests.RequestException: If the connection fails or times out,
            or (as ``requests.HTTPError``) if the server answers with an
            error status.
    """
    r = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it
    # were the expected document.
    r.raise_for_status()
    return soup(r.text, 'html.parser')
def details_sources(url):
    """Fetch a page and extract its "Sources" and "Details" text.

    The page is downloaded with ``get_tree``. The details text is read from
    the first element whose class is ``'col-lg-8 col-xl-9 mg-t-10'`` and the
    sources text from the first element whose class is ``'card mg-t-10'``.
    Every occurrence of the word 'Details' / 'Sources' is removed from the
    respective text, which is then stripped of surrounding whitespace.

    Args:
        url: Address of the page to read.

    Returns:
        A two-item list ``[sources_text, details_text]``. Note the order:
        sources come first, despite the function name. An item is the
        empty string when its element is not present on the page.
    """
    def _clean(node, heading):
        # find() returns None when nothing matches; report that as empty
        # text rather than failing with AttributeError on ``.text``.
        if node is None:
            return ''
        return node.text.replace(heading, '').strip()

    supa = get_tree(url)
    details = supa.find(class_='col-lg-8 col-xl-9 mg-t-10')
    sources = supa.find(class_='card mg-t-10')
    return [_clean(sources, 'Sources'), _clean(details, 'Details')]
# Fetch the case search page and collect every table row on it.
supa = get_tree('https://www.againstcovid19.com/singapore/cases/search')
tabular = supa.find_all('tr')

# CSV columns are the keys of the record template, in declaration order.
fieldnames = list(data_dict)

with open(output_f, 'w', newline='', encoding='utf-8') as f:
    # DictWriter cannot be constructed without its column names.
    csv_writer = csv.DictWriter(f, fieldnames=fieldnames)
    csv_writer.writeheader()

    # NOTE(review): cell values are only printed for now; nothing is passed
    # to csv_writer yet, so the output file contains just the header row.
    # TODO: map the cells onto `fieldnames` and call csv_writer.writerow().
    for row in tabular:
        tabs = row.find_all('td')
        for it, tab in enumerate(tabs):
            if it == 0:
                # The first cell is expected to carry a link; print its
                # target before the cell text. Skip it if the link is absent.
                a = tab.find('a')
                if a is not None and a.get('href'):
                    print(a['href'])
            print(tab.text.strip())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement