Advertisement
Guest User

Untitled

a guest
Apr 6th, 2020
488
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.07 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup as soup
  3. import datetime
  4.  
  5. date = datetime.datetime.today()  # datetime object for formating name of the output file
  6. output_f = 'covid19singapore_{year}-{month}-{day}({hour}-{minute}).csv'.format(
  7.    year=date.year,
  8.    month= date.month,
  9.    day=date.day,
  10.    hour=date.hour,
  11.    minute=date.minute,)
  12.    
  13. data_dict = {
  14.       'Case': '',
  15.       'Patient': '',
  16.       'Age': '',
  17.       'Gender': '',
  18.       'Nationality': '',
  19.       'Status': '',
  20.       'Infection_Source': '',
  21.       'Country_of_Origin': '',
  22.       'Symptomatic_to_Confirmation': '',
  23.       'Days_to_Recover': '',
  24.       'Symptomatic_At': '',
  25.       'Confirmed_At': '',
  26.       'Recovered_At': '',
  27.       'Displayed_Symptoms': '',
  28.       'Details': '',
  29.       'Sources': '',
  30.       'URL': '',
  31.    }
  32.  
  33.  
  34. def get_tree(url):
  35.    try:
  36.       url = url
  37.       r = requests.get(url)
  38.       return soup(r.text, 'html.parser')
  39.    except Exception as e:
  40.       print('get tree --> ')
  41.       print(e)
  42.  
  43.      
  44.  
  45. def details_sources(url):
  46.    try:
  47.       supa = get_tree(url)
  48.       tableRow = supa.find(class_='row row-xs')
  49.       Details_box = tableRow.find(class_='col-lg-8 col-xl-9 mg-t-10')
  50.       Details = Details_box.find(class_='card')
  51.       sources_box = tableRow.find(class_='col-md-6 col-lg-4 col-xl-3 mg-t-10')
  52.       Sources = sources_box.find(class_='card mg-t-10')
  53.       return [Sources.text.replace('Sources', '').strip().replace(' ',''), Details.text.replace('Details', '').strip()]
  54.    except Exception as e:
  55.       print('details sources --> ')
  56.       print(e)
  57.  
  58.  
  59. def format_date(a):
  60.    date = a.text.replace('nd', '').replace('th', '').replace('rd','').replace('st','').strip()
  61.    if date != '-':
  62.       date_object = datetime.datetime.strptime(date, '%d, %b %Y')
  63.       return "{}-{}-{}".format(date_object.year, date_object.month, date_object.day)
  64.    else:
  65.       return ''
  66.  
  67.  
  68. def format_str(str):
  69.    str = str.text.strip()
  70.    if str != '-':
  71.       return str
  72.    else:
  73.       return ''
  74.  
  75.  
  76. def format_url(bs0):
  77.    return 'https://www.againstcovid19.com' + bs0.a['href']
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement