# Source: Pastebin paste "corona_data_China_CDC", posted by skip420 on Mar 29th, 2020.
# Paste metadata as shown on the page: Python, 4.74 KB (page counters: 828, 0; "Never").
# (Pastebin advertisement and sign-up banners omitted.)
# Update of coronavirus data for mainland China

# 累计确诊 : cumulative confirmed cases
# 死亡 : deaths
# 治愈 : cured (recovered)
# 现有确诊 : currently confirmed (active) cases
# 死亡率 : mortality rate
# 治愈率 : cure (recovery) rate


# -*- coding: utf-8 -*-
import json
import time

import pandas as pd
import requests

  17. # 请求的URL
  18. url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback=&_=%d'
  19.  
  20. # 伪装请求头
  21. headers = {
  22.     'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36',
  23.     'referer': 'https://news.qq.com/zt2020/page/feiyan.htm?from=timeline&isappinstalled=0'
  24. }
  25.  
  26. # 抓取数据
  27. r = requests.get(url % time.time(), headers=headers)
  28.  
  29. data = json.loads(r.text)
  30. # print (r.text)
  31.  
  32. data = json.loads(data['data'])
  33.  
  34. lastUpdateTime = data['lastUpdateTime']
  35. print('数据更新时间 ' + str(lastUpdateTime))
  36.  
  37.  
  38. # part 1. 采集当日数据
  39. areaTree = data['areaTree']
  40.  
  41. print('采集当日省市数据...')
  42.  
  43. # 创建空 dataframes
  44. col_names =  ['省', '市', '新增确诊','累计确诊', '死亡', '治愈','死亡率','治愈率']
  45. col_names_p = ['省', '新增确诊', '累计确诊', '死亡', '治愈', '死亡率', '治愈率']
  46.  
  47. my_df  = pd.DataFrame(columns = col_names)
  48. my_df_p = pd.DataFrame(columns = col_names_p)
  49.  
  50. for item in areaTree:
  51.     if item['name'] == '中国':
  52.         item_ps = item['children']
  53.  
  54.         # 遍历省级数据
  55.         for item_p in item_ps:
  56.             province = item_p['name']
  57.             # print(province)
  58.             # print(item_p['total'])
  59.             confirm = item_p['total']['confirm']
  60.             death = item_p['total']['dead']
  61.             heal = item_p['total']['heal']
  62.             new_confirm = item_p['today']['confirm']
  63.             deadRate =item_p['total']['deadRate']
  64.             healRate =item_p['total']['healRate']
  65.  
  66.             # 向df添加数据
  67.             data_dict = {'省': province,'新增确诊':new_confirm,'累计确诊': confirm,
  68.                          '死亡': death, '治愈': heal, '死亡率': deadRate, '治愈率': healRate}
  69.             # print (data_dict)
  70.             my_df_p.loc[len(my_df_p)] = data_dict
  71.  
  72.             # 遍历地级数据
  73.             item_cs = item_p['children']
  74.             for item_c in item_cs:
  75.                 prefecture = item_c['name']
  76.                 # print('  ' + prefecture)
  77.                 # print('  ' + str(item_c['total']))
  78.                 new_confirm = item_c['today']['confirm']
  79.                 confirm = item_c['total']['confirm']
  80.                 # suspect = item_c['total']['suspect']
  81.                 death = item_c['total']['dead']
  82.                 heal = item_c['total']['heal']
  83.                 deadRate = item_c['total']['deadRate']
  84.                 healRate = item_c['total']['healRate']
  85.  
  86.                 # 向df添加数据
  87.                 data_dict = {'省': province, '市':prefecture, '新增确诊':new_confirm,'累计确诊': confirm,
  88.                              '死亡': death, '治愈': heal, '死亡率': deadRate, '治愈率': healRate}
  89.                 my_df.loc[len(my_df)] = data_dict
  90.  
  91. # 保存数据
  92. my_df.index += 1   # 使index从1开始
  93. my_df_p.index += 1
  94. my_df.to_csv(r'./china_prefecture_status_{}.csv'.format(str(lastUpdateTime).split()[0]), encoding='utf_8_sig', header='true')
  95. my_df_p.to_csv(r'./china_province_status_{}.csv'.format(str(lastUpdateTime).split()[0]), encoding='utf_8_sig', header='true')
  96.  
  97. # part 2. 采集中国历史数据
  98.  
  99. print('采集中国历史数据...')
  100.  
  101. # 请求的URL
  102. url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_other'
  103.  
  104. # 抓取数据
  105. r = requests.get(url, headers=headers)
  106.  
  107. # print (r.text)
  108.  
  109. data = json.loads(r.text)
  110. data = json.loads(data['data'])
  111.  
  112. china_day_list = data['chinaDayList']
  113.  
  114. col_names_cd =  ['日期','累计确诊','疑似','死亡', '治愈', '现有确诊', '现有重症','死亡率','治愈率']
  115.  
  116. my_df_cd = pd.DataFrame(columns = col_names_cd)
  117.  
  118. for day_item in china_day_list:
  119.     date = day_item['date'] + '.2020'
  120.     confirm = day_item['confirm']
  121.     suspect = day_item['suspect']
  122.     dead = day_item['dead']
  123.     heal = day_item['heal']
  124.     nowConfirm = day_item['nowConfirm']
  125.     nowSevere = day_item['nowSevere']
  126.     deadRate = day_item['deadRate']
  127.     healRate = day_item['healRate']
  128.  
  129.     # 向df添加数据
  130.     data_dict = {'日期': date,'累计确诊': confirm,'疑似': suspect,'死亡': dead, '治愈': heal, '现有确诊': nowConfirm,
  131.                  '现有重症':nowSevere,'死亡率': deadRate,'治愈率':healRate}
  132.     my_df_cd.loc[len(my_df_cd)] = data_dict
  133.  
  134. my_df_cd.index += 1
  135. my_df_cd.to_csv(r'./china_daily_status_{}.csv'.format(str(lastUpdateTime).split()[0]), encoding='utf_8_sig', header='true')
  136.  
  137. print('Success')
# End of paste (Pastebin page footer -- advertisement and sign-in-to-comment prompts -- omitted).