Fabian42

Corona sheet filling script

Jan 22nd, 2021 (edited)
1,497
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 10.55 KB | None | 0 0
  1. #!/bin/bash
  2. # New version: https://github.com/Fabian42/bash_scripts/blob/main/corona.sh
  3.  
  4. # Feedback: fabianroeling@googlemail.com
  5. # maybe TODO for reliability & less effort: translate into Google Sheets script?
  6.  
  #######################################
  # Extracts one chart series from a Worldometers page as a column of numbers.
  # Globals:
  #   page (read) - HTML source of the page to search
  # Arguments:
  #   $1 - 1-based index of the first value to keep; earlier values are dropped
  #        so that all tables start on 2020-05-25 (default: 1, keep everything)
  #   $2 - series name to look for, e.g. "Daily deaths"; matched literally and
  #        case-insensitively
  # Outputs:
  #   one value per line on stdout; "null" and negative values are printed as 0
  #######################################
  worldometers() {
    local first_row=${1:-1}
    local series=$2
    # Find the series name, take its "data" line from the following five lines,
    # then: strip prefix, strip suffix, null -> 0, negatives (including
    # decimals) -> 0, split on commas, align to 2020-05-25.
    printf '%s' "$page" \
      | grep -iF -m 1 --after-context=5 -- "name: '${series}'" \
      | grep "data" \
      | sed -E \
        -e 's/ *data: \[//' \
        -e 's/\] *\} *\]? *,? *\{? *//' \
        -e 's/null/0/g' \
        -e 's/-[0-9]+(\.[0-9]+)?/0/g' \
        -e 's/,/\n/g' \
      | tail -n "+${first_row}"
  }
  13.  
  ### START
  # Open the spreadsheet in a detached Firefox so the script keeps running.
  sheet_url="docs.google.com/spreadsheets/d/1uDTghO_ZYBs5nfs2kDc0Ms6e9bbx7clx_QgkWii7OMY"
  (
    firefox "$sheet_url" > /dev/null 2>&1 &
    disown
  )

  # Show the previously stored state first, in case something goes wrong later.
  cat ~/misc/corona_storage
  printf '\n'
  21.  
  # Hannover cases and deaths, Lehrte cases and new cases (done first because
  # it is the part most likely to fail).
  # This breaks if 20 or more articles were published today after the Corona
  # update. That is extremely unlikely; changes to the website break things far
  # more often.
  # In case of a missed day, set e.g. CORONA_ARTICLE_DATE=2021/03/29 in the
  # environment to parse that day's articles instead of today's.
  article_date=${CORONA_ARTICLE_DATE:-$(date "+%Y/%m/%d")}
  # line 1: total deaths Hannover, last line: total cases Lehrte (previous run)
  storage_file=~/misc/corona_storage

  # Regex building blocks (ERE). The articles mix plain spaces and "&nbsp;".
  gap='( |&nbsp;)+'     # at least one separator
  opt_gap='( |&nbsp;)*' # separators that may be absent
  new_cases_intro="Menschen${gap}registriert,${gap}die${gap}sich${gap}in${gap}der${gap}Region${gap}mit${gap}dem${gap}Coronavirus${gap}infiziert${gap}haben\.${gap}Das${gap}(sind|ist)${gap}"
  new_cases_count='[0-9a-zöü.]+'
  more_than="${gap}(Fälle|Fall)${gap}mehr${gap}als"
  fewer_than="${gap}(Fälle|Fall)${gap}weniger${gap}als"
  deaths_sentence="[0-9.]+${gap}Menschen${gap}sind${gap}infolge${gap}einer${gap}nachgewiesenen${gap}oder${gap}mutmaßlichen${gap}Corona-Infektion${gap}in${gap}der${gap}Region${gap}verstorben"
  lehrte_cell="Lehrte</p>${opt_gap}</td>${opt_gap}<td>${opt_gap}<p class=\"bodytext\">${opt_gap}[0-9.]+"
  lehrte_next_cell="${opt_gap}</p>${opt_gap}</td>${opt_gap}<td>${opt_gap}<p class=\"bodytext\">${opt_gap}[0-9.]+"

  # Collect the links of all articles published on the requested date.
  mapfile -t links < <(
    wget --no-check-certificate -qO - "www.altkreisblitz.de/region-hannover" \
      | grep -E "<div class=\"news-list-item-title\"><h2><a href=\"https://www\.altkreisblitz\.de/aktuelles/datum/${article_date}" \
      | grep -Eo 'https[^\"]+'
  )

  found=0
  for link in "${links[@]}"; do
    # Normalise line endings so the line-based greps below work.
    page=$(wget -qO - "$link" | sed -E 's/\r\n?/\n/g')

    # Current cases: find the HTML line with the table, drop everything before
    # its title (there might be another table in front), take the top entry
    # (yesterday), strip its date and the thousands separators.
    value=$(printf '%s' "$page" \
      | grep --after-context=1 -E \
        -e "Aktuell${opt_gap}Infizierte.*" \
        -e "Aktuell${gap}registrierte${gap}Infizierte.*" \
      | sed -E "s/.*Aktuell${gap}//" \
      | grep -Eo "[0-9]+\.[0-9]+\.[0-9]*:${gap}[0-9.]+" \
      | head -n 1 \
      | sed -E -e "s/[0-9]+\.[0-9]+\.[0-9]*:${gap}//" -e 's/\.//g')
    if [[ -n $value ]]; then
      cases_hannover=$value
    fi

    # New cases: find the sentence, cut out the number and turn number words
    # into digits. The singular form ("ist ein Fall mehr") is mapped to 1.
    value=$(printf '%s' "$page" \
      | grep -Eom 1 "${new_cases_intro}${new_cases_count}${more_than}" \
      | sed -E \
        -e "s/${new_cases_intro}//" \
        -e "s/${more_than}//" \
        -e 's/\.//g' \
        -e 's/^(ein|eine|einen|eins)$/1/' \
        -e 's/zwei/2/' \
        -e 's/drei/3/' \
        -e 's/vier/4/' \
        -e 's/fünf/5/' \
        -e 's/sechs/6/' \
        -e 's/sieben/7/' \
        -e 's/acht/8/' \
        -e 's/neun/9/' \
        -e 's/zehn/10/' \
        -e 's/elf/11/' \
        -e 's/zwölf/12/')
    # If the numbers got corrected downwards ("... weniger als"), fall back to
    # 0 new cases.
    if [[ -z $value ]] \
      && printf '%s' "$page" | grep -Eq "${new_cases_intro}${new_cases_count}${fewer_than}"; then
      value=0
    fi
    if [[ -n $value ]]; then
      new_cases_hannover=$value
    fi

    # Total deaths: find the sentence and extract its leading number.
    value=$(printf '%s' "$page" \
      | grep -Eo "$deaths_sentence" \
      | grep -Eo '[0-9.]+' \
      | sed -E 's/\.//g')
    if [[ -n $value ]]; then
      dead_hannover_now=$value
      # Difference to the previously stored total; a negative result (printed
      # with the Unicode minus sign) is clamped to 0, quotes are removed.
      deaths_hannover=$(qalc -t "${dead_hannover_now}-$(head -n 1 "$storage_file")" \
        | sed -E -e 's/−[0-9]+/0/' -e 's/"//g')
    fi

    # Current cases Lehrte: relies on very specific HTML formatting of the
    # table row (may break any day); the number is in the cell after "Lehrte".
    value=$(printf '%s' "$page" \
      | grep -Eo "$lehrte_cell" \
      | grep -Eo '[0-9.]+' \
      | sed -E 's/\.//g')
    if [[ -n $value ]]; then
      cases_lehrte=$value
    fi

    # Total cases Lehrte: same row, one cell further; take the last (second)
    # number of the match.
    value=$(printf '%s' "$page" \
      | grep -Eo "${lehrte_cell}${lehrte_next_cell}" \
      | grep -Eo '[0-9.]+' \
      | tail -n 1 \
      | sed -E 's/\.//g')
    if [[ -n $value ]]; then
      total_lehrte=$value
      # Difference to the previously stored total, clamped to 0 like above.
      new_cases_lehrte=$(qalc -t "${total_lehrte}-$(tail -n 1 "$storage_file")" \
        | sed -E -e 's/−[0-9]+/0/' -e 's/"//g')
    fi

    # Stop at the first point where every value has been seen; values found in
    # earlier articles are kept across iterations.
    if [[ -n $cases_hannover && -n $new_cases_hannover && -n $dead_hannover_now \
      && -n $cases_lehrte && -n $total_lehrte ]]; then
      found=1
      break
    fi
  done

  if [[ $found == 1 ]]; then
    echo "found"
  else
    printf "Couldn't find/parse article, skipping. Links checked:\n"
    printf '%s\n' "${links[@]}"
    # Keep the stored totals so that the storage file is rewritten unchanged.
    dead_hannover_now=$(head -n 1 "$storage_file")
    total_lehrte=$(tail -n 1 "$storage_file")
    # Show whatever was parsed, for manual completion.
    echo "cases_hannover: $cases_hannover"
    echo "new_cases_hannover: $new_cases_hannover"
    echo "(dead_hannover_now: $dead_hannover_now)"
    echo "deaths_hannover: $deaths_hannover"
    echo "cases_lehrte: $cases_lehrte"
    echo "(total_lehrte: $total_lehrte)"
    echo "new_cases_lehrte: $new_cases_lehrte"
    echo
  fi
  87.  
  # vaccinations: Germany, world, Austria
  page=$(wget -qO - "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv")
  # Take the rows of one area, start at 2021-01-15, keep only the 6th CSV
  # column. cut is used instead of a "([^,]+,){5}" regex because that regex
  # cannot cross empty cells and would then print the whole row.
  vaccination_de=$(printf '%s' "$page" | grep "Germany" | tail -n +22 | cut -d ',' -f 6)
  vaccination_world=$(printf '%s' "$page" | grep "World" | tail -n +47 | cut -d ',' -f 6)
  vaccination_at=$(printf '%s' "$page" | grep "Austria" | tail -n +22 | cut -d ',' -f 6)
  94.  
  # vaccination Saxony
  # A temporary file is needed because "7z -si" seems broken. It gets a private,
  # unpredictable name from mktemp instead of a fixed path in /tmp.
  if xlsx_tmp=$(mktemp "${TMPDIR:-/tmp}/corona_xlsx_XXXXXX"); then
    wget -qO "$xlsx_tmp" "https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Impfquotenmonitoring.xlsx?__blob=publicationFile"
    # An .xlsx file is an archive; read cell E16 of the second worksheet's XML
    # and strip everything in front of the number.
    vaccination_saxony=$(7z e -so "$xlsx_tmp" xl/worksheets/sheet2.xml \
      | grep -Eo 'r="E16" s="[0-9]+"><v>[0-9]+' \
      | sed -E 's/r="E16" s="[0-9]+"><v>//')
    rm -f -- "$xlsx_tmp"
  else
    echo "mktemp failed, skipping vaccination_saxony" >&2
  fi
  100.  
  101. # vaccination North Saxony
  102. #page=$(wget -qO - "coronavirus.sachsen.de/ueberblick-coronaschutzimpfungen-in-sachsen-9874.html")
  103. #vaccination_north_saxony=$(echo -n "$page" | grep --after-context=2 "Nordsachsen" | grep -Eo "[0-9\\.]+" | tail -n 1 | sed -E "s/\\.//g")
  104.  
  # Downloads one Worldometers page into $page and fills cases_<suffix>,
  # new_cases_<suffix> and deaths_<suffix> from its charts.
  #   $1 - variable suffix (de, world, at)
  #   $2 - page URL
  #   $3 - row argument handed to worldometers for date alignment
  #   $4 - name of the series holding the new cases
  load_worldometers() {
    page=$(wget -qO - "$2")
    printf -v "cases_$1" '%s' "$(worldometers "$3" "Currently infected")"
    printf -v "new_cases_$1" '%s' "$(worldometers "$3" "$4")"
    printf -v "deaths_$1" '%s' "$(worldometers "$3" "Daily deaths")"
  }

  # Germany
  load_worldometers de "worldometers.info/coronavirus/country/germany" 101 "New cases"

  # world
  load_worldometers world "worldometers.info/coronavirus" 125 "Daily cases" # yay for consistency

  # Austria
  load_worldometers at "worldometers.info/coronavirus/country/austria" 101 "New cases"
  122.  
  # If the article was not found and it is not the weekend, open the page for a
  # manual check. qalc prints the weekday number; quotes around it are removed.
  weekday=$(qalc -t "weekday(today)" | sed -E 's/"//g')
  if [[ $found == 0 ]] && [[ $weekday -lt 6 ]]; then
    (firefox "www.altkreisblitz.de/region-hannover" &> /dev/null & disown)
  else
    # Focus the last listed Konsole window, hopefully that is the one running
    # this script. The first column of "wmctrl -l" is the window id.
    console_window=$(wmctrl -l | grep "Konsole" | tail -n 1 | sed -E 's/ .+//')
    # Without a Konsole window there is nothing to focus; do not call wmctrl
    # with a missing argument.
    if [[ -n $console_window ]]; then
      wmctrl -ia "$console_window"
    fi
  fi
  131.  
  # Output: long columns go to the clipboard (for overwriting a whole sheet
  # column), single values are printed to the console (for appending one entry).

  # Puts a text on the X clipboard.  $1 - text to copy
  to_clipboard() {
    printf '%s' "$1" | xclip -selection clipboard
  }

  # Copies the content of the variable named $1 to the clipboard, prints
  # "paste <name>" without a newline and waits for Enter.
  paste_step() {
    to_clipboard "${!1}"
    printf 'paste %s' "$1"
    read -r
  }

  to_clipboard "$vaccination_de"
  echo "paste vaccination_de"
  # When weekday is 5 the single values are printed three times (or padded with
  # ", 0, 0"), presumably to fill the weekend columns as well.
  if [[ $weekday == 5 ]]; then
    printf '%s, %s, %s' "$vaccination_saxony" "$vaccination_saxony" "$vaccination_saxony"
  else
    printf '%s' "$vaccination_saxony"
  fi
  read -r
  #echo -n "$vaccination_north_saxony"; read
  for name in vaccination_world vaccination_at cases_de new_cases_de deaths_de \
    cases_world new_cases_world; do
    paste_step "$name"
  done
  to_clipboard "$deaths_world"
  echo "paste deaths_world"

  # Hannover and Lehrte values, one per line; the Enter prompt follows the last.
  if [[ $found == 1 ]]; then
    if [[ $weekday == 5 ]]; then
      printf '%s, %s, %s\n' "$cases_hannover" "$cases_hannover" "$cases_hannover"
      printf '%s, 0, 0\n' "$new_cases_hannover"
      printf '%s, 0, 0\n' "$deaths_hannover"
      printf '%s, %s, %s\n' "$cases_lehrte" "$cases_lehrte" "$cases_lehrte"
      printf '%s, 0, 0' "$new_cases_lehrte"
    else
      printf '%s\n' "$cases_hannover" "$new_cases_hannover" "$deaths_hannover" "$cases_lehrte"
      printf '%s' "$new_cases_lehrte"
    fi
  else
    printf '%s' "copy, 0, 0, copy, 0 (or see above)"
  fi
  read -r

  paste_step cases_at
  paste_step new_cases_at
  to_clipboard "$deaths_at"
  echo "paste deaths_at"

  # Remember the totals for the next run's difference calculation:
  # line 1 = total deaths Hannover, line 2 = total cases Lehrte (no trailing
  # newline). printf writes the values verbatim, without interpreting escapes.
  printf '%s\n%s' "$dead_hannover_now" "$total_lehrte" > ~/misc/corona_storage
Add Comment
Please, Sign In to add comment