lvalnegri

covid_vaccine_reaction.R

Mar 15th, 2021 (edited)
432
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # See https://datamaps.uk/webapps/uk_covid_vaccine_adverse_reactions/ for a webapp using Shiny
  2. library('data.table')
  3.  
  4. brands <- c(
  5.   'Pfizer' = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/968413/COVID-19_mRNA_Pfizer-_BioNTech_Vaccine_Analysis_Print__2_.pdf',
  6.   'AstraZ' = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/968414/COVID-19_AstraZeneca_Vaccine_Analysis_Print.pdf'
  7. )
  8.  
  9. get_data <- function(x){
  10.  
  11.     message('\nProcessing brand: ', names(x), '...')
  12.  
  13.     message(' - Downloading file and extracting tables...')
  14.     y <- tabulizer::extract_tables(x, method = 'stream')
  15.    
  16.     message(' - Processing output... ')
  17.     ydt <- data.table()
  18.     for(idx in 1:length(y)){
  19.         yt <- y[[idx]]
  20.         if(length(yt) > 1){
  21.             if(ncol(yt) > 3){
  22.                 if(ncol(yt) == 4){
  23.                     if(yt[1,2] == ''){
  24.                         yt <- yt[, c(1, 3, 4)]
  25.                     } else if(yt[1,4] == ''){
  26.                         yt <- yt[, c(1, 2, 4)]
  27.                     } else {
  28.                         message('Format not recognized!')
  29.                     }
  30.                 } else if(ncol(yt) == 5){
  31.                     yt <- yt[, c(1, 3, 5)]
  32.                 } else {
  33.                     message('Too many columns!')
  34.                 }
  35.             }
  36.             ydt <- rbindlist(list( ydt, data.table(yt[-(1:2), ]) ), use.names = FALSE)
  37.         }
  38.     }
  39.    
  40.     message(' - Data Engineering... ')
  41.     ydt <- ydt[!grepl('^TOTAL', V1)]
  42.     ydt[, V4 := ifelse(V3 == '', V1, NA)][, V4 := zoo::na.locf(V4)]
  43.     ydt[, V5 := ifelse(grepl('TOTAL$', V1), V1, NA)][, V5 := zoo::na.locf(V5, fromLast = TRUE)]
  44.     ydt <- ydt[!(V3 == '' | grepl('TOTAL$', V1))]
  45.     ydt[, `:=`( V2 = as.integer(V2), V3 = as.integer(V3), 'Brand' = names(x) )]
  46.     setcolorder(ydt, c('Brand', 'V5', 'V4'))
  47.     setnames(ydt, c('Brand', 'SOC', 'NEC', 'Reaction', 'Total', 'Fatal'))
  48.     ydt[, SOC := gsub(' SOC TOTAL', '', SOC)]
  49.  
  50.     message(' - Add Rank over Total by SOC... ')
  51.     ydt[, rnk := frank(-Total, ties.method = 'random'), SOC]
  52.  
  53. }
  54.  
  55. ydt <- rbindlist(list( get_data(brands[1]), get_data(brands[2]) ))
  56.  
  57. message('Data Checks... ')
  58. ydt[, .(Total = sum(Total), Fatal = sum(Fatal)), Brand]
  59. ydt[, .(Total = sum(Total), Fatal = sum(Fatal)), .(Brand, SOC)]
  60.  
  61. message('Select 5 major reactions by SOC (for SOCs with at least 50 cases)... ')
  62. yr <- ydt[!SOC %chin% ydt[, .(Total = sum(Total)), SOC][Total < 50, SOC]][rnk <= 5]
  63.  
  64. message('Build table...')
  65. y <- dcast(dts, SOC+NEC+Reaction~Brand, value.var = c('Total', 'Fatal', 'rnk'), fill = 0)
  66. setcolorder(y, c('SOC', 'NEC', 'Reaction', 'Total_AstraZ', 'Fatal_AstraZ', 'rnk_AstraZ', 'Total_Pfizer', 'Fatal_Pfizer', 'rnk_Pfizer'))
  67. sketch <- withTags(table(
  68.                 class = 'display',
  69.                 thead(
  70.                     tr(
  71.                         th(rowspan = 2, 'SOC'),
  72.                         th(rowspan = 2, 'NEC'),
  73.                         th(rowspan = 2, 'Reaction'),
  74.                         th(colspan = 3, span('AstraZeneca', style = "color:gold; font-family:'times'; font-size:20pt; display:table; margin:0 auto;") ),
  75.                         th(colspan = 3, span('Pfizer', style = "color:cyan; font-family:'times'; font-size:20pt; display:table; margin:0 auto;") )
  76.                     ),
  77.                     tr( th('Total'), th('Fatal'), th('Rank'), th('Total'), th('Fatal'), th('Rank') )
  78.                 )
  79. ))
  80.  
  81. dt <- datatable(
  82.             y,
  83.             rownames = FALSE,
  84.             container = sketch,
  85.             selection = 'none',
  86.             class = 'cell-border nowrap',
  87.             extensions = c('Buttons', 'Scroller'),
  88.             caption = tags$caption(
  89.                 style = 'caption-side:bottom;text-align:right;font-size:10px',
  90.                 em('Last Updated: 11 March 2021')
  91.             ),        
  92.             options = list(
  93.                 scrollX = TRUE,
  94.                 scrollY = 400,
  95.                 scroller = TRUE,
  96.                 ordering = TRUE,
  97.                 searchHighlight = TRUE,
  98.                 deferRender = TRUE,
  99.                 buttons = c('copy', 'csv', 'print'),
  100.                 initComplete = JS(
  101.                     "function(settings, json) {",
  102.                     "$(this.api().table().header()).css({'background-color': '#000', 'color': '#fff'});",
  103.                     "}"
  104.                 ),
  105.                 dom = 'Biftp'
  106.             )
  107. )
  108. dt <- dt %>%  formatCurrency(c('Total_AstraZ', 'Fatal_AstraZ', 'Total_Pfizer', 'Fatal_Pfizer'), '', digits = 0)
  109.  
  110.  
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×