Advertisement
lvalnegri

covid_vaccine_reaction.R

Mar 15th, 2021 (edited)
1,182
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 4.49 KB | None | 0 0
  1. # See https://datamaps.uk/webapps/uk_covid_vaccine_adverse_reactions/ for a webapp using Shiny
  2. library('data.table')
  3.  
  4. brands <- c(
  5.   'Pfizer' = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/968413/COVID-19_mRNA_Pfizer-_BioNTech_Vaccine_Analysis_Print__2_.pdf',
  6.   'AstraZ' = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/968414/COVID-19_AstraZeneca_Vaccine_Analysis_Print.pdf'
  7. )
  8.  
  9. get_data <- function(x){
  10.  
  11.     message('\nProcessing brand: ', names(x), '...')
  12.  
  13.     message(' - Downloading file and extracting tables...')
  14.     y <- tabulizer::extract_tables(x, method = 'stream')
  15.    
  16.     message(' - Processing output... ')
  17.     ydt <- data.table()
  18.     for(idx in 1:length(y)){
  19.         yt <- y[[idx]]
  20.         if(length(yt) > 1){
  21.             if(ncol(yt) > 3){
  22.                 if(ncol(yt) == 4){
  23.                     if(yt[1,2] == ''){
  24.                         yt <- yt[, c(1, 3, 4)]
  25.                     } else if(yt[1,4] == ''){
  26.                         yt <- yt[, c(1, 2, 4)]
  27.                     } else {
  28.                         message('Format not recognized!')
  29.                     }
  30.                 } else if(ncol(yt) == 5){
  31.                     yt <- yt[, c(1, 3, 5)]
  32.                 } else {
  33.                     message('Too many columns!')
  34.                 }
  35.             }
  36.             ydt <- rbindlist(list( ydt, data.table(yt[-(1:2), ]) ), use.names = FALSE)
  37.         }
  38.     }
  39.    
  40.     message(' - Data Engineering... ')
  41.     ydt <- ydt[!grepl('^TOTAL', V1)]
  42.     ydt[, V4 := ifelse(V3 == '', V1, NA)][, V4 := zoo::na.locf(V4)]
  43.     ydt[, V5 := ifelse(grepl('TOTAL$', V1), V1, NA)][, V5 := zoo::na.locf(V5, fromLast = TRUE)]
  44.     ydt <- ydt[!(V3 == '' | grepl('TOTAL$', V1))]
  45.     ydt[, `:=`( V2 = as.integer(V2), V3 = as.integer(V3), 'Brand' = names(x) )]
  46.     setcolorder(ydt, c('Brand', 'V5', 'V4'))
  47.     setnames(ydt, c('Brand', 'SOC', 'NEC', 'Reaction', 'Total', 'Fatal'))
  48.     ydt[, SOC := gsub(' SOC TOTAL', '', SOC)]
  49.  
  50.     message(' - Add Rank over Total by SOC... ')
  51.     ydt[, rnk := frank(-Total, ties.method = 'random'), SOC]
  52.  
  53. }
  54.  
  55. ydt <- rbindlist(list( get_data(brands[1]), get_data(brands[2]) ))
  56.  
  57. message('Data Checks... ')
  58. ydt[, .(Total = sum(Total), Fatal = sum(Fatal)), Brand]
  59. ydt[, .(Total = sum(Total), Fatal = sum(Fatal)), .(Brand, SOC)]
  60.  
  61. message('Select 5 major reactions by SOC (for SOCs with at least 50 cases)... ')
  62. yr <- ydt[!SOC %chin% ydt[, .(Total = sum(Total)), SOC][Total < 50, SOC]][rnk <= 5]
  63.  
  64. message('Build table...')
  65. y <- dcast(dts, SOC+NEC+Reaction~Brand, value.var = c('Total', 'Fatal', 'rnk'), fill = 0)
  66. setcolorder(y, c('SOC', 'NEC', 'Reaction', 'Total_AstraZ', 'Fatal_AstraZ', 'rnk_AstraZ', 'Total_Pfizer', 'Fatal_Pfizer', 'rnk_Pfizer'))
  67. sketch <- withTags(table(
  68.                 class = 'display',
  69.                 thead(
  70.                     tr(
  71.                         th(rowspan = 2, 'SOC'),
  72.                         th(rowspan = 2, 'NEC'),
  73.                         th(rowspan = 2, 'Reaction'),
  74.                         th(colspan = 3, span('AstraZeneca', style = "color:gold; font-family:'times'; font-size:20pt; display:table; margin:0 auto;") ),
  75.                         th(colspan = 3, span('Pfizer', style = "color:cyan; font-family:'times'; font-size:20pt; display:table; margin:0 auto;") )
  76.                     ),
  77.                     tr( th('Total'), th('Fatal'), th('Rank'), th('Total'), th('Fatal'), th('Rank') )
  78.                 )
  79. ))
  80.  
  81. dt <- datatable(
  82.             y,
  83.             rownames = FALSE,
  84.             container = sketch,
  85.             selection = 'none',
  86.             class = 'cell-border nowrap',
  87.             extensions = c('Buttons', 'Scroller'),
  88.             caption = tags$caption(
  89.                 style = 'caption-side:bottom;text-align:right;font-size:10px',
  90.                 em('Last Updated: 11 March 2021')
  91.             ),        
  92.             options = list(
  93.                 scrollX = TRUE,
  94.                 scrollY = 400,
  95.                 scroller = TRUE,
  96.                 ordering = TRUE,
  97.                 searchHighlight = TRUE,
  98.                 deferRender = TRUE,
  99.                 buttons = c('copy', 'csv', 'print'),
  100.                 initComplete = JS(
  101.                     "function(settings, json) {",
  102.                     "$(this.api().table().header()).css({'background-color': '#000', 'color': '#fff'});",
  103.                     "}"
  104.                 ),
  105.                 dom = 'Biftp'
  106.             )
  107. )
  108. dt <- dt %>%  formatCurrency(c('Total_AstraZ', 'Fatal_AstraZ', 'Total_Pfizer', 'Fatal_Pfizer'), '', digits = 0)
  109.  
  110.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement