Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # See https://datamaps.uk/webapps/uk_covid_vaccine_adverse_reactions/ for a webapp using Shiny
- library('data.table')
# Named character vector of source PDFs: the name is the short brand label,
# the value is the full URL of that brand's "Vaccine Analysis Print" PDF.
# Both files live under the same assets.publishing.service.gov.uk folder, so
# the common prefix is written once and the per-brand tail appended to it.
brands <- local({
  base_url <- paste0(
    "https://assets.publishing.service.gov.uk/government/uploads/",
    "system/uploads/attachment_data/file/"
  )
  pdf_paths <- c(
    Pfizer = "968413/COVID-19_mRNA_Pfizer-_BioNTech_Vaccine_Analysis_Print__2_.pdf",
    AstraZ = "968414/COVID-19_AstraZeneca_Vaccine_Analysis_Print.pdf"
  )
  # paste0() drops names, so re-attach the brand labels explicitly.
  setNames(paste0(base_url, pdf_paths), names(pdf_paths))
})
#' Extract and tidy the reaction tables of one brand's PDF.
#'
#' The PDF is parsed with `tabulizer::extract_tables()`; every extracted table
#' is reduced to three columns (label, total, fatal), its first two rows are
#' dropped, and all tables are stacked. Heading rows (empty third column) and
#' rows whose label ends in "TOTAL" are then turned into grouping columns and
#' removed from the data.
#'
#' @param x A length-one *named* character vector. The value is passed to
#'   `tabulizer::extract_tables()` (in this script: a PDF URL); the name is
#'   stored in the `Brand` column.
#' @return A data.table with columns `Brand`, `SOC`, `NEC`, `Reaction`,
#'   `Total`, `Fatal` and `rnk`, where `rnk` is the rank of `Total` within each
#'   `SOC` (largest first, ties broken at random).
get_data <- function(x) {
  message('\nProcessing brand: ', names(x), '...')

  message(' - Downloading file and extracting tables...')
  y <- tabulizer::extract_tables(x, method = 'stream')

  message(' - Processing output... ')
  # Collect the cleaned tables in a pre-allocated list and bind once at the
  # end instead of growing a data.table inside the loop.
  parts <- vector('list', length(y))
  for (idx in seq_along(y)) {
    yt <- y[[idx]]
    if (length(yt) <= 1) next

    n_cols <- ncol(yt)
    if (n_cols == 4) {
      # Four columns: one of them is a spurious empty column; which one is
      # decided from the first row.
      if (yt[1, 2] == '') {
        yt <- yt[, c(1, 3, 4), drop = FALSE]
      } else if (yt[1, 4] == '') {
        yt <- yt[, c(1, 2, 4), drop = FALSE]
      } else {
        warning('Format not recognized! Skipping table ', idx, '.', call. = FALSE)
        next
      }
    } else if (n_cols == 5) {
      yt <- yt[, c(1, 3, 5), drop = FALSE]
    } else if (n_cols != 3) {
      # Binding a table with any other width would break rbindlist() below.
      warning(
        'Unexpected number of columns (', n_cols, ')! Skipping table ', idx, '.',
        call. = FALSE
      )
      next
    }

    # Drop the first two rows. drop = FALSE keeps a one-row remainder as a
    # 1 x 3 matrix rather than collapsing it to a vector.
    parts[[idx]] <- data.table(yt[-(1:2), , drop = FALSE])
  }
  ydt <- rbindlist(parts, use.names = FALSE)
  if (nrow(ydt) == 0L) {
    stop('No usable tables extracted for brand: ', names(x), call. = FALSE)
  }

  message(' - Data Engineering... ')
  ydt <- ydt[!grepl('^TOTAL', V1)]
  # V4: label of the most recent heading row (rows whose third column is
  # empty), carried forward. na.rm = FALSE keeps the vector length intact when
  # the data does not start with a heading row.
  ydt[, V4 := ifelse(V3 == '', V1, NA)]
  ydt[, V4 := zoo::na.locf(V4, na.rm = FALSE)]
  # V5: label of the next row ending in "TOTAL", carried backward over the
  # rows that precede it.
  ydt[, V5 := ifelse(grepl('TOTAL$', V1), V1, NA)]
  ydt[, V5 := zoo::na.locf(V5, fromLast = TRUE, na.rm = FALSE)]
  # Heading and total rows have served their purpose; keep reaction rows only.
  ydt <- ydt[!(V3 == '' | grepl('TOTAL$', V1))]

  ydt[, `:=`(V2 = as.integer(V2), V3 = as.integer(V3), 'Brand' = names(x))]
  setcolorder(ydt, c('Brand', 'V5', 'V4'))
  setnames(ydt, c('Brand', 'SOC', 'NEC', 'Reaction', 'Total', 'Fatal'))
  ydt[, SOC := gsub(' SOC TOTAL', '', SOC)]

  message(' - Add Rank over Total by SOC... ')
  ydt[, rnk := frank(-Total, ties.method = 'random'), SOC]

  # Return the table explicitly (and visibly) rather than relying on the
  # invisible result of `:=`.
  ydt[]
}
# Fetch and tidy every brand, then stack the per-brand tables. Indexing with
# `brands[i]` (not `[[i]]`) keeps the element name, which get_data() uses as
# the brand label.
ydt <- rbindlist(lapply(seq_along(brands), function(i) get_data(brands[i])))

message('Data Checks... ')
# print() so the check tables are shown even when the script is source()'d.
print(ydt[, .(Total = sum(Total), Fatal = sum(Fatal)), Brand])
print(ydt[, .(Total = sum(Total), Fatal = sum(Fatal)), .(Brand, SOC)])

message('Select 5 major reactions by SOC (for SOCs with at least 50 cases)... ')
# Drop every SOC whose overall total is below 50, then keep rank <= 5.
yr <- ydt[
  !SOC %chin% ydt[, .(Total = sum(Total)), SOC][Total < 50, SOC]
][rnk <= 5]

message('Build table...')
# One row per SOC/NEC/Reaction with Total, Fatal and rnk spread across brands.
# The input is the filtered table `yr` built above; the original referenced an
# undefined object `dts`.
y <- dcast(
  yr,
  SOC + NEC + Reaction ~ Brand,
  value.var = c('Total', 'Fatal', 'rnk'),
  fill = 0
)
setcolorder(
  y,
  c(
    'SOC', 'NEC', 'Reaction',
    'Total_AstraZ', 'Fatal_AstraZ', 'rnk_AstraZ',
    'Total_Pfizer', 'Fatal_Pfizer', 'rnk_Pfizer'
  )
)
# Custom two-row table header used as the `container` of the DT widget:
# the first row holds the three identifier columns (each spanning both header
# rows) plus one three-column-wide title per brand; the second row labels the
# Total / Fatal / Rank columns under each brand title.
# withTags() is namespace-qualified because htmltools is not attached by this
# script; inside it, table/thead/tr/th/span resolve to the htmltools tag
# functions.
sketch <- htmltools::withTags(table(
  class = 'display',
  thead(
    tr(
      th(rowspan = 2, 'SOC'),
      th(rowspan = 2, 'NEC'),
      th(rowspan = 2, 'Reaction'),
      th(
        colspan = 3,
        span(
          'AstraZeneca',
          style = "color:gold; font-family:'times'; font-size:20pt; display:table; margin:0 auto;"
        )
      ),
      th(
        colspan = 3,
        span(
          'Pfizer',
          style = "color:cyan; font-family:'times'; font-size:20pt; display:table; margin:0 auto;"
        )
      )
    ),
    tr(
      th('Total'), th('Fatal'), th('Rank'),
      th('Total'), th('Fatal'), th('Rank')
    )
  )
))
# Interactive table of the wide brand comparison `y`, rendered with the custom
# header `sketch`. DT and htmltools functions are namespace-qualified because
# neither package is attached by this script.
dt <- DT::datatable(
  y,
  rownames = FALSE,
  container = sketch,
  selection = 'none',
  class = 'cell-border nowrap',
  extensions = c('Buttons', 'Scroller'),
  caption = htmltools::tags$caption(
    style = 'caption-side:bottom;text-align:right;font-size:10px',
    htmltools::em('Last Updated: 11 March 2021')
  ),
  options = list(
    scrollX = TRUE,
    scrollY = 400,
    scroller = TRUE,
    ordering = TRUE,
    searchHighlight = TRUE,
    deferRender = TRUE,
    buttons = c('copy', 'csv', 'print'),
    # Restyle the header (white text on black) once the table is initialised.
    initComplete = DT::JS(
      "function(settings, json) {",
      "$(this.api().table().header()).css({'background-color': '#000', 'color': '#fff'});",
      "}"
    ),
    dom = 'Biftp'
  )
)

# Format the count columns with no currency symbol and no decimals.
dt <- DT::formatCurrency(
  dt,
  c('Total_AstraZ', 'Fatal_AstraZ', 'Total_Pfizer', 'Fatal_Pfizer'),
  currency = '',
  digits = 0
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement