Untitled

#========================================================#
# Setup
#========================================================#

library(dplyr)
library(ggplot2)
library(here)
library(pwr)
library(scales)
library(stringr)

#========================================================#
# Functions
#========================================================#

#============================
# power_prop_test()
#============================
# a simplification of one of
# the function from pwr
# for reuse later
power_prop_test <- function(h, n_group) {
  pwr.2p.test(
    h = h,
    n = n_group,
    sig.level = 0.01,
    alternative = 'two.sided'
  )$power
}

#============================
# power_prop_test_grid()
#============================
# function to calculate power for
# prop.test from:
#   n_group: sample size in each group
#   prop_control: proportion in the control group
#   prop_effect: effect in the treatment group
power_prop_test_grid <- function(n_group, prop_control, prop_effect) {
  # get all possible combinations of
  # n_group, prop_control, and prop_effect
  expand.grid(list(
    n_group = n_group,
    prop_control = prop_control,
    prop_effect = prop_effect
  )) %>%
    mutate(
      prop_treatment = prop_control + prop_effect,
      # standardised effect size (see pwr docs)
      h = pwr::ES.h(prop_control, prop_treatment),
      # calculate the power
      power = power_prop_test(h, n_group)
    )
}

#========================================================#
# Power curve
#========================================================#

#============================
# power curve data
#============================

# proportions to try for the control group
prop_control <- seq(0.05, 0.5, 0.025)

# effect sizes to try
prop_effect <- seq(0, 0.15, by = 0.001)

# samples sizes for each group to try
n_group <- c(500, 1000, 2500, 5000)

# get the power for all combinations
df_prop_test_power <- power_prop_test_grid(n_group, prop_control, prop_effect)

#============================
# power curve plot
#============================

# create a plot theme
theme_datasci <-
  theme_minimal(
    base_size = 12,
    base_family = "mono") +
  theme(
      panel.grid.minor = element_blank(),
      panel.grid.major.x = element_blank(),
      legend.title = element_text(size = 8),
      legend.text = element_text(size = 7))

# set the theme
theme_set(theme_datasci)

# plot
ggplot(df_prop_test_power) +
  aes(
    x = prop_effect,
    y = power,
    color = prop_control,
    group = prop_control) +
  geom_line(show.legend = T) +
  # coordinates and scales ------------
  coord_cartesian(xlim = c(0, 0.15)) +
  scale_x_continuous(
    breaks = seq(0, 0.15, 0.025),
    labels = percent_format()) +
  scale_color_viridis_c(
    option = 'plasma',
    end = 0.9,
    labels = percent_format()) +
  # facet -----------------------------
  facet_wrap(
    ~ n_group,
    ncol = 2,
    # add 'N per group:' at the start of the facet labels
    labeller = as_labeller(
      function(x) { str_c('N per group: ', x)})) +
  # labels & theme --------------------
  theme(legend.position = c(0.9, 0.2)) +
  labs(
    x = 'Difference between treatment and control groups',
    y = 'Probability of detecting an effect',
    color = 'Baseline % \nin control group',
    title = 'Power curves for a prop.test',
    subtitle = 'By sample size, base-rate and treatment effect'
  )

# save
ggsave(here('figures', 'power_curve.png'), width = 8, height = 6, dpi = 'retina')