JackEdTaylor
/
Picture-Word-N1


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
							library(tidyverse)
library(broom)
library(patchwork)
library(grid)
ggplot2::theme_set(
  theme_classic() +
    theme(
      legend.position=c(0.99, 0.01),
      legend.justification = c(1, 0),
      text=element_text(size=12),
      plot.title = element_text(hjust=0.5),
      legend.direction = "vertical"
    )
)

# colours <- c("red", "blue")
colours <- c("#EA5515", "#15AAEA")

ci_level <- 0.99
ci_level_norm_sd <- qnorm(ci_level + (1-ci_level)/2)  # how many multiples of sd to get ci_level of normal distribution
geom_ribbon_ci_level <- 0.99

# function to take vector of 0s and 1s and get lower and upper bounds of confidence interval in tidy format
tidy_binom_ci <- function(x, level=0.95, min_obs=1) {
  x <- as.numeric(x)
  if (length(x)>min_obs) {
    prop.test(sum(x), length(x), conf.level=level, correct = TRUE) %>%
      broom::tidy() %>%
      select(conf.low, conf.high)
  } else {
    tibble(conf.low = NA, conf.high = NA)
  }
}

# power <- list.files("simulations", full.names = TRUE) %>%
#   map_df(
#     read_csv,
#     col_types = cols(
#       sim_nr = col_factor(),
#       re_corr = col_double(),
#       nS = col_double(),
#       estimate = col_double(),
#       chisq = col_double(),
#       p = col_double(),
#       is_sig = col_logical(),
#       slopes_removed = col_logical()
#     )
#   ) %>%
#   mutate(re_corr = factor(round(re_corr, 1), levels=seq(0, 0.8, 0.2)))
# write_csv(power, "power.csv")

# import simulations results
power <- read_csv("power.csv", col_types = cols(
  sim_nr = col_factor(),
  re_corr = col_double(),
  nS = col_double(),
  estimate = col_double(),
  chisq = col_double(),
  p = col_double(),
  is_sig = col_logical(),
  corrs_removed = col_logical()
)) %>%
  mutate(re_corr = factor(round(re_corr, 1), levels=seq(0, 0.8, 0.2))) %>%
  group_by(nS, re_corr) %>%
  mutate(iter_id = row_number()) %>%
  ungroup()

# get one and two-tailed significance results in long format
power_summ <- power %>%
  mutate(sig_type = "One-Tailed", is_sig = as.numeric(p<0.1 & estimate>0)) %>%
  bind_rows(
    .,
    mutate(., sig_type = "Two-Tailed", is_sig = as.numeric(p<0.05))
  )

power_summ_recorr_0 <- filter(power_summ, re_corr==0)

# for the plot looking at random effect correlations, sample a subset of the 
power_summ_all_recorrs <- power %>%
  group_by(nS, re_corr) %>%
  slice_sample(prop=1) %>%
  mutate(sig_type = "One-Tailed", is_sig = as.numeric(p<0.1 & estimate>0)) %>%
  bind_rows(
    .,
    mutate(., sig_type = "Two-Tailed", is_sig = as.numeric(p<0.05))
  )

# plot power curve
plt_power_summ <- power_summ_recorr_0 %>%
  group_by(nS, sig_type) %>%
  summarise(
    power = mean(is_sig),
    ci_low = pull(tidy_binom_ci(is_sig, level=ci_level), "conf.low"),
    ci_high = pull(tidy_binom_ci(is_sig, level=ci_level), "conf.high"),
    .groups = "drop"
  ) %>%
  ggplot(aes(nS, power, colour=sig_type, fill=sig_type)) +
  geom_hline(yintercept=0.8, linetype="dashed") +
  geom_ribbon(
    data=power_summ_recorr_0,
    aes(y=is_sig, group=sig_type),
    stat="smooth", method="glm", formula = y ~ log(x), method.args=list(family="binomial"),
    alpha=0.2, level=geom_ribbon_ci_level
  ) +
  # geom_line(
  #   data=power_summ_recorr_0,
  #   aes(y=is_sig),
  #   stat="smooth", method="glm", formula = y ~ log(x), method.args=list(family="binomial"),
  #   size = 0.25, alpha=1
  # ) +
  geom_point(size=1, position=position_dodge(width=1)) +
  geom_errorbar(aes(ymin=ci_low, ymax=ci_high), width=0, position=position_dodge(width=1), key_glyph="vpath") +
  scale_x_continuous(breaks = seq(0, 100, by = 10)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.2), limits = c(0, 1), expand=c(0,0)) +
  labs(
    x = "Number of Participants",
    y = "Power"
  ) +
  scale_color_manual(name = "Significance Test", values = colours) +
  scale_fill_manual(name = "Significance Test", values = colours) +
  theme(plot.margin=unit(c(0.75,0.75,0.75,0.75), "lines"))

plt_power_summ

ggsave("power.png", plt_power_summ, device="png", type="cairo", width=3.5, height=3, dpi=600)
ggsave("power.pdf", plt_power_summ, device="pdf", width=3.5, height=3)

# plot power curve
draw_key_curve <- function(data, params, size, order=2.5) {
  x <- seq(0.1, 0.9, 0.001)
  y <- 1 - rev(
    0.1 + (x**order - min(x**order)) /
      (1/0.8 * (max(x**order) - min(x**order)))
    )
  grobTree(
    linesGrob(
      x=x, 
      y=y
    ),
    gp = gpar(
      col = data$colour %||% "grey20",
      fill = alpha(data$fill %||% "white", data$alpha),
      lwd = (data$linewidth %||% 0.5) * .pt,
      lty = data$linetype %||% 1
    )
  )
}


plt_power_summ_ot <- power_summ_recorr_0 |>
  filter(sig_type=="One-Tailed") |>
  group_by(nS) %>%
  summarise(
    power = mean(is_sig),
    ci_low = pull(tidy_binom_ci(is_sig, level=ci_level), "conf.low"),
    ci_high = pull(tidy_binom_ci(is_sig, level=ci_level), "conf.high"),
    .groups = "drop"
  ) %>%
  ggplot(aes(nS, power)) +
  geom_ribbon(
    data=filter(power_summ_recorr_0, sig_type=="One-Tailed"),
    aes(y=is_sig, group=sig_type, fill=sprintf("Loglinear %s%% CI", geom_ribbon_ci_level*100)),
    stat="smooth", method="glm", formula = y ~ log(x), method.args=list(family="binomial"),
    alpha=1, level=geom_ribbon_ci_level,
    key_glyph = draw_key_curve, colour = "#F09B0F"
  ) +
  geom_hline(yintercept=0.8, linetype="dashed") +
  geom_point(aes(colour="Point Estimates"), size=0.75) +
  geom_errorbar(aes(ymin=ci_low, ymax=ci_high, colour="Point Estimates"), width=0, position=position_dodge(width=1), key_glyph="vpath", linewidth=0.4) +
  scale_x_continuous(breaks = seq(0, 100, by = 20)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.2), limits = c(0, 1), expand=c(0,0)) +
  scale_fill_manual(values = "#F09B0F", guide=guide_legend(override.aes=list(linewidth=1))) +
  scale_colour_manual(values = "#000000") +
  labs(
    x = "N (Participants)",
    y = "Power",
    fill = NULL,
    colour = NULL
  ) +
  theme_classic() +
  theme(
    legend.position = c(1, 0),
    legend.justification = c(1, 0),
    legend.background = element_blank(),
    legend.spacing = unit(0, "pt"),
    legend.key.height = unit(10, "pt"),
    legend.key.width = unit(15, "pt"),
    legend.spacing.y = unit(-4, "pt"),
    legend.spacing.x = unit(2, "pt"),
    legend.key = element_blank()
  )

ggsave("power_onetailed.png", plt_power_summ_ot, width=2, height=2, units="in", device="png", type="cairo")
ggsave("power_onetailed.pdf", plt_power_summ_ot, width=2, height=2, units="in")

plt_power_summ_corr_vals <- power_summ_all_recorrs %>%
  group_by(nS, re_corr, sig_type) %>%
  summarise(
    power = mean(is_sig),
    ci_low = pull(tidy_binom_ci(is_sig, level=ci_level), "conf.low"),
    ci_high = pull(tidy_binom_ci(is_sig, level=ci_level), "conf.high"),
    .groups = "drop"
  ) %>%
  ggplot(aes(nS, power, colour=re_corr, group=re_corr)) +
  geom_hline(yintercept=0.8, linetype="dashed") +
  geom_line(
    data=power_summ_all_recorrs,
    aes(y=is_sig),
    stat="smooth", method="glm", formula = y ~ log(x), method.args=list(family="binomial"),
    size = 0.5
  ) +
  scale_x_continuous(breaks = seq(0, 100, by = 20)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.2), limits = c(0, 1), expand=c(0,0)) +
  scale_colour_viridis_d() +
  guides(colour = guide_legend(override.aes = list(size = 1.5))) +
  labs(
    x = "Number of Participants",
    y = "Power",
    colour = "Random\nEffects\nCorrelation"
  ) +
  facet_wrap(vars(sig_type)) +
  theme(
    legend.position = "right",
    plot.margin=unit(c(0.75,0.75,0.75,0.75), "lines"),
    strip.background = element_rect(fill = "white")
  )

plt_power_summ_corr_vals

ggsave("power_corr_vals.png", plt_power_summ_corr_vals, device="png", type="cairo", width=6.5, height=2.75, dpi=600)
ggsave("power_corr_vals.pdf", plt_power_summ_corr_vals, device="pdf", width=6.5, height=2.75)

# fit the same models as plotted for to get predictions and results
tails <- unique(power_summ$sig_type)
re_corrs <- unique(power_summ$re_corr)

power_m <- map(tails, function(tail) {
  map(re_corrs, function(re_corr_i) {
    power_summ %>%
      filter(sig_type==tail & re_corr==re_corr_i) %>%
      mutate(power=as.numeric(is_sig)) %>%
      glm(
        power ~ log(nS),
        family=binomial("logit"),
        data = .
      )
  }) %>%
    set_names(re_corrs)
}) %>%
  set_names(tails)

# get number of subjects needed for desired power
poss_nS <- seq(4, 100, 4)  # divisible by 4, so balanced between stimulus groups and response groups
# confint(power_m[["One-Tailed"]][["0"]], level=ci_level)
all_logit <- predict(power_m[["One-Tailed"]][["0"]], newdata=tibble(nS = poss_nS), se.fit=TRUE)
all_logit$se.fit <- all_logit$se.fit * ci_level_norm_sd  # make the standard errors into confidence intervals of ci_level
all_p <- exp(all_logit$fit) / (1+exp(all_logit$fit))
all_intervals_lower <- exp(all_logit$fit - all_logit$se.fit) / (1+exp(all_logit$fit - all_logit$se.fit))
all_intervals_upper <- exp(all_logit$fit + all_logit$se.fit) / (1+exp(all_logit$fit + all_logit$se.fit))
req_power <- 0.8
dist_targ <- all_intervals_lower-req_power
req_nS <- poss_nS[dist_targ == min(dist_targ[dist_targ>0])]
pred_power <- all_p[poss_nS==req_nS]
pred_ci <- c(all_intervals_lower[poss_nS==req_nS], all_intervals_upper[poss_nS==req_nS])

# print a summary of the results
power %>%
  filter(re_corr==0) %>%
  group_by(nS, re_corr) %>%
  summarise(
    n = n(),
    raw_onetailed = mean(p<0.1 & estimate>0),
    raw_twotailed = mean(p<0.05),
    m_estimate = mean(estimate),
    sd_estimate = sd(estimate),
    .groups = "drop"
  ) %>%
  rowwise() %>%
  mutate(
    re_corr = as.character(re_corr),
    logit_onetail = predict(power_m[["One-Tailed"]][[re_corr]], newdata=tibble(nS=nS)),
    power_onetail = exp(logit_onetail) / (1 + exp(logit_onetail)),
    logit_twotail = predict(power_m[["Two-Tailed"]][[re_corr]], newdata=tibble(nS=nS)),
    power_twotail = exp(logit_twotail) / (1 + exp(logit_twotail))
  )

message(sprintf("We decided that we need %s participants to reach >=%s%% power. Specifically, the model predicted that at this number of subjects, we would have %s%% power (%s%% CI = [%s%%, %s%%]) to detect the predicted effect.\n", req_nS, req_power*100, round(pred_power*100, 3), ci_level*100, round(pred_ci[1]*100, 3), round(pred_ci[2]*100, 3)))