Figure 2 - Butyrate Production Capacity from ACoA Pathway and ICB outcome

Here are the column names of the data we are plotting:

fouraminobutyrate, glutarate, lysine and pyruvate represent different parts of the butyrate synthesis pathway.

 [1] "blinded_subject_id"    "id_int"                "cohort"               
 [4] "age"                   "impact_tmb_score"      "cpb_drug"             
 [7] "ecog"                  "best_overall_response" "tt_pfs_d"             
[10] "pfs_event"             "tt_os_d"               "event_os"             
[13] "TMPT"                  "WMS_SGPID"             "SAMPID"               
[16] "total"                 "samp_id"               "identifier.y"         
[19] "Shannon"               "tx"                    "age10"                
[22] "med_buty"              "fouraminobutyrate"     "gluatarate"           
[25] "lysine"                "pyruvate"              "log_acoa"

Panel A

Survival in Mixed Solid Tumor Cohort (n = 147) by ACoA pathway gene abundance in stool above/below the median.

First for overall survival (OS):

# cut the distribution of pyruvate into above/below median:
acoa_df <- acoa_df %>%
  mutate(med_p = cut(pyruvate, quantile(pyruvate, probs=seq(0,1,0.5)), c("Below Median", "Above Median"))) %>% #ifelse(pyruvate > median(pyruvate, na.rm = T), "Above Median", "Below Median")) %>%
  mutate(tt_os_months = tt_os_d/30.44) %>%
  mutate(tt_pfs_months = tt_pfs_d/30.44) %>%
  arrange(med_p)

fit<- survfit(Surv(tt_os_months, event_os) ~ med_p, data = acoa_df)
names(fit$strata) <- gsub("med_p=", "", names(fit$strata))
p <- ggsurvplot(fit,  size = 1,  # change line size
           linetype = "solid", # use solid line
           break.time.by = 12, # break time axis by 12 months
           palette = c("aquamarine4", "coral4"), # custom color palette
           conf.int = TRUE, # Add confidence interval
           pval = TRUE, # Add p-value,
           legend.title="",
           risk.table = TRUE, risk.table.y.text.col = TRUE,
           ylab = "Survival %",
           xlab = "Time (Months)",
           xlim = c(0,48)
           ) 
print(p)

Then looking at Progression Free Survival (PFS) instead of OS:

fit<- survfit(Surv(tt_pfs_months, pfs_event) ~ med_p, data = acoa_df)
names(fit$strata) <- gsub("med_p=", "", names(fit$strata))
p <- ggsurvplot(fit,  size = 1,  # change line size
           linetype = "solid", # use solid line
           break.time.by = 12, # break time axis by 12 months
           palette = c("aquamarine4", "coral4"), # custom color palette
           conf.int = TRUE, # Add confidence interval
           pval = TRUE, # Add p-value,
           legend.title="",
           risk.table = TRUE, risk.table.y.text.col = TRUE,
           ylab = "Survival %",
           xlab = "Time (Months)",
           xlim = c(0,48)
           ) 
print(p)

Panel B

Restricted cubic spline for Observational cohort adjusted for age, diagnosis, and performance status identifies increasing hazard of overall mortality associated with below-median (<504 RPKM) ACoA pathway gene abundance among ICB recipients. Ticks along the x-axis reflect individual samples.

model_spline_os <- coxph(Surv(tt_os_d, event_os) ~ rms::rcs(pyruvate,4) + age10 + factor(cohort) + ecog, acoa_df)
ptemp <- termplot(model_spline_os, se=T, plot=F)
buterm <- ptemp$pyruvate
center <- buterm$y[which(buterm$y == median(buterm$y))]
ytemp <- buterm$y + outer(buterm$se, c(0, -1.96, 1.96), '*')
exp_ytemp <- exp(ytemp - center)

spline_data <- data.frame(buty = buterm$x, Estimate = exp_ytemp[,1],
                          Lower = exp_ytemp[,2], Upper = exp_ytemp[,3])

ggplot(spline_data, aes(x = buty)) +
  geom_ribbon(aes(ymin = Lower, ymax = Upper), fill = "grey80", alpha = 0.5) +
  geom_line(aes(y = Estimate), color = "blue") +
  geom_rug(sides = "b") +  # Add rug plot at the bottom ('b') of the plot
  scale_y_log10() +  # Log scale for y-axis
  labs(x = "Pyruvate-pathway genes present", y = "Hazard ratio") +
  theme_minimal()

We will now plot some microbiome data, using a phyloseq object that contains our sequencing data. This object contains a list of the taxonomy of organisms identified, the abundance in each sample, and any associated sample specific metadata in one object.

Principal components analysis (PCA) of Observational cohort sample (n = 147, 1 per patient) taxonomic abundance grouped at the family level shows increasing ACoA-pathway gene abundance from left to right along the first principal component (x-axis). Each point represents a single fecal sample. PCA loading vectors (arrows) identify increasing Ruminococcaceae and Lachnospiraceae abundance in the direction corresponding to increased ACoA-pathway gene abundance, versus greater Bacteroidales abundance (grouped at the order level) in the direction of decreased ACoA-pathway gene presence.

Panel C

First looking at how log ACOA distribution looks superimposed over the PCOA of Stool Samples:

ps %>%
  ps_filter(SAMPID %in% acoa_df$SAMPID) %>%
  tax_fix() %>%
  tax_transform(trans = "compositional", rank = "Species") %>%
  dist_calc("aitchison") %>%
  ord_calc("PCoA") %>%
  ord_plot(fill = "log_acoa", size = 4, shape=21, alpha=0.6,
           plot_taxa = c(1:5)) +
  scale_fill_gradientn(colours=c("#2b3f76", "#f0f0f0", "#b2182b"), values=c(0, 0.5, 1)) +
  theme_classic(base_size = 16) +  
  scale_x_reverse() + 
  labs(
    fill = "Log ACOA"
  )

Next looking at Ruminococcaeceae:

rum_ps <- ps
sample_data(rum_ps)$log_rum = log10(as.numeric(sample_data(ps)$Ruminococcaceae) + 0.0001)
rum_ps %>%
  ps_filter(SAMPID %in% acoa_df$SAMPID) %>%
  tax_fix() %>%
  tax_transform(trans = "compositional", rank = "Species") %>%
  dist_calc("aitchison") %>%
  ord_calc("PCoA") %>%
  ord_plot(fill = "log_rum", size = 4, shape=21, alpha=0.6,
           plot_taxa = c(1:5)) +
  scale_fill_gradientn(colours=c("#2b3f76", "#f0f0f0", "#b2182b"), values=c(0, 0.65, 1)) +
  theme_classic(base_size = 16) + 
  scale_x_reverse() + 
  labs(
    fill = "Log10 Rel Abund.\nRuminococcaceae"
  )

Lastly looking at Bacteroidaceae:

bac_ps <- ps
sample_data(bac_ps)$log_bac = log10(as.numeric(sample_data(ps)$Bacteroidaceae) + 0.0001)
bac_ps %>%
  ps_filter(SAMPID %in% acoa_df$SAMPID) %>%
  tax_fix() %>%
  tax_transform(trans = "compositional", rank = "Species") %>%
  dist_calc("aitchison") %>%
  ord_calc("PCoA") %>%
  ord_plot(fill = "log_bac", size = 4, shape=21, alpha=0.6,
           plot_taxa = c(1:5)) +
  scale_fill_gradientn(colours=c("#2b3f76", "#f0f0f0", "#b2182b"), values=c(0, 0.75, 1)) +
  theme_classic(base_size = 16) + 
  scale_x_reverse() + 
  labs(
    fill = "Log10 Rel Abund.\nBacteroidaceae"
  )

Panel D

ACoA-pathway gene abundance was associated with response to ICB among patients with advanced renal cell cancer receiving ipilimumab plus nivolumab in the External Renal Cancer Cohort (n = 39).

coh_base_ni <- readRDS("../data/coh_butyrate.rds")

#Figure 2D
ggplot(coh_base_ni, aes(x = response, y = pyruvate)) +
  geom_boxplot(width = 0.4, outlier.shape = NA, linewidth = 0.6, color = "black") +
  geom_jitter(aes(fill = response), shape = 21, width = 0.15, size = 5, stroke = 0.3, color = "black", alpha = 0.7) +
  scale_fill_manual(values = c("#999999", "#E69F00")) +
  scale_x_discrete(labels = c("Non-responder", "Responder")) +
  theme_classic(base_size = 16) +
  theme(
    legend.position = "none",
    axis.line = element_line(color = "black", linewidth = 0.5),
    axis.ticks = element_line(color = "black", linewidth = 0.5)
  ) +
  xlab("") +
  ylab("ACoA pathway gene abundance (RPKM)")

wilcox.test(coh_base_ni$pyruvate ~ coh_base_ni$response)


    Wilcoxon rank sum exact test

data:  coh_base_ni$pyruvate by coh_base_ni$response
W = 36, p-value = 0.002129
alternative hypothesis: true location shift is not equal to 0

group_n <- coh_base_ni %>% count(response)
print(group_n)

# A tibble: 2 × 2
  response     n
  <chr>    <int>
1 N           16
2 Y           13

#adjusted for CMB588
summary(glm(response01 ~ log1p(pyruvate) + cbm588, data = coh_base_ni , family = binomial(link = "logit")))


Call:
glm(formula = response01 ~ log1p(pyruvate) + cbm588, family = binomial(link = "logit"), 
    data = coh_base_ni)

Coefficients:
                Estimate Std. Error z value Pr(>|z|)  
(Intercept)      -15.157      7.325  -2.069   0.0385 *
log1p(pyruvate)    2.491      1.160   2.148   0.0318 *
cbm588no-CBM      -1.439      1.022  -1.407   0.1593  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 39.892  on 28  degrees of freedom
Residual deviance: 26.776  on 26  degrees of freedom
AIC: 32.776

Number of Fisher Scoring iterations: 6

Panel E

Univariate Cox regression analyses PFS relationship to ACoA gene abundance in external cohort.

wargo_surv_df <- readRDS("../data/cleaned_data/wargo_pfs_data_for_univariate_model.RDS")


model1_wargo <- coxph(Surv(pfs_d, pfsevent)~log(rum), wargo_surv_df)
model2_wargo <- coxph(Surv(pfs_d, pfsevent)~log(rum_lach), wargo_surv_df)
model3_wargo <- coxph(Surv(pfs_d, pfsevent)~log(clostridiales), wargo_surv_df)
model4_wargo <- coxph(Surv(pfs_d, pfsevent)~log(clostridia), wargo_surv_df)
model5_wargo <- coxph(Surv(pfs_d, pfsevent)~log(eval(faecalibacterium_prausnitzii+1e-7)), wargo_surv_df)
model6_wargo <- coxph(Surv(pfs_d, pfsevent)~log1p(pyruvate), wargo_surv_df)


models <- list(model1_wargo, model2_wargo, model3_wargo, model4_wargo, model5_wargo, model6_wargo)



results_df <- data.frame()


for (i in seq_along(models)) {
  model <- models[[i]]
  summary_model <- summary(model)


  hr <- summary_model$coefficients[, "exp(coef)"]
  lower_ci <- summary_model$conf.int[, "lower .95"]
  upper_ci <- summary_model$conf.int[, "upper .95"]
  pvalue <- summary_model$coefficients[, "Pr(>|z|)"]


  results_df <- rbind(results_df, data.frame(
    Model = paste0("model", i),
    HR = hr,
    Lower_CI = lower_ci,
    Upper_CI = upper_ci,
    pvalue = pvalue
  ))
}

results_df$source = rep("wargo", 6)
results_df$modelname = c("log(Ruminococcaceae)", "log(Ruminococcacaeae + Lachnospiraceae)",
                             "log(Clostridiales)", "log(Clostridia)", "log(F prausnitizii + 1e-7)", "log(ACoA")


ggplot(results_df) +
  geom_point(aes(y=fct_rev(fct_reorder(modelname, HR)), x=HR)) +
  geom_errorbarh(aes(y=fct_rev(fct_reorder(modelname, HR)), xmin=Lower_CI, xmax=Upper_CI), height=0.5)+
  geom_vline(xintercept=1, lty=2, col="gray70") +
  scale_x_continuous(trans="log") +
  theme_classic() +
  labs(title = "HR (95% CI) univariable Cox",
       subtitle ="", y="Variable", x="HR")

Panel E MSK Data

Univariate Cox regression analyses PFS relationship to ACoA gene abundance in MSK Observational Data.

msk_df <- readRDS("../data/cleaned_data/interal_msk_data_for_univariate_model.RDS")

model1_msk <- coxph(Surv(tt_pfs_d, pfs_event)~log(rum), msk_df)
model2_msk <- coxph(Surv(tt_pfs_d, pfs_event)~log(rum_lach), msk_df)
model3_msk <- coxph(Surv(tt_pfs_d, pfs_event)~log(clostridiales), msk_df)
model4_msk <- coxph(Surv(tt_pfs_d, pfs_event)~log(clostridia), msk_df)
model5_msk <- coxph(Surv(tt_pfs_d, pfs_event)~log(eval(faecalibacterium_prausnitzii+1e-4)), msk_df)
model6_msk <- coxph(Surv(tt_pfs_d, pfs_event)~log1p(pyruvate), msk_df)


models <- list(model1_msk, model2_msk, model3_msk, model4_msk, model5_msk, model6_msk)



results_df <- data.frame()


for (i in seq_along(models)) {
  model <- models[[i]]
  summary_model <- summary(model)


  hr <- summary_model$coefficients[, "exp(coef)"]
  lower_ci <- summary_model$conf.int[, "lower .95"]
  upper_ci <- summary_model$conf.int[, "upper .95"]
  pvalue <- summary_model$coefficients[, "Pr(>|z|)"]


  results_df <- rbind(results_df, data.frame(
    Model = paste0("model", i),
    HR = hr,
    Lower_CI = lower_ci,
    Upper_CI = upper_ci,
    pvalue = pvalue
  ))
}

results_df$source = rep("msk", 6)
results_df$modelname = c("log(Ruminococcaceae)", "log(Ruminococcacaeae + Lachnospiraceae)",
                             "log(Clostridiales)", "log(Clostridia)", "log(F prausnitizii + 1e-7)", "log(ACoA")


ggplot(results_df) +
  geom_point(aes(y=fct_rev(fct_reorder(modelname, HR)), x=HR)) +
  geom_errorbarh(aes(y=fct_rev(fct_reorder(modelname, HR)), xmin=Lower_CI, xmax=Upper_CI), height=0.5)+
  geom_vline(xintercept=1, lty=2, col="gray70") +
  scale_x_continuous(trans="log") +
  theme_classic() +
  labs(title = "HR (95% CI) univariable Cox",
       subtitle ="", y="Variable", x="HR")