Figure 2 - Butyrate Production Capacity from ACoA Pathway and ICB outcome

Here are the column names of the data we are plotting:

fouraminobutyrate, glutarate, lysine and pyruvate represent different parts of the butyrate synthesis pathway.

Panel A

Survival in Mixed Solid Tumor Cohort (n = 147) by ACoA pathway gene abundance in stool above/below the median.

First for overall survival (OS):

Then looking at Progression Free Survival (PFS) instead of OS:

Panel B

Restricted cubic spline for Observational cohort adjusted for age, diagnosis, and performance status identifies increasing hazard of overall mortality associated with below-median (<504 RPKM) ACoA pathway gene abundance among ICB recipients. Ticks along the x-axis reflect individual samples.

cox_os_mv_spline <- coxph(Surv(tt_os_d, event_os) ~ rms::rcs(pyruvate,4) + age10 + factor(cohort) + ecog + bmi + imputed_smoking_status, mixed_solid_tumor)
summary(cox_os_mv_spline)
Call:
coxph(formula = Surv(tt_os_d, event_os) ~ rms::rcs(pyruvate, 
    4) + age10 + factor(cohort) + ecog + bmi + imputed_smoking_status, 
    data = mixed_solid_tumor)

  n= 127, number of events= 67 
   (20 observations deleted due to missingness)

                                             coef exp(coef)  se(coef)      z
rms::rcs(pyruvate, 4)pyruvate           -0.007445  0.992582  0.002678 -2.780
rms::rcs(pyruvate, 4)pyruvate'           0.014596  1.014703  0.009261  1.576
rms::rcs(pyruvate, 4)pyruvate''         -0.030785  0.969684  0.025911 -1.188
age10                                    0.291623  1.338599  0.146378  1.992
factor(cohort)Breast                     1.465711  4.330619  1.109723  1.321
factor(cohort)Lung                       0.870819  2.388866  0.372297  2.339
factor(cohort)Melanoma                  -0.645373  0.524467  0.490361 -1.316
factor(cohort)Renal                      0.484533  1.623416  0.488579  0.992
ecog                                     0.410150  1.507044  0.255016  1.608
bmi                                      0.052437  1.053836  0.030578  1.715
imputed_smoking_statusCurrent or Former -0.580409  0.559670  0.317863 -1.826
                                        Pr(>|z|)   
rms::rcs(pyruvate, 4)pyruvate            0.00544 **
rms::rcs(pyruvate, 4)pyruvate'           0.11499   
rms::rcs(pyruvate, 4)pyruvate''          0.23478   
age10                                    0.04634 * 
factor(cohort)Breast                     0.18657   
factor(cohort)Lung                       0.01933 * 
factor(cohort)Melanoma                   0.18813   
factor(cohort)Renal                      0.32134   
ecog                                     0.10776   
bmi                                      0.08637 . 
imputed_smoking_statusCurrent or Former  0.06785 . 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

                                        exp(coef) exp(-coef) lower .95
rms::rcs(pyruvate, 4)pyruvate              0.9926     1.0075    0.9874
rms::rcs(pyruvate, 4)pyruvate'             1.0147     0.9855    0.9965
rms::rcs(pyruvate, 4)pyruvate''            0.9697     1.0313    0.9217
age10                                      1.3386     0.7470    1.0047
factor(cohort)Breast                       4.3306     0.2309    0.4920
factor(cohort)Lung                         2.3889     0.4186    1.1516
factor(cohort)Melanoma                     0.5245     1.9067    0.2006
factor(cohort)Renal                        1.6234     0.6160    0.6231
ecog                                       1.5070     0.6636    0.9142
bmi                                        1.0538     0.9489    0.9925
imputed_smoking_statusCurrent or Former    0.5597     1.7868    0.3002
                                        upper .95
rms::rcs(pyruvate, 4)pyruvate              0.9978
rms::rcs(pyruvate, 4)pyruvate'             1.0333
rms::rcs(pyruvate, 4)pyruvate''            1.0202
age10                                      1.7834
factor(cohort)Breast                      38.1195
factor(cohort)Lung                         4.9555
factor(cohort)Melanoma                     1.3712
factor(cohort)Renal                        4.2297
ecog                                       2.4843
bmi                                        1.1189
imputed_smoking_statusCurrent or Former    1.0435

Concordance= 0.677  (se = 0.036 )
Likelihood ratio test= 33.59  on 11 df,   p=4e-04
Wald test            = 30.02  on 11 df,   p=0.002
Score (logrank) test = 31.59  on 11 df,   p=9e-04
ptemp <- termplot(cox_os_mv_spline, se=T, plot=F)
buterm <- ptemp$pyruvate
center <- buterm$y[which(buterm$y == median(buterm$y))]
ytemp <- buterm$y + outer(buterm$se, c(0, -1.96, 1.96), '*')
exp_ytemp <- exp(ytemp - center)

spline_data <- data.frame(buty = buterm$x, Estimate = exp_ytemp[,1],
                          Lower = exp_ytemp[,2], Upper = exp_ytemp[,3])

ggplot(spline_data, aes(x = buty)) +
  geom_ribbon(aes(ymin = Lower, ymax = Upper), fill = "grey80", alpha = 0.5) +
  geom_line(aes(y = Estimate), color = "blue") +
  geom_rug(sides = "b") +  # Add rug plot at the bottom ('b') of the plot
  scale_y_log10() +  # Log scale for y-axis
  labs(
    title = "Spline for Overall Survival",
    x = "ACoA-pathway genes present (RPKM)",
    y = "Hazard ratio") +
  theme_minimal()

ggsave("svg/updated_fig_2_spline_with_bmi_smoking.svg", w = 3, h = 3) 
#also fit model without spline
cox_os_mv <- coxph(Surv(tt_os_d, event_os) ~ log(pyruvate) + age10 + factor(cohort) + ecog + bmi + imputed_smoking_status, mixed_solid_tumor)


cat(paste(
  "-----------------------------------------------\n\nWald p value for ACoA pathway abundance, OS pyruvate models with spline:",
  round(summary(cox_os_mv_spline)$coefficients["rms::rcs(pyruvate, 4)pyruvate", "Pr(>|z|)"],digits = 5),
  "\n\n-----------------------------------------------\n\n"
))
-----------------------------------------------

Wald p value for ACoA pathway abundance, OS pyruvate models with spline: 0.00544 

-----------------------------------------------
cat(paste(
  "-----------------------------------------------\n\nWald p value for ACoA pathway abundance, OS pyruvate continous:",
  round(summary(cox_os_mv)$coefficients["log(pyruvate)", "Pr(>|z|)"],digits = 5),
  "\n\n-----------------------------------------------\n\n"
))
-----------------------------------------------

Wald p value for ACoA pathway abundance, OS pyruvate continous: 0.00057 

-----------------------------------------------

We will now plot some microbiome data, using a phyloseq object that contains our sequencing data. This object contains a list of the taxonomy of organisms identified, the abundance in each sample, and any associated sample specific metadata in one object.

Principal components analysis (PCA) of Observational cohort sample (n = 147, 1 per patient) taxonomic abundance grouped at the family level shows increasing ACoA-pathway gene abundance from left to right along the first principal component (x-axis). Each point represents a single fecal sample. PCA loading vectors (arrows) identify increasing Ruminococcaceae and Lachnospiraceae abundance in the direction corresponding to increased ACoA-pathway gene abundance, versus greater Bacteroidales abundance (grouped at the order level) in the direction of decreased ACoA-pathway gene presence.

Panel C

First looking at how log ACOA distribution looks superimposed over the PCOA of Stool Samples:

ps %>%
  ps_filter(SAMPID %in% mixed_solid_tumor$SAMPID) %>%
  tax_fix() %>%
  tax_transform(trans = "compositional", rank = "Species") %>%
  dist_calc("aitchison") %>%
  ord_calc("PCoA") %>%
  ord_plot(fill = "log_acoa", size = 4, shape=21, alpha=0.6,
           plot_taxa = c(1:5)) +
  scale_fill_gradientn(colours=c("#2b3f76", "#f0f0f0", "#b2182b"), values=c(0, 0.5, 1)) +
  theme_classic(base_size = 16) +  
  scale_x_reverse() + 
  labs(
    fill = "Log ACOA"
  )

Next looking at Ruminococcaeceae:

rum_ps <- ps
sample_data(rum_ps)$log_rum = log10(as.numeric(sample_data(ps)$Ruminococcaceae) + 0.0001)
rum_ps %>%
  ps_filter(SAMPID %in% mixed_solid_tumor$SAMPID) %>%
  tax_fix() %>%
  tax_transform(trans = "compositional", rank = "Species") %>%
  dist_calc("aitchison") %>%
  ord_calc("PCoA") %>%
  ord_plot(fill = "log_rum", size = 4, shape=21, alpha=0.6,
           plot_taxa = c(1:5)) +
  scale_fill_gradientn(colours=c("#2b3f76", "#f0f0f0", "#b2182b"), values=c(0, 0.65, 1)) +
  theme_classic(base_size = 16) + 
  scale_x_reverse() + 
  labs(
    fill = "Log10 Rel Abund.\nRuminococcaceae"
  )

Lastly looking at Bacteroidaceae:

bac_ps <- ps
sample_data(bac_ps)$log_bac = log10(as.numeric(sample_data(ps)$Bacteroidaceae) + 0.0001)
bac_ps %>%
  ps_filter(SAMPID %in% mixed_solid_tumor$SAMPID) %>%
  tax_fix() %>%
  tax_transform(trans = "compositional", rank = "Species") %>%
  dist_calc("aitchison") %>%
  ord_calc("PCoA") %>%
  ord_plot(fill = "log_bac", size = 4, shape=21, alpha=0.6,
           plot_taxa = c(1:5)) +
  scale_fill_gradientn(colours=c("#2b3f76", "#f0f0f0", "#b2182b"), values=c(0, 0.75, 1)) +
  theme_classic(base_size = 16) + 
  scale_x_reverse() + 
  labs(
    fill = "Log10 Rel Abund.\nBacteroidaceae"
  )

Panel D

ACoA-pathway gene abundance was associated with response to ICB among patients with advanced renal cell cancer receiving ipilimumab plus nivolumab in the External Renal Cancer Cohort (n = 39).

#Figure 2D
ggplot(external_rcc, aes(x = response, y = pyruvate)) +
  geom_boxplot(width = 0.4, outlier.shape = NA, linewidth = 0.6, color = "black") +
  geom_jitter(aes(fill = response), shape = 21, width = 0.15, size = 5, stroke = 0.3, color = "black", alpha = 0.7) +
  scale_fill_manual(values = c("#999999", "#E69F00")) +
  scale_x_discrete(labels = c("Non-responder", "Responder")) +
  theme_classic(base_size = 16) +
  theme(
    legend.position = "none",
    axis.line = element_line(color = "black", linewidth = 0.5),
    axis.ticks = element_line(color = "black", linewidth = 0.5)
  ) +
  xlab("") +
  ylab("Acetyl-CoA pathway\ngene abundance (RPKM)")

wilcox.test(external_rcc$pyruvate ~ external_rcc$response)

    Wilcoxon rank sum exact test

data:  external_rcc$pyruvate by external_rcc$response
W = 36, p-value = 0.00372
alternative hypothesis: true location shift is not equal to 0
group_n <- external_rcc %>% count(response)
print(group_n)
# A tibble: 2 × 2
  response     n
  <chr>    <int>
1 N           15
2 Y           13
#adjusted for CMB588
summary(glm(response01 ~ log1p(pyruvate) + cbm588, data = external_rcc , family = binomial(link = "logit")))

Call:
glm(formula = response01 ~ log1p(pyruvate) + cbm588, family = binomial(link = "logit"), 
    data = external_rcc)

Coefficients:
                Estimate Std. Error z value Pr(>|z|)  
(Intercept)      -15.157      7.325  -2.069   0.0385 *
log1p(pyruvate)    2.491      1.160   2.148   0.0318 *
cbm588no-CBM      -1.439      1.022  -1.407   0.1593  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 38.673  on 27  degrees of freedom
Residual deviance: 26.776  on 25  degrees of freedom
AIC: 32.776

Number of Fisher Scoring iterations: 5

Panel E

Univariate Cox regression analyses PFS relationship to ACoA gene abundance in external cohort.

model1_wargo_pfs <- coxph(Surv(pfs_d, pfsevent)~test_v + age10 + adv_substage, external_melanoma_cohort %>% mutate(test_v = log(rum)))
model2_wargo_pfs <- coxph(Surv(pfs_d, pfsevent)~test_v + age10 + adv_substage, external_melanoma_cohort%>% mutate(test_v = log(rum_lach)))
model3_wargo_pfs <- coxph(Surv(pfs_d, pfsevent)~test_v + age10 + adv_substage, external_melanoma_cohort%>% mutate(test_v = log(clostridiales)))
model4_wargo_pfs <- coxph(Surv(pfs_d, pfsevent)~test_v + age10 + adv_substage, external_melanoma_cohort%>% mutate(test_v = log(clostridia)))
model5_wargo_pfs <- coxph(Surv(pfs_d, pfsevent)~test_v + age10 + adv_substage, external_melanoma_cohort%>% mutate(test_v = log(eval(faecalibacterium_prausnitzii+1e-7))))
model6_wargo_pfs <- coxph(Surv(pfs_d, pfsevent)~test_v + age10 + adv_substage, external_melanoma_cohort%>% mutate(test_v = log1p(pyruvate)))

models_external_melanoma_os <- list(model6_wargo_pfs, model1_wargo_pfs, model2_wargo_pfs, model3_wargo_pfs, model4_wargo_pfs, model5_wargo_pfs)
summary(model6_wargo_pfs)
Call:
coxph(formula = Surv(pfs_d, pfsevent) ~ test_v + age10 + adv_substage, 
    data = external_melanoma_cohort %>% mutate(test_v = log1p(pyruvate)))

  n= 114, number of events= 61 
   (43 observations deleted due to missingness)

                 coef exp(coef) se(coef)      z Pr(>|z|)    
test_v       -0.34115   0.71095  0.15696 -2.174   0.0297 *  
age10        -0.02404   0.97625  0.11894 -0.202   0.8398    
adv_substage  1.25488   3.50743  0.29953  4.189  2.8e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

             exp(coef) exp(-coef) lower .95 upper .95
test_v          0.7110     1.4066    0.5227     0.967
age10           0.9762     1.0243    0.7732     1.233
adv_substage    3.5074     0.2851    1.9500     6.309

Concordance= 0.681  (se = 0.035 )
Likelihood ratio test= 24.08  on 3 df,   p=2e-05
Wald test            = 21.44  on 3 df,   p=9e-05
Score (logrank) test = 23.49  on 3 df,   p=3e-05
results_df <- data.frame()

for (i in seq_along(models_external_melanoma_os)) {
  model <- models_external_melanoma_os[[i]]
  summary_model <- summary(model)

  hr <- round(summary_model$coefficients["test_v", "exp(coef)"], digits=2)
  lower_ci <- round(summary_model$conf.int["test_v", "lower .95"], digits=2)
  upper_ci <- round(summary_model$conf.int["test_v", "upper .95"], digits=2)
  pvalue <- round(summary_model$coefficients["test_v", "Pr(>|z|)"], digits=3)

  results_df <- rbind(results_df, data.frame(
    Model = paste0("model", i),
    HR = hr,
    Lower_CI = lower_ci,
    Upper_CI = upper_ci,
    pvalue = pvalue
  ))
}

results_df$source = rep("wargo", 6)
results_df$modelname = c("log(ACoA)", "log(Ruminococcaceae)", "log(Ruminococcacaeae + Lachnospiraceae)",
                             "log(Clostridiales)", "log(Clostridia)", "log(F prausnitizii + 1e-7)")

ggplot(results_df) +
  geom_point(aes(y=fct_rev(fct_reorder(modelname, HR)), x=HR)) +
  geom_errorbarh(aes(y=fct_rev(fct_reorder(modelname, HR)), xmin=Lower_CI, xmax=Upper_CI), height=0.5)+
  geom_vline(xintercept=1, lty=2, col="gray70") +
  scale_x_continuous(trans="log") +
  theme_classic() +
  labs(title = "HR (95% CI) multivariable Cox PFS\nExternal Melanoma Cohort",
       subtitle ="", y="Variable", x="HR")

Panel E MSK Data

Univariate Cox regression analyses PFS relationship to ACoA gene abundance in MSK Observational Data.

model1_msk_pfs <- coxph(Surv(tt_pfs_d, pfs_event)~test_v+age10+strata(cohort)+imputed_ecog+ bmi + imputed_smoking_status, mixed_solid_tumor %>% mutate(test_v = log(rum)))
model2_msk_pfs <- coxph(Surv(tt_pfs_d, pfs_event)~test_v+age10+strata(cohort)+imputed_ecog+ bmi + imputed_smoking_status, mixed_solid_tumor%>% mutate(test_v = log(rum_lach)))
model3_msk_pfs <- coxph(Surv(tt_pfs_d, pfs_event)~test_v+age10+strata(cohort)+imputed_ecog+ bmi + imputed_smoking_status, mixed_solid_tumor %>% mutate(test_v = log(clostridiales)))
model4_msk_pfs <- coxph(Surv(tt_pfs_d, pfs_event)~test_v+age10+strata(cohort)+imputed_ecog+ bmi + imputed_smoking_status, mixed_solid_tumor %>% mutate(test_v = log(clostridia)))
model5_msk_pfs <- coxph(Surv(tt_pfs_d, pfs_event)~test_v+age10+strata(cohort)+imputed_ecog+ bmi + imputed_smoking_status, mixed_solid_tumor %>% mutate(test_v = log(eval(faecalibacterium_prausnitzii+1e-4))))
model6_msk_pfs <- coxph(Surv(tt_pfs_d, pfs_event)~test_v+age10+strata(cohort)+imputed_ecog+ bmi + imputed_smoking_status, mixed_solid_tumor %>% mutate(test_v = log1p(pyruvate)))


models_mixed_solid_tumor_pfs <- list(model6_msk_pfs, model1_msk_pfs, model2_msk_pfs, model3_msk_pfs, model4_msk_pfs, model5_msk_pfs)

results_df <- data.frame()


for (i in seq_along(models_mixed_solid_tumor_pfs)) {
  model <- models_mixed_solid_tumor_pfs[[i]]
  summary_model <- summary(model)


  hr <- round(summary_model$coefficients["test_v", "exp(coef)"], digits=2)
  lower_ci <- round(summary_model$conf.int["test_v", "lower .95"], digits=2)
  upper_ci <- round(summary_model$conf.int["test_v", "upper .95"], digits=2)
  pvalue <- round(summary_model$coefficients["test_v", "Pr(>|z|)"], digits=3)


  results_df <- rbind(results_df, data.frame(
    Model = paste0("model", i),
    HR = hr,
    Lower_CI = lower_ci,
    Upper_CI = upper_ci,
    pvalue = pvalue
  ))
}

results_df$source = rep("msk", 6)
results_df$type = rep("pfs", 6)
results_df$modelname = c("log(ACoA)","log(Ruminococcaceae)", "log(Ruminococcacaeae + Lachnospiraceae)",
                             "log(Clostridiales)", "log(Clostridia)", "log(F prausnitizii + 1e-7)")


ggplot(results_df) +
  geom_point(aes(y=fct_rev(fct_reorder(modelname, HR)), x=HR)) +
  geom_errorbarh(aes(y=fct_rev(fct_reorder(modelname, HR)), xmin=Lower_CI, xmax=Upper_CI), height=0.5)+
  geom_vline(xintercept=1, lty=2, col="gray70") +
  scale_x_continuous(trans="log") +
  theme_classic() +
  labs(title = "HR (95% CI) multivariable Cox PFS\nMixed Solid Tumor Cohort",
       subtitle ="", y="Variable", x="HR")

model1_msk_os <- coxph(Surv(tt_os_d, event_os)~test_v+age10+strata(cohort)+ecog+ bmi + imputed_smoking_status, mixed_solid_tumor %>% mutate(test_v = log(rum)))
model2_msk_os <- coxph(Surv(tt_os_d, event_os)~test_v+age10+strata(cohort)+ecog+ bmi + imputed_smoking_status, mixed_solid_tumor%>% mutate(test_v = log(rum_lach)))
model3_msk_os <- coxph(Surv(tt_os_d, event_os)~test_v+age10+strata(cohort)+ecog+ bmi + imputed_smoking_status, mixed_solid_tumor %>% mutate(test_v = log(clostridiales)))
model4_msk_os <- coxph(Surv(tt_os_d, event_os)~test_v+age10+strata(cohort)+ecog+ bmi + imputed_smoking_status, mixed_solid_tumor %>% mutate(test_v = log(clostridia)))
model5_msk_os <- coxph(Surv(tt_os_d, event_os)~test_v+age10+strata(cohort)+ecog+ bmi + imputed_smoking_status, mixed_solid_tumor %>% mutate(test_v = log(eval(faecalibacterium_prausnitzii+1e-4))))
model6_msk_os <- coxph(Surv(tt_os_d, event_os)~test_v+age10+strata(cohort)+ecog+ bmi + imputed_smoking_status, mixed_solid_tumor %>% mutate(test_v = log1p(pyruvate)))


models_mixed_solid_tumor_os <- list(model6_msk_os, model1_msk_os, model2_msk_os, model3_msk_os, model4_msk_os, model5_msk_os)

results_df <- data.frame()

for (i in seq_along(models_mixed_solid_tumor_os)) {
  model <- models_mixed_solid_tumor_os[[i]]
  summary_model <- summary(model)


  hr <- round(summary_model$coefficients["test_v", "exp(coef)"], digits = 2)
  lower_ci <- round(summary_model$conf.int["test_v", "lower .95"], digits = 2)
  upper_ci <- round(summary_model$conf.int["test_v", "upper .95"], digits = 2)
  pvalue <- round(summary_model$coefficients["test_v", "Pr(>|z|)"], digits = 3)


  results_df <- rbind(results_df, data.frame(
    Model = paste0("model", i),
    HR = hr,
    Lower_CI = lower_ci,
    Upper_CI = upper_ci,
    pvalue = pvalue
  ))
}

results_df$source = rep("msk", 6)
results_df$type = rep("os", 6)
results_df$modelname = c("log(ACoA)","log(Ruminococcaceae)", "log(Ruminococcacaeae + Lachnospiraceae)",
                             "log(Clostridiales)", "log(Clostridia)", "log(F prausnitizii + 1e-7)")


ggplot(results_df) +
  geom_point(aes(y=fct_rev(fct_reorder(modelname, HR)), x=HR)) +
  geom_errorbarh(aes(y=fct_rev(fct_reorder(modelname, HR)), xmin=Lower_CI, xmax=Upper_CI), height=0.5)+
  geom_vline(xintercept=1, lty=2, col="gray70") +
  scale_x_continuous(trans="log") +
  theme_classic() +
  labs(title = "HR (95% CI) multivariable Cox OS\nMixed Solid Tumor Cohort",
       subtitle ="", y="Variable", x="HR")