Figure 2 - Butyrate Production Capacity from ACoA Pathway and ICB outcome
Here are the column names of the data we are plotting:
fouraminobutyrate, glutarate, lysine and pyruvate represent different parts of the butyrate synthesis pathway.
Panel A
Survival in Mixed Solid Tumor Cohort (n = 147) by ACoA pathway gene abundance in stool above/below the median.
First for overall survival (OS):
Then looking at Progression Free Survival (PFS) instead of OS:
Panel B
Restricted cubic spline for Observational cohort adjusted for age, diagnosis, and performance status identifies increasing hazard of overall mortality associated with below-median (<504 RPKM) ACoA pathway gene abundance among ICB recipients. Ticks along the x-axis reflect individual samples.
cox_os_mv_spline <- coxph(Surv(tt_os_d, event_os) ~ rms::rcs(pyruvate,4) + age10 + factor(cohort) + ecog + bmi + imputed_smoking_status, mixed_solid_tumor)
summary(cox_os_mv_spline)Call:
coxph(formula = Surv(tt_os_d, event_os) ~ rms::rcs(pyruvate,
4) + age10 + factor(cohort) + ecog + bmi + imputed_smoking_status,
data = mixed_solid_tumor)
n= 127, number of events= 67
(20 observations deleted due to missingness)
coef exp(coef) se(coef) z
rms::rcs(pyruvate, 4)pyruvate -0.007445 0.992582 0.002678 -2.780
rms::rcs(pyruvate, 4)pyruvate' 0.014596 1.014703 0.009261 1.576
rms::rcs(pyruvate, 4)pyruvate'' -0.030785 0.969684 0.025911 -1.188
age10 0.291623 1.338599 0.146378 1.992
factor(cohort)Breast 1.465711 4.330619 1.109723 1.321
factor(cohort)Lung 0.870819 2.388866 0.372297 2.339
factor(cohort)Melanoma -0.645373 0.524467 0.490361 -1.316
factor(cohort)Renal 0.484533 1.623416 0.488579 0.992
ecog 0.410150 1.507044 0.255016 1.608
bmi 0.052437 1.053836 0.030578 1.715
imputed_smoking_statusCurrent or Former -0.580409 0.559670 0.317863 -1.826
Pr(>|z|)
rms::rcs(pyruvate, 4)pyruvate 0.00544 **
rms::rcs(pyruvate, 4)pyruvate' 0.11499
rms::rcs(pyruvate, 4)pyruvate'' 0.23478
age10 0.04634 *
factor(cohort)Breast 0.18657
factor(cohort)Lung 0.01933 *
factor(cohort)Melanoma 0.18813
factor(cohort)Renal 0.32134
ecog 0.10776
bmi 0.08637 .
imputed_smoking_statusCurrent or Former 0.06785 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95
rms::rcs(pyruvate, 4)pyruvate 0.9926 1.0075 0.9874
rms::rcs(pyruvate, 4)pyruvate' 1.0147 0.9855 0.9965
rms::rcs(pyruvate, 4)pyruvate'' 0.9697 1.0313 0.9217
age10 1.3386 0.7470 1.0047
factor(cohort)Breast 4.3306 0.2309 0.4920
factor(cohort)Lung 2.3889 0.4186 1.1516
factor(cohort)Melanoma 0.5245 1.9067 0.2006
factor(cohort)Renal 1.6234 0.6160 0.6231
ecog 1.5070 0.6636 0.9142
bmi 1.0538 0.9489 0.9925
imputed_smoking_statusCurrent or Former 0.5597 1.7868 0.3002
upper .95
rms::rcs(pyruvate, 4)pyruvate 0.9978
rms::rcs(pyruvate, 4)pyruvate' 1.0333
rms::rcs(pyruvate, 4)pyruvate'' 1.0202
age10 1.7834
factor(cohort)Breast 38.1195
factor(cohort)Lung 4.9555
factor(cohort)Melanoma 1.3712
factor(cohort)Renal 4.2297
ecog 2.4843
bmi 1.1189
imputed_smoking_statusCurrent or Former 1.0435
Concordance= 0.677 (se = 0.036 )
Likelihood ratio test= 33.59 on 11 df, p=4e-04
Wald test = 30.02 on 11 df, p=0.002
Score (logrank) test = 31.59 on 11 df, p=9e-04
ptemp <- termplot(cox_os_mv_spline, se=T, plot=F)
buterm <- ptemp$pyruvate
center <- buterm$y[which(buterm$y == median(buterm$y))]
ytemp <- buterm$y + outer(buterm$se, c(0, -1.96, 1.96), '*')
exp_ytemp <- exp(ytemp - center)
spline_data <- data.frame(buty = buterm$x, Estimate = exp_ytemp[,1],
Lower = exp_ytemp[,2], Upper = exp_ytemp[,3])
ggplot(spline_data, aes(x = buty)) +
geom_ribbon(aes(ymin = Lower, ymax = Upper), fill = "grey80", alpha = 0.5) +
geom_line(aes(y = Estimate), color = "blue") +
geom_rug(sides = "b") + # Add rug plot at the bottom ('b') of the plot
scale_y_log10() + # Log scale for y-axis
labs(
title = "Spline for Overall Survival",
x = "ACoA-pathway genes present (RPKM)",
y = "Hazard ratio") +
theme_minimal()ggsave("svg/updated_fig_2_spline_with_bmi_smoking.svg", w = 3, h = 3) #also fit model without spline
cox_os_mv <- coxph(Surv(tt_os_d, event_os) ~ log(pyruvate) + age10 + factor(cohort) + ecog + bmi + imputed_smoking_status, mixed_solid_tumor)
cat(paste(
"-----------------------------------------------\n\nWald p value for ACoA pathway abundance, OS pyruvate models with spline:",
round(summary(cox_os_mv_spline)$coefficients["rms::rcs(pyruvate, 4)pyruvate", "Pr(>|z|)"],digits = 5),
"\n\n-----------------------------------------------\n\n"
))-----------------------------------------------
Wald p value for ACoA pathway abundance, OS pyruvate models with spline: 0.00544
-----------------------------------------------
cat(paste(
"-----------------------------------------------\n\nWald p value for ACoA pathway abundance, OS pyruvate continous:",
round(summary(cox_os_mv)$coefficients["log(pyruvate)", "Pr(>|z|)"],digits = 5),
"\n\n-----------------------------------------------\n\n"
))-----------------------------------------------
Wald p value for ACoA pathway abundance, OS pyruvate continous: 0.00057
-----------------------------------------------
We will now plot some microbiome data, using a phyloseq object that contains our sequencing data. This object contains a list of the taxonomy of organisms identified, the abundance in each sample, and any associated sample specific metadata in one object.
Principal components analysis (PCA) of Observational cohort sample (n = 147, 1 per patient) taxonomic abundance grouped at the family level shows increasing ACoA-pathway gene abundance from left to right along the first principal component (x-axis). Each point represents a single fecal sample. PCA loading vectors (arrows) identify increasing Ruminococcaceae and Lachnospiraceae abundance in the direction corresponding to increased ACoA-pathway gene abundance, versus greater Bacteroidales abundance (grouped at the order level) in the direction of decreased ACoA-pathway gene presence.
Panel C
First looking at how log ACOA distribution looks superimposed over the PCOA of Stool Samples:
ps %>%
ps_filter(SAMPID %in% mixed_solid_tumor$SAMPID) %>%
tax_fix() %>%
tax_transform(trans = "compositional", rank = "Species") %>%
dist_calc("aitchison") %>%
ord_calc("PCoA") %>%
ord_plot(fill = "log_acoa", size = 4, shape=21, alpha=0.6,
plot_taxa = c(1:5)) +
scale_fill_gradientn(colours=c("#2b3f76", "#f0f0f0", "#b2182b"), values=c(0, 0.5, 1)) +
theme_classic(base_size = 16) +
scale_x_reverse() +
labs(
fill = "Log ACOA"
)Next looking at Ruminococcaeceae:
rum_ps <- ps
sample_data(rum_ps)$log_rum = log10(as.numeric(sample_data(ps)$Ruminococcaceae) + 0.0001)
rum_ps %>%
ps_filter(SAMPID %in% mixed_solid_tumor$SAMPID) %>%
tax_fix() %>%
tax_transform(trans = "compositional", rank = "Species") %>%
dist_calc("aitchison") %>%
ord_calc("PCoA") %>%
ord_plot(fill = "log_rum", size = 4, shape=21, alpha=0.6,
plot_taxa = c(1:5)) +
scale_fill_gradientn(colours=c("#2b3f76", "#f0f0f0", "#b2182b"), values=c(0, 0.65, 1)) +
theme_classic(base_size = 16) +
scale_x_reverse() +
labs(
fill = "Log10 Rel Abund.\nRuminococcaceae"
)Lastly looking at Bacteroidaceae:
bac_ps <- ps
sample_data(bac_ps)$log_bac = log10(as.numeric(sample_data(ps)$Bacteroidaceae) + 0.0001)
bac_ps %>%
ps_filter(SAMPID %in% mixed_solid_tumor$SAMPID) %>%
tax_fix() %>%
tax_transform(trans = "compositional", rank = "Species") %>%
dist_calc("aitchison") %>%
ord_calc("PCoA") %>%
ord_plot(fill = "log_bac", size = 4, shape=21, alpha=0.6,
plot_taxa = c(1:5)) +
scale_fill_gradientn(colours=c("#2b3f76", "#f0f0f0", "#b2182b"), values=c(0, 0.75, 1)) +
theme_classic(base_size = 16) +
scale_x_reverse() +
labs(
fill = "Log10 Rel Abund.\nBacteroidaceae"
)Panel D
ACoA-pathway gene abundance was associated with response to ICB among patients with advanced renal cell cancer receiving ipilimumab plus nivolumab in the External Renal Cancer Cohort (n = 39).
#Figure 2D
ggplot(external_rcc, aes(x = response, y = pyruvate)) +
geom_boxplot(width = 0.4, outlier.shape = NA, linewidth = 0.6, color = "black") +
geom_jitter(aes(fill = response), shape = 21, width = 0.15, size = 5, stroke = 0.3, color = "black", alpha = 0.7) +
scale_fill_manual(values = c("#999999", "#E69F00")) +
scale_x_discrete(labels = c("Non-responder", "Responder")) +
theme_classic(base_size = 16) +
theme(
legend.position = "none",
axis.line = element_line(color = "black", linewidth = 0.5),
axis.ticks = element_line(color = "black", linewidth = 0.5)
) +
xlab("") +
ylab("Acetyl-CoA pathway\ngene abundance (RPKM)")wilcox.test(external_rcc$pyruvate ~ external_rcc$response)
Wilcoxon rank sum exact test
data: external_rcc$pyruvate by external_rcc$response
W = 36, p-value = 0.00372
alternative hypothesis: true location shift is not equal to 0
group_n <- external_rcc %>% count(response)
print(group_n)# A tibble: 2 × 2
response n
<chr> <int>
1 N 15
2 Y 13
#adjusted for CMB588
summary(glm(response01 ~ log1p(pyruvate) + cbm588, data = external_rcc , family = binomial(link = "logit")))
Call:
glm(formula = response01 ~ log1p(pyruvate) + cbm588, family = binomial(link = "logit"),
data = external_rcc)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -15.157 7.325 -2.069 0.0385 *
log1p(pyruvate) 2.491 1.160 2.148 0.0318 *
cbm588no-CBM -1.439 1.022 -1.407 0.1593
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 38.673 on 27 degrees of freedom
Residual deviance: 26.776 on 25 degrees of freedom
AIC: 32.776
Number of Fisher Scoring iterations: 5
Panel E
Univariate Cox regression analyses PFS relationship to ACoA gene abundance in external cohort.
model1_wargo_pfs <- coxph(Surv(pfs_d, pfsevent)~test_v + age10 + adv_substage, external_melanoma_cohort %>% mutate(test_v = log(rum)))
model2_wargo_pfs <- coxph(Surv(pfs_d, pfsevent)~test_v + age10 + adv_substage, external_melanoma_cohort%>% mutate(test_v = log(rum_lach)))
model3_wargo_pfs <- coxph(Surv(pfs_d, pfsevent)~test_v + age10 + adv_substage, external_melanoma_cohort%>% mutate(test_v = log(clostridiales)))
model4_wargo_pfs <- coxph(Surv(pfs_d, pfsevent)~test_v + age10 + adv_substage, external_melanoma_cohort%>% mutate(test_v = log(clostridia)))
model5_wargo_pfs <- coxph(Surv(pfs_d, pfsevent)~test_v + age10 + adv_substage, external_melanoma_cohort%>% mutate(test_v = log(eval(faecalibacterium_prausnitzii+1e-7))))
model6_wargo_pfs <- coxph(Surv(pfs_d, pfsevent)~test_v + age10 + adv_substage, external_melanoma_cohort%>% mutate(test_v = log1p(pyruvate)))
models_external_melanoma_os <- list(model6_wargo_pfs, model1_wargo_pfs, model2_wargo_pfs, model3_wargo_pfs, model4_wargo_pfs, model5_wargo_pfs)
summary(model6_wargo_pfs)Call:
coxph(formula = Surv(pfs_d, pfsevent) ~ test_v + age10 + adv_substage,
data = external_melanoma_cohort %>% mutate(test_v = log1p(pyruvate)))
n= 114, number of events= 61
(43 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
test_v -0.34115 0.71095 0.15696 -2.174 0.0297 *
age10 -0.02404 0.97625 0.11894 -0.202 0.8398
adv_substage 1.25488 3.50743 0.29953 4.189 2.8e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
test_v 0.7110 1.4066 0.5227 0.967
age10 0.9762 1.0243 0.7732 1.233
adv_substage 3.5074 0.2851 1.9500 6.309
Concordance= 0.681 (se = 0.035 )
Likelihood ratio test= 24.08 on 3 df, p=2e-05
Wald test = 21.44 on 3 df, p=9e-05
Score (logrank) test = 23.49 on 3 df, p=3e-05
results_df <- data.frame()
for (i in seq_along(models_external_melanoma_os)) {
model <- models_external_melanoma_os[[i]]
summary_model <- summary(model)
hr <- round(summary_model$coefficients["test_v", "exp(coef)"], digits=2)
lower_ci <- round(summary_model$conf.int["test_v", "lower .95"], digits=2)
upper_ci <- round(summary_model$conf.int["test_v", "upper .95"], digits=2)
pvalue <- round(summary_model$coefficients["test_v", "Pr(>|z|)"], digits=3)
results_df <- rbind(results_df, data.frame(
Model = paste0("model", i),
HR = hr,
Lower_CI = lower_ci,
Upper_CI = upper_ci,
pvalue = pvalue
))
}
results_df$source = rep("wargo", 6)
results_df$modelname = c("log(ACoA)", "log(Ruminococcaceae)", "log(Ruminococcacaeae + Lachnospiraceae)",
"log(Clostridiales)", "log(Clostridia)", "log(F prausnitizii + 1e-7)")
ggplot(results_df) +
geom_point(aes(y=fct_rev(fct_reorder(modelname, HR)), x=HR)) +
geom_errorbarh(aes(y=fct_rev(fct_reorder(modelname, HR)), xmin=Lower_CI, xmax=Upper_CI), height=0.5)+
geom_vline(xintercept=1, lty=2, col="gray70") +
scale_x_continuous(trans="log") +
theme_classic() +
labs(title = "HR (95% CI) multivariable Cox PFS\nExternal Melanoma Cohort",
subtitle ="", y="Variable", x="HR")Panel E MSK Data
Univariate Cox regression analyses PFS relationship to ACoA gene abundance in MSK Observational Data.
model1_msk_pfs <- coxph(Surv(tt_pfs_d, pfs_event)~test_v+age10+strata(cohort)+imputed_ecog+ bmi + imputed_smoking_status, mixed_solid_tumor %>% mutate(test_v = log(rum)))
model2_msk_pfs <- coxph(Surv(tt_pfs_d, pfs_event)~test_v+age10+strata(cohort)+imputed_ecog+ bmi + imputed_smoking_status, mixed_solid_tumor%>% mutate(test_v = log(rum_lach)))
model3_msk_pfs <- coxph(Surv(tt_pfs_d, pfs_event)~test_v+age10+strata(cohort)+imputed_ecog+ bmi + imputed_smoking_status, mixed_solid_tumor %>% mutate(test_v = log(clostridiales)))
model4_msk_pfs <- coxph(Surv(tt_pfs_d, pfs_event)~test_v+age10+strata(cohort)+imputed_ecog+ bmi + imputed_smoking_status, mixed_solid_tumor %>% mutate(test_v = log(clostridia)))
model5_msk_pfs <- coxph(Surv(tt_pfs_d, pfs_event)~test_v+age10+strata(cohort)+imputed_ecog+ bmi + imputed_smoking_status, mixed_solid_tumor %>% mutate(test_v = log(eval(faecalibacterium_prausnitzii+1e-4))))
model6_msk_pfs <- coxph(Surv(tt_pfs_d, pfs_event)~test_v+age10+strata(cohort)+imputed_ecog+ bmi + imputed_smoking_status, mixed_solid_tumor %>% mutate(test_v = log1p(pyruvate)))
models_mixed_solid_tumor_pfs <- list(model6_msk_pfs, model1_msk_pfs, model2_msk_pfs, model3_msk_pfs, model4_msk_pfs, model5_msk_pfs)
results_df <- data.frame()
for (i in seq_along(models_mixed_solid_tumor_pfs)) {
model <- models_mixed_solid_tumor_pfs[[i]]
summary_model <- summary(model)
hr <- round(summary_model$coefficients["test_v", "exp(coef)"], digits=2)
lower_ci <- round(summary_model$conf.int["test_v", "lower .95"], digits=2)
upper_ci <- round(summary_model$conf.int["test_v", "upper .95"], digits=2)
pvalue <- round(summary_model$coefficients["test_v", "Pr(>|z|)"], digits=3)
results_df <- rbind(results_df, data.frame(
Model = paste0("model", i),
HR = hr,
Lower_CI = lower_ci,
Upper_CI = upper_ci,
pvalue = pvalue
))
}
results_df$source = rep("msk", 6)
results_df$type = rep("pfs", 6)
results_df$modelname = c("log(ACoA)","log(Ruminococcaceae)", "log(Ruminococcacaeae + Lachnospiraceae)",
"log(Clostridiales)", "log(Clostridia)", "log(F prausnitizii + 1e-7)")
ggplot(results_df) +
geom_point(aes(y=fct_rev(fct_reorder(modelname, HR)), x=HR)) +
geom_errorbarh(aes(y=fct_rev(fct_reorder(modelname, HR)), xmin=Lower_CI, xmax=Upper_CI), height=0.5)+
geom_vline(xintercept=1, lty=2, col="gray70") +
scale_x_continuous(trans="log") +
theme_classic() +
labs(title = "HR (95% CI) multivariable Cox PFS\nMixed Solid Tumor Cohort",
subtitle ="", y="Variable", x="HR")model1_msk_os <- coxph(Surv(tt_os_d, event_os)~test_v+age10+strata(cohort)+ecog+ bmi + imputed_smoking_status, mixed_solid_tumor %>% mutate(test_v = log(rum)))
model2_msk_os <- coxph(Surv(tt_os_d, event_os)~test_v+age10+strata(cohort)+ecog+ bmi + imputed_smoking_status, mixed_solid_tumor%>% mutate(test_v = log(rum_lach)))
model3_msk_os <- coxph(Surv(tt_os_d, event_os)~test_v+age10+strata(cohort)+ecog+ bmi + imputed_smoking_status, mixed_solid_tumor %>% mutate(test_v = log(clostridiales)))
model4_msk_os <- coxph(Surv(tt_os_d, event_os)~test_v+age10+strata(cohort)+ecog+ bmi + imputed_smoking_status, mixed_solid_tumor %>% mutate(test_v = log(clostridia)))
model5_msk_os <- coxph(Surv(tt_os_d, event_os)~test_v+age10+strata(cohort)+ecog+ bmi + imputed_smoking_status, mixed_solid_tumor %>% mutate(test_v = log(eval(faecalibacterium_prausnitzii+1e-4))))
model6_msk_os <- coxph(Surv(tt_os_d, event_os)~test_v+age10+strata(cohort)+ecog+ bmi + imputed_smoking_status, mixed_solid_tumor %>% mutate(test_v = log1p(pyruvate)))
models_mixed_solid_tumor_os <- list(model6_msk_os, model1_msk_os, model2_msk_os, model3_msk_os, model4_msk_os, model5_msk_os)
results_df <- data.frame()
for (i in seq_along(models_mixed_solid_tumor_os)) {
model <- models_mixed_solid_tumor_os[[i]]
summary_model <- summary(model)
hr <- round(summary_model$coefficients["test_v", "exp(coef)"], digits = 2)
lower_ci <- round(summary_model$conf.int["test_v", "lower .95"], digits = 2)
upper_ci <- round(summary_model$conf.int["test_v", "upper .95"], digits = 2)
pvalue <- round(summary_model$coefficients["test_v", "Pr(>|z|)"], digits = 3)
results_df <- rbind(results_df, data.frame(
Model = paste0("model", i),
HR = hr,
Lower_CI = lower_ci,
Upper_CI = upper_ci,
pvalue = pvalue
))
}
results_df$source = rep("msk", 6)
results_df$type = rep("os", 6)
results_df$modelname = c("log(ACoA)","log(Ruminococcaceae)", "log(Ruminococcacaeae + Lachnospiraceae)",
"log(Clostridiales)", "log(Clostridia)", "log(F prausnitizii + 1e-7)")
ggplot(results_df) +
geom_point(aes(y=fct_rev(fct_reorder(modelname, HR)), x=HR)) +
geom_errorbarh(aes(y=fct_rev(fct_reorder(modelname, HR)), xmin=Lower_CI, xmax=Upper_CI), height=0.5)+
geom_vline(xintercept=1, lty=2, col="gray70") +
scale_x_continuous(trans="log") +
theme_classic() +
labs(title = "HR (95% CI) multivariable Cox OS\nMixed Solid Tumor Cohort",
subtitle ="", y="Variable", x="HR")