ps <- readRDS(paste0(data_folder, "/public_data/sequencing_data/plotting_phyloseq_deidentified.rds"))
acoa_df <- readRDS(paste0(data_folder, "/data/cleaned_data/acoa_data.rds"))
ps_family <- tax_glom(ps, taxrank = "Family")
tax_table(ps_family)[, "Family"] <- gsub("[^a-zA-Z0-9]", "_", tax_table(ps_family)[, "Family"])
family_names <- make.unique(tax_table(ps_family)[, "Family"])
taxa_names(ps_family) <- family_names
# Extract feature table (transpose to samples x features)
if (taxa_are_rows(ps_family)) {
feature_table <- as.data.frame(t(otu_table(ps_family)))
} else {
feature_table <- as.data.frame(otu_table(ps_family))
}
# Extract metadata
metadata <- sample_data(ps_family) %>%
as.matrix() %>%
as.data.frame() %>%
rownames_to_column("tmp_row_names") %>%
mutate(SAMPID = as.integer(SAMPID)) %>%
left_join(acoa_df %>% select(c(pyruvate, SAMPID)), by = "SAMPID") %>%
rename(acoa = pyruvate) %>%
column_to_rownames("tmp_row_names") # Note that the "pyruvate" generating pathway is associated with ACOA, apologies for the inconsistent names
metadata <- metadata[match(rownames(metadata), rownames(feature_table)), ]
# Check that sample names match
stopifnot(identical(rownames(feature_table), rownames(metadata)))
#output at the species level
#raw ACoA input here since log transform is included in the maaslin algorithm
fit_out <- maaslin3(input_data = feature_table,
input_metadata = metadata,
output = 'maaslin_output_family_level_abundance',
formula = '~ acoa',
normalization = 'TSS',
transform = 'LOG',
max_significance = 0.1,
median_comparison_abundance = TRUE,
warn_prevalence = FALSE,
min_abundance = 0.1,
evaluate_only = 'abundance',
max_pngs = 250,
cores = 1)Ext Data Fig 3 - Maaslin
Panel A
Results of Maaslin 3 run on MSKCC Cohort
new_metadata <- readRDS(paste0(data_folder, "/data/cleaned_data/wargo_phylo.rds")) %>%
microViz::ps_melt() %>%
mutate(acoa = pyruvate + .001) %>%
select(c(Sample, acoa)) %>%
unique() %>%
column_to_rownames("Sample") %>%
phyloseq::sample_data()
ps_wargo <- merge_phyloseq(readRDS(paste0(data_folder, "/data/cleaned_data/wargo_phylo.rds")), new_metadata)
#same for Wargo!
# Collapse taxa to the Family level
ps_wargo_f <- tax_glom(ps_wargo, taxrank = "Family")
# Optional: keep only named families
ps_wargo_f <- subset_taxa(ps_wargo_f, !is.na(Family) & Family != "")
# Replace NAs and tidy family names
tax_table(ps_wargo_f)[, "Family"] <- gsub("[^a-zA-Z0-9]", "_", tax_table(ps_wargo_f)[, "Family"])
family_names <- make.unique(tax_table(ps_wargo_f)[, "Family"])
taxa_names(ps_wargo_f) <- family_names
# Extract feature table (transpose to samples x features)
feature_table <- as.data.frame(t(otu_table(ps_wargo_f)))
if (taxa_are_rows(ps_wargo_f)) {
feature_table <- as.data.frame(t(otu_table(ps_wargo_f)))
} else {
feature_table <- as.data.frame(otu_table(ps_wargo_f))
}
# Extract metadata
metadata <- data.frame(sample_data(ps_wargo_f))
# Check that sample names match
stopifnot(identical(rownames(feature_table), rownames(metadata)))
fit_out <- maaslin3(input_data = feature_table,
input_metadata = metadata,
output = 'wargo_maaslin_output_family_level_abundance',
formula = '~ acoa',
normalization = 'TSS',
transform = 'LOG',
max_significance = 0.1,
median_comparison_abundance = TRUE,
warn_prevalence = FALSE,
min_abundance = 0.1,
evaluate_only = 'abundance',
max_pngs = 250,
cores = 1)Panel B
Results of Maaslin 3 run on the External Validation Cohort