There are two groups(gv009 and vehicle), three dosages(0mpk, 30mpk and 60mpk) and three drug batchs(201902, 201905, 201906) for this project.
comparison of treatment of different dosages of gv009 on LLC tumor mice model (201902).
comparison of treatment of different dosages of gv009 on LLC tumor mice model (201905).
comparison of treatment of different dosages of gv009 on LLC tumor mice model (201906).
Analysis detail for selected projects from project’s 16S RNA sequencing data set
Low abundance taxa were filtered out
filter <- phyloseq::genefilter_sample(phy, filterfun_sample(function(x) x >= 5))
phy_fil <- prune_taxa(filter, phy)
metat_fil <- meta(phy_fil)
phy_fil
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 934 taxa and 83 samples ]
## sample_data() Sample Data: [ 83 samples by 32 sample variables ]
## tax_table() Taxonomy Table: [ 934 taxa by 6 taxonomic ranks ]
## refseq() DNAStringSet: [ 934 reference sequences ]
phy_core <- core(phy_fil, detection = 0.0005, prevalence = .25)
metat_core <- meta(phy_core)
phy_core
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 299 taxa and 83 samples ]
## sample_data() Sample Data: [ 83 samples by 32 sample variables ]
## tax_table() Taxonomy Table: [ 299 taxa by 6 taxonomic ranks ]
## refseq() DNAStringSet: [ 299 reference sequences ]
Prevalence filter had big effect on data, only 299 taxa survived after 0.25 prevalence filtering
ps.rar <- rarefy_even_depth(phy, sample.size = min(sample_sums(phy)))
p.rar <- plot_taxa_prevalence(ps.rar, "Phylum")
p.rar
##### Raw data
par(mfrow=c(1,2),family="inconsolata")
otu_tab <- t(abundances(phy))
p1 <- vegan::rarecurve(otu_tab,
step = 100, label = T,
col = "blue", cex = 0.6,main="all taxa")
##### Raw data
otu_tab <- t(abundances(phy_core))
p2 <- vegan::rarecurve(otu_tab, ylab = NULL,main="core taxa",
step = 100, label = T,
col = "red", cex = 0.6)
Sample X17774 are obviously different from other samples, so it is removed from phyloseq and meta table.
# delete sample X17774
s1 <- rownames(subset(meta(phy), SampleID != "X17774"))
phy <- prune_samples(s1, phy)
s2 <- rownames(subset(meta(phy_core), SampleID != "X17774"))
phy_core <- prune_samples(s1, phy_core)
metat_core <- meta(phy_core)
# CLR transformed data
pca_tab <- t(otu_table(phy_core)+1)
pca = mixOmics::pca(pca_tab, ncomp = 10, center = T, scale = F,logratio = "CLR")
par(family="sans")
plot(pca)
60 mpk are more obvious and there seem no difference between 0 mpk and 30 mpk.
par(family="inconsolata")
linshi<-metat_core
Drug_batch_<-linshi$Drug_batch
Drug_batch_[is.na(Drug_batch_)] <- "water"
mixOmics::plotIndiv(pca, comp=c(1,2), group = as.factor(Drug_batch_), ind.names = F,
legend = TRUE, legend.title = "Drug_batch",
title = 'comp 1 - 2',
abline = T,
col.per.group=mycols[c(1:4)],
style = "graphics",
point.lwd = 2,
ellipse = F)
Cage effect is observed by PCA.
par(family="inconsolata")
mixOmics::plotIndiv(pca, comp=c(1,2), group = as.factor(paste(metat_core$Dosage,metat_core$Drug_batch,sep="_")), ind.names = F,
legend = TRUE, legend.title = "Dosage + Drug_batch",
title = 'comp 1 - 2',
abline = T,
col.per.group=mycols[c(1:7)],
# col = c("yellow", "yellow","yellow", "yellow","yellow", "yellow", "yellow"),
style = "graphics",
point.lwd = 2,
ellipse = F)
Drug_batch is obviously observed by PCA.
pseq.rel <- microbiome::transform(phy_core_sub, "compositional")
otu <- abundances(pseq.rel)
input_meta <- meta(pseq.rel)
input_meta$Dosage<-factor(input_meta$Dosage)
sample_data(pseq.rel) <- input_meta
# visualization landscape
p <- plot_landscape(pseq.rel, method = "NMDS", distance = "bray", col = "Dosage", size = 3)+
labs(title="NMDS/Bray_Curtis")
print(p)
ps.sel <- phy_core_sub
ps.sel@sam_data$Drug_batch <- as.factor(ps.sel@sam_data$Dosage )
# plot barplot for each taxon level
for (levels in c("Phylum","Class","Order","Family","Genus")){
pseq <- ps.sel %>%
aggregate_taxa(level = levels) %>%
microbiome::transform(transform = "compositional") %>%
aggregate_top_taxa(top = 10, levels)
p.bar <- microbiome::plot_composition(pseq, sample.sort = "Drug_batch",average_by = "Drug_batch") +
scale_fill_manual(values = mycols) +
guides(fill = guide_legend(ncol = 1)) +
ylab("Relative Abundance (%)") +
xlab("") +
guides(fill = guide_legend(levels)) +
theme_minimal() +
theme(strip.text.x = element_text(size = 10, face = "bold")) +
theme(text = element_text(size=10,family = "inconsolata")) +
theme(axis.text.x = element_text(angle = 0, size = 10, face = "bold"))
ggsave(p.bar, device="svg",
filename = paste0("output/structure/figures/composition/composition_top10_all.",levels,"" %+% drug_bash %+% ".svg"),
dpi = 300, height = 15, width = 13, units = "cm")
}
#####sPLA-DA
############################################################# sPLS-DA #########################################################
dir.create("output/spls_da_201902",recursive = T)
# first, set a grid of values to test:
grid.keepX <- c(seq(5, 50, 5))
# if you dont understand what this means, type
# grid.keepX # adjust this grid as necessary for your own data
# library(parallel)
# this chunk takes ~2 min to run
set.seed(100) # for reproducible results for this code, remove for your own code
gvri.tune.splsda <- tune.splsda(
X = X,
Y = Y,
cpus = 8,
ncomp = 3,
logratio = "CLR",
test.keepX = grid.keepX,
validation = c("Mfold"),
folds = 5,
dist = "max.dist", # prediction distance can be chosen according to tune.plsda results
nrepeat = 50,
progressBar = T
)
# saveRDS(gvri.tune.splsda, file = "gvri.tune.splsda.rds")
# may show some convergence issues for some of the cases, it is ok for tuning
plot(gvri.tune.splsda)
# optimal number of variables to select on 2 comps:
select.keepX <- gvri.tune.splsda$choice.keepX[1:3]
select.keepX
# the sPLS-DA
gvri.splsda <- splsda(X = X, Y = Y, ncomp = 3, keepX = select.keepX, logratio = "CLR")
png(filename = "output/spls_da_201902/SPLSDA_12.png", width = 8, height = 6.5, units = "in", res = 300)
plotIndiv(gvri.splsda,
comp = c(1, 2),
ind.names = F,
abline = T,
style = "graphics",
point.lwd = 2,
ellipse = TRUE,
legend = TRUE,
col.per.group = ggsci::pal_lancet("lanonc")(9)[1:nlevels(as.factor(Metadata$Dosage))],
title = "sPLS-DA comp 1-2")
dev.off()
png(filename = "output/spls_da_201902/sPLSDA_13.png", width = 8, height = 6.5, units = "in", res = 300)
plotIndiv(gvri.splsda,
comp = c(1, 3),
ind.names = F,
abline = T,
style = "graphics",
point.lwd = 2,
ellipse = TRUE,
legend = TRUE,
col.per.group = ggsci::pal_lancet("lanonc")(9)[1:nlevels(as.factor(Metadata$Dosage))],
title = "sPLS-DA comp 1-3")
dev.off()
set.seed(200) # for reproducible results for this code, remove for your own code
gvri.perf.splsda <- perf(gvri.splsda,
validation = "Mfold", folds = 5,
progressBar = FALSE, nrepeat = 50, dist = "max.dist"
)
gvri.perf.splsda$error.rate
plot(gvri.perf.splsda)
# Tax.filter<-tax_table(ps.ordered)
#############################
tax_levels<-c("Genus")#"Phylum", "Order", "class", "Family",
for (i in tax_levels) {
# i<-"Genus"
# browser()
############################
name.var <- paste(Tax.filter[, i], colnames(X), sep = "|")
#############################################n1
res.splsda.plot<-NULL
png(filename = paste0("output/spls_da_201902/",i, "_sPLSDA_comp1.png"), width = 8, height = 6.5, units = "in", res = 300)
res.splsda.plot<- plotLoadings(gvri.splsda,
comp = 1, method = "mean", contrib = "max",
legend.color = ggsci::pal_lancet("lanonc")(9)[1:nlevels(as.factor(Metadata$Dosage))], name.var = name.var,
size.title = 1.0, ndisplay = 50, size.name = .6, size.legend = 1.2 )
dev.off()
res.splsda.plot$SampleID = rownames(res.splsda.plot)
p<-NULL
n_componet<-1
p<-spda_rere_plot(res.splsda.plot,Tax.filter,paste0('n',n_componet),i)#n_componet=2;i=Genus;
# p
ggsave(plot= p, filename = paste0("output/spls_da_201902/splda_",i,"_plot_n",n_componet,".png"), dpi = 300, height = 10, width = 10, units = "in")
#
write.table(res.splsda.plot, file = paste0("output/spls_da_201902/splda_",i,"_plot_n",n_componet,".xls"),sep = "\t", row.names = F,quote = F)
#############################################n2
res.splsda.plot<-NULL
png(filename = paste0("output/spls_da_201902/",i, "_sPLSDA_comp2.png"), width = 8, height = 6.5, units = "in", res = 300)
res.splsda.plot<- plotLoadings(gvri.splsda,
comp = 2, method = "mean", contrib = "max",
legend.color = ggsci::pal_lancet("lanonc")(9)[1:nlevels(as.factor(Metadata$Dosage))], name.var = name.var,
size.title = 1.0, ndisplay = 50, size.name = .6, size.legend = 1.2)
dev.off()
res.splsda.plot$SampleID = rownames(res.splsda.plot)
p<-NULL
n_componet<-2
p<-spda_rere_plot(res.splsda.plot,Tax.filter,paste0('n',n_componet),i)#n_componet=2;i=Genus;
# p
ggsave(plot= p, filename = paste0("output/spls_da_201902/splda_",i,"_plot_n",n_componet,".png"), dpi = 300, height = 10, width = 10, units = "in")
#
write.table(res.splsda.plot, file = paste0("output/spls_da_201902/splda_",i,"_plot_n",n_componet,".xls"),sep = "\t", row.names = F,quote = F)
#############################################n3
res.splsda.plot<-NULL
png(filename = paste0("output/spls_da_201902/",i, "_sPLSDA_comp3.png"), width = 8, height = 6.5, units = "in", res = 300)
res.splsda.plot<- plotLoadings(gvri.splsda,
comp = 3, method = "mean", contrib = "max",
legend.color = ggsci::pal_lancet("lanonc")(9)[1:nlevels(as.factor(Metadata$Dosage))], name.var = name.var,
size.title = 1.0, ndisplay = 50, size.name = .6, size.legend = 1.2)
dev.off()
res.splsda.plot$SampleID = rownames(res.splsda.plot)
p<-NULL
n_componet<-3
p<-spda_rere_plot(res.splsda.plot,Tax.filter,paste0('n',n_componet),i)#n_componet=2;i=Genus;
# p
ggsave(plot= p, filename = paste0("output/spls_da_201902/splda_",i,"_plot_n",n_componet,".png"), dpi = 300, height = 10, width = 10, units = "in")
#
write.table(res.splsda.plot, file = paste0("output/spls_da_201902/splda_",i,"_plot_n",n_componet,".xls"),sep = "\t", row.names = F,quote = F)
}
###############
####### redefine a color palette to make it consistent with the above color palette
color.gvri <- function(num.vector) {
if (is.factor(num.vector)) {
num.vector <- as.numeric(num.vector)
}
if (!is.numeric(num.vector)) {
stop(paste("num.vector has to be numeric", call. = FALSE))
}
##### lancet color palette
mixo.col <- c("#00468BFF", "#ED0000FF", "#42B540FF", "#0099B4FF", "#925E9FFF", "#FDAF91FF", "#AD002AFF", "#ADB6B6FF", "#1B1919FF")
n <- length(num.vector)
if (isTRUE(num.vector) > length(mixo.col)) {
stop(paste(
"We only have a few mix.colors available, n <= ",
length(mixo.col)
), call. = FALSE)
}
if (isTRUE(!is.finite((num.vector))) || (n < 1)) {
stop("'num.vector' must be an integer vector with positive values.",
call. = FALSE
)
}
return(mixo.col[num.vector])
}
##################### Heatmap on outputs of sPLS-DA
png(filename = "output/spls_da_201902/Heatmap_comp1_comp2.png", width = 12, height = 7.5, units = "in", res = 300)
cim(gvri.splsda,
row.sideColors = color.gvri(Y),
cluster = "column",
row.names = F,
comp = c(1, 2), ## please make sure you choose the right number of components, or keep all components by unticking this line
margins = c(5, 6),
# color = colorRampPalette(c("#009E73","white","#E69F00"))(200),
color = colorRampPalette(c("#0D8CFF", "#43A6FF", "#5EB3FF", "#93CCFF", "#FFFFFF", "#FFBBAA", "#FF7755", "#FF6039", "#FF3300"))(100) )
dev.off()
png(filename = "output/spls_da_201902/Heatmap_comp1_comp3.png", width = 12, height = 7.5, units = "in", res = 300)
cim(gvri.splsda,
row.sideColors = color.gvri(Y),
cluster = "column",
row.names = F,
comp = c(1, 3), ## please make sure you choose the right number of components, or keep all components by unticking this line
margins = c(5, 6),
# color = viridis::viridis(1000, option = "D") ## change the palette
# color = colorRampPalette(c("#009E73","white","#E69F00"))(20),
color = colorRampPalette(c("#0D8CFF", "#43A6FF", "#5EB3FF", "#93CCFF", "#FFFFFF", "#FFBBAA", "#FF7755", "#FF6039", "#FF3300"))(100) )
dev.off()
png(filename = "output/spls_da_201902/Heatmap_comp1_comp2_comp3.png", width = 12, height = 7.5, units = "in", res = 300)
cim(gvri.splsda,
row.sideColors = color.gvri(Y),
cluster = "column",
# comp = c(1,2), ## please make sure you choose the right number of components, or keep all components by unticking this line
margins = c(5, 6),
row.names = F,
# color = viridis::viridis(1000, option = "D") ## change the palette
# color = c( "#009E73","white","#E69F00"),
# color = colorRampPalette(c("#009E73","white","#E69F00"))(20),
# color = colorRampPalette(c("navy", "white", "firebrick3"))(100)
color = colorRampPalette(c("#0D8CFF", "#43A6FF", "#5EB3FF", "#93CCFF", "#FFFFFF", "#FFBBAA", "#FF7755", "#FF6039", "#FF3300"))(100) )
dev.off()
In our case, We split our data for pairwise comparison
lefse_func(group1 = "0", group2 = "30", phy_new =phy_core_sub, ref = "0", group = "Dosage", batch = "201902")
There are significant difference in groups.
pseq.rel <- microbiome::transform(phy_core_sub, "compositional")
otu <- abundances(pseq.rel)
input_meta <- meta(pseq.rel)
input_meta$Dosage<-factor(input_meta$Dosage)
sample_data(pseq.rel) <- input_meta
# visualization landscape
p <- plot_landscape(pseq.rel, method = "NMDS", distance = "bray", col = "Dosage", size = 3)+
labs(title="NMDS/Bray_Curtis")
print(p)
ps.sel <- phy_core_sub
ps.sel@sam_data$Drug_batch <- as.factor(ps.sel@sam_data$Dosage )
# plot barplot for each taxon level
for (levels in c("Phylum","Class","Order","Family","Genus")){
pseq <- ps.sel %>%
aggregate_taxa(level = levels) %>%
microbiome::transform(transform = "compositional") %>%
aggregate_top_taxa(top = 10, levels)
p.bar <- microbiome::plot_composition(pseq, sample.sort = "Drug_batch",average_by = "Drug_batch") +
scale_fill_manual(values = mycols) +
guides(fill = guide_legend(ncol = 1)) +
ylab("Relative Abundance (%)") +
xlab("") +
guides(fill = guide_legend(levels)) +
theme_minimal() +
theme(strip.text.x = element_text(size = 10, face = "bold")) +
theme(text = element_text(size=10,family = "inconsolata")) +
theme(axis.text.x = element_text(angle = 0, size = 10, face = "bold"))
ggsave(p.bar, device="svg",
filename = paste0("output/structure/figures/composition/composition_top10_all.",levels,"" %+% drug_bash %+% ".svg"),
dpi = 300, height = 15, width = 13, units = "cm")
}
#####sPLA-DA
############################################################# sPLS-DA #########################################################
dir.create("output/spls_da_201905",recursive = T)
# first, set a grid of values to test:
grid.keepX <- c(seq(5, 50, 5))
# if you dont understand what this means, type
# grid.keepX # adjust this grid as necessary for your own data
# library(parallel)
# this chunk takes ~2 min to run
set.seed(100) # for reproducible results for this code, remove for your own code
gvri.tune.splsda <- tune.splsda(
X = X,
Y = Y,
cpus = 8,
ncomp = 3,
logratio = "CLR",
test.keepX = grid.keepX,
validation = c("Mfold"),
folds = 5,
dist = "max.dist", # prediction distance can be chosen according to tune.plsda results
nrepeat = 50,
progressBar = T
)
# saveRDS(gvri.tune.splsda, file = "gvri.tune.splsda.rds")
# may show some convergence issues for some of the cases, it is ok for tuning
plot(gvri.tune.splsda)
# optimal number of variables to select on 2 comps:
select.keepX <- gvri.tune.splsda$choice.keepX[1:3]
select.keepX
# the sPLS-DA
gvri.splsda <- splsda(X = X, Y = Y, ncomp = 3, keepX = select.keepX, logratio = "CLR")
png(filename = "output/spls_da_201905/SPLSDA_12.png", width = 8, height = 6.5, units = "in", res = 300)
plotIndiv(gvri.splsda,
comp = c(1, 2),
ind.names = F,
abline = T,
style = "graphics",
point.lwd = 2,
ellipse = TRUE,
legend = TRUE,
col.per.group = ggsci::pal_lancet("lanonc")(9)[1:nlevels(as.factor(Metadata$Dosage))],
title = "sPLS-DA comp 1-2")
dev.off()
png(filename = "output/spls_da_201905/sPLSDA_13.png", width = 8, height = 6.5, units = "in", res = 300)
plotIndiv(gvri.splsda,
comp = c(1, 3),
ind.names = F,
abline = T,
style = "graphics",
point.lwd = 2,
ellipse = TRUE,
legend = TRUE,
col.per.group = ggsci::pal_lancet("lanonc")(9)[1:nlevels(as.factor(Metadata$Dosage))],
title = "sPLS-DA comp 1-3")
dev.off()
set.seed(200) # for reproducible results for this code, remove for your own code
gvri.perf.splsda <- perf(gvri.splsda,
validation = "Mfold", folds = 5,
progressBar = FALSE, nrepeat = 50, dist = "max.dist"
)
gvri.perf.splsda$error.rate
plot(gvri.perf.splsda)
# Tax.filter<-tax_table(ps.ordered)
#############################
tax_levels<-c("Genus")#"Phylum", "Order", "class", "Family",
for (i in tax_levels) {
# i<-"Genus"
# browser()
############################
name.var <- paste(Tax.filter[, i], colnames(X), sep = "|")
#############################################n1
res.splsda.plot<-NULL
png(filename = paste0("output/spls_da_201905/",i, "_sPLSDA_comp1.png"), width = 8, height = 6.5, units = "in", res = 300)
res.splsda.plot<- plotLoadings(gvri.splsda,
comp = 1, method = "mean", contrib = "max",
legend.color = ggsci::pal_lancet("lanonc")(9)[1:nlevels(as.factor(Metadata$Dosage))], name.var = name.var,
size.title = 1.0, ndisplay = 50, size.name = .6, size.legend = 1.2 )
dev.off()
res.splsda.plot$SampleID = rownames(res.splsda.plot)
p<-NULL
n_componet<-1
p<-spda_rere_plot(res.splsda.plot,Tax.filter,paste0('n',n_componet),i)#n_componet=2;i=Genus;
# p
ggsave(plot= p, filename = paste0("output/spls_da_201905/splda_",i,"_plot_n",n_componet,".png"), dpi = 300, height = 10, width = 10, units = "in")
#
write.table(res.splsda.plot, file = paste0("output/spls_da_201905/splda_",i,"_plot_n",n_componet,".xls"),sep = "\t", row.names = F,quote = F)
#############################################n2
res.splsda.plot<-NULL
png(filename = paste0("output/spls_da_201905/",i, "_sPLSDA_comp2.png"), width = 8, height = 6.5, units = "in", res = 300)
res.splsda.plot<- plotLoadings(gvri.splsda,
comp = 2, method = "mean", contrib = "max",
legend.color = ggsci::pal_lancet("lanonc")(9)[1:nlevels(as.factor(Metadata$Dosage))], name.var = name.var,
size.title = 1.0, ndisplay = 50, size.name = .6, size.legend = 1.2)
dev.off()
res.splsda.plot$SampleID = rownames(res.splsda.plot)
p<-NULL
n_componet<-2
p<-spda_rere_plot(res.splsda.plot,Tax.filter,paste0('n',n_componet),i)#n_componet=2;i=Genus;
# p
ggsave(plot= p, filename = paste0("output/spls_da_201905/splda_",i,"_plot_n",n_componet,".png"), dpi = 300, height = 10, width = 10, units = "in")
#
write.table(res.splsda.plot, file = paste0("output/spls_da_201905/splda_",i,"_plot_n",n_componet,".xls"),sep = "\t", row.names = F,quote = F)
#############################################n3
res.splsda.plot<-NULL
png(filename = paste0("output/spls_da_201905/",i, "_sPLSDA_comp3.png"), width = 8, height = 6.5, units = "in", res = 300)
res.splsda.plot<- plotLoadings(gvri.splsda,
comp = 3, method = "mean", contrib = "max",
legend.color = ggsci::pal_lancet("lanonc")(9)[1:nlevels(as.factor(Metadata$Dosage))], name.var = name.var,
size.title = 1.0, ndisplay = 50, size.name = .6, size.legend = 1.2)
dev.off()
res.splsda.plot$SampleID = rownames(res.splsda.plot)
p<-NULL
n_componet<-3
p<-spda_rere_plot(res.splsda.plot,Tax.filter,paste0('n',n_componet),i)#n_componet=2;i=Genus;
# p
ggsave(plot= p, filename = paste0("output/spls_da_201905/splda_",i,"_plot_n",n_componet,".png"), dpi = 300, height = 10, width = 10, units = "in")
#
write.table(res.splsda.plot, file = paste0("output/spls_da_201905/splda_",i,"_plot_n",n_componet,".xls"),sep = "\t", row.names = F,quote = F)
}
###############
####### redefine a color palette to make it consistent with the above color palette
color.gvri <- function(num.vector) {
if (is.factor(num.vector)) {
num.vector <- as.numeric(num.vector)
}
if (!is.numeric(num.vector)) {
stop(paste("num.vector has to be numeric", call. = FALSE))
}
##### lancet color palette
mixo.col <- c("#00468BFF", "#ED0000FF", "#42B540FF", "#0099B4FF", "#925E9FFF", "#FDAF91FF", "#AD002AFF", "#ADB6B6FF", "#1B1919FF")
n <- length(num.vector)
if (isTRUE(num.vector) > length(mixo.col)) {
stop(paste(
"We only have a few mix.colors available, n <= ",
length(mixo.col)
), call. = FALSE)
}
if (isTRUE(!is.finite((num.vector))) || (n < 1)) {
stop("'num.vector' must be an integer vector with positive values.",
call. = FALSE
)
}
return(mixo.col[num.vector])
}
##################### Heatmap on outputs of sPLS-DA
png(filename = "output/spls_da_201905/Heatmap_comp1_comp2.png", width = 12, height = 7.5, units = "in", res = 300)
cim(gvri.splsda,
row.sideColors = color.gvri(Y),
cluster = "column",
row.names = F,
comp = c(1, 2), ## please make sure you choose the right number of components, or keep all components by unticking this line
margins = c(5, 6),
# color = colorRampPalette(c("#009E73","white","#E69F00"))(200),
color = colorRampPalette(c("#0D8CFF", "#43A6FF", "#5EB3FF", "#93CCFF", "#FFFFFF", "#FFBBAA", "#FF7755", "#FF6039", "#FF3300"))(100) )
dev.off()
png(filename = "output/spls_da_201905/Heatmap_comp1_comp3.png", width = 12, height = 7.5, units = "in", res = 300)
cim(gvri.splsda,
row.sideColors = color.gvri(Y),
cluster = "column",
row.names = F,
comp = c(1, 3), ## please make sure you choose the right number of components, or keep all components by unticking this line
margins = c(5, 6),
# color = viridis::viridis(1000, option = "D") ## change the palette
# color = colorRampPalette(c("#009E73","white","#E69F00"))(20),
color = colorRampPalette(c("#0D8CFF", "#43A6FF", "#5EB3FF", "#93CCFF", "#FFFFFF", "#FFBBAA", "#FF7755", "#FF6039", "#FF3300"))(100) )
dev.off()
png(filename = "output/spls_da_201905/Heatmap_comp1_comp2_comp3.png", width = 12, height = 7.5, units = "in", res = 300)
cim(gvri.splsda,
row.sideColors = color.gvri(Y),
cluster = "column",
# comp = c(1,2), ## please make sure you choose the right number of components, or keep all components by unticking this line
margins = c(5, 6),
row.names = F,
# color = viridis::viridis(1000, option = "D") ## change the palette
# color = c( "#009E73","white","#E69F00"),
# color = colorRampPalette(c("#009E73","white","#E69F00"))(20),
# color = colorRampPalette(c("navy", "white", "firebrick3"))(100)
color = colorRampPalette(c("#0D8CFF", "#43A6FF", "#5EB3FF", "#93CCFF", "#FFFFFF", "#FFBBAA", "#FF7755", "#FF6039", "#FF3300"))(100) )
dev.off()
In our case, We split our data for pairwise comparison
lefse_func(group1 = "0", group2 = "30", phy_new =phy_core_sub, ref = "0", group = "Dosage", batch = "201905")
There are significant difference in groups.
pseq.rel <- microbiome::transform(phy_core_sub, "compositional")
otu <- abundances(pseq.rel)
input_meta <- meta(pseq.rel)
input_meta$Dosage<-factor(input_meta$Dosage)
sample_data(pseq.rel) <- input_meta
# visualization landscape
p <- plot_landscape(pseq.rel, method = "NMDS", distance = "bray", col = "Dosage", size = 3)+
labs(title="NMDS/Bray_Curtis")
print(p)
ps.sel <- phy_core_sub
ps.sel@sam_data$Drug_batch <- as.factor(ps.sel@sam_data$Dosage )
# plot barplot for each taxon level
for (levels in c("Phylum","Class","Order","Family","Genus")){
pseq <- ps.sel %>%
aggregate_taxa(level = levels) %>%
microbiome::transform(transform = "compositional") %>%
aggregate_top_taxa(top = 10, levels)
p.bar <- microbiome::plot_composition(pseq, sample.sort = "Drug_batch",average_by = "Drug_batch") +
scale_fill_manual(values = mycols) +
guides(fill = guide_legend(ncol = 1)) +
ylab("Relative Abundance (%)") +
xlab("") +
guides(fill = guide_legend(levels)) +
theme_minimal() +
theme(strip.text.x = element_text(size = 10, face = "bold")) +
theme(text = element_text(size=10,family = "inconsolata")) +
theme(axis.text.x = element_text(angle = 0, size = 10, face = "bold"))
ggsave(p.bar, device="svg",
filename = paste0("output/structure/figures/composition/composition_top10_all.",levels,"" %+% drug_bash %+% ".svg"),
dpi = 300, height = 15, width = 13, units = "cm")
}
#####sPLA-DA
############################################################# sPLS-DA #########################################################
dir.create("output/spls_da_201906",recursive = T)
# first, set a grid of values to test:
grid.keepX <- c(seq(5, 50, 5))
# if you dont understand what this means, type
# grid.keepX # adjust this grid as necessary for your own data
# library(parallel)
# this chunk takes ~2 min to run
set.seed(100) # for reproducible results for this code, remove for your own code
gvri.tune.splsda <- tune.splsda(
X = X,
Y = Y,
cpus = 8,
ncomp = 3,
logratio = "CLR",
test.keepX = grid.keepX,
validation = c("Mfold"),
folds = 5,
dist = "max.dist", # prediction distance can be chosen according to tune.plsda results
nrepeat = 50,
progressBar = T
)
# saveRDS(gvri.tune.splsda, file = "gvri.tune.splsda.rds")
# may show some convergence issues for some of the cases, it is ok for tuning
plot(gvri.tune.splsda)
# optimal number of variables to select on 2 comps:
select.keepX <- gvri.tune.splsda$choice.keepX[1:3]
select.keepX
# the sPLS-DA
gvri.splsda <- splsda(X = X, Y = Y, ncomp = 3, keepX = select.keepX, logratio = "CLR")
png(filename = "output/spls_da_201906/SPLSDA_12.png", width = 8, height = 6.5, units = "in", res = 300)
plotIndiv(gvri.splsda,
comp = c(1, 2),
ind.names = F,
abline = T,
style = "graphics",
point.lwd = 2,
ellipse = TRUE,
legend = TRUE,
col.per.group = ggsci::pal_lancet("lanonc")(9)[1:nlevels(as.factor(Metadata$Dosage))],
title = "sPLS-DA comp 1-2")
dev.off()
png(filename = "output/spls_da_201906/sPLSDA_13.png", width = 8, height = 6.5, units = "in", res = 300)
plotIndiv(gvri.splsda,
comp = c(1, 3),
ind.names = F,
abline = T,
style = "graphics",
point.lwd = 2,
ellipse = TRUE,
legend = TRUE,
col.per.group = ggsci::pal_lancet("lanonc")(9)[1:nlevels(as.factor(Metadata$Dosage))],
title = "sPLS-DA comp 1-3")
dev.off()
set.seed(200) # for reproducible results for this code, remove for your own code
gvri.perf.splsda <- perf(gvri.splsda,
validation = "Mfold", folds = 5,
progressBar = FALSE, nrepeat = 50, dist = "max.dist"
)
gvri.perf.splsda$error.rate
plot(gvri.perf.splsda)
# Tax.filter<-tax_table(ps.ordered)
#############################
tax_levels<-c("Genus")#"Phylum", "Order", "class", "Family",
for (i in tax_levels) {
# i<-"Genus"
# browser()
############################
name.var <- paste(Tax.filter[, i], colnames(X), sep = "|")
#############################################n1
res.splsda.plot<-NULL
png(filename = paste0("output/spls_da_201906/",i, "_sPLSDA_comp1.png"), width = 8, height = 6.5, units = "in", res = 300)
res.splsda.plot<- plotLoadings(gvri.splsda,
comp = 1, method = "mean", contrib = "max",
legend.color = ggsci::pal_lancet("lanonc")(9)[1:nlevels(as.factor(Metadata$Dosage))], name.var = name.var,
size.title = 1.0, ndisplay = 50, size.name = .6, size.legend = 1.2 )
dev.off()
res.splsda.plot$SampleID = rownames(res.splsda.plot)
p<-NULL
n_componet<-1
p<-spda_rere_plot(res.splsda.plot,Tax.filter,paste0('n',n_componet),i)#n_componet=2;i=Genus;
# p
ggsave(plot= p, filename = paste0("output/spls_da_201906/splda_",i,"_plot_n",n_componet,".png"), dpi = 300, height = 10, width = 10, units = "in")
#
write.table(res.splsda.plot, file = paste0("output/spls_da_201906/splda_",i,"_plot_n",n_componet,".xls"),sep = "\t", row.names = F,quote = F)
#############################################n2
res.splsda.plot<-NULL
png(filename = paste0("output/spls_da_201906/",i, "_sPLSDA_comp2.png"), width = 8, height = 6.5, units = "in", res = 300)
res.splsda.plot<- plotLoadings(gvri.splsda,
comp = 2, method = "mean", contrib = "max",
legend.color = ggsci::pal_lancet("lanonc")(9)[1:nlevels(as.factor(Metadata$Dosage))], name.var = name.var,
size.title = 1.0, ndisplay = 50, size.name = .6, size.legend = 1.2)
dev.off()
res.splsda.plot$SampleID = rownames(res.splsda.plot)
p<-NULL
n_componet<-2
p<-spda_rere_plot(res.splsda.plot,Tax.filter,paste0('n',n_componet),i)#n_componet=2;i=Genus;
# p
ggsave(plot= p, filename = paste0("output/spls_da_201906/splda_",i,"_plot_n",n_componet,".png"), dpi = 300, height = 10, width = 10, units = "in")
#
write.table(res.splsda.plot, file = paste0("output/spls_da_201906/splda_",i,"_plot_n",n_componet,".xls"),sep = "\t", row.names = F,quote = F)
#############################################n3
res.splsda.plot<-NULL
png(filename = paste0("output/spls_da_201906/",i, "_sPLSDA_comp3.png"), width = 8, height = 6.5, units = "in", res = 300)
res.splsda.plot<- plotLoadings(gvri.splsda,
comp = 3, method = "mean", contrib = "max",
legend.color = ggsci::pal_lancet("lanonc")(9)[1:nlevels(as.factor(Metadata$Dosage))], name.var = name.var,
size.title = 1.0, ndisplay = 50, size.name = .6, size.legend = 1.2)
dev.off()
res.splsda.plot$SampleID = rownames(res.splsda.plot)
p<-NULL
n_componet<-3
p<-spda_rere_plot(res.splsda.plot,Tax.filter,paste0('n',n_componet),i)#n_componet=2;i=Genus;
# p
ggsave(plot= p, filename = paste0("output/spls_da_201906/splda_",i,"_plot_n",n_componet,".png"), dpi = 300, height = 10, width = 10, units = "in")
#
write.table(res.splsda.plot, file = paste0("output/spls_da_201906/splda_",i,"_plot_n",n_componet,".xls"),sep = "\t", row.names = F,quote = F)
}
###############
####### redefine a color palette to make it consistent with the above color palette
color.gvri <- function(num.vector) {
if (is.factor(num.vector)) {
num.vector <- as.numeric(num.vector)
}
if (!is.numeric(num.vector)) {
stop(paste("num.vector has to be numeric", call. = FALSE))
}
##### lancet color palette
mixo.col <- c("#00468BFF", "#ED0000FF", "#42B540FF", "#0099B4FF", "#925E9FFF", "#FDAF91FF", "#AD002AFF", "#ADB6B6FF", "#1B1919FF")
n <- length(num.vector)
if (isTRUE(num.vector) > length(mixo.col)) {
stop(paste(
"We only have a few mix.colors available, n <= ",
length(mixo.col)
), call. = FALSE)
}
if (isTRUE(!is.finite((num.vector))) || (n < 1)) {
stop("'num.vector' must be an integer vector with positive values.",
call. = FALSE
)
}
return(mixo.col[num.vector])
}
##################### Heatmap on outputs of sPLS-DA
png(filename = "output/spls_da_201906/Heatmap_comp1_comp2.png", width = 12, height = 7.5, units = "in", res = 300)
cim(gvri.splsda,
row.sideColors = color.gvri(Y),
cluster = "column",
row.names = F,
comp = c(1, 2), ## please make sure you choose the right number of components, or keep all components by unticking this line
margins = c(5, 6),
# color = colorRampPalette(c("#009E73","white","#E69F00"))(200),
color = colorRampPalette(c("#0D8CFF", "#43A6FF", "#5EB3FF", "#93CCFF", "#FFFFFF", "#FFBBAA", "#FF7755", "#FF6039", "#FF3300"))(100) )
dev.off()
png(filename = "output/spls_da_201906/Heatmap_comp1_comp3.png", width = 12, height = 7.5, units = "in", res = 300)
cim(gvri.splsda,
row.sideColors = color.gvri(Y),
cluster = "column",
row.names = F,
comp = c(1, 3), ## please make sure you choose the right number of components, or keep all components by unticking this line
margins = c(5, 6),
# color = viridis::viridis(1000, option = "D") ## change the palette
# color = colorRampPalette(c("#009E73","white","#E69F00"))(20),
color = colorRampPalette(c("#0D8CFF", "#43A6FF", "#5EB3FF", "#93CCFF", "#FFFFFF", "#FFBBAA", "#FF7755", "#FF6039", "#FF3300"))(100) )
dev.off()
png(filename = "output/spls_da_201906/Heatmap_comp1_comp2_comp3.png", width = 12, height = 7.5, units = "in", res = 300)
cim(gvri.splsda,
row.sideColors = color.gvri(Y),
cluster = "column",
# comp = c(1,2), ## please make sure you choose the right number of components, or keep all components by unticking this line
margins = c(5, 6),
row.names = F,
# color = viridis::viridis(1000, option = "D") ## change the palette
# color = c( "#009E73","white","#E69F00"),
# color = colorRampPalette(c("#009E73","white","#E69F00"))(20),
# color = colorRampPalette(c("navy", "white", "firebrick3"))(100)
color = colorRampPalette(c("#0D8CFF", "#43A6FF", "#5EB3FF", "#93CCFF", "#FFFFFF", "#FFBBAA", "#FF7755", "#FF6039", "#FF3300"))(100) )
dev.off()
In our case, We split our data for pairwise comparison
lefse_func(group1 = "0", group2 = "30", phy_new =phy_core_sub, ref = "0", group = "Dosage", batch = "201906")
There are significant difference in groups.