PCOA
2021-3-11 15:27
阅读:7508
#载入绘图所需的包 library(vegan) library(ape) library(ggplot2) library(ggrepel) #分组数较少、组内生物学重复较多 data <- read.csv("otu.txt", head=TRUE,sep="\t",row.names = 1) #载入分组文件 groups <- read.table("group.txt",sep = "\t",header = F,colClasses = c("character")) #将分组文件转换为列表 groups <- as.list(groups) #将数据转置、去除缺失值 data <- t(data) data[is.na(data)] <- 0 #计算Bray-Curtis距离 data <- vegdist(data,method = "bray") #进行PCoA分析 pcoa<- pcoa(data, correction = "none", rn = NULL) #制作绘图文件 PC1 = pcoa$vectors[,1] PC2 = pcoa$vectors[,2] plotdata <- data.frame(rownames(pcoa$vectors),PC1,PC2,groups$V2) colnames(plotdata) <-c("sample","PC1","PC2","group") pich=c(21:24) #制作绘图参数的赋值向量。 #用于填充样本点的颜色 cbbPalette <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442") #样本点的边框颜色 Palette <- c("#000000", "#000000", "#000000", "#000000") #用于绘制横纵坐标label的文本,以显示解释比例 pc1 <-floor(pcoa$values$Relative_eig[1]*100) pc2 <-floor(pcoa$values$Relative_eig[2]*100) otu.adonis=adonis(data~V2,data = groups,distance = "bray") #匹配横纵坐标 ggplot(plotdata, aes(PC1, PC2)) + #绘制样本点,根据分组匹配颜色和形状,size调整点的大小 geom_point(aes(colour=group,shape=group,fill=group),size=12)+ geom_text(aes(x = 0.05,y = 0.35,label = paste("PERMANOVA:\n Group_A VS Group_B\n p-value = ",otu.adonis$aov.tab$`Pr(>F)`[1],sep = "")), size = 10,hjust = 0)+ stat_ellipse(aes(fill = group),geom = "polygon",level = 0.95,alpha = 0.3)+ #匹配形状、边框和填充的图例 scale_shape_manual(values=pich)+ scale_colour_manual(values=Palette)+ scale_fill_manual(values=cbbPalette)+ #设置标题和横纵坐标label文字 labs(title="PCoA - The composition of gut microbiome") + xlab(paste("PC1 ( ",pc1,"%"," )",sep="")) + ylab(paste("PC2 ( ",pc2,"%"," )",sep=""))+ theme(text=element_text(size=30))+ #添加横纵两条虚线 geom_vline(aes(xintercept = 0),linetype="dotted")+ geom_hline(aes(yintercept = 0),linetype="dotted")+ #调整背景、坐标轴、图例的格式 theme(panel.background = element_rect(fill='white', colour='black'), panel.grid=element_blank(), axis.title = element_text(color='black',size=34), axis.ticks.length = unit(0.4,"lines"), axis.ticks = element_line(color='black'), axis.line = element_line(colour = "black"), axis.title.x=element_text(colour='black', size=34), axis.title.y=element_text(colour='black', size=34), axis.text=element_text(colour='black',size=28), legend.title=element_blank(), legend.text=element_text(size=34), legend.key=element_blank(),legend.position = c(0.12,0.88), legend.background = element_rect(colour = "black"), legend.key.height=unit(1.6,"cm"))+ #设置标题的格式 theme(plot.title = element_text(size=34,colour = "black",hjust = 0.5,face = "bold"))
#载入分析包 library(vegan) library(ape) #载入分析数据 data <- read.csv("otu.txt", head=TRUE,sep="\t",row.names = 1) #将数据转置、去除缺失值 data <- t(data) data[is.na(data)] <- 0 #计算Bray-Curtis距离 data <- vegdist(data, method = "bray") #进行PCoA分析 pcoa<- pcoa(data, correction = "none", rn = NULL) #载入分组文件 groups <- read.table("group.txt",sep = "\t",header = F,colClasses = c("character")) #将分组文件转换为列表 groups <- as.list(groups) #制作绘图文件 PC1 = pcoa$vectors[,1] PC2 = pcoa$vectors[,2] plotdata <- data.frame(rownames(pcoa$vectors),PC1,PC2,groups$V2) colnames(plotdata) <-c("sample","PC1","PC2","group") #设置形状点数 pich=c(21:24) #用于填充样本点的颜色 cbbPalette <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442") #样本点的边框颜色 Palette <- c("#000000", "#000000","#000000","#000000") #用于绘制横纵坐标label的文本,以显示解释比例 pc1 <-floor(pcoa$values$Relative_eig[1]*100) pc2 <-floor(pcoa$values$Relative_eig[2]*100) #载入绘图包 library(ggplot2) #匹配横纵坐标 ggplot(plotdata, aes(PC1, PC2)) + #绘制样本点,根据分组匹配颜色和形状,size调整点的大小 geom_point(aes(colour=group,shape=group,fill=group),size=12)+ #匹配形状、边框和填充的图例 scale_shape_manual(values=pich)+ scale_colour_manual(values=Palette)+ scale_fill_manual(values=cbbPalette)+ #设置标题和横纵坐标label文字 labs(title="PCoA - The composition of gut microbiome") + xlab(paste("PC1 ( ",pc1,"%"," )",sep="")) + ylab(paste("PC2 ( ",pc2,"%"," )",sep=""))+ theme(text=element_text(size=30))+ #添加横纵两条虚线 geom_vline(aes(xintercept = 0),linetype="dotted")+ geom_hline(aes(yintercept = 0),linetype="dotted")+ #调整背景、坐标轴、图例的格式 theme(panel.background = element_rect(fill='white', colour='black'), panel.grid=element_blank(), axis.title = element_text(color='black',size=34), axis.ticks.length = unit(0.4,"lines"), axis.ticks = element_line(color='black'), axis.line = element_line(colour = "black"), axis.title.x=element_text(colour='black', size=34), axis.title.y=element_text(colour='black', size=34), axis.text=element_text(colour='black',size=28), legend.title=element_blank(), legend.text=element_text(size=34), legend.key=element_blank(), legend.background = element_rect(colour = "black"), legend.key.height=unit(1.6,"cm"))+ #设置标题的格式 theme(plot.title = element_text(size=34,colour = "black",hjust = 0.5,face = "bold"))
#同时显示两种分类方法 data <- read.csv("otu_taxa_table.txt", head=TRUE,sep="\t",row.names = 1) groups <- read.table("group.list.txt",sep = "\t",header = F,colClasses = c("character")) groups <- as.list(groups) data <- t(data) data[is.na(data)] <- 0 data <- vegdist(data,method = "bray") pcoa<- pcoa(data, correction = "none", rn = NULL) PC1 = pcoa$vectors[,1] PC2 = pcoa$vectors[,2] plotdata <- data.frame(rownames(pcoa$vectors),PC1,PC2,groups$V2,groups$V3) colnames(plotdata) <-c("sample","PC1","PC2","Treatment","Time") pich=c(21:25) cbbPalette <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442") pc1 <-floor(pcoa$values$Relative_eig[1]*100) pc2 <-floor(pcoa$values$Relative_eig[2]*100) ggplot(plotdata, aes(PC1, PC2)) + geom_point(aes(shape=Time,fill=Treatment,colour=Treatment),size=10)+ scale_shape_manual(values=pich)+ scale_colour_manual(values=cbbPalette)+ scale_fill_manual(values=cbbPalette)+ labs(title="PCoA - PC1-VS-PC2") + xlab(paste("PC1 ( ",pc1,"%"," )",sep="")) + ylab(paste("PC2 ( ",pc2,"%"," )",sep=""))+ theme(text=element_text(size=30))+ geom_vline(aes(xintercept = 0),linetype="dotted")+ geom_hline(aes(yintercept = 0),linetype="dotted")+ theme(panel.background = element_rect(fill='white', colour='black'), panel.grid=element_blank(), axis.title = element_text(color='black',size=34), axis.ticks.length = unit(0.4,"lines"), axis.ticks = element_line(color='black'), axis.line = element_line(colour = "black"), axis.title.x=element_text(colour='black', size=34), axis.title.y=element_text(colour='black', size=34), axis.text=element_text(colour='black',size=28), legend.title=element_text(colour = "black",size = 34,face = "bold"), legend.text=element_text(size=34), legend.key=element_blank(), legend.background = element_rect(colour = "black"), legend.key.height=unit(1.6,"cm"))+ theme(plot.title = element_text(size=34,colour = "black",hjust = 0.5,face = "bold"))
#stat_ellipse(aes(fill = Treatment),geom = "polygon",level = 0.95,alpha = 0.3)+ #level表示置信阈值,通常为0.95,alpha为置信椭圆颜色的透明度。 #单组样品数目至少要有4个,不然画不出来椭圆。 #legend.position = c(0.12,0.88) #可以看到默认的绘图方式中,图例是显示在图像右侧,确实是有点占地方, #通过观察图像,发现图像的左上方空间较大,因此在糊涂代码的theme中添加一行命令, #图例位置的修改没有办法自动生成,需要先绘制一遍默认的图像,通过观察确认图例可以防止的位置, #以防对样本点形成遮挡。 #通常情况下,除了PCoA之外,还需要使用PERMANOVA来检验不同组样本间的微生物群落是否具有显著差异。 #使用vegan包中的adonis函数进行PERMANOVA分析。 #otu.adonis=adonis(data~V2,data = groups,distance = "bray") #之后在绘图代码中添加如下一行命令,将PERMANVOA的结果在PCoA图中进行显示。 #geom_text(aes(x = 0.05,y = 0.35,label = paste("PERMANOVA:\n # Group_A VS Group_B\n p-value = ",otu.adonis$aov.tab$`Pr(>F)`[1],sep = "")), # size = 10,hjust = 0) #PERMANOVA结果的x和y坐标要根据图像实际情况进行设置,防止显示不全或者遮挡样本点。 #由于本示例第二种方式有分为5类,因此pich的赋值改为了21-25,请根据自己数据的分组数目自行调整。 #由于分组是按照12个月进行分组的,因而需要通过指定分组因子的顺序 #以达到图例中从1至12月依次显示的目的。 #由于分组有12个,而R中提供的具有填充颜色的形状远没有这么多, #因此至选择了4个形状通过重复3次来达到形状的匹配。 #通过legned.key.height调整图例的高度,从而达到图例正好与图像高度一致。 #从图总可以看出有一些样本存在离群现象, #要想在图中展示哪个样本离群就需要在样本点附近标出样本名称。 #常规的方法可以使用geom_text通过对样本点坐标的匹配标出样本名称, #比如可以在绘图代码中添加如下命令。geom_text(aes(label=sample),size=5,hjust=0.5,vjust=-1) #添加的样本名称会由于部分样品距离较近而出现重叠遮挡, #当然可以通过一个名称一个名称的调整来解决,但是效率实在是低。 #比较快速的方法是通过ggerpel包添加样本名称,其会自动调整遮挡的样本名,从而达到区分的目的。 #载入ggerple包后,在绘图命令中添加如下代码。 #geom_label_repel(aes(PC1,PC2,label = plotdata$sample),fill = "white",color = "black", box.padding = unit(0.6,"lines"),segment.colour = "grey50", label.padding = unit(0.35,"lines")) #ggerple添加样本名称在样本过于密集的情况也, #偶尔也会出现个别样本名称有遮挡的情况,99.9%以上没有问题。
#geom_polygon(data = plotdata,aes(fill = Treatment),alpha = 0.5,show.legend = FALSE)+
#分组数目较多、组内生物学重复较少 data <- read.csv("otu 2.txt", head=TRUE,sep="\t",row.names = 1) data <- t(data) data[is.na(data)] <- 0 data <- vegdist(data) pcoa<- pcoa(data, correction = "none", rn = NULL) groups <- read.table("group 2.txt",sep = "\t",header = F,colClasses = c("character")) groups <- as.list(groups) groups$V2 <- factor(groups$V2,levels = c("Jan","Feb","Mar","Apr","May", "Jun","Jul","Aug","Sep","Oct","Nov","Dec")) PC1 = pcoa$vectors[,1] PC2 = pcoa$vectors[,2] plotdata <- data.frame(rownames(pcoa$vectors),PC1,PC2,groups$V2) colnames(plotdata) <-c("sample","PC1","PC2","group") pc1 <-floor(pcoa$values$Relative_eig[1]*100) pc2 <-floor(pcoa$values$Relative_eig[2]*100) pich=rep(c(21:24),3) library(RColorBrewer) cbbPalette <- brewer.pal(12,"Set3") Palette <- c(rep("#000000",12)) ggplot(plotdata, aes(PC1, PC2)) + geom_point(aes(colour=group,shape=group,fill=group),size=8)+ geom_label_repel(aes(PC1,PC2,label = sample),fill = "white",color = "black", box.padding = unit(0.6,"lines"), segment.colour = "grey50", label.padding = unit(0.35,"lines")) + scale_shape_manual(values=pich)+ scale_colour_manual(values=Palette)+ scale_fill_manual(values=cbbPalette)+ labs(title="PCoA - The composition of microbiome in sediment") + xlab(paste("PC1 ( ",pc1,"%"," )",sep="")) + ylab(paste("PC2 ( ",pc2,"%"," )",sep=""))+ theme(text=element_text(size=30))+ geom_vline(aes(xintercept = 0),linetype="dotted")+ geom_hline(aes(yintercept = 0),linetype="dotted")+ theme(panel.background = element_rect(fill='white', colour='black'), panel.grid=element_blank(), axis.title = element_text(color='black',size=34), axis.ticks.length = unit(0.4,"lines"), axis.ticks = element_line(color='black'), axis.line = element_line(colour = "black"), axis.title.x=element_text(colour='black', size=34), axis.title.y=element_text(colour='black', size=34), axis.text=element_text(colour='black',size=28), legend.title=element_blank(), legend.text=element_text(size=28), legend.key=element_blank(),legend.position = "right", legend.background = element_rect(colour = "black"), legend.key.height=unit(1.55,"cm"))+ theme(plot.title = element_text(size=34,colour = "black",hjust = 0.5,face = "bold"))
#geom_polygon(data = plotdata, aes(fill = group), alpha = 0.5, show.legend = FALSE)+
转载本文请联系原作者获取授权,同时请注明本文来自林国鹏科学网博客。
链接地址:https://wap.sciencenet.cn/blog-3448646-1276179.html?mobile=1
收藏
当前推荐数:0
推荐到博客首页
网友评论0 条评论