# 熊荣川
# 六盘水师范学院生物信息学实验室
# http://blog.sciencenet.cn/u/Bearjazz
# Stop-codon statistics for a translated protein sequence (R).
#
# Reads a protein FASTA file, locates every stop symbol ("*") and, for each
# stop, reports the stretch of residues between the previous stop and this
# one in protein coordinates and in DNA coordinates (3 bases per residue).
# The resulting table is written to an Excel workbook.

library(XLConnect)  # provides writeWorksheetToFile()

setwd("**")  # placeholder: replace "**" with the folder holding the input

infile <- "protein1.fasta"
# Strip the file extension. The earlier sub(".fas", "", infile) used an
# unescaped, unanchored pattern and turned "protein1.fasta" into "protein1ta".
outname <- tools::file_path_sans_ext(infile)

# Drop the FASTA header lines and flatten all remaining lines into a single
# vector with one residue per element. Every non-header line in the file is
# concatenated, so positions below are relative to that combined sequence.
fasta_lines <- readLines(infile)
seq_lines <- fasta_lines[!grepl(">", fasta_lines, fixed = TRUE)]
# matchar() was not defined anywhere; unlist() yields the flat character
# vector that the position search below needs.
residues <- unlist(strsplit(seq_lines, ""), use.names = FALSE)

# 1-based positions of the stop symbols. A literal comparison avoids the
# invalid "\*" escape, which made the script fail to parse.
stop.p <- which(residues == "*")
if (length(stop.p) == 0L) {
  stop("no stop symbol ('*') found in ", infile, call. = FALSE)
}

# Left boundary of each segment: the previous stop position, with 1 used for
# the first segment. head(, -1L) also works when there is exactly one stop,
# where the earlier mat[2:L, 1] indexing ran out of bounds.
# NOTE(review): a boundary of 1 excludes the first residue (and first codon)
# from the first segment; 0 would start the DNA range at base 1. Kept as in
# the original script -- confirm which is intended.
prev_stop <- c(1, head(stop.p, -1L))

dna_start <- prev_stop * 3 + 1  # first base after the previous boundary codon
dna_stop <- stop.p * 3 - 3      # last base before the current stop codon

# A data.frame keeps the numeric columns numeric; writing pro.name into a
# numeric matrix silently coerced every column to character.
mat <- data.frame(
  pre.start = prev_stop,
  post.stop = stop.p,
  length = stop.p - prev_stop - 1,
  DNA.start = dna_start,
  DNA.stop = dna_stop,
  DNA.L = dna_stop - dna_start + 1,
  pro.name = outname,
  stringsAsFactors = FALSE
)

# "**.xlsx" is a placeholder: replace "**" with the desired workbook name.
writeWorksheetToFile("**.xlsx", data = mat, sheet = outname)
# Archiver|手机版|科学网 ( 京ICP备07017567号-12 )
# GMT+8, 2024-4-29 18:03
# Powered by ScienceNet.cn
# Copyright © 2007- 中国科学报社