在 R 中使用 apply(或其他迭代函数)创建由向量组成的矩阵
Creating a matrix of vectors with apply (or other iterative function) in R
我需要对一组八聚体(由 8 个字母组成的序列)运行正交编码函数,并把结果返回为一个 n×160 的数值矩阵(其中 n 是数据中八聚体的数量)。
正交编码函数为:
# One-hot ("orthogonal") encode the letters of a sequence over a fixed
# 20-letter alphabet (A R N D C Q E G H I L K M F P S T W Y V).
#
# octamer: value coerced with as.character(); only its FIRST element is
#          split into letters (see the `[[1]]` below).
# Returns a plain numeric vector: the per-letter 20-element indicator rows are
# stacked into a matrix and then flattened column by column.
orthocode <- function(octamer){
# Seed row of 20 zeros so rbind() has something to append to; removed again
# after the loop.
matcode <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
octamer_char <- as.character(octamer)
# `[[1]]` keeps only the letters of the first string, so any further elements
# of `octamer` are ignored.
octamer_split <- strsplit(octamer_char,"")[[1]]
for (letter in octamer_split){
# The nested ifelse() calls are used purely for their side effect: the `yes`
# argument of the matching test assigns a new row to `matcode`; the value
# returned by ifelse() itself is discarded.
# NOTE(review): the innermost ifelse() has no `no` argument, so a letter
# outside the 20 listed presumably fails with a missing-argument error rather
# than being skipped -- confirm.
ifelse (letter == "A", (matcode = rbind(matcode,c(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "R", (matcode = rbind(matcode,c(0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "N", (matcode = rbind(matcode,c(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "D", (matcode = rbind(matcode,c(0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "C", (matcode = rbind(matcode,c(0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "Q", (matcode = rbind(matcode,c(0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "E", (matcode = rbind(matcode,c(0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "G", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "H", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "I", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "L", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "K", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0))),
ifelse (letter == "M", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0))),
ifelse (letter == "F", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0))),
ifelse (letter == "P", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0))),
ifelse (letter == "S", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0))),
ifelse (letter == "T", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0))),
ifelse (letter == "W", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0))),
ifelse (letter == "Y", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0))),
ifelse (letter == "V", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1)))
))))))))))))))))))))
}
# Drop the seed row of zeros added before the loop.
matcode <- matcode[-1,]
# c() strips the dim attribute, flattening the matrix in column-major order:
# all positions for alphabet letter 1 come first, then letter 2, and so on.
matcode <- c(matcode)
return(matcode)
}
正如一些人所问,这是一个例子,即使这不是不起作用的部分:
orthocode("ARNDCQEG")
[1] 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[81] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
该函数对单个八聚体可以正常工作,但当我尝试用 lapply 调用它时,结果只是一个含 160 个数字的向量,而且其中的编码与预期不同(也没有意义)。
lapply(data[1], orthocode)
结果如下:
$V1
[1] 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[81] 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
orthocode 函数本身确实可以正常工作。我需要知道的是如何从数据框中取出各个八聚体,对它们逐一运行该函数,并得到如下所示的矩阵:
rbind(orthocode("ARNDCQEG"),orthocode("NGJKAEPS"),orthocode("ABGSWKLA"))
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26] [,27] [,28]
[1,] 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1
[2,] 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
[3,] 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50] [,51] [,52] [,53] [,54]
[1,] 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
[3,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[,55] [,56] [,57] [,58] [,59] [,60] [,61] [,62] [,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73] [,74] [,75] [,76] [,77] [,78] [,79] [,80]
[1,] 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[3,] 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[,81] [,82] [,83] [,84] [,85] [,86] [,87] [,88] [,89] [,90] [,91] [,92] [,93] [,94] [,95] [,96] [,97] [,98] [,99] [,100] [,101] [,102] [,103] [,104] [,105]
[1,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
[3,] 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
[,106] [,107] [,108] [,109] [,110] [,111] [,112] [,113] [,114] [,115] [,116] [,117] [,118] [,119] [,120] [,121] [,122] [,123] [,124] [,125] [,126] [,127]
[1,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
[3,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
[,128] [,129] [,130] [,131] [,132] [,133] [,134] [,135] [,136] [,137] [,138] [,139] [,140] [,141] [,142] [,143] [,144] [,145] [,146] [,147] [,148] [,149]
[1,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[3,] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
[,150] [,151] [,152] [,153] [,154] [,155] [,156] [,157] [,158] [,159] [,160]
[1,] 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0
[3,] 0 0 0 0 0 0 0 0 0 0 0
输出必须是一个n行160列的矩阵。在数据上我要运行它,结果矩阵应该是一个947x160的。
有什么想法吗?
switch 具有其他语言中 CASE 结构的语义。由于没有很好的示例数据,下面的代码只做过简单测试,但可以试试:
# One-hot encode the letters of a sequence, dispatching on each letter with
# switch().
#
# octamer: value coerced with as.character(); only its first element is
#          split into letters.
# Returns a numeric vector that begins with a block of 20 zeros, followed by
# one 20-element indicator block per recognised letter, in input order. A
# letter that has no switch() branch yields NULL and so adds no block.
orthocode <- function(octamer) {
  # Indicator vector of length 20 with a single 1 at position `pos`.
  unit <- function(pos) replace(numeric(20), pos, 1)

  octamer_split <- strsplit(as.character(octamer), "")[[1]]

  blocks <- lapply(octamer_split, function(letter) {
    switch(letter,
      "A" = unit(1),
      "R" = unit(2),
      "N" = unit(3),
      "D" = unit(4),
      "C" = unit(5),
      "Q" = unit(6),
      "E" = unit(7),
      "G" = unit(8),
      "H" = unit(9),
      "I" = unit(10),
      "L" = unit(11),
      "K" = unit(12),
      "M" = unit(13),
      "F" = unit(14),
      "P" = unit(15),
      "S" = unit(16),
      "T" = unit(17),
      "W" = unit(18),
      "Y" = unit(19),
      "V" = unit(20)
    )
  })

  # unlist() drops the NULL entries left behind by unmatched letters, so only
  # recognised letters extend the leading block of zeros.
  matcode <- c(numeric(20), unlist(blocks))
  matcode
}
请注意,我删除了 matcode <- c(matcode) 这一行,因为它的副作用是破坏矩阵结构。使用以下数据:
dat <- list("ARNDE", "CQEGD")
我得到:
t( sapply(dat, orthocode) )
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17]
[1,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32]
[1,] 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
[,33] [,34] [,35] [,36] [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47]
[1,] 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
[,48] [,49] [,50] [,51] [,52] [,53] [,54] [,55] [,56] [,57] [,58] [,59] [,60] [,61] [,62]
[1,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73] [,74] [,75] [,76] [,77]
[1,] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
[,78] [,79] [,80] [,81] [,82] [,83] [,84] [,85] [,86] [,87] [,88] [,89] [,90] [,91] [,92]
[1,] 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
[,93] [,94] [,95] [,96] [,97] [,98] [,99] [,100] [,101] [,102] [,103] [,104] [,105] [,106]
[1,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0 1 0 0
[,107] [,108] [,109] [,110] [,111] [,112] [,113] [,114] [,115] [,116] [,117] [,118] [,119]
[1,] 1 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0 0 0
[,120]
[1,] 0
[2,] 0
如果我在最后使用它,我会更喜欢结果(但这不是你想要的):
matcode <- matcode[-1, ,drop=FALSE]
rownames(matcode) <- octamer_split
return(matcode) # here the return call is needed.
我们可以用 match 简化 ifelse,并去掉 for 循环:
# One-hot encode each letter of a sequence using match().
#
# octamer: value coerced with as.character(); only its first element is
#          split into letters.
# Returns a numeric matrix with one row per letter (row names are the
# letters) and 20 columns, one per alphabet letter. A letter that is not in
# the alphabet produces an all-zero row. An empty string gives a 0 x 20
# matrix.
orthocode <- function(octamer) {
  amino_acids <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I",
                   "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V")
  octamer_split <- strsplit(as.character(octamer), "")[[1]]

  # Indicator vector for a single letter; stays all zero when match() finds
  # no alphabet entry.
  encode_letter <- function(letter) {
    res <- numeric(length(amino_acids))
    hit <- match(letter, amino_acids)
    if (!is.na(hit)) {
      res[hit] <- 1
    }
    res
  }

  # vapply() always returns a 20 x n matrix, even for n == 0, whereas
  # sapply() would hand back a list for empty input and make t() fail.
  t(vapply(
    octamer_split,
    encode_letter,
    FUN.VALUE = numeric(length(amino_acids))
  ))
}
R 是向量化的。不要为每种情况单独运行一段代码,也不要在循环中逐步增长对象。我会直接这样写:
# One-hot encode a vector of equal-length sequences into a numeric matrix.
#
# octamer: vector coerced with as.character(); each element is one sequence.
# Returns a matrix with one row per sequence and 20 * (letters per sequence)
# columns. Each row is that sequence's letters-by-alphabet indicator matrix
# flattened column by column. Letters that are not in the alphabet (for
# example "J" or "B") produce NA entries. Zero-length input gives a 0 x 0
# matrix; sequences of differing lengths raise an error.
orthocode <- function(octamer) {
  # Alphabet of the 20 encoded letters (no "J" or "B" here)
  amino_acids <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L",
                   "K", "M", "F", "P", "S", "T", "W", "Y", "V")
  # Predefine identity matrix: row i is the one-hot code of amino_acids[i]
  m <- diag(length(amino_acids))
  rownames(m) <- amino_acids
  # Create a character vector of single letters for each input
  octamer_split <- strsplit(as.character(octamer), "", fixed = TRUE)

  widths <- unique(lengths(octamer_split))
  if (length(widths) == 0L) {
    # Nothing to encode: return an empty matrix rather than failing in t()
    return(matrix(numeric(0), nrow = 0L, ncol = 0L))
  }
  if (length(widths) > 1L) {
    stop("all sequences must have the same number of letters", call. = FALSE)
  }

  # Look up the identity row of every letter and flatten column by column.
  # vapply() guarantees a (20 * width) x n matrix, where sapply() could
  # silently return a list.
  t(vapply(
    octamer_split,
    function(x) as.vector(m[match(x, rownames(m)), , drop = FALSE]),
    FUN.VALUE = numeric(widths * ncol(m))
  ))
}
此函数对单个输入或向量都适用。您可以用下面的调用进行测试:
orthocode(c("ARNDCQEG", "NGJKAEPS", "ABGSWKLA"))
或者在你的情况下只使用
orthocode(data[, 1])
P.S. 您的字母表向量中没有 J 或 B,因此我不确定应该如何处理您示例中的这些字母。在这种情况下,该函数会返回 NA。
我需要对一组八聚体(由 8 个字母组成的序列)运行正交编码函数,并把结果返回为一个 n×160 的数值矩阵(其中 n 是数据中八聚体的数量)。
正交编码函数为:
# One-hot ("orthogonal") encode the letters of a sequence over a fixed
# 20-letter alphabet (A R N D C Q E G H I L K M F P S T W Y V).
#
# octamer: value coerced with as.character(); only its FIRST element is
#          split into letters (see the `[[1]]` below).
# Returns a plain numeric vector: the per-letter 20-element indicator rows are
# stacked into a matrix and then flattened column by column.
orthocode <- function(octamer){
# Seed row of 20 zeros so rbind() has something to append to; removed again
# after the loop.
matcode <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
octamer_char <- as.character(octamer)
# `[[1]]` keeps only the letters of the first string, so any further elements
# of `octamer` are ignored.
octamer_split <- strsplit(octamer_char,"")[[1]]
for (letter in octamer_split){
# The nested ifelse() calls are used purely for their side effect: the `yes`
# argument of the matching test assigns a new row to `matcode`; the value
# returned by ifelse() itself is discarded.
# NOTE(review): the innermost ifelse() has no `no` argument, so a letter
# outside the 20 listed presumably fails with a missing-argument error rather
# than being skipped -- confirm.
ifelse (letter == "A", (matcode = rbind(matcode,c(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "R", (matcode = rbind(matcode,c(0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "N", (matcode = rbind(matcode,c(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "D", (matcode = rbind(matcode,c(0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "C", (matcode = rbind(matcode,c(0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "Q", (matcode = rbind(matcode,c(0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "E", (matcode = rbind(matcode,c(0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "G", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "H", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "I", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "L", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "K", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0))),
ifelse (letter == "M", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0))),
ifelse (letter == "F", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0))),
ifelse (letter == "P", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0))),
ifelse (letter == "S", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0))),
ifelse (letter == "T", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0))),
ifelse (letter == "W", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0))),
ifelse (letter == "Y", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0))),
ifelse (letter == "V", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1)))
))))))))))))))))))))
}
# Drop the seed row of zeros added before the loop.
matcode <- matcode[-1,]
# c() strips the dim attribute, flattening the matrix in column-major order:
# all positions for alphabet letter 1 come first, then letter 2, and so on.
matcode <- c(matcode)
return(matcode)
}
正如一些人所问,这是一个例子,即使这不是不起作用的部分:
orthocode("ARNDCQEG")
[1] 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[81] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
该函数对单个八聚体可以正常工作,但当我尝试用 lapply 调用它时,结果只是一个含 160 个数字的向量,而且其中的编码与预期不同(也没有意义)。
lapply(data[1], orthocode)
结果如下:
$V1
[1] 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[81] 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
orthocode 函数本身确实可以正常工作。我需要知道的是如何从数据框中取出各个八聚体,对它们逐一运行该函数,并得到如下所示的矩阵:
rbind(orthocode("ARNDCQEG"),orthocode("NGJKAEPS"),orthocode("ABGSWKLA"))
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26] [,27] [,28]
[1,] 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1
[2,] 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
[3,] 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50] [,51] [,52] [,53] [,54]
[1,] 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
[3,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[,55] [,56] [,57] [,58] [,59] [,60] [,61] [,62] [,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73] [,74] [,75] [,76] [,77] [,78] [,79] [,80]
[1,] 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[3,] 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[,81] [,82] [,83] [,84] [,85] [,86] [,87] [,88] [,89] [,90] [,91] [,92] [,93] [,94] [,95] [,96] [,97] [,98] [,99] [,100] [,101] [,102] [,103] [,104] [,105]
[1,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
[3,] 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
[,106] [,107] [,108] [,109] [,110] [,111] [,112] [,113] [,114] [,115] [,116] [,117] [,118] [,119] [,120] [,121] [,122] [,123] [,124] [,125] [,126] [,127]
[1,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
[3,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
[,128] [,129] [,130] [,131] [,132] [,133] [,134] [,135] [,136] [,137] [,138] [,139] [,140] [,141] [,142] [,143] [,144] [,145] [,146] [,147] [,148] [,149]
[1,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[3,] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
[,150] [,151] [,152] [,153] [,154] [,155] [,156] [,157] [,158] [,159] [,160]
[1,] 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0
[3,] 0 0 0 0 0 0 0 0 0 0 0
输出必须是一个n行160列的矩阵。在数据上我要运行它,结果矩阵应该是一个947x160的。
有什么想法吗?
switch 具有其他语言中 CASE 结构的语义。由于没有很好的示例数据,下面的代码只做过简单测试,但可以试试:
# One-hot encode the letters of a sequence, dispatching on each letter with
# switch().
#
# octamer: value coerced with as.character(); only its first element is
#          split into letters.
# Returns a numeric vector that begins with a block of 20 zeros, followed by
# one 20-element indicator block per recognised letter, in input order. A
# letter that has no switch() branch yields NULL and so adds no block.
orthocode <- function(octamer) {
  # Indicator vector of length 20 with a single 1 at position `pos`.
  unit <- function(pos) replace(numeric(20), pos, 1)

  octamer_split <- strsplit(as.character(octamer), "")[[1]]

  blocks <- lapply(octamer_split, function(letter) {
    switch(letter,
      "A" = unit(1),
      "R" = unit(2),
      "N" = unit(3),
      "D" = unit(4),
      "C" = unit(5),
      "Q" = unit(6),
      "E" = unit(7),
      "G" = unit(8),
      "H" = unit(9),
      "I" = unit(10),
      "L" = unit(11),
      "K" = unit(12),
      "M" = unit(13),
      "F" = unit(14),
      "P" = unit(15),
      "S" = unit(16),
      "T" = unit(17),
      "W" = unit(18),
      "Y" = unit(19),
      "V" = unit(20)
    )
  })

  # unlist() drops the NULL entries left behind by unmatched letters, so only
  # recognised letters extend the leading block of zeros.
  matcode <- c(numeric(20), unlist(blocks))
  matcode
}
请注意,我删除了 matcode <- c(matcode) 这一行,因为它的副作用是破坏矩阵结构。使用以下数据:
dat <- list("ARNDE", "CQEGD")
我得到:
t( sapply(dat, orthocode) )
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17]
[1,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32]
[1,] 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
[,33] [,34] [,35] [,36] [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47]
[1,] 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
[,48] [,49] [,50] [,51] [,52] [,53] [,54] [,55] [,56] [,57] [,58] [,59] [,60] [,61] [,62]
[1,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73] [,74] [,75] [,76] [,77]
[1,] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
[,78] [,79] [,80] [,81] [,82] [,83] [,84] [,85] [,86] [,87] [,88] [,89] [,90] [,91] [,92]
[1,] 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
[,93] [,94] [,95] [,96] [,97] [,98] [,99] [,100] [,101] [,102] [,103] [,104] [,105] [,106]
[1,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0 1 0 0
[,107] [,108] [,109] [,110] [,111] [,112] [,113] [,114] [,115] [,116] [,117] [,118] [,119]
[1,] 1 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0 0 0
[,120]
[1,] 0
[2,] 0
如果我在最后使用它,我会更喜欢结果(但这不是你想要的):
matcode <- matcode[-1, ,drop=FALSE]
rownames(matcode) <- octamer_split
return(matcode) # here the return call is needed.
我们可以用 match 简化 ifelse,并去掉 for 循环:
# One-hot encode each letter of a sequence using match().
#
# octamer: value coerced with as.character(); only its first element is
#          split into letters.
# Returns a numeric matrix with one row per letter (row names are the
# letters) and 20 columns, one per alphabet letter. A letter that is not in
# the alphabet produces an all-zero row. An empty string gives a 0 x 20
# matrix.
orthocode <- function(octamer) {
  amino_acids <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I",
                   "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V")
  octamer_split <- strsplit(as.character(octamer), "")[[1]]

  # Indicator vector for a single letter; stays all zero when match() finds
  # no alphabet entry.
  encode_letter <- function(letter) {
    res <- numeric(length(amino_acids))
    hit <- match(letter, amino_acids)
    if (!is.na(hit)) {
      res[hit] <- 1
    }
    res
  }

  # vapply() always returns a 20 x n matrix, even for n == 0, whereas
  # sapply() would hand back a list for empty input and make t() fail.
  t(vapply(
    octamer_split,
    encode_letter,
    FUN.VALUE = numeric(length(amino_acids))
  ))
}
R 是向量化的。不要为每种情况单独运行一段代码,也不要在循环中逐步增长对象。我会直接这样写:
# One-hot encode a vector of equal-length sequences into a numeric matrix.
#
# octamer: vector coerced with as.character(); each element is one sequence.
# Returns a matrix with one row per sequence and 20 * (letters per sequence)
# columns. Each row is that sequence's letters-by-alphabet indicator matrix
# flattened column by column. Letters that are not in the alphabet (for
# example "J" or "B") produce NA entries. Zero-length input gives a 0 x 0
# matrix; sequences of differing lengths raise an error.
orthocode <- function(octamer) {
  # Alphabet of the 20 encoded letters (no "J" or "B" here)
  amino_acids <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L",
                   "K", "M", "F", "P", "S", "T", "W", "Y", "V")
  # Predefine identity matrix: row i is the one-hot code of amino_acids[i]
  m <- diag(length(amino_acids))
  rownames(m) <- amino_acids
  # Create a character vector of single letters for each input
  octamer_split <- strsplit(as.character(octamer), "", fixed = TRUE)

  widths <- unique(lengths(octamer_split))
  if (length(widths) == 0L) {
    # Nothing to encode: return an empty matrix rather than failing in t()
    return(matrix(numeric(0), nrow = 0L, ncol = 0L))
  }
  if (length(widths) > 1L) {
    stop("all sequences must have the same number of letters", call. = FALSE)
  }

  # Look up the identity row of every letter and flatten column by column.
  # vapply() guarantees a (20 * width) x n matrix, where sapply() could
  # silently return a list.
  t(vapply(
    octamer_split,
    function(x) as.vector(m[match(x, rownames(m)), , drop = FALSE]),
    FUN.VALUE = numeric(widths * ncol(m))
  ))
}
此函数对单个输入或向量都适用。您可以用下面的调用进行测试:
orthocode(c("ARNDCQEG", "NGJKAEPS", "ABGSWKLA"))
或者在你的情况下只使用
orthocode(data[, 1])
P.S. 您的字母表向量中没有 J 或 B,因此我不确定应该如何处理您示例中的这些字母。在这种情况下,该函数会返回 NA。