R:计算和重新编码矩阵中的连续值
R: counting and recoding consecutive values in a matrix
我正在处理包含大量 NA 的矩阵。我想在一个新矩阵中记录每个 NA 序列的长度。
下面的例子应该更通俗易懂。
#Generating a random 5x5 population matrix with 15 NA
M=matrix(sample(1:9,25,T),5)
M[sample(1:length(M),15,F)]=NA
dimnames(M)=list(paste(rep("City",dim(M)[1]),1:dim(M)[1],sep=""),paste(rep("Year",dim(M)[2]),1:dim(M)[2],sep=""))
M
Year1 Year2 Year3 Year4 Year5
City1 2 NA NA NA NA
City2 NA NA NA 6 8
City3 1 NA NA 6 NA
City4 NA 5 NA NA 1
City5 8 NA 1 NA 2
所需的输出如下。例如4 4 4 4表示连续4个NA的序列。
Year1 Year2 Year3 Year4 Year5
City1 0 4 4 4 4
City2 3 3 3 0 0
City3 0 2 2 0 1
City4 1 0 2 2 0
City5 0 1 0 1 0
你知道我该怎么做吗?
不是有史以来最高效的代码:
r1=c(1,1,NA,1,1)
r2=c(1,NA,NA,1,1)
r3=c(1,NA,NA,NA,1)
r4=c(NA,NA,1,1,1)
r5=c(1,1,1,NA,NA)
M=rbind(r1,r2,r3,r4,r5)
就像@Pascal 指出的那样,您的方法会将整个矩阵转换为字符,因此您可以将 1 分配给 0,然后执行以下操作:
M[M == 1] <- 0
(xx <- t(apply(M, 1, function(x) {
s <- sum(is.na(x))
if (is.na(x[1])) x[is.na(x)] <- rep(4, s) else
if (is.na(tail(x, 1))) x[is.na(x)] <- rep(5, s) else
x[is.na(x)] <- s
x
})))
# [,1] [,2] [,3] [,4] [,5]
# r1 0 0 1 0 0
# r2 0 2 2 0 0
# r3 0 3 3 3 0
# r4 4 4 0 0 0
# r5 0 0 0 5 5
这是您想要的输出。如果你不相信我,把 0 转换回 1 并根据整数分配字母
xx[xx > 0] <- letters[xx[xx > 0]]
xx[xx == '0'] <- 1
r1=c(1,1,"a",1,1)
r2=c(1,"b","b",1,1)
r3=c(1,"c","c","c",1)
r4=c("d","d",1,1,1)
r5=c(1,1,1,"e","e")
R=rbind(r1,r2,r3,r4,r5)
identical(R, xx)
# [1] TRUE
这是将应用于每一行的函数的另一个基础。我试过了,但无法避免 for 循环:
x = c(1,NA,1,NA,NA,1,NA,NA,NA,1,NA,NA,NA,NA)
#Find the Start and End of each sequence of NA's (Vectorized)
(start <- is.na(x) * c(T,!is.na(x[-length(x)])))
#> [1] 0 1 0 1 0 0 1 0 0 0 1 0 0 0
(end <- is.na(x) * c(!is.na(x[-1]),T))
#> [1] 0 1 0 0 1 0 0 0 1 0 0 0 0 1
# The difference betweeen the start and end of the sequence +1 is the sequence length
wStart <- which(!!start)
wEnd <- which(!!end)
sequenceLength <- wEnd[i] - wStart[i] + 1
# replace the sequence of NA's with it's class
for(i in seq_along(wStart))
x[`:`(wStart[i],wEnd[i])] <- letters[sequenceLength]
x
#> [1] "1" "a" "1" "b" "b" "1" "c" "c" "c" "1" "d" "d" "d" "d"
如:
(xx <- t(apply(M, 1, function(x) {
wStart <- which(!!(is.na(x) * c(T,!is.na(x[-length(x)]))))
wEnd <- which(!!is.na(x) * c(!is.na(x[-1]),T))
sequenceLength <-
for(i in seq_along(wStart))
x[`:`(wStart[i],wEnd[i])] <- letters[wEnd[i] - wStart[i] + 1]
return(x)
})))
我正在处理包含大量 NA 的矩阵。我想在一个新矩阵中记录每个 NA 序列的长度。
下面的例子应该更通俗易懂。
#Generating a random 5x5 population matrix with 15 NA
M=matrix(sample(1:9,25,T),5)
M[sample(1:length(M),15,F)]=NA
dimnames(M)=list(paste(rep("City",dim(M)[1]),1:dim(M)[1],sep=""),paste(rep("Year",dim(M)[2]),1:dim(M)[2],sep=""))
M
Year1 Year2 Year3 Year4 Year5
City1 2 NA NA NA NA
City2 NA NA NA 6 8
City3 1 NA NA 6 NA
City4 NA 5 NA NA 1
City5 8 NA 1 NA 2
所需的输出如下。例如4 4 4 4表示连续4个NA的序列。
Year1 Year2 Year3 Year4 Year5
City1 0 4 4 4 4
City2 3 3 3 0 0
City3 0 2 2 0 1
City4 1 0 2 2 0
City5 0 1 0 1 0
你知道我该怎么做吗?
不是有史以来最高效的代码:
r1=c(1,1,NA,1,1)
r2=c(1,NA,NA,1,1)
r3=c(1,NA,NA,NA,1)
r4=c(NA,NA,1,1,1)
r5=c(1,1,1,NA,NA)
M=rbind(r1,r2,r3,r4,r5)
就像@Pascal 指出的那样,您的方法会将整个矩阵转换为字符,因此您可以将 1 分配给 0,然后执行以下操作:
M[M == 1] <- 0
(xx <- t(apply(M, 1, function(x) {
s <- sum(is.na(x))
if (is.na(x[1])) x[is.na(x)] <- rep(4, s) else
if (is.na(tail(x, 1))) x[is.na(x)] <- rep(5, s) else
x[is.na(x)] <- s
x
})))
# [,1] [,2] [,3] [,4] [,5]
# r1 0 0 1 0 0
# r2 0 2 2 0 0
# r3 0 3 3 3 0
# r4 4 4 0 0 0
# r5 0 0 0 5 5
这是您想要的输出。如果你不相信我,把 0 转换回 1 并根据整数分配字母
xx[xx > 0] <- letters[xx[xx > 0]]
xx[xx == '0'] <- 1
r1=c(1,1,"a",1,1)
r2=c(1,"b","b",1,1)
r3=c(1,"c","c","c",1)
r4=c("d","d",1,1,1)
r5=c(1,1,1,"e","e")
R=rbind(r1,r2,r3,r4,r5)
identical(R, xx)
# [1] TRUE
这是将应用于每一行的函数的另一个基础。我试过了,但无法避免 for 循环:
x = c(1,NA,1,NA,NA,1,NA,NA,NA,1,NA,NA,NA,NA)
#Find the Start and End of each sequence of NA's (Vectorized)
(start <- is.na(x) * c(T,!is.na(x[-length(x)])))
#> [1] 0 1 0 1 0 0 1 0 0 0 1 0 0 0
(end <- is.na(x) * c(!is.na(x[-1]),T))
#> [1] 0 1 0 0 1 0 0 0 1 0 0 0 0 1
# The difference betweeen the start and end of the sequence +1 is the sequence length
wStart <- which(!!start)
wEnd <- which(!!end)
sequenceLength <- wEnd[i] - wStart[i] + 1
# replace the sequence of NA's with it's class
for(i in seq_along(wStart))
x[`:`(wStart[i],wEnd[i])] <- letters[sequenceLength]
x
#> [1] "1" "a" "1" "b" "b" "1" "c" "c" "c" "1" "d" "d" "d" "d"
如:
(xx <- t(apply(M, 1, function(x) {
wStart <- which(!!(is.na(x) * c(T,!is.na(x[-length(x)]))))
wEnd <- which(!!is.na(x) * c(!is.na(x[-1]),T))
sequenceLength <-
for(i in seq_along(wStart))
x[`:`(wStart[i],wEnd[i])] <- letters[wEnd[i] - wStart[i] + 1]
return(x)
})))