R 中因子的连续值和新水平
Consecutive value after and new level of factor in R
我有以下样本
id <- c("a","b","a","b","a","a","a","a","b","b","c")
SOG <- c(4,4,0,0,0,0,0,0,0,0,9)
data <- data.frame(id,SOG)
我希望在新列中显示 SOG == 0 时的累积值。
使用以下代码
tmp <- rle(SOG) #run length encoding:
tmp$values <- tmp$values == 0 #turn values into logicals
tmp$values[tmp$values] <- cumsum(tmp$values[tmp$values]) #cumulative sum of TRUE values
inverse.rle(tmp) #inverse the run length encoding
我创建列 "stop":
data$Stops <- inverse.rle(tmp)
我能进去:
[1] 0 0 1 1 1 1 1 1 1 1 0
但我想改用
[1] 0 0 1 2 3 3 3 3 4 4 0
我的意思是当因子"id"的水平与上一行不同时,我想跳到下一个"stop"(i+1)。
看看dplyr
包
library(dplyr)
data %>%
mutate(
Stops = ifelse(
SOG > 0,
0,
cumsum(SOG == 0 & lag(id) != id)
)
)
我们可以试试
library(data.table)
setDT(data1)[, v1 := if(all(!SOG)) c(TRUE, id[-1]!= id[-.N]) else
rep(FALSE, .N), .(grp = rleid(SOG))][,cumsum(v1)*(!SOG)]
#[1] 0 0 1 2 3 3 3 3 4 4 0 0 0 0 5 5 0 6 6 0
使用旧数据
setDT(data)[, v1 := if(all(!SOG)) c(TRUE, id[-1]!= id[-.N])
else rep(FALSE, .N), .(grp = rleid(SOG))][,cumsum(v1)*(!SOG)]
#[1] 0 0 1 2 3 3 3 3 4 4 0
数据
id <- c("a","b","a","b","a","a","a","a","b","b","c","a","a","a","a","a","a","a","a", "a")
SOG <- c(4,4,0,0,0,0,0,0,0,0,9,1,5,3,0,0,4,0,0,1)
data1 <- data.frame(id, SOG, stringsAsFactors=FALSE)
我有以下样本
id <- c("a","b","a","b","a","a","a","a","b","b","c")
SOG <- c(4,4,0,0,0,0,0,0,0,0,9)
data <- data.frame(id,SOG)
我希望在新列中显示 SOG == 0 时的累积值。 使用以下代码
tmp <- rle(SOG) #run length encoding:
tmp$values <- tmp$values == 0 #turn values into logicals
tmp$values[tmp$values] <- cumsum(tmp$values[tmp$values]) #cumulative sum of TRUE values
inverse.rle(tmp) #inverse the run length encoding
我创建列 "stop":
data$Stops <- inverse.rle(tmp)
我能进去:
[1] 0 0 1 1 1 1 1 1 1 1 0
但我想改用
[1] 0 0 1 2 3 3 3 3 4 4 0
我的意思是当因子"id"的水平与上一行不同时,我想跳到下一个"stop"(i+1)。
看看dplyr
包
library(dplyr)
data %>%
mutate(
Stops = ifelse(
SOG > 0,
0,
cumsum(SOG == 0 & lag(id) != id)
)
)
我们可以试试
library(data.table)
setDT(data1)[, v1 := if(all(!SOG)) c(TRUE, id[-1]!= id[-.N]) else
rep(FALSE, .N), .(grp = rleid(SOG))][,cumsum(v1)*(!SOG)]
#[1] 0 0 1 2 3 3 3 3 4 4 0 0 0 0 5 5 0 6 6 0
使用旧数据
setDT(data)[, v1 := if(all(!SOG)) c(TRUE, id[-1]!= id[-.N])
else rep(FALSE, .N), .(grp = rleid(SOG))][,cumsum(v1)*(!SOG)]
#[1] 0 0 1 2 3 3 3 3 4 4 0
数据
id <- c("a","b","a","b","a","a","a","a","b","b","c","a","a","a","a","a","a","a","a", "a")
SOG <- c(4,4,0,0,0,0,0,0,0,0,9,1,5,3,0,0,4,0,0,1)
data1 <- data.frame(id, SOG, stringsAsFactors=FALSE)