有条件地替换 R 中前几行中的值
conditionally replace values in preceding rows in R
我想根据其他列中的值替换前一行中的值。
这是我的数据示例,记录了各种活动每天所用的分钟数(minutes/day)。
# Example data: minutes per day spent on each activity.
activity <- c("car", "soccer", "eat", "drink")
category <- c("travel", "sport", "eat/drink", "eat/drink")
duration <- c(75, 15, 10, 160)
df <- data.frame(
  activity = activity,
  category = category,
  duration = duration
)
activity category duration
1 car travel 75
2 soccer sport 15
3 eat eat/drink 10
4 drink eat/drink 160
如果某一行的活动是 "drink" 且持续时间 > 5 分钟(如第 4 行),我想将该行的 "duration" 替换为 5 分钟,并把剩余的时间(在本例中为 155 分钟)加到前一行的 "duration" 上;除非前一行的 "category" 也是 "eat/drink",在这种情况下我想把剩余时间加到再往前一行的 "duration" 上,依此类推...
在上面的示例中,第 3 行的类别是 "eat/drink",所以我会把 155 分钟加到第 2 行的 "duration" 上。但是,如果第 2 行的 "category" 也是 "eat/drink",我想把这 155 分钟加到再前一行(第 1 行)上。
感谢您的帮助!
到目前为止我已经尝试过:
# Attempt: push each long drink's surplus (duration - 5) onto the row directly
# above it. The category of the receiving row is not checked here.
next_is_long_drink <- df$activity[-1] == "drink" & df$duration[-1] > 5
with_surplus <- df$duration + c(df$duration[-1] - 5, 0)
df$duration[-nrow(df)] <- ifelse(next_is_long_drink, with_surplus, df$duration)
将 155 分钟添加到上一行,并给出了这个:
activity category duration
1 car travel 75
2 soccer sport 15
3 eat eat/drink 165
4 drink eat/drink 160
然后我只是将第 4 行中的持续时间替换为 5 分钟,就像这样。
# Cap every "drink" row that lasts longer than 5 minutes at exactly 5 minutes.
is_long_drink <- df$activity == "drink" & df$duration > 5
df$duration <- ifelse(is_long_drink, 5, df$duration)
这给了我这个...
activity category duration
1 car travel 75
2 soccer sport 15
3 eat eat/drink 165
4 drink eat/drink 5
但我不知道如何在第 2 行的类别不是 "eat/drink" 的条件下,把这 155 分钟移到更前面的那一行(第 2 行)。如果第 2 行的类别也是 "eat/drink",我想把它移到再前一行,依此类推...
这是一个答案,但不幸的是我没能实现模糊连接(fuzzy join),即左侧用一列、右侧用两列进行匹配的连接。因此在合并(merge)这一步会产生一个笛卡尔积。您要的结果在 'df6' 的变量 'duration2' 中。
library(dplyr)

# Example data: minutes per day spent on each activity.
activity <- c("car", "soccer", "eat", "drink", "car", "drink", "car", "drink")
category <- c(
  "travel", "sport", "eat/drink", "eat/drink",
  "travel", "eat/drink", "travel", "eat/drink"
)
duration <- c(75, 15, 10, 160, 100, 50, 200, 60)
df <- data.frame(activity, category, duration)
df$row <- seq_len(nrow(df))

# "drink" rows longer than 5 minutes, and the surplus minutes each gives away.
# Columns are read from df itself rather than from the global vectors.
df1 <- df[df$activity == "drink" & df$duration > 5, ]
df1$time <- df1$duration - 5

df2 <- rename(df1, row2 = row)

# Constant key: merging on it pairs every row of df with every long drink row
# (a Cartesian product), which stands in for a non-equi join.
df$key <- 1
df2$key <- 1

# For each long drink row (row2), keep every earlier row whose category is not
# "eat/drink"; these are the candidate receivers of its surplus.
df3 <- merge(df, df2, by = "key") %>%
  filter(row < row2, category.x != "eat/drink")

# The receiver is the nearest candidate, i.e. the largest row number. Using
# max() avoids relying on the row order returned by merge(). Several drinks may
# share one receiver (e.g. consecutive drink rows), so their surpluses are
# summed per receiver.
df4 <- df3 %>%
  group_by(row2) %>%
  summarize(row = max(row), time = first(time)) %>%
  group_by(row) %>%
  summarize(time = sum(time))

# Add the received minutes to the receiver rows.
df5 <- df %>%
  left_join(df4, by = "row") %>%
  mutate(duration2 = ifelse(is.na(time), duration, duration + time)) %>%
  select(activity, category, duration, duration2, row)

# Cap every long drink row at 5 minutes. A drink with no eligible earlier row
# is still capped; its surplus has no receiver and is dropped.
df2 <- select(df2, row2, time)
df6 <- df5 %>%
  left_join(df2, by = c("row" = "row2")) %>%
  mutate(duration2 = ifelse(is.na(time), duration2, duration - time)) %>%
  select(-time)
df6
# activity category duration duration2 row
#1 car travel 75 75 1
#2 soccer sport 15 170 2
#3 eat eat/drink 10 10 3
#4 drink eat/drink 160 5 4
#5 car travel 100 145 5
#6 drink eat/drink 50 5 6
#7 car travel 200 255 7
#8 drink eat/drink 60 5 8
我想根据其他列中的值替换前一行中的值。
这是我的数据示例,记录了各种活动每天所用的分钟数(minutes/day)。
# Example data: minutes per day spent on each activity.
activity <- c("car", "soccer", "eat", "drink")
category <- c("travel", "sport", "eat/drink", "eat/drink")
duration <- c(75, 15, 10, 160)
df <- data.frame(
  activity = activity,
  category = category,
  duration = duration
)
activity category duration
1 car travel 75
2 soccer sport 15
3 eat eat/drink 10
4 drink eat/drink 160
如果某一行的活动是 "drink" 且持续时间 > 5 分钟(如第 4 行),我想将该行的 "duration" 替换为 5 分钟,并把剩余的时间(在本例中为 155 分钟)加到前一行的 "duration" 上;除非前一行的 "category" 也是 "eat/drink",在这种情况下我想把剩余时间加到再往前一行的 "duration" 上,依此类推...
在上面的示例中,第 3 行的类别是 "eat/drink",所以我会把 155 分钟加到第 2 行的 "duration" 上。但是,如果第 2 行的 "category" 也是 "eat/drink",我想把这 155 分钟加到再前一行(第 1 行)上。
感谢您的帮助!
到目前为止我已经尝试过:
# Attempt: push each long drink's surplus (duration - 5) onto the row directly
# above it. The category of the receiving row is not checked here.
next_is_long_drink <- df$activity[-1] == "drink" & df$duration[-1] > 5
with_surplus <- df$duration + c(df$duration[-1] - 5, 0)
df$duration[-nrow(df)] <- ifelse(next_is_long_drink, with_surplus, df$duration)
将 155 分钟添加到上一行,并给出了这个:
activity category duration
1 car travel 75
2 soccer sport 15
3 eat eat/drink 165
4 drink eat/drink 160
然后我只是将第 4 行中的持续时间替换为 5 分钟,就像这样。
# Cap every "drink" row that lasts longer than 5 minutes at exactly 5 minutes.
is_long_drink <- df$activity == "drink" & df$duration > 5
df$duration <- ifelse(is_long_drink, 5, df$duration)
这给了我这个...
activity category duration
1 car travel 75
2 soccer sport 15
3 eat eat/drink 165
4 drink eat/drink 5
但我不知道如何在第 2 行的类别不是 "eat/drink" 的条件下,把这 155 分钟移到更前面的那一行(第 2 行)。如果第 2 行的类别也是 "eat/drink",我想把它移到再前一行,依此类推...
这是一个答案,但不幸的是我没能实现模糊连接(fuzzy join),即左侧用一列、右侧用两列进行匹配的连接。因此在合并(merge)这一步会产生一个笛卡尔积。您要的结果在 'df6' 的变量 'duration2' 中。
library(dplyr)

# Example data: minutes per day spent on each activity.
activity <- c("car", "soccer", "eat", "drink", "car", "drink", "car", "drink")
category <- c(
  "travel", "sport", "eat/drink", "eat/drink",
  "travel", "eat/drink", "travel", "eat/drink"
)
duration <- c(75, 15, 10, 160, 100, 50, 200, 60)
df <- data.frame(activity, category, duration)
df$row <- seq_len(nrow(df))

# "drink" rows longer than 5 minutes, and the surplus minutes each gives away.
# Columns are read from df itself rather than from the global vectors.
df1 <- df[df$activity == "drink" & df$duration > 5, ]
df1$time <- df1$duration - 5

df2 <- rename(df1, row2 = row)

# Constant key: merging on it pairs every row of df with every long drink row
# (a Cartesian product), which stands in for a non-equi join.
df$key <- 1
df2$key <- 1

# For each long drink row (row2), keep every earlier row whose category is not
# "eat/drink"; these are the candidate receivers of its surplus.
df3 <- merge(df, df2, by = "key") %>%
  filter(row < row2, category.x != "eat/drink")

# The receiver is the nearest candidate, i.e. the largest row number. Using
# max() avoids relying on the row order returned by merge(). Several drinks may
# share one receiver (e.g. consecutive drink rows), so their surpluses are
# summed per receiver.
df4 <- df3 %>%
  group_by(row2) %>%
  summarize(row = max(row), time = first(time)) %>%
  group_by(row) %>%
  summarize(time = sum(time))

# Add the received minutes to the receiver rows.
df5 <- df %>%
  left_join(df4, by = "row") %>%
  mutate(duration2 = ifelse(is.na(time), duration, duration + time)) %>%
  select(activity, category, duration, duration2, row)

# Cap every long drink row at 5 minutes. A drink with no eligible earlier row
# is still capped; its surplus has no receiver and is dropped.
df2 <- select(df2, row2, time)
df6 <- df5 %>%
  left_join(df2, by = c("row" = "row2")) %>%
  mutate(duration2 = ifelse(is.na(time), duration2, duration - time)) %>%
  select(-time)
df6
# activity category duration duration2 row
#1 car travel 75 75 1
#2 soccer sport 15 170 2
#3 eat eat/drink 10 10 3
#4 drink eat/drink 160 5 4
#5 car travel 100 145 5
#6 drink eat/drink 50 5 6
#7 car travel 200 255 7
#8 drink eat/drink 60 5 8