根据条件并结合新列自身上一行的值来创建新列
Creating a new column with conditions in addition to the row value of the new column
如何根据列 A 的值创建新列 B,并且在计算时还要用到新列 B 中上一行的值?
B的值应该对应于:
A = 0: B = value of the row above.
A = 1: B = 1.
A = 2: B = value of the row above + 1.
当前数据框 + 期望结果
Dataframe Desired outcome
A A B
1 1 1
0 0 1
2 2 2
0 0 2
2 2 3
0 0 3
2 2 4
0 0 4
2 2 5
0 0 5
2 2 6
0 0 6
1 1 1
0 0 1
1 1 1
0 0 1
2 2 2
0 0 2
2 2 3
0 0 3
1 1 1
0 0 1
2 2 2
0 0 2
Data Frame
# Example input: A drives the counter, Bdesiredoutcome is the expected result.
A <- c(
  1, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0,
  1, 0, 1, 0, 2, 0, 2, 0, 1, 0, 2, 0
)
Bdesiredoutcome <- c(
  1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
  1, 1, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2
)
# Combine both vectors into the data frame used by the snippets below.
df <- data.frame(A = A, Bdesiredoutcome = Bdesiredoutcome)
我试过使用 dplyr 的 mutate()、case_when() 和 lag(),但运行时一直出错,问题出在 lag() 函数上;而改用 lag(A) 时又无法生成所需的结果。
关于如何解决这个问题有什么想法吗?
# Attempted approach (does not work): derive B row by row from the previous B.
# NOTE(review): B is referenced through lag(B) inside the same mutate() call
# that creates it, and df as built above has no B column, so there is no
# previous-row B available to read.
# NOTE(review): the reported error mentions an object of class "function",
# which suggests df was not a data frame when this ran (presumably the
# data.frame() step had not been executed in that session) - confirm.
df <- df %>%
mutate(B = case_when((A == 0) ~ lag(B), # A == 0: meant to carry the previous B forward
(A == 1) ~ 1, # A == 1: restart the counter at 1
(A == 2) ~ (lag(B)+1) # A == 2: meant to be the previous B plus one
))
Error in UseMethod("mutate_") :
no applicable method for 'mutate_' applied to an object of class "function"
In addition: Warning message:
我们可以用 cumsum
创建一个分组列,然后创建 'B' 列
library(dplyr)
# Each A == 1 starts a new run; inside a run, every non-zero A advances B by one
# and every A == 0 leaves B unchanged.
df %>%
  mutate(grp = cumsum(A == 1)) %>%
  group_by(grp) %>%
  mutate(B = cumsum(A != 0)) %>%
  ungroup() %>%
  # The run id is only a helper, so drop it and return a plain data frame.
  select(-grp) %>%
  as.data.frame()
-输出
A Bdesired B
1 1 1 1
2 0 1 1
3 2 2 2
4 0 2 2
5 2 3 3
6 0 3 3
7 2 4 4
8 0 4 4
9 2 5 5
10 0 5 5
11 2 6 6
12 0 6 6
13 1 1 1
14 0 1 1
15 1 1 1
16 0 1 1
17 2 2 2
18 0 2 2
19 2 3 3
20 0 3 3
21 1 1 1
22 0 1 1
23 2 2 2
24 0 2 2
关于你原来的问题,我得到以下信息:
library(tidyverse)
library(lubridate)
# Parse day-month-year strings into Date values so date arithmetic works.
df$date <- dmy(df$date)

# Build two per-id counters from the gap (in days) to the previous record:
#   A advances on every gap longer than 7 days and cycles through 1..6;
#   B advances on every gap longer than 85 days and never wraps.
df <- df %>%
  arrange(id, date) %>%
  group_by(id) %>%
  mutate(
    # The first row of each id has no predecessor; treat its gap as 0 days.
    daysbetween = replace_na(as.numeric(date - lag(date), units = "days"), 0),
    # A gap of exactly 7 (or 85) days used to match neither case_when() branch
    # and produced NA, which turned every later cumsum() value into NA.
    # Such gaps now count as "no step"; only gaps strictly above the threshold
    # advance the counter.
    NewA = as.numeric(daysbetween > 7),
    NewB = as.numeric(daysbetween > 85),
    # Modulo wrap keeps A in 1..6 for any number of steps (subtracting 6 once
    # only handled a single wrap-around).
    A = cumsum(NewA) %% 6 + 1,
    B = 1 + cumsum(NewB)
  ) %>%
  # Drop the grouping so later verbs do not silently operate per id.
  ungroup() %>%
  select(id, date, A, B)
这段代码只有当 A 在达到 6 时重置才有效。我按照上面的建议使用了 cumsum()。
如何根据列 A 的值创建新列 B,并且在计算时还要用到新列 B 中上一行的值?
B的值应该对应于:
A = 0: B = value of the row above.
A = 1: B = 1.
A = 2: B = value of the row above + 1.
当前数据框 + 期望结果
Dataframe Desired outcome
A A B
1 1 1
0 0 1
2 2 2
0 0 2
2 2 3
0 0 3
2 2 4
0 0 4
2 2 5
0 0 5
2 2 6
0 0 6
1 1 1
0 0 1
1 1 1
0 0 1
2 2 2
0 0 2
2 2 3
0 0 3
1 1 1
0 0 1
2 2 2
0 0 2
Data Frame
# Example input: A drives the counter, Bdesiredoutcome is the expected result.
A <- c(
  1, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0,
  1, 0, 1, 0, 2, 0, 2, 0, 1, 0, 2, 0
)
Bdesiredoutcome <- c(
  1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
  1, 1, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2
)
# Combine both vectors into the data frame used by the snippets below.
df <- data.frame(A = A, Bdesiredoutcome = Bdesiredoutcome)
我试过使用 dplyr 的 mutate()、case_when() 和 lag(),但运行时一直出错,问题出在 lag() 函数上;而改用 lag(A) 时又无法生成所需的结果。
关于如何解决这个问题有什么想法吗?
# Attempted approach (does not work): derive B row by row from the previous B.
# NOTE(review): B is referenced through lag(B) inside the same mutate() call
# that creates it, and df as built above has no B column, so there is no
# previous-row B available to read.
# NOTE(review): the reported error mentions an object of class "function",
# which suggests df was not a data frame when this ran (presumably the
# data.frame() step had not been executed in that session) - confirm.
df <- df %>%
mutate(B = case_when((A == 0) ~ lag(B), # A == 0: meant to carry the previous B forward
(A == 1) ~ 1, # A == 1: restart the counter at 1
(A == 2) ~ (lag(B)+1) # A == 2: meant to be the previous B plus one
))
Error in UseMethod("mutate_") :
no applicable method for 'mutate_' applied to an object of class "function"
In addition: Warning message:
我们可以用 cumsum
创建一个分组列,然后创建 'B' 列
library(dplyr)
# Each A == 1 starts a new run; inside a run, every non-zero A advances B by one
# and every A == 0 leaves B unchanged.
df %>%
  mutate(grp = cumsum(A == 1)) %>%
  group_by(grp) %>%
  mutate(B = cumsum(A != 0)) %>%
  ungroup() %>%
  # The run id is only a helper, so drop it and return a plain data frame.
  select(-grp) %>%
  as.data.frame()
-输出
A Bdesired B
1 1 1 1
2 0 1 1
3 2 2 2
4 0 2 2
5 2 3 3
6 0 3 3
7 2 4 4
8 0 4 4
9 2 5 5
10 0 5 5
11 2 6 6
12 0 6 6
13 1 1 1
14 0 1 1
15 1 1 1
16 0 1 1
17 2 2 2
18 0 2 2
19 2 3 3
20 0 3 3
21 1 1 1
22 0 1 1
23 2 2 2
24 0 2 2
关于你原来的问题,我得到以下信息:
library(tidyverse)
library(lubridate)
# Parse day-month-year strings into Date values so date arithmetic works.
df$date <- dmy(df$date)

# Build two per-id counters from the gap (in days) to the previous record:
#   A advances on every gap longer than 7 days and cycles through 1..6;
#   B advances on every gap longer than 85 days and never wraps.
df <- df %>%
  arrange(id, date) %>%
  group_by(id) %>%
  mutate(
    # The first row of each id has no predecessor; treat its gap as 0 days.
    daysbetween = replace_na(as.numeric(date - lag(date), units = "days"), 0),
    # A gap of exactly 7 (or 85) days used to match neither case_when() branch
    # and produced NA, which turned every later cumsum() value into NA.
    # Such gaps now count as "no step"; only gaps strictly above the threshold
    # advance the counter.
    NewA = as.numeric(daysbetween > 7),
    NewB = as.numeric(daysbetween > 85),
    # Modulo wrap keeps A in 1..6 for any number of steps (subtracting 6 once
    # only handled a single wrap-around).
    A = cumsum(NewA) %% 6 + 1,
    B = 1 + cumsum(NewB)
  ) %>%
  # Drop the grouping so later verbs do not silently operate per id.
  ungroup() %>%
  select(id, date, A, B)
这段代码只有当 A 在达到 6 时重置才有效。我按照上面的建议使用了 cumsum()。