使用列表填充数据框的列
Fill column of dataframe using a list
我有以下数据框:
# Example data: 3 people x 4 things, one row per (person, thing) pair.
# `vals` starts at 0 and `order` numbers the rows 1..4 within each person.
tibble(
  people = rep(paste0("person", 1:3), each = 4),
  things = rep(paste0("thing", 1:4), times = 3),
  vals = 0
) %>%
  group_by(people) %>%
  mutate(order = row_number())
|people |things | vals| order|
|:-------|:------|----:|-----:|
|person1 |thing1 | 0| 1|
|person1 |thing2 | 0| 2|
|person1 |thing3 | 0| 3|
|person1 |thing4 | 0| 4|
|person2 |thing1 | 0| 1|
|person2 |thing2 | 0| 2|
|person2 |thing3 | 0| 3|
|person2 |thing4 | 0| 4|
|person3 |thing1 | 0| 1|
|person3 |thing2 | 0| 2|
|person3 |thing3 | 0| 3|
|person3 |thing4 | 0| 4|
我也有“人”做过的“事”的清单。
# What they did: a named list with one element per person. Each element holds
# the `order` numbers of the things that person did.
# NOTE(review): person3 is NA, presumably meaning "did nothing" -- the desired
# output shows vals = 0 on every person3 row.
list(
person1 = c(1, 3, 4),
person2 = c(2, 3),
person3 = NA
)
列表中的值对应原始数据框中 order 列的值。例如,person1
做了第 1、3、4 件事,对应 order 列中的 1、3、4。
我想根据每个人做过的事情,用这个列表来填充 vals
列:做过的记为 1,没做过的保持为 0。
此示例的所需输出应如下所示:
|people |things | vals| order|
|:-------|:------|----:|-----:|
|person1 |thing1 | 1| 1|
|person1 |thing2 | 0| 2|
|person1 |thing3 | 1| 3|
|person1 |thing4 | 1| 4|
|person2 |thing1 | 0| 1|
|person2 |thing2 | 1| 2|
|person2 |thing3 | 1| 3|
|person2 |thing4 | 0| 4|
|person3 |thing1 | 0| 1|
|person3 |thing2 | 0| 2|
|person3 |thing3 | 0| 3|
|person3 |thing4 | 0| 4|
我尝试过使用 base R 和 case_when,
但始终没弄明白该怎么实现。
这是一种做法:先用 enframe 将 list 转换为两列的数据集(people 和 value),
再与原始数据集做 right_join;然后按 'people' 分组,
用 %in% 检查 'order' 的值是否出现在该组第一个 'value' 元素 unlist 之后的向量中,
最后用一元 + 把逻辑值强制转换为 0/1。
library(dplyr)
library(tibble)
# Turn the named list into a two-column tibble (people, value), attach it to
# every row of the data frame, then flag the rows whose `order` appears in that
# person's list entry. `lst` and `df` are the objects given in the data section.
enframe(lst, name = "people") %>%
  # Explicit join key: no "Joining with `by = ...`" message and no risk of
  # silently joining on additional columns that happen to share a name.
  right_join(df, by = "people") %>%
  group_by(people) %>%
  # `value` is a list column repeated within each group, so its first element
  # is this person's vector. NA never matches an `order`, giving all zeros.
  # The unary + coerces the logical result to integer 0/1.
  mutate(vals = +(order %in% value[[1]])) %>%
  ungroup()
-输出
# A tibble: 12 x 5
# people value things vals order
# <chr> <list> <chr> <int> <int>
# 1 person1 <dbl [3]> thing1 1 1
# 2 person1 <dbl [3]> thing2 0 2
# 3 person1 <dbl [3]> thing3 1 3
# 4 person1 <dbl [3]> thing4 1 4
# 5 person2 <dbl [2]> thing1 0 1
# 6 person2 <dbl [2]> thing2 1 2
# 7 person2 <dbl [2]> thing3 1 3
# 8 person2 <dbl [2]> thing4 0 4
# 9 person3 <lgl [1]> thing1 0 1
#10 person3 <lgl [1]> thing2 0 2
#11 person3 <lgl [1]> thing3 0 3
#12 person3 <lgl [1]> thing4 0 4
Base R 方案:
# Base R approach: reshape `lst` to long form (values = order number,
# ind = person), mark those rows with vals = 1, left-join them onto `df`
# and fill the unmatched rows with 0.
transform(
  merge(
    # Drop the placeholder `vals` so the joined column takes its place.
    subset(df, select = -vals),
    # NA entries carry no order number, so they are removed before the join.
    cbind(subset(stack(lst), !is.na(values)), vals = 1),
    by.x = c("people", "order"),
    by.y = c("ind", "values"),
    # Left join: keep every row of `df`, but never add a row for a list entry
    # that has no matching (people, order) pair in `df`.
    all.x = TRUE
  ),
  vals = replace(vals, is.na(vals), 0)
)[names(df)] # restore the column order of `df`
这给出了
people things vals order
1 person1 thing1 1 1
2 person1 thing2 0 2
3 person1 thing3 1 3
4 person1 thing4 1 4
5 person2 thing1 0 1
6 person2 thing2 1 2
7 person2 thing3 1 3
8 person2 thing4 0 4
9 person3 thing1 0 1
10 person3 thing2 0 2
11 person3 thing3 0 3
12 person3 thing4 0 4
数据
> dput(df)
# dput() representation of the example data frame `df`: 12 rows with columns
# people, things, vals and order. Its class is "grouped_df" and the `groups`
# attribute groups the rows by `people` (rows 1:4, 5:8 and 9:12).
structure(list(people = c("person1", "person1", "person1", "person1",
"person2", "person2", "person2", "person2", "person3", "person3",
"person3", "person3"), things = c("thing1", "thing2", "thing3",
"thing4", "thing1", "thing2", "thing3", "thing4", "thing1", "thing2",
"thing3", "thing4"), vals = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0), order = c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L)), row.names = c(NA,
-12L), groups = structure(list(people = c("person1", "person2",
"person3"), .rows = structure(list(1:4, 5:8, 9:12), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 3L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
> dput(lst)
# dput() representation of `lst`: one named element per person holding the
# `order` numbers for that person (person3 is a single NA).
list(person1 = c(1, 3, 4), person2 = c(2, 3), person3 = NA)
我有以下数据框:
# Example data: 3 people x 4 things, one row per (person, thing) pair.
# `vals` starts at 0 and `order` numbers the rows 1..4 within each person.
tibble(
  people = rep(paste0("person", 1:3), each = 4),
  things = rep(paste0("thing", 1:4), times = 3),
  vals = 0
) %>%
  group_by(people) %>%
  mutate(order = row_number())
|people |things | vals| order|
|:-------|:------|----:|-----:|
|person1 |thing1 | 0| 1|
|person1 |thing2 | 0| 2|
|person1 |thing3 | 0| 3|
|person1 |thing4 | 0| 4|
|person2 |thing1 | 0| 1|
|person2 |thing2 | 0| 2|
|person2 |thing3 | 0| 3|
|person2 |thing4 | 0| 4|
|person3 |thing1 | 0| 1|
|person3 |thing2 | 0| 2|
|person3 |thing3 | 0| 3|
|person3 |thing4 | 0| 4|
我也有“人”做过的“事”的清单。
# What they did: a named list with one element per person. Each element holds
# the `order` numbers of the things that person did.
# NOTE(review): person3 is NA, presumably meaning "did nothing" -- the desired
# output shows vals = 0 on every person3 row.
list(
person1 = c(1, 3, 4),
person2 = c(2, 3),
person3 = NA
)
列表中的值对应原始数据框中 order 列的值。例如,person1
做了第 1、3、4 件事,对应 order 列中的 1、3、4。
我想根据每个人做过的事情,用这个列表来填充 vals
列:做过的记为 1,没做过的保持为 0。
此示例的所需输出应如下所示:
|people |things | vals| order|
|:-------|:------|----:|-----:|
|person1 |thing1 | 1| 1|
|person1 |thing2 | 0| 2|
|person1 |thing3 | 1| 3|
|person1 |thing4 | 1| 4|
|person2 |thing1 | 0| 1|
|person2 |thing2 | 1| 2|
|person2 |thing3 | 1| 3|
|person2 |thing4 | 0| 4|
|person3 |thing1 | 0| 1|
|person3 |thing2 | 0| 2|
|person3 |thing3 | 0| 3|
|person3 |thing4 | 0| 4|
我尝试过使用 base R 和 case_when,
但始终没弄明白该怎么实现。
这是一种做法:先用 enframe 将 list 转换为两列的数据集(people 和 value),
再与原始数据集做 right_join;然后按 'people' 分组,
用 %in% 检查 'order' 的值是否出现在该组第一个 'value' 元素 unlist 之后的向量中,
最后用一元 + 把逻辑值强制转换为 0/1。
library(dplyr)
library(tibble)
# Turn the named list into a two-column tibble (people, value), attach it to
# every row of the data frame, then flag the rows whose `order` appears in that
# person's list entry. `lst` and `df` are the objects given in the data section.
enframe(lst, name = "people") %>%
  # Explicit join key: no "Joining with `by = ...`" message and no risk of
  # silently joining on additional columns that happen to share a name.
  right_join(df, by = "people") %>%
  group_by(people) %>%
  # `value` is a list column repeated within each group, so its first element
  # is this person's vector. NA never matches an `order`, giving all zeros.
  # The unary + coerces the logical result to integer 0/1.
  mutate(vals = +(order %in% value[[1]])) %>%
  ungroup()
-输出
# A tibble: 12 x 5
# people value things vals order
# <chr> <list> <chr> <int> <int>
# 1 person1 <dbl [3]> thing1 1 1
# 2 person1 <dbl [3]> thing2 0 2
# 3 person1 <dbl [3]> thing3 1 3
# 4 person1 <dbl [3]> thing4 1 4
# 5 person2 <dbl [2]> thing1 0 1
# 6 person2 <dbl [2]> thing2 1 2
# 7 person2 <dbl [2]> thing3 1 3
# 8 person2 <dbl [2]> thing4 0 4
# 9 person3 <lgl [1]> thing1 0 1
#10 person3 <lgl [1]> thing2 0 2
#11 person3 <lgl [1]> thing3 0 3
#12 person3 <lgl [1]> thing4 0 4
Base R 方案:
# Base R approach: reshape `lst` to long form (values = order number,
# ind = person), mark those rows with vals = 1, left-join them onto `df`
# and fill the unmatched rows with 0.
transform(
  merge(
    # Drop the placeholder `vals` so the joined column takes its place.
    subset(df, select = -vals),
    # NA entries carry no order number, so they are removed before the join.
    cbind(subset(stack(lst), !is.na(values)), vals = 1),
    by.x = c("people", "order"),
    by.y = c("ind", "values"),
    # Left join: keep every row of `df`, but never add a row for a list entry
    # that has no matching (people, order) pair in `df`.
    all.x = TRUE
  ),
  vals = replace(vals, is.na(vals), 0)
)[names(df)] # restore the column order of `df`
这给出了
people things vals order
1 person1 thing1 1 1
2 person1 thing2 0 2
3 person1 thing3 1 3
4 person1 thing4 1 4
5 person2 thing1 0 1
6 person2 thing2 1 2
7 person2 thing3 1 3
8 person2 thing4 0 4
9 person3 thing1 0 1
10 person3 thing2 0 2
11 person3 thing3 0 3
12 person3 thing4 0 4
数据
> dput(df)
# dput() representation of the example data frame `df`: 12 rows with columns
# people, things, vals and order. Its class is "grouped_df" and the `groups`
# attribute groups the rows by `people` (rows 1:4, 5:8 and 9:12).
structure(list(people = c("person1", "person1", "person1", "person1",
"person2", "person2", "person2", "person2", "person3", "person3",
"person3", "person3"), things = c("thing1", "thing2", "thing3",
"thing4", "thing1", "thing2", "thing3", "thing4", "thing1", "thing2",
"thing3", "thing4"), vals = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0), order = c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L)), row.names = c(NA,
-12L), groups = structure(list(people = c("person1", "person2",
"person3"), .rows = structure(list(1:4, 5:8, 9:12), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 3L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
> dput(lst)
# dput() representation of `lst`: one named element per person holding the
# `order` numbers for that person (person3 is a single NA).
list(person1 = c(1, 3, 4), person2 = c(2, 3), person3 = NA)