使用R从数据框的列中提取列表元素
Extract list element from column of dataframe using R
我有一个数据框,其中第三列是由列表组成的列表。我希望向现有数据框添加一列,该列只包含列表中 key = wb_id 的那个元素,
其 value 字符串就是我想放进新列的内容。以前我以为它始终是列表中的第 14 个元素。我错了,它的位置似乎会变动,但 key 始终是 wb_id。
标识
因此在下面的示例中,将有一个新列 wb_id 添加到包含 2 行的 df:
> df[[3]][[1]][[14]][["value"]]
[1] "test1_secret_ID"
> df[[3]][[2]][[14]][["value"]]
[1] "test2_secret_ID"
这是数据框
# Example data: a two-row data frame in which every column is a list-column.
# Each element of `fields` is a list of 14 records of the form
# list(key, value, type); the record whose key is "wb_id" carries the ID.
df <- local({
  # Build one record; every record in the example has type "TEXT" and an
  # empty value unless one is given.
  field <- function(key, value = "") {
    list(key = key, value = value, type = "TEXT")
  }
  selected_banks <- "Canadian Tire Bank,Bridgewater Bank,Motive Financial"
  # Build the complete, ordered record list for one row.
  row_fields <- function(email, wb_id) {
    list(
      field("name"),
      field("email", email),
      field("company"),
      field("country"),
      field("city"),
      field("phone"),
      field("state"),
      field("zip"),
      field("last_name"),
      field("notify_pref", "new_leader"),
      field("your_message"),
      field("selected", selected_banks),
      field("confirmed_email"),
      field("wb_id", wb_id)
    )
  }
  structure(
    list(
      email = list("test1@example.com", "test2@example.com"),
      type = list("active", "active"),
      fields = list(
        row_fields("test1@example.com", "test1_secret_ID"),
        row_fields("test2@example.com", "test2_secret_ID")
      ),
      date_created = list("2020-10-24 01:57:10", "2020-10-24 01:57:23")
    ),
    row.names = 1:2,
    class = "data.frame"
  )
})
如果我们需要使用循环(下面的 `\(x)` 写法需要 R 4.1.0 及以上版本),
可以用 sapply 遍历第 3 列,
从每一行的第 14 个元素
中提取 'value' 部分:
# Position-based lookup: for each row's record list, take the `value` of the
# 14th record. This only yields the wb_id while that record sits at index 14.
df$new_column <- sapply(
  df[[3]],
  \(record_list) record_list[[14]]$value
)
df$new_column
#[1] "test1_secret_ID" "test2_secret_ID"
如果我们想按 'key'
提取:
# Key-based lookup: for each row's record list, flag the records whose key is
# "wb_id", keep the first match, and return its value. Works wherever the
# record sits in the list.
# vapply() is used instead of sapply() so the result is always a character
# vector with one string per row (character(0) for zero rows) rather than a
# type that depends on the input.
vapply(
  df[[3]],
  function(x) {
    is_wb_id <- vapply(x, function(y) y$key == "wb_id", logical(1))
    # Errors (subscript out of bounds) if a row has no "wb_id" record.
    x[is_wb_id][[1]]$value
  },
  character(1)
)
#[1] "test1_secret_ID" "test2_secret_ID"
或使用Filter
# Same key-based lookup written with Filter(): keep the records whose key is
# "wb_id" and take the value of the first one.
# vapply(..., character(1)) replaces sapply() so the result is always a
# character vector with one string per row, whatever the input looks like.
vapply(
  df[[3]],
  \(x) Filter(\(y) y$key == "wb_id", x)[[1]]$value,
  character(1)
)
#[1] "test1_secret_ID" "test2_secret_ID"
根据 R NEWS(R 4.1.0):
R now provides a shorthand notation for creating functions, e.g. \(x) x + 1 is parsed as function(x) x + 1.
或者,对于 R
的早期版本,改用 function(x) 的写法:
# Same position-based extraction, spelled with function() instead of the
# \(x) shorthand so it also parses on R versions older than 4.1.0.
df$new_column <- sapply(
  df[[3]],
  function(record_list) record_list[[14]]$value
)
或使用 purrr
中的 map 系列函数:
library(dplyr)
library(purrr)
# Key-based lookup with purrr: for each row's record list in `fields`, keep
# the records whose key is "wb_id", then pluck the `value` of the first one.
# map_chr() requires exactly one string per row.
df <- df %>%
  mutate(
    new_column = map_chr(
      fields,
      function(record_list) {
        matches <- keep(record_list, function(rec) rec$key == "wb_id")
        pluck(matches, 1, "value")
      }
    )
  )
我有一个数据框,其中第三列是由列表组成的列表。我希望向现有数据框添加一列,该列只包含列表中 key = wb_id 的那个元素,
其 value 字符串就是我想放进新列的内容。以前我以为它始终是列表中的第 14 个元素。我错了,它的位置似乎会变动,但 key 始终是 wb_id。
因此在下面的示例中,将有一个新列 wb_id 添加到包含 2 行的 df:
> df[[3]][[1]][[14]][["value"]]
[1] "test1_secret_ID"
> df[[3]][[2]][[14]][["value"]]
[1] "test2_secret_ID"
这是数据框
# Example data: a two-row data frame in which every column is a list-column.
# Each element of `fields` is a list of 14 records of the form
# list(key, value, type); the record whose key is "wb_id" carries the ID.
df <- local({
  # Build one record; every record in the example has type "TEXT" and an
  # empty value unless one is given.
  field <- function(key, value = "") {
    list(key = key, value = value, type = "TEXT")
  }
  selected_banks <- "Canadian Tire Bank,Bridgewater Bank,Motive Financial"
  # Build the complete, ordered record list for one row.
  row_fields <- function(email, wb_id) {
    list(
      field("name"),
      field("email", email),
      field("company"),
      field("country"),
      field("city"),
      field("phone"),
      field("state"),
      field("zip"),
      field("last_name"),
      field("notify_pref", "new_leader"),
      field("your_message"),
      field("selected", selected_banks),
      field("confirmed_email"),
      field("wb_id", wb_id)
    )
  }
  structure(
    list(
      email = list("test1@example.com", "test2@example.com"),
      type = list("active", "active"),
      fields = list(
        row_fields("test1@example.com", "test1_secret_ID"),
        row_fields("test2@example.com", "test2_secret_ID")
      ),
      date_created = list("2020-10-24 01:57:10", "2020-10-24 01:57:23")
    ),
    row.names = 1:2,
    class = "data.frame"
  )
})
如果我们需要使用循环(下面的 `\(x)` 写法需要 R 4.1.0 及以上版本),
可以用 sapply
遍历第 3 列,从每一行的第 14 个元素中提取 'value' 部分:
# Position-based lookup: for each row's record list, take the `value` of the
# 14th record. This only yields the wb_id while that record sits at index 14.
df$new_column <- sapply(
  df[[3]],
  \(record_list) record_list[[14]]$value
)
df$new_column
#[1] "test1_secret_ID" "test2_secret_ID"
如果我们想按 'key' 提取:
sapply(df[[3]], function(x)
x[sapply(x, function(y) y$key == 'wb_id')][[1]]$value)
#[1] "test1_secret_ID" "test2_secret_ID"
或使用Filter
# Same key-based lookup written with Filter(): keep the records whose key is
# "wb_id" and take the value of the first one.
# vapply(..., character(1)) replaces sapply() so the result is always a
# character vector with one string per row, whatever the input looks like.
vapply(
  df[[3]],
  \(x) Filter(\(y) y$key == "wb_id", x)[[1]]$value,
  character(1)
)
#[1] "test1_secret_ID" "test2_secret_ID"
根据 R NEWS(R 4.1.0):
R now provides a shorthand notation for creating functions, e.g. \(x) x + 1 is parsed as function(x) x + 1.
或者,对于 R 的早期版本,
改用 function(x) 的写法:
# Same position-based extraction, spelled with function() instead of the
# \(x) shorthand so it also parses on R versions older than 4.1.0.
df$new_column <- sapply(
  df[[3]],
  function(record_list) record_list[[14]]$value
)
或使用 purrr
中的 map 系列函数:
library(dplyr)
library(purrr)
# Key-based lookup with purrr: for each row's record list in `fields`, keep
# the records whose key is "wb_id", then pluck the `value` of the first one.
# map_chr() requires exactly one string per row.
df <- df %>%
  mutate(
    new_column = map_chr(
      fields,
      function(record_list) {
        matches <- keep(record_list, function(rec) rec$key == "wb_id")
        pluck(matches, 1, "value")
      }
    )
  )