在尝试将值与 R 中的另一个对象匹配的函数上使用 lapply
Using lapply on a function that attempts to match values to another object in R
我希望有人能帮助我。我在 output
中有一份 ID 和血液结果列表
dput(output)
list(BNP = structure(list(record_id = structure(c("113-1", "113-10",
"113-11", "113-12", "113-13", "113-14"), label = c(record_id = "Record ID"), class = c("labelled",
"character")), BNP = c(67.8, 1873.3, 784.5, 82.3, 156.5, 116.4
)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
)), proBNP = structure(list(record_id = structure(c("103-13",
"103-16", "103-17", "103-20", "104-1", "106-12", "106-13", "106-14",
"106-2", "106-3", "106-4", "106-6", "106-7", "106-8", "112-1"
), label = c(record_id = "Record ID"), class = c("labelled",
"character")), proBNP = c(66, 1865, 6143, 275, 1240, 992, 1116,
8469, 165, 2486, 85, 100, 9231, 8561, 815)), row.names = c(NA,
-15L), class = c("tbl_df", "tbl", "data.frame")), troponin = structure(list(
record_id = structure(c("101-2", "103-13", "103-14", "103-16",
"103-17", "104-1", "104-2", "106-12", "106-13", "106-14",
"106-5", "106-6", "106-7", "106-8", "112-1", "113-1", "113-10",
"113-11", "113-12", "113-13", "113-14"), label = c(record_id = "Record ID"), class = c("labelled",
"character")), troponin = c(29900, 5, 3, 5, 422, 11300, 10,
5.1, 1.5, 159.7, 15.9, 1863, 92.8, 706.5, 643, 50, 110, 60,
30, 130, 10)), row.names = c(NA, -21L), class = c("tbl_df",
"tbl", "data.frame")), CRP = structure(list(record_id = structure(c("101-2",
"103-13", "103-14", "103-15", "103-16", "103-17", "103-19", "103-20",
"104-1", "104-2", "106-1", "106-11", "106-12", "106-13", "106-14",
"106-2", "106-3", "106-4", "106-5", "106-6", "106-7", "106-8",
"112-1", "113-1", "113-10", "113-11", "113-12", "113-13", "113-14"
), label = c(record_id = "Record ID"), class = c("labelled",
"character")), CRP = c(54.8, 78, 229, 166, 77, 345, 25, 124,
225.4, 156.2, 141.11, 110, 96.87, 126, 238.97, 6.19, 135.7, 135,
138.7, 84.7, 242, 299, 41, 114.7, 156.2, 112.3, 394.3, 179.5,
93)), row.names = c(NA, -29L), class = c("tbl_df", "tbl", "data.frame"
)), ferritin = structure(list(record_id = structure(c("101-2",
"103-13", "103-14", "103-15", "103-16", "103-17", "103-20", "104-1",
"106-11", "106-12", "106-13", "106-14", "106-2", "106-3", "106-4",
"106-5", "106-6", "106-8", "112-1", "113-10", "113-11", "113-12",
"113-13", "113-14"), label = c(record_id = "Record ID"), class = c("labelled",
"character")), ferritin = c(253.97, 314, 438, 199, 390, 1342,
128, 462.6, 125.8, 428, 237, 302.23, 1651, 133.6, 167, 1746,
343, 1145.96, 697.76, 690.2, 395.4, 1492.2, 275.4, 254.2)), row.names = c(NA,
-24L), class = c("tbl_df", "tbl", "data.frame")))
我在一个名为 identity
的单独 df 中也有一个 ID 列表
dput(identity)
structure(list(identity = c("101-2", "103-13", "103-14", "103-15",
"103-16", "103-17", "103-19", "103-20", "104-1", "104-2", "106-1",
"106-11", "106-12", "106-13", "106-14", "106-2", "106-3", "106-4",
"106-5", "106-6", "106-7", "106-8", "112-1", "113-1", "113-10",
"113-11", "113-12", "113-13", "113-14")), row.names = c(NA, -29L
), class = "data.frame")
我有一个名为 key 的血液测试名称向量,我想对其中的每一项应用我的函数:
c("BNP", "proBNP", "troponin", "CRP", "ferritin")
key
中的每个变量对应于其中一项血液测试。我正在尝试遍历 output
列表中的每个血液测试,然后将这些结果和相应的 ID 与 identity
数据框进行匹配,为每个血液测试创建一个新列。我认为问题可能是循环列表然后尝试输出到对象?我可能过于复杂了!
我失败的尝试:
# Match output values with IDs.
#
# Intended use (per the notes inside the body): for one blood test, look up
# its values in the global list `output` and align them to the IDs held in `y`.
#
# x: the blood-test name. It is never evaluated; deparse(substitute(x)) turns
#    the text of the argument expression into the name used to index `output`.
# y: object indexed with `$`; a `new` element is assigned on the local copy.
# Returns: only the `new` vector, not the modified `y`.
match_IDs <- function(x, y) {
# output[[ c(deparse(substitute(x)), "record_id") ]] gives record ID for blood test
# output[[ c(deparse(substitute(x)), deparse(substitute(x))) ]] gives blood value for blood test
# NOTE(review): the IDs are read from a column literally named `df2$record_id`.
# The `identity` data frame shown in this post only has a column `identity`,
# so this lookup presumably needs to be y$identity -- confirm against the caller.
y$new <- output[[ c(deparse(substitute(x)), deparse(substitute(x))) ]][match(y$`df2$record_id`, output[[ c(deparse(substitute(x)), "record_id") ]] )]
return(y$new)
}
# Intended to call match_IDs once per blood-test name in `key` and store the
# list of results in `output2`.
# NOTE(review): `identity` occupies lapply's FUN position here, so the
# anonymous function is forwarded as an extra argument instead of being the
# function that is applied.
# NOTE(review): do.call() supplies only `x`; match_IDs' `y` has no default and
# is never passed.
lapply(key, identity, function(x) do.call("match_IDs", list(as.name(x)))) -> output2
在 match_IDs
函数之外,匹配代码有效,但我想自动执行该过程而不是硬编码。这就是我的目标:
structure(list(identity = c("101-2", "103-13", "103-14", "103-15",
"103-16", "103-17", "103-19", "103-20", "104-1", "104-2", "106-1",
"106-11", "106-12", "106-13", "106-14", "106-2", "106-3", "106-4",
"106-5", "106-6", "106-7", "106-8", "112-1", "113-1", "113-10",
"113-11", "113-12", "113-13", "113-14"), baseline_CRP = c(54.8,
78, 229, 166, 77, 345, 25, 124, 225.4, 156.2, 141.11, 110, 96.87,
126, 238.97, 6.19, 135.7, 135, 138.7, 84.7, 242, 299, 41, 114.7,
156.2, 112.3, 394.3, 179.5, 93), baseline_bnp = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 67.8, 1873.3, 784.5, 82.3, 156.5, 116.4), baseline_ferritin = c(253.97,
314, 438, 199, 390, 1342, NA, 128, 462.6, NA, NA, 125.8, 428,
237, 302.23, 1651, 133.6, 167, 1746, 343, NA, 1145.96, 697.76,
NA, 690.2, 395.4, 1492.2, 275.4, 254.2), baseline_trop = c(29900,
5, 3, NA, 5, 422, NA, NA, 11300, 10, NA, NA, 5.1, 1.5, 159.7,
NA, NA, NA, 15.9, 1863, 92.8, 706.5, 643, 50, 110, 60, 30, 130,
10), baseline_proBNP = c(NA, 66, NA, NA, 1865, 6143, NA, 275,
1240, NA, NA, NA, 992, 1116, 8469, 165, 2486, 85, NA, 100, 9231,
8561, 815, NA, NA, NA, NA, NA, NA)), row.names = c(NA, -29L), class = c("tbl_df",
"tbl", "data.frame"))
如果有人有什么建议,我很想学习!
如果您修复 output
,使其具有与 identity
相同的列名和类型,我们就可以进行一系列连接。
library(dplyr)
library(purrr)
# Step 1: in every blood-test table, replace the labelled `record_id` column
# with a plain character `identity` column so the key name and type match the
# `identity` data frame.
output_fixed <- map(output, function(tbl) {
  keyed <- mutate(tbl, identity = as.character(record_id))
  select(keyed, -record_id)
})
# Step 2: starting from `identity`, left-join each table in turn; every join
# adds that blood test's value column.
output2 <- reduce(
  output_fixed,
  function(joined, tbl) left_join(joined, tbl, by = "identity"),
  .init = identity
)
head(output2)
identity BNP proBNP troponin CRP ferritin
1 101-2 NA NA 29900 54.8 253.97
2 103-13 NA 66 5 78.0 314.00
3 103-14 NA NA 3 229.0 438.00
4 103-15 NA NA NA 166.0 199.00
5 103-16 NA 1865 5 77.0 390.00
6 103-17 NA 6143 422 345.0 1342.00
我希望有人能帮助我。我在 output 中有一份 ID 和血液结果列表:
dput(output)
list(BNP = structure(list(record_id = structure(c("113-1", "113-10",
"113-11", "113-12", "113-13", "113-14"), label = c(record_id = "Record ID"), class = c("labelled",
"character")), BNP = c(67.8, 1873.3, 784.5, 82.3, 156.5, 116.4
)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
)), proBNP = structure(list(record_id = structure(c("103-13",
"103-16", "103-17", "103-20", "104-1", "106-12", "106-13", "106-14",
"106-2", "106-3", "106-4", "106-6", "106-7", "106-8", "112-1"
), label = c(record_id = "Record ID"), class = c("labelled",
"character")), proBNP = c(66, 1865, 6143, 275, 1240, 992, 1116,
8469, 165, 2486, 85, 100, 9231, 8561, 815)), row.names = c(NA,
-15L), class = c("tbl_df", "tbl", "data.frame")), troponin = structure(list(
record_id = structure(c("101-2", "103-13", "103-14", "103-16",
"103-17", "104-1", "104-2", "106-12", "106-13", "106-14",
"106-5", "106-6", "106-7", "106-8", "112-1", "113-1", "113-10",
"113-11", "113-12", "113-13", "113-14"), label = c(record_id = "Record ID"), class = c("labelled",
"character")), troponin = c(29900, 5, 3, 5, 422, 11300, 10,
5.1, 1.5, 159.7, 15.9, 1863, 92.8, 706.5, 643, 50, 110, 60,
30, 130, 10)), row.names = c(NA, -21L), class = c("tbl_df",
"tbl", "data.frame")), CRP = structure(list(record_id = structure(c("101-2",
"103-13", "103-14", "103-15", "103-16", "103-17", "103-19", "103-20",
"104-1", "104-2", "106-1", "106-11", "106-12", "106-13", "106-14",
"106-2", "106-3", "106-4", "106-5", "106-6", "106-7", "106-8",
"112-1", "113-1", "113-10", "113-11", "113-12", "113-13", "113-14"
), label = c(record_id = "Record ID"), class = c("labelled",
"character")), CRP = c(54.8, 78, 229, 166, 77, 345, 25, 124,
225.4, 156.2, 141.11, 110, 96.87, 126, 238.97, 6.19, 135.7, 135,
138.7, 84.7, 242, 299, 41, 114.7, 156.2, 112.3, 394.3, 179.5,
93)), row.names = c(NA, -29L), class = c("tbl_df", "tbl", "data.frame"
)), ferritin = structure(list(record_id = structure(c("101-2",
"103-13", "103-14", "103-15", "103-16", "103-17", "103-20", "104-1",
"106-11", "106-12", "106-13", "106-14", "106-2", "106-3", "106-4",
"106-5", "106-6", "106-8", "112-1", "113-10", "113-11", "113-12",
"113-13", "113-14"), label = c(record_id = "Record ID"), class = c("labelled",
"character")), ferritin = c(253.97, 314, 438, 199, 390, 1342,
128, 462.6, 125.8, 428, 237, 302.23, 1651, 133.6, 167, 1746,
343, 1145.96, 697.76, 690.2, 395.4, 1492.2, 275.4, 254.2)), row.names = c(NA,
-24L), class = c("tbl_df", "tbl", "data.frame")))
我在一个名为 identity 的单独 df 中也有一个 ID 列表:
dput(identity)
structure(list(identity = c("101-2", "103-13", "103-14", "103-15",
"103-16", "103-17", "103-19", "103-20", "104-1", "104-2", "106-1",
"106-11", "106-12", "106-13", "106-14", "106-2", "106-3", "106-4",
"106-5", "106-6", "106-7", "106-8", "112-1", "113-1", "113-10",
"113-11", "113-12", "113-13", "113-14")), row.names = c(NA, -29L
), class = "data.frame")
我有一个名为 key 的血液测试名称向量,我想对其中的每一项应用我的函数:
c("BNP", "proBNP", "troponin", "CRP", "ferritin")
key
中的每个变量对应于其中一项血液测试。我正在尝试遍历 output
列表中的每个血液测试,然后将这些结果和相应的 ID 与 identity
数据框进行匹配,为每个血液测试创建一个新列。我认为问题可能是循环列表然后尝试输出到对象?我可能过于复杂了!
我失败的尝试:
# Match output values with IDs.
#
# Intended use (per the notes inside the body): for one blood test, look up
# its values in the global list `output` and align them to the IDs held in `y`.
#
# x: the blood-test name. It is never evaluated; deparse(substitute(x)) turns
#    the text of the argument expression into the name used to index `output`.
# y: object indexed with `$`; a `new` element is assigned on the local copy.
# Returns: only the `new` vector, not the modified `y`.
match_IDs <- function(x, y) {
# output[[ c(deparse(substitute(x)), "record_id") ]] gives record ID for blood test
# output[[ c(deparse(substitute(x)), deparse(substitute(x))) ]] gives blood value for blood test
# NOTE(review): the IDs are read from a column literally named `df2$record_id`.
# The `identity` data frame shown in this post only has a column `identity`,
# so this lookup presumably needs to be y$identity -- confirm against the caller.
y$new <- output[[ c(deparse(substitute(x)), deparse(substitute(x))) ]][match(y$`df2$record_id`, output[[ c(deparse(substitute(x)), "record_id") ]] )]
return(y$new)
}
# Intended to call match_IDs once per blood-test name in `key` and store the
# list of results in `output2`.
# NOTE(review): `identity` occupies lapply's FUN position here, so the
# anonymous function is forwarded as an extra argument instead of being the
# function that is applied.
# NOTE(review): do.call() supplies only `x`; match_IDs' `y` has no default and
# is never passed.
lapply(key, identity, function(x) do.call("match_IDs", list(as.name(x)))) -> output2
在 match_IDs
函数之外,匹配代码有效,但我想自动执行该过程而不是硬编码。这就是我的目标:
structure(list(identity = c("101-2", "103-13", "103-14", "103-15",
"103-16", "103-17", "103-19", "103-20", "104-1", "104-2", "106-1",
"106-11", "106-12", "106-13", "106-14", "106-2", "106-3", "106-4",
"106-5", "106-6", "106-7", "106-8", "112-1", "113-1", "113-10",
"113-11", "113-12", "113-13", "113-14"), baseline_CRP = c(54.8,
78, 229, 166, 77, 345, 25, 124, 225.4, 156.2, 141.11, 110, 96.87,
126, 238.97, 6.19, 135.7, 135, 138.7, 84.7, 242, 299, 41, 114.7,
156.2, 112.3, 394.3, 179.5, 93), baseline_bnp = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 67.8, 1873.3, 784.5, 82.3, 156.5, 116.4), baseline_ferritin = c(253.97,
314, 438, 199, 390, 1342, NA, 128, 462.6, NA, NA, 125.8, 428,
237, 302.23, 1651, 133.6, 167, 1746, 343, NA, 1145.96, 697.76,
NA, 690.2, 395.4, 1492.2, 275.4, 254.2), baseline_trop = c(29900,
5, 3, NA, 5, 422, NA, NA, 11300, 10, NA, NA, 5.1, 1.5, 159.7,
NA, NA, NA, 15.9, 1863, 92.8, 706.5, 643, 50, 110, 60, 30, 130,
10), baseline_proBNP = c(NA, 66, NA, NA, 1865, 6143, NA, 275,
1240, NA, NA, NA, 992, 1116, 8469, 165, 2486, 85, NA, 100, 9231,
8561, 815, NA, NA, NA, NA, NA, NA)), row.names = c(NA, -29L), class = c("tbl_df",
"tbl", "data.frame"))
如果有人有什么建议,我很想学习!
如果您修复 output
,使其具有与 identity
相同的列名和类型,我们就可以进行一系列连接。
library(dplyr)
library(purrr)
# Step 1: in every blood-test table, replace the labelled `record_id` column
# with a plain character `identity` column so the key name and type match the
# `identity` data frame.
output_fixed <- map(output, function(tbl) {
  keyed <- mutate(tbl, identity = as.character(record_id))
  select(keyed, -record_id)
})
# Step 2: starting from `identity`, left-join each table in turn; every join
# adds that blood test's value column.
output2 <- reduce(
  output_fixed,
  function(joined, tbl) left_join(joined, tbl, by = "identity"),
  .init = identity
)
head(output2)
identity BNP proBNP troponin CRP ferritin
1 101-2 NA NA 29900 54.8 253.97
2 103-13 NA 66 5 78.0 314.00
3 103-14 NA NA 3 229.0 438.00
4 103-15 NA NA NA 166.0 199.00
5 103-16 NA 1865 5 77.0 390.00
6 103-17 NA 6143 422 345.0 1342.00