如何使用 pivot_longer 转为长格式,并将变量名与辅助表匹配?
How to pivot longer and match the variable names with a secondary table?
我正在为跨越这两个表的逻辑而苦苦挣扎:
我的数据框是:
df6
每行包含一个 id 和若干数值;v1、v2 和 v3 中始终只有一个非零值,表示该变量的 yummyness(美味度)值。
id v1 v2 v3
1 0 0 3
2 2 0 0
3 0 1 0
df_yummy 包含 v1、v2 和 v3 对应的名称以及“美味度”值。
VarId VarName Yummyness
v1 Apple 2
v2 Lemon 1
v3 Peach 3
我想要的结果应该是
id Fruit Yummyness
1 Peach 3
2 Apple 2
3 Lemon 1
编辑:错别字更正
这是 tidyverse 中针对您的问题的一种解决方案(确保 v1、v2 和 v3 每行仅显示一个不同于零的值非常重要,否则您将得到重复或丢失数据):
# Dummy data: df1 holds one non-zero score per id; df2 maps each score
# column (VarId) to a fruit name (VarName) and its Yummyness.
library(tidyverse)

df1 <- data.table::fread("id v1 v2 v3
1 0 0 3
2 2 0 0
3 0 1 0")
df2 <- data.table::fread("VarId VarName Yummyness
v1 Apple 2
v2 Lemon 1
v3 Peach 3")

df1 %>%
  # One row per (id, score column); the cell value lands in `vals`.
  tidyr::pivot_longer(-id, names_to = "VarId", values_to = "vals") %>%
  # Keep only the non-zero cell of each id.
  dplyr::filter(vals != 0) %>%
  # left_join (not right_join): the result must be driven by the ids. A
  # right_join keeps every row of df2, so a fruit that no id refers to
  # would show up as an extra row with id = NA.
  dplyr::left_join(df2, by = "VarId") %>%
  dplyr::select(id, Fruit = VarName, Yummyness)
# A tibble: 3 x 3
id Fruit Yummyness
<int> <chr> <int>
1 1 Peach 3
2 2 Apple 2
3 3 Lemon 1
不确定这是否是您要查找的内容:
library(dplyr)

# Collapse v1:v3 into their row total; with a single non-zero cell per row
# the total equals that cell. `.keep = "unused"` drops v1:v3 afterwards.
row_totals <- mutate(
  df6,
  Yummyness = rowSums(across(v1:v3)),
  .keep = "unused"
)

# Look the fruit up by that total. NOTE: the match is on the Yummyness value
# itself, not on the column name, so it relies on the values in df_yummy
# being distinct.
with_fruit <- left_join(row_totals, df_yummy, by = "Yummyness")

select(with_fruit, id, Fruit = VarName, Yummyness)
returns
# A tibble: 3 x 3
id Fruit Yummyness
<dbl> <chr> <dbl>
1 1 Peach 3
2 2 Apple 2
3 3 Lemon 1
数据
# Rebuild the two input tables as tibble-classed data frames that carry a
# readr-style column specification in their "spec" attribute.

# Empty column collector of the given type, e.g. "double" or "character".
new_collector <- function(type) {
  structure(list(), class = c(paste0("collector_", type), "collector"))
}

# Column specification wrapping a named list of collectors.
new_col_spec <- function(cols) {
  structure(
    list(cols = cols, default = new_collector("guess"), skip = 1L),
    class = "col_spec"
  )
}

tbl_classes <- c("spec_tbl_df", "tbl_df", "tbl", "data.frame")

df6 <- structure(
  list(
    id = c(1, 2, 3),
    v1 = c(0, 2, 0),
    v2 = c(0, 0, 1),
    v3 = c(3, 0, 0)
  ),
  class = tbl_classes,
  row.names = c(NA, -3L),
  spec = new_col_spec(list(
    id = new_collector("double"),
    v1 = new_collector("double"),
    v2 = new_collector("double"),
    v3 = new_collector("double")
  ))
)

df_yummy <- structure(
  list(
    VarId = c("v1", "v2", "v3"),
    VarName = c("Apple", "Lemon", "Peach"),
    Yummyness = c(2, 1, 3)
  ),
  class = tbl_classes,
  row.names = c(NA, -3L),
  spec = new_col_spec(list(
    VarId = new_collector("character"),
    VarName = new_collector("character"),
    Yummyness = new_collector("double")
  ))
)
这是一个使用基础 R 的简单解决方案。
# Dummy data: df6 has one positive score per id; df_yummy maps each score
# column (VarId) to a fruit name (VarName) and its Yummyness.
df6 <- data.frame(
  id = c(1, 2, 3),
  v1 = c(0, 2, 0),
  v2 = c(0, 0, 1),
  v3 = c(3, 0, 0)
)
df_yummy <- data.frame(
  VarId = c("v1", "v2", "v3"),
  VarName = c("Apple", "Lemon", "Peach"),
  Yummyness = c(2, 1, 3)
)

# The lookup below needs exactly one positive score in every row. Without
# this check a violating row makes apply() return a list or matrix and the
# script dies with an unrelated-looking subscript/replacement error.
if (any(rowSums(df6[-1] > 0, na.rm = TRUE) != 1)) {
  stop("each row of df6 must contain exactly one positive value", call. = FALSE)
}

# For every row, find which score column is positive and store its name.
df6$VarId <- names(df6[-1])[apply(X = df6[-1] > 0, MARGIN = 1, FUN = which)]

# Join on the explicit key instead of on whichever column names coincide,
# then keep and rename the columns of interest.
df_result <- merge(df6, df_yummy, by = "VarId")[, c("id", "VarName", "Yummyness")]
names(df_result) <- c("id", "Fruit", "Yummyness")
> df_result[order(df_result$id), ]
id Fruit Yummyness
3 1 Peach 3
1 2 Apple 2
2 3 Lemon 1
此解决方案的关键在于 apply(X = df6[-1] > 0, MARGIN = 1, FUN = which)。它先取 df6 并去掉第一列(即 [-1] 部分),再判断哪些元素大于零;然后用 apply 函数按行(即 MARGIN = 1 部分)应用 which 函数,也就是说,对每一行返回其非零值所在的列索引。接着用这些索引从 names(df6[-1]) 中取出每一行对应的列名,剩下的就只是一次合并(merge)和列重命名了。
这是一个使用 data.table
包的稍微更紧凑的版本。
library(data.table)

# Dummy data
df6 <- data.table(
  id = c(1, 2, 3),
  v1 = c(0, 2, 0), v2 = c(0, 0, 1), v3 = c(3, 0, 0)
)
df_yummy <- data.table(
  VarId = c("v1", "v2", "v3"),
  VarName = c("Apple", "Lemon", "Peach"),
  Yummyness = c(2, 1, 3)
)

# Names of the score columns (everything after the leading id column).
score_cols <- names(df6)[-1]

# Per id, .SD is that id's row without the id column; which() gives the
# position of the positive score, which selects the matching column name.
df6[, VarId := score_cols[which(.SD > 0)], by = id]

# Attach the fruit details, then order by id and keep/rename the columns.
joined <- merge(df6, df_yummy)
df_result <- joined[order(id), .(id, Fruit = VarName, Yummyness)]
其原理基本相同:我们按 id 分组,对 data.table 特有的 .SD 使用 which 函数,再用得到的索引选出正确的列名。得益于 data.table 的语法,代码会稍微紧凑一些,但本质上并没有什么不同。
另一种 data.table 方案:
# Long-format lookup with data.table. Assumes data.table is attached and that
# df6 / df_yummy already exist; setDT() converts both to data.tables by
# reference.
long_scores <- melt(
  setDT(df6),
  id.vars = "id",           # spelled out; `id.var` relied on partial matching
  variable.factor = FALSE   # keep the column names as character for the join
)[value != 0]               # only the non-zero cell of each id

# Attach the fruit to each remaining (id, variable) pair. nomatch = NULL drops
# df_yummy rows that no id refers to instead of emitting rows with id = NA.
matched <- long_scores[
  setDT(df_yummy),
  on = c(variable = "VarId"),
  nomatch = NULL
]

# setorder() takes its sort columns through `...`; it has no `by` argument.
setorder(matched, id)[, .(id, VarName, Yummyness)]
结果为:
id VarName Yummyness
1: 1 Peach 3
2: 2 Apple 2
3: 3 Lemon 1
我正在为跨越这两个表的逻辑而苦苦挣扎:
我的数据框是:
df6
每行包含一个 id 和若干数值;v1、v2 和 v3 中始终只有一个非零值,表示该变量的 yummyness(美味度)值。
id v1 v2 v3
1 0 0 3
2 2 0 0
3 0 1 0
df_yummy 包含 v1、v2 和 v3 对应的名称以及“美味度”值。
VarId VarName Yummyness
v1 Apple 2
v2 Lemon 1
v3 Peach 3
我想要的结果应该是
id Fruit Yummyness
1 Peach 3
2 Apple 2
3 Lemon 1
编辑:错别字更正
这是 tidyverse 中针对您的问题的一种解决方案(确保 v1、v2 和 v3 每行仅显示一个不同于零的值非常重要,否则您将得到重复或丢失数据):
# Dummy data: df1 holds one non-zero score per id; df2 maps each score
# column (VarId) to a fruit name (VarName) and its Yummyness.
library(tidyverse)

df1 <- data.table::fread("id v1 v2 v3
1 0 0 3
2 2 0 0
3 0 1 0")
df2 <- data.table::fread("VarId VarName Yummyness
v1 Apple 2
v2 Lemon 1
v3 Peach 3")

df1 %>%
  # One row per (id, score column); the cell value lands in `vals`.
  tidyr::pivot_longer(-id, names_to = "VarId", values_to = "vals") %>%
  # Keep only the non-zero cell of each id.
  dplyr::filter(vals != 0) %>%
  # left_join (not right_join): the result must be driven by the ids. A
  # right_join keeps every row of df2, so a fruit that no id refers to
  # would show up as an extra row with id = NA.
  dplyr::left_join(df2, by = "VarId") %>%
  dplyr::select(id, Fruit = VarName, Yummyness)
# A tibble: 3 x 3
id Fruit Yummyness
<int> <chr> <int>
1 1 Peach 3
2 2 Apple 2
3 3 Lemon 1
不确定这是否是您要查找的内容:
library(dplyr)

# Collapse v1:v3 into their row total; with a single non-zero cell per row
# the total equals that cell. `.keep = "unused"` drops v1:v3 afterwards.
row_totals <- mutate(
  df6,
  Yummyness = rowSums(across(v1:v3)),
  .keep = "unused"
)

# Look the fruit up by that total. NOTE: the match is on the Yummyness value
# itself, not on the column name, so it relies on the values in df_yummy
# being distinct.
with_fruit <- left_join(row_totals, df_yummy, by = "Yummyness")

select(with_fruit, id, Fruit = VarName, Yummyness)
returns
# A tibble: 3 x 3
id Fruit Yummyness
<dbl> <chr> <dbl>
1 1 Peach 3
2 2 Apple 2
3 3 Lemon 1
数据
# Rebuild the two input tables as tibble-classed data frames that carry a
# readr-style column specification in their "spec" attribute.

# Empty column collector of the given type, e.g. "double" or "character".
new_collector <- function(type) {
  structure(list(), class = c(paste0("collector_", type), "collector"))
}

# Column specification wrapping a named list of collectors.
new_col_spec <- function(cols) {
  structure(
    list(cols = cols, default = new_collector("guess"), skip = 1L),
    class = "col_spec"
  )
}

tbl_classes <- c("spec_tbl_df", "tbl_df", "tbl", "data.frame")

df6 <- structure(
  list(
    id = c(1, 2, 3),
    v1 = c(0, 2, 0),
    v2 = c(0, 0, 1),
    v3 = c(3, 0, 0)
  ),
  class = tbl_classes,
  row.names = c(NA, -3L),
  spec = new_col_spec(list(
    id = new_collector("double"),
    v1 = new_collector("double"),
    v2 = new_collector("double"),
    v3 = new_collector("double")
  ))
)

df_yummy <- structure(
  list(
    VarId = c("v1", "v2", "v3"),
    VarName = c("Apple", "Lemon", "Peach"),
    Yummyness = c(2, 1, 3)
  ),
  class = tbl_classes,
  row.names = c(NA, -3L),
  spec = new_col_spec(list(
    VarId = new_collector("character"),
    VarName = new_collector("character"),
    Yummyness = new_collector("double")
  ))
)
这是一个使用基础 R 的简单解决方案。
# Dummy data: df6 has one positive score per id; df_yummy maps each score
# column (VarId) to a fruit name (VarName) and its Yummyness.
df6 <- data.frame(
  id = c(1, 2, 3),
  v1 = c(0, 2, 0),
  v2 = c(0, 0, 1),
  v3 = c(3, 0, 0)
)
df_yummy <- data.frame(
  VarId = c("v1", "v2", "v3"),
  VarName = c("Apple", "Lemon", "Peach"),
  Yummyness = c(2, 1, 3)
)

# The lookup below needs exactly one positive score in every row. Without
# this check a violating row makes apply() return a list or matrix and the
# script dies with an unrelated-looking subscript/replacement error.
if (any(rowSums(df6[-1] > 0, na.rm = TRUE) != 1)) {
  stop("each row of df6 must contain exactly one positive value", call. = FALSE)
}

# For every row, find which score column is positive and store its name.
df6$VarId <- names(df6[-1])[apply(X = df6[-1] > 0, MARGIN = 1, FUN = which)]

# Join on the explicit key instead of on whichever column names coincide,
# then keep and rename the columns of interest.
df_result <- merge(df6, df_yummy, by = "VarId")[, c("id", "VarName", "Yummyness")]
names(df_result) <- c("id", "Fruit", "Yummyness")
> df_result[order(df_result$id), ]
id Fruit Yummyness
3 1 Peach 3
1 2 Apple 2
2 3 Lemon 1
此解决方案的关键在于 apply(X = df6[-1] > 0, MARGIN = 1, FUN = which)。它先取 df6 并去掉第一列(即 [-1] 部分),再判断哪些元素大于零;然后用 apply 函数按行(即 MARGIN = 1 部分)应用 which 函数,也就是说,对每一行返回其非零值所在的列索引。接着用这些索引从 names(df6[-1]) 中取出每一行对应的列名,剩下的就只是一次合并(merge)和列重命名了。
这是一个使用 data.table
包的稍微更紧凑的版本。
library(data.table)

# Dummy data
df6 <- data.table(
  id = c(1, 2, 3),
  v1 = c(0, 2, 0), v2 = c(0, 0, 1), v3 = c(3, 0, 0)
)
df_yummy <- data.table(
  VarId = c("v1", "v2", "v3"),
  VarName = c("Apple", "Lemon", "Peach"),
  Yummyness = c(2, 1, 3)
)

# Names of the score columns (everything after the leading id column).
score_cols <- names(df6)[-1]

# Per id, .SD is that id's row without the id column; which() gives the
# position of the positive score, which selects the matching column name.
df6[, VarId := score_cols[which(.SD > 0)], by = id]

# Attach the fruit details, then order by id and keep/rename the columns.
joined <- merge(df6, df_yummy)
df_result <- joined[order(id), .(id, Fruit = VarName, Yummyness)]
其原理基本相同:我们按 id 分组,对 data.table 特有的 .SD 使用 which 函数,再用得到的索引选出正确的列名。得益于 data.table 的语法,代码会稍微紧凑一些,但本质上并没有什么不同。
另一种 data.table 方案:
# Long-format lookup with data.table. Assumes data.table is attached and that
# df6 / df_yummy already exist; setDT() converts both to data.tables by
# reference.
long_scores <- melt(
  setDT(df6),
  id.vars = "id",           # spelled out; `id.var` relied on partial matching
  variable.factor = FALSE   # keep the column names as character for the join
)[value != 0]               # only the non-zero cell of each id

# Attach the fruit to each remaining (id, variable) pair. nomatch = NULL drops
# df_yummy rows that no id refers to instead of emitting rows with id = NA.
matched <- long_scores[
  setDT(df_yummy),
  on = c(variable = "VarId"),
  nomatch = NULL
]

# setorder() takes its sort columns through `...`; it has no `by` argument.
setorder(matched, id)[, .(id, VarName, Yummyness)]
结果为:
id VarName Yummyness
1: 1 Peach 3
2: 2 Apple 2
3: 3 Lemon 1