将相同的值分散到多个列中
spread same value into multiple columns
这是我的起始df
test <- data.frame(ident = c(1,2,3,4,5),
cult1 = c("CER", "CER", "CER", "INF", "INF"),
cult2 = c("GEX", "GEX", "GEX", "GEX", "GEX"),
cult3 = c(NA, "ORG", "ORG", NA, NA),
prod = c(11050,2134,0,26250,2124),
value = c(19785,34132,9207,6987,4455),
weight = c(0,0,56,13,556))
我尝试为每个邪教传播价值、产品和权重,如果它不是 NA,到目前为止我已经完成了,但是使用 NA 命名列,我还需要复制列值,每个传播的产品权重,因为它稍后被删除。
test <- test %>% mutate (prod2 = prod,
prod3 = prod,
value2 = value,
value3 = value,
weight2 = weight,
weight3 = weight)
test <- test %>% mutate(CATEGORIE_TEMP = paste0("P_", cult1)) %>%
spread(key = CATEGORIE_TEMP, value = "prod")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("V_", cult1)) %>%
spread(key = CATEGORIE_TEMP, value = "value")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("W_", cult1)) %>%
spread(key = CATEGORIE_TEMP, value = "weight")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("P_", cult2)) %>%
spread(key = CATEGORIE_TEMP, value = "prod2")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("V_", cult2)) %>%
spread(key = CATEGORIE_TEMP, value = "value2")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("W_", cult2)) %>%
spread(key = CATEGORIE_TEMP, value = "weight2")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("P_", cult3)) %>%
spread(key = CATEGORIE_TEMP, value = "prod3")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("V_", cult3)) %>%
spread(key = CATEGORIE_TEMP, value = "value3")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("W_", cult3)) %>%
spread(key = CATEGORIE_TEMP, value = "weight3")
#removing the NA named columns
test <- test %>% select(-ends_with("NA"))
有没有更有效的方法来做到这一点?
我没有 pivot_wider 功能,因为我是 运行 一个旧的 R 版本。
预期结果:
ident cult1 cult2 cult3 P_CER P_INF V_CER V_INF W_CER W_INF P_GEX V_GEX W_GEX P_ORG V_ORG W_ORG
1 1 CER GEX <NA> 11050 NA 19785 NA 0 NA 11050 19785 0 NA NA NA
2 2 CER GEX ORG 2134 NA 34132 NA 0 NA 2134 34132 0 2134 34132 0
3 3 CER GEX ORG 0 NA 9207 NA 56 NA 0 9207 56 0 9207 56
4 4 INF GEX <NA> NA 26250 NA 6987 NA 13 26250 6987 13 NA NA NA
5 5 INF GEX <NA> NA 2124 NA 4455 NA 556 2124 4455 556 NA NA NA
我不确定你能不能称之为更高效,但至少它是一个代码块:
test2 <- test %>%
gather(cult1:cult3, key = "key", value = "cult") %>%
mutate(prod_cult = paste0("P_",cult),
value_cult = paste0("V_",cult),
weight_cult = paste0("W_",cult)) %>%
spread(key = key, value = cult) %>%
spread(key = prod_cult, value = prod) %>%
spread(key = value_cult, value = value) %>%
spread(key = weight_cult, value = weight) %>%
group_by(ident) %>%
summarise_all(~first(na.omit(.))) %>%
select(-ends_with("NA"))
这是我的起始df
test <- data.frame(ident = c(1,2,3,4,5),
cult1 = c("CER", "CER", "CER", "INF", "INF"),
cult2 = c("GEX", "GEX", "GEX", "GEX", "GEX"),
cult3 = c(NA, "ORG", "ORG", NA, NA),
prod = c(11050,2134,0,26250,2124),
value = c(19785,34132,9207,6987,4455),
weight = c(0,0,56,13,556))
我尝试为每个邪教传播价值、产品和权重,如果它不是 NA,到目前为止我已经完成了,但是使用 NA 命名列,我还需要复制列值,每个传播的产品权重,因为它稍后被删除。
test <- test %>% mutate (prod2 = prod,
prod3 = prod,
value2 = value,
value3 = value,
weight2 = weight,
weight3 = weight)
test <- test %>% mutate(CATEGORIE_TEMP = paste0("P_", cult1)) %>%
spread(key = CATEGORIE_TEMP, value = "prod")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("V_", cult1)) %>%
spread(key = CATEGORIE_TEMP, value = "value")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("W_", cult1)) %>%
spread(key = CATEGORIE_TEMP, value = "weight")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("P_", cult2)) %>%
spread(key = CATEGORIE_TEMP, value = "prod2")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("V_", cult2)) %>%
spread(key = CATEGORIE_TEMP, value = "value2")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("W_", cult2)) %>%
spread(key = CATEGORIE_TEMP, value = "weight2")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("P_", cult3)) %>%
spread(key = CATEGORIE_TEMP, value = "prod3")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("V_", cult3)) %>%
spread(key = CATEGORIE_TEMP, value = "value3")
test <- test %>% mutate(CATEGORIE_TEMP = paste0("W_", cult3)) %>%
spread(key = CATEGORIE_TEMP, value = "weight3")
#removing the NA named columns
test <- test %>% select(-ends_with("NA"))
有没有更有效的方法来做到这一点? 我没有 pivot_wider 功能,因为我是 运行 一个旧的 R 版本。
预期结果:
ident cult1 cult2 cult3 P_CER P_INF V_CER V_INF W_CER W_INF P_GEX V_GEX W_GEX P_ORG V_ORG W_ORG
1 1 CER GEX <NA> 11050 NA 19785 NA 0 NA 11050 19785 0 NA NA NA
2 2 CER GEX ORG 2134 NA 34132 NA 0 NA 2134 34132 0 2134 34132 0
3 3 CER GEX ORG 0 NA 9207 NA 56 NA 0 9207 56 0 9207 56
4 4 INF GEX <NA> NA 26250 NA 6987 NA 13 26250 6987 13 NA NA NA
5 5 INF GEX <NA> NA 2124 NA 4455 NA 556 2124 4455 556 NA NA NA
我不确定你能不能称之为更高效,但至少它是一个代码块:
test2 <- test %>%
gather(cult1:cult3, key = "key", value = "cult") %>%
mutate(prod_cult = paste0("P_",cult),
value_cult = paste0("V_",cult),
weight_cult = paste0("W_",cult)) %>%
spread(key = key, value = cult) %>%
spread(key = prod_cult, value = prod) %>%
spread(key = value_cult, value = value) %>%
spread(key = weight_cult, value = weight) %>%
group_by(ident) %>%
summarise_all(~first(na.omit(.))) %>%
select(-ends_with("NA"))