R:使用字符串匹配将特定列的行除以 df2 的列
R: divide rows of specific columns by column of df2 with string-match
我有一个关于两个数据帧的问题。
我有这样的数据:
# Divisors: one var1 value per ID; the value for "e" is missing.
df1 <- data.frame(
  ID = c("a", "b", "c", "d", "e"),
  var1 = c(10, 15, 12, 14, NA_real_)
)

# Values to be divided: one row per Name, one "<ID>_xyz" column per ID.
df2 <- data.frame(
  Name = c("Nr1", "Nr2", "Nr100", "Nr76"),
  a_xyz = c(100, 120, 130, NA_real_),
  b_xyz = c(150, NA_real_, 80, 90),
  c_xyz = c(120, 120, NA_real_, 140),
  d_xyz = c(140, 130, 150, 180),
  e_xyz = c(130, 120, 100, 150)
)
我想(也许用一个循环)将 df2 中列“a_xyz”的每个值除以 df1 中 ID 为“a”的那一行的 var1 值。
我想对 df2 中的每个变量执行此操作,因此我必须查找字符串匹配项以及该值是否预先存在。遗憾的是,我在 df1 和 df2 中没有相同的名称(“a”和“a_xyz”),这使得这项任务比我希望的更复杂。
生成的 df3 最后应该是这样的:
# Desired result as written by the asker (values reproduced verbatim).
df3 <- data.frame(
  Name = c("Nr1", "Nr2", "Nr100", "Nr76"),
  a_xyz = c(10, 12, 13, NA_real_),
  b_xyz = c(10, NA_real_, 5.33, 6),
  c_xyz = c(10, 10, NA_real_, 1.667),
  d_xyz = c(10, 10.83, 10.71, 12.85),
  e_xyz = c(130, 120, 100, 150)
)
你们能帮帮我吗?也许我想多了,这个问题有一个简单的解决方案。
非常感谢您!
试试这个。您的某些最终值不正确,因此如果有其他计算定义,请告诉我。可以进行重塑和合并以获得与所需输出类似的输出:
library(dplyr)
library(tidyr)
# Reshape df2 to long form, look up each column's divisor in df1 through the
# prefix before "_", divide, then reshape back to the original wide layout.
new <- df2 %>%
  pivot_longer(-Name) %>%
  # Copy the column name so the original survives the split below.
  mutate(val = name) %>%
  # "a_xyz" -> ID = "a", V1 = "xyz"; ID is the key shared with df1.
  separate(val, c("ID", "V1"), sep = "_") %>%
  # Name the key explicitly instead of letting it be inferred from the
  # shared column names (which also prints a "Joining, by" message).
  left_join(df1, by = "ID") %>%
  # When no divisor is available (var1 is NA), keep the value unchanged.
  mutate(Res = ifelse(!is.na(var1), value / var1, value)) %>%
  select(Name, name, Res) %>%
  pivot_wider(names_from = name, values_from = Res)
输出:
# A tibble: 4 x 6
Name a_xyz b_xyz c_xyz d_xyz e_xyz
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Nr1 10 10 10 10 130
2 Nr2 12 NA 10 9.29 120
3 Nr100 13 5.33 NA 10.7 100
4 Nr76 NA 6 11.7 12.9 150
另一个解决方案是对 df1 进行透视(pivot,即转换为宽格式),
我认为这样可以得到一个相当简洁的解决方案。
library(dplyr, warn.conflicts = FALSE)
library(tidyr)

# Divisors: one var1 value per ID; the value for "e" is missing.
df1 <- data.frame(
  ID = c("a", "b", "c", "d", "e"),
  var1 = c(10, 15, 12, 14, NA)
)

# Values to be divided: one row per Name, one "<ID>_xyz" column per ID.
df2 <- data.frame(
  Name = c("Nr1", "Nr2", "Nr100", "Nr76"),
  a_xyz = c(100, 120, 130, NA),
  b_xyz = c(150, NA, 80, 90),
  c_xyz = c(120, 120, NA, 140),
  d_xyz = c(140, 130, 150, 180),
  e_xyz = c(130, 120, 100, 150)
)

# Pivot df1 into a single wide row so that every ID becomes a column
# holding its divisor.
df1 <- df1 %>% pivot_wider(names_from = ID, values_from = var1)

# Divide each "<ID>_xyz" column by the df1 column named after its prefix,
# instead of spelling out one mutate() line per column.
# A missing divisor (here "e") turns the whole column into NA.
df3 <- df2 %>%
  mutate(across(
    ends_with("_xyz"),
    ~ .x / df1[[sub("_xyz$", "", cur_column())]]
  ))

df3
#> Name a_xyz b_xyz c_xyz d_xyz e_xyz
#> 1 Nr1 10 10.000000 10.00000 10.000000 NA
#> 2 Nr2 12 NA 10.00000 9.285714 NA
#> 3 Nr100 13 5.333333 NA 10.714286 NA
#> 4 Nr76 NA 6.000000 11.66667 12.857143 NA
我有一个关于两个数据帧的问题。
我有这样的数据:
# Divisors: one var1 value per ID; the value for "e" is missing.
df1 <- data.frame(
  ID = c("a", "b", "c", "d", "e"),
  var1 = c(10, 15, 12, 14, NA_real_)
)

# Values to be divided: one row per Name, one "<ID>_xyz" column per ID.
df2 <- data.frame(
  Name = c("Nr1", "Nr2", "Nr100", "Nr76"),
  a_xyz = c(100, 120, 130, NA_real_),
  b_xyz = c(150, NA_real_, 80, 90),
  c_xyz = c(120, 120, NA_real_, 140),
  d_xyz = c(140, 130, 150, 180),
  e_xyz = c(130, 120, 100, 150)
)
我想(也许用一个循环)将 df2 中列“a_xyz”的每个值除以 df1 中 ID 为“a”的那一行的 var1 值。 我想对 df2 中的每个变量执行此操作,因此我必须查找字符串匹配项以及该值是否预先存在。遗憾的是,我在 df1 和 df2 中没有相同的名称(“a”和“a_xyz”),这使得这项任务比我希望的更复杂。
生成的 df3 最后应该是这样的:
# Desired result as written by the asker (values reproduced verbatim).
df3 <- data.frame(
  Name = c("Nr1", "Nr2", "Nr100", "Nr76"),
  a_xyz = c(10, 12, 13, NA_real_),
  b_xyz = c(10, NA_real_, 5.33, 6),
  c_xyz = c(10, 10, NA_real_, 1.667),
  d_xyz = c(10, 10.83, 10.71, 12.85),
  e_xyz = c(130, 120, 100, 150)
)
你们能帮帮我吗?也许我想多了,这个问题有一个简单的解决方案。
非常感谢您!
试试这个。您的某些最终值不正确,因此如果有其他计算定义,请告诉我。可以进行重塑和合并以获得与所需输出类似的输出:
library(dplyr)
library(tidyr)
# Reshape df2 to long form, look up each column's divisor in df1 through the
# prefix before "_", divide, then reshape back to the original wide layout.
new <- df2 %>%
  pivot_longer(-Name) %>%
  # Copy the column name so the original survives the split below.
  mutate(val = name) %>%
  # "a_xyz" -> ID = "a", V1 = "xyz"; ID is the key shared with df1.
  separate(val, c("ID", "V1"), sep = "_") %>%
  # Name the key explicitly instead of letting it be inferred from the
  # shared column names (which also prints a "Joining, by" message).
  left_join(df1, by = "ID") %>%
  # When no divisor is available (var1 is NA), keep the value unchanged.
  mutate(Res = ifelse(!is.na(var1), value / var1, value)) %>%
  select(Name, name, Res) %>%
  pivot_wider(names_from = name, values_from = Res)
输出:
# A tibble: 4 x 6
Name a_xyz b_xyz c_xyz d_xyz e_xyz
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Nr1 10 10 10 10 130
2 Nr2 12 NA 10 9.29 120
3 Nr100 13 5.33 NA 10.7 100
4 Nr76 NA 6 11.7 12.9 150
另一个解决方案是对 df1 进行透视(pivot,即转换为宽格式),
我认为这样可以得到一个相当简洁的解决方案。
library(dplyr, warn.conflicts = FALSE)
library(tidyr)

# Divisors: one var1 value per ID; the value for "e" is missing.
df1 <- data.frame(
  ID = c("a", "b", "c", "d", "e"),
  var1 = c(10, 15, 12, 14, NA)
)

# Values to be divided: one row per Name, one "<ID>_xyz" column per ID.
df2 <- data.frame(
  Name = c("Nr1", "Nr2", "Nr100", "Nr76"),
  a_xyz = c(100, 120, 130, NA),
  b_xyz = c(150, NA, 80, 90),
  c_xyz = c(120, 120, NA, 140),
  d_xyz = c(140, 130, 150, 180),
  e_xyz = c(130, 120, 100, 150)
)

# Pivot df1 into a single wide row so that every ID becomes a column
# holding its divisor.
df1 <- df1 %>% pivot_wider(names_from = ID, values_from = var1)

# Divide each "<ID>_xyz" column by the df1 column named after its prefix,
# instead of spelling out one mutate() line per column.
# A missing divisor (here "e") turns the whole column into NA.
df3 <- df2 %>%
  mutate(across(
    ends_with("_xyz"),
    ~ .x / df1[[sub("_xyz$", "", cur_column())]]
  ))

df3
#> Name a_xyz b_xyz c_xyz d_xyz e_xyz
#> 1 Nr1 10 10.000000 10.00000 10.000000 NA
#> 2 Nr2 12 NA 10.00000 9.285714 NA
#> 3 Nr100 13 5.333333 NA 10.714286 NA
#> 4 Nr76 NA 6.000000 11.66667 12.857143 NA