将每列中的值乘以 R 中另一个 data.frame 中的权重

Multiply values across each column by weight in another data.frame in R

我有两个 data.frame:df 和 weights(代码如下)。

df 看起来像这样:

    id             a           b           d           EE           f
 1  this           0.23421153  -0.02324956 0.5457353   0.73068586   0.5642554
 2  is             0.28378641  0.36346241  1.0190496   1.97715019   -1.190179
 3  an             -0.04372133  0.07412557  -0.4510299  1.8442713   -0.5301328
 4  example        -0.18139887  0.10404329  -1.0871962  1.46920108  0.4685703
 5  data.frame     0.24235498   -0.1501064  -1.1183967  0.22884407  0.4272259
 6  for             -0.72718239 0.16337997  1.2635683   0.44206945  0.7303647
 7  Whosebug  0.25203942   -0.1772715  -0.3371532  -0.29167792 -0.7065494
 8  please         -0.11047364  -0.06631552 0.4342659   -1.49584522 0.2837016
 9  help           -0.1136639   0.22414253  0.4284864   1.59096047  0.2915938
 10 me             -0.3677288   0.05974474  -0.1136177  0.02322094  -0.6533994

如何将每列中的值乘以 weights data.frame 中的相应权重?

预期结果:

    id            a             b           d            EE   f
 1  this          0.749476896   -0.1743717  5.29363241   NA   4.17548996
 2  is            0.908116512   2.725968075 9.88478112   NA   -8.8073246
 3  an            -0.139908256  0.555941775 -4.37499003  NA   -3.92298272
 4  example       -0.580476384  0.780324675 -10.54580314 NA   3.46742022
 5  data.frame    0.775535936   -1.125798   -10.84844799 NA   3.16147166
 6  for           -2.326983648  1.225349775 12.25661251  NA   5.40469878
 7  Whosebug 0.806526144   -1.32953625 -3.27038604  NA   -5.22846556
 8  please        -0.353515648  -0.4973664  4.21237923   NA   2.09939184
 9  help          -0.36372448   1.681068975 4.15631808   NA   2.15779412
 10 me            -1.17673216   0.44808555  -1.10209169  NA   -4.83515556

代码:

 # Reproducible example data: an `id` column followed by five numeric columns.
 # The rnorm() calls are kept in their original order so the seeded values
 # are unchanged.
 set.seed(12345)
 df <- data.frame(
   id = c("this", "is", "an", "example", "data.frame", "for",
          "Whosebug", "please", "help", "me"),
   a = rnorm(10, 0, 0.4),
   b = rnorm(10, 0, 0.2),
   d = rnorm(10, 0, 0.7),
   EE = rnorm(10, 0, 0.9),
   f = rnorm(10, 0, 0.5)
 )
 # One weight per row, with rows named by the lowercase letters a..w.
 # Written as numeric literals, so no string-to-number coercion is needed.
 # None of the row names equals "EE".
 weights <- data.frame(
   V1 = c(3.2, 7.5, 2.2, 9.7, 5.4, 7.4, 2.1, 5.0, 3.3, 7.6, 3.6, 7.7,
          7.1, 3.3, 9.8, 9.2, 2.5, 6.2, 4.1, 8.7, 3.3, 9.3, 8.3)
 )
 rownames(weights) <- letters[1:23]

您可以使用 sweep 和 match:

# Look up one weight per value column by matching the column name against the
# row names of `weights` (NA when nothing matches), then scale every column
# by its weight.
df[-1] <- sweep(
  df[-1],
  MARGIN = 2,
  STATS = weights$V1[match(names(df)[-1], rownames(weights))],
  FUN = `*`
)

df
#              id          a          b          d EE         f
#1           this  0.7494769 -0.1743717   5.293633 NA  4.175490
#2             is  0.9081165  2.7259681   9.884781 NA -8.807325
#3             an -0.1399082  0.5559418  -4.374990 NA -3.922983
#4        example -0.5804764  0.7803247 -10.545803 NA  3.467420
#5     data.frame  0.7755359 -1.1257980 -10.848448 NA  3.161471
#6            for -2.3269836  1.2253498  12.256612 NA  5.404699
#7  Whosebug  0.8065261 -1.3295363  -3.270386 NA -5.228465
#8         please -0.3535157 -0.4973664   4.212379 NA  2.099392
#9           help -0.3637245  1.6810690   4.156319 NA  2.157794
#10            me -1.1767322  0.4480855  -1.102092 NA -4.835156

这是一个替代解决方案:

  1. 在 weights 中把行名转换为一个名为 Names 的列
  2. 用 match 将 df 的列名与 weights 数据框的 Names 对应起来
  3. 通过 col(df[-1]) 复制 weights$V1,使每个值都对应到其所在列的权重
  4. 根据该数字索引,把每个值乘以相应的 V1
  5. 最后用 cbind 把 df 中的 id 加回来;为了便于计算,之前用 df[-1] 把它去掉了。
library(tibble)

# Turn the row names of `weights` into a regular column called "Names".
weights <- rownames_to_column(weights, var = "Names")

# For each value column find its weight by name, spread that weight over all
# rows via the column-index matrix, multiply, and put `id` back in front.
df_result <- cbind(
  id = df$id,
  df[-1] * weights$V1[match(names(df)[-1], weights$Names)][col(df[-1])]
)
df_result

输出:

id          a          b          d EE         f
1           this  0.7494769 -0.1743717   5.293633 NA  4.175490
2             is  0.9081165  2.7259681   9.884781 NA -8.807325
3             an -0.1399082  0.5559418  -4.374990 NA -3.922983
4        example -0.5804764  0.7803247 -10.545803 NA  3.467420
5     data.frame  0.7755359 -1.1257980 -10.848448 NA  3.161471
6            for -2.3269836  1.2253498  12.256612 NA  5.404699
7  Whosebug  0.8065261 -1.3295363  -3.270386 NA -5.228465
8         please -0.3535157 -0.4973664   4.212379 NA  2.099392
9           help -0.3637245  1.6810690   4.156319 NA  2.157794
10            me -1.1767322  0.4480855  -1.102092 NA -4.835156

使用lapply。应该比 matching 和 sweeping 快得多。

# Multiply every non-id column by the weight stored under the same row name
# in `weights`. Selecting the "V1" column explicitly keeps the lookup a plain
# number even if `weights` carries additional columns; a column without a
# matching row name (here `EE`) is multiplied by NA.
df[-1] <- lapply(names(df)[-1], \(x) df[[x]] * weights[x, "V1"])
#               id          a          b          d EE         f
# 1           this  0.7494769 -0.1743717   5.293633 NA  4.175490
# 2             is  0.9081165  2.7259681   9.884781 NA -8.807325
# 3             an -0.1399082  0.5559418  -4.374990 NA -3.922983
# 4        example -0.5804764  0.7803247 -10.545803 NA  3.467420
# 5     data.frame  0.7755359 -1.1257980 -10.848448 NA  3.161471
# 6            for -2.3269836  1.2253498  12.256612 NA  5.404699
# 7  Whosebug  0.8065261 -1.3295363  -3.270386 NA -5.228465
# 8         please -0.3535157 -0.4973664   4.212379 NA  2.099392
# 9           help -0.3637245  1.6810690   4.156319 NA  2.157794
# 10            me -1.1767322  0.4480855  -1.102092 NA -4.835156

使用 tidyverse,我们可以用 across 遍历各列,通过 cur_column() 获取当前列的列名,再用它按行名和列名对“权重”数据取子集,然后相乘

library(dplyr)

# Scale each numeric column by the weight whose row name equals the name of
# the column currently being processed.
df |>
  mutate(
    across(
      where(is.numeric),
      \(values) values * weights[cur_column(), "V1"]
    )
  )
              id          a          b          d EE         f
1           this  0.7494769 -0.1743717   5.293633 NA  4.175490
2             is  0.9081165  2.7259681   9.884781 NA -8.807325
3             an -0.1399082  0.5559418  -4.374990 NA -3.922983
4        example -0.5804764  0.7803247 -10.545803 NA  3.467420
5     data.frame  0.7755359 -1.1257980 -10.848448 NA  3.161471
6            for -2.3269836  1.2253498  12.256612 NA  5.404699
7  Whosebug  0.8065261 -1.3295363  -3.270386 NA -5.228465
8         please -0.3535157 -0.4973664   4.212379 NA  2.099392
9           help -0.3637245  1.6810690   4.156319 NA  2.157794
10            me -1.1767322  0.4480855  -1.102092 NA -4.835156

您可以尝试下面的基本 R 代码

# Repeat each column's weight once per row so the weight vector lines up with
# the column-by-column layout of the value columns, then multiply.
df[-1] <- df[-1] * rep(weights[names(df)[-1], "V1"], each = nrow(df))

这给出了

> df
              id          a          b          d EE         f
1           this  0.7494769 -0.1743717   5.293633 NA  4.175490
2             is  0.9081165  2.7259681   9.884781 NA -8.807325
3             an -0.1399082  0.5559418  -4.374990 NA -3.922983
4        example -0.5804764  0.7803247 -10.545803 NA  3.467420
5     data.frame  0.7755359 -1.1257980 -10.848448 NA  3.161471
6            for -2.3269836  1.2253498  12.256612 NA  5.404699
7  Whosebug  0.8065261 -1.3295363  -3.270386 NA -5.228465
8         please -0.3535157 -0.4973664   4.212379 NA  2.099392
9           help -0.3637245  1.6810690   4.156319 NA  2.157794
10            me -1.1767322  0.4480855  -1.102092 NA -4.835156