R中不同列的加权平均值
Weighted average for different columns in R
我想计算数据框中不同列的加权平均值
这是我的数据:
# Row labels; every vector below holds one value per label, in this order.
name_x <- c("a", "b", "c", "d")

# "x" measurements, one vector per numeric suffix.
x.4     <- c(2, 3, 4, 5)
x.8     <- c(3, 24, 2, 2)
x.12    <- c(3, 2, 4, 5)
x.24    <- c(2, 4, 5, 2)
x.36    <- c(2, 1, 3, 6)
x.50    <- c(2, 3, 5, 2)
x.100   <- c(2, 3, 4, 5)
x.10000 <- c(2, 3, 46, 2)

# "a" measurements, using the same suffixes as the "x" vectors.
a.4     <- c(2, 3, 4, 5)
a.8     <- c(2, 3, 4, 7)
a.12    <- c(3, 2, 4, 5)
a.24    <- c(2, 4, 5, 2)
a.36    <- c(2, 4, 5, 2)
a.50    <- c(2, 3, 5, 20)
a.100   <- c(2, 3, 4, 5)
a.10000 <- c(2, 32, 46, 2)

# Columns are interleaved x/a per suffix; the suffix order (4, 8, ..., 10000)
# is what the position-based weights in the answers below rely on.
df <- data.frame(
  name_x,
  x.4, a.4,
  x.8, a.8,
  x.12, a.12,
  x.24, a.24,
  x.36, a.36,
  x.50, a.50,
  x.100, a.100,
  x.10000, a.10000
)
我想创建两个加权平均变量“x”和“a”:以 4 结尾的列使用权重 8,以 8 结尾的列使用权重 7,以 12 结尾的列使用权重 6,依此类推,直到以 10000 结尾的列使用权重 1:
x = (x.4 * 8 + x.8 * 7 + x.12 * 6 + x.24 * 5 + x.36 * 4 + x.50 * 3 + x.100 * 2 + x.10000 * 1) / 36
a = (a.4 * 8 + a.8 * 7 + a.12 * 6 + a.24 * 5 + a.36 * 4 + a.50 * 3 + a.100 * 2 + a.10000 * 1) / 36
我目前的做法是手动把每一列乘以对应的权重,求和后再除以权重之和(36),但这样写起来非常费时。在这种情况下,是否有更简洁的方法来计算加权平均值?
可以分别对每一组列进行计算:
# Weighted average of the x.* columns of df, one value per row.
# The literal dot must be written "\\." in an R string: a single backslash
# before "." is an unrecognized escape and the code would not even parse.
Xs <- grep("^x\\.", names(df))
# Weights count down from the number of matched columns to 1, following the
# column order in df (first matched column gets the largest weight).
Xs_seq <- rev(seq_along(Xs))
# Matrix product = per-row weighted sum; dividing by the weight total
# turns it into a weighted mean.
as.matrix(df[Xs]) %*% matrix(Xs_seq, ncol = 1) / sum(Xs_seq)
# [,1]
# [1,] 2.361111
# [2,] 6.833333
# [3,] 4.888889
# [4,] 3.777778
# Weighted average of the a.* columns of df, one value per row.
# The literal dot must be written "\\." in an R string: a single backslash
# before "." is an unrecognized escape and the code would not even parse.
As <- grep("^a\\.", names(df))
# Weights count down from the number of matched columns to 1, following the
# column order in df (first matched column gets the largest weight).
As_seq <- rev(seq_along(As))
# Matrix product = per-row weighted sum; dividing by the weight total
# turns it into a weighted mean.
as.matrix(df[As]) %*% matrix(As_seq, ncol = 1) / sum(As_seq)
# [,1]
# [1,] 2.166667
# [2,] 3.888889
# [3,] 5.500000
# [4,] 5.805556
如果您的列名模式一致(例如“单个字母、一个句点、然后是数字”),则
# Collect the distinct prefixes of all columns named
# "<one character>.<digits>". Backslashes are doubled ("\\.") because a
# single backslash before "." is not a valid escape in an R string literal.
Z <- unique(gsub(
  "\\..*", "",
  grep("^.\\.[0-9]+$", names(df), value = TRUE)
))
Z
# [1] "x" "a"
# For each prefix, compute the per-row weighted average of its columns.
# setNames(nm = Z) names the vector by its own values, so the resulting
# list is named by prefix.
lapply(setNames(nm = Z), function(z) {
  Zs <- grep(paste0("^", z, "\\."), names(df))
  # Weights count down from the number of matched columns to 1.
  Zs_seq <- rev(seq_along(Zs))
  as.matrix(df[Zs]) %*% matrix(Zs_seq, ncol = 1) / sum(Zs_seq)
})
# $x
# [,1]
# [1,] 2.361111
# [2,] 6.833333
# [3,] 4.888889
# [4,] 3.777778
# $a
# [,1]
# [1,] 2.166667
# [2,] 3.888889
# [3,] 5.500000
# [4,] 5.805556
试试这个:
library(tidyverse)
# Weights for the suffixes 4, 8, 12, 24, 36, 50, 100, 10000, in that order.
the_weights <- 8:1
# Reshape to long form with one row per (name_x, var, number) combination.
# pivot_longer() replaces the superseded gather() + separate() pair: splitting
# the column names on the literal dot yields the prefix ("var") and the
# numeric suffix ("number", kept as character) in a single step.
df2 <- df %>%
  pivot_longer(
    cols = -name_x,
    names_to = c("var", "number"),
    names_sep = "\\.",
    values_to = "value"
  )
# Within each (name_x, var) group the rows keep the column order of df, so
# the weights line up with the values by position. 36 is sum(the_weights).
df2 %>%
  group_by(name_x, var) %>%
  summarise(sum(the_weights * value) / 36)
为了更清晰,我更喜欢将这两个步骤分开,但也可以用管道将它们连接起来
# A tibble: 8 x 3
# Groups: name_x [4]
name_x var `sum(the_weights * value)/36`
<chr> <chr> <dbl>
1 a a 2.17
2 a x 2.36
3 b a 3.89
4 b x 6.83
5 c a 5.5
6 c x 4.89
7 d a 5.81
8 d x 3.78
我想计算数据框中不同列的加权平均值。这是我的数据:
# Row labels; every vector below holds one value per label, in this order.
name_x <- c("a", "b", "c", "d")

# "x" measurements, one vector per numeric suffix.
x.4     <- c(2, 3, 4, 5)
x.8     <- c(3, 24, 2, 2)
x.12    <- c(3, 2, 4, 5)
x.24    <- c(2, 4, 5, 2)
x.36    <- c(2, 1, 3, 6)
x.50    <- c(2, 3, 5, 2)
x.100   <- c(2, 3, 4, 5)
x.10000 <- c(2, 3, 46, 2)

# "a" measurements, using the same suffixes as the "x" vectors.
a.4     <- c(2, 3, 4, 5)
a.8     <- c(2, 3, 4, 7)
a.12    <- c(3, 2, 4, 5)
a.24    <- c(2, 4, 5, 2)
a.36    <- c(2, 4, 5, 2)
a.50    <- c(2, 3, 5, 20)
a.100   <- c(2, 3, 4, 5)
a.10000 <- c(2, 32, 46, 2)

# Columns are interleaved x/a per suffix; the suffix order (4, 8, ..., 10000)
# is what the position-based weights in the answers below rely on.
df <- data.frame(
  name_x,
  x.4, a.4,
  x.8, a.8,
  x.12, a.12,
  x.24, a.24,
  x.36, a.36,
  x.50, a.50,
  x.100, a.100,
  x.10000, a.10000
)
我想创建两个加权平均变量“x”和“a”:以 4 结尾的列使用权重 8,以 8 结尾的列使用权重 7,以 12 结尾的列使用权重 6,依此类推,直到以 10000 结尾的列使用权重 1:
x = (x.4 * 8 + x.8 * 7 + x.12 * 6 + x.24 * 5 + x.36 * 4 + x.50 * 3 + x.100 * 2 + x.10000 * 1) / 36
a = (a.4 * 8 + a.8 * 7 + a.12 * 6 + a.24 * 5 + a.36 * 4 + a.50 * 3 + a.100 * 2 + a.10000 * 1) / 36
我目前的做法是手动把每一列乘以对应的权重,求和后再除以权重之和(36),但这样写起来非常费时。在这种情况下,是否有更简洁的方法来计算加权平均值?
可以分别对每一组列进行计算:
# Weighted average of the x.* columns of df, one value per row.
# The literal dot must be written "\\." in an R string: a single backslash
# before "." is an unrecognized escape and the code would not even parse.
Xs <- grep("^x\\.", names(df))
# Weights count down from the number of matched columns to 1, following the
# column order in df (first matched column gets the largest weight).
Xs_seq <- rev(seq_along(Xs))
# Matrix product = per-row weighted sum; dividing by the weight total
# turns it into a weighted mean.
as.matrix(df[Xs]) %*% matrix(Xs_seq, ncol = 1) / sum(Xs_seq)
# [,1]
# [1,] 2.361111
# [2,] 6.833333
# [3,] 4.888889
# [4,] 3.777778
# Weighted average of the a.* columns of df, one value per row.
# The literal dot must be written "\\." in an R string: a single backslash
# before "." is an unrecognized escape and the code would not even parse.
As <- grep("^a\\.", names(df))
# Weights count down from the number of matched columns to 1, following the
# column order in df (first matched column gets the largest weight).
As_seq <- rev(seq_along(As))
# Matrix product = per-row weighted sum; dividing by the weight total
# turns it into a weighted mean.
as.matrix(df[As]) %*% matrix(As_seq, ncol = 1) / sum(As_seq)
# [,1]
# [1,] 2.166667
# [2,] 3.888889
# [3,] 5.500000
# [4,] 5.805556
如果您的列名模式一致(例如“单个字母、一个句点、然后是数字”),则
# Collect the distinct prefixes of all columns named
# "<one character>.<digits>". Backslashes are doubled ("\\.") because a
# single backslash before "." is not a valid escape in an R string literal.
Z <- unique(gsub(
  "\\..*", "",
  grep("^.\\.[0-9]+$", names(df), value = TRUE)
))
Z
# [1] "x" "a"
# For each prefix, compute the per-row weighted average of its columns.
# setNames(nm = Z) names the vector by its own values, so the resulting
# list is named by prefix.
lapply(setNames(nm = Z), function(z) {
  Zs <- grep(paste0("^", z, "\\."), names(df))
  # Weights count down from the number of matched columns to 1.
  Zs_seq <- rev(seq_along(Zs))
  as.matrix(df[Zs]) %*% matrix(Zs_seq, ncol = 1) / sum(Zs_seq)
})
# $x
# [,1]
# [1,] 2.361111
# [2,] 6.833333
# [3,] 4.888889
# [4,] 3.777778
# $a
# [,1]
# [1,] 2.166667
# [2,] 3.888889
# [3,] 5.500000
# [4,] 5.805556
试试这个:
library(tidyverse)
# Weights for the suffixes 4, 8, 12, 24, 36, 50, 100, 10000, in that order.
the_weights <- 8:1
# Reshape to long form with one row per (name_x, var, number) combination.
# pivot_longer() replaces the superseded gather() + separate() pair: splitting
# the column names on the literal dot yields the prefix ("var") and the
# numeric suffix ("number", kept as character) in a single step.
df2 <- df %>%
  pivot_longer(
    cols = -name_x,
    names_to = c("var", "number"),
    names_sep = "\\.",
    values_to = "value"
  )
# Within each (name_x, var) group the rows keep the column order of df, so
# the weights line up with the values by position. 36 is sum(the_weights).
df2 %>%
  group_by(name_x, var) %>%
  summarise(sum(the_weights * value) / 36)
为了更清晰,我更喜欢将这两个步骤分开,但也可以用管道将它们连接起来
# A tibble: 8 x 3
# Groups: name_x [4]
name_x var `sum(the_weights * value)/36`
<chr> <chr> <dbl>
1 a a 2.17
2 a x 2.36
3 b a 3.89
4 b x 6.83
5 c a 5.5
6 c x 4.89
7 d a 5.81
8 d x 3.78