基于dataframe进行加权计算
Make weighted calculations based on dataframe
我要计算一场比赛不同玩家的加权分数。我在前三列有他们的个人得分,在接下来的三列有他们的权重,这取决于比赛(每行代表一场比赛)。
谁能帮我有效地计算加权分数?
# Sample data: one row per game. The player* columns hold each player's score
# and the weightplayer* columns hold the matching weights.
df <- data.frame(
  player1 = c(1, 2, 1),
  player2 = c(2, 2, 1),
  player3 = c(2, 2, 2),
  weightplayer1 = c(0.7, 0.8, 0.7),
  weightplayer2 = c(0.6, 0.1, 0.6),
  weightplayer3 = c(0.2, 0.7, 0.2)
)
# player1 player2 player3 weightplayer1 weightplayer2 weightplayer3
# 1 2 2 0.7 0.6 0.2
# 2 2 2 0.8 0.1 0.7
# 1 1 2 0.7 0.6 0.2
我需要这样的输出:weighted1scores 列是得分为 1 的玩家的权重之和,weighted2scores 列是得分为 2 的玩家的权重之和。
实际上可能的得分有很多种,所以真实的数据框会有很多这样的列(直到 weighted100scores 等),
因此如果有高效的公式或循环就太好了。
# player1 player2 player3 weightplayer1 weightplayer2 weightplayer3 weighted1scores weighted2scores
# 1 2 2 0.7 0.6 0.2 0.7 0.8
# 2 2 2 0.8 0.1 0.7 0 1.6
# 1 1 2 0.7 0.6 0.2 1.3 0.2
您可以将表规范化为 3NF,然后连接和聚合:
library(tidyverse)

# Sample data: one row per game; player* = scores, weightplayer* = weights.
df <- data.frame(
  player1 = c(1, 2, 1),
  player2 = c(2, 2, 1),
  player3 = c(2, 2, 2),
  weightplayer1 = c(0.7, 0.8, 0.7),
  weightplayer2 = c(0.6, 0.1, 0.6),
  weightplayer3 = c(0.2, 0.7, 0.2)
)

# Long-format score table: one row per (game, player).
# `game` is the original row name kept as a character column; `player_id` is
# the trailing run of digits taken from the source column name.
scores <- transmute(
  pivot_longer(as_tibble(df, rownames = "game"), starts_with("player")),
  game,
  player_id = str_extract(name, "[0-9]+$"),
  score = value
)
scores
#> # A tibble: 9 × 3
#> game player_id score
#> <chr> <chr> <dbl>
#> 1 1 1 1
#> 2 1 2 2
#> 3 1 3 2
#> 4 2 1 2
#> 5 2 2 2
#> 6 2 3 2
#> 7 3 1 1
#> 8 3 2 1
#> 9 3 3 2
# Long-format weight table: one row per (game, player), built from the
# weightplayer* columns. `player_id` is the trailing run of digits in the
# source column name.
weights <- transmute(
  pivot_longer(as_tibble(df, rownames = "game"), starts_with("weight")),
  game,
  player_id = str_extract(name, "[0-9]+$"),
  weight = value
)
weights
#> # A tibble: 9 × 3
#> game player_id weight
#> <chr> <chr> <dbl>
#> 1 1 1 0.7
#> 2 1 2 0.6
#> 3 1 3 0.2
#> 4 2 1 0.8
#> 5 2 2 0.1
#> 6 2 3 0.7
#> 7 3 1 0.7
#> 8 3 2 0.6
#> 9 3 3 0.2
# Join scores to weights on their shared columns, total the weight within each
# (player_id, game) group, then spread players into weighted<player_id> columns.
# NOTE(review): the grouping key is player_id, not score, and the sample data
# has one row per (player_id, game), so sum(weight) simply returns each
# player's weight. Confirm whether grouping by score was the intent.
score_weights <- inner_join(scores, weights)
weight_totals <- summarise(
  group_by(score_weights, player_id, game),
  weighted = sum(weight)
)
pivot_wider(
  weight_totals,
  names_from = player_id,
  values_from = weighted,
  names_prefix = "weighted"
)
#> Joining, by = c("game", "player_id")
#> `summarise()` has grouped output by 'player_id'. You can override using the `.groups` argument.
#> # A tibble: 3 × 4
#> game weighted1 weighted2 weighted3
#> <chr> <dbl> <dbl> <dbl>
#> 1 1 0.7 0.6 0.2
#> 2 2 0.8 0.1 0.7
#> 3 3 0.7 0.6 0.2
由 reprex package (v2.0.1)
于 2021-09-20 创建
这是一种方法:
# Tidy table with one row per (game, player) carrying Score, Weight and their
# product.
score_long <- pivot_longer(
  mutate(df, game = row_number()),
  cols = starts_with("player"),
  names_to = "Player",
  values_to = "Score"
)
# The second pivot pairs every score column with every weight column of a game.
score_weight_pairs <- pivot_longer(
  score_long,
  cols = starts_with("weightplayer"),
  names_to = "WPlayer",
  values_to = "Weight"
)
# Keep only the pairs whose player numbers agree.
same_player <- filter(
  score_weight_pairs,
  parse_number(Player) == parse_number(WPlayer)
)
mutate(
  select(same_player, -WPlayer),
  WeightedScore = Score * Weight
)
你可以就此停下,直接返回这样一张整洁(tidy)的表:
# A tibble: 9 x 5
game Player Score Weight WeightedScore
<int> <chr> <dbl> <dbl> <dbl>
1 1 player1 1 0.7 0.7
2 1 player2 2 0.6 1.2
3 1 player3 2 0.2 0.4
4 2 player1 2 0.8 1.6
5 2 player2 2 0.1 0.2
6 2 player3 2 0.7 1.4
7 3 player1 1 0.7 0.7
8 3 player2 1 0.6 0.6
9 3 player3 2 0.2 0.4
或继续:
# Build the tidy (game, player) table with Score, Weight and WeightedScore,
# then reshape to one row per game with <player>_Score, <player>_Weight and
# <player>_WeightedScore columns.
df %>%
  mutate(game = row_number()) %>%
  pivot_longer(
    cols = starts_with("player"),
    names_to = "Player",
    values_to = "Score"
  ) %>%
  pivot_longer(
    cols = starts_with("weightplayer"),
    names_to = "WPlayer",
    values_to = "Weight"
  ) %>%
  # The double pivot yields every score/weight pairing; keep matching players.
  filter(parse_number(Player) == parse_number(WPlayer)) %>%
  select(-WPlayer) %>%
  mutate(
    WeightedScore = Score * Weight
  ) %>%
  pivot_longer(cols = c(Score, Weight, WeightedScore)) %>%
  mutate(name = paste(Player, name, sep = "_")) %>%
  # `id_cols` is spelled out in full: the partial name `id` is not reliably
  # matched because `...` precedes `id_cols` in newer pivot_wider() signatures.
  pivot_wider(id_cols = game)
结束于:
# A tibble: 3 x 10
game player1_Score player1_Weight player1_WeightedScore player2_Score player2_Weight player2_WeightedScore player3_Score player3_Weight player3_WeightedScore
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 0.7 0.7 2 0.6 1.2 2 0.2 0.4
2 2 2 0.8 1.6 2 0.1 0.2 2 0.7 1.4
3 3 1 0.7 0.7 1 0.6 0.6 2 0.2 0.4
到目前为止,其他解决方案无法重现您的输出,可能是因为您没有按照我们预期的方式使用“权重”。
执行以下操作:
library(tidyverse)

# Sample data: one row per game; player* = scores, weightplayer* = weights.
df <- data.frame(
  player1 = c(1, 2, 1),
  player2 = c(2, 2, 1),
  player3 = c(2, 2, 2),
  weightplayer1 = c(0.7, 0.8, 0.7),
  weightplayer2 = c(0.6, 0.1, 0.6),
  weightplayer3 = c(0.2, 0.7, 0.2)
)

# Row index used as the game key when joining the totals back onto df.
df <- df %>% mutate(game = row_number())

# Per game, sum the weights of the players sharing each score, spread the sums
# into weighted<score>score columns (0 where no player has that score), and
# attach them to the original columns.
df %>%
  pivot_longer(
    cols = player1:weightplayer3,
    names_to = c(".value", "player_id"),
    # The backslash must be doubled: "\d" is not a valid escape in an R string.
    names_pattern = "(.+)(\\d)"
  ) %>%
  rename(score = player, weight = weightplayer, player = player_id) %>%
  group_by(game, score_col = paste0("weighted", score, "score")) %>%
  summarize(weightedscore = sum(weight)) %>%
  pivot_wider(
    names_from = score_col,
    values_from = weightedscore,
    values_fill = 0
  ) %>%
  left_join(df, .) %>%
  select(-game) %>%
  as.data.frame() # just to print all columns
#> `summarise()` has grouped output by 'game'. You can override using the `.groups` argument.
#> Joining, by = "game"
#> player1 player2 player3 weightplayer1 weightplayer2 weightplayer3
#> 1 1 2 2 0.7 0.6 0.2
#> 2 2 2 2 0.8 0.1 0.7
#> 3 1 1 2 0.7 0.6 0.2
#> weighted1score weighted2score
#> 1 0.7 0.8
#> 2 0.0 1.6
#> 3 1.3 0.2
由 reprex package (v2.0.1)
于 2021-09-20 创建
我们首先将数据重塑为整洁数据(每行一个观察值)——理想情况下应当一直保持这种形式,直到出报告为止;然后进行聚合计算,再把结果重塑回宽格式,并连接回原始 data.frame。
这是另一种可能更简洁的方法:
library(tidyverse)

# Row-wise approach: for each game, split the row into scores (x) and weights
# (y) by column-name prefix, then sum the weights of players with each score.
# NOTE: only scores 1 and 2 are handled; more score values need more sum() terms.
df %>%
  mutate(output = pmap(., ~ {
    x <- c(...)[startsWith(names(df), "player")]
    y <- c(...)[startsWith(names(df), "weight")]
    as_tibble(cbind(sum(y[x == 1]), sum(y[x == 2])))
  })) %>%
  unnest_wider(output) %>%
  # Backslashes are doubled so the regex receives \d and the backreference \1;
  # a bare "\d" does not parse and "\1" would be read as an octal escape.
  rename_with(~ gsub("V(\\d+)", "Weighted\\1scores", .), starts_with("V"))
# A tibble: 3 x 8
player1 player2 player3 weightplayer1 weightplayer2 weightplayer3 Weighted1scores
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2 2 0.7 0.6 0.2 0.7
2 2 2 2 0.8 0.1 0.7 0
3 1 1 2 0.7 0.6 0.2 1.3
# ... with 1 more variable: Weighted2scores <dbl>
我要计算一场比赛不同玩家的加权分数。我在前三列有他们的个人得分,在接下来的三列有他们的权重,这取决于比赛(每行代表一场比赛)。
谁能帮我有效地计算加权分数?
# Sample data: one row per game. The player* columns hold each player's score
# and the weightplayer* columns hold the matching weights.
df <- data.frame(
  player1 = c(1, 2, 1),
  player2 = c(2, 2, 1),
  player3 = c(2, 2, 2),
  weightplayer1 = c(0.7, 0.8, 0.7),
  weightplayer2 = c(0.6, 0.1, 0.6),
  weightplayer3 = c(0.2, 0.7, 0.2)
)
# player1 player2 player3 weightplayer1 weightplayer2 weightplayer3
# 1 2 2 0.7 0.6 0.2
# 2 2 2 0.8 0.1 0.7
# 1 1 2 0.7 0.6 0.2
我需要这样的输出:weighted1scores 列是得分为 1 的玩家的权重之和,weighted2scores 列是得分为 2 的玩家的权重之和。实际上可能的得分有很多种,所以真实的数据框会有很多这样的列(直到 weighted100scores 等),因此如果有高效的公式或循环就太好了。
# player1 player2 player3 weightplayer1 weightplayer2 weightplayer3 weighted1scores weighted2scores
# 1 2 2 0.7 0.6 0.2 0.7 0.8
# 2 2 2 0.8 0.1 0.7 0 1.6
# 1 1 2 0.7 0.6 0.2 1.3 0.2
您可以将表规范化为 3NF,然后连接和聚合:
library(tidyverse)

# Sample data: one row per game; player* = scores, weightplayer* = weights.
df <- data.frame(
  player1 = c(1, 2, 1),
  player2 = c(2, 2, 1),
  player3 = c(2, 2, 2),
  weightplayer1 = c(0.7, 0.8, 0.7),
  weightplayer2 = c(0.6, 0.1, 0.6),
  weightplayer3 = c(0.2, 0.7, 0.2)
)

# Long-format score table: one row per (game, player).
# `game` is the original row name kept as a character column; `player_id` is
# the trailing run of digits taken from the source column name.
scores <- transmute(
  pivot_longer(as_tibble(df, rownames = "game"), starts_with("player")),
  game,
  player_id = str_extract(name, "[0-9]+$"),
  score = value
)
scores
#> # A tibble: 9 × 3
#> game player_id score
#> <chr> <chr> <dbl>
#> 1 1 1 1
#> 2 1 2 2
#> 3 1 3 2
#> 4 2 1 2
#> 5 2 2 2
#> 6 2 3 2
#> 7 3 1 1
#> 8 3 2 1
#> 9 3 3 2
# Long-format weight table: one row per (game, player), built from the
# weightplayer* columns. `player_id` is the trailing run of digits in the
# source column name.
weights <- transmute(
  pivot_longer(as_tibble(df, rownames = "game"), starts_with("weight")),
  game,
  player_id = str_extract(name, "[0-9]+$"),
  weight = value
)
weights
#> # A tibble: 9 × 3
#> game player_id weight
#> <chr> <chr> <dbl>
#> 1 1 1 0.7
#> 2 1 2 0.6
#> 3 1 3 0.2
#> 4 2 1 0.8
#> 5 2 2 0.1
#> 6 2 3 0.7
#> 7 3 1 0.7
#> 8 3 2 0.6
#> 9 3 3 0.2
# Join scores to weights on their shared columns, total the weight within each
# (player_id, game) group, then spread players into weighted<player_id> columns.
# NOTE(review): the grouping key is player_id, not score, and the sample data
# has one row per (player_id, game), so sum(weight) simply returns each
# player's weight. Confirm whether grouping by score was the intent.
score_weights <- inner_join(scores, weights)
weight_totals <- summarise(
  group_by(score_weights, player_id, game),
  weighted = sum(weight)
)
pivot_wider(
  weight_totals,
  names_from = player_id,
  values_from = weighted,
  names_prefix = "weighted"
)
#> Joining, by = c("game", "player_id")
#> `summarise()` has grouped output by 'player_id'. You can override using the `.groups` argument.
#> # A tibble: 3 × 4
#> game weighted1 weighted2 weighted3
#> <chr> <dbl> <dbl> <dbl>
#> 1 1 0.7 0.6 0.2
#> 2 2 0.8 0.1 0.7
#> 3 3 0.7 0.6 0.2
由 reprex package (v2.0.1)
于 2021-09-20 创建
这是一种方法:
# Tidy table with one row per (game, player) carrying Score, Weight and their
# product.
score_long <- pivot_longer(
  mutate(df, game = row_number()),
  cols = starts_with("player"),
  names_to = "Player",
  values_to = "Score"
)
# The second pivot pairs every score column with every weight column of a game.
score_weight_pairs <- pivot_longer(
  score_long,
  cols = starts_with("weightplayer"),
  names_to = "WPlayer",
  values_to = "Weight"
)
# Keep only the pairs whose player numbers agree.
same_player <- filter(
  score_weight_pairs,
  parse_number(Player) == parse_number(WPlayer)
)
mutate(
  select(same_player, -WPlayer),
  WeightedScore = Score * Weight
)
你可以就此停下,直接返回这样一张整洁(tidy)的表:
# A tibble: 9 x 5
game Player Score Weight WeightedScore
<int> <chr> <dbl> <dbl> <dbl>
1 1 player1 1 0.7 0.7
2 1 player2 2 0.6 1.2
3 1 player3 2 0.2 0.4
4 2 player1 2 0.8 1.6
5 2 player2 2 0.1 0.2
6 2 player3 2 0.7 1.4
7 3 player1 1 0.7 0.7
8 3 player2 1 0.6 0.6
9 3 player3 2 0.2 0.4
或继续:
# Build the tidy (game, player) table with Score, Weight and WeightedScore,
# then reshape to one row per game with <player>_Score, <player>_Weight and
# <player>_WeightedScore columns.
df %>%
  mutate(game = row_number()) %>%
  pivot_longer(
    cols = starts_with("player"),
    names_to = "Player",
    values_to = "Score"
  ) %>%
  pivot_longer(
    cols = starts_with("weightplayer"),
    names_to = "WPlayer",
    values_to = "Weight"
  ) %>%
  # The double pivot yields every score/weight pairing; keep matching players.
  filter(parse_number(Player) == parse_number(WPlayer)) %>%
  select(-WPlayer) %>%
  mutate(
    WeightedScore = Score * Weight
  ) %>%
  pivot_longer(cols = c(Score, Weight, WeightedScore)) %>%
  mutate(name = paste(Player, name, sep = "_")) %>%
  # `id_cols` is spelled out in full: the partial name `id` is not reliably
  # matched because `...` precedes `id_cols` in newer pivot_wider() signatures.
  pivot_wider(id_cols = game)
结束于:
# A tibble: 3 x 10
game player1_Score player1_Weight player1_WeightedScore player2_Score player2_Weight player2_WeightedScore player3_Score player3_Weight player3_WeightedScore
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 0.7 0.7 2 0.6 1.2 2 0.2 0.4
2 2 2 0.8 1.6 2 0.1 0.2 2 0.7 1.4
3 3 1 0.7 0.7 1 0.6 0.6 2 0.2 0.4
到目前为止,其他解决方案无法重现您的输出,可能是因为您没有按照我们预期的方式使用“权重”。
执行以下操作:
library(tidyverse)

# Sample data: one row per game; player* = scores, weightplayer* = weights.
df <- data.frame(
  player1 = c(1, 2, 1),
  player2 = c(2, 2, 1),
  player3 = c(2, 2, 2),
  weightplayer1 = c(0.7, 0.8, 0.7),
  weightplayer2 = c(0.6, 0.1, 0.6),
  weightplayer3 = c(0.2, 0.7, 0.2)
)

# Row index used as the game key when joining the totals back onto df.
df <- df %>% mutate(game = row_number())

# Per game, sum the weights of the players sharing each score, spread the sums
# into weighted<score>score columns (0 where no player has that score), and
# attach them to the original columns.
df %>%
  pivot_longer(
    cols = player1:weightplayer3,
    names_to = c(".value", "player_id"),
    # The backslash must be doubled: "\d" is not a valid escape in an R string.
    names_pattern = "(.+)(\\d)"
  ) %>%
  rename(score = player, weight = weightplayer, player = player_id) %>%
  group_by(game, score_col = paste0("weighted", score, "score")) %>%
  summarize(weightedscore = sum(weight)) %>%
  pivot_wider(
    names_from = score_col,
    values_from = weightedscore,
    values_fill = 0
  ) %>%
  left_join(df, .) %>%
  select(-game) %>%
  as.data.frame() # just to print all columns
#> `summarise()` has grouped output by 'game'. You can override using the `.groups` argument.
#> Joining, by = "game"
#> player1 player2 player3 weightplayer1 weightplayer2 weightplayer3
#> 1 1 2 2 0.7 0.6 0.2
#> 2 2 2 2 0.8 0.1 0.7
#> 3 1 1 2 0.7 0.6 0.2
#> weighted1score weighted2score
#> 1 0.7 0.8
#> 2 0.0 1.6
#> 3 1.3 0.2
由 reprex package (v2.0.1)
于 2021-09-20 创建
我们首先将数据重塑为整洁数据(每行一个观察值)——理想情况下应当一直保持这种形式,直到出报告为止;然后进行聚合计算,再把结果重塑回宽格式,并连接回原始 data.frame。
这是另一种可能更简洁的方法:
library(tidyverse)

# Row-wise approach: for each game, split the row into scores (x) and weights
# (y) by column-name prefix, then sum the weights of players with each score.
# NOTE: only scores 1 and 2 are handled; more score values need more sum() terms.
df %>%
  mutate(output = pmap(., ~ {
    x <- c(...)[startsWith(names(df), "player")]
    y <- c(...)[startsWith(names(df), "weight")]
    as_tibble(cbind(sum(y[x == 1]), sum(y[x == 2])))
  })) %>%
  unnest_wider(output) %>%
  # Backslashes are doubled so the regex receives \d and the backreference \1;
  # a bare "\d" does not parse and "\1" would be read as an octal escape.
  rename_with(~ gsub("V(\\d+)", "Weighted\\1scores", .), starts_with("V"))
# A tibble: 3 x 8
player1 player2 player3 weightplayer1 weightplayer2 weightplayer3 Weighted1scores
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2 2 0.7 0.6 0.2 0.7
2 2 2 2 0.8 0.1 0.7 0
3 1 1 2 0.7 0.6 0.2 1.3
# ... with 1 more variable: Weighted2scores <dbl>