R:根据不同的列计算累计计数
R running count based on different column
我想根据某个值此前在 ColumnB 中出现的次数,计算它在 ColumnA 中的累计计数(running count)。理想情况下,此计数还可以按 ColumnC 分组计算。
例如,我想在这里得到获胜者此前失败次数的累计值,或失败者此前获胜次数的累计值:
# Create the example data frame: one row per match, with the year it was
# played, the winner and the loser.
# NOTE(review): the code below assumes data.table and dplyr are attached;
# no library() call is visible in this snippet.
year <- c(2017, 2017, 2017, 2017, 2017, 2016, 2016, 2016, 2016, 2016)
winner <- c('sam', 'ryan', 'sally', 'sally', 'ryan', 'sally', 'mike', 'ryan', 'mike', 'sam')
loser <- c('mike', 'mike', 'ryan', 'sam', 'sam', 'mike', 'sally', 'mike', 'ryan', 'sally')
df <- data.frame(year, winner, loser)
# Successful method for getting the winner's cumulative wins or the loser's
# cumulative losses: number the rows within each winner / loser group.
df <- as.data.table(df)[, winner_wins := seq(.N), by = "winner"][]
df <- as.data.table(df)[, loser_losses := seq(.N), by = "loser"][]
# Successful method for the same counts restricted to each year: group by
# year as well before numbering the rows (overwrites the columns above).
df <- df %>% group_by(year, winner) %>% mutate(winner_wins = row_number())
df <- df %>% group_by(year, loser) %>% mutate(loser_losses = row_number())
# Failed attempt to get the winner's cumulative losses by year.
# `winner == loser` compares the two columns within the same row, and
# `year == year` compares the column with itself, so this never looks at a
# winner's earlier appearances in the loser column.
df <- df %>% group_by(year) %>% mutate(winner_losses = cumsum(winner == loser & year == year))
我希望输出是我的原始数据框,但有四个新列:winner_cum_wins、winner_cum_losses、loser_cum_wins、loser_cum_losses.
# Example data: five matches per year, 2017 first and then 2016.
year <- rep(c(2017, 2016), each = 5)
winner <- c(
  "sam", "ryan", "sally", "sally", "ryan",
  "sally", "mike", "ryan", "mike", "sam"
)
loser <- c(
  "mike", "mike", "ryan", "sam", "sam",
  "mike", "sally", "mike", "ryan", "sally"
)
df <- data.frame(year = year, winner = winner, loser = loser)
# Successful method for the winner's cumulative wins and the loser's
# cumulative losses by year: number the rows within each (year, player) group.
df <- df %>%
  group_by(year, winner) %>%
  mutate(winner_wins = row_number()) %>%
  group_by(year, loser) %>%
  mutate(loser_losses = row_number())
我创建了以下函数,用来计算 `x` 中的每个元素此前在 `y` 中出现的次数。
#' Running count of how often each element of `x` has appeared in `y`.
#'
#' For every position `i`, counts how many of `y[1]`, ..., `y[i]` are equal to
#' `x[i]`. Position `i` itself is included in the comparison.
#'
#' @param x Vector of values to look up (e.g. the winner of each match).
#' @param y Vector to search, aligned row by row with `x` (e.g. the loser of
#'   each match).
#' @return A numeric vector with the same length as `x`; `numeric(0)` when `x`
#'   is empty.
count_wins_losses <- function(x, y) {
  n <- length(x)
  counts <- numeric(n)
  # seq_len() instead of 1:n, so empty input produces an empty result rather
  # than iterating over c(1, 0).
  for (i in seq_len(n)) {
    # Compare x[i] with the first i entries of y in one vectorised step.
    counts[i] <- sum(x[i] == y[seq_len(i)])
  }
  counts
}
我使用 `split` 按年份拆分数据,并将该函数分别应用于每一年。
# Count the cumulative wins of the losers, one year at a time.
# The helper is defined as count_wins_losses(); splitting on df's own year
# column (.$year) keeps the pieces aligned with df instead of relying on a
# separate global `year` vector.
loser_cum_wins <- df %>%
  split(.$year) %>%
  lapply(function(x) count_wins_losses(x$loser, x$winner)) %>%
  unlist()
# Count the cumulative losses of the winners, one year at a time.
winner_cum_losses <- df %>%
  split(.$year) %>%
  lapply(function(x) count_wins_losses(x$winner, x$loser)) %>%
  unlist()
这里使用 `arrange` 对 `df` 按年份排序,使 `df` 中的年份顺序与 `loser_cum_wins`、`winner_cum_losses` 中的顺序一致。
# Sort df by year before attaching the vectors: they were built from the
# per-year pieces returned by split(), so the rows must be in year order for
# the values to line up.
df <- arrange(df, year)
df$loser_cum_wins <- loser_cum_wins
df$winner_cum_losses <- winner_cum_losses
# Show the result.
df
## A tibble: 10 x 7
## Groups: year, loser [6]
# year winner loser winner_wins loser_losses loser_cum_wins winner_cum_losses
# <dbl> <chr> <chr> <int> <int> <dbl> <dbl>
# 1 2016. sally mike 1 1 0. 0.
# 2 2016. mike sally 1 1 1. 1.
# 3 2016. ryan mike 1 2 1. 0.
# 4 2016. mike ryan 2 1 1. 2.
# 5 2016. sam sally 1 2 1. 0.
# 6 2017. sam mike 1 1 0. 0.
# 7 2017. ryan mike 1 2 0. 0.
# 8 2017. sally ryan 1 1 1. 0.
# 9 2017. sally sam 2 1 1. 0.
#10 2017. ryan sam 2 2 1. 1.
另一种使用 `count_wins_losses()` 函数的方法是:先按 `year` 过滤 `df`,对每个子集分别调用该函数,然后再合并结果。
# Split the data by hand: one data frame per year.
df2016 <- df %>%
  filter(year == 2016)
df2017 <- df %>%
  filter(year == 2017)
# Apply the helper within each year. It is defined as count_wins_losses();
# the first argument is the column to look up, the second the one to search.
df2016$loser_cum_wins <- with(df2016, count_wins_losses(loser, winner))
df2016$winner_cum_losses <- with(df2016, count_wins_losses(winner, loser))
df2017$loser_cum_wins <- with(df2017, count_wins_losses(loser, winner))
df2017$winner_cum_losses <- with(df2017, count_wins_losses(winner, loser))
# Stack the two years back into a single data frame.
rbind(df2016, df2017)
这应该会为您提供所需的所有数据框:
library(tidyverse)
# Reshape to one row per (match, player) and take running totals of wins and
# losses for each player within each year. The totals include the current
# match.
df %>%
  group_by(year) %>%
  # Number the matches within each year so their order survives the reshape.
  mutate(match_id_year = row_number()) %>%
  # Pivot only the winner/loser columns; a negative selection would also
  # sweep up any helper columns already added to df.
  pivot_longer(
    cols = c(winner, loser),
    names_to = "outcome",
    values_to = "name"
  ) %>%
  arrange(year, match_id_year) %>%
  group_by(year, name) %>%
  mutate(
    cum_wins_year = cumsum(outcome == "winner"),
    cum_losses_year = cumsum(outcome == "loser")
  ) %>%
  # Drop the grouping so later operations are not silently done per group.
  ungroup()
我想根据某个值此前在 ColumnB 中出现的次数,计算它在 ColumnA 中的累计计数(running count)。理想情况下,此计数还可以按 ColumnC 分组计算。
例如,我想在这里得到获胜者此前失败次数的累计值,或失败者此前获胜次数的累计值:
# Create the example data frame: one row per match, with the year it was
# played, the winner and the loser.
# NOTE(review): the code below assumes data.table and dplyr are attached;
# no library() call is visible in this snippet.
year <- c(2017, 2017, 2017, 2017, 2017, 2016, 2016, 2016, 2016, 2016)
winner <- c('sam', 'ryan', 'sally', 'sally', 'ryan', 'sally', 'mike', 'ryan', 'mike', 'sam')
loser <- c('mike', 'mike', 'ryan', 'sam', 'sam', 'mike', 'sally', 'mike', 'ryan', 'sally')
df <- data.frame(year, winner, loser)
# Successful method for getting the winner's cumulative wins or the loser's
# cumulative losses: number the rows within each winner / loser group.
df <- as.data.table(df)[, winner_wins := seq(.N), by = "winner"][]
df <- as.data.table(df)[, loser_losses := seq(.N), by = "loser"][]
# Successful method for the same counts restricted to each year: group by
# year as well before numbering the rows (overwrites the columns above).
df <- df %>% group_by(year, winner) %>% mutate(winner_wins = row_number())
df <- df %>% group_by(year, loser) %>% mutate(loser_losses = row_number())
# Failed attempt to get the winner's cumulative losses by year.
# `winner == loser` compares the two columns within the same row, and
# `year == year` compares the column with itself, so this never looks at a
# winner's earlier appearances in the loser column.
df <- df %>% group_by(year) %>% mutate(winner_losses = cumsum(winner == loser & year == year))
我希望输出是我的原始数据框,但有四个新列:winner_cum_wins、winner_cum_losses、loser_cum_wins、loser_cum_losses.
# Example data: five matches per year, 2017 first and then 2016.
year <- rep(c(2017, 2016), each = 5)
winner <- c(
  "sam", "ryan", "sally", "sally", "ryan",
  "sally", "mike", "ryan", "mike", "sam"
)
loser <- c(
  "mike", "mike", "ryan", "sam", "sam",
  "mike", "sally", "mike", "ryan", "sally"
)
df <- data.frame(year = year, winner = winner, loser = loser)
# Successful method for the winner's cumulative wins and the loser's
# cumulative losses by year: number the rows within each (year, player) group.
df <- df %>%
  group_by(year, winner) %>%
  mutate(winner_wins = row_number()) %>%
  group_by(year, loser) %>%
  mutate(loser_losses = row_number())
我创建了以下函数,用来计算 `x` 中的每个元素此前在 `y` 中出现的次数。
#' Running count of how often each element of `x` has appeared in `y`.
#'
#' For every position `i`, counts how many of `y[1]`, ..., `y[i]` are equal to
#' `x[i]`. Position `i` itself is included in the comparison.
#'
#' @param x Vector of values to look up (e.g. the winner of each match).
#' @param y Vector to search, aligned row by row with `x` (e.g. the loser of
#'   each match).
#' @return A numeric vector with the same length as `x`; `numeric(0)` when `x`
#'   is empty.
count_wins_losses <- function(x, y) {
  n <- length(x)
  counts <- numeric(n)
  # seq_len() instead of 1:n, so empty input produces an empty result rather
  # than iterating over c(1, 0).
  for (i in seq_len(n)) {
    # Compare x[i] with the first i entries of y in one vectorised step.
    counts[i] <- sum(x[i] == y[seq_len(i)])
  }
  counts
}
我使用 `split` 按年份拆分数据,并将该函数分别应用于每一年。
# Count the cumulative wins of the losers, one year at a time.
# The helper is defined as count_wins_losses(); splitting on df's own year
# column (.$year) keeps the pieces aligned with df instead of relying on a
# separate global `year` vector.
loser_cum_wins <- df %>%
  split(.$year) %>%
  lapply(function(x) count_wins_losses(x$loser, x$winner)) %>%
  unlist()
# Count the cumulative losses of the winners, one year at a time.
winner_cum_losses <- df %>%
  split(.$year) %>%
  lapply(function(x) count_wins_losses(x$winner, x$loser)) %>%
  unlist()
这里使用 `arrange` 对 `df` 按年份排序,使 `df` 中的年份顺序与 `loser_cum_wins`、`winner_cum_losses` 中的顺序一致。
# Sort df by year before attaching the vectors: they were built from the
# per-year pieces returned by split(), so the rows must be in year order for
# the values to line up.
df <- arrange(df, year)
df$loser_cum_wins <- loser_cum_wins
df$winner_cum_losses <- winner_cum_losses
# Show the result.
df
## A tibble: 10 x 7
## Groups: year, loser [6]
# year winner loser winner_wins loser_losses loser_cum_wins winner_cum_losses
# <dbl> <chr> <chr> <int> <int> <dbl> <dbl>
# 1 2016. sally mike 1 1 0. 0.
# 2 2016. mike sally 1 1 1. 1.
# 3 2016. ryan mike 1 2 1. 0.
# 4 2016. mike ryan 2 1 1. 2.
# 5 2016. sam sally 1 2 1. 0.
# 6 2017. sam mike 1 1 0. 0.
# 7 2017. ryan mike 1 2 0. 0.
# 8 2017. sally ryan 1 1 1. 0.
# 9 2017. sally sam 2 1 1. 0.
#10 2017. ryan sam 2 2 1. 1.
另一种使用 `count_wins_losses()` 函数的方法是:先按 `year` 过滤 `df`,对每个子集分别调用该函数,然后再合并结果。
# Split the data by hand: one data frame per year.
df2016 <- df %>%
  filter(year == 2016)
df2017 <- df %>%
  filter(year == 2017)
# Apply the helper within each year. It is defined as count_wins_losses();
# the first argument is the column to look up, the second the one to search.
df2016$loser_cum_wins <- with(df2016, count_wins_losses(loser, winner))
df2016$winner_cum_losses <- with(df2016, count_wins_losses(winner, loser))
df2017$loser_cum_wins <- with(df2017, count_wins_losses(loser, winner))
df2017$winner_cum_losses <- with(df2017, count_wins_losses(winner, loser))
# Stack the two years back into a single data frame.
rbind(df2016, df2017)
这应该会为您提供所需的所有数据框:
library(tidyverse)
# Reshape to one row per (match, player) and take running totals of wins and
# losses for each player within each year. The totals include the current
# match.
df %>%
  group_by(year) %>%
  # Number the matches within each year so their order survives the reshape.
  mutate(match_id_year = row_number()) %>%
  # Pivot only the winner/loser columns; a negative selection would also
  # sweep up any helper columns already added to df.
  pivot_longer(
    cols = c(winner, loser),
    names_to = "outcome",
    values_to = "name"
  ) %>%
  arrange(year, match_id_year) %>%
  group_by(year, name) %>%
  mutate(
    cum_wins_year = cumsum(outcome == "winner"),
    cum_losses_year = cumsum(outcome == "loser")
  ) %>%
  # Drop the grouping so later operations are not silently done per group.
  ungroup()