R:确定组内某些子组的最大值和最小值之间的差异
R: determining difference between max and min for certain subgroups within groups
下面的示例数据字段
Event Ethnicity Score
50 yd dash Asian 7
50 yd dash Afr. Am 8
50 yd dash White 5
Hurdle Asian 6
Hurdle Afr. Am 8
Hurdle White 9
我想计算每个项目(Event)中某些种族之间的得分差异,最好使用 dplyr 或 tidyverse 中的函数,不过任何答案或帮助我都欢迎。比如亚裔组和白人组在每个项目中的差距,
例如,亚洲人 (7) - 白人 (5) = 差异 (2),
导致类似于以下的输出:
Event Difference
50 yd dash 2
Hurdle -3
使用以下内容应该可以帮助您:
library(tidyverse)
# Reshape to one row per Event with one column per Ethnicity, then subtract.
# pivot_wider() is the current replacement for the superseded spread().
df %>%
  pivot_wider(names_from = Ethnicity, values_from = Score) %>%
  mutate(Difference = Asian - White) %>%
  # Keep the result columns by name so that every ethnicity column is dropped,
  # including ones that are not listed in the sample data.
  select(Event, Difference)
# Resulting values for the sample data:
#   Event       Difference
#   50 yd dash           2
#   Hurdle              -3
数据。
# Sample data as a plain data.frame: Event and Ethnicity are factors with
# explicitly ordered levels, Score is integer.
df <- data.frame(
  Event = factor(
    rep(c("50 yd dash", "Hurdle"), each = 3),
    levels = c("50 yd dash", "Hurdle")
  ),
  Ethnicity = factor(
    rep(c("Asian", "Afr. Am", "White"), times = 2),
    levels = c("Afr. Am", "Asian", "White")
  ),
  Score = c(7L, 8L, 5L, 6L, 8L, 9L)
)
@AntoniosK 已经发布了用 read.table 读取 OP 所给数据的方法,但我的做法略有不同:我没有删除列值中的空格,而是把含空格的值放在单引号之间。(必须使用单引号,因为参数 text 的值本身已经用双引号括起来了。)
# Parse the sample table from inline text. Values that contain spaces are
# wrapped in single quotes so read.table() keeps each one as a single field.
df <- read.table(
  header = TRUE,
  text = "
Event Ethnicity Score
'50 yd dash' Asian 7
'50 yd dash' 'Afr. Am' 8
'50 yd dash' White 5
Hurdle Asian 6
Hurdle 'Afr. Am' 8
Hurdle White 9
"
)
# Per event, subtract the highest White score from the highest Asian score.
# Subsetting Score by Ethnicity inside summarise() needs no reshape and
# tolerates several scores per ethnicity within one event.
# TRUE is spelled out because the shorthand T can be reassigned.
df %>%
  group_by(Event) %>%
  summarise(
    Difference = max(Score[Ethnicity == "Asian"], na.rm = TRUE) -
      max(Score[Ethnicity == "White"], na.rm = TRUE)
  )
# # A tibble: 2 x 2
# Event Difference
# <chr> <dbl>
# 1 50 yd dash 2
# 2 Hurdle -3
数据:
# Sample data with character columns and a double Score column. The tibble
# class vector is attached after building the frame with base R.
df <- data.frame(
  Event = rep(c("50 yd dash", "Hurdle"), each = 3),
  Ethnicity = rep(c("Asian", "Afr. Am", "White"), times = 2),
  Score = c(7, 8, 5, 6, 8, 9),
  stringsAsFactors = FALSE
)
class(df) <- c("tbl_df", "tbl", "data.frame")
数据
# Parse the sample table from inline text. Spaces were removed from the
# values so the default whitespace separator splits the fields correctly.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
df <- read.table(text = "
Event Ethnicity Score
50yddash Asian 7
50yddash Afr.Am 8
50yddash White 5
Hurdle Asian 6
Hurdle Afr.Am 8
Hurdle White 9
", header = TRUE, stringsAsFactors = FALSE)
第一种方法,您手动指定感兴趣的种族:
library(dplyr)
# Within each event, subtract the White score from the Asian score.
summarise(
  group_by(df, Event),
  Diff = Score[Ethnicity == "Asian"] - Score[Ethnicity == "White"]
)
# # A tibble: 2 x 2
# Event Diff
# <chr> <int>
# 1 50yddash 2
# 2 Hurdle -3
您可以将这段代码用作函数(输入感兴趣的两个种族)。
第二种方法,您可以为所有独特的种族和事件组合创建所有差异:
library(tidyverse)
# Score difference between two ethnicities within one event.
#
# Arguments:
#   event, eth1, eth2  Event label and the two ethnicity labels; the result is
#                      Score(eth1) - Score(eth2) among the rows of that event.
#   data               Data frame with Event, Ethnicity and Score columns.
#                      Defaults to the global `df`, so existing three-argument
#                      calls keep working.
#
# Returns a single NA of Score's type when either ethnicity has no row for the
# event, so the vectorised wrapper below always simplifies to an atomic vector
# instead of falling back to a list.
f <- function(event, eth1, eth2, data = df) {
  in_event <- data$Event == event
  score1 <- data$Score[in_event & data$Ethnicity == eth1]
  score2 <- data$Score[in_event & data$Ethnicity == eth2]
  if (length(score1) == 0L || length(score2) == 0L) {
    # Indexing with NA yields one missing value of the same type as Score.
    return(data$Score[NA_integer_])
  }
  score1 - score2
}
# Vectorise over the three label arguments only; `data` is passed through whole.
f <- Vectorize(f, vectorize.args = c("event", "eth1", "eth2"))
# Build every unordered pair of ethnicities (one row per pair, columns X1/X2),
# attach the full list of events to each pair, expand to one row per
# pair-event combination, and compute the score difference for each row.
data.frame(t(combn(unique(df$Ethnicity), 2)), stringsAsFactors = FALSE) %>%
  mutate(Event = list(unique(df$Event))) %>%
  # Name the list-column explicitly: calling unnest() without columns is
  # deprecated in tidyr >= 1.0.0.
  unnest(Event) %>%
  mutate(Diff = f(Event, X1, X2))
# X1 X2 Event Diff
# 1 Asian Afr.Am 50yddash -1
# 2 Asian Afr.Am Hurdle -2
# 3 Asian White 50yddash 2
# 4 Asian White Hurdle -3
# 5 Afr.Am White 50yddash 3
# 6 Afr.Am White Hurdle -1
此过程按种族在数据中首次出现的顺序生成不重复的组合,每对种族只计算一次差值。如果您想要两个方向的差值(即既有 Asian − White,也有 White − Asian),可以使用:
# Cross every event with every ordered pair of ethnicities, discard the
# pairs where both sides are the same ethnicity, and compute the difference.
expand.grid(
  Event = unique(df$Event),
  X1 = unique(df$Ethnicity),
  X2 = unique(df$Ethnicity)
) %>%
  filter(X1 != X2) %>%
  mutate(Diff = f(Event, X1, X2))
# Event X1 X2 Diff
# 1 50yddash Afr.Am Asian 1
# 2 Hurdle Afr.Am Asian 2
# 3 50yddash White Asian -2
# 4 Hurdle White Asian 3
# 5 50yddash Asian Afr.Am -1
# 6 Hurdle Asian Afr.Am -2
# 7 50yddash White Afr.Am -3
# 8 Hurdle White Afr.Am 1
# 9 50yddash Asian White 2
# 10 Hurdle Asian White -3
# 11 50yddash Afr.Am White 3
# 12 Hurdle Afr.Am White -1
下面的示例数据字段
Event Ethnicity Score
50 yd dash Asian 7
50 yd dash Afr. Am 8
50 yd dash White 5
Hurdle Asian 6
Hurdle Afr. Am 8
Hurdle White 9
我想计算每个项目(Event)中某些种族之间的得分差异,最好使用 dplyr 或 tidyverse 中的函数,不过任何答案或帮助我都欢迎。比如亚裔组和白人组在每个项目中的差距,
例如,亚洲人 (7) - 白人 (5) = 差异 (2),
导致类似于以下的输出:
Event Difference
50 yd dash 2
Hurdle -3
使用以下内容应该可以帮助您:
library(tidyverse)
# Reshape to one row per Event with one column per Ethnicity, then subtract.
# pivot_wider() is the current replacement for the superseded spread().
df %>%
  pivot_wider(names_from = Ethnicity, values_from = Score) %>%
  mutate(Difference = Asian - White) %>%
  # Keep the result columns by name so that every ethnicity column is dropped,
  # including ones that are not listed in the sample data.
  select(Event, Difference)
# Resulting values for the sample data:
#   Event       Difference
#   50 yd dash           2
#   Hurdle              -3
数据。
# Sample data as a plain data.frame: Event and Ethnicity are factors with
# explicitly ordered levels, Score is integer.
df <- data.frame(
  Event = factor(
    rep(c("50 yd dash", "Hurdle"), each = 3),
    levels = c("50 yd dash", "Hurdle")
  ),
  Ethnicity = factor(
    rep(c("Asian", "Afr. Am", "White"), times = 2),
    levels = c("Afr. Am", "Asian", "White")
  ),
  Score = c(7L, 8L, 5L, 6L, 8L, 9L)
)
@AntoniosK 已经发布了用 read.table 读取 OP 所给数据的方法,但我的做法略有不同:我没有删除列值中的空格,而是把含空格的值放在单引号之间。(必须使用单引号,因为参数 text 的值本身已经用双引号括起来了。)
# Parse the sample table from inline text. Values that contain spaces are
# wrapped in single quotes so read.table() keeps each one as a single field.
df <- read.table(
  header = TRUE,
  text = "
Event Ethnicity Score
'50 yd dash' Asian 7
'50 yd dash' 'Afr. Am' 8
'50 yd dash' White 5
Hurdle Asian 6
Hurdle 'Afr. Am' 8
Hurdle White 9
"
)
# Per event, subtract the highest White score from the highest Asian score.
# Subsetting Score by Ethnicity inside summarise() needs no reshape and
# tolerates several scores per ethnicity within one event.
# TRUE is spelled out because the shorthand T can be reassigned.
df %>%
  group_by(Event) %>%
  summarise(
    Difference = max(Score[Ethnicity == "Asian"], na.rm = TRUE) -
      max(Score[Ethnicity == "White"], na.rm = TRUE)
  )
# # A tibble: 2 x 2
# Event Difference
# <chr> <dbl>
# 1 50 yd dash 2
# 2 Hurdle -3
数据:
# Sample data with character columns and a double Score column. The tibble
# class vector is attached after building the frame with base R.
df <- data.frame(
  Event = rep(c("50 yd dash", "Hurdle"), each = 3),
  Ethnicity = rep(c("Asian", "Afr. Am", "White"), times = 2),
  Score = c(7, 8, 5, 6, 8, 9),
  stringsAsFactors = FALSE
)
class(df) <- c("tbl_df", "tbl", "data.frame")
数据
# Parse the sample table from inline text. Spaces were removed from the
# values so the default whitespace separator splits the fields correctly.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
df <- read.table(text = "
Event Ethnicity Score
50yddash Asian 7
50yddash Afr.Am 8
50yddash White 5
Hurdle Asian 6
Hurdle Afr.Am 8
Hurdle White 9
", header = TRUE, stringsAsFactors = FALSE)
第一种方法,您手动指定感兴趣的种族:
library(dplyr)
# Within each event, subtract the White score from the Asian score.
summarise(
  group_by(df, Event),
  Diff = Score[Ethnicity == "Asian"] - Score[Ethnicity == "White"]
)
# # A tibble: 2 x 2
# Event Diff
# <chr> <int>
# 1 50yddash 2
# 2 Hurdle -3
您可以将这段代码用作函数(输入感兴趣的两个种族)。
第二种方法,您可以为所有独特的种族和事件组合创建所有差异:
library(tidyverse)
# Score difference between two ethnicities within one event.
#
# Arguments:
#   event, eth1, eth2  Event label and the two ethnicity labels; the result is
#                      Score(eth1) - Score(eth2) among the rows of that event.
#   data               Data frame with Event, Ethnicity and Score columns.
#                      Defaults to the global `df`, so existing three-argument
#                      calls keep working.
#
# Returns a single NA of Score's type when either ethnicity has no row for the
# event, so the vectorised wrapper below always simplifies to an atomic vector
# instead of falling back to a list.
f <- function(event, eth1, eth2, data = df) {
  in_event <- data$Event == event
  score1 <- data$Score[in_event & data$Ethnicity == eth1]
  score2 <- data$Score[in_event & data$Ethnicity == eth2]
  if (length(score1) == 0L || length(score2) == 0L) {
    # Indexing with NA yields one missing value of the same type as Score.
    return(data$Score[NA_integer_])
  }
  score1 - score2
}
# Vectorise over the three label arguments only; `data` is passed through whole.
f <- Vectorize(f, vectorize.args = c("event", "eth1", "eth2"))
# Build every unordered pair of ethnicities (one row per pair, columns X1/X2),
# attach the full list of events to each pair, expand to one row per
# pair-event combination, and compute the score difference for each row.
data.frame(t(combn(unique(df$Ethnicity), 2)), stringsAsFactors = FALSE) %>%
  mutate(Event = list(unique(df$Event))) %>%
  # Name the list-column explicitly: calling unnest() without columns is
  # deprecated in tidyr >= 1.0.0.
  unnest(Event) %>%
  mutate(Diff = f(Event, X1, X2))
# X1 X2 Event Diff
# 1 Asian Afr.Am 50yddash -1
# 2 Asian Afr.Am Hurdle -2
# 3 Asian White 50yddash 2
# 4 Asian White Hurdle -3
# 5 Afr.Am White 50yddash 3
# 6 Afr.Am White Hurdle -1
此过程按种族在数据中首次出现的顺序生成不重复的组合,每对种族只计算一次差值。如果您想要两个方向的差值(即既有 Asian − White,也有 White − Asian),可以使用:
# Cross every event with every ordered pair of ethnicities, discard the
# pairs where both sides are the same ethnicity, and compute the difference.
expand.grid(
  Event = unique(df$Event),
  X1 = unique(df$Ethnicity),
  X2 = unique(df$Ethnicity)
) %>%
  filter(X1 != X2) %>%
  mutate(Diff = f(Event, X1, X2))
# Event X1 X2 Diff
# 1 50yddash Afr.Am Asian 1
# 2 Hurdle Afr.Am Asian 2
# 3 50yddash White Asian -2
# 4 Hurdle White Asian 3
# 5 50yddash Asian Afr.Am -1
# 6 Hurdle Asian Afr.Am -2
# 7 50yddash White Afr.Am -3
# 8 Hurdle White Afr.Am 1
# 9 50yddash Asian White 2
# 10 Hurdle Asian White -3
# 11 50yddash Afr.Am White 3
# 12 Hurdle Afr.Am White -1