按组划分的四分位数在数据框中保存为新变量

Quartiles by group saved as new variable in data frame

我有这样的数据:

# Example data: ids 1..9, each appearing three times with yr = 1, 2, 3.
id <- rep(as.numeric(1:9), each = 3)
yr <- rep(c(1, 2, 3), times = 9)
# gr rises by one with each yr; ids 1-3 start at 3, ids 4-6 at 4, ids 7-9 at 5.
gr <- rep(c(2, 3, 4), each = 9) + yr
# x values, listed in the same row order as id / yr / gr.
x <- c(
  33, 48, 31, 41, 31, 36, 25, 38, 28,
  17, 39, 53, 60, 60, 19, 39, 34, 47,
  20, 28, 38, 15, 17, 49, 48, 45, 39
)
df <- data.frame(id = id, yr = yr, gr = gr, x = x)

   id yr gr  x
1   1  1  3 33
2   1  2  4 48
3   1  3  5 31
4   2  1  3 41
5   2  2  4 31
6   2  3  5 36
7   3  1  3 25
8   3  2  4 38
9   3  3  5 28
10  4  1  4 17
11  4  2  5 39
12  4  3  6 53
13  5  1  4 60
14  5  2  5 60
15  5  3  6 19
16  6  1  4 39
17  6  2  5 34
18  6  3  6 47
19  7  1  5 20
20  7  2  6 28
21  7  3  7 38
22  8  1  5 15
23  8  2  6 17
24  8  3  7 49
25  9  1  5 48
26  9  2  6 45
27  9  3  7 39

我想在数据框中创建一个新变量,其中包含在 "yr" 和 "gr" 的每个唯一组合中计算的 "x" 的分位数。也就是说,我不想根据示例中的所有 27 行数据找到 "x" 的分位数,而是想通过两个分组变量来计算分位数:yr 和 gr。例如,计算 yr = 1 且 gr = 3、yr = 1 且 gr = 4 等各个组合下 "x" 的分位数。

计算完这些值后,我希望将它们作为单个列附加到数据框中,比如 "x_quant"。

我能够将数据分成我需要的单独组,并且我知道如何计算分位数,但我无法以一种适合在现有数据框中创建新列的方式将这两个步骤组合起来。

如能提供帮助,我们将不胜感激!非常感谢!

~kj

# Build one sortable grouping key per row from "yr" and "gr" (e.g. "1  3").
df$y <- paste(df$yr, "", df$gr)
# Sort by the key so rows of the same group are adjacent. The key is a string,
# so rows are ordered as text rather than numerically.
df.ordered <- df[order(df$y), ]
grp <- split(df.ordered, df.ordered$y)
grp

# For each group, compute the default quantiles of x (0%, 25%, 50%, 75%, 100%)
# and collapse them into one string; the result is named by the group key.
q <- vapply(
  grp,
  function(g) paste(quantile(g$x), collapse = "  "),
  character(1)
)
# Look up each row's string by its own group key rather than assuming that
# every group has exactly 3 rows.
x_quant <- unname(q[df.ordered$y])

# Append the quantile string to the data frame under a descriptive column name,
# then drop the helper key column.
df.ordered$xq_0_25_50_75_100 <- x_quant
df.ordered$y <- NULL
df <- df.ordered
df

输出:

> # turn "yr" and "gr" into sortable column
> df$y <- paste(df$yr,"",df$gr)
> df.ordered <- df[order(df$y),] #sort df based on group
> grp <- split(df.ordered,df.ordered$y);grp
$`1  3`
  id yr gr  x    y
1  1  1  3 33 1  3
4  2  1  3 41 1  3
7  3  1  3 25 1  3

$`1  4`
   id yr gr  x    y
10  4  1  4 17 1  4
13  5  1  4 60 1  4
16  6  1  4 39 1  4

$`1  5`
   id yr gr  x    y
19  7  1  5 20 1  5
22  8  1  5 15 1  5
25  9  1  5 48 1  5

$`2  4`
  id yr gr  x    y
2  1  2  4 48 2  4
5  2  2  4 31 2  4
8  3  2  4 38 2  4

$`2  5`
   id yr gr  x    y
11  4  2  5 39 2  5
14  5  2  5 60 2  5
17  6  2  5 34 2  5

$`2  6`
   id yr gr  x    y
20  7  2  6 28 2  6
23  8  2  6 17 2  6
26  9  2  6 45 2  6

$`3  5`
  id yr gr  x    y
3  1  3  5 31 3  5
6  2  3  5 36 3  5
9  3  3  5 28 3  5

$`3  6`
   id yr gr  x    y
12  4  3  6 53 3  6
15  5  3  6 19 3  6
18  6  3  6 47 3  6

$`3  7`
   id yr gr  x    y
21  7  3  7 38 3  7
24  8  3  7 49 3  7
27  9  3  7 39 3  7

> # get quantiles and turn results into string
> q <- vector('list')
> for (i in 1:length(grp)) {
+ a <- quantile(grp[[i]]$x)
+ q[i] <- paste(a[1],"",a[2],"",a[3],"",a[4],"",a[5])
+ }
> x_quant <- unlist(sapply(q, `[`, 1))
> x_quant <- rep(x_quant,each=3)
> # append quantile back to data frame
> df.ordered$xq_0_25_50_75_100 <- x_quant
> df.ordered$y <- NULL
> df <- df.ordered
> df
   id yr gr  x      xq_0_25_50_75_100
1   1  1  3 33     25  29  33  37  41
4   2  1  3 41     25  29  33  37  41
7   3  1  3 25     25  29  33  37  41
10  4  1  4 17   17  28  39  49.5  60
13  5  1  4 60   17  28  39  49.5  60
16  6  1  4 39   17  28  39  49.5  60
19  7  1  5 20   15  17.5  20  34  48
22  8  1  5 15   15  17.5  20  34  48
25  9  1  5 48   15  17.5  20  34  48
2   1  2  4 48   31  34.5  38  43  48
5   2  2  4 31   31  34.5  38  43  48
8   3  2  4 38   31  34.5  38  43  48
11  4  2  5 39 34  36.5  39  49.5  60
14  5  2  5 60 34  36.5  39  49.5  60
17  6  2  5 34 34  36.5  39  49.5  60
20  7  2  6 28 17  22.5  28  36.5  45
23  8  2  6 17 17  22.5  28  36.5  45
26  9  2  6 45 17  22.5  28  36.5  45
3   1  3  5 31 28  29.5  31  33.5  36
6   2  3  5 36 28  29.5  31  33.5  36
9   3  3  5 28 28  29.5  31  33.5  36
12  4  3  6 53     19  33  47  50  53
15  5  3  6 19     19  33  47  50  53
18  6  3  6 47     19  33  47  50  53
21  7  3  7 38   38  38.5  39  44  49
24  8  3  7 49   38  38.5  39  44  49
27  9  3  7 39   38  38.5  39  44  49
>