ddply 中每组的分位数
Quantile per group in ddply
我试图把分组后的数据按四分位数分类,因此向数据框 z 添加一列 diam_quart,
把每一行分配到 1、2、3 或 4 这四个类别之一:
quart = ddply(z, .(Code), transform,
diam_quart =
ifelse(Diameter <= quantile(Diameter , 0.25), 1,
ifelse(Diameter <= quantile(Diameter , 0.5), 2,
ifelse(Diameter <= quantile(Diameter , 0.75), 3, 4)))
)
但是,检查结果时,我发现每个四分位数类别中的频数并不相同。我用下面的代码来检查:
x <- ddply(quart, .(Code, diam_quart), summarize,
Diam_quartile = mean(diam_quart),
Frequency = length(Diameter))
x
根据定义,每个四分位数类别不应该包含相同的行数吗?如果是这样,那么 quantile 函数计算时针对的是否是整个数据集,而不是每个子集(即 ddply 调用中按 Code 划分的分组)?
不确定我的逻辑哪里出了问题。有人能指点一下吗?
这是我使用原始数据框得到的结果:
head(x, 16)
Code Diam_quartile Frequency
1 T1iOgP1 1 26
2 T1iOgP1 2 22
3 T1iOgP1 3 21
4 T1iOgP1 4 23
5 T1iOgP2 1 11
6 T1iOgP2 2 12
7 T1iOgP2 3 10
8 T1iOgP2 4 11
9 T1iOgP3 1 5
10 T1iOgP3 2 5
11 T1iOgP3 3 4
12 T1iOgP3 4 5
13 T1iRgP1 1 15
14 T1iRgP1 2 9
15 T1iRgP1 3 10
16 T1iRgP1 4 12
编辑:这是我的数据框相关列的前 200 行:
dput(z)
structure(list(Code = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L), .Label = c("T1iOgP1", "T1iOgP2", "T1iOgP3", "T1iRgP1", "T1iRgP2",
"T1iRgP3", "T1iRtP1", "T1iRtP2", "T1iRtP3", "T1rOgP1", "T1rOgP2",
"T1rOgP3", "T1rRgP1", "T1rRgP2", "T1rRgP3", "T1rRtP1", "T1rRtP2",
"T1rRtP3", "T1sOgP1", "T1sOgP2", "T1sOgP3", "T1sRgP1", "T1sRgP2",
"T1sRgP3", "T1sRtP1", "T1sRtP2", "T1sRtP3", "T2iOgP1", "T2iOgP2",
"T2iOgP3", "T2iRgP1", "T2iRgP2", "T2iRgP3", "T2iRtP1", "T2iRtP2",
"T2iRtP3", "T2rOgP1", "T2rOgP2", "T2rOgP3", "T2rRgP1", "T2rRgP2",
"T2rRgP3", "T2rRtP1", "T2rRtP2", "T2rRtP3", "T2sOgP1", "T2sOgP2",
"T2sOgP3", "T2sRgP1", "T2sRgP2", "T2sRgP3", "T2sRtP1", "T2sRtP2",
"T2sRtP3"), class = "factor"), Diameter = c(3.819718634, 2.705634033,
2.705634033, 3.978873577, 5.092958179, 7.957747155, 2.228169203,
1.114084602, 4.933803236, 7.480282325, 8.435211984, 7.639437268,
2.228169203, 2.387324146, 2.06901426, 10.50422624, 8.435211984,
4.456338407, 3.819718634, 2.228169203, 6.843662553, 4.13802852,
2.546479089, 4.965634224, 14.48309982, 13.36901522, 2.06901426,
2.06901426, 1.591549431, 2.705634033, 2.228169203, 2.546479089,
2.228169203, 1.909859317, 2.387324146, 7.480282325, 1.909859317,
3.183098862, 4.774648293, 9.390141642, 7.002817496, 7.480282325,
4.456338407, 3.342253805, 10.50422624, 12.89155039, 3.819718634,
8.435211984, 1.750704374, 11.30000096, 3.660563691, 3.501408748,
1.750704374, 1.591549431, 10.66338119, 3.501408748, 1.273239545,
2.228169203, 11.93662073, 3.183098862, 3.501408748, 1.750704374,
1.591549431, 1.273239545, 1.750704374, 12.09577567, 3.978873577,
2.705634033, 2.228169203, 3.501408748, 3.183098862, 1.432394488,
10.66338119, 1.432394488, 1.750704374, 2.228169203, 1.591549431,
1.432394488, 2.546479089, 2.387324146, 1.114084602, 2.546479089,
3.342253805, 3.978873577, 1.273239545, 1.273239545, 4.61549335,
4.13802852, 0.795774715, 7.798592212, 1.273239545, 2.06901426,
4.297183463, 4.297183463, 24.98732607, 6.207042781, 7.957747155,
3.023943919, 1.432394488, 5.252113122, 7.002817496, 3.819718634,
5.729577951, 18.97126922, 20.21267777, 3.978873577, 2.864788976,
1.750704374, 10.66338119, 6.366197724, 19.73521294, 5.729577951,
3.023943919, 12.41408556, 3.501408748, 21.16760743, 10.50422624,
2.228169203, 9.071831756, 11.77746579, 8.435211984, 6.207042781,
30.39859413, 8.912676813, 6.525352667, 1.909859317, 2.705634033,
20.37183272, 3.501408748, 5.888732894, 14.32394488, 7.321127382,
7.321127382, 3.023943919, 2.546479089, 3.342253805, 5.888732894,
2.06901426, 1.782535363, 4.965634224, 5.092958179, 14.32394488,
10.66338119, 16.55211408, 5.570423008, 2.228169203, 10.3450713,
2.864788976, 10.18591636, 4.456338407, 8.75352187, 6.68450761,
8.594366927, 1.909859317, 19.89436789, 1.591549431, 1.432394488,
1.750704374, 1.273239545, 1.273239545, 1.909859317, 2.546479089,
0.954929659, 2.705634033, 2.06901426, 0.954929659, 1.114084602,
1.273239545, 1.273239545, 1.273239545, 1.273239545, 1.909859317,
1.432394488, 1.273239545, 1.273239545, 1.909859317, 1.750704374,
5.252113122, 1.273239545, 3.501408748, 2.546479089, 7.161972439,
2.228169203, 1.909859317, 2.387324146, 4.456338407, 1.591549431,
3.501408748, 1.273239545, 1.750704374, 1.909859317, 2.705634033,
3.342253805, 1.909859317, 1.750704374, 2.06901426, 2.228169203,
2.546479089, 1.273239545, 1.750704374), Diam_quartile = c(3,
2, 2, 3, 4, 4, 2, 1, 3, 4, 4, 4, 2, 2, 1, 4, 4, 3, 3, 2, 4, 3,
2, 3, 4, 4, 1, 1, 1, 2, 2, 2, 2, 1, 2, 4, 1, 2, 3, 4, 4, 4, 3,
3, 4, 4, 3, 4, 1, 4, 3, 3, 1, 1, 4, 3, 1, 2, 4, 2, 3, 1, 1, 1,
1, 4, 3, 2, 2, 3, 2, 1, 4, 1, 1, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1,
1, 3, 3, 1, 4, 1, 1, 2, 2, 4, 2, 3, 1, 1, 2, 3, 2, 2, 4, 4, 2,
1, 1, 4, 3, 4, 2, 1, 4, 2, 4, 3, 1, 3, 4, 3, 2, 4, 3, 3, 1, 1,
4, 2, 2, 4, 3, 3, 1, 1, 1, 2, 1, 1, 2, 2, 4, 4, 4, 2, 1, 4, 1,
3, 2, 3, 3, 3, 1, 4, 2, 2, 2, 1, 1, 3, 4, 1, 4, 3, 1, 1, 1, 1,
1, 1, 3, 2, 1, 1, 3, 2, 4, 1, 4, 4, 4, 3, 3, 4, 4, 2, 4, 1, 2,
3, 4, 4, 3, 2, 3, 3, 4, 1, 2)), .Names = c("Code", "Diameter",
"Diam_quartile"), row.names = c(NA, 200L), class = "data.frame")
编辑 II:x
的完整结果:
dput(x)
structure(list(Code = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L,
10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L,
13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L,
17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 20L,
20L, 20L, 20L, 21L, 21L, 21L, 21L, 22L, 22L, 22L, 22L, 23L, 23L,
23L, 23L, 24L, 24L, 24L, 24L, 25L, 25L, 25L, 25L, 26L, 26L, 26L,
26L, 27L, 27L, 27L, 27L, 28L, 28L, 28L, 28L, 29L, 29L, 29L, 29L,
30L, 30L, 30L, 30L, 31L, 31L, 31L, 31L, 32L, 32L, 32L, 32L, 33L,
33L, 33L, 33L, 34L, 34L, 34L, 34L, 35L, 35L, 35L, 35L, 36L, 36L,
36L, 36L, 37L, 37L, 37L, 37L, 38L, 38L, 38L, 38L, 39L, 39L, 39L,
39L, 40L, 40L, 40L, 40L, 41L, 41L, 41L, 41L, 42L, 42L, 42L, 42L,
43L, 43L, 43L, 43L, 44L, 44L, 44L, 44L, 45L, 45L, 45L, 45L, 46L,
46L, 46L, 46L, 47L, 47L, 47L, 47L, 48L, 48L, 48L, 48L, 49L, 49L,
49L, 49L, 50L, 50L, 50L, 50L, 51L, 51L, 51L, 51L, 52L, 52L, 52L,
52L, 53L, 53L, 53L, 53L, 54L, 54L, 54L, 54L), .Label = c("T1iOgP1",
"T1iOgP2", "T1iOgP3", "T1iRgP1", "T1iRgP2", "T1iRgP3", "T1iRtP1",
"T1iRtP2", "T1iRtP3", "T1rOgP1", "T1rOgP2", "T1rOgP3", "T1rRgP1",
"T1rRgP2", "T1rRgP3", "T1rRtP1", "T1rRtP2", "T1rRtP3", "T1sOgP1",
"T1sOgP2", "T1sOgP3", "T1sRgP1", "T1sRgP2", "T1sRgP3", "T1sRtP1",
"T1sRtP2", "T1sRtP3", "T2iOgP1", "T2iOgP2", "T2iOgP3", "T2iRgP1",
"T2iRgP2", "T2iRgP3", "T2iRtP1", "T2iRtP2", "T2iRtP3", "T2rOgP1",
"T2rOgP2", "T2rOgP3", "T2rRgP1", "T2rRgP2", "T2rRgP3", "T2rRtP1",
"T2rRtP2", "T2rRtP3", "T2sOgP1", "T2sOgP2", "T2sOgP3", "T2sRgP1",
"T2sRgP2", "T2sRgP3", "T2sRtP1", "T2sRtP2", "T2sRtP3"), class = "factor"),
Diam_quartile = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1,
2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3,
4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2,
3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1,
2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3,
4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2,
3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1,
2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3,
4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4), Frequency = c(26,
22, 21, 23, 11, 12, 10, 11, 5, 5, 4, 5, 15, 9, 10, 12, 18,
16, 13, 14, **28, 22, 17, 22**, 21, 17, 17, 19, 14, 12, 12, 13,
22, 14, 19, 16, 9, 9, 8, 9, 6, 6, 5, 6, 9, 8, 8, 9, 13, 8,
12, 9, 11, 11, 11, 11, 13, 13, 12, 13, 30, 19, 20, 17, 19,
14, 15, 15, 25, 23, 19, 22, 14, 16, 12, 14, 12, 11, 11, 11,
10, 9, 9, 9, 17, 7, 12, 12, 36, 13, 23, 20, 24, 15, 15, 18,
33, 30, 31, 30, 28, 30, 26, 27, 37, 30, 31, 33, 24, 20, 21,
22, 12, 6, 9, 9, 6, 5, 5, 6, 13, 13, 9, 12, 18, 14, 16, 16,
22, 21, 15, 20, 13, 12, 8, 11, 11, 11, 7, 10, 8, 7, 7, 7,
9, 8, 8, 9, 9, 8, 8, 8, 15, 15, 14, 13, 11, 3, 7, 7, 11,
11, 11, 11, 14, 12, 12, 13, 31, 40, 19, 25, 25, 23, 21, 21,
30, 18, 25, 21, 22, 9, 15, 16, 10, 9, 10, 8, 12, 13, 11,
12, 15, 13, 13, 14, 19, 16, 9, 15, 14, 9, 8, 10, 20, 18,
17, 19, 48, 35, 39, 39, 23, 22, 22, 22)), .Names = c("Code",
"Diam_quartile", "Frequency"), row.names = c(NA, 216L), class = "data.frame")
将第一次计算得到的 x 与第二次(把 <= 运算符替换为 <)计算得到的结果进行比较:
quart = ddply(dataframe, .(Code), transform,
diam_quart =
ifelse(Diameter < quantile(Diameter , 0.25), 1,
ifelse(Diameter < quantile(Diameter , 0.5), 2,
ifelse(Diameter < quantile(Diameter , 0.75), 3, 4)))
其中 Diam_quartile 是用 <= 计算的结果,diam_quart 是仅用 < 计算的结果:
dput(x)
structure(list(Code = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L,
10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L,
13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L,
17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 20L,
20L, 20L, 20L, 21L, 21L, 21L, 21L, 22L, 22L, 22L, 22L, 23L, 23L,
23L, 23L, 24L, 24L, 24L, 24L, 25L, 25L, 25L, 25L, 26L, 26L, 26L,
26L, 27L, 27L, 27L, 27L, 28L, 28L, 28L, 28L, 29L, 29L, 29L, 29L,
30L, 30L, 30L, 30L, 31L, 31L, 31L, 31L, 32L, 32L, 32L, 32L, 33L,
33L, 33L, 33L, 34L, 34L, 34L, 34L, 35L, 35L, 35L, 35L, 36L, 36L,
36L, 36L, 37L, 37L, 37L, 37L, 38L, 38L, 38L, 38L, 39L, 39L, 39L,
39L, 40L, 40L, 40L, 40L, 41L, 41L, 41L, 41L, 42L, 42L, 42L, 42L,
43L, 43L, 43L, 43L, 44L, 44L, 44L, 44L, 45L, 45L, 45L, 45L, 46L,
46L, 46L, 46L, 47L, 47L, 47L, 47L, 48L, 48L, 48L, 48L, 49L, 49L,
49L, 49L, 50L, 50L, 50L, 50L, 51L, 51L, 51L, 51L, 52L, 52L, 52L,
52L, 53L, 53L, 53L, 53L, 54L, 54L, 54L, 54L), .Label = c("T1iOgP1",
"T1iOgP2", "T1iOgP3", "T1iRgP1", "T1iRgP2", "T1iRgP3", "T1iRtP1",
"T1iRtP2", "T1iRtP3", "T1rOgP1", "T1rOgP2", "T1rOgP3", "T1rRgP1",
"T1rRgP2", "T1rRgP3", "T1rRtP1", "T1rRtP2", "T1rRtP3", "T1sOgP1",
"T1sOgP2", "T1sOgP3", "T1sRgP1", "T1sRgP2", "T1sRgP3", "T1sRtP1",
"T1sRtP2", "T1sRtP3", "T2iOgP1", "T2iOgP2", "T2iOgP3", "T2iRgP1",
"T2iRgP2", "T2iRgP3", "T2iRtP1", "T2iRtP2", "T2iRtP3", "T2rOgP1",
"T2rOgP2", "T2rOgP3", "T2rRgP1", "T2rRgP2", "T2rRgP3", "T2rRtP1",
"T2rRtP2", "T2rRtP3", "T2sOgP1", "T2sOgP2", "T2sOgP3", "T2sRgP1",
"T2sRgP2", "T2sRgP3", "T2sRtP1", "T2sRtP2", "T2sRtP3"), class = "factor"),
Diam_quartile = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1,
2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3,
4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2,
3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1,
2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3,
4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2,
3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1,
2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3,
4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4), Frequency = c(26,
22, 21, 23, 11, 12, 10, 11, 5, 5, 4, 5, 15, 9, 10, 12, 18,
16, 13, 14, 28, 22, 17, 22, 21, 17, 17, 19, 14, 12, 12, 13,
22, 14, 19, 16, 9, 9, 8, 9, 6, 6, 5, 6, 9, 8, 8, 9, 13, 8,
12, 9, 11, 11, 11, 11, 13, 13, 12, 13, 30, 19, 20, 17, 19,
14, 15, 15, 25, 23, 19, 22, 14, 16, 12, 14, 12, 11, 11, 11,
10, 9, 9, 9, 17, 7, 12, 12, 36, 13, 23, 20, 24, 15, 15, 18,
33, 30, 31, 30, 28, 30, 26, 27, 37, 30, 31, 33, 24, 20, 21,
22, 12, 6, 9, 9, 6, 5, 5, 6, 13, 13, 9, 12, 18, 14, 16, 16,
22, 21, 15, 20, 13, 12, 8, 11, 11, 11, 7, 10, 8, 7, 7, 7,
9, 8, 8, 9, 9, 8, 8, 8, 15, 15, 14, 13, 11, 3, 7, 7, 11,
11, 11, 11, 14, 12, 12, 13, 31, 40, 19, 25, 25, 23, 21, 21,
30, 18, 25, 21, 22, 9, 15, 16, 10, 9, 10, 8, 12, 13, 11,
12, 15, 13, 13, 14, 19, 16, 9, 15, 14, 9, 8, 10, 20, 18,
17, 19, 48, 35, 39, 39, 23, 22, 22, 22), diam_quart = c(1,
2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3,
4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2,
3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1,
2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3,
4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2,
3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1,
2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3,
4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2,
3, 4, 1, 2, 3, 4), Frequency.1 = c(22, 23, 24, 23, 11, 10,
12, 11, 5, 4, 5, 5, 4, 15, 15, 12, 13, 16, 14, 18, **22, 20,
17, 30,** 10, 22, 23, 19, 11, 12, 15, 13, 12, 19, 19, 21, 9,
8, 9, 9, 6, 5, 6, 6, 9, 8, 8, 9, 10, 11, 8, 13, 11, 11, 11,
11, 13, 11, 14, 13, 21, 19, 17, 29, 12, 19, 15, 17, 17, 26,
21, 25, 14, 11, 17, 14, 11, 11, 11, 12, 9, 9, 9, 10, 10,
14, 12, 12, 21, 15, 32, 24, 15, 20, 19, 18, 26, 28, 32, 38,
28, 24, 26, 33, 26, 32, 40, 33, 17, 25, 23, 22, 8, 10, 9,
9, 6, 5, 5, 6, 11, 9, 15, 12, 9, 23, 16, 16, 13, 15, 30,
20, 10, 11, 12, 11, 9, 10, 10, 10, 3, 11, 7, 8, 9, 8, 8,
9, 8, 8, 8, 9, 13, 12, 17, 15, 5, 9, 7, 7, 11, 11, 11, 11,
12, 13, 13, 13, 18, 38, 22, 37, 12, 30, 24, 24, 22, 19, 25,
28, 15, 16, 15, 16, 8, 10, 9, 10, 12, 11, 13, 12, 12, 15,
14, 14, 14, 9, 21, 15, 9, 10, 9, 13, 15, 21, 19, 19, 38,
35, 45, 43, 22, 19, 23, 25)), .Names = c("Code", "Diam_quartile",
"Frequency", "diam_quart", "Frequency.1"), row.names = c(NA,
216L), class = "data.frame")
我没有发现你的计算有任何问题。仅当您拥有非常大的数据集时,每个四分位数才包含相同数量的元素。下面我提取了第一组的直径。
table(findInterval(diameter, quantile(diameter, c(.25, .5, .75))))
# 0 1 2 3
#22 23 24 23
sum(diameter < quantile(diameter, .25))
#[1] 22
sum(diameter <=quantile(diameter, .25))
#[1] 26
如果在 ifelse 语句中使用 < 而不是 <=,各组的数量会更接近。
diameter <- c(3.819718634, 2.705634033, 2.705634033, 3.978873577, 5.092958179,
7.957747155, 2.228169203, 1.114084602, 4.933803236, 7.480282325,
8.435211984, 7.639437268, 2.228169203, 2.387324146, 2.06901426,
10.50422624, 8.435211984, 4.456338407, 3.819718634, 2.228169203,
6.843662553, 4.13802852, 2.546479089, 4.965634224, 14.48309982,
13.36901522, 2.06901426, 2.06901426, 1.591549431, 2.705634033,
2.228169203, 2.546479089, 2.228169203, 1.909859317, 2.387324146,
7.480282325, 1.909859317, 3.183098862, 4.774648293, 9.390141642,
7.002817496, 7.480282325, 4.456338407, 3.342253805, 10.50422624,
12.89155039, 3.819718634, 8.435211984, 1.750704374, 11.30000096,
3.660563691, 3.501408748, 1.750704374, 1.591549431, 10.66338119,
3.501408748, 1.273239545, 2.228169203, 11.93662073, 3.183098862,
3.501408748, 1.750704374, 1.591549431, 1.273239545, 1.750704374,
12.09577567, 3.978873577, 2.705634033, 2.228169203, 3.501408748,
3.183098862, 1.432394488, 10.66338119, 1.432394488, 1.750704374,
2.228169203, 1.591549431, 1.432394488, 2.546479089, 2.387324146,
1.114084602, 2.546479089, 3.342253805, 3.978873577, 1.273239545,
1.273239545, 4.61549335, 4.13802852, 0.795774715, 7.798592212,
1.273239545, 2.06901426)
只是想总结一下 ExperimenteR 帮助我理解的内容:
计算是正确的。各四分位数组的数量差异较大,是因为数据子集较小,而恰好落在四分位数边界上的数据所占比例相对较大。这些边界上的值被归入较低还是较高的四分位数组,会使各组数量发生显著变化。有关详细信息,请参阅他的评论。
谢谢!
我试图把分组后的数据按四分位数分类,因此向数据框 z 添加一列 diam_quart,
把每一行分配到 1、2、3 或 4 这四个类别之一:
quart = ddply(z, .(Code), transform,
diam_quart =
ifelse(Diameter <= quantile(Diameter , 0.25), 1,
ifelse(Diameter <= quantile(Diameter , 0.5), 2,
ifelse(Diameter <= quantile(Diameter , 0.75), 3, 4)))
)
但是,检查结果时,我发现每个四分位数类别中的频数并不相同。我用下面的代码来检查:
x <- ddply(quart, .(Code, diam_quart), summarize,
Diam_quartile = mean(diam_quart),
Frequency = length(Diameter))
x
根据定义,每个四分位数类别不应该包含相同的行数吗?如果是这样,那么 quantile 函数计算时针对的是否是整个数据集,而不是每个子集(即 ddply 调用中按 Code 划分的分组)?
不确定我的逻辑哪里出了问题。有人能指点一下吗?
这是我使用原始数据框得到的结果:
head(x, 16)
Code Diam_quartile Frequency
1 T1iOgP1 1 26
2 T1iOgP1 2 22
3 T1iOgP1 3 21
4 T1iOgP1 4 23
5 T1iOgP2 1 11
6 T1iOgP2 2 12
7 T1iOgP2 3 10
8 T1iOgP2 4 11
9 T1iOgP3 1 5
10 T1iOgP3 2 5
11 T1iOgP3 3 4
12 T1iOgP3 4 5
13 T1iRgP1 1 15
14 T1iRgP1 2 9
15 T1iRgP1 3 10
16 T1iRgP1 4 12
编辑:这是我的数据框相关列的前 200 行:
dput(z)
structure(list(Code = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L), .Label = c("T1iOgP1", "T1iOgP2", "T1iOgP3", "T1iRgP1", "T1iRgP2",
"T1iRgP3", "T1iRtP1", "T1iRtP2", "T1iRtP3", "T1rOgP1", "T1rOgP2",
"T1rOgP3", "T1rRgP1", "T1rRgP2", "T1rRgP3", "T1rRtP1", "T1rRtP2",
"T1rRtP3", "T1sOgP1", "T1sOgP2", "T1sOgP3", "T1sRgP1", "T1sRgP2",
"T1sRgP3", "T1sRtP1", "T1sRtP2", "T1sRtP3", "T2iOgP1", "T2iOgP2",
"T2iOgP3", "T2iRgP1", "T2iRgP2", "T2iRgP3", "T2iRtP1", "T2iRtP2",
"T2iRtP3", "T2rOgP1", "T2rOgP2", "T2rOgP3", "T2rRgP1", "T2rRgP2",
"T2rRgP3", "T2rRtP1", "T2rRtP2", "T2rRtP3", "T2sOgP1", "T2sOgP2",
"T2sOgP3", "T2sRgP1", "T2sRgP2", "T2sRgP3", "T2sRtP1", "T2sRtP2",
"T2sRtP3"), class = "factor"), Diameter = c(3.819718634, 2.705634033,
2.705634033, 3.978873577, 5.092958179, 7.957747155, 2.228169203,
1.114084602, 4.933803236, 7.480282325, 8.435211984, 7.639437268,
2.228169203, 2.387324146, 2.06901426, 10.50422624, 8.435211984,
4.456338407, 3.819718634, 2.228169203, 6.843662553, 4.13802852,
2.546479089, 4.965634224, 14.48309982, 13.36901522, 2.06901426,
2.06901426, 1.591549431, 2.705634033, 2.228169203, 2.546479089,
2.228169203, 1.909859317, 2.387324146, 7.480282325, 1.909859317,
3.183098862, 4.774648293, 9.390141642, 7.002817496, 7.480282325,
4.456338407, 3.342253805, 10.50422624, 12.89155039, 3.819718634,
8.435211984, 1.750704374, 11.30000096, 3.660563691, 3.501408748,
1.750704374, 1.591549431, 10.66338119, 3.501408748, 1.273239545,
2.228169203, 11.93662073, 3.183098862, 3.501408748, 1.750704374,
1.591549431, 1.273239545, 1.750704374, 12.09577567, 3.978873577,
2.705634033, 2.228169203, 3.501408748, 3.183098862, 1.432394488,
10.66338119, 1.432394488, 1.750704374, 2.228169203, 1.591549431,
1.432394488, 2.546479089, 2.387324146, 1.114084602, 2.546479089,
3.342253805, 3.978873577, 1.273239545, 1.273239545, 4.61549335,
4.13802852, 0.795774715, 7.798592212, 1.273239545, 2.06901426,
4.297183463, 4.297183463, 24.98732607, 6.207042781, 7.957747155,
3.023943919, 1.432394488, 5.252113122, 7.002817496, 3.819718634,
5.729577951, 18.97126922, 20.21267777, 3.978873577, 2.864788976,
1.750704374, 10.66338119, 6.366197724, 19.73521294, 5.729577951,
3.023943919, 12.41408556, 3.501408748, 21.16760743, 10.50422624,
2.228169203, 9.071831756, 11.77746579, 8.435211984, 6.207042781,
30.39859413, 8.912676813, 6.525352667, 1.909859317, 2.705634033,
20.37183272, 3.501408748, 5.888732894, 14.32394488, 7.321127382,
7.321127382, 3.023943919, 2.546479089, 3.342253805, 5.888732894,
2.06901426, 1.782535363, 4.965634224, 5.092958179, 14.32394488,
10.66338119, 16.55211408, 5.570423008, 2.228169203, 10.3450713,
2.864788976, 10.18591636, 4.456338407, 8.75352187, 6.68450761,
8.594366927, 1.909859317, 19.89436789, 1.591549431, 1.432394488,
1.750704374, 1.273239545, 1.273239545, 1.909859317, 2.546479089,
0.954929659, 2.705634033, 2.06901426, 0.954929659, 1.114084602,
1.273239545, 1.273239545, 1.273239545, 1.273239545, 1.909859317,
1.432394488, 1.273239545, 1.273239545, 1.909859317, 1.750704374,
5.252113122, 1.273239545, 3.501408748, 2.546479089, 7.161972439,
2.228169203, 1.909859317, 2.387324146, 4.456338407, 1.591549431,
3.501408748, 1.273239545, 1.750704374, 1.909859317, 2.705634033,
3.342253805, 1.909859317, 1.750704374, 2.06901426, 2.228169203,
2.546479089, 1.273239545, 1.750704374), Diam_quartile = c(3,
2, 2, 3, 4, 4, 2, 1, 3, 4, 4, 4, 2, 2, 1, 4, 4, 3, 3, 2, 4, 3,
2, 3, 4, 4, 1, 1, 1, 2, 2, 2, 2, 1, 2, 4, 1, 2, 3, 4, 4, 4, 3,
3, 4, 4, 3, 4, 1, 4, 3, 3, 1, 1, 4, 3, 1, 2, 4, 2, 3, 1, 1, 1,
1, 4, 3, 2, 2, 3, 2, 1, 4, 1, 1, 2, 1, 1, 2, 2, 1, 2, 3, 3, 1,
1, 3, 3, 1, 4, 1, 1, 2, 2, 4, 2, 3, 1, 1, 2, 3, 2, 2, 4, 4, 2,
1, 1, 4, 3, 4, 2, 1, 4, 2, 4, 3, 1, 3, 4, 3, 2, 4, 3, 3, 1, 1,
4, 2, 2, 4, 3, 3, 1, 1, 1, 2, 1, 1, 2, 2, 4, 4, 4, 2, 1, 4, 1,
3, 2, 3, 3, 3, 1, 4, 2, 2, 2, 1, 1, 3, 4, 1, 4, 3, 1, 1, 1, 1,
1, 1, 3, 2, 1, 1, 3, 2, 4, 1, 4, 4, 4, 3, 3, 4, 4, 2, 4, 1, 2,
3, 4, 4, 3, 2, 3, 3, 4, 1, 2)), .Names = c("Code", "Diameter",
"Diam_quartile"), row.names = c(NA, 200L), class = "data.frame")
编辑 II:x
的完整结果:
dput(x)
structure(list(Code = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L,
10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L,
13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L,
17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 20L,
20L, 20L, 20L, 21L, 21L, 21L, 21L, 22L, 22L, 22L, 22L, 23L, 23L,
23L, 23L, 24L, 24L, 24L, 24L, 25L, 25L, 25L, 25L, 26L, 26L, 26L,
26L, 27L, 27L, 27L, 27L, 28L, 28L, 28L, 28L, 29L, 29L, 29L, 29L,
30L, 30L, 30L, 30L, 31L, 31L, 31L, 31L, 32L, 32L, 32L, 32L, 33L,
33L, 33L, 33L, 34L, 34L, 34L, 34L, 35L, 35L, 35L, 35L, 36L, 36L,
36L, 36L, 37L, 37L, 37L, 37L, 38L, 38L, 38L, 38L, 39L, 39L, 39L,
39L, 40L, 40L, 40L, 40L, 41L, 41L, 41L, 41L, 42L, 42L, 42L, 42L,
43L, 43L, 43L, 43L, 44L, 44L, 44L, 44L, 45L, 45L, 45L, 45L, 46L,
46L, 46L, 46L, 47L, 47L, 47L, 47L, 48L, 48L, 48L, 48L, 49L, 49L,
49L, 49L, 50L, 50L, 50L, 50L, 51L, 51L, 51L, 51L, 52L, 52L, 52L,
52L, 53L, 53L, 53L, 53L, 54L, 54L, 54L, 54L), .Label = c("T1iOgP1",
"T1iOgP2", "T1iOgP3", "T1iRgP1", "T1iRgP2", "T1iRgP3", "T1iRtP1",
"T1iRtP2", "T1iRtP3", "T1rOgP1", "T1rOgP2", "T1rOgP3", "T1rRgP1",
"T1rRgP2", "T1rRgP3", "T1rRtP1", "T1rRtP2", "T1rRtP3", "T1sOgP1",
"T1sOgP2", "T1sOgP3", "T1sRgP1", "T1sRgP2", "T1sRgP3", "T1sRtP1",
"T1sRtP2", "T1sRtP3", "T2iOgP1", "T2iOgP2", "T2iOgP3", "T2iRgP1",
"T2iRgP2", "T2iRgP3", "T2iRtP1", "T2iRtP2", "T2iRtP3", "T2rOgP1",
"T2rOgP2", "T2rOgP3", "T2rRgP1", "T2rRgP2", "T2rRgP3", "T2rRtP1",
"T2rRtP2", "T2rRtP3", "T2sOgP1", "T2sOgP2", "T2sOgP3", "T2sRgP1",
"T2sRgP2", "T2sRgP3", "T2sRtP1", "T2sRtP2", "T2sRtP3"), class = "factor"),
Diam_quartile = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1,
2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3,
4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2,
3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1,
2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3,
4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2,
3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1,
2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3,
4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4), Frequency = c(26,
22, 21, 23, 11, 12, 10, 11, 5, 5, 4, 5, 15, 9, 10, 12, 18,
16, 13, 14, **28, 22, 17, 22**, 21, 17, 17, 19, 14, 12, 12, 13,
22, 14, 19, 16, 9, 9, 8, 9, 6, 6, 5, 6, 9, 8, 8, 9, 13, 8,
12, 9, 11, 11, 11, 11, 13, 13, 12, 13, 30, 19, 20, 17, 19,
14, 15, 15, 25, 23, 19, 22, 14, 16, 12, 14, 12, 11, 11, 11,
10, 9, 9, 9, 17, 7, 12, 12, 36, 13, 23, 20, 24, 15, 15, 18,
33, 30, 31, 30, 28, 30, 26, 27, 37, 30, 31, 33, 24, 20, 21,
22, 12, 6, 9, 9, 6, 5, 5, 6, 13, 13, 9, 12, 18, 14, 16, 16,
22, 21, 15, 20, 13, 12, 8, 11, 11, 11, 7, 10, 8, 7, 7, 7,
9, 8, 8, 9, 9, 8, 8, 8, 15, 15, 14, 13, 11, 3, 7, 7, 11,
11, 11, 11, 14, 12, 12, 13, 31, 40, 19, 25, 25, 23, 21, 21,
30, 18, 25, 21, 22, 9, 15, 16, 10, 9, 10, 8, 12, 13, 11,
12, 15, 13, 13, 14, 19, 16, 9, 15, 14, 9, 8, 10, 20, 18,
17, 19, 48, 35, 39, 39, 23, 22, 22, 22)), .Names = c("Code",
"Diam_quartile", "Frequency"), row.names = c(NA, 216L), class = "data.frame")
将第一次计算得到的 x 与第二次(把 <= 运算符替换为 <)计算得到的结果进行比较:
quart = ddply(dataframe, .(Code), transform,
diam_quart =
ifelse(Diameter < quantile(Diameter , 0.25), 1,
ifelse(Diameter < quantile(Diameter , 0.5), 2,
ifelse(Diameter < quantile(Diameter , 0.75), 3, 4)))
其中 Diam_quartile 是用 <= 计算的结果,diam_quart 是仅用 < 计算的结果:
dput(x)
structure(list(Code = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L,
10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L,
13L, 14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L,
17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 20L,
20L, 20L, 20L, 21L, 21L, 21L, 21L, 22L, 22L, 22L, 22L, 23L, 23L,
23L, 23L, 24L, 24L, 24L, 24L, 25L, 25L, 25L, 25L, 26L, 26L, 26L,
26L, 27L, 27L, 27L, 27L, 28L, 28L, 28L, 28L, 29L, 29L, 29L, 29L,
30L, 30L, 30L, 30L, 31L, 31L, 31L, 31L, 32L, 32L, 32L, 32L, 33L,
33L, 33L, 33L, 34L, 34L, 34L, 34L, 35L, 35L, 35L, 35L, 36L, 36L,
36L, 36L, 37L, 37L, 37L, 37L, 38L, 38L, 38L, 38L, 39L, 39L, 39L,
39L, 40L, 40L, 40L, 40L, 41L, 41L, 41L, 41L, 42L, 42L, 42L, 42L,
43L, 43L, 43L, 43L, 44L, 44L, 44L, 44L, 45L, 45L, 45L, 45L, 46L,
46L, 46L, 46L, 47L, 47L, 47L, 47L, 48L, 48L, 48L, 48L, 49L, 49L,
49L, 49L, 50L, 50L, 50L, 50L, 51L, 51L, 51L, 51L, 52L, 52L, 52L,
52L, 53L, 53L, 53L, 53L, 54L, 54L, 54L, 54L), .Label = c("T1iOgP1",
"T1iOgP2", "T1iOgP3", "T1iRgP1", "T1iRgP2", "T1iRgP3", "T1iRtP1",
"T1iRtP2", "T1iRtP3", "T1rOgP1", "T1rOgP2", "T1rOgP3", "T1rRgP1",
"T1rRgP2", "T1rRgP3", "T1rRtP1", "T1rRtP2", "T1rRtP3", "T1sOgP1",
"T1sOgP2", "T1sOgP3", "T1sRgP1", "T1sRgP2", "T1sRgP3", "T1sRtP1",
"T1sRtP2", "T1sRtP3", "T2iOgP1", "T2iOgP2", "T2iOgP3", "T2iRgP1",
"T2iRgP2", "T2iRgP3", "T2iRtP1", "T2iRtP2", "T2iRtP3", "T2rOgP1",
"T2rOgP2", "T2rOgP3", "T2rRgP1", "T2rRgP2", "T2rRgP3", "T2rRtP1",
"T2rRtP2", "T2rRtP3", "T2sOgP1", "T2sOgP2", "T2sOgP3", "T2sRgP1",
"T2sRgP2", "T2sRgP3", "T2sRtP1", "T2sRtP2", "T2sRtP3"), class = "factor"),
Diam_quartile = c(1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1,
2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3,
4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2,
3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1,
2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3,
4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2,
3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1,
2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3,
4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4), Frequency = c(26,
22, 21, 23, 11, 12, 10, 11, 5, 5, 4, 5, 15, 9, 10, 12, 18,
16, 13, 14, 28, 22, 17, 22, 21, 17, 17, 19, 14, 12, 12, 13,
22, 14, 19, 16, 9, 9, 8, 9, 6, 6, 5, 6, 9, 8, 8, 9, 13, 8,
12, 9, 11, 11, 11, 11, 13, 13, 12, 13, 30, 19, 20, 17, 19,
14, 15, 15, 25, 23, 19, 22, 14, 16, 12, 14, 12, 11, 11, 11,
10, 9, 9, 9, 17, 7, 12, 12, 36, 13, 23, 20, 24, 15, 15, 18,
33, 30, 31, 30, 28, 30, 26, 27, 37, 30, 31, 33, 24, 20, 21,
22, 12, 6, 9, 9, 6, 5, 5, 6, 13, 13, 9, 12, 18, 14, 16, 16,
22, 21, 15, 20, 13, 12, 8, 11, 11, 11, 7, 10, 8, 7, 7, 7,
9, 8, 8, 9, 9, 8, 8, 8, 15, 15, 14, 13, 11, 3, 7, 7, 11,
11, 11, 11, 14, 12, 12, 13, 31, 40, 19, 25, 25, 23, 21, 21,
30, 18, 25, 21, 22, 9, 15, 16, 10, 9, 10, 8, 12, 13, 11,
12, 15, 13, 13, 14, 19, 16, 9, 15, 14, 9, 8, 10, 20, 18,
17, 19, 48, 35, 39, 39, 23, 22, 22, 22), diam_quart = c(1,
2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3,
4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2,
3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1,
2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3,
4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2,
3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1,
2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3,
4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2,
3, 4, 1, 2, 3, 4), Frequency.1 = c(22, 23, 24, 23, 11, 10,
12, 11, 5, 4, 5, 5, 4, 15, 15, 12, 13, 16, 14, 18, **22, 20,
17, 30,** 10, 22, 23, 19, 11, 12, 15, 13, 12, 19, 19, 21, 9,
8, 9, 9, 6, 5, 6, 6, 9, 8, 8, 9, 10, 11, 8, 13, 11, 11, 11,
11, 13, 11, 14, 13, 21, 19, 17, 29, 12, 19, 15, 17, 17, 26,
21, 25, 14, 11, 17, 14, 11, 11, 11, 12, 9, 9, 9, 10, 10,
14, 12, 12, 21, 15, 32, 24, 15, 20, 19, 18, 26, 28, 32, 38,
28, 24, 26, 33, 26, 32, 40, 33, 17, 25, 23, 22, 8, 10, 9,
9, 6, 5, 5, 6, 11, 9, 15, 12, 9, 23, 16, 16, 13, 15, 30,
20, 10, 11, 12, 11, 9, 10, 10, 10, 3, 11, 7, 8, 9, 8, 8,
9, 8, 8, 8, 9, 13, 12, 17, 15, 5, 9, 7, 7, 11, 11, 11, 11,
12, 13, 13, 13, 18, 38, 22, 37, 12, 30, 24, 24, 22, 19, 25,
28, 15, 16, 15, 16, 8, 10, 9, 10, 12, 11, 13, 12, 12, 15,
14, 14, 14, 9, 21, 15, 9, 10, 9, 13, 15, 21, 19, 19, 38,
35, 45, 43, 22, 19, 23, 25)), .Names = c("Code", "Diam_quartile",
"Frequency", "diam_quart", "Frequency.1"), row.names = c(NA,
216L), class = "data.frame")
我没有发现你的计算有任何问题。仅当您拥有非常大的数据集时,每个四分位数才包含相同数量的元素。下面我提取了第一组的直径。
table(findInterval(diameter, quantile(diameter, c(.25, .5, .75))))
# 0 1 2 3
#22 23 24 23
sum(diameter < quantile(diameter, .25))
#[1] 22
sum(diameter <=quantile(diameter, .25))
#[1] 26
如果在 ifelse 语句中使用 < 而不是 <=,各组的数量会更接近。
diameter <- c(3.819718634, 2.705634033, 2.705634033, 3.978873577, 5.092958179,
7.957747155, 2.228169203, 1.114084602, 4.933803236, 7.480282325,
8.435211984, 7.639437268, 2.228169203, 2.387324146, 2.06901426,
10.50422624, 8.435211984, 4.456338407, 3.819718634, 2.228169203,
6.843662553, 4.13802852, 2.546479089, 4.965634224, 14.48309982,
13.36901522, 2.06901426, 2.06901426, 1.591549431, 2.705634033,
2.228169203, 2.546479089, 2.228169203, 1.909859317, 2.387324146,
7.480282325, 1.909859317, 3.183098862, 4.774648293, 9.390141642,
7.002817496, 7.480282325, 4.456338407, 3.342253805, 10.50422624,
12.89155039, 3.819718634, 8.435211984, 1.750704374, 11.30000096,
3.660563691, 3.501408748, 1.750704374, 1.591549431, 10.66338119,
3.501408748, 1.273239545, 2.228169203, 11.93662073, 3.183098862,
3.501408748, 1.750704374, 1.591549431, 1.273239545, 1.750704374,
12.09577567, 3.978873577, 2.705634033, 2.228169203, 3.501408748,
3.183098862, 1.432394488, 10.66338119, 1.432394488, 1.750704374,
2.228169203, 1.591549431, 1.432394488, 2.546479089, 2.387324146,
1.114084602, 2.546479089, 3.342253805, 3.978873577, 1.273239545,
1.273239545, 4.61549335, 4.13802852, 0.795774715, 7.798592212,
1.273239545, 2.06901426)
只是想总结一下 ExperimenteR 帮助我理解的内容: 计算是正确的。各四分位数组的数量差异较大,是因为数据子集较小,而恰好落在四分位数边界上的数据所占比例相对较大。这些边界上的值被归入较低还是较高的四分位数组,会使各组数量发生显著变化。有关详细信息,请参阅他的评论。 谢谢!