我需要帮助在 R 中按小时查找百分比
I need help finding percentages by hour in R
我的 DF 有两列:start_time 和 member_casual。我想在 R 中计算:在每个小时开始的记录里,member 和 casual 两类各占的百分比。
我不知道从哪里开始。
这是我第一次使用 dput;如果数据太多请告诉我。
structure(list(start_time = structure(c(55320, 86160, 71340,
68760, 38340, 59580, 43080, 60480, 39600, 65100, 56640, 65100,
32880, 83160, 64740, 75420, 62340, 61620, 20160, 37260, 64980,
62820, 69420, 54540, 43200, 57300, 58560, 44460, 65820, 48240,
29160, 41100, 83640, 69840, 44100, 29460, 62700, 71100, 27780,
48240, 35280, 840, 39480, 27000, 64140, 48300, 28080, 61800,
59880, 45420, 49860, 39180, 38580, 31320, 72660, 20100, 30480,
64140, 62280, 56280, 29700, 56820, 30180, 64260, 33780, 37920,
69780, 27240, 40440, 72180, 34800, 72780, 28080, 52500, 82200,
28500, 60120, 60780, 54780, 58560, 62220, 59700, 68220, 50580,
63060, 59220, 56580, 39720, 52800, 52080, 63960, 63540, 70500,
69420, 82560, 58500, 53880, 48120, 29700, 62760), class = c("hms",
"difftime"), units = "secs"), member_casual = c("member", "member",
"casual", "casual", "member", "casual", "member", "member", "member",
"member", "member", "member", "member", "casual", "member", "member",
"casual", "member", "member", "casual", "casual", "member", "member",
"casual", "casual", "member", "casual", "member", "member", "casual",
"casual", "casual", "casual", "member", "casual", "member", "casual",
"casual", "casual", "casual", "casual", "casual", "casual", "casual",
"casual", "member", "casual", "member", "member", "casual", "casual",
"casual", "member", "member", "member", "casual", "member", "casual",
"member", "casual", "member", "casual", "member", "casual", "member",
"member", "casual", "member", "member", "member", "member", "member",
"member", "member", "member", "casual", "member", "member", "member",
"member", "member", "member", "member", "member", "member", "member",
"member", "member", "member", "member", "member", "member", "member",
"casual", "casual", "casual", "casual", "casual", "member", "member"
)), row.names = c(NA, -100L), class = c("tbl_df", "tbl", "data.frame"
))
library(dplyr)
library(lubridate)
# Share of each member_casual value within every start hour.
# 1. Derive the hour of day from start_time.
# 2. Tally rows per (hour, member_casual) pair; count() stores the tally in `n`.
# 3. Group by hour so that sum(n) is the total for that hour, making `p` the
#    pair's percentage of its hour. The result is left grouped by hour.
df %>%
  mutate(hour = hour(start_time)) %>%
  count(hour, member_casual) %>%
  group_by(hour) %>%
  mutate(p = 100 * n / sum(n))
# A tibble: 34 x 4
# Groups: hour [18]
hour member_casual n p
<int> <chr> <int> <dbl>
1 0 casual 1 100
2 5 casual 1 50
3 5 member 1 50
4 7 casual 4 66.7
5 7 member 2 33.3
6 8 casual 1 14.3
7 8 member 6 85.7
8 9 casual 1 25
9 9 member 3 75
10 10 casual 3 50
# ... with 24 more rows
我的 DF 有两列:start_time 和 member_casual。我想在 R 中计算:在每个小时开始的记录里,member 和 casual 两类各占的百分比。
我不知道从哪里开始。
这是我第一次使用 dput;如果数据太多请告诉我。
structure(list(start_time = structure(c(55320, 86160, 71340,
68760, 38340, 59580, 43080, 60480, 39600, 65100, 56640, 65100,
32880, 83160, 64740, 75420, 62340, 61620, 20160, 37260, 64980,
62820, 69420, 54540, 43200, 57300, 58560, 44460, 65820, 48240,
29160, 41100, 83640, 69840, 44100, 29460, 62700, 71100, 27780,
48240, 35280, 840, 39480, 27000, 64140, 48300, 28080, 61800,
59880, 45420, 49860, 39180, 38580, 31320, 72660, 20100, 30480,
64140, 62280, 56280, 29700, 56820, 30180, 64260, 33780, 37920,
69780, 27240, 40440, 72180, 34800, 72780, 28080, 52500, 82200,
28500, 60120, 60780, 54780, 58560, 62220, 59700, 68220, 50580,
63060, 59220, 56580, 39720, 52800, 52080, 63960, 63540, 70500,
69420, 82560, 58500, 53880, 48120, 29700, 62760), class = c("hms",
"difftime"), units = "secs"), member_casual = c("member", "member",
"casual", "casual", "member", "casual", "member", "member", "member",
"member", "member", "member", "member", "casual", "member", "member",
"casual", "member", "member", "casual", "casual", "member", "member",
"casual", "casual", "member", "casual", "member", "member", "casual",
"casual", "casual", "casual", "member", "casual", "member", "casual",
"casual", "casual", "casual", "casual", "casual", "casual", "casual",
"casual", "member", "casual", "member", "member", "casual", "casual",
"casual", "member", "member", "member", "casual", "member", "casual",
"member", "casual", "member", "casual", "member", "casual", "member",
"member", "casual", "member", "member", "member", "member", "member",
"member", "member", "member", "casual", "member", "member", "member",
"member", "member", "member", "member", "member", "member", "member",
"member", "member", "member", "member", "member", "member", "member",
"casual", "casual", "casual", "casual", "casual", "member", "member"
)), row.names = c(NA, -100L), class = c("tbl_df", "tbl", "data.frame"
))
library(dplyr)
library(lubridate)
# Share of each member_casual value within every start hour.
# 1. Derive the hour of day from start_time.
# 2. Tally rows per (hour, member_casual) pair; count() stores the tally in `n`.
# 3. Group by hour so that sum(n) is the total for that hour, making `p` the
#    pair's percentage of its hour. The result is left grouped by hour.
df %>%
  mutate(hour = hour(start_time)) %>%
  count(hour, member_casual) %>%
  group_by(hour) %>%
  mutate(p = 100 * n / sum(n))
# A tibble: 34 x 4
# Groups: hour [18]
hour member_casual n p
<int> <chr> <int> <dbl>
1 0 casual 1 100
2 5 casual 1 50
3 5 member 1 50
4 7 casual 4 66.7
5 7 member 2 33.3
6 8 casual 1 14.3
7 8 member 6 85.7
8 9 casual 1 25
9 9 member 3 75
10 10 casual 3 50
# ... with 24 more rows