如何在 R 中按时间间隔分组
How to group per time interval in R
我下面有一组数据
Date
Status
Value
05/12/2021 23:59
Failed
500
05/12/2021 23:59
Successful
1500
05/12/2021 23:59
Successful
500
05/12/2021 23:59
Successful
1500
05/12/2021 23:59
Successful
1500
05/12/2021 23:59
Failed
1500
05/12/2021 23:59
Failed
1500
05/12/2021 23:59
Successful
500
05/12/2021 23:59
Successful
1500
05/12/2021 23:59
Failed
1500
05/12/2021 23:59
Successful
500
05/12/2021 23:59
Failed
500
05/12/2021 23:59
Failed
1500
05/12/2021 23:59
Successful
1500
05/12/2021 23:59
Failed
1500
05/12/2021 23:59
Successful
1500
05/12/2021 23:59
Successful
1500
05/12/2021 23:59
Successful
500
05/12/2021 23:59
Successful
500
05/12/2021 23:59
Successful
1500
05/12/2021 23:59
Successful
1500
05/12/2021 23:59
Successful
1500
05/12/2021 23:59
Failed
1500
05/12/2021 23:59
Successful
500
05/12/2021 23:59
Failed
500
05/12/2021 23:59
Failed
500
05/12/2021 23:59
Successful
1500
05/12/2021 23:59
Successful
500
05/12/2021 23:59
Successful
500
05/12/2021 23:59
Successful
1500
05/12/2021 23:59
Successful
500
05/12/2021 23:59
Successful
1500
05/12/2021 23:59
Successful
500
我希望能够拆分日期时间列,以提取其中的时间部分
然后将时间分组为每小时间隔
然后汇总得到下面的列
我想知道一小时内处理了多少笔交易
以及一小时内的交易金额总和
然后用一列说明一小时内有多少笔交易成功,再用另一列说明一小时内有多少笔交易失败
在下方查看所需汇总 table 的输出
Interval
Value
Count
Successful
Failed
00:00 am - 00:59 am
32,000
54
40
15
00:59 am - 01:00 am
42,000
55
41
14
01:00 am - 02:59 am
21,400
56
42
14
03:00 am - 03:59 am
4,00
57
43
14
04:00 am - 04:59 am
543,000
58
2
56
05:00 am - 05:59 am
411,000
59
6
53
这个怎么样:
library(tidyverse)
library(lubridate)
library(glue)
# Sample transactions. Date is a "dd/mm/yyyy HH:MM" string (no seconds).
df <- tribble(
  ~Date, ~Status, ~Value,
  "05/12/2021 23:59", "Failed", 500,
  "05/12/2021 23:59", "Successful", 1500,
  "05/12/2021 23:59", "Successful", 500,
  "05/12/2021 23:59", "Successful", 1500,
  "05/12/2021 23:59", "Successful", 1500,
  "05/12/2021 23:59", "Failed", 1500
)

# Parse the timestamp and derive the hourly grouping key.
# dmy_hm() is required here: the strings have no seconds component, so
# dmy_hms() fails to parse them and returns NA for every row.
df2 <- df %>%
  mutate(
    Datetime = dmy_hm(Date),   # convert to datetime format
    Date = as.Date(Datetime),  # extract date, if you need it later
    Hour = hour(Datetime)      # extract hour
  )

# Total transaction value per hour.
hourly_value <- df2 %>%
  group_by(Hour) %>%
  summarize(Value = sum(Value), .groups = "drop")

# Transaction count per hour, spread into one column per Status value.
hourly_count <- df2 %>%
  count(Hour, Status) %>%
  pivot_wider(names_from = "Status", values_from = "n")

# Lookup table mapping each hour of the day to its display label.
# Hour %% 12 maps both 0 and 12 to "00", which matches the requested
# "00:00 am - 00:59 am" label style.
interval_helper <- tibble(
  Hour = 0:23,
  display_hour = str_pad(Hour %% 12, 2, pad = "0"),
  ampm = if_else(Hour < 12, "am", "pm"),
  Interval = glue("{display_hour}:00 {ampm} - {display_hour}:59 {ampm}")
)

# Combine the per-hour summaries and attach the display label.
full_join(hourly_value, hourly_count, by = "Hour") %>%
  # A status that never occurs in a given hour is NA after pivot_wider();
  # zero-fill so that Successful + Failed below does not silently become NA.
  replace_na(list(Successful = 0L, Failed = 0L, Value = 0)) %>%
  left_join(interval_helper, by = "Hour") %>%
  mutate(Count = Successful + Failed) %>%
  select(Interval, Value, Count, Successful, Failed)
我不太确定您原来的 Date
列的格式。这里我假设它是一个字符串。因为 Interval
列的确切格式对您很重要,所以单独创建一个包含所需显示字符串的小型 tibble,再将其连接(join)进来似乎更容易。
重要的是用零替换缺失的 NA
值,否则 Count = Successful + Failed
将在只有其中一个存在时悄无声息地得到 NA,而不会报错。
我下面有一组数据
Date | Status | Value |
---|---|---|
05/12/2021 23:59 | Failed | 500 |
05/12/2021 23:59 | Successful | 1500 |
05/12/2021 23:59 | Successful | 500 |
05/12/2021 23:59 | Successful | 1500 |
05/12/2021 23:59 | Successful | 1500 |
05/12/2021 23:59 | Failed | 1500 |
05/12/2021 23:59 | Failed | 1500 |
05/12/2021 23:59 | Successful | 500 |
05/12/2021 23:59 | Successful | 1500 |
05/12/2021 23:59 | Failed | 1500 |
05/12/2021 23:59 | Successful | 500 |
05/12/2021 23:59 | Failed | 500 |
05/12/2021 23:59 | Failed | 1500 |
05/12/2021 23:59 | Successful | 1500 |
05/12/2021 23:59 | Failed | 1500 |
05/12/2021 23:59 | Successful | 1500 |
05/12/2021 23:59 | Successful | 1500 |
05/12/2021 23:59 | Successful | 500 |
05/12/2021 23:59 | Successful | 500 |
05/12/2021 23:59 | Successful | 1500 |
05/12/2021 23:59 | Successful | 1500 |
05/12/2021 23:59 | Successful | 1500 |
05/12/2021 23:59 | Failed | 1500 |
05/12/2021 23:59 | Successful | 500 |
05/12/2021 23:59 | Failed | 500 |
05/12/2021 23:59 | Failed | 500 |
05/12/2021 23:59 | Successful | 1500 |
05/12/2021 23:59 | Successful | 500 |
05/12/2021 23:59 | Successful | 500 |
05/12/2021 23:59 | Successful | 1500 |
05/12/2021 23:59 | Successful | 500 |
05/12/2021 23:59 | Successful | 1500 |
05/12/2021 23:59 | Successful | 500 |
我希望能够拆分日期时间列,以提取其中的时间部分
然后将时间分组为每小时间隔
然后汇总得到下面的列
我想知道一小时内处理了多少笔交易
以及一小时内的交易金额总和
然后用一列说明一小时内有多少笔交易成功,再用另一列说明一小时内有多少笔交易失败
在下方查看所需汇总 table 的输出
Interval | Value | Count | Successful | Failed |
---|---|---|---|---|
00:00 am - 00:59 am | 32,000 | 54 | 40 | 15 |
00:59 am - 01:00 am | 42,000 | 55 | 41 | 14 |
01:00 am - 02:59 am | 21,400 | 56 | 42 | 14 |
03:00 am - 03:59 am | 4,00 | 57 | 43 | 14 |
04:00 am - 04:59 am | 543,000 | 58 | 2 | 56 |
05:00 am - 05:59 am | 411,000 | 59 | 6 | 53 |
这个怎么样:
library(tidyverse)
library(lubridate)
library(glue)
# Sample transactions. Date is a "dd/mm/yyyy HH:MM" string (no seconds).
df <- tribble(
  ~Date, ~Status, ~Value,
  "05/12/2021 23:59", "Failed", 500,
  "05/12/2021 23:59", "Successful", 1500,
  "05/12/2021 23:59", "Successful", 500,
  "05/12/2021 23:59", "Successful", 1500,
  "05/12/2021 23:59", "Successful", 1500,
  "05/12/2021 23:59", "Failed", 1500
)

# Parse the timestamp and derive the hourly grouping key.
# dmy_hm() is required here: the strings have no seconds component, so
# dmy_hms() fails to parse them and returns NA for every row.
df2 <- df %>%
  mutate(
    Datetime = dmy_hm(Date),   # convert to datetime format
    Date = as.Date(Datetime),  # extract date, if you need it later
    Hour = hour(Datetime)      # extract hour
  )

# Total transaction value per hour.
hourly_value <- df2 %>%
  group_by(Hour) %>%
  summarize(Value = sum(Value), .groups = "drop")

# Transaction count per hour, spread into one column per Status value.
hourly_count <- df2 %>%
  count(Hour, Status) %>%
  pivot_wider(names_from = "Status", values_from = "n")

# Lookup table mapping each hour of the day to its display label.
# Hour %% 12 maps both 0 and 12 to "00", which matches the requested
# "00:00 am - 00:59 am" label style.
interval_helper <- tibble(
  Hour = 0:23,
  display_hour = str_pad(Hour %% 12, 2, pad = "0"),
  ampm = if_else(Hour < 12, "am", "pm"),
  Interval = glue("{display_hour}:00 {ampm} - {display_hour}:59 {ampm}")
)

# Combine the per-hour summaries and attach the display label.
full_join(hourly_value, hourly_count, by = "Hour") %>%
  # A status that never occurs in a given hour is NA after pivot_wider();
  # zero-fill so that Successful + Failed below does not silently become NA.
  replace_na(list(Successful = 0L, Failed = 0L, Value = 0)) %>%
  left_join(interval_helper, by = "Hour") %>%
  mutate(Count = Successful + Failed) %>%
  select(Interval, Value, Count, Successful, Failed)
我不太确定您原来的 Date
列的格式。这里我假设它是一个字符串。因为 Interval
列的确切格式对您很重要,所以单独创建一个包含所需显示字符串的小型 tibble,再将其连接(join)进来似乎更容易。
重要的是用零替换缺失的 NA
值,否则 Count = Successful + Failed
将在只有其中一个存在时悄无声息地得到 NA,而不会报错。