在 R 中按季度分组数据,但在没有数据时占季度
Grouping data by quarter in R but accounting for quarters when there are no data
我有一个简单的数据集,其中的行需要按季度分组。该代码工作正常,但问题是当一个季度没有数据时,它不会写入数据,因此不会显示在图表上。我想添加一个声明,其中没有一个季度将其置零而不是跳过它。
我当前的代码:
mydata <- dataset %>%
janitor::clean_names() %>%
mutate(test_date = lubridate::dmy(test_date)) %>%
mutate(test_quarter = zoo::as.yearqtr(test_date)) %>%
group_by(test_quarter) %>%
summarise(cases = n())
例如,2021 Q2 的输出是这样的,没有任何内容,当然是因为没有案例:
我想要案例为零的季度的代码帐户。
感谢任何帮助。
您可以使用 seq
函数创建季度序列并在 tidyr::complete
中使用它。
library(dplyr)
dataset %>%
janitor::clean_names() %>%
mutate(test_date = lubridate::dmy(test_date),
test_quarter = zoo::as.yearqtr(test_date)) %>%
count(test_quarter, name = 'cases') %>%
tidyr::complete(test_quarter = seq(min(test_quarter), max(test_quarter), 1/4),
fill = list(cases = 0))
基础 R 解决方案:
# Import data: df1 => data.frame
df1 <- data.frame(
test_quarter = c(
"2019 Q3",
"2019 Q4",
"2020 Q1",
"2020 Q2",
"2020 Q3",
"2020 Q4",
"2020 Q3",
"2021 Q3"
),
cases = c(4, 3, 1, 2, 2, 1, 3, 2)
)
# Create a date vector holding the range:
# date_rng => Date vector
date_rng <- as.Date(
paste0(
with(
df1,
range(
as.integer(
gsub(
"^(\d{4}).*",
"\1",
test_quarter
)
)
)
),
c("-01-01", "-12-31")
)
)
# Create a data.frame with each quarter in the range:
# date_rng_df => data.frame
date_rng_df <- transform(
data.frame(
dates = seq.Date(
from = date_rng[1],
to = date_rng[2],
by = "quarter"
)
),
# Derive the quarter vector:
# test_quater => character vector
test_quarter = paste(
gsub(
"^(\d{4}).*",
"\1",
dates
),
quarters(dates)
)
)
# Function to generate a sequence between integers:
# seq_int_between => function()
seq_int_between <- function(...){
# Store the inputs as a list variable: pholder => list
pholder <- list(...)
# Extract the range: rng => integer vector
rng <- range(pholder)
# Generate a vector between integer range:
# res => integer vector
res <- seq.int(
from = rng[1],
to = rng[2],
by = 1
)
# Explicitly define the returned object:
# integer vector => env
return(res)
}
# Extend the data.frame appropriately: res => data.frame
res <- transform(
merge(
date_rng_df[
seq_int_between(
match(
df1$test_quarter,
date_rng_df$test_quarter
)
),
"test_quarter",
drop = FALSE
],
df1,
by = "test_quarter",
all.x = TRUE
),
# 0 out NA values: cases => integer vector
cases = ifelse(
is.na(cases),
0,
cases
)
)
# Print to console: data.frame => stdout(console)
res
我有一个简单的数据集,其中的行需要按季度分组。该代码工作正常,但问题是当一个季度没有数据时,它不会写入数据,因此不会显示在图表上。我想添加一个声明,其中没有一个季度将其置零而不是跳过它。 我当前的代码:
mydata <- dataset %>%
janitor::clean_names() %>%
mutate(test_date = lubridate::dmy(test_date)) %>%
mutate(test_quarter = zoo::as.yearqtr(test_date)) %>%
group_by(test_quarter) %>%
summarise(cases = n())
例如,2021 Q2 的输出是这样的,没有任何内容,当然是因为没有案例:
我想要案例为零的季度的代码帐户。 感谢任何帮助。
您可以使用 seq
函数创建季度序列并在 tidyr::complete
中使用它。
library(dplyr)
dataset %>%
janitor::clean_names() %>%
mutate(test_date = lubridate::dmy(test_date),
test_quarter = zoo::as.yearqtr(test_date)) %>%
count(test_quarter, name = 'cases') %>%
tidyr::complete(test_quarter = seq(min(test_quarter), max(test_quarter), 1/4),
fill = list(cases = 0))
基础 R 解决方案:
# Import data: df1 => data.frame
df1 <- data.frame(
test_quarter = c(
"2019 Q3",
"2019 Q4",
"2020 Q1",
"2020 Q2",
"2020 Q3",
"2020 Q4",
"2020 Q3",
"2021 Q3"
),
cases = c(4, 3, 1, 2, 2, 1, 3, 2)
)
# Create a date vector holding the range:
# date_rng => Date vector
date_rng <- as.Date(
paste0(
with(
df1,
range(
as.integer(
gsub(
"^(\d{4}).*",
"\1",
test_quarter
)
)
)
),
c("-01-01", "-12-31")
)
)
# Create a data.frame with each quarter in the range:
# date_rng_df => data.frame
date_rng_df <- transform(
data.frame(
dates = seq.Date(
from = date_rng[1],
to = date_rng[2],
by = "quarter"
)
),
# Derive the quarter vector:
# test_quater => character vector
test_quarter = paste(
gsub(
"^(\d{4}).*",
"\1",
dates
),
quarters(dates)
)
)
# Function to generate a sequence between integers:
# seq_int_between => function()
seq_int_between <- function(...){
# Store the inputs as a list variable: pholder => list
pholder <- list(...)
# Extract the range: rng => integer vector
rng <- range(pholder)
# Generate a vector between integer range:
# res => integer vector
res <- seq.int(
from = rng[1],
to = rng[2],
by = 1
)
# Explicitly define the returned object:
# integer vector => env
return(res)
}
# Extend the data.frame appropriately: res => data.frame
res <- transform(
merge(
date_rng_df[
seq_int_between(
match(
df1$test_quarter,
date_rng_df$test_quarter
)
),
"test_quarter",
drop = FALSE
],
df1,
by = "test_quarter",
all.x = TRUE
),
# 0 out NA values: cases => integer vector
cases = ifelse(
is.na(cases),
0,
cases
)
)
# Print to console: data.frame => stdout(console)
res