基于多个日期范围和多列的切片以使用 R 格式化新数据框
Slice based on multiple date ranges and multiple columns to formating a new dataframe with R
假设我有一个示例数据如下:
df <- structure(list(date = structure(c(18912, 18913, 18914, 18915,
18916, 18917, 18918, 18919, 18920, 18921, 18922, 18923), class = "Date"),
value1 = c(1.015, NA, NA, 1.015, 1.015, 1.015, 1.015, 1.015,
1.015, 1.015, 1.015, 1.015), value2 = c(1.115668, 1.104622,
1.093685, 1.082857, 1.072135, 1.06152, 1.05101, NA, NA, 1.0201,
1.01, 1), value3 = c(1.015, 1.030225, NA, NA, 1.077284, 1.093443,
1.109845, 1.126493, 1.14339, 1.160541, 1.177949, 1.195618
)), row.names = c(NA, -12L), class = "data.frame")
和三个日期间隔:
date_range1 <- (date>='2021-10-12' & date<='2021-10-15')
date_range2 <- (date>='2021-10-16' & date<='2021-10-18')
date_range3 <- (date>='2021-10-21' & date<='2021-10-23')
我需要分别使用 date_range1
、date_range2
和 date_range3
从 value1
、value2
和 value3
中分割数据,最后将它们连接到一列,如下所示:
请注意类型 1、2 和 3 是表示日期范围的数字:date_range1
、date_range2
和 date_range3
。
我如何使用 R 的包实现这一点?谢谢。
编辑:
str(real_data)
输出:
tibble [1,537 x 5] (S3: tbl_df/tbl/data.frame)
$ date : chr [1:1537] "2008-01-31" "2008-02-29" "2008-03-31" "2008-04-30" ...
$ value1: num [1:1537] 11.3 11.4 11.4 11.3 11.2 ...
$ value2 : num [1:1537] 11.4 11.4 11.3 11.3 11.1 ...
$ value3: num [1:1537] NA NA NA NA NA NA NA NA NA NA ...
$ value4 : chr [1:1537] "11.60" "10.20" "12.55" "10.37" ...
您可以使用 dplyr::case_when
library(dplyr)
df %>%
mutate(type = case_when(
date>='2021-10-12' & date<='2021-10-15' ~ 1,
date>='2021-10-16' & date<='2021-10-18' ~ 2,
date>='2021-10-21' & date<='2021-10-23' ~ 3,
TRUE ~ NA_real_
),
value = case_when(
type == 1 ~ value1,
type == 2 ~ value2,
type == 3 ~ value3,
TRUE ~ NA_real_
)) %>%
select(date, value, type) %>%
filter(!is.na(type))
date value type
1 2021-10-12 1.015000 1
2 2021-10-13 NA 1
3 2021-10-14 NA 1
4 2021-10-15 1.015000 1
5 2021-10-16 1.072135 2
6 2021-10-17 1.061520 2
7 2021-10-18 1.051010 2
8 2021-10-21 1.160541 3
9 2021-10-22 1.177949 3
10 2021-10-23 1.195618 3
假设我有一个示例数据如下:
df <- structure(list(date = structure(c(18912, 18913, 18914, 18915,
18916, 18917, 18918, 18919, 18920, 18921, 18922, 18923), class = "Date"),
value1 = c(1.015, NA, NA, 1.015, 1.015, 1.015, 1.015, 1.015,
1.015, 1.015, 1.015, 1.015), value2 = c(1.115668, 1.104622,
1.093685, 1.082857, 1.072135, 1.06152, 1.05101, NA, NA, 1.0201,
1.01, 1), value3 = c(1.015, 1.030225, NA, NA, 1.077284, 1.093443,
1.109845, 1.126493, 1.14339, 1.160541, 1.177949, 1.195618
)), row.names = c(NA, -12L), class = "data.frame")
和三个日期间隔:
date_range1 <- (date>='2021-10-12' & date<='2021-10-15')
date_range2 <- (date>='2021-10-16' & date<='2021-10-18')
date_range3 <- (date>='2021-10-21' & date<='2021-10-23')
我需要分别使用 date_range1
、date_range2
和 date_range3
从 value1
、value2
和 value3
中分割数据,最后将它们连接到一列,如下所示:
请注意类型 1、2 和 3 是表示日期范围的数字:date_range1
、date_range2
和 date_range3
。
我如何使用 R 的包实现这一点?谢谢。
编辑:
str(real_data)
输出:
tibble [1,537 x 5] (S3: tbl_df/tbl/data.frame)
$ date : chr [1:1537] "2008-01-31" "2008-02-29" "2008-03-31" "2008-04-30" ...
$ value1: num [1:1537] 11.3 11.4 11.4 11.3 11.2 ...
$ value2 : num [1:1537] 11.4 11.4 11.3 11.3 11.1 ...
$ value3: num [1:1537] NA NA NA NA NA NA NA NA NA NA ...
$ value4 : chr [1:1537] "11.60" "10.20" "12.55" "10.37" ...
您可以使用 dplyr::case_when
library(dplyr)
df %>%
mutate(type = case_when(
date>='2021-10-12' & date<='2021-10-15' ~ 1,
date>='2021-10-16' & date<='2021-10-18' ~ 2,
date>='2021-10-21' & date<='2021-10-23' ~ 3,
TRUE ~ NA_real_
),
value = case_when(
type == 1 ~ value1,
type == 2 ~ value2,
type == 3 ~ value3,
TRUE ~ NA_real_
)) %>%
select(date, value, type) %>%
filter(!is.na(type))
date value type
1 2021-10-12 1.015000 1
2 2021-10-13 NA 1
3 2021-10-14 NA 1
4 2021-10-15 1.015000 1
5 2021-10-16 1.072135 2
6 2021-10-17 1.061520 2
7 2021-10-18 1.051010 2
8 2021-10-21 1.160541 3
9 2021-10-22 1.177949 3
10 2021-10-23 1.195618 3