在 R 的 for 循环中使用 separate 函数
Separate function as for loop in R
我想将时间戳列分成两列,分别命名为“日期”和“时间”。只有一个文件的代码是:
data1 <- separate(data = data1, col = timestamp, into = c('Date', 'Time'), sep = ' ')
但是,我有多个 csv.data(data1、data2、data3、data4、data5、...)。
在每个文件中,我想将时间戳分为“日期”和“时间”两列。如何在 for 循环中使用 separate 函数?
我尝试了以下代码,但没有成功:
# Attempted loop from the question (reported as not working): for each index i,
# assign the result of separate() to a variable named "data<i>".
# NOTE(review): `data = data_files` passes the whole `data_files` object on every
# iteration instead of its i-th element -- presumably the cause of the failure;
# confirm what `data_files` actually holds.
for(i in 1:length(data_files)) {
assign(paste0("data", i),
separate(data = data_files, col = timestamp, into = c('Date', 'Time'), sep = ' '))
}
这里有两个适合您的解决方案。
一些虚拟数据:
library(tidyverse)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
# Dummy data: three single-column tibbles of hourly timestamps, each running
# from 30 December to 4 January of consecutive year boundaries.
data1 <- tibble(
  timestamp = seq(
    from = as_datetime("2021-12-30"),
    to = as_datetime("2022-01-04"),
    by = "hours"
  )
)
data2 <- tibble(
  timestamp = seq(
    from = as_datetime("2020-12-30"),
    to = as_datetime("2021-01-04"),
    by = "hours"
  )
)
data3 <- tibble(
  timestamp = seq(
    from = as_datetime("2019-12-30"),
    to = as_datetime("2020-01-04"),
    by = "hours"
  )
)
data1
#> # A tibble: 121 x 1
#> timestamp
#> <dttm>
#> 1 2021-12-30 00:00:00
#> 2 2021-12-30 01:00:00
#> 3 2021-12-30 02:00:00
#> 4 2021-12-30 03:00:00
#> 5 2021-12-30 04:00:00
#> 6 2021-12-30 05:00:00
#> 7 2021-12-30 06:00:00
#> 8 2021-12-30 07:00:00
#> 9 2021-12-30 08:00:00
#> 10 2021-12-30 09:00:00
#> # ... with 111 more rows
可以使用 for 循环:
# Gather the input tibbles in a list so they can be processed uniformly.
lst_datasets <- list(data1, data2, data3)
# Preallocate the result list rather than growing it on every iteration.
updated_datasets_lst <- vector("list", length(lst_datasets))
# seq_along() is safe for an empty list, where 1:length(x) would yield c(1, 0).
for (i in seq_along(lst_datasets)) {
  # Split `timestamp` at the space into the columns `Date` and `Time`.
  updated_datasets_lst[[i]] <- lst_datasets[[i]] %>%
    separate(col = timestamp, into = c("Date", "Time"), sep = " ")
}
updated_datasets_lst
#> [[1]]
#> # A tibble: 121 x 2
#> Date Time
#> <chr> <chr>
#> 1 2021-12-30 00:00:00
#> 2 2021-12-30 01:00:00
#> 3 2021-12-30 02:00:00
#> 4 2021-12-30 03:00:00
#> 5 2021-12-30 04:00:00
#> 6 2021-12-30 05:00:00
#> 7 2021-12-30 06:00:00
#> 8 2021-12-30 07:00:00
#> 9 2021-12-30 08:00:00
#> 10 2021-12-30 09:00:00
#> # ... with 111 more rows
#>
#> [[2]]
#> # A tibble: 121 x 2
#> Date Time
#> <chr> <chr>
#> 1 2020-12-30 00:00:00
#> 2 2020-12-30 01:00:00
#> 3 2020-12-30 02:00:00
#> 4 2020-12-30 03:00:00
#> 5 2020-12-30 04:00:00
#> 6 2020-12-30 05:00:00
#> 7 2020-12-30 06:00:00
#> 8 2020-12-30 07:00:00
#> 9 2020-12-30 08:00:00
#> 10 2020-12-30 09:00:00
#> # ... with 111 more rows
#>
#> [[3]]
#> # A tibble: 121 x 2
#> Date Time
#> <chr> <chr>
#> 1 2019-12-30 00:00:00
#> 2 2019-12-30 01:00:00
#> 3 2019-12-30 02:00:00
#> 4 2019-12-30 03:00:00
#> 5 2019-12-30 04:00:00
#> 6 2019-12-30 05:00:00
#> 7 2019-12-30 06:00:00
#> 8 2019-12-30 07:00:00
#> 9 2019-12-30 08:00:00
#> 10 2019-12-30 09:00:00
#> # ... with 111 more rows
由 reprex package (v2.0.1)
于 2022-04-20 创建
或者使用 {purrr} 中的 map:
library(tidyverse)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
# Dummy data: three single-column tibbles of hourly timestamps, each running
# from 30 December to 4 January of consecutive year boundaries.
data1 <- tibble(
  timestamp = seq(
    from = as_datetime("2021-12-30"),
    to = as_datetime("2022-01-04"),
    by = "hours"
  )
)
data2 <- tibble(
  timestamp = seq(
    from = as_datetime("2020-12-30"),
    to = as_datetime("2021-01-04"),
    by = "hours"
  )
)
data3 <- tibble(
  timestamp = seq(
    from = as_datetime("2019-12-30"),
    to = as_datetime("2020-01-04"),
    by = "hours"
  )
)
data1
#> # A tibble: 121 x 1
#> timestamp
#> <dttm>
#> 1 2021-12-30 00:00:00
#> 2 2021-12-30 01:00:00
#> 3 2021-12-30 02:00:00
#> 4 2021-12-30 03:00:00
#> 5 2021-12-30 04:00:00
#> 6 2021-12-30 05:00:00
#> 7 2021-12-30 06:00:00
#> 8 2021-12-30 07:00:00
#> 9 2021-12-30 08:00:00
#> 10 2021-12-30 09:00:00
#> # ... with 111 more rows
# Gather the input tibbles in a list and wrap them in a list-column so each
# dataset can be transformed with map() and tagged with an id.
lst_datasets <- list(data1, data2, data3)
datasets_df <- tibble(datasets = lst_datasets) %>%
  # row_number() numbers the datasets 1..n and, unlike 1:nrow(.), does not
  # produce c(1, 0) when there are no rows.
  mutate(dataset_id = row_number()) %>%
  # Split `timestamp` at the space into `Date` and `Time` for every dataset.
  mutate(res = map(datasets, .f = function(d) {
    separate(data = d, col = timestamp, into = c("Date", "Time"), sep = " ")
  })) %>%
  dplyr::select(-datasets) %>%
  unnest(c(res)) %>%
  # Return one tibble per dataset, keyed by dataset_id.
  split(.$dataset_id)
datasets_df
#> $`1`
#> # A tibble: 121 x 3
#> dataset_id Date Time
#> <int> <chr> <chr>
#> 1 1 2021-12-30 00:00:00
#> 2 1 2021-12-30 01:00:00
#> 3 1 2021-12-30 02:00:00
#> 4 1 2021-12-30 03:00:00
#> 5 1 2021-12-30 04:00:00
#> 6 1 2021-12-30 05:00:00
#> 7 1 2021-12-30 06:00:00
#> 8 1 2021-12-30 07:00:00
#> 9 1 2021-12-30 08:00:00
#> 10 1 2021-12-30 09:00:00
#> # ... with 111 more rows
#>
#> $`2`
#> # A tibble: 121 x 3
#> dataset_id Date Time
#> <int> <chr> <chr>
#> 1 2 2020-12-30 00:00:00
#> 2 2 2020-12-30 01:00:00
#> 3 2 2020-12-30 02:00:00
#> 4 2 2020-12-30 03:00:00
#> 5 2 2020-12-30 04:00:00
#> 6 2 2020-12-30 05:00:00
#> 7 2 2020-12-30 06:00:00
#> 8 2 2020-12-30 07:00:00
#> 9 2 2020-12-30 08:00:00
#> 10 2 2020-12-30 09:00:00
#> # ... with 111 more rows
#>
#> $`3`
#> # A tibble: 121 x 3
#> dataset_id Date Time
#> <int> <chr> <chr>
#> 1 3 2019-12-30 00:00:00
#> 2 3 2019-12-30 01:00:00
#> 3 3 2019-12-30 02:00:00
#> 4 3 2019-12-30 03:00:00
#> 5 3 2019-12-30 04:00:00
#> 6 3 2019-12-30 05:00:00
#> 7 3 2019-12-30 06:00:00
#> 8 3 2019-12-30 07:00:00
#> 9 3 2019-12-30 08:00:00
#> 10 3 2019-12-30 09:00:00
#> # ... with 111 more rows
由 reprex package (v2.0.1)
于 2022-04-20 创建
我想将时间戳列分成两列,分别命名为“日期”和“时间”。只有一个文件的代码是:
data1 <- separate(data = data1, col = timestamp, into = c('Date', 'Time'), sep = ' ')
但是,我有多个 csv.data(data1、data2、data3、data4、data5、...)。
在每个文件中,我想将时间戳分为“日期”和“时间”两列。如何在 for 循环中使用 separate 函数?
我尝试了以下代码,但没有成功:
# Attempted loop from the question (reported as not working): for each index i,
# assign the result of separate() to a variable named "data<i>".
# NOTE(review): `data = data_files` passes the whole `data_files` object on every
# iteration instead of its i-th element -- presumably the cause of the failure;
# confirm what `data_files` actually holds.
for(i in 1:length(data_files)) {
assign(paste0("data", i),
separate(data = data_files, col = timestamp, into = c('Date', 'Time'), sep = ' '))
}
这里有两个适合您的解决方案。
一些虚拟数据:
library(tidyverse)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
# Dummy data: three single-column tibbles of hourly timestamps, each running
# from 30 December to 4 January of consecutive year boundaries.
data1 <- tibble(
  timestamp = seq(
    from = as_datetime("2021-12-30"),
    to = as_datetime("2022-01-04"),
    by = "hours"
  )
)
data2 <- tibble(
  timestamp = seq(
    from = as_datetime("2020-12-30"),
    to = as_datetime("2021-01-04"),
    by = "hours"
  )
)
data3 <- tibble(
  timestamp = seq(
    from = as_datetime("2019-12-30"),
    to = as_datetime("2020-01-04"),
    by = "hours"
  )
)
data1
#> # A tibble: 121 x 1
#> timestamp
#> <dttm>
#> 1 2021-12-30 00:00:00
#> 2 2021-12-30 01:00:00
#> 3 2021-12-30 02:00:00
#> 4 2021-12-30 03:00:00
#> 5 2021-12-30 04:00:00
#> 6 2021-12-30 05:00:00
#> 7 2021-12-30 06:00:00
#> 8 2021-12-30 07:00:00
#> 9 2021-12-30 08:00:00
#> 10 2021-12-30 09:00:00
#> # ... with 111 more rows
可以使用 for 循环:
# Gather the input tibbles in a list so they can be processed uniformly.
lst_datasets <- list(data1, data2, data3)
# Preallocate the result list rather than growing it on every iteration.
updated_datasets_lst <- vector("list", length(lst_datasets))
# seq_along() is safe for an empty list, where 1:length(x) would yield c(1, 0).
for (i in seq_along(lst_datasets)) {
  # Split `timestamp` at the space into the columns `Date` and `Time`.
  updated_datasets_lst[[i]] <- lst_datasets[[i]] %>%
    separate(col = timestamp, into = c("Date", "Time"), sep = " ")
}
updated_datasets_lst
#> [[1]]
#> # A tibble: 121 x 2
#> Date Time
#> <chr> <chr>
#> 1 2021-12-30 00:00:00
#> 2 2021-12-30 01:00:00
#> 3 2021-12-30 02:00:00
#> 4 2021-12-30 03:00:00
#> 5 2021-12-30 04:00:00
#> 6 2021-12-30 05:00:00
#> 7 2021-12-30 06:00:00
#> 8 2021-12-30 07:00:00
#> 9 2021-12-30 08:00:00
#> 10 2021-12-30 09:00:00
#> # ... with 111 more rows
#>
#> [[2]]
#> # A tibble: 121 x 2
#> Date Time
#> <chr> <chr>
#> 1 2020-12-30 00:00:00
#> 2 2020-12-30 01:00:00
#> 3 2020-12-30 02:00:00
#> 4 2020-12-30 03:00:00
#> 5 2020-12-30 04:00:00
#> 6 2020-12-30 05:00:00
#> 7 2020-12-30 06:00:00
#> 8 2020-12-30 07:00:00
#> 9 2020-12-30 08:00:00
#> 10 2020-12-30 09:00:00
#> # ... with 111 more rows
#>
#> [[3]]
#> # A tibble: 121 x 2
#> Date Time
#> <chr> <chr>
#> 1 2019-12-30 00:00:00
#> 2 2019-12-30 01:00:00
#> 3 2019-12-30 02:00:00
#> 4 2019-12-30 03:00:00
#> 5 2019-12-30 04:00:00
#> 6 2019-12-30 05:00:00
#> 7 2019-12-30 06:00:00
#> 8 2019-12-30 07:00:00
#> 9 2019-12-30 08:00:00
#> 10 2019-12-30 09:00:00
#> # ... with 111 more rows
由 reprex package (v2.0.1) 于 2022-04-20 创建
或者使用 {purrr} 中的 map:
library(tidyverse)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
# Dummy data: three single-column tibbles of hourly timestamps, each running
# from 30 December to 4 January of consecutive year boundaries.
data1 <- tibble(
  timestamp = seq(
    from = as_datetime("2021-12-30"),
    to = as_datetime("2022-01-04"),
    by = "hours"
  )
)
data2 <- tibble(
  timestamp = seq(
    from = as_datetime("2020-12-30"),
    to = as_datetime("2021-01-04"),
    by = "hours"
  )
)
data3 <- tibble(
  timestamp = seq(
    from = as_datetime("2019-12-30"),
    to = as_datetime("2020-01-04"),
    by = "hours"
  )
)
data1
#> # A tibble: 121 x 1
#> timestamp
#> <dttm>
#> 1 2021-12-30 00:00:00
#> 2 2021-12-30 01:00:00
#> 3 2021-12-30 02:00:00
#> 4 2021-12-30 03:00:00
#> 5 2021-12-30 04:00:00
#> 6 2021-12-30 05:00:00
#> 7 2021-12-30 06:00:00
#> 8 2021-12-30 07:00:00
#> 9 2021-12-30 08:00:00
#> 10 2021-12-30 09:00:00
#> # ... with 111 more rows
# Gather the input tibbles in a list and wrap them in a list-column so each
# dataset can be transformed with map() and tagged with an id.
lst_datasets <- list(data1, data2, data3)
datasets_df <- tibble(datasets = lst_datasets) %>%
  # row_number() numbers the datasets 1..n and, unlike 1:nrow(.), does not
  # produce c(1, 0) when there are no rows.
  mutate(dataset_id = row_number()) %>%
  # Split `timestamp` at the space into `Date` and `Time` for every dataset.
  mutate(res = map(datasets, .f = function(d) {
    separate(data = d, col = timestamp, into = c("Date", "Time"), sep = " ")
  })) %>%
  dplyr::select(-datasets) %>%
  unnest(c(res)) %>%
  # Return one tibble per dataset, keyed by dataset_id.
  split(.$dataset_id)
datasets_df
#> $`1`
#> # A tibble: 121 x 3
#> dataset_id Date Time
#> <int> <chr> <chr>
#> 1 1 2021-12-30 00:00:00
#> 2 1 2021-12-30 01:00:00
#> 3 1 2021-12-30 02:00:00
#> 4 1 2021-12-30 03:00:00
#> 5 1 2021-12-30 04:00:00
#> 6 1 2021-12-30 05:00:00
#> 7 1 2021-12-30 06:00:00
#> 8 1 2021-12-30 07:00:00
#> 9 1 2021-12-30 08:00:00
#> 10 1 2021-12-30 09:00:00
#> # ... with 111 more rows
#>
#> $`2`
#> # A tibble: 121 x 3
#> dataset_id Date Time
#> <int> <chr> <chr>
#> 1 2 2020-12-30 00:00:00
#> 2 2 2020-12-30 01:00:00
#> 3 2 2020-12-30 02:00:00
#> 4 2 2020-12-30 03:00:00
#> 5 2 2020-12-30 04:00:00
#> 6 2 2020-12-30 05:00:00
#> 7 2 2020-12-30 06:00:00
#> 8 2 2020-12-30 07:00:00
#> 9 2 2020-12-30 08:00:00
#> 10 2 2020-12-30 09:00:00
#> # ... with 111 more rows
#>
#> $`3`
#> # A tibble: 121 x 3
#> dataset_id Date Time
#> <int> <chr> <chr>
#> 1 3 2019-12-30 00:00:00
#> 2 3 2019-12-30 01:00:00
#> 3 3 2019-12-30 02:00:00
#> 4 3 2019-12-30 03:00:00
#> 5 3 2019-12-30 04:00:00
#> 6 3 2019-12-30 05:00:00
#> 7 3 2019-12-30 06:00:00
#> 8 3 2019-12-30 07:00:00
#> 9 3 2019-12-30 08:00:00
#> 10 3 2019-12-30 09:00:00
#> # ... with 111 more rows
由 reprex package (v2.0.1)
于 2022-04-20 创建