在 R 的 for 循环中使用 separate 函数

Question

我想将时间戳（timestamp）列拆分成两列，分别命名为 “Date”（日期）和 “Time”（时间）。针对单个文件的代码是：

# Split the `timestamp` column of data1 at the space character into two
# columns named 'Date' and 'Time', and store the result back in data1.
data1 <- separate(data = data1, col = timestamp, into  = c('Date', 'Time'), sep = ' ')

但是，我有多个 csv.data（data1、data2、data3、data4、data5、...）。

在每个文件中，我都想将时间戳拆分为 “Date”（日期）和 “Time”（时间）两列。如何在 for 循环中使用 separate 函数？

我尝试了以下代码，但没有成功：

 # Asker's attempt: for each index i, assign the result of separate() to a
 # variable named "data<i>".
 # NOTE(review): the whole `data_files` object is passed as `data` on every
 # iteration and `i` is never used to select an element -- presumably why this
 # does not work; confirm what `data_files` actually holds.
 for(i in 1:length(data_files)) {                              
  assign(paste0("data", i),                                   
      separate(data = data_files, col = timestamp, into  = c('Date', 'Time'), sep = ' '))  
}

Answer 1

这里有两个适合您的解决方案。

一些虚拟数据：

library(tidyverse)
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union
# Dummy data: three single-column tibbles, each holding an hourly
# `timestamp` sequence from 30 December to 4 January.
data1 <- tibble(
  timestamp = seq(
    from = as_datetime("2021-12-30"),
    to = as_datetime("2022-01-04"),
    by = "hours"
  )
)
data2 <- tibble(
  timestamp = seq(
    from = as_datetime("2020-12-30"),
    to = as_datetime("2021-01-04"),
    by = "hours"
  )
)
data3 <- tibble(
  timestamp = seq(
    from = as_datetime("2019-12-30"),
    to = as_datetime("2020-01-04"),
    by = "hours"
  )
)

data1
#> # A tibble: 121 x 1
#>    timestamp          
#>    <dttm>             
#>  1 2021-12-30 00:00:00
#>  2 2021-12-30 01:00:00
#>  3 2021-12-30 02:00:00
#>  4 2021-12-30 03:00:00
#>  5 2021-12-30 04:00:00
#>  6 2021-12-30 05:00:00
#>  7 2021-12-30 06:00:00
#>  8 2021-12-30 07:00:00
#>  9 2021-12-30 08:00:00
#> 10 2021-12-30 09:00:00
#> # ... with 111 more rows

可以使用 for-loop:

# Collect the input tibbles in a list so they can be processed by position.
lst_datasets <- list(data1, data2, data3)

# Preallocate the result list to its final length instead of growing it
# inside the loop.
updated_datasets_lst <- vector("list", length(lst_datasets))
# seq_along() yields an empty sequence for an empty list, whereas
# 1:length(x) would yield c(1, 0) and index out of bounds.
for (i in seq_along(lst_datasets)) {
  # Split `timestamp` at the space into the columns 'Date' and 'Time'.
  updated_datasets_lst[[i]] <- lst_datasets[[i]] %>%
    separate(col = timestamp, into = c('Date', 'Time'), sep = ' ')
}
updated_datasets_lst
#> [[1]]
#> # A tibble: 121 x 2
#>    Date       Time    
#>    <chr>      <chr>   
#>  1 2021-12-30 00:00:00
#>  2 2021-12-30 01:00:00
#>  3 2021-12-30 02:00:00
#>  4 2021-12-30 03:00:00
#>  5 2021-12-30 04:00:00
#>  6 2021-12-30 05:00:00
#>  7 2021-12-30 06:00:00
#>  8 2021-12-30 07:00:00
#>  9 2021-12-30 08:00:00
#> 10 2021-12-30 09:00:00
#> # ... with 111 more rows
#> 
#> [[2]]
#> # A tibble: 121 x 2
#>    Date       Time    
#>    <chr>      <chr>   
#>  1 2020-12-30 00:00:00
#>  2 2020-12-30 01:00:00
#>  3 2020-12-30 02:00:00
#>  4 2020-12-30 03:00:00
#>  5 2020-12-30 04:00:00
#>  6 2020-12-30 05:00:00
#>  7 2020-12-30 06:00:00
#>  8 2020-12-30 07:00:00
#>  9 2020-12-30 08:00:00
#> 10 2020-12-30 09:00:00
#> # ... with 111 more rows
#> 
#> [[3]]
#> # A tibble: 121 x 2
#>    Date       Time    
#>    <chr>      <chr>   
#>  1 2019-12-30 00:00:00
#>  2 2019-12-30 01:00:00
#>  3 2019-12-30 02:00:00
#>  4 2019-12-30 03:00:00
#>  5 2019-12-30 04:00:00
#>  6 2019-12-30 05:00:00
#>  7 2019-12-30 06:00:00
#>  8 2019-12-30 07:00:00
#>  9 2019-12-30 08:00:00
#> 10 2019-12-30 09:00:00
#> # ... with 111 more rows

于 2022-04-20 由 reprex 包 (v2.0.1) 创建

或者使用 {purrr} 中的 map（随 tidyverse 一起加载）:

library(tidyverse)
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union
# Dummy data: three single-column tibbles, each holding an hourly
# `timestamp` sequence from 30 December to 4 January.
data1 <- tibble(
  timestamp = seq(
    from = as_datetime("2021-12-30"),
    to = as_datetime("2022-01-04"),
    by = "hours"
  )
)
data2 <- tibble(
  timestamp = seq(
    from = as_datetime("2020-12-30"),
    to = as_datetime("2021-01-04"),
    by = "hours"
  )
)
data3 <- tibble(
  timestamp = seq(
    from = as_datetime("2019-12-30"),
    to = as_datetime("2020-01-04"),
    by = "hours"
  )
)

data1
#> # A tibble: 121 x 1
#>    timestamp          
#>    <dttm>             
#>  1 2021-12-30 00:00:00
#>  2 2021-12-30 01:00:00
#>  3 2021-12-30 02:00:00
#>  4 2021-12-30 03:00:00
#>  5 2021-12-30 04:00:00
#>  6 2021-12-30 05:00:00
#>  7 2021-12-30 06:00:00
#>  8 2021-12-30 07:00:00
#>  9 2021-12-30 08:00:00
#> 10 2021-12-30 09:00:00
#> # ... with 111 more rows
# Collect the input tibbles in a list.
lst_datasets <- list(data1, data2, data3)

# Build a tibble with one row per input dataset (list-column `datasets`),
# tag each row with a running id, split every dataset's `timestamp` into
# 'Date' and 'Time', then flatten and split back into one tibble per id.
datasets_df <- tibble(datasets = lst_datasets) %>%
  # row_number() also works for zero rows; 1:nrow(.) would give c(1, 0).
  mutate(dataset_id = row_number()) %>%
  mutate(res = map(datasets, .f = function(d) {
    separate(data = d, col = timestamp, into = c('Date', 'Time'), sep = ' ')
  })) %>%
  dplyr::select(-datasets) %>%
  unnest(c(res)) %>%
  split(.$dataset_id)
# Print the resulting list, which is named by dataset_id.
datasets_df
#> $`1`
#> # A tibble: 121 x 3
#>    dataset_id Date       Time    
#>         <int> <chr>      <chr>   
#>  1          1 2021-12-30 00:00:00
#>  2          1 2021-12-30 01:00:00
#>  3          1 2021-12-30 02:00:00
#>  4          1 2021-12-30 03:00:00
#>  5          1 2021-12-30 04:00:00
#>  6          1 2021-12-30 05:00:00
#>  7          1 2021-12-30 06:00:00
#>  8          1 2021-12-30 07:00:00
#>  9          1 2021-12-30 08:00:00
#> 10          1 2021-12-30 09:00:00
#> # ... with 111 more rows
#> 
#> $`2`
#> # A tibble: 121 x 3
#>    dataset_id Date       Time    
#>         <int> <chr>      <chr>   
#>  1          2 2020-12-30 00:00:00
#>  2          2 2020-12-30 01:00:00
#>  3          2 2020-12-30 02:00:00
#>  4          2 2020-12-30 03:00:00
#>  5          2 2020-12-30 04:00:00
#>  6          2 2020-12-30 05:00:00
#>  7          2 2020-12-30 06:00:00
#>  8          2 2020-12-30 07:00:00
#>  9          2 2020-12-30 08:00:00
#> 10          2 2020-12-30 09:00:00
#> # ... with 111 more rows
#> 
#> $`3`
#> # A tibble: 121 x 3
#>    dataset_id Date       Time    
#>         <int> <chr>      <chr>   
#>  1          3 2019-12-30 00:00:00
#>  2          3 2019-12-30 01:00:00
#>  3          3 2019-12-30 02:00:00
#>  4          3 2019-12-30 03:00:00
#>  5          3 2019-12-30 04:00:00
#>  6          3 2019-12-30 05:00:00
#>  7          3 2019-12-30 06:00:00
#>  8          3 2019-12-30 07:00:00
#>  9          3 2019-12-30 08:00:00
#> 10          3 2019-12-30 09:00:00
#> # ... with 111 more rows

于 2022-04-20 由 reprex 包 (v2.0.1) 创建

在 R 的 for 循环中使用 separate 函数

Separate function as for loop in R

timestamp

for-loop

r