从 R 中的 csv 文件中提取 3 维数据

Extract 3 dimensional data from csv file in R

我正在尝试将我的数据从 csv 文件提取到 R 中。 数据当前采用以下格式:

,"[{time=2014-01-01T00:00:00, NDVI=0.3793765496776215}, {time=2014-02-01T00:00:00, NDVI=...

,"[{time=2014-01-01T00:00:00, NDVI=0.4071076986818826}, {time=2014-02-01T00:00:00, ...

,"[{time=2014-01-01T00:00:00, NDVI=0.3412131556625801}, {time=2014-02-01T00:00:00, NDVI=...

每一行都是不同地区的数据。 我想以这种格式使用它:

Region [1]

Time       NDVI

[1]        [1]

[2]        [2]

[3]        [3]

[...]      [...]


Region [2]

Time       NDVI

[1]        [1]

[2]        [2]

[3]        [3]

[...]      [...]


Region [3]

Time       NDVI

[1]        [1]

[2]        [2]

[3]        [3]

[...]      [...]

我该怎么做?

也许已经有现成的包可以解析这种格式;不过,您也可以使用 tidyverse 包自己做一些数据转换。

您可以使用 readLines() 读入您的数据:

# Read the raw file as a character vector, one element per line of the file.
dat <- readLines(con = "test.txt")

在这种情况下看起来像这样:

# Sample content: three record lines, each followed by an empty line
# except the last one.
dat <- c(
  ",\"[{time=2014-01-01T00:00:00, NDVI=0.3793765496776215}, {time=2014-02-01T00:00:00, NDVI=2}]\"",
  "",
  ",\"[{time=2014-01-01T00:00:00, NDVI=0.4071076986818826}, {time=2014-02-01T00:00:00, NDVI=3}]\"",
  "",
  ",\"[{time=2014-01-01T00:00:00, NDVI=0.3412131556625801}, {time=2014-02-01T00:00:00, NDVI=4}]\""
)

然后您可以使用 for 循环进行一些数据转换,并将结果存储在列表中。

library(tidyverse)

# Sample input: one record line per region, separated by empty lines.
dat <- c(",\"[{time=2014-01-01T00:00:00, NDVI=0.3793765496776215}, {time=2014-02-01T00:00:00, NDVI=2}]\"", 
         "", ",\"[{time=2014-01-01T00:00:00, NDVI=0.4071076986818826}, {time=2014-02-01T00:00:00, NDVI=3}]\"", 
         "", ",\"[{time=2014-01-01T00:00:00, NDVI=0.3412131556625801}, {time=2014-02-01T00:00:00, NDVI=4}]\""
)

# Parse every non-empty line into its own tibble and collect the tibbles in
# the list `l`, one element per region (in input order).
#
# Regex metacharacters ([ ] { }) must be escaped with a doubled backslash
# inside an R string: "\\[" is the regex \[, whereas "\[" is a parse error.
l <- list()
counter <- 1
for (line in dat) {
  # Skip the empty separator lines.
  if (nchar(line) > 0) {
    region <- tibble(line = line) %>%
      # Remove the double quotes, the leading ",[" and the closing "]".
      mutate(line = str_replace_all(
        line,
        c("\"" = "", ",\\[" = "", "\\]" = "")
      )) %>%
      # One row per observation: split wherever "}, {" occurs.
      separate_rows(line, sep = "\\}, \\{") %>%
      # Drop the braces left over at the start and end of the line.
      mutate(line = str_replace_all(line, c("\\{" = "", "\\}" = ""))) %>%
      # Add a unique identifier for each observation.
      mutate(observation = row_number()) %>%
      # One row per key=value pair: split wherever a "," occurs.
      # The space after the comma is kept, so the second key is " NDVI"
      # (with a leading space); use sep = ", " if a clean name is wanted.
      separate_rows(line, sep = ",") %>%
      separate(line, into = c("category", "value"), sep = "=") %>%
      # Put it into the wide format: one column per key, one row per
      # observation. All values are still character strings.
      pivot_wider(names_from = category, values_from = value)
    l[[counter]] <- region
    counter <- counter + 1
  }
}

l
#> [[1]]
#> # A tibble: 2 x 3
#>   observation time                ` NDVI`           
#>         <int> <chr>               <chr>             
#> 1           1 2014-01-01T00:00:00 0.3793765496776215
#> 2           2 2014-02-01T00:00:00 2                 
#> 
#> [[2]]
#> # A tibble: 2 x 3
#>   observation time                ` NDVI`           
#>         <int> <chr>               <chr>             
#> 1           1 2014-01-01T00:00:00 0.4071076986818826
#> 2           2 2014-02-01T00:00:00 3                 
#> 
#> [[3]]
#> # A tibble: 2 x 3
#>   observation time                ` NDVI`           
#>         <int> <chr>               <chr>             
#> 1           1 2014-01-01T00:00:00 0.3412131556625801
#> 2           2 2014-02-01T00:00:00 4

由 reprex 包 (v0.3.0) 于 2020 年 3 月 24 日创建