重组数据表(data table)中的数据
Reorganizing Data in a data table
当我从网络数据库提取数据时,得到的数据格式如下。
site
date
time
parameter_code
remark_code
result
001
01-01-2020
15:10
00010
NA
16.6
001
01-01-2020
15:10
00095
NA
521.0
001
01-01-2020
15:10
00300
NA
5.6
001
01-01-2020
15:10
34475
<
1.0
001
03-30-2020
09:45
00010
NA
18.0
001
03-30-2020
09:45
00095
NA
546.0
001
03-30-2020
09:45
00300
NA
3.7
001
03-30-2020
09:45
34475
NA
2.3
我想格式化如下。
我知道我需要先合并 Remark_code 和 Result 两列,但我不确定如何将 Parameter_code 列的各个取值拆分为单独的列,并把对应的结果填入表中。
site
date
time
00010
00095
00300
34475
001
01-01-2020
15:10
16.6
521.0
5.6
<1.0
001
03-30-2020
09:45
18.0
546.0
3.7
2.3
我不一定需要有关如何执行此操作的所有代码,而只需要有关使用哪些功能的说明。我一直在为此苦苦挣扎,因为我什至不确定要查找哪些关键字或这种类型的转换(?)会被称为什么。任何帮助将不胜感激。
我们可以先用 unite 合并 'REMARK_CODE' 和 'RESULT' 两列,
之后再使用 pivot_wider
library(dplyr)
library(tidyr)

# Reshape the long-format readings in `df1` to one row per SITE/DATE/TIME.
df1 %>%
  # Paste the remark flag (e.g. "<") in front of the result; with
  # na.rm = TRUE a missing remark is dropped instead of pasted as "NA".
  unite(
    RESULT,
    REMARK_CODE, RESULT,
    sep = "",
    na.rm = TRUE
  ) %>%
  # Turn every distinct PARAMETER_CODE into its own column holding RESULT.
  pivot_wider(
    names_from = PARAMETER_CODE,
    values_from = RESULT
  )
-输出
# A tibble: 2 x 7
# SITE DATE TIME `10` `95` `300` `34475`
# <chr> <chr> <chr> <chr> <chr> <chr> <chr>
# 1 001 1/1/2020 15:10 16.6 521 5.6 <1
# 2 001 3/30/2020 9:45 18 546 3.7 2.3
数据
# Example input in long format: one row per SITE/DATE/TIME/PARAMETER_CODE
# reading, with an optional remark flag in REMARK_CODE.
df1 <- data.frame(
  SITE = rep("001", 8),
  DATE = rep(c("1/1/2020", "3/30/2020"), each = 4),
  TIME = rep(c("15:10", "9:45"), each = 4),
  PARAMETER_CODE = rep(c(10L, 95L, 300L, 34475L), times = 2),
  REMARK_CODE = c(NA, NA, NA, "<", NA, NA, NA, NA),
  RESULT = c(16.6, 521, 5.6, 1, 18, 546, 3.7, 2.3),
  stringsAsFactors = FALSE
)
另一种方法可以是:
library(dplyr)
library(tidyr)

# Code
# Build the wide table from `df`: prefix each result with its remark flag
# when one is present, drop the flag column, then spread the parameter
# codes into columns.
new <- df %>%
  mutate(
    RESULT = ifelse(
      !is.na(REMARK_CODE),
      paste0(REMARK_CODE, RESULT),
      # No remark: pasting with "" only converts the number to text.
      paste0("", RESULT)
    )
  ) %>%
  select(-REMARK_CODE) %>%
  pivot_wider(
    names_from = PARAMETER_CODE,
    values_from = RESULT
  )
输出:
# A tibble: 2 x 7
SITE DATE TIME `10` `95` `300` `34475`
<chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 001 1/1/2020 15:10 16.6 521 5.6 <1
2 001 3/30/2020 9:45 18 546 3.7 2.3
使用的数据:
#Data
# Example input in long format: one row per SITE/DATE/TIME/PARAMETER_CODE
# reading, with an optional remark flag in REMARK_CODE.
df <- data.frame(
  SITE = rep("001", 8),
  DATE = rep(c("1/1/2020", "3/30/2020"), each = 4),
  TIME = rep(c("15:10", "9:45"), each = 4),
  PARAMETER_CODE = rep(c(10L, 95L, 300L, 34475L), times = 2),
  REMARK_CODE = c(NA, NA, NA, "<", NA, NA, NA, NA),
  RESULT = c(16.6, 521, 5.6, 1, 18, 546, 3.7, 2.3),
  stringsAsFactors = FALSE
)
当我从网络数据库提取数据时,得到的数据格式如下。
site | date | time | parameter_code | remark_code | result |
---|---|---|---|---|---|
001 | 01-01-2020 | 15:10 | 00010 | NA | 16.6 |
001 | 01-01-2020 | 15:10 | 00095 | NA | 521.0 |
001 | 01-01-2020 | 15:10 | 00300 | NA | 5.6 |
001 | 01-01-2020 | 15:10 | 34475 | < | 1.0 |
001 | 03-30-2020 | 09:45 | 00010 | NA | 18.0 |
001 | 03-30-2020 | 09:45 | 00095 | NA | 546.0 |
001 | 03-30-2020 | 09:45 | 00300 | NA | 3.7 |
001 | 03-30-2020 | 09:45 | 34475 | NA | 2.3 |
我想把它整理成如下格式。我知道我需要先合并 Remark_code 和 Result 两列,但我不确定如何将 Parameter_code 列的各个取值拆分为单独的列,并把对应的结果填入表中。
site | date | time | 00010 | 00095 | 00300 | 34475 |
---|---|---|---|---|---|---|
001 | 01-01-2020 | 15:10 | 16.6 | 521.0 | 5.6 | <1.0 |
001 | 03-30-2020 | 09:45 | 18.0 | 546.0 | 3.7 | 2.3 |
我不一定需要有关如何执行此操作的所有代码,而只需要有关使用哪些功能的说明。我一直在为此苦苦挣扎,因为我什至不确定要查找哪些关键字或这种类型的转换(?)会被称为什么。任何帮助将不胜感激。
我们可以先用 unite 合并 'REMARK_CODE' 和 'RESULT' 两列,
之后再使用 pivot_wider
library(dplyr)
library(tidyr)

# Reshape the long-format readings in `df1` to one row per SITE/DATE/TIME.
df1 %>%
  # Paste the remark flag (e.g. "<") in front of the result; with
  # na.rm = TRUE a missing remark is dropped instead of pasted as "NA".
  unite(
    RESULT,
    REMARK_CODE, RESULT,
    sep = "",
    na.rm = TRUE
  ) %>%
  # Turn every distinct PARAMETER_CODE into its own column holding RESULT.
  pivot_wider(
    names_from = PARAMETER_CODE,
    values_from = RESULT
  )
-输出
# A tibble: 2 x 7
# SITE DATE TIME `10` `95` `300` `34475`
# <chr> <chr> <chr> <chr> <chr> <chr> <chr>
# 1 001 1/1/2020 15:10 16.6 521 5.6 <1
# 2 001 3/30/2020 9:45 18 546 3.7 2.3
数据
# Example input in long format: one row per SITE/DATE/TIME/PARAMETER_CODE
# reading, with an optional remark flag in REMARK_CODE.
df1 <- data.frame(
  SITE = rep("001", 8),
  DATE = rep(c("1/1/2020", "3/30/2020"), each = 4),
  TIME = rep(c("15:10", "9:45"), each = 4),
  PARAMETER_CODE = rep(c(10L, 95L, 300L, 34475L), times = 2),
  REMARK_CODE = c(NA, NA, NA, "<", NA, NA, NA, NA),
  RESULT = c(16.6, 521, 5.6, 1, 18, 546, 3.7, 2.3),
  stringsAsFactors = FALSE
)
另一种方法可以是:
library(dplyr)
library(tidyr)

# Code
# Build the wide table from `df`: prefix each result with its remark flag
# when one is present, drop the flag column, then spread the parameter
# codes into columns.
new <- df %>%
  mutate(
    RESULT = ifelse(
      !is.na(REMARK_CODE),
      paste0(REMARK_CODE, RESULT),
      # No remark: pasting with "" only converts the number to text.
      paste0("", RESULT)
    )
  ) %>%
  select(-REMARK_CODE) %>%
  pivot_wider(
    names_from = PARAMETER_CODE,
    values_from = RESULT
  )
输出:
# A tibble: 2 x 7
SITE DATE TIME `10` `95` `300` `34475`
<chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 001 1/1/2020 15:10 16.6 521 5.6 <1
2 001 3/30/2020 9:45 18 546 3.7 2.3
使用的数据:
#Data
# Example input in long format: one row per SITE/DATE/TIME/PARAMETER_CODE
# reading, with an optional remark flag in REMARK_CODE.
df <- data.frame(
  SITE = rep("001", 8),
  DATE = rep(c("1/1/2020", "3/30/2020"), each = 4),
  TIME = rep(c("15:10", "9:45"), each = 4),
  PARAMETER_CODE = rep(c(10L, 95L, 300L, 34475L), times = 2),
  REMARK_CODE = c(NA, NA, NA, "<", NA, NA, NA, NA),
  RESULT = c(16.6, 521, 5.6, 1, 18, 546, 3.7, 2.3),
  stringsAsFactors = FALSE
)