数据重塑

Reshaping of data

我无法在 R 中重塑数据,希望有人能帮助我。 数据如下所示:

ID 测量 biomarker_x biomarker_y
1 1 10 100
1 2 11 110
1 3 12 120
2 1 20 200
2 2 19 190
2 3 21 210

并且需要重塑为如下所示:

ID 生物标志物 测量1 测量2 测量3
1 x 10 11 12
1 y 100 110 120
2 x 20 19 21
2 y 200 190 210

我尝试使用 tidyr::gather 和传播以及 pivot_widerpivot_longer 但失败了。 如果有人有将其应用于多种生物标志物的解决方案,我将非常感激。

这个有用吗:

library(dplyr)
library(tidyr)
library(stringr)


df %>% pivot_longer(-c(ID, measurement), names_to = 'biomarker') %>% mutate(biomarker = str_extract(biomarker, '[xy]$')) %>% 
  pivot_wider(c(ID, biomarker), names_from = measurement, names_prefix = 'measurement', values_from = value)
# A tibble: 4 x 5
     ID biomarker measurement1 measurement2 measurement3
  <int> <chr>            <int>        <int>        <int>
1     1 x                   10           11           12
2     1 y                  100          110          120
3     2 x                   20           19           21
4     2 y                  200          190          210

这是一种方法。

library(tidyverse)

dat |> 
  pivot_longer(
    cols = starts_with("bio"),
    names_to = "biomarker"
  ) |> 
  mutate(biomarker = str_remove(biomarker, "biomarker_")) |> 
  pivot_wider(
    names_from = measurement,
    values_from = value,
    names_prefix = "measurement"
  )

# # A tibble: 4 x 5
#      ID biomarker measurement1 measurement2 measurement3
#   <int> <chr>            <int>        <int>        <int>
# 1     1 x                   10           11           12
# 2     1 y                  100          110          120
# 3     2 x                   20           19           21
# 4     2 y                  200          190          210

只能在 tidyr 中完成

library(tidyr)

df <- read.table(header = T, text = 'ID measurement biomarker_x biomarker_y
1   1   10  100
1   2   11  110
1   3   12  120
2   1   20  200
2   2   19  190
2   3   21  210')

df %>% pivot_longer(starts_with('biomarker'), names_to = 'biomarker', names_prefix = 'biomarker_') %>%
  pivot_wider(names_from = measurement, values_from = value, names_prefix = 'measurement_')

#> # A tibble: 4 x 5
#>      ID biomarker measurement_1 measurement_2 measurement_3
#>   <int> <chr>             <int>         <int>         <int>
#> 1     1 x                    10            11            12
#> 2     1 y                   100           110           120
#> 3     2 x                    20            19            21
#> 4     2 y                   200           190           210

reprex package (v2.0.0)

于 2021-07-06 创建

使用 reshape2

中的 recast
library(reshape2)
names(df1)[-(1:2)] <- sub("biomarker_", "", names(df1)[-(1:2)])
reshape2::recast(df1, id.var = c("ID", "measurement"), 
     ID + variable ~ paste0('measurement', measurement), value.var = 'value')

-输出

  ID variable measurement1 measurement2 measurement3
1  1        x           10           11           12
2  1        y          100          110          120
3  2        x           20           19           21
4  2        y          200          190          210

数据

df1 <- structure(list(ID = c(1L, 1L, 1L, 2L, 2L, 2L), measurement = c(1L, 
2L, 3L, 1L, 2L, 3L), biomarker_x = c(10L, 11L, 12L, 20L, 19L, 
21L), biomarker_y = c(100L, 110L, 120L, 200L, 190L, 210L)), 
class = "data.frame", row.names = c(NA, 
-6L))

使用嵌套 ´reshape` 的纯基础 R 选项

reshape(
    reshape(
        df,
        direction = "long",
        idvar = c("ID", "measurement"),
        varying = -(1:2),
        sep = "_"
    ),
    direction = "wide",
    idvar = c("ID", "time"),
    timevar = "measurement"
)

给予

      ID time biomarker.1 biomarker.2 biomarker.3
1.1.x  1    x          10          11          12
2.1.x  2    x          20          19          21
1.1.y  1    y         100         110         120
2.1.y  2    y         200         190         210