基于行的减法

Substraction based on rows

df <- data.frame(PatientID = c("0002" ,"0002", "0005", "0005" ,"0009" ,"0009" ,"0018", "0018" ,"0020" ,"0027", "0039" ,"0039" ,"0042", "0043" ,"0043" ,"0045", "0046", "0046" ,"0048" ,"0048", "0055"),
                 Timepoint= c("A", "B", "A", "B", "A", "B", "A", "B", "A", "A", "A", "B", "A", "A", "B",  "A",  "A", "B", "A", "B", "A"),
                 A = c(NA , 977.146 , NA , 964.315 ,NA , 952.311 , NA , 950.797 , 958.975  ,960.712  ,NA , 947.465 , 902.852 , NA,  985.124  ,NA , 930.141 ,1007.790 , 948.848, 1027.110 , 999.414),
                 B = c(998.988 , NA , 998.680 , NA , NA ,1020.560 , 947.751 ,1029.560 , 955.540 , 911.606 , 964.039   ,    NA,  988.087 , 902.367 , 959.338 ,1029.050 , 925.162 , 987.374 ,1066.400  ,957.512 , 917.597),
                 C = c( NA , 987.140 , 961.810 , 929.466 , 978.166, 1005.820  ,925.752 , 969.469 , 943.398  ,936.034,  965.292 , 996.404 , 920.610 , 967.047  ,986.565 , 913.517 , 893.428 , 921.606 , NA , 929.590  ,950.493), 
                 D = c(975.634 , 987.140 , 961.810 , 929.466 , 978.166, 1005.820 , 925.752 , 969.469  ,943.398 , NA , 965.292 , 996.404 , NA , 967.047 , 986.565 , NA , 893.428 , 921.606 , 976.192 , 929.590 , 950.493),
                 E = c(1006.330, 1028.070 , NA , 954.274 ,1005.910  ,949.969 , 992.820 , 977.048  ,934.407 , 948.913 , NA , NA , NA,  961.375  ,955.296 , 961.128  ,998.119 ,1009.110 , 994.891 ,1000.170  ,982.763),
                 G= c(NA , 958.990 , NA , NA , 924.680 , 955.927 , NA , 949.384  ,973.348 , 984.392 , 943.894 , 961.468 , 995.368 , 994.997 , NA , 979.454 , 952.605 , NA  ,   NA, NA , 956.507), stringsAsFactors = F)

我需要创建一个新的 df$Timepoint 因子,即 C,它将通过减去 [df$timepoint==B - df$timepoint==A]

得到

为了清楚起见,我在下面粘贴了一个小示例,上面的数据集中有 3 名患者。

感谢您的帮助!

library(tidyverse)
df %>% 
  pivot_longer(-c(PatientID, Timepoint), names_to = "COLS") %>% 
  pivot_wider(c(PatientID, COLS), names_from = Timepoint, values_from = value) %>% 
  mutate(C = B - A) %>% 
  pivot_longer(-c(PatientID, COLS), names_to = "Timepoint") %>% 
  pivot_wider(c(PatientID, Timepoint), names_from = COLS)

# A tibble: 39 x 8
   PatientID Timepoint     A     B      C      D      E     G
   <chr>     <chr>     <dbl> <dbl>  <dbl>  <dbl>  <dbl> <dbl>
 1 0002      A           NA   999.   NA    976.  1006.   NA  
 2 0002      B          977.   NA   987.   987.  1028.  959. 
 3 0002      C           NA    NA    NA     11.5   21.7  NA  
 4 0005      A           NA   999.  962.   962.    NA    NA  
 5 0005      B          964.   NA   929.   929.   954.   NA  
 6 0005      C           NA    NA   -32.3  -32.3   NA    NA  
 7 0009      A           NA    NA   978.   978.  1006.  925. 
 8 0009      B          952. 1021. 1006.  1006.   950.  956. 
 9 0009      C           NA    NA    27.7   27.7  -55.9  31.2
10 0018      A           NA   948.  926.   926.   993.   NA  
# ... with 29 more rows

这应该可以解决问题:

library(dplyr)
rbind(df, df %>% 
        group_by(PatientID) %>% 
        mutate(across(c("A":"G"), ~ . - lag(.))) %>% 
        filter(Timepoint == "B") %>% 
        mutate(Timepoint = "C")) %>% 
  arrange(PatientID, Timepoint)

输出:

   PatientID Timepoint        A        B        C        D        E       G
1       0002         A       NA  998.988       NA  975.634 1006.330      NA
2       0002         B  977.146       NA  987.140  987.140 1028.070 958.990
3       0002         C       NA       NA       NA   11.506   21.740      NA
4       0005         A       NA  998.680  961.810  961.810       NA      NA
5       0005         B  964.315       NA  929.466  929.466  954.274      NA
6       0005         C       NA       NA  -32.344  -32.344       NA      NA
7       0009         A       NA       NA  978.166  978.166 1005.910 924.680
8       0009         B  952.311 1020.560 1005.820 1005.820  949.969 955.927
9       0009         C       NA       NA   27.654   27.654  -55.941  31.247
10      0018         A       NA  947.751  925.752  925.752  992.820      NA
11      0018         B  950.797 1029.560  969.469  969.469  977.048 949.384
12      0018         C       NA   81.809   43.717   43.717  -15.772      NA

您可以汇总数据框并将其绑定到原始数​​据框。

library(dplyr)

df %>%
  arrange(PatientID, Timepoint) %>%
  group_by(PatientID) %>%
  summarise(across(A:G, .fns = diff)) %>%
  ungroup %>%
  mutate(Timepoint = 'C', .before = 2) %>%
  bind_rows(df) %>%
  arrange(PatientID, Timepoint) 

# PatientID Timepoint     A     B      C      D      E     G
#   <chr>     <chr>     <dbl> <dbl>  <dbl>  <dbl>  <dbl> <dbl>
# 1 0002      A           NA   999.   NA    976.  1006.   NA  
# 2 0002      B          977.   NA   987.   987.  1028.  959. 
# 3 0002      C           NA    NA    NA     11.5   21.7  NA  
# 4 0005      A           NA   999.  962.   962.    NA    NA  
# 5 0005      B          964.   NA   929.   929.   954.   NA  
# 6 0005      C           NA    NA   -32.3  -32.3   NA    NA  
# 7 0009      A           NA    NA   978.   978.  1006.  925. 
# 8 0009      B          952. 1021. 1006.  1006.   950.  956. 
# 9 0009      C           NA    NA    27.7   27.7  -55.9  31.2
#10 0018      A           NA   948.  926.   926.   993.   NA  
# … with 19 more rows