基于行的减法
Subtraction based on rows
# Example data: one row per PatientID / Timepoint combination.
# Columns A, B, C, D, E and G are numeric, with NA for missing values.
# Some patients have both timepoints ("A" and "B"), others only "A".
df <- data.frame(
  PatientID = c(
    "0002", "0002", "0005", "0005", "0009", "0009", "0018", "0018",
    "0020", "0027", "0039", "0039", "0042", "0043", "0043", "0045",
    "0046", "0046", "0048", "0048", "0055"
  ),
  Timepoint = c(
    "A", "B", "A", "B", "A", "B", "A", "B", "A", "A", "A", "B", "A",
    "A", "B", "A", "A", "B", "A", "B", "A"
  ),
  A = c(
    NA, 977.146, NA, 964.315, NA, 952.311, NA, 950.797, 958.975,
    960.712, NA, 947.465, 902.852, NA, 985.124, NA, 930.141, 1007.790,
    948.848, 1027.110, 999.414
  ),
  B = c(
    998.988, NA, 998.680, NA, NA, 1020.560, 947.751, 1029.560, 955.540,
    911.606, 964.039, NA, 988.087, 902.367, 959.338, 1029.050, 925.162,
    987.374, 1066.400, 957.512, 917.597
  ),
  C = c(
    NA, 987.140, 961.810, 929.466, 978.166, 1005.820, 925.752, 969.469,
    943.398, 936.034, 965.292, 996.404, 920.610, 967.047, 986.565,
    913.517, 893.428, 921.606, NA, 929.590, 950.493
  ),
  D = c(
    975.634, 987.140, 961.810, 929.466, 978.166, 1005.820, 925.752,
    969.469, 943.398, NA, 965.292, 996.404, NA, 967.047, 986.565, NA,
    893.428, 921.606, 976.192, 929.590, 950.493
  ),
  E = c(
    1006.330, 1028.070, NA, 954.274, 1005.910, 949.969, 992.820,
    977.048, 934.407, 948.913, NA, NA, NA, 961.375, 955.296, 961.128,
    998.119, 1009.110, 994.891, 1000.170, 982.763
  ),
  G = c(
    NA, 958.990, NA, NA, 924.680, 955.927, NA, 949.384, 973.348,
    984.392, 943.894, 961.468, 995.368, 994.997, NA, 979.454, 952.605,
    NA, NA, NA, 956.507
  ),
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)
我需要在 df$Timepoint 中新增一个水平 C,
其取值由 Timepoint 为 B 的行减去 Timepoint 为 A 的行得到,
即 [df$Timepoint == "B"] - [df$Timepoint == "A"]。
为了清楚起见,我在下面粘贴了一个小示例,其中包含上面数据集中的 3 名患者。
感谢您的帮助!
library(tidyverse)
# Add a Timepoint "C" row per patient holding B - A for every measurement.
# Strategy: reshape so each timepoint becomes its own column, subtract,
# then reshape back to one row per patient and timepoint.
# Patients without a "B" row still get "B" and "C" rows, filled with NA.
df %>%
  pivot_longer(-c(PatientID, Timepoint), names_to = "COLS") %>%
  # `id_cols` comes after `...` in pivot_wider(), so it must be named;
  # passing it by position is deprecated in current tidyr.
  pivot_wider(
    id_cols = c(PatientID, COLS),
    names_from = Timepoint,
    values_from = value
  ) %>%
  mutate(C = B - A) %>%
  pivot_longer(-c(PatientID, COLS), names_to = "Timepoint") %>%
  pivot_wider(id_cols = c(PatientID, Timepoint), names_from = COLS)
# A tibble: 39 x 8
PatientID Timepoint A B C D E G
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0002 A NA 999. NA 976. 1006. NA
2 0002 B 977. NA 987. 987. 1028. 959.
3 0002 C NA NA NA 11.5 21.7 NA
4 0005 A NA 999. 962. 962. NA NA
5 0005 B 964. NA 929. 929. 954. NA
6 0005 C NA NA -32.3 -32.3 NA NA
7 0009 A NA NA 978. 978. 1006. 925.
8 0009 B 952. 1021. 1006. 1006. 950. 956.
9 0009 C NA NA 27.7 27.7 -55.9 31.2
10 0018 A NA 948. 926. 926. 993. NA
# ... with 29 more rows
这应该可以解决问题:
library(dplyr)
# Append a Timepoint "C" row (B - A) for every patient that has a "B" row.
df %>%
  # Sort first so that, within a patient, the "A" row precedes the "B"
  # row and lag() does not depend on the order of the input rows.
  arrange(PatientID, Timepoint) %>%
  group_by(PatientID) %>%
  mutate(across(A:G, ~ .x - lag(.x))) %>%
  # Drop the grouping before binding so it does not leak downstream.
  ungroup() %>%
  # After the subtraction the "B" rows hold B - A; relabel them as "C".
  filter(Timepoint == "B") %>%
  mutate(Timepoint = "C") %>%
  bind_rows(df, .) %>%
  arrange(PatientID, Timepoint)
输出:
PatientID Timepoint A B C D E G
1 0002 A NA 998.988 NA 975.634 1006.330 NA
2 0002 B 977.146 NA 987.140 987.140 1028.070 958.990
3 0002 C NA NA NA 11.506 21.740 NA
4 0005 A NA 998.680 961.810 961.810 NA NA
5 0005 B 964.315 NA 929.466 929.466 954.274 NA
6 0005 C NA NA -32.344 -32.344 NA NA
7 0009 A NA NA 978.166 978.166 1005.910 924.680
8 0009 B 952.311 1020.560 1005.820 1005.820 949.969 955.927
9 0009 C NA NA 27.654 27.654 -55.941 31.247
10 0018 A NA 947.751 925.752 925.752 992.820 NA
11 0018 B 950.797 1029.560 969.469 969.469 977.048 949.384
12 0018 C NA 81.809 43.717 43.717 -15.772 NA
您可以汇总数据框并将其绑定到原始数据框。
library(dplyr)
# Build one "C" row (B - A) per patient and bind it to the original data.
df %>%
  # Sort so diff() computes B - A rather than A - B.
  arrange(PatientID, Timepoint) %>%
  group_by(PatientID) %>%
  # Keep only patients with exactly two rows: diff() then returns a single
  # value per column, so summarise() yields exactly one row per group
  # (returning zero rows for single-visit patients is deprecated).
  filter(n() == 2) %>%
  summarise(across(A:G, diff), .groups = "drop") %>%
  mutate(Timepoint = "C", .before = 2) %>%
  bind_rows(df) %>%
  arrange(PatientID, Timepoint)
# PatientID Timepoint A B C D E G
# <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 0002 A NA 999. NA 976. 1006. NA
# 2 0002 B 977. NA 987. 987. 1028. 959.
# 3 0002 C NA NA NA 11.5 21.7 NA
# 4 0005 A NA 999. 962. 962. NA NA
# 5 0005 B 964. NA 929. 929. 954. NA
# 6 0005 C NA NA -32.3 -32.3 NA NA
# 7 0009 A NA NA 978. 978. 1006. 925.
# 8 0009 B 952. 1021. 1006. 1006. 950. 956.
# 9 0009 C NA NA 27.7 27.7 -55.9 31.2
#10 0018 A NA 948. 926. 926. 993. NA
# … with 19 more rows
# Example data: one row per PatientID / Timepoint combination.
# Columns A, B, C, D, E and G are numeric, with NA for missing values.
# Some patients have both timepoints ("A" and "B"), others only "A".
df <- data.frame(
  PatientID = c(
    "0002", "0002", "0005", "0005", "0009", "0009", "0018", "0018",
    "0020", "0027", "0039", "0039", "0042", "0043", "0043", "0045",
    "0046", "0046", "0048", "0048", "0055"
  ),
  Timepoint = c(
    "A", "B", "A", "B", "A", "B", "A", "B", "A", "A", "A", "B", "A",
    "A", "B", "A", "A", "B", "A", "B", "A"
  ),
  A = c(
    NA, 977.146, NA, 964.315, NA, 952.311, NA, 950.797, 958.975,
    960.712, NA, 947.465, 902.852, NA, 985.124, NA, 930.141, 1007.790,
    948.848, 1027.110, 999.414
  ),
  B = c(
    998.988, NA, 998.680, NA, NA, 1020.560, 947.751, 1029.560, 955.540,
    911.606, 964.039, NA, 988.087, 902.367, 959.338, 1029.050, 925.162,
    987.374, 1066.400, 957.512, 917.597
  ),
  C = c(
    NA, 987.140, 961.810, 929.466, 978.166, 1005.820, 925.752, 969.469,
    943.398, 936.034, 965.292, 996.404, 920.610, 967.047, 986.565,
    913.517, 893.428, 921.606, NA, 929.590, 950.493
  ),
  D = c(
    975.634, 987.140, 961.810, 929.466, 978.166, 1005.820, 925.752,
    969.469, 943.398, NA, 965.292, 996.404, NA, 967.047, 986.565, NA,
    893.428, 921.606, 976.192, 929.590, 950.493
  ),
  E = c(
    1006.330, 1028.070, NA, 954.274, 1005.910, 949.969, 992.820,
    977.048, 934.407, 948.913, NA, NA, NA, 961.375, 955.296, 961.128,
    998.119, 1009.110, 994.891, 1000.170, 982.763
  ),
  G = c(
    NA, 958.990, NA, NA, 924.680, 955.927, NA, 949.384, 973.348,
    984.392, 943.894, 961.468, 995.368, 994.997, NA, 979.454, 952.605,
    NA, NA, NA, 956.507
  ),
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)
我需要在 df$Timepoint 中新增一个水平 C,
其取值由 Timepoint 为 B 的行减去 Timepoint 为 A 的行得到,
即 [df$Timepoint == "B"] - [df$Timepoint == "A"]。
为了清楚起见,我在下面粘贴了一个小示例,其中包含上面数据集中的 3 名患者。
感谢您的帮助!
library(tidyverse)
# Add a Timepoint "C" row per patient holding B - A for every measurement.
# Strategy: reshape so each timepoint becomes its own column, subtract,
# then reshape back to one row per patient and timepoint.
# Patients without a "B" row still get "B" and "C" rows, filled with NA.
df %>%
  pivot_longer(-c(PatientID, Timepoint), names_to = "COLS") %>%
  # `id_cols` comes after `...` in pivot_wider(), so it must be named;
  # passing it by position is deprecated in current tidyr.
  pivot_wider(
    id_cols = c(PatientID, COLS),
    names_from = Timepoint,
    values_from = value
  ) %>%
  mutate(C = B - A) %>%
  pivot_longer(-c(PatientID, COLS), names_to = "Timepoint") %>%
  pivot_wider(id_cols = c(PatientID, Timepoint), names_from = COLS)
# A tibble: 39 x 8
PatientID Timepoint A B C D E G
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0002 A NA 999. NA 976. 1006. NA
2 0002 B 977. NA 987. 987. 1028. 959.
3 0002 C NA NA NA 11.5 21.7 NA
4 0005 A NA 999. 962. 962. NA NA
5 0005 B 964. NA 929. 929. 954. NA
6 0005 C NA NA -32.3 -32.3 NA NA
7 0009 A NA NA 978. 978. 1006. 925.
8 0009 B 952. 1021. 1006. 1006. 950. 956.
9 0009 C NA NA 27.7 27.7 -55.9 31.2
10 0018 A NA 948. 926. 926. 993. NA
# ... with 29 more rows
这应该可以解决问题:
library(dplyr)
# Append a Timepoint "C" row (B - A) for every patient that has a "B" row.
df %>%
  # Sort first so that, within a patient, the "A" row precedes the "B"
  # row and lag() does not depend on the order of the input rows.
  arrange(PatientID, Timepoint) %>%
  group_by(PatientID) %>%
  mutate(across(A:G, ~ .x - lag(.x))) %>%
  # Drop the grouping before binding so it does not leak downstream.
  ungroup() %>%
  # After the subtraction the "B" rows hold B - A; relabel them as "C".
  filter(Timepoint == "B") %>%
  mutate(Timepoint = "C") %>%
  bind_rows(df, .) %>%
  arrange(PatientID, Timepoint)
输出:
PatientID Timepoint A B C D E G
1 0002 A NA 998.988 NA 975.634 1006.330 NA
2 0002 B 977.146 NA 987.140 987.140 1028.070 958.990
3 0002 C NA NA NA 11.506 21.740 NA
4 0005 A NA 998.680 961.810 961.810 NA NA
5 0005 B 964.315 NA 929.466 929.466 954.274 NA
6 0005 C NA NA -32.344 -32.344 NA NA
7 0009 A NA NA 978.166 978.166 1005.910 924.680
8 0009 B 952.311 1020.560 1005.820 1005.820 949.969 955.927
9 0009 C NA NA 27.654 27.654 -55.941 31.247
10 0018 A NA 947.751 925.752 925.752 992.820 NA
11 0018 B 950.797 1029.560 969.469 969.469 977.048 949.384
12 0018 C NA 81.809 43.717 43.717 -15.772 NA
您可以汇总数据框并将其绑定到原始数据框。
library(dplyr)
# Build one "C" row (B - A) per patient and bind it to the original data.
df %>%
  # Sort so diff() computes B - A rather than A - B.
  arrange(PatientID, Timepoint) %>%
  group_by(PatientID) %>%
  # Keep only patients with exactly two rows: diff() then returns a single
  # value per column, so summarise() yields exactly one row per group
  # (returning zero rows for single-visit patients is deprecated).
  filter(n() == 2) %>%
  summarise(across(A:G, diff), .groups = "drop") %>%
  mutate(Timepoint = "C", .before = 2) %>%
  bind_rows(df) %>%
  arrange(PatientID, Timepoint)
# PatientID Timepoint A B C D E G
# <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 0002 A NA 999. NA 976. 1006. NA
# 2 0002 B 977. NA 987. 987. 1028. 959.
# 3 0002 C NA NA NA 11.5 21.7 NA
# 4 0005 A NA 999. 962. 962. NA NA
# 5 0005 B 964. NA 929. 929. 954. NA
# 6 0005 C NA NA -32.3 -32.3 NA NA
# 7 0009 A NA NA 978. 978. 1006. 925.
# 8 0009 B 952. 1021. 1006. 1006. 950. 956.
# 9 0009 C NA NA 27.7 27.7 -55.9 31.2
#10 0018 A NA 948. 926. 926. 993. NA
# … with 19 more rows