使用 rowSums 按不同类别定义阈值
Define thresholds based on different categories with rowSums
# Example data: two timepoints (A, B) for each of eight patients, with sex,
# country and six numeric measurement columns (A, B, C, D, E, G), some of
# which contain missing values.
df <- data.frame(
  PatientID = c("0002", "0002", "0005", "0005", "0009", "0009", "0018", "0018",
                "0039", "0039", "0043", "0043", "0046", "0046", "0048", "0048"),
  Timepoint = c("A", "B", "A", "B", "A", "B", "A", "B",
                "A", "B", "A", "B", "A", "B", "A", "B"),
  sex = c("F", "F", "M", "M", "F", "F", "M", "M",
          "F", "F", "M", "M", "M", "M", "F", "F"),
  country = c("I", "I", "S", "S", "S", "S", "S", "S",
              "S", "S", "I", "I", "I", "I", "I", "I"),
  A = c(NA, 977.146, NA, 964.315, NA, 952.311, NA, 950.797,
        947.465, 902.852, 985.124, NA, 930.141, 1007.790, 1027.110, 999.414),
  B = c(998.988, NA, 998.680, NA, 1020.560, 955.540, 911.606, 964.039,
        988.087, 902.367, 959.338, 1029.050, 987.374, 1066.400, 957.512, 917.597),
  C = c(987.140, 961.810, 929.466, 978.166, 969.469, 943.398, 936.034, 965.292,
        996.404, 920.610, 967.047, 913.517, 893.428, 921.606, 929.590, 950.493),
  D = c(961.810, 929.466, 978.166, 1005.820, 925.752, 969.469, 943.398, 965.292,
        996.404, 967.047, NA, 893.428, 921.606, 976.192, 929.590, 950.493),
  E = c(1006.330, 1028.070, 954.274, 1005.910, 949.969, 992.820, 934.407, 948.913,
        961.375, 955.296, 961.128, 998.119, 1009.110, 994.891, 1000.170, 982.763),
  G = c(NA, 958.990, 924.680, 955.927, NA, 949.384, 973.348, 984.392,
        943.894, 961.468, 995.368, 994.997, 979.454, 952.605, NA, 956.507),
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)
我有这段代码可以对 3 列或更多列超出范围的人进行分类,阈值为 1015:
# Positions of the measurement columns A, B, C, D, E and G.
cols <- 5:10
# Label a row "Yes" when at least three of its measurements exceed 1015
# (missing measurements are ignored), otherwise "No".
df[["Myo"]] <- c("No", "Yes")[(rowSums(df[cols] > 1015, na.rm = TRUE) >= 3) + 1L]
我需要把这段代码改写成另外 2 个版本:
一个版本按性别使用不同的阈值:女性 (df$sex == "F") 的阈值为 1004,男性 (df$sex == "M") 的阈值为 986。
另一个版本根据性别和国家的组合使用 4 个不同的阈值:
a) 第一个阈值针对居住在瑞典 (df$country == "S") 的男性 (df$sex == "M"),阈值为 900
b) 第二个阈值针对居住在瑞典 (df$country == "S") 的女性 (df$sex == "F"),阈值为 1016
c) 第三个阈值针对居住在冰岛 (df$country == "I") 的男性 (df$sex == "M"),阈值为 800
d) 第四个阈值针对居住在冰岛 (df$country == "I") 的女性 (df$sex == "F"),阈值为 1000。
谢谢!!
我更喜欢使用 data.table 来解决这个问题。函数 fcase 用于处理多阈值分支。我不确定 Myo 的输出是否符合您的要求。不过我想您可以自行修正,我稍后也会再编辑这个回答。
# Example data: two timepoints (A, B) for each of eight patients, with sex,
# country and six numeric measurement columns (A, B, C, D, E, G), some of
# which contain missing values.
df <- data.frame(
  PatientID = c("0002", "0002", "0005", "0005", "0009", "0009", "0018", "0018",
                "0039", "0039", "0043", "0043", "0046", "0046", "0048", "0048"),
  Timepoint = c("A", "B", "A", "B", "A", "B", "A", "B",
                "A", "B", "A", "B", "A", "B", "A", "B"),
  sex = c("F", "F", "M", "M", "F", "F", "M", "M",
          "F", "F", "M", "M", "M", "M", "F", "F"),
  country = c("I", "I", "S", "S", "S", "S", "S", "S",
              "S", "S", "I", "I", "I", "I", "I", "I"),
  A = c(NA, 977.146, NA, 964.315, NA, 952.311, NA, 950.797,
        947.465, 902.852, 985.124, NA, 930.141, 1007.790, 1027.110, 999.414),
  B = c(998.988, NA, 998.680, NA, 1020.560, 955.540, 911.606, 964.039,
        988.087, 902.367, 959.338, 1029.050, 987.374, 1066.400, 957.512, 917.597),
  C = c(987.140, 961.810, 929.466, 978.166, 969.469, 943.398, 936.034, 965.292,
        996.404, 920.610, 967.047, 913.517, 893.428, 921.606, 929.590, 950.493),
  D = c(961.810, 929.466, 978.166, 1005.820, 925.752, 969.469, 943.398, 965.292,
        996.404, 967.047, NA, 893.428, 921.606, 976.192, 929.590, 950.493),
  E = c(1006.330, 1028.070, 954.274, 1005.910, 949.969, 992.820, 934.407, 948.913,
        961.375, 955.296, 961.128, 998.119, 1009.110, 994.891, 1000.170, 982.763),
  G = c(NA, 958.990, 924.680, 955.927, NA, 949.384, 973.348, 984.392,
        943.894, 961.468, 995.368, 994.997, 979.454, 952.605, NA, 956.507),
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)
library(data.table)
# Convert df to a data.table by reference so `:=` can add columns in place.
setDT(df)
# Positions of the measurement columns A, B, C, D, E and G.
cols <- 5:10
# Myo: sex-specific cutoff (F: 1004, M: 986). A row is "Yes" when at least
# three of its measurements exceed the cutoff for that row's sex; missing
# measurements are ignored. Rows matching no branch get "No".
df[, Myo := fcase(
  sex == "F" & rowSums(.SD > 1004, na.rm = TRUE) >= 3, "Yes",
  sex == "M" & rowSums(.SD > 986, na.rm = TRUE) >= 3, "Yes",
  default = "No"
), .SDcols = cols]
# Myo2: same rule with one cutoff per sex/country combination
# (M/S: 900, F/S: 1016, M/I: 800, F/I: 1000).
df[, Myo2 := fcase(
  sex == "M" & country == "S" & rowSums(.SD > 900, na.rm = TRUE) >= 3, "Yes",
  sex == "F" & country == "S" & rowSums(.SD > 1016, na.rm = TRUE) >= 3, "Yes",
  sex == "M" & country == "I" & rowSums(.SD > 800, na.rm = TRUE) >= 3, "Yes",
  sex == "F" & country == "I" & rowSums(.SD > 1000, na.rm = TRUE) >= 3, "Yes",
  default = "No"
), .SDcols = cols]
df
#> PatientID Timepoint sex country A B C D E
#> 1: 0002 A F I NA 998.988 987.140 961.810 1006.330
#> 2: 0002 B F I 977.146 NA 961.810 929.466 1028.070
#> 3: 0005 A M S NA 998.680 929.466 978.166 954.274
#> 4: 0005 B M S 964.315 NA 978.166 1005.820 1005.910
#> 5: 0009 A F S NA 1020.560 969.469 925.752 949.969
#> 6: 0009 B F S 952.311 955.540 943.398 969.469 992.820
#> 7: 0018 A M S NA 911.606 936.034 943.398 934.407
#> 8: 0018 B M S 950.797 964.039 965.292 965.292 948.913
#> 9: 0039 A F S 947.465 988.087 996.404 996.404 961.375
#> 10: 0039 B F S 902.852 902.367 920.610 967.047 955.296
#> 11: 0043 A M I 985.124 959.338 967.047 NA 961.128
#> 12: 0043 B M I NA 1029.050 913.517 893.428 998.119
#> 13: 0046 A M I 930.141 987.374 893.428 921.606 1009.110
#> 14: 0046 B M I 1007.790 1066.400 921.606 976.192 994.891
#> 15: 0048 A F I 1027.110 957.512 929.590 929.590 1000.170
#> 16: 0048 B F I 999.414 917.597 950.493 950.493 982.763
#> G Myo Myo2
#> 1: NA No No
#> 2: 958.990 No No
#> 3: 924.680 No Yes
#> 4: 955.927 No Yes
#> 5: NA No No
#> 6: 949.384 No No
#> 7: 973.348 No Yes
#> 8: 984.392 No Yes
#> 9: 943.894 No No
#> 10: 961.468 No No
#> 11: 995.368 No Yes
#> 12: 994.997 Yes Yes
#> 13: 979.454 No Yes
#> 14: 952.605 Yes Yes
#> 15: NA No No
#> 16: 956.507 No No
由 reprex package (v2.0.0) 于 2021-08-03 创建
我发现这段代码也适用于第一种情况:
# Positions of the measurement columns A, B, C, D, E and G.
cols <- 5:10
df$sex <- as.factor(df$sex)
# Myo: sex-specific cutoff (F: 1004, M: 986). A row is "Yes" when at least
# three of its measurements exceed the cutoff for that row's sex; missing
# measurements are ignored.
# as.data.frame() guards the positional column lookup: if df is a data.table,
# df[cols] would select rows 5:10 instead of columns 5:10.
df <- df %>%
  mutate(
    Myo = ifelse(
      sex == "F" & rowSums(as.data.frame(df)[cols] > 1004, na.rm = TRUE) >= 3,
      "Yes",
      ifelse(
        sex == "M" & rowSums(as.data.frame(df)[cols] > 986, na.rm = TRUE) >= 3,
        "Yes",
        "No"
      )
    )
  )
# Example data: two timepoints (A, B) for each of eight patients, with sex,
# country and six numeric measurement columns (A, B, C, D, E, G), some of
# which contain missing values.
df <- data.frame(
  PatientID = c("0002", "0002", "0005", "0005", "0009", "0009", "0018", "0018",
                "0039", "0039", "0043", "0043", "0046", "0046", "0048", "0048"),
  Timepoint = c("A", "B", "A", "B", "A", "B", "A", "B",
                "A", "B", "A", "B", "A", "B", "A", "B"),
  sex = c("F", "F", "M", "M", "F", "F", "M", "M",
          "F", "F", "M", "M", "M", "M", "F", "F"),
  country = c("I", "I", "S", "S", "S", "S", "S", "S",
              "S", "S", "I", "I", "I", "I", "I", "I"),
  A = c(NA, 977.146, NA, 964.315, NA, 952.311, NA, 950.797,
        947.465, 902.852, 985.124, NA, 930.141, 1007.790, 1027.110, 999.414),
  B = c(998.988, NA, 998.680, NA, 1020.560, 955.540, 911.606, 964.039,
        988.087, 902.367, 959.338, 1029.050, 987.374, 1066.400, 957.512, 917.597),
  C = c(987.140, 961.810, 929.466, 978.166, 969.469, 943.398, 936.034, 965.292,
        996.404, 920.610, 967.047, 913.517, 893.428, 921.606, 929.590, 950.493),
  D = c(961.810, 929.466, 978.166, 1005.820, 925.752, 969.469, 943.398, 965.292,
        996.404, 967.047, NA, 893.428, 921.606, 976.192, 929.590, 950.493),
  E = c(1006.330, 1028.070, 954.274, 1005.910, 949.969, 992.820, 934.407, 948.913,
        961.375, 955.296, 961.128, 998.119, 1009.110, 994.891, 1000.170, 982.763),
  G = c(NA, 958.990, 924.680, 955.927, NA, 949.384, 973.348, 984.392,
        943.894, 961.468, 995.368, 994.997, 979.454, 952.605, NA, 956.507),
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)
我有这段代码可以对 3 列或更多列超出范围的人进行分类,阈值为 1015:
# Positions of the measurement columns A, B, C, D, E and G.
cols <- 5:10
# Label a row "Yes" when at least three of its measurements exceed 1015
# (missing measurements are ignored), otherwise "No".
df[["Myo"]] <- c("No", "Yes")[(rowSums(df[cols] > 1015, na.rm = TRUE) >= 3) + 1L]
我需要把这段代码改写成另外 2 个版本:
一个版本按性别使用不同的阈值:女性 (df$sex == "F") 的阈值为 1004,男性 (df$sex == "M") 的阈值为 986。
另一个版本根据性别和国家的组合使用 4 个不同的阈值:
a) 第一个阈值针对居住在瑞典 (df$country == "S") 的男性 (df$sex == "M"),阈值为 900
b) 第二个阈值针对居住在瑞典 (df$country == "S") 的女性 (df$sex == "F"),阈值为 1016
c) 第三个阈值针对居住在冰岛 (df$country == "I") 的男性 (df$sex == "M"),阈值为 800
d) 第四个阈值针对居住在冰岛 (df$country == "I") 的女性 (df$sex == "F"),阈值为 1000。
谢谢!!
我更喜欢使用 data.table 来解决这个问题。函数 fcase 用于处理多阈值分支。我不确定 Myo 的输出是否符合您的要求。不过我想您可以自行修正,我稍后也会再编辑这个回答。
# Example data: two timepoints (A, B) for each of eight patients, with sex,
# country and six numeric measurement columns (A, B, C, D, E, G), some of
# which contain missing values.
df <- data.frame(
  PatientID = c("0002", "0002", "0005", "0005", "0009", "0009", "0018", "0018",
                "0039", "0039", "0043", "0043", "0046", "0046", "0048", "0048"),
  Timepoint = c("A", "B", "A", "B", "A", "B", "A", "B",
                "A", "B", "A", "B", "A", "B", "A", "B"),
  sex = c("F", "F", "M", "M", "F", "F", "M", "M",
          "F", "F", "M", "M", "M", "M", "F", "F"),
  country = c("I", "I", "S", "S", "S", "S", "S", "S",
              "S", "S", "I", "I", "I", "I", "I", "I"),
  A = c(NA, 977.146, NA, 964.315, NA, 952.311, NA, 950.797,
        947.465, 902.852, 985.124, NA, 930.141, 1007.790, 1027.110, 999.414),
  B = c(998.988, NA, 998.680, NA, 1020.560, 955.540, 911.606, 964.039,
        988.087, 902.367, 959.338, 1029.050, 987.374, 1066.400, 957.512, 917.597),
  C = c(987.140, 961.810, 929.466, 978.166, 969.469, 943.398, 936.034, 965.292,
        996.404, 920.610, 967.047, 913.517, 893.428, 921.606, 929.590, 950.493),
  D = c(961.810, 929.466, 978.166, 1005.820, 925.752, 969.469, 943.398, 965.292,
        996.404, 967.047, NA, 893.428, 921.606, 976.192, 929.590, 950.493),
  E = c(1006.330, 1028.070, 954.274, 1005.910, 949.969, 992.820, 934.407, 948.913,
        961.375, 955.296, 961.128, 998.119, 1009.110, 994.891, 1000.170, 982.763),
  G = c(NA, 958.990, 924.680, 955.927, NA, 949.384, 973.348, 984.392,
        943.894, 961.468, 995.368, 994.997, 979.454, 952.605, NA, 956.507),
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)
library(data.table)
# Convert df to a data.table by reference so `:=` can add columns in place.
setDT(df)
# Positions of the measurement columns A, B, C, D, E and G.
cols <- 5:10
# Myo: sex-specific cutoff (F: 1004, M: 986). A row is "Yes" when at least
# three of its measurements exceed the cutoff for that row's sex; missing
# measurements are ignored. Rows matching no branch get "No".
df[, Myo := fcase(
  sex == "F" & rowSums(.SD > 1004, na.rm = TRUE) >= 3, "Yes",
  sex == "M" & rowSums(.SD > 986, na.rm = TRUE) >= 3, "Yes",
  default = "No"
), .SDcols = cols]
# Myo2: same rule with one cutoff per sex/country combination
# (M/S: 900, F/S: 1016, M/I: 800, F/I: 1000).
df[, Myo2 := fcase(
  sex == "M" & country == "S" & rowSums(.SD > 900, na.rm = TRUE) >= 3, "Yes",
  sex == "F" & country == "S" & rowSums(.SD > 1016, na.rm = TRUE) >= 3, "Yes",
  sex == "M" & country == "I" & rowSums(.SD > 800, na.rm = TRUE) >= 3, "Yes",
  sex == "F" & country == "I" & rowSums(.SD > 1000, na.rm = TRUE) >= 3, "Yes",
  default = "No"
), .SDcols = cols]
df
#> PatientID Timepoint sex country A B C D E
#> 1: 0002 A F I NA 998.988 987.140 961.810 1006.330
#> 2: 0002 B F I 977.146 NA 961.810 929.466 1028.070
#> 3: 0005 A M S NA 998.680 929.466 978.166 954.274
#> 4: 0005 B M S 964.315 NA 978.166 1005.820 1005.910
#> 5: 0009 A F S NA 1020.560 969.469 925.752 949.969
#> 6: 0009 B F S 952.311 955.540 943.398 969.469 992.820
#> 7: 0018 A M S NA 911.606 936.034 943.398 934.407
#> 8: 0018 B M S 950.797 964.039 965.292 965.292 948.913
#> 9: 0039 A F S 947.465 988.087 996.404 996.404 961.375
#> 10: 0039 B F S 902.852 902.367 920.610 967.047 955.296
#> 11: 0043 A M I 985.124 959.338 967.047 NA 961.128
#> 12: 0043 B M I NA 1029.050 913.517 893.428 998.119
#> 13: 0046 A M I 930.141 987.374 893.428 921.606 1009.110
#> 14: 0046 B M I 1007.790 1066.400 921.606 976.192 994.891
#> 15: 0048 A F I 1027.110 957.512 929.590 929.590 1000.170
#> 16: 0048 B F I 999.414 917.597 950.493 950.493 982.763
#> G Myo Myo2
#> 1: NA No No
#> 2: 958.990 No No
#> 3: 924.680 No Yes
#> 4: 955.927 No Yes
#> 5: NA No No
#> 6: 949.384 No No
#> 7: 973.348 No Yes
#> 8: 984.392 No Yes
#> 9: 943.894 No No
#> 10: 961.468 No No
#> 11: 995.368 No Yes
#> 12: 994.997 Yes Yes
#> 13: 979.454 No Yes
#> 14: 952.605 Yes Yes
#> 15: NA No No
#> 16: 956.507 No No
由 reprex package (v2.0.0) 于 2021-08-03 创建
我发现这段代码也适用于第一种情况:
# Positions of the measurement columns A, B, C, D, E and G.
cols <- 5:10
df$sex <- as.factor(df$sex)
# Myo: sex-specific cutoff (F: 1004, M: 986). A row is "Yes" when at least
# three of its measurements exceed the cutoff for that row's sex; missing
# measurements are ignored.
# as.data.frame() guards the positional column lookup: if df is a data.table,
# df[cols] would select rows 5:10 instead of columns 5:10.
df <- df %>%
  mutate(
    Myo = ifelse(
      sex == "F" & rowSums(as.data.frame(df)[cols] > 1004, na.rm = TRUE) >= 3,
      "Yes",
      ifelse(
        sex == "M" & rowSums(as.data.frame(df)[cols] > 986, na.rm = TRUE) >= 3,
        "Yes",
        "No"
      )
    )
  )