数据框按规则分类
Data frame classification according to a rule
我有两个数据框DF1
和DF2
,DF2
是一些multiple treatments和p
对应值的比较。我想根据 p
的重要性在 DF2
中创建一个变量 groups
,我的最终输出是:
#
trat sp1 sp2 sp3 sp4 groups
1 P12A 4.653732 2.490977 4.236323 5.382113 "P12AvsP12BvsP12CvsP12D"
2 P12A 5.009581 2.254713 4.604529 4.842553 "P12AvsP12BvsP12CvsP12D"
3 P12A 4.809242 2.391435 4.675318 4.732977 "P12AvsP12BvsP12CvsP12D"
4 P12A 5.053077 2.483129 4.561690 5.311215 "P12AvsP12BvsP12CvsP12D"
5 P12A 5.356384 2.745474 4.616074 5.114969 "P12AvsP12BvsP12CvsP12D"
6 P12A 5.186120 2.384487 4.401133 5.041926 "P12AvsP12BvsP12CvsP12D"
7 P12A 5.175057 2.443725 4.678268 4.931249 "P12AvsP12BvsP12CvsP12D"
8 P12A 4.942661 2.288388 4.796253 5.123081 "P12AvsP12BvsP12CvsP12D"
9 P12A 5.049273 2.518262 4.022213 5.334939 "P12AvsP12BvsP12CvsP12D"
10 P12A 5.301788 2.431241 4.778733 4.880814 "P12AvsP12BvsP12CvsP12D"
11 P12A 5.079584 2.403393 4.882675 4.625801 "P12AvsP12BvsP12CvsP12D"
12 P12A 4.644050 2.488449 4.712999 4.699644 "P12AvsP12BvsP12CvsP12D"
13 P12A 4.898727 2.293527 5.275999 4.898135 "P12AvsP12BvsP12CvsP12D"
14 P12A 5.248847 2.286491 4.927688 4.763063 "P12AvsP12BvsP12CvsP12D"
15 P12A 5.239704 2.529684 4.930668 5.212917 "P12AvsP12BvsP12CvsP12D"
16 P13A 5.229522 2.570899 4.781054 4.852892 "P13A"
17 P13A 4.962258 2.982522 5.000820 4.659382 "P13A"
18 P13A 4.694233 2.214868 5.066398 4.887591 "P13A"
19 P13A 4.782794 2.169470 4.968994 4.839169 "P13A"
20 P13A 4.739379 2.807208 4.967769 4.872314 "P13A"
21 P13A 4.724091 2.362886 5.074778 5.064301 "P13A"
22 P13A 5.111046 2.468498 5.004575 4.895111 "P13A"
23 P13A 4.948762 2.671930 4.712485 4.761943 "P13A"
24 P13A 5.418908 2.676300 4.892651 5.128098 "P13A"
25 P13A 4.967169 2.700370 5.212501 4.563933 "P13A"
26 P13A 4.950029 2.384279 4.946293 4.875622 "P13A"
27 P13A 5.013728 2.278861 4.845646 5.191396 "P13A"
#
也就是说,如果p
= 或> 0.05,我将加入由vs 分隔的处理级别("P12AvsP12BvsP12CvsP12D"),否则我重复级别的名称("P13A"), 按照我下面的 CRM 和我的功能不起作用,谢谢,
## Data frame 1
pares<-c("P12A vs P12B","P12A vs P12C","P12A vs P12D","P12A vs
P12E","P16A vs P12F",
"P18A vs P12G","P20A vs P12H","P21A vs P12I","P22A vs P13A","P30A vs
P13B","P33A vs P142",
"P34A vs p142","P35A vs P148","P35A vs p148")
p<-c(1,1,4.00E-04,1.00E-04,0.0272,1,0.0012,1,2.00E-04,0.0281,2.00E-04,1,4.00E-04,1)
DF1<-data.frame(pares,p)
head(DF1)
#Data frame 2
#Factor
trat <- gl(3, 15, labels = paste("P", 12:14, "A",sep=""))
# Response variable
set.seed(124)
sp <- cbind(c(rnorm(10, 5, 0.25), rnorm(35, 5, 0.25)), rnorm(45, 2.5,
0.25),
c(rnorm(10, 4.5, 0.25), rnorm(35, 5, 0.25)), rnorm(45, 5,
0.25))
colnames(sp) <- c("sp1", "sp2", "sp3", "sp4")
DF2<-data.frame(trat,sp)
## Classification
# Function tentative
if (DF1$p>=0.05) {
DF2$groups = DF1$pares
} else {
DF2$groups = DF1$trat
}
head(DF2)
#
一种方法是迭代每个处理,过滤掉高于阈值的处理之间的比较。有了 treatment => valid_pairs 的映射,很容易创建组列。
下面是注释代码:
combine_pairs <- function(trat, threshold = 0.05){
#find every pair related to trat (it can be "trat vs X" or "X vs trat")
#this can be expressed using regular expressions as "(^trat vs|vs trat$)"
pares <- grepl(paste0("(^", trat, " vs|vs ", trat, "$)"), DF1$pares)
#among those pairs find the names of the ones that are above the threshold
matches <- DF1$pares[pares & DF1$p >= threshold]
if (length(matches) >= 1){
#clean up string for output, removing the "vs" and the trat name in each pair
trat_names <- gsub(paste0("( vs |", trat, ")"), "", matches)
groups <- paste0(trat, "vs", paste(trat_names, collapse = "vs"))
}else{
groups <- trat #what to show when no comparison is >= threshold
}
groups
}
#create a named list with the correct groups value for each treatment
groups <- setNames(lapply(levels(DF2$trat), combine_pairs), levels(DF2$trat))
DF2$groups <- groups[DF2$trat]
输出与您的示例略有不同(P12D 未列出用于处理 P12A)。在您的示例中,输入这对(P12A 与 P12D)的 p 值为 0.0004。
我有两个数据框DF1
和DF2
,DF2
是一些multiple treatments和p
对应值的比较。我想根据 p
的重要性在 DF2
中创建一个变量 groups
,我的最终输出是:
#
trat sp1 sp2 sp3 sp4 groups
1 P12A 4.653732 2.490977 4.236323 5.382113 "P12AvsP12BvsP12CvsP12D"
2 P12A 5.009581 2.254713 4.604529 4.842553 "P12AvsP12BvsP12CvsP12D"
3 P12A 4.809242 2.391435 4.675318 4.732977 "P12AvsP12BvsP12CvsP12D"
4 P12A 5.053077 2.483129 4.561690 5.311215 "P12AvsP12BvsP12CvsP12D"
5 P12A 5.356384 2.745474 4.616074 5.114969 "P12AvsP12BvsP12CvsP12D"
6 P12A 5.186120 2.384487 4.401133 5.041926 "P12AvsP12BvsP12CvsP12D"
7 P12A 5.175057 2.443725 4.678268 4.931249 "P12AvsP12BvsP12CvsP12D"
8 P12A 4.942661 2.288388 4.796253 5.123081 "P12AvsP12BvsP12CvsP12D"
9 P12A 5.049273 2.518262 4.022213 5.334939 "P12AvsP12BvsP12CvsP12D"
10 P12A 5.301788 2.431241 4.778733 4.880814 "P12AvsP12BvsP12CvsP12D"
11 P12A 5.079584 2.403393 4.882675 4.625801 "P12AvsP12BvsP12CvsP12D"
12 P12A 4.644050 2.488449 4.712999 4.699644 "P12AvsP12BvsP12CvsP12D"
13 P12A 4.898727 2.293527 5.275999 4.898135 "P12AvsP12BvsP12CvsP12D"
14 P12A 5.248847 2.286491 4.927688 4.763063 "P12AvsP12BvsP12CvsP12D"
15 P12A 5.239704 2.529684 4.930668 5.212917 "P12AvsP12BvsP12CvsP12D"
16 P13A 5.229522 2.570899 4.781054 4.852892 "P13A"
17 P13A 4.962258 2.982522 5.000820 4.659382 "P13A"
18 P13A 4.694233 2.214868 5.066398 4.887591 "P13A"
19 P13A 4.782794 2.169470 4.968994 4.839169 "P13A"
20 P13A 4.739379 2.807208 4.967769 4.872314 "P13A"
21 P13A 4.724091 2.362886 5.074778 5.064301 "P13A"
22 P13A 5.111046 2.468498 5.004575 4.895111 "P13A"
23 P13A 4.948762 2.671930 4.712485 4.761943 "P13A"
24 P13A 5.418908 2.676300 4.892651 5.128098 "P13A"
25 P13A 4.967169 2.700370 5.212501 4.563933 "P13A"
26 P13A 4.950029 2.384279 4.946293 4.875622 "P13A"
27 P13A 5.013728 2.278861 4.845646 5.191396 "P13A"
#
也就是说,如果p
= 或> 0.05,我将加入由vs 分隔的处理级别("P12AvsP12BvsP12CvsP12D"),否则我重复级别的名称("P13A"), 按照我下面的 CRM 和我的功能不起作用,谢谢,
## Data frame 1
pares<-c("P12A vs P12B","P12A vs P12C","P12A vs P12D","P12A vs
P12E","P16A vs P12F",
"P18A vs P12G","P20A vs P12H","P21A vs P12I","P22A vs P13A","P30A vs
P13B","P33A vs P142",
"P34A vs p142","P35A vs P148","P35A vs p148")
p<-c(1,1,4.00E-04,1.00E-04,0.0272,1,0.0012,1,2.00E-04,0.0281,2.00E-04,1,4.00E-04,1)
DF1<-data.frame(pares,p)
head(DF1)
#Data frame 2
#Factor
trat <- gl(3, 15, labels = paste("P", 12:14, "A",sep=""))
# Response variable
set.seed(124)
sp <- cbind(c(rnorm(10, 5, 0.25), rnorm(35, 5, 0.25)), rnorm(45, 2.5,
0.25),
c(rnorm(10, 4.5, 0.25), rnorm(35, 5, 0.25)), rnorm(45, 5,
0.25))
colnames(sp) <- c("sp1", "sp2", "sp3", "sp4")
DF2<-data.frame(trat,sp)
## Classification
# Function tentative
if (DF1$p>=0.05) {
DF2$groups = DF1$pares
} else {
DF2$groups = DF1$trat
}
head(DF2)
#
一种方法是迭代每个处理,过滤掉高于阈值的处理之间的比较。有了 treatment => valid_pairs 的映射,很容易创建组列。 下面是注释代码:
combine_pairs <- function(trat, threshold = 0.05){
#find every pair related to trat (it can be "trat vs X" or "X vs trat")
#this can be expressed using regular expressions as "(^trat vs|vs trat$)"
pares <- grepl(paste0("(^", trat, " vs|vs ", trat, "$)"), DF1$pares)
#among those pairs find the names of the ones that are above the threshold
matches <- DF1$pares[pares & DF1$p >= threshold]
if (length(matches) >= 1){
#clean up string for output, removing the "vs" and the trat name in each pair
trat_names <- gsub(paste0("( vs |", trat, ")"), "", matches)
groups <- paste0(trat, "vs", paste(trat_names, collapse = "vs"))
}else{
groups <- trat #what to show when no comparison is >= threshold
}
groups
}
#create a named list with the correct groups value for each treatment
groups <- setNames(lapply(levels(DF2$trat), combine_pairs), levels(DF2$trat))
DF2$groups <- groups[DF2$trat]
输出与您的示例略有不同(P12D 未列出用于处理 P12A)。在您的示例中,输入这对(P12A 与 P12D)的 p 值为 0.0004。