如何创建一个循环或函数,遍历两个包含变量的向量,以便对响应变量运行 KS 检验?
How can I create a loop or function to cycle through a list of two vectors containing variables to run KS tests on the response
为了方便起见,我简化并概括了问题的代码。
我的问题(R 相关)是:我想针对两个因子的多个水平,循环执行一组 Kolmogorov-Smirnov 检验(ks.boot)。我需要先按 df.test$names 的每个水平(例如 W、X、Y 和 Z,代表物种名称)取数据子集,然后循环比较 df.test$TSM.FACT 各水平(例如 A、B、C 等,代表时间段)之间的长度分布。
因此,对于 df.test$names 中的每个级别(例如 W、X、Y、Z),我需要比较它们在不同时间段 A 和 B 的长度分布;然后是 A 与 C,然后是 B 与 C,并将每个结果保存在数据框中;记录比较发生的地方。
# Build example data ----
# Three simulated length columns (A, B, C = time periods) plus a species
# factor. `names` has 4000 entries, so data.frame() recycles the 1000-value
# measurement columns four times to match.
df.fact <- data.frame(
  "A" = abs(rnorm(1000, mean = 350, sd = 160)),
  "B" = abs(rpois(n = 1000, lambda = 50)),
  "C" = abs(rnorm(1000, mean = 200, sd = 80)),
  names = rep(factor(LETTERS[23:26]), 1000)
)
library(reshape2)
# Long format: one row per (names, time period) measurement.
df.test <- melt(df.fact, id.vars = "names", value.name = "Length2")
names(df.test)[names(df.test) == "variable"] <- "TSM.FACT"
# melt() already named the value column "Length2" via `value.name`, so there
# is no "value" column to rename. The column is kept as "Length2" and that
# exact (case-sensitive) name is used below; `$length2` would be NULL.

# Manual comparisons for species X ----
dfX <- subset(df.test, names == "X")
A <- subset(dfX, TSM.FACT == "A")
B <- subset(dfX, TSM.FACT == "B")
C <- subset(dfX, TSM.FACT == "C")
# ks.boot() lives in the Matching package; call it namespaced so the script
# does not depend on Matching being attached.
KS.XAB <- Matching::ks.boot(A$Length2, B$Length2, nboots = 5000)
KS.XAC <- Matching::ks.boot(A$Length2, C$Length2, nboots = 5000)
KS.XBC <- Matching::ks.boot(B$Length2, C$Length2, nboots = 5000)
summary(KS.XAB)
summary(KS.XAC)
summary(KS.XBC)

# Manual comparisons for species Y ----
dfY <- subset(df.test, names == "Y")
A <- subset(dfY, TSM.FACT == "A")
B <- subset(dfY, TSM.FACT == "B")
C <- subset(dfY, TSM.FACT == "C")
KS.YAB <- Matching::ks.boot(A$Length2, B$Length2, nboots = 5000)
KS.YAC <- Matching::ks.boot(A$Length2, C$Length2, nboots = 5000)
KS.YBC <- Matching::ks.boot(B$Length2, C$Length2, nboots = 5000)
summary(KS.YAB)
summary(KS.YAC)
summary(KS.YBC)
# And repeat for Z
先准备好 names、TSM.FACT1、TSM.FACT2 的所有组合,然后就可以用类似循环的方式轻松完成全部检验。
这是我的例子:
library(tibble)
library(tidyr)
library(dplyr)
library(purrr)

# Enumerate every comparison ----
# Result: one row per (names level, unordered pair of TSM.FACT levels).
comb_d <- df.test %>%
  as_tibble() %>%
  group_by(names) %>%
  # Unique TSM.FACT levels seen for each names level, as a character vector.
  summarize(TSM.FACT = list(unique(as.character(TSM.FACT)))) %>%
  mutate(comb_ = map(TSM.FACT, function(lvls) {
    # combn() returns a 2 x n matrix; transpose so each row is one pair.
    pairs <- t(combn(lvls, 2))
    # Name the columns up front: as_tibble() warns on unnamed matrix columns.
    colnames(pairs) <- c("TSM.FACT1", "TSM.FACT2")
    as_tibble(pairs)
  })) %>%
  dplyr::select(names, comb_) %>%
  # Name the list-column explicitly; bare unnest() is deprecated since
  # tidyr 1.0.0.
  unnest(cols = comb_) %>%
  # Trailing underscore keeps the column distinct from base::names().
  dplyr::rename(names_ = names)

# Run one bootstrap KS test per row ----
comb_d <- comb_d %>%
  mutate(ks_res = pmap(
    list(names_, TSM.FACT1, TSM.FACT2),
    # Argument names are deliberately different from df.test's columns so the
    # filter() expressions below cannot be shadowed by a column.
    function(nm, fact1, fact2) {
      # Like subset(df.test, names == "X")
      temp <- df.test %>%
        filter(names == nm)
      # Like subset(dfX, TSM.FACT == "A"), then take the lengths as a vector.
      d1 <- temp %>%
        filter(TSM.FACT == fact1) %>%
        pull(Length2)
      d2 <- temp %>%
        filter(TSM.FACT == fact2) %>%
        pull(Length2)
      Matching::ks.boot(d1, d2, nboots = 5000)
    }
  ))

# Access a result by position ...
comb_d$ks_res[[1]] %>%
  summary()
# ... or by comparison. The species column is `names_` here, not `names`.
comb_d %>%
  filter(names_ == "X", TSM.FACT1 == "A", TSM.FACT2 == "B") %>%
  pluck("ks_res")
为了方便起见,我简化并概括了问题的代码。
我的问题(R 相关)是:我想针对两个因子的多个水平,循环执行一组 Kolmogorov-Smirnov 检验(ks.boot)。我需要先按 df.test$names 的每个水平(例如 W、X、Y 和 Z,代表物种名称)取数据子集,然后循环比较 df.test$TSM.FACT 各水平(例如 A、B、C 等,代表时间段)之间的长度分布。
因此,对于 df.test$names 中的每个级别(例如 W、X、Y、Z),我需要比较它们在不同时间段 A 和 B 的长度分布;然后是 A 与 C,然后是 B 与 C,并将每个结果保存在数据框中;记录比较发生的地方。
# Build example data ----
# Three simulated length columns (A, B, C = time periods) plus a species
# factor. `names` has 4000 entries, so data.frame() recycles the 1000-value
# measurement columns four times to match.
df.fact <- data.frame(
  "A" = abs(rnorm(1000, mean = 350, sd = 160)),
  "B" = abs(rpois(n = 1000, lambda = 50)),
  "C" = abs(rnorm(1000, mean = 200, sd = 80)),
  names = rep(factor(LETTERS[23:26]), 1000)
)
library(reshape2)
# Long format: one row per (names, time period) measurement.
df.test <- melt(df.fact, id.vars = "names", value.name = "Length2")
names(df.test)[names(df.test) == "variable"] <- "TSM.FACT"
# melt() already named the value column "Length2" via `value.name`, so there
# is no "value" column to rename. The column is kept as "Length2" and that
# exact (case-sensitive) name is used below; `$length2` would be NULL.

# Manual comparisons for species X ----
dfX <- subset(df.test, names == "X")
A <- subset(dfX, TSM.FACT == "A")
B <- subset(dfX, TSM.FACT == "B")
C <- subset(dfX, TSM.FACT == "C")
# ks.boot() lives in the Matching package; call it namespaced so the script
# does not depend on Matching being attached.
KS.XAB <- Matching::ks.boot(A$Length2, B$Length2, nboots = 5000)
KS.XAC <- Matching::ks.boot(A$Length2, C$Length2, nboots = 5000)
KS.XBC <- Matching::ks.boot(B$Length2, C$Length2, nboots = 5000)
summary(KS.XAB)
summary(KS.XAC)
summary(KS.XBC)

# Manual comparisons for species Y ----
dfY <- subset(df.test, names == "Y")
A <- subset(dfY, TSM.FACT == "A")
B <- subset(dfY, TSM.FACT == "B")
C <- subset(dfY, TSM.FACT == "C")
KS.YAB <- Matching::ks.boot(A$Length2, B$Length2, nboots = 5000)
KS.YAC <- Matching::ks.boot(A$Length2, C$Length2, nboots = 5000)
KS.YBC <- Matching::ks.boot(B$Length2, C$Length2, nboots = 5000)
summary(KS.YAB)
summary(KS.YAC)
summary(KS.YBC)
# And repeat for Z
先准备好 names、TSM.FACT1、TSM.FACT2 的所有组合,然后就可以用类似循环的方式轻松完成全部检验。
这是我的例子:
library(tibble)
library(tidyr)
library(dplyr)
library(purrr)

# Enumerate every comparison ----
# Result: one row per (names level, unordered pair of TSM.FACT levels).
comb_d <- df.test %>%
  as_tibble() %>%
  group_by(names) %>%
  # Unique TSM.FACT levels seen for each names level, as a character vector.
  summarize(TSM.FACT = list(unique(as.character(TSM.FACT)))) %>%
  mutate(comb_ = map(TSM.FACT, function(lvls) {
    # combn() returns a 2 x n matrix; transpose so each row is one pair.
    pairs <- t(combn(lvls, 2))
    # Name the columns up front: as_tibble() warns on unnamed matrix columns.
    colnames(pairs) <- c("TSM.FACT1", "TSM.FACT2")
    as_tibble(pairs)
  })) %>%
  dplyr::select(names, comb_) %>%
  # Name the list-column explicitly; bare unnest() is deprecated since
  # tidyr 1.0.0.
  unnest(cols = comb_) %>%
  # Trailing underscore keeps the column distinct from base::names().
  dplyr::rename(names_ = names)

# Run one bootstrap KS test per row ----
comb_d <- comb_d %>%
  mutate(ks_res = pmap(
    list(names_, TSM.FACT1, TSM.FACT2),
    # Argument names are deliberately different from df.test's columns so the
    # filter() expressions below cannot be shadowed by a column.
    function(nm, fact1, fact2) {
      # Like subset(df.test, names == "X")
      temp <- df.test %>%
        filter(names == nm)
      # Like subset(dfX, TSM.FACT == "A"), then take the lengths as a vector.
      d1 <- temp %>%
        filter(TSM.FACT == fact1) %>%
        pull(Length2)
      d2 <- temp %>%
        filter(TSM.FACT == fact2) %>%
        pull(Length2)
      Matching::ks.boot(d1, d2, nboots = 5000)
    }
  ))

# Access a result by position ...
comb_d$ks_res[[1]] %>%
  summary()
# ... or by comparison. The species column is `names_` here, not `names`.
comb_d %>%
  filter(names_ == "X", TSM.FACT1 == "A", TSM.FACT2 == "B") %>%
  pluck("ks_res")