在数据框中找到一系列规则,并打破规则
find a sequence of rules in a dataframe, with break rules
我展示了我如何看待这个算法的实现,我把它分为两个步骤
第一步序列搜索
第二步检查休息规则
set.seed(123)
dat <- as.data.frame(matrix(sample(10,60,replace = T),ncol = 3))
colnames(dat) <- LETTERS[1:ncol(dat)]
dat
rule <- c("A==0","A==10 & B==4","C==9","A>10","B<0","C==0","A==5","A>10",
"B<0","C==0","A==9 & B==9","A>10","B<0","A==10","A==7 & B==5")
action <- c("break","next","next",rep("break",3),"next",rep("break",3),
"next",rep("break",3) ,"next")
rule <- cbind(rule,action)
我认为这可行 -
seq_rule <- function(dat, rule, res.only = TRUE) {
value = rule$action
rule <- rule$rule
m <- with(dat, lapply(rule, function(r) eval(str2expression(r))))
fu <- function(x, y) {
k <- which(y)
ifelse(all(k <= x), NA, min(k[k > x]))
}
idx <- Reduce(fu , m,init = 0, accumulate = TRUE)[-1]
if (!res.only) {
idx <- na.omit(idx)
fidx <- head(idx, length(rule))
debug.vec <- replace(rep("no", nrow(dat)), fidx, rule[seq_along(fidx)])
return(cbind(dat, debug.vec))
}
if(any(value[!is.na(idx)] == 'break')) return(FALSE)
idx <- na.omit(idx)
length(idx) >= length(rule)
}
这里有一些检查 -
rule <- data.frame(rule= c("A==9","B==4","C==4","A==4", "B==10","C==4") ,
action= c(rep("next",3),"break","break","next"))
seq_rule(dat = dat,rule = rule)
#[1] FALSE
rule <- data.frame(rule= c("C==9","B==3","C==4"),
action= c(rep("next",3)))
seq_rule(dat = dat,rule = rule)
#[1] TRUE
seq_rule(dat = dat,rule = rule, res.only = FALSE)
# A B C debug.vec
#1 3 5 9 C==9
#2 3 3 3 B==3
#3 10 9 4 C==4
#4 2 9 1 no
#5 6 9 7 no
#6 5 3 5 no
#7 4 8 10 no
#8 6 10 7 no
#9 9 7 9 no
#10 10 10 9 no
rule <- data.frame(rule= c("C==9","B==3","C==4", "A == 1"),
action= c(rep("next",3), 'break'))
seq_rule(dat = dat,rule = rule)
#[1] FALSE
rule <- data.frame(rule= c("C==9","B==3","C==4", "A == 6"),
action= c(rep("next",3), 'break'))
seq_rule(dat = dat,rule = rule)
#[1] FALSE
由于你的问题逻辑有点复杂,我想一个简单的方法,例如使用loops
,可能会更有效和可读。这是 seq_rule
的一个版本
seq_rule <- function(dat, rule, res.only = TRUE) {
m <- with(dat, as.data.frame(sapply(rule$rule, function(r) eval(str2expression(r)))))
rule_next <- with(rule, rule[action == "next"])
m_next <- m[rule_next]
idx <- na.omit(
Reduce(
function(x, y) {
k <- which(y)
ifelse(all(k <= x), NA, min(k[k > x]))
}, m_next,
init = 0, accumulate = TRUE
)
)[-1]
fidx <- head(idx, length(rule_next))
debug.vec <- replace(rep("no", nrow(dat)), fidx, rule_next[seq_along(fidx)])
trgs <- do.call(
rbind,
Map(
function(p, q) {
u <- as.matrix(m[p, ][q[q %in% with(rule, rule[action == "break"])]])
k <- which(u, arr.ind = TRUE)
data.frame(breakRowID = row.names(u)[k[, "row"]], breakTrigger = colnames(u)[k[, "col"]])
},
split(1:nrow(dat), cut(1:nrow(dat), c(0, idx, Inf))),
split.default(names(m), cumsum(rule$action != "break"))
)
)
triggerBreaks <- replace(rep("no", nrow(dat)), debug.vec != "no", NA)
if (!res.only) {
cbind(dat, debug.vec, trigger.break = with(trgs, replace(triggerBreaks, as.numeric(breakRowID), breakTrigger)))
} else {
nrow(trgs) == 0
}
}
你会看到
> seq_rule(dat = dat, rule = rule)
[1] FALSE
> seq_rule(dat = dat, rule = rule, res.only = FALSE)
A B C debug.vec trigger.break
1 3 9 2 no no
2 3 3 1 no no
3 10 4 9 A==10 & B==4 <NA>
4 2 1 9 C==9 <NA>
5 6 7 6 no no
6 5 5 5 A==5 <NA>
7 4 10 9 no no
8 6 7 10 no no
9 9 9 4 A==9 & B==9 <NA>
10 10 9 6 no A==10
11 5 10 8 no no
12 3 7 6 no no
13 9 5 6 no no
14 9 7 7 no no
15 9 5 1 no no
16 3 6 6 no no
17 8 9 2 no no
18 10 2 1 no A==10
19 7 5 2 A==7 & B==5 <NA>
20 10 8 4 no no
非常感谢所有试图帮助我的人,以及无限的耐心..
但是帮不了我,因为我自己也不完全明白我想要什么。我没有把问题分成几个部分单独问(应该这样),而是问了一个很难解释的大问题。
对此我感到非常非常抱歉。
这是我的答案,这是我最终想要得到的。
seq_rule2 <- function(dat , rule ,res.only = TRUE){
# This is a fast function written by Thomas here
#
# as an answer to my earlier question.
# It takes the rules as a vector and looks for the sequence
seq_rule <- function(dat, rule, res.only = TRUE) {
m <- with(dat, lapply(rule, function(r) eval(str2expression(r))))
fu <- function(x, y) {
k <- which(y)
ifelse(all(k <= x), NA, min(k[k > x]))
}
idx <- na.omit(Reduce( fu, m,init = 0, accumulate = TRUE ))[-1]
if (!res.only) {
fidx <- head(idx, length(rule))
debug.vec <- replace(rep("no", nrow(dat)), fidx, rule[seq_along(fidx)])
return(cbind(dat, debug.vec))
}
length(idx) >= length(rule)
}
#if there is only one next rule, then there is no point in continuing to return the FALSE and finish completely
if( length(rule$rule[rule$action=="next"]) <= 1 ) return(FALSE)
# STEP 1
# run seq_rule
yes.next.rule.seq <- seq_rule(dat = dat , rule = rule$rule[rule$action=="next"] , res.only = T)
if(res.only==FALSE & yes.next.rule.seq==FALSE) {
Next <- rep("no",nrow(dat))
Break <- rep("no",nrow(dat))
dat <- cbind(dat,Next=Next, Break=Break)
return(dat)
}
if(res.only==TRUE & yes.next.rule.seq==FALSE) return(FALSE)
# if the seq_rule found the sequence (TRUE) but there are no "break rules" in the "rule",
# then there is no point in searching for "break rules". Return TRUE and finish completely
if( length(rule$rule[rule$action=="break"]) == 0 & yes.next.rule.seq == TRUE) return(TRUE)
# STEP 2
#looking for break rules in the range between next rules
if(yes.next.rule.seq){
#get indices where the "next rules" triggered in dat
deb.vec <- seq_rule(dat = dat , rule = rule$rule[rule$action=="next"] , res.only = F)[,"debug.vec"]
idx.next.rules <- which(deb.vec!="no")
#get indices where the "break rules" triggered in dat
m <- with(dat, lapply(rule$rule[rule$action=="break"], function(r) eval(str2expression(r))))
idx.break.rules <- unlist(lapply(m,which))
# RES the final result is equal to TRUE,
# but if a "break rule" is found between the "next rules",
# then the RES will be false
RES <- TRUE
# sliding window of two "next rules" http://prntscr.com/1qhnzae
for(i in 2:length(idx.next.rules)){
temp.range <- idx.next.rules[ (i-1):i ]
# Check if there is any "break rule" index between the "next rule" indexes
break.detect <- any( idx.break.rules > temp.range[1] & idx.break.rules < temp.range[2] )
if( break.detect ) RES <- FALSE ; break
}
}
if(!res.only) {
Next <- rep("no",nrow(dat)) ; Next[idx.next.rules] <- "yes"
Break <- rep("no",nrow(dat)) ; Break[idx.break.rules] <- "yes"
dat <- cbind(dat,Next=Next, Break=Break)
return(dat)
}
return(RES)
}
要检查的数据
set.seed(963)
dat <- as.data.frame(matrix(sample(10,30,replace = T),ncol = 3))
colnames(dat) <- LETTERS[1:ncol(dat)]
rule <- cbind.data.frame(rule= c("A==9","B==4","C==4","A==4") ,
action= c("next","break","break","next"))
rule <- as.data.frame(rule,stringsAsFactors = F)
seq_rule2(dat = dat, rule = rule)
dat
rule
例如没有休息 set.seed(963)
http://prntscr.com/1qhprxq
有中断 set.seed(930)
http://prntscr.com/1qhpv2h
我展示了我如何看待这个算法的实现,我把它分为两个步骤
第一步序列搜索
第二步检查休息规则
set.seed(123)
dat <- as.data.frame(matrix(sample(10,60,replace = T),ncol = 3))
colnames(dat) <- LETTERS[1:ncol(dat)]
dat
rule <- c("A==0","A==10 & B==4","C==9","A>10","B<0","C==0","A==5","A>10",
"B<0","C==0","A==9 & B==9","A>10","B<0","A==10","A==7 & B==5")
action <- c("break","next","next",rep("break",3),"next",rep("break",3),
"next",rep("break",3) ,"next")
rule <- cbind(rule,action)
我认为这可行 -
seq_rule <- function(dat, rule, res.only = TRUE) {
value = rule$action
rule <- rule$rule
m <- with(dat, lapply(rule, function(r) eval(str2expression(r))))
fu <- function(x, y) {
k <- which(y)
ifelse(all(k <= x), NA, min(k[k > x]))
}
idx <- Reduce(fu , m,init = 0, accumulate = TRUE)[-1]
if (!res.only) {
idx <- na.omit(idx)
fidx <- head(idx, length(rule))
debug.vec <- replace(rep("no", nrow(dat)), fidx, rule[seq_along(fidx)])
return(cbind(dat, debug.vec))
}
if(any(value[!is.na(idx)] == 'break')) return(FALSE)
idx <- na.omit(idx)
length(idx) >= length(rule)
}
这里有一些检查 -
rule <- data.frame(rule= c("A==9","B==4","C==4","A==4", "B==10","C==4") ,
action= c(rep("next",3),"break","break","next"))
seq_rule(dat = dat,rule = rule)
#[1] FALSE
rule <- data.frame(rule= c("C==9","B==3","C==4"),
action= c(rep("next",3)))
seq_rule(dat = dat,rule = rule)
#[1] TRUE
seq_rule(dat = dat,rule = rule, res.only = FALSE)
# A B C debug.vec
#1 3 5 9 C==9
#2 3 3 3 B==3
#3 10 9 4 C==4
#4 2 9 1 no
#5 6 9 7 no
#6 5 3 5 no
#7 4 8 10 no
#8 6 10 7 no
#9 9 7 9 no
#10 10 10 9 no
rule <- data.frame(rule= c("C==9","B==3","C==4", "A == 1"),
action= c(rep("next",3), 'break'))
seq_rule(dat = dat,rule = rule)
#[1] FALSE
rule <- data.frame(rule= c("C==9","B==3","C==4", "A == 6"),
action= c(rep("next",3), 'break'))
seq_rule(dat = dat,rule = rule)
#[1] FALSE
由于你的问题逻辑有点复杂,我想一个简单的方法,例如使用loops
,可能会更有效和可读。这是 seq_rule
seq_rule <- function(dat, rule, res.only = TRUE) {
m <- with(dat, as.data.frame(sapply(rule$rule, function(r) eval(str2expression(r)))))
rule_next <- with(rule, rule[action == "next"])
m_next <- m[rule_next]
idx <- na.omit(
Reduce(
function(x, y) {
k <- which(y)
ifelse(all(k <= x), NA, min(k[k > x]))
}, m_next,
init = 0, accumulate = TRUE
)
)[-1]
fidx <- head(idx, length(rule_next))
debug.vec <- replace(rep("no", nrow(dat)), fidx, rule_next[seq_along(fidx)])
trgs <- do.call(
rbind,
Map(
function(p, q) {
u <- as.matrix(m[p, ][q[q %in% with(rule, rule[action == "break"])]])
k <- which(u, arr.ind = TRUE)
data.frame(breakRowID = row.names(u)[k[, "row"]], breakTrigger = colnames(u)[k[, "col"]])
},
split(1:nrow(dat), cut(1:nrow(dat), c(0, idx, Inf))),
split.default(names(m), cumsum(rule$action != "break"))
)
)
triggerBreaks <- replace(rep("no", nrow(dat)), debug.vec != "no", NA)
if (!res.only) {
cbind(dat, debug.vec, trigger.break = with(trgs, replace(triggerBreaks, as.numeric(breakRowID), breakTrigger)))
} else {
nrow(trgs) == 0
}
}
你会看到
> seq_rule(dat = dat, rule = rule)
[1] FALSE
> seq_rule(dat = dat, rule = rule, res.only = FALSE)
A B C debug.vec trigger.break
1 3 9 2 no no
2 3 3 1 no no
3 10 4 9 A==10 & B==4 <NA>
4 2 1 9 C==9 <NA>
5 6 7 6 no no
6 5 5 5 A==5 <NA>
7 4 10 9 no no
8 6 7 10 no no
9 9 9 4 A==9 & B==9 <NA>
10 10 9 6 no A==10
11 5 10 8 no no
12 3 7 6 no no
13 9 5 6 no no
14 9 7 7 no no
15 9 5 1 no no
16 3 6 6 no no
17 8 9 2 no no
18 10 2 1 no A==10
19 7 5 2 A==7 & B==5 <NA>
20 10 8 4 no no
非常感谢所有试图帮助我的人,以及无限的耐心.. 但是帮不了我,因为我自己也不完全明白我想要什么。我没有把问题分成几个部分单独问(应该这样),而是问了一个很难解释的大问题。
对此我感到非常非常抱歉。 这是我的答案,这是我最终想要得到的。
seq_rule2 <- function(dat , rule ,res.only = TRUE){
# This is a fast function written by Thomas here
#
# as an answer to my earlier question.
# It takes the rules as a vector and looks for the sequence
seq_rule <- function(dat, rule, res.only = TRUE) {
m <- with(dat, lapply(rule, function(r) eval(str2expression(r))))
fu <- function(x, y) {
k <- which(y)
ifelse(all(k <= x), NA, min(k[k > x]))
}
idx <- na.omit(Reduce( fu, m,init = 0, accumulate = TRUE ))[-1]
if (!res.only) {
fidx <- head(idx, length(rule))
debug.vec <- replace(rep("no", nrow(dat)), fidx, rule[seq_along(fidx)])
return(cbind(dat, debug.vec))
}
length(idx) >= length(rule)
}
#if there is only one next rule, then there is no point in continuing to return the FALSE and finish completely
if( length(rule$rule[rule$action=="next"]) <= 1 ) return(FALSE)
# STEP 1
# run seq_rule
yes.next.rule.seq <- seq_rule(dat = dat , rule = rule$rule[rule$action=="next"] , res.only = T)
if(res.only==FALSE & yes.next.rule.seq==FALSE) {
Next <- rep("no",nrow(dat))
Break <- rep("no",nrow(dat))
dat <- cbind(dat,Next=Next, Break=Break)
return(dat)
}
if(res.only==TRUE & yes.next.rule.seq==FALSE) return(FALSE)
# if the seq_rule found the sequence (TRUE) but there are no "break rules" in the "rule",
# then there is no point in searching for "break rules". Return TRUE and finish completely
if( length(rule$rule[rule$action=="break"]) == 0 & yes.next.rule.seq == TRUE) return(TRUE)
# STEP 2
#looking for break rules in the range between next rules
if(yes.next.rule.seq){
#get indices where the "next rules" triggered in dat
deb.vec <- seq_rule(dat = dat , rule = rule$rule[rule$action=="next"] , res.only = F)[,"debug.vec"]
idx.next.rules <- which(deb.vec!="no")
#get indices where the "break rules" triggered in dat
m <- with(dat, lapply(rule$rule[rule$action=="break"], function(r) eval(str2expression(r))))
idx.break.rules <- unlist(lapply(m,which))
# RES the final result is equal to TRUE,
# but if a "break rule" is found between the "next rules",
# then the RES will be false
RES <- TRUE
# sliding window of two "next rules" http://prntscr.com/1qhnzae
for(i in 2:length(idx.next.rules)){
temp.range <- idx.next.rules[ (i-1):i ]
# Check if there is any "break rule" index between the "next rule" indexes
break.detect <- any( idx.break.rules > temp.range[1] & idx.break.rules < temp.range[2] )
if( break.detect ) RES <- FALSE ; break
}
}
if(!res.only) {
Next <- rep("no",nrow(dat)) ; Next[idx.next.rules] <- "yes"
Break <- rep("no",nrow(dat)) ; Break[idx.break.rules] <- "yes"
dat <- cbind(dat,Next=Next, Break=Break)
return(dat)
}
return(RES)
}
要检查的数据
set.seed(963)
dat <- as.data.frame(matrix(sample(10,30,replace = T),ncol = 3))
colnames(dat) <- LETTERS[1:ncol(dat)]
rule <- cbind.data.frame(rule= c("A==9","B==4","C==4","A==4") ,
action= c("next","break","break","next"))
rule <- as.data.frame(rule,stringsAsFactors = F)
seq_rule2(dat = dat, rule = rule)
dat
rule
例如没有休息 set.seed(963)
http://prntscr.com/1qhprxq
有中断 set.seed(930)
http://prntscr.com/1qhpv2h