在数据框中找到一系列规则,并打破规则

find a sequence of rules in a dataframe, with break rules

我展示了我如何看待这个算法的实现,我把它分为两个步骤

第一步序列搜索

第二步检查休息规则

set.seed(123)
dat <- as.data.frame(matrix(sample(10,60,replace = T),ncol = 3))
colnames(dat) <- LETTERS[1:ncol(dat)]
dat

rule <- c("A==0","A==10 & B==4","C==9","A>10","B<0","C==0","A==5","A>10",
          "B<0","C==0","A==9 & B==9","A>10","B<0","A==10","A==7 & B==5")
action <- c("break","next","next",rep("break",3),"next",rep("break",3),
            "next",rep("break",3) ,"next")

rule <- cbind(rule,action)

我认为这可行 -

seq_rule <- function(dat, rule, res.only = TRUE) {
  value = rule$action
  rule <- rule$rule
  m <- with(dat, lapply(rule, function(r) eval(str2expression(r))))  
  fu <- function(x, y) {
    k <- which(y)
    ifelse(all(k <= x), NA, min(k[k > x]))
  }  
  idx <- Reduce(fu , m,init = 0, accumulate = TRUE)[-1]  
  if (!res.only) {
    idx <- na.omit(idx)
    fidx <- head(idx, length(rule))
    debug.vec <- replace(rep("no", nrow(dat)), fidx, rule[seq_along(fidx)])
    return(cbind(dat, debug.vec))
  }
  if(any(value[!is.na(idx)] == 'break')) return(FALSE)
  idx <- na.omit(idx)
  length(idx) >= length(rule)
}

这里有一些检查 -

rule <- data.frame(rule= c("A==9","B==4","C==4","A==4", "B==10","C==4") ,
                   action= c(rep("next",3),"break","break","next"))

seq_rule(dat = dat,rule = rule)
#[1] FALSE


rule <- data.frame(rule= c("C==9","B==3","C==4"),
                   action= c(rep("next",3)))
seq_rule(dat = dat,rule = rule)
#[1] TRUE

seq_rule(dat = dat,rule = rule, res.only = FALSE)
#    A  B  C debug.vec
#1   3  5  9      C==9
#2   3  3  3      B==3
#3  10  9  4      C==4
#4   2  9  1        no
#5   6  9  7        no
#6   5  3  5        no
#7   4  8 10        no
#8   6 10  7        no
#9   9  7  9        no
#10 10 10  9        no

rule <- data.frame(rule= c("C==9","B==3","C==4", "A == 1"),
                   action= c(rep("next",3), 'break'))
seq_rule(dat = dat,rule = rule)
#[1] FALSE

rule <- data.frame(rule= c("C==9","B==3","C==4", "A == 6"),
                   action= c(rep("next",3), 'break'))
seq_rule(dat = dat,rule = rule)
#[1] FALSE

由于你的问题逻辑有点复杂,我想一个简单的方法,例如使用loops,可能会更有效和可读。这是 seq_rule

的一个版本
seq_rule <- function(dat, rule, res.only = TRUE) {
  m <- with(dat, as.data.frame(sapply(rule$rule, function(r) eval(str2expression(r)))))
  rule_next <- with(rule, rule[action == "next"])
  m_next <- m[rule_next]
  idx <- na.omit(
    Reduce(
      function(x, y) {
        k <- which(y)
        ifelse(all(k <= x), NA, min(k[k > x]))
      }, m_next,
      init = 0, accumulate = TRUE
    )
  )[-1]
  fidx <- head(idx, length(rule_next))
  debug.vec <- replace(rep("no", nrow(dat)), fidx, rule_next[seq_along(fidx)])
  trgs <- do.call(
    rbind,
    Map(
      function(p, q) {
        u <- as.matrix(m[p, ][q[q %in% with(rule, rule[action == "break"])]])
        k <- which(u, arr.ind = TRUE)
        data.frame(breakRowID = row.names(u)[k[, "row"]], breakTrigger = colnames(u)[k[, "col"]])
      },
      split(1:nrow(dat), cut(1:nrow(dat), c(0, idx, Inf))),
      split.default(names(m), cumsum(rule$action != "break"))
    )
  )
  triggerBreaks <- replace(rep("no", nrow(dat)), debug.vec != "no", NA)
  if (!res.only) {
    cbind(dat, debug.vec, trigger.break = with(trgs, replace(triggerBreaks, as.numeric(breakRowID), breakTrigger)))
  } else {
    nrow(trgs) == 0
  }
}

你会看到

> seq_rule(dat = dat, rule = rule)
[1] FALSE

> seq_rule(dat = dat, rule = rule, res.only = FALSE)
    A  B  C    debug.vec trigger.break
1   3  9  2           no            no
2   3  3  1           no            no
3  10  4  9 A==10 & B==4          <NA>
4   2  1  9         C==9          <NA>
5   6  7  6           no            no
6   5  5  5         A==5          <NA>
7   4 10  9           no            no
8   6  7 10           no            no
9   9  9  4  A==9 & B==9          <NA>
10 10  9  6           no         A==10
11  5 10  8           no            no
12  3  7  6           no            no
13  9  5  6           no            no
14  9  7  7           no            no
15  9  5  1           no            no
16  3  6  6           no            no
17  8  9  2           no            no
18 10  2  1           no         A==10
19  7  5  2  A==7 & B==5          <NA>
20 10  8  4           no            no

非常感谢所有试图帮助我的人,以及无限的耐心.. 但是帮不了我,因为我自己也不完全明白我想要什么。我没有把问题分成几个部分单独问(应该这样),而是问了一个很难解释的大问题。

对此我感到非常非常抱歉。 这是我的答案,这是我最终想要得到的。

   seq_rule2 <- function(dat , rule ,res.only = TRUE){
  
  # This is a fast function written by Thomas here
  # 
  # as an answer to my earlier question. 
  # It takes the rules as a vector and looks for the sequence
  
  
  seq_rule <- function(dat, rule, res.only = TRUE) {
    m <- with(dat, lapply(rule, function(r) eval(str2expression(r))))
    fu <-  function(x, y) {
      k <- which(y)
      ifelse(all(k <= x), NA, min(k[k > x]))
    }
    idx <- na.omit(Reduce( fu, m,init = 0, accumulate = TRUE ))[-1]
    if (!res.only) {
      fidx <- head(idx, length(rule))
      debug.vec <- replace(rep("no", nrow(dat)), fidx, rule[seq_along(fidx)])
      return(cbind(dat, debug.vec))
    }
    length(idx) >= length(rule)
  }
  
  
  
  #if there is only one next rule, then there is no point in continuing to return the FALSE and finish completely
  if(  length(rule$rule[rule$action=="next"]) <= 1  )  return(FALSE)
  
  # STEP 1  
  # run seq_rule  
  yes.next.rule.seq <- seq_rule(dat = dat , rule = rule$rule[rule$action=="next"] , res.only = T)
  
  if(res.only==FALSE & yes.next.rule.seq==FALSE) {
    Next <- rep("no",nrow(dat)) 
    Break <- rep("no",nrow(dat)) 
    dat <- cbind(dat,Next=Next, Break=Break)
    return(dat)
  }
  if(res.only==TRUE & yes.next.rule.seq==FALSE)  return(FALSE)
  
  
  # if the seq_rule found the sequence (TRUE) but there are no "break rules" in the "rule",
  # then there is no point in searching for "break rules". Return TRUE and finish completely
  if( length(rule$rule[rule$action=="break"]) == 0  &  yes.next.rule.seq == TRUE) return(TRUE)
  
  # STEP 2
  #looking for break rules in the range between next rules
  
  if(yes.next.rule.seq){
    
    
    #get indices where the "next rules"  triggered in dat  
    deb.vec <- seq_rule(dat = dat , rule = rule$rule[rule$action=="next"] , res.only = F)[,"debug.vec"]
    idx.next.rules <- which(deb.vec!="no")
    
    
    #get indices where the "break rules"  triggered in dat  
    m <- with(dat, lapply(rule$rule[rule$action=="break"], function(r) eval(str2expression(r))))
    idx.break.rules <- unlist(lapply(m,which))
    
    
    # RES the final result is equal to TRUE, 
    # but if a "break rule" is found between the "next rules", 
    # then the RES will be false
    RES <- TRUE  
    
    
    # sliding window of two "next rules"  http://prntscr.com/1qhnzae
    for(i in 2:length(idx.next.rules)){
      temp.range <- idx.next.rules[  (i-1):i  ]
      # Check if there is any "break rule" index between the "next rule" indexes
      break.detect <- any(  idx.break.rules > temp.range[1]   &  idx.break.rules < temp.range[2] )
      if( break.detect )   RES <- FALSE ; break
    }
    
  }
  
  
  if(!res.only) {
    Next <- rep("no",nrow(dat)) ; Next[idx.next.rules] <- "yes"
    Break <- rep("no",nrow(dat)) ; Break[idx.break.rules] <- "yes"
    dat <- cbind(dat,Next=Next, Break=Break)
    return(dat)
  }
  return(RES)
}

要检查的数据

set.seed(963)
dat <- as.data.frame(matrix(sample(10,30,replace = T),ncol = 3))
colnames(dat) <- LETTERS[1:ncol(dat)]
rule <- cbind.data.frame(rule= c("A==9","B==4","C==4","A==4") ,
                         action= c("next","break","break","next"))
rule <- as.data.frame(rule,stringsAsFactors = F)
seq_rule2(dat = dat, rule = rule)
dat
rule

例如没有休息 set.seed(963) http://prntscr.com/1qhprxq

有中断 set.seed(930) http://prntscr.com/1qhpv2h