如何使用dplyr有条件地计算出现次数?

How to conditionally count the number of occurrences using dplyr?

在下面的可重现示例中,用户根据 (A) 时间点和 (B) 两列值中的哪一列的输入对数据框的元素进行分层。

当用户选择对“Values_2”进行分层时,我希望只统计 Flag 列中标记为“N”的那些行;否则,当用户选择对“Values_1”进行分层时,Flag 列将被忽略。

以下代码可以正常运行,只是在选择“Values_2”时无法做到仅统计标记为“N”的元素。我注释掉了我对标记为“N”的元素进行计数的尝试...因为它会产生无意义的结果。

我还希望只对标记为“N”的“Values_2”元素求和,不过这一点我自己可以解决。

底部的图片更好地解释了这个问题。

有什么建议吗?

library(shiny)
library(tidyverse)
library(shinyWidgets)

# UI: two server-rendered selectors, then the raw table and the stratified
# summary table, each under a bold heading.
ui <- fluidPage(
  uiOutput("stratPeriod"),
  uiOutput("stratValues"),
  h5(strong("Raw data frame:")),
  tableOutput("rawData"),
  h5(strong("Stratified data:")),
  tableOutput("stratData")
)

# Server logic: provides the example data, renders the two selectors, shows
# the raw data, and renders a summary of the selected value column, binned
# into five equal-width ranges for the selected period.
server <- function(input, output, session) {

  # Fixed example data, wrapped in a reactive so the outputs can depend on it.
  dat <- reactive({
    data.frame(
      ID = c(1, 1, 2, 2, 2, 2, 3, 3, 3, 3),
      Period = c(
        "2020-03", "2020-04", "2020-01", "2020-02", "2020-03",
        "2020-04", "2020-01", "2020-02", "2020-03", "2020-04"
      ),
      Values_1 = c(-6, 26, 36, 46, 56, 86, 100, 10, 20, 30),
      Values_2 = c(-6, 13, 18, 46, 28, 43, 100, 10, 10, 30),
      Flag = c("N", "Y", "Y", "N", "Y", "Y", "N", "N", "Y", "N")
    )
  })

  # Period selector: choices are the distinct non-missing Period values,
  # defaulting to the first one.
  output$stratPeriod <- renderUI({
    chc <- unique(na.omit(dat()[["Period"]]))
    selectInput(
      inputId = "stratPeriod",
      label = "Choose point-in-time:",
      choices = chc,
      selected = chc[1]
    )
  })

  # Selector for the value column to stratify and sum.
  output$stratValues <- renderUI({
    selectInput(
      "stratValues",
      "Choose values type to sum:",
      choices = c("Values_1", "Values_2"),
      selected = c("Values_1")
    )
  })

  output$rawData <- renderTable({
    dat()
  })

  # Stratified table: one row per range with count, sum and percentage
  # shares, followed by a "Total" row.
  output$stratData <- renderTable({
    req(input$stratValues)
    req(input$stratPeriod)

    value_col <- input$stratValues
    sel_period <- input$stratPeriod

    # Compare against the input value directly. Building the filter with
    # eval(parse(text = ...)) would execute user-supplied text as code, and
    # wrapping this in a nested reactive() inside a render function would
    # create a new reactive on every render.
    period_rows <- dat() %>%
      filter(Period == sel_period)

    # NOTE(review): custom_min()/custom_max() are not defined in this snippet.
    # The is.infinite() guard suggests they return an infinite value for
    # empty input -- confirm against their definition.
    lo <- custom_min(period_rows[[value_col]])
    hi <- custom_max(period_rows[[value_col]])
    breaks <- if (any(is.infinite(c(lo, hi)))) {
      c(0, 10)
    } else {
      seq(lo, hi, length.out = 6)
    }

    strat <- period_rows %>%
      mutate(
        Range = cut(
          .data[[value_col]],
          breaks = breaks,
          include.lowest = TRUE,
          right = TRUE,
          dig.lab = 5
        )
      ) %>%
      group_by(Range) %>%
      summarise(Count = n(), Values = sum(.data[[value_col]])) %>%
      # Abandoned attempt at a conditional count, kept for reference. It
      # compares the column's values (not the input name) with "Values_1" and
      # counts "N" flags over the whole data set rather than within the group:
      # summarise(Count = if_else(!!sym(input$stratValues) == "Values_1",n(),sum(dat()[[5]]=="N")),Values = sum(!!sym(input$stratValues))) %>%
      # Re-introduce ranges that contain no rows so every bin is shown.
      complete(Range, fill = list(Count = 0, Values = 0)) %>%
      ungroup() %>%
      mutate(
        Count_pct = Count / sum(Count) * 100,
        Values_pct = Values / sum(Values) * 100
      )
    # Column order is Range, Count, Values, Count_pct, Values_pct; the former
    # select(everything(), ...) step did not reorder anything and was dropped.

    # Totals row: numeric columns are summed, the Range column reads "Total".
    totals <- strat %>%
      summarise(across(everything(), ~ if (is.numeric(.x)) sum(.x) else "Total"))

    bind_rows(strat, totals)
  })

}

# Build the app from the UI definition and the server function.
shinyApp(ui = ui, server = server)

我认为最简单的方法可能是将 renderTable() 函数更改为以下内容:

# Stratified table: one row per range with count, sum and percentage shares,
# followed by a "Total" row. When "Values_2" is selected, only rows whose Flag
# is "N" contribute to the counts and sums.
output$stratData <- renderTable({
  req(input$stratValues)
  req(input$stratPeriod)

  value_col <- input$stratValues
  sel_period <- input$stratPeriod

  # Compare against the input value directly instead of evaluating a pasted
  # expression, and avoid creating a reactive() inside the render function.
  period_rows <- dat() %>%
    filter(Period == sel_period)

  # Breaks are computed from every row of the period, before any Flag
  # filtering, so the ranges are the same whichever rows end up counted.
  # NOTE(review): custom_min()/custom_max() are not defined in this snippet;
  # the is.infinite() guard suggests they return an infinite value for empty
  # input -- confirm against their definition.
  lo <- custom_min(period_rows[[value_col]])
  hi <- custom_max(period_rows[[value_col]])
  breaks <- if (any(is.infinite(c(lo, hi)))) {
    c(0, 10)
  } else {
    seq(lo, hi, length.out = 6)
  }

  binned <- period_rows %>%
    mutate(
      Range = cut(
        .data[[value_col]],
        breaks = breaks,
        include.lowest = TRUE,
        right = TRUE,
        dig.lab = 5
      )
    )

  # The Flag column only matters for Values_2: keep just the "N" rows.
  if (value_col == "Values_2") {
    binned <- binned %>%
      filter(Flag == "N")
  }

  strat <- binned %>%
    group_by(Range) %>%
    summarise(Count = n(), Values = sum(.data[[value_col]])) %>%
    # Re-introduce ranges that contain no rows so every bin is shown.
    complete(Range, fill = list(Count = 0, Values = 0)) %>%
    ungroup() %>%
    mutate(
      Count_pct = Count / sum(Count) * 100,
      Values_pct = Values / sum(Values) * 100
    )
  # Column order is Range, Count, Values, Count_pct, Values_pct; the former
  # select(everything(), ...) step did not reorder anything and was dropped.

  # Totals row: numeric columns are summed, the Range column reads "Total".
  totals <- strat %>%
    summarise(across(everything(), ~ if (is.numeric(.x)) sum(.x) else "Total"))

  bind_rows(strat, totals)
})

在上面的代码中,有一个 if() 条件来判断 stratValues 是否为 "Values_2"。如果是,它会过滤数据,只保留 Flag 为 "N" 的观测值,然后继续进行其余的分析。这样,当选择 "Values_2" 时,Values 和 Count 就只基于 `Flag == "N"` 的观测值来计算。

这将起作用