如何使用dplyr有条件地计算出现次数?
How to conditionally count the number of occurrences using dplyr?
在下面的可重现示例中,用户根据 (A) 时间点和 (B) 两列值中的哪一列的输入对数据框的元素进行分层。
在用户选择对“Values_2”元素进行分层的情况下,我希望只统计“Flag”列中标记为“N”的那些行;否则,当用户选择对“Values_1”元素进行分层时,“Flag”列将被忽略。
除了在选择“Values_2”时计算标记为“N”的元素外,以下代码有效。我注释掉了我对标记为“N”的元素进行计数的尝试...因为它会产生无意义的结果。
我还希望只对标记为“N”的“Values_2”元素求和,不过这一点我可以自己解决。
底部的图片更好地解释了这个问题。
有什么建议吗?
library(shiny)
library(tidyverse)
library(shinyWidgets)
# Page layout: two server-rendered selectors, then the raw and the stratified
# tables, each preceded by a bold caption.
ui <- fluidPage(
  uiOutput("stratPeriod"),
  uiOutput("stratValues"),
  h5(strong("Raw data frame:")),
  tableOutput("rawData"),
  h5(strong("Stratified data:")),
  tableOutput("stratData")
)
# Server logic for the stratification demo app.
#
# Defines the demo data, renders the two selectors and the raw-data table, and
# renders a table that cuts the selected value column into ranges for the
# chosen period (five equal-width ranges between the period's minimum and
# maximum, or the single range 0-10 as a fallback), reporting per-range counts
# and sums, their percentage shares, and a final "Total" row.
#
# NOTE(review): custom_min() and custom_max() are not defined in this snippet
# and must exist before the app is run. Presumably they return Inf for empty
# input (that is what the is.infinite() fallback below guards) -- confirm.
server <- function(input, output, session) {
  # Fixed demo data, exposed as a reactive.
  dat <- reactive({
    data.frame(
      ID = c(1, 1, 2, 2, 2, 2, 3, 3, 3, 3),
      Period = c(
        "2020-03", "2020-04", "2020-01", "2020-02", "2020-03",
        "2020-04", "2020-01", "2020-02", "2020-03", "2020-04"
      ),
      Values_1 = c(-6, 26, 36, 46, 56, 86, 100, 10, 20, 30),
      Values_2 = c(-6, 13, 18, 46, 28, 43, 100, 10, 10, 30),
      Flag = c("N", "Y", "Y", "N", "Y", "Y", "N", "N", "Y", "N")
    )
  })

  # Period selector: choices are the distinct non-missing periods in the data.
  output$stratPeriod <- renderUI({
    chc <- unique(na.omit(dat()[["Period"]]))
    selectInput(
      inputId = "stratPeriod",
      label = "Choose point-in-time:",
      choices = chc,
      selected = chc[1]
    )
  })

  # Selector for the value column to stratify and sum.
  output$stratValues <- renderUI({
    selectInput(
      "stratValues",
      "Choose values type to sum:",
      choices = c("Values_1", "Values_2"),
      selected = c("Values_1")
    )
  })

  output$rawData <- renderTable({
    dat()
  })

  output$stratData <- renderTable({
    req(input$stratValues)
    req(input$stratPeriod)

    value_col <- input$stratValues

    # Compare the column directly instead of eval(parse(text = ...)) on a
    # pasted string: no code is built from client-supplied input, and the rows
    # are computed once rather than through a reactive() created per render.
    period_rows <- dat() %>%
      filter(.data$Period == input$stratPeriod)

    # Named lo/hi so the locals do not mask base::min() and base::max().
    lo <- custom_min(period_rows[[value_col]])
    hi <- custom_max(period_rows[[value_col]])
    breaks <- if (any(is.infinite(c(lo, hi)))) {
      c(0, 10)
    } else {
      seq(lo, hi, length.out = 6)
    }

    strat <- period_rows %>%
      mutate(
        Range = cut(
          .data[[value_col]],
          breaks = breaks,
          include.lowest = TRUE,
          right = TRUE,
          dig.lab = 5
        )
      ) %>%
      group_by(Range) %>%
      summarise(Count = n(), Values = sum(.data[[value_col]])) %>%
      # Abandoned attempt at the conditional count, kept for reference. It
      # compares the column's values (not the selected column name) with
      # "Values_1", and counts "N" over the whole unfiltered data frame:
      # summarise(Count = if_else(!!sym(input$stratValues) == "Values_1",n(),sum(dat()[[5]]=="N")),Values = sum(!!sym(input$stratValues))) %>%
      # Re-add ranges that contain no rows so every bin is shown.
      complete(Range, fill = list(Count = 0, Values = 0)) %>%
      ungroup() %>%
      mutate(
        Count_pct = Count / sum(Count) * 100,
        Values_pct = Values / sum(Values) * 100
      )

    # Totals row: numeric columns are summed, the Range column reads "Total".
    totals <- strat %>%
      summarise(
        across(everything(), ~ if (is.numeric(.x)) sum(.x) else "Total")
      )

    bind_rows(strat, totals)
  })
}
# Launch the app from the UI definition and server function above.
shinyApp(ui = ui, server = server)
我认为最简单的方法可能是将 renderTable() 调用更改为以下内容:
# Stratified table with conditional filtering; assigns output$stratData and
# reads input and dat(), so it belongs inside the server function.
#
# Bins the selected value column for the chosen period and reports per-range
# counts, sums and percentage shares plus a "Total" row. When "Values_2" is
# selected, only rows with Flag == "N" are counted and summed; the range
# breaks are still derived from all rows of the period.
#
# NOTE(review): custom_min() and custom_max() are not defined in this snippet
# and must exist before the app is run. Presumably they return Inf for empty
# input (that is what the is.infinite() fallback below guards) -- confirm.
output$stratData <- renderTable({
  req(input$stratValues)
  req(input$stratPeriod)

  value_col <- input$stratValues

  # Compare the column directly instead of eval(parse(text = ...)) on a
  # pasted string: no code is built from client-supplied input, and the rows
  # are computed once rather than through a reactive() created per render.
  period_rows <- dat() %>%
    filter(.data$Period == input$stratPeriod)

  # Named lo/hi so the locals do not mask base::min() and base::max().
  lo <- custom_min(period_rows[[value_col]])
  hi <- custom_max(period_rows[[value_col]])
  breaks <- if (any(is.infinite(c(lo, hi)))) {
    c(0, 10)
  } else {
    seq(lo, hi, length.out = 6)
  }

  binned <- period_rows %>%
    mutate(
      Range = cut(
        .data[[value_col]],
        breaks = breaks,
        include.lowest = TRUE,
        right = TRUE,
        dig.lab = 5
      )
    ) %>%
    group_by(Range)

  # The Flag column only matters when stratifying Values_2.
  if (value_col == "Values_2") {
    binned <- binned %>%
      filter(Flag == "N")
  }

  strat <- binned %>%
    summarise(Count = n(), Values = sum(.data[[value_col]])) %>%
    # Re-add ranges that contain no rows so every bin is shown.
    complete(Range, fill = list(Count = 0, Values = 0)) %>%
    ungroup() %>%
    mutate(
      Count_pct = Count / sum(Count) * 100,
      Values_pct = Values / sum(Values) * 100
    )

  # Totals row: numeric columns are summed, the Range column reads "Total".
  totals <- strat %>%
    summarise(
      across(everything(), ~ if (is.numeric(.x)) sum(.x) else "Total")
    )

  bind_rows(strat, totals)
})
在上面的代码中,有一个 if() 条件用来判断 stratValues 是否为 Values_2。如果是,它会过滤数据,只保留 Flag 为 "N" 的观测。然后,它继续进行其余的分析。如果 Values 和 Count 都只需要基于 Flag == "N" 的观测来计算,这种做法就会起作用。
在下面的可重现示例中,用户根据 (A) 时间点和 (B) 两列值中的哪一列的输入对数据框的元素进行分层。
在用户选择对“Values_2”元素进行分层的情况下,我希望只统计“Flag”列中标记为“N”的那些行;否则,当用户选择对“Values_1”元素进行分层时,“Flag”列将被忽略。
除了在选择“Values_2”时计算标记为“N”的元素外,以下代码有效。我注释掉了我对标记为“N”的元素进行计数的尝试...因为它会产生无意义的结果。
我还希望只对标记为“N”的“Values_2”元素求和,不过这一点我可以自己解决。
底部的图片更好地解释了这个问题。
有什么建议吗?
library(shiny)
library(tidyverse)
library(shinyWidgets)
# Page layout: two server-rendered selectors, then the raw and the stratified
# tables, each preceded by a bold caption.
ui <- fluidPage(
  uiOutput("stratPeriod"),
  uiOutput("stratValues"),
  h5(strong("Raw data frame:")),
  tableOutput("rawData"),
  h5(strong("Stratified data:")),
  tableOutput("stratData")
)
# Server logic for the stratification demo app.
#
# Defines the demo data, renders the two selectors and the raw-data table, and
# renders a table that cuts the selected value column into ranges for the
# chosen period (five equal-width ranges between the period's minimum and
# maximum, or the single range 0-10 as a fallback), reporting per-range counts
# and sums, their percentage shares, and a final "Total" row.
#
# NOTE(review): custom_min() and custom_max() are not defined in this snippet
# and must exist before the app is run. Presumably they return Inf for empty
# input (that is what the is.infinite() fallback below guards) -- confirm.
server <- function(input, output, session) {
  # Fixed demo data, exposed as a reactive.
  dat <- reactive({
    data.frame(
      ID = c(1, 1, 2, 2, 2, 2, 3, 3, 3, 3),
      Period = c(
        "2020-03", "2020-04", "2020-01", "2020-02", "2020-03",
        "2020-04", "2020-01", "2020-02", "2020-03", "2020-04"
      ),
      Values_1 = c(-6, 26, 36, 46, 56, 86, 100, 10, 20, 30),
      Values_2 = c(-6, 13, 18, 46, 28, 43, 100, 10, 10, 30),
      Flag = c("N", "Y", "Y", "N", "Y", "Y", "N", "N", "Y", "N")
    )
  })

  # Period selector: choices are the distinct non-missing periods in the data.
  output$stratPeriod <- renderUI({
    chc <- unique(na.omit(dat()[["Period"]]))
    selectInput(
      inputId = "stratPeriod",
      label = "Choose point-in-time:",
      choices = chc,
      selected = chc[1]
    )
  })

  # Selector for the value column to stratify and sum.
  output$stratValues <- renderUI({
    selectInput(
      "stratValues",
      "Choose values type to sum:",
      choices = c("Values_1", "Values_2"),
      selected = c("Values_1")
    )
  })

  output$rawData <- renderTable({
    dat()
  })

  output$stratData <- renderTable({
    req(input$stratValues)
    req(input$stratPeriod)

    value_col <- input$stratValues

    # Compare the column directly instead of eval(parse(text = ...)) on a
    # pasted string: no code is built from client-supplied input, and the rows
    # are computed once rather than through a reactive() created per render.
    period_rows <- dat() %>%
      filter(.data$Period == input$stratPeriod)

    # Named lo/hi so the locals do not mask base::min() and base::max().
    lo <- custom_min(period_rows[[value_col]])
    hi <- custom_max(period_rows[[value_col]])
    breaks <- if (any(is.infinite(c(lo, hi)))) {
      c(0, 10)
    } else {
      seq(lo, hi, length.out = 6)
    }

    strat <- period_rows %>%
      mutate(
        Range = cut(
          .data[[value_col]],
          breaks = breaks,
          include.lowest = TRUE,
          right = TRUE,
          dig.lab = 5
        )
      ) %>%
      group_by(Range) %>%
      summarise(Count = n(), Values = sum(.data[[value_col]])) %>%
      # Abandoned attempt at the conditional count, kept for reference. It
      # compares the column's values (not the selected column name) with
      # "Values_1", and counts "N" over the whole unfiltered data frame:
      # summarise(Count = if_else(!!sym(input$stratValues) == "Values_1",n(),sum(dat()[[5]]=="N")),Values = sum(!!sym(input$stratValues))) %>%
      # Re-add ranges that contain no rows so every bin is shown.
      complete(Range, fill = list(Count = 0, Values = 0)) %>%
      ungroup() %>%
      mutate(
        Count_pct = Count / sum(Count) * 100,
        Values_pct = Values / sum(Values) * 100
      )

    # Totals row: numeric columns are summed, the Range column reads "Total".
    totals <- strat %>%
      summarise(
        across(everything(), ~ if (is.numeric(.x)) sum(.x) else "Total")
      )

    bind_rows(strat, totals)
  })
}
# Launch the app from the UI definition and server function above.
shinyApp(ui = ui, server = server)
我认为最简单的方法可能是将 renderTable() 调用更改为以下内容:
# Stratified table with conditional filtering; assigns output$stratData and
# reads input and dat(), so it belongs inside the server function.
#
# Bins the selected value column for the chosen period and reports per-range
# counts, sums and percentage shares plus a "Total" row. When "Values_2" is
# selected, only rows with Flag == "N" are counted and summed; the range
# breaks are still derived from all rows of the period.
#
# NOTE(review): custom_min() and custom_max() are not defined in this snippet
# and must exist before the app is run. Presumably they return Inf for empty
# input (that is what the is.infinite() fallback below guards) -- confirm.
output$stratData <- renderTable({
  req(input$stratValues)
  req(input$stratPeriod)

  value_col <- input$stratValues

  # Compare the column directly instead of eval(parse(text = ...)) on a
  # pasted string: no code is built from client-supplied input, and the rows
  # are computed once rather than through a reactive() created per render.
  period_rows <- dat() %>%
    filter(.data$Period == input$stratPeriod)

  # Named lo/hi so the locals do not mask base::min() and base::max().
  lo <- custom_min(period_rows[[value_col]])
  hi <- custom_max(period_rows[[value_col]])
  breaks <- if (any(is.infinite(c(lo, hi)))) {
    c(0, 10)
  } else {
    seq(lo, hi, length.out = 6)
  }

  binned <- period_rows %>%
    mutate(
      Range = cut(
        .data[[value_col]],
        breaks = breaks,
        include.lowest = TRUE,
        right = TRUE,
        dig.lab = 5
      )
    ) %>%
    group_by(Range)

  # The Flag column only matters when stratifying Values_2.
  if (value_col == "Values_2") {
    binned <- binned %>%
      filter(Flag == "N")
  }

  strat <- binned %>%
    summarise(Count = n(), Values = sum(.data[[value_col]])) %>%
    # Re-add ranges that contain no rows so every bin is shown.
    complete(Range, fill = list(Count = 0, Values = 0)) %>%
    ungroup() %>%
    mutate(
      Count_pct = Count / sum(Count) * 100,
      Values_pct = Values / sum(Values) * 100
    )

  # Totals row: numeric columns are summed, the Range column reads "Total".
  totals <- strat %>%
    summarise(
      across(everything(), ~ if (is.numeric(.x)) sum(.x) else "Total")
    )

  bind_rows(strat, totals)
})
在上面的代码中,有一个 if() 条件用来判断 stratValues 是否为 Values_2。如果是,它会过滤数据,只保留 Flag 为 "N" 的观测。然后,它继续进行其余的分析。如果 Values 和 Count 都只需要基于 Flag == "N" 的观测来计算,这种做法就会起作用。