RShiny 使用来自 selectInput 的数据框列名

RShiny working with data frame column names from selectInput

我正在 RShiny 中创建一个交互式应用程序,它可以显示分组箱线图以及 tables 显示基于 select 列的样本组的 Kruskal-Wallis 测试结果由用户在上传 CSV 文件时编辑。

理想情况下,当用户上传他们的 CSV 文件时,他们可以 select 他们希望将哪些列用作 X 和 Y 变量、class 标签以及分组框的组标签绘图和统计计算。

代码需要尽可能通用以允许不同的输入列。但是,我很难理解使用 input$colName 语法访问 selected 列中的数据所需的语法。

例如:

当我尝试使用以下 dplyr 语法获取 table 显示样本组 (input$groupCol) 列 select 中每个组的 kruskal-wallis 结果时,输出不正确,因为它以某种方式未使用正确的列值:

KWtable <- copyCleanedDF  %>% group_by([copyCleanedDF[,input$groupCol]) %>% kruskal_test([copyCleanedDF[,input$numCol] ~ [copyCleanedDF[,input$varCol])

当我通过首先对每个组进行子集化并计算每个组的 KW 来分解代码时,它可以正常工作,但是它是硬编码的并且不适用于任何组变量列:

# subset team A and test KW
ASubset <- data.frame(copyCleanedDF[copyCleanedDF[,input$groupCol] %in% "A",])
krusyTeamA <- kruskal_test(ASubset[,input$numCol] ~ ASubset[,input$varCol], data = ASubset)
# add in a column showing it is tested in team A samples only
krusyTeamA$groupVariable = "Team A"

# subset team B and test KW
BSubset <- data.frame(copyCleanedDF[copyCleanedDF[,input$groupCol] %in% "B",])
krusyTeamB <- kruskal_test(BSubset[,input$numCol] ~ BSubset[,input$varCol], data = BSubset)
# add in a column showing it is tested in team B samples only
krusyTeamB$groupVariable = "Team B"

# subset team C and test KW
CSubset <- data.frame(copyCleanedDF[copyCleanedDF[,input$groupCol] %in% "C",])
krusyTeamC <- kruskal_test(CSubset[,input$numCol] ~ CSubset[,input$varCol], data = CSubset)
# add in a column showing it is tested in team C samples only
krusyTeamC$groupVariable = "Team C"

# put the three tables together

KWtable <- rbind(krusyTeamA,krusyTeamB,krusyTeamC)

列名也很奇怪,显示的是语法而不是实际的列名(例如,它显示为 cleanData()[input$varCol)] 而不是“Genotype”,所以我想了解更好地处理这些数据。

我已经包含了完整的 Rshiny 代码和示例 CSV 文件以便能够重现此代码。

library(shiny)
library(datasets)
library(plotly)
library(dplyr)
library(reticulate)
library(DT)
library(ggplot2)
library(tidyverse)
library(rstatix)
library(dplyr)


ui <- shinyUI(fluidPage(
titlePanel("TargetID Median Levels"),
tabsetPanel(
  tabPanel("Upload File",
         titlePanel("Uploading Files"),
         sidebarLayout(
           sidebarPanel(
             fileInput('file1', 'Browse and select your CSV file',
                       accept=c('text/csv', 
                                'text/comma-separated-values,text/plain', 
                                '.csv')),
             
             # added interface for uploading data from
             # http://shiny.rstudio.com/gallery/file-upload.html
             tags$br(),
             checkboxInput('header', 'Column headers', TRUE),
             selectInput('varCol', 'X Variable', ""),
             selectInput('numCol', 'Select the Y Variable,...)', "", selected = ""),
             selectInput('classCol', 'Select the class label,...)', "", selected = ""),
             selectInput('groupCol', 'Select the group label,...)', "", selected = ""),
             selectInput("plot.type","Plot Type:",
                            list(boxplot = "boxplot")#, histogram = "histogram", density = "density")
             ),
             
             radioButtons('sep', 'Delimiter',
                          c(Semicolon=';',
                            Comma=',',
                            Tab='\t'),
                          ','),
             radioButtons('quote', 'Quote',
                          c(None='',
                            'Double Quote'='"',
                            'Single Quote'="'"),
                          '"')
             
           ),
           mainPanel(
             
             h3("Uploaded data"),
             
             dataTableOutput('table1'),
             
             h3(""),
             
             h3("Boxplot with Median Levels"),
             
             plotlyOutput('MyPlot'),
             
             h3("Kruskal-Wallis H Test"),
             
             dataTableOutput('table2')
             
             
           )
         )
 )
 )
 )
 )

server <- shinyServer(function(input, output, session) {
# added "session" because updateSelectInput requires it
options(warn=-1)
# options(encoding="UTF-8")

data <- reactive({ 
req(input$file1) ## ?req #  require that the input is available

# get the input file uploaded to the server side

inFile <- input$file1 

# read the input file as a data frame into R

inputDF <- read.csv(inFile$datapath, header = input$header, sep = input$sep,
                    quote = input$quote, stringsAsFactors = TRUE)


inputDF[inputDF=="NA"] <- NA # convert missing value strings to NAs recognised by R

# clean up the no-calls and in-phase genotypes from the variant genotypes columns

inputDF[inputDF == "./." | inputDF == ".|."] <- NA # convert the no-calls to missing (NA)
inputDF[inputDF == "0|0"] <- "0/0" # change in-phase wildtype
inputDF[inputDF == "0|1"] <- "0/1" # change in-phase heterzygous
inputDF[inputDF == "1|1"] <- "1/1" # change in-phase homo alt

# Update inputs (you could create an observer with both updateSel...)
# You can also constraint your choices. If you wanted select only numeric
# variables you could set "choices = sapply(df, is.numeric)"
# It depends on what do you want to do later on.

updateSelectInput(session, inputId = 'numCol', label = 'Numerical variable (e.g. LapTime...)',
                  choices = names(inputDF), selected = names(inputDF)[4])
updateSelectInput(session, inputId = 'varCol', label = 'Sample genotypes for a variant',
                  choices = names(inputDF), selected = names(inputDF)[1])
updateSelectInput(session, inputId = 'classCol', label = 'Class label (e.g. Sex)',
                  choices = names(inputDF), selected = names(inputDF)[3])
updateSelectInput(session, inputId = 'groupCol', label = 'Group label (e.g. Team)',
                  choices = names(inputDF), selected = names(inputDF)[2])

return(inputDF)
})


# display the first output table with the uploaded data
output$table1 <- renderDataTable({
req(input$file1)
datatable(
  data(),
  filter = "top",
  selection = "none", #this is to avoid select rows if you click on the rows
  rownames = FALSE,
  extensions = 'Buttons',
  
  options = list(
    scrollX = TRUE,
    autoWidth = TRUE,
    dom = 'Blrtip',
    buttons =
      list(I('colvis'), 'copy', 'print', list(
        extend = 'collection',
        buttons = list(list(extend = 'csv', filename = "LapTime_variant", title = NULL, exportOptions = list(columns = ":visible")),
                       list(extend = 'excel', filename = "LapTime_variant", title = NULL, exportOptions = list(columns = ":visible"))),
        text = 'Download'
      )),
    lengthMenu = list(c(10, 30, 50, -1),
                      c('10', '30', '50', 'All'))
  ),
  class = "display"
 )
 })

cleanData <- reactive({

req(input$file1)

# save the selected dataframe and subset to have only the selected columns

copyDF <- data.frame(data())

# remove any rows with nas in the 4 selected columns

cleanedDF <- copyDF %>% drop_na(c(input$varCol, input$numCol, input$classCol, input$groupCol))

return(cleanedDF)
})

kwData <- reactive({

req(input$file1)

copyCleanedDF <- data.frame(cleanData())

# get the freq count for each group to plot N
# using dplyr function to create a frequency table to match the grouped plt
# this will enable the freq counts to be added to the plot

myFreqs <- copyCleanedDF %>% group_by(copyCleanedDF[input$groupCol], copyCleanedDF[input$varCol],copyCleanedDF[input$classCol]) %>% summarize(Freq=n())  

groupVariable <- as.character(input$groupCol)

# subset team A and test KW
ASubset <- data.frame(copyCleanedDF[copyCleanedDF[,input$groupCol] %in% "A",])
krusyTeamA <- kruskal_test(ASubset[,input$numCol] ~ ASubset[,input$varCol], data = ASubset)
# add in a column showing it is tested in team A samples only
krusyTeamA$groupVariable = "Team A"

# subset team B and test KW
BSubset <- data.frame(copyCleanedDF[copyCleanedDF[,input$groupCol] %in% "B",])
krusyTeamB <- kruskal_test(BSubset[,input$numCol] ~ BSubset[,input$varCol], data = BSubset)
# add in a column showing it is tested in team B samples only
krusyTeamB$groupVariable = "Team B"

# subset team C and test KW
CSubset <- data.frame(copyCleanedDF[copyCleanedDF[,input$groupCol] %in% "C",])
krusyTeamC <- kruskal_test(CSubset[,input$numCol] ~ CSubset[,input$varCol], data = CSubset)
# add in a column showing it is tested in team C samples only
krusyTeamC$groupVariable = "Team C"

# put the three tables together

KWtable <- rbind(krusyTeamA,krusyTeamB,krusyTeamC)

### this part of the code doesn't work 

#KWtable <- copyCleanedDF  %>% group_by(copyCleanedDF[,input$groupCol]) %>% kruskal_test(copyCleanedDF[,input$numCol] ~ copyCleanedDF[,input$varCol])

# then paste the KW p-value to the team label in the main dataframe
# to include it in the plot image

#plotDF <- merge(copyCleanedDF,KWtable,by=("input$groupCol")) # doesn't work 

#plotDF$input$groupCol <- paste0(plotDF$input$groupCol, "\n", plotDF$method, " p=", plotDF$p)

return(KWtable)
})

freqData <- reactive({

req(input$file1)

copyCleanedDF <- data.frame(cleanData())

# get the freq count for each group to plot N
# using dplyr function to create a frequency table to match the grouped plt
# this will enable the freq counts to be added to the plot

myFreqs <- copyCleanedDF %>% group_by(copyCleanedDF[input$groupCol],   copyCleanedDF[input$varCol],copyCleanedDF[input$classCol]) %>% summarize(Freq=n())  

return(myFreqs)
})

# display grouped boxplots

output$MyPlot <- renderPlotly({

req(input$file1)

if(input$plot.type == "boxplot"){
  pl <-  ggplot(cleanData(), aes(x=cleanData()[,input$varCol], y=cleanData()[,input$numCol], fill=cleanData()[,input$classCol])) +
    stat_boxplot(geom ='errorbar') + # add error bars
    geom_boxplot()  + 
    facet_grid(~cleanData()[,input$groupCol],scale="free")

  pl <- pl + stat_summary(geom = 'text', label = paste("n=", freqData()$Freq), fun = max, vjust = -1, position = position_dodge(width=0.7))
  
  # This is to change the y-axis depending on the plot to allow for N to show on the plot
  pl <- pl + scale_y_continuous(limits = function(x){
    c(min(x), ceiling(max(x) * 1.1))
  })
  
  pl %>%
    ggplotly() %>%
    layout(boxmode = "group", autosize = TRUE, boxgroupgap=0.002, boxgap=0.01)
}

})

# display the second output table
output$table2 <- renderDataTable({
req(input$file1)
datatable(
  kwData(),
  filter = "top",
  selection = "none", #this is to avoid select rows if you click on the rows
  rownames = FALSE,
  extensions = 'Buttons',
  
  options = list(
    scrollX = TRUE,
    autoWidth = TRUE,
    dom = 'Blrtip',
    buttons =
      list(I('colvis'), 'copy', 'print', list(
        extend = 'collection',
        buttons = list(list(extend = 'csv', filename = "LapTime_variant", title = NULL, exportOptions = list(columns = ":visible")),
                       list(extend = 'excel', filename = "LapTime_variant", title = NULL, exportOptions = list(columns = ":visible"))),
        text = 'Download'
      )),
    lengthMenu = list(c(10, 30, 50, -1),
                      c('10', '30', '50', 'All'))
  ),
  class = "display"
)
})


})

shinyApp(ui, server)

有没有人知道发生了什么以及我可以如何改进我的代码?

非常感谢

Example CSV file

这是一个小例子,它会使用像 input 这样的字符列表来进行分析:

library(rstatix)
library(dplyr)
input <- list(groupCol = 'supp', numCol = 'len', varCol = 'dose')
input
#> $groupCol
#> [1] "supp"
#> 
#> $numCol
#> [1] "len"
#> 
#> $varCol
#> [1] "dose"
ToothGrowth %>% 
  group_by(!!sym(input$groupCol)) %>% 
  kruskal_test(reformulate(input$varCol, response=input$numCol))
#> # A tibble: 2 × 7
#>   supp  .y.       n statistic    df          p method        
#> * <fct> <chr> <int>     <dbl> <int>      <dbl> <chr>         
#> 1 OJ    len      30      18.5     2 0.0000958  Kruskal-Wallis
#> 2 VC    len      30      25.1     2 0.00000359 Kruskal-Wallis

reprex package (v2.0.1)

于 2022-05-18 创建