使用 RDCOMClient 打开数据框时内存泄漏

Memory Leak When Opening Data Frame With RDCOMClient

我正在尝试使用 RDCOMClient 包把数据框动态地在 Excel 中打开。我已经有了能把数据框写入 Excel 的可用代码,但遇到的问题是:代码运行结束后,R 没有释放把数据加载到 Excel 时占用的内存资源。即使关闭了 Excel 应用程序,这些内存也不会被释放。是不是我遗漏了什么,可以用来让 rsession.exe 释放内存?目前我能释放内存的唯一办法是关闭 RStudio 再重新打开。

函数代码

#' Open a data frame in a new Excel workbook through COM (RDCOMClient)
#'
#' Starts Excel, adds a workbook and writes `data` to its first sheet in
#' blocks of `block_size` rows, then auto-fits the columns and makes Excel
#' visible. The function is deliberately self-contained (no helpers defined
#' outside its body, package functions called with `::`) so that it can also
#' be run in a separate R process.
#'
#' @param data Object to export. Anything that is not a data frame is coerced
#'   with `as.data.frame()`.
#' @param headers If `TRUE`, the column names are written to row 1 and the
#'   data starts on row 2.
#' @param rownames If `TRUE`, the row names are written to column 1 and the
#'   data starts in column 2.
#' @param block_size Number of data rows sent to Excel per COM call.
#' @return The value of a final `gc()` call, invisibly.
in.xl <- function(data, headers = TRUE, rownames = FALSE, block_size = 2000L) {
  # Fail with a clear message instead of require()'s FALSE + later
  # "could not find function" error.
  if (!requireNamespace("RDCOMClient", quietly = TRUE)) {
    stop("Package 'RDCOMClient' is required to send data to Excel.",
         call. = FALSE)
  }
  stopifnot(
    is.numeric(block_size),
    length(block_size) == 1L,
    !is.na(block_size),
    block_size >= 1
  )
  block_size <- as.integer(block_size)

  # Attempt to coerce non data frame input into a data frame
  if (!is.data.frame(data)) {
    data <- as.data.frame(data)
  }
  n_rows <- nrow(data)
  n_cols <- ncol(data)
  if (n_cols == 0L) {
    # Checked before Excel is started so no hidden instance is left behind.
    stop("`data` must have at least one column.", call. = FALSE)
  }

  write_headers <- isTRUE(as.logical(headers))
  write_rownames <- isTRUE(as.logical(rownames))

  # Offsets between data frame indices and sheet indices: the header row and
  # the row-name column each push the data by one cell.
  row_offset <- if (write_headers) 1L else 0L
  col_offset <- if (write_rownames) 1L else 0L
  first_col <- 1L + col_offset
  last_col <- n_cols + col_offset

  # Create COM connection to Excel
  xl_app <- RDCOMClient::COMCreate("Excel.Application")
  # Whatever happens below, give the user back a visible, responsive Excel
  # rather than a hidden instance with screen updating switched off.
  on.exit({
    xl_app[["StatusBar"]] <- ""
    xl_app[["ScreenUpdating"]] <- TRUE
    xl_app[["Visible"]] <- TRUE
  }, add = TRUE)
  xl_app[["ScreenUpdating"]] <- FALSE

  xl_book <- xl_app[["Workbooks"]]$Add()
  xl_sheet <- xl_book$Sheets(1)

  # Write `values` into the rectangle spanned by the two corner cells. The
  # COM wrappers created here become unreachable as soon as this returns.
  write_range <- function(top, left, bottom, right, values) {
    target <- xl_sheet$Range(
      xl_sheet$Cells(top, left),
      xl_sheet$Cells(bottom, right)
    )
    target[["Value"]] <- RDCOMClient::asCOMArray(values)
    invisible(NULL)
  }

  if (write_headers) {
    # One row holding the column names
    write_range(1L, first_col, 1L, last_col,
                matrix(colnames(data), nrow = 1L))
  }

  if (write_rownames && n_rows > 0L) {
    # One column holding the row names (a data frame always has row names)
    write_range(1L + row_offset, 1L, n_rows + row_offset, 1L,
                matrix(row.names(data), ncol = 1L))
  }

  n_blocks <- ceiling(n_rows / block_size)
  if (n_blocks > 0) {
    # Only create the bar when there is data: txtProgressBar() needs max > min.
    progress <- utils::txtProgressBar(
      min = 0, max = n_blocks, initial = 0, style = 3, width = 20
    )
    on.exit(close(progress), add = TRUE)
  }

  for (block in seq_len(n_blocks)) {
    # Data frame rows covered by this block; the last block may be shorter.
    df_first <- (block - 1L) * block_size + 1L
    df_last <- min(block * block_size, n_rows)

    xl_app[["StatusBar"]] <- paste("Processing block", block, "of", n_blocks)

    # drop = FALSE keeps a single-column data frame from collapsing to a
    # vector.
    write_range(
      df_first + row_offset, first_col,
      df_last + row_offset, last_col,
      data[df_first:df_last, , drop = FALSE]
    )

    utils::setTxtProgressBar(progress, block)

    # Collect after every block so R can reclaim the block's temporaries
    # before the next one is built.
    gc()
  }

  xl_app[["StatusBar"]] <- "Formatting Columns..."

  # Auto adjust column widths
  for (col_index in seq_len(last_col)) {
    sheet_col <- xl_sheet$Columns(col_index)
    sheet_col[["EntireColumn"]]$AutoFit()
  }

  # The status bar, screen updating and visibility are restored by on.exit().
  invisible(gc())
}

下面是生成用于测试的大型数据框的代码(在任务管理器中查看 rsession.exe 进程时,更容易观察到内存占用问题)。

# Build a large test data frame: 20 columns of 100,000 values, where each
# column is sampled with replacement from its own pool of 10 random
# 15-letter strings.
df <- data.frame(
  replicate(
    20,
    sample(
      replicate(
        10,
        paste(sample(LETTERS, 15, replace = TRUE), collapse = "")
      ),
      100000,
      replace = TRUE
    )
  )
)

# Send the data frame to Excel from the current R session.
in.xl(df)

您可以使用 callr 包。它会在一个独立的 R 子进程中运行该函数,子进程结束后,其占用的内存会随进程一起归还给操作系统。使用以下代码即可释放内存:

library(callr)

# Build a large test data frame: 20 columns of 1,000,000 values, where each
# column is sampled with replacement from its own pool of 10 random
# 15-letter strings.
df <- data.frame(
  replicate(
    20,
    sample(
      replicate(
        10,
        paste(sample(LETTERS, 15, replace = TRUE), collapse = "")
      ),
      1000000,
      replace = TRUE
    )
  )
)

# Run in.xl() in a separate R process: whatever memory the export holds on
# to belongs to that process and goes away when it exits.
callr::r(func = in.xl, args = list(data = df))