如何将所有 MySQL 数据库导入 R

How to get all mysql databases into r

我想把 MySQL 中的几百个独立数据库导入 R。我可以逐个导入它们,但不知道如何编写 loop/function/apply 来一次性全部导入。

以下是我逐个导入它们的方式。

library(RMySQL)
## Open one connection to the "bookstore" database; every query below
## reuses it.
mydb <- dbConnect(MySQL(), user = "root", password = "nelson",
                  host = "localhost", dbname = "bookstore")

## dbGetQuery() sends the statement, fetches all rows and clears the result
## set in one call, so no open result set is left behind on the connection
## (the dbSendQuery()/fetch() pair never called dbClearResult()).
dub4_4_16 <- dbGetQuery(mydb, "select * from dub")

reg4_4_16 <- dbGetQuery(mydb, "select * from all_sorted")

dub4_5_16 <- dbGetQuery(mydb, "select * from dub4_5")

我在 RMySQL 环境中工作时使用下面这组函数(基于 tcpl 包中的函数改写)。这些函数只是对 DBI 调用的包装。这个答案比较长,但方法的要点是:先构造查询字符串,再对这些查询逐个应用(lapply)加载函数以读取数据。最终的结果 (results) 是一个由 data.table 对象组成的列表,其中包含您的连接中每个数据库的每一张表。

library(data.table)
library(RMySQL)

##----------------------------------------------------------------------------##
## Accessory functions
##----------------------------------------------------------------------------##

## Run a query that returns rows and hand the result back as a data.table.
##
## Arguments:
##   query  SQL statement to run.
##   db     Name of the database to connect to. Other code in this file
##          passes "" when the statement is not tied to one database.
##   user, pass, host
##          Connection settings. Change the default values as necessary, or
##          just pass your values each time the function is called. Check
##          out the original to see how it finds global variables set by
##          the package.
##
## Returns the rows produced by `query`, converted with as.data.table().
## A fresh connection is opened for every call and is always closed again,
## including when the query raises an error.
getQuery <- function(query, db, user = "dayne",
                     pass = "password", host = "localhost") {

  dbcon <- dbConnect(drv = RMySQL::MySQL(),
                     user = user,
                     password = pass,
                     host = host,
                     dbname = db)
  ## Register the disconnect immediately: without this a failing query
  ## would skip the cleanup and leak the connection.
  on.exit(dbDisconnect(dbcon), add = TRUE)

  result <- as.data.table(dbGetQuery(dbcon, query))
  result[]

}

## Run a statement whose rows are not needed (e.g. CREATE / DROP).
##
## Arguments:
##   query  SQL statement to run.
##   db     Name of the database to connect to (defaults to "").
##   user, pass, host
##          Connection settings.
##
## Returns TRUE when the statement succeeds. When dbSendQuery() fails the
## error is not raised; the message captured by try() is returned as a
## character string instead. The connection is closed on every path.
sendQuery <- function(query, db = "", user = "dayne",
                      pass = "password", host = "localhost") {

  dbcon <- dbConnect(drv = RMySQL::MySQL(),
                     user = user,
                     password = pass,
                     host = host,
                     dbname = db)
  ## Guarantee the disconnect even if clearing the result set fails.
  on.exit(dbDisconnect(dbcon), add = TRUE)

  temp <- try(dbSendQuery(dbcon, query), silent = TRUE)

  ## try() signals failure with an object of class "try-error"; subsetting
  ## with [1] strips the attributes and leaves just the message text.
  if (inherits(temp, "try-error")) {
    return(temp[1])
  }

  dbClearResult(temp)
  TRUE

}

## Append the rows of `dat` to table `tbl` in database `db`.
##
## Arguments:
##   dat    Data to write (passed to dbWriteTable() as `value`).
##   tbl    Name of the target table.
##   db     Name of the database to connect to.
##   user, pass, host
##          Connection settings.
##
## Returns TRUE once dbWriteTable() has returned; an error raised by
## dbWriteTable() propagates to the caller. The connection is closed on
## every path.
appendTable <- function(dat, tbl, db, user = "dayne",
                        pass = "password", host = "localhost") {

  dbcon <- dbConnect(drv = RMySQL::MySQL(),
                     user = user,
                     password = pass,
                     host = host,
                     dbname = db)
  ## Register the disconnect immediately: without this a failing write
  ## would skip the cleanup and leak the connection.
  on.exit(dbDisconnect(dbcon), add = TRUE)

  ## row.names = FALSE keeps R's row names out of the table;
  ## append = TRUE adds rows to the table rather than replacing it.
  dbWriteTable(conn = dbcon,
               name = tbl,
               value = dat,
               row.names = FALSE,
               append = TRUE)

  TRUE

}

##----------------------------------------------------------------------------##
## Create example data
##----------------------------------------------------------------------------##

## Names of the example databases; one CREATE DATABASE statement is sent
## for each of them.
listofdb <- c("db1", "db2", "db3")
q1 <- paste0("CREATE DATABASE ", listofdb, ";")
sapply(q1, sendQuery)

## Every example database gets the same five single-column tables.
listoftables <- paste0("tb", 1:5)
q2fmt <- "CREATE TABLE %s ( val DOUBLE )"
q2 <- sprintf(q2fmt, listoftables)

## Create the tables listed in `listoftables` inside database `db`, then
## append ten standard-normal values to each of them.
createtables <- function(db) {
  sapply(q2, sendQuery, db = db)
  sapply(listoftables,
         appendTable,
         db = db,
         dat = data.table(val = rnorm(10)))
}

## Populate every example database.
sapply(listofdb, createtables)


##----------------------------------------------------------------------------##
## Do the work
##----------------------------------------------------------------------------##

## Load all of the tables from different databases
## SHOW DATABASES also lists MySQL's built-in system schemas; drop them so
## that only user databases are queried further down.
system_dbs <- c("information_schema", "mysql", "performance_schema", "sys")
my_db_list <- getQuery("SHOW DATABASES;", db = "")$Database
my_db_list <- setdiff(my_db_list, system_dbs)
my_db_list
# [1] "db1" "db2" "db3"

## One SHOW TABLES result per database, named after the database it
## came from.
table_list <- lapply(my_db_list, getQuery, query = "SHOW TABLES;")
names(table_list) <- my_db_list
table_list
# $db1
#    Tables_in_db1
# 1:           tb1
# 2:           tb2
# 3:           tb3
# 4:           tb4
# 5:           tb5
# 
# $db2
#    Tables_in_db2
# 1:           tb1
# 2:           tb2
# 3:           tb3
# 4:           tb4
# 5:           tb5
# 
# $db3
#    Tables_in_db3
# 1:           tb1
# 2:           tb2
# 3:           tb3
# 4:           tb4
# 5:           tb5

## Add the db name to each of the table_list data.tables and collapse them
## into one data.table with a row per (table, database) pair.
table_list <- lapply(names(table_list),
                     function(x) table_list[[x]][ , db := x])
table_list <- rbindlist(table_list)
setnames(table_list, c("tbl", "db"))

## Load all tables from all databases
table_list[ , full_name := paste(db, tbl, sep = ".")]

## Backtick-quote each identifier so that names which are reserved words or
## contain characters such as "-" still form valid SQL. A backtick inside a
## name is escaped by doubling it.
quote_id <- function(x) sprintf("`%s`", gsub("`", "``", x, fixed = TRUE))
get_tables <- sprintf("SELECT * FROM %s.%s;",
                      quote_id(table_list$db),
                      quote_id(table_list$tbl))
results <- lapply(get_tables, getQuery, db = "")

## Results stay addressable by the plain "db.tbl" name.
names(results) <- table_list$full_name

results[["db1.tb5"]]
#            val
# 1:  -0.09380952
# 2:   0.81556657
# 3:   1.18589086
# 4:   0.19746379
# 5:   0.91738280
# 6:   1.30142674
# 7:   1.42089957
# 8:  -0.16475130
# 9:   0.40345353
# 10: -1.31012033

##----------------------------------------------------------------------------##
## Remove example data
##----------------------------------------------------------------------------##

## Build one DROP DATABASE statement per example database and send each of
## them; the per-statement outcome returned by sendQuery() is printed.
cleanup <- paste0("DROP DATABASE ", listofdb, ";")
sapply(X = cleanup, FUN = sendQuery)