如何将所有 MySQL 数据库读入 R
How to get all mysql databases into r
我想从 mysql 中将几百个单独的数据库放入 r 中。我可以将它们分别放入,但我不知道如何制作 loop/function/apply 来一次将它们全部放入。
以下是我如何将它们分别放入。
library(RMySQL)
# One connection to the "bookstore" schema on the local MySQL server.
mydb <- dbConnect(MySQL(), user = "root", password = "nelson",
                  host = "localhost", dbname = "bookstore")
# Each table is read by sending a SELECT and fetching every row (n = -1).
dub4_4_16 <- fetch(dbSendQuery(mydb, "select * from dub"), n = -1)
reg4_4_16 <- fetch(dbSendQuery(mydb, "select * from all_sorted"), n = -1)
dub4_5_16 <- fetch(dbSendQuery(mydb, "select * from dub4_5"), n = -1)
我使用下面这组函数(基于 tcpl 包中的函数改写)在 RMySQL 环境中工作。这些函数只是 DBI 调用的包装器。这个回答比较长,但方法的要点是:先构造查询字符串,再对这些查询字符串逐一应用(apply)查询函数来加载数据。最终结果(results)是一个由 data.table 对象组成的列表,其中包含连接中每个数据库的每张表。
library(data.table)
library(RMySQL)
##----------------------------------------------------------------------------##
## Accessory functions
##----------------------------------------------------------------------------##
## Run `query` against database `db` and return the result as a data.table.
##
## Arguments:
##   query  SQL string handed to dbGetQuery().
##   db     Database name handed to dbConnect() as `dbname`.
##   user, pass, host
##          Connection settings. Change the default values as necessary, or
##          just pass your values each time the function is called. Check
##          out the original to see how it finds global variables set by
##          the package.
##
## A fresh connection is opened on every call and is always closed again,
## even when the query itself fails.
getQuery <- function(query, db, user = "dayne",
                     pass = "password", host = "localhost") {
  dbcon <- dbConnect(drv = RMySQL::MySQL(),
                     user = user,
                     password = pass,
                     host = host,
                     dbname = db)
  ## Registered right after connecting so that an error raised by
  ## dbGetQuery() cannot leak the connection.
  on.exit(dbDisconnect(dbcon), add = TRUE)
  result <- as.data.table(dbGetQuery(dbcon, query))
  result[]
}
## Send a statement (`query`) to database `db` without fetching any rows.
##
## Arguments:
##   query  SQL string handed to dbSendQuery().
##   db     Database name handed to dbConnect() as `dbname` (default "").
##   user, pass, host  Connection settings.
##
## Returns TRUE when the statement was accepted; otherwise the error text
## captured by try() is returned as a character string instead of raising.
sendQuery <- function(query, db = "", user = "dayne",
                      pass = "password", host = "localhost") {
  dbcon <- dbConnect(drv = RMySQL::MySQL(),
                     user = user,
                     password = pass,
                     host = host,
                     dbname = db)
  ## Guarantee the connection is closed on every exit path, including an
  ## error while clearing the result.
  on.exit(dbDisconnect(dbcon), add = TRUE)
  temp <- try(dbSendQuery(dbcon, query), silent = TRUE)
  if (inherits(temp, "try-error")) {
    return(temp[1])
  }
  dbClearResult(temp)
  TRUE
}
## Append the rows of `dat` to table `tbl` in database `db`.
##
## Arguments:
##   dat    Data to write; handed to dbWriteTable() as `value`.
##   tbl    Target table name.
##   db     Database name handed to dbConnect() as `dbname`.
##   user, pass, host  Connection settings.
##
## Returns TRUE once dbWriteTable() has returned without raising an error.
appendTable <- function(dat, tbl, db, user = "dayne",
                        pass = "password", host = "localhost") {
  dbcon <- dbConnect(drv = RMySQL::MySQL(),
                     user = user,
                     password = pass,
                     host = host,
                     dbname = db)
  ## Close the connection even if dbWriteTable() raises an error.
  on.exit(dbDisconnect(dbcon), add = TRUE)
  dbWriteTable(conn = dbcon,
               name = tbl,
               value = dat,
               row.names = FALSE,
               append = TRUE)
  TRUE
}
##----------------------------------------------------------------------------##
## Create example data
##----------------------------------------------------------------------------##
## Three example databases, each created with its own CREATE DATABASE call.
listofdb <- c("db1", "db2", "db3")
q1 <- sprintf("CREATE DATABASE %s;", listofdb)
sapply(q1, sendQuery)
## One CREATE TABLE statement per example table (tb1 ... tb5).
listoftables <- sprintf("tb%d", 1:5)
q2fmt <- "CREATE TABLE %s ( val DOUBLE )"
q2 <- sprintf(q2fmt, listoftables)
## Fill database `db` with the example tables: run every CREATE TABLE
## statement in `q2`, then append a 10-row table of rnorm() values to each
## table named in `listoftables`. Returns the sapply() result of the
## appendTable() calls.
createtables <- function(db) {
  sapply(X = q2, FUN = sendQuery, db = db)
  appended <- sapply(X = listoftables,
                     FUN = appendTable,
                     db = db,
                     dat = data.table(val = rnorm(10)))
  appended
}
## Create and populate the example tables in every example database.
sapply(listofdb, createtables)
##----------------------------------------------------------------------------##
## Do the work
##----------------------------------------------------------------------------##
## Load all of the tables from different databases
my_db_list <- getQuery("SHOW DATABASES;", db = "")$Database
## SHOW DATABASES also lists MySQL's own system schemas; drop them so that
## only user databases are loaded.
my_db_list <- setdiff(my_db_list,
                      c("information_schema", "mysql",
                        "performance_schema", "sys"))
my_db_list
# [1] "db1" "db2" "db3"
table_list <- lapply(my_db_list, getQuery, query = "SHOW TABLES;")
names(table_list) <- my_db_list
table_list
# $db1
# Tables_in_db1
# 1: tb1
# 2: tb2
# 3: tb3
# 4: tb4
# 5: tb5
#
# $db2
# Tables_in_db2
# 1: tb1
# 2: tb2
# 3: tb3
# 4: tb4
# 5: tb5
#
# $db3
# Tables_in_db3
# 1: tb1
# 2: tb2
# 3: tb3
# 4: tb4
# 5: tb5
## Add the db name to the table_list data.tables and collapse
table_list <- lapply(names(table_list),
                     function(x) table_list[[x]][ , db := x])
## The first column is named differently in every item ("Tables_in_<db>"),
## so bind by position explicitly instead of relying on the default.
table_list <- rbindlist(table_list, use.names = FALSE)
setnames(table_list, c("tbl", "db"))
## Load all tables from all databases
table_list[ , full_name := paste(db, tbl, sep = ".")]
## Backtick-quote identifiers (doubling any embedded backtick) so database
## or table names with special characters or reserved words stay valid SQL.
quote_id <- function(x) paste0("`", gsub("`", "``", x, fixed = TRUE), "`")
get_tables <- paste0("SELECT * FROM ",
                     quote_id(table_list$db), ".", quote_id(table_list$tbl),
                     ";")
results <- lapply(get_tables, getQuery, db = "")
names(results) <- table_list$full_name
results[["db1.tb5"]]
# val
# 1: -0.09380952
# 2: 0.81556657
# 3: 1.18589086
# 4: 0.19746379
# 5: 0.91738280
# 6: 1.30142674
# 7: 1.42089957
# 8: -0.16475130
# 9: 0.40345353
# 10: -1.31012033
##----------------------------------------------------------------------------##
## Remove example data
##----------------------------------------------------------------------------##
## Drop every example database again, one DROP DATABASE statement each.
cleanup <- sprintf("DROP DATABASE %s;", listofdb)
sapply(X = cleanup, FUN = sendQuery)
我想从 mysql 中将几百个单独的数据库放入 r 中。我可以将它们分别放入,但我不知道如何制作 loop/function/apply 来一次将它们全部放入。
以下是我如何将它们分别放入。
library(RMySQL)
# One connection to the "bookstore" schema on the local MySQL server.
mydb <- dbConnect(MySQL(), user = "root", password = "nelson",
                  host = "localhost", dbname = "bookstore")
# Each table is read by sending a SELECT and fetching every row (n = -1).
dub4_4_16 <- fetch(dbSendQuery(mydb, "select * from dub"), n = -1)
reg4_4_16 <- fetch(dbSendQuery(mydb, "select * from all_sorted"), n = -1)
dub4_5_16 <- fetch(dbSendQuery(mydb, "select * from dub4_5"), n = -1)
我使用下面这组函数(基于 tcpl 包中的函数改写)在 RMySQL 环境中工作。这些函数只是 DBI 调用的包装器。这个回答比较长,但方法的要点是:先构造查询字符串,再对这些查询字符串逐一应用(apply)查询函数来加载数据。最终结果(results)是一个由 data.table 对象组成的列表,其中包含连接中每个数据库的每张表。
library(data.table)
library(RMySQL)
##----------------------------------------------------------------------------##
## Accessory functions
##----------------------------------------------------------------------------##
## Run `query` against database `db` and return the result as a data.table.
##
## Arguments:
##   query  SQL string handed to dbGetQuery().
##   db     Database name handed to dbConnect() as `dbname`.
##   user, pass, host
##          Connection settings. Change the default values as necessary, or
##          just pass your values each time the function is called. Check
##          out the original to see how it finds global variables set by
##          the package.
##
## A fresh connection is opened on every call and is always closed again,
## even when the query itself fails.
getQuery <- function(query, db, user = "dayne",
                     pass = "password", host = "localhost") {
  dbcon <- dbConnect(drv = RMySQL::MySQL(),
                     user = user,
                     password = pass,
                     host = host,
                     dbname = db)
  ## Registered right after connecting so that an error raised by
  ## dbGetQuery() cannot leak the connection.
  on.exit(dbDisconnect(dbcon), add = TRUE)
  result <- as.data.table(dbGetQuery(dbcon, query))
  result[]
}
## Send a statement (`query`) to database `db` without fetching any rows.
##
## Arguments:
##   query  SQL string handed to dbSendQuery().
##   db     Database name handed to dbConnect() as `dbname` (default "").
##   user, pass, host  Connection settings.
##
## Returns TRUE when the statement was accepted; otherwise the error text
## captured by try() is returned as a character string instead of raising.
sendQuery <- function(query, db = "", user = "dayne",
                      pass = "password", host = "localhost") {
  dbcon <- dbConnect(drv = RMySQL::MySQL(),
                     user = user,
                     password = pass,
                     host = host,
                     dbname = db)
  ## Guarantee the connection is closed on every exit path, including an
  ## error while clearing the result.
  on.exit(dbDisconnect(dbcon), add = TRUE)
  temp <- try(dbSendQuery(dbcon, query), silent = TRUE)
  if (inherits(temp, "try-error")) {
    return(temp[1])
  }
  dbClearResult(temp)
  TRUE
}
## Append the rows of `dat` to table `tbl` in database `db`.
##
## Arguments:
##   dat    Data to write; handed to dbWriteTable() as `value`.
##   tbl    Target table name.
##   db     Database name handed to dbConnect() as `dbname`.
##   user, pass, host  Connection settings.
##
## Returns TRUE once dbWriteTable() has returned without raising an error.
appendTable <- function(dat, tbl, db, user = "dayne",
                        pass = "password", host = "localhost") {
  dbcon <- dbConnect(drv = RMySQL::MySQL(),
                     user = user,
                     password = pass,
                     host = host,
                     dbname = db)
  ## Close the connection even if dbWriteTable() raises an error.
  on.exit(dbDisconnect(dbcon), add = TRUE)
  dbWriteTable(conn = dbcon,
               name = tbl,
               value = dat,
               row.names = FALSE,
               append = TRUE)
  TRUE
}
##----------------------------------------------------------------------------##
## Create example data
##----------------------------------------------------------------------------##
## Three example databases, each created with its own CREATE DATABASE call.
listofdb <- c("db1", "db2", "db3")
q1 <- sprintf("CREATE DATABASE %s;", listofdb)
sapply(q1, sendQuery)
## One CREATE TABLE statement per example table (tb1 ... tb5).
listoftables <- sprintf("tb%d", 1:5)
q2fmt <- "CREATE TABLE %s ( val DOUBLE )"
q2 <- sprintf(q2fmt, listoftables)
## Fill database `db` with the example tables: run every CREATE TABLE
## statement in `q2`, then append a 10-row table of rnorm() values to each
## table named in `listoftables`. Returns the sapply() result of the
## appendTable() calls.
createtables <- function(db) {
  sapply(X = q2, FUN = sendQuery, db = db)
  appended <- sapply(X = listoftables,
                     FUN = appendTable,
                     db = db,
                     dat = data.table(val = rnorm(10)))
  appended
}
## Create and populate the example tables in every example database.
sapply(listofdb, createtables)
##----------------------------------------------------------------------------##
## Do the work
##----------------------------------------------------------------------------##
## Load all of the tables from different databases
my_db_list <- getQuery("SHOW DATABASES;", db = "")$Database
## SHOW DATABASES also lists MySQL's own system schemas; drop them so that
## only user databases are loaded.
my_db_list <- setdiff(my_db_list,
                      c("information_schema", "mysql",
                        "performance_schema", "sys"))
my_db_list
# [1] "db1" "db2" "db3"
table_list <- lapply(my_db_list, getQuery, query = "SHOW TABLES;")
names(table_list) <- my_db_list
table_list
# $db1
# Tables_in_db1
# 1: tb1
# 2: tb2
# 3: tb3
# 4: tb4
# 5: tb5
#
# $db2
# Tables_in_db2
# 1: tb1
# 2: tb2
# 3: tb3
# 4: tb4
# 5: tb5
#
# $db3
# Tables_in_db3
# 1: tb1
# 2: tb2
# 3: tb3
# 4: tb4
# 5: tb5
## Add the db name to the table_list data.tables and collapse
table_list <- lapply(names(table_list),
                     function(x) table_list[[x]][ , db := x])
## The first column is named differently in every item ("Tables_in_<db>"),
## so bind by position explicitly instead of relying on the default.
table_list <- rbindlist(table_list, use.names = FALSE)
setnames(table_list, c("tbl", "db"))
## Load all tables from all databases
table_list[ , full_name := paste(db, tbl, sep = ".")]
## Backtick-quote identifiers (doubling any embedded backtick) so database
## or table names with special characters or reserved words stay valid SQL.
quote_id <- function(x) paste0("`", gsub("`", "``", x, fixed = TRUE), "`")
get_tables <- paste0("SELECT * FROM ",
                     quote_id(table_list$db), ".", quote_id(table_list$tbl),
                     ";")
results <- lapply(get_tables, getQuery, db = "")
names(results) <- table_list$full_name
results[["db1.tb5"]]
# val
# 1: -0.09380952
# 2: 0.81556657
# 3: 1.18589086
# 4: 0.19746379
# 5: 0.91738280
# 6: 1.30142674
# 7: 1.42089957
# 8: -0.16475130
# 9: 0.40345353
# 10: -1.31012033
##----------------------------------------------------------------------------##
## Remove example data
##----------------------------------------------------------------------------##
## Drop every example database again, one DROP DATABASE statement each.
cleanup <- sprintf("DROP DATABASE %s;", listofdb)
sapply(X = cleanup, FUN = sendQuery)