导入 CSV 时选择指定的行
Select specified rows when importing CSV
我有一个很大的 CSV 文件,我只想导入其中选定的某些行。首先,我创建要导入的行的索引,然后希望把这些行号传递给 sqldf,并返回指定行的完整记录。
# Draw the random row ids that should be imported
library(dplyr)
# candidate row numbers 1..20, held as a one-column data frame
index <- as.data.frame(as.matrix(1:20))
# how many row ids to draw
number <- 5
ids <- sample_n(index, number)
# Read the CSV through sqldf
library(sqldf)
# connection to the CSV file
f <- file("/Users/.../filename.csv")
# no row filter yet: this query returns every row of the file
df <- sqldf("select * from f")
如何通过指定行号,从 CSV 文件中只导入选定的行?
也许可以像下面这样用 base R 来实现……
# Write a small sample CSV: 10 rows, an id column and a random value column
write.csv(data.frame(myid = 1:10, var = runif(10)), "temp.csv")
# Data rows we are interested in
ids <- c(1, 3, 4)
# Read a contiguous range: skip = 2 drops the header line and data row 1;
# because header = TRUE, the next line (data row 2) is consumed as the
# column names, so the two rows returned are data rows 3 and 4.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
read.table("temp.csv", header = TRUE, sep = ",", skip = 2, nrows = 2)
## X2 X2.1 X0.406697876984254
## 1 3 3 0.6199803
## 2 4 4 0.0271722
# Selective line retrieval: read only the requested data rows of a CSV.
#
# file: path to a comma-separated file whose first line is the header.
# ids:  numeric vector of data-row numbers to read; row 1 is the line
#       directly below the header.
# Returns a data.frame with one row per element of `ids` (in that order),
# with column names taken from the header of `file`.
dummy <- function(file, ids) {
  # Take the header from `file` itself rather than a hard-coded
  # "temp.csv", so the function works for any path.
  header <- read.table(file, header = TRUE, sep = ",", nrows = 1)
  if (length(ids) == 0) {
    # Nothing requested: return an empty frame that keeps the columns.
    return(header[0, , drop = FALSE])
  }
  # The header occupies line 1, so skipping `id` lines leaves data row
  # `id` as the next (and only) line read.
  tmp <- lapply(
    ids,
    function(id) read.table(file, sep = ",", skip = id, nrows = 1)
  )
  tmp_df <- do.call(rbind.data.frame, tmp)
  names(tmp_df) <- names(header)
  tmp_df
}
# And there it is: fetch only the data rows listed in `ids` from the sample CSV
dummy("temp.csv", ids)
# Sample output (the `var` values come from runif(), so they differ per run):
## X myid var
## 1 1 1 0.9040861
## 2 3 3 0.6027502
## 3 4 4 0.6829611
试试这个例子:
library(sqldf)
# Write a small sample CSV: 10 rows, an id column and a random value column
write.csv(data.frame(myid = 1:10, var = runif(10)), "temp.csv")
# Ids to keep, collapsed into the comma-separated list used in the SQL IN clause
ids <- paste(c(1, 3, 4), collapse = ",")
f <- file("temp.csv")
# Query the file connection, keeping only rows whose myid is in `ids`;
# the fn$ prefix substitutes $ids into the query string
fn$sqldf(
  "select *
from f
where myid in ($ids)",
  file.format = list(header = TRUE, sep = ",")
)
# Sample output
# X myid var
# 1 "1" 1 0.2310945
# 2 "3" 3 0.8825055
# 3 "4" 4 0.6655517
close(f)
我有一个很大的 CSV 文件,我只想导入其中选定的某些行。首先,我创建要导入的行的索引,然后希望把这些行号传递给 sqldf,并返回指定行的完整记录。
# Draw the random row ids that should be imported
library(dplyr)
# candidate row numbers 1..20, held as a one-column data frame
index <- as.data.frame(as.matrix(1:20))
# how many row ids to draw
number <- 5
ids <- sample_n(index, number)
# Read the CSV through sqldf
library(sqldf)
# connection to the CSV file
f <- file("/Users/.../filename.csv")
# no row filter yet: this query returns every row of the file
df <- sqldf("select * from f")
如何通过指定行号,从 CSV 文件中只导入选定的行?
也许可以像下面这样用 base R 来实现……
# Write a small sample CSV: 10 rows, an id column and a random value column
write.csv(data.frame(myid = 1:10, var = runif(10)), "temp.csv")
# Data rows we are interested in
ids <- c(1, 3, 4)
# Read a contiguous range: skip = 2 drops the header line and data row 1;
# because header = TRUE, the next line (data row 2) is consumed as the
# column names, so the two rows returned are data rows 3 and 4.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
read.table("temp.csv", header = TRUE, sep = ",", skip = 2, nrows = 2)
## X2 X2.1 X0.406697876984254
## 1 3 3 0.6199803
## 2 4 4 0.0271722
# Selective line retrieval: read only the requested data rows of a CSV.
#
# file: path to a comma-separated file whose first line is the header.
# ids:  numeric vector of data-row numbers to read; row 1 is the line
#       directly below the header.
# Returns a data.frame with one row per element of `ids` (in that order),
# with column names taken from the header of `file`.
dummy <- function(file, ids) {
  # Take the header from `file` itself rather than a hard-coded
  # "temp.csv", so the function works for any path.
  header <- read.table(file, header = TRUE, sep = ",", nrows = 1)
  if (length(ids) == 0) {
    # Nothing requested: return an empty frame that keeps the columns.
    return(header[0, , drop = FALSE])
  }
  # The header occupies line 1, so skipping `id` lines leaves data row
  # `id` as the next (and only) line read.
  tmp <- lapply(
    ids,
    function(id) read.table(file, sep = ",", skip = id, nrows = 1)
  )
  tmp_df <- do.call(rbind.data.frame, tmp)
  names(tmp_df) <- names(header)
  tmp_df
}
# And there it is: fetch only the data rows listed in `ids` from the sample CSV
dummy("temp.csv", ids)
# Sample output (the `var` values come from runif(), so they differ per run):
## X myid var
## 1 1 1 0.9040861
## 2 3 3 0.6027502
## 3 4 4 0.6829611
试试这个例子:
library(sqldf)
# Write a small sample CSV: 10 rows, an id column and a random value column
write.csv(data.frame(myid = 1:10, var = runif(10)), "temp.csv")
# Ids to keep, collapsed into the comma-separated list used in the SQL IN clause
ids <- paste(c(1, 3, 4), collapse = ",")
f <- file("temp.csv")
# Query the file connection, keeping only rows whose myid is in `ids`;
# the fn$ prefix substitutes $ids into the query string
fn$sqldf(
  "select *
from f
where myid in ($ids)",
  file.format = list(header = TRUE, sep = ",")
)
# Sample output
# X myid var
# 1 "1" 1 0.2310945
# 2 "3" 3 0.8825055
# 3 "4" 4 0.6655517
close(f)