R 可以读取不规则的 xlsx 吗?
Can R read irregular xlsx?
我有很多(大约 1,000 个)像上图这样的 xlsx 文件。我想读取每个 xlsx 文件,并获取每个候选人的姓名、号码和年龄数据。但是我不知道该如何读取这种不规则的 xlsx?
我不知道是否有任何 R Excel API 足够智能来处理您的列格式,但有一个简单的解决方法。您可以将上述工作表保存为 CSV 格式。对上面显示的数据执行此操作后,我得到了以下三个 CSV 行:
Title,,,,,
name,mike,number,123214,age,28
,score,,ddd,aaa,bbb
您可以试试下面的代码:
# Read the exported sheet without treating any line as a header: the first
# line of the file is the title row ("Title,,,,,"), not column names.
# stringsAsFactors = FALSE keeps the cells as plain character values on
# R versions older than 4.0, where the default was TRUE.
df <- read.csv(file = "path/to/your/file.csv", header = FALSE,
               stringsAsFactors = FALSE)
# Drop the title row. Negative indexing is used instead of 2:nrow(df)
# because 2:1 counts downwards when the file holds a single row, which would
# return an NA row plus the title row instead of an empty data frame.
df <- df[-1, , drop = FALSE]
获取 Mike 的姓名、号码和年龄:
# Row 1 of df is the "name,mike,number,123214,age,28" line once the title row
# has been removed; each value sits in the column right after its label.
name <- df[[2]][1]
number <- df[[4]][1]
age <- df[[6]][1]
您可以运行 Excel 中的VBA 代码,并将任意数量的XLSX 文件转换为CSV 文件。然后遍历所有 CSV 文件以将所有文件合并到 R 中的数据框中。
Sub Convert_Excel_To_CSV()
    ' Opens every file matching *.xlsx* in MyPath and saves it as a CSV file
    ' (xlCSVMSDOS format) under OutPath, using the workbook name without its
    ' extension as the CSV file name.
    '
    ' Calculation mode, screen updating and events are switched off while the
    ' files are processed and restored before the procedure ends.
    ' Workbooks that cannot be opened or saved are skipped; a single warning
    ' is shown at the end if that happened for at least one file.
    Dim MyPath As String, OutPath As String, FilesInPath As String
    Dim MyFiles() As String, Fnum As Long
    Dim mybook As Workbook
    Dim mybookname As String
    Dim CalcMode As Long
    Dim ErrorYes As Boolean
    Dim LPosition As Long

    'Fill in the path\folder where the Excel files are
    MyPath = "C:\Users\Ryan\Desktop\Excel_Files\"
    'All XLSX Files get saved in the directory below:
    OutPath = "C:\your_path_here\"

    FilesInPath = Dir(MyPath & "*.xlsx*")
    If FilesInPath = "" Then
        MsgBox "No files found"
        Exit Sub
    End If

    ' Collect all matching file names first; Dir() keeps its own state, so
    ' the list is built before any workbook is opened.
    Fnum = 0
    Do While FilesInPath <> ""
        Fnum = Fnum + 1
        ReDim Preserve MyFiles(1 To Fnum)
        MyFiles(Fnum) = FilesInPath
        FilesInPath = Dir()
    Loop

    ' Remember the current calculation mode so it can be restored afterwards.
    With Application
        CalcMode = .Calculation
        .Calculation = xlCalculationManual
        .ScreenUpdating = False
        .EnableEvents = False
    End With

    ' At least one file exists here (the empty case exited above).
    For Fnum = LBound(MyFiles) To UBound(MyFiles)
        Set mybook = Nothing
        On Error Resume Next
        Set mybook = Workbooks.Open(MyPath & MyFiles(Fnum))
        On Error GoTo 0

        If mybook Is Nothing Then
            ' The workbook could not be opened: record it and move on.
            ErrorYes = True
        Else
            ' Strip the extension at the LAST dot so names such as
            ' "report.v2.xlsx" become "report.v2" rather than "report".
            LPosition = InStrRev(mybook.Name, ".")
            If LPosition > 1 Then
                mybookname = Left$(mybook.Name, LPosition - 1)
            Else
                mybookname = mybook.Name
            End If

            ' A failed save must not abort the macro, otherwise the
            ' Application settings above would stay disabled.
            On Error Resume Next
            mybook.SaveAs Filename:=OutPath & mybookname & ".csv", _
                FileFormat:=xlCSVMSDOS, _
                CreateBackup:=False
            If Err.Number <> 0 Then
                ErrorYes = True
                Err.Clear
            End If
            ' Close only workbooks that were actually opened.
            mybook.Close SaveChanges:=False
            On Error GoTo 0
        End If
    Next Fnum

    If ErrorYes = True Then
        MsgBox "There are problems in one or more files, possible problem:" _
             & vbNewLine & "protected workbook/sheet or a sheet/range that not exist"
    End If

    With Application
        .ScreenUpdating = True
        .EnableEvents = True
        .Calculation = CalcMode
    End With
End Sub
# Merge every CSV file in C:/your_path into a single data frame.
setwd("C:/your_path")
# Only pick up *.csv files; an unfiltered listing would also hand any other
# file in the folder (e.g. the original .xlsx workbooks) to read.csv.
# `pattern` is a regular expression, hence "\\.csv$" rather than "*.csv".
fnames <- list.files(pattern = "\\.csv$", ignore.case = TRUE)
csv <- lapply(fnames, read.csv)
# Stack the per-file data frames row-wise; yields NULL if no file matched.
result <- do.call(rbind, csv)
******** ******** ******** ******** ******** ******** ******** ********
# Merge every CSV file in C:/your_path into a single data frame.
# setwd() returns the PREVIOUS working directory, so its result must not be
# used as the folder to list; keep the target path in its own variable.
filedir <- "C:/your_path"
setwd(filedir)
# `pattern` is a regular expression; restrict the listing to *.csv files.
file_names <- dir(filedir, pattern = "\\.csv$", ignore.case = TRUE)
# The names are relative, which works because the working directory is filedir.
your_data_frame <- do.call(rbind, lapply(file_names, read.csv))
******** ******** ******** ******** ******** ******** ******** ********
# Merge every CSV file in C:/your_path into a single data frame, skipping the
# first line of each file and reading the rest without a header row.
# setwd() returns the PREVIOUS working directory, so its result must not be
# used as the folder to list; keep the target path in its own variable.
filedir <- "C:/your_path"
setwd(filedir)
# `pattern` is a regular expression; restrict the listing to *.csv files.
file_names <- dir(filedir, pattern = "\\.csv$", ignore.case = TRUE)
# The names are relative, which works because the working directory is filedir.
your_data_frame <- do.call(
  rbind,
  lapply(file_names, read.csv, skip = 1, header = FALSE)
)
******** ******** ******** ******** ******** ******** ******** ********
# Merge every CSV file in C:/your_path into a single data frame, reading each
# file without a header row (columns are named V1, V2, ... by read.csv).
# setwd() returns the PREVIOUS working directory, so its result must not be
# used as the folder to list; keep the target path in its own variable.
filedir <- "C:/your_path"
setwd(filedir)
# `pattern` is a regular expression; restrict the listing to *.csv files.
file_names <- dir(filedir, pattern = "\\.csv$", ignore.case = TRUE)
# The names are relative, which works because the working directory is filedir.
your_data_frame <- do.call(rbind, lapply(file_names, read.csv, header = FALSE))
******** ******** ******** ******** ******** ******** ******** ********
# Read every CSV file in C:/your_path into a list of data frames (one list
# element per file, in the order returned by list.files).
setwd("C:/your_path")
# `pattern` is a regular expression, not a shell glob: "\\.csv$" matches names
# ending in ".csv", whereas "*.csv" would also match e.g. "mycsv.txt".
temp = list.files(pattern = "\\.csv$")
# The files are comma-separated, so read.csv is used; read.delim splits on
# tabs and would load each line as a single column.
myfiles = lapply(temp, read.csv)
我有很多(大约 1,000 个)像上图这样的 xlsx 文件。我想读取每个 xlsx 文件,并获取每个候选人的姓名、号码和年龄数据。但是我不知道该如何读取这种不规则的 xlsx?
我不知道是否有任何 R Excel API 足够智能来处理您的列格式,但有一个简单的解决方法。您可以将上述工作表保存为 CSV 格式。对上面显示的数据执行此操作后,我得到了以下三个 CSV 行:
Title,,,,,
name,mike,number,123214,age,28
,score,,ddd,aaa,bbb
您可以试试下面的代码:
# Read the exported sheet without treating any line as a header: the first
# line of the file is the title row ("Title,,,,,"), not column names.
# stringsAsFactors = FALSE keeps the cells as plain character values on
# R versions older than 4.0, where the default was TRUE.
df <- read.csv(file = "path/to/your/file.csv", header = FALSE,
               stringsAsFactors = FALSE)
# Drop the title row. Negative indexing is used instead of 2:nrow(df)
# because 2:1 counts downwards when the file holds a single row, which would
# return an NA row plus the title row instead of an empty data frame.
df <- df[-1, , drop = FALSE]
获取 Mike 的姓名、号码和年龄:
# Row 1 of df is the "name,mike,number,123214,age,28" line once the title row
# has been removed; each value sits in the column right after its label.
name <- df[[2]][1]
number <- df[[4]][1]
age <- df[[6]][1]
您可以运行 Excel 中的VBA 代码,并将任意数量的XLSX 文件转换为CSV 文件。然后遍历所有 CSV 文件以将所有文件合并到 R 中的数据框中。
Sub Convert_Excel_To_CSV()
    ' Opens every file matching *.xlsx* in MyPath and saves it as a CSV file
    ' (xlCSVMSDOS format) under OutPath, using the workbook name without its
    ' extension as the CSV file name.
    '
    ' Calculation mode, screen updating and events are switched off while the
    ' files are processed and restored before the procedure ends.
    ' Workbooks that cannot be opened or saved are skipped; a single warning
    ' is shown at the end if that happened for at least one file.
    Dim MyPath As String, OutPath As String, FilesInPath As String
    Dim MyFiles() As String, Fnum As Long
    Dim mybook As Workbook
    Dim mybookname As String
    Dim CalcMode As Long
    Dim ErrorYes As Boolean
    Dim LPosition As Long

    'Fill in the path\folder where the Excel files are
    MyPath = "C:\Users\Ryan\Desktop\Excel_Files\"
    'All XLSX Files get saved in the directory below:
    OutPath = "C:\your_path_here\"

    FilesInPath = Dir(MyPath & "*.xlsx*")
    If FilesInPath = "" Then
        MsgBox "No files found"
        Exit Sub
    End If

    ' Collect all matching file names first; Dir() keeps its own state, so
    ' the list is built before any workbook is opened.
    Fnum = 0
    Do While FilesInPath <> ""
        Fnum = Fnum + 1
        ReDim Preserve MyFiles(1 To Fnum)
        MyFiles(Fnum) = FilesInPath
        FilesInPath = Dir()
    Loop

    ' Remember the current calculation mode so it can be restored afterwards.
    With Application
        CalcMode = .Calculation
        .Calculation = xlCalculationManual
        .ScreenUpdating = False
        .EnableEvents = False
    End With

    ' At least one file exists here (the empty case exited above).
    For Fnum = LBound(MyFiles) To UBound(MyFiles)
        Set mybook = Nothing
        On Error Resume Next
        Set mybook = Workbooks.Open(MyPath & MyFiles(Fnum))
        On Error GoTo 0

        If mybook Is Nothing Then
            ' The workbook could not be opened: record it and move on.
            ErrorYes = True
        Else
            ' Strip the extension at the LAST dot so names such as
            ' "report.v2.xlsx" become "report.v2" rather than "report".
            LPosition = InStrRev(mybook.Name, ".")
            If LPosition > 1 Then
                mybookname = Left$(mybook.Name, LPosition - 1)
            Else
                mybookname = mybook.Name
            End If

            ' A failed save must not abort the macro, otherwise the
            ' Application settings above would stay disabled.
            On Error Resume Next
            mybook.SaveAs Filename:=OutPath & mybookname & ".csv", _
                FileFormat:=xlCSVMSDOS, _
                CreateBackup:=False
            If Err.Number <> 0 Then
                ErrorYes = True
                Err.Clear
            End If
            ' Close only workbooks that were actually opened.
            mybook.Close SaveChanges:=False
            On Error GoTo 0
        End If
    Next Fnum

    If ErrorYes = True Then
        MsgBox "There are problems in one or more files, possible problem:" _
             & vbNewLine & "protected workbook/sheet or a sheet/range that not exist"
    End If

    With Application
        .ScreenUpdating = True
        .EnableEvents = True
        .Calculation = CalcMode
    End With
End Sub
# Merge every CSV file in C:/your_path into a single data frame.
setwd("C:/your_path")
# Only pick up *.csv files; an unfiltered listing would also hand any other
# file in the folder (e.g. the original .xlsx workbooks) to read.csv.
# `pattern` is a regular expression, hence "\\.csv$" rather than "*.csv".
fnames <- list.files(pattern = "\\.csv$", ignore.case = TRUE)
csv <- lapply(fnames, read.csv)
# Stack the per-file data frames row-wise; yields NULL if no file matched.
result <- do.call(rbind, csv)
******** ******** ******** ******** ******** ******** ******** ********
# Merge every CSV file in C:/your_path into a single data frame.
# setwd() returns the PREVIOUS working directory, so its result must not be
# used as the folder to list; keep the target path in its own variable.
filedir <- "C:/your_path"
setwd(filedir)
# `pattern` is a regular expression; restrict the listing to *.csv files.
file_names <- dir(filedir, pattern = "\\.csv$", ignore.case = TRUE)
# The names are relative, which works because the working directory is filedir.
your_data_frame <- do.call(rbind, lapply(file_names, read.csv))
******** ******** ******** ******** ******** ******** ******** ********
# Merge every CSV file in C:/your_path into a single data frame, skipping the
# first line of each file and reading the rest without a header row.
# setwd() returns the PREVIOUS working directory, so its result must not be
# used as the folder to list; keep the target path in its own variable.
filedir <- "C:/your_path"
setwd(filedir)
# `pattern` is a regular expression; restrict the listing to *.csv files.
file_names <- dir(filedir, pattern = "\\.csv$", ignore.case = TRUE)
# The names are relative, which works because the working directory is filedir.
your_data_frame <- do.call(
  rbind,
  lapply(file_names, read.csv, skip = 1, header = FALSE)
)
******** ******** ******** ******** ******** ******** ******** ********
# Merge every CSV file in C:/your_path into a single data frame, reading each
# file without a header row (columns are named V1, V2, ... by read.csv).
# setwd() returns the PREVIOUS working directory, so its result must not be
# used as the folder to list; keep the target path in its own variable.
filedir <- "C:/your_path"
setwd(filedir)
# `pattern` is a regular expression; restrict the listing to *.csv files.
file_names <- dir(filedir, pattern = "\\.csv$", ignore.case = TRUE)
# The names are relative, which works because the working directory is filedir.
your_data_frame <- do.call(rbind, lapply(file_names, read.csv, header = FALSE))
******** ******** ******** ******** ******** ******** ******** ********
# Read every CSV file in C:/your_path into a list of data frames (one list
# element per file, in the order returned by list.files).
setwd("C:/your_path")
# `pattern` is a regular expression, not a shell glob: "\\.csv$" matches names
# ending in ".csv", whereas "*.csv" would also match e.g. "mycsv.txt".
temp = list.files(pattern = "\\.csv$")
# The files are comma-separated, so read.csv is used; read.delim splits on
# tabs and would load each line as a single column.
myfiles = lapply(temp, read.csv)