如何使用 python 将多个 excel 文件转换为 CSV utf-8 编码
How to convert multiple excel files to CSV utf-8 encoding using python
我在同一目录中有 30 多个 xlsx 文件,我想用 python 将所有文件转换为使用 utf-8 编码的 csv,无论文件原本是何种编码。我正在使用 python 的 magic 库来检测文件编码(代码见下文)。为了进行转换,我尝试了 SO 用户 Julian 在此处贴出的代码,但该代码抛出错误:"InvalidFileException: openpyxl does not support file format, please check you can open it with Excel first. Supported formats are: .xlsx,.xlsm,.xltx,.xltm"。
下面是抛出该错误的代码。第二个问题是:以我有限的 python 知识来看,我认为这段代码只适用于单个 excel 文件。我应该如何让它适用于多个文件?
在此先感谢您的帮助!
# import a library to detect encodings
import magic
import glob
# Print a two-column table: each matching workbook and what libmagic reports
# for the start of the file.
print("File".ljust(45), "Encoding")
for filename in glob.glob('path*.xlsx'):
    with open(filename, 'rb') as rawdata:
        # Only the first 2048 bytes are sampled, not the whole file.
        # NOTE(review): .xlsx is a ZIP container, so the result presumably
        # describes the container format rather than a text encoding.
        result = magic.from_buffer(rawdata.read(2048))
    print(filename.ljust(45), result)
下面是来自 SO 用户的代码(GitHub 链接见此处),运行时会抛出上述错误:
from openpyxl import load_workbook
import csv
from os import sys
def get_all_sheets(excel_file):
    """Return the names of all sheets in *excel_file*.

    Args:
        excel_file: Workbook path passed straight to ``load_workbook``.

    Returns:
        A new list containing the workbook's sheet names.
    """
    workbook = load_workbook(excel_file, read_only=True, data_only=True)
    try:
        # ``sheetnames`` replaces the deprecated ``get_sheet_names()``.
        return list(workbook.sheetnames)
    finally:
        # A read-only workbook keeps its file open until closed explicitly.
        workbook.close()
def csv_from_excel(excel_file, sheets):
    """Export the named worksheets of *excel_file* to UTF-8 CSV files.

    One ``<worksheet name>.csv`` file is written to the current working
    directory for each entry in *sheets*; every field is quoted.

    Args:
        excel_file: Workbook path passed straight to ``load_workbook``.
        sheets: Iterable of worksheet names to export.

    Exits the process with status 1 if a requested worksheet is missing.
    """
    # NOTE: output names depend only on the sheet name, so exporting several
    # workbooks with identically named sheets overwrites earlier files.
    workbook = load_workbook(excel_file, data_only=True)
    for worksheet_name in sheets:
        print("Export " + worksheet_name + " ...")
        try:
            # Item access replaces the deprecated ``get_sheet_by_name()``
            # and raises KeyError for an unknown sheet.
            worksheet = workbook[worksheet_name]
        except KeyError:
            print("Could not find " + worksheet_name)
            sys.exit(1)
        # On Python 3 the csv module needs a text-mode file opened with
        # newline=''; the original 'wb' mode makes writerow() raise TypeError.
        # The explicit encoding guarantees UTF-8 output on every platform.
        with open(worksheet_name + '.csv', 'w', newline='',
                  encoding='utf-8') as your_csv_file:
            wr = csv.writer(your_csv_file, quoting=csv.QUOTE_ALL)
            for row in worksheet.iter_rows():
                wr.writerow([cell.value for cell in row])
        print(" ... done")
# Command-line entry point: <xlsx file> [comma separated sheet names].
if not 2 <= len(sys.argv) <= 3:
    print("Call with " + sys.argv[0] + " <xlsx file> [comma separated list of sheets to export]")
    sys.exit(1)
else:
    if len(sys.argv) == 3:
        # Explicit sheet list given on the command line.
        sheets = sys.argv[2].split(',')
    else:
        # No list given: export every sheet in the workbook.
        sheets = get_all_sheets(sys.argv[1])
    # Validate explicitly; an ``assert`` is stripped under ``python -O``.
    if not sheets:
        print("No sheets to export")
        sys.exit(1)
    csv_from_excel(sys.argv[1], sheets)
您是否尝试过使用 Pandas 库?您可以使用 os 模块把所有文件名存入一个列表,然后遍历该列表,用 read_excel 打开每个 Excel 文件,再写入 csv。代码大致如下:
"""Code to read excel workbooks and output each sheet as a csv"""
""""with utf-8 encoding"""
#Declare a file path where you will store all your excel workbook. You
#can update the file path for the ExcelPath variable
#Declare a file path where you will store all your csv output. You can
#update the file path for the CsvPath variable
import pandas as pd
import os
ExcelPath = "C:/ExcelPath" #Store path for your excel workbooks
CsvPath = "C:/CsvPath" #Store path for your csv outputs
# Keep only workbooks: os.listdir() also returns unrelated files and folders,
# which pd.ExcelFile would fail to open.
fileList = [f for f in os.listdir(ExcelPath)
            if f.lower().endswith(('.xlsx', '.xlsm'))]
for file in fileList:
    # The context manager closes the workbook once all sheets are written.
    with pd.ExcelFile(os.path.join(ExcelPath, file)) as xls:
        sheets = xls.sheet_names
        # One CSV per sheet, named "<workbook>.<sheet>.csv".
        for sheet in sheets:
            # splitext() drops the extension whatever its length, instead of
            # assuming exactly five characters.
            fileNameCSV = os.path.splitext(file)[0] + '.' + str(sheet)
            # Parse from the already-open workbook rather than reopening it.
            df = xls.parse(sheet_name=sheet)
            # Write by full path instead of os.chdir(), so the process
            # working directory is left untouched.
            df.to_csv(os.path.join(CsvPath, "{}.csv".format(fileNameCSV)),
                      encoding="utf-8")
不是最好的,但应该能满足您的需求
首先,第一个错误很明显:
InvalidFileException: openpyxl does not support file format, please check you can open it with Excel first.
Excel 能否成功打开这个文件?如果可以,我们需要查看该工作簿(或其中的一小部分)才能进一步排查。
第二个问题的答案:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# vi:ts=4:et
"""I test to open multiple files."""
import csv
from pathlib import Path
from openpyxl import load_workbook
# find all *.xlsx files into current directory
# and iterate over it
for file in Path('.').glob('*.xlsx'):
    # Read the Excel file.
    wb = load_workbook(file)
    # Small sanity check (optional): show the file and its active sheet.
    print(file, wb.active.title)
    # Export every worksheet to CSV. Iterating ``wb.worksheets`` rather than
    # ``wb.sheetnames`` skips chartsheets, which have no rows to export.
    for sheet in wb.worksheets:
        # Write a UTF-8 file with BOM signature. newline='' hands line-ending
        # control to the csv module and avoids blank lines between rows on
        # Windows.
        with open(f'{file.stem}-{sheet.title}.csv', 'w',
                  encoding="utf-8-sig", newline='') as csvfile:
            spamwriter = csv.writer(csvfile)
            # One CSV row per worksheet row.
            for row in sheet.rows:
                spamwriter.writerow([cell.value for cell in row])
您也可以通过 csv.writer 的 dialect 参数显式指定 CSV 方言。
我在同一目录中有 30 多个 xlsx 文件,我想用 python 将所有文件转换为使用 utf-8 编码的 csv,无论文件原本是何种编码。我正在使用 python 的 magic 库来检测文件编码(代码见下文)。为了进行转换,我尝试了 SO 用户 Julian 在此处贴出的代码,但该代码抛出错误:"InvalidFileException: openpyxl does not support file format, please check you can open it with Excel first. Supported formats are: .xlsx,.xlsm,.xltx,.xltm"。
下面是抛出该错误的代码。第二个问题是:以我有限的 python 知识来看,我认为这段代码只适用于单个 excel 文件。我应该如何让它适用于多个文件?
在此先感谢您的帮助!
# import a library to detect encodings
import magic
import glob
# Print a two-column table: each matching workbook and what libmagic reports
# for the start of the file.
print("File".ljust(45), "Encoding")
for filename in glob.glob('path*.xlsx'):
    with open(filename, 'rb') as rawdata:
        # Only the first 2048 bytes are sampled, not the whole file.
        # NOTE(review): .xlsx is a ZIP container, so the result presumably
        # describes the container format rather than a text encoding.
        result = magic.from_buffer(rawdata.read(2048))
    print(filename.ljust(45), result)
下面是来自 SO 用户的代码(GitHub 链接见此处),运行时会抛出上述错误:
from openpyxl import load_workbook
import csv
from os import sys
def get_all_sheets(excel_file):
    """Return the names of all sheets in *excel_file*.

    Args:
        excel_file: Workbook path passed straight to ``load_workbook``.

    Returns:
        A new list containing the workbook's sheet names.
    """
    workbook = load_workbook(excel_file, read_only=True, data_only=True)
    try:
        # ``sheetnames`` replaces the deprecated ``get_sheet_names()``.
        return list(workbook.sheetnames)
    finally:
        # A read-only workbook keeps its file open until closed explicitly.
        workbook.close()
def csv_from_excel(excel_file, sheets):
    """Export the named worksheets of *excel_file* to UTF-8 CSV files.

    One ``<worksheet name>.csv`` file is written to the current working
    directory for each entry in *sheets*; every field is quoted.

    Args:
        excel_file: Workbook path passed straight to ``load_workbook``.
        sheets: Iterable of worksheet names to export.

    Exits the process with status 1 if a requested worksheet is missing.
    """
    # NOTE: output names depend only on the sheet name, so exporting several
    # workbooks with identically named sheets overwrites earlier files.
    workbook = load_workbook(excel_file, data_only=True)
    for worksheet_name in sheets:
        print("Export " + worksheet_name + " ...")
        try:
            # Item access replaces the deprecated ``get_sheet_by_name()``
            # and raises KeyError for an unknown sheet.
            worksheet = workbook[worksheet_name]
        except KeyError:
            print("Could not find " + worksheet_name)
            sys.exit(1)
        # On Python 3 the csv module needs a text-mode file opened with
        # newline=''; the original 'wb' mode makes writerow() raise TypeError.
        # The explicit encoding guarantees UTF-8 output on every platform.
        with open(worksheet_name + '.csv', 'w', newline='',
                  encoding='utf-8') as your_csv_file:
            wr = csv.writer(your_csv_file, quoting=csv.QUOTE_ALL)
            for row in worksheet.iter_rows():
                wr.writerow([cell.value for cell in row])
        print(" ... done")
# Command-line entry point: <xlsx file> [comma separated sheet names].
if not 2 <= len(sys.argv) <= 3:
    print("Call with " + sys.argv[0] + " <xlsx file> [comma separated list of sheets to export]")
    sys.exit(1)
else:
    if len(sys.argv) == 3:
        # Explicit sheet list given on the command line.
        sheets = sys.argv[2].split(',')
    else:
        # No list given: export every sheet in the workbook.
        sheets = get_all_sheets(sys.argv[1])
    # Validate explicitly; an ``assert`` is stripped under ``python -O``.
    if not sheets:
        print("No sheets to export")
        sys.exit(1)
    csv_from_excel(sys.argv[1], sheets)
您是否尝试过使用 Pandas 库?您可以使用 os 模块把所有文件名存入一个列表,然后遍历该列表,用 read_excel 打开每个 Excel 文件,再写入 csv。代码大致如下:
"""Code to read excel workbooks and output each sheet as a csv"""
""""with utf-8 encoding"""
#Declare a file path where you will store all your excel workbook. You
#can update the file path for the ExcelPath variable
#Declare a file path where you will store all your csv output. You can
#update the file path for the CsvPath variable
import pandas as pd
import os
ExcelPath = "C:/ExcelPath" #Store path for your excel workbooks
CsvPath = "C:/CsvPath" #Store path for your csv outputs
# Keep only workbooks: os.listdir() also returns unrelated files and folders,
# which pd.ExcelFile would fail to open.
fileList = [f for f in os.listdir(ExcelPath)
            if f.lower().endswith(('.xlsx', '.xlsm'))]
for file in fileList:
    # The context manager closes the workbook once all sheets are written.
    with pd.ExcelFile(os.path.join(ExcelPath, file)) as xls:
        sheets = xls.sheet_names
        # One CSV per sheet, named "<workbook>.<sheet>.csv".
        for sheet in sheets:
            # splitext() drops the extension whatever its length, instead of
            # assuming exactly five characters.
            fileNameCSV = os.path.splitext(file)[0] + '.' + str(sheet)
            # Parse from the already-open workbook rather than reopening it.
            df = xls.parse(sheet_name=sheet)
            # Write by full path instead of os.chdir(), so the process
            # working directory is left untouched.
            df.to_csv(os.path.join(CsvPath, "{}.csv".format(fileNameCSV)),
                      encoding="utf-8")
不是最好的,但应该能满足您的需求
首先,第一个错误很明显:
InvalidFileException: openpyxl does not support file format, please check you can open it with Excel first.
Excel 能否成功打开这个文件?如果可以,我们需要查看该工作簿(或其中的一小部分)才能进一步排查。
第二个问题的答案:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# vi:ts=4:et
"""I test to open multiple files."""
import csv
from pathlib import Path
from openpyxl import load_workbook
# find all *.xlsx files into current directory
# and iterate over it
for file in Path('.').glob('*.xlsx'):
    # Read the Excel file.
    wb = load_workbook(file)
    # Small sanity check (optional): show the file and its active sheet.
    print(file, wb.active.title)
    # Export every worksheet to CSV. Iterating ``wb.worksheets`` rather than
    # ``wb.sheetnames`` skips chartsheets, which have no rows to export.
    for sheet in wb.worksheets:
        # Write a UTF-8 file with BOM signature. newline='' hands line-ending
        # control to the csv module and avoids blank lines between rows on
        # Windows.
        with open(f'{file.stem}-{sheet.title}.csv', 'w',
                  encoding="utf-8-sig", newline='') as csvfile:
            spamwriter = csv.writer(csvfile)
            # One CSV row per worksheet row.
            for row in sheet.rows:
                spamwriter.writerow([cell.value for cell in row])
您也可以通过 csv.writer 的 dialect 参数显式指定 CSV 方言。