将 BCP 文件导出为 CSV 格式
Export a BCP file into CSV format
我想从中提取数据
Data Charity Register Extract 并将其导出为 Excel 文件 (.csv)。
此人在 Github 中发布了他的代码,使用 import.py file
这是我的做法:有两种方式:
第一种方式:
我从上面的link下载文件:RegPlusExtract_November_2015.zip放在C:\Python27(我也安装了Python)
我在 IDLE 中打开下面的代码文件 (import.py) 和之后的代码文件 (bcp.py) 和 运行 import.py (使用 F5 ).我把这两个.py文件放在C:\Python27
#!/usr/bin/env python
import bcp
import zipfile
import sys
cc_files = {
"extract_acct_submit": [
"regno",
"submit_date",
"arno",
"fyend"
],
"extract_aoo_ref": [
"aootype",
"aookey",
"aooname",
"aoosort",
"welsh",
"master",
"code"
],
"extract_ar_submit": [
"regno",
"arno",
"submit_date"
],
"extract_charity": [
"regno",
"subno",
"name",
"orgtype",
"gd",
"aob",
"aob_defined",
"nhs",
"ha_no",
"corr",
"add1",
"add2",
"add3",
"add4",
"add5",
"postcode",
"phone",
"fax",
],
"extract_charity_aoo": [
"regno",
"aootype",
"aookey",
"welsh",
"master"
],
"extract_class": [
"regno",
"class"
],
"extract_class_ref": [
"classno",
"classtext",
],
"extract_financial": [
"regno",
"fystart",
"fyend",
"income",
"expend"
],
"extract_main_charity": [
"regno",
"coyno",
"trustees",
"fyend",
"welsh",
"incomedate",
"income",
"grouptype",
"email",
"web"
],
"extract_name": [
"regno",
"subno",
"nameno",
"name"
],
"extract_objects": [
"regno",
"subno",
"seqno",
"object"
],
"extract_partb": [
"regno",
"artype",
"fystart",
"fyend",
"inc_leg",
"inc_end",
"inc_vol",
"inc_fr",
"inc_char",
"inc_invest",
"inc_other",
"inc_total",
"invest_gain",
"asset_gain",
"pension_gain",
"exp_vol",
"exp_trade",
"exp_invest",
"exp_grant",
"exp_charble",
"exp_gov",
"exp_other",
"exp_total",
"exp_support",
"exp_dep",
"reserves",
"asset_open",
"asset_close",
"fixed_assets",
"open_assets",
"invest_assets",
"cash_assets",
"current_assets",
"credit_1",
"credit_long",
"pension_assets",
"total_assets",
"funds_end",
"funds_restrict",
"funds_unrestrict",
"funds_total",
"employees",
"volunteers",
"cons_acc",
"charity_acc"
],
"extract_registration": [
"regno",
"subno",
"regdate",
"remdate",
"remcode"
],
"extract_remove_ref": [
"code",
"text"
],
"extract_trustee": [
"regno",
"trustee"
]
}
def import_zip(zip_file):
zf = zipfile.ZipFile(zip_file, 'r')
print 'Opened zip file: %s' % zip_file
for filename in cc_files:
try:
bcp_filename = filename + '.bcp'
csv_filename = filename + '.csv'
bcpdata = zf.read(bcp_filename)
bcp.convert(bcpdata, csvfilename=csv_filename, col_headers=cc_files[filename])
print 'Converted: %s' % bcp_filename
except KeyError:
print 'ERROR: Did not find %s in zip file' % bcp_filename
def main():
zip_file = sys.argv[1]
import_zip(zip_file)
if __name__ == '__main__':
main()
#!/usr/bin/env python
import sys
import csv
def convert(bcpdata, csvfilename="", lineterminator='*@@*', delimiter='@**@', quote='"', newdelimiter=',', col_headers=None, escapechar='\', newline='\n'):
bcpdata = bcpdata.replace(escapechar, escapechar + escapechar)
bcpdata = bcpdata.replace(quote, escapechar + quote)
bcpdata = bcpdata.replace(delimiter, quote + newdelimiter + quote)
bcpdata = bcpdata.replace(lineterminator, quote + newline + quote)
if csvfilename=="":
csvfilename = 'converted.csv'
with open(csvfilename, 'wb') as csvfile:
if(col_headers):
writer = csv.writer(csvfile)
writer.writerow(col_headers)
csvfile.write('"')
csvfile.write(bcpdata)
csvfile.write('"')
def main():
bcp_filename = sys.argv[1]
try:
csv_filename = sys.argv[2]
except IndexError:
csv_filename = bcp_filename.replace('.bcp', '.csv')
with open(bcp_filename, 'rb') as bcpfile:
bcpdata = bcpfile.read()
convert(bcpdata, csv_filename)
if __name__ == '__main__':
main()
这个错误让我震惊:
>>> ================================ RESTART ================================
>>>
Traceback (most recent call last):
File "C:\Python27\bcp.py", line 31, in <module>
main()
File "C:\Python27\bcp.py", line 21, in main
bcp_filename = sys.argv[1]
IndexError: list index out of range
>>> ================================ RESTART ================================
>>>
Traceback (most recent call last):
File "C:\Python27\import.py", line 175, in <module>
main()
File "C:\Python27\import.py", line 171, in main
zip_file = sys.argv[1]
IndexError: list index out of range
>>>
谁能指出哪里出错了?
第二种方式:
然后我尝试使用 Windows 中的命令提示符来 运行 文件:
首先,我将路径设置为我保存所有文件的位置 (C:\python27)
然后我在命令提示符运行
python import RegPlusExtract_November_2015.zip
给我的印象是:错误:
File"<stdin>", line 1
python import RegPlusExtract_November_2015.zip
任何人都可以指出我哪里错了,或者告诉我如何从上面的数据 link 中提取 csv 文件。
当您从 IDLE 运行 脚本时,您不能在 sys.argv
中传递参数。所以在那个用例中,错误是正常的。但是在F5出现错误后,你应该可以直接调用:
zip_file = 'RegPlusExtract_November_2015.zip'
import_zip(zip_file)
它应该允许您处理数据。
对于命令提示符中的第二种方式,您必须给出脚本文件的确切名称。命令应该是:
python import.py RegPlusExtract_November_2015.zip
但是无论如何,将您自己的脚本和其他数据文件放在Python 目录中是不好的。 C:\Python27
应该只包含来自初始 Python 发行版和其他通用实用程序的文件, 而不是 您的本地脚本。
通常的方法是将 c:\Python
添加到您的 PATH 环境,并为您的 Data Charity 处理使用专用目录
我想从中提取数据 Data Charity Register Extract 并将其导出为 Excel 文件 (.csv)。
此人在 Github 中发布了他的代码,使用 import.py file
这是我的做法:有两种方式:
第一种方式:
我从上面的link下载文件:RegPlusExtract_November_2015.zip放在C:\Python27(我也安装了Python)
我在 IDLE 中打开下面的代码文件 (import.py) 和之后的代码文件 (bcp.py) 和 运行 import.py (使用 F5 ).我把这两个.py文件放在C:\Python27
#!/usr/bin/env python
import bcp
import zipfile
import sys
cc_files = {
"extract_acct_submit": [
"regno",
"submit_date",
"arno",
"fyend"
],
"extract_aoo_ref": [
"aootype",
"aookey",
"aooname",
"aoosort",
"welsh",
"master",
"code"
],
"extract_ar_submit": [
"regno",
"arno",
"submit_date"
],
"extract_charity": [
"regno",
"subno",
"name",
"orgtype",
"gd",
"aob",
"aob_defined",
"nhs",
"ha_no",
"corr",
"add1",
"add2",
"add3",
"add4",
"add5",
"postcode",
"phone",
"fax",
],
"extract_charity_aoo": [
"regno",
"aootype",
"aookey",
"welsh",
"master"
],
"extract_class": [
"regno",
"class"
],
"extract_class_ref": [
"classno",
"classtext",
],
"extract_financial": [
"regno",
"fystart",
"fyend",
"income",
"expend"
],
"extract_main_charity": [
"regno",
"coyno",
"trustees",
"fyend",
"welsh",
"incomedate",
"income",
"grouptype",
"email",
"web"
],
"extract_name": [
"regno",
"subno",
"nameno",
"name"
],
"extract_objects": [
"regno",
"subno",
"seqno",
"object"
],
"extract_partb": [
"regno",
"artype",
"fystart",
"fyend",
"inc_leg",
"inc_end",
"inc_vol",
"inc_fr",
"inc_char",
"inc_invest",
"inc_other",
"inc_total",
"invest_gain",
"asset_gain",
"pension_gain",
"exp_vol",
"exp_trade",
"exp_invest",
"exp_grant",
"exp_charble",
"exp_gov",
"exp_other",
"exp_total",
"exp_support",
"exp_dep",
"reserves",
"asset_open",
"asset_close",
"fixed_assets",
"open_assets",
"invest_assets",
"cash_assets",
"current_assets",
"credit_1",
"credit_long",
"pension_assets",
"total_assets",
"funds_end",
"funds_restrict",
"funds_unrestrict",
"funds_total",
"employees",
"volunteers",
"cons_acc",
"charity_acc"
],
"extract_registration": [
"regno",
"subno",
"regdate",
"remdate",
"remcode"
],
"extract_remove_ref": [
"code",
"text"
],
"extract_trustee": [
"regno",
"trustee"
]
}
def import_zip(zip_file):
zf = zipfile.ZipFile(zip_file, 'r')
print 'Opened zip file: %s' % zip_file
for filename in cc_files:
try:
bcp_filename = filename + '.bcp'
csv_filename = filename + '.csv'
bcpdata = zf.read(bcp_filename)
bcp.convert(bcpdata, csvfilename=csv_filename, col_headers=cc_files[filename])
print 'Converted: %s' % bcp_filename
except KeyError:
print 'ERROR: Did not find %s in zip file' % bcp_filename
def main():
zip_file = sys.argv[1]
import_zip(zip_file)
if __name__ == '__main__':
main()
#!/usr/bin/env python
import sys
import csv
def convert(bcpdata, csvfilename="", lineterminator='*@@*', delimiter='@**@', quote='"', newdelimiter=',', col_headers=None, escapechar='\', newline='\n'):
bcpdata = bcpdata.replace(escapechar, escapechar + escapechar)
bcpdata = bcpdata.replace(quote, escapechar + quote)
bcpdata = bcpdata.replace(delimiter, quote + newdelimiter + quote)
bcpdata = bcpdata.replace(lineterminator, quote + newline + quote)
if csvfilename=="":
csvfilename = 'converted.csv'
with open(csvfilename, 'wb') as csvfile:
if(col_headers):
writer = csv.writer(csvfile)
writer.writerow(col_headers)
csvfile.write('"')
csvfile.write(bcpdata)
csvfile.write('"')
def main():
bcp_filename = sys.argv[1]
try:
csv_filename = sys.argv[2]
except IndexError:
csv_filename = bcp_filename.replace('.bcp', '.csv')
with open(bcp_filename, 'rb') as bcpfile:
bcpdata = bcpfile.read()
convert(bcpdata, csv_filename)
if __name__ == '__main__':
main()
这个错误让我震惊:
>>> ================================ RESTART ================================
>>>
Traceback (most recent call last):
File "C:\Python27\bcp.py", line 31, in <module>
main()
File "C:\Python27\bcp.py", line 21, in main
bcp_filename = sys.argv[1]
IndexError: list index out of range
>>> ================================ RESTART ================================
>>>
Traceback (most recent call last):
File "C:\Python27\import.py", line 175, in <module>
main()
File "C:\Python27\import.py", line 171, in main
zip_file = sys.argv[1]
IndexError: list index out of range
>>>
谁能指出哪里出错了?
第二种方式:
然后我尝试使用 Windows 中的命令提示符来 运行 文件: 首先,我将路径设置为我保存所有文件的位置 (C:\python27) 然后我在命令提示符运行
python import RegPlusExtract_November_2015.zip
给我的印象是:错误:
File"<stdin>", line 1
python import RegPlusExtract_November_2015.zip
任何人都可以指出我哪里错了,或者告诉我如何从上面的数据 link 中提取 csv 文件。
当您从 IDLE 运行 脚本时,您不能在 sys.argv
中传递参数。所以在那个用例中,错误是正常的。但是在F5出现错误后,你应该可以直接调用:
zip_file = 'RegPlusExtract_November_2015.zip'
import_zip(zip_file)
它应该允许您处理数据。
对于命令提示符中的第二种方式,您必须给出脚本文件的确切名称。命令应该是:
python import.py RegPlusExtract_November_2015.zip
但是无论如何,将您自己的脚本和其他数据文件放在Python 目录中是不好的。 C:\Python27
应该只包含来自初始 Python 发行版和其他通用实用程序的文件, 而不是 您的本地脚本。
通常的方法是将 c:\Python
添加到您的 PATH 环境,并为您的 Data Charity 处理使用专用目录