将文本文档拆分为 excel sheet xls
Splitting a text document into an excel sheet xls
我目前正在尝试把我手头的文本文档导出/转换为 .xls 文件。根据我查到的资料,我已经能够创建 xls 文件,但现在还需要让文本文档的内容以正确的格式写入 xls。
这是我正在尝试做的一个例子。
假设我有以下文本文档:numbers.txt
|<DOg>|
|Data1 = 300 |
|Data2 = 200 |
|Data3 = 15 |
|Data4 = 14 |
|Data5 = 4 |
|<DOg>|
|Data1 = 800 |
|Data2 = 500 |
|Data3 = 25 |
|Data4 = 10 |
|Data5 = 5 |
如果我运行我的代码并使用 | 作为分隔符,我得到的 .xls 文件如下:
如您所见,格式不对。
我想要实现的目标是以下格式。
我目前使用的代码如下:
from os import listdir
from os.path import isfile, join

# Directory that holds the input text files (Python 2 `raw_input`).
mypath = raw_input("Please enter the directory path for the input files: ")

# Full paths of the regular files in `mypath` whose name ends in '.txt'.
# `endswith` is used instead of a substring test so that names such as
# 'notes.txt.bak' are not picked up by accident.
textfiles = [join(mypath, f) for f in listdir(mypath)
             if isfile(join(mypath, f)) and f.endswith('.txt')]
def is_number(s):
    """Return True if ``s`` can be converted to a float, else False.

    ``s`` is handed straight to ``float()``. Values that ``float()`` rejects
    with ValueError (for example ``"abc"`` or ``""``) or with TypeError (for
    example ``None``) are reported as non-numeric instead of raising.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        return False
    return True
import os
import xlwt
import xlrd

# Number format applied to every numeric cell.
style = xlwt.XFStyle()
style.num_format_str = '#,###0.00'

for textfile in textfiles:
    # Read-only access is sufficient, and the with-block closes the handle
    # (the bare open(..., 'r+') it replaces was never closed).
    with open(textfile, 'r') as f:
        row_list = [row.split('|') for row in f]
    # Transpose: each tuple holds the n-th '|'-separated field of every line.
    column_list = zip(*row_list)
    workbook = xlwt.Workbook()
    worksheet = workbook.add_sheet('Sheet1')
    # `i` is the sheet column, `item` the sheet row.
    for i, column in enumerate(column_list):
        for item, cell in enumerate(column):
            value = cell.strip()
            if is_number(value):
                worksheet.write(item, i, float(value), style=style)
            else:
                worksheet.write(item, i, value)
    # Swap only the extension; str.replace would also rewrite a '.txt' that
    # happens to occur elsewhere in the path (e.g. in a directory name).
    workbook.save(os.path.splitext(textfile)[0] + '.xls')
我的想法是对列使用 .split() 方法,但我不确定该如何正确实现,因为当我对列使用 split 时,每一行最终都会变成单独的一列。
如果我没看错问题,我猜你可以将它转换为逗号分隔格式,然后将其用作 csv 文件。
>>> for i in f.readlines():
...     print i
...
|Data1 = 300 |
|Data2 = 200 |
|Data3 = 15 |
|Data4 = 14 |
|Data5 = 4 |
|<DOg>|
|Data1 = 800 |
|Data2 = 500 |
|Data3 = 25 |
|Data4 = 10 |
>>> f.seek(0)
>>> for i in f.readlines():
...     if "=" in i:
...         "".join(",".join(i.split("=")).split("|")).strip()
...
'Data1 , 300'
'Data2 , 200'
'Data3 , 15'
'Data4 , 14'
'Data5 , 4'
'Data1 , 800'
您可以修改脚本以将其写入另一个文件,并可能将其格式化为完美的 csv 文件。
def convert_for_excel(data):
    """Turn a '|'-delimited block file into comma-joined rows.

    The file at path ``data`` is split on '|' into cells. Every ``<DOg>``
    cell starts a new group; the groups are then zipped so that the n-th
    cell of every group lands on the same output row.

    Args:
        data: Path of the text file to read.

    Returns:
        A list of strings, one per row, with the cells of each group joined
        by ','. The number of rows equals the length of the shortest group
        (``zip`` semantics). An empty file yields an empty list.
    """
    import re

    with open(data, 'r') as f:
        st = ' '.join(f.readlines())
    # Strip every piece and drop the empty ones: the text after the last '|'
    # is a bare newline that would otherwise be kept as a bogus trailing
    # cell, and a marker with leading whitespace would not be recognised.
    li = [x.strip() for x in re.split(r'\s*\|', st) if x.strip()]
    # Indices of the <DOg> markers.
    ind_of_dog = [i for i, x in enumerate(li) if x == '<DOg>']
    # Slice the cell list into one sublist per marker. The leading 0 keeps
    # any cells that precede the first marker; empty slices are skipped.
    all_lines = [li[i:j]
                 for i, j in zip([0] + ind_of_dog, ind_of_dog + [None])
                 if li[i:j]]
    # Transpose the groups and join each resulting tuple into one row.
    excel_ready = [','.join(t) for t in zip(*all_lines)]
    return excel_ready
import pprint

# Example call: pretty-print the rows built from the file named 'data'.
pprint.pprint(convert_for_excel('data'))
['<DOg>,<DOg>',
'Data1 = 300,Data1 = 800',
'Data2 = 200,Data2 = 500',
'Data3 = 15,Data3 = 25',
'Data4 = 14,Data4 = 10',
'Data5 = 4,Data5 = 5']
您的列似乎没有限制。您需要将所有结果捕获到一个数组中并按如下方式转置它们:
import re

# Collect every '|'-delimited cell of py.txt. With this pattern a <DOg>
# marker line is captured together with its closing pipe, i.e. as "<DOg>|",
# which is why that exact string is counted below.
with open('py.txt') as source:
    lines = re.findall(r'(?s)(?<=\|)([<\w].+?)\s+?\|', source.read())

# One output row per cell position inside a <DOg> group. Floor division keeps
# the row count an int on Python 3 as well as Python 2.
combined = ['' for x in range(len(lines) // lines.count("<DOg>|"))]

# Append by rows: the n-th cell of every group goes to row n.
for idx, cell in enumerate(lines):
    combined[idx % len(combined)] += cell + ','

# Write the rows out. The with-block closes the file; the bare `output.close`
# it replaces lacked parentheses and therefore never ran.
with open('numbersConverted.csv', 'w') as output:
    for comb in combined:
        output.write(comb + "\n")
这会将您的结果转储到 numbersConverted.csv 准备导入。
我目前正在尝试把我手头的文本文档导出/转换为 .xls 文件。根据我查到的资料,我已经能够创建 xls 文件,但现在还需要让文本文档的内容以正确的格式写入 xls。
这是我正在尝试做的一个例子。
假设我有以下文本文档:numbers.txt
|<DOg>|
|Data1 = 300 |
|Data2 = 200 |
|Data3 = 15 |
|Data4 = 14 |
|Data5 = 4 |
|<DOg>|
|Data1 = 800 |
|Data2 = 500 |
|Data3 = 25 |
|Data4 = 10 |
|Data5 = 5 |
如果我运行我的代码并使用 | 作为分隔符,我得到的 .xls 文件如下:
如您所见,格式不对。
我想要实现的目标是以下格式。
我目前使用的代码如下:
from os import listdir
from os.path import isfile, join

# Directory that holds the input text files (Python 2 `raw_input`).
mypath = raw_input("Please enter the directory path for the input files: ")

# Full paths of the regular files in `mypath` whose name ends in '.txt'.
# `endswith` is used instead of a substring test so that names such as
# 'notes.txt.bak' are not picked up by accident.
textfiles = [join(mypath, f) for f in listdir(mypath)
             if isfile(join(mypath, f)) and f.endswith('.txt')]
def is_number(s):
    """Return True if ``s`` can be converted to a float, else False.

    ``s`` is handed straight to ``float()``. Values that ``float()`` rejects
    with ValueError (for example ``"abc"`` or ``""``) or with TypeError (for
    example ``None``) are reported as non-numeric instead of raising.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        return False
    return True
import os
import xlwt
import xlrd

# Number format applied to every numeric cell.
style = xlwt.XFStyle()
style.num_format_str = '#,###0.00'

for textfile in textfiles:
    # Read-only access is sufficient, and the with-block closes the handle
    # (the bare open(..., 'r+') it replaces was never closed).
    with open(textfile, 'r') as f:
        row_list = [row.split('|') for row in f]
    # Transpose: each tuple holds the n-th '|'-separated field of every line.
    column_list = zip(*row_list)
    workbook = xlwt.Workbook()
    worksheet = workbook.add_sheet('Sheet1')
    # `i` is the sheet column, `item` the sheet row.
    for i, column in enumerate(column_list):
        for item, cell in enumerate(column):
            value = cell.strip()
            if is_number(value):
                worksheet.write(item, i, float(value), style=style)
            else:
                worksheet.write(item, i, value)
    # Swap only the extension; str.replace would also rewrite a '.txt' that
    # happens to occur elsewhere in the path (e.g. in a directory name).
    workbook.save(os.path.splitext(textfile)[0] + '.xls')
我的想法是对列使用 .split() 方法,但我不确定该如何正确实现,因为当我对列使用 split 时,每一行最终都会变成单独的一列。
如果我没看错问题,我猜你可以将它转换为逗号分隔格式,然后将其用作 csv 文件。
>>> for i in f.readlines():
...     print i
...
|Data1 = 300 |
|Data2 = 200 |
|Data3 = 15 |
|Data4 = 14 |
|Data5 = 4 |
|<DOg>|
|Data1 = 800 |
|Data2 = 500 |
|Data3 = 25 |
|Data4 = 10 |
>>> f.seek(0)
>>> for i in f.readlines():
...     if "=" in i:
...         "".join(",".join(i.split("=")).split("|")).strip()
...
'Data1 , 300'
'Data2 , 200'
'Data3 , 15'
'Data4 , 14'
'Data5 , 4'
'Data1 , 800'
您可以修改脚本以将其写入另一个文件,并可能将其格式化为完美的 csv 文件。
def convert_for_excel(data):
    """Turn a '|'-delimited block file into comma-joined rows.

    The file at path ``data`` is split on '|' into cells. Every ``<DOg>``
    cell starts a new group; the groups are then zipped so that the n-th
    cell of every group lands on the same output row.

    Args:
        data: Path of the text file to read.

    Returns:
        A list of strings, one per row, with the cells of each group joined
        by ','. The number of rows equals the length of the shortest group
        (``zip`` semantics). An empty file yields an empty list.
    """
    import re

    with open(data, 'r') as f:
        st = ' '.join(f.readlines())
    # Strip every piece and drop the empty ones: the text after the last '|'
    # is a bare newline that would otherwise be kept as a bogus trailing
    # cell, and a marker with leading whitespace would not be recognised.
    li = [x.strip() for x in re.split(r'\s*\|', st) if x.strip()]
    # Indices of the <DOg> markers.
    ind_of_dog = [i for i, x in enumerate(li) if x == '<DOg>']
    # Slice the cell list into one sublist per marker. The leading 0 keeps
    # any cells that precede the first marker; empty slices are skipped.
    all_lines = [li[i:j]
                 for i, j in zip([0] + ind_of_dog, ind_of_dog + [None])
                 if li[i:j]]
    # Transpose the groups and join each resulting tuple into one row.
    excel_ready = [','.join(t) for t in zip(*all_lines)]
    return excel_ready
import pprint

# Example call: pretty-print the rows built from the file named 'data'.
pprint.pprint(convert_for_excel('data'))
['<DOg>,<DOg>',
'Data1 = 300,Data1 = 800',
'Data2 = 200,Data2 = 500',
'Data3 = 15,Data3 = 25',
'Data4 = 14,Data4 = 10',
'Data5 = 4,Data5 = 5']
您的列似乎没有限制。您需要将所有结果捕获到一个数组中并按如下方式转置它们:
import re

# Collect every '|'-delimited cell of py.txt. With this pattern a <DOg>
# marker line is captured together with its closing pipe, i.e. as "<DOg>|",
# which is why that exact string is counted below.
with open('py.txt') as source:
    lines = re.findall(r'(?s)(?<=\|)([<\w].+?)\s+?\|', source.read())

# One output row per cell position inside a <DOg> group. Floor division keeps
# the row count an int on Python 3 as well as Python 2.
combined = ['' for x in range(len(lines) // lines.count("<DOg>|"))]

# Append by rows: the n-th cell of every group goes to row n.
for idx, cell in enumerate(lines):
    combined[idx % len(combined)] += cell + ','

# Write the rows out. The with-block closes the file; the bare `output.close`
# it replaces lacked parentheses and therefore never ran.
with open('numbersConverted.csv', 'w') as output:
    for comb in combined:
        output.write(comb + "\n")
这会将您的结果转储到 numbersConverted.csv 准备导入。