无法使用 pyexcel 从 .csv 文件中删除行
Unable to delete rows from a .csv file with pyexcel
这是我的函数:
def prepare_file(time, mkt):
    """Save the newest downloaded report under the market's name.

    Walks every file in ``<cwd>\\Reports``. When ``time`` is newer than the
    module-level ``previous_time``, the file is loaded with pyexcel, saved as
    ``<mkt[0]>.csv`` in the same directory, and the original is removed.

    Args:
        time: Timestamp compared against the global ``previous_time``.
        mkt: Sequence whose first item is used as the output file name.
    """
    global previous_time
    for file in glob.glob(os.getcwd() + '\\Reports\\*'):
        # if it's the most recently downloaded file
        if time > previous_time:
            previous_time = time
            # remove rows for properties that have not changed status
            sheet = pyexcel.get_sheet(file_name=file)
            for row in sheet:
                if row[1] in changed_addresses:
                    pass
                else:
                    # NOTE: `del row` only unbinds the loop variable; the
                    # sheet still holds the row, so the saved file keeps
                    # every row. This is the behaviour being asked about.
                    del row
            # save file as correct name
            sheet.save_as(
                os.getcwd() + '\\Reports\\' + mkt[0] + '.csv'
            )
            os.remove(file)
思路是:在目录中找到最近下载的文件,打开它,删除所有地址不在 changed_addresses 列表中的行,然后用 mkt 列表中包含的字符串作为文件名保存。
除了删除行外,一切正常。它正确地遍历它们,并理解何时应该删除一行,但输出的文件仍然包含所有应该删除的行。
`del row` 在这种情况下不是正确的命令吗?
使用 csv
我认为这应该可行:
import csv
import os
import glob
def prepare_file(time, mkt):
    """Filter the newest downloaded report and save it under the market's name.

    Walks every file in ``<cwd>/Reports``. When ``time`` is newer than the
    module-level ``previous_time``, the file is copied to ``<mkt[0]>.csv`` in
    the same directory, keeping only the rows whose second column appears in
    the module-level ``changed_addresses``; the original file is then removed.

    Args:
        time: Timestamp compared against the global ``previous_time``.
        mkt: Sequence whose first item is used as the output file name.
    """
    global previous_time
    reports_dir = os.path.join(os.getcwd(), 'Reports')
    for file in glob.glob(os.path.join(reports_dir, '*')):
        # Once previous_time is updated, the remaining files fail this test,
        # so at most one file is processed per call.
        if time > previous_time:
            previous_time = time
            target = os.path.join(reports_dir, mkt[0] + '.csv')
            # `with` closes both files even if a row raises part-way through.
            with open(file, 'r', newline='') as fin, \
                    open(target, 'w', newline='') as fout:
                writer = csv.writer(fout)
                # Keep rows whose address changed; rows too short to carry an
                # address column (e.g. blank lines) are dropped.
                writer.writerows(
                    row for row in csv.reader(fin)
                    if len(row) > 1 and row[1] in changed_addresses
                )
            # remove original
            os.remove(file)
所以首先打开名称为 file
的数据文件,然后用新名称保存它。
使用 pyexcel 时,需要用如下语法:
`del sheet.row[index]`,或者 `del sheet.row[index1, index2, index3]`
示例代码如下:
def prepare_file(time, mkt):
    """Filter the newest downloaded report and save it under the market's name.

    Walks every file in ``<cwd>/Reports``. When ``time`` is newer than the
    module-level ``previous_time``, the file is loaded with pyexcel, every row
    whose second column is not in the module-level ``changed_addresses`` is
    deleted, the sheet is saved as ``<mkt[0]>.csv`` in the same directory, and
    the original file is removed.

    Args:
        time: Timestamp compared against the global ``previous_time``.
        mkt: Sequence whose first item is used as the output file name.
    """
    global previous_time
    reports_dir = os.path.join(os.getcwd(), 'Reports')
    for file in glob.glob(os.path.join(reports_dir, '*')):
        # if it's the most recently downloaded file
        if time > previous_time:
            previous_time = time
            # remove rows for properties that have not changed status
            sheet = pyexcel.get_sheet(file_name=file)
            # Collect the indices first and delete them in one call, so the
            # sheet is not mutated while it is being iterated.
            indices_to_be_removed = [
                index for index, row in enumerate(sheet)
                if row[1] not in changed_addresses
            ]
            del sheet.row[indices_to_be_removed]
            # save file as correct name
            sheet.save_as(os.path.join(reports_dir, mkt[0] + '.csv'))
            os.remove(file)
或者,您可以编写一个过滤器。这种替代方法的优点是内存占用可控,因此能够处理巨大的数据文件:
def filter(file_name, changed_addresses):
    """Yield the rows of *file_name* whose second column is in *changed_addresses*.

    Rows are read through ``pyexcel.iget_array`` and yielded one at a time.

    NOTE(review): this name shadows the builtin ``filter`` in this module;
    it is kept because callers refer to it by this name.

    Args:
        file_name: Path of the data file to read.
        changed_addresses: Container of addresses to keep.

    Yields:
        Each row whose ``row[1]`` is found in ``changed_addresses``.
    """
    for row in pyexcel.iget_array(file_name=file_name):
        if row[1] in changed_addresses:
            yield row
def prepare_file(time, mkt):
    """Stream-filter the newest downloaded report into a file named after the market.

    Walks every file in ``<cwd>/Reports``. When ``time`` is newer than the
    module-level ``previous_time``, the rows produced by ``filter`` (those
    whose address is in the module-level ``changed_addresses``) are written
    to ``<mkt[0]>.csv`` in the same directory via ``pyexcel.isave_as``, and
    the original file is removed.

    Args:
        time: Timestamp compared against the global ``previous_time``.
        mkt: Sequence whose first item is used as the output file name.
    """
    global previous_time
    reports_dir = os.path.join(os.getcwd(), 'Reports')
    for file in glob.glob(os.path.join(reports_dir, '*')):
        # if it's the most recently downloaded file
        if time > previous_time:
            previous_time = time
            # remove rows for properties that have not changed status
            pyexcel.isave_as(
                array=filter(file, changed_addresses),
                dest_file_name=os.path.join(reports_dir, mkt[0] + '.csv'),
            )
            # The streaming readers leave the source file handle open; close
            # it before deleting, since an open file cannot be removed on
            # Windows.
            pyexcel.free_resources()
            os.remove(file)
但请记得在代码末尾调用下面的函数,它会关闭所有 csv 文件句柄:
# Close the csv file handles left open by pyexcel once all processing is done.
pyexcel.free_resources()
这是我的函数:
def prepare_file(time, mkt):
    """Save the newest downloaded report under the market's name.

    Walks every file in ``<cwd>\\Reports``. When ``time`` is newer than the
    module-level ``previous_time``, the file is loaded with pyexcel, saved as
    ``<mkt[0]>.csv`` in the same directory, and the original is removed.

    Args:
        time: Timestamp compared against the global ``previous_time``.
        mkt: Sequence whose first item is used as the output file name.
    """
    global previous_time
    for file in glob.glob(os.getcwd() + '\\Reports\\*'):
        # if it's the most recently downloaded file
        if time > previous_time:
            previous_time = time
            # remove rows for properties that have not changed status
            sheet = pyexcel.get_sheet(file_name=file)
            for row in sheet:
                if row[1] in changed_addresses:
                    pass
                else:
                    # NOTE: `del row` only unbinds the loop variable; the
                    # sheet still holds the row, so the saved file keeps
                    # every row. This is the behaviour being asked about.
                    del row
            # save file as correct name
            sheet.save_as(
                os.getcwd() + '\\Reports\\' + mkt[0] + '.csv'
            )
            os.remove(file)
思路是:在目录中找到最近下载的文件,打开它,删除所有地址不在 changed_addresses 列表中的行,然后用 mkt 列表中包含的字符串作为文件名保存。
除了删除行外,一切正常。它正确地遍历它们,并理解何时应该删除一行,但输出的文件仍然包含所有应该删除的行。
`del row` 在这种情况下不是正确的命令吗?
使用 csv
我认为这应该可行:
import csv
import os
import glob
def prepare_file(time, mkt):
    """Filter the newest downloaded report and save it under the market's name.

    Walks every file in ``<cwd>/Reports``. When ``time`` is newer than the
    module-level ``previous_time``, the file is copied to ``<mkt[0]>.csv`` in
    the same directory, keeping only the rows whose second column appears in
    the module-level ``changed_addresses``; the original file is then removed.

    Args:
        time: Timestamp compared against the global ``previous_time``.
        mkt: Sequence whose first item is used as the output file name.
    """
    global previous_time
    reports_dir = os.path.join(os.getcwd(), 'Reports')
    for file in glob.glob(os.path.join(reports_dir, '*')):
        # Once previous_time is updated, the remaining files fail this test,
        # so at most one file is processed per call.
        if time > previous_time:
            previous_time = time
            target = os.path.join(reports_dir, mkt[0] + '.csv')
            # `with` closes both files even if a row raises part-way through.
            with open(file, 'r', newline='') as fin, \
                    open(target, 'w', newline='') as fout:
                writer = csv.writer(fout)
                # Keep rows whose address changed; rows too short to carry an
                # address column (e.g. blank lines) are dropped.
                writer.writerows(
                    row for row in csv.reader(fin)
                    if len(row) > 1 and row[1] in changed_addresses
                )
            # remove original
            os.remove(file)
所以首先打开名称为 file
的数据文件,然后用新名称保存它。
使用 pyexcel 时,需要用如下语法:
`del sheet.row[index]`,或者 `del sheet.row[index1, index2, index3]`
示例代码如下:
def prepare_file(time, mkt):
    """Filter the newest downloaded report and save it under the market's name.

    Walks every file in ``<cwd>/Reports``. When ``time`` is newer than the
    module-level ``previous_time``, the file is loaded with pyexcel, every row
    whose second column is not in the module-level ``changed_addresses`` is
    deleted, the sheet is saved as ``<mkt[0]>.csv`` in the same directory, and
    the original file is removed.

    Args:
        time: Timestamp compared against the global ``previous_time``.
        mkt: Sequence whose first item is used as the output file name.
    """
    global previous_time
    reports_dir = os.path.join(os.getcwd(), 'Reports')
    for file in glob.glob(os.path.join(reports_dir, '*')):
        # if it's the most recently downloaded file
        if time > previous_time:
            previous_time = time
            # remove rows for properties that have not changed status
            sheet = pyexcel.get_sheet(file_name=file)
            # Collect the indices first and delete them in one call, so the
            # sheet is not mutated while it is being iterated.
            indices_to_be_removed = [
                index for index, row in enumerate(sheet)
                if row[1] not in changed_addresses
            ]
            del sheet.row[indices_to_be_removed]
            # save file as correct name
            sheet.save_as(os.path.join(reports_dir, mkt[0] + '.csv'))
            os.remove(file)
或者,您可以编写一个过滤器。这种替代方法的优点是内存占用可控,因此能够处理巨大的数据文件:
def filter(file_name, changed_addresses):
    """Yield the rows of *file_name* whose second column is in *changed_addresses*.

    Rows are read through ``pyexcel.iget_array`` and yielded one at a time.

    NOTE(review): this name shadows the builtin ``filter`` in this module;
    it is kept because callers refer to it by this name.

    Args:
        file_name: Path of the data file to read.
        changed_addresses: Container of addresses to keep.

    Yields:
        Each row whose ``row[1]`` is found in ``changed_addresses``.
    """
    for row in pyexcel.iget_array(file_name=file_name):
        if row[1] in changed_addresses:
            yield row
def prepare_file(time, mkt):
    """Stream-filter the newest downloaded report into a file named after the market.

    Walks every file in ``<cwd>/Reports``. When ``time`` is newer than the
    module-level ``previous_time``, the rows produced by ``filter`` (those
    whose address is in the module-level ``changed_addresses``) are written
    to ``<mkt[0]>.csv`` in the same directory via ``pyexcel.isave_as``, and
    the original file is removed.

    Args:
        time: Timestamp compared against the global ``previous_time``.
        mkt: Sequence whose first item is used as the output file name.
    """
    global previous_time
    reports_dir = os.path.join(os.getcwd(), 'Reports')
    for file in glob.glob(os.path.join(reports_dir, '*')):
        # if it's the most recently downloaded file
        if time > previous_time:
            previous_time = time
            # remove rows for properties that have not changed status
            pyexcel.isave_as(
                array=filter(file, changed_addresses),
                dest_file_name=os.path.join(reports_dir, mkt[0] + '.csv'),
            )
            # The streaming readers leave the source file handle open; close
            # it before deleting, since an open file cannot be removed on
            # Windows.
            pyexcel.free_resources()
            os.remove(file)
但请记得在代码末尾调用下面的函数,它会关闭所有 csv 文件句柄:
# Close the csv file handles left open by pyexcel once all processing is done.
pyexcel.free_resources()