遍历以前过滤的行 openpyxl
iterate through previously filtered rows openpyxl
我编写了一个 python 代码,它加载 excel 工作簿,遍历指定列中的所有行,将这些行保存在字典中并将该字典写入 .txt文件。
引用的 vb 脚本在 openpyxl 之前打开工作簿并将其过滤为仅显示一些数据。
唯一的问题是,当openpyxl 遍历工作簿时,它记录的是每一个值,而不是过滤后的数据。
例如,如果原始电子表格是:
A B C
1 x x x
2 x y x
3 x x x
并且我筛选 B 列以仅显示包含 "x" 的行,然后保存工作簿。我希望 openpyxl 仅遍历第 1 行和第 3 行。
这是我的代码:
from openpyxl import load_workbook
from openpyxl import workbook
import os
from collections import defaultdict

# Run the VBScript first; per the accompanying description it opens the
# workbook, applies the filter and saves the result.
os.system(r"C:\script.vbs")

# Load the workbook that the script saved.
path = 'C:/public/temp/workbook.xlsm'
wb = load_workbook(filename = path)
# Index the workbook by sheet name; get_sheet_by_name() is deprecated.
ws = wb['Sheet3']

# One list per column of interest.
proj_name = []
proj_num = []
proj_status = []

# Walk the used row range once and read the three columns by coordinate.
# NOTE: this visits every row in the range, whether or not a filter in the
# saved workbook hides it.
for row_idx in range(ws.min_row, ws.max_row + 1):
    proj_name.append(ws['D{}'.format(row_idx)].value)
    proj_num.append(ws['R{}'.format(row_idx)].value)
    proj_status.append(ws['G{}'.format(row_idx)].value)

# Map project number -> [project name, project status]. If a project number
# occurs more than once, the last row wins.
dict1 = {
    num: [name, status]
    for num, name, status in zip(proj_num, proj_name, proj_status)
}

# The with-block closes the file; no explicit close() is needed.
with open(r"C:\public\list2.txt", "w") as text_file:
    text_file.write(str(dict1))
很遗憾,openpyxl 目前不提供"应用筛选"的功能。正如官方文档所说:"Filters and sorts can only be configured by openpyxl but will need to be applied in applications like Excel."
看来您可能必须找到其他解决方案...
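f 是过滤条件的示例:每个键是列字母,对应的值是该列允许出现的值。下面的 filter_data 要求各列的条件同时满足(列与列之间是"与"),同一列的多个值之间是"或";例如 C 列为 'CISCO' 或 'BD',并且 E 列为 'PAI' 或 'PAP',并且 H 列为 60。若要实现 'CISCO' 只配 'PAI'、'BD' 只配 'PAP' 这样的成对条件,需要分别调用 filter_data 再合并结果。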
# Example filter configuration: column letter -> values accepted in that column.
f = dict(
    C=["CISCO", "BD"],
    E=["PAI", "PAP"],
    H=[60],
)
from openpyxl import load_workbook
from openpyxl.utils.cell import column_index_from_string
def filter_data(rows, f_config, skip_header=False):
    """Return the rows whose cells satisfy every column filter in *f_config*.

    Args:
        rows: Iterable of rows. Each row must be indexable and hold objects
            exposing a ``.value`` attribute (for example ``sheet.rows`` or
            the list returned by a previous call to this function).
        f_config: Mapping of column to accepted value(s). A column is given
            either as a letter string (``"C"``) or as a 1-based index. A
            list, tuple, set or frozenset is treated as a collection of
            alternatives; any other object is treated as a single accepted
            value.
        skip_header: When true, the first row of *rows* is never returned.

    Returns:
        A list of the rows for which every configured column holds one of
        its accepted values. With an empty *f_config* there is nothing to
        reject, so every row (minus the optional header) is returned.
    """
    # Normalise the configuration once, outside the row loop: 0-based column
    # index -> collection of accepted values. Keying by index means a column
    # given both as a letter and as a number keeps only the later entry.
    criteria = {}
    for col, allowed in f_config.items():
        if isinstance(col, str):
            # Convert a column letter to its 1-based index (A=1, B=2, ...).
            col = column_index_from_string(col)
        if not isinstance(allowed, (list, tuple, set, frozenset)):
            allowed = [allowed]
        criteria[col - 1] = allowed

    output = []
    for n, row in enumerate(rows):
        if n == 0 and skip_header:
            # The first row is the header.
            continue
        # all() stops at the first failing column, like the original break.
        if all(row[idx].value in allowed for idx, allowed in criteria.items()):
            output.append(row)
    return output
# First pass: keep rows with "CISCO" in column C and "PAI" in column E,
# dropping the header row. `sheet` is assumed to be a worksheet loaded
# earlier; it is not defined in this snippet.
data1 = filter_data(
    rows=sheet.rows,
    f_config={"C": "CISCO", "E": "PAI"},
    skip_header=True,
)
# Second pass over the already-filtered rows: both "60" and 60 are listed so
# the match succeeds whether column H holds the value as text or as a number.
data2 = filter_data(rows=data1, f_config={"H": ["60", 60]})
我编写了一个 python 代码,它加载 excel 工作簿,遍历指定列中的所有行,将这些行保存在字典中并将该字典写入 .txt文件。
引用的 vb 脚本在 openpyxl 之前打开工作簿并将其过滤为仅显示一些数据。
唯一的问题是,当openpyxl 遍历工作簿时,它记录的是每一个值,而不是过滤后的数据。
例如,如果原始电子表格是:
A B C
1 x x x
2 x y x
3 x x x
并且我筛选 B 列以仅显示包含 "x" 的行,然后保存工作簿。我希望 openpyxl 仅遍历第 1 行和第 3 行。
这是我的代码:
from openpyxl import load_workbook
from openpyxl import workbook
import os
from collections import defaultdict

# Run the VBScript first; per the accompanying description it opens the
# workbook, applies the filter and saves the result.
os.system(r"C:\script.vbs")

# Load the workbook that the script saved.
path = 'C:/public/temp/workbook.xlsm'
wb = load_workbook(filename = path)
# Index the workbook by sheet name; get_sheet_by_name() is deprecated.
ws = wb['Sheet3']

# One list per column of interest.
proj_name = []
proj_num = []
proj_status = []

# Walk the used row range once and read the three columns by coordinate.
# NOTE: this visits every row in the range, whether or not a filter in the
# saved workbook hides it.
for row_idx in range(ws.min_row, ws.max_row + 1):
    proj_name.append(ws['D{}'.format(row_idx)].value)
    proj_num.append(ws['R{}'.format(row_idx)].value)
    proj_status.append(ws['G{}'.format(row_idx)].value)

# Map project number -> [project name, project status]. If a project number
# occurs more than once, the last row wins.
dict1 = {
    num: [name, status]
    for num, name, status in zip(proj_num, proj_name, proj_status)
}

# The with-block closes the file; no explicit close() is needed.
with open(r"C:\public\list2.txt", "w") as text_file:
    text_file.write(str(dict1))
很遗憾,openpyxl 目前不提供"应用筛选"的功能。正如官方文档所说:"Filters and sorts can only be configured by openpyxl but will need to be applied in applications like Excel."
看来您可能必须找到其他解决方案...
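f 是过滤条件的示例:每个键是列字母,对应的值是该列允许出现的值。下面的 filter_data 要求各列的条件同时满足(列与列之间是"与"),同一列的多个值之间是"或";例如 C 列为 'CISCO' 或 'BD',并且 E 列为 'PAI' 或 'PAP',并且 H 列为 60。若要实现 'CISCO' 只配 'PAI'、'BD' 只配 'PAP' 这样的成对条件,需要分别调用 filter_data 再合并结果。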
# Example filter configuration: column letter -> values accepted in that column.
f = dict(
    C=["CISCO", "BD"],
    E=["PAI", "PAP"],
    H=[60],
)
from openpyxl import load_workbook
from openpyxl.utils.cell import column_index_from_string
def filter_data(rows, f_config, skip_header=False):
    """Return the rows whose cells satisfy every column filter in *f_config*.

    Args:
        rows: Iterable of rows. Each row must be indexable and hold objects
            exposing a ``.value`` attribute (for example ``sheet.rows`` or
            the list returned by a previous call to this function).
        f_config: Mapping of column to accepted value(s). A column is given
            either as a letter string (``"C"``) or as a 1-based index. A
            list, tuple, set or frozenset is treated as a collection of
            alternatives; any other object is treated as a single accepted
            value.
        skip_header: When true, the first row of *rows* is never returned.

    Returns:
        A list of the rows for which every configured column holds one of
        its accepted values. With an empty *f_config* there is nothing to
        reject, so every row (minus the optional header) is returned.
    """
    # Normalise the configuration once, outside the row loop: 0-based column
    # index -> collection of accepted values. Keying by index means a column
    # given both as a letter and as a number keeps only the later entry.
    criteria = {}
    for col, allowed in f_config.items():
        if isinstance(col, str):
            # Convert a column letter to its 1-based index (A=1, B=2, ...).
            col = column_index_from_string(col)
        if not isinstance(allowed, (list, tuple, set, frozenset)):
            allowed = [allowed]
        criteria[col - 1] = allowed

    output = []
    for n, row in enumerate(rows):
        if n == 0 and skip_header:
            # The first row is the header.
            continue
        # all() stops at the first failing column, like the original break.
        if all(row[idx].value in allowed for idx, allowed in criteria.items()):
            output.append(row)
    return output
# First pass: keep rows with "CISCO" in column C and "PAI" in column E,
# dropping the header row. `sheet` is assumed to be a worksheet loaded
# earlier; it is not defined in this snippet.
data1 = filter_data(
    rows=sheet.rows,
    f_config={"C": "CISCO", "E": "PAI"},
    skip_header=True,
)
# Second pass over the already-filtered rows: both "60" and 60 are listed so
# the match succeeds whether column H holds the value as text or as a number.
data2 = filter_data(rows=data1, f_config={"H": ["60", 60]})