删除空行 - openpyxl
Delete empty row - openpyxl
在花了最后几个小时试图找到一种方法之后,我决定直接问问。
我已经把 openpyxl 的文档翻了不止几遍,也看过这里已有的相关问题,甚至读了这本在线书籍中的相关章节,但其中没有一个真正回答了我想要做的事情。
这是我现在拥有的代码:
# Asker's original attempt, reproduced as posted with only the lost
# indentation restored. The defects are intentionally left in place because
# the question and the answers discuss them.
for row in ws.iter_rows():
    i = 1
    if row[i].internal_value() == None:
        ws.Rows(i).Delete()
    else:
        i + 1
我已经用这个尝试了很多不同的东西,现在我遇到了一个错误:
TypeError: 'NoneType' object is not callable
我做错了什么,我该如何解决它以便我遍历所有行并删除任何完全为空的行,或者(如果它更容易实现)有一个空的第一个单元格?
谢谢
据我所知,openpyxl 没有提供删除行的方法。您可以使用 COM 代替,例如:
import win32com.client
filename = 'c:/my_file.xlsx'
sheetname = 'Sheet1'
XL_SHIFT_UP = -4162  # Excel's xlShiftUp: cells below move up after a delete

xl = win32com.client.DispatchEx('Excel.Application')
try:
    wb = xl.Workbooks.Open(Filename=filename)
    try:
        ws = wb.Sheets(sheetname)
        used = ws.UsedRange
        begrow = 1
        # UsedRange need not start at row 1, so the last used row is its
        # first row plus its height, not just its height.
        endrow = used.Row + used.Rows.Count - 1
        # Walk bottom-up: deleting a row shifts everything below it up by
        # one, so a top-down loop would skip the row that follows each
        # deleted row.
        for row in range(endrow, begrow - 1, -1):  # just an example
            cell = ws.Range('A{}'.format(row))
            if cell.Value is None:
                cell.EntireRow.Delete(Shift=XL_SHIFT_UP)
        wb.Save()
    finally:
        # Changes were already saved above; on an error path, discard them
        # instead of leaving Excel waiting on a save prompt.
        wb.Close(SaveChanges=False)
finally:
    # Always shut the Excel instance down, even if something failed.
    xl.Quit()
openpyxl 不提供这种可能性的原因有很多,但您也许可以根据此代码段解决一些问题:
https://bitbucket.org/snippets/openpyxl/qyzKn
否则可以看看 xlwings:它可以远程控制 Excel,而不必直接与 COM 打交道。
2018年更新:今天在搜索如何删除一行,发现openpyxl 2.5.0-b2增加了这个功能。刚试过,效果很好。
这是我找到答案的 link:https://bitbucket.org/openpyxl/openpyxl/issues/964/delete_rows-does-not-work-on-deleting
下面是删除一行的语法:
ws.delete_rows(index, 1)
其中:
'ws' 是工作表,
'index' 是行号,并且
'1' 是要删除的行数。
还有删除列的功能,不过我没试过。
下面的代码可能对某些人有用:
# Collect the index of every row whose column-A cell is empty.
# ws.max_row is the last row index (inclusive), hence the "+ 1".
index_row = [
    i for i in range(1, ws.max_row + 1)
    if ws.cell(row=i, column=1).value is None
]
# Delete from the bottom up: removing a row only shifts the rows below it,
# so the indexes still waiting to be processed stay valid and no manual
# offset correction is needed.
for row_del in reversed(index_row):
    ws.delete_rows(idx=row_del, amount=1)
openpyxl.worksheet.worksheet.Worksheet.insert_rows()
openpyxl.worksheet.worksheet.Worksheet.insert_cols()
openpyxl.worksheet.worksheet.Worksheet.delete_rows()
openpyxl.worksheet.worksheet.Worksheet.delete_cols()
特定行:
ws.insert_rows(7)
列范围(行相同):
ws.delete_cols(6, 3)
(这是2018的功能,记得升级:python3 -m pip install openpyxl --upgrade)
我发现的一个原因是:每次循环运行时,你都把 i 的值重新初始化为 1。应改为在循环之前初始化,如下所示:
# Initialise the row counter once, before the loop.
i = 1
# Visit each original row exactly once; max_row is read before any deletion.
for _ in range(ws.max_row):
    if ws.cell(row=i, column=1).value is None:
        # The rows below shift up into position i, so do not advance.
        ws.delete_rows(i)
    else:
        i += 1
剩下的看完完整代码就可以回答了。
此脚本遍历工作簿中的所有工作表,并删除 B 列的值出现在列表 "rows_to_delete" 中的行。运行脚本之前,请务必先移除所有表格(table)格式;换句话说,需要先将表格转换为普通区域。
import openpyxl
# Column-B values that mark a row for deletion.
rows_to_delete = [None, '', ' ']
# NOTE: `wb` is not created in this snippet; it is assumed to be a workbook
# that was already loaded, and it still has to be saved afterwards.
for sheet_name in wb.sheetnames:
    print(f'Now in sheet: {sheet_name}')
    ws = wb[sheet_name]
    # Walk column B bottom-up (last row included) so that deleting a row
    # never shifts a row that has not been inspected yet.
    for row_idx in range(ws.max_row, 0, -1):
        if ws.cell(row=row_idx, column=2).value in rows_to_delete:
            print(f'Deleting Row: {row_idx}')
            ws.delete_rows(row_idx)
可以应用相同的逻辑来删除空列。
from openpyxl import *
import numpy as np
import os
path = "filepath"
# Skip names starting with '~' (presumably Excel's temporary lock files).
workbooks = [name for name in os.listdir(path) if not name.startswith('~')]
for workbook in workbooks:
    wb2 = load_workbook(os.path.join(path, workbook))
    for sheet in wb2.worksheets:
        max_row_in_sheet = sheet.max_row
        max_col_in_sheet = sheet.max_column
        # A row is empty when every cell up to the last used column is None.
        empty_rows = [
            r for r in range(1, max_row_in_sheet + 1)
            if all(
                sheet.cell(row=r, column=c).value is None
                for c in range(1, max_col_in_sheet + 1)
            )
        ]
        # Delete bottom-up so the remaining indexes are not shifted.
        for r in reversed(empty_rows):
            sheet.delete_rows(r, 1)
    # As in the original, the result is written under the bare file name,
    # i.e. into the current working directory, not back into `path`.
    wb2.save(workbook)
这适用于行和列:
import openpyxl
from openpyxl import *
import numpy as np
wb2 = openpyxl.load_workbook('/content/Drafts .xlsx')
for sheet in wb2.worksheets:
    print('Your currently in ', sheet)
    max_row_in_sheet = sheet.max_row
    max_col_in_sheet = sheet.max_column
    print(max_row_in_sheet, max_col_in_sheet)

    # Pass 1: find rows in which every cell is None, then delete them
    # bottom-up so the pending row indexes are not shifted.
    empty_rows = [
        r for r in range(1, max_row_in_sheet + 1)
        if all(
            sheet.cell(row=r, column=c).value is None
            for c in range(1, max_col_in_sheet + 1)
        )
    ]
    for r in reversed(empty_rows):
        sheet.delete_rows(r, 1)

    # The sheet dimensions may have changed after the row deletions.
    max_row_in_sheet = sheet.max_row
    max_col_in_sheet = sheet.max_column
    print('Maximum Rows and Cols after Removing')
    print(max_row_in_sheet, max_col_in_sheet)
    print('======================================')

    # Pass 2: count the None cells per column.
    col_arr = [0] * max_col_in_sheet
    for r in range(1, max_row_in_sheet + 1):
        for c in range(1, max_col_in_sheet + 1):
            if sheet.cell(row=r, column=c).value is None:
                col_arr[c - 1] += 1
    print(col_arr)

    # A column is empty only when it is None in every remaining row. The
    # earlier version used the None columns of the last row alone, which
    # could delete columns that still hold data in other rows.
    empty_cols = [
        c for c, none_count in enumerate(col_arr, start=1)
        if none_count == max_row_in_sheet
    ]
    print(len(empty_cols))
    print(empty_cols)
    # Delete right-to-left so the pending column indexes stay valid.
    for c in reversed(empty_cols):
        sheet.delete_cols(c, 1)
wb2.save('/content/outputs.xlsx')
在花了最后几个小时试图找到一种方法之后,我决定直接问问。
我已经把 openpyxl 的文档翻了不止几遍,也看过这里已有的相关问题,但其中没有一个真正回答了我想要做的事情。
这是我现在拥有的代码:
# Asker's original attempt, reproduced as posted with only the lost
# indentation restored. The defects are intentionally left in place because
# the question and the answers discuss them.
for row in ws.iter_rows():
    i = 1
    if row[i].internal_value() == None:
        ws.Rows(i).Delete()
    else:
        i + 1
我已经用这个尝试了很多不同的东西,现在我遇到了一个错误:
TypeError: 'NoneType' object is not callable
我做错了什么,我该如何解决它以便我遍历所有行并删除任何完全为空的行,或者(如果它更容易实现)有一个空的第一个单元格?
谢谢
据我所知,openpyxl 没有提供删除行的方法。您可以使用 COM 代替,例如:
import win32com.client
filename = 'c:/my_file.xlsx'
sheetname = 'Sheet1'
XL_SHIFT_UP = -4162  # Excel's xlShiftUp: cells below move up after a delete

xl = win32com.client.DispatchEx('Excel.Application')
try:
    wb = xl.Workbooks.Open(Filename=filename)
    try:
        ws = wb.Sheets(sheetname)
        used = ws.UsedRange
        begrow = 1
        # UsedRange need not start at row 1, so the last used row is its
        # first row plus its height, not just its height.
        endrow = used.Row + used.Rows.Count - 1
        # Walk bottom-up: deleting a row shifts everything below it up by
        # one, so a top-down loop would skip the row that follows each
        # deleted row.
        for row in range(endrow, begrow - 1, -1):  # just an example
            cell = ws.Range('A{}'.format(row))
            if cell.Value is None:
                cell.EntireRow.Delete(Shift=XL_SHIFT_UP)
        wb.Save()
    finally:
        # Changes were already saved above; on an error path, discard them
        # instead of leaving Excel waiting on a save prompt.
        wb.Close(SaveChanges=False)
finally:
    # Always shut the Excel instance down, even if something failed.
    xl.Quit()
openpyxl 不提供这种可能性的原因有很多,但您也许可以根据此代码段解决一些问题: https://bitbucket.org/snippets/openpyxl/qyzKn
否则可以看看 xlwings:它可以远程控制 Excel,而不必直接与 COM 打交道。
2018年更新:今天在搜索如何删除一行,发现openpyxl 2.5.0-b2增加了这个功能。刚试过,效果很好。 这是我找到答案的 link:https://bitbucket.org/openpyxl/openpyxl/issues/964/delete_rows-does-not-work-on-deleting
下面是删除一行的语法:
ws.delete_rows(index, 1)
其中: 'ws' 是工作表, 'index' 是行号,并且 '1' 是要删除的行数。
还有删除列的功能,不过我没试过。
下面的代码可能对某些人有用:
# Collect the index of every row whose column-A cell is empty.
# ws.max_row is the last row index (inclusive), hence the "+ 1".
index_row = [
    i for i in range(1, ws.max_row + 1)
    if ws.cell(row=i, column=1).value is None
]
# Delete from the bottom up: removing a row only shifts the rows below it,
# so the indexes still waiting to be processed stay valid and no manual
# offset correction is needed.
for row_del in reversed(index_row):
    ws.delete_rows(idx=row_del, amount=1)
openpyxl.worksheet.worksheet.Worksheet.insert_rows()
openpyxl.worksheet.worksheet.Worksheet.insert_cols()
openpyxl.worksheet.worksheet.Worksheet.delete_rows()
openpyxl.worksheet.worksheet.Worksheet.delete_cols()
特定行:
ws.insert_rows(7)
列范围(行相同):
ws.delete_cols(6, 3)
(这是2018的功能,记得升级:python3 -m pip install openpyxl --upgrade)
我发现的一个原因是:每次循环运行时,你都把 i 的值重新初始化为 1。应改为在循环之前初始化,如下所示:
# Initialise the row counter once, before the loop.
i = 1
# Visit each original row exactly once; max_row is read before any deletion.
for _ in range(ws.max_row):
    if ws.cell(row=i, column=1).value is None:
        # The rows below shift up into position i, so do not advance.
        ws.delete_rows(i)
    else:
        i += 1
剩下的看完完整代码就可以回答了。
此脚本遍历工作簿中的所有工作表,并删除 B 列的值出现在列表 "rows_to_delete" 中的行。运行脚本之前,请务必先移除所有表格(table)格式;换句话说,需要先将表格转换为普通区域。
import openpyxl
# Column-B values that mark a row for deletion.
rows_to_delete = [None, '', ' ']
# NOTE: `wb` is not created in this snippet; it is assumed to be a workbook
# that was already loaded, and it still has to be saved afterwards.
for sheet_name in wb.sheetnames:
    print(f'Now in sheet: {sheet_name}')
    ws = wb[sheet_name]
    # Walk column B bottom-up (last row included) so that deleting a row
    # never shifts a row that has not been inspected yet.
    for row_idx in range(ws.max_row, 0, -1):
        if ws.cell(row=row_idx, column=2).value in rows_to_delete:
            print(f'Deleting Row: {row_idx}')
            ws.delete_rows(row_idx)
可以应用相同的逻辑来删除空列。
from openpyxl import *
import numpy as np
import os
path = "filepath"
# Skip names starting with '~' (presumably Excel's temporary lock files).
workbooks = [name for name in os.listdir(path) if not name.startswith('~')]
for workbook in workbooks:
    wb2 = load_workbook(os.path.join(path, workbook))
    for sheet in wb2.worksheets:
        max_row_in_sheet = sheet.max_row
        max_col_in_sheet = sheet.max_column
        # A row is empty when every cell up to the last used column is None.
        empty_rows = [
            r for r in range(1, max_row_in_sheet + 1)
            if all(
                sheet.cell(row=r, column=c).value is None
                for c in range(1, max_col_in_sheet + 1)
            )
        ]
        # Delete bottom-up so the remaining indexes are not shifted.
        for r in reversed(empty_rows):
            sheet.delete_rows(r, 1)
    # As in the original, the result is written under the bare file name,
    # i.e. into the current working directory, not back into `path`.
    wb2.save(workbook)
这适用于行和列:
import openpyxl
from openpyxl import *
import numpy as np
wb2 = openpyxl.load_workbook('/content/Drafts .xlsx')
for sheet in wb2.worksheets:
    print('Your currently in ', sheet)
    max_row_in_sheet = sheet.max_row
    max_col_in_sheet = sheet.max_column
    print(max_row_in_sheet, max_col_in_sheet)

    # Pass 1: find rows in which every cell is None, then delete them
    # bottom-up so the pending row indexes are not shifted.
    empty_rows = [
        r for r in range(1, max_row_in_sheet + 1)
        if all(
            sheet.cell(row=r, column=c).value is None
            for c in range(1, max_col_in_sheet + 1)
        )
    ]
    for r in reversed(empty_rows):
        sheet.delete_rows(r, 1)

    # The sheet dimensions may have changed after the row deletions.
    max_row_in_sheet = sheet.max_row
    max_col_in_sheet = sheet.max_column
    print('Maximum Rows and Cols after Removing')
    print(max_row_in_sheet, max_col_in_sheet)
    print('======================================')

    # Pass 2: count the None cells per column.
    col_arr = [0] * max_col_in_sheet
    for r in range(1, max_row_in_sheet + 1):
        for c in range(1, max_col_in_sheet + 1):
            if sheet.cell(row=r, column=c).value is None:
                col_arr[c - 1] += 1
    print(col_arr)

    # A column is empty only when it is None in every remaining row. The
    # earlier version used the None columns of the last row alone, which
    # could delete columns that still hold data in other rows.
    empty_cols = [
        c for c, none_count in enumerate(col_arr, start=1)
        if none_count == max_row_in_sheet
    ]
    print(len(empty_cols))
    print(empty_cols)
    # Delete right-to-left so the pending column indexes stay valid.
    for c in reversed(empty_cols):
        sheet.delete_cols(c, 1)
wb2.save('/content/outputs.xlsx')