Openpyxl比较两个工作表中的值以查看值是否不存在
Openpyxl comparing values in two sheets to see if a value does not exist
我正在编写一个 Excel 比较程序,它接收一定数量的工作表,对它们进行比较,并检查其中一张工作表中的值是否不存在于另一张工作表中。但是,我遇到了结果重复的问题。为了说明清楚,这是我的代码:
import tkinter as tk
from tkinter import filedialog
import openpyxl, os, csv
from openpyxl.utils import get_column_letter, column_index_from_string
# CSV report: opened at module level and written to through outputWriter.
outputFile = open("output.csv", mode="w", newline="")
outputWriter = csv.writer(outputFile)

# Create the Tk root and hide it so that only the file dialogs are shown.
root = tk.Tk()
root.withdraw()

# Ask on the console how many workbooks will be compared.
number = int(input("Enter number of workbook sheets for comparison: "))
# Functions for generating file paths
def generate_file_path():
    """Show an "Open Workbook" file dialog and return the chosen path."""
    return filedialog.askopenfilename(title="Open Workbook")
# Worksheets collected by core(), one per workbook opened; index 0 is the
# first workbook the user selected. Only worksheets are stored here, not
# file paths or workbook objects.
all_ws = []
# Core function for program's logistics
def core():
# Prompts for `number` workbooks, keeps the 'CBF' sheet of each one, then
# writes values to the CSV through outputWriter. Takes no arguments; it reads
# the module-level names number, all_ws and outputWriter.
# NOTE(review): the indentation of this snippet was lost, so the nesting of
# the loops below has to be inferred from the statement order.
# Open one file dialog per workbook and keep its 'CBF' worksheet
for x in range(number):
path = generate_file_path()
wb = openpyxl.load_workbook(path)
ws = wb['CBF']
all_ws.append(ws)
# Collect every cell value of the SECOND worksheet (index 1); iter_cols()
# walks the sheet column by column, so `row` here is actually a column.
# NOTE(review): sheet_cols is never assigned in the visible code, so the
# append below would raise NameError unless it is defined elsewhere.
for row in all_ws[1].iter_cols():
for cellz in row:
sheet_cols.append(cellz.value)
# Pair up the cells of worksheets 0 and 1 position by position
# NOTE(review): ws_diff is never used in the loop body, so the same
# worksheet-0 / worksheet-1 comparison appears to be repeated `number` times.
for ws_diff in range(number):
for row,row2 in zip(all_ws[0].iter_cols(),all_ws[1].iter_cols()):
for cell,cell2 in zip(row,row2):
# The test uses the worksheet-0 value, but the row written is the
# worksheet-1 value found at the same position.
if cell.value not in sheet_cols:
outputWriter.writerow([str(cell2.value)])
但是,当我检查 csv 文件时,输出的“差异”(differences)似乎在两个文件中都存在。有没有人能给我一些建议?任何帮助都非常感谢,谢谢!
我尽量保持与您提供的源代码接近,只做了一些改动。我所做的最大更改是为 csv 输出添加了更多列,并增加了一个参数,用来告诉核心函数您是否关心单元格的位置;也就是说,您是要:
1.) 将第一个工作表之后的每个工作表的每个单元格与第一个工作表中相同单元格坐标处的值进行比较 (location_matters=True)
还是要:
2.) 将第一个工作表之后的每个工作表的每个单元格与第一个工作表中出现的所有值进行比较 (location_matters=False)
这是我修改后的代码:
import tkinter as tk
from tkinter import filedialog
import openpyxl, os, csv
from openpyxl.utils import get_column_letter, column_index_from_string
# CSV report: opened at module level and written to through outputWriter.
outputFile = open("output.csv", mode="w", newline="")
outputWriter = csv.writer(outputFile)
# Header row describing the three columns every later row carries.
outputWriter.writerow(["Worksheet Number", "Value Not Found", "Location of Cell"])

# Create the Tk root and hide it so that only the file dialogs are shown.
root = tk.Tk()
root.withdraw()

# Ask on the console how many workbooks will be compared.
number = int(input("Enter number of workbook sheets for comparison: "))
# Functions for generating file paths
def generate_file_path():
    """Show an "Open Workbook" file dialog and return the chosen path."""
    return filedialog.askopenfilename(title="Open Workbook")
# Worksheets collected by core(), one per workbook opened; index 0 is the
# first workbook the user selected and serves as the comparison reference.
# Only worksheets are stored here, not file paths or workbook objects.
all_ws = []
# Core function for program's logistics
def core(number_of_worksheets, location_matters=False):
    """Compare every selected worksheet against the first one and log differences.

    Opens one file dialog per workbook, keeps the 'CBF' sheet of each, and
    writes one CSV row per mismatch through the module-level ``outputWriter``.
    The selected worksheets are appended to the module-level ``all_ws`` and
    the Tk root is destroyed once all files have been chosen.

    Args:
        number_of_worksheets: How many workbooks to ask the user for.
        location_matters: When True, each cell is compared with the cell at
            the same position in the first worksheet and the row records that
            coordinate. When False, a cell is reported only if its value does
            not occur anywhere in the first worksheet (location is "N/A").
    """
    # Ask for each workbook in turn and keep its 'CBF' worksheet.
    for _ in range(number_of_worksheets):
        path = generate_file_path()
        wb = openpyxl.load_workbook(path)
        all_ws.append(wb['CBF'])
    root.destroy()  # the dialogs are finished, so Tk is no longer needed

    reference_ws = all_ws[0]

    # Gather the reference values once; a set makes every later membership
    # test O(1) instead of rescanning a list for each cell.
    reference_values = set()
    for column in reference_ws.iter_cols():
        reference_values.update(cell.value for cell in column)

    # Use the parameter rather than the module-level ``number`` so the
    # function honours the count it was actually called with.
    for ws_diff in range(1, number_of_worksheets):
        other_ws = all_ws[ws_diff]
        if location_matters:
            # Position-by-position comparison against the reference sheet.
            for ref_col, other_col in zip(reference_ws.iter_cols(), other_ws.iter_cols()):
                for ref_cell, other_cell in zip(ref_col, other_col):
                    if ref_cell.value != other_cell.value:
                        outputWriter.writerow(
                            [ws_diff, str(other_cell.value), ref_cell.coordinate]
                        )
        else:
            # Walk the other sheet on its own: zipping it with the reference
            # sheet would silently skip cells outside the reference's range.
            for other_col in other_ws.iter_cols():
                for other_cell in other_col:
                    if other_cell.value not in reference_values:
                        outputWriter.writerow([ws_diff, str(other_cell.value), "N/A"])
if __name__ == '__main__':
    try:
        core(number_of_worksheets=number, location_matters=False)
    finally:
        # Close the report explicitly so buffered rows are flushed to disk
        # even if the comparison raises part-way through.
        outputFile.close()
也许你可以这样做:
将:for row in all_ws[1].iter_cols():
改为:for row in all_ws[0].iter_cols():
然后:
将:if cell.value not in sheet_cols:
改为:if cell2.value in sheet_cols:
希望对您有所帮助:)
我正在编写一个 Excel 比较程序,它接收一定数量的工作表,对它们进行比较,并检查其中一张工作表中的值是否不存在于另一张工作表中。但是,我遇到了结果重复的问题。为了说明清楚,这是我的代码:
import tkinter as tk
from tkinter import filedialog
import openpyxl, os, csv
from openpyxl.utils import get_column_letter, column_index_from_string
# CSV report: opened at module level and written to through outputWriter.
outputFile = open("output.csv", mode="w", newline="")
outputWriter = csv.writer(outputFile)

# Create the Tk root and hide it so that only the file dialogs are shown.
root = tk.Tk()
root.withdraw()

# Ask on the console how many workbooks will be compared.
number = int(input("Enter number of workbook sheets for comparison: "))
# Functions for generating file paths
def generate_file_path():
    """Show an "Open Workbook" file dialog and return the chosen path."""
    return filedialog.askopenfilename(title="Open Workbook")
# Worksheets collected by core(), one per workbook opened; index 0 is the
# first workbook the user selected. Only worksheets are stored here, not
# file paths or workbook objects.
all_ws = []
# Core function for program's logistics
def core():
# Prompts for `number` workbooks, keeps the 'CBF' sheet of each one, then
# writes values to the CSV through outputWriter. Takes no arguments; it reads
# the module-level names number, all_ws and outputWriter.
# NOTE(review): the indentation of this snippet was lost, so the nesting of
# the loops below has to be inferred from the statement order.
# Open one file dialog per workbook and keep its 'CBF' worksheet
for x in range(number):
path = generate_file_path()
wb = openpyxl.load_workbook(path)
ws = wb['CBF']
all_ws.append(ws)
# Collect every cell value of the SECOND worksheet (index 1); iter_cols()
# walks the sheet column by column, so `row` here is actually a column.
# NOTE(review): sheet_cols is never assigned in the visible code, so the
# append below would raise NameError unless it is defined elsewhere.
for row in all_ws[1].iter_cols():
for cellz in row:
sheet_cols.append(cellz.value)
# Pair up the cells of worksheets 0 and 1 position by position
# NOTE(review): ws_diff is never used in the loop body, so the same
# worksheet-0 / worksheet-1 comparison appears to be repeated `number` times.
for ws_diff in range(number):
for row,row2 in zip(all_ws[0].iter_cols(),all_ws[1].iter_cols()):
for cell,cell2 in zip(row,row2):
# The test uses the worksheet-0 value, but the row written is the
# worksheet-1 value found at the same position.
if cell.value not in sheet_cols:
outputWriter.writerow([str(cell2.value)])
但是,当我检查 csv 文件时,输出的“差异”(differences)似乎在两个文件中都存在。有没有人能给我一些建议?任何帮助都非常感谢,谢谢!
我尽量保持与您提供的源代码接近,只做了一些改动。我所做的最大更改是为 csv 输出添加了更多列,并增加了一个参数,用来告诉核心函数您是否关心单元格的位置;也就是说,您是要:
1.) 将第一个工作表之后的每个工作表的每个单元格与第一个工作表中相同单元格坐标处的值进行比较 (location_matters=True)
还是要:
2.) 将第一个工作表之后的每个工作表的每个单元格与第一个工作表中出现的所有值进行比较 (location_matters=False)
这是我修改后的代码:
import tkinter as tk
from tkinter import filedialog
import openpyxl, os, csv
from openpyxl.utils import get_column_letter, column_index_from_string
# CSV report: opened at module level and written to through outputWriter.
outputFile = open("output.csv", mode="w", newline="")
outputWriter = csv.writer(outputFile)
# Header row describing the three columns every later row carries.
outputWriter.writerow(["Worksheet Number", "Value Not Found", "Location of Cell"])

# Create the Tk root and hide it so that only the file dialogs are shown.
root = tk.Tk()
root.withdraw()

# Ask on the console how many workbooks will be compared.
number = int(input("Enter number of workbook sheets for comparison: "))
# Functions for generating file paths
def generate_file_path():
    """Show an "Open Workbook" file dialog and return the chosen path."""
    return filedialog.askopenfilename(title="Open Workbook")
# Worksheets collected by core(), one per workbook opened; index 0 is the
# first workbook the user selected and serves as the comparison reference.
# Only worksheets are stored here, not file paths or workbook objects.
all_ws = []
# Core function for program's logistics
def core(number_of_worksheets, location_matters=False):
    """Compare every selected worksheet against the first one and log differences.

    Opens one file dialog per workbook, keeps the 'CBF' sheet of each, and
    writes one CSV row per mismatch through the module-level ``outputWriter``.
    The selected worksheets are appended to the module-level ``all_ws`` and
    the Tk root is destroyed once all files have been chosen.

    Args:
        number_of_worksheets: How many workbooks to ask the user for.
        location_matters: When True, each cell is compared with the cell at
            the same position in the first worksheet and the row records that
            coordinate. When False, a cell is reported only if its value does
            not occur anywhere in the first worksheet (location is "N/A").
    """
    # Ask for each workbook in turn and keep its 'CBF' worksheet.
    for _ in range(number_of_worksheets):
        path = generate_file_path()
        wb = openpyxl.load_workbook(path)
        all_ws.append(wb['CBF'])
    root.destroy()  # the dialogs are finished, so Tk is no longer needed

    reference_ws = all_ws[0]

    # Gather the reference values once; a set makes every later membership
    # test O(1) instead of rescanning a list for each cell.
    reference_values = set()
    for column in reference_ws.iter_cols():
        reference_values.update(cell.value for cell in column)

    # Use the parameter rather than the module-level ``number`` so the
    # function honours the count it was actually called with.
    for ws_diff in range(1, number_of_worksheets):
        other_ws = all_ws[ws_diff]
        if location_matters:
            # Position-by-position comparison against the reference sheet.
            for ref_col, other_col in zip(reference_ws.iter_cols(), other_ws.iter_cols()):
                for ref_cell, other_cell in zip(ref_col, other_col):
                    if ref_cell.value != other_cell.value:
                        outputWriter.writerow(
                            [ws_diff, str(other_cell.value), ref_cell.coordinate]
                        )
        else:
            # Walk the other sheet on its own: zipping it with the reference
            # sheet would silently skip cells outside the reference's range.
            for other_col in other_ws.iter_cols():
                for other_cell in other_col:
                    if other_cell.value not in reference_values:
                        outputWriter.writerow([ws_diff, str(other_cell.value), "N/A"])
if __name__ == '__main__':
    try:
        core(number_of_worksheets=number, location_matters=False)
    finally:
        # Close the report explicitly so buffered rows are flushed to disk
        # even if the comparison raises part-way through.
        outputFile.close()
也许你可以这样做:
将:
for row in all_ws[1].iter_cols():
改为:
for row in all_ws[0].iter_cols():
然后:
将:if cell.value not in sheet_cols:
改为:if cell2.value in sheet_cols:
希望对您有所帮助:)