Python 将列表附加到字典。列表被分成单独的字符而不是整个

Python appending a list to a dictionary. List is split into individual chars instead of the whole

我有一个电子表格。它有识别号码,然后是与这些号码相关联的多个代码。 例如;

ID Code
1 ABC1234
1 CBA1234
2 ABS1234
2 DEF3456

等...

我正在尝试遍历信息并创建以 ID 作为键并以代码列表作为值的字典。我的问题是所有值都被拆分为单个字符。 IE; {1: ['A', 'B', 'C', '1', '2', '3', '4', 'C', 'B', 'A'、“1”、“2”、“3”、“4”]}

我的密码是

from openpyxl import *

FilePath = "File.xlsx"

wb = load_workbook(filename=FilePath)


Sheet = wb.active
Sheet1 = wb['Sheet1']
Sheet2 = wb['Sheet2']
Dict1 = {}
UnitPlace = Sheet1.iter_cols(min_row=2, max_row=2, min_col=18, max_col=110)
Dict = {}
def add_values_in_dict(sample_dict, key, list_of_values):
    if key not in sample_dict:
        sample_dict[key] = list()
    sample_dict[key].extend(list_of_values)
    return sample_dict

def read():
    for SID2 in Sheet2['A']:
        UnitInSheet2 = Sheet2['D'+(str(SID2.row))].value
        Dict1 = add_values_in_dict(Dict, SID2.value, UnitInSheet2)
        print (Dict1)
        for SID1 in Sheet1['D']:
            Rows = SID1.row
read()

更新:使用 Openpyxl 读取 Excel 文件

我们可以使用 Openpyxl 迭代行。然后我们将 ID-codes 存储为字典,其中 ID 是键,Code 是值列表。

from openpyxl import load_workbook


def read_file_openpyxl(file_path):
    wb = load_workbook(filename=file_path)
    sheet_1 = wb['Sheet1']
    frequency = {}
    for row in sheet_1.values:
        if type(row[0]) == int:
            id = row[0]
            code = row[1]
            if id in frequency:
                frequency[id].append(code)
            else:
                frequency[id] = [code]
    return frequency


if __name__ == "__main__":
    FilePath = "dummy.xlsx"
    print(read_file_openpyxl(FilePath))

输出:

{1: ['ABC1234', 'CBA1234'], 2: ['ABS1234', 'DEF3456']}

使用Pandas的替代解决方案:

或者,您可以使用 pandas.read_excel 方法读取 excel 文件。

import pandas as pd


def read_data(file_path):
    dataset = pd.read_excel(file_path)
    ids = list(dataset["ID"])
    codes = list(dataset["Code"])
    frequency = {}
    for i in range(len(ids)):
        id = ids[i]
        code = codes[i]
        if id in frequency:
            frequency[id].append(code)
        else:
            frequency[id] = [code]
    return frequency


if __name__ == "__main__":
    FilePath = "dummy.xlsx"
    freq = read_data(FilePath)
    print(freq)

输出:

{1: ['ABC1234', 'CBA1234'], 2: ['ABS1234', 'DEF3456']}

解释:

  • 首先,我们使用pandas.read_excel方法读取excel文件。
  • 然后我们将 idscodes 与 excel sheet 的行分开。
  • 我们使用名为 frequency 的字典来存储每个唯一 id.
  • 出现的 code

参考文献:

我更喜欢列表理解,下面是我的解决方案

# Import openpyxl
# Note: openpyxl package provides both read and write capabilities to excel
import openpyxl
import os


# Class definitions should use CamelCase convention based on pep-8 guidelines
class CustomOpenpyxl:

    # Initialize the class with filename as only argument
    def __init__(self, _my_file_name):
        assert _my_file_name.split('.')[-1] == 'xlsx', 'Input file is not xlsx'
        self.my_filename = _my_file_name
        self.my_base_wb = openpyxl.load_workbook(self.my_filename, read_only=False)
        # following line will get the names of worksheets in the workbook
        self.ws_names_in_my_base_wb = self.my_base_wb.sheetnames
        # following line will set the last worksheet in the workbook as active
        self.my_base_active_ws = self.my_base_wb.active

    # Method to set a specific worksheet as active where column names are in row#1
    # Argument to this method is: - worksheet name
    def active_ws(self, _ws_name):
        # if the worksheet name exists in the workbook
        if self.if_ws_in_wb(_ws_name):
            self.my_base_active_ws = self.my_base_wb[_ws_name]
        else:
            print('Worksheet {} not found in workbook'.format(_ws_name))

    # Method to check if a given worksheet exists in workbook
    # Argument to this method is: workbook name
    def if_ws_in_wb(self, _ws_name):
        # if worksheet exists in list of worksheets of workbook returns True else False
        if _ws_name in self.ws_names_in_my_base_wb:
            return True
        return False

    # Create a dictionary from excel cell values
    # No arguments to this method
    def create_dict_from_values(self):
        # Create an empty dictionary
        _my_dict = dict()
        # get the unique keys (numeric) from the first column of the worksheet
        _my_keys = set([_row[0] for _row in self.my_base_active_ws.values if str(_row[0]).isdigit()])
        # Iterate over the keys and add the values as a list
        for _ in _my_keys:
            # using list comprehension add the values to the key or keys
            _my_dict[_] = [_row[1] for _row in self.my_base_active_ws.values if _row[0] == _]
        return _my_dict


_my_file_path = os.getcwd()
_my_file_name = 'Book1.xlsx'

# Instantiate an object using the newly created class in this code block
# So that this object gets access to all methods in the class
_my_src_obj = CustomOpenpyxl(_my_file_name)
print(_my_src_obj.create_dict_from_values())
####################################################################PRINT Result################################################################################

{1: ['ABC1234', 'CBA1234'], 2: ['ABS1234', 'DEF3456']}

Process finished with exit code 0