在 python 中将 excel 转换为 XML

Convert excel to XML in python

我正在尝试用 python 将 excel 数据库转换为 XML。我有一份交易数据,需要以 xml 格式导入系统。

我的代码如下:

# Load the worksheet; dtype=object asks pandas not to infer column dtypes.
df = pd.read_excel("C:/Users/junag/Documents/XML/Portfolio2.xlsx", sheet_name="Sheet1", dtype=object)

# Root element plus the fixed <Portfolios>/<Defaults> header.
root = ET.Element('trading-data')
root.set('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance')
tree = ET.ElementTree(root)
Portfolios = ET.SubElement(root, "Portfolios")
Defaults = ET.SubElement(Portfolios, "Defaults", BaseCurrency="USD")

# NOTE: a new <Portfolio>/<PortfolioPositions> pair is created for EVERY row,
# so rows that belong to the same portfolio are not grouped under one parent.
for row in df.itertuples():
    Portfolio = ET.SubElement(Portfolios, "Portfolio", Name=row.Name, BaseCurrency=row.BaseCurrency2, TradingPower=str(row.TradingPower),
                              ValidationProfile=row.ValidationProfile, CommissionProfile=row.CommissionProfile)
    PortfolioPositions = ET.SubElement(Portfolio, "PortfolioPositions")
    if row.Type == "Cash":
        # Cash positions carry only a volume and a <Cash> child.
        PortfolioPosition = ET.SubElement(PortfolioPositions, "PortfolioPosition", Type=row.Type, Volume=str(row.Volume))
        Cash = ET.SubElement(PortfolioPosition, 'Cash', Currency=str(row.Currency))
    else:
        # Every other position type also records the invested amounts and
        # gets an <Instrument> child.
        PortfolioPosition = ET.SubElement(PortfolioPositions, "PortfolioPosition", Type=row.Type, Volume=str(row.Volume),
                                          Invested=str(row.Invested), BaseInvested=str(row.BaseInvested))
        Instrument = ET.SubElement(PortfolioPosition, 'Instrument', Ticker=str(row.Ticker), ISIN=str(row.ISIN), Market=str(row.Market),
                                   Currency=str(row.Currency2), CFI=str(row.CFI))


# Pretty-print in place with tab indentation, then write the file as UTF-8.
ET.indent(tree, space="\t", level=0)
tree.write("Portfolios_converted2.xml", encoding="utf-8")

输出如下所示: enter image description here

虽然我需要它看起来像这样: enter image description here

如何改进我的代码以使输出 xml 看起来更好?请指教

这里是 excel 数据:

在 python 中将 excel 转换为 XML
import openpyxl
import xml.etree.ElementTree as ET

def convert_excel_to_xml(file_name, sheet_name):
    """Dump every cell of one worksheet into a flat XML file.

    Each cell becomes a ``<cell value="..."/>`` child of a single ``<root>``
    element, visited row by row. The document is written to
    ``<sheet_name>.xml``.

    Args:
        file_name: Path of the workbook to open.
        sheet_name: Name of the worksheet to export; also used as the base
            name of the output file.
    """
    wb = openpyxl.load_workbook(file_name)
    sheet = wb[sheet_name]
    root = ET.Element("root")
    for row in sheet.rows:
        for cell in row:
            # ElementTree can only serialize string attribute values, while
            # a cell may hold a number, a date or None (empty cell).
            text = "" if cell.value is None else str(cell.value)
            ET.SubElement(root, "cell", value=text)
    tree = ET.ElementTree(root)
    tree.write("{}.xml".format(sheet_name))

运行函数

# Example call: export worksheet "Sheet1" of the workbook test.xlsx.
convert_excel_to_xml("test.xlsx", "Sheet1")

由于您需要让每个投资组合只对应一个 <Portfolio> 和一个 <PortfolioPositions> 父节点,请考虑使用嵌套循环:先按投资组合字段把数据帧拆分成数据帧列表并遍历该列表,然后在每个数据帧内部循环遍历其各行:

import xml.etree.ElementTree as ET
import pandas as pd
import xml.dom.minidom as md

df = pd.read_excel("Input.xlsx", sheet_name="Sheet1", dtype=object)

# LIST OF DATA FRAME SPLITS (ONE DATA FRAME PER DISTINCT PORTFOLIO)
# NOTE: by default groupby leaves out rows with a missing value in a key column.
df_list = [g for i,g in df.groupby(
    ["Name", "BaseCurrency2", "TradingPower", "ValidationProfile", "CommissionProfile"]
)]

# ROOT LEVEL
root = ET.Element('trading-data')
root.set('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance')

# ROOT CHILD LEVEL
Portfolios = ET.SubElement(root, "Portfolios")
Defaults = ET.SubElement(Portfolios, "Defaults", BaseCurrency="USD")

# GROUP LEVEL ITERATION
for group in df_list:
    # Each group keeps the row labels of the original data frame, so its
    # first row must be fetched by position: label 0 exists in at most one
    # group and a label lookup would raise KeyError for all the others.
    first = group.iloc[0]

    # dtype=object may yield non-string values; XML attributes must be str.
    Portfolio = ET.SubElement(
        Portfolios, 
        "Portfolio", 
        Name = str(first["Name"]),
        BaseCurrency = str(first["BaseCurrency2"]), 
        TradingPower = str(first["TradingPower"]),
        ValidationProfile = str(first["ValidationProfile"]), 
        CommissionProfile = str(first["CommissionProfile"])
    )

    # Single parent shared by every position of this portfolio.
    PortfolioPositions = ET.SubElement(Portfolio, "PortfolioPositions")

    # ROW LEVEL ITERATION
    for row in group.itertuples():
        if row.Type == "Cash":
            PortfolioPosition = ET.SubElement(
                PortfolioPositions, 
                "PortfolioPosition", 
                Type = row.Type, 
                Volume = str(row.Volume)
            )
            Cash = ET.SubElement(
                PortfolioPosition, 
                "Cash", 
                Currency = str(row.Currency)
            )
        else:
            PortfolioPosition = ET.SubElement(
                 PortfolioPositions, 
                 "PortfolioPosition", 
                 Type = row.Type,
                 Volume = str(row.Volume),
                 Invested = str(row.Invested), 
                 BaseInvested = str(row.BaseInvested)
            )
            Instrument = ET.SubElement(
                 PortfolioPosition, 
                 "Instrument", 
                 Ticker = str(row.Ticker),
                 ISIN = str(row.ISIN),
                 Market = str(row.Market),
                 Currency = str(row.Currency2),
                 CFI = str(row.CFI)
            )

# SAVE PRETTY PRINT OUTPUT
with open("Output.xml", "wb") as f:
    # Round-trip through minidom, which provides the indented serialization.
    dom = md.parseString(ET.tostring(root))
    f.write(dom.toprettyxml().encode("utf-8"))