如何格式化 xslt 以特定方式缩进

How to format xslt to indent in a specific way

我需要您在 XSLT 文件方面的帮助,因为我对处理此类文件以格式化 XML.

还很陌生

作为输入,我有以下 XML

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<alto xmlns="http://www.loc.gov/standards/alto/ns-v3#" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v3# http://www.loc.gov/alto/v3/alto-3-1.xsd">
<Description>
<MeasurementUnit>pixel</MeasurementUnit>
<OCRProcessing ID="IdOcr"><ocrProcessingStep><processingDateTime>2020-12-08</processingDateTime><processingSoftware><softwareCreator>ABCDE</softwareCreator><softwareName>ABCDE Engine</softwareName><softwareVersion>12</softwareVersion></processingSoftware></ocrProcessingStep></OCRProcessing>
</Description>
<Styles><TextStyle ID="font0" FONTFAMILY="Arial" FONTSIZE="10"/><TextStyle ID="font1" FONTFAMILY="Arial" FONTSIZE="11"/><TextStyle ID="font2" FONTFAMILY="Calibri" FONTSIZE="11"/><TextStyle ID="font3" FONTFAMILY="Cambria" FONTSIZE="11"/><TextStyle ID="font4" FONTFAMILY="Symbol" FONTSIZE="10"/><TextStyle ID="font5" FONTFAMILY="Symbol" FONTSIZE="11"/>
</Styles>
<Layout>
<Page ID="Page1" PHYSICAL_IMG_NR="1" HEIGHT="3508" WIDTH="2481">
<TopMargin HEIGHT="300" WIDTH="2481" VPOS="0" HPOS="0">
<TextBlock ID="Page1_Block1" HEIGHT="58" WIDTH="465" VPOS="150" HPOS="1008" LANG="en-US" STYLEREFS="font2">
<TextLine HEIGHT="46" WIDTH="453" VPOS="156" HPOS="1014"><String STYLE="bold" WC="1." CONTENT="LOREUM" HEIGHT="46" WIDTH="167" VPOS="156" HPOS="1014">LOREUM</String><SP HEIGHT="46" WIDTH="8" VPOS="156" HPOS="1182"/><String STYLE="bold" WC="1." CONTENT="IPSUM" HEIGHT="46" WIDTH="276" VPOS="156" HPOS="1191">IPSUM</String></TextLine>
</TextBlock>
</TopMargin>
<LeftMargin HEIGHT="2902" WIDTH="219" VPOS="300" HPOS="0">
</LeftMargin>
<RightMargin HEIGHT="2902" WIDTH="219" VPOS="300" HPOS="2262">
</RightMargin>
<BottomMargin HEIGHT="306" WIDTH="2481" VPOS="3202" HPOS="0">
<ComposedBlock ID="Page1_Block2" HEIGHT="58" WIDTH="1297" VPOS="3253" HPOS="965" TYPE="container">
<TextBlock ID="Page1_Block3" HEIGHT="51" WIDTH="410" VPOS="3259" HPOS="965" LANG="en-US" STYLEREFS="font2">
<TextLine HEIGHT="39" WIDTH="398" VPOS="3265" HPOS="971"><String STYLE="bold" STYLEREFS="font0" WC="1." CONTENT="dolor" HEIGHT="39" WIDTH="146" VPOS="3265" HPOS="971">dolor</String><SP HEIGHT="39" WIDTH="9" VPOS="3265" HPOS="1118"/><String STYLE="bold" STYLEREFS="font0" WC="1." CONTENT="sit" HEIGHT="39" WIDTH="241" VPOS="3265" HPOS="1128">sit</String></TextLine>
</TextBlock>
<TextBlock ID="Page1_Block4" HEIGHT="58" WIDTH="141" VPOS="3253" HPOS="2121" LANG="en-US" STYLEREFS="font2">
<TextLine HEIGHT="46" WIDTH="129" VPOS="3259" HPOS="2127"><String STYLEREFS="font3" WC="1." CONTENT="Page" HEIGHT="46" WIDTH="94" VPOS="3259" HPOS="2127">Page</String><SP HEIGHT="46" WIDTH="8" VPOS="3259" HPOS="2222"/><String STYLEREFS="font3" WC="1." CONTENT="1" HEIGHT="46" WIDTH="25" VPOS="3259" HPOS="2231">1</String></TextLine>
</TextBlock></ComposedBlock><GraphicalElement ID="Page1_Block5" HEIGHT="14" WIDTH="2044" VPOS="3228" HPOS="218"/><GraphicalElement ID="Page1_Block6" HEIGHT="4" WIDTH="2044" VPOS="3244" HPOS="218"/>
</BottomMargin>
<PrintSpace HEIGHT="2902" WIDTH="2043" VPOS="300" HPOS="219">
<TextBlock ID="Page1_Block7" HEIGHT="124" WIDTH="898" VPOS="303" HPOS="791" LANG="en-US" STYLEREFS="font0">
<TextLine HEIGHT="39" WIDTH="886" VPOS="309" HPOS="797"><String STYLE="bold" WC="1." CONTENT="amet" HEIGHT="39" WIDTH="183" VPOS="309" HPOS="797">amet</String><SP HEIGHT="39" WIDTH="9" VPOS="309" HPOS="981"/><String STYLE="bold" WC="1." CONTENT="consectetur " HEIGHT="39" WIDTH="236" VPOS="309" HPOS="991">consectetur </String><SP HEIGHT="39" WIDTH="10" VPOS="309" HPOS="1228"/><String STYLE="bold" WC="1." CONTENT="adipiscing " HEIGHT="39" WIDTH="58" VPOS="309" HPOS="1239">adipiscing </String><SP HEIGHT="39" WIDTH="10" VPOS="309" HPOS="1298"/><String STYLE="bold" WC="1." CONTENT="elit" HEIGHT="39" WIDTH="374" VPOS="309" HPOS="1309">elit</String></TextLine>
<TextLine HEIGHT="39" WIDTH="252" VPOS="382" HPOS="1108"><String STYLE="bold" WC="1." CONTENT="Aliquam " HEIGHT="39" WIDTH="203" VPOS="382" HPOS="1108">Aliquam </String><SP HEIGHT="39" WIDTH="10" VPOS="382" HPOS="1312"/><String STYLE="bold" WC="1." CONTENT="eu" HEIGHT="39" WIDTH="37" VPOS="382" HPOS="1323">eu</String></TextLine>
</TextBlock>
</PrintSpace>
</Page>
</Layout>
</alto>

我正在尝试对其应用一些东西,但我的主要问题是我想让我的 textline 标签包含 stringsp 标签只在一个 line 上,没有缩进。例如,对于我的 textline 标签,我将其作为输出:

<TextLine/>
            <String STYLE="bold"
                    WC="1."
                    CONTENT="amet"
                    HEIGHT="39"
                    WIDTH="183"
                    VPOS="309"
                    HPOS="797">amet</String>
            <String STYLE="bold"
                    WC="1."
                    CONTENT="consectetur "
                    HEIGHT="39"
                    WIDTH="236"
                    VPOS="309"
                    HPOS="991">consectetur </String>
            <String STYLE="bold"
                    WC="1."
                    CONTENT="adipiscing "
                    HEIGHT="39"
                    WIDTH="58"
                    VPOS="309"
                    HPOS="1239">adipiscing </String>
            <String STYLE="bold"
                    WC="1."
                    CONTENT="elit"
                    HEIGHT="39"
                    WIDTH="374"
                    VPOS="309"
                    HPOS="1309">elit</String>
            <SP HEIGHT="39" WIDTH="9" VPOS="309" HPOS="981"/>
            <SP HEIGHT="39" WIDTH="10" VPOS="309" HPOS="1228"/>
            <SP HEIGHT="39" WIDTH="10" VPOS="309" HPOS="1298"/>
         </TextLine>

我想要这个:

<TextLine/> <String STYLE="bold" WC="1." CONTENT="amet" HEIGHT="39" WIDTH="183" VPOS="309" HPOS="797">amet</String><SP HEIGHT="39" WIDTH="9" VPOS="309" HPOS="981"/><String STYLE="bold" WC="1." CONTENT="consectetur " HEIGHT="39" WIDTH="236" VPOS="309" HPOS="991">consectetur </String><SP HEIGHT="39" WIDTH="10" VPOS="309" HPOS="1228"/><String STYLE="bold" WC="1." CONTENT="adipiscing " HEIGHT="39" WIDTH="58" VPOS="309" HPOS="1239">adipiscing </String><SP HEIGHT="39" WIDTH="10" VPOS="309" HPOS="1298"/> <String STYLE="bold" WC="1." CONTENT="elit" HEIGHT="39" WIDTH="374" VPOS="309" HPOS="1309">elit</String> </TextLine>

你知道我应该在 this xsl 中 modify/delete/add 做什么吗?

<?xml version="1.0" encoding="utf-8"?>
<!-- Created with Liquid Studio 2019 (https://www.liquid-technologies.com) -->
<xsl:stylesheet version="1.1" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:tns="http://www.loc.gov/standards/alto/ns-v3#" >

    <xsl:output indent="yes"/>
    <xsl:key name="TextBlock-by-LANG" match="//tns:TextBlock" use="@LANG" />
    <xsl:variable name="lang">
        <xsl:for-each select="//tns:TextBlock">
            <xsl:sort select="count(key('TextBlock-by-LANG', @LANG))" data-type="number" order="descending"/>
            <xsl:if test="position() = 1">
                <xsl:value-of select="@LANG"/>
            </xsl:if>
        </xsl:for-each>
    </xsl:variable>
    <xsl:template match="/">
        <alto>
            <xsl:apply-templates select="//tns:Description"/>
            <xsl:apply-templates select="//tns:Styles"/>
            <xsl:apply-templates select="//tns:Page">
                <xsl:sort select="@PHYSICAL_IMG_NR" data-type="number" order="ascending"/>
            </xsl:apply-templates>
        </alto>
    </xsl:template>
    <xsl:template match="tns:Description">
        <xsl:copy-of select="."/>
    </xsl:template>
    <xsl:template match="tns:Styles">
        <xsl:copy-of select="."/>
    </xsl:template>
    
    <xsl:template match="tns:Page">
        <xsl:copy>
            <xsl:copy-of select="@*"/>
            <xsl:apply-templates select="tns:PrintSpace//tns:TextBlock">
                <xsl:sort select="@VPOS" data-type="number" order="ascending"/>
                <xsl:sort select="@HPOS" data-type="number" order="ascending"/>
            </xsl:apply-templates>
        </xsl:copy>
    </xsl:template>
  
    <xsl:template match="tns:TextBlock">
        <xsl:copy>
            <xsl:copy-of select="@*"/>
            <xsl:apply-templates select="tns:TextLine">
                <xsl:sort select="@VPOS" data-type="number" order="ascending"/>
                <xsl:sort select="@HPOS" data-type="number" order="ascending"/>
            </xsl:apply-templates>
        </xsl:copy>
    </xsl:template>
 
    <xsl:template match="tns:TextLine">
        <xsl:copy>
            <xsl:copy select="."/>
            <xsl:apply-templates select="tns:String"/>
            <xsl:apply-templates select="tns:SP"/>
        </xsl:copy>
    </xsl:template>
    
    <xsl:template match="tns:String">
        <xsl:copy select=".">
            <xsl:copy-of select="@*"/>
            <xsl:value-of select="@CONTENT"/>
        </xsl:copy>
    </xsl:template>

    <xsl:template match="tns:SP">
        <xsl:copy select=".">
            <xsl:copy-of select="@*"/>
            <xsl:value-of select="@CONTENT"/>
        </xsl:copy>
    </xsl:template>
    
    <xsl:strip-space elements="*"/>
</xsl:stylesheet>

非常感谢您的帮助和建议

显示的格式可能是由于在引擎盖下使用 Saxon 9 或 10 造成的,因此使用 XSLT 3 或 2,例如<xsl:output indent="yes" suppress-indentation="tns:TextLine"/> 应该有助于不缩进 tns 命名空间中 TextLine 元素的内容。