在内存中分组并添加节点 XML

group and add nodes in memory XML

我有以下输入:

<income>
    <flow>
        <Year>2020-2021</Year>
        <Period>8</Period>
        <Date>2020-11-30</Date>
        <Metric>in</Metric>
        <Monthly_Value>1</Monthly_Value>
    </flow>
    <flow>
        <Year>2020-2021</Year>
        <Period>8</Period>
        <Date>2020-11-30</Date>
        <Metric>out</Metric>
        <Monthly_Value>4</Monthly_Value>
    </flow>
    <flow>
        <Year>2020-2021</Year>
        <Period>9</Period>
        <Date>2020-12-31</Date>
        <Metric>in</Metric>
        <Monthly_Value>3</Monthly_Value>
    </flow>
    <flow>
        <Year>2020-2021</Year>
        <Period>9</Period>
        <Date>2020-12-31</Date>
        <Metric>out</Metric>
        <Monthly_Value>2</Monthly_Value>
    </flow>
    <flow>
        <Year>2020-2021</Year>
        <Period>10</Period>
        <Date>2021-01-31</Date>
        <Metric>in</Metric>
        <Monthly_Value>2</Monthly_Value>
    </flow>
    <flow>
        <Year>2020-2021</Year>
       <Period>10</Period>
        <Date>2021-01-31</Date>
        <Metric>out</Metric>
        <Monthly_Value>1</Monthly_Value>
    </flow>
    <flow>
        <Year>2020-2021</Year>
        <Period>11</Period>
        <Date>2021-02-28</Date>
        <Metric>in</Metric>
        <Monthly_Value>1</Monthly_Value>
    </flow>
    <flow>
        <Year>2020-2021</Year>
        <Period>11</Period>
        <Date>2021-02-28</Date>
        <Metric>out</Metric>
        <Monthly_Value>3</Monthly_Value>
    </flow>
    <flow>
        <Year>2020-2021</Year>
        <Period>12</Period>
        <Date>2021-03-31</Date>
        <Metric>in</Metric>
        <Monthly_Value>2</Monthly_Value>
    </flow>
    <flow>
        <Year>2020-2021</Year>
        <Period>12</Period>
        <Date>2021-03-31</Date>
        <Metric>out</Metric>
        <Monthly_Value>1</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>1</Period>
        <Date>2021-04-30</Date>
        <Metric>in</Metric>
        <Monthly_Value>1</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>1</Period>
        <Date>2021-04-30</Date>
        <Metric>out</Metric>
        <Monthly_Value>2</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>2</Period>
        <Date>2021-05-31</Date>
        <Metric>in</Metric>
        <Monthly_Value>2</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>2</Period>
        <Date>2021-05-31</Date>
        <Metric>out</Metric>
        <Monthly_Value>1</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>4</Period>
        <Date>2021-07-31</Date>
        <Metric>out</Metric>
        <Monthly_Value>1</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>5</Period>
        <Date>2021-08-31</Date>
        <Metric>in</Metric>
        <Monthly_Value>3</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>5</Period>
        <Date>2021-08-31</Date>
        <Metric>out</Metric>
        <Monthly_Value>1</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>6</Period>
        <Date>2021-09-30</Date>
        <Metric>in</Metric>
        <Monthly_Value>2</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>6</Period>
        <Date>2021-09-30</Date>
        <Metric>out</Metric>
        <Monthly_Value>2</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>7</Period>
        <Date>2021-10-31</Date>
        <Metric>in</Metric>
        <Monthly_Value>2</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>7</Period>
        <Date>2021-10-31</Date>
        <Metric>out</Metric>
        <Monthly_Value>5</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>8</Period>
        <Date>2021-11-30</Date>
        <Metric>in</Metric>
        <Monthly_Value>1</Monthly_Value>
    </flow>
    <flow>
        <Year>2021-2022</Year>
        <Period>8</Period>
        <Date>2021-11-30</Date>
        <Metric>out</Metric>
        <Monthly_Value>1</Monthly_Value>
    </flow>
</income>

我需要按 Metric 和 Year 分组,并先按 Period 升序排序。此过程会生成 4 个组:

  1. in & 2020-2021
  2. in & 2021-2022
  3. out & 2020-2021
  4. out & 2021-2022

然后取得前一项的 Monthly_Value,并在每个 Monthly_Value 节点之后添加一个新的 Previous_Value 节点,其内容为前一项的值。

预期输出:

<income>
   <flows>
      <Year>2020-2021</Year>
      <flow>
         <Year>2020-2021</Year>
         <Period>8</Period>
         <Date>2020-11-30</Date>
         <Metric>in</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>1</Previous_Value>
      </flow>
      <flow>
         <Year>2020-2021</Year>
         <Period>9</Period>
         <Date>2020-12-31</Date>
         <Metric>in</Metric>
         <Monthly_Value>3</Monthly_Value>
         <Previous_Value>1</Previous_Value>
      </flow>
      <flow>
         <Year>2020-2021</Year>
         <Period>10</Period>
         <Date>2021-01-31</Date>
         <Metric>in</Metric>
         <Monthly_Value>2</Monthly_Value>
         <Previous_Value>3</Previous_Value>
      </flow>
      <flow>
         <Year>2020-2021</Year>
         <Period>11</Period>
         <Date>2021-02-28</Date>
         <Metric>in</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>2</Previous_Value>
      </flow>
      <flow>
         <Year>2020-2021</Year>
         <Period>12</Period>
         <Date>2021-03-31</Date>
         <Metric>in</Metric>
         <Monthly_Value>2</Monthly_Value>
         <Previous_Value>1</Previous_Value>
      </flow>
   </flows>
   <flows>
      <Year>2021-2022</Year>
      <flow>
         <Year>2021-2022</Year>
         <Period>1</Period>
         <Date>2021-04-30</Date>
         <Metric>in</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>1</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>2</Period>
         <Date>2021-05-31</Date>
         <Metric>in</Metric>
         <Monthly_Value>2</Monthly_Value>
         <Previous_Value>1</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>3</Period>
         <Date>2021-06-30</Date>
         <Metric>in</Metric>
         <Monthly_Value>2</Monthly_Value>
         <Previous_Value>2</Previous_Value>
      </flow>
 <flow>
         <Year>2021-2022</Year>
         <Period>4</Period>
         <Date>2021-07-31</Date>
         <Metric>in</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>2</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>5</Period>
         <Date>2021-08-31</Date>
         <Metric>in</Metric>
         <Monthly_Value>3</Monthly_Value>
         <Previous_Value>1</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>6</Period>
         <Date>2021-09-30</Date>
         <Metric>in</Metric>
         <Monthly_Value>2</Monthly_Value>
         <Previous_Value>3</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>7</Period>
         <Date>2021-10-31</Date>
         <Metric>in</Metric>
         <Monthly_Value>2</Monthly_Value>
         <Previous_Value>2</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>8</Period>
         <Date>2021-11-30</Date>
         <Metric>in</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>2</Previous_Value>
      </flow>
   </flows>
   <flows>
      <Year>2020-2021</Year>
      <flow>
         <Year>2020-2021</Year>
         <Period>8</Period>
         <Date>2020-11-30</Date>
         <Metric>out</Metric>
         <Monthly_Value>4</Monthly_Value>
         <Previous_Value>4</Previous_Value>
      </flow>
      <flow>
         <Year>2020-2021</Year>
         <Period>9</Period>
         <Date>2020-12-31</Date>
         <Metric>out</Metric>
         <Monthly_Value>2</Monthly_Value>
         <Previous_Value>4</Previous_Value>
      </flow>
      <flow>
         <Year>2020-2021</Year>
         <Period>10</Period>
         <Date>2021-01-31</Date>
         <Metric>out</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>2</Previous_Value>
      </flow>
 <flow>
         <Year>2020-2021</Year>
         <Period>11</Period>
         <Date>2021-02-28</Date>
         <Metric>out</Metric>
         <Monthly_Value>3</Monthly_Value>
         <Previous_Value>1</Previous_Value>
      </flow>
      <flow>
         <Year>2020-2021</Year>
         <Period>12</Period>
         <Date>2021-03-31</Date>
         <Metric>out</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>3</Previous_Value>
      </flow>
   </flows>
   <flows>
      <Year>2021-2022</Year>
      <flow>
         <Year>2021-2022</Year>
         <Period>1</Period>
         <Date>2021-04-30</Date>
         <Metric>out</Metric>
         <Monthly_Value>2</Monthly_Value>
         <Previous_Value>2</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>2</Period>
         <Date>2021-05-31</Date>
         <Metric>out</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>2</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>3</Period>
         <Date>2021-06-30</Date>
         <Metric>out</Metric>
         <Monthly_Value>2</Monthly_Value>
         <Previous_Value>1</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>4</Period>
         <Date>2021-07-31</Date>
         <Metric>out</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>2</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>5</Period>
         <Date>2021-08-31</Date>
         <Metric>out</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>1</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>6</Period>
         <Date>2021-09-30</Date>
         <Metric>out</Metric>
         <Monthly_Value>2</Monthly_Value>
         <Previous_Value>1</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>7</Period>
         <Date>2021-10-31</Date>
         <Metric>out</Metric>
         <Monthly_Value>5</Monthly_Value>
         <Previous_Value>2</Previous_Value>
      </flow>
      <flow>
         <Year>2021-2022</Year>
         <Period>8</Period>
         <Date>2021-11-30</Date>
         <Metric>out</Metric>
         <Monthly_Value>1</Monthly_Value>
         <Previous_Value>5</Previous_Value>
      </flow>
   </flows>
</income>

我的 Xquery:

(: Questioner's original attempt, kept as posted (it does not produce the desired output).
   Iterates over every distinct Metric / Year / Period combination found under $nodes
   and returns one <income> element per combination. :)
for $metric in  distinct-values($nodes//Metric) 
for $y in  distinct-values($nodes/flow[Metric eq $metric]/Year)
for $p in distinct-values($nodes/flow[Metric eq $metric and Year eq $y]/Period)
order by $p ascending
return 
  <income>{
  (: Loops over $nodes itself rather than over its flow children, and never uses
     $metric, $y or $p inside the constructor. :)
  for $node in $nodes
  (: All preceding siblings of $node.
     NOTE(review): $nodes appears to be the root <income> element (the paths above use
     $nodes/flow); a root element has no preceding siblings, which would explain the
     empty <Previous_Value/> - confirm against the caller. :)
  let $mv := $node/preceding-sibling::*
  return 
    <Previous_Value>{$mv}</Previous_Value> 
  }  
  </income>

我得到一个包含 26 个空白项的序列:

<income><Previous_Value/></income>

我怎样才能产生正确的输出?

我不确定我是否理解您预期输出的逻辑,但按照这些思路应该可以让您足够接近:

(: Approximate solution: binds the input document as the context item, then builds
   one <income> wrapper per Year x Metric pair. "[your xml input]" below is a
   placeholder that must be replaced with the actual XML before this will run. :)
declare context item := document {
[your xml input]};
(: Distinct Year and Metric values taken from all flow elements in the document. :)
let $yrs :=distinct-values(
  //flow/Year
),
$metrics :=  distinct-values(
  //flow//Metric
) 
return
(: Every combination of year and metric is visited; each produces its own <income>. :)
for $yr in $yrs, 
 $met in $metrics
return
<income>{
   
<flows>{
  <Metric>{
    $met
  }</Metric>,
<Year>{
 (
      $yr, '&#xa;    ',
   (: Flows belonging to the current year/metric pair; they are emitted nested
      inside the <Year> element, after the year text and a newline. :)
   for $fl in  //flow[./Year=$yr][./Metric=$met]
   (: Monthly_Value text of the nearest preceding sibling flow with the same year and
      metric; this is empty for the first flow of a group. :)
   let $pmv := $fl/preceding-sibling::flow[./Year=$yr][./Metric=$met][1]/Monthly_Value/text()
   return <flow>{
         (: Only Date, Period and Monthly_Value are selected from the source flow, plus
            the newly constructed Previous_Value; Year and Metric are not repeated. :)
         $fl/(
          Date,Period,Monthly_Value,
  <Previous_Value>{
            $pmv
          }</Previous_Value> 
)
      }</flow>
    )
}
</Year>
}</flows>

}</income>

对于分组后的项,您不能使用 XPath 的 preceding-sibling:: 轴在源 XML 中查找前一项。如果用 let 声明一个变量(例如下面的 $sorted-flows),保存按 Period 排序后的 flow 元素,就可以从该有序序列中的前一项取得 Monthly_Value。

由于第一项没有前一项,您似乎想将它自身的值用作 Previous_Value。下面的逻辑先构造一个序列:前一项的 Monthly_Value(如果存在)在前,当前项的 Monthly_Value 在后,然后用 head() 取该序列的第一项。

  (: Groups the flow children of $nodes by Metric and Year, sorts each group by
     Period, and copies every flow with an added Previous_Value child holding the
     Monthly_Value of the preceding flow in that sorted group (or the flow's own
     Monthly_Value when it is the first in the group).
     $nodes is expected to be bound by the caller to the <income> element. :)
  <income>{
    for $metric in distinct-values($nodes//Metric)
    for $y in distinct-values($nodes/flow[Metric eq $metric]/Year)
    (: Sort the whole group in one FLWOR so that order by really applies across
       periods. Period is compared as a number: untyped values are otherwise ordered
       as strings, which would place "10" before "8". :)
    let $sorted-flows :=
      for $flow in $nodes/flow[Metric eq $metric and Year eq $y]
      order by $flow/Period/number() ascending
      return $flow
    return
      <flows>{
        (: Element name matches the Year element used in the input and in the
           expected output. :)
        <Year>{$y}</Year>,
        for $item at $i in $sorted-flows
        return
          <flow>{
            $item/*,
            <Previous_Value>{
              (: $sorted-flows[0] is empty for the first item, so head() falls
                 through to the current Monthly_Value in that case. :)
              ($sorted-flows[$i - 1]/Monthly_Value/data(), $item/Monthly_Value/data()) => head()
            }</Previous_Value>
          }</flow>
      }</flows>
  }</income>

您还可以应用 XSLT 并利用 xsl:for-each-group 作为替代解决方案:

(: Alternative implementation: an inline XSLT 2.0 stylesheet groups the flow
   elements with xsl:for-each-group and is applied to $nodes via xdmp:xslt-eval.
   $nodes is expected to be bound by the caller to the <income> element. :)
let $xslt :=
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0">
  <xsl:output indent="yes"/>

  <!-- Rebuilds income as one flows element per (Metric, Year) group. -->
  <xsl:template match="income">
    <xsl:copy>
      <xsl:for-each-group select="flow" group-by="string-join((Year,Metric), '|')">
        <!-- Group order follows the expected output: Metric first ("in" before "out"), then Year. -->
        <xsl:sort select="Metric" order="ascending"/>
        <xsl:sort select="Year" order="ascending"/>
        <flows>
          <Year><xsl:value-of select="Year"/></Year>
          <!-- Members of the current group ordered numerically by Period. -->
          <xsl:variable name="sorted-flows" as="element(flow)*">
            <xsl:perform-sort select="current-group()">
              <xsl:sort select="Period/number()" order="ascending"/>
            </xsl:perform-sort>
          </xsl:variable>
          <xsl:for-each select="$sorted-flows">
            <xsl:variable name="current-position" select="position()"/>
            <xsl:copy>
              <xsl:copy-of select="*"/>
              <!-- Previous flow's Monthly_Value, or this flow's own value when it is first in the group. -->
              <Previous_Value>
                <xsl:value-of select="(($sorted-flows[$current-position - 1], .))[1]/Monthly_Value"/>
              </Previous_Value>
            </xsl:copy>
          </xsl:for-each>
        </flows>
      </xsl:for-each-group>
    </xsl:copy>
  </xsl:template>
</xsl:stylesheet>
return
  xdmp:xslt-eval($xslt, $nodes)