不同员工的连续天数分组

Grouping of consecutive days for different employees

我有一个 XML 文档,其中同一员工在不同日期有多个条目,如下所示:

<!-- Sample input: one wd:Report_Entry per employee (wd:staffID) per absence date (wd:date). -->
<wd:Report_Data xmlns:wd="http://example.com/wd">
    <wd:Report_Entry>
        <wd:workerGroup>
            <wd:staffID>111</wd:staffID>
        </wd:workerGroup>
        <wd:workerType>Staff</wd:workerType>
        <wd:requestType>Absence Request</wd:requestType>
        <wd:date>2020-08-21-07:00</wd:date>
    </wd:Report_Entry>
    <wd:Report_Entry>
        <wd:workerGroup>
            <wd:staffID>111</wd:staffID>
        </wd:workerGroup>
        <wd:workerType>Staff</wd:workerType>
        <wd:requestType>Absence Request</wd:requestType>
        <wd:date>2020-08-22-07:00</wd:date>
    </wd:Report_Entry>
    <wd:Report_Entry>
        <wd:workerGroup>
            <wd:staffID>222</wd:staffID>
        </wd:workerGroup>
        <wd:workerType>Staff</wd:workerType>
        <wd:requestType>Absence Request</wd:requestType>
        <wd:date>2020-08-23-07:00</wd:date>
    </wd:Report_Entry>
    <wd:Report_Entry>
        <wd:workerGroup>
            <wd:staffID>333</wd:staffID>
        </wd:workerGroup>
        <wd:workerType>Staff</wd:workerType>
        <wd:requestType>Absence Request</wd:requestType>
        <wd:date>2020-08-15-07:00</wd:date>
    </wd:Report_Entry>
    <wd:Report_Entry>
        <wd:workerGroup>
            <wd:staffID>333</wd:staffID>
        </wd:workerGroup>
        <wd:workerType>Staff</wd:workerType>
        <wd:requestType>Absence Request</wd:requestType>
        <wd:date>2020-08-16-07:00</wd:date>
    </wd:Report_Entry>
    <wd:Report_Entry>
        <wd:workerGroup>
            <wd:staffID>333</wd:staffID>
        </wd:workerGroup>
        <wd:workerType>Staff</wd:workerType>
        <wd:requestType>Absence Request</wd:requestType>
        <wd:date>2020-08-29-07:00</wd:date>
    </wd:Report_Entry>
</wd:Report_Data>

我希望得到如下输出:将每位员工连续的日期归为一组,并给出每组的开始日期和结束日期:

<!-- Desired result: one worker element per staffID per run of consecutive dates,
     carrying the first and the last date of that run. -->
<wd:Report_Entry>
   <worker>
      <staffID>111</staffID>
      <start_date>2020-08-21-07:00</start_date>
      <end_date>2020-08-22-07:00</end_date>
   </worker>
   <worker>
      <staffID>222</staffID>
      <start_date>2020-08-23-07:00</start_date>
      <end_date>2020-08-23-07:00</end_date>
   </worker>
   <worker>
      <staffID>333</staffID>
      <start_date>2020-08-15-07:00</start_date>
      <end_date>2020-08-16-07:00</end_date>
   </worker>
   <worker>
      <staffID>333</staffID>
      <start_date>2020-08-29-07:00</start_date>
      <end_date>2020-08-29-07:00</end_date>
   </worker>
</wd:Report_Entry>

我试过使用 group-starting-with,但它在分组时没有考虑 staffID。

<xsl:template match="/wd:Report_Data">
  
    <wd:Report_Entry>
              <!-- Attempted grouping: a new group starts at every entry whose date is not exactly
                   one day after the date of the immediately preceding sibling. The predicate never
                   looks at wd:workerGroup/wd:staffID, so entries of different employees that fall
                   on consecutive days are merged into one group. -->
              <xsl:for-each-group select="wd:Report_Entry" group-starting-with="*[not(xs:date(wd:date) = xs:date(preceding-sibling::*[1]/wd:date) + xs:dayTimeDuration('P1D'))] "> 
            <worker>
                <!-- staffID and start date are read from the first entry of the group (the context item) -->
                <staffID>
                    <xsl:value-of select="wd:workerGroup/wd:staffID"/>
                </staffID>
                <start_date>
                    <xsl:value-of select="wd:date"/>
                </start_date>
                <!-- end date is read from the last entry of the group -->
                <end_date>
                    <xsl:value-of select="current-group()[last()]/wd:date"/>
                </end_date>
            </worker>
            </xsl:for-each-group>
        <!--   </xsl:for-each-group> --> 
    </wd:Report_Entry>
   
</xsl:template>

得到了如下错误的输出:

<!-- Actual (wrong) result: the range reported for staffID 111 ends on 2020-08-23,
     which is the date of the staffID 222 entry in the input. -->
<wd:Report_Entry>
   <worker>
      <staffID>111</staffID>
      <start_date>2020-08-21-07:00</start_date>
      <end_date>2020-08-23-07:00</end_date>
   </worker>
</wd:Report_Entry>

看起来只需三步即可:先用 group-by 按员工 ID 分组,再将每组内的条目按日期排序,最后以“日期减去 position() × 1 天”作为 group-adjacent 的分组键(连续的日期各自减去其位置后会得到同一个值,因此落入同一组):

<?xml version="1.0" encoding="utf-8"?>
<!-- Groups the Report_Entry elements per staffID and collapses each run of consecutive
     dates into a single worker element holding the first and last date of the run. -->
<xsl:stylesheet version="3.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xpath-default-namespace="http://example.com/wd"
    exclude-result-prefixes="#all"
    expand-text="yes">

    <xsl:output method="xml" indent="yes"/>

    <xsl:template match="Report_Data">
        <root>
            <!-- Outer grouping: one group per employee. -->
            <xsl:for-each-group select="Report_Entry" group-by="workerGroup/staffID">
                <!-- Inner grouping: the employee's entries are sorted by date; subtracting
                     position() days from each date gives the same key for every entry of a
                     run of consecutive days, so group-adjacent yields one group per run. -->
                <xsl:for-each-group
                    select="current-group() => sort((), function($entry) { $entry/date })"
                    group-adjacent="xs:date(date) - xs:dayTimeDuration('P1D') * position()">
                    <worker>
                        <!-- The context item is the first entry of the run. -->
                        <staffID>{workerGroup/staffID}</staffID>
                        <start_date>{date[1]}</start_date>
                        <!-- The last entry of the run supplies the end date. -->
                        <end_date>{current-group()[last()]/date}</end_date>
                    </worker>
                </xsl:for-each-group>
            </xsl:for-each-group>
        </root>
    </xsl:template>

</xsl:stylesheet>

对于 XSLT 2 处理器,您可能需要基于 xsl:perform-sort 在用户自定义函数中实现排序:
<?xml version="1.0" encoding="utf-8"?>
<!-- XSLT 2.0 variant: groups the Report_Entry elements per staffID and collapses each run
     of consecutive dates into a single worker element. Only XSLT 2.0 features are used, so
     the stylesheet declares version 2.0 and omits the XSLT 3.0 expand-text attribute. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    version="2.0"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:mf="http://example.com/mf"
    xpath-default-namespace="http://example.com/wd"
    exclude-result-prefixes="#all">
    
    <xsl:output method="xml" indent="yes"/>
    
    <!-- mf:sort-by-date: returns the given Report_Entry elements ordered by their date child.
         Parameter entries: the Report_Entry elements to sort (may be empty).
         The sort key is not cast, so the date values are compared as strings; for the
         ISO-formatted dates of the sample input this matches chronological order. -->
    <xsl:function name="mf:sort-by-date" as="element(Report_Entry)*">
        <xsl:param name="entries" as="element(Report_Entry)*"/>
        <xsl:perform-sort select="$entries">
            <xsl:sort select="date"/>
        </xsl:perform-sort>
    </xsl:function>
    
    <xsl:template match="Report_Data">
        <root>
            <!-- Outer grouping: one group per employee. -->
            <xsl:for-each-group select="Report_Entry" group-by="workerGroup/staffID">
                <!-- Inner grouping: subtracting position() days from each sorted date gives the
                     same key for every entry of a run of consecutive days. -->
                <xsl:for-each-group select="mf:sort-by-date(current-group())" group-adjacent="xs:date(date) - position() * xs:dayTimeDuration('P1D')">
                    <worker>
                        <!-- The context item is the first entry of the run. -->
                        <staffID>
                            <xsl:value-of select="workerGroup/staffID"/>
                        </staffID>
                        <start_date>
                            <xsl:value-of select="date[1]"/>
                        </start_date>
                        <!-- The last entry of the run supplies the end date. -->
                        <end_date>
                            <xsl:value-of select="current-group()[last()]/date"/>
                        </end_date>
                    </worker>
                </xsl:for-each-group>
            </xsl:for-each-group>
        </root>
    </xsl:template>
    
</xsl:stylesheet>

当然,只有在输入未按员工 ID 和日期排序时才需要排序,否则显示的分组就足够了。