不同员工的连续天数分组
Grouping of consecutive days for different employees
我有一个 XML 文档,其中同一员工在不同日期有多个条目,如下所示:
<!-- Sample input: one Report_Entry per worker per absence date.
     The wd prefix is bound to the placeholder URI http://example.com/wd;
     replace it with the namespace URI of the real report. -->
<wd:Report_Data xmlns:wd="http://example.com/wd">
  <wd:Report_Entry>
    <wd:workerGroup>
      <wd:staffID>111</wd:staffID>
    </wd:workerGroup>
    <wd:workerType>Staff</wd:workerType>
    <wd:requestType>Absence Request</wd:requestType>
    <wd:date>2020-08-21-07:00</wd:date>
  </wd:Report_Entry>
  <wd:Report_Entry>
    <wd:workerGroup>
      <wd:staffID>111</wd:staffID>
    </wd:workerGroup>
    <wd:workerType>Staff</wd:workerType>
    <wd:requestType>Absence Request</wd:requestType>
    <wd:date>2020-08-22-07:00</wd:date>
  </wd:Report_Entry>
  <wd:Report_Entry>
    <wd:workerGroup>
      <wd:staffID>222</wd:staffID>
    </wd:workerGroup>
    <wd:workerType>Staff</wd:workerType>
    <wd:requestType>Absence Request</wd:requestType>
    <wd:date>2020-08-23-07:00</wd:date>
  </wd:Report_Entry>
  <wd:Report_Entry>
    <wd:workerGroup>
      <wd:staffID>333</wd:staffID>
    </wd:workerGroup>
    <wd:workerType>Staff</wd:workerType>
    <wd:requestType>Absence Request</wd:requestType>
    <wd:date>2020-08-15-07:00</wd:date>
  </wd:Report_Entry>
  <wd:Report_Entry>
    <wd:workerGroup>
      <wd:staffID>333</wd:staffID>
    </wd:workerGroup>
    <wd:workerType>Staff</wd:workerType>
    <wd:requestType>Absence Request</wd:requestType>
    <wd:date>2020-08-16-07:00</wd:date>
  </wd:Report_Entry>
  <wd:Report_Entry>
    <wd:workerGroup>
      <wd:staffID>333</wd:staffID>
    </wd:workerGroup>
    <wd:workerType>Staff</wd:workerType>
    <wd:requestType>Absence Request</wd:requestType>
    <wd:date>2020-08-29-07:00</wd:date>
  </wd:Report_Entry>
</wd:Report_Data>
我想要的输出是:按员工把连续的日期合并为一组,并给出每组的开始日期和结束日期,如下所示:
<!-- Desired result: one <worker> per run of consecutive dates for a given staffID.
     The wd prefix is bound to the placeholder URI http://example.com/wd. -->
<wd:Report_Entry xmlns:wd="http://example.com/wd">
  <worker>
    <staffID>111</staffID>
    <start_date>2020-08-21-07:00</start_date>
    <end_date>2020-08-22-07:00</end_date>
  </worker>
  <worker>
    <staffID>222</staffID>
    <start_date>2020-08-23-07:00</start_date>
    <end_date>2020-08-23-07:00</end_date>
  </worker>
  <worker>
    <staffID>333</staffID>
    <start_date>2020-08-15-07:00</start_date>
    <end_date>2020-08-16-07:00</end_date>
  </worker>
  <worker>
    <staffID>333</staffID>
    <start_date>2020-08-29-07:00</start_date>
    <end_date>2020-08-29-07:00</end_date>
  </worker>
</wd:Report_Entry>
我试过使用 group-starting-with,但它在分组时没有考虑 staffID。
<!-- Asker's original attempt, kept unchanged to illustrate the problem.
     The group-starting-with pattern opens a new group whenever an entry's date
     is not exactly one day after the date of its immediately preceding sibling.
     It never looks at wd:workerGroup/wd:staffID, so consecutive dates that
     belong to different workers fall into the same group. -->
<xsl:template match="/wd:Report_Data">
<wd:Report_Entry>
<xsl:for-each-group select="wd:Report_Entry" group-starting-with="*[not(xs:date(wd:date) = xs:date(preceding-sibling::*[1]/wd:date) + xs:dayTimeDuration('P1D'))] ">
<worker>
<staffID>
<!-- Evaluated against the first entry of the current group. -->
<xsl:value-of select="wd:workerGroup/wd:staffID"/>
</staffID>
<start_date>
<!-- Date of the first entry of the current group. -->
<xsl:value-of select="wd:date"/>
</start_date>
<end_date>
<!-- Date of the last entry of the current group, in document order. -->
<xsl:value-of select="current-group()[last()]/wd:date"/>
</end_date>
</worker>
</xsl:for-each-group>
<!-- NOTE(review): the commented-out end tag below looks like a leftover from a nested grouping attempt; confirm with the author. -->
<!-- </xsl:for-each-group> -->
</wd:Report_Entry>
</xsl:template>
得到的错误输出如下:
<wd:Report_Entry>
<worker>
<staffID>111</staffID>
<start_date>2020-08-21-07:00</start_date>
<end_date>2020-08-23-07:00</end_date>
</worker>
</wd:Report_Entry>
首先按员工 ID 使用 group-by 分组,然后把每个组按日期排序,再用 group-adjacent 按「日期减去 position() * 1 天」的结果分组,似乎就足够了:
<?xml version="1.0" encoding="utf-8"?>
<!-- XSLT 3.0: for every worker, collapse each run of consecutive dates into a
     single <worker> element carrying the run's start and end date. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  version="3.0"
  xmlns:xs="http://www.w3.org/2001/XMLSchema"
  xpath-default-namespace="http://example.com/wd"
  exclude-result-prefixes="#all"
  expand-text="yes">

  <xsl:output method="xml" indent="yes"/>

  <xsl:template match="Report_Data">
    <root>
      <!-- Outer grouping: one group per staffID, so a run can never span two workers. -->
      <xsl:for-each-group select="Report_Entry" group-by="workerGroup/staffID">
        <!-- Inner grouping: sort the worker's entries chronologically using a typed
             xs:date key (a plain string key would sort lexically), then group
             adjacent entries by "date minus position() days". Inside a run of
             consecutive days the date and the position both advance by one, so
             that difference stays constant; any gap changes it and starts a new group.
             NOTE(review): two entries with the same date for one worker would break
             a run, because the key assumes one entry per day. -->
        <xsl:for-each-group
          select="sort(current-group(), (), function($e) { xs:date($e/date) })"
          group-adjacent="xs:date(date) - position() * xs:dayTimeDuration('P1D')">
          <worker>
            <staffID>
              <xsl:value-of select="workerGroup/staffID"/>
            </staffID>
            <start_date>
              <!-- The context item is the first (earliest) entry of the run. -->
              <xsl:value-of select="date[1]"/>
            </start_date>
            <end_date>
              <!-- The last entry of the sorted run holds the latest date. -->
              <xsl:value-of select="current-group()[last()]/date"/>
            </end_date>
          </worker>
        </xsl:for-each-group>
      </xsl:for-each-group>
    </root>
  </xsl:template>

</xsl:stylesheet>
对于 XSLT 2 处理器,您可能需要基于 xsl:perform-sort 在用户自定义函数中实现排序:
<!-- XSLT 2.0 variant: fn:sort() is not available, so the chronological sort is
     done by a stylesheet function built on xsl:perform-sort. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  version="2.0"
  xmlns:xs="http://www.w3.org/2001/XMLSchema"
  xmlns:mf="http://example.com/mf"
  xpath-default-namespace="http://example.com/wd"
  exclude-result-prefixes="#all">

  <!-- Returns the given Report_Entry elements ordered by their date child.
       The sort key is cast to xs:date so entries are compared as dates,
       not as strings. -->
  <xsl:function name="mf:sort-by-date" as="element(Report_Entry)*">
    <xsl:param name="entries" as="element(Report_Entry)*"/>
    <xsl:perform-sort select="$entries">
      <xsl:sort select="xs:date(date)"/>
    </xsl:perform-sort>
  </xsl:function>

  <xsl:output method="xml" indent="yes"/>

  <xsl:template match="Report_Data">
    <root>
      <!-- Outer grouping: one group per staffID, so a run can never span two workers. -->
      <xsl:for-each-group select="Report_Entry" group-by="workerGroup/staffID">
        <!-- Inner grouping: within a run of consecutive days the date and the
             position both advance by one, so "date minus position() days" stays
             constant; any gap changes the key and starts a new group.
             NOTE(review): two entries with the same date for one worker would break
             a run, because the key assumes one entry per day. -->
        <xsl:for-each-group
          select="mf:sort-by-date(current-group())"
          group-adjacent="xs:date(date) - position() * xs:dayTimeDuration('P1D')">
          <worker>
            <staffID>
              <xsl:value-of select="workerGroup/staffID"/>
            </staffID>
            <start_date>
              <!-- The context item is the first (earliest) entry of the run. -->
              <xsl:value-of select="date[1]"/>
            </start_date>
            <end_date>
              <!-- The last entry of the sorted run holds the latest date. -->
              <xsl:value-of select="current-group()[last()]/date"/>
            </end_date>
          </worker>
        </xsl:for-each-group>
      </xsl:for-each-group>
    </root>
  </xsl:template>

</xsl:stylesheet>
当然,只有在输入未按员工 ID 和日期排序时才需要排序,否则显示的分组就足够了。
我有一个 XML 文档,其中同一员工在不同日期有多个条目,如下所示:
<!-- Sample input: one Report_Entry per worker per absence date.
     The wd prefix is bound to the placeholder URI http://example.com/wd;
     replace it with the namespace URI of the real report. -->
<wd:Report_Data xmlns:wd="http://example.com/wd">
  <wd:Report_Entry>
    <wd:workerGroup>
      <wd:staffID>111</wd:staffID>
    </wd:workerGroup>
    <wd:workerType>Staff</wd:workerType>
    <wd:requestType>Absence Request</wd:requestType>
    <wd:date>2020-08-21-07:00</wd:date>
  </wd:Report_Entry>
  <wd:Report_Entry>
    <wd:workerGroup>
      <wd:staffID>111</wd:staffID>
    </wd:workerGroup>
    <wd:workerType>Staff</wd:workerType>
    <wd:requestType>Absence Request</wd:requestType>
    <wd:date>2020-08-22-07:00</wd:date>
  </wd:Report_Entry>
  <wd:Report_Entry>
    <wd:workerGroup>
      <wd:staffID>222</wd:staffID>
    </wd:workerGroup>
    <wd:workerType>Staff</wd:workerType>
    <wd:requestType>Absence Request</wd:requestType>
    <wd:date>2020-08-23-07:00</wd:date>
  </wd:Report_Entry>
  <wd:Report_Entry>
    <wd:workerGroup>
      <wd:staffID>333</wd:staffID>
    </wd:workerGroup>
    <wd:workerType>Staff</wd:workerType>
    <wd:requestType>Absence Request</wd:requestType>
    <wd:date>2020-08-15-07:00</wd:date>
  </wd:Report_Entry>
  <wd:Report_Entry>
    <wd:workerGroup>
      <wd:staffID>333</wd:staffID>
    </wd:workerGroup>
    <wd:workerType>Staff</wd:workerType>
    <wd:requestType>Absence Request</wd:requestType>
    <wd:date>2020-08-16-07:00</wd:date>
  </wd:Report_Entry>
  <wd:Report_Entry>
    <wd:workerGroup>
      <wd:staffID>333</wd:staffID>
    </wd:workerGroup>
    <wd:workerType>Staff</wd:workerType>
    <wd:requestType>Absence Request</wd:requestType>
    <wd:date>2020-08-29-07:00</wd:date>
  </wd:Report_Entry>
</wd:Report_Data>
我想要的输出是:按员工把连续的日期合并为一组,并给出每组的开始日期和结束日期,如下所示:
<!-- Desired result: one <worker> per run of consecutive dates for a given staffID.
     The wd prefix is bound to the placeholder URI http://example.com/wd. -->
<wd:Report_Entry xmlns:wd="http://example.com/wd">
  <worker>
    <staffID>111</staffID>
    <start_date>2020-08-21-07:00</start_date>
    <end_date>2020-08-22-07:00</end_date>
  </worker>
  <worker>
    <staffID>222</staffID>
    <start_date>2020-08-23-07:00</start_date>
    <end_date>2020-08-23-07:00</end_date>
  </worker>
  <worker>
    <staffID>333</staffID>
    <start_date>2020-08-15-07:00</start_date>
    <end_date>2020-08-16-07:00</end_date>
  </worker>
  <worker>
    <staffID>333</staffID>
    <start_date>2020-08-29-07:00</start_date>
    <end_date>2020-08-29-07:00</end_date>
  </worker>
</wd:Report_Entry>
我试过使用 group-starting-with,但它在分组时没有考虑 staffID。
<!-- Asker's original attempt, kept unchanged to illustrate the problem.
     The group-starting-with pattern opens a new group whenever an entry's date
     is not exactly one day after the date of its immediately preceding sibling.
     It never looks at wd:workerGroup/wd:staffID, so consecutive dates that
     belong to different workers fall into the same group. -->
<xsl:template match="/wd:Report_Data">
<wd:Report_Entry>
<xsl:for-each-group select="wd:Report_Entry" group-starting-with="*[not(xs:date(wd:date) = xs:date(preceding-sibling::*[1]/wd:date) + xs:dayTimeDuration('P1D'))] ">
<worker>
<staffID>
<!-- Evaluated against the first entry of the current group. -->
<xsl:value-of select="wd:workerGroup/wd:staffID"/>
</staffID>
<start_date>
<!-- Date of the first entry of the current group. -->
<xsl:value-of select="wd:date"/>
</start_date>
<end_date>
<!-- Date of the last entry of the current group, in document order. -->
<xsl:value-of select="current-group()[last()]/wd:date"/>
</end_date>
</worker>
</xsl:for-each-group>
<!-- NOTE(review): the commented-out end tag below looks like a leftover from a nested grouping attempt; confirm with the author. -->
<!-- </xsl:for-each-group> -->
</wd:Report_Entry>
</xsl:template>
得到的错误输出如下:
<wd:Report_Entry>
<worker>
<staffID>111</staffID>
<start_date>2020-08-21-07:00</start_date>
<end_date>2020-08-23-07:00</end_date>
</worker>
</wd:Report_Entry>
首先按员工 ID 使用 group-by 分组,然后把每个组按日期排序,再用 group-adjacent 按「日期减去 position() * 1 天」的结果分组,似乎就足够了:
<?xml version="1.0" encoding="utf-8"?>
<!-- XSLT 3.0: for every worker, collapse each run of consecutive dates into a
     single <worker> element carrying the run's start and end date. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  version="3.0"
  xmlns:xs="http://www.w3.org/2001/XMLSchema"
  xpath-default-namespace="http://example.com/wd"
  exclude-result-prefixes="#all"
  expand-text="yes">

  <xsl:output method="xml" indent="yes"/>

  <xsl:template match="Report_Data">
    <root>
      <!-- Outer grouping: one group per staffID, so a run can never span two workers. -->
      <xsl:for-each-group select="Report_Entry" group-by="workerGroup/staffID">
        <!-- Inner grouping: sort the worker's entries chronologically using a typed
             xs:date key (a plain string key would sort lexically), then group
             adjacent entries by "date minus position() days". Inside a run of
             consecutive days the date and the position both advance by one, so
             that difference stays constant; any gap changes it and starts a new group.
             NOTE(review): two entries with the same date for one worker would break
             a run, because the key assumes one entry per day. -->
        <xsl:for-each-group
          select="sort(current-group(), (), function($e) { xs:date($e/date) })"
          group-adjacent="xs:date(date) - position() * xs:dayTimeDuration('P1D')">
          <worker>
            <staffID>
              <xsl:value-of select="workerGroup/staffID"/>
            </staffID>
            <start_date>
              <!-- The context item is the first (earliest) entry of the run. -->
              <xsl:value-of select="date[1]"/>
            </start_date>
            <end_date>
              <!-- The last entry of the sorted run holds the latest date. -->
              <xsl:value-of select="current-group()[last()]/date"/>
            </end_date>
          </worker>
        </xsl:for-each-group>
      </xsl:for-each-group>
    </root>
  </xsl:template>

</xsl:stylesheet>
对于 XSLT 2 处理器,您可能需要基于 xsl:perform-sort 在用户自定义函数中实现排序:
<!-- XSLT 2.0 variant: fn:sort() is not available, so the chronological sort is
     done by a stylesheet function built on xsl:perform-sort. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  version="2.0"
  xmlns:xs="http://www.w3.org/2001/XMLSchema"
  xmlns:mf="http://example.com/mf"
  xpath-default-namespace="http://example.com/wd"
  exclude-result-prefixes="#all">

  <!-- Returns the given Report_Entry elements ordered by their date child.
       The sort key is cast to xs:date so entries are compared as dates,
       not as strings. -->
  <xsl:function name="mf:sort-by-date" as="element(Report_Entry)*">
    <xsl:param name="entries" as="element(Report_Entry)*"/>
    <xsl:perform-sort select="$entries">
      <xsl:sort select="xs:date(date)"/>
    </xsl:perform-sort>
  </xsl:function>

  <xsl:output method="xml" indent="yes"/>

  <xsl:template match="Report_Data">
    <root>
      <!-- Outer grouping: one group per staffID, so a run can never span two workers. -->
      <xsl:for-each-group select="Report_Entry" group-by="workerGroup/staffID">
        <!-- Inner grouping: within a run of consecutive days the date and the
             position both advance by one, so "date minus position() days" stays
             constant; any gap changes the key and starts a new group.
             NOTE(review): two entries with the same date for one worker would break
             a run, because the key assumes one entry per day. -->
        <xsl:for-each-group
          select="mf:sort-by-date(current-group())"
          group-adjacent="xs:date(date) - position() * xs:dayTimeDuration('P1D')">
          <worker>
            <staffID>
              <xsl:value-of select="workerGroup/staffID"/>
            </staffID>
            <start_date>
              <!-- The context item is the first (earliest) entry of the run. -->
              <xsl:value-of select="date[1]"/>
            </start_date>
            <end_date>
              <!-- The last entry of the sorted run holds the latest date. -->
              <xsl:value-of select="current-group()[last()]/date"/>
            </end_date>
          </worker>
        </xsl:for-each-group>
      </xsl:for-each-group>
    </root>
  </xsl:template>

</xsl:stylesheet>
当然,只有在输入未按员工 ID 和日期排序时才需要排序,否则显示的分组就足够了。