Teradata 分区顺序事件
Teradata partitioning sequential events
Teradata - 下面是到达车站的两节铁路车辆的事件记录。773 号车先卸空 (RI/RE),再装车 (RI/RL),然后离站 (TD);819 号车只是卸空后离站。我想得到下方 "Desired output" 所示的结果表。我试过按车号分组并使用 CASE 语句,但同一辆车会多次经过同一车站,所以 MAX 和 MIN 给出的结果不可预测。我读过有关分区的资料,但很难想明白该如何应用。任何帮助都不胜感激。
*注意:最好把初始搜索限制为 EVT_CD='TA' AND STN=DEST,以排除只是途经车站的车辆。但我不能把整个记录集都限制为该条件,因为 TD 记录的目的地不同。
CAR_NUMB EVT_DT EVT_TM EVT_CD EVST_CD WB_ID STN DEST
773 03/08/2016 19.05.00 TA 582016 BOSTON BOSTON
773 03/12/2016 04.04.00 AP PU 582016 BOSTON BOSTON
773 03/12/2016 14.35.00 RI RE 412016 BOSTON BOSTON
773 03/12/2016 14.37.00 AP PL 412016 BOSTON BOSTON
773 03/12/2016 14.45.00 RI RL 812016 BOSTON HOUSTON
773 03/14/2016 12.22.00 TD 812016 BOSTON HOUSTON
819 03/04/2016 17.50.00 TA 362016 STLOUIS STLOUIS
819 03/06/2016 13.50.00 AP PU 362016 STLOUIS STLOUIS
819 03/06/2016 17.27.55 RI RE 042016 STLOUIS STLOUIS
819 03/07/2016 00.37.00 RI PR 042016 STLOUIS PORTLAND
819 03/11/2016 01.47.00 TD 042016 STLOUIS PORTLAND
Desired output:
CAR_NUMB TA AP RIRE RIRL TD
773 03/08/2016 19.05.00 03/12/20.. 03/12/20.. 03/12/20.. 03/14/2016 12.22.00
819 03/04/2016 17.50.00 03/06/20.. 03/06/20.. null 03/11/2016 01.47.00
上面的 [..] 表示为了排版而被我截断的时间戳。
您可以在窗口聚合函数中使用 CASE,在当前行及其后的 5 行中查找给定 EVT_CD 的最小日期/时间。
我把日期和时间合并成了一个时间戳,因为这样更便于处理:
-- One output row per arrival event (EVT_CD = 'TA'): every column of tab, the
-- row's own timestamp (TA), and the earliest AP, RI/RE, RI/RL and TD
-- timestamps found in that row and the 5 rows after it for the same car.
WITH evt AS (
    -- Build the combined date + time timestamp once per row; it is exposed
    -- under the name TA because that is the column the final result reports.
    SELECT
        tab.*
       ,CAST(EVT_DT AS TIMESTAMP(0)) + (EVT_TM - TIME '00:00:00' HOUR TO SECOND) AS TA
    FROM tab
)
SELECT
    evt.*
    -- Each MIN ignores the NULLs produced for non-matching rows, so it yields
    -- the first matching event inside the look-ahead frame (or NULL if none).
   ,MIN(CASE WHEN EVT_CD = 'AP' THEN TA END)
        OVER (PARTITION BY CAR_NUMB
              ORDER BY EVT_DT, EVT_TM
              ROWS BETWEEN CURRENT ROW AND 5 FOLLOWING) AS AP
   ,MIN(CASE WHEN EVT_CD = 'RI' AND EVST_CD = 'RE' THEN TA END)
        OVER (PARTITION BY CAR_NUMB
              ORDER BY EVT_DT, EVT_TM
              ROWS BETWEEN CURRENT ROW AND 5 FOLLOWING) AS RIRE
   ,MIN(CASE WHEN EVT_CD = 'RI' AND EVST_CD = 'RL' THEN TA END)
        OVER (PARTITION BY CAR_NUMB
              ORDER BY EVT_DT, EVT_TM
              ROWS BETWEEN CURRENT ROW AND 5 FOLLOWING) AS RIRL
   ,MIN(CASE WHEN EVT_CD = 'TD' THEN TA END)
        OVER (PARTITION BY CAR_NUMB
              ORDER BY EVT_DT, EVT_TM
              ROWS BETWEEN CURRENT ROW AND 5 FOLLOWING) AS TD
FROM evt
-- Keep only the row that starts each visit; the window values were computed
-- over all rows before this filter is applied.
QUALIFY EVT_CD = 'TA'
如果 TA 和 TD 之间的行数可能超过 5,则必须相应地调整 ROWS 的范围。如果行数变化很大并且某些 EVT_CD 缺失,此方法可能会出错,返回下一趟行程的数据。这个问题可以通过再增加一个步骤来解决:
-- Post-filter for the look-ahead query: blanks out AP / RIRE / RIRL values
-- that are not earlier than the TD value returned on the same row.
SELECT
CAR_NUMB
,TA
-- CASE without ELSE returns NULL when the comparison is not true; that
-- includes the case where TD itself is NULL, so all three columns come back
-- NULL when no TD was found.
,CASE WHEN AP < TD THEN AP END AS AP
,CASE WHEN RIRE < TD THEN RIRE END AS RIRE
,CASE WHEN RIRL < TD THEN RIRL END AS RIRL
,TD
FROM
(
-- Placeholder, not SQL: substitute the preceding look-ahead query here.
previous query
) AS dt
如果 TD 可能缺失,您可以换一种方法:为每一行找出它之前最近的 TA 时间戳,并按该时间戳分组:
-- One row per car and visit. A visit is identified by the timestamp of the
-- latest TA event at or before each row, so no TD event is required.
WITH evt AS (
    -- Build the combined date + time timestamp once per row.
    SELECT
        CAR_NUMB
       ,EVT_DT
       ,EVT_TM
       ,EVT_CD
       ,EVST_CD
       ,CAST(EVT_DT AS TIMESTAMP(0)) + (EVT_TM - TIME '00:00:00' HOUR TO SECOND) AS EVT_TS
    FROM tab
),
trip AS (
    -- Running MAX over the TA timestamps tags every row with the arrival
    -- that precedes it; rows before a car's first TA get a NULL tag.
    SELECT
        CAR_NUMB
       ,EVT_CD
       ,EVST_CD
       ,EVT_TS
       ,MAX(CASE WHEN EVT_CD = 'TA' THEN EVT_TS END)
            OVER (PARTITION BY CAR_NUMB
                  ORDER BY EVT_DT, EVT_TM
                  ROWS UNBOUNDED PRECEDING) AS TA
    FROM evt
    -- maybe QUALIFY TA IS NOT NULL? (would drop the rows tagged NULL)
)
SELECT
    CAR_NUMB
   ,TA
    -- Earliest timestamp of each event type within the visit; NULL if absent.
   ,MIN(CASE WHEN EVT_CD = 'AP' THEN EVT_TS END) AS AP
   ,MIN(CASE WHEN EVT_CD = 'RI' AND EVST_CD = 'RE' THEN EVT_TS END) AS RIRE
   ,MIN(CASE WHEN EVT_CD = 'RI' AND EVST_CD = 'RL' THEN EVT_TS END) AS RIRL
   ,MIN(CASE WHEN EVT_CD = 'TD' THEN EVT_TS END) AS TD
FROM trip
GROUP BY
    CAR_NUMB
   ,TA
Teradata - 下面是到达车站的两节铁路车辆的事件记录。773 号车先卸空 (RI/RE),再装车 (RI/RL),然后离站 (TD);819 号车只是卸空后离站。我想得到下方 "Desired output" 所示的结果表。我试过按车号分组并使用 CASE 语句,但同一辆车会多次经过同一车站,所以 MAX 和 MIN 给出的结果不可预测。我读过有关分区的资料,但很难想明白该如何应用。任何帮助都不胜感激。
*注意:最好把初始搜索限制为 EVT_CD='TA' AND STN=DEST,以排除只是途经车站的车辆。但我不能把整个记录集都限制为该条件,因为 TD 记录的目的地不同。
CAR_NUMB EVT_DT EVT_TM EVT_CD EVST_CD WB_ID STN DEST
773 03/08/2016 19.05.00 TA 582016 BOSTON BOSTON
773 03/12/2016 04.04.00 AP PU 582016 BOSTON BOSTON
773 03/12/2016 14.35.00 RI RE 412016 BOSTON BOSTON
773 03/12/2016 14.37.00 AP PL 412016 BOSTON BOSTON
773 03/12/2016 14.45.00 RI RL 812016 BOSTON HOUSTON
773 03/14/2016 12.22.00 TD 812016 BOSTON HOUSTON
819 03/04/2016 17.50.00 TA 362016 STLOUIS STLOUIS
819 03/06/2016 13.50.00 AP PU 362016 STLOUIS STLOUIS
819 03/06/2016 17.27.55 RI RE 042016 STLOUIS STLOUIS
819 03/07/2016 00.37.00 RI PR 042016 STLOUIS PORTLAND
819 03/11/2016 01.47.00 TD 042016 STLOUIS PORTLAND
Desired output:
CAR_NUMB TA AP RIRE RIRL TD
773 03/08/2016 19.05.00 03/12/20.. 03/12/20.. 03/12/20.. 03/14/2016 12.22.00
819 03/04/2016 17.50.00 03/06/20.. 03/06/20.. null 03/11/2016 01.47.00
上面的 [..] 表示为了排版而被我截断的时间戳。
您可以在窗口聚合函数中使用 CASE,在当前行及其后的 5 行中查找给定 EVT_CD 的最小日期/时间。
我把日期和时间合并成了一个时间戳,因为这样更便于处理:
-- One output row per arrival event (EVT_CD = 'TA'): every column of tab, the
-- row's own timestamp (TA), and the earliest AP, RI/RE, RI/RL and TD
-- timestamps found in that row and the 5 rows after it for the same car.
WITH evt AS (
    -- Build the combined date + time timestamp once per row; it is exposed
    -- under the name TA because that is the column the final result reports.
    SELECT
        tab.*
       ,CAST(EVT_DT AS TIMESTAMP(0)) + (EVT_TM - TIME '00:00:00' HOUR TO SECOND) AS TA
    FROM tab
)
SELECT
    evt.*
    -- Each MIN ignores the NULLs produced for non-matching rows, so it yields
    -- the first matching event inside the look-ahead frame (or NULL if none).
   ,MIN(CASE WHEN EVT_CD = 'AP' THEN TA END)
        OVER (PARTITION BY CAR_NUMB
              ORDER BY EVT_DT, EVT_TM
              ROWS BETWEEN CURRENT ROW AND 5 FOLLOWING) AS AP
   ,MIN(CASE WHEN EVT_CD = 'RI' AND EVST_CD = 'RE' THEN TA END)
        OVER (PARTITION BY CAR_NUMB
              ORDER BY EVT_DT, EVT_TM
              ROWS BETWEEN CURRENT ROW AND 5 FOLLOWING) AS RIRE
   ,MIN(CASE WHEN EVT_CD = 'RI' AND EVST_CD = 'RL' THEN TA END)
        OVER (PARTITION BY CAR_NUMB
              ORDER BY EVT_DT, EVT_TM
              ROWS BETWEEN CURRENT ROW AND 5 FOLLOWING) AS RIRL
   ,MIN(CASE WHEN EVT_CD = 'TD' THEN TA END)
        OVER (PARTITION BY CAR_NUMB
              ORDER BY EVT_DT, EVT_TM
              ROWS BETWEEN CURRENT ROW AND 5 FOLLOWING) AS TD
FROM evt
-- Keep only the row that starts each visit; the window values were computed
-- over all rows before this filter is applied.
QUALIFY EVT_CD = 'TA'
如果 TA 和 TD 之间的行数可能超过 5,则必须相应地调整 ROWS 的范围。如果行数变化很大并且某些 EVT_CD 缺失,此方法可能会出错,返回下一趟行程的数据。这个问题可以通过再增加一个步骤来解决:
-- Post-filter for the look-ahead query: blanks out AP / RIRE / RIRL values
-- that are not earlier than the TD value returned on the same row.
SELECT
CAR_NUMB
,TA
-- CASE without ELSE returns NULL when the comparison is not true; that
-- includes the case where TD itself is NULL, so all three columns come back
-- NULL when no TD was found.
,CASE WHEN AP < TD THEN AP END AS AP
,CASE WHEN RIRE < TD THEN RIRE END AS RIRE
,CASE WHEN RIRL < TD THEN RIRL END AS RIRL
,TD
FROM
(
-- Placeholder, not SQL: substitute the preceding look-ahead query here.
previous query
) AS dt
如果 TD 可能缺失,您可以换一种方法:为每一行找出它之前最近的 TA 时间戳,并按该时间戳分组:
-- One row per car and visit. A visit is identified by the timestamp of the
-- latest TA event at or before each row, so no TD event is required.
WITH evt AS (
    -- Build the combined date + time timestamp once per row.
    SELECT
        CAR_NUMB
       ,EVT_DT
       ,EVT_TM
       ,EVT_CD
       ,EVST_CD
       ,CAST(EVT_DT AS TIMESTAMP(0)) + (EVT_TM - TIME '00:00:00' HOUR TO SECOND) AS EVT_TS
    FROM tab
),
trip AS (
    -- Running MAX over the TA timestamps tags every row with the arrival
    -- that precedes it; rows before a car's first TA get a NULL tag.
    SELECT
        CAR_NUMB
       ,EVT_CD
       ,EVST_CD
       ,EVT_TS
       ,MAX(CASE WHEN EVT_CD = 'TA' THEN EVT_TS END)
            OVER (PARTITION BY CAR_NUMB
                  ORDER BY EVT_DT, EVT_TM
                  ROWS UNBOUNDED PRECEDING) AS TA
    FROM evt
    -- maybe QUALIFY TA IS NOT NULL? (would drop the rows tagged NULL)
)
SELECT
    CAR_NUMB
   ,TA
    -- Earliest timestamp of each event type within the visit; NULL if absent.
   ,MIN(CASE WHEN EVT_CD = 'AP' THEN EVT_TS END) AS AP
   ,MIN(CASE WHEN EVT_CD = 'RI' AND EVST_CD = 'RE' THEN EVT_TS END) AS RIRE
   ,MIN(CASE WHEN EVT_CD = 'RI' AND EVST_CD = 'RL' THEN EVT_TS END) AS RIRL
   ,MIN(CASE WHEN EVT_CD = 'TD' THEN EVT_TS END) AS TD
FROM trip
GROUP BY
    CAR_NUMB
   ,TA