按列中相同值分区求最小时间戳
Min time stamp partition by same value in column
我有如下数据集,想从中获取单个列中相同值对应的最小时间戳。
这是我的数据集。
我想创建一个列,为每次与经销商的互动找出 user_first_comment。
如下所示:
是否有什么方法可以为列中的相同值添加递增编号?
例如添加一列 RANK_Incremental,这样我就可以使用 MIN 配合 PARTITION BY 来获得最终输出。
如果数据库支持公用表表达式(CTE,或派生表)和窗口函数,请尝试以下操作:
-- Gaps-and-islands: number each run of consecutive rows (in note_date order)
-- that share a note_type within an incident, then attach the run's first date.
WITH flagged AS (
    -- edge is 0 only when the previous row's note_type equals this row's;
    -- otherwise 1, including the first row of an incident where LAG is NULL.
    SELECT
        *
        , CASE
            WHEN LAG(note_type) OVER (PARTITION BY incident ORDER BY note_date) = note_type
                THEN 0
            ELSE 1
        END AS edge
    FROM incidents
)
, numbered AS (
    -- Running total of edge: every row of the same run gets the same grp.
    SELECT
        *
        , SUM(edge) OVER (PARTITION BY incident ORDER BY note_date) AS grp
    FROM flagged
)
-- first_date is the note_date of the first row of each (incident, grp) run.
SELECT
    *
    , FIRST_VALUE(note_date) OVER (PARTITION BY incident, grp ORDER BY note_date) AS first_date
FROM numbered
;
结果:
+----------+---------------------+----------------+------+------+---------------------+
| incident | note_date | note_type | edge | grp | first_date |
+----------+---------------------+----------------+------+------+---------------------+
| 5498091 | 2021-12-15 17:20:00 | USER_COMMENT | 1 | 1 | 2021-12-15 17:20:00 |
| 5498091 | 2021-12-15 17:21:00 | USER_COMMENT | 0 | 1 | 2021-12-15 17:20:00 |
| 5498091 | 2021-12-15 17:55:00 | DEALER_COMMENT | 1 | 2 | 2021-12-15 17:55:00 |
| 5498091 | 2021-12-15 17:59:00 | USER_COMMENT | 1 | 3 | 2021-12-15 17:59:00 |
| 5498091 | 2021-12-16 11:02:00 | USER_COMMENT | 0 | 3 | 2021-12-15 17:59:00 |
| 5498091 | 2021-12-16 16:46:00 | DEALER_COMMENT | 1 | 4 | 2021-12-16 16:46:00 |
+----------+---------------------+----------------+------+------+---------------------+
设置:
-- Demo table read by the window-function query in this post.
CREATE TABLE incidents (
    incident    INT            -- partition key used by the query
    , note_date DATETIME       -- ordering column within an incident
    , note_type VARCHAR(20)    -- sample values: 'USER_COMMENT', 'DEALER_COMMENT'
)
;
-- Sample rows for a single incident, alternating between user and dealer notes.
-- The explicit column list keeps this insert correct even if columns are later
-- added to, or reordered in, the incidents table.
INSERT INTO incidents (incident, note_date, note_type) VALUES
(5498091, '2021-12-15 17:20', 'USER_COMMENT')
, (5498091, '2021-12-15 17:21', 'USER_COMMENT')
, (5498091, '2021-12-15 17:55', 'DEALER_COMMENT')
, (5498091, '2021-12-15 17:59', 'USER_COMMENT')
, (5498091, '2021-12-16 11:02', 'USER_COMMENT')
, (5498091, '2021-12-16 16:46', 'DEALER_COMMENT')
;
我有如下数据集,想从中获取单个列中相同值对应的最小时间戳。
这是我的数据集。
我想创建一个列,为每次与经销商的互动找出 user_first_comment。
如下所示:
是否有什么方法可以为列中的相同值添加递增编号? 例如添加一列 RANK_Incremental,这样我就可以使用 MIN 配合 PARTITION BY 来获得最终输出。
如果数据库支持公用表表达式(CTE,或派生表)和窗口函数,请尝试以下操作:
-- Gaps-and-islands: number each run of consecutive rows (in note_date order)
-- that share a note_type within an incident, then attach the run's first date.
WITH flagged AS (
    -- edge is 0 only when the previous row's note_type equals this row's;
    -- otherwise 1, including the first row of an incident where LAG is NULL.
    SELECT
        *
        , CASE
            WHEN LAG(note_type) OVER (PARTITION BY incident ORDER BY note_date) = note_type
                THEN 0
            ELSE 1
        END AS edge
    FROM incidents
)
, numbered AS (
    -- Running total of edge: every row of the same run gets the same grp.
    SELECT
        *
        , SUM(edge) OVER (PARTITION BY incident ORDER BY note_date) AS grp
    FROM flagged
)
-- first_date is the note_date of the first row of each (incident, grp) run.
SELECT
    *
    , FIRST_VALUE(note_date) OVER (PARTITION BY incident, grp ORDER BY note_date) AS first_date
FROM numbered
;
结果:
+----------+---------------------+----------------+------+------+---------------------+
| incident | note_date | note_type | edge | grp | first_date |
+----------+---------------------+----------------+------+------+---------------------+
| 5498091 | 2021-12-15 17:20:00 | USER_COMMENT | 1 | 1 | 2021-12-15 17:20:00 |
| 5498091 | 2021-12-15 17:21:00 | USER_COMMENT | 0 | 1 | 2021-12-15 17:20:00 |
| 5498091 | 2021-12-15 17:55:00 | DEALER_COMMENT | 1 | 2 | 2021-12-15 17:55:00 |
| 5498091 | 2021-12-15 17:59:00 | USER_COMMENT | 1 | 3 | 2021-12-15 17:59:00 |
| 5498091 | 2021-12-16 11:02:00 | USER_COMMENT | 0 | 3 | 2021-12-15 17:59:00 |
| 5498091 | 2021-12-16 16:46:00 | DEALER_COMMENT | 1 | 4 | 2021-12-16 16:46:00 |
+----------+---------------------+----------------+------+------+---------------------+
设置:
-- Demo table read by the window-function query in this post.
CREATE TABLE incidents (
    incident    INT            -- partition key used by the query
    , note_date DATETIME       -- ordering column within an incident
    , note_type VARCHAR(20)    -- sample values: 'USER_COMMENT', 'DEALER_COMMENT'
)
;
-- Sample rows for a single incident, alternating between user and dealer notes.
-- The explicit column list keeps this insert correct even if columns are later
-- added to, or reordered in, the incidents table.
INSERT INTO incidents (incident, note_date, note_type) VALUES
(5498091, '2021-12-15 17:20', 'USER_COMMENT')
, (5498091, '2021-12-15 17:21', 'USER_COMMENT')
, (5498091, '2021-12-15 17:55', 'DEALER_COMMENT')
, (5498091, '2021-12-15 17:59', 'USER_COMMENT')
, (5498091, '2021-12-16 11:02', 'USER_COMMENT')
, (5498091, '2021-12-16 16:46', 'DEALER_COMMENT')
;