在 ID 列 + select prev/next 日期列中找到差距
Find the gap in ID column + select the prev/next date column
我们有一个 table 和 ID
带间隙的自动增量列。每行还有一个 created
日期。
我们正在尝试为每个间隔 ID(缺失的)找出我们 table 中可用的 prev/next 日期。
我们已经构建了一个 SQL 来识别间隙(来自 here 的解决方案),所以我将它们缓存在 table 中,但是从这里如何找到 prev/next created
日期弥补了原来 table 的差距。
输入:
+----+------------+
| 84 | 1443728132 |
| 91 | 1443728489 |
| 93 | 1443729058 |
| 94 | 1443729200 |
+----+------------+
输出
+--------+------------+------------+
| gap_id | prev_dt | next_dt |
+--------+------------+------------+
| 85 | 1443728132 | 1443728489 |
| 86 | 1443728132 | 1443728489 |
| 87 | 1443728132 | 1443728489 |
| 88 | 1443728132 | 1443728489 |
| 89 | 1443728132 | 1443728489 |
| 90 | 1443728132 | 1443728489 |
| 92 | 1443728489 | 1443729058 |
+--------+------------+------------+
我最后做了 Google BigQuery。
应该适用于 MySQL
以外的大多数数据库
-- Report each run of missing ids as one row: the first and last missing id
-- plus the dt of the rows immediately before and after the gap.
WITH neighbours AS (
    -- Pair every row with the id and dt of the row preceding it in id order.
    SELECT
        id,
        dt,
        LAG(id) OVER (ORDER BY id) AS prev_id,
        LAG(dt) OVER (ORDER BY id) AS prev_dt
    FROM t
)
SELECT
    prev_id + 1 AS gap_start,
    id - 1      AS gap_end,
    prev_dt     AS dt_before_gap,
    dt          AS dt_after_gap,
    'Y'         AS is_gap  -- constant flag kept so the result columns stay the same
FROM neighbours
-- The first row has no predecessor (prev_id IS NULL); the comparison is then
-- not true, so that row is dropped.
WHERE prev_id + 1 <> id
;
我建议将结果分组到范围内:
-- Collapse each run of missing ids into a single row: the first and last
-- missing id plus the dte of the row that follows the gap.
WITH successors AS (
    -- Pair every row with the id and dte of the row following it in id order.
    SELECT
        id,
        LEAD(id)  OVER (ORDER BY id) AS next_id,
        LEAD(dte) OVER (ORDER BY id) AS next_dte
    FROM t
)
SELECT
    MIN(id + 1) AS first_missing_id,
    next_id - 1 AS last_missing_id,
    next_dte
FROM successors
-- The last row has no successor (next_id IS NULL); the comparison is then
-- not true, so that row is dropped.
WHERE id + 1 <> next_id
GROUP BY
    next_id,
    next_dte;
获取个人 ID 很棘手。毕竟,如果您有 1, 1000000, 1000000000 那么您可能会生成很多行。
对于 BigQuery 标准 SQL
-- List every missing id between consecutive rows, together with the dt of
-- the rows on either side of the gap (BigQuery Standard SQL).
WITH yourTable AS (
  -- Inline sample data standing in for the real table.
  SELECT 84 AS id, 1443728132 AS dt UNION ALL
  SELECT 91 AS id, 1443728489 AS dt UNION ALL
  SELECT 93 AS id, 1443729058 AS dt UNION ALL
  SELECT 94 AS id, 1443729200 AS dt
),
gaps AS (
  -- One row per source row, paired with its predecessor in id order.
  -- gap_start..gap_end is the range of ids missing between the two rows.
  SELECT
    LAG(id) OVER (ORDER BY id) + 1 AS gap_start,
    id - 1 AS gap_end,
    LAG(dt) OVER (ORDER BY id) AS prev_dt,
    dt AS next_dt
  FROM yourTable
)
SELECT
  gap_id,
  prev_dt,
  next_dt
FROM gaps
-- Expand each gap into just its own ids. This avoids materialising every id
-- between MIN(id) and MAX(id) and range-joining that list against the gaps.
CROSS JOIN UNNEST(GENERATE_ARRAY(gaps.gap_start, gaps.gap_end)) AS gap_id
-- Keeps real gaps only: adjacent ids give gap_start > gap_end, and the first
-- row has a NULL gap_start, so neither passes the comparison.
WHERE gaps.gap_start <= gaps.gap_end
ORDER BY gap_id
输出:
gap_id prev_dt next_dt
85 1443728132 1443728489
86 1443728132 1443728489
87 1443728132 1443728489
88 1443728132 1443728489
89 1443728132 1443728489
90 1443728132 1443728489
92 1443728489 1443729058
我们有一个 table 和 ID
带间隙的自动增量列。每行还有一个 created
日期。
我们正在尝试为每个间隔 ID(缺失的)找出我们 table 中可用的 prev/next 日期。
我们已经构建了一个 SQL 来识别间隙(来自 here 的解决方案),所以我将它们缓存在 table 中,但是从这里如何找到 prev/next created
日期弥补了原来 table 的差距。
输入:
+----+------------+
| 84 | 1443728132 |
| 91 | 1443728489 |
| 93 | 1443729058 |
| 94 | 1443729200 |
+----+------------+
输出
+--------+------------+------------+
| gap_id | prev_dt | next_dt |
+--------+------------+------------+
| 85 | 1443728132 | 1443728489 |
| 86 | 1443728132 | 1443728489 |
| 87 | 1443728132 | 1443728489 |
| 88 | 1443728132 | 1443728489 |
| 89 | 1443728132 | 1443728489 |
| 90 | 1443728132 | 1443728489 |
| 92 | 1443728489 | 1443729058 |
+--------+------------+------------+
我最后做了 Google BigQuery。
应该适用于 MySQL
以外的大多数数据库
-- Report each run of missing ids as one row: the first and last missing id
-- plus the dt of the rows immediately before and after the gap.
WITH neighbours AS (
    -- Pair every row with the id and dt of the row preceding it in id order.
    SELECT
        id,
        dt,
        LAG(id) OVER (ORDER BY id) AS prev_id,
        LAG(dt) OVER (ORDER BY id) AS prev_dt
    FROM t
)
SELECT
    prev_id + 1 AS gap_start,
    id - 1      AS gap_end,
    prev_dt     AS dt_before_gap,
    dt          AS dt_after_gap,
    'Y'         AS is_gap  -- constant flag kept so the result columns stay the same
FROM neighbours
-- The first row has no predecessor (prev_id IS NULL); the comparison is then
-- not true, so that row is dropped.
WHERE prev_id + 1 <> id
;
我建议将结果分组到范围内:
-- Collapse each run of missing ids into a single row: the first and last
-- missing id plus the dte of the row that follows the gap.
WITH successors AS (
    -- Pair every row with the id and dte of the row following it in id order.
    SELECT
        id,
        LEAD(id)  OVER (ORDER BY id) AS next_id,
        LEAD(dte) OVER (ORDER BY id) AS next_dte
    FROM t
)
SELECT
    MIN(id + 1) AS first_missing_id,
    next_id - 1 AS last_missing_id,
    next_dte
FROM successors
-- The last row has no successor (next_id IS NULL); the comparison is then
-- not true, so that row is dropped.
WHERE id + 1 <> next_id
GROUP BY
    next_id,
    next_dte;
获取个人 ID 很棘手。毕竟,如果您有 1, 1000000, 1000000000 那么您可能会生成很多行。
对于 BigQuery 标准 SQL
-- List every missing id between consecutive rows, together with the dt of
-- the rows on either side of the gap (BigQuery Standard SQL).
WITH yourTable AS (
  -- Inline sample data standing in for the real table.
  SELECT 84 AS id, 1443728132 AS dt UNION ALL
  SELECT 91 AS id, 1443728489 AS dt UNION ALL
  SELECT 93 AS id, 1443729058 AS dt UNION ALL
  SELECT 94 AS id, 1443729200 AS dt
),
gaps AS (
  -- One row per source row, paired with its predecessor in id order.
  -- gap_start..gap_end is the range of ids missing between the two rows.
  SELECT
    LAG(id) OVER (ORDER BY id) + 1 AS gap_start,
    id - 1 AS gap_end,
    LAG(dt) OVER (ORDER BY id) AS prev_dt,
    dt AS next_dt
  FROM yourTable
)
SELECT
  gap_id,
  prev_dt,
  next_dt
FROM gaps
-- Expand each gap into just its own ids. This avoids materialising every id
-- between MIN(id) and MAX(id) and range-joining that list against the gaps.
CROSS JOIN UNNEST(GENERATE_ARRAY(gaps.gap_start, gaps.gap_end)) AS gap_id
-- Keeps real gaps only: adjacent ids give gap_start > gap_end, and the first
-- row has a NULL gap_start, so neither passes the comparison.
WHERE gaps.gap_start <= gaps.gap_end
ORDER BY gap_id
输出:
gap_id prev_dt next_dt
85 1443728132 1443728489
86 1443728132 1443728489
87 1443728132 1443728489
88 1443728132 1443728489
89 1443728132 1443728489
90 1443728132 1443728489
92 1443728489 1443729058