重叠条件 - 在字段内
Overlap Condition - Inside a Field
如果我有一个表,其列为 ID, IntervalStartPoints, IntervalEndPoints
其中 StartPoints 包含间隔的开始,EndPoints 是间隔的结束。
例如:
ID: 1000
StartPoints: 94994731,94997876,94998645,95001520,95005812,95007092, EndPoints: 94996152,94998036,94998824,95001720,95005924,95007413,
这里有 6 个区间 <94994731,94996152>, <94997876,94998036>, ...
我们可以写一个查询来检查例如 ID:1000, Start:95005812, End:95005815 是否与任何间隔重叠。
谢谢!
以下适用于 BigQuery 标准 SQL
#standardSQL
-- Lists every stored interval that overlaps a check interval with the same id.
-- StartPoints / EndPoints are comma-separated strings; the N-th start is paired
-- with the N-th end by joining the two UNNESTed arrays on their offsets.
SELECT t.id, StartPoint, EndPoint, interval_start, interval_end
FROM (
  -- NULLIF turns an empty element (produced by SPLIT when the list ends with a
  -- trailing comma) into NULL, so CAST yields NULL instead of raising an error;
  -- such pairs can never satisfy the overlap predicate below and are dropped.
  SELECT id,
    CAST(NULLIF(StartPoint, '') AS INT64) StartPoint,
    CAST(NULLIF(EndPoint, '') AS INT64) EndPoint
  FROM `project.dataset.intervals` t,
  UNNEST(SPLIT(StartPoints)) StartPoint WITH OFFSET pos1
  JOIN UNNEST(SPLIT(EndPoints)) EndPoint WITH OFFSET pos2
  ON pos1 = pos2
) t
JOIN `project.dataset.checks` c ON c.id = t.id
  -- Two closed intervals overlap iff each one starts no later than the other ends.
  -- Checking only whether an endpoint of the check interval lies inside the stored
  -- interval would miss a check interval that fully contains the stored one.
  AND interval_start <= EndPoint
  AND interval_end >= StartPoint
您可以使用问题中的虚拟数据来测试/使用它,如下所示
#standardSQL
-- Self-contained demo: the two CTEs stand in for the real tables so the overlap
-- query can be run as-is.
WITH `project.dataset.intervals` AS (
  SELECT 1000 id,
    '94994731,94997876,94998645,95001520,95005812,95007092' StartPoints,
    '94996152,94998036,94998824,95001720,95005924,95007413' EndPoints
  UNION ALL
  SELECT 2000 id,
    '74994731' StartPoints,
    '74996152' EndPoints
), `project.dataset.checks` AS (
  SELECT 1000 id, 95005812 interval_start, 95005815 interval_end
)
SELECT t.id, StartPoint, EndPoint, interval_start, interval_end
FROM (
  -- The N-th start is paired with the N-th end via matching array offsets.
  -- NULLIF turns an empty element (produced by SPLIT when a list ends with a
  -- trailing comma) into NULL, so CAST yields NULL instead of raising an error;
  -- such pairs can never satisfy the overlap predicate below and are dropped.
  SELECT id,
    CAST(NULLIF(StartPoint, '') AS INT64) StartPoint,
    CAST(NULLIF(EndPoint, '') AS INT64) EndPoint
  FROM `project.dataset.intervals` t,
  UNNEST(SPLIT(StartPoints)) StartPoint WITH OFFSET pos1
  JOIN UNNEST(SPLIT(EndPoints)) EndPoint WITH OFFSET pos2
  ON pos1 = pos2
) t
JOIN `project.dataset.checks` c ON c.id = t.id
  -- Two closed intervals overlap iff each one starts no later than the other ends.
  -- Checking only whether an endpoint of the check interval lies inside the stored
  -- interval would miss a check interval that fully contains the stored one.
  AND interval_start <= EndPoint
  AND interval_end >= StartPoint
结果为
Row id StartPoint EndPoint interval_start interval_end
1 1000 95005812 95005924 95005812 95005815
如果我有一个表,其列为 ID, IntervalStartPoints, IntervalEndPoints
其中 StartPoints 包含间隔的开始,EndPoints 是间隔的结束。
例如:
ID: 1000
StartPoints: 94994731,94997876,94998645,95001520,95005812,95007092, EndPoints: 94996152,94998036,94998824,95001720,95005924,95007413,
这里有 6 个区间 <94994731,94996152>, <94997876,94998036>, ...
我们可以写一个查询来检查例如 ID:1000, Start:95005812, End:95005815 是否与任何间隔重叠。
谢谢!
以下适用于 BigQuery 标准 SQL
#standardSQL
-- Lists every stored interval that overlaps a check interval with the same id.
-- StartPoints / EndPoints are comma-separated strings; the N-th start is paired
-- with the N-th end by joining the two UNNESTed arrays on their offsets.
SELECT t.id, StartPoint, EndPoint, interval_start, interval_end
FROM (
  -- NULLIF turns an empty element (produced by SPLIT when the list ends with a
  -- trailing comma) into NULL, so CAST yields NULL instead of raising an error;
  -- such pairs can never satisfy the overlap predicate below and are dropped.
  SELECT id,
    CAST(NULLIF(StartPoint, '') AS INT64) StartPoint,
    CAST(NULLIF(EndPoint, '') AS INT64) EndPoint
  FROM `project.dataset.intervals` t,
  UNNEST(SPLIT(StartPoints)) StartPoint WITH OFFSET pos1
  JOIN UNNEST(SPLIT(EndPoints)) EndPoint WITH OFFSET pos2
  ON pos1 = pos2
) t
JOIN `project.dataset.checks` c ON c.id = t.id
  -- Two closed intervals overlap iff each one starts no later than the other ends.
  -- Checking only whether an endpoint of the check interval lies inside the stored
  -- interval would miss a check interval that fully contains the stored one.
  AND interval_start <= EndPoint
  AND interval_end >= StartPoint
您可以使用问题中的虚拟数据来测试/使用它,如下所示
#standardSQL
-- Self-contained demo: the two CTEs stand in for the real tables so the overlap
-- query can be run as-is.
WITH `project.dataset.intervals` AS (
  SELECT 1000 id,
    '94994731,94997876,94998645,95001520,95005812,95007092' StartPoints,
    '94996152,94998036,94998824,95001720,95005924,95007413' EndPoints
  UNION ALL
  SELECT 2000 id,
    '74994731' StartPoints,
    '74996152' EndPoints
), `project.dataset.checks` AS (
  SELECT 1000 id, 95005812 interval_start, 95005815 interval_end
)
SELECT t.id, StartPoint, EndPoint, interval_start, interval_end
FROM (
  -- The N-th start is paired with the N-th end via matching array offsets.
  -- NULLIF turns an empty element (produced by SPLIT when a list ends with a
  -- trailing comma) into NULL, so CAST yields NULL instead of raising an error;
  -- such pairs can never satisfy the overlap predicate below and are dropped.
  SELECT id,
    CAST(NULLIF(StartPoint, '') AS INT64) StartPoint,
    CAST(NULLIF(EndPoint, '') AS INT64) EndPoint
  FROM `project.dataset.intervals` t,
  UNNEST(SPLIT(StartPoints)) StartPoint WITH OFFSET pos1
  JOIN UNNEST(SPLIT(EndPoints)) EndPoint WITH OFFSET pos2
  ON pos1 = pos2
) t
JOIN `project.dataset.checks` c ON c.id = t.id
  -- Two closed intervals overlap iff each one starts no later than the other ends.
  -- Checking only whether an endpoint of the check interval lies inside the stored
  -- interval would miss a check interval that fully contains the stored one.
  AND interval_start <= EndPoint
  AND interval_end >= StartPoint
结果为
Row id StartPoint EndPoint interval_start interval_end
1 1000 95005812 95005924 95005812 95005815