SQL 查询以拆分并仅保留前 N 个值
SQL query to split and keep only the top N values
我有以下 table 数据:
| name |items |
--------------------
| Bob |1, 2, 3 |
| Rick |5, 3, 8, 4|
| Bill |2, 4 |
我需要创建一个 table 并拆分 items 列,但限制为最多 N items 每个 name。例如。对于 N = 3,table 应如下所示:
|name |item|
-----------
|Bob |1 |
|Bob |2 |
|Bob |3 |
|Rick |5 |
|Rick |3 |
|Rick |8 |
|Bill |2 |
|Bill |4 |
我有以下查询正确拆分 items,但没有考虑最大数量 N。我应该在查询中修改什么(标准 SQL , BigQuery)占N?
WITH data_split AS (
SELECT name, SPLIT(items,',') AS item
FROM (
SELECT name, items
-- A lot of additional logic here
FROM data
)
)
SELECT name, item
FROM data_split
CROSS JOIN UNNEST(data_split.item) AS item
您可以尝试一种更半标准的方式 - 几乎无处不在:
WITH
-- your input ...
indata(id,nam,items) AS ( -- need a sorting column "id" to keep the sort order
SELECT 1, 'Bob' ,'1,2,3' -- blanks after comma can irritate
UNION ALL SELECT 2, 'Rick','5,3,8,4' -- the splitting function below ...
UNION ALL SELECT 3, 'Bill','2,4'
)
-- real query starts here, replace comma below with "WITH" ...
,
-- exactly 3 integers
i(i) AS (
SELECT 1 -- need to add FROM DUAL , in Oracle, for example ...
UNION ALL SELECT 2
UNION ALL SELECT 3
)
SELECT
id
, nam
, SPLIT(items,',',i) AS item -- SPLIT_PART in other DBMS-s
FROM indata CROSS JOIN i
WHERE SPLIT_PART(items,',',i) <> ''
ORDER BY 1, 3
;
-- out id | nam | item
-- out ----+------+------
-- out 1 | Bob | 1
-- out 1 | Bob | 2
-- out 1 | Bob | 3
-- out 2 | Rick | 3
-- out 2 | Rick | 5
-- out 2 | Rick | 8
-- out 3 | Bill | 2
-- out 3 | Bill | 4
考虑以下方法 (BigQuery)
select name, trim(item) item
from your_table, unnest(split(items)) item with offset
where offset < 3
如果应用于您问题中的示例数据 - 输出为
我有以下 table 数据:
| name |items |
--------------------
| Bob |1, 2, 3 |
| Rick |5, 3, 8, 4|
| Bill |2, 4 |
我需要创建一个 table 并拆分 items 列,但限制为最多 N items 每个 name。例如。对于 N = 3,table 应如下所示:
|name |item|
-----------
|Bob |1 |
|Bob |2 |
|Bob |3 |
|Rick |5 |
|Rick |3 |
|Rick |8 |
|Bill |2 |
|Bill |4 |
我有以下查询正确拆分 items,但没有考虑最大数量 N。我应该在查询中修改什么(标准 SQL , BigQuery)占N?
WITH data_split AS (
SELECT name, SPLIT(items,',') AS item
FROM (
SELECT name, items
-- A lot of additional logic here
FROM data
)
)
SELECT name, item
FROM data_split
CROSS JOIN UNNEST(data_split.item) AS item
您可以尝试一种更半标准的方式 - 几乎无处不在:
WITH
-- your input ...
indata(id,nam,items) AS ( -- need a sorting column "id" to keep the sort order
SELECT 1, 'Bob' ,'1,2,3' -- blanks after comma can irritate
UNION ALL SELECT 2, 'Rick','5,3,8,4' -- the splitting function below ...
UNION ALL SELECT 3, 'Bill','2,4'
)
-- real query starts here, replace comma below with "WITH" ...
,
-- exactly 3 integers
i(i) AS (
SELECT 1 -- need to add FROM DUAL , in Oracle, for example ...
UNION ALL SELECT 2
UNION ALL SELECT 3
)
SELECT
id
, nam
, SPLIT(items,',',i) AS item -- SPLIT_PART in other DBMS-s
FROM indata CROSS JOIN i
WHERE SPLIT_PART(items,',',i) <> ''
ORDER BY 1, 3
;
-- out id | nam | item
-- out ----+------+------
-- out 1 | Bob | 1
-- out 1 | Bob | 2
-- out 1 | Bob | 3
-- out 2 | Rick | 3
-- out 2 | Rick | 5
-- out 2 | Rick | 8
-- out 3 | Bill | 2
-- out 3 | Bill | 4
考虑以下方法 (BigQuery)
select name, trim(item) item
from your_table, unnest(split(items)) item with offset
where offset < 3
如果应用于您问题中的示例数据 - 输出为