按顺序分组
Group by rows which are in sequence
想想我有一个这样的 table
PASSENGER CITY DATE
43 NEW YORK 1-Jan-21
44 CHICAGO 4-Jan-21
43 NEW YORK 2-Jan-21
43 NEW YORK 3-Jan-21
44 ROME 5-Jan-21
43 LONDON 4-Jan-21
44 CHICAGO 6-Jan-21
44 CHICAGO 7-Jan-21
如何按顺序对 Passenger 和 City 列进行分组以获得如下结果?
PASSENGER CITY COUNT
43 NEW YORK 3
44 CHICAGO 1
44 ROME 1
43 LONDON 1
44 CHICAGO 2
从 Oracle 12 开始,您可以使用 MATCH_RECOGNIZE
:
SELECT *
FROM table_name
MATCH_RECOGNIZE (
PARTITION BY passenger
ORDER BY "DATE"
MEASURES
FIRST(city) AS city,
COUNT(*) AS count
PATTERN (same_city+)
DEFINE
same_city AS FIRST(city) = city
);
其中,对于示例数据:
CREATE TABLE table_name (PASSENGER, CITY, "DATE") AS
SELECT 43, 'NEW YORK', DATE '2021-01-01' FROM DUAL UNION ALL
SELECT 44, 'CHICAGO', DATE '2021-01-04' FROM DUAL UNION ALL
SELECT 43, 'NEW YORK', DATE '2021-01-02' FROM DUAL UNION ALL
SELECT 43, 'NEW YORK', DATE '2021-01-03' FROM DUAL UNION ALL
SELECT 44, 'ROME', DATE '2021-01-05' FROM DUAL UNION ALL
SELECT 43, 'LONDON', DATE '2021-01-04' FROM DUAL UNION ALL
SELECT 44, 'CHICAGO', DATE '2021-01-06' FROM DUAL UNION ALL
SELECT 44, 'CHICAGO', DATE '2021-01-07' FROM DUAL
输出:
PASSENGER
CITY
COUNT
43
NEW YORK
3
43
LONDON
1
44
CHICAGO
1
44
ROME
1
44
CHICAGO
2
如果您已经对输入结果集进行了排序(注意:表应该被认为是无序的)并且想要保持顺序则:
SELECT *
FROM (SELECT t.*, ROWNUM AS rn FROM table_name t)
MATCH_RECOGNIZE (
PARTITION BY passenger
ORDER BY RN
MEASURES
FIRST(rn) AS rn,
FIRST("DATE") AS "DATE",
FIRST(city) AS city,
COUNT(*) AS count
PATTERN (same_city+)
DEFINE
same_city AS FIRST(city) = city
)
ORDER BY rn
输出:
PASSENGER
RN
DATE
CITY
COUNT
43
1
01-JAN-21
NEW YORK
3
44
2
04-JAN-21
CHICAGO
1
44
5
05-JAN-21
ROME
1
43
6
04-JAN-21
LONDON
1
44
7
06-JAN-21
CHICAGO
2
db<>fiddle here
处理这种差距和孤岛问题的一种方法是计算差距的排名。
然后在那个排名上分组。
SELECT PASSENGER, CITY
, COUNT(*) AS "Count"
-- , MIN("DATE") AS StartDate
-- , MAX("DATE") AS EndDate
FROM (
SELECT q1.*
, SUM(gap) OVER (PARTITION BY PASSENGER ORDER BY "DATE") as Rnk
FROM (
SELECT PASSENGER, CITY, "DATE"
, CASE
WHEN 1 = TRUNC("DATE")
- TRUNC(LAG("DATE")
OVER (PARTITION BY PASSENGER, CITY ORDER BY "DATE"))
THEN 0 ELSE 1 END as gap
FROM table_name t
) q1
) q2
GROUP BY PASSENGER, CITY, Rnk
ORDER BY MIN("DATE"), PASSENGER
PASSENGER
CITY
Count
43
NEW YORK
3
43
LONDON
1
44
CHICAGO
1
44
ROME
1
44
CHICAGO
2
db<>fiddle here
想想我有一个这样的 table
PASSENGER CITY DATE
43 NEW YORK 1-Jan-21
44 CHICAGO 4-Jan-21
43 NEW YORK 2-Jan-21
43 NEW YORK 3-Jan-21
44 ROME 5-Jan-21
43 LONDON 4-Jan-21
44 CHICAGO 6-Jan-21
44 CHICAGO 7-Jan-21
如何按顺序对 Passenger 和 City 列进行分组以获得如下结果?
PASSENGER CITY COUNT
43 NEW YORK 3
44 CHICAGO 1
44 ROME 1
43 LONDON 1
44 CHICAGO 2
从 Oracle 12 开始,您可以使用 MATCH_RECOGNIZE
:
SELECT *
FROM table_name
MATCH_RECOGNIZE (
PARTITION BY passenger
ORDER BY "DATE"
MEASURES
FIRST(city) AS city,
COUNT(*) AS count
PATTERN (same_city+)
DEFINE
same_city AS FIRST(city) = city
);
其中,对于示例数据:
CREATE TABLE table_name (PASSENGER, CITY, "DATE") AS
SELECT 43, 'NEW YORK', DATE '2021-01-01' FROM DUAL UNION ALL
SELECT 44, 'CHICAGO', DATE '2021-01-04' FROM DUAL UNION ALL
SELECT 43, 'NEW YORK', DATE '2021-01-02' FROM DUAL UNION ALL
SELECT 43, 'NEW YORK', DATE '2021-01-03' FROM DUAL UNION ALL
SELECT 44, 'ROME', DATE '2021-01-05' FROM DUAL UNION ALL
SELECT 43, 'LONDON', DATE '2021-01-04' FROM DUAL UNION ALL
SELECT 44, 'CHICAGO', DATE '2021-01-06' FROM DUAL UNION ALL
SELECT 44, 'CHICAGO', DATE '2021-01-07' FROM DUAL
输出:
PASSENGER CITY COUNT 43 NEW YORK 3 43 LONDON 1 44 CHICAGO 1 44 ROME 1 44 CHICAGO 2
如果您已经对输入结果集进行了排序(注意:表应该被认为是无序的)并且想要保持顺序则:
SELECT *
FROM (SELECT t.*, ROWNUM AS rn FROM table_name t)
MATCH_RECOGNIZE (
PARTITION BY passenger
ORDER BY RN
MEASURES
FIRST(rn) AS rn,
FIRST("DATE") AS "DATE",
FIRST(city) AS city,
COUNT(*) AS count
PATTERN (same_city+)
DEFINE
same_city AS FIRST(city) = city
)
ORDER BY rn
输出:
PASSENGER RN DATE CITY COUNT 43 1 01-JAN-21 NEW YORK 3 44 2 04-JAN-21 CHICAGO 1 44 5 05-JAN-21 ROME 1 43 6 04-JAN-21 LONDON 1 44 7 06-JAN-21 CHICAGO 2
db<>fiddle here
处理这种差距和孤岛问题的一种方法是计算差距的排名。
然后在那个排名上分组。
SELECT PASSENGER, CITY
, COUNT(*) AS "Count"
-- , MIN("DATE") AS StartDate
-- , MAX("DATE") AS EndDate
FROM (
SELECT q1.*
, SUM(gap) OVER (PARTITION BY PASSENGER ORDER BY "DATE") as Rnk
FROM (
SELECT PASSENGER, CITY, "DATE"
, CASE
WHEN 1 = TRUNC("DATE")
- TRUNC(LAG("DATE")
OVER (PARTITION BY PASSENGER, CITY ORDER BY "DATE"))
THEN 0 ELSE 1 END as gap
FROM table_name t
) q1
) q2
GROUP BY PASSENGER, CITY, Rnk
ORDER BY MIN("DATE"), PASSENGER
PASSENGER | CITY | Count |
---|---|---|
43 | NEW YORK | 3 |
43 | LONDON | 1 |
44 | CHICAGO | 1 |
44 | ROME | 1 |
44 | CHICAGO | 2 |
db<>fiddle here