在数据中查找序列并按它分组
Finding sequence in data and grouping by it
我的 Temp_table 的 Phone_number 列中的数据如下所示
1234560200
1234560201
1234560202
2264540300
2264540301
2264540302
2264540303
2264540304
2264540305
2264540306
我想让它找到最后 4 位数字的序列,并找到它的序列的第一个和最后一个数字。例如。
前 3 行的顺序为 0200, 0201, 0202, so First = 0200 and Last = 0202
此查询的最终输出应为
First Last
0200 0202
0300 0306
我尝试了以下查询,但不确定这种方法。
WITH get_nxt_range AS
(
select substr(a.PHONE_NUMBER,7,4) range1,
LEAD(substr(a.PHONE_NUMBER,7,4)) OVER (ORDER BY a.PHONE_NUMBER ) nxt_range
from Temp_table a
)
SELECT range1,nxt_range FROM get_nxt_range
WHERE nxt_range = range1 +1
ORDER BY range1
我认为这样的方法可能有效:
select
min (substr (phone_number, -4, 4)) as first,
max (substr (phone_number, -4, 4)) as last
from temp_table
group by
substr (phone_number, -4, 2)
获取序列的一种方法是使用行号差值方法。这也适用于您的情况:
select substr(phone_number, 1, 6),
min(substr(phone_number, 7, 4)), max(substr(phone_number, 7, 4))
from (select t.*,
(row_number() over (order by phone_number) -
row_number() over (partition by substr(phone_number, 1, 6) order by phone_number)
) as grp
from temp_table t
) t
group by substr(phone_number, 1, 6), grp;
如果 1234560201 1234560203 1234560204 是两个实例,那么这应该有效:
with tt as (
select substr(PHONE_NUMBER,7,4) id from Temp_table
),
t as (
select
t1.id,
case when t3.id is null then 1 else 0 end start,
case when t2.id is null then 1 else 0 end "end"
from tt t1
-- no next adjacent element - we have an end of interval
left outer join tt t2 on t2.id - 1 = t1.id
-- not previous adjacent element - we have a start of interval
left outer join tt t3 on t3.id + 1 = t1.id
-- select starts and ends only
where t2.id is null or t3.id is null
)
-- find nearest end record for each start record (it may be the same record)
select t1.id, (select min(id) from t where id >= t1.id and "end" = 1)
from t t1
where t1.start = 1
SELECT DISTINCT
COALESCE(
first_in_sequence,
LAG( first_in_sequence ) IGNORE NULLS OVER ( ORDER BY phone_number )
) AS first_in_sequence,
COALESCE(
last_in_sequence,
LAG( last_in_sequence ) IGNORE NULLS OVER ( ORDER BY phone_number )
) AS last_in_sequence
FROM (
SELECT phone_number,
CASE phone_number
WHEN LAG( phone_number ) OVER ( ORDER BY phone_number ) + 1
THEN NULL
ELSE phone_number
END AS first_in_sequence,
CASE phone_number
WHEN LEAD( phone_number ) OVER ( ORDER BY phone_number ) - 1
THEN NULL
ELSE phone_number
END AS last_in_sequence
FROM temp_table
);
更新:
CREATE TABLE phone_numbers ( phone_number ) AS
select 1234560200 from dual union all
select 1234560201 from dual union all
select 1234560202 from dual union all
select 2264540300 from dual union all
select 2264540301 from dual union all
select 2264540302 from dual union all
select 2264540303 from dual union all
select 2264540304 from dual union all
select 2264540305 from dual union all
select 2264540306 from dual;
SELECT MIN( phone_number ) AS first_in_sequence,
MAX( phone_number ) AS last_in_sequence
FROM (
SELECT phone_number,
phone_number - ROW_NUMBER() OVER ( ORDER BY phone_number ) AS grp
FROM phone_numbers
)
GROUP BY grp;
输出:
FIRST_IN_SEQUENCE LAST_IN_SEQUENCE
----------------- ----------------
2264540300 2264540306
1234560200 1234560202
我看到大家已经回答了你的问题。
我只想提出我的变体如何解决这个任务:
with list_num (phone_number) as (
select 1234560200 from dual union all
select 1234560201 from dual union all
select 1234560202 from dual union all
select 2264540300 from dual union all
select 2264540301 from dual union all
select 2264540302 from dual union all
select 2264540303 from dual union all
select 2264540304 from dual union all
select 2264540305 from dual union all
select 2264540306 from dual)
select root as from_value,
max(phone_number) keep (dense_rank last order by lvl) as to_value
from
(select phone_number, level as lvl, CONNECT_BY_ROOT phone_number as root
from
(select phone_number,
decode(phone_number-lag (phone_number) over(order by phone_number),1,1,0) as start_value
from list_num) b
connect by nocycle phone_number = prior phone_number + 1
start with start_value = 0)
group by root
having count(1) > 1
如果您只需要最后 4 个数字,只需 substr 即可。
substr(root,7,4) as from_value,
substr(max(phone_number) keep (dense_rank last order by lvl),7,4) as to_value
谢谢。
我的 Temp_table 的 Phone_number 列中的数据如下所示
1234560200
1234560201
1234560202
2264540300
2264540301
2264540302
2264540303
2264540304
2264540305
2264540306
我想让它找到最后 4 位数字的序列,并找到它的序列的第一个和最后一个数字。例如。
前 3 行的顺序为 0200, 0201, 0202, so First = 0200 and Last = 0202
此查询的最终输出应为
First Last
0200 0202
0300 0306
我尝试了以下查询,但不确定这种方法。
WITH get_nxt_range AS
(
select substr(a.PHONE_NUMBER,7,4) range1,
LEAD(substr(a.PHONE_NUMBER,7,4)) OVER (ORDER BY a.PHONE_NUMBER ) nxt_range
from Temp_table a
)
SELECT range1,nxt_range FROM get_nxt_range
WHERE nxt_range = range1 +1
ORDER BY range1
我认为这样的方法可能有效:
select
min (substr (phone_number, -4, 4)) as first,
max (substr (phone_number, -4, 4)) as last
from temp_table
group by
substr (phone_number, -4, 2)
获取序列的一种方法是使用行号差值方法。这也适用于您的情况:
select substr(phone_number, 1, 6),
min(substr(phone_number, 7, 4)), max(substr(phone_number, 7, 4))
from (select t.*,
(row_number() over (order by phone_number) -
row_number() over (partition by substr(phone_number, 1, 6) order by phone_number)
) as grp
from temp_table t
) t
group by substr(phone_number, 1, 6), grp;
如果 1234560201 1234560203 1234560204 是两个实例,那么这应该有效:
with tt as (
select substr(PHONE_NUMBER,7,4) id from Temp_table
),
t as (
select
t1.id,
case when t3.id is null then 1 else 0 end start,
case when t2.id is null then 1 else 0 end "end"
from tt t1
-- no next adjacent element - we have an end of interval
left outer join tt t2 on t2.id - 1 = t1.id
-- not previous adjacent element - we have a start of interval
left outer join tt t3 on t3.id + 1 = t1.id
-- select starts and ends only
where t2.id is null or t3.id is null
)
-- find nearest end record for each start record (it may be the same record)
select t1.id, (select min(id) from t where id >= t1.id and "end" = 1)
from t t1
where t1.start = 1
SELECT DISTINCT
COALESCE(
first_in_sequence,
LAG( first_in_sequence ) IGNORE NULLS OVER ( ORDER BY phone_number )
) AS first_in_sequence,
COALESCE(
last_in_sequence,
LAG( last_in_sequence ) IGNORE NULLS OVER ( ORDER BY phone_number )
) AS last_in_sequence
FROM (
SELECT phone_number,
CASE phone_number
WHEN LAG( phone_number ) OVER ( ORDER BY phone_number ) + 1
THEN NULL
ELSE phone_number
END AS first_in_sequence,
CASE phone_number
WHEN LEAD( phone_number ) OVER ( ORDER BY phone_number ) - 1
THEN NULL
ELSE phone_number
END AS last_in_sequence
FROM temp_table
);
更新:
CREATE TABLE phone_numbers ( phone_number ) AS
select 1234560200 from dual union all
select 1234560201 from dual union all
select 1234560202 from dual union all
select 2264540300 from dual union all
select 2264540301 from dual union all
select 2264540302 from dual union all
select 2264540303 from dual union all
select 2264540304 from dual union all
select 2264540305 from dual union all
select 2264540306 from dual;
SELECT MIN( phone_number ) AS first_in_sequence,
MAX( phone_number ) AS last_in_sequence
FROM (
SELECT phone_number,
phone_number - ROW_NUMBER() OVER ( ORDER BY phone_number ) AS grp
FROM phone_numbers
)
GROUP BY grp;
输出:
FIRST_IN_SEQUENCE LAST_IN_SEQUENCE
----------------- ----------------
2264540300 2264540306
1234560200 1234560202
我看到大家已经回答了你的问题。 我只想提出我的变体如何解决这个任务:
with list_num (phone_number) as (
select 1234560200 from dual union all
select 1234560201 from dual union all
select 1234560202 from dual union all
select 2264540300 from dual union all
select 2264540301 from dual union all
select 2264540302 from dual union all
select 2264540303 from dual union all
select 2264540304 from dual union all
select 2264540305 from dual union all
select 2264540306 from dual)
select root as from_value,
max(phone_number) keep (dense_rank last order by lvl) as to_value
from
(select phone_number, level as lvl, CONNECT_BY_ROOT phone_number as root
from
(select phone_number,
decode(phone_number-lag (phone_number) over(order by phone_number),1,1,0) as start_value
from list_num) b
connect by nocycle phone_number = prior phone_number + 1
start with start_value = 0)
group by root
having count(1) > 1
如果您只需要最后 4 个数字,只需 substr 即可。
substr(root,7,4) as from_value,
substr(max(phone_number) keep (dense_rank last order by lvl),7,4) as to_value
谢谢。