仅缺口月和孤岛月
Gaps and Islands Months only
我正在处理一个包含客户及其日期记录的数据集。我想仅按月份(目前存为 VARCHAR 类型的 'YYYYMM')来解决间隙与孤岛(gaps and islands)问题:需要把每个人的记录按连续的月份区间分组(无论年份如何)。我不知道如何从下面的数据:
-- Test fixture for the month-level gaps-and-islands question (Oracle syntax).
-- Rebuilds RUNNING_LOG from scratch. On a first run the DROP fails because the
-- table does not exist yet; that error can be ignored.
drop table running_log;

create table running_log (
    run_date          date not null,
    time_in_seconds   int  not null,
    distance_in_miles int  not null,
    serv_yrmo         varchar2(6)   -- month of the record as 'YYYYMM' text
);

-- Column lists are spelled out so the inserts keep working if the table
-- definition is reordered or extended.
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2018-01-01',  420, 1, '201801');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2018-01-02', 2400, 5, '201801');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2018-01-03', 2430, 5, '201801');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2018-01-06', 2350, 5, '201801');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2018-02-07',  410, 1, '201802');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2018-02-10',  400, 1, '201802');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2018-02-13', 2300, 5, '201802');
-- NOTE(review): serv_yrmo '201803' does not match run_date 2018-12-31. The
-- value is kept exactly as posted; derive the month from run_date instead.
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2018-12-31',  425, 1, '201803');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2019-01-01',  422, 1, '201901');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2019-01-06', 2350, 5, '201901');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2019-02-07',  410, 1, '201902');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2019-02-10',  400, 1, '201902');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2019-02-13', 2300, 5, '201902');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2019-03-14',  425, 1, '201903');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2019-03-15',  422, 1, '201903');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2020-03-01',  425, 1, '202003');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2021-03-31',  422, 1, '202103');
commit;

-- Show the loaded rows in a deterministic order.
select run_date,
       time_in_seconds,
       distance_in_miles,
       serv_yrmo
from running_log
order by run_date;
转换为:
一种方法是使用 dense_rank()
并将日期截断为月份:
-- Month-level gaps and islands over RUNNING_LOG.
-- seqnum numbers the distinct months in order. Shifting each month start back
-- by seqnum months gives a value that stays constant across a run of
-- consecutive months and changes after every gap, so it serves as the
-- island key.
select to_char(min(run_date), 'YYYY-MM') as min_month,
       to_char(max(run_date), 'YYYY-MM') as max_month,
       sum(distance_in_miles)            as total_distance
from (select rl.*,
             dense_rank() over (order by trunc(run_date, 'Month')) as seqnum
      from running_log rl
     ) rl
group by trunc(run_date, 'Month') - seqnum * interval '1' month
order by min(run_date);
下面的解决方案使用 tabibitosan 方法创建组。如果您不熟悉这个概念,可以用 Google 搜索——您会找到很多关于它的好文章。(它有时也称为 "fixed differences" 方法。)该方法的核心是在子查询中创建组;请单独选中并运行子查询(不带外层查询),看看它做了什么。请特别注意子查询中的 GRP 列;如果您想知道它是如何起作用的,那正是您需要阅读该方法相关资料的地方。
正如我在您的问题下的评论中所解释的那样,SERV_YRMO 列是不需要的(如果它是根据 RUN_DATE 的值计算得出的),而且您的 INSERT 语句在该列中的值确实有错误。下面的解决方案只使用 RUN_DATE——您完全可以删除 SERV_YRMO 列,它只会带来麻烦。
另请注意,正如我在您问题下的另一条评论中指出的那样,您的算术似乎是错误的。由于这个原因,我的输出与你的不同。
-- Month-level gaps and islands using the tabibitosan ("fixed differences")
-- technique.
with monthly as (
    -- Reduce every run to the first day of its month.
    select run_date,
           distance_in_miles,
           trunc(run_date, 'mm') as month_start
    from running_log
),
grouped as (
    -- Shift each month start back by its dense rank (in months). The result
    -- is identical for consecutive months and differs across a gap.
    select run_date,
           distance_in_miles,
           add_months(month_start,
                      -dense_rank() over (order by month_start)) as grp
    from monthly
)
select to_char(min(run_date), 'yyyymm') as min_yrmo,
       to_char(max(run_date), 'yyyymm') as max_yrmo,
       sum(distance_in_miles)           as total_distance
from grouped
group by grp
order by min_yrmo;
MIN_YRMO MAX_YRMO TOTAL_DISTANCE
-------- -------- --------------
201801 201802 23
201812 201903 16
202003 202003 1
202103 202103 1
编辑
OP 使用的是 Oracle 11 的某个版本。不过,对于遇到相同问题且使用 Oracle 12.1 或更高版本的读者,可以用 MATCH_RECOGNIZE 得到更高效的解决方案。它看起来像这样:
-- Month-level gaps and islands with MATCH_RECOGNIZE (Oracle 12.1 or later).
-- Each match is one island: a starting row followed by any number of rows
-- whose run_date falls in the same month as the previous row or the month
-- right after it.
select min_yrmo,
       max_yrmo,
       total_distance
from running_log
match_recognize (
    order by run_date
    measures
        to_char(first(run_date), 'yyyymm') as min_yrmo,
        to_char(last(run_date), 'yyyymm')  as max_yrmo,
        sum(distance_in_miles)             as total_distance
    one row per match
    after match skip past last row
    pattern ( island_start same_or_next_month* )
    define
        -- "before the start of the month two months after the previous row's
        -- month" means the same month or the next one
        same_or_next_month as
            run_date < add_months(trunc(prev(run_date), 'mm'), 2)
);
请查看下面的 SQL,它使用 tabibitosan 方法查找间隙。结果中 count = 1 的组被标记为 LAKE(湖泊),count > 1 的组被标记为 ISLAND(孤岛)。
-- Day-level gaps and islands (tabibitosan): consecutive run dates share the
-- same grp value. A group with a single row is labelled 'LAKE', a longer one
-- 'ISLAND'.
-- row_number() is used instead of ROWNUM because ROWNUM is assigned before
-- the ORDER BY of the same query block is applied, so it does not reliably
-- follow run_date order.
-- NOTE(review): assumes at most one row per calendar day and no time-of-day
-- component in run_date; confirm against the real data.
-- NOTE(review): the "omc" schema prefix is kept from the original; adjust it
-- to wherever running_log actually lives.
select min(run_date) as first_run_date,
       max(run_date) as last_run_date,
       count(grp)    as run_count,
       case count(grp)
           when 1 then 'LAKE'
           else 'ISLAND'
       end           as island_type
from (
    select run_date,
           run_date - row_number() over (order by run_date) as grp
    from omc.running_log
)
group by grp
order by first_run_date;
我正在处理一个包含客户及其日期记录的数据集。我想仅按月份(目前存为 VARCHAR 类型的 'YYYYMM')来解决间隙与孤岛(gaps and islands)问题:需要把每个人的记录按连续的月份区间分组(无论年份如何)。我不知道如何从下面的数据:
-- Test fixture for the month-level gaps-and-islands question (Oracle syntax).
-- Rebuilds RUNNING_LOG from scratch. On a first run the DROP fails because the
-- table does not exist yet; that error can be ignored.
drop table running_log;

create table running_log (
    run_date          date not null,
    time_in_seconds   int  not null,
    distance_in_miles int  not null,
    serv_yrmo         varchar2(6)   -- month of the record as 'YYYYMM' text
);

-- Column lists are spelled out so the inserts keep working if the table
-- definition is reordered or extended.
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2018-01-01',  420, 1, '201801');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2018-01-02', 2400, 5, '201801');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2018-01-03', 2430, 5, '201801');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2018-01-06', 2350, 5, '201801');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2018-02-07',  410, 1, '201802');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2018-02-10',  400, 1, '201802');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2018-02-13', 2300, 5, '201802');
-- NOTE(review): serv_yrmo '201803' does not match run_date 2018-12-31. The
-- value is kept exactly as posted; derive the month from run_date instead.
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2018-12-31',  425, 1, '201803');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2019-01-01',  422, 1, '201901');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2019-01-06', 2350, 5, '201901');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2019-02-07',  410, 1, '201902');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2019-02-10',  400, 1, '201902');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2019-02-13', 2300, 5, '201902');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2019-03-14',  425, 1, '201903');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2019-03-15',  422, 1, '201903');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2020-03-01',  425, 1, '202003');
insert into running_log (run_date, time_in_seconds, distance_in_miles, serv_yrmo) values (date '2021-03-31',  422, 1, '202103');
commit;

-- Show the loaded rows in a deterministic order.
select run_date,
       time_in_seconds,
       distance_in_miles,
       serv_yrmo
from running_log
order by run_date;
转换为:
一种方法是使用 dense_rank()
并将日期截断为月份:
-- Month-level gaps and islands over RUNNING_LOG.
-- seqnum numbers the distinct months in order. Shifting each month start back
-- by seqnum months gives a value that stays constant across a run of
-- consecutive months and changes after every gap, so it serves as the
-- island key.
select to_char(min(run_date), 'YYYY-MM') as min_month,
       to_char(max(run_date), 'YYYY-MM') as max_month,
       sum(distance_in_miles)            as total_distance
from (select rl.*,
             dense_rank() over (order by trunc(run_date, 'Month')) as seqnum
      from running_log rl
     ) rl
group by trunc(run_date, 'Month') - seqnum * interval '1' month
order by min(run_date);
下面的解决方案使用 tabibitosan 方法创建组。如果您不熟悉这个概念,可以用 Google 搜索——您会找到很多关于它的好文章。(它有时也称为 "fixed differences" 方法。)该方法的核心是在子查询中创建组;请单独选中并运行子查询(不带外层查询),看看它做了什么。请特别注意子查询中的 GRP 列;如果您想知道它是如何起作用的,那正是您需要阅读该方法相关资料的地方。
正如我在您的问题下的评论中所解释的那样,SERV_YRMO 列是不需要的(如果它是根据 RUN_DATE 的值计算得出的),而且您的 INSERT 语句在该列中的值确实有错误。下面的解决方案只使用 RUN_DATE——您完全可以删除 SERV_YRMO 列,它只会带来麻烦。
另请注意,正如我在您问题下的另一条评论中指出的那样,您的算术似乎是错误的。由于这个原因,我的输出与你的不同。
-- Month-level gaps and islands using the tabibitosan ("fixed differences")
-- technique.
with monthly as (
    -- Reduce every run to the first day of its month.
    select run_date,
           distance_in_miles,
           trunc(run_date, 'mm') as month_start
    from running_log
),
grouped as (
    -- Shift each month start back by its dense rank (in months). The result
    -- is identical for consecutive months and differs across a gap.
    select run_date,
           distance_in_miles,
           add_months(month_start,
                      -dense_rank() over (order by month_start)) as grp
    from monthly
)
select to_char(min(run_date), 'yyyymm') as min_yrmo,
       to_char(max(run_date), 'yyyymm') as max_yrmo,
       sum(distance_in_miles)           as total_distance
from grouped
group by grp
order by min_yrmo;
MIN_YRMO MAX_YRMO TOTAL_DISTANCE
-------- -------- --------------
201801 201802 23
201812 201903 16
202003 202003 1
202103 202103 1
编辑
OP 使用的是 Oracle 11 的某个版本。不过,对于遇到相同问题且使用 Oracle 12.1 或更高版本的读者,可以用 MATCH_RECOGNIZE 得到更高效的解决方案。它看起来像这样:
-- Month-level gaps and islands with MATCH_RECOGNIZE (Oracle 12.1 or later).
-- Each match is one island: a starting row followed by any number of rows
-- whose run_date falls in the same month as the previous row or the month
-- right after it.
select min_yrmo,
       max_yrmo,
       total_distance
from running_log
match_recognize (
    order by run_date
    measures
        to_char(first(run_date), 'yyyymm') as min_yrmo,
        to_char(last(run_date), 'yyyymm')  as max_yrmo,
        sum(distance_in_miles)             as total_distance
    one row per match
    after match skip past last row
    pattern ( island_start same_or_next_month* )
    define
        -- "before the start of the month two months after the previous row's
        -- month" means the same month or the next one
        same_or_next_month as
            run_date < add_months(trunc(prev(run_date), 'mm'), 2)
);
请查看下面的 SQL,它使用 tabibitosan 方法查找间隙。结果中 count = 1 的组被标记为 LAKE(湖泊),count > 1 的组被标记为 ISLAND(孤岛)。
-- Day-level gaps and islands (tabibitosan): consecutive run dates share the
-- same grp value. A group with a single row is labelled 'LAKE', a longer one
-- 'ISLAND'.
-- row_number() is used instead of ROWNUM because ROWNUM is assigned before
-- the ORDER BY of the same query block is applied, so it does not reliably
-- follow run_date order.
-- NOTE(review): assumes at most one row per calendar day and no time-of-day
-- component in run_date; confirm against the real data.
-- NOTE(review): the "omc" schema prefix is kept from the original; adjust it
-- to wherever running_log actually lives.
select min(run_date) as first_run_date,
       max(run_date) as last_run_date,
       count(grp)    as run_count,
       case count(grp)
           when 1 then 'LAKE'
           else 'ISLAND'
       end           as island_type
from (
    select run_date,
           run_date - row_number() over (order by run_date) as grp
    from omc.running_log
)
group by grp
order by first_run_date;