数据库查询:GROUPing 提取第一行和最后一行
Database querying: GROUPing extracting first and last row
我有以下 "starting" 查询:
-- Readings with velocidad > 2 and ignicion = 1 inside the window (both
-- bounds inclusive), flagged as overspeed when velocidad exceeds 100.
SELECT
    fecha           AS date,
    velocidad       AS speed,
    velocidad > 100 AS overspeed
FROM reports.avl_historico_354898046636089
WHERE fecha >= '2017-04-19 00:00:00-03'
  AND fecha <= '2017-04-20 00:00:00-03'
  AND ignicion = 1
  AND velocidad > 2
ORDER BY fecha;
产生以下输出:
date speed overspeed
2017-04-19 11:35:41+00,16,f
2017-04-19 11:37:01+00,24,f
2017-04-19 11:37:41+00,72,f
2017-04-19 11:38:21+00,82,f
2017-04-19 11:39:01+00,13,f
2017-04-19 11:39:41+00,68,f
2017-04-19 11:40:21+00,23,f
2017-04-19 11:41:01+00,57,f
2017-04-19 11:41:41+00,97,f
2017-04-19 11:42:21+00,96,f
2017-04-19 11:43:01+00,102,t
2017-04-19 11:43:41+00,104,t
2017-04-19 11:44:21+00,106,t
2017-04-19 11:45:01+00,109,t
2017-04-19 11:45:41+00,109,t
2017-04-19 11:46:21+00,114,t
2017-04-19 11:47:01+00,56,f
2017-04-19 11:47:28+00,54,f
2017-04-19 11:47:41+00,54,f
2017-04-19 11:48:21+00,54,f
2017-04-19 11:49:01+00,102,t
2017-04-19 11:49:07+00,104,t
2017-04-19 11:54:21+00,114,t
2017-04-19 11:55:01+00,118,t
2017-04-19 11:55:41+00,115,t
2017-04-19 11:56:21+00,111,t
2017-04-19 11:57:01+00,85,f
2017-04-19 11:57:41+00,45,f
2017-04-19 11:58:21+00,29,f
2017-04-19 12:00:35+00,4,f
2017-04-19 12:00:36+00,4,f
...
我一直在尝试使用 LAG/LEAD,为 overspeed 列为 TRUE 的每一段连续行获取 first/last 日期,但始终没能得到想要的结果。期望的输出大致如下:
start stop
2017-04-19 11:43:01+00 2017-04-19 11:46:21+00
2017-04-19 11:49:01+00 2017-04-19 11:56:21+00
任何有关如何获得此类输出的想法都将不胜感激。
原始 table DDL:
-- Table definition as posted. The queries in this document expose
-- fecha as "date" and velocidad as "speed".
CREATE TABLE avl_historico_354898046636089 (
    fecha       TIMESTAMP WITH TIME ZONE NOT NULL,
    latitud     DOUBLE PRECISION NOT NULL DEFAULT 0,
    longitud    DOUBLE PRECISION NOT NULL DEFAULT 0,
    altitud     DOUBLE PRECISION NOT NULL DEFAULT 0,
    velocidad   DOUBLE PRECISION NOT NULL DEFAULT 0,
    cog         DOUBLE PRECISION NOT NULL DEFAULT 0,
    nsat        INTEGER NOT NULL DEFAULT 0,
    tipo        CHARACTER(1),
    utc_hora    TIME WITHOUT TIME ZONE,
    fix_fecha   DATE,
    imei        BIGINT NOT NULL,
    registro    TIMESTAMP WITH TIME ZONE,
    input1      INTEGER DEFAULT 0,
    input2      INTEGER DEFAULT 0,
    input3      INTEGER DEFAULT 0,
    input4      INTEGER DEFAULT 0,
    hdop        DOUBLE PRECISION,
    adc         DOUBLE PRECISION DEFAULT -99,
    ignicion    INTEGER DEFAULT 1,
    adc2        DOUBLE PRECISION,
    power       INTEGER,
    driverid    INTEGER,
    ibutton2    INTEGER,
    ibutton3    INTEGER,
    ibutton4    INTEGER,
    trailerid   INTEGER,
    adc3        DOUBLE PRECISION,
    adc4        DOUBLE PRECISION,
    horometro   BIGINT,
    odometro    BIGINT,
    panico      INTEGER DEFAULT 0,
    bateria     DOUBLE PRECISION,
    bateriaint  DOUBLE PRECISION
);
-- Gaps-and-islands: first and last timestamp of every run of consecutive
-- rows (ordered by date) that share the same overspeed value. Runs with
-- overspeed = false are returned as well.
SELECT grp, min(date) AS start, max(date) AS stop
FROM (
  -- Running count of the change markers seen so far. The ORDER BY inside
  -- OVER is required: with an empty OVER () the count covers the whole
  -- result set, every row gets the same grp and only one group comes back.
  SELECT date, speed, count(is_reset) OVER (ORDER BY date) AS grp
  FROM (
    SELECT
      date,
      speed,
      -- 1 where overspeed differs from the previous row, otherwise NULL
      -- (count() skips NULLs). On the first row lag() is NULL, so the
      -- comparison is unknown and no marker is set.
      CASE
        WHEN overspeed <> lag(overspeed) OVER (ORDER BY date) THEN 1
      END AS is_reset
    FROM (
      SELECT fecha AS date, velocidad AS speed, velocidad > 100 AS overspeed
      FROM reports.avl_historico_354898046636089
      WHERE fecha BETWEEN '2017-04-19 00:00:00-03' AND '2017-04-20 00:00:00-03'
        AND velocidad > 2
        AND ignicion = 1
    ) AS t
  ) AS t2
) AS t3
GROUP BY grp
ORDER BY grp;  -- deterministic output order
这是一个分组和 WINDOW 样本。
注意我编辑了一些结果只是为了让它更小。
-- Sample data: one reading per row with the overspeed flag stored explicitly.
CREATE TABLE test (
    fecha     timestamp,
    velocidad int,
    overspeed bool
);

INSERT INTO test (fecha, velocidad, overspeed)
VALUES
    ('2017-04-19 20:18:17+00',  77, FALSE),
    ('2017-04-19 20:18:57+00',  96, FALSE),
    ('2017-04-19 20:19:37+00', 108, TRUE),
    ('2017-04-19 20:20:17+00', 111, TRUE),
    ('2017-04-19 20:20:57+00', 114, TRUE),
    ('2017-04-19 20:21:37+00', 112, TRUE),
    ('2017-04-19 20:22:17+00', 108, FALSE),
    ('2017-04-19 20:22:57+00', 107, FALSE),
    ('2017-04-19 20:23:37+00', 113, FALSE),
    ('2017-04-19 20:24:17+00', 116, TRUE),
    ('2017-04-19 20:24:57+00', 111, TRUE),
    ('2017-04-19 20:25:37+00', 113, TRUE),
    ('2017-04-19 20:26:17+00', 115, FALSE),
    ('2017-04-19 20:26:28+00', 115, FALSE),
    ('2017-04-19 20:26:57+00', 115, TRUE),
    ('2017-04-19 20:27:37+00', 115, TRUE),
    ('2017-04-19 20:27:58+00',  60, FALSE);
-- Number each run of consecutive rows (by fecha) sharing the same overspeed value.
WITH reset_point AS (
    -- is_reset is 1 on the first row and on every row whose overspeed is
    -- not equal to the previous row's value; NULL otherwise.
    SELECT
        fecha,
        velocidad,
        overspeed,
        CASE
            WHEN overspeed = lag(overspeed) OVER by_fecha THEN NULL
            ELSE 1
        END AS is_reset
    FROM test
    WINDOW by_fecha AS (ORDER BY fecha)
),
set_group AS (
    -- count() ignores NULLs, so the running count only advances on reset rows.
    SELECT
        fecha,
        velocidad,
        overspeed,
        count(is_reset) OVER (ORDER BY fecha) AS grp
    FROM reset_point
)
SELECT
    fecha,
    velocidad,
    overspeed,
    grp
FROM set_group;
fecha | velocidad | overspeed | grp
:------------------ | --------: | :-------- | --:
2017-04-19 20:18:17 | 77 | f | 1
2017-04-19 20:18:57 | 96 | f | 1
2017-04-19 20:19:37 | 108 | t | 2
2017-04-19 20:20:17 | 111 | t | 2
2017-04-19 20:20:57 | 114 | t | 2
2017-04-19 20:21:37 | 112 | t | 2
2017-04-19 20:22:17 | 108 | f | 3
2017-04-19 20:22:57 | 107 | f | 3
2017-04-19 20:23:37 | 113 | f | 3
2017-04-19 20:24:17 | 116 | t | 4
2017-04-19 20:24:57 | 111 | t | 4
2017-04-19 20:25:37 | 113 | t | 4
2017-04-19 20:26:17 | 115 | f | 5
2017-04-19 20:26:28 | 115 | f | 5
2017-04-19 20:26:57 | 115 | t | 6
2017-04-19 20:27:37 | 115 | t | 6
2017-04-19 20:27:58 | 60 | f | 7
-- Step 1: mark a reset point each time overspeed changes (the first row is
-- always marked, because lag() is NULL there).
WITH reset_point AS (
    SELECT
        fecha,
        velocidad,
        overspeed,
        CASE
            WHEN overspeed = lag(overspeed) OVER by_fecha THEN NULL
            ELSE 1
        END AS is_reset
    FROM test
    WINDOW by_fecha AS (ORDER BY fecha)
),
-- Step 2: turn the markers into a group number; count() skips NULLs, so the
-- running count only advances on reset rows.
set_group AS (
    SELECT
        fecha,
        velocidad,
        overspeed,
        count(is_reset) OVER (ORDER BY fecha) AS grp
    FROM reset_point
)
-- Step 3: return the MIN and MAX fecha of each group.
SELECT
    grp,
    min(fecha) AS start,
    max(fecha) AS "end"
FROM set_group
GROUP BY grp;
grp | start | end
--: | :------------------ | :------------------
4 | 2017-04-19 20:24:17 | 2017-04-19 20:25:37
1 | 2017-04-19 20:18:17 | 2017-04-19 20:18:57
5 | 2017-04-19 20:26:17 | 2017-04-19 20:26:28
3 | 2017-04-19 20:22:17 | 2017-04-19 20:23:37
6 | 2017-04-19 20:26:57 | 2017-04-19 20:27:37
2 | 2017-04-19 20:19:37 | 2017-04-19 20:21:37
7 | 2017-04-19 20:27:58 | 2017-04-19 20:27:58
dbfiddle here
这可以更简单。减去两次 row_number()
调用:
-- First and last timestamp of each run of consecutive overspeed rows.
WITH numbered AS (
    SELECT
        date,
        overspeed,
        -- Overall position minus position among rows with the same overspeed
        -- value: the difference stays constant while overspeed does not change.
        row_number() OVER (ORDER BY date)
          - row_number() OVER (PARTITION BY overspeed ORDER BY date) AS grp
    FROM tbl  -- result of your starting query
)
SELECT
    min(date) AS start,
    max(date) AS stop
FROM numbered
WHERE overspeed
GROUP BY grp
ORDER BY grp;
第一个 row_number() 生成整体的连续行号(running number),第二个则在按 overspeed 划分的分区内编号。
用前者减去后者,同一段连续行会得到相同的组号,而同一分区内不同的段得到的组号各不相同。
然后在外部查询中只保留 overspeed 为真的行,
并对每组取最小值和最大值。瞧。
详细解释:
旁白:时间戳不是日期。这是一个令人困惑的列名称。
整合您的子查询
处理您的评论。将 tbl
替换为原始查询作为子查询,如下所示:
-- Same islands query with the starting query folded in as the first step.
WITH readings AS (
    SELECT
        fecha           AS date,
        velocidad       AS speed,
        velocidad > 100 AS overspeed
    FROM reports.avl_historico_354898046636089
    WHERE fecha >= '2017-04-19 00:00:00-03'  -- typically, you include the lower
      AND fecha <  '2017-04-20 00:00:00-03'  -- and exclude the upper bound
      AND velocidad > 2
      AND ignicion = 1
    -- no ORDER BY here: it would be useless, the window functions order the rows themselves
),
numbered AS (
    SELECT
        date,
        overspeed,
        row_number() OVER (ORDER BY date)
          - row_number() OVER (PARTITION BY overspeed ORDER BY date) AS grp
    FROM readings
)
SELECT
    min(date) AS start,
    max(date) AS stop
FROM numbered
WHERE overspeed
GROUP BY grp
ORDER BY grp;
那你可以再简化一些:
-- Single-level variant: the overspeed expression is computed and partitioned
-- on directly against the base table.
WITH numbered AS (
    SELECT
        fecha,
        velocidad > 100 AS overspeed,
        row_number() OVER (ORDER BY fecha)
          - row_number() OVER (PARTITION BY velocidad > 100 ORDER BY fecha) AS grp
    FROM reports.avl_historico_354898046636089
    WHERE fecha >= '2017-04-19 00:00:00-03'
      AND fecha <  '2017-04-20 00:00:00-03'
      AND velocidad > 2
      AND ignicion = 1
)
SELECT
    min(fecha) AS start,
    max(fecha) AS stop
FROM numbered
WHERE overspeed
GROUP BY grp
ORDER BY grp;
我有以下 "starting" 查询:
-- Readings with velocidad > 2 and ignicion = 1 inside the window (both
-- bounds inclusive), flagged as overspeed when velocidad exceeds 100.
SELECT
    fecha           AS date,
    velocidad       AS speed,
    velocidad > 100 AS overspeed
FROM reports.avl_historico_354898046636089
WHERE fecha >= '2017-04-19 00:00:00-03'
  AND fecha <= '2017-04-20 00:00:00-03'
  AND ignicion = 1
  AND velocidad > 2
ORDER BY fecha;
产生以下输出:
date speed overspeed
2017-04-19 11:35:41+00,16,f
2017-04-19 11:37:01+00,24,f
2017-04-19 11:37:41+00,72,f
2017-04-19 11:38:21+00,82,f
2017-04-19 11:39:01+00,13,f
2017-04-19 11:39:41+00,68,f
2017-04-19 11:40:21+00,23,f
2017-04-19 11:41:01+00,57,f
2017-04-19 11:41:41+00,97,f
2017-04-19 11:42:21+00,96,f
2017-04-19 11:43:01+00,102,t
2017-04-19 11:43:41+00,104,t
2017-04-19 11:44:21+00,106,t
2017-04-19 11:45:01+00,109,t
2017-04-19 11:45:41+00,109,t
2017-04-19 11:46:21+00,114,t
2017-04-19 11:47:01+00,56,f
2017-04-19 11:47:28+00,54,f
2017-04-19 11:47:41+00,54,f
2017-04-19 11:48:21+00,54,f
2017-04-19 11:49:01+00,102,t
2017-04-19 11:49:07+00,104,t
2017-04-19 11:54:21+00,114,t
2017-04-19 11:55:01+00,118,t
2017-04-19 11:55:41+00,115,t
2017-04-19 11:56:21+00,111,t
2017-04-19 11:57:01+00,85,f
2017-04-19 11:57:41+00,45,f
2017-04-19 11:58:21+00,29,f
2017-04-19 12:00:35+00,4,f
2017-04-19 12:00:36+00,4,f
...
我一直在尝试使用 LAG/LEAD,为 overspeed 列为 TRUE 的每一段连续行获取 first/last 日期,但始终没能得到想要的结果。期望的输出大致如下:
start stop
2017-04-19 11:43:01+00 2017-04-19 11:46:21+00
2017-04-19 11:49:01+00 2017-04-19 11:56:21+00
任何有关如何获得此类输出的想法都将不胜感激。
原始 table DDL:
-- Table definition as posted. The queries in this document expose
-- fecha as "date" and velocidad as "speed".
CREATE TABLE avl_historico_354898046636089 (
    fecha       TIMESTAMP WITH TIME ZONE NOT NULL,
    latitud     DOUBLE PRECISION NOT NULL DEFAULT 0,
    longitud    DOUBLE PRECISION NOT NULL DEFAULT 0,
    altitud     DOUBLE PRECISION NOT NULL DEFAULT 0,
    velocidad   DOUBLE PRECISION NOT NULL DEFAULT 0,
    cog         DOUBLE PRECISION NOT NULL DEFAULT 0,
    nsat        INTEGER NOT NULL DEFAULT 0,
    tipo        CHARACTER(1),
    utc_hora    TIME WITHOUT TIME ZONE,
    fix_fecha   DATE,
    imei        BIGINT NOT NULL,
    registro    TIMESTAMP WITH TIME ZONE,
    input1      INTEGER DEFAULT 0,
    input2      INTEGER DEFAULT 0,
    input3      INTEGER DEFAULT 0,
    input4      INTEGER DEFAULT 0,
    hdop        DOUBLE PRECISION,
    adc         DOUBLE PRECISION DEFAULT -99,
    ignicion    INTEGER DEFAULT 1,
    adc2        DOUBLE PRECISION,
    power       INTEGER,
    driverid    INTEGER,
    ibutton2    INTEGER,
    ibutton3    INTEGER,
    ibutton4    INTEGER,
    trailerid   INTEGER,
    adc3        DOUBLE PRECISION,
    adc4        DOUBLE PRECISION,
    horometro   BIGINT,
    odometro    BIGINT,
    panico      INTEGER DEFAULT 0,
    bateria     DOUBLE PRECISION,
    bateriaint  DOUBLE PRECISION
);
-- Gaps-and-islands: first and last timestamp of every run of consecutive
-- rows (ordered by date) that share the same overspeed value. Runs with
-- overspeed = false are returned as well.
SELECT grp, min(date) AS start, max(date) AS stop
FROM (
  -- Running count of the change markers seen so far. The ORDER BY inside
  -- OVER is required: with an empty OVER () the count covers the whole
  -- result set, every row gets the same grp and only one group comes back.
  SELECT date, speed, count(is_reset) OVER (ORDER BY date) AS grp
  FROM (
    SELECT
      date,
      speed,
      -- 1 where overspeed differs from the previous row, otherwise NULL
      -- (count() skips NULLs). On the first row lag() is NULL, so the
      -- comparison is unknown and no marker is set.
      CASE
        WHEN overspeed <> lag(overspeed) OVER (ORDER BY date) THEN 1
      END AS is_reset
    FROM (
      SELECT fecha AS date, velocidad AS speed, velocidad > 100 AS overspeed
      FROM reports.avl_historico_354898046636089
      WHERE fecha BETWEEN '2017-04-19 00:00:00-03' AND '2017-04-20 00:00:00-03'
        AND velocidad > 2
        AND ignicion = 1
    ) AS t
  ) AS t2
) AS t3
GROUP BY grp
ORDER BY grp;  -- deterministic output order
这是一个分组和 WINDOW 样本。
注意我编辑了一些结果只是为了让它更小。
-- Sample data: one reading per row with the overspeed flag stored explicitly.
CREATE TABLE test (
    fecha     timestamp,
    velocidad int,
    overspeed bool
);

INSERT INTO test (fecha, velocidad, overspeed)
VALUES
    ('2017-04-19 20:18:17+00',  77, FALSE),
    ('2017-04-19 20:18:57+00',  96, FALSE),
    ('2017-04-19 20:19:37+00', 108, TRUE),
    ('2017-04-19 20:20:17+00', 111, TRUE),
    ('2017-04-19 20:20:57+00', 114, TRUE),
    ('2017-04-19 20:21:37+00', 112, TRUE),
    ('2017-04-19 20:22:17+00', 108, FALSE),
    ('2017-04-19 20:22:57+00', 107, FALSE),
    ('2017-04-19 20:23:37+00', 113, FALSE),
    ('2017-04-19 20:24:17+00', 116, TRUE),
    ('2017-04-19 20:24:57+00', 111, TRUE),
    ('2017-04-19 20:25:37+00', 113, TRUE),
    ('2017-04-19 20:26:17+00', 115, FALSE),
    ('2017-04-19 20:26:28+00', 115, FALSE),
    ('2017-04-19 20:26:57+00', 115, TRUE),
    ('2017-04-19 20:27:37+00', 115, TRUE),
    ('2017-04-19 20:27:58+00',  60, FALSE);
-- Number each run of consecutive rows (by fecha) sharing the same overspeed value.
-- Comments sit on their own lines so that a "--" marker cannot hide the SQL after it.
WITH reset_point AS (
    -- is_reset is 1 on the first row and on every row whose overspeed is
    -- not equal to the previous row's value; NULL otherwise.
    SELECT
        fecha,
        velocidad,
        overspeed,
        CASE
            WHEN overspeed = lag(overspeed) OVER (ORDER BY fecha) THEN NULL
            ELSE 1
        END AS is_reset
    FROM test
),
set_group AS (
    -- Set a group each time overspeed changes: count() ignores NULLs, so the
    -- running count only advances on reset rows.
    SELECT
        fecha,
        velocidad,
        overspeed,
        count(is_reset) OVER (ORDER BY fecha) AS grp
    FROM reset_point
)
SELECT
    fecha,
    velocidad,
    overspeed,
    grp
FROM set_group;
fecha | velocidad | overspeed | grp
:------------------ | --------: | :-------- | --:
2017-04-19 20:18:17 | 77 | f | 1
2017-04-19 20:18:57 | 96 | f | 1
2017-04-19 20:19:37 | 108 | t | 2
2017-04-19 20:20:17 | 111 | t | 2
2017-04-19 20:20:57 | 114 | t | 2
2017-04-19 20:21:37 | 112 | t | 2
2017-04-19 20:22:17 | 108 | f | 3
2017-04-19 20:22:57 | 107 | f | 3
2017-04-19 20:23:37 | 113 | f | 3
2017-04-19 20:24:17 | 116 | t | 4
2017-04-19 20:24:57 | 111 | t | 4
2017-04-19 20:25:37 | 113 | t | 4
2017-04-19 20:26:17 | 115 | f | 5
2017-04-19 20:26:28 | 115 | f | 5
2017-04-19 20:26:57 | 115 | t | 6
2017-04-19 20:27:37 | 115 | t | 6
2017-04-19 20:27:58 | 60 | f | 7
-- Step 1: mark a reset point each time overspeed changes (the first row is
-- always marked, because lag() is NULL there).
-- Comments sit on their own lines so that a "--" marker cannot hide the SQL after it.
WITH reset_point AS (
    SELECT
        fecha,
        velocidad,
        overspeed,
        CASE
            WHEN overspeed = lag(overspeed) OVER (ORDER BY fecha) THEN NULL
            ELSE 1
        END AS is_reset
    FROM test
),
-- Step 2: set a group each time overspeed changes; count() skips NULLs, so
-- the running count only advances on reset rows.
set_group AS (
    SELECT
        fecha,
        velocidad,
        overspeed,
        count(is_reset) OVER (ORDER BY fecha) AS grp
    FROM reset_point
)
-- Step 3: return the MIN and MAX fecha of each group.
SELECT
    grp,
    min(fecha) AS start,
    max(fecha) AS "end"
FROM set_group
GROUP BY grp;
grp | start | end
--: | :------------------ | :------------------
4 | 2017-04-19 20:24:17 | 2017-04-19 20:25:37
1 | 2017-04-19 20:18:17 | 2017-04-19 20:18:57
5 | 2017-04-19 20:26:17 | 2017-04-19 20:26:28
3 | 2017-04-19 20:22:17 | 2017-04-19 20:23:37
6 | 2017-04-19 20:26:57 | 2017-04-19 20:27:37
2 | 2017-04-19 20:19:37 | 2017-04-19 20:21:37
7 | 2017-04-19 20:27:58 | 2017-04-19 20:27:58
dbfiddle here
这可以更简单。减去两次 row_number()
调用:
-- First and last timestamp of each run of consecutive overspeed rows.
WITH numbered AS (
    SELECT
        date,
        overspeed,
        -- Overall position minus position among rows with the same overspeed
        -- value: the difference stays constant while overspeed does not change.
        row_number() OVER (ORDER BY date)
          - row_number() OVER (PARTITION BY overspeed ORDER BY date) AS grp
    FROM tbl  -- result of your starting query
)
SELECT
    min(date) AS start,
    max(date) AS stop
FROM numbered
WHERE overspeed
GROUP BY grp
ORDER BY grp;
第一个 row_number() 生成整体的连续行号(running number),第二个则在按 overspeed 划分的分区内编号。
用前者减去后者,同一段连续行会得到相同的组号,而同一分区内不同的段得到的组号各不相同。
然后在外部查询中只保留 overspeed 为真的行,
并对每组取最小值和最大值。瞧。
详细解释:
旁白:时间戳不是日期。这是一个令人困惑的列名称。
整合您的子查询
处理您的评论。将 tbl
替换为原始查询作为子查询,如下所示:
-- Same islands query with the starting query folded in as the first step.
WITH readings AS (
    SELECT
        fecha           AS date,
        velocidad       AS speed,
        velocidad > 100 AS overspeed
    FROM reports.avl_historico_354898046636089
    WHERE fecha >= '2017-04-19 00:00:00-03'  -- typically, you include the lower
      AND fecha <  '2017-04-20 00:00:00-03'  -- and exclude the upper bound
      AND velocidad > 2
      AND ignicion = 1
    -- no ORDER BY here: it would be useless, the window functions order the rows themselves
),
numbered AS (
    SELECT
        date,
        overspeed,
        row_number() OVER (ORDER BY date)
          - row_number() OVER (PARTITION BY overspeed ORDER BY date) AS grp
    FROM readings
)
SELECT
    min(date) AS start,
    max(date) AS stop
FROM numbered
WHERE overspeed
GROUP BY grp
ORDER BY grp;
那你可以再简化一些:
-- Single-level variant: the overspeed expression is computed and partitioned
-- on directly against the base table.
WITH numbered AS (
    SELECT
        fecha,
        velocidad > 100 AS overspeed,
        row_number() OVER (ORDER BY fecha)
          - row_number() OVER (PARTITION BY velocidad > 100 ORDER BY fecha) AS grp
    FROM reports.avl_historico_354898046636089
    WHERE fecha >= '2017-04-19 00:00:00-03'
      AND fecha <  '2017-04-20 00:00:00-03'
      AND velocidad > 2
      AND ignicion = 1
)
SELECT
    min(fecha) AS start,
    max(fecha) AS stop
FROM numbered
WHERE overspeed
GROUP BY grp
ORDER BY grp;