数据库查询:GROUPing 提取第一行和最后一行

Database querying: GROUPing extracting first and last row

我有以下 "starting" 查询:

select fecha as date,velocidad as speed, velocidad>100 as overspeed 
from reports.avl_historico_354898046636089 
where fecha between '2017-04-19 00:00:00-03' and '2017-04-20 00:00:00-03'
and velocidad>2 and ignicion=1
order by fecha;

产生以下输出:

date                  speed overspeed

2017-04-19 11:35:41+00,16,f
2017-04-19 11:37:01+00,24,f
2017-04-19 11:37:41+00,72,f
2017-04-19 11:38:21+00,82,f
2017-04-19 11:39:01+00,13,f
2017-04-19 11:39:41+00,68,f
2017-04-19 11:40:21+00,23,f
2017-04-19 11:41:01+00,57,f
2017-04-19 11:41:41+00,97,f
2017-04-19 11:42:21+00,96,f
2017-04-19 11:43:01+00,102,t
2017-04-19 11:43:41+00,104,t
2017-04-19 11:44:21+00,106,t
2017-04-19 11:45:01+00,109,t
2017-04-19 11:45:41+00,109,t
2017-04-19 11:46:21+00,114,t
2017-04-19 11:47:01+00,56,f
2017-04-19 11:47:28+00,54,f
2017-04-19 11:47:41+00,54,f
2017-04-19 11:48:21+00,54,f
2017-04-19 11:49:01+00,102,t
2017-04-19 11:49:07+00,104,t
2017-04-19 11:54:21+00,114,t
2017-04-19 11:55:01+00,118,t
2017-04-19 11:55:41+00,115,t
2017-04-19 11:56:21+00,111,t
2017-04-19 11:57:01+00,85,f
2017-04-19 11:57:41+00,45,f
2017-04-19 11:58:21+00,29,f
2017-04-19 12:00:35+00,4,f
2017-04-19 12:00:36+00,4,f
...

而且我一直在尝试使用 LAG/LEAD 来为 overspeed 列为 TRUE 的每组行获取 first/last 日期,但我一直没能达到想要的效果,可能是这样的:

start                     stop
2017-04-19 11:43:01+00    2017-04-19 11:46:21+00
2017-04-19 11:49:01+00    2017-04-19 11:56:21+00

任何有关如何获得此类输出的想法都将不胜感激。

原始 table DDL:

CREATE TABLE avl_historico_354898046636089 (
    fecha timestamp with time zone NOT NULL,
    latitud double precision DEFAULT 0 NOT NULL,
    longitud double precision DEFAULT 0 NOT NULL,
    altitud double precision DEFAULT 0 NOT NULL,
    velocidad double precision DEFAULT 0 NOT NULL,
    cog double precision DEFAULT 0 NOT NULL,
    nsat integer DEFAULT 0 NOT NULL,
    tipo character(1),
    utc_hora time without time zone,
    fix_fecha date,
    imei bigint NOT NULL,
    registro timestamp with time zone,
    input1 integer DEFAULT 0,
    input2 integer DEFAULT 0,
    input3 integer DEFAULT 0,
    input4 integer DEFAULT 0,
    hdop double precision,
    adc double precision DEFAULT (-99),
    ignicion integer DEFAULT 1,
    adc2 double precision,
    power integer,
    driverid integer,
    ibutton2 integer,
    ibutton3 integer,
    ibutton4 integer,
    trailerid integer,
    adc3 double precision,
    adc4 double precision,
    horometro bigint,
    odometro bigint,
    panico integer DEFAULT 0,
    bateria double precision,
    bateriaint double precision
);
SELECT grp, min(date) AS start, max(date) AS stop
FROM (

    SELECT date, speed, count(is_reset) OVER () AS grp
    FROM (

        SELECT
          date,
          speed,
          CASE
            WHEN overspeed <> lag(overspeed) OVER (ORDER BY date) THEN 1
          END AS is_reset
        FROM (

            select fecha as date,velocidad as speed, velocidad>100 as overspeed 
            from reports.avl_historico_354898046636089 
            where fecha between '2017-04-19 00:00:00-03' and '2017-04-20 00:00:00-03'
            and velocidad>2 and ignicion=1

        ) AS t

    ) AS t2

) AS t3
GROUP BY grp;

这是一个分组和 WINDOW 样本。

注意我编辑了一些结果只是为了让它更小。

create table test (fecha timestamp, velocidad int, overspeed  bool);
insert into test values
('2017-04-19 20:18:17+00',  77,  FALSE),
('2017-04-19 20:18:57+00',  96,  FALSE),
('2017-04-19 20:19:37+00',  108, TRUE),
('2017-04-19 20:20:17+00',  111, TRUE),
('2017-04-19 20:20:57+00',  114, TRUE),
('2017-04-19 20:21:37+00',  112, TRUE),
('2017-04-19 20:22:17+00',  108, FALSE),
('2017-04-19 20:22:57+00',  107, FALSE),
('2017-04-19 20:23:37+00', 113, FALSE),
('2017-04-19 20:24:17+00', 116, TRUE),
('2017-04-19 20:24:57+00',  111, TRUE),
('2017-04-19 20:25:37+00',  113, TRUE),
('2017-04-19 20:26:17+00',  115, FALSE),
('2017-04-19 20:26:28+00',  115, FALSE),
('2017-04-19 20:26:57+00',  115, TRUE),
('2017-04-19 20:27:37+00',  115, TRUE),
('2017-04-19 20:27:58+00',  60,  FALSE);
with ResetPoint as
(
    select fecha, velocidad, overspeed,
           case when lag(overspeed) over (order by fecha) = overspeed then null else 1 end as reset
    from test
)
    --= Set a group each time overspeed changes
    , SetGroup as
    (
        select fecha, velocidad, overspeed, 
               count(reset) over (order by fecha) as grp
        from ResetPoint
    )
    select *
    from SetGroup;
fecha               | velocidad | overspeed | grp
:------------------ | --------: | :-------- | --:
2017-04-19 20:18:17 |        77 | f         |   1
2017-04-19 20:18:57 |        96 | f         |   1
2017-04-19 20:19:37 |       108 | t         |   2
2017-04-19 20:20:17 |       111 | t         |   2
2017-04-19 20:20:57 |       114 | t         |   2
2017-04-19 20:21:37 |       112 | t         |   2
2017-04-19 20:22:17 |       108 | f         |   3
2017-04-19 20:22:57 |       107 | f         |   3
2017-04-19 20:23:37 |       113 | f         |   3
2017-04-19 20:24:17 |       116 | t         |   4
2017-04-19 20:24:57 |       111 | t         |   4
2017-04-19 20:25:37 |       113 | t         |   4
2017-04-19 20:26:17 |       115 | f         |   5
2017-04-19 20:26:28 |       115 | f         |   5
2017-04-19 20:26:57 |       115 | t         |   6
2017-04-19 20:27:37 |       115 | t         |   6
2017-04-19 20:27:58 |        60 | f         |   7
--= Set a reset point each time overspeed changes
--
with ResetPoint as
(
    select fecha, velocidad, overspeed,
           case when lag(overspeed) over (order by fecha) = overspeed then null else 1 end as reset
    from test
)
    --= Set a group each time overspeed changes
    , SetGroup as
    (
        select fecha, velocidad, overspeed, 
               count(reset) over (order by fecha) as grp
        from ResetPoint
    )
    --= Retruns MIN and MAX date of each group
    select grp, min(fecha) as Start, max(fecha) as End
    from SetGroup
    group by grp;
grp | start               | end                
--: | :------------------ | :------------------
  4 | 2017-04-19 20:24:17 | 2017-04-19 20:25:37
  1 | 2017-04-19 20:18:17 | 2017-04-19 20:18:57
  5 | 2017-04-19 20:26:17 | 2017-04-19 20:26:28
  3 | 2017-04-19 20:22:17 | 2017-04-19 20:23:37
  6 | 2017-04-19 20:26:57 | 2017-04-19 20:27:37
  2 | 2017-04-19 20:19:37 | 2017-04-19 20:21:37
  7 | 2017-04-19 20:27:58 | 2017-04-19 20:27:58

dbfiddle here

这可以更简单。减去两次 row_number() 调用:

SELECT min(date) AS start
     , max(date) AS stop
FROM  (
   SELECT date, overspeed
        , row_number() OVER (ORDER BY date)
        - row_number() OVER (PARTITION BY overspeed ORDER BY date) AS grp
   FROM tbl  -- result of your starting query
   ) sub
WHERE  overspeed
GROUP  BY grp
ORDER  BY grp;

第一个生成 运行 个数字,第二个按 overspeed 划分。当您从第一个减去第二个时,每个组都以相同的组号结束,每个分区不同。

然后在外部查询中过滤带有 overspeed 的那些,并每组取最小值和最大值。瞧。

详细解释:

旁白:时间戳不是日期。这是一个令人困惑的列名称。

整合您的子查询

处理您的评论。将 tbl 替换为原始查询作为子查询,如下所示:

SELECT min(date) AS start
     , max(date) AS stop
FROM  (
   SELECT date, overspeed
        , row_number() OVER (ORDER BY date)
        - row_number() OVER (PARTITION BY overspeed ORDER BY date) AS grp
   FROM (
      SELECT fecha AS date, velocidad AS speed, velocidad > 100 AS overspeed 
      FROM   reports.avl_historico_354898046636089 
      WHERE  fecha >= '2017-04-19 00:00:00-03'  -- typically, you include the lower
      AND    fecha <  '2017-04-20 00:00:00-03'  -- and exclude the upper bound
      AND    velocidad > 2
      AND    ignicion = 1
      -- drop the now useless inner ORDER BY
      ) sub1
   ) sub2
WHERE  overspeed
GROUP  BY grp
ORDER  BY grp;

那你可以再简化一些:

SELECT min(fecha) AS start
     , max(fecha) AS stop
FROM  (
   SELECT fecha, velocidad > 100 AS overspeed 
        , row_number() OVER (ORDER BY fecha)
        - row_number() OVER (PARTITION BY velocidad > 100 ORDER BY fecha) AS grp
   FROM   reports.avl_historico_354898046636089 
   WHERE  fecha >= '2017-04-19 00:00:00-03'
   AND    fecha <  '2017-04-20 00:00:00-03'
   AND    velocidad > 2
   AND    ignicion = 1
   ) sub
WHERE  overspeed
GROUP  BY grp
ORDER  BY grp;