Postgres GROUP BY 查看日期范围
Postgres GROUP BY looking at dates ranges
我有一个 table 具有 "Code" 值更改的历史记录。每个月,此 table 都会获得一条新记录,其中包含指定月份的 "Code" 的新值。
+----------+------------+------------+------+
| Employee | FromDate | ToDate | Code |
+----------+------------+------------+------+
| Employee | 01/07/2016 | 31/07/2016 | 4 |
| Employee | 01/06/2016 | 30/06/2016 | 2 |
| Employee | 01/05/2016 | 31/05/2016 | 2 |
| Employee | 01/04/2016 | 30/04/2016 | 3 |
| Employee | 01/03/2016 | 31/03/2016 | 3 |
| Employee | 01/02/2016 | 29/02/2016 | 4 |
| Employee | 01/01/2016 | 31/01/2016 | 4 |
+----------+------------+------------+------+
每次 "Code" 更改时,我需要按此数据分组以获得新记录,并为 "From date" 取最小值,为 "To date" 取最大值。数据必须按 "FromDate" 降序排列。通过我的查询,我得到了这个结果:
+----------+------------+------------+------+
| Employee | FromDate | ToDate | Code |
+----------+------------+------------+------+
| Employee | 01/05/2016 | 30/06/2016 | 2 |
| Employee | 01/03/2016 | 30/04/2016 | 3 |
| Employee | 01/01/2016 | 31/07/2016 | 4 |
+----------+------------+------------+------+
它工作正常,但如果同一个 "Code" 有多个日期范围(请参阅第一个 table 中的代码 4),我每个代码只能得到一行。我希望得到下面的结果:代码 4 出现在 2 条记录中,因为它的时间段并不连续,而是被其他代码(3 和 2)隔开了:
+----------+------------+------------+------+
| Employee | FromDate | ToDate | Code |
+----------+------------+------------+------+
| Employee | 01/07/2016 | 31/07/2016 | 4 |
| Employee | 01/05/2016 | 30/06/2016 | 2 |
| Employee | 01/03/2016 | 30/04/2016 | 3 |
| Employee | 01/01/2016 | 29/02/2016 | 4 |
+----------+------------+------------+------+
我使用这个查询:
-- One output row per ("Employee", "Code") pair: the earliest "FromDate" and
-- the latest "ToDate" among all rows sharing that pair, whether or not their
-- date ranges are adjacent. Rows are returned newest period first.
SELECT
    emp."Employee",
    MIN(emp."FromDate") AS "FromDate",
    MAX(emp."ToDate") AS "ToDate",
    emp."Code"
FROM schema_estelspa."Employees" AS emp
GROUP BY
    emp."Employee",
    emp."Code"
ORDER BY
    MIN(emp."FromDate") DESC
有什么技巧可以得到我想要的结果吗?
日期格式为:dd/MM/yyyy
这里需要先确定每段日期范围,并把 from_date 作为 GROUP BY 分组列的一部分;要得到这个结果,还需要对表做自连接(self join)。下面的 SQL 是我在 Teradata 中编写的,请根据您的数据库做必要的修改(其中如果用到 COALESCE 作为 if-null 表达式,也可以改用 NVL 或 CASE 语句)。
查询:
-- Collapse consecutive same-code periods of an employee into one row each.
-- Assumes periods of one employee/code never overlap and that a following
-- period starts exactly one day after the previous one ends (TODATE + 1).
--
-- Joining each row only to its immediate predecessor merges at most two rows,
-- so a run of three or more consecutive periods would come out split. Instead,
-- every series start (a row with no predecessor) is paired with the earliest
-- series end (a row with no successor) that finishes on or after that start.
-- No COALESCE/NVL fallback is needed because no outer join is involved.
SELECT
    S.EMPLOYEE,
    S.CODE,
    S.FROMDATE AS FROM_DATE,
    MIN(F.TODATE) AS TO_D
FROM EMP_TEST S
INNER JOIN EMP_TEST F
    ON F.EMPLOYEE = S.EMPLOYEE
    AND F.CODE = S.CODE
    AND F.TODATE >= S.FROMDATE
WHERE NOT EXISTS (
        -- S opens a series: no same-code period ends the day before it starts
        SELECT 1
        FROM EMP_TEST P
        WHERE P.EMPLOYEE = S.EMPLOYEE
          AND P.CODE = S.CODE
          AND P.TODATE + 1 = S.FROMDATE
    )
  AND NOT EXISTS (
        -- F closes a series: no same-code period starts the day after it ends
        SELECT 1
        FROM EMP_TEST N
        WHERE N.EMPLOYEE = F.EMPLOYEE
          AND N.CODE = F.CODE
          AND N.FROMDATE = F.TODATE + 1
    )
GROUP BY
    S.EMPLOYEE,
    S.CODE,
    S.FROMDATE
ORDER BY FROM_DATE
输出:
Employee Code FROM_DATE TO_D
1 Employee 4 1/1/2016 2/29/2016
2 Employee 2 5/1/2016 6/30/2016
3 Employee 4 7/1/2016 7/31/2016
4 Employee 3 3/1/2016 4/30/2016
“连接各点”(connecting the dots)问题的标准递归解决方案:
- 在实践中,半开区间(lower_limit <= X < upper_limit)更容易处理
- 递归从任何没有下邻(lower neighbor)的区间段开始
- 相邻的片段被接到右侧,从而构建更长的链
- 最终查询会过滤掉中间的部分结果
注意:下面的代码不处理重叠间隔。
-- Demo table: one row per employee/code period.
CREATE TABLE ecode (
    employee VARCHAR NOT NULL,
    code     INTEGER NOT NULL,
    fromdate DATE    NOT NULL,
    uptodate DATE    NOT NULL
);

-- Read the dd/mm/yyyy literals below as day-month-year.
SET datestyle = 'DMY';

-- Sample rows
INSERT INTO ecode (employee, fromdate, uptodate, code)
VALUES
    ('Employee', '01/07/2016', '31/07/2016', 4),
    ('Employee', '01/06/2016', '30/06/2016', 2),
    ('Employee', '01/05/2016', '31/05/2016', 2),
    ('Employee', '01/04/2016', '30/04/2016', 3),
    ('Employee', '01/03/2016', '31/03/2016', 3),
    ('Employee', '01/02/2016', '29/02/2016', 4),
    ('Employee', '01/01/2016', '31/01/2016', 4);

-- Move every end date forward by one day so each row becomes a half-open
-- interval [fromdate, uptodate). Intentionally touches the whole table.
UPDATE ecode
SET uptodate = uptodate + INTERVAL '1 day';

-- Glue adjacent intervals of the same employee/code into chains.
-- Overlapping intervals are not handled.
WITH RECURSIVE merged AS (
    -- Anchor: intervals with no same-code interval ending where they begin.
    SELECT
        head.employee,
        head.code,
        head.fromdate,
        head.uptodate
    FROM ecode AS head
    WHERE NOT EXISTS (
        SELECT 1
        FROM ecode AS prev
        WHERE prev.employee = head.employee
          AND prev.code = head.code
          AND prev.uptodate = head.fromdate
    )

    UNION ALL

    -- Step: attach the interval that starts where the chain currently ends,
    -- keeping the chain's original start date.
    SELECT
        nxt.employee,
        nxt.code,
        merged.fromdate,
        nxt.uptodate
    FROM merged
    INNER JOIN ecode AS nxt
        ON nxt.employee = merged.employee
       AND nxt.code = merged.code
       AND nxt.fromdate = merged.uptodate
)
SELECT
    merged.employee,
    merged.code,
    merged.fromdate,
    merged.uptodate
FROM merged
-- Drop partial chains: keep only rows that no further interval can extend.
WHERE NOT EXISTS (
    SELECT 1
    FROM ecode AS succ
    WHERE succ.employee = merged.employee
      AND succ.code = merged.code
      AND succ.fromdate = merged.uptodate
)
ORDER BY employee, code, fromdate
;
结果:
DROP SCHEMA
CREATE SCHEMA
SET
CREATE TABLE
SET
INSERT 0 7
UPDATE 7
employee | code | fromdate | uptodate
----------+------+------------+------------
Employee | 2 | 2016-05-01 | 2016-07-01
Employee | 3 | 2016-03-01 | 2016-05-01
Employee | 4 | 2016-01-01 | 2016-03-01
Employee | 4 | 2016-07-01 | 2016-08-01
(4 rows)
我有一个 table 具有 "Code" 值更改的历史记录。每个月,此 table 都会获得一条新记录,其中包含指定月份的 "Code" 的新值。
+----------+------------+------------+------+
| Employee | FromDate | ToDate | Code |
+----------+------------+------------+------+
| Employee | 01/07/2016 | 31/07/2016 | 4 |
| Employee | 01/06/2016 | 30/06/2016 | 2 |
| Employee | 01/05/2016 | 31/05/2016 | 2 |
| Employee | 01/04/2016 | 30/04/2016 | 3 |
| Employee | 01/03/2016 | 31/03/2016 | 3 |
| Employee | 01/02/2016 | 29/02/2016 | 4 |
| Employee | 01/01/2016 | 31/01/2016 | 4 |
+----------+------------+------------+------+
每次 "Code" 更改时,我需要按此数据分组以获得新记录,并为 "From date" 取最小值,为 "To date" 取最大值。数据必须按 "FromDate" 降序排列。通过我的查询,我得到了这个结果:
+----------+------------+------------+------+
| Employee | FromDate | ToDate | Code |
+----------+------------+------------+------+
| Employee | 01/05/2016 | 30/06/2016 | 2 |
| Employee | 01/03/2016 | 30/04/2016 | 3 |
| Employee | 01/01/2016 | 31/07/2016 | 4 |
+----------+------------+------------+------+
它工作正常,但如果同一个 "Code" 有多个日期范围(请参阅第一个 table 中的代码 4),我每个代码只能得到一行。我希望得到下面的结果:代码 4 出现在 2 条记录中,因为它的时间段并不连续,而是被其他代码(3 和 2)隔开了:
+----------+------------+------------+------+
| Employee | FromDate | ToDate | Code |
+----------+------------+------------+------+
| Employee | 01/07/2016 | 31/07/2016 | 4 |
| Employee | 01/05/2016 | 30/06/2016 | 2 |
| Employee | 01/03/2016 | 30/04/2016 | 3 |
| Employee | 01/01/2016 | 29/02/2016 | 4 |
+----------+------------+------------+------+
我使用这个查询:
-- One output row per ("Employee", "Code") pair: the earliest "FromDate" and
-- the latest "ToDate" among all rows sharing that pair, whether or not their
-- date ranges are adjacent. Rows are returned newest period first.
SELECT
    emp."Employee",
    MIN(emp."FromDate") AS "FromDate",
    MAX(emp."ToDate") AS "ToDate",
    emp."Code"
FROM schema_estelspa."Employees" AS emp
GROUP BY
    emp."Employee",
    emp."Code"
ORDER BY
    MIN(emp."FromDate") DESC
有什么技巧可以得到我想要的结果吗?
日期格式为:dd/MM/yyyy
这里需要先确定每段日期范围,并把 from_date 作为 GROUP BY 分组列的一部分;要得到这个结果,还需要对表做自连接(self join)。下面的 SQL 是我在 Teradata 中编写的,请根据您的数据库做必要的修改(其中如果用到 COALESCE 作为 if-null 表达式,也可以改用 NVL 或 CASE 语句)。
查询:
-- Collapse consecutive same-code periods of an employee into one row each.
-- Assumes periods of one employee/code never overlap and that a following
-- period starts exactly one day after the previous one ends (TODATE + 1).
--
-- Joining each row only to its immediate predecessor merges at most two rows,
-- so a run of three or more consecutive periods would come out split. Instead,
-- every series start (a row with no predecessor) is paired with the earliest
-- series end (a row with no successor) that finishes on or after that start.
-- No COALESCE/NVL fallback is needed because no outer join is involved.
SELECT
    S.EMPLOYEE,
    S.CODE,
    S.FROMDATE AS FROM_DATE,
    MIN(F.TODATE) AS TO_D
FROM EMP_TEST S
INNER JOIN EMP_TEST F
    ON F.EMPLOYEE = S.EMPLOYEE
    AND F.CODE = S.CODE
    AND F.TODATE >= S.FROMDATE
WHERE NOT EXISTS (
        -- S opens a series: no same-code period ends the day before it starts
        SELECT 1
        FROM EMP_TEST P
        WHERE P.EMPLOYEE = S.EMPLOYEE
          AND P.CODE = S.CODE
          AND P.TODATE + 1 = S.FROMDATE
    )
  AND NOT EXISTS (
        -- F closes a series: no same-code period starts the day after it ends
        SELECT 1
        FROM EMP_TEST N
        WHERE N.EMPLOYEE = F.EMPLOYEE
          AND N.CODE = F.CODE
          AND N.FROMDATE = F.TODATE + 1
    )
GROUP BY
    S.EMPLOYEE,
    S.CODE,
    S.FROMDATE
ORDER BY FROM_DATE
输出:
Employee Code FROM_DATE TO_D
1 Employee 4 1/1/2016 2/29/2016
2 Employee 2 5/1/2016 6/30/2016
3 Employee 4 7/1/2016 7/31/2016
4 Employee 3 3/1/2016 4/30/2016
“连接各点”(connecting the dots)问题的标准递归解决方案:
- 在实践中,半开区间(lower_limit <= X < upper_limit)更容易处理
- 递归从任何没有下邻(lower neighbor)的区间段开始
- 相邻的片段被接到右侧,从而构建更长的链
- 最终查询会过滤掉中间的部分结果
注意:下面的代码不处理重叠间隔。
-- Demo table: one row per employee/code period.
CREATE TABLE ecode (
    employee VARCHAR NOT NULL,
    code     INTEGER NOT NULL,
    fromdate DATE    NOT NULL,
    uptodate DATE    NOT NULL
);

-- Read the dd/mm/yyyy literals below as day-month-year.
SET datestyle = 'DMY';

-- Sample rows
INSERT INTO ecode (employee, fromdate, uptodate, code)
VALUES
    ('Employee', '01/07/2016', '31/07/2016', 4),
    ('Employee', '01/06/2016', '30/06/2016', 2),
    ('Employee', '01/05/2016', '31/05/2016', 2),
    ('Employee', '01/04/2016', '30/04/2016', 3),
    ('Employee', '01/03/2016', '31/03/2016', 3),
    ('Employee', '01/02/2016', '29/02/2016', 4),
    ('Employee', '01/01/2016', '31/01/2016', 4);

-- Move every end date forward by one day so each row becomes a half-open
-- interval [fromdate, uptodate). Intentionally touches the whole table.
UPDATE ecode
SET uptodate = uptodate + INTERVAL '1 day';

-- Glue adjacent intervals of the same employee/code into chains.
-- Overlapping intervals are not handled.
WITH RECURSIVE merged AS (
    -- Anchor: intervals with no same-code interval ending where they begin.
    SELECT
        head.employee,
        head.code,
        head.fromdate,
        head.uptodate
    FROM ecode AS head
    WHERE NOT EXISTS (
        SELECT 1
        FROM ecode AS prev
        WHERE prev.employee = head.employee
          AND prev.code = head.code
          AND prev.uptodate = head.fromdate
    )

    UNION ALL

    -- Step: attach the interval that starts where the chain currently ends,
    -- keeping the chain's original start date.
    SELECT
        nxt.employee,
        nxt.code,
        merged.fromdate,
        nxt.uptodate
    FROM merged
    INNER JOIN ecode AS nxt
        ON nxt.employee = merged.employee
       AND nxt.code = merged.code
       AND nxt.fromdate = merged.uptodate
)
SELECT
    merged.employee,
    merged.code,
    merged.fromdate,
    merged.uptodate
FROM merged
-- Drop partial chains: keep only rows that no further interval can extend.
WHERE NOT EXISTS (
    SELECT 1
    FROM ecode AS succ
    WHERE succ.employee = merged.employee
      AND succ.code = merged.code
      AND succ.fromdate = merged.uptodate
)
ORDER BY employee, code, fromdate
;
结果:
DROP SCHEMA
CREATE SCHEMA
SET
CREATE TABLE
SET
INSERT 0 7
UPDATE 7
employee | code | fromdate | uptodate
----------+------+------------+------------
Employee | 2 | 2016-05-01 | 2016-07-01
Employee | 3 | 2016-03-01 | 2016-05-01
Employee | 4 | 2016-01-01 | 2016-03-01
Employee | 4 | 2016-07-01 | 2016-08-01
(4 rows)