根据postgresql中的日期获取数据和总和
Fetching data and sums according to date in postgresql
我在 Greenplum - postgresql DB 工作,数据结构如下:
在此我需要以下逻辑来实现(其中一些我已经实现):
-- Date label for a row: PDATE formatted as 'YYYY-MM-DD' when present,
-- otherwise NDATE formatted with 'YYYY-MM-DD N', otherwise 'NO PO'.
-- The NDATE test must come before the fallback: testing "PDATE IS NULL"
-- second would capture every remaining row and make 'NO PO' unreachable.
CASE WHEN PDATE IS NOT NULL THEN to_char(PDATE,'YYYY-MM-DD')
     WHEN NDATE IS NOT NULL THEN to_char(NDATE,'YYYY-MM-DD N')
     ELSE 'NO PO' END
据此我需要 QTY 和 VName。
QTY: Sum(Qty) according to min (PDATE and NDATE)
VName: VName according to min (PDATE and NDATE)
期望的输出:
到目前为止我已经做了以下查询:
-- Smallest date label (PRO) per ITEM.
-- The label is PDATE when present, else NDATE (format 'YYYY-MM-DD N'),
-- else 'NO PO'. MIN() compares the labels as text.
-- NOTE(review): `Table` looks like a stand-in for the real table name; TABLE
-- is a reserved key word in PostgreSQL, so substitute the actual name.
SELECT
    ITEM ,
    MIN(CASE WHEN PDATE IS NOT NULL THEN to_char(PDATE,'YYYY-MM-DD')
             WHEN NDATE IS NOT NULL THEN to_char(NDATE,'YYYY-MM-DD N')
             ELSE 'NO PO' END) AS PRO
FROM
    Table
GROUP BY
    ITEM
请帮我解答一下
以下答案假定您使用的是 Postgres 8.4 或更高版本,它附带一个名为 ARRAY_AGG()
的聚合函数。我使用 ARRAY_AGG()
为每个 ITEM
-MIN(DATE)
组的 VNAME
创建以逗号分隔的列表。
-- For each ITEM, return the QTY total and the VNAME array of the group that
-- falls on the ITEM's earliest date, where a row's date is
-- LEAST(PDATE, NDATE). Per the PostgreSQL docs, LEAST ignores NULL arguments
-- and is NULL only when every argument is NULL.
-- NOTE(review): rows whose PDATE and NDATE are both NULL get a NULL date and
-- cannot satisfy the equality join below — confirm that is acceptable.
SELECT t1.ITEM, t1.DATE, t1.QTY, t1.VNAME
FROM
(
    -- QTY total and VNAME array for every (ITEM, DATE) pair.
    SELECT d.ITEM, d.DATE AS DATE, SUM(d.QTY) AS QTY, ARRAY_AGG(d.VNAME) AS VNAME
    FROM
    (
        SELECT t.ITEM, LEAST(t.PDATE, t.NDATE) AS DATE, t.QTY, t.VNAME
        FROM Table t
    ) d
    GROUP BY d.ITEM, d.DATE
) t1
INNER JOIN
(
    -- Earliest date per ITEM.
    SELECT t.ITEM, MIN(LEAST(t.PDATE, t.NDATE)) AS DATE
    FROM Table t
    GROUP BY t.ITEM
) t2
-- Both derived tables expose DATE (not PDATE), so the join must use DATE.
ON t1.ITEM = t2.ITEM AND t1.DATE = t2.DATE
解释:
第一个查询获取每个 ITEM
-PDATE
组合的 QTY
总和和 VNAME
CSV 聚合。但是,您只需要每个组中 最早 日期的聚合。第二个查询与第一个查询相结合,过滤掉不需要的组,为您留下所需的结果。
由于您使用的是 Postgres 8.2,因此您必须定义自己的自定义函数:
-- User-defined ARRAY_AGG aggregate: starts from the empty array '{}' and
-- appends each input value to the running array via array_append.
CREATE AGGREGATE ARRAY_AGG (anyelement) (
    SFUNC    = array_append,
    STYPE    = anyarray,
    INITCOND = '{}'
);
感谢 Tim 的帮助。我花了一些时间来创建查询,但最终完成了。为了节省时间,我在论坛上发布了问题,结果对我来说也是如此 - 它花了时间..
下面是查询
-- One row per ID: a date label (DATER), the QTY total and a ', '-separated
-- VNAME list for the rows that carry the ID's chosen date.
-- The chosen date is the earliest PDATE (DATEQ); when the ID has no PDATE at
-- all it is the earliest NDATE (DATEW); when it has neither, all of the ID's
-- rows are summarised under the label 'NO PO'.
SELECT
    FO.ID ,
    -- DATEW is tested explicitly so that the 'NO PO' fallback is reachable.
    (CASE WHEN FO.DateQ IS NOT NULL THEN to_char(FO.DateQ ,'YYYY-MM-DD')
          WHEN FO.Datew IS NOT NULL THEN to_char(FO.Datew ,'YYYY-MM-DD N')
          ELSE 'NO PO' END) AS DATER ,
    FO.QTY ,
    FO.VNAME
FROM
(
    SELECT
        NT.ID ,
        PT.DATEQ ,
        PT.DATEW ,
        SUM(NT.QTY) AS QTY ,
        array_to_string(array_agg(NT.VNAME) ,', ') AS VNAME
    FROM
        TABLENAME NT INNER JOIN(
            -- Chosen date per ID; DATEW is kept only when DATEQ is NULL.
            SELECT
                AST.ID ,
                AST.DateQ ,
                (CASE WHEN AST.DateQ IS NULL THEN AST.DateW ELSE NULL END) AS DateW
            FROM
            (
                SELECT
                    ID ,
                    MIN(PDATE) AS DATEQ ,
                    MIN(CASE WHEN pdate IS NULL THEN ndate END) AS DATEW
                FROM
                    TABLENAME
                GROUP BY
                    ID
            ) AST
        ) PT
        ON NT.ID = PT.ID
        -- The parentheses are required: AND binds tighter than OR, so without
        -- them the NDATE test would match rows belonging to other IDs.
        AND (
               NT.PDATE = PT.DATEQ
            OR NT.NDATE = PT.DATEW
            -- IDs with no date at all: keep every row so 'NO PO' is reported.
            OR (PT.DATEQ IS NULL AND PT.DATEW IS NULL)
        )
    GROUP BY
        NT.ID ,
        PT.DATEQ ,
        PT.DATEW
) FO
ORDER BY
    FO.ID
将 ID 视为项目。
Anshul,您的解决方案有效,但是当您加入 table 两次时它会带来性能影响,这会强制数据库扫描您的 table 两次。更好的解决方案是使用分析函数并且只引用 table 一次。
这是一个例子:
-- Sample table for the example below: created with the APPENDONLY storage
-- option and distributed by item.
CREATE TABLE anshul (
    item  varchar,
    pdate date,
    ndate date,
    qty   int,
    vname varchar
)
WITH (appendonly = true)
DISTRIBUTED BY (item);
-- Sample rows for the example. The explicit column list keeps the statement
-- correct even if the table's column order changes.
INSERT INTO anshul (item, pdate, ndate, qty, vname) VALUES
('ABC', NULL, '2015-12-31', 10, 'Y JACK SOLLEN'),
('HRD', '2016-01-29', '2016-1-8', 5, 'H HARRIS'),
('HRD', '2015-09-07', '2015-10-09', 31, 'G JOE'),
('HRD', '2015-09-30', '2015-09-07', 28, 'K KAMATH'),
('GGT', '2015-12-10', '2015-12-12', 10, 'P QUIK'),
('GGT', '2015-12-27', NULL, 20, NULL),
('GGT', '2015-12-10', '2016-01-04', 22, 'U RITZ'),
('GGT', '2016-01-07', '2016-01-07', 22, 'S SUE DAL'),
('OWE', NULL, '2015-12-22', 6, 'J JASON NIT'),
('OWE', NULL, '2015-11-05', 2, 'P QUEER'),
('OWE', NULL, '2015-11-05', 5, 'K KITTAN');
这里的查询借用了您已经知道的一些代码。
-- Per item: total qty and ','-joined vname over the rows whose desired date
-- (pdate, falling back to ndate when pdate is NULL) ranks first in the item.
-- rank() gives tied rows the same rank, so all rows sharing the earliest
-- date are aggregated together.
SELECT ranked.item,
       sum(ranked.qty) AS qty,
       array_to_string(array_agg(ranked.vname), ',') AS vname
FROM (
    SELECT dated.item,
           dated.qty,
           dated.vname,
           rank() OVER (PARTITION BY dated.item ORDER BY dated.desired_date) AS date_rank
    FROM (
        SELECT item,
               qty,
               vname,
               COALESCE(pdate, ndate) AS desired_date
        FROM anshul
    ) AS dated
) AS ranked
WHERE ranked.date_rank = 1
GROUP BY ranked.item
ORDER BY ranked.item;
结果:
item | qty | vname
------+-----+------------------
ABC | 10 | Y JACK SOLLEN
GGT | 32 | P QUIK,U RITZ
HRD | 31 | G JOE
OWE | 7 | K KITTAN,P QUEER
我在 Greenplum - postgresql DB 工作,数据结构如下:
在此我需要以下逻辑来实现(其中一些我已经实现):
-- Date label for a row: PDATE formatted as 'YYYY-MM-DD' when present,
-- otherwise NDATE formatted with 'YYYY-MM-DD N', otherwise 'NO PO'.
-- The NDATE test must come before the fallback: testing "PDATE IS NULL"
-- second would capture every remaining row and make 'NO PO' unreachable.
CASE WHEN PDATE IS NOT NULL THEN to_char(PDATE,'YYYY-MM-DD')
     WHEN NDATE IS NOT NULL THEN to_char(NDATE,'YYYY-MM-DD N')
     ELSE 'NO PO' END
据此我需要 QTY 和 VName。
QTY: Sum(Qty) according to min (PDATE and NDATE)
VName: VName according to min (PDATE and NDATE)
期望的输出:
到目前为止我已经做了以下查询:
-- Smallest date label (PRO) per ITEM.
-- The label is PDATE when present, else NDATE (format 'YYYY-MM-DD N'),
-- else 'NO PO'. MIN() compares the labels as text.
-- NOTE(review): `Table` looks like a stand-in for the real table name; TABLE
-- is a reserved key word in PostgreSQL, so substitute the actual name.
SELECT
    ITEM ,
    MIN(CASE WHEN PDATE IS NOT NULL THEN to_char(PDATE,'YYYY-MM-DD')
             WHEN NDATE IS NOT NULL THEN to_char(NDATE,'YYYY-MM-DD N')
             ELSE 'NO PO' END) AS PRO
FROM
    Table
GROUP BY
    ITEM
请帮我解答一下
以下答案假定您使用的是 Postgres 8.4 或更高版本,它附带一个名为 ARRAY_AGG()
的聚合函数。我使用 ARRAY_AGG()
为每个 ITEM
-MIN(DATE)
组的 VNAME
创建以逗号分隔的列表。
-- For each ITEM, return the QTY total and the VNAME array of the group that
-- falls on the ITEM's earliest date, where a row's date is
-- LEAST(PDATE, NDATE). Per the PostgreSQL docs, LEAST ignores NULL arguments
-- and is NULL only when every argument is NULL.
-- NOTE(review): rows whose PDATE and NDATE are both NULL get a NULL date and
-- cannot satisfy the equality join below — confirm that is acceptable.
SELECT t1.ITEM, t1.DATE, t1.QTY, t1.VNAME
FROM
(
    -- QTY total and VNAME array for every (ITEM, DATE) pair.
    SELECT d.ITEM, d.DATE AS DATE, SUM(d.QTY) AS QTY, ARRAY_AGG(d.VNAME) AS VNAME
    FROM
    (
        SELECT t.ITEM, LEAST(t.PDATE, t.NDATE) AS DATE, t.QTY, t.VNAME
        FROM Table t
    ) d
    GROUP BY d.ITEM, d.DATE
) t1
INNER JOIN
(
    -- Earliest date per ITEM.
    SELECT t.ITEM, MIN(LEAST(t.PDATE, t.NDATE)) AS DATE
    FROM Table t
    GROUP BY t.ITEM
) t2
-- Both derived tables expose DATE (not PDATE), so the join must use DATE.
ON t1.ITEM = t2.ITEM AND t1.DATE = t2.DATE
解释:
第一个查询获取每个 ITEM
-PDATE
组合的 QTY
总和和 VNAME
CSV 聚合。但是,您只需要每个组中 最早 日期的聚合。第二个查询与第一个查询相结合,过滤掉不需要的组,为您留下所需的结果。
由于您使用的是 Postgres 8.2,因此您必须定义自己的自定义函数:
-- User-defined ARRAY_AGG aggregate: starts from the empty array '{}' and
-- appends each input value to the running array via array_append.
CREATE AGGREGATE ARRAY_AGG (anyelement) (
    SFUNC    = array_append,
    STYPE    = anyarray,
    INITCOND = '{}'
);
感谢 Tim 的帮助。我花了一些时间来创建查询,但最终完成了。为了节省时间,我在论坛上发布了问题,结果对我来说也是如此 - 它花了时间..
下面是查询
-- One row per ID: a date label (DATER), the QTY total and a ', '-separated
-- VNAME list for the rows that carry the ID's chosen date.
-- The chosen date is the earliest PDATE (DATEQ); when the ID has no PDATE at
-- all it is the earliest NDATE (DATEW); when it has neither, all of the ID's
-- rows are summarised under the label 'NO PO'.
SELECT
    FO.ID ,
    -- DATEW is tested explicitly so that the 'NO PO' fallback is reachable.
    (CASE WHEN FO.DateQ IS NOT NULL THEN to_char(FO.DateQ ,'YYYY-MM-DD')
          WHEN FO.Datew IS NOT NULL THEN to_char(FO.Datew ,'YYYY-MM-DD N')
          ELSE 'NO PO' END) AS DATER ,
    FO.QTY ,
    FO.VNAME
FROM
(
    SELECT
        NT.ID ,
        PT.DATEQ ,
        PT.DATEW ,
        SUM(NT.QTY) AS QTY ,
        array_to_string(array_agg(NT.VNAME) ,', ') AS VNAME
    FROM
        TABLENAME NT INNER JOIN(
            -- Chosen date per ID; DATEW is kept only when DATEQ is NULL.
            SELECT
                AST.ID ,
                AST.DateQ ,
                (CASE WHEN AST.DateQ IS NULL THEN AST.DateW ELSE NULL END) AS DateW
            FROM
            (
                SELECT
                    ID ,
                    MIN(PDATE) AS DATEQ ,
                    MIN(CASE WHEN pdate IS NULL THEN ndate END) AS DATEW
                FROM
                    TABLENAME
                GROUP BY
                    ID
            ) AST
        ) PT
        ON NT.ID = PT.ID
        -- The parentheses are required: AND binds tighter than OR, so without
        -- them the NDATE test would match rows belonging to other IDs.
        AND (
               NT.PDATE = PT.DATEQ
            OR NT.NDATE = PT.DATEW
            -- IDs with no date at all: keep every row so 'NO PO' is reported.
            OR (PT.DATEQ IS NULL AND PT.DATEW IS NULL)
        )
    GROUP BY
        NT.ID ,
        PT.DATEQ ,
        PT.DATEW
) FO
ORDER BY
    FO.ID
将 ID 视为项目。
Anshul,您的解决方案有效,但是当您加入 table 两次时它会带来性能影响,这会强制数据库扫描您的 table 两次。更好的解决方案是使用分析函数并且只引用 table 一次。
这是一个例子:
-- Sample table for the example below: created with the APPENDONLY storage
-- option and distributed by item.
CREATE TABLE anshul (
    item  varchar,
    pdate date,
    ndate date,
    qty   int,
    vname varchar
)
WITH (appendonly = true)
DISTRIBUTED BY (item);
-- Sample rows for the example. The explicit column list keeps the statement
-- correct even if the table's column order changes.
INSERT INTO anshul (item, pdate, ndate, qty, vname) VALUES
('ABC', NULL, '2015-12-31', 10, 'Y JACK SOLLEN'),
('HRD', '2016-01-29', '2016-1-8', 5, 'H HARRIS'),
('HRD', '2015-09-07', '2015-10-09', 31, 'G JOE'),
('HRD', '2015-09-30', '2015-09-07', 28, 'K KAMATH'),
('GGT', '2015-12-10', '2015-12-12', 10, 'P QUIK'),
('GGT', '2015-12-27', NULL, 20, NULL),
('GGT', '2015-12-10', '2016-01-04', 22, 'U RITZ'),
('GGT', '2016-01-07', '2016-01-07', 22, 'S SUE DAL'),
('OWE', NULL, '2015-12-22', 6, 'J JASON NIT'),
('OWE', NULL, '2015-11-05', 2, 'P QUEER'),
('OWE', NULL, '2015-11-05', 5, 'K KITTAN');
这里的查询借用了您已经知道的一些代码。
-- Per item: total qty and ','-joined vname over the rows whose desired date
-- (pdate, falling back to ndate when pdate is NULL) ranks first in the item.
-- rank() gives tied rows the same rank, so all rows sharing the earliest
-- date are aggregated together.
SELECT ranked.item,
       sum(ranked.qty) AS qty,
       array_to_string(array_agg(ranked.vname), ',') AS vname
FROM (
    SELECT dated.item,
           dated.qty,
           dated.vname,
           rank() OVER (PARTITION BY dated.item ORDER BY dated.desired_date) AS date_rank
    FROM (
        SELECT item,
               qty,
               vname,
               COALESCE(pdate, ndate) AS desired_date
        FROM anshul
    ) AS dated
) AS ranked
WHERE ranked.date_rank = 1
GROUP BY ranked.item
ORDER BY ranked.item;
结果:
item | qty | vname
------+-----+------------------
ABC | 10 | Y JACK SOLLEN
GGT | 32 | P QUIK,U RITZ
HRD | 31 | G JOE
OWE | 7 | K KITTAN,P QUEER