在 Snowflake 中使用 DISTINCT 和 GROUP BY 的 SQL 查询
SQL query with distinct and group by snowflake
我想要完成的是获取给定 MPN 的所有记录,但是,我只想要来自 shpm 表的最新 DeliveryDate。
但考虑到 MAX 函数要求该列出现在 GROUP BY 子句中,查询并不会只获取最新的记录,而是因为 DeliveryDate 不同而获取所有记录:
它返回两条记录而不是一条记录。我该如何实现这一点?我使用的是 Snowflake。
这是我的 SQL 代码:
-- Sums SHIP.QUANTITY for a single MPN, one row per lot code, date code,
-- item code, serial number, customer name and ship-to address.
-- SHIP.DELIVERYDATE is selected without being aggregated or listed in GROUP BY.
SELECT
    MD.MPN,
    MD.LOTCODE,
    MD.DATECODE,
    SHIP.ITEMCODE AS SYSTEMPARTNUMBER,
    SHIP.SERIALNUMBER AS SYSTEMSERIALNUMBER,
    SHIP.CUSTOMERNAME,
    SHIP.SHIPTOADDRESS AS ADDRESS,
    SUM(COALESCE(SHIP.QUANTITY, 0)) AS QUANTITY,
    SHIP.DELIVERYDATE
FROM cunits AS UNITS
INNER JOIN unc AS UC
    ON UC.CHILDUNITID = UNITS.ID
INNER JOIN shpm AS SHIP
    ON SHIP.SERIALNUMBER = UC.SYSSN
INNER JOIN tsern AS SN
    ON SN.UNITID = UNITS.ID
INNER JOIN machined AS MD
    ON MD.SERIALNUMBER = SN.SERIALNUMBER
WHERE
    -- the serial-number filter (SYSTEMSERIALNUMBER = '001801055469') is disabled
    MPN = 'XC0402A105KP5CNN-S'
GROUP BY
    MD.MPN,
    MD.LOTCODE,
    MD.DATECODE,
    SHIP.ITEMCODE,
    SHIP.SERIALNUMBER,
    SHIP.CUSTOMERNAME,
    SHIP.SHIPTOADDRESS
使用ROW_NUMBER()
和QUALIFY
:
-- Latest delivery per (MPN, system serial number) for a single MPN.
-- SHIP.QUANTITY is summed per delivery date; QUALIFY then keeps only the
-- group with the most recent DELIVERYDATE in each (MPN, SERIALNUMBER) partition.
SELECT
    MD.MPN,
    MD.LOTCODE,
    MD.DATECODE,
    SHIP.ITEMCODE AS SYSTEMPARTNUMBER,
    SHIP.SERIALNUMBER AS SYSTEMSERIALNUMBER,
    SHIP.CUSTOMERNAME,
    SHIP.SHIPTOADDRESS AS ADDRESS,
    SUM(COALESCE(SHIP.QUANTITY, 0)) AS QUANTITY,
    SHIP.DELIVERYDATE
FROM cunits AS UNITS
INNER JOIN unc AS UC
    ON UC.CHILDUNITID = UNITS.ID
INNER JOIN shpm AS SHIP
    ON SHIP.SERIALNUMBER = UC.SYSSN
INNER JOIN tsern AS SN
    ON SN.UNITID = UNITS.ID
INNER JOIN machined AS MD
    ON MD.SERIALNUMBER = SN.SERIALNUMBER
-- Filter on the MPN only; a bare string literal is not a valid predicate.
WHERE MD.MPN = 'XC0402A105KP5CNN-S'
GROUP BY
    MD.MPN,
    MD.LOTCODE,
    MD.DATECODE,
    SHIP.ITEMCODE,
    SHIP.SERIALNUMBER,
    SHIP.CUSTOMERNAME,
    SHIP.SHIPTOADDRESS,
    -- selected but not aggregated, so it has to be a grouping column
    SHIP.DELIVERYDATE
-- Rank by the delivery date (the column the question asks about); the window
-- is evaluated after GROUP BY, so it may only reference grouped columns.
QUALIFY ROW_NUMBER() OVER (
    PARTITION BY MD.MPN, SHIP.SERIALNUMBER
    ORDER BY SHIP.DELIVERYDATE DESC
) = 1;
这样每个 MPN(以及序列号)只返回一行,这就是我对您的问题的理解。您可能还需要在 PARTITION BY
中加入其他列。
所以猜测一些数据来匹配 SQL
-- Inline sample rows standing in for the five tables read by the query.
WITH cunits AS (
    SELECT v.id
    FROM (VALUES (1)) AS v (id)
),
unc AS (
    SELECT v.CHILDUNITID, v.SYSSN
    FROM (VALUES (1, '123')) AS v (CHILDUNITID, SYSSN)
),
shpm AS (
    -- two rows for the same serial number with different delivery dates
    SELECT v.ITEMCODE, v.SERIALNUMBER, v.QUANTITY, v.DELIVERYDATE
    FROM (
        VALUES
            ('a', '123', 10, '2020-02-01'),
            ('a', '123', 20, '2020-01-01')
    ) AS v (ITEMCODE, SERIALNUMBER, QUANTITY, DELIVERYDATE)
),
tsern AS (
    SELECT v.UNITID, v.SERIALNUMBER
    FROM (VALUES (1, 'zxc')) AS v (UNITID, SERIALNUMBER)
),
machined AS (
    SELECT v.SERIALNUMBER, v.MPN
    FROM (VALUES ('zxc', 'XC0402A105KP5CNN-S')) AS v (SERIALNUMBER, MPN)
)
并从示例中删除一些无关紧要的列
-- Trimmed-down version of the query: sums SHIP.QUANTITY per MPN, item code
-- and serial number. SHIP.DELIVERYDATE is selected but not listed in GROUP BY.
SELECT
    MD.MPN,
    SHIP.ITEMCODE AS SYSTEMPARTNUMBER,
    SHIP.SERIALNUMBER AS SYSTEMSERIALNUMBER,
    SUM(COALESCE(SHIP.QUANTITY, 0)) AS QUANTITY,
    SHIP.DELIVERYDATE
FROM cunits AS UNITS
INNER JOIN unc AS UC
    ON UC.CHILDUNITID = UNITS.ID
INNER JOIN shpm AS SHIP
    ON SHIP.SERIALNUMBER = UC.SYSSN
INNER JOIN tsern AS SN
    ON SN.UNITID = UNITS.ID
INNER JOIN machined AS MD
    ON MD.SERIALNUMBER = SN.SERIALNUMBER
WHERE MD.MPN = 'XC0402A105KP5CNN-S'
GROUP BY
    MD.MPN,
    SHIP.ITEMCODE,
    SHIP.SERIALNUMBER;
现在必须将 SHIP.DELIVERYDATE
添加到 GROUP BY
子句中,否则这段代码根本无法运行——这还没有考虑您不想看到 2020-01-01
数据的要求。
一旦添加了它,就会得到您不想要的两行:
MPN SYSTEMPARTNUMBER SYSTEMSERIALNUMBER QUANTITY DELIVERYDATE
XC0402A105KP5CNN-S a 123 10 2020-02-01
XC0402A105KP5CNN-S a 123 20 2020-01-01
Gordon 的解决方案,添加一个 QUALIFY
-- Keep only the row with the latest DELIVERYDATE in each (MPN, SERIALNUMBER) partition.
QUALIFY ROW_NUMBER() OVER (PARTITION BY MD.MPN, SHIP.SERIALNUMBER ORDER BY SHIP.DELIVERYDATE DESC) = 1;
它能正确地给出答案,但会先计算出所有结果,然后再剔除(prune)那些不想要的结果。根据您的数据集大小和您的 shpm
表的情况,使用 CTE 预先过滤可能会更好:
-- Same report, but shpm is reduced to its latest-delivery row per serial
-- number before the joins, instead of filtering the aggregated result.
WITH cunits AS (
    SELECT v.id
    FROM (VALUES (1)) AS v (id)
),
unc AS (
    SELECT v.CHILDUNITID, v.SYSSN
    FROM (VALUES (1, '123')) AS v (CHILDUNITID, SYSSN)
),
shpm AS (
    -- two rows for the same serial number with different delivery dates
    SELECT v.ITEMCODE, v.SERIALNUMBER, v.QUANTITY, v.DELIVERYDATE
    FROM (
        VALUES
            ('a', '123', 10, '2020-02-01'),
            ('a', '123', 20, '2020-01-01')
    ) AS v (ITEMCODE, SERIALNUMBER, QUANTITY, DELIVERYDATE)
),
tsern AS (
    SELECT v.UNITID, v.SERIALNUMBER
    FROM (VALUES (1, 'zxc')) AS v (UNITID, SERIALNUMBER)
),
machined AS (
    SELECT v.SERIALNUMBER, v.MPN
    FROM (VALUES ('zxc', 'XC0402A105KP5CNN-S')) AS v (SERIALNUMBER, MPN)
),
pre_filtered_shpm AS (
    -- one row per SERIALNUMBER: the one with the greatest DELIVERYDATE
    SELECT
        ITEMCODE,
        SERIALNUMBER,
        QUANTITY,
        DELIVERYDATE
    FROM shpm
    QUALIFY ROW_NUMBER() OVER (
        PARTITION BY SERIALNUMBER
        ORDER BY DELIVERYDATE DESC
    ) = 1
)
SELECT
    MD.MPN,
    SHIP.ITEMCODE AS SYSTEMPARTNUMBER,
    SHIP.SERIALNUMBER AS SYSTEMSERIALNUMBER,
    SUM(COALESCE(SHIP.QUANTITY, 0)) AS QUANTITY,
    SHIP.DELIVERYDATE
FROM cunits AS UNITS
INNER JOIN unc AS UC
    ON UC.CHILDUNITID = UNITS.ID
INNER JOIN pre_filtered_shpm AS SHIP
    ON SHIP.SERIALNUMBER = UC.SYSSN
INNER JOIN tsern AS SN
    ON SN.UNITID = UNITS.ID
INNER JOIN machined AS MD
    ON MD.SERIALNUMBER = SN.SERIALNUMBER
WHERE MD.MPN = 'XC0402A105KP5CNN-S'
GROUP BY
    MD.MPN,
    SHIP.ITEMCODE,
    SHIP.SERIALNUMBER,
    SHIP.DELIVERYDATE;
我想要完成的是获取给定 MPN 的所有记录,但是,我只想要来自 shpm 表的最新 DeliveryDate。
但考虑到 MAX 函数要求该列出现在 GROUP BY 子句中,查询并不会只获取最新的记录,而是因为 DeliveryDate 不同而获取所有记录:
它返回两条记录而不是一条记录。我该如何实现这一点?我使用的是 Snowflake。
这是我的 SQL 代码:
-- Sums SHIP.QUANTITY for a single MPN, one row per lot code, date code,
-- item code, serial number, customer name and ship-to address.
-- SHIP.DELIVERYDATE is selected without being aggregated or listed in GROUP BY.
SELECT
    MD.MPN,
    MD.LOTCODE,
    MD.DATECODE,
    SHIP.ITEMCODE AS SYSTEMPARTNUMBER,
    SHIP.SERIALNUMBER AS SYSTEMSERIALNUMBER,
    SHIP.CUSTOMERNAME,
    SHIP.SHIPTOADDRESS AS ADDRESS,
    SUM(COALESCE(SHIP.QUANTITY, 0)) AS QUANTITY,
    SHIP.DELIVERYDATE
FROM cunits AS UNITS
INNER JOIN unc AS UC
    ON UC.CHILDUNITID = UNITS.ID
INNER JOIN shpm AS SHIP
    ON SHIP.SERIALNUMBER = UC.SYSSN
INNER JOIN tsern AS SN
    ON SN.UNITID = UNITS.ID
INNER JOIN machined AS MD
    ON MD.SERIALNUMBER = SN.SERIALNUMBER
WHERE
    -- the serial-number filter (SYSTEMSERIALNUMBER = '001801055469') is disabled
    MPN = 'XC0402A105KP5CNN-S'
GROUP BY
    MD.MPN,
    MD.LOTCODE,
    MD.DATECODE,
    SHIP.ITEMCODE,
    SHIP.SERIALNUMBER,
    SHIP.CUSTOMERNAME,
    SHIP.SHIPTOADDRESS
使用ROW_NUMBER()
和QUALIFY
:
-- Latest delivery per (MPN, system serial number) for a single MPN.
-- SHIP.QUANTITY is summed per delivery date; QUALIFY then keeps only the
-- group with the most recent DELIVERYDATE in each (MPN, SERIALNUMBER) partition.
SELECT
    MD.MPN,
    MD.LOTCODE,
    MD.DATECODE,
    SHIP.ITEMCODE AS SYSTEMPARTNUMBER,
    SHIP.SERIALNUMBER AS SYSTEMSERIALNUMBER,
    SHIP.CUSTOMERNAME,
    SHIP.SHIPTOADDRESS AS ADDRESS,
    SUM(COALESCE(SHIP.QUANTITY, 0)) AS QUANTITY,
    SHIP.DELIVERYDATE
FROM cunits AS UNITS
INNER JOIN unc AS UC
    ON UC.CHILDUNITID = UNITS.ID
INNER JOIN shpm AS SHIP
    ON SHIP.SERIALNUMBER = UC.SYSSN
INNER JOIN tsern AS SN
    ON SN.UNITID = UNITS.ID
INNER JOIN machined AS MD
    ON MD.SERIALNUMBER = SN.SERIALNUMBER
-- Filter on the MPN only; a bare string literal is not a valid predicate.
WHERE MD.MPN = 'XC0402A105KP5CNN-S'
GROUP BY
    MD.MPN,
    MD.LOTCODE,
    MD.DATECODE,
    SHIP.ITEMCODE,
    SHIP.SERIALNUMBER,
    SHIP.CUSTOMERNAME,
    SHIP.SHIPTOADDRESS,
    -- selected but not aggregated, so it has to be a grouping column
    SHIP.DELIVERYDATE
-- Rank by the delivery date (the column the question asks about); the window
-- is evaluated after GROUP BY, so it may only reference grouped columns.
QUALIFY ROW_NUMBER() OVER (
    PARTITION BY MD.MPN, SHIP.SERIALNUMBER
    ORDER BY SHIP.DELIVERYDATE DESC
) = 1;
这样每个 MPN(以及序列号)只返回一行,这就是我对您的问题的理解。您可能还需要在 PARTITION BY
中加入其他列。
所以猜测一些数据来匹配 SQL
-- Inline sample rows standing in for the five tables read by the query.
WITH cunits AS (
    SELECT v.id
    FROM (VALUES (1)) AS v (id)
),
unc AS (
    SELECT v.CHILDUNITID, v.SYSSN
    FROM (VALUES (1, '123')) AS v (CHILDUNITID, SYSSN)
),
shpm AS (
    -- two rows for the same serial number with different delivery dates
    SELECT v.ITEMCODE, v.SERIALNUMBER, v.QUANTITY, v.DELIVERYDATE
    FROM (
        VALUES
            ('a', '123', 10, '2020-02-01'),
            ('a', '123', 20, '2020-01-01')
    ) AS v (ITEMCODE, SERIALNUMBER, QUANTITY, DELIVERYDATE)
),
tsern AS (
    SELECT v.UNITID, v.SERIALNUMBER
    FROM (VALUES (1, 'zxc')) AS v (UNITID, SERIALNUMBER)
),
machined AS (
    SELECT v.SERIALNUMBER, v.MPN
    FROM (VALUES ('zxc', 'XC0402A105KP5CNN-S')) AS v (SERIALNUMBER, MPN)
)
并从示例中删除一些无关紧要的列
-- Trimmed-down version of the query: sums SHIP.QUANTITY per MPN, item code
-- and serial number. SHIP.DELIVERYDATE is selected but not listed in GROUP BY.
SELECT
    MD.MPN,
    SHIP.ITEMCODE AS SYSTEMPARTNUMBER,
    SHIP.SERIALNUMBER AS SYSTEMSERIALNUMBER,
    SUM(COALESCE(SHIP.QUANTITY, 0)) AS QUANTITY,
    SHIP.DELIVERYDATE
FROM cunits AS UNITS
INNER JOIN unc AS UC
    ON UC.CHILDUNITID = UNITS.ID
INNER JOIN shpm AS SHIP
    ON SHIP.SERIALNUMBER = UC.SYSSN
INNER JOIN tsern AS SN
    ON SN.UNITID = UNITS.ID
INNER JOIN machined AS MD
    ON MD.SERIALNUMBER = SN.SERIALNUMBER
WHERE MD.MPN = 'XC0402A105KP5CNN-S'
GROUP BY
    MD.MPN,
    SHIP.ITEMCODE,
    SHIP.SERIALNUMBER;
现在必须将 SHIP.DELIVERYDATE
添加到 GROUP BY
子句中,否则这段代码根本无法运行——这还没有考虑您不想看到 2020-01-01
数据的要求。
一旦添加了它,就会得到您不想要的两行:
MPN SYSTEMPARTNUMBER SYSTEMSERIALNUMBER QUANTITY DELIVERYDATE
XC0402A105KP5CNN-S a 123 10 2020-02-01
XC0402A105KP5CNN-S a 123 20 2020-01-01
Gordon 的解决方案,添加一个 QUALIFY
-- Keep only the row with the latest DELIVERYDATE in each (MPN, SERIALNUMBER) partition.
QUALIFY ROW_NUMBER() OVER (PARTITION BY MD.MPN, SHIP.SERIALNUMBER ORDER BY SHIP.DELIVERYDATE DESC) = 1;
它能正确地给出答案,但会先计算出所有结果,然后再剔除(prune)那些不想要的结果。根据您的数据集大小和您的 shpm
表的情况,使用 CTE 预先过滤可能会更好:
-- Same report, but shpm is reduced to its latest-delivery row per serial
-- number before the joins, instead of filtering the aggregated result.
WITH cunits AS (
    SELECT v.id
    FROM (VALUES (1)) AS v (id)
),
unc AS (
    SELECT v.CHILDUNITID, v.SYSSN
    FROM (VALUES (1, '123')) AS v (CHILDUNITID, SYSSN)
),
shpm AS (
    -- two rows for the same serial number with different delivery dates
    SELECT v.ITEMCODE, v.SERIALNUMBER, v.QUANTITY, v.DELIVERYDATE
    FROM (
        VALUES
            ('a', '123', 10, '2020-02-01'),
            ('a', '123', 20, '2020-01-01')
    ) AS v (ITEMCODE, SERIALNUMBER, QUANTITY, DELIVERYDATE)
),
tsern AS (
    SELECT v.UNITID, v.SERIALNUMBER
    FROM (VALUES (1, 'zxc')) AS v (UNITID, SERIALNUMBER)
),
machined AS (
    SELECT v.SERIALNUMBER, v.MPN
    FROM (VALUES ('zxc', 'XC0402A105KP5CNN-S')) AS v (SERIALNUMBER, MPN)
),
pre_filtered_shpm AS (
    -- one row per SERIALNUMBER: the one with the greatest DELIVERYDATE
    SELECT
        ITEMCODE,
        SERIALNUMBER,
        QUANTITY,
        DELIVERYDATE
    FROM shpm
    QUALIFY ROW_NUMBER() OVER (
        PARTITION BY SERIALNUMBER
        ORDER BY DELIVERYDATE DESC
    ) = 1
)
SELECT
    MD.MPN,
    SHIP.ITEMCODE AS SYSTEMPARTNUMBER,
    SHIP.SERIALNUMBER AS SYSTEMSERIALNUMBER,
    SUM(COALESCE(SHIP.QUANTITY, 0)) AS QUANTITY,
    SHIP.DELIVERYDATE
FROM cunits AS UNITS
INNER JOIN unc AS UC
    ON UC.CHILDUNITID = UNITS.ID
INNER JOIN pre_filtered_shpm AS SHIP
    ON SHIP.SERIALNUMBER = UC.SYSSN
INNER JOIN tsern AS SN
    ON SN.UNITID = UNITS.ID
INNER JOIN machined AS MD
    ON MD.SERIALNUMBER = SN.SERIALNUMBER
WHERE MD.MPN = 'XC0402A105KP5CNN-S'
GROUP BY
    MD.MPN,
    SHIP.ITEMCODE,
    SHIP.SERIALNUMBER,
    SHIP.DELIVERYDATE;