vsql/Vertica:选取每个分组的前 5 行
vsql/Vertica: Select top 5 rows by group
我有这个查询,它应该获取分组数据的前 n 行。我将 RANK()
与 OVER PARTITION BY
结合使用来确定每个组的前 n 行:
-- Asker's original attempt: rank rows within each USERID by AVGTIMEONPAGE
-- (descending) and keep ranks 1 through 5.
-- NOTE: this statement fails as written. The `rank` alias is defined in this
-- SELECT list and referenced in the WHERE clause of the same query level,
-- where the alias is not visible.
SELECT X.USERID, X.ARTID, X.AVGTIMEONPAGE,EDP.ARTDSC,
RANK() OVER (PARTITION BY X.USERID ORDER BY X.AVGTIMEONPAGE DESC) as rank
FROM
-- X: rows whose pagePath contains 'DataSheets'; ARTID is the first
-- ' -'-delimited field of pageTitle.
(SELECT GANG.userID AS USERID,GANG.avgTimeOnPage AS AVGTIMEONPAGE,
split_part(GANG.pageTitle,' -',1) as ARTID
FROM GoogleAnalytics.navigazioneG AS GANG
WHERE GANG.pagePath LIKE '%DataSheets%' ) AS X
-- LEFT JOIN keeps rows of X that have no matching product row; the extra
-- predicates sit in ON (not WHERE) so they do not discard those rows.
LEFT JOIN ESPDDS.ESP_DPRODUCT AS EDP
ON EDP.ARTID=X.ARTID AND EDP.SCD_IS_CURRENT=1
AND EDP.COMPANYID=1
-- The reference to `rank` below is what triggers the error.
WHERE X.ARTID NOT LIKE '%Company%' AND rank in (1,2,3,4,5)
它报错说 rank 列不存在。如果我把 WHERE 子句的最后一部分注释掉,就可以看到 rank 列计算正确。
谢谢
出错的原因是 rank
别名在定义它的同一查询层级中不可用。另外请注意,如果希望出现并列时名次不跳号,可以改用 dense_rank
函数。
-- Top 5 rows per USERID by AVGTIMEONPAGE (descending).
-- RANK() is computed in the inner query and filtered one level up, because a
-- window-function alias cannot be referenced in the WHERE clause of the query
-- level that defines it.
SELECT USERID, ARTID, AVGTIMEONPAGE, ARTDSC, rank
FROM
(SELECT GANG.userID AS USERID
,GANG.avgTimeOnPage AS AVGTIMEONPAGE
,split_part(GANG.pageTitle,' -',1) AS ARTID
-- ARTDSC must be projected here so the outer SELECT can return it.
,EDP.ARTDSC AS ARTDSC
-- Partition/order by GANG columns: there is no alias X at this level.
-- Use DENSE_RANK() instead if ties should not leave gaps in the numbering.
,RANK() OVER (PARTITION BY GANG.userID ORDER BY GANG.avgTimeOnPage DESC) AS rank
FROM GoogleAnalytics.navigazioneG AS GANG
-- ARTID is only a select-list alias here, so the join repeats the expression.
LEFT JOIN ESPDDS.ESP_DPRODUCT AS EDP ON EDP.ARTID=split_part(GANG.pageTitle,' -',1) AND EDP.SCD_IS_CURRENT=1
AND EDP.COMPANYID=1
WHERE GANG.pagePath LIKE '%DataSheets%'
-- Exclude 'Company' rows before ranking so they do not consume rank positions.
AND split_part(GANG.pageTitle,' -',1) NOT LIKE '%Company%'
) T
WHERE rank <= 5
WHERE
子句先于 SELECT
子句求值,因此在 WHERE 中 rank
别名尚未定义。您可以再嵌套一层子查询来引用它:
-- Top 5 rows per USERID by AVGTIMEONPAGE (descending).
-- The ranking is computed in the derived table `per_user` so that the outer
-- query can filter on the `rank` alias.
SELECT
    per_user.USERID,
    per_user.ARTID,
    per_user.AVGTIMEONPAGE,
    per_user.ARTDSC,
    per_user.rank
FROM (
    SELECT
        views.USERID,
        views.ARTID,
        views.AVGTIMEONPAGE,
        EDP.ARTDSC,
        RANK() OVER (
            PARTITION BY views.USERID
            ORDER BY views.AVGTIMEONPAGE DESC
        ) AS rank
    FROM (
        -- Rows whose pagePath contains 'DataSheets'; ARTID is the first
        -- ' -'-delimited field of pageTitle.
        SELECT
            GANG.userID AS USERID,
            GANG.avgTimeOnPage AS AVGTIMEONPAGE,
            split_part(GANG.pageTitle,' -',1) AS ARTID
        FROM GoogleAnalytics.navigazioneG AS GANG
        WHERE GANG.pagePath LIKE '%DataSheets%'
    ) AS views
    -- Extra predicates stay in ON so unmatched rows of `views` are kept.
    LEFT JOIN ESPDDS.ESP_DPRODUCT AS EDP
        ON EDP.ARTID = views.ARTID
        AND EDP.SCD_IS_CURRENT = 1
        AND EDP.COMPANYID = 1
    -- Applied before the window function, so excluded rows are never ranked.
    WHERE views.ARTID NOT LIKE '%Company%'
) per_user
-- RANK() yields integers starting at 1, so this keeps ranks 1 through 5.
WHERE per_user.rank <= 5;
我有这个查询,它应该获取分组数据的前 n 行。我将 RANK()
与 OVER PARTITION BY
结合使用来确定每个组的前 n 行:
-- Asker's original attempt: rank rows within each USERID by AVGTIMEONPAGE
-- (descending) and keep ranks 1 through 5.
-- NOTE: this statement fails as written. The `rank` alias is defined in this
-- SELECT list and referenced in the WHERE clause of the same query level,
-- where the alias is not visible.
SELECT X.USERID, X.ARTID, X.AVGTIMEONPAGE,EDP.ARTDSC,
RANK() OVER (PARTITION BY X.USERID ORDER BY X.AVGTIMEONPAGE DESC) as rank
FROM
-- X: rows whose pagePath contains 'DataSheets'; ARTID is the first
-- ' -'-delimited field of pageTitle.
(SELECT GANG.userID AS USERID,GANG.avgTimeOnPage AS AVGTIMEONPAGE,
split_part(GANG.pageTitle,' -',1) as ARTID
FROM GoogleAnalytics.navigazioneG AS GANG
WHERE GANG.pagePath LIKE '%DataSheets%' ) AS X
-- LEFT JOIN keeps rows of X that have no matching product row; the extra
-- predicates sit in ON (not WHERE) so they do not discard those rows.
LEFT JOIN ESPDDS.ESP_DPRODUCT AS EDP
ON EDP.ARTID=X.ARTID AND EDP.SCD_IS_CURRENT=1
AND EDP.COMPANYID=1
-- The reference to `rank` below is what triggers the error.
WHERE X.ARTID NOT LIKE '%Company%' AND rank in (1,2,3,4,5)
它报错说 rank 列不存在。如果我把 WHERE 子句的最后一部分注释掉,就可以看到 rank 列计算正确。
谢谢
出错的原因是 rank
别名在定义它的同一查询层级中不可用。另外请注意,如果希望出现并列时名次不跳号,可以改用 dense_rank
函数。
-- Top 5 rows per USERID by AVGTIMEONPAGE (descending).
-- RANK() is computed in the inner query and filtered one level up, because a
-- window-function alias cannot be referenced in the WHERE clause of the query
-- level that defines it.
SELECT USERID, ARTID, AVGTIMEONPAGE, ARTDSC, rank
FROM
(SELECT GANG.userID AS USERID
,GANG.avgTimeOnPage AS AVGTIMEONPAGE
,split_part(GANG.pageTitle,' -',1) AS ARTID
-- ARTDSC must be projected here so the outer SELECT can return it.
,EDP.ARTDSC AS ARTDSC
-- Partition/order by GANG columns: there is no alias X at this level.
-- Use DENSE_RANK() instead if ties should not leave gaps in the numbering.
,RANK() OVER (PARTITION BY GANG.userID ORDER BY GANG.avgTimeOnPage DESC) AS rank
FROM GoogleAnalytics.navigazioneG AS GANG
-- ARTID is only a select-list alias here, so the join repeats the expression.
LEFT JOIN ESPDDS.ESP_DPRODUCT AS EDP ON EDP.ARTID=split_part(GANG.pageTitle,' -',1) AND EDP.SCD_IS_CURRENT=1
AND EDP.COMPANYID=1
WHERE GANG.pagePath LIKE '%DataSheets%'
-- Exclude 'Company' rows before ranking so they do not consume rank positions.
AND split_part(GANG.pageTitle,' -',1) NOT LIKE '%Company%'
) T
WHERE rank <= 5
WHERE
子句先于 SELECT
子句求值,因此在 WHERE 中 rank
别名尚未定义。您可以再嵌套一层子查询来引用它:
-- Top 5 rows per USERID by AVGTIMEONPAGE (descending).
-- The ranking is computed in the derived table `per_user` so that the outer
-- query can filter on the `rank` alias.
SELECT
    per_user.USERID,
    per_user.ARTID,
    per_user.AVGTIMEONPAGE,
    per_user.ARTDSC,
    per_user.rank
FROM (
    SELECT
        views.USERID,
        views.ARTID,
        views.AVGTIMEONPAGE,
        EDP.ARTDSC,
        RANK() OVER (
            PARTITION BY views.USERID
            ORDER BY views.AVGTIMEONPAGE DESC
        ) AS rank
    FROM (
        -- Rows whose pagePath contains 'DataSheets'; ARTID is the first
        -- ' -'-delimited field of pageTitle.
        SELECT
            GANG.userID AS USERID,
            GANG.avgTimeOnPage AS AVGTIMEONPAGE,
            split_part(GANG.pageTitle,' -',1) AS ARTID
        FROM GoogleAnalytics.navigazioneG AS GANG
        WHERE GANG.pagePath LIKE '%DataSheets%'
    ) AS views
    -- Extra predicates stay in ON so unmatched rows of `views` are kept.
    LEFT JOIN ESPDDS.ESP_DPRODUCT AS EDP
        ON EDP.ARTID = views.ARTID
        AND EDP.SCD_IS_CURRENT = 1
        AND EDP.COMPANYID = 1
    -- Applied before the window function, so excluded rows are never ranked.
    WHERE views.ARTID NOT LIKE '%Company%'
) per_user
-- RANK() yields integers starting at 1, so this keeps ranks 1 through 5.
WHERE per_user.rank <= 5;