使用 PARTITION BY (HIVE) 时如何过滤掉组中的重复元素
How to filter out duplicate elements in a group when using PARTITION BY (HIVE)
假设我有以下表(animals):
**Color** **Species** **Weight**
White Dog 20
White Dog 8
White Dog 33
Black Dog 55
Brown Dog 80
White Cat 10
Black Cat 14
White Cat 9
我想按物种分组,在每个物种内按颜色去重,并在每个去重后的分组中找出体重最轻的两只动物。
结果 table 应如下所示:
**Color** **Species** **Weight**
White Dog 8
Black Dog 55
White Cat 9
Black Cat 14
我正在使用以下查询(我知道这是不正确的):
-- Ranks every animal by weight within its species and keeps ranks 1 and 2.
-- The window is partitioned by species only, so rows are not de-duplicated
-- by color: one color can occupy both of the returned positions.
SELECT
    ranked_animals.color,
    ranked_animals.species,
    ranked_animals.weight
FROM (
    SELECT
        species,
        color,
        weight,
        RANK() OVER (PARTITION BY species ORDER BY weight ASC) AS weight_rank
    FROM animals
) ranked_animals
WHERE ranked_animals.weight_rank <= 2;
我不知道如何在上面的代码中过滤出独特的颜色。有什么建议吗?
谢谢!
样本TABLE
-- Sample data: one row per animal (color, species, weight).
CREATE TABLE #TEMP ([COLOR] VARCHAR(20), Species VARCHAR(20), [Weight] INT);

-- The target columns are named explicitly so the insert does not depend on
-- the column order of the table definition.
INSERT INTO #TEMP ([COLOR], Species, [Weight])
SELECT 'White', 'Dog', 20
UNION ALL
SELECT 'White', 'Dog', 8
UNION ALL
SELECT 'White', 'Dog', 33
UNION ALL
SELECT 'Black', 'Dog', 55
UNION ALL
SELECT 'Brown', 'Dog', 80
UNION ALL
SELECT 'White', 'Cat', 10
UNION ALL
SELECT 'Black', 'Cat', 14
UNION ALL
SELECT 'White', 'Cat', 9;
查询
-- Returns, for each species, the two lightest animals after keeping only the
-- lightest animal of every color.
;WITH lightest_per_color AS
(
    -- Number the rows of each (color, species) pair from lightest to heaviest.
    -- No DISTINCT is required: ROW_NUMBER() gives every row a unique number
    -- within its partition.
    SELECT
        [COLOR],
        Species,
        [Weight],
        ROW_NUMBER() OVER (PARTITION BY [COLOR], Species ORDER BY [Weight]) AS RNO
    FROM #TEMP
)
, ranked_per_species AS
(
    -- Keep the lightest row of each color, then number those rows per species.
    -- [COLOR] breaks weight ties so the numbering is deterministic.
    SELECT
        [COLOR],
        Species,
        [Weight],
        ROW_NUMBER() OVER (PARTITION BY Species ORDER BY [Weight], [COLOR]) AS RNO2
    FROM lightest_per_color
    WHERE RNO = 1
)
-- Keep at most two rows per species.
SELECT
    [COLOR],
    Species,
    [Weight]
FROM ranked_per_species
WHERE RNO2 <= 2
ORDER BY Species DESC, [COLOR] DESC;
- 点击此处查看结果
假设我有以下表(animals):
**Color** **Species** **Weight**
White Dog 20
White Dog 8
White Dog 33
Black Dog 55
Brown Dog 80
White Cat 10
Black Cat 14
White Cat 9
我想按物种分组,在每个物种内按颜色去重,并在每个去重后的分组中找出体重最轻的两只动物。
结果 table 应如下所示:
**Color** **Species** **Weight**
White Dog 8
Black Dog 55
White Cat 9
Black Cat 14
我正在使用以下查询(我知道这是不正确的):
-- Ranks every animal by weight within its species and keeps ranks 1 and 2.
-- The window is partitioned by species only, so rows are not de-duplicated
-- by color: one color can occupy both of the returned positions.
SELECT
    ranked_animals.color,
    ranked_animals.species,
    ranked_animals.weight
FROM (
    SELECT
        species,
        color,
        weight,
        RANK() OVER (PARTITION BY species ORDER BY weight ASC) AS weight_rank
    FROM animals
) ranked_animals
WHERE ranked_animals.weight_rank <= 2;
我不知道如何在上面的代码中过滤出独特的颜色。有什么建议吗?
谢谢!
样本TABLE
-- Sample data: one row per animal (color, species, weight).
CREATE TABLE #TEMP ([COLOR] VARCHAR(20), Species VARCHAR(20), [Weight] INT);

-- The target columns are named explicitly so the insert does not depend on
-- the column order of the table definition.
INSERT INTO #TEMP ([COLOR], Species, [Weight])
SELECT 'White', 'Dog', 20
UNION ALL
SELECT 'White', 'Dog', 8
UNION ALL
SELECT 'White', 'Dog', 33
UNION ALL
SELECT 'Black', 'Dog', 55
UNION ALL
SELECT 'Brown', 'Dog', 80
UNION ALL
SELECT 'White', 'Cat', 10
UNION ALL
SELECT 'Black', 'Cat', 14
UNION ALL
SELECT 'White', 'Cat', 9;
查询
-- Returns, for each species, the two lightest animals after keeping only the
-- lightest animal of every color.
;WITH lightest_per_color AS
(
    -- Number the rows of each (color, species) pair from lightest to heaviest.
    -- No DISTINCT is required: ROW_NUMBER() gives every row a unique number
    -- within its partition.
    SELECT
        [COLOR],
        Species,
        [Weight],
        ROW_NUMBER() OVER (PARTITION BY [COLOR], Species ORDER BY [Weight]) AS RNO
    FROM #TEMP
)
, ranked_per_species AS
(
    -- Keep the lightest row of each color, then number those rows per species.
    -- [COLOR] breaks weight ties so the numbering is deterministic.
    SELECT
        [COLOR],
        Species,
        [Weight],
        ROW_NUMBER() OVER (PARTITION BY Species ORDER BY [Weight], [COLOR]) AS RNO2
    FROM lightest_per_color
    WHERE RNO = 1
)
-- Keep at most two rows per species.
SELECT
    [COLOR],
    Species,
    [Weight]
FROM ranked_per_species
WHERE RNO2 <= 2
ORDER BY Species DESC, [COLOR] DESC;
- 点击此处查看结果