从表中生成计数后,需要在 Hive 中生成百分比列
Need to generate percentage column in Hive after generating Count from the table
我创建了以下 Hive 表,用于统计源 Hive 表中个体的数量和类型。现在我需要在此基础上进一步计算各类型所占的百分比:
-- Build hh_member_type: one row per household-status code with its row count.
-- The lead status column and the eight member status columns of customer are
-- stacked into a single column (it takes the name of the first branch,
-- LEAD_HOUSEHLD_STATUS), restricted to the six handled codes, counted per
-- code, and labelled with a readable description.
CREATE TABLE hh_member_type AS
SELECT
    COUNT(*) AS count,
    CASE LEAD_HOUSEHLD_STATUS
        WHEN 'D' THEN 'Deceased'
        WHEN 'H' THEN 'Head'
        WHEN 'P' THEN 'Aged Parent At Home'
        WHEN 'U' THEN 'Unknown'
        WHEN 'W' THEN 'Spouse'
        WHEN 'Y' THEN 'Young Adult'
        -- The string literal 'NULL', not SQL NULL; the WHERE filter below
        -- only admits codes that already have a WHEN branch.
        ELSE 'NULL'
    END AS HH_STATUS
FROM (
    SELECT LEAD_HOUSEHLD_STATUS FROM customer
    UNION ALL
    SELECT MEM1_HH_STATUS FROM customer
    UNION ALL
    SELECT MEM2_HH_STATUS FROM customer
    UNION ALL
    SELECT MEM3_HH_STATUS FROM customer
    UNION ALL
    SELECT MEM4_HH_STATUS FROM customer
    UNION ALL
    SELECT MEM5_HH_STATUS FROM customer
    UNION ALL
    SELECT MEM6_HH_STATUS FROM customer
    UNION ALL
    SELECT MEM7_HH_STATUS FROM customer
    UNION ALL
    SELECT MEM8_HH_STATUS FROM customer
) all_statuses
WHERE LEAD_HOUSEHLD_STATUS IN ('D', 'H', 'P', 'U', 'W', 'Y')
GROUP BY LEAD_HOUSEHLD_STATUS;
以上代码的输出是这样的:
hive> select * from hh_member_type ;
OK
277 Deceased
20636 Head
3547 Aged Parent At Home
18926 Unknown
16184 Spouse
7082 Young Adult
Time taken: 0.195 seconds, Fetched: 6 row(s)
我尝试使用以下代码计算各类型的百分比,但没有得到预期的输出。如有任何建议,不胜感激。
-- Attempted percentage breakdown; kept verbatim as the failing example.
-- NOTE(review): sum(a.count) here is a plain aggregate evaluated per
-- (a.count, a.hh_status) group. hh_member_type holds one row per status, so
-- each group contains a single row, sum(a.count) equals a.count, and the
-- expression comes out as 100 for every status rather than a share of the
-- grand total.
CREATE TABLE hh_member_percent AS
SELECT a.hh_status,round(a.count/sum(a.count) * 100 ,2)
from hh_member_type a
group by a.count,a.hh_status ;
使用分析函数(窗口函数)sum() over():
-- Percentage of the grand total contributed by each household status.
-- SUM(a.count) OVER () is a window aggregate with an empty window
-- specification, so every row of hh_member_type is divided by the same
-- total and no GROUP BY is needed.
-- The computed column is aliased explicitly so it does not end up with a
-- Hive-generated column name.
SELECT
    a.hh_status,
    ROUND(a.count / SUM(a.count) OVER () * 100, 2) AS pct_of_total
FROM hh_member_type a;
我创建了以下 Hive 表,用于统计源 Hive 表中个体的数量和类型。现在我需要在此基础上进一步计算各类型所占的百分比:
-- Build hh_member_type: one row per household-status code with its row count.
-- The lead status column and the eight member status columns of customer are
-- stacked into a single column (it takes the name of the first branch,
-- LEAD_HOUSEHLD_STATUS), restricted to the six handled codes, counted per
-- code, and labelled with a readable description.
CREATE TABLE hh_member_type AS
SELECT
    COUNT(*) AS count,
    CASE LEAD_HOUSEHLD_STATUS
        WHEN 'D' THEN 'Deceased'
        WHEN 'H' THEN 'Head'
        WHEN 'P' THEN 'Aged Parent At Home'
        WHEN 'U' THEN 'Unknown'
        WHEN 'W' THEN 'Spouse'
        WHEN 'Y' THEN 'Young Adult'
        -- The string literal 'NULL', not SQL NULL; the WHERE filter below
        -- only admits codes that already have a WHEN branch.
        ELSE 'NULL'
    END AS HH_STATUS
FROM (
    SELECT LEAD_HOUSEHLD_STATUS FROM customer
    UNION ALL
    SELECT MEM1_HH_STATUS FROM customer
    UNION ALL
    SELECT MEM2_HH_STATUS FROM customer
    UNION ALL
    SELECT MEM3_HH_STATUS FROM customer
    UNION ALL
    SELECT MEM4_HH_STATUS FROM customer
    UNION ALL
    SELECT MEM5_HH_STATUS FROM customer
    UNION ALL
    SELECT MEM6_HH_STATUS FROM customer
    UNION ALL
    SELECT MEM7_HH_STATUS FROM customer
    UNION ALL
    SELECT MEM8_HH_STATUS FROM customer
) all_statuses
WHERE LEAD_HOUSEHLD_STATUS IN ('D', 'H', 'P', 'U', 'W', 'Y')
GROUP BY LEAD_HOUSEHLD_STATUS;
以上代码的输出是这样的:
hive> select * from hh_member_type ;
OK
277 Deceased
20636 Head
3547 Aged Parent At Home
18926 Unknown
16184 Spouse
7082 Young Adult
Time taken: 0.195 seconds, Fetched: 6 row(s)
我尝试使用以下代码计算各类型的百分比,但没有得到预期的输出。如有任何建议,不胜感激。
-- Attempted percentage breakdown; kept verbatim as the failing example.
-- NOTE(review): sum(a.count) here is a plain aggregate evaluated per
-- (a.count, a.hh_status) group. hh_member_type holds one row per status, so
-- each group contains a single row, sum(a.count) equals a.count, and the
-- expression comes out as 100 for every status rather than a share of the
-- grand total.
CREATE TABLE hh_member_percent AS
SELECT a.hh_status,round(a.count/sum(a.count) * 100 ,2)
from hh_member_type a
group by a.count,a.hh_status ;
使用分析函数(窗口函数)sum() over():
-- Percentage of the grand total contributed by each household status.
-- SUM(a.count) OVER () is a window aggregate with an empty window
-- specification, so every row of hh_member_type is divided by the same
-- total and no GROUP BY is needed.
-- The computed column is aliased explicitly so it does not end up with a
-- Hive-generated column name.
SELECT
    a.hh_status,
    ROUND(a.count / SUM(a.count) OVER () * 100, 2) AS pct_of_total
FROM hh_member_type a;