如何在 Hive 中使用 group by 连接多行的值,同时保留所有其他列
How to concat rows in a groupby but keep all variables in Hive
假设我有以下最小的、可重现的示例(我正在使用 Hive):
-- Sample fixture: six rows forming three (sub_id, sub_name) groups of two rows each.
CREATE TABLE have (
    id       INT,
    sub_id   INT,
    name     STRING,
    sub_name STRING,
    value    STRING,
    A        STRING,
    B        STRING,
    C        STRING
);

-- Values are positional, in the column order declared above.
INSERT INTO have VALUES
    (1, 111111, 'ITEM1', 'Score',  'AAA', 'concat1_1', 'concat2_1', 'concat3_1'),
    (1, 111111, 'ITEM1', 'Score',  'AAA', 'concat1_2', 'concat2_2', 'concat3_2'),
    (1, 111111, 'ITEM1', 'Weight', '2+',  'conc1_1',   'conc2_1',   'conc3_1'),
    (1, 111111, 'ITEM1', 'Weight', '2+',  'conc1_2',   'conc2_2',   'conc3_2'),
    (1, 222222, 'ITEM1', 'Score',  'BBB', 'c1_1',      'c2_1',      'c3_1'),
    (1, 222222, 'ITEM1', 'Score',  'BBB', 'c1_2',      'c2_2',      'c3_2');
我想将具有相同 sub_id 和 sub_name 的行的 a、b 和 c 列的值连接起来。
使用 concat_ws 和 group by,我可以跨行连接:
-- Collapse each (sub_id, sub_name) group into one row; the distinct a/b/c
-- values of the group are gathered with collect_set() and joined with '|'.
CREATE TABLE want AS
SELECT
    sub_id,
    sub_name,
    concat_ws("|", collect_set(a)) AS a,
    concat_ws("|", collect_set(b)) AS b,
    concat_ws("|", collect_set(c)) AS c
FROM have
GROUP BY
    sub_id,
    sub_name;
但是我怎样才能同时检索其他列?
当我尝试时
-- Intentionally failing attempt: id, name and value appear in the SELECT list
-- without being aggregated or listed in GROUP BY, so Hive rejects the
-- statement at compile time with SemanticException [Error 10025]
-- (the error message quoted right after this snippet).
create table want as
select id, sub_id, name, sub_name, value,
concat_ws("|", collect_set(a)) as a,
concat_ws("|", collect_set(b)) as b,
concat_ws("|", collect_set(c)) as c
from have
group by sub_id, sub_name;
我收到以下错误:
Error while compiling statement: FAILED: SemanticException [Error 10025]: line 17:7 Expression not in GROUP BY key 'id'
我想要的输出是:
+----+--------+-------+----------+-------+---------------------+---------------------+---------------------+
| id | sub_id | name  | sub_name | value | a                   | b                   | c                   |
+----+--------+-------+----------+-------+---------------------+---------------------+---------------------+
| 1  | 111111 | ITEM1 | Score    | AAA   | concat1_1|concat1_2 | concat2_1|concat2_2 | concat3_1|concat3_2 |
| 1  | 111111 | ITEM1 | Weight   | 2+    | conc1_1|conc1_2     | conc2_1|conc2_2     | conc3_1|conc3_2     |
| 1  | 222222 | ITEM1 | Score    | BBB   | c1_1|c1_2           | c2_1|c2_2           | c3_1|c3_2           |
+----+--------+-------+----------+-------+---------------------+---------------------+---------------------+
因为使用聚合函数时,select 中所有未被聚合的列都必须添加到 group by 中:
-- Return one row per group while keeping the descriptive columns.
-- Hive requires every non-aggregated SELECT column to be a GROUP BY key, so
-- id, name and value are added to the grouping. In the sample data they are
-- constant within each (sub_id, sub_name) pair, so the groups do not split.
-- collect_set() returns its elements in no guaranteed order; sort_array()
-- sorts them ascending so the '|'-joined strings are deterministic
-- (e.g. concat1_1|concat1_2 rather than possibly concat1_2|concat1_1).
SELECT
    id,
    sub_id,
    name,
    sub_name,
    value,
    concat_ws("|", sort_array(collect_set(a))) AS a,
    concat_ws("|", sort_array(collect_set(b))) AS b,
    concat_ws("|", sort_array(collect_set(c))) AS c
FROM have
GROUP BY
    id,
    sub_id,
    name,
    sub_name,
    value;
假设我有以下最小的、可重现的示例(我正在使用 Hive):
-- Sample fixture: six rows forming three (sub_id, sub_name) groups of two rows each.
CREATE TABLE have (
    id       INT,
    sub_id   INT,
    name     STRING,
    sub_name STRING,
    value    STRING,
    A        STRING,
    B        STRING,
    C        STRING
);

-- Values are positional, in the column order declared above.
INSERT INTO have VALUES
    (1, 111111, 'ITEM1', 'Score',  'AAA', 'concat1_1', 'concat2_1', 'concat3_1'),
    (1, 111111, 'ITEM1', 'Score',  'AAA', 'concat1_2', 'concat2_2', 'concat3_2'),
    (1, 111111, 'ITEM1', 'Weight', '2+',  'conc1_1',   'conc2_1',   'conc3_1'),
    (1, 111111, 'ITEM1', 'Weight', '2+',  'conc1_2',   'conc2_2',   'conc3_2'),
    (1, 222222, 'ITEM1', 'Score',  'BBB', 'c1_1',      'c2_1',      'c3_1'),
    (1, 222222, 'ITEM1', 'Score',  'BBB', 'c1_2',      'c2_2',      'c3_2');
我想将具有相同 sub_id 和 sub_name 的行的 a、b 和 c 列的值连接起来。
使用 concat_ws 和 group by,我可以跨行连接:
-- Collapse each (sub_id, sub_name) group into one row; the distinct a/b/c
-- values of the group are gathered with collect_set() and joined with '|'.
CREATE TABLE want AS
SELECT
    sub_id,
    sub_name,
    concat_ws("|", collect_set(a)) AS a,
    concat_ws("|", collect_set(b)) AS b,
    concat_ws("|", collect_set(c)) AS c
FROM have
GROUP BY
    sub_id,
    sub_name;
但是我怎样才能同时检索其他列?
当我尝试时
-- Intentionally failing attempt: id, name and value appear in the SELECT list
-- without being aggregated or listed in GROUP BY, so Hive rejects the
-- statement at compile time with SemanticException [Error 10025]
-- (the error message quoted right after this snippet).
create table want as
select id, sub_id, name, sub_name, value,
concat_ws("|", collect_set(a)) as a,
concat_ws("|", collect_set(b)) as b,
concat_ws("|", collect_set(c)) as c
from have
group by sub_id, sub_name;
我收到以下错误:
Error while compiling statement: FAILED: SemanticException [Error 10025]: line 17:7 Expression not in GROUP BY key 'id'
我想要的输出是:
+----+--------+-------+----------+-------+---------------------+---------------------+---------------------+
| id | sub_id | name  | sub_name | value | a                   | b                   | c                   |
+----+--------+-------+----------+-------+---------------------+---------------------+---------------------+
| 1  | 111111 | ITEM1 | Score    | AAA   | concat1_1|concat1_2 | concat2_1|concat2_2 | concat3_1|concat3_2 |
| 1  | 111111 | ITEM1 | Weight   | 2+    | conc1_1|conc1_2     | conc2_1|conc2_2     | conc3_1|conc3_2     |
| 1  | 222222 | ITEM1 | Score    | BBB   | c1_1|c1_2           | c2_1|c2_2           | c3_1|c3_2           |
+----+--------+-------+----------+-------+---------------------+---------------------+---------------------+
因为使用聚合函数时,select 中所有未被聚合的列都必须添加到 group by 中:
-- Return one row per group while keeping the descriptive columns.
-- Hive requires every non-aggregated SELECT column to be a GROUP BY key, so
-- id, name and value are added to the grouping. In the sample data they are
-- constant within each (sub_id, sub_name) pair, so the groups do not split.
-- collect_set() returns its elements in no guaranteed order; sort_array()
-- sorts them ascending so the '|'-joined strings are deterministic
-- (e.g. concat1_1|concat1_2 rather than possibly concat1_2|concat1_1).
SELECT
    id,
    sub_id,
    name,
    sub_name,
    value,
    concat_ws("|", sort_array(collect_set(a))) AS a,
    concat_ws("|", sort_array(collect_set(b))) AS b,
    concat_ws("|", sort_array(collect_set(c))) AS c
FROM have
GROUP BY
    id,
    sub_id,
    name,
    sub_name,
    value;