将 2 个 Hive 外部表的数据插入到带有附加列的新外部表中
Insert data of 2 Hive external tables in new External table with additional column
我有 2 个 Hive 外部表,如下所示。表中的数据是我用 Sqoop 从 Oracle 导入的。
-- External staging table for USA transactions: Hive reads the delimited text
-- files stored under the LOCATION below.
-- NOTE(review): ROW FORMAT DELIMITED is given without FIELDS TERMINATED BY, so
-- Hive's default field delimiter applies -- confirm it matches the files
-- produced by the sqoop import.
CREATE EXTERNAL TABLE transaction_usa (
    tran_id     INT,
    acct_id     INT,
    tran_date   STRING,
    amount      DOUBLE,
    description STRING,
    branch_code STRING,
    tran_state  STRING,
    tran_city   STRING,
    speendby    STRING,
    tran_zip    INT
)
ROW FORMAT DELIMITED
STORED AS TEXTFILE
LOCATION '/user/stg/bank_stg/tran_usa';
-- External staging table for Canada transactions: Hive reads the delimited
-- text files stored under the LOCATION below. Column list is identical to
-- transaction_usa.
-- NOTE(review): ROW FORMAT DELIMITED is given without FIELDS TERMINATED BY, so
-- Hive's default field delimiter applies -- confirm it matches the files
-- produced by the sqoop import.
CREATE EXTERNAL TABLE transaction_canada (
    tran_id     INT,
    acct_id     INT,
    tran_date   STRING,
    amount      DOUBLE,
    description STRING,
    branch_code STRING,
    tran_state  STRING,
    tran_city   STRING,
    speendby    STRING,
    tran_zip    INT
)
ROW FORMAT DELIMITED
STORED AS TEXTFILE
LOCATION '/user/stg/bank_stg/tran_canada';
现在我想把上面 2 个表的数据原样合并到 1 个 Hive 外部表中:所有字段都与上面 2 个表相同,但多出 1 列,用来标识每条数据来自哪个表。新外部表的附加列名为 source_table。
新的外部表如下:
-- Combined external table: the same ten columns as transaction_usa and
-- transaction_canada, plus source_table, which identifies the table each row
-- came from.
CREATE EXTERNAL TABLE transaction_usa_canada (
    tran_id      INT,
    acct_id      INT,
    tran_date    STRING,
    amount       DOUBLE,
    description  STRING,
    branch_code  STRING,
    tran_state   STRING,
    tran_city    STRING,
    speendby     STRING,
    tran_zip     INT,
    source_table STRING
)
ROW FORMAT DELIMITED
STORED AS TEXTFILE
LOCATION '/user/gds/bank_ds/tran_usa_canada';
我该怎么做?
你也可以通过手动分区(manual partitioning)来完成。
-- Partitioned alternative to a source_table column: the originating table is
-- recorded as the partition key sourcetablename instead of a regular column.
-- Declared without EXTERNAL, so this is a Hive-managed table.
-- NOTE(review): no ROW FORMAT / STORED AS clause is given, so Hive's defaults
-- apply; files loaded into the partitions must match them -- confirm.
CREATE TABLE transaction_new_table (
    tran_id     INT,
    acct_id     INT,
    tran_date   STRING,
    amount      DOUBLE,
    description STRING,
    branch_code STRING,
    tran_state  STRING,
    tran_city   STRING,
    speendby    STRING,
    tran_zip    INT
)
PARTITIONED BY (sourcetablename STRING);
然后运行下面的命令,
-- Attach one source's files to its own partition of transaction_new_table.
-- 'hdfspath' is presumably a placeholder for the real HDFS directory; run the
-- statement once per source with a distinct sourcetablename value.
-- NOTE: LOAD DATA INPATH moves the files from the given HDFS path into the
-- partition directory; it does not copy them.
LOAD DATA INPATH 'hdfspath'
INTO TABLE transaction_new_table
PARTITION (sourcetablename = '1');
你可以使用 Hive 的 INSERT INTO 子句
-- Append each source table's rows to the combined table, tagging every row
-- with the name of the table it came from.
-- Hive matches SELECT expressions to target columns by position, so the
-- column order below follows the transaction_usa_canada definition, with the
-- literal source name last (source_table).
INSERT INTO TABLE transaction_usa_canada
SELECT
    tran_id,
    acct_id,
    tran_date,
    amount,
    description,
    branch_code,
    tran_state,
    tran_city,
    speendby,
    tran_zip,
    'transaction_usa' AS source_table
FROM transaction_usa;

INSERT INTO TABLE transaction_usa_canada
SELECT
    tran_id,
    acct_id,
    tran_date,
    amount,
    description,
    branch_code,
    tran_state,
    tran_city,
    speendby,
    tran_zip,
    'transaction_canada' AS source_table
FROM transaction_canada;
对每个表分别执行 SELECT,再对这些结果执行 UNION ALL 操作,最后将合并后的结果插入第三个表。
下面是最终的 Hive 查询:
-- Populate the combined table in a single statement: select every column from
-- each source table, add a literal naming that source as source_table, and
-- concatenate the two result sets with UNION ALL (no de-duplication).
INSERT INTO TABLE transaction_usa_canada
SELECT
    tran_id,
    acct_id,
    tran_date,
    amount,
    description,
    branch_code,
    tran_state,
    tran_city,
    speendby,
    tran_zip,
    'transaction_usa' AS source_table
FROM transaction_usa
UNION ALL
SELECT
    tran_id,
    acct_id,
    tran_date,
    amount,
    description,
    branch_code,
    tran_state,
    tran_city,
    speendby,
    tran_zip,
    'transaction_canada' AS source_table
FROM transaction_canada;
希望对你有帮助!!!
我有 2 个 Hive 外部表,如下所示。表中的数据是我用 Sqoop 从 Oracle 导入的。
-- External staging table for USA transactions: Hive reads the delimited text
-- files stored under the LOCATION below.
-- NOTE(review): ROW FORMAT DELIMITED is given without FIELDS TERMINATED BY, so
-- Hive's default field delimiter applies -- confirm it matches the files
-- produced by the sqoop import.
CREATE EXTERNAL TABLE transaction_usa (
    tran_id     INT,
    acct_id     INT,
    tran_date   STRING,
    amount      DOUBLE,
    description STRING,
    branch_code STRING,
    tran_state  STRING,
    tran_city   STRING,
    speendby    STRING,
    tran_zip    INT
)
ROW FORMAT DELIMITED
STORED AS TEXTFILE
LOCATION '/user/stg/bank_stg/tran_usa';
-- External staging table for Canada transactions: Hive reads the delimited
-- text files stored under the LOCATION below. Column list is identical to
-- transaction_usa.
-- NOTE(review): ROW FORMAT DELIMITED is given without FIELDS TERMINATED BY, so
-- Hive's default field delimiter applies -- confirm it matches the files
-- produced by the sqoop import.
CREATE EXTERNAL TABLE transaction_canada (
    tran_id     INT,
    acct_id     INT,
    tran_date   STRING,
    amount      DOUBLE,
    description STRING,
    branch_code STRING,
    tran_state  STRING,
    tran_city   STRING,
    speendby    STRING,
    tran_zip    INT
)
ROW FORMAT DELIMITED
STORED AS TEXTFILE
LOCATION '/user/stg/bank_stg/tran_canada';
现在我想把上面 2 个表的数据原样合并到 1 个 Hive 外部表中:所有字段都与上面 2 个表相同,但多出 1 列,用来标识每条数据来自哪个表。新外部表的附加列名为 source_table。
新的外部表如下:
-- Combined external table: the same ten columns as transaction_usa and
-- transaction_canada, plus source_table, which identifies the table each row
-- came from.
CREATE EXTERNAL TABLE transaction_usa_canada (
    tran_id      INT,
    acct_id      INT,
    tran_date    STRING,
    amount       DOUBLE,
    description  STRING,
    branch_code  STRING,
    tran_state   STRING,
    tran_city    STRING,
    speendby     STRING,
    tran_zip     INT,
    source_table STRING
)
ROW FORMAT DELIMITED
STORED AS TEXTFILE
LOCATION '/user/gds/bank_ds/tran_usa_canada';
我该怎么做?
你也可以通过手动分区(manual partitioning)来完成。
-- Partitioned alternative to a source_table column: the originating table is
-- recorded as the partition key sourcetablename instead of a regular column.
-- Declared without EXTERNAL, so this is a Hive-managed table.
-- NOTE(review): no ROW FORMAT / STORED AS clause is given, so Hive's defaults
-- apply; files loaded into the partitions must match them -- confirm.
CREATE TABLE transaction_new_table (
    tran_id     INT,
    acct_id     INT,
    tran_date   STRING,
    amount      DOUBLE,
    description STRING,
    branch_code STRING,
    tran_state  STRING,
    tran_city   STRING,
    speendby    STRING,
    tran_zip    INT
)
PARTITIONED BY (sourcetablename STRING);
然后运行下面的命令,
-- Attach one source's files to its own partition of transaction_new_table.
-- 'hdfspath' is presumably a placeholder for the real HDFS directory; run the
-- statement once per source with a distinct sourcetablename value.
-- NOTE: LOAD DATA INPATH moves the files from the given HDFS path into the
-- partition directory; it does not copy them.
LOAD DATA INPATH 'hdfspath'
INTO TABLE transaction_new_table
PARTITION (sourcetablename = '1');
你可以使用 Hive 的 INSERT INTO 子句
-- Append each source table's rows to the combined table, tagging every row
-- with the name of the table it came from.
-- Hive matches SELECT expressions to target columns by position, so the
-- column order below follows the transaction_usa_canada definition, with the
-- literal source name last (source_table).
INSERT INTO TABLE transaction_usa_canada
SELECT
    tran_id,
    acct_id,
    tran_date,
    amount,
    description,
    branch_code,
    tran_state,
    tran_city,
    speendby,
    tran_zip,
    'transaction_usa' AS source_table
FROM transaction_usa;

INSERT INTO TABLE transaction_usa_canada
SELECT
    tran_id,
    acct_id,
    tran_date,
    amount,
    description,
    branch_code,
    tran_state,
    tran_city,
    speendby,
    tran_zip,
    'transaction_canada' AS source_table
FROM transaction_canada;
对每个表分别执行 SELECT,再对这些结果执行 UNION ALL 操作,最后将合并后的结果插入第三个表。
下面是最终的 Hive 查询:
-- Populate the combined table in a single statement: select every column from
-- each source table, add a literal naming that source as source_table, and
-- concatenate the two result sets with UNION ALL (no de-duplication).
INSERT INTO TABLE transaction_usa_canada
SELECT
    tran_id,
    acct_id,
    tran_date,
    amount,
    description,
    branch_code,
    tran_state,
    tran_city,
    speendby,
    tran_zip,
    'transaction_usa' AS source_table
FROM transaction_usa
UNION ALL
SELECT
    tran_id,
    acct_id,
    tran_date,
    amount,
    description,
    branch_code,
    tran_state,
    tran_city,
    speendby,
    tran_zip,
    'transaction_canada' AS source_table
FROM transaction_canada;
希望对你有帮助!!!