带 NOT IN 子句的 Hive 查询的替代方案
Alternative to Hive Query with NOT IN clause
我有以下一组 Hive 表：
-- image_additions: three STRING columns (customer_id, image_key, image_size).
create table image_additions (
    customer_id STRING,
    image_key   STRING,
    image_size  STRING
);
-- image_removals: three STRING columns (customer_id, image_key, image_size).
create table image_removals (
    customer_id STRING,
    image_key   STRING,
    image_size  STRING
);
-- images_stored: three STRING columns (customer_id, image_key, image_size).
create table images_stored (
    customer_id STRING,
    image_key   STRING,
    image_size  STRING
);
我想运行如下查询，把结果插入 images_stored：
-- Copy into images_stored every image_additions row whose image_key
-- is not returned by the (uncorrelated) subquery over image_removals.
insert into images_stored
select
    ia.customer_id,
    ia.image_key,
    ia.image_size
from image_additions as ia
where ia.image_key not in (
    select ir.image_key
    from image_removals as ir
);
这会产生笛卡尔积，而 Hive 不允许我运行它。
我该如何改用其他查询来实现同样的效果？
使用左连接 + where is null：
-- Anti-join: keep each image_additions row that has no image_removals
-- row with the same image_key. Rows without a match come out of the
-- left join with NULL in ir.image_key, which the where clause selects.
insert into images_stored
select
    ia.customer_id,
    ia.image_key,
    ia.image_size
from image_additions as ia
left join image_removals as ir
    on ir.image_key = ia.image_key
where ir.image_key is null;
使用 not exists：
-- Keep each image_additions row for which the correlated subquery
-- finds no image_removals row with the same image_key.
insert into images_stored
select
    ia.customer_id,
    ia.image_key,
    ia.image_size
from image_additions as ia
where not exists (
    select 1
    from image_removals as ir
    where ir.image_key = ia.image_key
);
我有以下一组 Hive 表：
-- image_additions: three STRING columns (customer_id, image_key, image_size).
create table image_additions (
    customer_id STRING,
    image_key   STRING,
    image_size  STRING
);
-- image_removals: three STRING columns (customer_id, image_key, image_size).
create table image_removals (
    customer_id STRING,
    image_key   STRING,
    image_size  STRING
);
-- images_stored: three STRING columns (customer_id, image_key, image_size).
create table images_stored (
    customer_id STRING,
    image_key   STRING,
    image_size  STRING
);
我想运行如下查询，把结果插入 images_stored：
-- Copy into images_stored every image_additions row whose image_key
-- is not returned by the (uncorrelated) subquery over image_removals.
insert into images_stored
select
    ia.customer_id,
    ia.image_key,
    ia.image_size
from image_additions as ia
where ia.image_key not in (
    select ir.image_key
    from image_removals as ir
);
这会产生笛卡尔积，而 Hive 不允许我运行它。我该如何改用其他查询来实现同样的效果？
使用左连接 + where is null：
-- Anti-join: keep each image_additions row that has no image_removals
-- row with the same image_key. Rows without a match come out of the
-- left join with NULL in ir.image_key, which the where clause selects.
insert into images_stored
select
    ia.customer_id,
    ia.image_key,
    ia.image_size
from image_additions as ia
left join image_removals as ir
    on ir.image_key = ia.image_key
where ir.image_key is null;
使用 not exists：
-- Keep each image_additions row for which the correlated subquery
-- finds no image_removals row with the same image_key.
insert into images_stored
select
    ia.customer_id,
    ia.image_key,
    ia.image_size
from image_additions as ia
where not exists (
    select 1
    from image_removals as ir
    where ir.image_key = ia.image_key
);