Hive 有没有数组比较函数/UDF?
Are there any Hive array comparison functions/udf's
Hive 是否有数组比较函数/UDF,可以用来判断两个数组是否相等(array = array)?
例如:
-- Asker's example: left join a to b, comparing columns C and D with plain `=`
-- (the question text states that C and D are arrays).
SELECT
    a.xyz,
    b.abc
FROM a
LEFT JOIN b
    ON a.C = b.D
其中 C 和 D 是数组。
您可以使用下面的函数比较两个数组的值:
array_diff($array1,$array2);
它返回一个数组,其中包含 array1 中存在、但在 array2(或 array3 等)中不存在的条目。
您可以使用 hive-third-functions。它提供了一些有用的 json、数组和映射函数。对于这个问题,你可以这样使用:
-- Left join a to b, with the comparison of C and D done by array_equals
-- (introduced in the surrounding text as part of hive-third-functions).
SELECT
    a.xyz,
    b.abc
FROM a
LEFT JOIN b
    ON array_equals(a.C, b.D)
数组是有序结构,因此可以先把数组转换成字符串,再对字符串进行比较:
concat_ws( <separator> , <array> )
此函数将使用分隔符将所有数组元素连接成一个字符串。
-- Demo fixture: a table with an array<string> column, plus a helper table
-- (dummy) that serves as the FROM source for the INSERT ... SELECT statements.
CREATE TABLE arraydemo (
    id   BIGINT,
    list ARRAY<STRING>
);

CREATE TABLE dummy (a INT);
INSERT INTO TABLE dummy VALUES (1);

-- Each INSERT selects constants from dummy, which holds the single row inserted above.
-- Note that id 1 appears twice and that two rows carry the same list.
INSERT INTO arraydemo SELECT 1, array("Paperino", "Topolino") FROM dummy;
INSERT INTO arraydemo SELECT 2, array("Pippo", "Pluto") FROM dummy;
INSERT INTO arraydemo SELECT 1, array("Pippo", "Pluto") FROM dummy;

-- Show the fixture contents.
SELECT * FROM arraydemo;
+---------------+--------------------------+--+
| arraydemo.id | arraydemo.list |
+---------------+--------------------------+--+
| 1 | ["Paperino","Topolino"] |
| 2 | ["Pippo","Pluto"] |
| 1 | ["Pippo","Pluto"] |
+---------------+--------------------------+--+
-- Self-join arraydemo on whole-list equality: each list is joined into a
-- "|"-separated string with concat_ws and the resulting strings are compared.
-- NOTE(review): this assumes no element itself contains "|"; otherwise two
-- different lists could yield the same joined string — confirm for your data.
SELECT *
FROM arraydemo AS a1
INNER JOIN arraydemo AS a2
    ON concat_ws("|", a1.list) = concat_ws("|", a2.list);
+--------+--------------------------+--------+--------------------------+--+
| a1.id | a1.list | a2.id | a2.list |
+--------+--------------------------+--------+--------------------------+--+
| 1 | ["Paperino","Topolino"] | 1 | ["Paperino","Topolino"] |
| 2 | ["Pippo","Pluto"] | 2 | ["Pippo","Pluto"] |
| 1 | ["Pippo","Pluto"] | 2 | ["Pippo","Pluto"] |
| 2 | ["Pippo","Pluto"] | 1 | ["Pippo","Pluto"] |
| 1 | ["Pippo","Pluto"] | 1 | ["Pippo","Pluto"] |
+--------+--------------------------+--------+--------------------------+--+
在此示例中,我把它当作 "toString()" 来使用。有时更好的办法是只比较数组中重要的部分:
-- Self-join arraydemo comparing only the element at index 0 of each list,
-- rather than the whole array.
SELECT *
FROM arraydemo AS a1
INNER JOIN arraydemo AS a2
    ON a1.list[0] = a2.list[0];
希望对您有所帮助。
Hive 是否有数组比较函数/UDF,可以用来判断两个数组是否相等(array = array)?
例如:
-- Asker's example: left join a to b, comparing columns C and D with plain `=`
-- (the question text states that C and D are arrays).
SELECT
    a.xyz,
    b.abc
FROM a
LEFT JOIN b
    ON a.C = b.D
其中 C 和 D 是数组。
您可以使用下面的函数比较两个数组的值:
array_diff($array1,$array2);
它返回一个数组,其中包含 array1 中存在、但在 array2(或 array3 等)中不存在的条目。
您可以使用 hive-third-functions。它提供了一些有用的 json、数组和映射函数。对于这个问题,你可以这样使用:
-- Left join a to b, with the comparison of C and D done by array_equals
-- (introduced in the surrounding text as part of hive-third-functions).
SELECT
    a.xyz,
    b.abc
FROM a
LEFT JOIN b
    ON array_equals(a.C, b.D)
数组是有序结构,因此可以先把数组转换成字符串,再对字符串进行比较:
concat_ws( <separator> , <array> )
此函数将使用分隔符将所有数组元素连接成一个字符串。
-- Demo fixture: a table with an array<string> column, plus a helper table
-- (dummy) that serves as the FROM source for the INSERT ... SELECT statements.
CREATE TABLE arraydemo (
    id   BIGINT,
    list ARRAY<STRING>
);

CREATE TABLE dummy (a INT);
INSERT INTO TABLE dummy VALUES (1);

-- Each INSERT selects constants from dummy, which holds the single row inserted above.
-- Note that id 1 appears twice and that two rows carry the same list.
INSERT INTO arraydemo SELECT 1, array("Paperino", "Topolino") FROM dummy;
INSERT INTO arraydemo SELECT 2, array("Pippo", "Pluto") FROM dummy;
INSERT INTO arraydemo SELECT 1, array("Pippo", "Pluto") FROM dummy;

-- Show the fixture contents.
SELECT * FROM arraydemo;
+---------------+--------------------------+--+
| arraydemo.id | arraydemo.list |
+---------------+--------------------------+--+
| 1 | ["Paperino","Topolino"] |
| 2 | ["Pippo","Pluto"] |
| 1 | ["Pippo","Pluto"] |
+---------------+--------------------------+--+
-- Self-join arraydemo on whole-list equality: each list is joined into a
-- "|"-separated string with concat_ws and the resulting strings are compared.
-- NOTE(review): this assumes no element itself contains "|"; otherwise two
-- different lists could yield the same joined string — confirm for your data.
SELECT *
FROM arraydemo AS a1
INNER JOIN arraydemo AS a2
    ON concat_ws("|", a1.list) = concat_ws("|", a2.list);
+--------+--------------------------+--------+--------------------------+--+
| a1.id | a1.list | a2.id | a2.list |
+--------+--------------------------+--------+--------------------------+--+
| 1 | ["Paperino","Topolino"] | 1 | ["Paperino","Topolino"] |
| 2 | ["Pippo","Pluto"] | 2 | ["Pippo","Pluto"] |
| 1 | ["Pippo","Pluto"] | 2 | ["Pippo","Pluto"] |
| 2 | ["Pippo","Pluto"] | 1 | ["Pippo","Pluto"] |
| 1 | ["Pippo","Pluto"] | 1 | ["Pippo","Pluto"] |
+--------+--------------------------+--------+--------------------------+--+
在此示例中,我把它当作 "toString()" 来使用。有时更好的办法是只比较数组中重要的部分:
-- Self-join arraydemo comparing only the element at index 0 of each list,
-- rather than the whole array.
SELECT *
FROM arraydemo AS a1
INNER JOIN arraydemo AS a2
    ON a1.list[0] = a2.list[0];
希望对您有所帮助。