Hive 查询按位置映射 3 个数组列
Hive query to map 3 array columns position wise
i/p:
c1 c2 c3
[[1,2,3],[4],[5,6]] ['v1','v2','v3'] [['sam'], ['tam'], ['bam']]
o/p:
c1 c2 c3
[1,2,3] 'v1' ['sam']
[4] 'v2' ['tam']
[5,6] 'v3' ['bam']
有人可以建议我如何为上述问题编写查询吗?
使用 explode:
-- Emit one output row per element of the array column c1 (alias kept as c1).
SELECT explode(c1) AS c1
FROM tab
;
如果您的用例更复杂,请将 explode 与 lateral view 一起使用:
-- Explode the array column c1 through a lateral view so that each element
-- (c1_exploded) can be selected side by side with the other columns
-- a, b, c of the same source row.
SELECT
    c1_exploded,
    a,
    b,
    c
FROM tab src
LATERAL VIEW explode(src.c1) c1_view AS c1_exploded
;
参考:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF
使用posexplode():
-- Zip three array columns position-wise: each array is exploded together
-- with its element position, and only rows where all three positions agree
-- are kept.
WITH your_data AS (
    -- Inline sample row:
    -- [[1,2,3],[4],[5,6]]  ["v1","v2","v3"]  [["sam"],["tam"],["bam"]]
    SELECT
        array(array(1, 2, 3), array(4), array(5, 6))    AS c1,
        array('v1', 'v2', 'v3')                         AS c2,
        array(array('sam'), array('tam'), array('bam')) AS c3
)
SELECT
    e1.c1,
    e2.c2,
    e3.c3
FROM your_data src
LATERAL VIEW posexplode(src.c1) e1 AS p1, c1
LATERAL VIEW posexplode(src.c2) e2 AS p2, c2
LATERAL VIEW posexplode(src.c3) e3 AS p3, c3
-- Match positions across the exploded arrays; without this filter the
-- stacked lateral views produce a Cartesian product.
-- Alternative: explode each array in its own subquery and join the
-- subqueries on position, which also allows a left join instead of
-- only an inner match.
WHERE e1.p1 = e2.p2
  AND e1.p1 = e3.p3
;
结果:
OK
c1 c2 c3
[1,2,3] v1 ["sam"]
[4] v2 ["tam"]
[5,6] v3 ["bam"]
Time taken: 0.078 seconds, Fetched: 3 row(s)
简化版,感谢@GrzegorzSkibinski 的建议:
-- Simplified position-wise zip: only c1 is exploded (with its position p1);
-- the matching elements of c2 and c3 are fetched by using p1 as the array
-- index.
WITH your_data AS (
    -- Inline sample row:
    -- [[1,2,3],[4],[5,6]]  ["v1","v2","v3"]  [["sam"],["tam"],["bam"]]
    SELECT
        array(array(1, 2, 3), array(4), array(5, 6))    AS c1,
        array('v1', 'v2', 'v3')                         AS c2,
        array(array('sam'), array('tam'), array('bam')) AS c3
)
SELECT
    e1.c1,
    src.c2[e1.p1] AS c2,
    src.c3[e1.p1] AS c3
FROM your_data src
LATERAL VIEW posexplode(src.c1) e1 AS p1, c1
;
i/p:
c1 c2 c3
[[1,2,3],[4],[5,6]] ['v1','v2','v3'] [['sam'], ['tam'], ['bam']]
o/p:
c1 c2 c3
[1,2,3] 'v1' ['sam']
[4] 'v2' ['tam']
[5,6] 'v3' ['bam']
有人可以建议我如何为上述问题编写查询吗?
使用 explode:
-- Emit one output row per element of the array column c1 (alias kept as c1).
SELECT explode(c1) AS c1
FROM tab
;
如果您的用例更复杂,请将 explode 与 lateral view 一起使用:
-- Explode the array column c1 through a lateral view so that each element
-- (c1_exploded) can be selected side by side with the other columns
-- a, b, c of the same source row.
SELECT
    c1_exploded,
    a,
    b,
    c
FROM tab src
LATERAL VIEW explode(src.c1) c1_view AS c1_exploded
;
参考:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF
使用posexplode():
-- Zip three array columns position-wise: each array is exploded together
-- with its element position, and only rows where all three positions agree
-- are kept.
WITH your_data AS (
    -- Inline sample row:
    -- [[1,2,3],[4],[5,6]]  ["v1","v2","v3"]  [["sam"],["tam"],["bam"]]
    SELECT
        array(array(1, 2, 3), array(4), array(5, 6))    AS c1,
        array('v1', 'v2', 'v3')                         AS c2,
        array(array('sam'), array('tam'), array('bam')) AS c3
)
SELECT
    e1.c1,
    e2.c2,
    e3.c3
FROM your_data src
LATERAL VIEW posexplode(src.c1) e1 AS p1, c1
LATERAL VIEW posexplode(src.c2) e2 AS p2, c2
LATERAL VIEW posexplode(src.c3) e3 AS p3, c3
-- Match positions across the exploded arrays; without this filter the
-- stacked lateral views produce a Cartesian product.
-- Alternative: explode each array in its own subquery and join the
-- subqueries on position, which also allows a left join instead of
-- only an inner match.
WHERE e1.p1 = e2.p2
  AND e1.p1 = e3.p3
;
结果:
OK
c1 c2 c3
[1,2,3] v1 ["sam"]
[4] v2 ["tam"]
[5,6] v3 ["bam"]
Time taken: 0.078 seconds, Fetched: 3 row(s)
简化版,感谢@GrzegorzSkibinski 的建议:
-- Simplified position-wise zip: only c1 is exploded (with its position p1);
-- the matching elements of c2 and c3 are fetched by using p1 as the array
-- index.
WITH your_data AS (
    -- Inline sample row:
    -- [[1,2,3],[4],[5,6]]  ["v1","v2","v3"]  [["sam"],["tam"],["bam"]]
    SELECT
        array(array(1, 2, 3), array(4), array(5, 6))    AS c1,
        array('v1', 'v2', 'v3')                         AS c2,
        array(array('sam'), array('tam'), array('bam')) AS c3
)
SELECT
    e1.c1,
    src.c2[e1.p1] AS c2,
    src.c3[e1.p1] AS c3
FROM your_data src
LATERAL VIEW posexplode(src.c1) e1 AS p1, c1
;