ESRI Hive ST_Contains 无法正常工作
ESRI Hive ST_Contains does not work properly
尝试使用我能找到的 JAR(不确定它们是否是最佳选择,我需要使用 ESRI 并在 Hive 中执行):
-- Session setup from the question: register jars, then bind ESRI classes to Hive function names.
-- NOTE(review): both esri-geometry-api-1.2.1.jar and an unversioned esri-geometry-api.jar are
-- added, and spatial-sdk-hadoop.jar is added together with spatial-sdk-hive; confirm that only
-- one copy of each library ends up on the classpath.
ADD JAR /home/user/lib/esri-geometry-api-1.2.1.jar;
ADD JAR /home/user/lib/spatial-sdk-hive-1.1.1-SNAPSHOT.jar;
ADD JAR /home/user/lib/esri-geometry-api.jar;
ADD JAR /home/user/lib/spatial-sdk-hadoop.jar;
-- Temporary functions used by the query: ST_Polygon, ST_Point, ST_Contains (ST_Geometry is registered as well).
CREATE TEMPORARY FUNCTION ST_Polygon AS 'com.esri.hadoop.hive.ST_Polygon';
CREATE TEMPORARY FUNCTION ST_Point AS 'com.esri.hadoop.hive.ST_Point';
CREATE TEMPORARY FUNCTION ST_Contains AS 'com.esri.hadoop.hive.ST_Contains';
CREATE TEMPORARY FUNCTION ST_Geometry AS 'com.esri.hadoop.hive.ST_Geometry';
运行以下查询:
-- Reproduction query from the question: emits a constant latitude/longitude pair and keeps a
-- row only when the polygon does NOT contain the point built from (longitude, latitude).
-- NOTE(review): the WKT ring does not repeat its first vertex at the end, and its vertices are
-- listed in zigzag rather than perimeter order.
-- NOTE(review): longitude/latitude inside WHERE presumably resolve against any_table, not the
-- SELECT aliases -- confirm.
SELECT
IF(1=1, 40.7484445, 0) AS latitude,
IF(1=1,-73.9878531, 0) AS longitude
FROM any_table
WHERE
NOT ST_Contains(
ST_POLYGON('POLYGON((170.0 20.0, -170.0 73.0, -50.0 20.0, -50.0 73.0))'),
ST_Point(CAST(longitude AS DOUBLE), CAST(latitude AS DOUBLE)))
LIMIT 1;
其中多边形 'POLYGON((170.0 20.0, -170.0 73.0, -50.0 20.0, -50.0 73.0))'
大致是覆盖美国的矩形框,给定坐标 40.7484445,-73.9878531
位于纽约。加上 WHERE NOT 后结果应该为空,但查询仍然返回了这些坐标,没有按预期进行过滤。
我做错了什么?
只应加载一个版本的几何 API(esri-geometry-api)。同样,只应加载 spatial-sdk-hadoop,或者加载 spatial-sdk-json 与 spatial-sdk-hive 这一对,二者不要同时加载。
WKT 多边形的环必须闭合:最后一个顶点要重复第一个顶点。
多边形需要按周长的顺序指定顶点,而不是锯齿形顺序。
几何 API 是平面的,不支持环绕国际日期变更线。
第一个顶点的经度可能应该是 -170,而不是 +170。
# Download the three jars in a single wget call.
# A space must precede each trailing backslash: backslash-newline is removed by the shell,
# so without the space the URLs would be glued together into one invalid argument.
wget https://github.com/Esri/spatial-framework-for-hadoop/releases/download/v1.1/spatial-sdk-hive-1.1.jar \
  https://github.com/Esri/spatial-framework-for-hadoop/releases/download/v1.1/spatial-sdk-json-1.1.jar \
  https://github.com/Esri/geometry-api-java/releases/download/v1.2.1/esri-geometry-api-1.2.1.jar
hive -S
-- Hive session transcript: each query is followed by the result it prints.
-- One geometry API jar plus the spatial-sdk-json / spatial-sdk-hive pair are registered.
add jar /pathto/esri-geometry-api-1.2.1.jar
/pathto/spatial-sdk-json-1.1.jar
/pathto/spatial-sdk-hive-1.1.jar ;
create temporary function ST_AsBinary as 'com.esri.hadoop.hive.ST_AsBinary';
-- ...
select ST_Contains(ST_Polygon(1, 1, 1, 4, 4, 4, 4, 1), ST_Point(2, 3)) ;
true
select ST_Contains(ST_Polygon('POLYGON((1 1, 1 4, 4 4, 4 1, 1 1))'), ST_Point(2, 3));
true
-- The ring is closed (last vertex repeats the first) and its vertices follow the perimeter.
select ST_Contains(ST_POLYGON('POLYGON((-170.0 20.0, -170.0 73.0, -50.0 73.0, -50.0 20.0, -170.0 20.0))'), ST_Point(-73.9878531, 40.7484445));
true
select not ST_Contains(ST_POLYGON('POLYGON((-170.0 20.0, -170.0 73.0, -50.0 73.0, -50.0 20.0, -170.0 20.0))'), ST_Point(-73.9878531, 40.7484445));
false
-- Register the jars for this Hive session.
ADD JAR /home/..../esri-geometry-api-1.2.1.jar;
ADD JAR /home/..../spatial-sdk-json-1.2.0.jar;
ADD JAR /home/..../spatial-sdk-hive-1.2.0.jar;
ADD JAR /home/..../spatial-sdk-hadoop.jar;

-- Bind the ESRI classes to temporary function names.
CREATE TEMPORARY FUNCTION ST_AsBinary AS 'com.esri.hadoop.hive.ST_AsBinary';
CREATE TEMPORARY FUNCTION ST_Polygon  AS 'com.esri.hadoop.hive.ST_Polygon';
CREATE TEMPORARY FUNCTION ST_Point    AS 'com.esri.hadoop.hive.ST_Point';
CREATE TEMPORARY FUNCTION ST_Contains AS 'com.esri.hadoop.hive.ST_Contains';
CREATE TEMPORARY FUNCTION ST_Geometry AS 'com.esri.hadoop.hive.ST_Geometry';
A) load table from geojson data to hive:
-- Table read through the GeoJSON SerDe: four string attributes plus a binary BoundaryShape
-- column (the column later passed to ST_Contains as the containing geometry).
CREATE TABLE default.lim_xxx_pais (
    NOM_PLAN      STRING,
    NMO_PLAN      STRING,
    APROXIMADO    STRING,
    ID1           STRING,
    BoundaryShape BINARY
)
ROW FORMAT SERDE
    'com.esri.hadoop.hive.serde.GeoJsonSerDe'
STORED AS
    INPUTFORMAT  'com.esri.json.hadoop.EnclosedGeoJsonInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
B)
-- Load the GeoJSON file into the table, replacing existing contents (OVERWRITE).
-- The table is qualified with its database so the load targets default.lim_xxx_pais
-- regardless of the session's current database.
LOAD DATA INPATH '/user/.../lim_xxx_pais.geojson'
OVERWRITE INTO TABLE default.lim_xxx_pais;
C)
-- Return the attributes of every row whose boundary shape contains the given point.
-- Reads from default.lim_xxx_pais, the table that was created and loaded in the earlier steps.
SELECT
    aa.NOM_PLAN,
    aa.NMO_PLAN,
    aa.APROXIMADO,
    aa.ID1
FROM default.lim_xxx_pais aa
WHERE ST_Contains(aa.boundaryshape, ST_POINT(-72.08726603, -36.62627804));
尝试使用我能找到的 JAR(不确定它们是否是最佳选择,我需要使用 ESRI 并在 Hive 中执行):
-- Session setup from the question: register jars, then bind ESRI classes to Hive function names.
-- NOTE(review): both esri-geometry-api-1.2.1.jar and an unversioned esri-geometry-api.jar are
-- added, and spatial-sdk-hadoop.jar is added together with spatial-sdk-hive; confirm that only
-- one copy of each library ends up on the classpath.
ADD JAR /home/user/lib/esri-geometry-api-1.2.1.jar;
ADD JAR /home/user/lib/spatial-sdk-hive-1.1.1-SNAPSHOT.jar;
ADD JAR /home/user/lib/esri-geometry-api.jar;
ADD JAR /home/user/lib/spatial-sdk-hadoop.jar;
-- Temporary functions used by the query: ST_Polygon, ST_Point, ST_Contains (ST_Geometry is registered as well).
CREATE TEMPORARY FUNCTION ST_Polygon AS 'com.esri.hadoop.hive.ST_Polygon';
CREATE TEMPORARY FUNCTION ST_Point AS 'com.esri.hadoop.hive.ST_Point';
CREATE TEMPORARY FUNCTION ST_Contains AS 'com.esri.hadoop.hive.ST_Contains';
CREATE TEMPORARY FUNCTION ST_Geometry AS 'com.esri.hadoop.hive.ST_Geometry';
运行以下查询:
-- Reproduction query from the question: emits a constant latitude/longitude pair and keeps a
-- row only when the polygon does NOT contain the point built from (longitude, latitude).
-- NOTE(review): the WKT ring does not repeat its first vertex at the end, and its vertices are
-- listed in zigzag rather than perimeter order.
-- NOTE(review): longitude/latitude inside WHERE presumably resolve against any_table, not the
-- SELECT aliases -- confirm.
SELECT
IF(1=1, 40.7484445, 0) AS latitude,
IF(1=1,-73.9878531, 0) AS longitude
FROM any_table
WHERE
NOT ST_Contains(
ST_POLYGON('POLYGON((170.0 20.0, -170.0 73.0, -50.0 20.0, -50.0 73.0))'),
ST_Point(CAST(longitude AS DOUBLE), CAST(latitude AS DOUBLE)))
LIMIT 1;
其中多边形 'POLYGON((170.0 20.0, -170.0 73.0, -50.0 20.0, -50.0 73.0))'
大致是覆盖美国的矩形框,给定坐标 40.7484445,-73.9878531
位于纽约。加上 WHERE NOT 后结果应该为空,但查询仍然返回了这些坐标,没有按预期进行过滤。
我做错了什么?
只应加载一个版本的几何 API(esri-geometry-api)。同样,只应加载 spatial-sdk-hadoop,或者加载 spatial-sdk-json 与 spatial-sdk-hive 这一对,二者不要同时加载。
WKT 多边形的环必须闭合:最后一个顶点要重复第一个顶点。
多边形需要按周长的顺序指定顶点,而不是锯齿形顺序。
几何 API 是平面的,不支持环绕国际日期变更线。
第一个顶点的经度可能应该是 -170,而不是 +170。
# Download the three jars in a single wget call.
# A space must precede each trailing backslash: backslash-newline is removed by the shell,
# so without the space the URLs would be glued together into one invalid argument.
wget https://github.com/Esri/spatial-framework-for-hadoop/releases/download/v1.1/spatial-sdk-hive-1.1.jar \
  https://github.com/Esri/spatial-framework-for-hadoop/releases/download/v1.1/spatial-sdk-json-1.1.jar \
  https://github.com/Esri/geometry-api-java/releases/download/v1.2.1/esri-geometry-api-1.2.1.jar
hive -S
-- Hive session transcript: each query is followed by the result it prints.
-- One geometry API jar plus the spatial-sdk-json / spatial-sdk-hive pair are registered.
add jar /pathto/esri-geometry-api-1.2.1.jar
/pathto/spatial-sdk-json-1.1.jar
/pathto/spatial-sdk-hive-1.1.jar ;
create temporary function ST_AsBinary as 'com.esri.hadoop.hive.ST_AsBinary';
-- ...
select ST_Contains(ST_Polygon(1, 1, 1, 4, 4, 4, 4, 1), ST_Point(2, 3)) ;
true
select ST_Contains(ST_Polygon('POLYGON((1 1, 1 4, 4 4, 4 1, 1 1))'), ST_Point(2, 3));
true
-- The ring is closed (last vertex repeats the first) and its vertices follow the perimeter.
select ST_Contains(ST_POLYGON('POLYGON((-170.0 20.0, -170.0 73.0, -50.0 73.0, -50.0 20.0, -170.0 20.0))'), ST_Point(-73.9878531, 40.7484445));
true
select not ST_Contains(ST_POLYGON('POLYGON((-170.0 20.0, -170.0 73.0, -50.0 73.0, -50.0 20.0, -170.0 20.0))'), ST_Point(-73.9878531, 40.7484445));
false
-- Register the jars for this Hive session.
ADD JAR /home/..../esri-geometry-api-1.2.1.jar;
ADD JAR /home/..../spatial-sdk-json-1.2.0.jar;
ADD JAR /home/..../spatial-sdk-hive-1.2.0.jar;
ADD JAR /home/..../spatial-sdk-hadoop.jar;

-- Bind the ESRI classes to temporary function names.
CREATE TEMPORARY FUNCTION ST_AsBinary AS 'com.esri.hadoop.hive.ST_AsBinary';
CREATE TEMPORARY FUNCTION ST_Polygon  AS 'com.esri.hadoop.hive.ST_Polygon';
CREATE TEMPORARY FUNCTION ST_Point    AS 'com.esri.hadoop.hive.ST_Point';
CREATE TEMPORARY FUNCTION ST_Contains AS 'com.esri.hadoop.hive.ST_Contains';
CREATE TEMPORARY FUNCTION ST_Geometry AS 'com.esri.hadoop.hive.ST_Geometry';
A) load table from geojson data to hive:
-- Table read through the GeoJSON SerDe: four string attributes plus a binary BoundaryShape
-- column (the column later passed to ST_Contains as the containing geometry).
CREATE TABLE default.lim_xxx_pais (
    NOM_PLAN      STRING,
    NMO_PLAN      STRING,
    APROXIMADO    STRING,
    ID1           STRING,
    BoundaryShape BINARY
)
ROW FORMAT SERDE
    'com.esri.hadoop.hive.serde.GeoJsonSerDe'
STORED AS
    INPUTFORMAT  'com.esri.json.hadoop.EnclosedGeoJsonInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
B)
-- Load the GeoJSON file into the table, replacing existing contents (OVERWRITE).
-- The table is qualified with its database so the load targets default.lim_xxx_pais
-- regardless of the session's current database.
LOAD DATA INPATH '/user/.../lim_xxx_pais.geojson'
OVERWRITE INTO TABLE default.lim_xxx_pais;
C)
-- Return the attributes of every row whose boundary shape contains the given point.
-- Reads from default.lim_xxx_pais, the table that was created and loaded in the earlier steps.
SELECT
    aa.NOM_PLAN,
    aa.NMO_PLAN,
    aa.APROXIMADO,
    aa.ID1
FROM default.lim_xxx_pais aa
WHERE ST_Contains(aa.boundaryshape, ST_POINT(-72.08726603, -36.62627804));