Hive 通用 UDF:Hive 未按预期进行转换,原因是:java.lang.ClassCastException:java.util.ArrayList 无法转换为 java.util.Map
Hive Generic UDF : Hive does not cast as expected, Caused by: java.lang.ClassCastException: java.util.ArrayList cannot be cast to java.util.Map
我正在尝试为我的 Hive 查询创建一个简单的通用 UDF(GenericUDF)。
这是我的 Hive 表:
CREATE TABLE `dum`(`val` map<string,array<string>>);
insert into dum select map('A',array('1','2','3'),'B',array('4','5','6'));
这是它的样子
select * from dum;
{"A":["1","2","3"],"B":["4","5","6"]}
我正在尝试创建一个简单的 UDF,它可以把上述 map 中所有值(数组)里的元素合并到一个列表中。这是我希望看到的结果:
select modudf(val) from dum;
["1","2","3","4","5","6"]
所以我编写了下面这个 UDF:
package some.package;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.udf.UDFType;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Simple Hive UDF that flattens the list values of a map into one list.
 *
 * <p>For an input such as {"A":["1","2","3"],"B":["4","5","6"]} the result is
 * ["1","2","3","4","5","6"]; elements appear in the iteration order of the map.
 */
@UDFType(deterministic = true)
public class CustomUDF extends UDF {
    /**
     * Collects every element of every value list of {@code inMap}.
     *
     * @param inMap map whose values are lists of strings; neither the map nor
     *              any of its values is checked for {@code null}
     * @return a new list holding all elements of all value lists
     */
    public List<String> evaluate(Map<String, List<String>> inMap) {
        List<String> flattened = new ArrayList<String>();
        // Only the values matter here, so the keys are never looked at.
        for (List<String> values : inMap.values()) {
            for (String element : values) {
                flattened.add(element);
            }
        }
        return flattened;
    }
}
这在我尝试调用它时完美无缺
add jar /path/to/my/jar;
CREATE TEMPORARY FUNCTION modudf AS 'some.package.CustomUDF';
select modudf(val) from dum;
我得到
["1","2","3","4","5","6"]
但是,我想把它改写成一个通用 UDF(GenericUDF),所以我尝试了下面的代码:
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.*;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Generic UDF meant to flatten the list values of a single map argument into
 * one list of strings.
 *
 * NOTE: initialize() declares a map object inspector as the return type while
 * evaluate() returns a List. That mismatch is what produces the
 * "ArrayList cannot be cast to java.util.Map" error reported below.
 */
public class CustomUDF2 extends GenericUDF {
// Inspector for the map argument; assigned in initialize().
private MapObjectInspector inputMapOI = null;
// Converts a map key to a Java String; assigned in initialize().
private Converter inputMapKeyConverter = null;
// Inspector for the map's value lists; assigned in initialize().
private ListObjectInspector inputMapValueListOI = null;
// Converts a single list element to a Java String; assigned in initialize().
private Converter inputMapValueListElementConverter = null;
/**
 * Returns a fixed display text; the arguments are ignored.
 */
@Override
public String getDisplayString(String[] arguments) {
return "Some message";
}
/**
 * Validates the argument inspectors, stores the inspectors and converters
 * used by evaluate(), and returns the inspector describing the result.
 *
 * @param arguments inspectors of the call arguments; exactly one is expected
 * @return a standard map inspector (string keys, list-of-string values)
 * @throws UDFArgumentException if the argument count is not 1, the argument
 *         is not a map, or the map value type is not a list
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
if ((null == arguments) || (arguments.length != 1)) {
throw new UDFArgumentLengthException("1 argument is expected.");
}
if (!(arguments[0] instanceof MapObjectInspector)) {
throw new UDFArgumentException("The first parameter should be a map object.");
}
this.inputMapOI = (MapObjectInspector) arguments[0];
// Keys are converted from whatever the input inspector uses to Java String.
ObjectInspector mapKeyOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
this.inputMapKeyConverter = ObjectInspectorConverters.getConverter(this.inputMapOI.getMapKeyObjectInspector(), mapKeyOI);
if (!(this.inputMapOI.getMapValueObjectInspector() instanceof ListObjectInspector)) {
throw new UDFArgumentException("The map value type must be a list (aka array)");
}
this.inputMapValueListOI = (ListObjectInspector) this.inputMapOI.getMapValueObjectInspector();
// List elements are likewise converted to Java String.
ObjectInspector inputListElementOI = this.inputMapValueListOI.getListElementObjectInspector();
ObjectInspector outputListElementOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
this.inputMapValueListElementConverter = ObjectInspectorConverters.getConverter(inputListElementOI, outputListElementOI);
// Build the declared output type: map<string, list<string>>.
ObjectInspector outputMapKeyOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
ObjectInspector outputMapValueListElementOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
ObjectInspector outputMapValueListOI = ObjectInspectorFactory.getStandardListObjectInspector(outputMapValueListElementOI);
// NOTE: a map inspector is returned here, but evaluate() returns a List.
return ObjectInspectorFactory.getStandardMapObjectInspector(outputMapKeyOI, outputMapValueListOI);
}
/**
 * Collects the elements of every non-empty value list of the map argument,
 * converted to String, into one list.
 *
 * @param arguments deferred call arguments; exactly one is expected
 * @return a List of String with the converted elements
 * @throws HiveException if the argument count is not 1
 */
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
if ((null == arguments) || (arguments.length != 1)) {
throw new UDFArgumentLengthException("1 argument is expected.");
}
// NOTE(review): there is no null check on the map obtained here; confirm
// whether getMap() can return null for a NULL column value.
Map<?, ?> map = inputMapOI.getMap(arguments[0].get());
List<String> dataList = new ArrayList<String>();
for (Object key : map.keySet()) {
List<?> valueList = this.inputMapValueListOI.getList(map.get(key));
// Skip keys whose value list is missing or empty.
if ((valueList == null) || (valueList.size() == 0)) {
continue;
}
// strKey is never read after this line.
String strKey = (String) this.inputMapKeyConverter.convert(key);
for (Object value : valueList) {
String strValue = (String) this.inputMapValueListElementConverter.convert(value);
dataList.add(strValue);
}
}
return dataList;
}
}
但是这次当我调用它时出现错误
add jar /path/to/my/jar;
CREATE TEMPORARY FUNCTION modudf AS 'some.package.CustomUDF2';
select modudf(val) from dum;
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"val":{"A":["1","2","3"],"B":["4","5","6"]}}
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:562)
at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:148)
... 8 more
Caused by: java.lang.ClassCastException: java.util.ArrayList cannot be cast to java.util.Map
at org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector.getMap(StandardMapObjectInspector.java:85)
at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serialize(LazySimpleSerDe.java:321)
at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serializeField(LazySimpleSerDe.java:247)
at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.doSerialize(LazySimpleSerDe.java:231)
at org.apache.hadoop.hive.serde2.AbstractEncodingAwareSerDe.serialize(AbstractEncodingAwareSerDe.java:55)
at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:725)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130)
at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:148)
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:547)
... 9 more
据我所知,我从未尝试将 ArrayList 转换为 Map。
我做错了什么?
你需要在 initialize 中返回与 UDF 返回类型相对应的 ObjectInspector(在你的情况下应当是 ListObjectInspector)。
当你这样写的时候:
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
...
return ObjectInspectorFactory.getStandardMapObjectInspector(outputMapKeyOI, outputMapValueListOI);
}
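Hive 会期望输出是一个 Map。
因此,当 evaluate 之后执行的代码拿到的却是一个 List 时,就会抛出异常。
解决办法是让 initialize 返回一个 ListObjectInspector,例如直接返回 outputMapValueListOI(它本身就是元素类型为 String 的标准 ListObjectInspector)。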
我正在尝试为我的 Hive 查询创建一个简单的通用 UDF(GenericUDF)。
这是我的 Hive 表:
CREATE TABLE `dum`(`val` map<string,array<string>>);
insert into dum select map('A',array('1','2','3'),'B',array('4','5','6'));
这是它的样子
select * from dum;
{"A":["1","2","3"],"B":["4","5","6"]}
我正在尝试创建一个简单的 UDF,它可以把上述 map 中所有值(数组)里的元素合并到一个列表中。这是我希望看到的结果:
select modudf(val) from dum;
["1","2","3","4","5","6"]
所以我编写了下面这个 UDF:
package some.package;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.udf.UDFType;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Simple Hive UDF that flattens the list values of a map into one list.
 *
 * <p>For an input such as {"A":["1","2","3"],"B":["4","5","6"]} the result is
 * ["1","2","3","4","5","6"]; elements appear in the iteration order of the map.
 */
@UDFType(deterministic = true)
public class CustomUDF extends UDF {
    /**
     * Collects every element of every value list of {@code inMap}.
     *
     * @param inMap map whose values are lists of strings; neither the map nor
     *              any of its values is checked for {@code null}
     * @return a new list holding all elements of all value lists
     */
    public List<String> evaluate(Map<String, List<String>> inMap) {
        List<String> flattened = new ArrayList<String>();
        // Only the values matter here, so the keys are never looked at.
        for (List<String> values : inMap.values()) {
            for (String element : values) {
                flattened.add(element);
            }
        }
        return flattened;
    }
}
这在我尝试调用它时完美无缺
add jar /path/to/my/jar;
CREATE TEMPORARY FUNCTION modudf AS 'some.package.CustomUDF';
select modudf(val) from dum;
我得到
["1","2","3","4","5","6"]
但是,我想把它改写成一个通用 UDF(GenericUDF),所以我尝试了下面的代码:
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.*;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Generic UDF meant to flatten the list values of a single map argument into
 * one list of strings.
 *
 * NOTE: initialize() declares a map object inspector as the return type while
 * evaluate() returns a List. That mismatch is what produces the
 * "ArrayList cannot be cast to java.util.Map" error reported below.
 */
public class CustomUDF2 extends GenericUDF {
// Inspector for the map argument; assigned in initialize().
private MapObjectInspector inputMapOI = null;
// Converts a map key to a Java String; assigned in initialize().
private Converter inputMapKeyConverter = null;
// Inspector for the map's value lists; assigned in initialize().
private ListObjectInspector inputMapValueListOI = null;
// Converts a single list element to a Java String; assigned in initialize().
private Converter inputMapValueListElementConverter = null;
/**
 * Returns a fixed display text; the arguments are ignored.
 */
@Override
public String getDisplayString(String[] arguments) {
return "Some message";
}
/**
 * Validates the argument inspectors, stores the inspectors and converters
 * used by evaluate(), and returns the inspector describing the result.
 *
 * @param arguments inspectors of the call arguments; exactly one is expected
 * @return a standard map inspector (string keys, list-of-string values)
 * @throws UDFArgumentException if the argument count is not 1, the argument
 *         is not a map, or the map value type is not a list
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
if ((null == arguments) || (arguments.length != 1)) {
throw new UDFArgumentLengthException("1 argument is expected.");
}
if (!(arguments[0] instanceof MapObjectInspector)) {
throw new UDFArgumentException("The first parameter should be a map object.");
}
this.inputMapOI = (MapObjectInspector) arguments[0];
// Keys are converted from whatever the input inspector uses to Java String.
ObjectInspector mapKeyOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
this.inputMapKeyConverter = ObjectInspectorConverters.getConverter(this.inputMapOI.getMapKeyObjectInspector(), mapKeyOI);
if (!(this.inputMapOI.getMapValueObjectInspector() instanceof ListObjectInspector)) {
throw new UDFArgumentException("The map value type must be a list (aka array)");
}
this.inputMapValueListOI = (ListObjectInspector) this.inputMapOI.getMapValueObjectInspector();
// List elements are likewise converted to Java String.
ObjectInspector inputListElementOI = this.inputMapValueListOI.getListElementObjectInspector();
ObjectInspector outputListElementOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
this.inputMapValueListElementConverter = ObjectInspectorConverters.getConverter(inputListElementOI, outputListElementOI);
// Build the declared output type: map<string, list<string>>.
ObjectInspector outputMapKeyOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
ObjectInspector outputMapValueListElementOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
ObjectInspector outputMapValueListOI = ObjectInspectorFactory.getStandardListObjectInspector(outputMapValueListElementOI);
// NOTE: a map inspector is returned here, but evaluate() returns a List.
return ObjectInspectorFactory.getStandardMapObjectInspector(outputMapKeyOI, outputMapValueListOI);
}
/**
 * Collects the elements of every non-empty value list of the map argument,
 * converted to String, into one list.
 *
 * @param arguments deferred call arguments; exactly one is expected
 * @return a List of String with the converted elements
 * @throws HiveException if the argument count is not 1
 */
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
if ((null == arguments) || (arguments.length != 1)) {
throw new UDFArgumentLengthException("1 argument is expected.");
}
// NOTE(review): there is no null check on the map obtained here; confirm
// whether getMap() can return null for a NULL column value.
Map<?, ?> map = inputMapOI.getMap(arguments[0].get());
List<String> dataList = new ArrayList<String>();
for (Object key : map.keySet()) {
List<?> valueList = this.inputMapValueListOI.getList(map.get(key));
// Skip keys whose value list is missing or empty.
if ((valueList == null) || (valueList.size() == 0)) {
continue;
}
// strKey is never read after this line.
String strKey = (String) this.inputMapKeyConverter.convert(key);
for (Object value : valueList) {
String strValue = (String) this.inputMapValueListElementConverter.convert(value);
dataList.add(strValue);
}
}
return dataList;
}
}
但是这次当我调用它时出现错误
add jar /path/to/my/jar;
CREATE TEMPORARY FUNCTION modudf AS 'some.package.CustomUDF2';
select modudf(val) from dum;
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"val":{"A":["1","2","3"],"B":["4","5","6"]}}
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:562)
at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:148)
... 8 more
Caused by: java.lang.ClassCastException: java.util.ArrayList cannot be cast to java.util.Map
at org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector.getMap(StandardMapObjectInspector.java:85)
at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serialize(LazySimpleSerDe.java:321)
at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.serializeField(LazySimpleSerDe.java:247)
at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.doSerialize(LazySimpleSerDe.java:231)
at org.apache.hadoop.hive.serde2.AbstractEncodingAwareSerDe.serialize(AbstractEncodingAwareSerDe.java:55)
at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:725)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130)
at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:148)
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:547)
... 9 more
据我所知,我从未尝试将 ArrayList 转换为 Map。
我做错了什么?
你需要在 initialize 中返回与 UDF 返回类型相对应的 ObjectInspector(在你的情况下应当是 ListObjectInspector)。
当你这样写的时候:
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
...
return ObjectInspectorFactory.getStandardMapObjectInspector(outputMapKeyOI, outputMapValueListOI);
}
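Hive 会期望输出是一个 Map。
因此,当 evaluate 之后执行的代码拿到的却是一个 List 时,就会抛出异常。
解决办法是让 initialize 返回一个 ListObjectInspector,例如直接返回 outputMapValueListOI(它本身就是元素类型为 String 的标准 ListObjectInspector)。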