Hive 使用自定义 UDF 将 array<map<string, string>> 转换为 array<struct<key:string, value:string>>
Hive converting array<map<string, string>> to array<struct<key:string, value:string>> with custom udf
我需要在 Hive 中创建自定义 UDF 以将 array<map<string, string>>
转换为 array<struct<key:string, value:string>>
我正在尝试以下 class:
import java.util.List;
import java.util.Map;
import com.google.common.collect.Lists;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;
/**
 * Hive generic UDF that converts an {@code array<map<key, value>>} argument into an
 * {@code array<struct<key:string, value:string>>}.
 *
 * <p>For every map in the input array, the entry whose key reads {@code "key"} supplies the
 * struct's {@code key} field and the entry whose key reads {@code "value"} supplies the
 * struct's {@code value} field. A field whose entry is absent (or whose entry value is null)
 * is emitted as {@code null}; a null map element is emitted as a null struct.
 *
 * <p>The input is always read through the object inspectors captured in
 * {@link #initialize(ObjectInspector[])} rather than by casting the raw argument to
 * {@code java.util.List}/{@code java.util.Map}, so the runtime class of the argument does not
 * have to be a Java collection.
 */
public class ArrayOfMapToArrayOfStructUdf extends GenericUDF {
    private static final String UDF_NAME = "convertArrayMapToArrayStruct";
    private static final String KEY_FIELD = "key";
    private static final String VALUE_FIELD = "value";

    /** Inspector for the array argument; assigned in {@link #initialize(ObjectInspector[])}. */
    private transient ListObjectInspector listInspector;
    /** Inspector for the array's map elements; assigned in {@link #initialize(ObjectInspector[])}. */
    private transient MapObjectInspector mapInspector;

    @Override
    public String getUdfName() {
        return UDF_NAME;
    }

    /**
     * Validates the argument type, remembers the inspectors needed by
     * {@link #evaluate(DeferredObject[])}, and returns the inspector describing the result.
     *
     * @param objectInspectors inspectors of the call arguments; exactly one is expected
     * @return inspector for a list of structs with string fields {@code key} and {@code value}
     * @throws UDFArgumentLengthException if the number of arguments is not exactly one
     * @throws UDFArgumentException if the argument is not an array whose elements are maps
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] objectInspectors) throws UDFArgumentException {
        if (objectInspectors == null || objectInspectors.length != 1) {
            throw new UDFArgumentLengthException(UDF_NAME + " takes 1 argument of type array<map<key, value>>");
        }
        validateArgumentType(objectInspectors);
        // Safe casts: validateArgumentType has already checked both inspector types.
        listInspector = (ListObjectInspector) objectInspectors[0];
        mapInspector = (MapObjectInspector) listInspector.getListElementObjectInspector();
        return createReturnObjectInspector();
    }

    /**
     * Checks that the first argument is an array whose elements are maps.
     *
     * @throws UDFArgumentException if either check fails
     */
    private void validateArgumentType(ObjectInspector[] objectInspectors) throws UDFArgumentException {
        if (!(objectInspectors[0] instanceof ListObjectInspector)) {
            throw new UDFArgumentException("the argument must be of type: array<map<key, value>>");
        }
        ListObjectInspector candidate = (ListObjectInspector) objectInspectors[0];
        if (!(candidate.getListElementObjectInspector() instanceof MapObjectInspector)) {
            throw new UDFArgumentException("the array contents must be of type: map<key, value>");
        }
    }

    /** Builds the inspector for {@code array<struct<key:string, value:string>>} backed by Java strings. */
    private ObjectInspector createReturnObjectInspector() {
        List<String> structFieldNames = Lists.newArrayList(KEY_FIELD, VALUE_FIELD);
        List<ObjectInspector> structFieldObjectInspectors =
                Lists.newArrayList(PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        StructObjectInspector structObjectInspector =
                ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames, structFieldObjectInspectors);
        return ObjectInspectorFactory.getStandardListObjectInspector(structObjectInspector);
    }

    /**
     * Converts the array-of-maps argument into a list of {@code {key, value}} rows.
     *
     * @param deferredObjects the call arguments; only the first one is read
     * @return a list with one {@code Object[2]} of Java strings per input map (null for a null
     *     map), or {@code null} when the argument is missing or null
     * @throws HiveException if the deferred argument cannot be obtained
     */
    @Override
    public Object evaluate(DeferredObject[] deferredObjects) throws HiveException {
        if (deferredObjects == null || deferredObjects.length < 1) {
            return null;
        }
        Object rawArray = deferredObjects[0].get();
        if (rawArray == null) {
            return null;
        }
        List<?> rawMaps = listInspector.getList(rawArray);
        if (rawMaps == null) {
            return null;
        }

        List<Object> arrayOfStruct = Lists.newArrayList();
        for (Object rawMap : rawMaps) {
            Map<?, ?> entries = mapInspector.getMap(rawMap);
            if (entries == null) {
                arrayOfStruct.add(null);
                continue;
            }
            String key = null;
            String value = null;
            // Scan the entries instead of calling entries.get("key"): the key objects are not
            // guaranteed to be java.lang.String, so an equals-based lookup could miss them.
            for (Map.Entry<?, ?> entry : entries.entrySet()) {
                String entryName = asString(entry.getKey());
                if (KEY_FIELD.equals(entryName)) {
                    key = asString(entry.getValue());
                } else if (VALUE_FIELD.equals(entryName)) {
                    value = asString(entry.getValue());
                }
            }
            // The declared field inspectors are javaStringObjectInspector, so the struct
            // fields must be java.lang.String instances (not Text).
            arrayOfStruct.add(new Object[] {key, value});
        }
        return arrayOfStruct;
    }

    /**
     * Null-safe {@code toString()}.
     * NOTE(review): assumes the map's key/value objects render their string content via
     * toString() - confirm for the SerDe in use.
     */
    private static String asString(Object raw) {
        return raw == null ? null : raw.toString();
    }

    @Override
    public String getDisplayString(String[] strings) {
        return UDF_NAME;
    }
}
我收到以下错误:
Failed with exception java.io.IOException:org.apache.hadoop.hive.ql.metadata.HiveException: Error evaluating convertArrayMapToArrayStruct
我不知道如何在 evaluate 方法中构建要返回的对象。
我尝试转换的列有如下数据:
[{"key": "key1", "value": "value1"}, {"key": "key2", "value": "value2"}, ..., {"key": "keyN", "value": "valueN"}]
谢谢!
这有效:
/**
 * Converts the array-of-maps argument into a list of two-element {@code [key, value]} rows.
 *
 * <p>For each map, the entry whose key reads {@code "key"} fills the first element and the
 * entry whose key reads {@code "value"} fills the second; a missing or null entry leaves the
 * corresponding element as the empty string. A null array element yields a null row.
 *
 * @param deferredObjects the call arguments; only the first one is read
 * @return one row per input map, or {@code null} when the argument is missing or null
 * @throws HiveException if the deferred argument cannot be obtained
 */
@Override
public Object evaluate(DeferredObject[] deferredObjects) throws HiveException {
    if (deferredObjects == null || deferredObjects.length < 1) {
        return null;
    }
    // NOTE(review): the casts below assume the argument is backed by LazyArray/LazyMap;
    // any other runtime representation fails with ClassCastException - confirm for the table's SerDe.
    LazyArray lazyArray = (LazyArray) deferredObjects[0].get();
    if (lazyArray == null) {
        return null;
    }
    List<Object> lazyList = lazyArray.getList();
    if (lazyList == null) {
        return null;
    }
    List<Object> finalList = Lists.newArrayList();
    for (Object o : lazyList) {
        if (o == null) {
            // Preserve the element position instead of failing on a null map.
            finalList.add(null);
            continue;
        }
        LazyMap lazyMap = (LazyMap) o;
        String key = "";
        String value = "";
        Map<?, ?> entries = lazyMap.getMap();
        if (entries != null) {
            for (Map.Entry<?, ?> entry : entries.entrySet()) {
                Object entryKey = entry.getKey();
                Object entryValue = entry.getValue();
                if (entryKey == null || entryValue == null) {
                    // Nothing usable in this entry; keep the "" default.
                    continue;
                }
                String entryName = entryKey.toString();
                if ("key".equals(entryName)) {
                    key = entryValue.toString();
                } else if ("value".equals(entryName)) {
                    value = entryValue.toString();
                }
            }
        }
        finalList.add(Lists.newArrayList(key, value));
    }
    return finalList;
}
我需要在 Hive 中创建自定义 UDF 以将 array<map<string, string>>
转换为 array<struct<key:string, value:string>>
我正在尝试以下 class:
import java.util.List;
import java.util.Map;
import com.google.common.collect.Lists;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;
/**
 * Hive generic UDF that converts an {@code array<map<key, value>>} argument into an
 * {@code array<struct<key:string, value:string>>}.
 *
 * <p>For every map in the input array, the entry whose key reads {@code "key"} supplies the
 * struct's {@code key} field and the entry whose key reads {@code "value"} supplies the
 * struct's {@code value} field. A field whose entry is absent (or whose entry value is null)
 * is emitted as {@code null}; a null map element is emitted as a null struct.
 *
 * <p>The input is always read through the object inspectors captured in
 * {@link #initialize(ObjectInspector[])} rather than by casting the raw argument to
 * {@code java.util.List}/{@code java.util.Map}, so the runtime class of the argument does not
 * have to be a Java collection.
 */
public class ArrayOfMapToArrayOfStructUdf extends GenericUDF {
    private static final String UDF_NAME = "convertArrayMapToArrayStruct";
    private static final String KEY_FIELD = "key";
    private static final String VALUE_FIELD = "value";

    /** Inspector for the array argument; assigned in {@link #initialize(ObjectInspector[])}. */
    private transient ListObjectInspector listInspector;
    /** Inspector for the array's map elements; assigned in {@link #initialize(ObjectInspector[])}. */
    private transient MapObjectInspector mapInspector;

    @Override
    public String getUdfName() {
        return UDF_NAME;
    }

    /**
     * Validates the argument type, remembers the inspectors needed by
     * {@link #evaluate(DeferredObject[])}, and returns the inspector describing the result.
     *
     * @param objectInspectors inspectors of the call arguments; exactly one is expected
     * @return inspector for a list of structs with string fields {@code key} and {@code value}
     * @throws UDFArgumentLengthException if the number of arguments is not exactly one
     * @throws UDFArgumentException if the argument is not an array whose elements are maps
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] objectInspectors) throws UDFArgumentException {
        if (objectInspectors == null || objectInspectors.length != 1) {
            throw new UDFArgumentLengthException(UDF_NAME + " takes 1 argument of type array<map<key, value>>");
        }
        validateArgumentType(objectInspectors);
        // Safe casts: validateArgumentType has already checked both inspector types.
        listInspector = (ListObjectInspector) objectInspectors[0];
        mapInspector = (MapObjectInspector) listInspector.getListElementObjectInspector();
        return createReturnObjectInspector();
    }

    /**
     * Checks that the first argument is an array whose elements are maps.
     *
     * @throws UDFArgumentException if either check fails
     */
    private void validateArgumentType(ObjectInspector[] objectInspectors) throws UDFArgumentException {
        if (!(objectInspectors[0] instanceof ListObjectInspector)) {
            throw new UDFArgumentException("the argument must be of type: array<map<key, value>>");
        }
        ListObjectInspector candidate = (ListObjectInspector) objectInspectors[0];
        if (!(candidate.getListElementObjectInspector() instanceof MapObjectInspector)) {
            throw new UDFArgumentException("the array contents must be of type: map<key, value>");
        }
    }

    /** Builds the inspector for {@code array<struct<key:string, value:string>>} backed by Java strings. */
    private ObjectInspector createReturnObjectInspector() {
        List<String> structFieldNames = Lists.newArrayList(KEY_FIELD, VALUE_FIELD);
        List<ObjectInspector> structFieldObjectInspectors =
                Lists.newArrayList(PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        StructObjectInspector structObjectInspector =
                ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames, structFieldObjectInspectors);
        return ObjectInspectorFactory.getStandardListObjectInspector(structObjectInspector);
    }

    /**
     * Converts the array-of-maps argument into a list of {@code {key, value}} rows.
     *
     * @param deferredObjects the call arguments; only the first one is read
     * @return a list with one {@code Object[2]} of Java strings per input map (null for a null
     *     map), or {@code null} when the argument is missing or null
     * @throws HiveException if the deferred argument cannot be obtained
     */
    @Override
    public Object evaluate(DeferredObject[] deferredObjects) throws HiveException {
        if (deferredObjects == null || deferredObjects.length < 1) {
            return null;
        }
        Object rawArray = deferredObjects[0].get();
        if (rawArray == null) {
            return null;
        }
        List<?> rawMaps = listInspector.getList(rawArray);
        if (rawMaps == null) {
            return null;
        }

        List<Object> arrayOfStruct = Lists.newArrayList();
        for (Object rawMap : rawMaps) {
            Map<?, ?> entries = mapInspector.getMap(rawMap);
            if (entries == null) {
                arrayOfStruct.add(null);
                continue;
            }
            String key = null;
            String value = null;
            // Scan the entries instead of calling entries.get("key"): the key objects are not
            // guaranteed to be java.lang.String, so an equals-based lookup could miss them.
            for (Map.Entry<?, ?> entry : entries.entrySet()) {
                String entryName = asString(entry.getKey());
                if (KEY_FIELD.equals(entryName)) {
                    key = asString(entry.getValue());
                } else if (VALUE_FIELD.equals(entryName)) {
                    value = asString(entry.getValue());
                }
            }
            // The declared field inspectors are javaStringObjectInspector, so the struct
            // fields must be java.lang.String instances (not Text).
            arrayOfStruct.add(new Object[] {key, value});
        }
        return arrayOfStruct;
    }

    /**
     * Null-safe {@code toString()}.
     * NOTE(review): assumes the map's key/value objects render their string content via
     * toString() - confirm for the SerDe in use.
     */
    private static String asString(Object raw) {
        return raw == null ? null : raw.toString();
    }

    @Override
    public String getDisplayString(String[] strings) {
        return UDF_NAME;
    }
}
我收到以下错误:
Failed with exception java.io.IOException:org.apache.hadoop.hive.ql.metadata.HiveException: Error evaluating convertArrayMapToArrayStruct
我不知道如何在 evaluate 方法中构建要返回的对象。
我尝试转换的列有如下数据:
[{"key": "key1", "value": "value1"}, {"key": "key2", "value": "value2"}, ..., {"key": "keyN", "value": "valueN"}]
谢谢!
这有效:
/**
 * Converts the array-of-maps argument into a list of two-element {@code [key, value]} rows.
 *
 * <p>For each map, the entry whose key reads {@code "key"} fills the first element and the
 * entry whose key reads {@code "value"} fills the second; a missing or null entry leaves the
 * corresponding element as the empty string. A null array element yields a null row.
 *
 * @param deferredObjects the call arguments; only the first one is read
 * @return one row per input map, or {@code null} when the argument is missing or null
 * @throws HiveException if the deferred argument cannot be obtained
 */
@Override
public Object evaluate(DeferredObject[] deferredObjects) throws HiveException {
    if (deferredObjects == null || deferredObjects.length < 1) {
        return null;
    }
    // NOTE(review): the casts below assume the argument is backed by LazyArray/LazyMap;
    // any other runtime representation fails with ClassCastException - confirm for the table's SerDe.
    LazyArray lazyArray = (LazyArray) deferredObjects[0].get();
    if (lazyArray == null) {
        return null;
    }
    List<Object> lazyList = lazyArray.getList();
    if (lazyList == null) {
        return null;
    }
    List<Object> finalList = Lists.newArrayList();
    for (Object o : lazyList) {
        if (o == null) {
            // Preserve the element position instead of failing on a null map.
            finalList.add(null);
            continue;
        }
        LazyMap lazyMap = (LazyMap) o;
        String key = "";
        String value = "";
        Map<?, ?> entries = lazyMap.getMap();
        if (entries != null) {
            for (Map.Entry<?, ?> entry : entries.entrySet()) {
                Object entryKey = entry.getKey();
                Object entryValue = entry.getValue();
                if (entryKey == null || entryValue == null) {
                    // Nothing usable in this entry; keep the "" default.
                    continue;
                }
                String entryName = entryKey.toString();
                if ("key".equals(entryName)) {
                    key = entryValue.toString();
                } else if ("value".equals(entryName)) {
                    value = entryValue.toString();
                }
            }
        }
        finalList.add(Lists.newArrayList(key, value));
    }
    return finalList;
}