有什么方法可以从一个存放值的 HashMap 生成 Avro 模式(schema)吗?
Is there any way to generate an Avro schema from a hashmap of values?
我们将一行的所有数据点放入一个 HashMap 中。我们不想使用 POJO,因为每条记录包含的字段都不相同。例如,某些记录上可能有 "place",而另一些记录上可能有 "hometown"。实际上我们有数千种不同的列名可供选择。我们的代码如下所示:
// Collect one row's column values, keyed by column name.
Map<String, Object> rowValues = new HashMap<>();
rowValues.put("id", 1);
rowValues.put("age", 45);
rowValues.put("name", "mark");
rowValues.put("place", "home");
// Copy every map entry into a generic Avro record built on the schema, then write it.
GenericRecord avroRecord = new GenericData.Record(avroSchema);
rowValues.forEach(avroRecord::put);
writer.write(avroRecord);
我们想先将所有值放入 Map 中,然后据此生成模式。既然使用 Reflect API 可以为 POJO 生成模式,我想知道是否也可以从 HashMap 生成?
附带一个问题,有没有办法去掉上面的forEach,只写map?
这是我们想出的。我们还有嵌套列。
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Parser;
import org.apache.avro.Schema.Type;
/**
 * Builds an Avro {@link Schema} from a {@code NameTypeConsumer} that supplies column names and
 * their types.
 * <p>
 * This does NOT do all types. So far just the types we think we need. See
 * https://docs.oracle.com/database/nosql-12.1.3.0/GettingStartedGuide/avroschemas.html
 * <p>
 * We need some error handling here and when we don't have the correct type, call it out!
 * <p>
 * This runs in 1-2ms even with a large payload.
 */
public final class AvroSchemaBuilder {

  /**
   * Not instantiable: all methods are static.
   */
  private AvroSchemaBuilder() {
    // private constructor. All methods are static.
  }

  /**
   * Build the Avro schema and return it.
   * <p>
   * The schema description is first assembled as a map, serialized to JSON, and then parsed by
   * an Avro {@link Parser} with validation switched on.
   *
   * @param name Name of the top-level record.
   * @param nameTypeConsumer The nameTypeConsumer of objects being saved.
   * @return the Avro schema.
   */
  public static Schema getAvroSchema(String name, NameTypeConsumer nameTypeConsumer) {
    String json = Lson.toJson(getAvroSchemaAsMap(name, nameTypeConsumer, true));
    Parser parser = new Parser().setValidate(true);
    return parser.parse(json);
  }

  /**
   * Returns the map with all the attributes to build a record schema. This recurses when a
   * column carries its own {@code NameTypeConsumer}: that column's type becomes a nested record
   * schema named after the column. For example for Trends this would build a complex schema
   * where some of the types are themselves described as another nested schema.
   *
   * @param name name of the record being described.
   * @param nameTypeConsumer source of the column names and types of this record.
   * @param addNameSpace whether to add the {@code namespace} attribute; the visible callers pass
   *     {@code true} for the top-level record and {@code false} for nested records.
   * @return an insertion-ordered map holding {@code type}, {@code name}, optionally
   *     {@code namespace}, and {@code fields}.
   */
  private static Map<String, Object> getAvroSchemaAsMap(String name,
      NameTypeConsumer nameTypeConsumer,
      boolean addNameSpace) {
    Map<String, Object> schemaMap = new LinkedHashMap<>();
    schemaMap.put("type", "record");
    schemaMap.put("name", name);
    if (addNameSpace) {
      schemaMap.put("namespace", "com.blah.blah");
    }
    // Diamond operator instead of the raw ArrayList: keeps the list type-checked as List<Field>.
    List<Field> fields = new ArrayList<>();
    nameTypeConsumer.consumeNestedNameType((columnName, nestedNameType) -> {
      Object avroType;
      if (nestedNameType.getNameTypeConsumer() != null) {
        // Nested column: describe it as its own record schema, without a namespace.
        avroType = getAvroSchemaAsMap(columnName, nestedNameType.getNameTypeConsumer(), false);
      } else {
        avroType = getAvroType(nestedNameType.getType()).getName();
      }
      Object[] types = {"null", avroType}; // adding null first always.
      fields.add(new Field(columnName, types));
    });
    schemaMap.put("fields", fields);
    return schemaMap;
  }

  /**
   * Finds the Avro type that corresponds to a Java class.
   *
   * @param type the Java class of the value (for example {@code Integer.class}).
   * @return the matching Avro {@link Type} constant.
   * @throws GenericRuntimeException if the class is not one of Integer, Long, Float, Double,
   *     String or Boolean.
   */
  private static Type getAvroType(Class<?> type) {
    if (type.equals(Integer.class)) {
      return Type.INT;
    }
    if (type.equals(Long.class)) {
      return Type.LONG;
    }
    if (type.equals(Float.class)) {
      return Type.FLOAT;
    }
    if (type.equals(Double.class)) {
      return Type.DOUBLE;
    }
    if (type.equals(String.class)) {
      return Type.STRING;
    }
    if (type.equals(Boolean.class)) {
      return Type.BOOLEAN;
    }
    throw new GenericRuntimeException("Cannot get Avro type for type " + type.getName());
  }

  /**
   * Nested class to make our field: a field name plus the members of its type union.
   * NOTE(review): the fields are presumably public so the JSON serializer can read them; confirm
   * against Lson before changing their visibility or names.
   */
  private static class Field {
    public final String name;
    public final Object[] type;

    public Field(String name, Object[] type) {
      this.name = name;
      this.type = type;
    }
  }
}
我们将一行的所有数据点放入一个 HashMap 中。我们不想使用 POJO,因为每条记录包含的字段都不相同。例如,某些记录上可能有 "place",而另一些记录上可能有 "hometown"。实际上我们有数千种不同的列名可供选择。我们的代码如下所示:
// Collect one row's column values, keyed by column name.
Map<String, Object> rowValues = new HashMap<>();
rowValues.put("id", 1);
rowValues.put("age", 45);
rowValues.put("name", "mark");
rowValues.put("place", "home");
// Copy every map entry into a generic Avro record built on the schema, then write it.
GenericRecord avroRecord = new GenericData.Record(avroSchema);
rowValues.forEach(avroRecord::put);
writer.write(avroRecord);
我们想先将所有值放入 Map 中,然后据此生成模式。既然使用 Reflect API 可以为 POJO 生成模式,我想知道是否也可以从 HashMap 生成?
附带一个问题,有没有办法去掉上面的forEach,只写map?
这是我们想出的。我们还有嵌套列。
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Parser;
import org.apache.avro.Schema.Type;
/**
 * Builds an Avro {@link Schema} from a {@code NameTypeConsumer} that supplies column names and
 * their types.
 * <p>
 * This does NOT do all types. So far just the types we think we need. See
 * https://docs.oracle.com/database/nosql-12.1.3.0/GettingStartedGuide/avroschemas.html
 * <p>
 * We need some error handling here and when we don't have the correct type, call it out!
 * <p>
 * This runs in 1-2ms even with a large payload.
 */
public final class AvroSchemaBuilder {

  /**
   * Not instantiable: all methods are static.
   */
  private AvroSchemaBuilder() {
    // private constructor. All methods are static.
  }

  /**
   * Build the Avro schema and return it.
   * <p>
   * The schema description is first assembled as a map, serialized to JSON, and then parsed by
   * an Avro {@link Parser} with validation switched on.
   *
   * @param name Name of the top-level record.
   * @param nameTypeConsumer The nameTypeConsumer of objects being saved.
   * @return the Avro schema.
   */
  public static Schema getAvroSchema(String name, NameTypeConsumer nameTypeConsumer) {
    String json = Lson.toJson(getAvroSchemaAsMap(name, nameTypeConsumer, true));
    Parser parser = new Parser().setValidate(true);
    return parser.parse(json);
  }

  /**
   * Returns the map with all the attributes to build a record schema. This recurses when a
   * column carries its own {@code NameTypeConsumer}: that column's type becomes a nested record
   * schema named after the column. For example for Trends this would build a complex schema
   * where some of the types are themselves described as another nested schema.
   *
   * @param name name of the record being described.
   * @param nameTypeConsumer source of the column names and types of this record.
   * @param addNameSpace whether to add the {@code namespace} attribute; the visible callers pass
   *     {@code true} for the top-level record and {@code false} for nested records.
   * @return an insertion-ordered map holding {@code type}, {@code name}, optionally
   *     {@code namespace}, and {@code fields}.
   */
  private static Map<String, Object> getAvroSchemaAsMap(String name,
      NameTypeConsumer nameTypeConsumer,
      boolean addNameSpace) {
    Map<String, Object> schemaMap = new LinkedHashMap<>();
    schemaMap.put("type", "record");
    schemaMap.put("name", name);
    if (addNameSpace) {
      schemaMap.put("namespace", "com.blah.blah");
    }
    // Diamond operator instead of the raw ArrayList: keeps the list type-checked as List<Field>.
    List<Field> fields = new ArrayList<>();
    nameTypeConsumer.consumeNestedNameType((columnName, nestedNameType) -> {
      Object avroType;
      if (nestedNameType.getNameTypeConsumer() != null) {
        // Nested column: describe it as its own record schema, without a namespace.
        avroType = getAvroSchemaAsMap(columnName, nestedNameType.getNameTypeConsumer(), false);
      } else {
        avroType = getAvroType(nestedNameType.getType()).getName();
      }
      Object[] types = {"null", avroType}; // adding null first always.
      fields.add(new Field(columnName, types));
    });
    schemaMap.put("fields", fields);
    return schemaMap;
  }

  /**
   * Finds the Avro type that corresponds to a Java class.
   *
   * @param type the Java class of the value (for example {@code Integer.class}).
   * @return the matching Avro {@link Type} constant.
   * @throws GenericRuntimeException if the class is not one of Integer, Long, Float, Double,
   *     String or Boolean.
   */
  private static Type getAvroType(Class<?> type) {
    if (type.equals(Integer.class)) {
      return Type.INT;
    }
    if (type.equals(Long.class)) {
      return Type.LONG;
    }
    if (type.equals(Float.class)) {
      return Type.FLOAT;
    }
    if (type.equals(Double.class)) {
      return Type.DOUBLE;
    }
    if (type.equals(String.class)) {
      return Type.STRING;
    }
    if (type.equals(Boolean.class)) {
      return Type.BOOLEAN;
    }
    throw new GenericRuntimeException("Cannot get Avro type for type " + type.getName());
  }

  /**
   * Nested class to make our field: a field name plus the members of its type union.
   * NOTE(review): the fields are presumably public so the JSON serializer can read them; confirm
   * against Lson before changing their visibility or names.
   */
  private static class Field {
    public final String name;
    public final Object[] type;

    public Field(String name, Object[] type) {
      this.name = name;
      this.type = type;
    }
  }
}