如何将 Avro GenericRecord 转换为有效的 Json,同时将时间戳字段从毫秒转换为日期时间?

How to convert an Avro GenericRecord to valid JSON while converting timestamp fields from milliseconds to datetime?

如何将 Avro GenericRecord 转换为 Json,同时将时间戳字段从毫秒转换为日期时间?

目前正在使用 Avro 1.8.2

    // Sample instant as epoch milliseconds. The upper-case L suffix avoids the
    // lower-case 'l' being misread as the digit 1.
    Timestamp tsp = new Timestamp(1530228588182L);
    // Record with a single long field annotated with the timestamp-millis logical type.
    // The record name matches the schema printed in the question ("tsp_name").
    Schema schema  = SchemaBuilder.builder()
            .record("tsp_name")
            .fields()
            .name("tsp").type(LogicalTypes.timestampMillis().addToSchema(Schema.create(Schema.Type.LONG))).noDefault()
            .endRecord();
    System.out.println(schema.toString());

    // The field is populated with the raw long value, not a date/time object.
    GenericRecord genericRecord = new GenericData.Record(schema);
    genericRecord.put("tsp",tsp.getTime()); //Assume I cannot change this
    System.out.println(genericRecord.toString());

我尝试使用下面的函数,但结果与 genericRecord.toString() 相同:

/**
 * Serializes a record with Avro's JSON encoder and returns the encoded text.
 *
 * @param schema        schema used both by the datum writer and the JSON encoder
 * @param genericRecord record to serialize
 * @return the JSON-encoded record
 * @throws IOException if encoding fails
 */
public static String toJsonString(Schema schema, GenericRecord genericRecord) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    // Use a private data model: the single-argument GenericDatumWriter constructor is
    // backed by the shared GenericData.get() instance, and registering a conversion on
    // it would change the behaviour of unrelated readers and writers in the same JVM.
    GenericData model = new GenericData();
    model.addLogicalTypeConversion(new TimeConversions.TimestampConversion());
    GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema, model);
    JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, baos, false);
    writer.write(genericRecord, encoder);
    encoder.flush();
    // The JSON encoder writes UTF-8 bytes; decode them explicitly rather than with the
    // platform default charset.
    return baos.toString("UTF-8");
}

第三次尝试

/**
 * Decodes a binary-encoded record using a data model that has the timestamp and time
 * logical-type conversions registered and that wraps {@code java.sql} temporal values
 * in double quotes when asked to render them.
 *
 * NOTE(review): the output shown in the question is still unquoted, which suggests the
 * quoting override below is not what renders the record's fields - confirm against the
 * Avro version in use.
 *
 * @param schema schema used as both writer and reader schema
 * @param data   binary-encoded record
 * @return the decoded record
 * @throws IOException if decoding fails
 */
public static GenericRecord deserialize(final Schema schema, byte[] data) throws IOException {
    final GenericData quotingModel = new GenericData() {
        @Override
        public String toString(Object datum) {
            final boolean isSqlTemporal = datum instanceof java.sql.Timestamp
                    || datum instanceof java.sql.Time
                    || datum instanceof java.sql.Date;
            if (!isSqlTemporal) {
                return super.toString(datum);
            }
            // These types are rendered without quotes, which yields malformed JSON,
            // so surround the value with quotes here.
            return "\"" + datum + "\"";
        }
    };
    quotingModel.addLogicalTypeConversion(new TimeConversions.TimestampConversion());
    quotingModel.addLogicalTypeConversion(new TimeConversions.TimeConversion());

    try (final InputStream payload = new ByteArrayInputStream(data)) {
        final Decoder binaryDecoder = DecoderFactory.get().binaryDecoder(payload, null);
        final DatumReader<GenericRecord> recordReader =
                new GenericDatumReader<>(schema, schema, quotingModel);
        return recordReader.read(null, binaryDecoder);
    }
}

模式(Schema)

{"type":"record","name":"tsp_name","fields":[{"name":"tsp","type":{"type":"long","logicalType":"timestamp-millis"}}]}

当前输出

{"tsp":2018-06-28T23:29:48.182Z} // missing quotes so not a valid json

预期输出

{"tsp": "2018-06-28T23:29:48.182Z"}

第一次尝试的问题在于:LogicalType 信息始终只保存在模式(Schema)中,GenericRecord 看到的只是一个 long 值。

我怀疑第二次尝试失败的原因是:它是按 Avro 的 JSON 编码格式写出数据的;从 GenericDatumWriter 的实现来看,转换之后写出的仍然是基础类型(avro-tools 在转储数据时也是同样的做法)。

也许您需要一个特殊情况的解码器来将时间戳转换为您想要的格式的字符串?

要改变输出形式,您可以扩展 Conversion,使 timestamp-millis 逻辑类型返回字符串。以下代码会产生您预期的输出:

import org.apache.avro.*;
import org.apache.avro.data.TimeConversions;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.*;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.sql.Timestamp;

public class Main5 {
    public static void main(String [] args ) throws IOException {
        Timestamp tsp = new Timestamp(1530228588182L);
        String strSchema = "{\"type\":\"record\",\"name\":\"tsp_name\",\"fields\":[{\"name\":\"tsp\",\"type\":{\"type\":\"long\",\"logicalType\":\"timestamp-millis\"}}]}\n";
        Schema schema = new Schema.Parser().parse(strSchema);
        System.out.println(new DateTime(tsp.getTime(), DateTimeZone.UTC));
        GenericRecord genericRecord = new GenericData.Record(schema);
        genericRecord.put("tsp",tsp.getTime()); //Assume I cannot change this
        System.out.println(genericRecord);
        System.out.println(deserialize(schema, toByteArray(schema , genericRecord)));
    }

    public static byte [] toByteArray(Schema schema, GenericRecord genericRecord) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
        writer.getData().addLogicalTypeConversion(new TimeConversions.TimestampConversion());
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(baos, null);
        writer.write(genericRecord, encoder);
        encoder.flush();
        return baos.toByteArray();
    }


    public static GenericRecord deserialize(Schema schema, byte[] data) throws IOException {
        final GenericData genericData = new GenericData();
        genericData.addLogicalTypeConversion(new MyTimestampConversion());
        InputStream is = new ByteArrayInputStream(data);
        Decoder decoder = DecoderFactory.get().binaryDecoder(is, null);
        DatumReader<GenericRecord> reader = new GenericDatumReader<>(schema, schema, genericData);
        return reader.read(null, decoder);
    }

    public static class MyTimestampConversion extends Conversion<String> {
        public MyTimestampConversion() {
        }

        public Class<String> getConvertedType() {
            return String.class;
        }

        public String getLogicalTypeName() {
            return "timestamp-millis";
        }

        public String fromLong(Long millisFromEpoch, Schema schema, LogicalType type) {
            return (new DateTime(millisFromEpoch, DateTimeZone.UTC)).toString();
        }

        public Long toLong(String timestamp, Schema schema, LogicalType type) {
            return new Long(timestamp);
        }
    }
}

输出:{"tsp": "2018-06-28T23:29:48.182Z"}