如何在 Node.js 中创建有效的 Avro 格式文件
How to create a valid Avro format file in Node.js
我正在尝试创建一个有效的 avro 文件以上传到 google-bigquery。
// Reproduction script from the question: opens myAvroFile.avro via node-avro-io
// (flags: 'w', codec: 'deflate') and writes three records using `schema` below.
var avro = require('node-avro-io').DataFile.AvroFile();
// Avro schema for the top-level "data" record; it is handed to avro.open().
var schema = {
"name": "data",
"type": "record",
"fields": [
{"name":"key","type": "string"},
{"name":"value","type": "string"},
{"name":"flag","type": "boolean"},
// NOTE(review): this is the field avro-tools rejects ("record" is not a defined
// name). A field's "type" must be a schema object or a defined name, so the nested
// record has to be wrapped: "type": {"name": ..., "type": "record", "fields": [...]}.
{"name":"subrecord","type":"record","fields":[
{"name":"key","type":"string"},
// Union type: the nested value may be a string, an int, or null.
{"name":"value","type":["string","int","null"]}
]}
]
};
// Open the output file with write flags and the deflate codec.
var writer = avro.open("myAvroFile.avro", schema, { flags: 'w' , codec: 'deflate'});
// Two records are appended; the third is passed to end(), which presumably writes it
// and finishes the file — confirm against the node-avro-io documentation.
writer
.append({ key:"john", value:"hive", flag: true, subrecord: { key: "preference", value: 2}})
.append({ key:"eric", value:"lola", flag: true, subrecord: { key: "postcode", value: null}})
.end({ key:"fred", value:"wonka", flag: false, subrecord: { key: "city", value: "michigan"}});
以下是生成的 myAvroFile.avro 的内容:
Obj�avro.codec�deflate�avro.schema�{"name":"data","type":"record","fields":[{"name":"key","type":"string"},{"name":"value","type":"string"},{"name":"flag","type":"boolean"},{"name":"subrecord","type":"record","fields":[{"name":"key","type":"string"},{"name":"value","type":["string","int","null"]}]}]} �3�ä0�x���A� @0O� )�T�%H4��:�*Uy�>P0��%�05k��n�d�T�������\����I�3�ä0�x�
但是当我尝试将它上传到 BigQuery 时失败了:
The Apache Avro library failed to parse file
而当我上传从以下链接下载并解压得到的 avro 文件时,却成功了: https://cloud.google.com/bigquery/docs/yob1900.avro.zip
为什么 myAvroFile.avro 无效?
如何使用 node-avro-io 库创建有效的 avro 文件?
评论的字数限制似乎太小,所以我把它作为答案发布。
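这是我使用 avro-tools 时收到的错误消息:
Exception in thread "main" org.apache.avro.SchemaParseException: "record" is not a defined name. The type of the "subrecord" field must be a defined name or a {"type": ...} expression.
(即:"record" 不是已定义的名称;"subrecord" 字段的类型必须是已定义的名称或 {"type": ...} 表达式。)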
我发现 avro-tools 可以方便地处理 avro 文件。这个链接解释了如何使用它。
"subrecord" 字段的 type 属性必须是一个模式(schema),而不能直接写成 "record"。根据 Avro 规范(specification):
type: A JSON object defining a schema, or a JSON string naming a record definition (required).
所以你应该这样改:
{"name":"subrecord",
"type": {"name":"subrecord_type",
"type":"record",
"fields":[{"name":"key","type":"string"},
{"name":"value","type":["string","int","null"]}
]
}
}
我正在尝试创建一个有效的 avro 文件以上传到 google-bigquery。
// Reproduction script from the question: opens myAvroFile.avro via node-avro-io
// (flags: 'w', codec: 'deflate') and writes three records using `schema` below.
var avro = require('node-avro-io').DataFile.AvroFile();
// Avro schema for the top-level "data" record; it is handed to avro.open().
var schema = {
"name": "data",
"type": "record",
"fields": [
{"name":"key","type": "string"},
{"name":"value","type": "string"},
{"name":"flag","type": "boolean"},
// NOTE(review): this is the field avro-tools rejects ("record" is not a defined
// name). A field's "type" must be a schema object or a defined name, so the nested
// record has to be wrapped: "type": {"name": ..., "type": "record", "fields": [...]}.
{"name":"subrecord","type":"record","fields":[
{"name":"key","type":"string"},
// Union type: the nested value may be a string, an int, or null.
{"name":"value","type":["string","int","null"]}
]}
]
};
// Open the output file with write flags and the deflate codec.
var writer = avro.open("myAvroFile.avro", schema, { flags: 'w' , codec: 'deflate'});
// Two records are appended; the third is passed to end(), which presumably writes it
// and finishes the file — confirm against the node-avro-io documentation.
writer
.append({ key:"john", value:"hive", flag: true, subrecord: { key: "preference", value: 2}})
.append({ key:"eric", value:"lola", flag: true, subrecord: { key: "postcode", value: null}})
.end({ key:"fred", value:"wonka", flag: false, subrecord: { key: "city", value: "michigan"}});
以下是生成的 myAvroFile.avro 的内容:
Obj�avro.codec�deflate�avro.schema�{"name":"data","type":"record","fields":[{"name":"key","type":"string"},{"name":"value","type":"string"},{"name":"flag","type":"boolean"},{"name":"subrecord","type":"record","fields":[{"name":"key","type":"string"},{"name":"value","type":["string","int","null"]}]}]} �3�ä0�x���A� @0O� )�T�%H4��:�*Uy�>P0��%�05k��n�d�T�������\����I�3�ä0�x�
但是当我尝试将它上传到 BigQuery 时失败了:
The Apache Avro library failed to parse file
而当我上传从以下链接下载并解压得到的 avro 文件时,却成功了: https://cloud.google.com/bigquery/docs/yob1900.avro.zip
为什么 myAvroFile.avro 无效?
如何使用 node-avro-io 库创建有效的 avro 文件?
评论的字数限制似乎太小,所以我把它作为答案发布。
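这是我使用 avro-tools 时收到的错误消息:
Exception in thread "main" org.apache.avro.SchemaParseException: "record" is not a defined name. The type of the "subrecord" field must be a defined name or a {"type": ...} expression.
(即:"record" 不是已定义的名称;"subrecord" 字段的类型必须是已定义的名称或 {"type": ...} 表达式。)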
我发现 avro-tools 可以方便地处理 avro 文件。这个链接解释了如何使用它。
"subrecord" 字段的 type 属性必须是一个模式(schema),而不能直接写成 "record"。根据 Avro 规范(specification):
type: A JSON object defining a schema, or a JSON string naming a record definition (required).
所以你应该这样改:
{"name":"subrecord",
"type": {"name":"subrecord_type",
"type":"record",
"fields":[{"name":"key","type":"string"},
{"name":"value","type":["string","int","null"]}
]
}
}