Python3 apache avro 1.8.2 不提供别名
Python3 apache avro 1.8.2 not providing aliases
我有以下 python3 avro 程序:
import avro.schema
import json
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter
# Writer schema: the record's string field is called "name".
write_schema = avro.schema.parse(json.dumps({
    "namespace": "example.avro",
    "type": "record",
    "name": "User",
    "fields": [
        {"name": "name", "type": "string"},
        {"name": "favorite_number", "type": ["int", "null"]},
        {"name": "favorite_color", "type": ["string", "null"]}
    ]
}))

# Write two records to users.avro with the writer schema.
writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), write_schema)
writer.append({"name": "Alyssa", "favorite_number": 256})
writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"})
writer.close()

# Reader schema: the string field is renamed to "first_name", declares
# "name" as an alias, and falls back to the default "".
read_schema = avro.schema.parse(json.dumps({
    "namespace": "example.avro",
    "type": "record",
    "name": "User",
    "fields": [
        {"name": "first_name", "type": "string", "default": "", "aliases": ["name"]},
        {"name": "favorite_number", "type": ["int", "null"]},
        {"name": "favorite_color", "type": ["string", "null"]}
    ]
}))

# Read the file back, passing both the writer and the reader schema.
reader = DataFileReader(open("users.avro", "rb"), DatumReader(write_schema, read_schema))
# Fetch the "avro.schema" metadata entry; it is not used further below.
new_schema = reader.get_meta("avro.schema")
users = []
for user in reader:
    users.append(user)
reader.close()

# Bare expression: shows the collected records when run interactively.
users
`users` 的内容如下:
[{'favorite_color': None, 'favorite_number': 256, 'first_name': ''},
{'favorite_color': 'red', 'favorite_number': 7, 'first_name': ''}]
我本以为 first_name 字段中会是 'Ben' 和 'Alyssa'。这个库中的别名是如何工作的?这种行为是否符合规范?
遗憾的是,我现在发现标准 avro 包的 Python 版本并没有实现别名 (https://issues.apache.org/jira/browse/AVRO-1303)。由于这个问题自 2013/2014 年以来一直悬而未决,我预计官方短期内不会修复。更有希望的是 fastavro 项目,它正在修复这个问题。
使用 fastavro 时别名确实可以正常工作。下面是使用该库的同一段代码:
from io import BytesIO
from fastavro import reader, writer
# Writer schema: the record's string field is called "name".
write_schema = {
    "namespace": "example.avro",
    "type": "record",
    "name": "User",
    "fields": [
        {"name": "name", "type": "string"},
        {"name": "favorite_number", "type": ["int", "null"]},
        {"name": "favorite_color", "type": ["string", "null"]}
    ]
}

records = [
    {"name": "Alyssa", "favorite_number": 256},
    {"name": "Ben", "favorite_number": 7, "favorite_color": "red"}
]

# Serialize the records into an in-memory buffer instead of a file.
bio = BytesIO()
writer(bio, write_schema, records)
bio.seek(0)  # rewind so the reader starts at the beginning of the buffer

# Reader schema: the string field is renamed to "first_name", declares
# "name" as an alias, and falls back to the default "".
read_schema = {
    "namespace": "example.avro",
    "type": "record",
    "name": "User",
    "fields": [
        {"name": "first_name", "type": "string", "default": "", "aliases": ["name"]},
        {"name": "favorite_number", "type": ["int", "null"]},
        {"name": "favorite_color", "type": ["string", "null"]}
    ]
}

# Read the buffer back with the reader schema and print each record.
for record in reader(bio, read_schema):
    print(record)
输出如下:
{'first_name': 'Alyssa', 'favorite_number': 256, 'favorite_color': None}
{'first_name': 'Ben', 'favorite_number': 7, 'favorite_color': 'red'}
我有以下 python3 avro 程序:
import avro.schema
import json
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter
# Writer schema: the record's string field is called "name".
write_schema = avro.schema.parse(json.dumps({
    "namespace": "example.avro",
    "type": "record",
    "name": "User",
    "fields": [
        {"name": "name", "type": "string"},
        {"name": "favorite_number", "type": ["int", "null"]},
        {"name": "favorite_color", "type": ["string", "null"]}
    ]
}))

# Write two records to users.avro with the writer schema.
writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), write_schema)
writer.append({"name": "Alyssa", "favorite_number": 256})
writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"})
writer.close()

# Reader schema: the string field is renamed to "first_name", declares
# "name" as an alias, and falls back to the default "".
read_schema = avro.schema.parse(json.dumps({
    "namespace": "example.avro",
    "type": "record",
    "name": "User",
    "fields": [
        {"name": "first_name", "type": "string", "default": "", "aliases": ["name"]},
        {"name": "favorite_number", "type": ["int", "null"]},
        {"name": "favorite_color", "type": ["string", "null"]}
    ]
}))

# Read the file back, passing both the writer and the reader schema.
reader = DataFileReader(open("users.avro", "rb"), DatumReader(write_schema, read_schema))
# Fetch the "avro.schema" metadata entry; it is not used further below.
new_schema = reader.get_meta("avro.schema")
users = []
for user in reader:
    users.append(user)
reader.close()

# Bare expression: shows the collected records when run interactively.
users
`users` 的内容如下:
[{'favorite_color': None, 'favorite_number': 256, 'first_name': ''},
{'favorite_color': 'red', 'favorite_number': 7, 'first_name': ''}]
我本以为 first_name 字段中会是 'Ben' 和 'Alyssa'。这个库中的别名是如何工作的?这种行为是否符合规范?
遗憾的是,我现在发现标准 avro 包的 Python 版本并没有实现别名 (https://issues.apache.org/jira/browse/AVRO-1303)。由于这个问题自 2013/2014 年以来一直悬而未决,我预计官方短期内不会修复。更有希望的是 fastavro 项目,它正在修复这个问题。
使用 fastavro 时别名确实可以正常工作。下面是使用该库的同一段代码:
from io import BytesIO
from fastavro import reader, writer
# Writer schema: the record's string field is called "name".
write_schema = {
    "namespace": "example.avro",
    "type": "record",
    "name": "User",
    "fields": [
        {"name": "name", "type": "string"},
        {"name": "favorite_number", "type": ["int", "null"]},
        {"name": "favorite_color", "type": ["string", "null"]}
    ]
}

records = [
    {"name": "Alyssa", "favorite_number": 256},
    {"name": "Ben", "favorite_number": 7, "favorite_color": "red"}
]

# Serialize the records into an in-memory buffer instead of a file.
bio = BytesIO()
writer(bio, write_schema, records)
bio.seek(0)  # rewind so the reader starts at the beginning of the buffer

# Reader schema: the string field is renamed to "first_name", declares
# "name" as an alias, and falls back to the default "".
read_schema = {
    "namespace": "example.avro",
    "type": "record",
    "name": "User",
    "fields": [
        {"name": "first_name", "type": "string", "default": "", "aliases": ["name"]},
        {"name": "favorite_number", "type": ["int", "null"]},
        {"name": "favorite_color", "type": ["string", "null"]}
    ]
}

# Read the buffer back with the reader schema and print each record.
for record in reader(bio, read_schema):
    print(record)
输出如下:
{'first_name': 'Alyssa', 'favorite_number': 256, 'favorite_color': None}
{'first_name': 'Ben', 'favorite_number': 7, 'favorite_color': 'red'}