如何在 ProtobufAnnotationSerializer 中获取 protobuf 扩展字段
How to get protobuf extension field in ProtobufAnnotationSerializer
我是 protocol-buffers 的新手,正在尝试弄清楚如何在 Stanford CoreNLP 库中扩展消息类型,如下所述:https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializer.html
问题:我可以设置扩展字段,但我无法获取。我把问题归结为下面的代码。在原始消息中,字段名称是 [edu.stanford.nlp.pipeline.myNewField]
,但在反序列化消息中被字段编号 101
替换。
如何获取 myNewField 的值?
PS:这个post建议应该和调用getExtension(MyAppProtos.myNewField)
一样简单
custom.proto
syntax = "proto2";
package edu.stanford.nlp.pipeline;
option java_package = "com.example.my.awesome.nlp.app";
option java_outer_classname = "MyAppProtos";
import "CoreNLP.proto";
extend Sentence {
optional uint32 myNewField = 101;
}
ProtoTest.java
import com.example.my.awesome.nlp.app.MyAppProtos;
import com.google.protobuf.ExtensionRegistry;
import com.google.protobuf.InvalidProtocolBufferException;
import edu.stanford.nlp.pipeline.CoreNLPProtos;
import edu.stanford.nlp.pipeline.CoreNLPProtos.Sentence;
public class ProtoTest {
static {
ExtensionRegistry registry = ExtensionRegistry.newInstance();
registry.add(MyAppProtos.myNewField);
CoreNLPProtos.registerAllExtensions(registry);
}
public static void main(String[] args) throws InvalidProtocolBufferException {
Sentence originalSentence = Sentence.newBuilder()
.setText("Hello world!")
.setTokenOffsetBegin(0)
.setTokenOffsetEnd(12)
.setExtension(MyAppProtos.myNewField, 13)
.build();
System.out.println("Original:\n" + originalSentence);
byte[] serialized = originalSentence.toByteArray();
Sentence deserializedSentence = Sentence.parseFrom(serialized);
System.out.println("Deserialized:\n" + deserializedSentence);
Integer myNewField = deserializedSentence.getExtension(MyAppProtos.myNewField);
System.out.println("MyNewField: " + myNewField);
}
}
输出:
Original:
tokenOffsetBegin: 0
tokenOffsetEnd: 12
text: "Hello world!"
[edu.stanford.nlp.pipeline.myNewField]: 13
Deserialized:
tokenOffsetBegin: 0
tokenOffsetEnd: 12
text: "Hello world!"
101: 13
MyNewField: 0
更新
因为这个问题是关于扩展 CoreNLP 消息类型并将它们与 ProtobufAnnotationSerializer
一起使用,所以这是我的扩展序列化程序的样子:
import java.io.IOException;
import java.io.InputStream;
import java.util.Set;
import com.example.my.awesome.nlp.app.MyAppProtos;
import com.google.protobuf.ExtensionRegistry;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.CoreNLPProtos;
import edu.stanford.nlp.pipeline.CoreNLPProtos.Sentence;
import edu.stanford.nlp.pipeline.CoreNLPProtos.Sentence.Builder;
import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Pair;
public class MySerializer extends ProtobufAnnotationSerializer {
private static ExtensionRegistry registry;
static {
registry = ExtensionRegistry.newInstance();
registry.add(MyAppProtos.myNewField);
CoreNLPProtos.registerAllExtensions(registry);
}
@Override
protected Builder toProtoBuilder(CoreMap sentence, Set<Class<?>> keysToSerialize) {
keysToSerialize.remove(MyAnnotation.class);
Builder builder = super.toProtoBuilder(sentence, keysToSerialize);
builder.setExtension(MyAppProtos.myNewField, 13);
return builder;
}
@Override
public Pair<Annotation, InputStream> read(InputStream is)
throws IOException, ClassNotFoundException, ClassCastException {
CoreNLPProtos.Document doc = CoreNLPProtos.Document.parseDelimitedFrom(is, registry);
return Pair.makePair(fromProto(doc), is);
}
@Override
protected CoreMap fromProtoNoTokens(Sentence proto) {
CoreMap result = super.fromProtoNoTokens(proto);
result.set(MyAnnotation.class, proto.getExtension(MyAppProtos.myNewField));
return result;
}
}
错误是我没有提供带有扩展注册表的 parseFrom
调用。
将 Sentence deserializedSentence = Sentence.parseFrom(serialized);
更改为 Sentence deserializedSentence = Sentence.parseFrom(serialized, registry);
成功了!
我是 protocol-buffers 的新手,正在尝试弄清楚如何在 Stanford CoreNLP 库中扩展消息类型,如下所述:https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializer.html
问题:我可以设置扩展字段,但我无法获取。我把问题归结为下面的代码。在原始消息中,字段名称是 [edu.stanford.nlp.pipeline.myNewField]
,但在反序列化消息中被字段编号 101
替换。
如何获取 myNewField 的值?
PS:这个post建议应该和调用getExtension(MyAppProtos.myNewField)
custom.proto
syntax = "proto2";
package edu.stanford.nlp.pipeline;
option java_package = "com.example.my.awesome.nlp.app";
option java_outer_classname = "MyAppProtos";
import "CoreNLP.proto";
extend Sentence {
optional uint32 myNewField = 101;
}
ProtoTest.java
import com.example.my.awesome.nlp.app.MyAppProtos;
import com.google.protobuf.ExtensionRegistry;
import com.google.protobuf.InvalidProtocolBufferException;
import edu.stanford.nlp.pipeline.CoreNLPProtos;
import edu.stanford.nlp.pipeline.CoreNLPProtos.Sentence;
public class ProtoTest {
static {
ExtensionRegistry registry = ExtensionRegistry.newInstance();
registry.add(MyAppProtos.myNewField);
CoreNLPProtos.registerAllExtensions(registry);
}
public static void main(String[] args) throws InvalidProtocolBufferException {
Sentence originalSentence = Sentence.newBuilder()
.setText("Hello world!")
.setTokenOffsetBegin(0)
.setTokenOffsetEnd(12)
.setExtension(MyAppProtos.myNewField, 13)
.build();
System.out.println("Original:\n" + originalSentence);
byte[] serialized = originalSentence.toByteArray();
Sentence deserializedSentence = Sentence.parseFrom(serialized);
System.out.println("Deserialized:\n" + deserializedSentence);
Integer myNewField = deserializedSentence.getExtension(MyAppProtos.myNewField);
System.out.println("MyNewField: " + myNewField);
}
}
输出:
Original:
tokenOffsetBegin: 0
tokenOffsetEnd: 12
text: "Hello world!"
[edu.stanford.nlp.pipeline.myNewField]: 13
Deserialized:
tokenOffsetBegin: 0
tokenOffsetEnd: 12
text: "Hello world!"
101: 13
MyNewField: 0
更新
因为这个问题是关于扩展 CoreNLP 消息类型并将它们与 ProtobufAnnotationSerializer
一起使用,所以这是我的扩展序列化程序的样子:
import java.io.IOException;
import java.io.InputStream;
import java.util.Set;
import com.example.my.awesome.nlp.app.MyAppProtos;
import com.google.protobuf.ExtensionRegistry;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.CoreNLPProtos;
import edu.stanford.nlp.pipeline.CoreNLPProtos.Sentence;
import edu.stanford.nlp.pipeline.CoreNLPProtos.Sentence.Builder;
import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Pair;
public class MySerializer extends ProtobufAnnotationSerializer {
private static ExtensionRegistry registry;
static {
registry = ExtensionRegistry.newInstance();
registry.add(MyAppProtos.myNewField);
CoreNLPProtos.registerAllExtensions(registry);
}
@Override
protected Builder toProtoBuilder(CoreMap sentence, Set<Class<?>> keysToSerialize) {
keysToSerialize.remove(MyAnnotation.class);
Builder builder = super.toProtoBuilder(sentence, keysToSerialize);
builder.setExtension(MyAppProtos.myNewField, 13);
return builder;
}
@Override
public Pair<Annotation, InputStream> read(InputStream is)
throws IOException, ClassNotFoundException, ClassCastException {
CoreNLPProtos.Document doc = CoreNLPProtos.Document.parseDelimitedFrom(is, registry);
return Pair.makePair(fromProto(doc), is);
}
@Override
protected CoreMap fromProtoNoTokens(Sentence proto) {
CoreMap result = super.fromProtoNoTokens(proto);
result.set(MyAnnotation.class, proto.getExtension(MyAppProtos.myNewField));
return result;
}
}
错误是我没有提供带有扩展注册表的 parseFrom
调用。
将 Sentence deserializedSentence = Sentence.parseFrom(serialized);
更改为 Sentence deserializedSentence = Sentence.parseFrom(serialized, registry);
成功了!