VDS.RDF.Parsing.RdfXmlParser 缺少 Load(IRdfHandler handler, XmlDocument document) 重载
VDS.RDF.Parsing.RdfXmlParser Load(IRdfHandler handler, XmlDocument document) missing
我正在寻找具有上述签名的重载。
我需要从 XmlDocument 加载,因为直接从 owl 文件加载或通过流加载都会导致以下错误:
"The input document has exceeded a limit set by MaxCharactersFromEntities."
有什么我不知道的显而易见的事情吗?
谢谢,
Jan
编辑 1 - 添加显示异常的代码
我尝试解析 Cell Line Ontology(约 100MB)。因为我只需要其中一些特定的内容,所以我想使用一个处理程序(handler)来只关注我感兴趣的部分。为了演示我的问题,这里使用 CountHandler:
/// <summary>
/// Repro for the "MaxCharactersFromEntities" failure: loads the Cell Line Ontology
/// (clo.owl) with the stock dotNetRDF RDF/XML parser and passes it to a CountHandler.
/// </summary>
/// <remarks>
/// The commented-out lines are the alternatives that were tried (loading into a graph,
/// and pre-loading an XmlDocument through an XmlReader with custom settings).
/// </remarks>
private static void loadCellLineOntology()
{
    try
    {
        // Only referenced by the commented-out XmlReader/XmlDocument experiment below.
        var settings = new System.Xml.XmlReaderSettings()
        {
            MaxCharactersFromEntities = 0,
            DtdProcessing = System.Xml.DtdProcessing.Parse
        };
        var doc = new System.Xml.XmlDocument();
        var parser = new VDS.RDF.Parsing.RdfXmlParser(VDS.RDF.Parsing.RdfXmlParserMode.DOM);
        //using (var stream = new System.IO.FileStream(@"C:\Users\jan.hummel\Downloads\clo.owl", System.IO.FileMode.Open))
        //using (var reader = System.Xml.XmlReader.Create(stream, settings))
        using (IGraph g = new NonIndexedGraph())
        {
            //doc.Load(reader);
            //parser.Load(g, @"C:\Users\jahu\Downloads\clo.owl");
            var handler = new VDS.RDF.Parsing.Handlers.CountHandler();
            parser.Load(handler, @"C:\Users\jahu\Downloads\clo.owl");
            //parser.Load(handler, doc);
        }
    }
    catch (Exception ex)
    {
        // Report the failure before breaking: without an attached debugger the
        // exception would otherwise vanish without a trace.
        Console.Error.WriteLine(ex);
        Debugger.Break();
    }
}
没有什么明显被忽略的东西。您要找的重载并不存在,而且 RDF/XML 解析器的基础结构不允许您设置 XmlReaderSettings.MaxCharactersFromEntities。
我通过复制解析器中刚好足以更改该设置的相关部分,绕过了这个问题。请注意,这种做法依赖于内部实现细节,因此所有对私有成员的调用都是通过反射(Reflection)完成的。
值得关注的部分是 CellLineOntology.RdfXmlParser.Context.Generator.ctor(Stream)。
有了下面的代码之后,您可以这样调用:
// Run clo.owl through the workaround parser; the CountHandler is used only to count statements.
VDS.RDF.Parsing.Handlers.CountHandler statementCounter = new VDS.RDF.Parsing.Handlers.CountHandler();
CellLineOntology.RdfXmlParser.Load(statementCounter, @"..\..\..\..\clo.owl");
我使用您链接的文件得到了 1,387,097 个语句。
namespace CellLineOntology
{
using System;
using System.IO;
using System.Reflection;
using System.Xml;
using VDS.RDF;
using VDS.RDF.Parsing.Contexts;
using VDS.RDF.Parsing.Events;
using VDS.RDF.Parsing.Events.RdfXml;
using VDS.RDF.Parsing.Handlers;
/// <summary>
/// Workaround loader that feeds an RDF/XML file to dotNetRDF's
/// <c>VDS.RDF.Parsing.RdfXmlParser</c> using an <see cref="XmlReader"/> whose
/// <see cref="XmlReaderSettings.MaxCharactersFromEntities"/> is set to 0.
/// </summary>
/// <remarks>
/// Everything here reaches non-public dotNetRDF members by name through reflection, so it
/// can break on any library upgrade. Each lookup is validated so that such a break surfaces
/// as a <see cref="MissingMemberException"/> naming the member instead of a bare
/// <see cref="NullReferenceException"/>.
/// </remarks>
internal class RdfXmlParser
{
    private const BindingFlags NonPublicInstance = BindingFlags.Instance | BindingFlags.NonPublic;
    private const BindingFlags NonPublicStatic = BindingFlags.Static | BindingFlags.NonPublic;

    /// <summary>
    /// Parses the RDF/XML file at <paramref name="filename"/> and passes the results to
    /// <paramref name="handler"/>.
    /// </summary>
    /// <param name="handler">Handler that receives the parsed RDF.</param>
    /// <param name="filename">Path of the RDF/XML file to read.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="handler"/> or <paramref name="filename"/> is null.
    /// </exception>
    /// <exception cref="MissingMemberException">
    /// A dotNetRDF internal this workaround relies on could not be found.
    /// </exception>
    public static void Load(IRdfHandler handler, string filename)
    {
        if (handler == null)
        {
            throw new ArgumentNullException(nameof(handler));
        }

        if (filename == null)
        {
            throw new ArgumentNullException(nameof(filename));
        }

        using (var input = File.OpenRead(filename))
        {
            Parse(new Context(handler, input));
        }
    }

    /// <summary>
    /// Invokes the non-public instance method <c>Parse</c> of the dotNetRDF parser on a fresh
    /// parser instance. Because the call goes through <see cref="MethodBase.Invoke(object, object[])"/>,
    /// exceptions thrown by the parser arrive wrapped in a <see cref="TargetInvocationException"/>.
    /// </summary>
    private static void Parse(RdfXmlParserContext context) =>
        GetRequiredMethod(typeof(VDS.RDF.Parsing.RdfXmlParser), "Parse", NonPublicInstance)
            .Invoke(new VDS.RDF.Parsing.RdfXmlParser(), new object[] { context });

    /// <summary>Looks up a non-public method by name and fails loudly if it no longer exists.</summary>
    private static MethodInfo GetRequiredMethod(Type type, string name, BindingFlags flags)
    {
        var method = type.GetMethod(name, flags);
        if (method == null)
        {
            throw new MissingMethodException(type.FullName, name);
        }

        return method;
    }

    /// <summary>Overwrites a non-public instance field by name and fails loudly if it no longer exists.</summary>
    private static void SetRequiredField(Type type, string name, object target, object value)
    {
        var field = type.GetField(name, NonPublicInstance);
        if (field == null)
        {
            throw new MissingFieldException(type.FullName, name);
        }

        field.SetValue(target, value);
    }

    /// <summary>
    /// Parser context whose event queue is replaced with one driven by <see cref="Generator"/>.
    /// </summary>
    private class Context : RdfXmlParserContext
    {
        // Write-only shim over the base class's non-public "_queue" field.
        private IEventQueue<IRdfXmlEvent> _queue
        {
            set => SetRequiredField(typeof(RdfXmlParserContext), "_queue", this, value);
        }

        /// <summary>
        /// Builds the base context on <see cref="Stream.Null"/> and then substitutes a queue
        /// that reads the real <paramref name="input"/> through <see cref="Generator"/>.
        /// </summary>
        public Context(IRdfHandler handler, Stream input)
            : base(handler, Stream.Null)
        {
            _queue = new StreamingEventQueue<IRdfXmlEvent>(new Generator(input, ToSafeString(GetBaseUri(handler))));
        }

        // Calls the non-public static HandlerExtensions.GetBaseUri(handler).
        private static Uri GetBaseUri(IRdfHandler handler) =>
            (Uri)GetRequiredMethod(typeof(HandlerExtensions), "GetBaseUri", NonPublicStatic)
                .Invoke(null, new object[] { handler });

        // Null-safe conversion: a missing base URI becomes the empty string.
        private static string ToSafeString(Uri uri) => (uri == null) ? string.Empty : uri.AbsoluteUri;

        /// <summary>
        /// Event generator whose XML reader is replaced with one that has
        /// <see cref="XmlReaderSettings.MaxCharactersFromEntities"/> set to 0.
        /// </summary>
        private class Generator : StreamingEventGenerator
        {
            // Write-only shims over the base class's non-public fields of the same names.
            private XmlReader _reader
            {
                set => SetRequiredField(typeof(StreamingEventGenerator), "_reader", this, value);
            }

            private bool _hasLineInfo
            {
                set => SetRequiredField(typeof(StreamingEventGenerator), "_hasLineInfo", this, value);
            }

            private string _currentBaseUri
            {
                set => SetRequiredField(typeof(StreamingEventGenerator), "_currentBaseUri", this, value);
            }

            /// <summary>
            /// Initialises the base generator on <see cref="Stream.Null"/>, then swaps in a reader
            /// over <paramref name="stream"/> built from the library's own settings with the
            /// entity-expansion limit changed.
            /// </summary>
            public Generator(Stream stream)
                : base(Stream.Null)
            {
                var settings = GetSettings();

                // This is why we're here: 0 means "no limit" on characters produced by entity expansion.
                // SECURITY: with the limit off, a hostile document can exhaust memory through
                // entity expansion. Only use this on trusted input.
                settings.MaxCharactersFromEntities = 0;

                // NOTE(review): this reader is not disposed here; the caller's using-block in Load
                // closes the underlying stream.
                var reader = XmlReader.Create(stream, settings);
                _reader = reader;
                _hasLineInfo = reader is IXmlLineInfo;
            }

            /// <summary>
            /// Same as <see cref="Generator(Stream)"/>, and also sets the generator's current base URI.
            /// </summary>
            public Generator(Stream stream, string baseUri)
                : this(stream)
            {
                _currentBaseUri = baseUri;
            }

            // Calls the base class's non-public instance method GetSettings() on this instance.
            private XmlReaderSettings GetSettings() =>
                (XmlReaderSettings)GetRequiredMethod(typeof(StreamingEventGenerator), "GetSettings", NonPublicInstance)
                    .Invoke(this, null);
        }
    }
}
}
我正在寻找具有上述签名的重载。
我需要从 XmlDocument 加载,因为直接从 owl 文件加载或通过流加载都会导致以下错误:
"The input document has exceeded a limit set by MaxCharactersFromEntities."
有什么我不知道的显而易见的事情吗?
谢谢, Jan
编辑 1 - 添加显示异常的代码
我尝试解析 Cell Line Ontology(约 100MB)。因为我只需要其中一些特定的内容,所以我想使用一个处理程序(handler)来只关注我感兴趣的部分。为了演示我的问题,这里使用 CountHandler:
/// <summary>
/// Repro for the "MaxCharactersFromEntities" failure: loads the Cell Line Ontology
/// (clo.owl) with the stock dotNetRDF RDF/XML parser and passes it to a CountHandler.
/// </summary>
/// <remarks>
/// The commented-out lines are the alternatives that were tried (loading into a graph,
/// and pre-loading an XmlDocument through an XmlReader with custom settings).
/// </remarks>
private static void loadCellLineOntology()
{
    try
    {
        // Only referenced by the commented-out XmlReader/XmlDocument experiment below.
        var settings = new System.Xml.XmlReaderSettings()
        {
            MaxCharactersFromEntities = 0,
            DtdProcessing = System.Xml.DtdProcessing.Parse
        };
        var doc = new System.Xml.XmlDocument();
        var parser = new VDS.RDF.Parsing.RdfXmlParser(VDS.RDF.Parsing.RdfXmlParserMode.DOM);
        //using (var stream = new System.IO.FileStream(@"C:\Users\jan.hummel\Downloads\clo.owl", System.IO.FileMode.Open))
        //using (var reader = System.Xml.XmlReader.Create(stream, settings))
        using (IGraph g = new NonIndexedGraph())
        {
            //doc.Load(reader);
            //parser.Load(g, @"C:\Users\jahu\Downloads\clo.owl");
            var handler = new VDS.RDF.Parsing.Handlers.CountHandler();
            parser.Load(handler, @"C:\Users\jahu\Downloads\clo.owl");
            //parser.Load(handler, doc);
        }
    }
    catch (Exception ex)
    {
        // Report the failure before breaking: without an attached debugger the
        // exception would otherwise vanish without a trace.
        Console.Error.WriteLine(ex);
        Debugger.Break();
    }
}
没有什么明显被忽略的东西。您要找的重载并不存在,而且 RDF/XML 解析器的基础结构不允许您设置 XmlReaderSettings.MaxCharactersFromEntities。
我通过复制解析器中刚好足以更改该设置的相关部分,绕过了这个问题。请注意,这种做法依赖于内部实现细节,因此所有对私有成员的调用都是通过反射(Reflection)完成的。
值得关注的部分是 CellLineOntology.RdfXmlParser.Context.Generator.ctor(Stream)。
有了下面的代码之后,您可以这样调用:
// Run clo.owl through the workaround parser; the CountHandler is used only to count statements.
VDS.RDF.Parsing.Handlers.CountHandler statementCounter = new VDS.RDF.Parsing.Handlers.CountHandler();
CellLineOntology.RdfXmlParser.Load(statementCounter, @"..\..\..\..\clo.owl");
我使用您链接的文件得到了 1,387,097 个语句。
namespace CellLineOntology
{
using System;
using System.IO;
using System.Reflection;
using System.Xml;
using VDS.RDF;
using VDS.RDF.Parsing.Contexts;
using VDS.RDF.Parsing.Events;
using VDS.RDF.Parsing.Events.RdfXml;
using VDS.RDF.Parsing.Handlers;
/// <summary>
/// Workaround loader that feeds an RDF/XML file to dotNetRDF's
/// <c>VDS.RDF.Parsing.RdfXmlParser</c> using an <see cref="XmlReader"/> whose
/// <see cref="XmlReaderSettings.MaxCharactersFromEntities"/> is set to 0.
/// </summary>
/// <remarks>
/// Everything here reaches non-public dotNetRDF members by name through reflection, so it
/// can break on any library upgrade. Each lookup is validated so that such a break surfaces
/// as a <see cref="MissingMemberException"/> naming the member instead of a bare
/// <see cref="NullReferenceException"/>.
/// </remarks>
internal class RdfXmlParser
{
    private const BindingFlags NonPublicInstance = BindingFlags.Instance | BindingFlags.NonPublic;
    private const BindingFlags NonPublicStatic = BindingFlags.Static | BindingFlags.NonPublic;

    /// <summary>
    /// Parses the RDF/XML file at <paramref name="filename"/> and passes the results to
    /// <paramref name="handler"/>.
    /// </summary>
    /// <param name="handler">Handler that receives the parsed RDF.</param>
    /// <param name="filename">Path of the RDF/XML file to read.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="handler"/> or <paramref name="filename"/> is null.
    /// </exception>
    /// <exception cref="MissingMemberException">
    /// A dotNetRDF internal this workaround relies on could not be found.
    /// </exception>
    public static void Load(IRdfHandler handler, string filename)
    {
        if (handler == null)
        {
            throw new ArgumentNullException(nameof(handler));
        }

        if (filename == null)
        {
            throw new ArgumentNullException(nameof(filename));
        }

        using (var input = File.OpenRead(filename))
        {
            Parse(new Context(handler, input));
        }
    }

    /// <summary>
    /// Invokes the non-public instance method <c>Parse</c> of the dotNetRDF parser on a fresh
    /// parser instance. Because the call goes through <see cref="MethodBase.Invoke(object, object[])"/>,
    /// exceptions thrown by the parser arrive wrapped in a <see cref="TargetInvocationException"/>.
    /// </summary>
    private static void Parse(RdfXmlParserContext context) =>
        GetRequiredMethod(typeof(VDS.RDF.Parsing.RdfXmlParser), "Parse", NonPublicInstance)
            .Invoke(new VDS.RDF.Parsing.RdfXmlParser(), new object[] { context });

    /// <summary>Looks up a non-public method by name and fails loudly if it no longer exists.</summary>
    private static MethodInfo GetRequiredMethod(Type type, string name, BindingFlags flags)
    {
        var method = type.GetMethod(name, flags);
        if (method == null)
        {
            throw new MissingMethodException(type.FullName, name);
        }

        return method;
    }

    /// <summary>Overwrites a non-public instance field by name and fails loudly if it no longer exists.</summary>
    private static void SetRequiredField(Type type, string name, object target, object value)
    {
        var field = type.GetField(name, NonPublicInstance);
        if (field == null)
        {
            throw new MissingFieldException(type.FullName, name);
        }

        field.SetValue(target, value);
    }

    /// <summary>
    /// Parser context whose event queue is replaced with one driven by <see cref="Generator"/>.
    /// </summary>
    private class Context : RdfXmlParserContext
    {
        // Write-only shim over the base class's non-public "_queue" field.
        private IEventQueue<IRdfXmlEvent> _queue
        {
            set => SetRequiredField(typeof(RdfXmlParserContext), "_queue", this, value);
        }

        /// <summary>
        /// Builds the base context on <see cref="Stream.Null"/> and then substitutes a queue
        /// that reads the real <paramref name="input"/> through <see cref="Generator"/>.
        /// </summary>
        public Context(IRdfHandler handler, Stream input)
            : base(handler, Stream.Null)
        {
            _queue = new StreamingEventQueue<IRdfXmlEvent>(new Generator(input, ToSafeString(GetBaseUri(handler))));
        }

        // Calls the non-public static HandlerExtensions.GetBaseUri(handler).
        private static Uri GetBaseUri(IRdfHandler handler) =>
            (Uri)GetRequiredMethod(typeof(HandlerExtensions), "GetBaseUri", NonPublicStatic)
                .Invoke(null, new object[] { handler });

        // Null-safe conversion: a missing base URI becomes the empty string.
        private static string ToSafeString(Uri uri) => (uri == null) ? string.Empty : uri.AbsoluteUri;

        /// <summary>
        /// Event generator whose XML reader is replaced with one that has
        /// <see cref="XmlReaderSettings.MaxCharactersFromEntities"/> set to 0.
        /// </summary>
        private class Generator : StreamingEventGenerator
        {
            // Write-only shims over the base class's non-public fields of the same names.
            private XmlReader _reader
            {
                set => SetRequiredField(typeof(StreamingEventGenerator), "_reader", this, value);
            }

            private bool _hasLineInfo
            {
                set => SetRequiredField(typeof(StreamingEventGenerator), "_hasLineInfo", this, value);
            }

            private string _currentBaseUri
            {
                set => SetRequiredField(typeof(StreamingEventGenerator), "_currentBaseUri", this, value);
            }

            /// <summary>
            /// Initialises the base generator on <see cref="Stream.Null"/>, then swaps in a reader
            /// over <paramref name="stream"/> built from the library's own settings with the
            /// entity-expansion limit changed.
            /// </summary>
            public Generator(Stream stream)
                : base(Stream.Null)
            {
                var settings = GetSettings();

                // This is why we're here: 0 means "no limit" on characters produced by entity expansion.
                // SECURITY: with the limit off, a hostile document can exhaust memory through
                // entity expansion. Only use this on trusted input.
                settings.MaxCharactersFromEntities = 0;

                // NOTE(review): this reader is not disposed here; the caller's using-block in Load
                // closes the underlying stream.
                var reader = XmlReader.Create(stream, settings);
                _reader = reader;
                _hasLineInfo = reader is IXmlLineInfo;
            }

            /// <summary>
            /// Same as <see cref="Generator(Stream)"/>, and also sets the generator's current base URI.
            /// </summary>
            public Generator(Stream stream, string baseUri)
                : this(stream)
            {
                _currentBaseUri = baseUri;
            }

            // Calls the base class's non-public instance method GetSettings() on this instance.
            private XmlReaderSettings GetSettings() =>
                (XmlReaderSettings)GetRequiredMethod(typeof(StreamingEventGenerator), "GetSettings", NonPublicInstance)
                    .Invoke(this, null);
        }
    }
}
}