将 xml 转换为流并禁用 dtd 中断反序列化
Converting xml to stream and disabling dtd breaking Deserialization
我创建了以下包装方法来禁用 DTD
/// <summary>
/// Console entry point that pushes a sample XML document, whose declaration
/// names encoding="utf-16", through <c>XmlWrapper</c> and <c>XmlSerializer</c>.
/// </summary>
public class Program
{
    /// <summary>
    /// Deserializes the sample XML into a <c>List&lt;SerializingTemplateItem&gt;</c>.
    /// Any exception is written to the console and then rethrown.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    public static void Main(string[] args)
    {
        // Verbatim literal: the continuation lines are part of the string, so they
        // must stay at column 0 to keep the XML text unchanged.
        var xml = @"<?xml version =""1.0"" encoding=""utf-16""?>
<ArrayOfSerializingTemplateItem xmlns:xsd=""http://www.w3.org/2001/XMLSchema"" xmlns:xsi=""http://www.w3.org/2001/XMLSchema-instance"">
<SerializingTemplateItem>
</SerializingTemplateItem>
</ArrayOfSerializingTemplateItem >";

        try
        {
            var xmlReader = XmlWrapper.CreateXmlReaderObject(xml);
            var serializer = new XmlSerializer(typeof(List<SerializingTemplateItem>));
            object result = serializer.Deserialize(xmlReader);
        }
        catch (Exception ex)
        {
            // Print the failure for the demo, then rethrow without resetting the stack trace.
            Console.WriteLine(ex);
            throw;
        }

        // Keep the console window open until Enter is pressed.
        Console.ReadLine();
    }
}
/// <summary>
/// Factory for <see cref="XmlReader"/> instances that ignore DTDs.
/// </summary>
public class XmlWrapper
{
    /// <summary>
    /// Encodes <paramref name="sr"/> as UTF-8 bytes and returns a non-validating
    /// reader over them with DTD processing set to <see cref="DtdProcessing.Ignore"/>.
    /// </summary>
    /// <param name="sr">The XML document text.</param>
    /// <returns>An <see cref="XmlReader"/> over the UTF-8 encoded text.</returns>
    /// <remarks>
    /// The bytes are always UTF-8. If the XML declaration names another encoding
    /// (for example encoding="utf-16"), reading fails with
    /// "There is no Unicode byte order mark. Cannot switch to Unicode."
    /// </remarks>
    public static XmlReader CreateXmlReaderObject(string sr)
    {
        var readerSettings = new XmlReaderSettings
        {
            ValidationType = ValidationType.None,
            DtdProcessing = DtdProcessing.Ignore,
        };

        // A freshly constructed MemoryStream is already positioned at offset 0.
        var utf8Stream = new MemoryStream(Encoding.UTF8.GetBytes(sr));
        return XmlReader.Create(utf8Stream, readerSettings);
    }
}
/// <summary>
/// Empty placeholder type; the sample deserializes the XML into a list of these.
/// </summary>
public class SerializingTemplateItem { }
以上抛出异常"There is no Unicode byte order mark. Cannot switch to Unicode."(此处演示fiddle:https://dotnetfiddle.net/pGxOE9)。
但是,如果我不调用 XmlWrapper 方法,
而是使用下面的代码创建 XmlReader,则一切正常。
// Builds the reader directly over the string with XmlTextReader, bypassing
// XmlWrapper, so the wrapper's XmlReaderSettings are not applied here.
XmlReader reader = new XmlTextReader(new StringReader(xml));
但我需要使用包装器方法作为禁用 DTD 的安全要求。我不知道为什么在调用包装器方法后无法反序列化。任何帮助将不胜感激。
您的问题在于,您使用 Encoding.UTF8
将 XML 编码到 MemoryStream 中
,但 XML 字符串本身却声明其编码为 UTF-16:
<?xml version ="1.0" encoding="utf-16"?>
<ArrayOfSerializingTemplateItem>
<!-- Content omitted -->
</ArrayOfSerializingTemplateItem >
显然,当 XmlReader
遇到此声明时,它会尝试遵循该声明,从 UTF-8 切换到 UTF-16,但由于某种原因失败了——可能是因为流实际上是以 UTF-8 编码的。相反,当已弃用的 XmlTextReader
遇到该声明时,它显然只是将其视为未实现而忽略,这恰好使代码在这种情况下能够成功运行。
解决此问题的最简单方法是使用 StringReader
和 XmlReader.Create(TextReader, XmlReaderSettings)
直接从字符串中读取:
/// <summary>
/// Factory for <see cref="XmlReader"/> instances that ignore DTDs.
/// </summary>
public class XmlWrapper
{
    /// <summary>
    /// Returns a non-validating reader that reads <paramref name="sr"/> directly
    /// as text, with DTD processing set to <see cref="DtdProcessing.Ignore"/>.
    /// </summary>
    /// <param name="sr">The XML document text.</param>
    /// <returns>An <see cref="XmlReader"/> over the string.</returns>
    public static XmlReader CreateXmlReaderObject(string sr)
    {
        XmlReaderSettings readerSettings = new XmlReaderSettings();
        readerSettings.ValidationType = ValidationType.None;
        readerSettings.DtdProcessing = DtdProcessing.Ignore;

        // Reading from a TextReader avoids the intermediate byte array entirely.
        TextReader input = new StringReader(sr);
        return XmlReader.Create(input, readerSettings);
    }
}
由于 C# 字符串在内部始终以 UTF-16 编码,XML 中的编码声明会因无关紧要而被忽略。这样做还能提高性能,因为完全省去了转换为中间字节数组的步骤。
顺便说一下,您应该通过 using
语句来释放(dispose)您的 XmlReader
:
// Declared outside the using block so the result remains available after the
// reader has been disposed.
object ob;
using (XmlReader reader = XmlWrapper.CreateXmlReaderObject(s))
{
    var serializer = new XmlSerializer(typeof(List<SerializingTemplateItem>));
    ob = serializer.Deserialize(reader);
}
工作样本 fiddle here.
相关问题:
- Meaning of - <?xml version="1.0" encoding="utf-8"?>
- Ignoring specified encoding when deserializing XML
我创建了以下包装方法来禁用 DTD
/// <summary>
/// Console entry point that pushes a sample XML document, whose declaration
/// names encoding="utf-16", through <c>XmlWrapper</c> and <c>XmlSerializer</c>.
/// </summary>
public class Program
{
    /// <summary>
    /// Deserializes the sample XML into a <c>List&lt;SerializingTemplateItem&gt;</c>.
    /// Any exception is written to the console and then rethrown.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    public static void Main(string[] args)
    {
        // Verbatim literal: the continuation lines are part of the string, so they
        // must stay at column 0 to keep the XML text unchanged.
        var xml = @"<?xml version =""1.0"" encoding=""utf-16""?>
<ArrayOfSerializingTemplateItem xmlns:xsd=""http://www.w3.org/2001/XMLSchema"" xmlns:xsi=""http://www.w3.org/2001/XMLSchema-instance"">
<SerializingTemplateItem>
</SerializingTemplateItem>
</ArrayOfSerializingTemplateItem >";

        try
        {
            var xmlReader = XmlWrapper.CreateXmlReaderObject(xml);
            var serializer = new XmlSerializer(typeof(List<SerializingTemplateItem>));
            object result = serializer.Deserialize(xmlReader);
        }
        catch (Exception ex)
        {
            // Print the failure for the demo, then rethrow without resetting the stack trace.
            Console.WriteLine(ex);
            throw;
        }

        // Keep the console window open until Enter is pressed.
        Console.ReadLine();
    }
}
/// <summary>
/// Factory for <see cref="XmlReader"/> instances that ignore DTDs.
/// </summary>
public class XmlWrapper
{
    /// <summary>
    /// Encodes <paramref name="sr"/> as UTF-8 bytes and returns a non-validating
    /// reader over them with DTD processing set to <see cref="DtdProcessing.Ignore"/>.
    /// </summary>
    /// <param name="sr">The XML document text.</param>
    /// <returns>An <see cref="XmlReader"/> over the UTF-8 encoded text.</returns>
    /// <remarks>
    /// The bytes are always UTF-8. If the XML declaration names another encoding
    /// (for example encoding="utf-16"), reading fails with
    /// "There is no Unicode byte order mark. Cannot switch to Unicode."
    /// </remarks>
    public static XmlReader CreateXmlReaderObject(string sr)
    {
        var readerSettings = new XmlReaderSettings
        {
            ValidationType = ValidationType.None,
            DtdProcessing = DtdProcessing.Ignore,
        };

        // A freshly constructed MemoryStream is already positioned at offset 0.
        var utf8Stream = new MemoryStream(Encoding.UTF8.GetBytes(sr));
        return XmlReader.Create(utf8Stream, readerSettings);
    }
}
/// <summary>
/// Empty placeholder type; the sample deserializes the XML into a list of these.
/// </summary>
public class SerializingTemplateItem { }
以上抛出异常"There is no Unicode byte order mark. Cannot switch to Unicode."(此处演示fiddle:https://dotnetfiddle.net/pGxOE9)。
但是,如果我不调用 XmlWrapper 方法,
而是使用下面的代码创建 XmlReader,则一切正常。
// Builds the reader directly over the string with XmlTextReader, bypassing
// XmlWrapper, so the wrapper's XmlReaderSettings are not applied here.
XmlReader reader = new XmlTextReader(new StringReader(xml));
但我需要使用包装器方法作为禁用 DTD 的安全要求。我不知道为什么在调用包装器方法后无法反序列化。任何帮助将不胜感激。
您的问题在于,您使用 Encoding.UTF8
将 XML 编码到 MemoryStream 中
,但 XML 字符串本身却声明其编码为 UTF-16:
<?xml version ="1.0" encoding="utf-16"?>
<ArrayOfSerializingTemplateItem>
<!-- Content omitted -->
</ArrayOfSerializingTemplateItem >
显然,当 XmlReader
遇到此声明时,它会尝试遵循该声明,从 UTF-8 切换到 UTF-16,但由于某种原因失败了——可能是因为流实际上是以 UTF-8 编码的。相反,当已弃用的 XmlTextReader
遇到该声明时,它显然只是将其视为未实现而忽略,这恰好使代码在这种情况下能够成功运行。
解决此问题的最简单方法是使用 StringReader
和 XmlReader.Create(TextReader, XmlReaderSettings)
直接从字符串中读取:
/// <summary>
/// Factory for <see cref="XmlReader"/> instances that ignore DTDs.
/// </summary>
public class XmlWrapper
{
    /// <summary>
    /// Returns a non-validating reader that reads <paramref name="sr"/> directly
    /// as text, with DTD processing set to <see cref="DtdProcessing.Ignore"/>.
    /// </summary>
    /// <param name="sr">The XML document text.</param>
    /// <returns>An <see cref="XmlReader"/> over the string.</returns>
    public static XmlReader CreateXmlReaderObject(string sr)
    {
        XmlReaderSettings readerSettings = new XmlReaderSettings();
        readerSettings.ValidationType = ValidationType.None;
        readerSettings.DtdProcessing = DtdProcessing.Ignore;

        // Reading from a TextReader avoids the intermediate byte array entirely.
        TextReader input = new StringReader(sr);
        return XmlReader.Create(input, readerSettings);
    }
}
由于 C# 字符串在内部始终以 UTF-16 编码,XML 中的编码声明会因无关紧要而被忽略。这样做还能提高性能,因为完全省去了转换为中间字节数组的步骤。
顺便说一下,您应该通过 using
语句来释放(dispose)您的 XmlReader
:
// Declared outside the using block so the result remains available after the
// reader has been disposed.
object ob;
using (XmlReader reader = XmlWrapper.CreateXmlReaderObject(s))
{
    var serializer = new XmlSerializer(typeof(List<SerializingTemplateItem>));
    ob = serializer.Deserialize(reader);
}
工作样本 fiddle here.
相关问题:
- Meaning of - <?xml version="1.0" encoding="utf-8"?>
- Ignoring specified encoding when deserializing XML