XML - System.Xml.XmlException - 十六进制值 0x06
XML - System.Xml.XmlException - hexadecimal value 0x06
我遇到了这个错误。后来我查了一下,原因是我的 XML 中含有非法字符,也找到了一些解决办法。但我无权编辑这些文件中的任何一个,我的工作只是读取标签值、属性值之类的内容,所以我不能把二进制字符替换成像 '\x01' 这样的转义序列。我还尝试在 XmlReaderSettings 中设置 CheckCharacters = false,但没有效果,仍然抛出相同的错误。
无法在 XmlReader 中解决这个问题吗?我了解到 XmlTextReader 可以跳过这个异常,但我已经用 XmlReader 写好了所有功能的代码。如果能找到针对 XmlReader 的解决方案就太好了,否则我将不得不修改所有代码。
我的代码:
/// <summary>
/// Scans every *.xml file below the folder typed into <c>textBox1</c>. For each
/// &lt;ap&gt; element that has attributes, the file path is appended to
/// <c>\location\NAPP.txt</c> when the element's "ap" attribute equals "no",
/// and to <c>\location\APP.txt</c> otherwise. Shows "Done" when finished.
/// </summary>
/// <param name="sender">The control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    var filenames = System.IO.Directory
        .EnumerateFiles(textBox1.Text, "*.xml", System.IO.SearchOption.AllDirectories)
        .Select(System.IO.Path.GetFullPath);

    foreach (var f in filenames)
    {
        var resolver = new XmlUrlOverrideResolver();
        resolver.DtdFileMap[@"X1.DTD"] = @"\location\X1.DTD";
        // NOTE(review): R2.DTD is mapped to X2.DTD - confirm this is intentional.
        resolver.DtdFileMap[@"R2.DTD"] = @"\location\X2.DTD";
        resolver.DtdFileMap[@"R5.DTD"] = @"\location\R5.DTD";

        XmlReaderSettings settings = new XmlReaderSettings();
        settings.DtdProcessing = DtdProcessing.Parse;
        settings.XmlResolver = resolver;

        // Dispose the reader so the file handle is released before moving on
        // to the next file (the reader was previously never closed).
        using (XmlReader doc = XmlReader.Create(f, settings))
        {
            while (doc.Read())
            {
                if (doc.NodeType != XmlNodeType.Element || doc.Name != "ap" || !doc.HasAttributes)
                {
                    continue;
                }

                // Append the path directly: no scratch array or running index is
                // needed, which also removes the crash after 10000 matches.
                string fin = doc.GetAttribute("ap");
                string listPath = fin == "no" ? @"\location\NAPP.txt" : @"\location\APP.txt";
                File.AppendAllText(listPath, f + Environment.NewLine);
            }
        }
    }

    MessageBox.Show("Done");
}
这是一个非常简单的字符过滤器 ("filter") 示例,它会把 0x06 字符替换为空格 (space):
/// <summary>
/// A <see cref="StreamReader"/> that replaces every 0x06 character returned by
/// <see cref="Read(char[], int, int)"/> with a space. Only that overload is
/// filtered; the other read methods are inherited unchanged.
/// </summary>
public class MyStreamReader : StreamReader {
    /// <summary>Opens the file at <paramref name="path"/> for filtered reading.</summary>
    /// <param name="path">Path of the file to read.</param>
    public MyStreamReader(string path)
        : base(path) {
    }

    /// <summary>
    /// Reads up to <paramref name="count"/> characters into <paramref name="buffer"/>
    /// starting at <paramref name="index"/>, turning each 0x06 into a space.
    /// </summary>
    /// <returns>The number of characters read, or 0 at end of stream.</returns>
    public override int Read(char[] buffer, int index, int count) {
        int res = base.Read(buffer, index, count);
        // The base class stored the characters at buffer[index .. index + res),
        // so the scan must cover that window rather than buffer[0 .. res).
        int end = index + res;
        for (int i = index; i < end; i++) {
            if (buffer[i] == 0x06) {
                buffer[i] = ' ';
            }
        }
        return res;
    }
}
你这样使用它:
// Open the file through the filtering reader instead of passing the path
// directly. (f and settings are presumably the variables from the question's loop.)
using (var sr = new MyStreamReader(f)) {
// The XmlReader is created over the MyStreamReader instead of the file path.
var doc = XmlReader.Create(sr, settings);
请注意,它之所以非常简单,是因为它只是把一个字符 (0x06) 替换为另一个长度 ("length") 相同的字符(空格)。如果你想把一个字符替换为一串字符 ("sequence",例如对它进行转义),实现会变得更复杂(并非不可能,大约是 30 分钟的工作量)。
(我已经检查过了,XmlTextReader 似乎只使用上面重写的 Read(char[], int, int) 方法,而不使用无参数的 Read() 方法)
一如既往,当程序员告诉你 30 分钟时,它意味着 0 分钟或 2 小时:-)
这是"more complex" ReplacingStreamReader
:
/// <summary>
/// A <see cref="StreamReader"/> that passes every character it reads through
/// <see cref="ReplaceWith"/> and returns the replacement text instead.
/// Only the Read methods are supported!
/// </summary>
public class ReplacingStreamReader : StreamReader
{
    /// <summary>Opens the file at <paramref name="path"/> for filtered reading.</summary>
    /// <param name="path">Path of the file to read.</param>
    public ReplacingStreamReader(string path)
        : base(path)
    {
    }

    /// <summary>
    /// Maps a character to its replacement. Returning <c>null</c> keeps the
    /// character, returning an empty string removes it, and any other string is
    /// emitted in its place. When the delegate itself is <c>null</c>, characters
    /// pass through unchanged.
    /// </summary>
    public Func<char, string> ReplaceWith { get; set; }

    /// <summary>
    /// Replacement characters already produced but not yet handed to the caller
    /// because the caller's buffer was too small; <c>null</c> when none are pending.
    /// </summary>
    protected char[] RemainingChars { get; set; }

    /// <summary>Index of the next pending character in <see cref="RemainingChars"/>.</summary>
    protected int RemainingCharsIndex { get; set; }

    /// <summary>Reads the next (possibly replaced) character.</summary>
    /// <returns>The character as an int, or -1 at end of stream.</returns>
    public override int Read()
    {
        // Serve pending replacement characters first.
        if (RemainingChars != null)
        {
            int pending = RemainingChars[RemainingCharsIndex];
            RemainingCharsIndex++;
            if (RemainingCharsIndex == RemainingChars.Length)
            {
                RemainingCharsIndex = 0;
                RemainingChars = null;
            }
            return pending;
        }

        while (true)
        {
            int ch = base.Read();

            // End of stream, or no filter installed: pass through untouched
            // (a null ReplaceWith previously caused a NullReferenceException).
            if (ch == -1 || ReplaceWith == null)
            {
                return ch;
            }

            string replace = ReplaceWith((char)ch);
            if (replace == null)
            {
                return ch;
            }

            if (replace.Length == 0)
            {
                // The character was deleted: read the next one instead of
                // indexing into an empty string.
                continue;
            }

            if (replace.Length > 1)
            {
                // Keep everything after the first character for later reads.
                RemainingChars = replace.ToCharArray(1, replace.Length - 1);
                RemainingCharsIndex = 0;
            }

            return replace[0];
        }
    }

    /// <summary>
    /// Reads up to <paramref name="count"/> (possibly replaced) characters into
    /// <paramref name="buffer"/> starting at <paramref name="index"/>.
    /// </summary>
    /// <returns>The number of characters stored, or 0 at end of stream.</returns>
    public override int Read(char[] buffer, int index, int count)
    {
        int res = 0;

        // We leave error handling to the StreamReader :-)
        // We handle only "working" parameters
        if (RemainingChars != null && buffer != null && index >= 0 && count > 0 && index + count <= buffer.Length)
        {
            int remainingCharsCount = RemainingChars.Length - RemainingCharsIndex;
            res = Math.Min(remainingCharsCount, count);
            Array.Copy(RemainingChars, RemainingCharsIndex, buffer, index, res);
            RemainingCharsIndex += res;

            if (RemainingCharsIndex == RemainingChars.Length)
            {
                RemainingCharsIndex = 0;
                RemainingChars = null;
            }

            if (res == count)
            {
                return res;
            }

            index += res;
            count -= res;
        }

        while (true)
        {
            // Overflow list: once a multi-character replacement is seen, all
            // further output of this pass is collected here.
            List<char> sb = null;

            int res2 = base.Read(buffer, index, count);
            if (res2 == 0)
            {
                return res;
            }

            if (ReplaceWith == null)
            {
                // No filter: the characters just read are already in place and
                // must be counted (they were previously dropped).
                return res + res2;
            }

            // j counts characters compacted in place at buffer[index + j];
            // j never exceeds i, so unread input is never overwritten.
            int j = 0;

            for (int i = 0; i < res2; i++)
            {
                char ch = buffer[index + i];
                string replace = ReplaceWith(ch);

                if (sb != null)
                {
                    if (replace == null)
                    {
                        sb.Add(ch);
                    }
                    else
                    {
                        sb.AddRange(replace);
                    }
                }
                else if (replace == null)
                {
                    buffer[index + j] = ch;
                    j++;
                }
                else if (replace.Length == 1)
                {
                    buffer[index + j] = replace[0];
                    j++;
                }
                else if (replace.Length == 0)
                {
                    // We do not advance
                }
                else
                {
                    sb = new List<char>();
                    sb.AddRange(replace);
                }
            }

            if (sb != null)
            {
                // Copy as much of the overflow as fits; stash the rest.
                int res3 = Math.Min(sb.Count, count - j);
                sb.CopyTo(0, buffer, index + j, res3);

                if (res3 < sb.Count)
                {
                    RemainingChars = new char[sb.Count - res3];
                    RemainingCharsIndex = 0;
                    sb.CopyTo(res3, RemainingChars, 0, RemainingChars.Length);
                }

                return res + j + res3;
            }

            if (j == 0)
            {
                // Every character of this pass was deleted; returning 0 would
                // look like end of stream, so read again.
                continue;
            }

            return res + j;
        }
    }
}
像这样使用它:
// Open the file through the replacing reader.
// (f and settings are presumably the variables from the question's loop.)
using (var sr = new ReplacingStreamReader(f))
{
// Install the per-character filter: 0x06 becomes a single space, and null
// tells the reader to keep every other character as it is.
sr.ReplaceWith = x =>
{
return x == 0x6 ? " " : null;
// Alternative example: replace every '.' with a space.
// return x == '.' ? " " : null;
};
// The XmlReader is created over the replacing reader instead of the file path.
var doc = XmlReader.Create(sr, settings);
请注意,ReplacingStreamReader 并不"知道"它正在修改 XML 的哪一部分,因此这种"盲目" ("blind") 的替换很少是完全安全的 :-) 除了这个限制之外,你可以把任何字符替换为任何字符串(ReplaceWith 返回 null 表示"保留当前字符",在给定示例中相当于返回 x.ToString();返回 string.Empty 也是有效的,表示删除当前字符)。
这个类相当有趣:它用 char[] RemainingChars 保存那些已经读取(并经过 ReplaceWith 过滤)、但由于传入的缓冲区太小而尚未被 Read() 方法返回的字符(ReplaceWith 方法可能会"增大" ("enlarge") 读到的字符串,使其超出 buffer 的容量!)。请注意 sb 是 List<char> 而不是 StringBuilder;就代码而言,使用哪一个几乎是等效的。
你也可以先把内容读入一个 string,替换(转义)其中的内容,然后再加载到 XmlReader 中:
// For each file: load the text, strip the offending character in memory,
// then parse the cleaned text with an XmlReader.
foreach (var f in filenames) {
    string text;
    // NOTE(review): the file is decoded as UTF-8 regardless of any encoding
    // named in its XML declaration - confirm the inputs really are UTF-8.
    using (StreamReader s = new StreamReader(f, Encoding.UTF8)) {
        text = s.ReadToEnd();
    }
    text = text.Replace("\x01", @""); //replace the content

    //load some settings
    var resolver = new XmlUrlOverrideResolver();
    resolver.DtdFileMap[@"X1.DTD"] = @"\location\X1.DTD";
    resolver.DtdFileMap[@"R2.DTD"] = @"\location\X2.DTD";
    resolver.DtdFileMap[@"R5.DTD"] = @"\location\R5.DTD";
    XmlReaderSettings settings = new XmlReaderSettings();
    settings.DtdProcessing = DtdProcessing.Parse;
    settings.XmlResolver = resolver;

    // XmlReader.Create(string, settings) interprets the string as a URI, not
    // as XML content, so the cleaned text is wrapped in a StringReader. The
    // file path is passed as base URI so relative DTD references still
    // resolve relative to the original file.
    using (XmlReader doc = XmlReader.Create(new StringReader(text), settings, f)) {
        //perform processing task
        //...
    }
}
我遇到了这个错误。后来我查了一下,原因是我的 XML 中含有非法字符,也找到了一些解决办法。但我无权编辑这些文件中的任何一个,我的工作只是读取标签值、属性值之类的内容,所以我不能把二进制字符替换成像 '\x01' 这样的转义序列。我还尝试在 XmlReaderSettings 中设置 CheckCharacters = false,但没有效果,仍然抛出相同的错误。
无法在 XmlReader 中解决这个问题吗?我了解到 XmlTextReader 可以跳过这个异常,但我已经用 XmlReader 写好了所有功能的代码。如果能找到针对 XmlReader 的解决方案就太好了,否则我将不得不修改所有代码。
我的代码:
/// <summary>
/// Scans every *.xml file below the folder typed into <c>textBox1</c>. For each
/// &lt;ap&gt; element that has attributes, the file path is appended to
/// <c>\location\NAPP.txt</c> when the element's "ap" attribute equals "no",
/// and to <c>\location\APP.txt</c> otherwise. Shows "Done" when finished.
/// </summary>
/// <param name="sender">The control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    var filenames = System.IO.Directory
        .EnumerateFiles(textBox1.Text, "*.xml", System.IO.SearchOption.AllDirectories)
        .Select(System.IO.Path.GetFullPath);

    foreach (var f in filenames)
    {
        var resolver = new XmlUrlOverrideResolver();
        resolver.DtdFileMap[@"X1.DTD"] = @"\location\X1.DTD";
        // NOTE(review): R2.DTD is mapped to X2.DTD - confirm this is intentional.
        resolver.DtdFileMap[@"R2.DTD"] = @"\location\X2.DTD";
        resolver.DtdFileMap[@"R5.DTD"] = @"\location\R5.DTD";

        XmlReaderSettings settings = new XmlReaderSettings();
        settings.DtdProcessing = DtdProcessing.Parse;
        settings.XmlResolver = resolver;

        // Dispose the reader so the file handle is released before moving on
        // to the next file (the reader was previously never closed).
        using (XmlReader doc = XmlReader.Create(f, settings))
        {
            while (doc.Read())
            {
                if (doc.NodeType != XmlNodeType.Element || doc.Name != "ap" || !doc.HasAttributes)
                {
                    continue;
                }

                // Append the path directly: no scratch array or running index is
                // needed, which also removes the crash after 10000 matches.
                string fin = doc.GetAttribute("ap");
                string listPath = fin == "no" ? @"\location\NAPP.txt" : @"\location\APP.txt";
                File.AppendAllText(listPath, f + Environment.NewLine);
            }
        }
    }

    MessageBox.Show("Done");
}
这是一个非常简单的字符过滤器 ("filter") 示例,它会把 0x06 字符替换为空格 (space):
/// <summary>
/// A <see cref="StreamReader"/> that replaces every 0x06 character returned by
/// <see cref="Read(char[], int, int)"/> with a space. Only that overload is
/// filtered; the other read methods are inherited unchanged.
/// </summary>
public class MyStreamReader : StreamReader {
    /// <summary>Opens the file at <paramref name="path"/> for filtered reading.</summary>
    /// <param name="path">Path of the file to read.</param>
    public MyStreamReader(string path)
        : base(path) {
    }

    /// <summary>
    /// Reads up to <paramref name="count"/> characters into <paramref name="buffer"/>
    /// starting at <paramref name="index"/>, turning each 0x06 into a space.
    /// </summary>
    /// <returns>The number of characters read, or 0 at end of stream.</returns>
    public override int Read(char[] buffer, int index, int count) {
        int res = base.Read(buffer, index, count);
        // The base class stored the characters at buffer[index .. index + res),
        // so the scan must cover that window rather than buffer[0 .. res).
        int end = index + res;
        for (int i = index; i < end; i++) {
            if (buffer[i] == 0x06) {
                buffer[i] = ' ';
            }
        }
        return res;
    }
}
你这样使用它:
// Open the file through the filtering reader instead of passing the path
// directly. (f and settings are presumably the variables from the question's loop.)
using (var sr = new MyStreamReader(f)) {
// The XmlReader is created over the MyStreamReader instead of the file path.
var doc = XmlReader.Create(sr, settings);
请注意,它之所以非常简单,是因为它只是把一个字符 (0x06) 替换为另一个长度 ("length") 相同的字符(空格)。如果你想把一个字符替换为一串字符 ("sequence",例如对它进行转义),实现会变得更复杂(并非不可能,大约是 30 分钟的工作量)。
(我已经检查过了,XmlTextReader 似乎只使用上面重写的 Read(char[], int, int) 方法,而不使用无参数的 Read() 方法)
一如既往,当程序员告诉你 30 分钟时,它意味着 0 分钟或 2 小时:-)
这是"more complex" ReplacingStreamReader
:
/// <summary>
/// A <see cref="StreamReader"/> that passes every character it reads through
/// <see cref="ReplaceWith"/> and returns the replacement text instead.
/// Only the Read methods are supported!
/// </summary>
public class ReplacingStreamReader : StreamReader
{
    /// <summary>Opens the file at <paramref name="path"/> for filtered reading.</summary>
    /// <param name="path">Path of the file to read.</param>
    public ReplacingStreamReader(string path)
        : base(path)
    {
    }

    /// <summary>
    /// Maps a character to its replacement. Returning <c>null</c> keeps the
    /// character, returning an empty string removes it, and any other string is
    /// emitted in its place. When the delegate itself is <c>null</c>, characters
    /// pass through unchanged.
    /// </summary>
    public Func<char, string> ReplaceWith { get; set; }

    /// <summary>
    /// Replacement characters already produced but not yet handed to the caller
    /// because the caller's buffer was too small; <c>null</c> when none are pending.
    /// </summary>
    protected char[] RemainingChars { get; set; }

    /// <summary>Index of the next pending character in <see cref="RemainingChars"/>.</summary>
    protected int RemainingCharsIndex { get; set; }

    /// <summary>Reads the next (possibly replaced) character.</summary>
    /// <returns>The character as an int, or -1 at end of stream.</returns>
    public override int Read()
    {
        // Serve pending replacement characters first.
        if (RemainingChars != null)
        {
            int pending = RemainingChars[RemainingCharsIndex];
            RemainingCharsIndex++;
            if (RemainingCharsIndex == RemainingChars.Length)
            {
                RemainingCharsIndex = 0;
                RemainingChars = null;
            }
            return pending;
        }

        while (true)
        {
            int ch = base.Read();

            // End of stream, or no filter installed: pass through untouched
            // (a null ReplaceWith previously caused a NullReferenceException).
            if (ch == -1 || ReplaceWith == null)
            {
                return ch;
            }

            string replace = ReplaceWith((char)ch);
            if (replace == null)
            {
                return ch;
            }

            if (replace.Length == 0)
            {
                // The character was deleted: read the next one instead of
                // indexing into an empty string.
                continue;
            }

            if (replace.Length > 1)
            {
                // Keep everything after the first character for later reads.
                RemainingChars = replace.ToCharArray(1, replace.Length - 1);
                RemainingCharsIndex = 0;
            }

            return replace[0];
        }
    }

    /// <summary>
    /// Reads up to <paramref name="count"/> (possibly replaced) characters into
    /// <paramref name="buffer"/> starting at <paramref name="index"/>.
    /// </summary>
    /// <returns>The number of characters stored, or 0 at end of stream.</returns>
    public override int Read(char[] buffer, int index, int count)
    {
        int res = 0;

        // We leave error handling to the StreamReader :-)
        // We handle only "working" parameters
        if (RemainingChars != null && buffer != null && index >= 0 && count > 0 && index + count <= buffer.Length)
        {
            int remainingCharsCount = RemainingChars.Length - RemainingCharsIndex;
            res = Math.Min(remainingCharsCount, count);
            Array.Copy(RemainingChars, RemainingCharsIndex, buffer, index, res);
            RemainingCharsIndex += res;

            if (RemainingCharsIndex == RemainingChars.Length)
            {
                RemainingCharsIndex = 0;
                RemainingChars = null;
            }

            if (res == count)
            {
                return res;
            }

            index += res;
            count -= res;
        }

        while (true)
        {
            // Overflow list: once a multi-character replacement is seen, all
            // further output of this pass is collected here.
            List<char> sb = null;

            int res2 = base.Read(buffer, index, count);
            if (res2 == 0)
            {
                return res;
            }

            if (ReplaceWith == null)
            {
                // No filter: the characters just read are already in place and
                // must be counted (they were previously dropped).
                return res + res2;
            }

            // j counts characters compacted in place at buffer[index + j];
            // j never exceeds i, so unread input is never overwritten.
            int j = 0;

            for (int i = 0; i < res2; i++)
            {
                char ch = buffer[index + i];
                string replace = ReplaceWith(ch);

                if (sb != null)
                {
                    if (replace == null)
                    {
                        sb.Add(ch);
                    }
                    else
                    {
                        sb.AddRange(replace);
                    }
                }
                else if (replace == null)
                {
                    buffer[index + j] = ch;
                    j++;
                }
                else if (replace.Length == 1)
                {
                    buffer[index + j] = replace[0];
                    j++;
                }
                else if (replace.Length == 0)
                {
                    // We do not advance
                }
                else
                {
                    sb = new List<char>();
                    sb.AddRange(replace);
                }
            }

            if (sb != null)
            {
                // Copy as much of the overflow as fits; stash the rest.
                int res3 = Math.Min(sb.Count, count - j);
                sb.CopyTo(0, buffer, index + j, res3);

                if (res3 < sb.Count)
                {
                    RemainingChars = new char[sb.Count - res3];
                    RemainingCharsIndex = 0;
                    sb.CopyTo(res3, RemainingChars, 0, RemainingChars.Length);
                }

                return res + j + res3;
            }

            if (j == 0)
            {
                // Every character of this pass was deleted; returning 0 would
                // look like end of stream, so read again.
                continue;
            }

            return res + j;
        }
    }
}
像这样使用它:
// Open the file through the replacing reader.
// (f and settings are presumably the variables from the question's loop.)
using (var sr = new ReplacingStreamReader(f))
{
// Install the per-character filter: 0x06 becomes a single space, and null
// tells the reader to keep every other character as it is.
sr.ReplaceWith = x =>
{
return x == 0x6 ? " " : null;
// Alternative example: replace every '.' with a space.
// return x == '.' ? " " : null;
};
// The XmlReader is created over the replacing reader instead of the file path.
var doc = XmlReader.Create(sr, settings);
请注意,ReplacingStreamReader 并不"知道"它正在修改 XML 的哪一部分,因此这种"盲目" ("blind") 的替换很少是完全安全的 :-) 除了这个限制之外,你可以把任何字符替换为任何字符串(ReplaceWith 返回 null 表示"保留当前字符",在给定示例中相当于返回 x.ToString();返回 string.Empty 也是有效的,表示删除当前字符)。
这个类相当有趣:它用 char[] RemainingChars 保存那些已经读取(并经过 ReplaceWith 过滤)、但由于传入的缓冲区太小而尚未被 Read() 方法返回的字符(ReplaceWith 方法可能会"增大" ("enlarge") 读到的字符串,使其超出 buffer 的容量!)。请注意 sb 是 List<char> 而不是 StringBuilder;就代码而言,使用哪一个几乎是等效的。
你也可以先把内容读入一个 string,替换(转义)其中的内容,然后再加载到 XmlReader 中:
// For each file: load the text, strip the offending character in memory,
// then parse the cleaned text with an XmlReader.
foreach (var f in filenames) {
    string text;
    // NOTE(review): the file is decoded as UTF-8 regardless of any encoding
    // named in its XML declaration - confirm the inputs really are UTF-8.
    using (StreamReader s = new StreamReader(f, Encoding.UTF8)) {
        text = s.ReadToEnd();
    }
    text = text.Replace("\x01", @""); //replace the content

    //load some settings
    var resolver = new XmlUrlOverrideResolver();
    resolver.DtdFileMap[@"X1.DTD"] = @"\location\X1.DTD";
    resolver.DtdFileMap[@"R2.DTD"] = @"\location\X2.DTD";
    resolver.DtdFileMap[@"R5.DTD"] = @"\location\R5.DTD";
    XmlReaderSettings settings = new XmlReaderSettings();
    settings.DtdProcessing = DtdProcessing.Parse;
    settings.XmlResolver = resolver;

    // XmlReader.Create(string, settings) interprets the string as a URI, not
    // as XML content, so the cleaned text is wrapped in a StringReader. The
    // file path is passed as base URI so relative DTD references still
    // resolve relative to the original file.
    using (XmlReader doc = XmlReader.Create(new StringReader(text), settings, f)) {
        //perform processing task
        //...
    }
}