从网络上用 ISO-8859-1 编码解析 XML
Parse XML with the encoding ISO-8859-1 from the web
我需要从网络读取编码为 ISO-8859-1 的 XML 文件。用它创建 XmlDocument 后,我试图将它的一些 InnerText 转换为 UTF。但这没有用。然后我尝试更改 HttpClient 上的编码。响应字符串格式正确,但在创建 XmlDocument 时,应用程序崩溃并出现异常:HRESULT:0xC00CE55F 或 XML 字符串中出现非预期字符。我该如何解决这个问题?
代码片段:
/// <summary>
/// Downloads the resource at <paramref name="uri"/> and decodes the raw response
/// bytes as ISO-8859-1 text.
/// </summary>
/// <param name="uri">Address of the document to download.</param>
/// <returns>The complete response body decoded as ISO-8859-1.</returns>
private static async Task<string> GetResultsAsync(string uri)
{
    // The body is fully buffered into a byte array, so the client can be disposed on exit.
    using (var client = new HttpClient())
    {
        byte[] response = await client.GetByteArrayAsync(uri);
        // Decode the WHOLE buffer. Passing response.Length - 1 as the count dropped the
        // last byte of the document, which truncates the XML and makes LoadXml fail
        // (and throws outright when the body is empty).
        return Encoding.GetEncoding("iso-8859-1").GetString(response, 0, response.Length);
    }
}
/// <summary>
/// Downloads the feed at <paramref name="url"/>, parses it as XML and adds one
/// PodcastItem per //item/title node to the group returned by GetGroupAsync("prog").
/// </summary>
/// <param name="url">Address of the feed to download.</param>
public static async Task GetPodcasts(string url)
{
    var progrmas = await GetGroupAsync("prog");
    // Earlier attempt: HttpClient.GetStringAsync(url) returned the string with the wrong
    // chars, although LoadXml did not fail on it.
    string res = await GetResultsAsync(url); //Gets the string properly formatted
    XmlDocument doc = new XmlDocument();
    doc.LoadXml(res); //Crashes here
    XmlElement root = doc.DocumentElement;
    XmlNodeList nodes = root.SelectNodes("//item");
    //Title
    var node_titles = root.SelectNodes("//item/title");
    IEnumerable<string> query_titles = from nodess in node_titles select nodess.InnerText;
    List<string> list_titles = query_titles.ToList();
    //........
    // NOTE(review): pubdate, link and description are not defined in the visible code;
    // presumably they are produced by the section elided above - confirm.
    for (int i = 0; i < list_titles.Count; i++)
    {
        string title = list_titles[i];
        //First attempt to convert a field from the XmlDocument, with the wrong chars. Only replaces the bad encoding with a '?':
        //Encoding iso = Encoding.GetEncoding("ISO-8859-1");
        //Encoding utf8 = Encoding.UTF8;
        //byte[] utfBytes = utf8.GetBytes(title);
        //byte[] isoBytes = Encoding.Convert(utf8, iso, utfBytes);
        //string msg = iso.GetString(isoBytes, 0, isoBytes.Length - 1);
        PodcastItem dataItem = new PodcastItem(title + pubdate, title, link, description, "", pubdate);
        progrmas.Items.Add(dataItem);
    }
}
我不确定您为什么要自己动手处理编码,但它崩溃得这么厉害的原因,很可能是您漏掉了字节数组的最后一个字节。下面这段代码在我这里可以正常工作:
/// <summary>
/// Fetches the sample podcast feed as raw bytes and decodes them as ISO-8859-1.
/// </summary>
/// <returns>The feed content as a string.</returns>
static async Task<string> LoadDecoced()
{
    Encoding latin1 = Encoding.GetEncoding("iso-8859-1");
    var http = new HttpClient();
    byte[] payload = await http.GetByteArrayAsync("http://www.rtp.pt/play/podcast/469");
    // The count is the full payload length (not length - 1): every byte must be decoded.
    return latin1.GetString(payload, 0, payload.Length);
}
如果让 HttpClient 自己判断编码,你的代码在我这里同样可以正常工作:
/// <summary>
/// Fetches the sample podcast feed as a string, leaving the decoding to HttpClient.
/// </summary>
/// <returns>The feed content as a string.</returns>
static async Task<string> Load()
{
    var http = new HttpClient();
    return await http.GetStringAsync("http://www.rtp.pt/play/podcast/469");
}
/// <summary>
/// Loads the feed via Load(), parses it and prints the number of //item/title nodes.
/// </summary>
static void Main(string[] args)
{
    // Blocks until the download has finished.
    string feedXml = Load().Result;

    var document = new XmlDocument();
    document.LoadXml(feedXml);

    var titleNodes = document.DocumentElement.SelectNodes("//item/title");
    Console.WriteLine(titleNodes.Count);
}
如果您不是在移动平台或 WinRT 上,XmlDocument.Load 也可以直接接受一个流:
/// <summary>
/// Opens the sample podcast feed as a response stream without decoding it.
/// </summary>
/// <returns>The stream returned by HttpClient.GetStreamAsync for the feed.</returns>
static async Task<Stream> LoadStream()
{
    var http = new HttpClient();
    return await http.GetStreamAsync("http://www.rtp.pt/play/podcast/469");
}
/// <summary>
/// Loads the feed straight from the response stream returned by LoadStream(),
/// then prints the number of //item/title nodes.
/// </summary>
static void Main(string[] args)
{
    var xd2 = new XmlDocument();
    // Dispose the response stream as soon as the document has been read from it;
    // previously it was never closed.
    using (var stream = LoadStream().Result)
    {
        xd2.Load(stream);
    }
    var node_titles2 = xd2.DocumentElement.SelectNodes("//item/title");
    Console.WriteLine(node_titles2.Count);
}
这是我的控制台中的结果:
你确定你没有在其他地方编码吗?
作为一般建议:框架自带的类能够处理最常见的编码方案。尽量让它们自行处理,而不必自己去摆弄编码类。
我需要从网络读取编码为 ISO-8859-1 的 XML 文件。用它创建 XmlDocument 后,我试图将它的一些 InnerText 转换为 UTF。但这没有用。然后我尝试更改 HttpClient 上的编码。响应字符串格式正确,但在创建 XmlDocument 时,应用程序崩溃并出现异常:HRESULT:0xC00CE55F 或 XML 字符串中出现非预期字符。我该如何解决这个问题?
代码片段:
/// <summary>
/// Downloads the resource at <paramref name="uri"/> and decodes the raw response
/// bytes as ISO-8859-1 text.
/// </summary>
/// <param name="uri">Address of the document to download.</param>
/// <returns>The complete response body decoded as ISO-8859-1.</returns>
private static async Task<string> GetResultsAsync(string uri)
{
    // The body is fully buffered into a byte array, so the client can be disposed on exit.
    using (var client = new HttpClient())
    {
        byte[] response = await client.GetByteArrayAsync(uri);
        // Decode the WHOLE buffer. Passing response.Length - 1 as the count dropped the
        // last byte of the document, which truncates the XML and makes LoadXml fail
        // (and throws outright when the body is empty).
        return Encoding.GetEncoding("iso-8859-1").GetString(response, 0, response.Length);
    }
}
/// <summary>
/// Downloads the feed at <paramref name="url"/>, parses it as XML and adds one
/// PodcastItem per //item/title node to the group returned by GetGroupAsync("prog").
/// </summary>
/// <param name="url">Address of the feed to download.</param>
public static async Task GetPodcasts(string url)
{
    var progrmas = await GetGroupAsync("prog");
    // Earlier attempt: HttpClient.GetStringAsync(url) returned the string with the wrong
    // chars, although LoadXml did not fail on it.
    string res = await GetResultsAsync(url); //Gets the string properly formatted
    XmlDocument doc = new XmlDocument();
    doc.LoadXml(res); //Crashes here
    XmlElement root = doc.DocumentElement;
    XmlNodeList nodes = root.SelectNodes("//item");
    //Title
    var node_titles = root.SelectNodes("//item/title");
    IEnumerable<string> query_titles = from nodess in node_titles select nodess.InnerText;
    List<string> list_titles = query_titles.ToList();
    //........
    // NOTE(review): pubdate, link and description are not defined in the visible code;
    // presumably they are produced by the section elided above - confirm.
    for (int i = 0; i < list_titles.Count; i++)
    {
        string title = list_titles[i];
        //First attempt to convert a field from the XmlDocument, with the wrong chars. Only replaces the bad encoding with a '?':
        //Encoding iso = Encoding.GetEncoding("ISO-8859-1");
        //Encoding utf8 = Encoding.UTF8;
        //byte[] utfBytes = utf8.GetBytes(title);
        //byte[] isoBytes = Encoding.Convert(utf8, iso, utfBytes);
        //string msg = iso.GetString(isoBytes, 0, isoBytes.Length - 1);
        PodcastItem dataItem = new PodcastItem(title + pubdate, title, link, description, "", pubdate);
        progrmas.Items.Add(dataItem);
    }
}
我不确定您为什么要自己动手处理编码,但它崩溃得这么厉害的原因,很可能是您漏掉了字节数组的最后一个字节。下面这段代码在我这里可以正常工作:
/// <summary>
/// Fetches the sample podcast feed as raw bytes and decodes them as ISO-8859-1.
/// </summary>
/// <returns>The feed content as a string.</returns>
static async Task<string> LoadDecoced()
{
    Encoding latin1 = Encoding.GetEncoding("iso-8859-1");
    var http = new HttpClient();
    byte[] payload = await http.GetByteArrayAsync("http://www.rtp.pt/play/podcast/469");
    // The count is the full payload length (not length - 1): every byte must be decoded.
    return latin1.GetString(payload, 0, payload.Length);
}
如果让 HttpClient 自己判断编码,你的代码在我这里同样可以正常工作:
/// <summary>
/// Fetches the sample podcast feed as a string, leaving the decoding to HttpClient.
/// </summary>
/// <returns>The feed content as a string.</returns>
static async Task<string> Load()
{
    var http = new HttpClient();
    return await http.GetStringAsync("http://www.rtp.pt/play/podcast/469");
}
/// <summary>
/// Loads the feed via Load(), parses it and prints the number of //item/title nodes.
/// </summary>
static void Main(string[] args)
{
    // Blocks until the download has finished.
    string feedXml = Load().Result;

    var document = new XmlDocument();
    document.LoadXml(feedXml);

    var titleNodes = document.DocumentElement.SelectNodes("//item/title");
    Console.WriteLine(titleNodes.Count);
}
如果您不是在移动平台或 WinRT 上,XmlDocument.Load 也可以直接接受一个流:
/// <summary>
/// Opens the sample podcast feed as a response stream without decoding it.
/// </summary>
/// <returns>The stream returned by HttpClient.GetStreamAsync for the feed.</returns>
static async Task<Stream> LoadStream()
{
    var http = new HttpClient();
    return await http.GetStreamAsync("http://www.rtp.pt/play/podcast/469");
}
/// <summary>
/// Loads the feed straight from the response stream returned by LoadStream(),
/// then prints the number of //item/title nodes.
/// </summary>
static void Main(string[] args)
{
    var xd2 = new XmlDocument();
    // Dispose the response stream as soon as the document has been read from it;
    // previously it was never closed.
    using (var stream = LoadStream().Result)
    {
        xd2.Load(stream);
    }
    var node_titles2 = xd2.DocumentElement.SelectNodes("//item/title");
    Console.WriteLine(node_titles2.Count);
}
这是我的控制台中的结果:
你确定你没有在其他地方编码吗?
作为一般建议:框架自带的类能够处理最常见的编码方案。尽量让它们自行处理,而不必自己去摆弄编码类。