在 C# 中使用 HtmlAgilityPack 解析 HTML
html parse with HtmlAgilityPack in C#
// Download the page and load its markup into an HtmlAgilityPack document.
string page;
// WebClient is IDisposable; release it as soon as the download has finished.
using (WebClient webClient = new WebClient())
{
    page = webClient.DownloadString(
        "http://www.deu.edu.tr/DEUWeb/Guncel/v2_index_cron.html");
}
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
doc.LoadHtml(page);
我想解析上面给出的页面,并获取其中 table 的行信息。我试过几个例子,但都没有成功。有什么建议吗?
下面是一个示例:枚举所有 table 单元格,并将每个单元格的内部文本(InnerText)写入控制台:
// Download the page, parse it, and print the inner text of every table cell.
string page;
// WebClient is IDisposable; release it as soon as the download has finished.
using (WebClient webClient = new WebClient())
{
    page = webClient.DownloadString("http://www.deu.edu.tr/DEUWeb/Guncel/v2_index_cron.html");
}
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
doc.LoadHtml(page);
// SelectNodes returns null (not an empty collection) when the XPath matches nothing,
// so guard before iterating to avoid a NullReferenceException.
var cells = doc.DocumentNode.SelectNodes("//table/tr/td");
if (cells != null)
{
    foreach (var td in cells)
    {
        Console.WriteLine(td.InnerText);
    }
}
例如,您可以像这样解析行:
using System.Net;
using HtmlAgilityPack;
namespace ConsoleApplication5
{
    /// <summary>
    /// Console sample that downloads an HTML page and walks the cells of a table in it.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Downloads the page, parses it with HtmlAgilityPack and reads the inner text of
        /// every <c>td</c> found under <c>tr</c> children of the node matched by <c>//table</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string page;
            // WebClient is IDisposable; release it as soon as the download has finished.
            using (WebClient webClient = new WebClient())
            {
                page = webClient.DownloadString("http://www.deu.edu.tr/DEUWeb/Guncel/v2_index_cron.html");
            }

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(page);

            // SelectSingleNode returns null when the document contains no <table>.
            HtmlNode table = doc.DocumentNode.SelectSingleNode("//table");
            if (table == null)
            {
                return;
            }

            // SelectNodes returns null rather than an empty collection when nothing matches.
            var cells = table.SelectNodes("tr/td");
            if (cells == null)
            {
                return;
            }

            foreach (var cell in cells)
            {
                // Placeholder kept from the sample: consume the cell text here.
                string someVariable = cell.InnerText;
            }
        }
    }
}
为了完整起见,您也可以使用 LINQ 轻松创建一个包含所有非空白单元格文本的列表:
/// <summary>
/// Downloads the page, parses it with HtmlAgilityPack and collects the inner text of every
/// non-blank <c>td</c> found under <c>tr</c> children of the node matched by <c>//table</c>.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
private static void Main(string[] args)
{
    string page;
    // WebClient is IDisposable; release it as soon as the download has finished.
    using (WebClient webClient = new WebClient())
    {
        page = webClient.DownloadString("http://www.deu.edu.tr/DEUWeb/Guncel/v2_index_cron.html");
    }

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(page);

    // Both SelectSingleNode and SelectNodes return null when nothing matches, so fall back
    // to an empty sequence instead of dereferencing null.
    HtmlNode table = doc.DocumentNode.SelectSingleNode("//table");
    var cells = table != null ? table.SelectNodes("tr/td") : null;

    var rows = (cells ?? Enumerable.Empty<HtmlNode>())
        .Select(cell => cell.InnerText)
        .Where(text => !String.IsNullOrWhiteSpace(text))
        .ToList();
}
// Download the page and load its markup into an HtmlAgilityPack document.
string page;
// WebClient is IDisposable; release it as soon as the download has finished.
using (WebClient webClient = new WebClient())
{
    page = webClient.DownloadString(
        "http://www.deu.edu.tr/DEUWeb/Guncel/v2_index_cron.html");
}
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
doc.LoadHtml(page);
我想解析上面给出的页面,并获取其中 table 的行信息。我试过几个例子,但都没有成功。有什么建议吗?
下面是一个示例:枚举所有 table 单元格,并将每个单元格的内部文本(InnerText)写入控制台:
// Download the page, parse it, and print the inner text of every table cell.
string page;
// WebClient is IDisposable; release it as soon as the download has finished.
using (WebClient webClient = new WebClient())
{
    page = webClient.DownloadString("http://www.deu.edu.tr/DEUWeb/Guncel/v2_index_cron.html");
}
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
doc.LoadHtml(page);
// SelectNodes returns null (not an empty collection) when the XPath matches nothing,
// so guard before iterating to avoid a NullReferenceException.
var cells = doc.DocumentNode.SelectNodes("//table/tr/td");
if (cells != null)
{
    foreach (var td in cells)
    {
        Console.WriteLine(td.InnerText);
    }
}
例如,您可以像这样解析行:
using System.Net;
using HtmlAgilityPack;
namespace ConsoleApplication5
{
    /// <summary>
    /// Console sample that downloads an HTML page and walks the cells of a table in it.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Downloads the page, parses it with HtmlAgilityPack and reads the inner text of
        /// every <c>td</c> found under <c>tr</c> children of the node matched by <c>//table</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string page;
            // WebClient is IDisposable; release it as soon as the download has finished.
            using (WebClient webClient = new WebClient())
            {
                page = webClient.DownloadString("http://www.deu.edu.tr/DEUWeb/Guncel/v2_index_cron.html");
            }

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(page);

            // SelectSingleNode returns null when the document contains no <table>.
            HtmlNode table = doc.DocumentNode.SelectSingleNode("//table");
            if (table == null)
            {
                return;
            }

            // SelectNodes returns null rather than an empty collection when nothing matches.
            var cells = table.SelectNodes("tr/td");
            if (cells == null)
            {
                return;
            }

            foreach (var cell in cells)
            {
                // Placeholder kept from the sample: consume the cell text here.
                string someVariable = cell.InnerText;
            }
        }
    }
}
为了完整起见,您也可以使用 LINQ 轻松创建一个包含所有非空白单元格文本的列表:
/// <summary>
/// Downloads the page, parses it with HtmlAgilityPack and collects the inner text of every
/// non-blank <c>td</c> found under <c>tr</c> children of the node matched by <c>//table</c>.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
private static void Main(string[] args)
{
    string page;
    // WebClient is IDisposable; release it as soon as the download has finished.
    using (WebClient webClient = new WebClient())
    {
        page = webClient.DownloadString("http://www.deu.edu.tr/DEUWeb/Guncel/v2_index_cron.html");
    }

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(page);

    // Both SelectSingleNode and SelectNodes return null when nothing matches, so fall back
    // to an empty sequence instead of dereferencing null.
    HtmlNode table = doc.DocumentNode.SelectSingleNode("//table");
    var cells = table != null ? table.SelectNodes("tr/td") : null;

    var rows = (cells ?? Enumerable.Empty<HtmlNode>())
        .Select(cell => cell.InnerText)
        .Where(text => !String.IsNullOrWhiteSpace(text))
        .ToList();
}