C# 使用 HtmlAgilityPack 解析 table 中的特定行
C# Using htmlagilitypack to parse a specific row from a table
我正在开发一个 C# 应用程序,我需要在其中解析 table 中的特定行并将其不同值与给定字符串进行比较。
我要解析的URL是
http://wsrpg.com/clans/57
<table cellspacing="0" cellpadding="0" border="0" class="table">
<thead>
<tr>
<th width="100">Rank</th>
<th width="150">Nick</th>
<th width="100">RCASH</th>
<th width="150">Activity in December</th>
<th width="100">Comportment</th>
<th width="100">Online</th>
<th width="150">Last Login</th>
</tr>
</thead>
<tr><td>Leader</td><td>robbie_william</td><td>351</td><td>1024</td><td>1195</td><td class='offline'><span class="label label-danger">Offline</span></td><td>03/01/2016</td></tr><tr><td>Boss</td><td>Alloy_</td><td>1418</td><td>1043</td><td>354</td><td class='offline'><span class="label label-danger">Offline</span></td><td>26/12/2015</td></tr><tr><td>Boss</td><td>AnonYmous_</td><td>32976</td><td>5142</td><td>937</td><td class='offline'><span class="label label-danger">Offline</span></td><td>04/01/2016</td></tr><tr><td>Boss</td><td>dJones</td><td>2739</td><td>6152</td><td>1044</td><td class='online'><span class="label label-success">Online</span></td><td>04/01/2016</td></tr><tr><td>Boss</td><td>SHARP</td><td>9015</td><td>1216</td><td>32</td><td class='offline'><span class="label label-danger">Offline</span></td><td>04/01/2016</td></tr><tr><td>Boss</td><td>Steffie</td><td>7888</td><td>6043</td><td>887</td><td class='online'><span class="label label-success">Online</span></td><td>04/01/2016</td></tr><tr><td>Boss</td><td>YOLOvsYODO</td><td>10950</td><td>2703</td><td>385</td><td class='offline'><span class="label label-danger">Offline</span></td><td>04/01/2016</td></tr><tr><td>Member</td><td>Angel_</td><td>8629</td><td>3256</td><td>167</td><td class='offline'><span class="label label-danger">Offline</span></td><td>04/01/2016</td></tr><tr><td>Member</td><td>asad</td><td>2452</td><td>3938</td><td>183</td><td class='offline'><span class="label label-danger">Offline</span></td><td>03/01/2016</td></tr><tr><td>Member</td><td>D3nim</td><td>1285</td><td>3217</td><td>31</td><td class='offline'><span class="label label-danger">Offline</span></td><td>03/01/2016</td></tr><tr><td>Member</td><td>Dell</td><td>5025</td><td>3305</td><td>182</td><td class='offline'><span class="label label-danger">Offline</span></td><td>01/01/2016</td></tr><tr><td>Member</td><td>Habib</td><td>1650</td><td>3860</td><td>36</td><td class='offline'><span class="label 
label-danger">Offline</span></td><td>04/01/2016</td></tr><tr><td>Member</td><td>Iron_MiXx</td><td>2569</td><td>485</td><td>525</td><td class='offline'><span class="label label-danger">Offline</span></td><td>03/01/2016</td></tr><tr><td>Member</td><td>MCool</td><td>4960</td><td>12739</td><td>290</td><td class='online'><span class="label label-success">Online</span></td><td>04/01/2016</td></tr><tr><td>Member</td><td>PREXEN</td><td>127</td><td>3873</td><td>1547</td><td class='offline'><span class="label label-danger">Offline</span></td><td>04/01/2016</td></tr><tr><td>Member</td><td>Sensation_</td><td>2733</td><td>1944</td><td>338</td><td class='offline'><span class="label label-danger">Offline</span></td><td>03/01/2016</td></tr><tr><td>Member</td><td>Wizard_</td><td>2081</td><td>2578</td><td>46</td><td class='offline'><span class="label label-danger">Offline</span></td><td>03/01/2016</td></tr>
</table>
我只想将昵称存储在字符串或字符串数组中,以便我可以使用它与已经给定的字符串进行比较。
我要实现的,就是检查用户输入的nick是否存在于这个table中
我将使用 bool 方法来实现。
解决方法:
我使用了 Tim Schmelter 的代码,下面是我的使用方法:
/// <summary>
/// Downloads http://wsrpg.com/clans/57 and checks whether <paramref name="nick"/>
/// appears in the "nick" column of the first table whose class contains "table".
/// </summary>
/// <param name="nick">Nickname to look for; the comparison is case-sensitive.</param>
/// <returns>
/// <c>true</c> when some row's "nick" value equals <paramref name="nick"/>;
/// <c>false</c> otherwise, including when the page has no matching table.
/// </returns>
private bool Authenticate(string nick)
{
    using (WebClient client = new WebClient())
    {
        string html = client.DownloadString("http://wsrpg.com/clans/57");
        DataTable table = GetTable(html, "table", true);

        // GetTable returns null when no table with the requested class is found;
        // calling AsEnumerable() on null would throw, so treat that as "not a member".
        if (table == null)
        {
            return false;
        }

        // Same ordinal, case-sensitive equality the array-based Contains() used,
        // but without materializing every nickname first.
        return table.AsEnumerable()
                    .Any(r => string.Equals(r.Field<string>("nick"), nick));
    }
}
然后这样调用:
// Pass the value returned by Player.GetName() to Authenticate and keep the boolean result.
bool Authenticated = Authenticate(Player.GetName());
最简单的方法是使用元素的 XPath。例如,我在自己的一个应用程序中解析另一个 table 时是这样写的:
// Read the text of the third cell in the third row of the table with class "output".
// SelectSingleNode returns null when the XPath matches nothing, so use ?. to get
// a null result instead of a NullReferenceException.
string tableResult = htmlDocument.DocumentNode
    .SelectSingleNode("//table[@class='output']/tr[3]/td[3]")
    ?.InnerText;
您可以使用下面的方法解析 HTML,并用 class 属性中包含给定名称的第一个 table 来填充 DataTable:
/// <summary>
/// Parses <paramref name="html"/> and copies the first table whose class attribute
/// contains <paramref name="tableClass"/> into a <see cref="DataTable"/> of string columns.
/// </summary>
/// <param name="html">HTML markup to parse.</param>
/// <param name="tableClass">Text that must occur in the table's class attribute.</param>
/// <param name="firstRowContainsHeader">
/// When <c>true</c>, the cells of the first row supply the column names and that row is
/// not copied as data; when <c>false</c>, columns are named "Column 1", "Column 2", ...
/// </param>
/// <returns>
/// The filled table; an empty table when the matched table has no rows;
/// <c>null</c> when no table matches.
/// </returns>
public static DataTable GetTable(string html, string tableClass, bool firstRowContainsHeader = false)
{
    var doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(html);

    string xpath = string.Format("//table[contains(@class,'{0}')]", tableClass);
    var firstTable = doc.DocumentNode.SelectSingleNode(xpath);
    if (firstTable == null) return null;

    DataTable table = new DataTable();

    // Enumerate the rows once; they are needed for both the header and the data pass.
    var tableRows = firstTable.Descendants("tr").ToList();

    // A table without any <tr> previously made First() throw; return the empty table instead.
    if (tableRows.Count == 0) return table;

    // The first row defines how many columns exist (and their names when it is a header).
    var headerCells = tableRows[0].Descendants()
        .Where(n => n.Name == "td" || n.Name == "th");
    int columnIndex = 0;
    foreach (HtmlNode cell in headerCells)
    {
        string colName = firstRowContainsHeader
            ? cell.InnerText
            : String.Format("Column {0}", ++columnIndex);
        table.Columns.Add(colName, typeof(string));
    }

    foreach (var htmlRow in tableRows.Skip(firstRowContainsHeader ? 1 : 0))
    {
        var cells = htmlRow.Descendants("td").ToList();
        DataRow row = table.Rows.Add();

        // Rows with more cells than columns are truncated; shorter rows leave the rest unset.
        int cellCount = Math.Min(cells.Count, table.Columns.Count);
        for (int i = 0; i < cellCount; i++)
        {
            row.SetField(i, cells[i].InnerText);
        }
    }

    return table;
}
然后您可以使用 LINQ-To-DataTable 检查它是否包含给定的昵称:
// Verbatim string (@"...") is required: in a regular literal "\T" and "\h" are
// unrecognized escape sequences and the line does not compile.
string html = File.ReadAllText(@"C:\Temp\html.txt"); // loading your sample from file
DataTable table = GetTable(html, "table", true);
string nick = "robbie_william"; // input example
// True when any row's "nick" value equals the input, ignoring case.
bool isContained = table.AsEnumerable()
    .Any(r => nick.Equals(r.Field<string>("nick"), StringComparison.InvariantCultureIgnoreCase));
如果您只想填充 string[] 或 List<string>:
// Collect every value of the "nick" column into an array; call ToList() instead for a List<string>.
string[] nicks = (from r in table.AsEnumerable()
                  select r.Field<string>("nick")).ToArray(); // or ToList()
我正在开发一个 C# 应用程序,我需要在其中解析 table 中的特定行并将其不同值与给定字符串进行比较。
我要解析的URL是 http://wsrpg.com/clans/57
<table cellspacing="0" cellpadding="0" border="0" class="table">
<thead>
<tr>
<th width="100">Rank</th>
<th width="150">Nick</th>
<th width="100">RCASH</th>
<th width="150">Activity in December</th>
<th width="100">Comportment</th>
<th width="100">Online</th>
<th width="150">Last Login</th>
</tr>
</thead>
<tr><td>Leader</td><td>robbie_william</td><td>351</td><td>1024</td><td>1195</td><td class='offline'><span class="label label-danger">Offline</span></td><td>03/01/2016</td></tr><tr><td>Boss</td><td>Alloy_</td><td>1418</td><td>1043</td><td>354</td><td class='offline'><span class="label label-danger">Offline</span></td><td>26/12/2015</td></tr><tr><td>Boss</td><td>AnonYmous_</td><td>32976</td><td>5142</td><td>937</td><td class='offline'><span class="label label-danger">Offline</span></td><td>04/01/2016</td></tr><tr><td>Boss</td><td>dJones</td><td>2739</td><td>6152</td><td>1044</td><td class='online'><span class="label label-success">Online</span></td><td>04/01/2016</td></tr><tr><td>Boss</td><td>SHARP</td><td>9015</td><td>1216</td><td>32</td><td class='offline'><span class="label label-danger">Offline</span></td><td>04/01/2016</td></tr><tr><td>Boss</td><td>Steffie</td><td>7888</td><td>6043</td><td>887</td><td class='online'><span class="label label-success">Online</span></td><td>04/01/2016</td></tr><tr><td>Boss</td><td>YOLOvsYODO</td><td>10950</td><td>2703</td><td>385</td><td class='offline'><span class="label label-danger">Offline</span></td><td>04/01/2016</td></tr><tr><td>Member</td><td>Angel_</td><td>8629</td><td>3256</td><td>167</td><td class='offline'><span class="label label-danger">Offline</span></td><td>04/01/2016</td></tr><tr><td>Member</td><td>asad</td><td>2452</td><td>3938</td><td>183</td><td class='offline'><span class="label label-danger">Offline</span></td><td>03/01/2016</td></tr><tr><td>Member</td><td>D3nim</td><td>1285</td><td>3217</td><td>31</td><td class='offline'><span class="label label-danger">Offline</span></td><td>03/01/2016</td></tr><tr><td>Member</td><td>Dell</td><td>5025</td><td>3305</td><td>182</td><td class='offline'><span class="label label-danger">Offline</span></td><td>01/01/2016</td></tr><tr><td>Member</td><td>Habib</td><td>1650</td><td>3860</td><td>36</td><td class='offline'><span class="label 
label-danger">Offline</span></td><td>04/01/2016</td></tr><tr><td>Member</td><td>Iron_MiXx</td><td>2569</td><td>485</td><td>525</td><td class='offline'><span class="label label-danger">Offline</span></td><td>03/01/2016</td></tr><tr><td>Member</td><td>MCool</td><td>4960</td><td>12739</td><td>290</td><td class='online'><span class="label label-success">Online</span></td><td>04/01/2016</td></tr><tr><td>Member</td><td>PREXEN</td><td>127</td><td>3873</td><td>1547</td><td class='offline'><span class="label label-danger">Offline</span></td><td>04/01/2016</td></tr><tr><td>Member</td><td>Sensation_</td><td>2733</td><td>1944</td><td>338</td><td class='offline'><span class="label label-danger">Offline</span></td><td>03/01/2016</td></tr><tr><td>Member</td><td>Wizard_</td><td>2081</td><td>2578</td><td>46</td><td class='offline'><span class="label label-danger">Offline</span></td><td>03/01/2016</td></tr>
</table>
我只想将昵称存储在字符串或字符串数组中,以便我可以使用它与已经给定的字符串进行比较。
我要实现的,就是检查用户输入的nick是否存在于这个table中
我将使用 bool 方法来实现。
解决方法: 我使用了 Tim Schmelter 的代码,下面是我的使用方法:
/// <summary>
/// Downloads http://wsrpg.com/clans/57 and checks whether <paramref name="nick"/>
/// appears in the "nick" column of the first table whose class contains "table".
/// </summary>
/// <param name="nick">Nickname to look for; the comparison is case-sensitive.</param>
/// <returns>
/// <c>true</c> when some row's "nick" value equals <paramref name="nick"/>;
/// <c>false</c> otherwise, including when the page has no matching table.
/// </returns>
private bool Authenticate(string nick)
{
    using (WebClient client = new WebClient())
    {
        string html = client.DownloadString("http://wsrpg.com/clans/57");
        DataTable table = GetTable(html, "table", true);

        // GetTable returns null when no table with the requested class is found;
        // calling AsEnumerable() on null would throw, so treat that as "not a member".
        if (table == null)
        {
            return false;
        }

        // Same ordinal, case-sensitive equality the array-based Contains() used,
        // but without materializing every nickname first.
        return table.AsEnumerable()
                    .Any(r => string.Equals(r.Field<string>("nick"), nick));
    }
}
然后这样调用:
bool Authenticated = Authenticate(Player.GetName());
最简单的方法是使用元素的 XPath。例如,我在自己的一个应用程序中解析另一个 table 时是这样写的:
// Read the text of the third cell in the third row of the table with class "output".
// SelectSingleNode returns null when the XPath matches nothing, so use ?. to get
// a null result instead of a NullReferenceException.
string tableResult = htmlDocument.DocumentNode
    .SelectSingleNode("//table[@class='output']/tr[3]/td[3]")
    ?.InnerText;
您可以使用下面的方法解析 HTML,并用 class 属性中包含给定名称的第一个 table 来填充 DataTable:
/// <summary>
/// Parses <paramref name="html"/> and copies the first table whose class attribute
/// contains <paramref name="tableClass"/> into a <see cref="DataTable"/> of string columns.
/// </summary>
/// <param name="html">HTML markup to parse.</param>
/// <param name="tableClass">Text that must occur in the table's class attribute.</param>
/// <param name="firstRowContainsHeader">
/// When <c>true</c>, the cells of the first row supply the column names and that row is
/// not copied as data; when <c>false</c>, columns are named "Column 1", "Column 2", ...
/// </param>
/// <returns>
/// The filled table; an empty table when the matched table has no rows;
/// <c>null</c> when no table matches.
/// </returns>
public static DataTable GetTable(string html, string tableClass, bool firstRowContainsHeader = false)
{
    var doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(html);

    string xpath = string.Format("//table[contains(@class,'{0}')]", tableClass);
    var firstTable = doc.DocumentNode.SelectSingleNode(xpath);
    if (firstTable == null) return null;

    DataTable table = new DataTable();

    // Enumerate the rows once; they are needed for both the header and the data pass.
    var tableRows = firstTable.Descendants("tr").ToList();

    // A table without any <tr> previously made First() throw; return the empty table instead.
    if (tableRows.Count == 0) return table;

    // The first row defines how many columns exist (and their names when it is a header).
    var headerCells = tableRows[0].Descendants()
        .Where(n => n.Name == "td" || n.Name == "th");
    int columnIndex = 0;
    foreach (HtmlNode cell in headerCells)
    {
        string colName = firstRowContainsHeader
            ? cell.InnerText
            : String.Format("Column {0}", ++columnIndex);
        table.Columns.Add(colName, typeof(string));
    }

    foreach (var htmlRow in tableRows.Skip(firstRowContainsHeader ? 1 : 0))
    {
        var cells = htmlRow.Descendants("td").ToList();
        DataRow row = table.Rows.Add();

        // Rows with more cells than columns are truncated; shorter rows leave the rest unset.
        int cellCount = Math.Min(cells.Count, table.Columns.Count);
        for (int i = 0; i < cellCount; i++)
        {
            row.SetField(i, cells[i].InnerText);
        }
    }

    return table;
}
然后您可以使用 LINQ-To-DataTable 检查它是否包含给定的昵称:
// Verbatim string (@"...") is required: in a regular literal "\T" and "\h" are
// unrecognized escape sequences and the line does not compile.
string html = File.ReadAllText(@"C:\Temp\html.txt"); // loading your sample from file
DataTable table = GetTable(html, "table", true);
string nick = "robbie_william"; // input example
// True when any row's "nick" value equals the input, ignoring case.
bool isContained = table.AsEnumerable()
    .Any(r => nick.Equals(r.Field<string>("nick"), StringComparison.InvariantCultureIgnoreCase));
如果您只想填充 string[] 或 List<string>:
// Collect every value of the "nick" column into an array; call ToList() instead for a List<string>.
string[] nicks = (from r in table.AsEnumerable()
                  select r.Field<string>("nick")).ToArray(); // or ToList()