如何使用 HtmlAgilityPack 将评论之间的 table 抓取到网格视图中?
How to scrape table between comments into grid view with HtmlAgilityPack?
目前正在尝试使用 htmlagilitypack 将 table 从 this page 抓取到 gridview 中。我相信我的代码成功地从评论之间拉出 table,但是当它构建数据时 table 它说它找不到第 8 列,这显然不应该存在于这种情况下。我对此有些陌生,非常感谢对我做错了什么的解释
private void GetTeamStats()
{
var webGet = new HtmlWeb();
var getPage = webGet.Load("https://www.teamrankings.com/nba/stat/effective-field-goal-pct");
var commentNode = getPage.DocumentNode.SelectNodes("//comment()[contains(.,'table-filters')]/following::*[not(preceding::comment()[contains(.,'main-wrapper')])]");
var commentHtml = commentNode.Select(c1 => c1.SelectSingleNode("//table"));
DataTable dt = new DataTable();
dt.Columns.Add("Rk", typeof(string));
dt.Columns.Add("Team", typeof(string));
dt.Columns.Add("2018", typeof(string));
dt.Columns.Add("Last3", typeof(string));
dt.Columns.Add("Last1", typeof(string));
dt.Columns.Add("Home", typeof(string));
dt.Columns.Add("Away", typeof(string));
dt.Columns.Add("2017", typeof(string));
foreach (var table in commentHtml)
{
foreach (var row in table.SelectNodes("//tr"))
{
var dr = dt.NewRow();
dt.Rows.Add(dr);
int i = 0;
foreach (var cell in row.SelectNodes("//td"))
{
dr[i++] = cell.InnerText;
}
}
gvTeamStats.DataSource = dt;
}
}
异常显示 "System.IndexOutOfRangeException: 'Cannot find column 8.'" 并且被这行代码抛出
dr[i++] = cell.InnerText;
我做了一些修改:
table View source of page 中的结构是:
<table>
<thead>
<tr>
<th>Rank</th>
<th>Team</th>
<th>2018</th>
<th>Last 3</th>
<th>Last 1</th>
<th>Home</th>
<th>Away</th>
<th>2017</th>
</tr>
</thead>
<tbody>
<tr>
<td></td>
</tr>
</table>
var webGet = new HtmlWeb();
var getPage = webGet.Load("https://www.teamrankings.com/nba/stat/effective-field-goal-pct");
var tableHeader = getPage.DocumentNode.SelectNodes("//table/thead/tr");
var tableData = getPage.DocumentNode.SelectNodes("//table/tbody/tr");
DataTable dataTable = new DataTable();
var headers = tableHeader
.Elements("th")
.Select(th => th.InnerText.Trim());
foreach (var header in headers)
{
dataTable.Columns.Add(header);
}
var rows = tableData.Select(tr => tr
.Elements("td")
.Select(td => td.InnerText.Trim())
.ToArray());
foreach (var row in rows)
{
dataTable.Rows.Add(row);
}
// print our datatable
foreach (DataRow dataRow in dataTable.Rows)
{
foreach (var item in dataRow.ItemArray)
{
Console.Write(item + " ");
}
Console.WriteLine();
}
目前正在尝试使用 htmlagilitypack 将 table 从 this page 抓取到 gridview 中。我相信我的代码成功地从评论之间拉出 table,但是当它构建数据时 table 它说它找不到第 8 列,这显然不应该存在于这种情况下。我对此有些陌生,非常感谢对我做错了什么的解释
private void GetTeamStats()
{
var webGet = new HtmlWeb();
var getPage = webGet.Load("https://www.teamrankings.com/nba/stat/effective-field-goal-pct");
var commentNode = getPage.DocumentNode.SelectNodes("//comment()[contains(.,'table-filters')]/following::*[not(preceding::comment()[contains(.,'main-wrapper')])]");
var commentHtml = commentNode.Select(c1 => c1.SelectSingleNode("//table"));
DataTable dt = new DataTable();
dt.Columns.Add("Rk", typeof(string));
dt.Columns.Add("Team", typeof(string));
dt.Columns.Add("2018", typeof(string));
dt.Columns.Add("Last3", typeof(string));
dt.Columns.Add("Last1", typeof(string));
dt.Columns.Add("Home", typeof(string));
dt.Columns.Add("Away", typeof(string));
dt.Columns.Add("2017", typeof(string));
foreach (var table in commentHtml)
{
foreach (var row in table.SelectNodes("//tr"))
{
var dr = dt.NewRow();
dt.Rows.Add(dr);
int i = 0;
foreach (var cell in row.SelectNodes("//td"))
{
dr[i++] = cell.InnerText;
}
}
gvTeamStats.DataSource = dt;
}
}
异常显示 "System.IndexOutOfRangeException: 'Cannot find column 8.'" 并且被这行代码抛出
dr[i++] = cell.InnerText;
我做了一些修改:
table View source of page 中的结构是:
<table>
<thead>
<tr>
<th>Rank</th>
<th>Team</th>
<th>2018</th>
<th>Last 3</th>
<th>Last 1</th>
<th>Home</th>
<th>Away</th>
<th>2017</th>
</tr>
</thead>
<tbody>
<tr>
<td></td>
</tr>
</table>
var webGet = new HtmlWeb();
var getPage = webGet.Load("https://www.teamrankings.com/nba/stat/effective-field-goal-pct");
var tableHeader = getPage.DocumentNode.SelectNodes("//table/thead/tr");
var tableData = getPage.DocumentNode.SelectNodes("//table/tbody/tr");
DataTable dataTable = new DataTable();
var headers = tableHeader
.Elements("th")
.Select(th => th.InnerText.Trim());
foreach (var header in headers)
{
dataTable.Columns.Add(header);
}
var rows = tableData.Select(tr => tr
.Elements("td")
.Select(td => td.InnerText.Trim())
.ToArray());
foreach (var row in rows)
{
dataTable.Rows.Add(row);
}
// print our datatable
foreach (DataRow dataRow in dataTable.Rows)
{
foreach (var item in dataRow.ItemArray)
{
Console.Write(item + " ");
}
Console.WriteLine();
}