C# 只在 foreach 循环中获取第一个 html 节点

C# Only getting first html node in a foreach loop

我正在使用 HTML Agility Pack,尝试把网站中的一些数据解析到一个 DataTable 中。我已经成功生成了第一个节点(第一行数据),但在生成后续球员的信息时遇到了问题。

  // Build a five-column table from the CBS Sports daily injury page and bind it to the grid.
            DataTable dt5 = new DataTable();
            foreach (string columnName in new[] { "Team", "Name", "Position", "Injury", "Status" })
            {
                dt5.Columns.Add(columnName);
            }

            var doc = new HtmlWeb().Load("https://www.cbssports.com/nba/injuries/daily/");
            DataRow row;

            // Matches CRLF, a lone CR, a lone LF, or a space; every match is stripped from the cell text.
            const string whitespacePattern = @"\r\n?|\n| ";

            // Exactly one DataRow is produced per element whose id is "TableBase".
            foreach (var tableNode in doc.DocumentNode.SelectNodes("//*[@id='TableBase']"))
            {
                row = dt5.NewRow();

                // Each XPath below is limited to tr[1], i.e. a tr that is the first tr child
                // of its parent, so later rows under the same parent are never read.
                // When several nodes match, the value of the last one is what stays in the column.

                // Team: first cell.
                foreach (var teamCell in tableNode.SelectNodes(".//tr[1]//td[1]"))
                {
                    row["Team"] = Regex.Replace(teamCell.InnerText, whitespacePattern, "");
                }

                // Player name: span inside the second cell.
                foreach (var nameSpan in tableNode.SelectNodes(".//tr[1]//td[2]//span"))
                {
                    row["Name"] = Regex.Replace(nameSpan.InnerText, whitespacePattern, "");
                }

                // Player position: third cell.
                foreach (var positionCell in tableNode.SelectNodes(".//tr[1]//td[3]"))
                {
                    row["Position"] = Regex.Replace(positionCell.InnerText, whitespacePattern, "");
                }

                // Player injury: fourth cell.
                foreach (var injuryCell in tableNode.SelectNodes(".//tr[1]//td[4]"))
                {
                    row["Injury"] = Regex.Replace(injuryCell.InnerText, whitespacePattern, "");
                }

                // Player status: fifth cell.
                foreach (var statusCell in tableNode.SelectNodes(".//tr[1]//td[5]"))
                {
                    row["Status"] = Regex.Replace(statusCell.InnerText, whitespacePattern, "");
                }

                dt5.Rows.Add(row);
            }

            dataGridView3.DataSource = dt5;

只是想知道您是否尝试过使用 Descendants()?一年前我遇到过类似的问题,具体细节记不太清了,但我记得写法大致如下:

// Visit only the elements whose id attribute is exactly "TableBase".
// The second argument of GetAttributeValue is the fallback returned when the
// attribute is missing, so it must be a neutral value that is then compared
// against the wanted id; passing "TableBase" as the fallback would make every
// element without an id pass the filter.
foreach(HtmlNode node in htmlDoc.DocumentNode.Descendants().Where(o => o.GetAttributeValue("id", "") == "TableBase")) {

}

试试下面这段代码。我刚刚在一个控制台应用程序中测试过,可以正常运行。

 /// <summary>
 /// Downloads the CBS Sports daily NBA injury page, copies the first five cells of
 /// every table row that has at least five td cells into a DataTable
 /// (Team, Name, Position, Injury, Status), and prints the first two columns of
 /// each collected row to the console.
 /// </summary>
 public static void Process() {
    // Matches CRLF, a lone CR, a lone LF, or a space; every match is stripped from the cell text.
    const string whitespacePattern = @"\r\n?|\n| ";
    string[] columnNames = { "Team", "Name", "Position", "Injury", "Status" };

    DataTable dt5 = new DataTable();
    foreach (string columnName in columnNames) {
        dt5.Columns.Add(columnName);
    }

    var doc = new HtmlWeb().Load("https://www.cbssports.com/nba/injuries/daily/");

    // SelectNodes returns null (not an empty collection) when nothing matches.
    var tables = doc.DocumentNode.SelectNodes("//*[@id='TableBase']/div/div/table");
    if (tables != null) {
        foreach (HtmlAgilityPack.HtmlNode table in tables) {
            // ".//tr" is evaluated relative to this table. A leading "//" would restart
            // from the document root and return every row of the page for each table,
            // duplicating rows whenever more than one table matches.
            var rows = table.SelectNodes(".//tr");
            if (rows == null) {
                continue;
            }

            foreach (HtmlAgilityPack.HtmlNode tr in rows) {
                // Materialize once so the descendant walk is not repeated per column.
                var cells = tr.DescendantNodes().Where(o => o.Name == "td").ToList();

                // Rows without enough td cells (e.g. header rows) cannot fill all columns.
                if (cells.Count < columnNames.Length) {
                    continue;
                }

                DataRow row = dt5.NewRow();
                for (int i = 0; i < columnNames.Length; i++) {
                    row[columnNames[i]] = Regex.Replace(cells[i].InnerText, whitespacePattern, "");
                }
                dt5.Rows.Add(row);
            }
        }
    }

    foreach (DataRow item in dt5.Rows) {
        Console.WriteLine(item.ItemArray[0] + " " + item.ItemArray[1]);
    }
 }

尤里卡!感谢大家的帮助,这是我的最终解决方案:

    /// <summary>
    /// Loads the CBS Sports NBA injuries page and binds a table to dataGridView3 with
    /// one row per table row that contains a player-name span
    /// (columns: Team, Name, Position, Injury, Status).
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    private void button7_Click(object sender, EventArgs e)
    {
        DataTable dt5 = new DataTable();
        dt5.Columns.Add("Team");
        dt5.Columns.Add("Name");
        dt5.Columns.Add("Position");
        dt5.Columns.Add("Injury");
        dt5.Columns.Add("Status");

        var doc = new HtmlWeb().Load("https://www.cbssports.com/nba/injuries/");

        // SelectNodes returns null (not an empty collection) when nothing matches.
        var products = doc.DocumentNode.SelectNodes("//*[@id='TableBase']");
        if (products != null)
        {
            foreach (HtmlNode product in products)
            {
                // NOTE(review): assumes each TableBase element holds one team's table,
                // so its first team-name span applies to every player row inside it —
                // confirm against the live markup.
                string team = CleanInnerText(product.SelectSingleNode(".//span[@class='TeamLogoNameLockup-name']"));

                // Only rows holding a player-name span are player rows; this skips header rows.
                var playerRows = product.SelectNodes(".//tr[.//span[@class='CellPlayerName--long']]");
                if (playerRows == null)
                {
                    continue;
                }

                foreach (HtmlNode playerRow in playerRows)
                {
                    // A fresh DataRow per player: reusing one row per team would keep only
                    // the last player's values, and adding the same row twice throws.
                    DataRow row = dt5.NewRow();
                    row["Team"] = team;
                    row["Name"] = CleanInnerText(playerRow.SelectSingleNode(".//span[@class='CellPlayerName--long']"));
                    row["Position"] = CleanInnerText(playerRow.SelectSingleNode(".//td[2][contains(@class, 'TableBase-bodyTd')]"));
                    row["Injury"] = CleanInnerText(playerRow.SelectSingleNode(".//td[4][contains(@class, 'TableBase-bodyTd')]"));
                    row["Status"] = CleanInnerText(playerRow.SelectSingleNode(".//td[5][contains(@class, 'TableBase-bodyTd')]"));

                    Console.WriteLine(row["Name"]);
                    dt5.Rows.Add(row);
                }
            }
        }

        dataGridView3.DataSource = dt5;
    }

    /// <summary>
    /// Returns the node's inner text with CR, LF and space characters removed,
    /// or an empty string when the node is null (the XPath matched nothing).
    /// </summary>
    private static string CleanInnerText(HtmlNode node)
    {
        return node == null ? "" : Regex.Replace(node.InnerText, @"\r\n?|\n| ", "");
    }