C# 使用 Web 浏览器控件读取 ajax 数据时遇到问题

C# Facing problem to read ajax data using web browser control

这是一个网站 https://www.wsj.com/news/types/newsplus,其数据在运行时通过 ajax 加载。我需要读取所有文章的标题文本。从早上开始,我尝试了很多代码,但都没有成功,因为数据是通过 ajax 加载的。

这是我试过的代码。

// Load the NewsPlus page (300 is passed as AjaxTimeLoadTimeOut, loadurl = true)
// and hand the resulting document to ParseData.
Uri newsPlusUri = new Uri("https://www.wsj.com/news/types/newsplus");
HtmlDocument hd = GetHtmlAjax(newsPlusUri, 300, true);
ParseData(hd);


// Walk every <h3> in the loaded document and read the text of the links inside
// the ones that carry the headline CSS class.
// NOTE(review): the class string is compared exactly, so any change to the
// site's generated class names will make this match nothing.
const string headlineClass = "WSJTheme--headline--unZqjb45 undefined WSJTheme--heading-3--2z_phq5h typography--serif-display--ZXeuhS5E";

HtmlElementCollection main_element = hd.GetElementsByTagName("h3");
if (main_element != null)
{
    foreach (HtmlElement element in main_element)
    {
        // string.Equals returns false for null, so elements without a class
        // attribute are skipped here as well.
        if (!headlineClass.Equals(element.GetAttribute("className")))
        {
            continue;
        }

        // Look the anchors up by TAG name. GetElementsByName filters on the
        // elements' name attribute, so asking it for "a" did not return the
        // <a> links under the headline.
        HtmlElementCollection childLinks = element.GetElementsByTagName("a");
        foreach (HtmlElement childElement in childLinks)
        {
            //grab links and other stuff same way
            string linktxt = childElement.InnerText;
        }
    }
}


// Browser control shared between calls; (re)created whenever GetHtmlAjax is
// called with loadurl == true.
WebBrowser wb = null;

/// <summary>
/// Optionally navigates a WebBrowser control to <paramref name="uri"/>, waits until
/// the control reports <see cref="WebBrowserReadyState.Complete"/>, then keeps
/// processing window messages for a further grace period before returning the document.
/// </summary>
/// <param name="uri">Page to load; only used when <paramref name="loadurl"/> is true.</param>
/// <param name="AjaxTimeLoadTimeOut">Extra time, in milliseconds, to wait after the page has loaded.</param>
/// <param name="loadurl">True to create a new browser and navigate; false to reuse the current one.</param>
/// <returns>The browser's current <see cref="HtmlDocument"/>.</returns>
/// <exception cref="InvalidOperationException">
/// <paramref name="loadurl"/> is false and no browser has been created yet.
/// </exception>
public HtmlDocument GetHtmlAjax(Uri uri, int AjaxTimeLoadTimeOut,bool loadurl)
{
    if (loadurl)
    {
        wb = new WebBrowser();
        wb.ScriptErrorsSuppressed = true;
        wb.Navigate(uri);
    }
    else if (wb == null)
    {
        // Previously this surfaced as a bare NullReferenceException below.
        throw new InvalidOperationException("No page has been loaded yet; call GetHtmlAjax with loadurl = true first.");
    }

    while (wb.ReadyState != WebBrowserReadyState.Complete)
        Application.DoEvents();

    // Keep pumping messages during the grace period instead of a single
    // Thread.Sleep: sleeping the thread that owns the control also stalls the
    // control itself, so the page's ajax callbacks could not run while we waited.
    System.Diagnostics.Stopwatch ajaxWait = System.Diagnostics.Stopwatch.StartNew();
    while (ajaxWait.ElapsedMilliseconds < AjaxTimeLoadTimeOut)
    {
        Application.DoEvents();
        Thread.Sleep(10); // short nap so the wait loop does not spin a CPU core
    }

    Application.DoEvents();
    return wb.Document;
}

我参考了许多链接来处理这个问题,但都失败了。以下是我参考过的链接。

htmlagilitypack and dynamic content issue

Retrieve ajax/JavaScript return results from webpage in c#

How to extract dynamic ajax content from a web page

请告诉我需要在代码中修改什么,才能解析出标题链接的文本。谢谢。

Post 代码来自@aepot

// One HttpClient shared by every request; readonly so the shared instance
// cannot be reassigned after initialization.
private static readonly HttpClient client = new HttpClient();

        /// <summary>
        /// Issues a GET request for <paramref name="url"/> and deserializes the
        /// response body from JSON into <typeparamref name="T"/>.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <returns>The deserialized response body.</returns>
        private static async Task<T> GetJsonPageAsync<T>(string url)
        {
            HttpResponseMessage reply = await client.GetAsync(url, HttpCompletionOption.ResponseHeadersRead);
            try
            {
                // Throws for non-success status codes before the body is read.
                reply.EnsureSuccessStatusCode();
                string json = await reply.Content.ReadAsStringAsync();
                return JsonConvert.DeserializeObject<T>(json);
            }
            finally
            {
                if (reply != null)
                {
                    reply.Dispose();
                }
            }
        }

        /// <summary>
        /// Downloads the NewsPlus collection, requests every article in it
        /// concurrently, and appends each headline followed by a separator
        /// line to <c>txtData</c>.
        /// </summary>
        private async void button1_Click(object sender, EventArgs e)
        {
            try
            {
                // The backslash must be escaped ("\\%22"): "\%" is not a valid C# escape
                // sequence, so the literal did not compile. The request URL is meant to
                // contain a literal backslash in front of the encoded inner quotes.
                dynamic newsList = await GetJsonPageAsync<dynamic>("https://www.wsj.com/news/types/newsplus?id={%22query%22:%22type:=\\%22NewsPlus\\%22%22,%22db%22:%22wsjie,blog,interactivemedia%22}&type=search_collection");
                List<Task<dynamic>> tasks = new List<Task<dynamic>>();
                foreach (dynamic item in newsList.collection)
                {
                    // Start every article request without awaiting it, so they run concurrently.
                    string strUrl = "https://www.wsj.com/news/types/newsplus?id=" + item.id + "&type=article";
                    tasks.Add(GetJsonPageAsync<dynamic>(strUrl));
                }

                dynamic[] newsDataList = await Task.WhenAll(tasks);
                foreach (dynamic newItem in newsDataList)
                {
                    txtData.Text += newItem.data.headline + System.Environment.NewLine;
                    // A stray ';' after the separator string used to split this statement in two.
                    txtData.Text += new string('-', 200) + System.Environment.NewLine;
                }
            }
            catch (Exception ex)
            {
                // Failures are only written to the console, not shown in the form.
                Console.WriteLine(ex.Message);
            }
        }

AJAX 是简单的 GET 或 POST 请求。

使用常规的浏览器开发者工具,我发现该页面只是发送简单的 GET 请求并接收 JSON 数据。JSON 可以被反序列化,也可以通过读取器(reader)逐步浏览。

对于 JSON 解析我使用了 Newtonsoft.Json NuGet 包

这是基于 WinForms 应用程序的简单示例。

/// <summary>
/// Sample form: clicking button1 downloads the WSJ NewsPlus article list as JSON
/// and lists every headline in textBox1.
/// </summary>
public partial class Form1 : Form
{
    // One HttpClient shared by every request made from this form.
    private static readonly HttpClient client = new HttpClient();

    /// <summary>
    /// Issues a GET request for <paramref name="url"/> and deserializes the
    /// response body from JSON into <typeparamref name="T"/>.
    /// </summary>
    /// <param name="url">Address to request.</param>
    /// <returns>The deserialized response body.</returns>
    private async Task<T> GetJsonPageAsync<T>(string url)
    {
        using (HttpResponseMessage response = await client.GetAsync(url, HttpCompletionOption.ResponseHeadersRead))
        {
            // Throws for non-success status codes before the body is read.
            response.EnsureSuccessStatusCode();
            string text = await response.Content.ReadAsStringAsync();
            return JsonConvert.DeserializeObject<T>(text);
        }
    }

    /// <summary>
    /// Initializes the form and configures the process-wide connection settings.
    /// </summary>
    public Form1()
    {
        InitializeComponent();
        ServicePointManager.DefaultConnectionLimit = 10; // to make it faster
        // Restrict outgoing connections to TLS 1.2.
        // NOTE(review): presumably the .NET Framework 4.5.2 fix mentioned in the post - confirm.
        ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12;
    }

    /// <summary>
    /// Downloads the NewsPlus collection, requests every article in it
    /// concurrently, and appends each headline followed by a separator line
    /// to textBox1. On failure the text box shows the exception message instead.
    /// </summary>
    private async void button1_Click(object sender, EventArgs e)
    {
        try
        {
            // The backslash must be escaped ("\\%22"): "\%" is not a valid C# escape
            // sequence, so the literal did not compile. The request URL is meant to
            // contain a literal backslash in front of the encoded inner quotes.
            dynamic newsList = await GetJsonPageAsync<dynamic>("https://www.wsj.com/news/types/newsplus?id={%22query%22:%22type:=\\%22NewsPlus\\%22%22,%22db%22:%22wsjie,blog,interactivemedia%22}&type=search_collection");
            List<Task<dynamic>> tasks = new List<Task<dynamic>>();
            foreach (dynamic item in newsList.collection)
            {
                // Start every article request without awaiting it, so they run concurrently.
                tasks.Add(GetJsonPageAsync<dynamic>($"https://www.wsj.com/news/types/newsplus?id={item.id}&type=article"));
            }
            dynamic[] newsDataList = await Task.WhenAll(tasks);
            foreach (dynamic newItem in newsDataList)
            {
                textBox1.Text += newItem.data.headline + Environment.NewLine;
                textBox1.Text += new string('-', 200) + Environment.NewLine;
            }
        }
        catch (Exception ex)
        {
            textBox1.Text = ex.Message;
        }
    }
}

更新: 为 .NET Framework 4.5.2 添加了修复