无法使用 HttpWebRequest 或 HtmlAgilityPack 获取数据

Unable to fetch data using HttpWebRequest or HtmlAgilityPack

我正在尝试用 C# 为 NSE 制作网络抓取工具。该代码适用于其他站点,但是当 https://www.nseindia.com/ 上的 运行 时,它给出错误 - 发送请求时发生错误。无法从 t运行 运动连接读取数据:操作超时。

我尝试了两种不同的方法 Try1() 和 Try2()。 谁能告诉我我的代码中缺少什么?

class Program
{
    public void Try1() {
        HtmlWeb web = new HtmlWeb();
        HttpStatusCode statusCode = HttpStatusCode.OK;

        web.UserAgent = GetUserAgent();

        web.PostResponse = (request, response) =>
        {
            if (response != null)
            {
                statusCode = response.StatusCode;
                Console.WriteLine("Status Code: " + statusCode);
            }
        };

        Task<HtmlDocument> task = web.LoadFromWebAsync(GetURL());
        HtmlDocument document = task.Result;
    }

    public void Try2() {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(GetURL());
        request.UserAgent = GetUserAgent();
        request.Accept= "*/*;";

        using (var response = (HttpWebResponse)(request.GetResponse()))
        {
            HttpStatusCode code = response.StatusCode;
            if (code == HttpStatusCode.OK)
            {
                using (StreamReader streamReader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
                {
                    HtmlDocument htmlDoc = new HtmlDocument();
                    htmlDoc.OptionFixNestedTags = true;
                    htmlDoc.Load(streamReader);
                    Console.WriteLine("Document Loaded.");
                }
            }
        }
    }

    private string GetURL() {
        // return "https://html-agility-pack.net/";
        return "https://www.nseindia.com/";
    }

    private string GetUserAgent() {
        return "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36";
    }
}

您对 Accept 和其他人缺少 headers,因此无法回复。 除此之外,我建议您使用 HttpClient 而不是 HttpWebRequest

public static async Task GetHtmlData(string url)
{
    HttpClient httpClient = new HttpClient();
    using (var request = new HttpRequestMessage(HttpMethod.Get, new Uri(url)))
    {
        request.Headers.TryAddWithoutValidation("Accept", "text/html,application/xhtml+xml,application/xml, charset=UTF-8, text/javascript, */*; q=0.01");
        request.Headers.TryAddWithoutValidation("Accept-Encoding", "gzip, deflate, br");
        request.Headers.TryAddWithoutValidation("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36 OPR/67.0.3575.137");
        request.Headers.TryAddWithoutValidation("Accept-Charset", "ISO-8859-1");
        request.Headers.TryAddWithoutValidation("X-Requested-With", "XMLHttpRequest");

        using (var response = await httpClient.SendAsync(request).ConfigureAwait(false))
        {
            response.EnsureSuccessStatusCode();
            using (var responseStream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false))
            using (var decompressedStream = new GZipStream(responseStream, CompressionMode.Decompress))
            using (var streamReader = new StreamReader(decompressedStream))
            {
                var result = await streamReader.ReadToEndAsync().ConfigureAwait(false);

                HtmlDocument htmlDoc = new HtmlDocument();
                htmlDoc.OptionFixNestedTags = true;
                htmlDoc.LoadHtml(result);
                Console.WriteLine(result);
                Console.WriteLine("Document Loaded.");
            }
        }
    }

使用
await GetHtmlData("https://www.nseindia.com/");