使用 HttpWebRequest 在 C# 中获取 http 响应时如何处理 JavaScript?
How to deal with JavaScript when fetching http Response in C# using HttpWebRequest?
我有一段 C# 代码(这是一个托管在 IIS 上的 Web 应用程序),其中使用 HttpWebRequest 获取 HttpWebResponse。我用它向任意网站发出请求,以字符串形式获得响应,然后分析该响应字符串。但最近我遇到的情况是:页面的数据是在浏览器加载页面之后,由 JavaScript 执行获取的,因此响应字符串中并不包含这些数据。
我尝试在 Firebug 中调试,发现响应的底部有一个 JavaScript 函数,它会在页面加载后更新 DOM 元素。有什么方法可以在我的 C# 代码中做同样的事情吗?我在网上搜索过,但至今没有找到解决方案。
以下是我使用的代码:
// Logs in by POSTing the credentials as a URL-encoded form, replaying the
// cookies returned by an earlier request, and reads the resulting page into
// responseString. `request`, `cookieList`, `txtUserID` and `txtPwd` are
// defined outside this snippet.
// NOTE(review): `response` is left open because code outside this snippet may
// still read it; close it once it is no longer needed.
HttpWebResponse response = (HttpWebResponse)request.GetResponse();
foreach (Cookie cook in response.Cookies)
{
    Cookie cookie = new Cookie();
    cookie.Name = cook.Name;
    cookie.Value = cook.Value;
    cookie.Domain = cook.Domain;
    cookie.Expires = DateTime.Now.AddDays(10);
    cookieList.Add(cookie);
}

// Escape the user-supplied values so characters such as '&', '=', '+' or '%'
// cannot corrupt the form body. Plain concatenation is used instead of
// string.Format, which would throw FormatException on '{' or '}' in the input.
string postData = "username=" + Uri.EscapeDataString(txtUserID.Text)
    + "&password=" + Uri.EscapeDataString(txtPwd.Text)
    + "&url=https://example.com/&game=";
byte[] postBytes = Encoding.UTF8.GetBytes(postData);

HttpWebRequest req = (HttpWebRequest)WebRequest.Create("https://login.example.com/Login/authenticate");
req.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0";
req.KeepAlive = true;
// The Accept-Encoding header below advertises both gzip and deflate, so both
// must be decompressed automatically.
req.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

// Replay the cookies collected above on the login request.
req.CookieContainer = new CookieContainer();
foreach (Cookie cook in cookieList)
{
    Cookie cookie = new Cookie();
    cookie.Name = cook.Name;
    cookie.Value = cook.Value;
    cookie.Domain = cook.Domain;
    cookie.Expires = DateTime.Now.AddDays(10);
    req.CookieContainer.Add(cookie);
}

req.Headers.Add("Accept-Encoding", "gzip, deflate");
req.Headers.Add("Accept-Language", "en-GB,en-US;q=0.8,en;q=0.6");
req.Method = "POST";
req.Host = "login.example.com";
req.Referer = "https://login.example.com/Login/logout";
req.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
req.ContentType = "application/x-www-form-urlencoded;";
req.ContentLength = postBytes.Length;

// Write exactly the bytes that ContentLength was computed from, so the
// declared length and the body can never disagree.
using (Stream requestStream = req.GetRequestStream())
{
    requestStream.Write(postBytes, 0, postBytes.Length);
}

// The using blocks release the response, stream and reader even when reading
// throws. UTF-8 is a superset of ASCII, so ASCII pages decode as before while
// non-ASCII characters are no longer replaced with '?'.
string responseString;
using (HttpWebResponse myHttpWebResponse = (HttpWebResponse)req.GetResponse())
using (Stream responseStream = myHttpWebResponse.GetResponseStream())
using (StreamReader myStreamReader = new StreamReader(responseStream, Encoding.UTF8))
{
    responseString = myStreamReader.ReadToEnd();
}
我终于找到了满足我需求的简单解决方案。以下是我参考的链接:
Link to tutorial
以下是将获得结果的代码:
首先您需要导入以下内容:
using System.Drawing;
using OpenQA.Selenium;
using OpenQA.Selenium.PhantomJS;
using System.Text.RegularExpressions;
using System.IO;
using HtmlAgilityPack;
现在代码:
// Drives a headless PhantomJS browser so that the page's JavaScript is
// executed, fills in the login form, and captures the page source after the
// login button is clicked.
var options = new PhantomJSOptions();
var driver = new PhantomJSDriver(options);
string source;
try
{
    driver.Manage().Window.Size = new Size(1360, 728);
    driver.Navigate().GoToUrl("https://example.com/");

    // The driver can now provide what is needed (it executes the page's script).
    // Get the source of the page.
    source = driver.PageSource;

    // Locate the login form elements in the DOM.
    var pathElement1 = driver.FindElementByName("username");
    var pathElement2 = driver.FindElementByName("password");
    var pathElement3 = driver.FindElementByXPath("//button[@class='SubmitButton']");

    pathElement1.Clear();
    pathElement1.SendKeys("username");
    pathElement2.Clear();
    pathElement2.SendKeys("password");
    pathElement3.Click();

    // Now get the page source after the login button click.
    // NOTE(review): PageSource is read immediately after Click(); if the login
    // navigates or updates the DOM asynchronously, an explicit wait may be
    // needed here - confirm against the target site.
    source = driver.PageSource;
}
finally
{
    // Always shut the browser session down, even when a lookup or click
    // throws; otherwise the browser process started by the driver is left
    // running.
    driver.Quit();
}
我有一段 C# 代码(这是一个托管在 IIS 上的 Web 应用程序),其中使用 HttpWebRequest 获取 HttpWebResponse。我用它向任意网站发出请求,以字符串形式获得响应,然后分析该响应字符串。但最近我遇到的情况是:页面的数据是在浏览器加载页面之后,由 JavaScript 执行获取的,因此响应字符串中并不包含这些数据。
我尝试在 Firebug 中调试,发现响应的底部有一个 JavaScript 函数,它会在页面加载后更新 DOM 元素。有什么方法可以在我的 C# 代码中做同样的事情吗?我在网上搜索过,但至今没有找到解决方案。
以下是我使用的代码:
// Logs in by POSTing the credentials as a URL-encoded form, replaying the
// cookies returned by an earlier request, and reads the resulting page into
// responseString. `request`, `cookieList`, `txtUserID` and `txtPwd` are
// defined outside this snippet.
// NOTE(review): `response` is left open because code outside this snippet may
// still read it; close it once it is no longer needed.
HttpWebResponse response = (HttpWebResponse)request.GetResponse();
foreach (Cookie cook in response.Cookies)
{
    Cookie cookie = new Cookie();
    cookie.Name = cook.Name;
    cookie.Value = cook.Value;
    cookie.Domain = cook.Domain;
    cookie.Expires = DateTime.Now.AddDays(10);
    cookieList.Add(cookie);
}

// Escape the user-supplied values so characters such as '&', '=', '+' or '%'
// cannot corrupt the form body. Plain concatenation is used instead of
// string.Format, which would throw FormatException on '{' or '}' in the input.
string postData = "username=" + Uri.EscapeDataString(txtUserID.Text)
    + "&password=" + Uri.EscapeDataString(txtPwd.Text)
    + "&url=https://example.com/&game=";
byte[] postBytes = Encoding.UTF8.GetBytes(postData);

HttpWebRequest req = (HttpWebRequest)WebRequest.Create("https://login.example.com/Login/authenticate");
req.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0";
req.KeepAlive = true;
// The Accept-Encoding header below advertises both gzip and deflate, so both
// must be decompressed automatically.
req.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

// Replay the cookies collected above on the login request.
req.CookieContainer = new CookieContainer();
foreach (Cookie cook in cookieList)
{
    Cookie cookie = new Cookie();
    cookie.Name = cook.Name;
    cookie.Value = cook.Value;
    cookie.Domain = cook.Domain;
    cookie.Expires = DateTime.Now.AddDays(10);
    req.CookieContainer.Add(cookie);
}

req.Headers.Add("Accept-Encoding", "gzip, deflate");
req.Headers.Add("Accept-Language", "en-GB,en-US;q=0.8,en;q=0.6");
req.Method = "POST";
req.Host = "login.example.com";
req.Referer = "https://login.example.com/Login/logout";
req.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
req.ContentType = "application/x-www-form-urlencoded;";
req.ContentLength = postBytes.Length;

// Write exactly the bytes that ContentLength was computed from, so the
// declared length and the body can never disagree.
using (Stream requestStream = req.GetRequestStream())
{
    requestStream.Write(postBytes, 0, postBytes.Length);
}

// The using blocks release the response, stream and reader even when reading
// throws. UTF-8 is a superset of ASCII, so ASCII pages decode as before while
// non-ASCII characters are no longer replaced with '?'.
string responseString;
using (HttpWebResponse myHttpWebResponse = (HttpWebResponse)req.GetResponse())
using (Stream responseStream = myHttpWebResponse.GetResponseStream())
using (StreamReader myStreamReader = new StreamReader(responseStream, Encoding.UTF8))
{
    responseString = myStreamReader.ReadToEnd();
}
我终于找到了满足我需求的简单解决方案。以下是我参考的链接:Link to tutorial
以下是将获得结果的代码:
首先您需要导入以下内容:
using System.Drawing;
using OpenQA.Selenium;
using OpenQA.Selenium.PhantomJS;
using System.Text.RegularExpressions;
using System.IO;
using HtmlAgilityPack;
现在代码:
// Drives a headless PhantomJS browser so that the page's JavaScript is
// executed, fills in the login form, and captures the page source after the
// login button is clicked.
var options = new PhantomJSOptions();
var driver = new PhantomJSDriver(options);
string source;
try
{
    driver.Manage().Window.Size = new Size(1360, 728);
    driver.Navigate().GoToUrl("https://example.com/");

    // The driver can now provide what is needed (it executes the page's script).
    // Get the source of the page.
    source = driver.PageSource;

    // Locate the login form elements in the DOM.
    var pathElement1 = driver.FindElementByName("username");
    var pathElement2 = driver.FindElementByName("password");
    var pathElement3 = driver.FindElementByXPath("//button[@class='SubmitButton']");

    pathElement1.Clear();
    pathElement1.SendKeys("username");
    pathElement2.Clear();
    pathElement2.SendKeys("password");
    pathElement3.Click();

    // Now get the page source after the login button click.
    // NOTE(review): PageSource is read immediately after Click(); if the login
    // navigates or updates the DOM asynchronously, an explicit wait may be
    // needed here - confirm against the target site.
    source = driver.PageSource;
}
finally
{
    // Always shut the browser session down, even when a lookup or click
    // throws; otherwise the browser process started by the driver is left
    // running.
    driver.Quit();
}