如果网站返回的内容显示为“未加载”,我该如何通过 HTML 抓取获取某些信息?
How would I grab certain information from HTML scraping, if the website returns it as not loaded?
当我将 HTML 写入文件并打开该文件时,它显示 "The inventory is not available"。我尝试过设置用户代理(User-Agent),但没有成功。有人能帮帮我吗?
我希望的结果是获取整个库存,并将其库存中所需页面的列表打印到列表框中。(Windows 窗体应用程序,HtmlAgilityPack)
namespace projectWFA
{
    /// <summary>
    /// Main form. On button click it loads the profile page built from
    /// <see cref="urlBase"/> plus the text box content and, when the persona
    /// name is present, loads the profile's inventory page and writes its
    /// markup to a file.
    /// </summary>
    public partial class Form1 : Form
    {
        public string selectedCurrency;
        public string urlBase = "http://www.steamcommunity.com/id/";
        public string text_username;

        // HtmlAgilityPack state shared between handlers.
        public HtmlWeb htmlweb;
        public HtmlAgilityPack.HtmlDocument htmldocument_pageInventory;
        public HtmlAgilityPack.HtmlDocument htmldocument;

        /// <summary>
        /// Builds the form, selects the first entry of each combo box and
        /// creates the <see cref="HtmlWeb"/> loader.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
            comboboxCurrency.SelectedIndex = 0;
            comboboxPlaceHolder1.SelectedIndex = 0;
            comboboxPlaceHolder2.SelectedIndex = 0;

            // WEB STUFF
            htmlweb = new HtmlWeb();
        }

        /// <summary>
        /// Stores the text of the currently selected currency entry and logs it.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            selectedCurrency = comboboxCurrency.Items[comboboxCurrency.SelectedIndex].ToString();
            Console.Out.WriteLine("selCur: " + selectedCurrency);
        }

        /// <summary>
        /// Loads the profile page for the entered profile link. When the
        /// persona-name node is found, shows the user name, clears the item
        /// list, loads the inventory page and writes its HTML to a file.
        /// Otherwise reports that the profile could not be found.
        /// </summary>
        private void buttonAccept_Click(object sender, EventArgs e)
        {
            labelPrintUsername.Text = "Loading webpage.. Please wait..";

            // Build the profile URL once; it is reused for the log line and the inventory URL.
            string profileUrl = urlBase + textboxProfileLink.Text;

            // LOAD WEBPAGE
            htmldocument = htmlweb.Load(profileUrl);
            Console.Out.WriteLine("Link: " + profileUrl);

            // NOTE(review): HtmlWeb.Load appears to hand back a document even when the
            // profile does not exist, so the persona-name node is used as the real
            // "profile exists" signal - confirm against the HtmlAgilityPack docs.
            HtmlNode node_username = null;
            if (htmldocument != null)
            {
                node_username = htmldocument.DocumentNode.SelectSingleNode("//span[@class='actual_persona_name']");
            }

            if (node_username == null)
            {
                // Previously this case left the label stuck on the "Loading" text
                // and reported nothing.
                Console.Out.WriteLine("Couldn't find profile");
                labelPrintUsername.Text = "Couldn't find profile";
                return;
            }

            Console.Out.WriteLine("Found profile");
            text_username = node_username.InnerText;
            labelPrintUsername.Text = text_username + "'s Inventory";
            listboxItems.Items.Clear();

            htmldocument_pageInventory = htmlweb.Load(profileUrl + "/inventory/");

            // Write the fetched inventory markup to disk so it can be inspected.
            System.IO.File.WriteAllText(@"C:\Users\...\Desktop\asd.html", htmldocument_pageInventory.DocumentNode.InnerHtml);
        }
    }
}
要像使用浏览器一样获取页面,请改用:
// Download the page markup as a string. `url` must be supplied by the surrounding code.
string data;
using (WebClient client = new WebClient())
{
    data = client.DownloadString(url);
}

// Parse the downloaded string directly. This avoids re-encoding it to bytes and
// wrapping it in a MemoryStream (which was never disposed) just to decode it again.
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
doc.LoadHtml(data);
当我将 HTML 写入文件并打开该文件时,它显示 "The inventory is not available"。我尝试过设置用户代理(User-Agent),但没有成功。有人能帮帮我吗?我希望的结果是获取整个库存,并将其库存中所需页面的列表打印到列表框中。(Windows 窗体应用程序,HtmlAgilityPack)
namespace projectWFA
{
    /// <summary>
    /// Main form. On button click it loads the profile page built from
    /// <see cref="urlBase"/> plus the text box content and, when the persona
    /// name is present, loads the profile's inventory page and writes its
    /// markup to a file.
    /// </summary>
    public partial class Form1 : Form
    {
        public string selectedCurrency;
        public string urlBase = "http://www.steamcommunity.com/id/";
        public string text_username;

        // HtmlAgilityPack state shared between handlers.
        public HtmlWeb htmlweb;
        public HtmlAgilityPack.HtmlDocument htmldocument_pageInventory;
        public HtmlAgilityPack.HtmlDocument htmldocument;

        /// <summary>
        /// Builds the form, selects the first entry of each combo box and
        /// creates the <see cref="HtmlWeb"/> loader.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
            comboboxCurrency.SelectedIndex = 0;
            comboboxPlaceHolder1.SelectedIndex = 0;
            comboboxPlaceHolder2.SelectedIndex = 0;

            // WEB STUFF
            htmlweb = new HtmlWeb();
        }

        /// <summary>
        /// Stores the text of the currently selected currency entry and logs it.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            selectedCurrency = comboboxCurrency.Items[comboboxCurrency.SelectedIndex].ToString();
            Console.Out.WriteLine("selCur: " + selectedCurrency);
        }

        /// <summary>
        /// Loads the profile page for the entered profile link. When the
        /// persona-name node is found, shows the user name, clears the item
        /// list, loads the inventory page and writes its HTML to a file.
        /// Otherwise reports that the profile could not be found.
        /// </summary>
        private void buttonAccept_Click(object sender, EventArgs e)
        {
            labelPrintUsername.Text = "Loading webpage.. Please wait..";

            // Build the profile URL once; it is reused for the log line and the inventory URL.
            string profileUrl = urlBase + textboxProfileLink.Text;

            // LOAD WEBPAGE
            htmldocument = htmlweb.Load(profileUrl);
            Console.Out.WriteLine("Link: " + profileUrl);

            // NOTE(review): HtmlWeb.Load appears to hand back a document even when the
            // profile does not exist, so the persona-name node is used as the real
            // "profile exists" signal - confirm against the HtmlAgilityPack docs.
            HtmlNode node_username = null;
            if (htmldocument != null)
            {
                node_username = htmldocument.DocumentNode.SelectSingleNode("//span[@class='actual_persona_name']");
            }

            if (node_username == null)
            {
                // Previously this case left the label stuck on the "Loading" text
                // and reported nothing.
                Console.Out.WriteLine("Couldn't find profile");
                labelPrintUsername.Text = "Couldn't find profile";
                return;
            }

            Console.Out.WriteLine("Found profile");
            text_username = node_username.InnerText;
            labelPrintUsername.Text = text_username + "'s Inventory";
            listboxItems.Items.Clear();

            htmldocument_pageInventory = htmlweb.Load(profileUrl + "/inventory/");

            // Write the fetched inventory markup to disk so it can be inspected.
            System.IO.File.WriteAllText(@"C:\Users\...\Desktop\asd.html", htmldocument_pageInventory.DocumentNode.InnerHtml);
        }
    }
}
要像使用浏览器一样获取页面,请改用:
// Download the page markup as a string. `url` must be supplied by the surrounding code.
string data;
using (WebClient client = new WebClient())
{
    data = client.DownloadString(url);
}

// Parse the downloaded string directly. This avoids re-encoding it to bytes and
// wrapping it in a MemoryStream (which was never disposed) just to decode it again.
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
doc.LoadHtml(data);