如何获取给定 HTML 文档或网页中特定 HTML 元素的值(通过 URL)?
How to get the value of a specific HTML element in a given HTML document or webpage (by URL)?
我想把一个包含 <span id="spanID"> value </span> 标记的网页的 URL 传递给类似 setTextBoxText(string url, string id) 的方法。
该方法写在 WPF 应用程序的代码隐藏文件 (MainWindow.xaml.cs) 中,
用于把某个 TextBox 控件的文本设置为该 span 元素的值,而无需加载网页。(例如,跟踪亚马逊商品的价格)
我更喜欢执行JavaScript代码来获取html元素的值,并将wpf控件的内容设置为js代码(函数)的结果
像这样:
// Sketch from the question showing the desired call shape. It is pseudo-code and
// does not compile as written (see the notes on individual members below).
public partial class MainWindow : Window
{
// Address of the page whose element value should be read.
string url = "https://websiteaddress.com/rest";
// NOTE(review): a bare call is not valid at class level; presumably this is meant
// to run from the constructor or an event handler.
setTextBoxText(url, "spanID");
// Intended to obtain the document at `url` and put the value of element `id` into txtPrice.
// NOTE(review): declared static, yet it calls the instance method getHtmlElementValue;
// txtPrice is presumably a TextBox declared in the XAML — confirm.
static void setTextBoxText(string url, string id)
{
// code to get document by given url
txtPrice.Text = getHtmlElementValue(id);
}
// Placeholder for the lookup the question asks about: return the text of the
// element with the given id. The body holds only commented-out JavaScript-style
// pseudo-code, so nothing is returned yet.
string getHtmlElementValue(string id)
{
// what code should be written here?
// any combination of js and c#?
// var result = document.getElementById(id).textContent;
// return result;
}
}
您可以使用 HttpClient 加载该 URL 的 HTML 内容,然后把响应包装到 mshtml.HTMLDocument 中,
这样就能用类似 JavaScript 的语法处理 DOM 对象
(需要引用 Microsoft.mshtml.dll):
/// <summary>
/// The most recently loaded document. It has no initializer, so it stays null until
/// UpdateHtmlDocumentAsync assigns it; GetHtmlElementValueById reads from it.
/// </summary>
private mshtml.HTMLDocument HtmlDocument { get; set; }
/// <summary>
/// Loads the page at <paramref name="url"/> and shows the text of the element
/// identified by <paramref name="id"/> in the txtPrice control.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="id">The id attribute of the element whose text is wanted.</param>
/// <returns>A task that completes after the text box has been updated.</returns>
private async Task SetTextBoxTextAsync(string url, string id)
{
    // The document has to be refreshed before it can be queried.
    await UpdateHtmlDocumentAsync(url);

    txtPrice.Text = GetHtmlElementValueById(id);
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and loads its markup into a new
/// mshtml document, which is then stored in the HtmlDocument property.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>A task that completes once the document has been written.</returns>
/// <exception cref="HttpRequestException">
/// The request failed or the server returned a non-success status code.
/// </exception>
public async Task UpdateHtmlDocumentAsync(string url)
{
    using (HttpClient httpClient = new HttpClient())
    {
        byte[] response = await httpClient.GetByteArrayAsync(url);

        // Decode the entire payload as UTF-8. The previous "Length - 1" count
        // silently dropped the final byte of every response.
        string htmlContent = Encoding.UTF8.GetString(response);

        // The markup is handed to the parser untouched. Entity-decoding the whole page
        // first would turn escaped text such as "&lt;b&gt;" into real tags and corrupt
        // the resulting DOM; the parser already decodes entities inside text nodes.
        var document = new HTMLDocument();

        // Direct cast instead of "as": a failed cast should surface immediately as an
        // InvalidCastException rather than as a later NullReferenceException.
        var writer = (IHTMLDocument2)document;
        writer.write(htmlContent);
        // Close the output stream opened by write() so the document is finalized.
        writer.close();

        // Publish the document only after it has been fully written.
        this.HtmlDocument = document;
    }
}
/// <summary>
/// Returns the inner text of the element with the given id in the loaded document.
/// </summary>
/// <param name="elementId">The id attribute of the element to look up.</param>
/// <returns>
/// The element's inner text, or null when no document has been loaded yet or the
/// document contains no element with that id.
/// </returns>
public string GetHtmlElementValueById(string elementId)
    // Null-conditional access: a missing document or element yields null
    // instead of throwing a NullReferenceException.
    => this.HtmlDocument?.getElementById(elementId)?.innerText;
我想把一个包含 <span id="spanID"> value </span> 标记的网页的 URL 传递给类似 setTextBoxText(string url, string id) 的方法。
该方法写在 WPF 应用程序的代码隐藏文件 (MainWindow.xaml.cs) 中,
用于把某个 TextBox 控件的文本设置为该 span 元素的值,而无需加载网页。(例如,跟踪亚马逊商品的价格)
我更喜欢执行JavaScript代码来获取html元素的值,并将wpf控件的内容设置为js代码(函数)的结果
像这样:
// Sketch from the question showing the desired call shape. It is pseudo-code and
// does not compile as written (see the notes on individual members below).
public partial class MainWindow : Window
{
// Address of the page whose element value should be read.
string url = "https://websiteaddress.com/rest";
// NOTE(review): a bare call is not valid at class level; presumably this is meant
// to run from the constructor or an event handler.
setTextBoxText(url, "spanID");
// Intended to obtain the document at `url` and put the value of element `id` into txtPrice.
// NOTE(review): declared static, yet it calls the instance method getHtmlElementValue;
// txtPrice is presumably a TextBox declared in the XAML — confirm.
static void setTextBoxText(string url, string id)
{
// code to get document by given url
txtPrice.Text = getHtmlElementValue(id);
}
// Placeholder for the lookup the question asks about: return the text of the
// element with the given id. The body holds only commented-out JavaScript-style
// pseudo-code, so nothing is returned yet.
string getHtmlElementValue(string id)
{
// what code should be written here?
// any combination of js and c#?
// var result = document.getElementById(id).textContent;
// return result;
}
}
您可以使用 HttpClient 加载该 URL 的 HTML 内容,然后把响应包装到 mshtml.HTMLDocument 中,
这样就能用类似 JavaScript 的语法处理 DOM 对象
(需要引用 Microsoft.mshtml.dll):
/// <summary>
/// The most recently loaded document. It has no initializer, so it stays null until
/// UpdateHtmlDocumentAsync assigns it; GetHtmlElementValueById reads from it.
/// </summary>
private mshtml.HTMLDocument HtmlDocument { get; set; }
/// <summary>
/// Loads the page at <paramref name="url"/> and shows the text of the element
/// identified by <paramref name="id"/> in the txtPrice control.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="id">The id attribute of the element whose text is wanted.</param>
/// <returns>A task that completes after the text box has been updated.</returns>
private async Task SetTextBoxTextAsync(string url, string id)
{
    // The document has to be refreshed before it can be queried.
    await UpdateHtmlDocumentAsync(url);

    txtPrice.Text = GetHtmlElementValueById(id);
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and loads its markup into a new
/// mshtml document, which is then stored in the HtmlDocument property.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>A task that completes once the document has been written.</returns>
/// <exception cref="HttpRequestException">
/// The request failed or the server returned a non-success status code.
/// </exception>
public async Task UpdateHtmlDocumentAsync(string url)
{
    using (HttpClient httpClient = new HttpClient())
    {
        byte[] response = await httpClient.GetByteArrayAsync(url);

        // Decode the entire payload as UTF-8. The previous "Length - 1" count
        // silently dropped the final byte of every response.
        string htmlContent = Encoding.UTF8.GetString(response);

        // The markup is handed to the parser untouched. Entity-decoding the whole page
        // first would turn escaped text such as "&lt;b&gt;" into real tags and corrupt
        // the resulting DOM; the parser already decodes entities inside text nodes.
        var document = new HTMLDocument();

        // Direct cast instead of "as": a failed cast should surface immediately as an
        // InvalidCastException rather than as a later NullReferenceException.
        var writer = (IHTMLDocument2)document;
        writer.write(htmlContent);
        // Close the output stream opened by write() so the document is finalized.
        writer.close();

        // Publish the document only after it has been fully written.
        this.HtmlDocument = document;
    }
}
/// <summary>
/// Returns the inner text of the element with the given id in the loaded document.
/// </summary>
/// <param name="elementId">The id attribute of the element to look up.</param>
/// <returns>
/// The element's inner text, or null when no document has been loaded yet or the
/// document contains no element with that id.
/// </returns>
public string GetHtmlElementValueById(string elementId)
    // Null-conditional access: a missing document or element yields null
    // instead of throwing a NullReferenceException.
    => this.HtmlDocument?.getElementById(elementId)?.innerText;