如何使用 HTMLAgilityPack 获取 data-preconnect-urls 属性的值
How to get the data-preconnect-urls using HTMLAgilityPack
我正在使用 HTMLAgilityPack,想抓取链接 http://www.hundsun.co.jp/,它位于 data-preconnect-urls 属性中。我怎样才能得到它?
<h3>
<a style="display:none" href="/aclk?sa=L&ai=DChcSEwimnPnc5OvQAhWRl70KHcxqCEAYABAA&ei=9hZNWLqlCIyY8gXA04vACg&sig=AOD64_3SZuXd57_-qOs8nnhn8rqw8GlIgw&q=&sqi=2&ved=0ahUKEwi6-PTc5OvQAhUMjLwKHcDpAqgQ0QwIGA&adurl=" id="s0p1c0"></a>
<a href="https://www.google.co.jp/aclk?sa=L&ai=DChcSEwimnPnc5OvQAhWRl70KHcxqCEAYABAA&ei=9hZNWLqlCIyY8gXA04vACg&sig=AOD64_3SZuXd57_-qOs8nnhn8rqw8GlIgw&q=&sqi=2&ved=0ahUKEwi6-PTc5OvQAhUMjLwKHcDpAqgQ0QwIGA&adurl=" id="vs0p1c0" onmousedown="return google.arwt(this)" data-preconnect-urls="http://www.hundsun.co.jp/" jsl="$t t-zxXzjt1d4B0;$x 0;" class="r-iA_xzYkgkx2Y">ブリッジSE募集中 - hundsun.co.jp</a>
你可以这样做:
using System;
using HtmlAgilityPack;
using System.Xml;
/// <summary>
/// Console sample that prints the value of the <c>data-preconnect-urls</c>
/// attribute of every anchor element found in a hard-coded HTML fragment.
/// </summary>
public class Program
{
    /// <summary>
    /// Parses the embedded markup with HtmlAgilityPack, selects each
    /// <c>&lt;a&gt;</c> element that carries a <c>data-preconnect-urls</c>
    /// attribute, and writes the attribute value to standard output.
    /// Writes a notice instead when the selection yields nothing.
    /// </summary>
    public static void Main()
    {
        // Sample markup: two anchors inside an <h3>; only the second one has the attribute.
        string markup = "<html><body><h3><a style=\"display:none\" href=\"/aclk?sa=L&ai=DChcSEwimnPnc5OvQAhWRl70KHcxqCEAYABAA&ei=9hZNWLqlCIyY8gXA04vACg&sig=AOD64_3SZuXd57_-qOs8nnhn8rqw8GlIgw&q=&sqi=2&ved=0ahUKEwi6-PTc5OvQAhUMjLwKHcDpAqgQ0QwIGA&adurl=\" id=\"s0p1c0\"></a><a href=\"https://www.google.co.jp/aclk?sa=L&ai=DChcSEwimnPnc5OvQAhWRl70KHcxqCEAYABAA&ei=9hZNWLqlCIyY8gXA04vACg&sig=AOD64_3SZuXd57_-qOs8nnhn8rqw8GlIgw&q=&sqi=2&ved=0ahUKEwi6-PTc5OvQAhUMjLwKHcDpAqgQ0QwIGA&adurl=\" id=\"vs0p1c0\" onmousedown=\"return google.arwt(this)\" data-preconnect-urls=\"http://www.hundsun.co.jp/\" jsl=\"$t t-zxXzjt1d4B0;$x 0;\" class=\"r-iA_xzYkgkx2Y\">ブリッジSE募集中 - hundsun.co.jp</a></h3></body></html>";

        var document = new HtmlDocument();
        document.LoadHtml(markup);

        // The XPath predicate keeps only anchors that actually have the attribute,
        // so the attribute lookup inside the loop always finds it.
        var anchors = document.DocumentNode.SelectNodes("//a[@data-preconnect-urls]");

        // A null result is treated as "no matching anchors".
        if (anchors != null)
        {
            foreach (var anchor in anchors)
            {
                Console.WriteLine(anchor.Attributes["data-preconnect-urls"].Value);
            }
        }
        else
        {
            Console.WriteLine("no links contain attribute data-preconnect-urls");
        }
    }
}
你可以在这里试试:
我正在使用 HTMLAgilityPack,想抓取链接 http://www.hundsun.co.jp/,它位于 data-preconnect-urls 属性中。我怎样才能得到它?
<h3>
<a style="display:none" href="/aclk?sa=L&ai=DChcSEwimnPnc5OvQAhWRl70KHcxqCEAYABAA&ei=9hZNWLqlCIyY8gXA04vACg&sig=AOD64_3SZuXd57_-qOs8nnhn8rqw8GlIgw&q=&sqi=2&ved=0ahUKEwi6-PTc5OvQAhUMjLwKHcDpAqgQ0QwIGA&adurl=" id="s0p1c0"></a>
<a href="https://www.google.co.jp/aclk?sa=L&ai=DChcSEwimnPnc5OvQAhWRl70KHcxqCEAYABAA&ei=9hZNWLqlCIyY8gXA04vACg&sig=AOD64_3SZuXd57_-qOs8nnhn8rqw8GlIgw&q=&sqi=2&ved=0ahUKEwi6-PTc5OvQAhUMjLwKHcDpAqgQ0QwIGA&adurl=" id="vs0p1c0" onmousedown="return google.arwt(this)" data-preconnect-urls="http://www.hundsun.co.jp/" jsl="$t t-zxXzjt1d4B0;$x 0;" class="r-iA_xzYkgkx2Y">ブリッジSE募集中 - hundsun.co.jp</a>
你可以这样做:
using System;
using HtmlAgilityPack;
using System.Xml;
/// <summary>
/// Demonstrates reading the <c>data-preconnect-urls</c> attribute from anchor
/// elements of an in-memory HTML string using HtmlAgilityPack.
/// </summary>
public class Program
{
    /// <summary>
    /// Loads the sample HTML, queries for anchors that have a
    /// <c>data-preconnect-urls</c> attribute, and prints each attribute value
    /// on its own line. Prints a message and stops if the query result is null.
    /// </summary>
    public static void Main()
    {
        // Hard-coded input document used by this sample.
        string sampleHtml = "<html><body><h3><a style=\"display:none\" href=\"/aclk?sa=L&ai=DChcSEwimnPnc5OvQAhWRl70KHcxqCEAYABAA&ei=9hZNWLqlCIyY8gXA04vACg&sig=AOD64_3SZuXd57_-qOs8nnhn8rqw8GlIgw&q=&sqi=2&ved=0ahUKEwi6-PTc5OvQAhUMjLwKHcDpAqgQ0QwIGA&adurl=\" id=\"s0p1c0\"></a><a href=\"https://www.google.co.jp/aclk?sa=L&ai=DChcSEwimnPnc5OvQAhWRl70KHcxqCEAYABAA&ei=9hZNWLqlCIyY8gXA04vACg&sig=AOD64_3SZuXd57_-qOs8nnhn8rqw8GlIgw&q=&sqi=2&ved=0ahUKEwi6-PTc5OvQAhUMjLwKHcDpAqgQ0QwIGA&adurl=\" id=\"vs0p1c0\" onmousedown=\"return google.arwt(this)\" data-preconnect-urls=\"http://www.hundsun.co.jp/\" jsl=\"$t t-zxXzjt1d4B0;$x 0;\" class=\"r-iA_xzYkgkx2Y\">ブリッジSE募集中 - hundsun.co.jp</a></h3></body></html>";

        var parsed = new HtmlDocument();
        parsed.LoadHtml(sampleHtml);

        var root = parsed.DocumentNode;
        var matches = root.SelectNodes("//a[@data-preconnect-urls]");

        // Nothing to print when the query produced no node collection.
        if (matches is null)
        {
            Console.WriteLine("no links contain attribute data-preconnect-urls");
            return;
        }

        foreach (var node in matches)
        {
            // Every selected node has the attribute because the XPath filters on it.
            var preconnect = node.Attributes["data-preconnect-urls"];
            var url = preconnect.Value;
            Console.WriteLine(url);
        }
    }
}
你可以在这里试试: