simple_html_dom 在 div 中访问 ul
simple_html_dom access ul inside div
我正在使用 simple_html_dom 类编写 HTML 抓取脚本,
但在抓取 div 中的 ul 时遇到了问题。
HTML
<div class="attributes">
<div class="headline">test header</div>
<ul>
<li>test 1</li>
<li>test 2</li>
<li>test 3</li>
</ul>
</div>
PHP
// Fetch the page and print the outer HTML of the first element matching '.attributes'.
$url = 'http://example.com';
$data = dlPage2($url,'.attributes');
echo $data;
//function
/**
 * Download a page with cURL and return the outer HTML of the first element
 * matching a simple_html_dom selector.
 *
 * @param string $href    URL to fetch; it is also sent as the Referer header.
 * @param string $element Selector passed to simple_html_dom::find(),
 *                        e.g. '.attributes' or '.attributes ul'.
 *
 * @return string|null Outer HTML of the first match, or null when the download
 *                     fails, the body is empty, or nothing matches the selector.
 */
function dlPage2($href,$element) {
    $curl = curl_init();
    if ($curl === false) {
        return null;
    }

    // NOTE(review): peer verification is disabled, so HTTPS responses are not
    // authenticated. Kept as-is to preserve behaviour; enable it if possible.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, FALSE);
    curl_setopt($curl, CURLOPT_HEADER, false);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_URL, $href);
    curl_setopt($curl, CURLOPT_REFERER, $href);
    // Return the response body from curl_exec() instead of printing it.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, TRUE);
    curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.125 Safari/533.4");
    $str = curl_exec($curl);
    curl_close($curl);

    // curl_exec() returns false on failure; there is nothing to parse then.
    if ($str === false || $str === '') {
        return null;
    }

    // Parse the downloaded markup.
    $dom = new simple_html_dom();
    $dom->load($str);

    // find() with an index returns null when no element matches, so guard
    // before reading the property instead of dereferencing null.
    $node = $dom->find($element, 0);
    $html = $node ? $node->outertext : null;

    // The library documents clear() as the way to avoid leaking the parsed tree.
    $dom->clear();

    return $html;
}
上面的代码可以抓取整个 <div class="attributes">,
但我需要获取的是该 div 中 <ul> 标签的 HTML。
谁能帮我改一下?
您必须在 $element 选择器后面追加 ul,以选中其中的 <ul>,如下所示:
// Appending ' ul' to the selector targets the first <ul> nested inside the element matched by $element.
$dom = $dom->find($element.' ul', 0)->outertext;
我正在使用 simple_html_dom 类编写 HTML 抓取脚本,
但在抓取 div 中的 ul 时遇到了问题。
HTML
<div class="attributes">
<div class="headline">test header</div>
<ul>
<li>test 1</li>
<li>test 2</li>
<li>test 3</li>
</ul>
</div>
PHP
// Fetch the page and print the outer HTML of the first element matching '.attributes'.
$url = 'http://example.com';
$data = dlPage2($url,'.attributes');
echo $data;
//function
/**
 * Download a page with cURL and return the outer HTML of the first element
 * matching a simple_html_dom selector.
 *
 * @param string $href    URL to fetch; it is also sent as the Referer header.
 * @param string $element Selector passed to simple_html_dom::find(),
 *                        e.g. '.attributes' or '.attributes ul'.
 *
 * @return string|null Outer HTML of the first match, or null when the download
 *                     fails, the body is empty, or nothing matches the selector.
 */
function dlPage2($href,$element) {
    $curl = curl_init();
    if ($curl === false) {
        return null;
    }

    // NOTE(review): peer verification is disabled, so HTTPS responses are not
    // authenticated. Kept as-is to preserve behaviour; enable it if possible.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, FALSE);
    curl_setopt($curl, CURLOPT_HEADER, false);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_URL, $href);
    curl_setopt($curl, CURLOPT_REFERER, $href);
    // Return the response body from curl_exec() instead of printing it.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, TRUE);
    curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.125 Safari/533.4");
    $str = curl_exec($curl);
    curl_close($curl);

    // curl_exec() returns false on failure; there is nothing to parse then.
    if ($str === false || $str === '') {
        return null;
    }

    // Parse the downloaded markup.
    $dom = new simple_html_dom();
    $dom->load($str);

    // find() with an index returns null when no element matches, so guard
    // before reading the property instead of dereferencing null.
    $node = $dom->find($element, 0);
    $html = $node ? $node->outertext : null;

    // The library documents clear() as the way to avoid leaking the parsed tree.
    $dom->clear();

    return $html;
}
上面的代码可以抓取整个 <div class="attributes">,
但我需要获取的是该 div 中 <ul> 标签的 HTML。
谁能帮我改一下?
您必须在 $element 选择器后面追加 ul,以选中其中的 <ul>,如下所示:
// Appending ' ul' to the selector targets the first <ul> nested inside the element matched by $element.
$dom = $dom->find($element.' ul', 0)->outertext;