我如何使用 simple_html_dom 与 phantomjs
how do I use simple_html_dom with phantomjs
我正在尝试让这两个库相互协作我当前的代码如下所示:
phantomjs.js
var page = require('webpage').create();
var system = require('system');
var address = system.args[1];
page.open(address, function () {
var content = page.content;
console.log(content);
phantom.exit();
});
scraper.php
exec('phantomjs assets/phantomjs.js '.$page, $output);
$html2 = str_get_html($output);
我得到的是这样的:
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 91
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1139
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1149
Warning: preg_match_all() expects parameter 2 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1620
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1632
Warning: preg_match_all() expects parameter 2 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1620
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1632
Warning: preg_match_all() expects parameter 2 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1620
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1632
Warning: preg_match_all() expects parameter 2 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1620
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1632
Warning: preg_match_all() expects parameter 2 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1620
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1632
Warning: preg_match_all() expects parameter 2 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1620
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1632
Warning: preg_match_all() expects parameter 2 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1620
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1632
Warning: preg_match_all() expects parameter 2 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1620
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1632
Warning: preg_match_all() expects parameter 2 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1620
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1632
simple_html_dom 返回错误的函数:
// remove noise from html content
// save the noise in the $this->noise array.
protected function remove_noise($pattern, $remove_tag=false)
{
global $debugObject;
if (is_object($debugObject)) { $debugObject->debugLogEntry(1); }
$count = preg_match_all($pattern, $this->doc, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE);
for ($i=$count-1; $i>-1; --$i)
{
$key = '___noise___'.sprintf('% 5d', count($this->noise)+1000);
if (is_object($debugObject)) { $debugObject->debugLog(2, 'key is: ' . $key); }
$idx = ($remove_tag) ? 0 : 1;
$this->noise[$key] = $matches[$i][$idx][0];
$this->doc = substr_replace($this->doc, $key, $matches[$i][$idx][1], strlen($matches[$i][$idx][0]));
}
// reset the length of content
$this->size = strlen($this->doc);
if ($this->size>0)
{
$this->char = $this->doc[0];
}
}
当我使用 var_dump($output)
时,我得到了站点的 html,所以我知道该命令正在运行,但 simple_html_dom 似乎不接受它!
问题是这样的:$output
是数组,但 str_get_html
需要一个字符串作为参数。因此,请确保在解析之前将 $output 转换为字符串。
类似于:
$body = `phantomjs myscript.js`;
$doc = str_get_html($body);
您可能需要在 phantomjs 脚本中设置超时,您没有给 dom 加载很多时间。
我正在尝试让这两个库相互协作我当前的代码如下所示:
phantomjs.js
var page = require('webpage').create();
var system = require('system');
var address = system.args[1];
page.open(address, function () {
var content = page.content;
console.log(content);
phantom.exit();
});
scraper.php
exec('phantomjs assets/phantomjs.js '.$page, $output);
$html2 = str_get_html($output);
我得到的是这样的:
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 91
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1139
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1149
Warning: preg_match_all() expects parameter 2 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1620
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1632
Warning: preg_match_all() expects parameter 2 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1620
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1632
Warning: preg_match_all() expects parameter 2 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1620
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1632
Warning: preg_match_all() expects parameter 2 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1620
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1632
Warning: preg_match_all() expects parameter 2 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1620
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1632
Warning: preg_match_all() expects parameter 2 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1620
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1632
Warning: preg_match_all() expects parameter 2 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1620
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1632
Warning: preg_match_all() expects parameter 2 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1620
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1632
Warning: preg_match_all() expects parameter 2 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1620
Warning: strlen() expects parameter 1 to be string, array given in D:\XAMPP\htdocs\assets\php\simple_html_dom.php on line 1632
simple_html_dom 返回错误的函数:
// remove noise from html content
// save the noise in the $this->noise array.
protected function remove_noise($pattern, $remove_tag=false)
{
global $debugObject;
if (is_object($debugObject)) { $debugObject->debugLogEntry(1); }
$count = preg_match_all($pattern, $this->doc, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE);
for ($i=$count-1; $i>-1; --$i)
{
$key = '___noise___'.sprintf('% 5d', count($this->noise)+1000);
if (is_object($debugObject)) { $debugObject->debugLog(2, 'key is: ' . $key); }
$idx = ($remove_tag) ? 0 : 1;
$this->noise[$key] = $matches[$i][$idx][0];
$this->doc = substr_replace($this->doc, $key, $matches[$i][$idx][1], strlen($matches[$i][$idx][0]));
}
// reset the length of content
$this->size = strlen($this->doc);
if ($this->size>0)
{
$this->char = $this->doc[0];
}
}
当我使用 var_dump($output)
时,我得到了站点的 html,所以我知道该命令正在运行,但 simple_html_dom 似乎不接受它!
问题是这样的:$output
是数组,但 str_get_html
需要一个字符串作为参数。因此,请确保在解析之前将 $output 转换为字符串。
类似于:
$body = `phantomjs myscript.js`;
$doc = str_get_html($body);
您可能需要在 phantomjs 脚本中设置超时,您没有给 dom 加载很多时间。