XPath 查询不返回结果
Xpath Query Won't Return Results
我正在尝试从 XPath 查询中返回一些结果,但它没有选中正确的元素。我使用的代码如下:
/**
 * Loads the TrustPilot review page with the HTML5 parser and collects the
 * reviews whose rating is at least 4 into a Collection.
 *
 * This is the version from the question; the inline notes mark the lines
 * that explain the behaviour described around it.
 *
 * @param mixed $amount Limit handed to Collection::take() on the collected reviews.
 * @return mixed Whatever $reviews->take($amount) returns.
 */
public function getTrustPilotReviews($amount)
{
$trustPilotUrl = 'https://www.trustpilot.co.uk/review/purplegriffon.com';
$html5 = new HTML5;
$document = $html5->loadHtml(file_get_contents($trustPilotUrl));
// Set after the document has already been loaded on the previous line.
$document->validateOnParse = true;
$xpath = new DOMXpath($document);
// The element names here carry no namespace prefix.
// NOTE(review): if the parser places elements in the XHTML namespace, an
// unprefixed `div` step matches nothing - confirm against the parser output.
$reviewsDomNodeList = $xpath->query('//div[@id="reviews-container"]//div[@itemprop="review"]');
$reviews = new Collection;
foreach ($reviewsDomNodeList as $key => $reviewDomElement)
{
// A new DOMXPath is created for the same owner document on every iteration.
$xpath = new DOMXpath($reviewDomElement->ownerDocument);
// Every expression below starts with `//` and is called without a context
// node, so it is evaluated against the whole document; item($key) then picks
// the match at the loop index instead of the one inside $reviewDomElement.
if ((int) $xpath->query('//*[@itemprop="ratingValue"]')->item($key)->getAttribute('content') >= 4)
{
$review = [
'title' => 'Test',
'author' => $xpath->query('//*[@itemprop="author"]')->item($key)->nodeValue,
'date' => $xpath->query('//*[@class="ndate"]')->item($key)->nodeValue,
'rating' => $xpath->query('//*[@itemprop="ratingValue"]')->item($key)->nodeValue,
'body' => $xpath->query('//*[@itemprop="reviewBody"]')->item($key)->nodeValue,
];
$reviews->add((object) $review);
}
}
// The limit is applied only after all matching reviews were collected.
return $reviews->take($amount);
}
下面这个表达式不会返回任何结果:
//div[@id="reviews-container"]//div[@itemprop="review"]
但是如果我把它改成:
//*[@id="reviews-container"]//*[@itemprop="review"]
它能部分工作,但返回的结果不正确。
您似乎在使用 HTML5-PHP 库。如果是这样,则需要使用命名空间。该库会把 HTML5 加载为 XHTML 文档。将 DOM 文档另存为 XML 即可验证这一点,输出类似于:
<?xml version="1.0" encoding="UTF-8"?>
<html xmlns="http://www.w3.org/1999/xhtml">
...
</html>
因此,在使用 XPath 时,需要为 XHTML 命名空间注册一个前缀,并在元素名称前加上该前缀。
...
// Create the XPath evaluator for $document.
$xpath = new DOMXPath($document);
// Bind the prefix "x" to the XHTML namespace URI so it can be used in expressions.
$xpath->registerNamespace('x', 'http://www.w3.org/1999/xhtml');
// Both element steps use the "x:" prefix; the attribute tests stay unprefixed.
$reviewNodes= $xpath->evaluate(
'//x:div[@id="reviews-container"]//x:div[@itemprop="review"]'
);
// Iterate over the matched review elements.
foreach ($reviewNodes as $reviewNode) {
...
}
...
您在循环内有一个条件,它可以直接写进用于获取评论的外层 XPath 表达式中:
// Select only the review elements whose rating is at least 4, so the loop no
// longer needs its own rating check. The comparison is ">=" to match the
// ">= 4" condition it replaces; whitespace inside the expression is
// insignificant to XPath.
$expression =
    '//x:div[@id="reviews-container"]
        //x:div[
            @itemprop="review" and
            (.//*[@itemprop = "ratingValue"]/@content >= 4)
        ]';
不要使用 DOMXPath::query(),而应使用 DOMXPath::evaluate(),它可以让您直接得到标量值。该方法的第二个参数是上下文节点。请使用相对位置路径(表达式开头没有 /)。
...
foreach ($reviewNodes as $reviewNode) {
    // Every expression is relative (starts with ".") and is evaluated with
    // $reviewNode as the context node, so it only sees the current review.
    // string() returns the scalar directly; it yields '' when nothing matches.
    $review = [
        'title' => 'Test',
        'author' => $xpath->evaluate('string(.//*[@itemprop="author"])', $reviewNode),
        'date' => $xpath->evaluate('string(.//*[@class="ndate"])', $reviewNode),
        // The rating value is stored in the "content" attribute.
        'rating' => $xpath->evaluate('string(.//*[@itemprop="ratingValue"]/@content)', $reviewNode),
        'body' => $xpath->evaluate('string(.//*[@itemprop="reviewBody"])', $reviewNode),
    ];
    // ...
}
多亏了 ##php IRC 频道中的 Viper-7、biberu 和 salathe,我现在使用的是下面这段可以正常工作的代码:
/**
 * Downloads the TrustPilot review page and returns the reviews rated 4 or higher.
 *
 * @param mixed $amount Limit handed to Collection::take() on the collected reviews.
 * @return mixed Result of Collection::take(): objects with title, author, date, rating and body.
 * @throws \RuntimeException When the review page cannot be downloaded.
 */
public function getTrustPilotReviews($amount)
{
    $url = 'https://www.trustpilot.co.uk/review/purplegriffon.com';

    // The default stream context is used on purpose: switching off
    // 'verify_peer' accepts any certificate and lets a man-in-the-middle
    // inject content. If verification fails locally, fix the CA bundle
    // (openssl.cafile) instead of disabling the check.
    $data = file_get_contents($url);
    if ($data === false || $data === '') {
        throw new \RuntimeException('Unable to download reviews from ' . $url);
    }

    // Real-world HTML is rarely valid: collect libxml warnings silently while
    // parsing, then drop them and restore the previous error-handling mode.
    $previousErrorMode = libxml_use_internal_errors(true);
    $doc = new \DOMDocument();
    $doc->loadHTML($data);
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    // One DOMXPath instance is enough; the context node is passed per call.
    $xpath = new \DOMXPath($doc);
    $reviews = new Collection;

    foreach ($xpath->query('//div[@id="reviews-container"]/div[@itemprop="review"]') as $node) {
        // number() yields NAN when the rating element or its "content"
        // attribute is missing; NAN >= 4 is false, so such reviews are skipped
        // instead of triggering a method call on null.
        $rating = $xpath->evaluate('number(descendant::*[@itemprop="ratingValue"]/@content)', $node);
        if (!($rating >= 4)) {
            continue;
        }

        $reviews->add((object) [
            'title' => $xpath->evaluate('normalize-space(descendant::*[@itemprop="headline"]/a)', $node),
            'author' => $xpath->evaluate('normalize-space(descendant::*[@itemprop="author"])', $node),
            'date' => $xpath->evaluate('normalize-space(descendant::*[@class="ndate"])', $node),
            'rating' => $rating,
            'body' => $xpath->evaluate('normalize-space(descendant::*[@itemprop="reviewBody"])', $node),
        ]);
    }

    // The limit is applied after collecting, exactly as before.
    return $reviews->take($amount);
}
我正在尝试从 XPath 查询中返回一些结果,但它没有选中正确的元素。我使用的代码如下:
/**
 * Loads the TrustPilot review page with the HTML5 parser and collects the
 * reviews whose rating is at least 4 into a Collection.
 *
 * This is the version from the question; the inline notes mark the lines
 * that explain the behaviour described around it.
 *
 * @param mixed $amount Limit handed to Collection::take() on the collected reviews.
 * @return mixed Whatever $reviews->take($amount) returns.
 */
public function getTrustPilotReviews($amount)
{
$trustPilotUrl = 'https://www.trustpilot.co.uk/review/purplegriffon.com';
$html5 = new HTML5;
$document = $html5->loadHtml(file_get_contents($trustPilotUrl));
// Set after the document has already been loaded on the previous line.
$document->validateOnParse = true;
$xpath = new DOMXpath($document);
// The element names here carry no namespace prefix.
// NOTE(review): if the parser places elements in the XHTML namespace, an
// unprefixed `div` step matches nothing - confirm against the parser output.
$reviewsDomNodeList = $xpath->query('//div[@id="reviews-container"]//div[@itemprop="review"]');
$reviews = new Collection;
foreach ($reviewsDomNodeList as $key => $reviewDomElement)
{
// A new DOMXPath is created for the same owner document on every iteration.
$xpath = new DOMXpath($reviewDomElement->ownerDocument);
// Every expression below starts with `//` and is called without a context
// node, so it is evaluated against the whole document; item($key) then picks
// the match at the loop index instead of the one inside $reviewDomElement.
if ((int) $xpath->query('//*[@itemprop="ratingValue"]')->item($key)->getAttribute('content') >= 4)
{
$review = [
'title' => 'Test',
'author' => $xpath->query('//*[@itemprop="author"]')->item($key)->nodeValue,
'date' => $xpath->query('//*[@class="ndate"]')->item($key)->nodeValue,
'rating' => $xpath->query('//*[@itemprop="ratingValue"]')->item($key)->nodeValue,
'body' => $xpath->query('//*[@itemprop="reviewBody"]')->item($key)->nodeValue,
];
$reviews->add((object) $review);
}
}
// The limit is applied only after all matching reviews were collected.
return $reviews->take($amount);
}
下面这个表达式不会返回任何结果:
//div[@id="reviews-container"]//div[@itemprop="review"]
但是如果我把它改成:
//*[@id="reviews-container"]//*[@itemprop="review"]
它能部分工作,但返回的结果不正确。
您似乎在使用 HTML5-PHP 库。如果是这样,则需要使用命名空间。该库会把 HTML5 加载为 XHTML 文档。将 DOM 文档另存为 XML 即可验证这一点,输出类似于:
<?xml version="1.0" encoding="UTF-8"?>
<html xmlns="http://www.w3.org/1999/xhtml">
...
</html>
因此,在使用 XPath 时,需要为 XHTML 命名空间注册一个前缀,并在元素名称前加上该前缀。
...
// Create the XPath evaluator for $document.
$xpath = new DOMXPath($document);
// Bind the prefix "x" to the XHTML namespace URI so it can be used in expressions.
$xpath->registerNamespace('x', 'http://www.w3.org/1999/xhtml');
// Both element steps use the "x:" prefix; the attribute tests stay unprefixed.
$reviewNodes= $xpath->evaluate(
'//x:div[@id="reviews-container"]//x:div[@itemprop="review"]'
);
// Iterate over the matched review elements.
foreach ($reviewNodes as $reviewNode) {
...
}
...
您在循环内有一个条件,它可以直接写进用于获取评论的外层 XPath 表达式中:
// Select only the review elements whose rating is at least 4, so the loop no
// longer needs its own rating check. The comparison is ">=" to match the
// ">= 4" condition it replaces; whitespace inside the expression is
// insignificant to XPath.
$expression =
    '//x:div[@id="reviews-container"]
        //x:div[
            @itemprop="review" and
            (.//*[@itemprop = "ratingValue"]/@content >= 4)
        ]';
不要使用 DOMXPath::query(),而应使用 DOMXPath::evaluate(),它可以让您直接得到标量值。该方法的第二个参数是上下文节点。请使用相对位置路径(表达式开头没有 /)。
...
foreach ($reviewNodes as $reviewNode) {
    // Every expression is relative (starts with ".") and is evaluated with
    // $reviewNode as the context node, so it only sees the current review.
    // string() returns the scalar directly; it yields '' when nothing matches.
    $review = [
        'title' => 'Test',
        'author' => $xpath->evaluate('string(.//*[@itemprop="author"])', $reviewNode),
        'date' => $xpath->evaluate('string(.//*[@class="ndate"])', $reviewNode),
        // The rating value is stored in the "content" attribute.
        'rating' => $xpath->evaluate('string(.//*[@itemprop="ratingValue"]/@content)', $reviewNode),
        'body' => $xpath->evaluate('string(.//*[@itemprop="reviewBody"])', $reviewNode),
    ];
    // ...
}
多亏了 ##php IRC 频道中的 Viper-7、biberu 和 salathe,我现在使用的是下面这段可以正常工作的代码:
/**
 * Downloads the TrustPilot review page and returns the reviews rated 4 or higher.
 *
 * @param mixed $amount Limit handed to Collection::take() on the collected reviews.
 * @return mixed Result of Collection::take(): objects with title, author, date, rating and body.
 * @throws \RuntimeException When the review page cannot be downloaded.
 */
public function getTrustPilotReviews($amount)
{
    $url = 'https://www.trustpilot.co.uk/review/purplegriffon.com';

    // The default stream context is used on purpose: switching off
    // 'verify_peer' accepts any certificate and lets a man-in-the-middle
    // inject content. If verification fails locally, fix the CA bundle
    // (openssl.cafile) instead of disabling the check.
    $data = file_get_contents($url);
    if ($data === false || $data === '') {
        throw new \RuntimeException('Unable to download reviews from ' . $url);
    }

    // Real-world HTML is rarely valid: collect libxml warnings silently while
    // parsing, then drop them and restore the previous error-handling mode.
    $previousErrorMode = libxml_use_internal_errors(true);
    $doc = new \DOMDocument();
    $doc->loadHTML($data);
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    // One DOMXPath instance is enough; the context node is passed per call.
    $xpath = new \DOMXPath($doc);
    $reviews = new Collection;

    foreach ($xpath->query('//div[@id="reviews-container"]/div[@itemprop="review"]') as $node) {
        // number() yields NAN when the rating element or its "content"
        // attribute is missing; NAN >= 4 is false, so such reviews are skipped
        // instead of triggering a method call on null.
        $rating = $xpath->evaluate('number(descendant::*[@itemprop="ratingValue"]/@content)', $node);
        if (!($rating >= 4)) {
            continue;
        }

        $reviews->add((object) [
            'title' => $xpath->evaluate('normalize-space(descendant::*[@itemprop="headline"]/a)', $node),
            'author' => $xpath->evaluate('normalize-space(descendant::*[@itemprop="author"])', $node),
            'date' => $xpath->evaluate('normalize-space(descendant::*[@class="ndate"])', $node),
            'rating' => $rating,
            'body' => $xpath->evaluate('normalize-space(descendant::*[@itemprop="reviewBody"])', $node),
        ]);
    }

    // The limit is applied after collecting, exactly as before.
    return $reviews->take($amount);
}