Php cURL 网页抓取
Php cURL Web Scraping
我想从以下网址抓取手机的价格:http://www.flipkart.com/apple-iphone-5s/p/itmdv6f75dyxhmt4?pid=MOBDPPZZDX8WSPAT
查看页面源码可以看到,价格位于下面的 SPAN 中:
<div class="pricing line">
<div class="prices" itemprop="offers" itemscope="" itemtype="http://schema.org/Offer">
<div>
<span class="selling-price omniture-field" data-omnifield="eVar48" data-eVar48="37500">Rs. 37,500</span> // Fetch this price
</div>
<span class="sticky-message">Selling Price</span>
<meta itemprop="price" content="37,500">
<meta itemprop="priceCurrency" content="INR">
</div>
</div>
到目前为止,我用来抓取价格的代码是:
<?php
// Requests a product page with cURL, then tries to parse it with DOMDocument/DOMXPath
// and print the text of every <h2> element that has an id attribute.
// The code is kept exactly as posted; the NOTE(review) comments mark visible problems.
$curl = curl_init('http://www.flipkart.com/apple-iphone-5s/p/itmdv6f75dyxhmt4?pid=MOBDPPZZDX8WSPAT');
curl_setopt($curl, CURLOPT_RETURNTRANSFER, TRUE); // make curl_exec() return the response instead of printing it
$page = curl_exec($curl);
if(!empty($curl)){ // intended as "if any html is actually returned" -- NOTE(review): this tests the cURL handle $curl, not the response $page
$pokemon_doc->loadHTML($curl); // NOTE(review): $pokemon_doc is never assigned in this snippet; the handle $curl is passed instead of $page
libxml_clear_errors(); // discard libxml parse errors produced by malformed HTML
$pokemon_xpath = new DOMXPath($pokemon_doc);
// select every <h2> element that has an id attribute
$pokemon_row = $pokemon_xpath->query('//h2[@id]');
if($pokemon_row->length > 0){
foreach($pokemon_row as $row){
echo $row->nodeValue . "<br/>";
}
}
}
else
print "Not found";
?>
这显示错误:
Fatal error: Call to a member function loadHTML() on a non-object in
D:\xampp\htdocs\jiteen\php-scrape\phpScrape.php on line 9
怎么办,查不到错误
首先,您忘记实例化 DOMDocument 类(至少在您这个问题的代码中是如此)。其次,传给 loadHTML() 的应当是 curl_exec() 返回的 $page,而不是 cURL 句柄 $curl。
// Download the product page and print the price found inside the
// <div class="prices"> block, first from the <meta itemprop="price">
// attribute and then from the visible <span> text.
$curl = curl_init('http://www.flipkart.com/apple-iphone-5s/p/itmdv6f75dyxhmt4?pid=MOBDPPZZDX8WSPAT');
// Make curl_exec() return the response body instead of printing it.
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
// Send a browser-like User-Agent header with the request.
curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
$page = curl_exec($curl);

// curl_exec() returns false on failure, so test the response body itself.
// Testing the handle ($curl) would always succeed and hide a failed request.
if (is_string($page) && $page !== '') {
    $pokemon_doc = new DOMDocument;
    // Collect libxml warnings internally so malformed real-world HTML
    // does not flood the output, then discard them after parsing.
    libxml_use_internal_errors(true);
    $pokemon_doc->loadHTML($page);
    libxml_clear_errors();

    $pokemon_xpath = new DOMXPath($pokemon_doc);

    // string(...) makes evaluate() return the value directly; it yields an
    // empty string when nothing matches.
    $price = $pokemon_xpath->evaluate('string(//div[@class="prices"]/meta[@itemprop="price"]/@content)');
    echo $price;
    $rupees = $pokemon_xpath->evaluate('string(//div[@class="prices"]/div/span)');
    echo $rupees;
} else {
    print "Not found";
}
我想从以下网址抓取手机的价格:http://www.flipkart.com/apple-iphone-5s/p/itmdv6f75dyxhmt4?pid=MOBDPPZZDX8WSPAT
查看页面源码可以看到,价格位于下面的 SPAN 中:
<div class="pricing line">
<div class="prices" itemprop="offers" itemscope="" itemtype="http://schema.org/Offer">
<div>
<span class="selling-price omniture-field" data-omnifield="eVar48" data-eVar48="37500">Rs. 37,500</span> // Fetch this price
</div>
<span class="sticky-message">Selling Price</span>
<meta itemprop="price" content="37,500">
<meta itemprop="priceCurrency" content="INR">
</div>
</div>
到目前为止,我用来抓取价格的代码是:
<?php
// Requests a product page with cURL, then tries to parse it with DOMDocument/DOMXPath
// and print the text of every <h2> element that has an id attribute.
// The code is kept exactly as posted; the NOTE(review) comments mark visible problems.
$curl = curl_init('http://www.flipkart.com/apple-iphone-5s/p/itmdv6f75dyxhmt4?pid=MOBDPPZZDX8WSPAT');
curl_setopt($curl, CURLOPT_RETURNTRANSFER, TRUE); // make curl_exec() return the response instead of printing it
$page = curl_exec($curl);
if(!empty($curl)){ // intended as "if any html is actually returned" -- NOTE(review): this tests the cURL handle $curl, not the response $page
$pokemon_doc->loadHTML($curl); // NOTE(review): $pokemon_doc is never assigned in this snippet; the handle $curl is passed instead of $page
libxml_clear_errors(); // discard libxml parse errors produced by malformed HTML
$pokemon_xpath = new DOMXPath($pokemon_doc);
// select every <h2> element that has an id attribute
$pokemon_row = $pokemon_xpath->query('//h2[@id]');
if($pokemon_row->length > 0){
foreach($pokemon_row as $row){
echo $row->nodeValue . "<br/>";
}
}
}
else
print "Not found";
?>
这显示错误:
Fatal error: Call to a member function loadHTML() on a non-object in D:\xampp\htdocs\jiteen\php-scrape\phpScrape.php on line 9
怎么办,查不到错误
首先,您忘记实例化 DOMDocument 类(至少在您这个问题的代码中是如此)。其次,传给 loadHTML() 的应当是 curl_exec() 返回的 $page,而不是 cURL 句柄 $curl。
// Download the product page and print the price found inside the
// <div class="prices"> block, first from the <meta itemprop="price">
// attribute and then from the visible <span> text.
$curl = curl_init('http://www.flipkart.com/apple-iphone-5s/p/itmdv6f75dyxhmt4?pid=MOBDPPZZDX8WSPAT');
// Make curl_exec() return the response body instead of printing it.
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
// Send a browser-like User-Agent header with the request.
curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
$page = curl_exec($curl);

// curl_exec() returns false on failure, so test the response body itself.
// Testing the handle ($curl) would always succeed and hide a failed request.
if (is_string($page) && $page !== '') {
    $pokemon_doc = new DOMDocument;
    // Collect libxml warnings internally so malformed real-world HTML
    // does not flood the output, then discard them after parsing.
    libxml_use_internal_errors(true);
    $pokemon_doc->loadHTML($page);
    libxml_clear_errors();

    $pokemon_xpath = new DOMXPath($pokemon_doc);

    // string(...) makes evaluate() return the value directly; it yields an
    // empty string when nothing matches.
    $price = $pokemon_xpath->evaluate('string(//div[@class="prices"]/meta[@itemprop="price"]/@content)');
    echo $price;
    $rupees = $pokemon_xpath->evaluate('string(//div[@class="prices"]/div/span)');
    echo $rupees;
} else {
    print "Not found";
}