为什么我用 cURL 只能获取到整数值(价格),却获取不到 url 链接?
Why Can't I get That url link with cURL without integer value?
我从该页面中只获取到了价格的整数值。但我想获取产品名称、url 和价格。我怎样才能得到它们?我该如何解决这个问题?
// Looks up the posted search term on ryanscomputers.com and prints the
// distinct prices and product URLs found in the result page.
if (isset($_POST['search']) && is_string($_POST['search'])) {
    // Keep only ASCII letters and digits. The `i` flag stops uppercase
    // letters from being silently stripped out of the term.
    $search = preg_replace('#[^0-9a-z]#i', '', $_POST['search']);

    // Plain concatenation: the previous literal mixed quote styles and sent
    // a stray "'." in front of the search term.
    $url = 'https://ryanscomputers.com/catalogsearch/result/?q=' . rawurlencode($search);

    // TLS peer verification is left at its secure default.
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    $result = curl_exec($curl);
    $error = curl_error($curl);
    curl_close($curl);

    if ($result === false) {
        // curl_exec() returns false on transport errors; report it instead
        // of running the patterns against a non-string.
        echo 'Request failed: ', htmlspecialchars($error, ENT_QUOTES, 'UTF-8');
    } else {
        // Print the text inside each price span (capture group 1), one per line.
        preg_match_all('/<span class="price">(.*?)<\/span>/', $result, $matches);
        foreach (array_unique($matches[1]) as $price) {
            // double_encode=false keeps entities already present in the page intact.
            echo htmlspecialchars(trim($price), ENT_QUOTES, 'UTF-8', false), "<br>\n";
        }

        // Capture only the href value. Echoing the whole match emitted an
        // unterminated <a ...> tag, so browsers displayed nothing.
        preg_match_all('!<h2 class="product-name"><a href="([^\s"]*)"!', $result, $matches);
        foreach (array_unique($matches[1]) as $link) {
            echo htmlspecialchars($link, ENT_QUOTES, 'UTF-8', false), "<br>\n";
        }
    }
}
您应该先读一读“如何用 PHP 解析 XML/HTML”(how to parse XML/HTML with PHP),因为您现在的做法很成问题。(不要用正则表达式解析 HTML)
也就是说,这可以用 DOMDocument + DOMXPath 很容易地解析出来,请看下面的代码:
<?php
// Fetches a catalogue search page and prints the name, price and URL of
// every product listed on it.
header("content-type: text/plain;charset=utf-8");
$ch = curl_init('https://ryanscomputers.com/catalogsearch/result/?q=cat');
curl_setopt_array($ch, array(
    // An empty string makes cURL advertise every encoding it supports.
    CURLOPT_ENCODING => '',
    CURLOPT_RETURNTRANSFER => 1
));
$html = curl_exec($ch);
$error = curl_error($ch);
curl_close($ch);

// curl_exec() returns false on failure, and loadHTML() rejects an empty string.
if ($html === false || $html === '') {
    exit('Request failed: ' . $error . "\n");
}

// loadHTML() is an instance method; calling it statically throws on PHP 8.
// Markup warnings are collected by libxml instead of being hidden with "@".
$domd = new DOMDocument();
$previousSetting = libxml_use_internal_errors(true);
$domd->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previousSetting);

$xp = new DOMXPath($domd);
foreach ($xp->query("//div[contains(@class,'item-inner')]") as $item) {
    $nameNode = $xp->query(".//h2[contains(@class,'product-name')]", $item)->item(0);
    $priceNode = $xp->query(".//div[contains(@class,'price-box')]", $item)->item(0);
    $linkNode = $item->getElementsByTagName("a")->item(0);

    // item(0) yields null when nothing matched; skip incomplete entries
    // rather than dereferencing null.
    if ($nameNode === null || $priceNode === null || $linkNode === null) {
        continue;
    }

    print_r([
        'name' => trim($nameNode->textContent),
        // Collapse runs of whitespace left over from the markup layout.
        'price' => trim(preg_replace('/\s+/', ' ', $priceNode->textContent)),
        'url' => $linkNode->getAttribute("href"),
    ]);
}
输出:
Array
(
[name] => Black Cat BC-01 Laptop Cooler
[price] => Regular Price Tk 660 Special Price Tk 650
[url] => https://ryanscomputers.com/black-cat-bc-01-laptop-cooler.html
)
Array
(
[name] => Black Cat BC-02 Laptop Cooler
[price] => Regular Price Tk 660 Special Price Tk 650
[url] => https://ryanscomputers.com/black-cat-bc-02-laptop-cooler.html
)
Array
(
[name] => Black Cat BC-06 Laptop Cooler
[price] => Regular Price Tk 910 Special Price Tk 900
[url] => https://ryanscomputers.com/black-cat-bc-06-laptop-cooler.html
)
Array
(
[name] => Black Cat BC-07 Laptop Cooler
[price] => Regular Price Tk 910 Special Price Tk 900
[url] => https://ryanscomputers.com/black-cat-bc-07-laptop-cooler.html
)
Array
(
[name] => Black Cat BC-05 Notebook Cooler
[price] => Regular Price Tk 720 Special Price Tk 700
[url] => https://ryanscomputers.com/black-cat-bc-05-notebook-cooler.html
)
Array
(
[name] => Black Cat K-680 Mini USB Keyboard
[price] => Regular Price Tk 420 Special Price Tk 400
[url] => https://ryanscomputers.com/black-cat-k-680-mini-usb-keyboard.html
)
~~truncated
我从该页面中只获取到了价格的整数值。但我想获取产品名称、url 和价格。我怎样才能得到它们?我该如何解决这个问题?
// Looks up the posted search term on ryanscomputers.com and prints the
// distinct prices and product URLs found in the result page.
if (isset($_POST['search']) && is_string($_POST['search'])) {
    // Keep only ASCII letters and digits. The `i` flag stops uppercase
    // letters from being silently stripped out of the term.
    $search = preg_replace('#[^0-9a-z]#i', '', $_POST['search']);

    // Plain concatenation: the previous literal mixed quote styles and sent
    // a stray "'." in front of the search term.
    $url = 'https://ryanscomputers.com/catalogsearch/result/?q=' . rawurlencode($search);

    // TLS peer verification is left at its secure default.
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    $result = curl_exec($curl);
    $error = curl_error($curl);
    curl_close($curl);

    if ($result === false) {
        // curl_exec() returns false on transport errors; report it instead
        // of running the patterns against a non-string.
        echo 'Request failed: ', htmlspecialchars($error, ENT_QUOTES, 'UTF-8');
    } else {
        // Print the text inside each price span (capture group 1), one per line.
        preg_match_all('/<span class="price">(.*?)<\/span>/', $result, $matches);
        foreach (array_unique($matches[1]) as $price) {
            // double_encode=false keeps entities already present in the page intact.
            echo htmlspecialchars(trim($price), ENT_QUOTES, 'UTF-8', false), "<br>\n";
        }

        // Capture only the href value. Echoing the whole match emitted an
        // unterminated <a ...> tag, so browsers displayed nothing.
        preg_match_all('!<h2 class="product-name"><a href="([^\s"]*)"!', $result, $matches);
        foreach (array_unique($matches[1]) as $link) {
            echo htmlspecialchars($link, ENT_QUOTES, 'UTF-8', false), "<br>\n";
        }
    }
}
您应该先读一读“如何用 PHP 解析 XML/HTML”(how to parse XML/HTML with PHP),因为您现在的做法很成问题。(不要用正则表达式解析 HTML)
也就是说,这可以用 DOMDocument + DOMXPath 很容易地解析出来,请看下面的代码:
<?php
// Fetches a catalogue search page and prints the name, price and URL of
// every product listed on it.
header("content-type: text/plain;charset=utf-8");
$ch = curl_init('https://ryanscomputers.com/catalogsearch/result/?q=cat');
curl_setopt_array($ch, array(
    // An empty string makes cURL advertise every encoding it supports.
    CURLOPT_ENCODING => '',
    CURLOPT_RETURNTRANSFER => 1
));
$html = curl_exec($ch);
$error = curl_error($ch);
curl_close($ch);

// curl_exec() returns false on failure, and loadHTML() rejects an empty string.
if ($html === false || $html === '') {
    exit('Request failed: ' . $error . "\n");
}

// loadHTML() is an instance method; calling it statically throws on PHP 8.
// Markup warnings are collected by libxml instead of being hidden with "@".
$domd = new DOMDocument();
$previousSetting = libxml_use_internal_errors(true);
$domd->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previousSetting);

$xp = new DOMXPath($domd);
foreach ($xp->query("//div[contains(@class,'item-inner')]") as $item) {
    $nameNode = $xp->query(".//h2[contains(@class,'product-name')]", $item)->item(0);
    $priceNode = $xp->query(".//div[contains(@class,'price-box')]", $item)->item(0);
    $linkNode = $item->getElementsByTagName("a")->item(0);

    // item(0) yields null when nothing matched; skip incomplete entries
    // rather than dereferencing null.
    if ($nameNode === null || $priceNode === null || $linkNode === null) {
        continue;
    }

    print_r([
        'name' => trim($nameNode->textContent),
        // Collapse runs of whitespace left over from the markup layout.
        'price' => trim(preg_replace('/\s+/', ' ', $priceNode->textContent)),
        'url' => $linkNode->getAttribute("href"),
    ]);
}
输出:
Array
(
[name] => Black Cat BC-01 Laptop Cooler
[price] => Regular Price Tk 660 Special Price Tk 650
[url] => https://ryanscomputers.com/black-cat-bc-01-laptop-cooler.html
)
Array
(
[name] => Black Cat BC-02 Laptop Cooler
[price] => Regular Price Tk 660 Special Price Tk 650
[url] => https://ryanscomputers.com/black-cat-bc-02-laptop-cooler.html
)
Array
(
[name] => Black Cat BC-06 Laptop Cooler
[price] => Regular Price Tk 910 Special Price Tk 900
[url] => https://ryanscomputers.com/black-cat-bc-06-laptop-cooler.html
)
Array
(
[name] => Black Cat BC-07 Laptop Cooler
[price] => Regular Price Tk 910 Special Price Tk 900
[url] => https://ryanscomputers.com/black-cat-bc-07-laptop-cooler.html
)
Array
(
[name] => Black Cat BC-05 Notebook Cooler
[price] => Regular Price Tk 720 Special Price Tk 700
[url] => https://ryanscomputers.com/black-cat-bc-05-notebook-cooler.html
)
Array
(
[name] => Black Cat K-680 Mini USB Keyboard
[price] => Regular Price Tk 420 Special Price Tk 400
[url] => https://ryanscomputers.com/black-cat-k-680-mini-usb-keyboard.html
)
~~truncated