foreach within for each (preg match url, and imgurl)
foreach within for each (preg match url, and imgurl)
我在一个变量($data)中有 HTML,并从中找到了 URL(我称之为 $url)和 imgurl,但我不知道如何为每个项目也找到对应的 URL(它最终会回显所有匹配项,而不是每个项目各自的匹配项)。
我正在尝试学习通用的解决方案:假设您有一个 HTML 页面,您想要提取 img 的 src 和 a 的 href,并为每个页面回显它们...
$data = "75http://upload.wikimedia.org/wikipedia/commons/7/7a/Britney_Spears_2013.jpgimgrefurlhttp://en.wikipedia.org/wiki/Britney_Spears\?imgurlhttp://www.spirituelquotes.com/wp-content/uploads/2014/12/britney-spears.jpgimgrefurlhttp://www.spirituelquotes.com/celebrities/britney-spears/";
//finds url; (below is code rough to give an idea )
preg_match_all("/ru=\'(.*?)\'\;/is", $data, $matches);
matches = $url;
//finds imgurl
preg_match_all("/imgurl%5C75(.*?)%5C/is", $data, $matches);
matches = $img;
foreach $maches(){
echo $url $img;
//whatever else you want to do
}
output would be:
http://www.example.com http://www.example.com/image.jpg
http://www.example.com/dir/name2 http://www.example.com/image2.jpg
http://www.example.com/dir/name3 http://www.example.com/image3.jpg
http://www.example.com/dir/name3 http://www.example.com/image4.jpg
或:
$data = getfile contents(http://example.com)
//finds url;
preg_match_all("/ru=\'(.*?)\'\;/is", $data, $matches);
matches = $url
//finds the img url
preg_match_all("/imgurl%5C75(.*?)%5C/is", $data, $matches);
foreach($matches[1] as $imgurl){
echo $imgurl $url and (basically want to echo url for each matches in foreach command)
}
此代码有效,但我只想在 foreach 中添加 url:
// Collect every "url": the text between ru=' and '; (non-greedy; i = case-insensitive,
// s = dot also matches newlines). Captures end up in $matches[1].
preg_match_all("/ru=\'(.*?)\'\;/is", $data, $matches);
// NOTE(review): as shown, this snippet has one "{" but three "}", so it does not parse; the
// opening brace of the outer loop was presumably lost. Without a brace, the body of this
// foreach is only the single preg_match_all() statement that follows it.
foreach($matches[1] as $url)
// Re-scans the WHOLE $data for image URLs (the text between imgurl%5C75 and the next %5C)
// and overwrites $matches. The search is not tied to the current $url, so if the loops are
// nested as the closing braces suggest, every $url is combined with every $imgurl.
preg_match_all("/imgurl%5C75(.*?)%5C/is", $data, $matches);
foreach($matches[1] as $imgurl){
// Local file name = last path component of the image URL.
$filename = basename($imgurl);
echo $imgurl; //works
echo $url; //displays all $urls found
$img = "$filename";
// Called on every iteration although it only needs to be set once.
error_reporting(E_ALL);
$ch = curl_init();
// Per the PHP manual, a CURLOPT_CONNECTTIMEOUT of 0 means "wait indefinitely".
$timeout = 0;
curl_setopt ($ch, CURLOPT_URL, $imgurl);
curl_setopt ($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
// Make curl_exec() return the response body as a string instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// Per the PHP manual this option has had no effect since PHP 5.1.3.
curl_setopt($ch, CURLOPT_BINARYTRANSFER, 1);
// curl_exec() returns false on failure; the result is written below without being checked.
$image = curl_exec($ch);
curl_close($ch);
// Write the downloaded bytes to $img (a bare file name, so relative to the working directory).
// The fopen() result is not checked either.
$f = fopen($img, 'w');
fwrite($f, $image);
fclose($f);
}
}
}
尝试用正则表达式解析像 HTML 这样复杂的东西总是一个坏主意。您应该为这样的工作使用解析器。
<?php
// Keep libxml parse diagnostics in its internal buffer instead of raising PHP warnings.
libxml_use_internal_errors(true);

// Sample markup: four anchors, each wrapping a single image.
$html = "<a href=http://www.example.com/dir/name><img src=http://www.example.com/image.jpg></a><a href=http://www.example.com/dir/name2><img src=http://www.example.com/image2.jpg></a><a href=http://www.example.com/dir/name3><img src=http://www.example.com/image3.jpg></a><a href=http://www.example.com/dir/name4><img src=http://www.example.com/image4.jpg></a>";

// Build the DOM tree from the HTML string.
$domDocument = new \DOMDocument();
$domDocument->loadHTML($html);

// Tag name => attribute to print. Every <a href> value is printed first, then every <img src>.
$wanted = ["a" => "href", "img" => "src"];

foreach ($wanted as $tagName => $attribute) {
    $nodes = $domDocument->getElementsByTagName($tagName);

    foreach ($nodes as $node) {
        // Elements lacking the attribute produce no output.
        if (!$node->hasAttribute($attribute)) {
            continue;
        }

        echo $node->getAttribute($attribute) . PHP_EOL;
    }
}
请注意,如果您尝试解析包含一些新元素的 HTML5 文档,libxml2 可能会抛出错误。您可以使用 libxml_use_internal_errors(true) 解决此问题。
如果你坚持使用正则表达式,或者你只是想学习正则表达式(可以到 regex101.com [无从属关系] 学习正则表达式),那么解决方案是:
<?php
// Sample markup: four anchors, each wrapping a single image.
$data = "<a href=http://www.example.com/dir/name><img src=http://www.example.com/image.jpg></a><a href=http://www.example.com/dir/name2><img src=http://www.example.com/image2.jpg></a><a href=http://www.example.com/dir/name3><img src=http://www.example.com/image3.jpg></a><a href=http://www.example.com/dir/name4><img src=http://www.example.com/image4.jpg></a>";

// Pattern, piece by piece:
//   (?:href|src)=     the attribute name followed by "="
//   (?:"|')?          an optional opening quote
//   ([^"'> ]*)        capture group 1: the value, up to a quote, ">" or space
//   (?:"|'| |>)       the character that terminates the value
$pattern = "/(?:href|src)=(?:\"|')?([^\"'> ]*)(?:\"|'| |>)/";

// $matches[0] receives the full matches, $matches[1] the captured values, in document order.
preg_match_all($pattern, $data, $matches);

var_dump($matches);
/* OUTPUT:
array(2) {
[0]=>
array(8) {
[0]=>
string(37) "href=http://www.example.com/dir/name>"
[1]=>
string(37) "src=http://www.example.com/image.jpg>"
[2]=>
string(38) "href=http://www.example.com/dir/name2>"
[3]=>
string(38) "src=http://www.example.com/image2.jpg>"
[4]=>
string(38) "href=http://www.example.com/dir/name3>"
[5]=>
string(38) "src=http://www.example.com/image3.jpg>"
[6]=>
string(38) "href=http://www.example.com/dir/name4>"
[7]=>
string(38) "src=http://www.example.com/image4.jpg>"
}
[1]=>
array(8) {
[0]=>
string(31) "http://www.example.com/dir/name"
[1]=>
string(32) "http://www.example.com/image.jpg"
[2]=>
string(32) "http://www.example.com/dir/name2"
[3]=>
string(33) "http://www.example.com/image2.jpg"
[4]=>
string(32) "http://www.example.com/dir/name3"
[5]=>
string(33) "http://www.example.com/image3.jpg"
[6]=>
string(32) "http://www.example.com/dir/name4"
[7]=>
string(33) "http://www.example.com/image4.jpg"
}
}
*/
我在一个变量($data)中有 HTML,并从中找到了 URL(我称之为 $url)和 imgurl,但我不知道如何为每个项目也找到对应的 URL(它最终会回显所有匹配项,而不是每个项目各自的匹配项)。
我正在尝试学习通用的解决方案:假设您有一个 HTML 页面,您想要提取 img 的 src 和 a 的 href,并为每个页面回显它们...
$data = "75http://upload.wikimedia.org/wikipedia/commons/7/7a/Britney_Spears_2013.jpgimgrefurlhttp://en.wikipedia.org/wiki/Britney_Spears\?imgurlhttp://www.spirituelquotes.com/wp-content/uploads/2014/12/britney-spears.jpgimgrefurlhttp://www.spirituelquotes.com/celebrities/britney-spears/";
//finds url; (below is code rough to give an idea )
preg_match_all("/ru=\'(.*?)\'\;/is", $data, $matches);
matches = $url;
//finds imgurl
preg_match_all("/imgurl%5C75(.*?)%5C/is", $data, $matches);
matches = $img;
foreach $maches(){
echo $url $img;
//whatever else you want to do
}
output would be:
http://www.example.com http://www.example.com/image.jpg
http://www.example.com/dir/name2 http://www.example.com/image2.jpg
http://www.example.com/dir/name3 http://www.example.com/image3.jpg
http://www.example.com/dir/name3 http://www.example.com/image4.jpg
或:
$data = getfile contents(http://example.com)
//finds url;
preg_match_all("/ru=\'(.*?)\'\;/is", $data, $matches);
matches = $url
//finds the img url
preg_match_all("/imgurl%5C75(.*?)%5C/is", $data, $matches);
foreach($matches[1] as $imgurl){
echo $imgurl $url and (basically want to echo url for each matches in foreach command)
}
此代码有效,但我只想在 foreach 中添加 url:
preg_match_all("/ru=\'(.*?)\'\;/is", $data, $matches);
// Iterates over the "url" captures ($matches[1]) of the preceding preg_match_all() call.
// NOTE(review): as shown, this snippet has one "{" but three "}", so it does not parse; the
// opening brace of the outer loop was presumably lost. Without a brace, the body of this
// foreach is only the single preg_match_all() statement that follows it.
foreach($matches[1] as $url)
// Re-scans the WHOLE $data for image URLs (the text between imgurl%5C75 and the next %5C)
// and overwrites $matches. The search is not tied to the current $url, so if the loops are
// nested as the closing braces suggest, every $url is combined with every $imgurl.
preg_match_all("/imgurl%5C75(.*?)%5C/is", $data, $matches);
foreach($matches[1] as $imgurl){
// Local file name = last path component of the image URL.
$filename = basename($imgurl);
echo $imgurl; //works
echo $url; //displays all $urls found
$img = "$filename";
// Called on every iteration although it only needs to be set once.
error_reporting(E_ALL);
$ch = curl_init();
// Per the PHP manual, a CURLOPT_CONNECTTIMEOUT of 0 means "wait indefinitely".
$timeout = 0;
curl_setopt ($ch, CURLOPT_URL, $imgurl);
curl_setopt ($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
// Make curl_exec() return the response body as a string instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// Per the PHP manual this option has had no effect since PHP 5.1.3.
curl_setopt($ch, CURLOPT_BINARYTRANSFER, 1);
// curl_exec() returns false on failure; the result is written below without being checked.
$image = curl_exec($ch);
curl_close($ch);
// Write the downloaded bytes to $img (a bare file name, so relative to the working directory).
// The fopen() result is not checked either.
$f = fopen($img, 'w');
fwrite($f, $image);
fclose($f);
}
}
}
尝试用正则表达式解析像 HTML 这样复杂的东西总是一个坏主意。您应该为这样的工作使用解析器。
<?php
// Keep libxml parse diagnostics in its internal buffer instead of raising PHP warnings.
libxml_use_internal_errors(true);

// Sample markup: four anchors, each wrapping a single image.
$html = "<a href=http://www.example.com/dir/name><img src=http://www.example.com/image.jpg></a><a href=http://www.example.com/dir/name2><img src=http://www.example.com/image2.jpg></a><a href=http://www.example.com/dir/name3><img src=http://www.example.com/image3.jpg></a><a href=http://www.example.com/dir/name4><img src=http://www.example.com/image4.jpg></a>";

// Build the DOM tree from the HTML string.
$domDocument = new \DOMDocument();
$domDocument->loadHTML($html);

// Tag name => attribute to print. Every <a href> value is printed first, then every <img src>.
$wanted = ["a" => "href", "img" => "src"];

foreach ($wanted as $tagName => $attribute) {
    $nodes = $domDocument->getElementsByTagName($tagName);

    foreach ($nodes as $node) {
        // Elements lacking the attribute produce no output.
        if (!$node->hasAttribute($attribute)) {
            continue;
        }

        echo $node->getAttribute($attribute) . PHP_EOL;
    }
}
请注意,如果您尝试解析包含一些新元素的 HTML5 文档,libxml2 可能会抛出错误。您可以使用 libxml_use_internal_errors(true) 解决此问题。
如果你坚持使用正则表达式,或者你只是想学习正则表达式(可以到 regex101.com [无从属关系] 学习正则表达式),那么解决方案是:
<?php
// Sample markup: four anchors, each wrapping a single image.
$data = "<a href=http://www.example.com/dir/name><img src=http://www.example.com/image.jpg></a><a href=http://www.example.com/dir/name2><img src=http://www.example.com/image2.jpg></a><a href=http://www.example.com/dir/name3><img src=http://www.example.com/image3.jpg></a><a href=http://www.example.com/dir/name4><img src=http://www.example.com/image4.jpg></a>";

// Pattern, piece by piece:
//   (?:href|src)=     the attribute name followed by "="
//   (?:"|')?          an optional opening quote
//   ([^"'> ]*)        capture group 1: the value, up to a quote, ">" or space
//   (?:"|'| |>)       the character that terminates the value
$pattern = "/(?:href|src)=(?:\"|')?([^\"'> ]*)(?:\"|'| |>)/";

// $matches[0] receives the full matches, $matches[1] the captured values, in document order.
preg_match_all($pattern, $data, $matches);

var_dump($matches);
/* OUTPUT:
array(2) {
[0]=>
array(8) {
[0]=>
string(37) "href=http://www.example.com/dir/name>"
[1]=>
string(37) "src=http://www.example.com/image.jpg>"
[2]=>
string(38) "href=http://www.example.com/dir/name2>"
[3]=>
string(38) "src=http://www.example.com/image2.jpg>"
[4]=>
string(38) "href=http://www.example.com/dir/name3>"
[5]=>
string(38) "src=http://www.example.com/image3.jpg>"
[6]=>
string(38) "href=http://www.example.com/dir/name4>"
[7]=>
string(38) "src=http://www.example.com/image4.jpg>"
}
[1]=>
array(8) {
[0]=>
string(31) "http://www.example.com/dir/name"
[1]=>
string(32) "http://www.example.com/image.jpg"
[2]=>
string(32) "http://www.example.com/dir/name2"
[3]=>
string(33) "http://www.example.com/image2.jpg"
[4]=>
string(32) "http://www.example.com/dir/name3"
[5]=>
string(33) "http://www.example.com/image3.jpg"
[6]=>
string(32) "http://www.example.com/dir/name4"
[7]=>
string(33) "http://www.example.com/image4.jpg"
}
}
*/