提取多引号变量中的字符串
Extract string inside multi-quoted variable
我有一个带有多个单引号的变量,想提取其中的一个字符串。
我的代码是:
// Backslash-escape the quote characters in the attribute text.
$image['src'] = addslashes($image['src']);
// Capture whatever sits between the quotes that follow "src="; inside the
// character class the "|" is a literal pipe, not an alternation.
preg_match('~src=["|\'](.*?)["|\']~', $image['src'], $matches);
// Replace the original text with the captured value.
$image['src'] = $matches[1];
$image['src'] 包含此字符串:
tooltip_html(this, '<div style="display: block; width: 262px"><img src="https://url.com/var/galerie/15773_262.jpg"/></div>');
我以为这样应该没问题,但是 $image['src'] 返回 null。addslashes 方法工作正常,返回的内容如下:
tooltip_html(this, \'<div style="display: block; width: 262px"><img src="https://url.com/var/galerie/15773_262.jpg"/></div>\');
我不明白这里的问题,我是不是漏掉了什么?
=====更新======
全部代码:
<?php
// Report every notice, warning and error.
error_reporting(E_ALL);
// Declare the response as JSON; the second argument (true) replaces any
// previously set Content-Type header.
header("Content-Type: application/json", true);
// Base URL of the site being scraped; request paths are appended to it.
define('SITE', 'https://akipa-autohandel.autrado.de/');
// Load the HTML parser library once (presumably the provider of str_get_html()).
include_once('simple_html_dom.php');
/**
 * SimpleXMLElement variant that can append CDATA sections for XML output.
 */
class SimpleXMLExtended extends SimpleXMLElement {
    /**
     * Append the given text to this element as a CDATA section.
     *
     * @param string $cdata_text Raw text to wrap in <![CDATA[ ... ]]>.
     */
    public function addCData($cdata_text) {
        // SimpleXML cannot create CDATA nodes itself, so go through the DOM view.
        $domElement = dom_import_simplexml($this);
        $cdataSection = $domElement->ownerDocument->createCDATASection($cdata_text);
        $domElement->appendChild($cdataSection);
    }
}
/**
 * Fetch a URL with cURL using a GET request.
 *
 * Returns the curl_getinfo() array extended with three extra keys:
 * 'errno', 'errmsg' and 'content' (the response body). When curl_exec()
 * fails, prints 'output error' followed by the cURL error text and ends
 * the script.
 */
function get_web_page( $url ) {
    $browserSignature = 'Mozilla/5.0 (Windows NT 6.1; rv:8.0) Gecko/20100101 Firefox/8.0';

    $handle = curl_init( $url );
    curl_setopt_array( $handle, array(
        CURLOPT_CUSTOMREQUEST  => "GET",              // request method
        CURLOPT_POST           => false,              // not a POST
        CURLOPT_USERAGENT      => $browserSignature,  // user agent string
        CURLOPT_COOKIEFILE     => "cookie.txt",       // cookies are read from here
        CURLOPT_COOKIEJAR      => "cookie.txt",       // cookies are written here
        CURLOPT_RETURNTRANSFER => true,               // hand the body back as a string
        CURLOPT_HEADER         => false,              // leave headers out of the body
        CURLOPT_FOLLOWLOCATION => true,               // follow redirects
        CURLOPT_ENCODING       => "",                 // accept all encodings
        CURLOPT_AUTOREFERER    => true,               // set referer on redirect
        CURLOPT_CONNECTTIMEOUT => 120,                // connect timeout
        CURLOPT_TIMEOUT        => 120,                // overall timeout
        CURLOPT_MAXREDIRS      => 10,                 // give up after 10 redirects
    ) );

    $body = curl_exec( $handle );
    if ( false === $body ) {
        // A false body cannot be passed to str_get_html(); report and stop.
        echo 'output error';
        die(curl_error($handle));
    }

    // Transfer info first, then the error state and the body appended to it.
    $result = curl_getinfo( $handle );
    $result['errno'] = curl_errno( $handle );
    $result['errmsg'] = curl_error( $handle );
    $result['content'] = $body;
    curl_close( $handle );

    return $result;
}
/**
 * Download a page and parse its body with str_get_html().
 *
 * Transport errors and non-200 status codes are reported with echo and do
 * not stop execution.
 *
 * @param string $uri Address of the page to fetch.
 * @return mixed Whatever str_get_html() returns for a non-empty body,
 *               or null when the body is empty.
 */
function renderPage( $uri ) {
    $rendering = get_web_page( $uri );
    if ( $rendering['errno'] != 0 ) {
        echo 'bad url, timeout, redirect loop';
    }
    if ( $rendering['http_code'] != 200 ) {
        echo 'no page, no permissions, no service';
    }
    // Start from null so an empty body yields a defined return value instead
    // of reading an undefined variable.
    $parsing = null;
    $content = $rendering['content'];
    if ( !empty($content) ) {
        $parsing = str_get_html($content);
    }
    return $parsing;
}
/**
 * Print the image URL found in the onmouseover tooltip of every car row
 * on the selected autrado result list.
 *
 * Rows without an a[onmouseover] element, and tooltips without a src
 * attribute, are skipped. Nothing is printed when the page could not be
 * parsed.
 */
function models() {
    $paramURI = SITE . 'schnellsuche.php?suche_hersteller=14&suche_modell=&suche_from=form&suche_action=suche&itemsperpage=500';
    $content = renderPage($paramURI);
    if ( !$content ) {
        // No parsed document to search.
        return;
    }
    foreach ($content->find('tr[class*=fahrzeugliste]') as $auto) {
        $anchor = $auto->find('a[onmouseover]', 0);
        if ( !$anchor ) {
            continue;
        }
        // The attribute value arrives HTML-entity encoded, so the quotes
        // around src are not literal characters until it is decoded.
        // ENT_QUOTES decodes both quote styles regardless of PHP version.
        $tooltip = html_entity_decode((string) $anchor->onmouseover, ENT_QUOTES);
        // Only read the capture group when the pattern actually matched.
        if ( preg_match('~src=["\'](.*?)["\']~', $tooltip, $matches) ) {
            echo $matches[1];
        }
    }
}
// Dispatch on the posted "action" field; only 'test' is handled.
if (!empty($_POST['action'])) {
    $action = $_POST['action'];
    switch ((string) $action) {
        case 'test':
            $output = models();
            // NOTE(review): the encoded string is neither echoed nor stored.
            json_encode($output);
            break;
    }
}
?>
$image['src'] 的内容和你上面写的不一样。我刚刚运行了你的脚本,实际内容是:
tooltip_html(this, '<div style="display: block; width: 262px"><img src="http://server12.autrado.de/autradogalerie_copy/var/galerie/127915_262.jpg" /></div>');
其中的引号等字符是以 HTML 实体的形式(例如 &quot;)出现的,所以正则表达式匹配不到。如果在 preg_match 之前添加以下这一行,它就能正常工作:
$item['src']= html_entity_decode($item['src']);
我有一个带有多个单引号的变量,想提取其中的一个字符串。
我的代码是:
// Backslash-escape the quote characters in the attribute text.
$image['src'] = addslashes($image['src']);
// Capture whatever sits between the quotes that follow "src="; inside the
// character class the "|" is a literal pipe, not an alternation.
preg_match('~src=["|\'](.*?)["|\']~', $image['src'], $matches);
// Replace the original text with the captured value.
$image['src'] = $matches[1];
$image['src'] 包含此字符串:
tooltip_html(this, '<div style="display: block; width: 262px"><img src="https://url.com/var/galerie/15773_262.jpg"/></div>');
我以为这样应该没问题,但是 $image['src'] 返回 null。addslashes 方法工作正常,返回的内容如下:
tooltip_html(this, \'<div style="display: block; width: 262px"><img src="https://url.com/var/galerie/15773_262.jpg"/></div>\');
我不明白这里的问题,我是不是漏掉了什么?
=====更新======
全部代码:
<?php
// Report every notice, warning and error.
error_reporting(E_ALL);
// Declare the response as JSON; the second argument (true) replaces any
// previously set Content-Type header.
header("Content-Type: application/json", true);
// Base URL of the site being scraped; request paths are appended to it.
define('SITE', 'https://akipa-autohandel.autrado.de/');
// Load the HTML parser library once (presumably the provider of str_get_html()).
include_once('simple_html_dom.php');
/**
 * SimpleXMLElement variant that can append CDATA sections for XML output.
 */
class SimpleXMLExtended extends SimpleXMLElement {
    /**
     * Append the given text to this element as a CDATA section.
     *
     * @param string $cdata_text Raw text to wrap in <![CDATA[ ... ]]>.
     */
    public function addCData($cdata_text) {
        // SimpleXML cannot create CDATA nodes itself, so go through the DOM view.
        $domElement = dom_import_simplexml($this);
        $cdataSection = $domElement->ownerDocument->createCDATASection($cdata_text);
        $domElement->appendChild($cdataSection);
    }
}
/**
 * Fetch a URL with cURL using a GET request.
 *
 * Returns the curl_getinfo() array extended with three extra keys:
 * 'errno', 'errmsg' and 'content' (the response body). When curl_exec()
 * fails, prints 'output error' followed by the cURL error text and ends
 * the script.
 */
function get_web_page( $url ) {
    $browserSignature = 'Mozilla/5.0 (Windows NT 6.1; rv:8.0) Gecko/20100101 Firefox/8.0';

    $handle = curl_init( $url );
    curl_setopt_array( $handle, array(
        CURLOPT_CUSTOMREQUEST  => "GET",              // request method
        CURLOPT_POST           => false,              // not a POST
        CURLOPT_USERAGENT      => $browserSignature,  // user agent string
        CURLOPT_COOKIEFILE     => "cookie.txt",       // cookies are read from here
        CURLOPT_COOKIEJAR      => "cookie.txt",       // cookies are written here
        CURLOPT_RETURNTRANSFER => true,               // hand the body back as a string
        CURLOPT_HEADER         => false,              // leave headers out of the body
        CURLOPT_FOLLOWLOCATION => true,               // follow redirects
        CURLOPT_ENCODING       => "",                 // accept all encodings
        CURLOPT_AUTOREFERER    => true,               // set referer on redirect
        CURLOPT_CONNECTTIMEOUT => 120,                // connect timeout
        CURLOPT_TIMEOUT        => 120,                // overall timeout
        CURLOPT_MAXREDIRS      => 10,                 // give up after 10 redirects
    ) );

    $body = curl_exec( $handle );
    if ( false === $body ) {
        // A false body cannot be passed to str_get_html(); report and stop.
        echo 'output error';
        die(curl_error($handle));
    }

    // Transfer info first, then the error state and the body appended to it.
    $result = curl_getinfo( $handle );
    $result['errno'] = curl_errno( $handle );
    $result['errmsg'] = curl_error( $handle );
    $result['content'] = $body;
    curl_close( $handle );

    return $result;
}
/**
 * Download a page and parse its body with str_get_html().
 *
 * Transport errors and non-200 status codes are reported with echo and do
 * not stop execution.
 *
 * @param string $uri Address of the page to fetch.
 * @return mixed Whatever str_get_html() returns for a non-empty body,
 *               or null when the body is empty.
 */
function renderPage( $uri ) {
    $rendering = get_web_page( $uri );
    if ( $rendering['errno'] != 0 ) {
        echo 'bad url, timeout, redirect loop';
    }
    if ( $rendering['http_code'] != 200 ) {
        echo 'no page, no permissions, no service';
    }
    // Start from null so an empty body yields a defined return value instead
    // of reading an undefined variable.
    $parsing = null;
    $content = $rendering['content'];
    if ( !empty($content) ) {
        $parsing = str_get_html($content);
    }
    return $parsing;
}
/**
 * Print the image URL found in the onmouseover tooltip of every car row
 * on the selected autrado result list.
 *
 * Rows without an a[onmouseover] element, and tooltips without a src
 * attribute, are skipped. Nothing is printed when the page could not be
 * parsed.
 */
function models() {
    $paramURI = SITE . 'schnellsuche.php?suche_hersteller=14&suche_modell=&suche_from=form&suche_action=suche&itemsperpage=500';
    $content = renderPage($paramURI);
    if ( !$content ) {
        // No parsed document to search.
        return;
    }
    foreach ($content->find('tr[class*=fahrzeugliste]') as $auto) {
        $anchor = $auto->find('a[onmouseover]', 0);
        if ( !$anchor ) {
            continue;
        }
        // The attribute value arrives HTML-entity encoded, so the quotes
        // around src are not literal characters until it is decoded.
        // ENT_QUOTES decodes both quote styles regardless of PHP version.
        $tooltip = html_entity_decode((string) $anchor->onmouseover, ENT_QUOTES);
        // Only read the capture group when the pattern actually matched.
        if ( preg_match('~src=["\'](.*?)["\']~', $tooltip, $matches) ) {
            echo $matches[1];
        }
    }
}
// Dispatch on the posted "action" field; only 'test' is handled.
if (!empty($_POST['action'])) {
    $action = $_POST['action'];
    switch ((string) $action) {
        case 'test':
            $output = models();
            // NOTE(review): the encoded string is neither echoed nor stored.
            json_encode($output);
            break;
    }
}
?>
$image['src'] 的内容和你上面写的不一样。我刚刚运行了你的脚本,实际内容是:
tooltip_html(this, '<div style="display: block; width: 262px"><img src="http://server12.autrado.de/autradogalerie_copy/var/galerie/127915_262.jpg" /></div>');
其中的引号等字符是以 HTML 实体的形式(例如 &quot;)出现的,所以正则表达式匹配不到。如果在 preg_match 之前添加以下这一行,它就能正常工作:
$item['src']= html_entity_decode($item['src']);