PHP 缓冲输出压缩(minify),但不处理 textarea/pre
PHP buffer output minify, NOT textarea/pre
我正在使用 PHP 手册用户注释中给出的缓冲区清理函数,但在 textarea 中遇到了连续换行符的问题。
当从数据库中取出一个包含两个、三个或四个连续换行符的字符串并将其放入 textarea
时,这些换行符会被压缩成只剩一个换行符。
因此:是否可以让该函数跳过 <pre> 与 </pre>、<textarea> 与 </textarea> 之间的所有输出?
看到这个问题,How to minify php html output without removing IE conditional comments?,我想我需要使用preg_match
,但我不确定如何将它实现到这个函数中。
我使用的函数是
/**
 * Output-buffer callback that squeezes whitespace out of generated HTML.
 *
 * Three regex passes run in order:
 *  1. whitespace other than a plain space directly after ">" is dropped;
 *  2. whitespace other than a plain space directly before "<" is dropped;
 *  3. every remaining run of whitespace is reduced to the last character
 *     of that run.
 *
 * Nothing is exempt: the contents of <pre> and <textarea> are collapsed too.
 *
 * @param string $buffer Markup captured by the output buffer.
 * @return string|null Whatever preg_replace() yields for the buffer.
 */
function sanitize_output($buffer) {
    // pattern => replacement, applied top to bottom
    $rules = [
        '/\>[^\S ]+/s' => '>',
        '/[^\S ]+\</s' => '<',
        '/(\s)+/s'     => '\1',
    ];

    return preg_replace(array_keys($rules), array_values($rules), $buffer);
}
// Start output buffering with sanitize_output() registered as the buffer callback.
ob_start("sanitize_output");
是的,我同时使用了这个清理函数和 GZIP
以获得尽可能小的体积。
对于 PRE 有一个简单的解决方案,但它对 TEXTAREA 不起作用:在输出值之前,先将空格替换为 &nbsp;,再使用 nl2br() 将换行符替换为 BR 元素。它并不优雅,但它有效:
<pre><?php
// Escape the value, then encode spaces as &nbsp; and mark each newline with
// <br /> so a whitespace-collapsing output filter cannot flatten the layout
// of this <pre> block. (Replacing ' ' with ' ' would be a no-op.)
echo nl2br(str_replace(' ', '&nbsp;', htmlspecialchars($value)));
?></pre>
遗憾的是,它不能用于 TEXTAREA
,因为浏览器将 <br />
显示为文本。
这里是评论中提到的函数的实现:
/**
 * Output-buffer callback: collapse HTML whitespace everywhere except inside
 * <textarea> and <pre> elements, which are passed through untouched.
 *
 * Each protected element is swapped for a short placeholder tag, the three
 * whitespace rules run on what is left, and the originals are put back.
 *
 * Design notes:
 *  - Both tags are matched in one pass, so a <pre> that contains a
 *    <textarea> is protected as a whole.
 *  - The placeholder name is checked not to occur in the page, so literal
 *    markup such as "<textarea>0</textarea>" is never mistaken for one.
 *  - Restoration is a single strtr() pass; restored text is not rescanned.
 *  - If PCRE reports an error (null result) the buffer is returned
 *    unminified instead of null.
 *
 * @param string $buffer Markup captured by the output buffer.
 * @return string Minified markup; protected elements are byte-identical.
 */
function sanitize_output($buffer) {
    // Find a placeholder prefix that the page itself does not contain.
    $serial = 0;
    do {
        $marker = 'keep' . $serial++;
    } while (strpos($buffer, '<' . $marker . '-') !== false);

    // Swap every <textarea>/<pre> element for "<keepN-i>".
    $kept = array();
    $masked = preg_replace_callback(
        '#<(textarea|pre)\b[^>]*>.*?</\1>#is',
        function ($m) use (&$kept, $marker) {
            $placeholder = '<' . $marker . '-' . count($kept) . '>';
            $kept[$placeholder] = $m[0];
            return $placeholder;
        },
        $buffer
    );
    if ($masked === null) {
        return $buffer;
    }

    $search = array(
        '/\>[^\S ]+/s', // strip whitespaces after tags, except space
        '/[^\S ]+\</s', // strip whitespaces before tags, except space
        '/(\s)+/s'      // shorten multiple whitespace sequences
    );
    $replace = array(
        '>',
        '<',
        '\1'
    );
    $minified = preg_replace($search, $replace, $masked);
    if ($minified === null) {
        return $buffer;
    }

    // Nothing was protected: nothing to put back.
    if (!$kept) {
        return $minified;
    }

    return strtr($minified, $kept);
}
总有优化的余地,但行之有效
/**
 * Encode line breaks as numeric character references.
 *
 * "\n" becomes "&#10;" and "\r" becomes "&#13;". The references contain no
 * whitespace characters, so the \s-based rules in sanitize_output() leave
 * them alone. (Replacing line breaks with blanks would discard them.)
 *
 * @param string $str Text that may contain "\n" / "\r".
 * @return string Text with every line-break character encoded.
 */
function nl2ascii($str){
    return str_replace(array("\n", "\r"), array('&#10;', '&#13;'), $str);
}
// Sample value: "test", a run of "\n\r" pairs, then "test".
$StrTest = "test\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\rtest";
// Buffer the output below with sanitize_output() as the callback.
ob_start("sanitize_output");
?>
<textarea><?php echo nl2ascii($StrTest); ?></textarea>
<textarea><?php echo $StrTest; ?></textarea>
<pre style="border: 1px solid red"><?php echo nl2ascii($StrTest); ?></pre>
<pre style="border: 1px solid red"><?php echo $StrTest; ?></pre>
<?php
// Flush the buffered markup (it passes through the callback on the way out).
ob_flush();
原始输出
<textarea>test test</textarea>
<textarea>test
test</textarea>
<pre style="border: 1px solid red">test test</pre>
<pre style="border: 1px solid red">test
test</pre>
视觉输出
也许这会给您所需的结果。
但总的来说,我不推荐这种清理操作,它对性能不利。如今确实没有必要从 HTML 输出中去除空白字符。
/**
 * Output-buffer callback: minify HTML whitespace while leaving the elements
 * listed in $ignoreTags (<textarea>, <pre>) exactly as written.
 *
 * For each ignored tag, every matching element is replaced by a placeholder
 * tag; the whitespace rules then run; finally the placeholders are swapped
 * back for the saved elements.
 *
 * The placeholder counter is shared by all tags, so a <textarea> and a <pre>
 * never receive the same placeholder. Placeholders are restored newest
 * first, because an element saved for a later tag may itself contain
 * placeholders created for an earlier tag.
 *
 * @param string $buffer Markup captured by the output buffer.
 * @return string Minified markup; falls back to skipping the minify step if
 *                PCRE reports an error.
 */
function sanitize_output($buffer) {
    $ignoreTags = array("textarea", "pre");

    # find tags that must be ignored and replace them with a placeholder
    $tmpReplacements = array(); // placeholder => original element
    foreach ($ignoreTags as $tag) {
        $quoted = preg_quote($tag, '~');
        $masked = preg_replace_callback(
            '~<' . $quoted . '\b.*?>.*?</' . $quoted . '>~is',
            function ($match) use (&$tmpReplacements) {
                $index = count($tmpReplacements);
                $replacementValue = "<tmp-replacement-$index>";
                $tmpReplacements[$replacementValue] = $match[0];
                return $replacementValue;
            },
            $buffer
        );
        // null means PCRE failed; leave this tag unprotected rather than
        // lose the whole buffer
        if ($masked !== null) {
            $buffer = $masked;
        }
    }

    $search = array(
        '/\>[^\S ]+/s', // strip whitespaces after tags, except space
        '/[^\S ]+\</s', // strip whitespaces before tags, except space
        '/(\s)+/s'      // shorten multiple whitespace sequences
    );
    $replace = array(
        '>',
        '<',
        '\1'
    );
    $minified = preg_replace($search, $replace, $buffer);
    if ($minified !== null) {
        $buffer = $minified;
    }

    # re-insert previously ignored tags, newest placeholder first
    foreach (array_reverse($tmpReplacements, true) as $placeholder => $original) {
        $buffer = str_replace($placeholder, $original, $buffer);
    }

    return $buffer;
}
这是我清理 HTML 的版本。我已经为代码添加了注释,所以它的作用应该很清楚。
/**
 * Minify HTML whitespace while keeping the listed elements untouched.
 *
 * Every element whose tag name is in $arr_tags is replaced by a numbered
 * placeholder, the whitespace rules run on the rest, and the kept elements
 * are put back afterwards.
 *
 * All matches of a tag are protected (the match list is $arr_temp[0], not
 * $arr_temp itself). Placeholders are numbered across all tags and use a
 * tag name of their own, so they cannot clash with each other or with a
 * literal "<textarea>0</textarea>" in the page.
 *
 * @param string   $html     Markup to minify.
 * @param string[] $arr_tags Tag names whose elements must be left as-is.
 * @return string Minified markup; the protected elements are unchanged.
 */
function comprimeer($html = '', $arr_tags = ['textarea', 'pre']) {
    $arr_found = []; // kept elements, in the order they were masked
    $arr_back  = []; // placeholder standing in for the element at the same index

    foreach ($arr_tags as $tag) {
        $quoted  = preg_quote($tag, '#');
        $pattern = '#\<' . $quoted . '\b.*\>.*\<\/' . $quoted . '\>#Uis';
        $arr_temp = [];
        if (!preg_match_all($pattern, $html, $arr_temp)) {
            // tag not present (or PCRE error): nothing to protect
            continue;
        }
        // $arr_temp[0] holds every full "<tag ...>...</tag>" match
        foreach (array_unique($arr_temp[0]) as $item) {
            $placeholder = '<comprimeer-keep-' . count($arr_found) . '>';
            $arr_found[] = $item;
            $arr_back[]  = $placeholder;
            $html = str_replace($item, $placeholder, $html);
        }
    } // end foreach

    // clean the html
    $arr_search = [
        '/\>[^\S ]+/s', // strip whitespaces after tags, except space
        '/[^\S ]+\</s', // strip whitespaces before tags, except space
        '/(\s)+/s',     // shorten multiple whitespace sequences
    ];
    $arr_replace = [
        '>',
        '<',
        '\1',
    ];
    $clean = preg_replace($arr_search, $arr_replace, $html);
    if ($clean === null) {
        // PCRE error: fall back to the unminified markup
        $clean = $html;
    }

    // put the kept items back, newest first: an element kept for a later
    // tag may contain placeholders made for an earlier tag
    return str_replace(array_reverse($arr_back), array_reverse($arr_found), $clean);
} // end function
我正在使用 PHP 手册用户注释中给出的缓冲区清理函数,但在 textarea 中遇到了连续换行符的问题。
当从数据库中取出一个包含两个、三个或四个连续换行符的字符串并将其放入 textarea
时,这些换行符会被压缩成只剩一个换行符。
因此:是否可以让该函数跳过 <pre> 与 </pre>、<textarea> 与 </textarea> 之间的所有输出?
看到这个问题,How to minify php html output without removing IE conditional comments?,我想我需要使用preg_match
,但我不确定如何将它实现到这个函数中。
我使用的函数是
/**
 * Output-buffer callback that squeezes whitespace out of generated HTML.
 *
 * Three regex passes run in order:
 *  1. whitespace other than a plain space directly after ">" is dropped;
 *  2. whitespace other than a plain space directly before "<" is dropped;
 *  3. every remaining run of whitespace is reduced to the last character
 *     of that run.
 *
 * Nothing is exempt: the contents of <pre> and <textarea> are collapsed too.
 *
 * @param string $buffer Markup captured by the output buffer.
 * @return string|null Whatever preg_replace() yields for the buffer.
 */
function sanitize_output($buffer) {
    // pattern => replacement, applied top to bottom
    $rules = [
        '/\>[^\S ]+/s' => '>',
        '/[^\S ]+\</s' => '<',
        '/(\s)+/s'     => '\1',
    ];

    return preg_replace(array_keys($rules), array_values($rules), $buffer);
}
// Start output buffering with sanitize_output() registered as the buffer callback.
ob_start("sanitize_output");
是的,我同时使用了这个清理函数和 GZIP
以获得尽可能小的体积。
对于 PRE 有一个简单的解决方案,但它对 TEXTAREA 不起作用:在输出值之前,先将空格替换为 &nbsp;,再使用 nl2br() 将换行符替换为 BR 元素。它并不优雅,但它有效:
<pre><?php
// Escape the value, then encode spaces as &nbsp; and mark each newline with
// <br /> so a whitespace-collapsing output filter cannot flatten the layout
// of this <pre> block. (Replacing ' ' with ' ' would be a no-op.)
echo nl2br(str_replace(' ', '&nbsp;', htmlspecialchars($value)));
?></pre>
遗憾的是,它不能用于 TEXTAREA
,因为浏览器将 <br />
显示为文本。
这里是评论中提到的函数的实现:
/**
 * Output-buffer callback: collapse HTML whitespace everywhere except inside
 * <textarea> and <pre> elements, which are passed through untouched.
 *
 * Each protected element is swapped for a short placeholder tag, the three
 * whitespace rules run on what is left, and the originals are put back.
 *
 * Design notes:
 *  - Both tags are matched in one pass, so a <pre> that contains a
 *    <textarea> is protected as a whole.
 *  - The placeholder name is checked not to occur in the page, so literal
 *    markup such as "<textarea>0</textarea>" is never mistaken for one.
 *  - Restoration is a single strtr() pass; restored text is not rescanned.
 *  - If PCRE reports an error (null result) the buffer is returned
 *    unminified instead of null.
 *
 * @param string $buffer Markup captured by the output buffer.
 * @return string Minified markup; protected elements are byte-identical.
 */
function sanitize_output($buffer) {
    // Find a placeholder prefix that the page itself does not contain.
    $serial = 0;
    do {
        $marker = 'keep' . $serial++;
    } while (strpos($buffer, '<' . $marker . '-') !== false);

    // Swap every <textarea>/<pre> element for "<keepN-i>".
    $kept = array();
    $masked = preg_replace_callback(
        '#<(textarea|pre)\b[^>]*>.*?</\1>#is',
        function ($m) use (&$kept, $marker) {
            $placeholder = '<' . $marker . '-' . count($kept) . '>';
            $kept[$placeholder] = $m[0];
            return $placeholder;
        },
        $buffer
    );
    if ($masked === null) {
        return $buffer;
    }

    $search = array(
        '/\>[^\S ]+/s', // strip whitespaces after tags, except space
        '/[^\S ]+\</s', // strip whitespaces before tags, except space
        '/(\s)+/s'      // shorten multiple whitespace sequences
    );
    $replace = array(
        '>',
        '<',
        '\1'
    );
    $minified = preg_replace($search, $replace, $masked);
    if ($minified === null) {
        return $buffer;
    }

    // Nothing was protected: nothing to put back.
    if (!$kept) {
        return $minified;
    }

    return strtr($minified, $kept);
}
总有优化的余地,但行之有效
/**
 * Encode line breaks as numeric character references.
 *
 * "\n" becomes "&#10;" and "\r" becomes "&#13;". The references contain no
 * whitespace characters, so the \s-based rules in sanitize_output() leave
 * them alone. (Replacing line breaks with blanks would discard them.)
 *
 * @param string $str Text that may contain "\n" / "\r".
 * @return string Text with every line-break character encoded.
 */
function nl2ascii($str){
    return str_replace(array("\n", "\r"), array('&#10;', '&#13;'), $str);
}
// Sample value: "test", a run of "\n\r" pairs, then "test".
$StrTest = "test\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\rtest";
// Buffer the output below with sanitize_output() as the callback.
ob_start("sanitize_output");
?>
<textarea><?php echo nl2ascii($StrTest); ?></textarea>
<textarea><?php echo $StrTest; ?></textarea>
<pre style="border: 1px solid red"><?php echo nl2ascii($StrTest); ?></pre>
<pre style="border: 1px solid red"><?php echo $StrTest; ?></pre>
<?php
// Flush the buffered markup (it passes through the callback on the way out).
ob_flush();
原始输出
<textarea>test test</textarea>
<textarea>test
test</textarea>
<pre style="border: 1px solid red">test test</pre>
<pre style="border: 1px solid red">test
test</pre>
视觉输出
也许这会给您所需的结果。但总的来说,我不推荐这种清理操作,它对性能不利。如今确实没有必要从 HTML 输出中去除空白字符。
/**
 * Output-buffer callback: minify HTML whitespace while leaving the elements
 * listed in $ignoreTags (<textarea>, <pre>) exactly as written.
 *
 * For each ignored tag, every matching element is replaced by a placeholder
 * tag; the whitespace rules then run; finally the placeholders are swapped
 * back for the saved elements.
 *
 * The placeholder counter is shared by all tags, so a <textarea> and a <pre>
 * never receive the same placeholder. Placeholders are restored newest
 * first, because an element saved for a later tag may itself contain
 * placeholders created for an earlier tag.
 *
 * @param string $buffer Markup captured by the output buffer.
 * @return string Minified markup; falls back to skipping the minify step if
 *                PCRE reports an error.
 */
function sanitize_output($buffer) {
    $ignoreTags = array("textarea", "pre");

    # find tags that must be ignored and replace them with a placeholder
    $tmpReplacements = array(); // placeholder => original element
    foreach ($ignoreTags as $tag) {
        $quoted = preg_quote($tag, '~');
        $masked = preg_replace_callback(
            '~<' . $quoted . '\b.*?>.*?</' . $quoted . '>~is',
            function ($match) use (&$tmpReplacements) {
                $index = count($tmpReplacements);
                $replacementValue = "<tmp-replacement-$index>";
                $tmpReplacements[$replacementValue] = $match[0];
                return $replacementValue;
            },
            $buffer
        );
        // null means PCRE failed; leave this tag unprotected rather than
        // lose the whole buffer
        if ($masked !== null) {
            $buffer = $masked;
        }
    }

    $search = array(
        '/\>[^\S ]+/s', // strip whitespaces after tags, except space
        '/[^\S ]+\</s', // strip whitespaces before tags, except space
        '/(\s)+/s'      // shorten multiple whitespace sequences
    );
    $replace = array(
        '>',
        '<',
        '\1'
    );
    $minified = preg_replace($search, $replace, $buffer);
    if ($minified !== null) {
        $buffer = $minified;
    }

    # re-insert previously ignored tags, newest placeholder first
    foreach (array_reverse($tmpReplacements, true) as $placeholder => $original) {
        $buffer = str_replace($placeholder, $original, $buffer);
    }

    return $buffer;
}
这是我清理 HTML 的版本。我已经为代码添加了注释,所以它的作用应该很清楚。
/**
 * Minify HTML whitespace while keeping the listed elements untouched.
 *
 * Every element whose tag name is in $arr_tags is replaced by a numbered
 * placeholder, the whitespace rules run on the rest, and the kept elements
 * are put back afterwards.
 *
 * All matches of a tag are protected (the match list is $arr_temp[0], not
 * $arr_temp itself). Placeholders are numbered across all tags and use a
 * tag name of their own, so they cannot clash with each other or with a
 * literal "<textarea>0</textarea>" in the page.
 *
 * @param string   $html     Markup to minify.
 * @param string[] $arr_tags Tag names whose elements must be left as-is.
 * @return string Minified markup; the protected elements are unchanged.
 */
function comprimeer($html = '', $arr_tags = ['textarea', 'pre']) {
    $arr_found = []; // kept elements, in the order they were masked
    $arr_back  = []; // placeholder standing in for the element at the same index

    foreach ($arr_tags as $tag) {
        $quoted  = preg_quote($tag, '#');
        $pattern = '#\<' . $quoted . '\b.*\>.*\<\/' . $quoted . '\>#Uis';
        $arr_temp = [];
        if (!preg_match_all($pattern, $html, $arr_temp)) {
            // tag not present (or PCRE error): nothing to protect
            continue;
        }
        // $arr_temp[0] holds every full "<tag ...>...</tag>" match
        foreach (array_unique($arr_temp[0]) as $item) {
            $placeholder = '<comprimeer-keep-' . count($arr_found) . '>';
            $arr_found[] = $item;
            $arr_back[]  = $placeholder;
            $html = str_replace($item, $placeholder, $html);
        }
    } // end foreach

    // clean the html
    $arr_search = [
        '/\>[^\S ]+/s', // strip whitespaces after tags, except space
        '/[^\S ]+\</s', // strip whitespaces before tags, except space
        '/(\s)+/s',     // shorten multiple whitespace sequences
    ];
    $arr_replace = [
        '>',
        '<',
        '\1',
    ];
    $clean = preg_replace($arr_search, $arr_replace, $html);
    if ($clean === null) {
        // PCRE error: fall back to the unminified markup
        $clean = $html;
    }

    // put the kept items back, newest first: an element kept for a later
    // tag may contain placeholders made for an earlier tag
    return str_replace(array_reverse($arr_back), array_reverse($arr_found), $clean);
} // end function