将字符串转换为小写,但保留引号内的原始大小写 - Javascript
Make a string lowercase respecting quotes - Javascript
我正在为一种领域特定语言(Domain Specific Language,DSL)构建解析器,需要把字符串全部转换为小写。我知道 toLowerCase 可以轻松完成此任务,
但用双引号或单引号(" 或 ')括起来的字符串需要保留原始大小写。例如,见下文:
输入:
ThIs iS a teST "sTriNg Y'alL" aS yOu cAN sEE 'hEllO woRl\' o miNE'
输出:
this is a test "sTriNg Y'alL" as you can see 'hEllO woRl\' o miNE'
编辑:示例中补充了用反斜杠转义的引号。
我相信存在纯正则表达式的解决方案,不过这里给出另一种做法:先把带引号的字符串替换成占位符,再将其余部分转换为小写,最后把占位符还原:
/**
 * Lower-case a string while leaving quoted substrings untouched.
 *
 * A quoted substring starts at an unescaped `"` or `'` and runs to the next
 * unescaped occurrence of the same quote character; a backslash escapes the
 * character that follows it. Everything outside quoted substrings is
 * lower-cased. A quote character that is never closed is left as-is and the
 * text after it is treated as ordinary (lower-cased) text.
 *
 * @returns {string} The converted primitive string.
 */
String.prototype.toLowerCaseQuoted = function () {
    // Alternative 1 (captured): a complete "..." or '...' run, honouring
    // backslash escapes. Alternative 2: a run of unquoted text, where an
    // escaped character (e.g. \") never opens a quoted run.
    // Matching quoted runs lazily per quote pair fixes the old greedy `.+`,
    // which swallowed everything between the first and the last quote.
    var quotedOrPlain = /("(?:\\[\s\S]|[^"\\])*"|'(?:\\[\s\S]|[^'\\])*')|(?:\\[\s\S]|[^"'\\])+/g;

    // Transforming in a single pass avoids the "%sN%" placeholders, which
    // collided with identical text already present in the input.
    return String(this).replace(quotedOrPlain, function (match, quoted) {
        return quoted ? match : match.toLowerCase();
    });
};
例如:
// Example call to toLowerCaseQuoted: text outside the quotes is lower-cased,
// the two quoted runs ("QUOTED" and 'MORE QUOTED') keep their case.
"WILL BE LOWER CASE \"QUOTED\" \'MORE QUOTED\'".toLowerCaseQuoted()
返回:
"will be lower case "QUOTED" 'MORE QUOTED'"
我临时拼凑了一个简单的解析器,不确定效果如何,但它应该能处理任意数量的反斜杠转义:
/**
 * Split a string into consecutive "regular" and "quote" blocks.
 *
 * A quote block starts at an unescaped `"` or `'` and ends at the next
 * unescaped occurrence of the same quote character (the other quote
 * character may appear freely inside it). A character counts as escaped when
 * it is preceded by an odd number of backslashes. Text between quote blocks
 * is returned as regular blocks; empty regular blocks are never emitted.
 *
 * @param {string} str - The text to split.
 * @returns {Array<{type: string, str: string}>} Blocks in source order;
 *     `type` is 'regular' or 'quote', and quote blocks include their
 *     surrounding quote characters. Concatenating every `str` yields the input.
 * @throws {SyntaxError} If a quote is opened but never closed.
 */
function string_to_block(str) {
    var blocks = [],
        i = 0, // start index of the regular text not yet emitted
        j, k, quote;

    // True when the character at index i is preceded by an odd number of
    // backslashes (an even number means the backslashes escape each other).
    function isEscaped(str, i) {
        var escaped = false;
        while (str[--i] === '\\') escaped = !escaped;
        return escaped;
    }

    for (j = 0; j < str.length; ++j) {
        quote = str[j];
        if ((quote !== '"' && quote !== "'") || isEscaped(str, j)) {
            continue;
        }
        // Emit the regular text that precedes this opening quote.
        if (j > i) {
            blocks.push({type: 'regular', str: str.slice(i, j)});
        }
        // Look for the matching, unescaped closing quote.
        for (k = j + 1; k < str.length; ++k) {
            if (str[k] === quote && !isEscaped(str, k)) {
                break;
            }
        }
        if (k >= str.length) {
            throw new SyntaxError('unclosed ' + quote + 'str... starting at index ' + j);
        }
        blocks.push({type: 'quote', str: str.slice(j, k + 1)});
        i = k + 1;
        j = k; // resume scanning right after the closing quote
    }
    // Trailing regular text; this also covers input that has no quotes at all.
    if (i < str.length) {
        blocks.push({type: 'regular', str: str.slice(i)});
    }
    return blocks;
}
现在调用它:
// Example input. The backslash is doubled because inside a double-quoted
// literal `\'` is just `'`; `\\'` is what puts a real backslash-escaped quote
// into the string handed to the parser.
var foo = string_to_block("ThIs iS a teST \"sTriNg Y'alL\" aS yOu cAN sEE 'hEllO woRl\\' o miNE'");
/*
Expected value of foo (shown as JSON):
[
{"type": "regular", "str": "ThIs iS a teST "},
{"type": "quote" , "str": "\"sTriNg Y'alL\""},
{"type": "regular", "str": " aS yOu cAN sEE "},
{"type": "quote" , "str": "'hEllO woRl\\' o miNE'"}
]
*/
这样就可以按需要重新拼接出字符串:
// Reassemble the text from the parsed blocks: regular blocks are lower-cased,
// quote blocks are appended unchanged.
var i = 0, str = '';
while (i < foo.length) {
    str += foo[i].type === 'regular' ? foo[i].str.toLowerCase() : foo[i].str;
    ++i;
}
str; // this is a test "sTriNg Y'alL" as you can see 'hEllO woRl\' o miNE'
/**
 * Lower-case a string while leaving quoted substrings untouched.
 *
 * Scans the string once. An unescaped `"` or `'` opens a quoted run that
 * lasts until the next unescaped occurrence of the same quote character; a
 * backslash escapes the character that follows it. Quoted runs (including
 * their quote characters) are copied verbatim, everything else is
 * lower-cased. If a quote is never closed, the text from that quote to the
 * end of the string is copied verbatim.
 *
 * @returns {string} The converted primitive string.
 */
String.prototype.toLowerCaseQuoted = function () {
    var source = String(this);
    var result = '';
    var runStart = 0;   // start of the run (quoted or plain) not yet copied
    var openQuote = ''; // quote character of the current quoted run, '' outside

    for (var i = 0; i < source.length; i++) {
        var ch = source[i];
        if (ch === '\\') {
            i++; // the escaped character can neither open nor close a run
            continue;
        }
        if (openQuote) {
            if (ch === openQuote) {
                // Closing quote: copy the whole quoted run unchanged.
                result += source.slice(runStart, i + 1);
                runStart = i + 1;
                openQuote = '';
            }
        } else if (ch === '"' || ch === "'") {
            // Opening quote: flush the plain run before it, lower-cased as a
            // whole so multi-unit characters are converted correctly.
            result += source.slice(runStart, i).toLowerCase();
            runStart = i;
            openQuote = ch;
        }
    }

    var tail = source.slice(runStart);
    return result + (openQuote ? tail : tail.toLowerCase());
};
这是对 @Paul S 答案的补充。这个版本还能处理不含任何引号块的字符串:
/**
 * Split a string into consecutive "regular" and "quote" blocks, including
 * input that contains no quotes at all.
 *
 * A quote block starts at an unescaped `"` or `'` and ends at the next
 * unescaped occurrence of the same quote character. A character counts as
 * escaped when it is preceded by an odd number of backslashes. Text outside
 * quote blocks is returned as regular blocks; empty regular blocks are
 * never emitted.
 *
 * @param {string} str - The text to split.
 * @returns {Array<{type: string, str: string}>} Blocks in source order;
 *     `type` is 'regular' or 'quote', and quote blocks include their
 *     surrounding quote characters.
 * @throws {SyntaxError} If a quote is opened but never closed.
 */
function string_to_block(str) {
    var blocks = [],
        i = 0, // start index of the regular text not yet emitted
        j, k, quote;

    // True when the character at index i is preceded by an odd number of
    // backslashes.
    function isEscaped(str, i) {
        var escaped = false;
        while (str[--i] === '\\') escaped = !escaped;
        return escaped;
    }

    // Emit str[from, to) as a regular block unless it is empty.
    function pushRegular(from, to) {
        if (to > from) {
            blocks.push({type: 'regular', str: str.slice(from, to)});
        }
    }

    for (j = 0; j < str.length; ++j) {
        quote = str[j];
        if ((quote === '"' || quote === "'") && !isEscaped(str, j)) {
            pushRegular(i, j);
            // Find the matching, unescaped closing quote.
            k = j + 1;
            while (k < str.length && !(str[k] === quote && !isEscaped(str, k))) {
                ++k;
            }
            if (k >= str.length) {
                throw new SyntaxError('unclosed ' + quote + 'str... starting at index ' + j);
            }
            blocks.push({type: 'quote', str: str.slice(j, k + 1)});
            i = k + 1;
            j = k; // resume scanning right after the closing quote
        }
    }
    // Whatever follows the last quote block (or the whole input when there
    // were no quote blocks) is regular text.
    pushRegular(i, str.length);
    return blocks;
}
我正在为一种领域特定语言(Domain Specific Language,DSL)构建解析器,需要把字符串全部转换为小写。我知道 toLowerCase 可以轻松完成此任务,
但用双引号或单引号(" 或 ')括起来的字符串需要保留原始大小写。例如,见下文:
输入:
ThIs iS a teST "sTriNg Y'alL" aS yOu cAN sEE 'hEllO woRl\' o miNE'
输出:
this is a test "sTriNg Y'alL" as you can see 'hEllO woRl\' o miNE'
编辑:示例中补充了用反斜杠转义的引号。
我相信存在纯正则表达式的解决方案,不过这里给出另一种做法:先把带引号的字符串替换成占位符,再将其余部分转换为小写,最后把占位符还原:
/**
 * Lower-case a string while leaving quoted substrings untouched.
 *
 * A quoted substring starts at an unescaped `"` or `'` and runs to the next
 * unescaped occurrence of the same quote character; a backslash escapes the
 * character that follows it. Everything outside quoted substrings is
 * lower-cased. A quote character that is never closed is left as-is and the
 * text after it is treated as ordinary (lower-cased) text.
 *
 * @returns {string} The converted primitive string.
 */
String.prototype.toLowerCaseQuoted = function () {
    // Alternative 1 (captured): a complete "..." or '...' run, honouring
    // backslash escapes. Alternative 2: a run of unquoted text, where an
    // escaped character (e.g. \") never opens a quoted run.
    // Matching quoted runs lazily per quote pair fixes the old greedy `.+`,
    // which swallowed everything between the first and the last quote.
    var quotedOrPlain = /("(?:\\[\s\S]|[^"\\])*"|'(?:\\[\s\S]|[^'\\])*')|(?:\\[\s\S]|[^"'\\])+/g;

    // Transforming in a single pass avoids the "%sN%" placeholders, which
    // collided with identical text already present in the input.
    return String(this).replace(quotedOrPlain, function (match, quoted) {
        return quoted ? match : match.toLowerCase();
    });
};
例如:
// Example call to toLowerCaseQuoted: text outside the quotes is lower-cased,
// the two quoted runs ("QUOTED" and 'MORE QUOTED') keep their case.
"WILL BE LOWER CASE \"QUOTED\" \'MORE QUOTED\'".toLowerCaseQuoted()
返回:
"will be lower case "QUOTED" 'MORE QUOTED'"
我临时拼凑了一个简单的解析器,不确定效果如何,但它应该能处理任意数量的反斜杠转义:
/**
 * Split a string into consecutive "regular" and "quote" blocks.
 *
 * A quote block starts at an unescaped `"` or `'` and ends at the next
 * unescaped occurrence of the same quote character (the other quote
 * character may appear freely inside it). A character counts as escaped when
 * it is preceded by an odd number of backslashes. Text between quote blocks
 * is returned as regular blocks; empty regular blocks are never emitted.
 *
 * @param {string} str - The text to split.
 * @returns {Array<{type: string, str: string}>} Blocks in source order;
 *     `type` is 'regular' or 'quote', and quote blocks include their
 *     surrounding quote characters. Concatenating every `str` yields the input.
 * @throws {SyntaxError} If a quote is opened but never closed.
 */
function string_to_block(str) {
    var blocks = [],
        i = 0, // start index of the regular text not yet emitted
        j, k, quote;

    // True when the character at index i is preceded by an odd number of
    // backslashes (an even number means the backslashes escape each other).
    function isEscaped(str, i) {
        var escaped = false;
        while (str[--i] === '\\') escaped = !escaped;
        return escaped;
    }

    for (j = 0; j < str.length; ++j) {
        quote = str[j];
        if ((quote !== '"' && quote !== "'") || isEscaped(str, j)) {
            continue;
        }
        // Emit the regular text that precedes this opening quote.
        if (j > i) {
            blocks.push({type: 'regular', str: str.slice(i, j)});
        }
        // Look for the matching, unescaped closing quote.
        for (k = j + 1; k < str.length; ++k) {
            if (str[k] === quote && !isEscaped(str, k)) {
                break;
            }
        }
        if (k >= str.length) {
            throw new SyntaxError('unclosed ' + quote + 'str... starting at index ' + j);
        }
        blocks.push({type: 'quote', str: str.slice(j, k + 1)});
        i = k + 1;
        j = k; // resume scanning right after the closing quote
    }
    // Trailing regular text; this also covers input that has no quotes at all.
    if (i < str.length) {
        blocks.push({type: 'regular', str: str.slice(i)});
    }
    return blocks;
}
现在调用它:
// Example input. The backslash is doubled because inside a double-quoted
// literal `\'` is just `'`; `\\'` is what puts a real backslash-escaped quote
// into the string handed to the parser.
var foo = string_to_block("ThIs iS a teST \"sTriNg Y'alL\" aS yOu cAN sEE 'hEllO woRl\\' o miNE'");
/*
Expected value of foo (shown as JSON):
[
{"type": "regular", "str": "ThIs iS a teST "},
{"type": "quote" , "str": "\"sTriNg Y'alL\""},
{"type": "regular", "str": " aS yOu cAN sEE "},
{"type": "quote" , "str": "'hEllO woRl\\' o miNE'"}
]
*/
这样就可以按需要重新拼接出字符串:
// Reassemble the text from the parsed blocks: regular blocks are lower-cased,
// quote blocks are appended unchanged.
var i = 0, str = '';
while (i < foo.length) {
    str += foo[i].type === 'regular' ? foo[i].str.toLowerCase() : foo[i].str;
    ++i;
}
str; // this is a test "sTriNg Y'alL" as you can see 'hEllO woRl\' o miNE'
/**
 * Lower-case a string while leaving quoted substrings untouched.
 *
 * Scans the string once. An unescaped `"` or `'` opens a quoted run that
 * lasts until the next unescaped occurrence of the same quote character; a
 * backslash escapes the character that follows it. Quoted runs (including
 * their quote characters) are copied verbatim, everything else is
 * lower-cased. If a quote is never closed, the text from that quote to the
 * end of the string is copied verbatim.
 *
 * @returns {string} The converted primitive string.
 */
String.prototype.toLowerCaseQuoted = function () {
    var source = String(this);
    var result = '';
    var runStart = 0;   // start of the run (quoted or plain) not yet copied
    var openQuote = ''; // quote character of the current quoted run, '' outside

    for (var i = 0; i < source.length; i++) {
        var ch = source[i];
        if (ch === '\\') {
            i++; // the escaped character can neither open nor close a run
            continue;
        }
        if (openQuote) {
            if (ch === openQuote) {
                // Closing quote: copy the whole quoted run unchanged.
                result += source.slice(runStart, i + 1);
                runStart = i + 1;
                openQuote = '';
            }
        } else if (ch === '"' || ch === "'") {
            // Opening quote: flush the plain run before it, lower-cased as a
            // whole so multi-unit characters are converted correctly.
            result += source.slice(runStart, i).toLowerCase();
            runStart = i;
            openQuote = ch;
        }
    }

    var tail = source.slice(runStart);
    return result + (openQuote ? tail : tail.toLowerCase());
};
这是对 @Paul S 答案的补充。这个版本还能处理不含任何引号块的字符串:
/**
 * Split a string into consecutive "regular" and "quote" blocks, including
 * input that contains no quotes at all.
 *
 * A quote block starts at an unescaped `"` or `'` and ends at the next
 * unescaped occurrence of the same quote character. A character counts as
 * escaped when it is preceded by an odd number of backslashes. Text outside
 * quote blocks is returned as regular blocks; empty regular blocks are
 * never emitted.
 *
 * @param {string} str - The text to split.
 * @returns {Array<{type: string, str: string}>} Blocks in source order;
 *     `type` is 'regular' or 'quote', and quote blocks include their
 *     surrounding quote characters.
 * @throws {SyntaxError} If a quote is opened but never closed.
 */
function string_to_block(str) {
    var blocks = [],
        i = 0, // start index of the regular text not yet emitted
        j, k, quote;

    // True when the character at index i is preceded by an odd number of
    // backslashes.
    function isEscaped(str, i) {
        var escaped = false;
        while (str[--i] === '\\') escaped = !escaped;
        return escaped;
    }

    // Emit str[from, to) as a regular block unless it is empty.
    function pushRegular(from, to) {
        if (to > from) {
            blocks.push({type: 'regular', str: str.slice(from, to)});
        }
    }

    for (j = 0; j < str.length; ++j) {
        quote = str[j];
        if ((quote === '"' || quote === "'") && !isEscaped(str, j)) {
            pushRegular(i, j);
            // Find the matching, unescaped closing quote.
            k = j + 1;
            while (k < str.length && !(str[k] === quote && !isEscaped(str, k))) {
                ++k;
            }
            if (k >= str.length) {
                throw new SyntaxError('unclosed ' + quote + 'str... starting at index ' + j);
            }
            blocks.push({type: 'quote', str: str.slice(j, k + 1)});
            i = k + 1;
            j = k; // resume scanning right after the closing quote
        }
    }
    // Whatever follows the last quote block (or the whole input when there
    // were no quote blocks) is regular text.
    pushRegular(i, str.length);
    return blocks;
}