从 javascript 文件中删除注释
Strip comments from javascript file
我正在尝试构建一个正则表达式以从 javascript 代码中去除所有注释,包括单行 (//...) 和多行 (/*..*/)。这就是我想出的:
/\"[^\"]*\"|'[^']*'|(\/\/.*$|\/\*[^\*]*\*\/)/mg
描述:如您所见,它还搜索字符串文字。这是因为字符串文字可以包含本来可以匹配注释模式的内容(例如:location.href = "http://www.domain.com"; 将匹配为单行注释)。所以我把字符串文字模式放在替代模式中的第一位。接下来是两种模式,分别用于捕获单行注释和多行注释。它们包含在同一个捕获组中,因此我可以使用 string.replace(pattern, "") 删除注释。
我已经用几个 js 文件测试了表达式,它似乎可以正常工作。 我的问题是是否还有其他模式我应该寻找或者是否有任何其他事情需要考虑(例如,如果某些浏览器对正则表达式或替代实现的支持有限需要考虑)。
看看这段代码。尽管这是 PHP,但我认为这种模式是正确的。您可以调整 JavaScript 的模式。
可以做到(不用纯正则表达式,而是用 JavaScript 代码实现),但有一些限制。我为您临时快速实现了一个版本(用了 25 分钟)。使用的方法是逐行解析源文件。
如果您的 js 文件语法正确,并且不属于 3 种例外情况,则结果是正确的。
在这里可以找到实现:http://jsfiddle.net/ch14em6w/
这里是关键代码部分:
//parse file input
/**
 * Strips comments from JavaScript source text and shows the result in the
 * element whose id is "file-content".
 *
 * Every line is passed through stripOut(); lines that come back empty are
 * dropped, and the remaining lines are joined with "\n".
 *
 * @param {string} contents - Full text of the JavaScript file.
 */
function displayFileLineByLine(contents)
{
  // Accept both LF and CRLF files so a trailing "\r" never keeps a blank line alive.
  var lines = contents.split(/\r?\n/);
  var keptLines = [];
  for (var line = 0; line < lines.length; line++) {
    var normedline = stripOut(lines[line]);
    if (normedline.length > 0) {
      keptLines.push(normedline);
    }
  }
  // split() removed the line breaks; put them back, otherwise statements that
  // were on separate lines are glued together (and ASI-dependent code breaks).
  // textContent is used because the source is plain text: with innerHTML any
  // "<" or "&" in the code would be interpreted as markup.
  document.getElementById('file-content').textContent = keptLines.join('\n');
}
// Module-scope flag: true while a '/*' block comment opened on an earlier
// line has not been closed yet. stripOut() reads and updates it, so lines
// must be fed to stripOut() in file order.
var GlobalComentOpen = false;

/**
 * Returns the index just past the quoted literal that starts at `start`.
 * Backslash escapes are skipped; if the closing quote is not on this line the
 * end of the line is returned.
 */
function findQuotedEnd(text, start) {
  const quote = text[start];
  for (let i = start + 1; i < text.length; i++) {
    if (text[i] === '\\') {
      i++; // skip the escaped character
    } else if (text[i] === quote) {
      return i + 1;
    }
  }
  return text.length;
}

/**
 * Removes comments from a single line of JavaScript source.
 *
 * The line is scanned left to right:
 *  - while GlobalComentOpen is set, everything up to the next '*' + '/' is dropped;
 *  - '//' outside a string drops the rest of the line;
 *  - '/*' outside a string opens a block comment, which may stay open across
 *    lines (tracked in GlobalComentOpen);
 *  - text inside '...', "..." or `...` literals is copied verbatim, so a URL
 *    such as "http://example.com" is not mistaken for a comment.
 *
 * Known limits: regular-expression literals and template literals that span
 * several lines are not recognised.
 *
 * @param {string} stringline - One line of source, without its line break.
 * @param {number} [step] - Unused; kept so existing callers that pass a
 *   recursion depth keep working.
 * @returns {string} The line with comment text removed ('' if nothing is left).
 */
function stripOut(stringline, step) {
  let kept = '';
  let pos = 0;
  while (pos < stringline.length) {
    if (GlobalComentOpen) {
      const close = stringline.indexOf('*/', pos);
      if (close === -1) {
        // The comment continues on the next line; the rest of this line is comment text.
        return kept;
      }
      GlobalComentOpen = false;
      pos = close + 2;
      continue;
    }
    const ch = stringline[pos];
    const next = stringline[pos + 1];
    if (ch === '/' && next === '/') {
      break; // line comment: ignore everything after it, including any '/*'
    }
    if (ch === '/' && next === '*') {
      GlobalComentOpen = true;
      pos += 2; // step past the opener so '/*/' is not read as open + close
      continue;
    }
    if (ch === '"' || ch === "'" || ch === '`') {
      const end = findQuotedEnd(stringline, pos);
      kept += stringline.slice(pos, end);
      pos = end;
      continue;
    }
    kept += ch;
    pos++;
  }
  return kept;
}
使用 C/C++ 风格的注释剥离器。
下面的正则表达式做了这些事情
- 剥离 /**/ 和 // 样式
- 处理续行样式
- 保留格式
正则表达式有两种用于保留格式的形式:
- 使用水平空白符 `\h` 和换行符 `\n` 的写法
- 使用空格和制表符 `[ \t]` 以及 `\r?\n` 的写法
标志为多行(m)和全局(g)。
替换内容为捕获组 2,即 `$2` 或 `\2`。
形式 1:
raw: ((?:(?:^\h*)?(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/(?:\h*\n(?=\h*(?:\n|/\*|//)))?|//(?:[^\\]|\\\n?)*?(?:\n(?=\h*(?:\n|/\*|//))|(?=\n))))+)|("(?:\\[\S\s]|[^"\\])*"|'(?:\\[\S\s]|[^'\\])*'|[\S\s][^/"'\\\s]*)
delimited: /((?:(?:^\h*)?(?:\/\*[^*]*\*+(?:[^\/*][^*]*\*+)*\/(?:\h*\n(?=\h*(?:\n|\/\*|\/\/)))?|\/\/(?:[^\\]|\\\n?)*?(?:\n(?=\h*(?:\n|\/\*|\/\/))|(?=\n))))+)|("(?:\\[\S\s]|[^"\\])*"|'(?:\\[\S\s]|[^'\\])*'|[\S\s][^\/"'\\\s]*)/mg
形式 2:
raw: ((?:(?:^[ \t]*)?(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/(?:[ \t]*\r?\n(?=[ \t]*(?:\r?\n|/\*|//)))?|//(?:[^\\]|\\(?:\r?\n)?)*?(?:\r?\n(?=[ \t]*(?:\r?\n|/\*|//))|(?=\r?\n))))+)|("(?:\\[\S\s]|[^"\\])*"|'(?:\\[\S\s]|[^'\\])*'|(?:\r?\n|[\S\s])[^/"'\\\s]*)
delimited: /((?:(?:^[ \t]*)?(?:\/\*[^*]*\*+(?:[^\/*][^*]*\*+)*\/(?:[ \t]*\r?\n(?=[ \t]*(?:\r?\n|\/\*|\/\/)))?|\/\/(?:[^\\]|\\(?:\r?\n)?)*?(?:\r?\n(?=[ \t]*(?:\r?\n|\/\*|\/\/))|(?=\r?\n))))+)|("(?:\\[\S\s]|[^"\\])*"|'(?:\\[\S\s]|[^'\\])*'|(?:\r?\n|[\S\s])[^\/"'\\\s]*)/mg
Form 2 的扩展(使用 this 格式化)版本:
( # (1 start), Comments
(?:
(?: ^ [ \t]* )? # <- To preserve formatting
(?:
/\* # Start /* .. */ comment
[^*]* \*+
(?: [^/*] [^*]* \*+ )*
/ # End /* .. */ comment
(?: # <- To preserve formatting
[ \t]* \r? \n
(?=
[ \t]*
(?: \r? \n | /\* | // )
)
)?
|
// # Start // comment
(?: # Possible line-continuation
[^\\]
| \\
(?: \r? \n )?
)*?
(?: # End // comment
\r? \n
(?= # <- To preserve formatting
[ \t]*
(?: \r? \n | /\* | // )
)
| (?= \r? \n )
)
)
)+ # Grab multiple comment blocks if need be
) # (1 end)
| ## OR
( # (2 start), Non - comments
"
(?: \\ [\S\s] | [^"\\] )* # Double quoted text
"
| '
(?: \\ [\S\s] | [^'\\] )* # Single quoted text
'
| (?: \r? \n | [\S\s] ) # Linebreak or Any other char
[^/"'\\\s]* # Chars which doesn't start a comment, string, escape,
# or line continuation (escape + newline)
) # (2 end)
import prettier from 'prettier';
/**
 * Formats JavaScript source with prettier while dropping every comment.
 *
 * A custom parser is handed to prettier: it parses with the supplied babel
 * parser, removes the `comments` property from the resulting AST and returns
 * that AST, so the printer has no comments to emit.
 *
 * @param {string} jsCodeStr - JavaScript source code.
 * @returns Whatever `prettier.format` returns for the comment-free AST.
 */
function decomment(jsCodeStr) {
  // Parse with babel, then throw the collected comments away.
  const parseWithoutComments = (source, parsers) => {
    const { babel: parseWithBabel } = parsers;
    const tree = parseWithBabel(source);
    delete tree.comments;
    return tree;
  };

  return prettier.format(jsCodeStr, {
    printWidth: 160,
    singleQuote: true,
    trailingComma: 'none',
    parser: parseWithoutComments
  });
}
变得更漂亮
Update:这是 C# 代码,我认为这不是它的正确位置。不管怎样,就在这里。
我用下面的class效果不错
未对字符串内的注释进行测试,例如
a = "hi /* comment */ there";
a = "hi there // ";
该类只检测位于行首或空白字符之后的 // 注释,因此下面这些写法可以正常处理(其中的 // 不会被当作注释)。
a = "hi// there";
a = "hi//there";
这是代码
/// <summary>
/// Regex-based remover of <c>//</c> and <c>/* ... */</c> comments, with helpers to
/// process a string, a file, a folder, or a command line.
/// The patterns do not understand string literals: a <c>/* */</c> pair or a
/// whitespace-preceded <c>//</c> inside a string is removed as well.
/// </summary>
static public class CommentRemover
{
    static readonly RegexOptions ROptions = RegexOptions.CultureInvariant | RegexOptions.IgnoreCase | RegexOptions.Multiline;
    // "//" at the start of a line or after any whitespace character. The "^" alternative is
    // needed for a comment at the very start of the text, where no whitespace precedes it.
    const string SSingleLineComments = @"(?:^|\s)//.*";
    const string SMultiLineComments = @"/\*[\s\S]*?\*/";
    const string SCommentPattern = SSingleLineComments + "|" + SMultiLineComments;
    const string SEmptyLinePattern = @"^\s+$[\r\n]*";
    // Built once by the static initializer instead of lazily on first use, so
    // concurrent first calls cannot race on the null check.
    static readonly Regex CommentRegex = new Regex(SCommentPattern, ROptions);
    static readonly Regex EmptyLineRegex = new Regex(SEmptyLinePattern, ROptions);

    /// <summary>Removes lines that contain only whitespace.</summary>
    static public string RemoveEmptyLines(string Text)
    {
        return EmptyLineRegex.Replace(Text, string.Empty);
    }

    /// <summary>Removes comments using the built-in comment pattern.</summary>
    static public string RemoveComments(string Text)
    {
        return CommentRegex.Replace(Text, string.Empty);
    }

    /// <summary>Removes every match of a caller-supplied regex pattern.</summary>
    static public string RemoveComments(string Text, string Pattern)
    {
        Regex R = new Regex(Pattern, ROptions);
        return R.Replace(Text, string.Empty);
    }

    /// <summary>Removes comments, then whitespace-only lines.</summary>
    static public string Execute(string Text)
    {
        Text = RemoveComments(Text);
        Text = RemoveEmptyLines(Text);
        return Text;
    }

    /// <summary>
    /// Reads the source file, strips it and writes the result to the destination file,
    /// creating the destination folder when needed.
    /// </summary>
    static public void ExecuteFile(string SourceFilePth, string DestFilePath)
    {
        string DestFolder = Path.GetDirectoryName(DestFilePath);
        // GetDirectoryName returns "" for a bare file name (and null for a root);
        // CreateDirectory throws on both, so only create a folder when there is one.
        if (!string.IsNullOrEmpty(DestFolder))
            Directory.CreateDirectory(DestFolder);
        string Text = File.ReadAllText(SourceFilePth);
        Text = Execute(Text);
        File.WriteAllText(DestFilePath, Text);
    }

    /// <summary>
    /// Strips every file under SourcePath that matches FilePattern and writes it to DestPath.
    /// </summary>
    static public void ExecuteFolder(string FilePattern, string SourcePath, string DestPath, bool Recursive = true)
    {
        SearchOption Option = Recursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;
        // NOTE(review): only the file name is kept, so with Recursive = true files from
        // sub-folders all land directly in DestPath and same-named files overwrite each other.
        foreach (string SourceFilePath in Directory.GetFiles(SourcePath, FilePattern, Option))
        {
            string FileName = Path.GetFileName(SourceFilePath);
            string DestFilePath = Path.Combine(DestPath, FileName);
            ExecuteFile(SourceFilePath, DestFilePath);
        }
    }

    // Returns the value that follows the flag at Index and advances Index past it,
    // so the value itself is never interpreted as a flag.
    static string ReadFlagValue(string[] Args, ref int Index, string Flag)
    {
        if (Index + 1 >= Args.Length)
            throw new ArgumentException("Missing value for " + Flag);
        Index++;
        return Args[Index].Trim();
    }

    /// <summary>
    /// Parses command-line arguments and runs the file or folder operation.
    /// Does nothing when no arguments are given. Any error is printed to the
    /// console followed by the help text.
    /// </summary>
    static public void ExecuteCommandLine(string[] Args)
    {
        void DisplayCommandLineHelp()
        {
            string Text = @"
-h, --help Flag. Displays this message. E.g. -h
-s, --source Source folder when the -p is present. Else source filename. E.g. -s C:\app\js or -s C:\app\js\main.js
-d, --dest Dest folder when the -p is present. Else dest filename. E.g. -d C:\app\js\out or -d C:\app\js\out\main.js
-p, --pattern The pattern to use when finding files. E.g. -p *.js
-r, --recursive Flag. Search in sub-folders too. E.g. -r
EXAMPLE
CommentStripper -s .\Source -d .\Dest -p *.js
";
            Console.WriteLine(Text.Trim());
        }

        if (Args == null || Args.Length == 0)
            return;

        string Pattern = null;
        string Source = null;
        string Dest = null;
        bool Recursive = false;
        bool Help = false;

        try
        {
            for (int i = 0; i < Args.Length; i++)
            {
                string Flag = Args[i].ToLowerInvariant();
                switch (Flag)
                {
                    case "-s":
                    case "--source":
                        Source = ReadFlagValue(Args, ref i, Flag);
                        break;
                    case "-d":
                    case "--dest":
                        Dest = ReadFlagValue(Args, ref i, Flag);
                        break;
                    case "-p":
                    case "--pattern":
                        Pattern = ReadFlagValue(Args, ref i, Flag);
                        break;
                    case "-r":
                    case "--recursive":
                        Recursive = true;
                        break;
                    case "-h":
                    case "--help":
                        Help = true;
                        break;
                }
            }

            if (Help)
            {
                DisplayCommandLineHelp();
                return;
            }

            // Fail with a readable message instead of a null-argument error from the file APIs.
            if (string.IsNullOrWhiteSpace(Source) || string.IsNullOrWhiteSpace(Dest))
                throw new ArgumentException("Both --source and --dest are required.");

            if (!string.IsNullOrWhiteSpace(Pattern))
            {
                ExecuteFolder(Pattern, Source, Dest, Recursive);
            }
            else
            {
                ExecuteFile(Source, Dest);
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
            Console.WriteLine();
            DisplayCommandLineHelp();
        }
    }
}
祝你好运。
我正在尝试构建一个正则表达式以从 javascript 代码中去除所有注释,包括单行 (//...) 和多行 (/*..*/)。这就是我想出的:
/\"[^\"]*\"|'[^']*'|(\/\/.*$|\/\*[^\*]*\*\/)/mg
描述:如您所见,它还搜索字符串文字。这是因为字符串文字可以包含本来可以匹配注释模式的内容(例如:location.href = "http://www.domain.com"; 将匹配为单行注释)。所以我把字符串文字模式放在替代模式中的第一位。接下来是两种模式,分别用于捕获单行注释和多行注释。它们包含在同一个捕获组中,因此我可以使用 string.replace(pattern, "") 删除注释。
我已经用几个 js 文件测试了表达式,它似乎可以正常工作。 我的问题是是否还有其他模式我应该寻找或者是否有任何其他事情需要考虑(例如,如果某些浏览器对正则表达式或替代实现的支持有限需要考虑)。
看看这段代码。尽管这是 PHP,但我认为这种模式是正确的。您可以调整 JavaScript 的模式。
可以做到(不用纯正则表达式,而是用 JavaScript 代码实现),但有一些限制。我为您临时快速实现了一个版本(用了 25 分钟)。使用的方法是逐行解析源文件。 如果您的 js 文件语法正确,并且不属于 3 种例外情况,则结果是正确的。
在这里可以找到实现:http://jsfiddle.net/ch14em6w/
这里是关键代码部分:
//parse file input
/**
 * Strips comments from JavaScript source text and shows the result in the
 * element whose id is "file-content".
 *
 * Every line is passed through stripOut(); lines that come back empty are
 * dropped, and the remaining lines are joined with "\n".
 *
 * @param {string} contents - Full text of the JavaScript file.
 */
function displayFileLineByLine(contents)
{
  // Accept both LF and CRLF files so a trailing "\r" never keeps a blank line alive.
  var lines = contents.split(/\r?\n/);
  var keptLines = [];
  for (var line = 0; line < lines.length; line++) {
    var normedline = stripOut(lines[line]);
    if (normedline.length > 0) {
      keptLines.push(normedline);
    }
  }
  // split() removed the line breaks; put them back, otherwise statements that
  // were on separate lines are glued together (and ASI-dependent code breaks).
  // textContent is used because the source is plain text: with innerHTML any
  // "<" or "&" in the code would be interpreted as markup.
  document.getElementById('file-content').textContent = keptLines.join('\n');
}
// Module-scope flag: true while a '/*' block comment opened on an earlier
// line has not been closed yet. stripOut() reads and updates it, so lines
// must be fed to stripOut() in file order.
var GlobalComentOpen = false;

/**
 * Returns the index just past the quoted literal that starts at `start`.
 * Backslash escapes are skipped; if the closing quote is not on this line the
 * end of the line is returned.
 */
function findQuotedEnd(text, start) {
  const quote = text[start];
  for (let i = start + 1; i < text.length; i++) {
    if (text[i] === '\\') {
      i++; // skip the escaped character
    } else if (text[i] === quote) {
      return i + 1;
    }
  }
  return text.length;
}

/**
 * Removes comments from a single line of JavaScript source.
 *
 * The line is scanned left to right:
 *  - while GlobalComentOpen is set, everything up to the next '*' + '/' is dropped;
 *  - '//' outside a string drops the rest of the line;
 *  - '/*' outside a string opens a block comment, which may stay open across
 *    lines (tracked in GlobalComentOpen);
 *  - text inside '...', "..." or `...` literals is copied verbatim, so a URL
 *    such as "http://example.com" is not mistaken for a comment.
 *
 * Known limits: regular-expression literals and template literals that span
 * several lines are not recognised.
 *
 * @param {string} stringline - One line of source, without its line break.
 * @param {number} [step] - Unused; kept so existing callers that pass a
 *   recursion depth keep working.
 * @returns {string} The line with comment text removed ('' if nothing is left).
 */
function stripOut(stringline, step) {
  let kept = '';
  let pos = 0;
  while (pos < stringline.length) {
    if (GlobalComentOpen) {
      const close = stringline.indexOf('*/', pos);
      if (close === -1) {
        // The comment continues on the next line; the rest of this line is comment text.
        return kept;
      }
      GlobalComentOpen = false;
      pos = close + 2;
      continue;
    }
    const ch = stringline[pos];
    const next = stringline[pos + 1];
    if (ch === '/' && next === '/') {
      break; // line comment: ignore everything after it, including any '/*'
    }
    if (ch === '/' && next === '*') {
      GlobalComentOpen = true;
      pos += 2; // step past the opener so '/*/' is not read as open + close
      continue;
    }
    if (ch === '"' || ch === "'" || ch === '`') {
      const end = findQuotedEnd(stringline, pos);
      kept += stringline.slice(pos, end);
      pos = end;
      continue;
    }
    kept += ch;
    pos++;
  }
  return kept;
}
使用 C/C++ 风格的注释剥离器。
下面的正则表达式做了这些事情
- 剥离 /**/ 和 // 样式
- 处理续行样式
- 保留格式
正则表达式有两种用于保留格式的形式:
- 使用水平空白符 `\h` 和换行符 `\n` 的写法
- 使用空格和制表符 `[ \t]` 以及 `\r?\n` 的写法
标志为多行(m)和全局(g)。
替换内容为捕获组 2,即 `$2` 或 `\2`。
形式 1:
raw: ((?:(?:^\h*)?(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/(?:\h*\n(?=\h*(?:\n|/\*|//)))?|//(?:[^\\]|\\\n?)*?(?:\n(?=\h*(?:\n|/\*|//))|(?=\n))))+)|("(?:\\[\S\s]|[^"\\])*"|'(?:\\[\S\s]|[^'\\])*'|[\S\s][^/"'\\\s]*)
delimited: /((?:(?:^\h*)?(?:\/\*[^*]*\*+(?:[^\/*][^*]*\*+)*\/(?:\h*\n(?=\h*(?:\n|\/\*|\/\/)))?|\/\/(?:[^\\]|\\\n?)*?(?:\n(?=\h*(?:\n|\/\*|\/\/))|(?=\n))))+)|("(?:\\[\S\s]|[^"\\])*"|'(?:\\[\S\s]|[^'\\])*'|[\S\s][^\/"'\\\s]*)/mg
形式 2:
raw: ((?:(?:^[ \t]*)?(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/(?:[ \t]*\r?\n(?=[ \t]*(?:\r?\n|/\*|//)))?|//(?:[^\\]|\\(?:\r?\n)?)*?(?:\r?\n(?=[ \t]*(?:\r?\n|/\*|//))|(?=\r?\n))))+)|("(?:\\[\S\s]|[^"\\])*"|'(?:\\[\S\s]|[^'\\])*'|(?:\r?\n|[\S\s])[^/"'\\\s]*)
delimited: /((?:(?:^[ \t]*)?(?:\/\*[^*]*\*+(?:[^\/*][^*]*\*+)*\/(?:[ \t]*\r?\n(?=[ \t]*(?:\r?\n|\/\*|\/\/)))?|\/\/(?:[^\\]|\\(?:\r?\n)?)*?(?:\r?\n(?=[ \t]*(?:\r?\n|\/\*|\/\/))|(?=\r?\n))))+)|("(?:\\[\S\s]|[^"\\])*"|'(?:\\[\S\s]|[^'\\])*'|(?:\r?\n|[\S\s])[^\/"'\\\s]*)/mg
Form 2 的扩展(使用 this 格式化)版本:
( # (1 start), Comments
(?:
(?: ^ [ \t]* )? # <- To preserve formatting
(?:
/\* # Start /* .. */ comment
[^*]* \*+
(?: [^/*] [^*]* \*+ )*
/ # End /* .. */ comment
(?: # <- To preserve formatting
[ \t]* \r? \n
(?=
[ \t]*
(?: \r? \n | /\* | // )
)
)?
|
// # Start // comment
(?: # Possible line-continuation
[^\\]
| \\
(?: \r? \n )?
)*?
(?: # End // comment
\r? \n
(?= # <- To preserve formatting
[ \t]*
(?: \r? \n | /\* | // )
)
| (?= \r? \n )
)
)
)+ # Grab multiple comment blocks if need be
) # (1 end)
| ## OR
( # (2 start), Non - comments
"
(?: \\ [\S\s] | [^"\\] )* # Double quoted text
"
| '
(?: \\ [\S\s] | [^'\\] )* # Single quoted text
'
| (?: \r? \n | [\S\s] ) # Linebreak or Any other char
[^/"'\\\s]* # Chars which doesn't start a comment, string, escape,
# or line continuation (escape + newline)
) # (2 end)
import prettier from 'prettier';
/**
 * Formats JavaScript source with prettier while dropping every comment.
 *
 * A custom parser is handed to prettier: it parses with the supplied babel
 * parser, removes the `comments` property from the resulting AST and returns
 * that AST, so the printer has no comments to emit.
 *
 * @param {string} jsCodeStr - JavaScript source code.
 * @returns Whatever `prettier.format` returns for the comment-free AST.
 */
function decomment(jsCodeStr) {
  // Parse with babel, then throw the collected comments away.
  const parseWithoutComments = (source, parsers) => {
    const { babel: parseWithBabel } = parsers;
    const tree = parseWithBabel(source);
    delete tree.comments;
    return tree;
  };

  return prettier.format(jsCodeStr, {
    printWidth: 160,
    singleQuote: true,
    trailingComma: 'none',
    parser: parseWithoutComments
  });
}
变得更漂亮
Update:这是 C# 代码,我认为这不是它的正确位置。不管怎样,就在这里。
我用下面的class效果不错
未对字符串内的注释进行测试,例如
a = "hi /* comment */ there";
a = "hi there // ";
该类只检测位于行首或空白字符之后的 // 注释,因此下面这些写法可以正常处理(其中的 // 不会被当作注释)。
a = "hi// there";
a = "hi//there";
这是代码
/// <summary>
/// Regex-based remover of <c>//</c> and <c>/* ... */</c> comments, with helpers to
/// process a string, a file, a folder, or a command line.
/// The patterns do not understand string literals: a <c>/* */</c> pair or a
/// whitespace-preceded <c>//</c> inside a string is removed as well.
/// </summary>
static public class CommentRemover
{
    static readonly RegexOptions ROptions = RegexOptions.CultureInvariant | RegexOptions.IgnoreCase | RegexOptions.Multiline;
    // "//" at the start of a line or after any whitespace character. The "^" alternative is
    // needed for a comment at the very start of the text, where no whitespace precedes it.
    const string SSingleLineComments = @"(?:^|\s)//.*";
    const string SMultiLineComments = @"/\*[\s\S]*?\*/";
    const string SCommentPattern = SSingleLineComments + "|" + SMultiLineComments;
    const string SEmptyLinePattern = @"^\s+$[\r\n]*";
    // Built once by the static initializer instead of lazily on first use, so
    // concurrent first calls cannot race on the null check.
    static readonly Regex CommentRegex = new Regex(SCommentPattern, ROptions);
    static readonly Regex EmptyLineRegex = new Regex(SEmptyLinePattern, ROptions);

    /// <summary>Removes lines that contain only whitespace.</summary>
    static public string RemoveEmptyLines(string Text)
    {
        return EmptyLineRegex.Replace(Text, string.Empty);
    }

    /// <summary>Removes comments using the built-in comment pattern.</summary>
    static public string RemoveComments(string Text)
    {
        return CommentRegex.Replace(Text, string.Empty);
    }

    /// <summary>Removes every match of a caller-supplied regex pattern.</summary>
    static public string RemoveComments(string Text, string Pattern)
    {
        Regex R = new Regex(Pattern, ROptions);
        return R.Replace(Text, string.Empty);
    }

    /// <summary>Removes comments, then whitespace-only lines.</summary>
    static public string Execute(string Text)
    {
        Text = RemoveComments(Text);
        Text = RemoveEmptyLines(Text);
        return Text;
    }

    /// <summary>
    /// Reads the source file, strips it and writes the result to the destination file,
    /// creating the destination folder when needed.
    /// </summary>
    static public void ExecuteFile(string SourceFilePth, string DestFilePath)
    {
        string DestFolder = Path.GetDirectoryName(DestFilePath);
        // GetDirectoryName returns "" for a bare file name (and null for a root);
        // CreateDirectory throws on both, so only create a folder when there is one.
        if (!string.IsNullOrEmpty(DestFolder))
            Directory.CreateDirectory(DestFolder);
        string Text = File.ReadAllText(SourceFilePth);
        Text = Execute(Text);
        File.WriteAllText(DestFilePath, Text);
    }

    /// <summary>
    /// Strips every file under SourcePath that matches FilePattern and writes it to DestPath.
    /// </summary>
    static public void ExecuteFolder(string FilePattern, string SourcePath, string DestPath, bool Recursive = true)
    {
        SearchOption Option = Recursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;
        // NOTE(review): only the file name is kept, so with Recursive = true files from
        // sub-folders all land directly in DestPath and same-named files overwrite each other.
        foreach (string SourceFilePath in Directory.GetFiles(SourcePath, FilePattern, Option))
        {
            string FileName = Path.GetFileName(SourceFilePath);
            string DestFilePath = Path.Combine(DestPath, FileName);
            ExecuteFile(SourceFilePath, DestFilePath);
        }
    }

    // Returns the value that follows the flag at Index and advances Index past it,
    // so the value itself is never interpreted as a flag.
    static string ReadFlagValue(string[] Args, ref int Index, string Flag)
    {
        if (Index + 1 >= Args.Length)
            throw new ArgumentException("Missing value for " + Flag);
        Index++;
        return Args[Index].Trim();
    }

    /// <summary>
    /// Parses command-line arguments and runs the file or folder operation.
    /// Does nothing when no arguments are given. Any error is printed to the
    /// console followed by the help text.
    /// </summary>
    static public void ExecuteCommandLine(string[] Args)
    {
        void DisplayCommandLineHelp()
        {
            string Text = @"
-h, --help Flag. Displays this message. E.g. -h
-s, --source Source folder when the -p is present. Else source filename. E.g. -s C:\app\js or -s C:\app\js\main.js
-d, --dest Dest folder when the -p is present. Else dest filename. E.g. -d C:\app\js\out or -d C:\app\js\out\main.js
-p, --pattern The pattern to use when finding files. E.g. -p *.js
-r, --recursive Flag. Search in sub-folders too. E.g. -r
EXAMPLE
CommentStripper -s .\Source -d .\Dest -p *.js
";
            Console.WriteLine(Text.Trim());
        }

        if (Args == null || Args.Length == 0)
            return;

        string Pattern = null;
        string Source = null;
        string Dest = null;
        bool Recursive = false;
        bool Help = false;

        try
        {
            for (int i = 0; i < Args.Length; i++)
            {
                string Flag = Args[i].ToLowerInvariant();
                switch (Flag)
                {
                    case "-s":
                    case "--source":
                        Source = ReadFlagValue(Args, ref i, Flag);
                        break;
                    case "-d":
                    case "--dest":
                        Dest = ReadFlagValue(Args, ref i, Flag);
                        break;
                    case "-p":
                    case "--pattern":
                        Pattern = ReadFlagValue(Args, ref i, Flag);
                        break;
                    case "-r":
                    case "--recursive":
                        Recursive = true;
                        break;
                    case "-h":
                    case "--help":
                        Help = true;
                        break;
                }
            }

            if (Help)
            {
                DisplayCommandLineHelp();
                return;
            }

            // Fail with a readable message instead of a null-argument error from the file APIs.
            if (string.IsNullOrWhiteSpace(Source) || string.IsNullOrWhiteSpace(Dest))
                throw new ArgumentException("Both --source and --dest are required.");

            if (!string.IsNullOrWhiteSpace(Pattern))
            {
                ExecuteFolder(Pattern, Source, Dest, Recursive);
            }
            else
            {
                ExecuteFile(Source, Dest);
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
            Console.WriteLine();
            DisplayCommandLineHelp();
        }
    }
}
祝你好运。