javascript 中字幕的正则表达式中的可变行数
Number of variable lines in regex for subtitles in javascript
我正在使用正则表达式创建一个字幕数组,其中每一项包括[序号、开始时间、结束时间、文本]
/(\d+)\n([\d:,]+)\s+-{2}\>\s+([\d:,]+)\n([\s\S]*?(?=\n{2}|$))/gm
但问题是,在文本部分,如果文本有两行或更多行,第一行之后的内容就不会被读取。
Here you can see the relevant image
我不想只将节中文本的第一行视为文本,如果还有其他行,也应将其视为文本。
如果你能帮我解决这个问题,就是帮了我一个大忙。谢谢!
// Demo driver: read the raw subtitle text from the #subtitle textarea,
// parse it, and print the resulting cue list to the console.
const subtitle = document.getElementById('subtitle').value;
console.log(_subtitle(subtitle));
/**
 * Extract subtitle cues from SRT-formatted text.
 *
 * Known limitation (the subject of the question): the pattern carries the `m`
 * flag, so `$` in the look-ahead matches at every line end and only the first
 * text line of each cue ends up in `Text`.
 *
 * @param {string} text - Raw subtitle text; \r\n, \r and \n line endings are all accepted.
 * @returns {{Line: string, Start: string, End: string, Text: string}[]} One entry per matched cue.
 * @throws {string} A plain string message when `text` is not a string.
 */
function _subtitle(text) {
  // Every non-string is rejected here (null included, since typeof null is "object").
  if (typeof text !== "string") {
    throw "Sorry, Parser accept string only.";
  }

  const cuePattern = /(\d+)\n([\d:,]+)\s+-{2}\>\s+([\d:,]+)\n([\s\S]*?(?=\n{2}|$))/gm;

  // Collapse \r\n and lone \r so the pattern only has to deal with \n.
  const normalized = text.replace(/\r\n|\r|\n/g, '\n');

  const cues = [];
  for (
    let hit = cuePattern.exec(normalized);
    hit !== null;
    hit = cuePattern.exec(normalized)
  ) {
    const [, Line, Start, End, Text] = hit;
    cues.push({ Line, Start, End, Text });
  }
  return cues;
}
/* Red notice banner (the element that says the output is on the console). */
#warning {
  margin-bottom: 14px;
  padding: 14px;
  border-radius: 4px;
  font-family: Roboto;
  color: #fff;
  background-color: #e74e4e;
}

/* Full-width input area for the raw subtitle text. */
textarea {
  min-height: 100px;
  width: 100%;
}
<!-- Notice for the reader: the parsed cues are logged rather than rendered. -->
<div id="warning">The output is on the console</div>
<!-- Sample SRT input. Cues are separated by a blank line, which the parser's \n{2} look-ahead relies on. -->
<textarea id="subtitle">1
00:00:00,000 --> 00:00:00,600
Hi my friends

2
00:00:00,610 --> 00:00:01,050
In the first line, everything works properly
But there is a problem in the second line that I could not solve :(

3
00:00:01,080 --> 00:00:03,080
But then everything is in order and good

4
00:00:03,280 --> 00:00:05,280
You do me a great favor by helping me. Thankful</textarea>
将 `/gm` 替换为 `/g`。否则,在多行模式(`m`)下,`$` 会匹配每一行的行尾,也就是第一个“文本”行的末尾;惰性量词在那里就满足了前瞻条件并停止,正则表达式不会再尝试匹配其后的任何内容:
// Demo driver: read the raw subtitle text from the #subtitle textarea,
// parse it, and print the resulting cue list to the console.
const subtitle = document.getElementById('subtitle').value;
console.log(_subtitle(subtitle));
/**
 * Parse SRT-formatted subtitle text into a list of cue objects.
 *
 * Each cue is expected to look like:
 *
 *   <number>\n<start> --> <end>\n<one or more text lines>
 *
 * with cues separated from each other by a blank line.
 *
 * @param {string} text - Raw subtitle text; \r\n, \r and \n line endings are all accepted.
 * @returns {{Line: string, Start: string, End: string, Text: string}[]}
 *   One entry per matched cue, in input order. `Text` keeps the inner newlines
 *   of multi-line cues but never ends with a newline.
 * @throws {string} A plain string message when `text` is not a string.
 */
function _subtitle(text) {
  // typeof null is "object", so this single guard also rejects null; no
  // separate null check is needed. The thrown value stays a plain string so
  // existing callers that compare against it keep working.
  if (typeof text !== "string") throw "Sorry, Parser accept string only.";

  // No `m` flag on purpose: `$` must mean "end of input" only. With `m` it
  // would match at every line end and the lazy text group would stop after
  // the first text line of each cue.
  const cuePattern = /(\d+)\n([\d:,]+)\s+-{2}\>\s+([\d:,]+)\n([\s\S]*?(?=\n{2}|$))/g;

  // Collapse \r\n and lone \r so the pattern only has to deal with \n.
  const normalized = text.replace(/\r\n|\r|\n/g, '\n');

  const result = [];
  let match;
  while ((match = cuePattern.exec(normalized)) !== null) {
    const [, Line, Start, End, Text] = match;
    result.push({
      Line,
      Start,
      End,
      // Only the last cue can run up to `$`, so only it can pick up the
      // input's trailing newline(s); drop them so every cue is shaped alike.
      Text: Text.replace(/\n+$/, ''),
    });
  }
  return result;
}
/* Red notice banner (the element that says the output is on the console). */
#warning {
  margin-bottom: 14px;
  padding: 14px;
  border-radius: 4px;
  font-family: Roboto;
  color: #fff;
  background-color: #e74e4e;
}

/* Full-width input area for the raw subtitle text. */
textarea {
  min-height: 100px;
  width: 100%;
}
<!-- Notice for the reader: the parsed cues are logged rather than rendered. -->
<div id="warning">The output is on the console</div>
<!-- Sample SRT input. Cues are separated by a blank line, which the parser's \n{2} look-ahead relies on. -->
<textarea id="subtitle">1
00:00:00,000 --> 00:00:00,600
Hi my friends

2
00:00:00,610 --> 00:00:01,050
In the first line, everything works properly
But there is a problem in the second line that I could not solve :(

3
00:00:01,080 --> 00:00:03,080
But then everything is in order and good

4
00:00:03,280 --> 00:00:05,280
You do me a great favor by helping me. Thankful</textarea>
我正在使用正则表达式创建一个字幕数组,其中每一项包括[序号、开始时间、结束时间、文本]
/(\d+)\n([\d:,]+)\s+-{2}\>\s+([\d:,]+)\n([\s\S]*?(?=\n{2}|$))/gm
但问题是,在文本部分,如果文本有两行或更多行,第一行之后的内容就不会被读取。
Here you can see the relevant image
我不想只将节中文本的第一行视为文本,如果还有其他行,也应将其视为文本。
如果你能帮我解决这个问题,就是帮了我一个大忙。谢谢!
// Demo driver: read the raw subtitle text from the #subtitle textarea,
// parse it, and print the resulting cue list to the console.
const subtitle = document.getElementById('subtitle').value;
console.log(_subtitle(subtitle));
/**
 * Extract subtitle cues from SRT-formatted text.
 *
 * Known limitation (the subject of the question): the pattern carries the `m`
 * flag, so `$` in the look-ahead matches at every line end and only the first
 * text line of each cue ends up in `Text`.
 *
 * @param {string} text - Raw subtitle text; \r\n, \r and \n line endings are all accepted.
 * @returns {{Line: string, Start: string, End: string, Text: string}[]} One entry per matched cue.
 * @throws {string} A plain string message when `text` is not a string.
 */
function _subtitle(text) {
  // Every non-string is rejected here (null included, since typeof null is "object").
  if (typeof text !== "string") {
    throw "Sorry, Parser accept string only.";
  }

  const cuePattern = /(\d+)\n([\d:,]+)\s+-{2}\>\s+([\d:,]+)\n([\s\S]*?(?=\n{2}|$))/gm;

  // Collapse \r\n and lone \r so the pattern only has to deal with \n.
  const normalized = text.replace(/\r\n|\r|\n/g, '\n');

  const cues = [];
  for (
    let hit = cuePattern.exec(normalized);
    hit !== null;
    hit = cuePattern.exec(normalized)
  ) {
    const [, Line, Start, End, Text] = hit;
    cues.push({ Line, Start, End, Text });
  }
  return cues;
}
/* Red notice banner (the element that says the output is on the console). */
#warning {
  margin-bottom: 14px;
  padding: 14px;
  border-radius: 4px;
  font-family: Roboto;
  color: #fff;
  background-color: #e74e4e;
}

/* Full-width input area for the raw subtitle text. */
textarea {
  min-height: 100px;
  width: 100%;
}
<!-- Notice for the reader: the parsed cues are logged rather than rendered. -->
<div id="warning">The output is on the console</div>
<!-- Sample SRT input. Cues are separated by a blank line, which the parser's \n{2} look-ahead relies on. -->
<textarea id="subtitle">1
00:00:00,000 --> 00:00:00,600
Hi my friends

2
00:00:00,610 --> 00:00:01,050
In the first line, everything works properly
But there is a problem in the second line that I could not solve :(

3
00:00:01,080 --> 00:00:03,080
But then everything is in order and good

4
00:00:03,280 --> 00:00:05,280
You do me a great favor by helping me. Thankful</textarea>
将 `/gm` 替换为 `/g`。否则,在多行模式(`m`)下,`$` 会匹配每一行的行尾,也就是第一个“文本”行的末尾;惰性量词在那里就满足了前瞻条件并停止,正则表达式不会再尝试匹配其后的任何内容:
// Demo driver: read the raw subtitle text from the #subtitle textarea,
// parse it, and print the resulting cue list to the console.
const subtitle = document.getElementById('subtitle').value;
console.log(_subtitle(subtitle));
/**
 * Parse SRT-formatted subtitle text into a list of cue objects.
 *
 * Each cue is expected to look like:
 *
 *   <number>\n<start> --> <end>\n<one or more text lines>
 *
 * with cues separated from each other by a blank line.
 *
 * @param {string} text - Raw subtitle text; \r\n, \r and \n line endings are all accepted.
 * @returns {{Line: string, Start: string, End: string, Text: string}[]}
 *   One entry per matched cue, in input order. `Text` keeps the inner newlines
 *   of multi-line cues but never ends with a newline.
 * @throws {string} A plain string message when `text` is not a string.
 */
function _subtitle(text) {
  // typeof null is "object", so this single guard also rejects null; no
  // separate null check is needed. The thrown value stays a plain string so
  // existing callers that compare against it keep working.
  if (typeof text !== "string") throw "Sorry, Parser accept string only.";

  // No `m` flag on purpose: `$` must mean "end of input" only. With `m` it
  // would match at every line end and the lazy text group would stop after
  // the first text line of each cue.
  const cuePattern = /(\d+)\n([\d:,]+)\s+-{2}\>\s+([\d:,]+)\n([\s\S]*?(?=\n{2}|$))/g;

  // Collapse \r\n and lone \r so the pattern only has to deal with \n.
  const normalized = text.replace(/\r\n|\r|\n/g, '\n');

  const result = [];
  let match;
  while ((match = cuePattern.exec(normalized)) !== null) {
    const [, Line, Start, End, Text] = match;
    result.push({
      Line,
      Start,
      End,
      // Only the last cue can run up to `$`, so only it can pick up the
      // input's trailing newline(s); drop them so every cue is shaped alike.
      Text: Text.replace(/\n+$/, ''),
    });
  }
  return result;
}
/* Red notice banner (the element that says the output is on the console). */
#warning {
  margin-bottom: 14px;
  padding: 14px;
  border-radius: 4px;
  font-family: Roboto;
  color: #fff;
  background-color: #e74e4e;
}

/* Full-width input area for the raw subtitle text. */
textarea {
  min-height: 100px;
  width: 100%;
}
<!-- Notice for the reader: the parsed cues are logged rather than rendered. -->
<div id="warning">The output is on the console</div>
<!-- Sample SRT input. Cues are separated by a blank line, which the parser's \n{2} look-ahead relies on. -->
<textarea id="subtitle">1
00:00:00,000 --> 00:00:00,600
Hi my friends

2
00:00:00,610 --> 00:00:01,050
In the first line, everything works properly
But there is a problem in the second line that I could not solve :(

3
00:00:01,080 --> 00:00:03,080
But then everything is in order and good

4
00:00:03,280 --> 00:00:05,280
You do me a great favor by helping me. Thankful</textarea>