在正则表达式匹配处断句并使用 javascript 创建数组
Break sentence at regex match and create array using javascript
我有一个包含多项选择题和答案的字符串,如下所示:
(1) Capital of Bangladesh is-
(A) Dhaka (B) Rangpur (C) Chittagong (D) Comilla। Ans (A) Dhaka
(2) Largest city of Bangladesh is-
(A) Mirpur (B) Rangpur (C) Chittagong (D) Comilla। Ans (C) Chittagong
(3) Smallest city of Bangladesh is-
(A) Dhaka (B) Rangpur (C) Chittagong (D) Meherpur। Ans (B) Rangpur
我需要根据上面的字符串创建 json,以便它创建单独的问题和答案
最后的 json 会是这样的:
{"questions":
[
{
"options":["Dhaka","Rangpur","Chittagong","Comilla"],
"body":"Capital of Bangladesh is-",
"answers":["A"]
},
{
"options":["Mirpur","Rangpur","Chittagong","Comilla"],
"body":"Largest city of Bangladesh is-",
"answers":["C"]
}
]
}
我试过
// Asker's attempt: split the loaded text into lines and try to collect them in `questions`.
// `reader` and `questions` are not defined in this snippet — presumably a FileReader and an
// array declared elsewhere; TODO confirm.
var result = reader.result.split('\n');
for (var index = 0; index < result.length; index++) {
var question = result[index]
// NOTE(review): the argument is a string, not a regex literal. match() converts it with
// new RegExp(string), so the slashes and the trailing "g" are matched as literal characters.
if(question.match("/[(/)]/g")){
// NOTE(review): this assigns to the `push` property instead of calling questions.push(question).
questions.push = question
}
else {
// Same assignment as the if-branch, so both branches currently do the same thing.
questions.push = question
}
}
console.log(questions)
我怎样才能做到
试一试
我们需要 /u 标志来处理 Unicode,并且要使用 .+ 而不是 \w,因为 \w 只匹配 ASCII 字母、数字和下划线,无法匹配孟加拉语等非 ASCII 字符
使用 Unicode 正则表达式的更多内容
Regular expression \p{L} and \p{N}
const str = `(1) The main language of Bangladesh is-
(ক) বাংলা (খ) ইংরেজি (C) Hindi (D) French। Ans (ক) বাংলা
(2) Largest city of Bangladesh is-
(A) Mirpur (B) Rangpur (C) Chittagong (D) Comilla। Ans (C) Chittagong
(3) Smallest city of Bangladesh is-
(A) Dhaka (B) Rangpur (C) Chittagong (D) Meherpur। Ans (B) Rangpur`;

/**
 * Turn quiz text into `{ questions: [{ body, options, answers }] }`.
 *
 * The text is read as pairs of lines: a question line such as `(1) Question-`
 * followed by an options line such as `(A) x (B) y। Ans (A) x`.
 * Blank lines are skipped, both `\n` and `\r\n` line endings are accepted,
 * whitespace around the danda is optional, and any number of options is allowed.
 *
 * @param {string} text - Raw quiz text.
 * @returns {{questions: Array<{body: string, options?: string[], answers?: string[]}>}}
 *   Parsed questions; a trailing question line without an options line keeps only `body`.
 * @throws {Error} If a question line or an options line does not have the expected shape.
 */
function parseQuestions(text) {
  // Drop blank lines first so a trailing newline does not shift the question/options pairing.
  const lines = text.split(/\r?\n/u).filter((line) => line.trim() !== "");

  return lines.reduce((acc, line, i) => {
    if (i % 2 === 0) {
      // Question line: remove only the leading "(N)" marker, so parentheses
      // inside the question text itself are preserved.
      const bodyMatch = line.match(/^\s*\([^()]+\)\s*(.*)$/u);
      if (!bodyMatch) {
        throw new Error(`Not a question line: ${line}`);
      }
      acc.questions.push({ body: bodyMatch[1].trim() });
      return acc;
    }

    const curItem = acc.questions[acc.questions.length - 1]; // last pushed object
    // The danda separates the option list from the "Ans (X) ..." part.
    const [optionStr, answer] = line.split(/\s*।\s*/u);
    const answerMatch = answer === undefined ? null : answer.match(/\(([^()]+)\)/u);
    if (!answerMatch) {
      throw new Error(`Missing "। Ans (X)" part: ${line}`);
    }

    // Splitting on the "(X)" labels works for any number of options; the piece
    // before the first label is empty and is filtered out.
    curItem.options = optionStr
      .split(/\s*\([^()]+\)\s*/u)
      .map((option) => option.trim())
      .filter(Boolean);
    curItem.answers = [answerMatch[1]]; // just the label from inside the brackets
    return acc;
  }, { questions: [] });
}

const obj = parseQuestions(str);
console.log(obj);
您还可以使用一个带捕获组的模式,分别捕获题干、选项和答案部分。然后对于选项(候选答案)部分,可以按括号中的大写字母标签(如 (A)、(B))进行拆分。
带捕获组的模式:
^\(\d+\) (.+)\n(\([A-Z]\).*?)। Ans \(([A-Z])\)
^
字符串开头(使用 m 标志时为每一行的开头)
\(\d+\)
匹配括号内的 1 个或多个数字,其后跟一个空格
(.+)\n
捕获组 1,匹配该行的其余部分;随后匹配一个换行符
(\([A-Z]\).*?)
捕获组 2,匹配括号中的一个大写字符,后跟尽可能少的字符
। Ans
按字面匹配
\(([A-Z])\)
在捕获组 3 中捕获括号内的大写字符
或使用 unicode categories 如果支持:
^\(\p{Nd}+\)\s+(.+)\n(\(\p{L}\).*?)।\s+Ans\s+\((\p{L})\)
代码中第1组的值用i[1]
等表示。
const str = `(1) Capital of Bangladesh is-
(A) Dhaka (B) Rangpur (C) Chittagong (D) Comilla। Ans (A) Dhaka
(2) Largest city of Bangladesh is-
(A) Mirpur (B) Rangpur (C) Chittagong (D) Comilla। Ans (C) Chittagong
(3) Smallest city of Bangladesh is-
(ক) বাংলা (খ) ইংরেজি । Ans (B) Rangpur`;

// Flags: g to find every question, m so ^ anchors at each line start,
// u to enable the \p{...} Unicode property escapes.
// Group 1 is the question body, group 2 the option list, group 3 the answer's label.
const regex = /^\(\p{Nd}+\)\s+(.+)\n(\(\p{L}\).*?)।\s+Ans\s+\((\p{L})\)/gum;

// Build { questions: [{ options, body, answers }] } from every match in str.
const result = {
  questions: Array.from(str.matchAll(regex)).map(i =>
    ({
      // Splitting on the "(X)" labels leaves an empty first piece, and the last
      // piece keeps any whitespace that precedes the danda, so trim each option
      // before dropping the empty ones.
      options: i[2]
        .split(/\s*\(\p{L}\)\s*/u)
        .map(option => option.trim())
        .filter(Boolean),
      body: i[1],
      answers: [i[3]]
    })
  )
};
console.log(result);
或者使用否定字符类 [^()]+
来匹配括号之间的任意内容,示例如下:
const str = `(1) Capital of Bangladesh is-
(A) Dhaka (B) Rangpur (C) Chittagong (D) Comilla। Ans (A) Dhaka
(2) Largest city of Bangladesh is-
(A) Mirpur (B) Rangpur (C) Chittagong (D) Comilla। Ans (C) Chittagong
(3) Smallest city of Bangladesh is-
(ক) বাংলা (খ) ইংরেজি । Ans (B) Rangpur`;

// [^()]+ accepts anything between a pair of parentheses, so no Unicode
// property escapes (and no u flag) are needed. Flags: g to find every
// question, m so ^ anchors at each line start.
// Group 1 is the question body, group 2 the option list, group 3 the answer's label.
const regex = /^\([^()]+\)\s+(.+)\n(\([^()]+\).*?)।\s+Ans\s+\(([^()]+)\)/gm;

// Build { questions: [{ options, body, answers }] } from every match in str.
const result = {
  questions: Array.from(str.matchAll(regex)).map(i =>
    ({
      // Splitting on the "(X)" labels leaves an empty first piece, and the last
      // piece keeps any whitespace that precedes the danda, so trim each option
      // before dropping the empty ones.
      options: i[2]
        .split(/\s*\([^()]+\)\s*/)
        .map(option => option.trim())
        .filter(Boolean),
      body: i[1],
      answers: [i[3]]
    })
  )
};
console.log(result);
我有一个包含多项选择题和答案的字符串,如下所示:
(1) Capital of Bangladesh is-
(A) Dhaka (B) Rangpur (C) Chittagong (D) Comilla। Ans (A) Dhaka
(2) Largest city of Bangladesh is-
(A) Mirpur (B) Rangpur (C) Chittagong (D) Comilla। Ans (C) Chittagong
(3) Smallest city of Bangladesh is-
(A) Dhaka (B) Rangpur (C) Chittagong (D) Meherpur। Ans (B) Rangpur
我需要根据上面的字符串创建 json,以便它创建单独的问题和答案
最后的 json 会是这样的:
{"questions":
[
{
"options":["Dhaka","Rangpur","Chittagong","Comilla"],
"body":"Capital of Bangladesh is-",
"answers":["A"]
},
{
"options":["Mirpur","Rangpur","Chittagong","Comilla"],
"body":"Largest city of Bangladesh is-",
"answers":["C"]
}
]
}
我试过
// Asker's attempt: split the loaded text into lines and try to collect them in `questions`.
// `reader` and `questions` are not defined in this snippet — presumably a FileReader and an
// array declared elsewhere; TODO confirm.
var result = reader.result.split('\n');
for (var index = 0; index < result.length; index++) {
var question = result[index]
// NOTE(review): the argument is a string, not a regex literal. match() converts it with
// new RegExp(string), so the slashes and the trailing "g" are matched as literal characters.
if(question.match("/[(/)]/g")){
// NOTE(review): this assigns to the `push` property instead of calling questions.push(question).
questions.push = question
}
else {
// Same assignment as the if-branch, so both branches currently do the same thing.
questions.push = question
}
}
console.log(questions)
我怎样才能做到
试一试
我们需要 /u 标志来处理 Unicode,并且要使用 .+ 而不是 \w,因为 \w 只匹配 ASCII 字母、数字和下划线,无法匹配孟加拉语等非 ASCII 字符
使用 Unicode 正则表达式的更多内容
Regular expression \p{L} and \p{N}
const str = `(1) The main language of Bangladesh is-
(ক) বাংলা (খ) ইংরেজি (C) Hindi (D) French। Ans (ক) বাংলা
(2) Largest city of Bangladesh is-
(A) Mirpur (B) Rangpur (C) Chittagong (D) Comilla। Ans (C) Chittagong
(3) Smallest city of Bangladesh is-
(A) Dhaka (B) Rangpur (C) Chittagong (D) Meherpur। Ans (B) Rangpur`;

/**
 * Turn quiz text into `{ questions: [{ body, options, answers }] }`.
 *
 * The text is read as pairs of lines: a question line such as `(1) Question-`
 * followed by an options line such as `(A) x (B) y। Ans (A) x`.
 * Blank lines are skipped, both `\n` and `\r\n` line endings are accepted,
 * whitespace around the danda is optional, and any number of options is allowed.
 *
 * @param {string} text - Raw quiz text.
 * @returns {{questions: Array<{body: string, options?: string[], answers?: string[]}>}}
 *   Parsed questions; a trailing question line without an options line keeps only `body`.
 * @throws {Error} If a question line or an options line does not have the expected shape.
 */
function parseQuestions(text) {
  // Drop blank lines first so a trailing newline does not shift the question/options pairing.
  const lines = text.split(/\r?\n/u).filter((line) => line.trim() !== "");

  return lines.reduce((acc, line, i) => {
    if (i % 2 === 0) {
      // Question line: remove only the leading "(N)" marker, so parentheses
      // inside the question text itself are preserved.
      const bodyMatch = line.match(/^\s*\([^()]+\)\s*(.*)$/u);
      if (!bodyMatch) {
        throw new Error(`Not a question line: ${line}`);
      }
      acc.questions.push({ body: bodyMatch[1].trim() });
      return acc;
    }

    const curItem = acc.questions[acc.questions.length - 1]; // last pushed object
    // The danda separates the option list from the "Ans (X) ..." part.
    const [optionStr, answer] = line.split(/\s*।\s*/u);
    const answerMatch = answer === undefined ? null : answer.match(/\(([^()]+)\)/u);
    if (!answerMatch) {
      throw new Error(`Missing "। Ans (X)" part: ${line}`);
    }

    // Splitting on the "(X)" labels works for any number of options; the piece
    // before the first label is empty and is filtered out.
    curItem.options = optionStr
      .split(/\s*\([^()]+\)\s*/u)
      .map((option) => option.trim())
      .filter(Boolean);
    curItem.answers = [answerMatch[1]]; // just the label from inside the brackets
    return acc;
  }, { questions: [] });
}

const obj = parseQuestions(str);
console.log(obj);
您还可以使用一个带捕获组的模式,分别捕获题干、选项和答案部分。然后对于选项(候选答案)部分,可以按括号中的大写字母标签(如 (A)、(B))进行拆分。
带捕获组的模式:
^\(\d+\) (.+)\n(\([A-Z]\).*?)। Ans \(([A-Z])\)
^
字符串开头(使用 m 标志时为每一行的开头)
\(\d+\)
匹配括号内的 1 个或多个数字,其后跟一个空格
(.+)\n
捕获组 1,匹配该行的其余部分;随后匹配一个换行符
(\([A-Z]\).*?)
捕获组 2,匹配括号中的一个大写字符,后跟尽可能少的字符
। Ans
按字面匹配
\(([A-Z])\)
在捕获组 3 中捕获括号内的大写字符
或使用 unicode categories 如果支持:
^\(\p{Nd}+\)\s+(.+)\n(\(\p{L}\).*?)।\s+Ans\s+\((\p{L})\)
代码中第1组的值用i[1]
等表示。
const str = `(1) Capital of Bangladesh is-
(A) Dhaka (B) Rangpur (C) Chittagong (D) Comilla। Ans (A) Dhaka
(2) Largest city of Bangladesh is-
(A) Mirpur (B) Rangpur (C) Chittagong (D) Comilla। Ans (C) Chittagong
(3) Smallest city of Bangladesh is-
(ক) বাংলা (খ) ইংরেজি । Ans (B) Rangpur`;

// Flags: g to find every question, m so ^ anchors at each line start,
// u to enable the \p{...} Unicode property escapes.
// Group 1 is the question body, group 2 the option list, group 3 the answer's label.
const regex = /^\(\p{Nd}+\)\s+(.+)\n(\(\p{L}\).*?)।\s+Ans\s+\((\p{L})\)/gum;

// Build { questions: [{ options, body, answers }] } from every match in str.
const result = {
  questions: Array.from(str.matchAll(regex)).map(i =>
    ({
      // Splitting on the "(X)" labels leaves an empty first piece, and the last
      // piece keeps any whitespace that precedes the danda, so trim each option
      // before dropping the empty ones.
      options: i[2]
        .split(/\s*\(\p{L}\)\s*/u)
        .map(option => option.trim())
        .filter(Boolean),
      body: i[1],
      answers: [i[3]]
    })
  )
};
console.log(result);
或者使用否定字符类 [^()]+
来匹配括号之间的任意内容,示例如下:
const str = `(1) Capital of Bangladesh is-
(A) Dhaka (B) Rangpur (C) Chittagong (D) Comilla। Ans (A) Dhaka
(2) Largest city of Bangladesh is-
(A) Mirpur (B) Rangpur (C) Chittagong (D) Comilla। Ans (C) Chittagong
(3) Smallest city of Bangladesh is-
(ক) বাংলা (খ) ইংরেজি । Ans (B) Rangpur`;

// [^()]+ accepts anything between a pair of parentheses, so no Unicode
// property escapes (and no u flag) are needed. Flags: g to find every
// question, m so ^ anchors at each line start.
// Group 1 is the question body, group 2 the option list, group 3 the answer's label.
const regex = /^\([^()]+\)\s+(.+)\n(\([^()]+\).*?)।\s+Ans\s+\(([^()]+)\)/gm;

// Build { questions: [{ options, body, answers }] } from every match in str.
const result = {
  questions: Array.from(str.matchAll(regex)).map(i =>
    ({
      // Splitting on the "(X)" labels leaves an empty first piece, and the last
      // piece keeps any whitespace that precedes the danda, so trim each option
      // before dropping the empty ones.
      options: i[2]
        .split(/\s*\([^()]+\)\s*/)
        .map(option => option.trim())
        .filter(Boolean),
      body: i[1],
      answers: [i[3]]
    })
  )
};
console.log(result);