JavaScript:for循环自动构建多维数组
JavaScript: For loop to build multidimensional array automatically
您好,过去几个小时我一直在使用 cheerio.js 编写以下代码和非结构化内容。
到目前为止我还没有成功,非常感谢你的帮助。
我正在迭代源内容以生成一个数组,只是输出数组的格式不正确。
这是来源HTML。
var cheerio = require('cheerio');
var htmlContent2 = '<div id="header1"><p class="headpara">Header content</p> <p class="elem"><b>a. </b>Lorem dolor sit amet, consectetur:</p><p class="elem"><b>1. </b>Perferendis iure doloremque iusto facilis.</p><p class="elem"><b>2. </b>Asperiores impedit officiis cumque molestias at rerum !</p><p class="elem"><b>b. </b>More dummy text.</p> <p class="elem"><b>1. </b>Additional dummy text: </p> <p class="elem"><b>(a).</b>Asperiores impedit officiis.</p> <p class="elem"><b>(b).</b>Lolestiae asperiores ad repellat est obcaecati.</p> <p class="elem"><b>2. </b>Lorem ipsum dolor sit amet 1.</p> <p class="elem"><b>3.</b>Lorem ipsum dolor sit amet 2. </p> <p class="notelem">Dignissimos maiores facere consequuntur quod.</p><p class="notelem"> maiores facere consequuntur quod.</p> <p class="elem"><b>c. </b>Ea consectetur excepturi aperiam.</p></div>';
这是我的代码:
// Asker's original attempt: collects every bold label under `.elem` into one flat list.
var $ = cheerio.load(htmlContent2);
// Label patterns, tested in this order against the text of each <b> element:
// "a. " style, "1." style, "(a)" style, "(1)" style.
var regex1 = /[a-z]\.\s/,
regex2 = /[0-9]\./,
regex3 = /\([a-z]\)/,
regex4 = /\([0-9]\)/;
// NOTE: the `var` statement ended on the line above, so this assigns `allList` without declaring it.
allList = [];
// The callback has no return statement; `.filter` is used here only for its side effect on `allList`.
var newElements = $(".elem b").filter(function () {
var newList = [];
// item4 (like regex4) is declared but never used below.
var item1,
item2,
item3,
item4;
newList.push($(this).parent().text());
// newList holds exactly one entry at this point, so the loop body runs once per <b> element.
for (var i = 0, j = newList.length; i < j; i++) {
// Every branch pushes into the same flat array; nothing here creates a nested array.
if (regex1.test($(this).text())) {
item1 = $(this).text();
allList.push(item1);
}
if (regex2.test($(this).text())) {
item2 = $(this).text();
allList.push(item2);
}
if (regex3.test($(this).text())) {
item3 = $(this).text();
allList.push(item3);
}
}
});
console.log(JSON.stringify(allList));
当前结果为:
["a. ","1. ","2. ","b. ","1. ","(a).","(b).","2. ","3.","c. "]
期望的结果应该是:
["a. ",["1. ","2. "],"b. ",["1. ",["(a).","(b)."],"2. ","3."],"c. "]
数组嵌套级别因源内容而异。我已经在论坛上搜索了类似的方法,但无济于事。
谢谢
你已经接近了。只要您只需要测试您设置为正则表达式的 4 个条件,这应该可以正常工作。基本上,它只是检查您所处的级别,然后将数组压入或弹出堆栈以构建您想要的结构。
编辑:我清理了代码,这样它就不会污染您的作用域了。我还稍微修改了您的正则表达式:有些改动是必需的,有些只是为了应对数据可能出现的变化(例如标签后面没有空格,或者列表编号超过 9)。
EDIT2:我还注意到您想对内容做进一步处理,所以我提供了一种按对象属性进行嵌套的方式,这样您既可以得到嵌套的“标签”,也可以保留其他有用的信息。
EDIT3:添加了一个打印功能来帮助您测试您遇到的问题,并修复了一个错误,该错误导致从更高级别跳到下面的多个级别会错误地减少一级。您会看到我们将内容分开,以便它可以通过标签嵌套,然后在输出过程中根据需要重新组合它们。这里只是记录,但当然可以很容易地将 html 标签附加到文档。
/**
 * Nests a flat, ordered list of outline entries into arrays according to the
 * outline level implied by each entry's label:
 * "a. " -> 0, "1. " -> 1, "(a)." -> 2, "(1)." -> 3.
 *
 * Entries at level 0 go directly into the returned array; each deeper level
 * is collected into a nested array placed right after its parent entry.
 *
 * @param {Array} list - Entries in document order (label strings, or objects when `prop` is given).
 * @param {string} [prop] - Name of the property holding the label on each entry;
 *   when omitted (or falsy) the entry itself is used as the label.
 * @returns {Array} A new nested array containing the original entries.
 * @throws {Error} 'Unexpected content' when a label matches none of the patterns.
 */
var autoNest = function(list, prop) {
  var regex1 = /[a-z]\.\s?/,
    regex2 = /[0-9]+\.\s?/,
    regex3 = /\([a-z]\)\.\s?/,
    regex4 = /\([0-9]+\)\.\s?/;

  // Patterns are tried in order; the first match decides the level.
  var getLevel = function(text) {
    if (regex1.test(text)) {
      return 0;
    }
    if (regex2.test(text)) {
      return 1;
    }
    if (regex3.test(text)) {
      return 2;
    }
    if (regex4.test(text)) {
      return 3;
    }
    throw new Error('Unexpected content');
  };

  var peek = function(arr) {
    return arr[arr.length - 1];
  };

  var result = [];
  // Each frame records the level of the entries its array holds. Keeping the
  // level on the frame (instead of counting pushes and pops) keeps the stack
  // consistent when the input skips a level on the way down (e.g. "a." then
  // "(a)."), which previously emptied the stack and crashed on the way back up.
  var stack = [{ level: 0, items: result }];

  list.forEach(function(obj) {
    var value = prop ? obj[prop] : obj;
    var currentLevel = getLevel(value);

    // Close every open array that is deeper than this entry; never pop the root.
    while (stack.length > 1 && peek(stack).level > currentLevel) {
      stack.pop();
    }

    // Open a new nested array when this entry is deeper than the innermost open one.
    if (peek(stack).level < currentLevel) {
      var next = [];
      peek(stack).items.push(next);
      stack.push({ level: currentLevel, items: next });
    }

    peek(stack).items.push(obj);
  });

  return result;
};
// Collect one record per bold label found inside the `.elem` paragraphs.
// `items` has to be declared before the loop pushes into it; without this
// declaration the first `items.push` throws a ReferenceError.
var items = [];
$('.elem b').each(function() {
  // Text of the <b> element itself, e.g. "a. " or "(a).".
  var label = $(this).text();
  // Text of the enclosing paragraph.
  // NOTE(review): this presumably still includes the label text - confirm before
  // printing `label` and `content` side by side.
  var content = $(this).parent().text();
  items.push({
    label: label,
    content: content
  });
});
/**
 * Logs a nested list of `{label, content}` records, one line per record,
 * indenting each line by one space per nesting depth.
 *
 * @param {Array} items - Records and/or nested arrays of records.
 * @param {number} [level] - Current nesting depth; a missing or falsy value is treated as 0.
 */
function printArray(items, level) {
  var depth = level || 0;

  // Every record printed by this call shares the same indentation prefix.
  var indent = '';
  while (indent.length < depth) {
    indent += ' ';
  }

  items.forEach(function(entry) {
    if (!Array.isArray(entry)) {
      console.log(indent + (entry.label + ' ' + entry.content));
      return;
    }
    // Nested arrays are printed one level deeper.
    printArray(entry, depth + 1);
  });
}
// Nest the collected records using each record's `label` property, then log the result.
var nested = autoNest(items, 'label');
printArray(nested);
只对您的代码进行了一些修改。我所做的不同之处在于对不同的级别使用不同的数组:
// Builds a nested list of outline labels (up to three levels) from the bold
// labels inside the `.elem` paragraphs and logs it as JSON.
var $ = cheerio.load(htmlContent2);

// Label patterns, one per outline level. regex4 is kept for a fourth level
// but is not used by the logic below.
var regex1 = /[a-z]\.\s/,
  regex2 = /[0-9]\./,
  regex3 = /\([a-z]\)/,
  regex4 = /\([0-9]\)/;

// Declared explicitly so the script does not create an implicit global.
var allList = [];

// Sub-lists that are still open. They live outside the callback so they
// survive from one label to the next.
var level2 = [];
var level3 = [];

// Closes a pending third-level list by attaching it to the second-level list.
var flushLevel3 = function () {
  if (level3.length > 0) {
    level2.push(level3);
    level3 = [];
  }
};

// Closes a pending second-level list by attaching it to the result.
// The deepest list is closed first so it ends up inside its parent; closing
// level2 first would strand level3 in a fresh, never-reset wrapper.
var flushLevel2 = function () {
  flushLevel3();
  if (level2.length > 0) {
    allList.push(level2);
    level2 = [];
  }
};

// The callback returns nothing; `.filter` is used only to visit each label.
var newElements = $(".elem b").filter(function () {
  var label = $(this).text();

  if (regex1.test(label)) {
    // A new top-level label ends every open sub-list.
    flushLevel2();
    allList.push(label);
    return;
  }
  if (regex2.test(label)) {
    // A new second-level label ends the open third-level list.
    flushLevel3();
    level2.push(label);
    return;
  }
  if (regex3.test(label)) {
    level3.push(label);
  }
});

// Emit sub-lists that are still open when the input does not end with a
// top-level label.
flushLevel2();

console.log(JSON.stringify(allList));
大家好,这是我的解决方案,它会直接把 a 样式的数组转换为 b 样式的数组。
// Flat list of labels in document order (the input passed to convert in the test below).
var a = ["a. ", "1. ", "2. ", "b. ", "1. ", "(a).", "(b).", "2. ", "3.", "c. "];
// The nested structure the test below compares the converted result against.
var b = ["a. ", ["1. ", "2. "], "b. ", ["1. ", ["(a).", "(b)."], "2. ", "3."], "c. "];
/**
 * Classifies a label by outline level.
 *
 * @param {*} o - Label text to classify.
 * @returns {number} 1 for "a. ", 2 for "1.", 3 for "(a)", 4 for "(1)" style
 *   labels; 0 when no pattern matches.
 */
var level = function(o) {
  // Ordered from outermost to innermost; the first matching pattern wins.
  var patterns = [/[a-z]\.\s/, /[0-9]\./, /\([a-z]\)/, /\([0-9]\)/];
  for (var rank = 0; rank < patterns.length; rank++) {
    if (patterns[rank].test(o)) {
      return rank + 1;
    }
  }
  return 0;
};
/**
 * Builds an array parallel to `arr` holding the outline level of each element.
 *
 * @param {Array} arr - Labels to classify.
 * @returns {Array<number>} The result of `level` for every element, in order.
 */
var shadow = function(arr) {
  var ranks = [];
  var total = arr.length;
  var idx = 0;
  while (idx < total) {
    ranks.push(level(arr[idx]));
    idx += 1;
  }
  return ranks;
};
/**
 * Converts a flat list of outline labels into a nested list, in place.
 *
 * Labels deeper than the innermost open list start a new nested array placed
 * right after their parent. Labels that `level` does not recognise (level 0)
 * stay in whichever list is currently open.
 *
 * The previous version stepped through the array two elements at a time,
 * which assumed strict alternation of item and sub-list. Trailing children
 * (["a. ", "1. "]) and children after consecutive siblings
 * (["a. ", "b. ", "1. ", "c. "]) were left flat.
 *
 * @param {Array} arr - Flat labels in document order; its contents are replaced by the nested result.
 * @returns {Array} The same `arr` reference, now nested.
 */
var convert = function(arr) {
  var ranks = shadow(arr);
  var root = [];
  // Each frame pairs an output list with the level of the labels it holds.
  // The root adopts the level of the first recognised label.
  var frames = [{ rank: null, list: root }];

  for (var idx = 0; idx < arr.length; idx++) {
    var rank = ranks[idx];
    var top = frames[frames.length - 1];

    if (rank !== 0) {
      if (frames[0].rank === null) {
        frames[0].rank = rank;
      }
      // Close every list deeper than this label; the root is never closed.
      while (frames.length > 1 && frames[frames.length - 1].rank > rank) {
        frames.pop();
      }
      top = frames[frames.length - 1];
      // Open a nested list when this label is deeper than the open one.
      if (top.rank < rank) {
        var child = [];
        top.list.push(child);
        top = { rank: rank, list: child };
        frames.push(top);
      }
    }

    top.list.push(arr[idx]);
  }

  // Callers rely on the input array itself being updated, so write the
  // nested structure back into it.
  arr.length = 0;
  Array.prototype.push.apply(arr, root);
  return arr;
};
测试:
// Convert the flat fixture `a`, log the result as JSON, and compare it with the
// expected structure `b`. `ut` and `joCompare` are not defined in this snippet;
// presumably they come from the author's own test helper - confirm before running.
let x = convert(a);
let txt = JSON.stringify(x);
ut.writeLog(txt);
ut.assertEqual(x, b, joCompare);
测试开始 - (1) --
["a. ",["1.","2."],"b. ",["1.",["(a).","(b)."],"2. ","3."],"c. "]
1. [通过]:(...) ==> (Equal
) - (Equal
) 预期
-测试结束-(1)--
总计:1,通过:1,失败:0,统计:100% 通过
您好,过去几个小时我一直在使用 cheerio.js 编写以下代码和非结构化内容。
到目前为止我还没有成功,非常感谢你的帮助。
我正在迭代源内容以生成一个数组,只是输出数组的格式不正确。
这是来源HTML。
var cheerio = require('cheerio');
var htmlContent2 = '<div id="header1"><p class="headpara">Header content</p> <p class="elem"><b>a. </b>Lorem dolor sit amet, consectetur:</p><p class="elem"><b>1. </b>Perferendis iure doloremque iusto facilis.</p><p class="elem"><b>2. </b>Asperiores impedit officiis cumque molestias at rerum !</p><p class="elem"><b>b. </b>More dummy text.</p> <p class="elem"><b>1. </b>Additional dummy text: </p> <p class="elem"><b>(a).</b>Asperiores impedit officiis.</p> <p class="elem"><b>(b).</b>Lolestiae asperiores ad repellat est obcaecati.</p> <p class="elem"><b>2. </b>Lorem ipsum dolor sit amet 1.</p> <p class="elem"><b>3.</b>Lorem ipsum dolor sit amet 2. </p> <p class="notelem">Dignissimos maiores facere consequuntur quod.</p><p class="notelem"> maiores facere consequuntur quod.</p> <p class="elem"><b>c. </b>Ea consectetur excepturi aperiam.</p></div>';
这是我的代码:
// Asker's original attempt: collects every bold label under `.elem` into one flat list.
var $ = cheerio.load(htmlContent2);
// Label patterns, tested in this order against the text of each <b> element:
// "a. " style, "1." style, "(a)" style, "(1)" style.
var regex1 = /[a-z]\.\s/,
regex2 = /[0-9]\./,
regex3 = /\([a-z]\)/,
regex4 = /\([0-9]\)/;
// NOTE: the `var` statement ended on the line above, so this assigns `allList` without declaring it.
allList = [];
// The callback has no return statement; `.filter` is used here only for its side effect on `allList`.
var newElements = $(".elem b").filter(function () {
var newList = [];
// item4 (like regex4) is declared but never used below.
var item1,
item2,
item3,
item4;
newList.push($(this).parent().text());
// newList holds exactly one entry at this point, so the loop body runs once per <b> element.
for (var i = 0, j = newList.length; i < j; i++) {
// Every branch pushes into the same flat array; nothing here creates a nested array.
if (regex1.test($(this).text())) {
item1 = $(this).text();
allList.push(item1);
}
if (regex2.test($(this).text())) {
item2 = $(this).text();
allList.push(item2);
}
if (regex3.test($(this).text())) {
item3 = $(this).text();
allList.push(item3);
}
}
});
console.log(JSON.stringify(allList));
当前结果为:
["a. ","1. ","2. ","b. ","1. ","(a).","(b).","2. ","3.","c. "]
期望的结果应该是:
["a. ",["1. ","2. "],"b. ",["1. ",["(a).","(b)."],"2. ","3."],"c. "]
数组嵌套级别因源内容而异。我已经在论坛上搜索了类似的方法,但无济于事。
谢谢
你已经接近了。只要您只需要测试您设置为正则表达式的 4 个条件,这应该可以正常工作。基本上,它只是检查您所处的级别,然后将数组压入或弹出堆栈以构建您想要的结构。
编辑:我清理了代码,这样它就不会污染您的作用域了。我还稍微修改了您的正则表达式:有些改动是必需的,有些只是为了应对数据可能出现的变化(例如标签后面没有空格,或者列表编号超过 9)。
EDIT2:我还注意到您想对内容做进一步处理,所以我提供了一种按对象属性进行嵌套的方式,这样您既可以得到嵌套的“标签”,也可以保留其他有用的信息。
EDIT3:添加了一个打印功能来帮助您测试您遇到的问题,并修复了一个错误,该错误导致从更高级别跳到下面的多个级别会错误地减少一级。您会看到我们将内容分开,以便它可以通过标签嵌套,然后在输出过程中根据需要重新组合它们。这里只是记录,但当然可以很容易地将 html 标签附加到文档。
/**
 * Nests a flat, ordered list of outline entries into arrays according to the
 * outline level implied by each entry's label:
 * "a. " -> 0, "1. " -> 1, "(a)." -> 2, "(1)." -> 3.
 *
 * Entries at level 0 go directly into the returned array; each deeper level
 * is collected into a nested array placed right after its parent entry.
 *
 * @param {Array} list - Entries in document order (label strings, or objects when `prop` is given).
 * @param {string} [prop] - Name of the property holding the label on each entry;
 *   when omitted (or falsy) the entry itself is used as the label.
 * @returns {Array} A new nested array containing the original entries.
 * @throws {Error} 'Unexpected content' when a label matches none of the patterns.
 */
var autoNest = function(list, prop) {
  var regex1 = /[a-z]\.\s?/,
    regex2 = /[0-9]+\.\s?/,
    regex3 = /\([a-z]\)\.\s?/,
    regex4 = /\([0-9]+\)\.\s?/;

  // Patterns are tried in order; the first match decides the level.
  var getLevel = function(text) {
    if (regex1.test(text)) {
      return 0;
    }
    if (regex2.test(text)) {
      return 1;
    }
    if (regex3.test(text)) {
      return 2;
    }
    if (regex4.test(text)) {
      return 3;
    }
    throw new Error('Unexpected content');
  };

  var peek = function(arr) {
    return arr[arr.length - 1];
  };

  var result = [];
  // Each frame records the level of the entries its array holds. Keeping the
  // level on the frame (instead of counting pushes and pops) keeps the stack
  // consistent when the input skips a level on the way down (e.g. "a." then
  // "(a)."), which previously emptied the stack and crashed on the way back up.
  var stack = [{ level: 0, items: result }];

  list.forEach(function(obj) {
    var value = prop ? obj[prop] : obj;
    var currentLevel = getLevel(value);

    // Close every open array that is deeper than this entry; never pop the root.
    while (stack.length > 1 && peek(stack).level > currentLevel) {
      stack.pop();
    }

    // Open a new nested array when this entry is deeper than the innermost open one.
    if (peek(stack).level < currentLevel) {
      var next = [];
      peek(stack).items.push(next);
      stack.push({ level: currentLevel, items: next });
    }

    peek(stack).items.push(obj);
  });

  return result;
};
// Collect one record per bold label found inside the `.elem` paragraphs.
// `items` has to be declared before the loop pushes into it; without this
// declaration the first `items.push` throws a ReferenceError.
var items = [];
$('.elem b').each(function() {
  // Text of the <b> element itself, e.g. "a. " or "(a).".
  var label = $(this).text();
  // Text of the enclosing paragraph.
  // NOTE(review): this presumably still includes the label text - confirm before
  // printing `label` and `content` side by side.
  var content = $(this).parent().text();
  items.push({
    label: label,
    content: content
  });
});
/**
 * Logs a nested list of `{label, content}` records, one line per record,
 * indenting each line by one space per nesting depth.
 *
 * @param {Array} items - Records and/or nested arrays of records.
 * @param {number} [level] - Current nesting depth; a missing or falsy value is treated as 0.
 */
function printArray(items, level) {
  var depth = level || 0;

  // Every record printed by this call shares the same indentation prefix.
  var indent = '';
  while (indent.length < depth) {
    indent += ' ';
  }

  items.forEach(function(entry) {
    if (!Array.isArray(entry)) {
      console.log(indent + (entry.label + ' ' + entry.content));
      return;
    }
    // Nested arrays are printed one level deeper.
    printArray(entry, depth + 1);
  });
}
// Nest the collected records using each record's `label` property, then log the result.
var nested = autoNest(items, 'label');
printArray(nested);
只对您的代码进行了一些修改。我所做的不同之处在于对不同的级别使用不同的数组:
// Builds a nested list of outline labels (up to three levels) from the bold
// labels inside the `.elem` paragraphs and logs it as JSON.
var $ = cheerio.load(htmlContent2);

// Label patterns, one per outline level. regex4 is kept for a fourth level
// but is not used by the logic below.
var regex1 = /[a-z]\.\s/,
  regex2 = /[0-9]\./,
  regex3 = /\([a-z]\)/,
  regex4 = /\([0-9]\)/;

// Declared explicitly so the script does not create an implicit global.
var allList = [];

// Sub-lists that are still open. They live outside the callback so they
// survive from one label to the next.
var level2 = [];
var level3 = [];

// Closes a pending third-level list by attaching it to the second-level list.
var flushLevel3 = function () {
  if (level3.length > 0) {
    level2.push(level3);
    level3 = [];
  }
};

// Closes a pending second-level list by attaching it to the result.
// The deepest list is closed first so it ends up inside its parent; closing
// level2 first would strand level3 in a fresh, never-reset wrapper.
var flushLevel2 = function () {
  flushLevel3();
  if (level2.length > 0) {
    allList.push(level2);
    level2 = [];
  }
};

// The callback returns nothing; `.filter` is used only to visit each label.
var newElements = $(".elem b").filter(function () {
  var label = $(this).text();

  if (regex1.test(label)) {
    // A new top-level label ends every open sub-list.
    flushLevel2();
    allList.push(label);
    return;
  }
  if (regex2.test(label)) {
    // A new second-level label ends the open third-level list.
    flushLevel3();
    level2.push(label);
    return;
  }
  if (regex3.test(label)) {
    level3.push(label);
  }
});

// Emit sub-lists that are still open when the input does not end with a
// top-level label.
flushLevel2();

console.log(JSON.stringify(allList));
大家好,这是我的解决方案,它会直接把 a 样式的数组转换为 b 样式的数组。
// Flat list of labels in document order (the input passed to convert in the test below).
var a = ["a. ", "1. ", "2. ", "b. ", "1. ", "(a).", "(b).", "2. ", "3.", "c. "];
// The nested structure the test below compares the converted result against.
var b = ["a. ", ["1. ", "2. "], "b. ", ["1. ", ["(a).", "(b)."], "2. ", "3."], "c. "];
/**
 * Classifies a label by outline level.
 *
 * @param {*} o - Label text to classify.
 * @returns {number} 1 for "a. ", 2 for "1.", 3 for "(a)", 4 for "(1)" style
 *   labels; 0 when no pattern matches.
 */
var level = function(o) {
  // Ordered from outermost to innermost; the first matching pattern wins.
  var patterns = [/[a-z]\.\s/, /[0-9]\./, /\([a-z]\)/, /\([0-9]\)/];
  for (var rank = 0; rank < patterns.length; rank++) {
    if (patterns[rank].test(o)) {
      return rank + 1;
    }
  }
  return 0;
};
/**
 * Builds an array parallel to `arr` holding the outline level of each element.
 *
 * @param {Array} arr - Labels to classify.
 * @returns {Array<number>} The result of `level` for every element, in order.
 */
var shadow = function(arr) {
  var ranks = [];
  var total = arr.length;
  var idx = 0;
  while (idx < total) {
    ranks.push(level(arr[idx]));
    idx += 1;
  }
  return ranks;
};
/**
 * Converts a flat list of outline labels into a nested list, in place.
 *
 * Labels deeper than the innermost open list start a new nested array placed
 * right after their parent. Labels that `level` does not recognise (level 0)
 * stay in whichever list is currently open.
 *
 * The previous version stepped through the array two elements at a time,
 * which assumed strict alternation of item and sub-list. Trailing children
 * (["a. ", "1. "]) and children after consecutive siblings
 * (["a. ", "b. ", "1. ", "c. "]) were left flat.
 *
 * @param {Array} arr - Flat labels in document order; its contents are replaced by the nested result.
 * @returns {Array} The same `arr` reference, now nested.
 */
var convert = function(arr) {
  var ranks = shadow(arr);
  var root = [];
  // Each frame pairs an output list with the level of the labels it holds.
  // The root adopts the level of the first recognised label.
  var frames = [{ rank: null, list: root }];

  for (var idx = 0; idx < arr.length; idx++) {
    var rank = ranks[idx];
    var top = frames[frames.length - 1];

    if (rank !== 0) {
      if (frames[0].rank === null) {
        frames[0].rank = rank;
      }
      // Close every list deeper than this label; the root is never closed.
      while (frames.length > 1 && frames[frames.length - 1].rank > rank) {
        frames.pop();
      }
      top = frames[frames.length - 1];
      // Open a nested list when this label is deeper than the open one.
      if (top.rank < rank) {
        var child = [];
        top.list.push(child);
        top = { rank: rank, list: child };
        frames.push(top);
      }
    }

    top.list.push(arr[idx]);
  }

  // Callers rely on the input array itself being updated, so write the
  // nested structure back into it.
  arr.length = 0;
  Array.prototype.push.apply(arr, root);
  return arr;
};
测试:
// Convert the flat fixture `a`, log the result as JSON, and compare it with the
// expected structure `b`. `ut` and `joCompare` are not defined in this snippet;
// presumably they come from the author's own test helper - confirm before running.
let x = convert(a);
let txt = JSON.stringify(x);
ut.writeLog(txt);
ut.assertEqual(x, b, joCompare);
测试开始 - (1) --
["a. ",["1.","2."],"b. ",["1.",["(a).","(b)."],"2. ","3."],"c. "]
1. [通过]:(...) ==> (Equal
) - (Equal
) 预期
-测试结束-(1)--
总计:1,通过:1,失败:0,统计:100% 通过