JavaScript:for循环自动构建多维数组

JavaScript: For loop to build multidimensional array automatically

您好,过去几个小时我一直在用 cheerio.js 编写下面的代码,用来处理一段非结构化内容。

到目前为止我还没有成功,非常感谢你的帮助。

我正在迭代源内容以生成一个数组,只是输出数组的格式不正确。

这是来源HTML。

// cheerio is loaded here; the snippets that follow call cheerio.load() on the fixture below.
var cheerio = require('cheerio');
// Sample markup: each <p class="elem"> starts with a <b> holding an outline label
// ("a. ", "1. ", "(a).", ...); the <p class="notelem"> paragraphs carry no label.
var htmlContent2 = '<div id="header1"><p class="headpara">Header content</p> <p class="elem"><b>a. </b>Lorem  dolor sit amet, consectetur:</p><p class="elem"><b>1. </b>Perferendis iure doloremque iusto  facilis.</p><p class="elem"><b>2. </b>Asperiores impedit officiis cumque molestias at rerum !</p><p class="elem"><b>b. </b>More dummy text.</p> <p class="elem"><b>1. </b>Additional dummy text: </p> <p class="elem"><b>(a).</b>Asperiores impedit officiis.</p> <p class="elem"><b>(b).</b>Lolestiae asperiores ad repellat est obcaecati.</p> <p class="elem"><b>2. </b>Lorem ipsum dolor sit amet 1.</p> <p class="elem"><b>3.</b>Lorem ipsum dolor sit amet 2. </p> <p class="notelem">Dignissimos maiores facere consequuntur quod.</p><p class="notelem"> maiores facere consequuntur quod.</p>  <p class="elem"><b>c. </b>Ea consectetur excepturi aperiam.</p></div>';

这是我的代码:

// Asker's attempt: walk every <b> inside a ".elem" paragraph and collect its
// label text. Every match is pushed straight onto allList, so the result is a
// single flat array with no nesting.
var $ = cheerio.load(htmlContent2);
//Regex to filter content based on pattern:

// regex1: "a. " style labels, regex2: "1." style, regex3: "(a)" style,
// regex4: "(1)" style (declared but never tested in this snippet).
var regex1 = /[a-z]\.\s/,
    regex2 = /[0-9]\./,
    regex3 = /\([a-z]\)/,
    regex4 = /\([0-9]\)/;
// NOTE: assigned without var/let/const, so this is an implicit global.
allList = [];
// NOTE(review): the callback returns nothing; .filter() appears to be used
// only as a way to iterate over the matched elements.
var newElements = $(".elem b").filter(function () {
    var newList = [];    
    // item4 is declared but never assigned.
    var item1,
    item2,
    item3,
    item4;
    newList.push($(this).parent().text());

    // newList always holds exactly one entry here, so this loop body runs once
    // per <b> element.
    for (var i = 0, j = newList.length; i < j; i++) {
        // The three checks are independent (no else): a label matching more
        // than one pattern would be pushed more than once.
        if (regex1.test($(this).text())) {
            item1 = $(this).text();
           allList.push(item1);
        }
        if (regex2.test($(this).text())) {
            item2 = $(this).text();
            allList.push(item2);           
        }
        if (regex3.test($(this).text())) {
            item3 = $(this).text();
          allList.push(item3);   
        }
    }
});
console.log(JSON.stringify(allList));

当前结果为:

["a. ","1. ","2. ","b. ","1. ","(a).","(b).","2. ","3.","c. "]

期望的结果应该是:

["a. ",["1. ","2. "],"b. ",["1. ",["(a).","(b)."],"2. ","3."],"c. "]

数组嵌套级别因源内容而异。我已经在论坛上搜索了类似的方法,但无济于事。

谢谢

您已经很接近了。只要只需区分您用正则表达式定义的这 4 种情况,下面的代码就可以正常工作。基本思路是:判断当前条目所在的层级,然后通过把数组压入或弹出堆栈来构建您想要的嵌套结构。

编辑:我整理了代码,使它不会污染您的作用域。我还稍微修改了您的正则表达式,其中一些改动是必需的,另一些只是为了应对数据可能的变化(例如标记后面没有空格,或者列表编号超过 9)。

EDIT2:我还注意到您想对内容做进一步处理,所以我提供了一种按对象的某个属性进行嵌套的方式,这样您既能得到嵌套的标签("label"),也能保留与之对应的内容信息。

EDIT3:添加了一个打印函数来帮助您测试遇到的问题,并修复了一个错误:从较深的层级一次回退多个层级时,原先只会错误地回退一级。您会看到我们把标签和内容分开保存,以便按标签进行嵌套,然后在输出时再根据需要把它们重新组合。这里只是打印到控制台,当然也可以很容易地改为把 HTML 标签追加到文档中。

/**
 * Nest a flat, ordered list of outline entries into arrays that mirror the
 * depth implied by each entry's label ("a. ", "1. ", "(a).", "(1).").
 *
 * Entries whose label is deeper than the current group open a new nested
 * array; entries whose label is shallower close every deeper group first.
 * A label that skips a level (e.g. "(a)." directly after "a. ") opens a
 * single nested array, and the root array is never closed.
 *
 * @param {Array} list - Entries in document order: label strings, or objects
 *     when `prop` is given.
 * @param {string} [prop] - Name of the property holding each entry's label;
 *     when omitted (or falsy) the entry itself is used as the label.
 * @returns {Array} A new array holding the original entries, with deeper
 *     entries wrapped in nested arrays.
 * @throws {Error} 'Unexpected content' when a label matches none of the
 *     four patterns.
 */
var autoNest = function(list, prop) {

    // The patterns are unanchored and tried in this order, so they are meant
    // for short label strings rather than whole paragraphs of text.
    var regex1 = /[a-z]\.\s?/,
        regex2 = /[0-9]+\.\s?/,
        regex3 = /\([a-z]\)\.\s?/,
        regex4 = /\([0-9]+\)\.\s?/;

    // Map a label to its outline level: 0 is the outermost, 3 the deepest.
    var getLevel = function(text) {
        if (regex1.test(text)) {
            return 0;
        }
        if (regex2.test(text)) {
            return 1;
        }
        if (regex3.test(text)) {
            return 2;
        }
        if (regex4.test(text)) {
            return 3;
        }
        throw new Error('Unexpected content');
    };

    var peek = function(arr) {
        return arr[arr.length - 1];
    };

    var result = [];
    // Each frame records which level its array collects. Tracking the level
    // per frame (instead of counting pushes and pops) keeps the stack
    // consistent when a label skips one or more levels.
    var stack = [{ level: 0, items: result }];

    list.forEach(function(obj) {
        var value = prop ? obj[prop] : obj;
        var currentLevel = getLevel(value);

        // Close every group that is deeper than this entry, but never the root.
        while (stack.length > 1 && peek(stack).level > currentLevel) {
            stack.pop();
        }

        var top = peek(stack);
        if (currentLevel > top.level) {
            // Deeper than the open group: start a nested array for this level.
            var next = [];
            top.items.push(next);
            top = { level: currentLevel, items: next };
            stack.push(top);
        }
        top.items.push(obj);
    });
    return result;
};

// Collect one { label, content } record per <b> marker found inside a ".elem"
// paragraph: label is the text of the <b> itself, content is the text of its
// parent element.
// `items` has to be declared before the loop; without this declaration the
// first items.push() call throws a ReferenceError.
var items = [];
$('.elem b').each(function() {
    var label = $(this).text();
    var content = $(this).parent().text();
    items.push({
        label: label,
        content: content
    });
});

/**
 * Log a nested list of { label, content } records, one record per line,
 * indenting each line by two spaces per nesting level.
 *
 * @param {Array} items - Records and/or nested arrays of records.
 * @param {number} [level] - Current nesting depth; a falsy value means 0.
 */
function printArray(items, level) {
    var depth = level || 0;

    // The indent depends only on the depth, so build it once per call.
    var indent = '';
    for (var step = 0; step < depth; step += 1) {
        indent += '  ';
    }

    items.forEach(function(entry) {
        if (!Array.isArray(entry)) {
            console.log(indent + (entry.label + ' ' + entry.content));
            return;
        }
        // Nested group: print it one level deeper.
        printArray(entry, depth + 1);
    });
}

// Nest the collected records by their `label` property, then print the tree.
var nested = autoNest(items, 'label');
printArray(nested);

只对您的代码进行了一些修改。我所做的不同之处在于对不同的级别使用不同的数组:

// Builds a nested label list (up to three levels) by keeping one pending
// array per deeper level and flushing it when a shallower label shows up.
var $ = cheerio.load(htmlContent2);

// Patterns that classify a label: regex1 "a. " (top level), regex2 "1."
// (second level), regex3 "(a)" (third level). regex4 "(1)" is declared but
// not handled below.
var regex1 = /[a-z]\.\s/,
    regex2 = /[0-9]\./,
    regex3 = /\([a-z]\)/,
    regex4 = /\([0-9]\)/;
var allList = [];

// Pending groups. They live outside the callback so they survive from one
// element to the next.
var level2 = [];
var level3 = [];

// Attach the pending third-level group (if any) to the second-level group.
function flushLevel3() {
    if (level3.length > 0) {
        level2.push(level3);
        level3 = [];
    }
}

// Attach the pending second-level group (if any) to the result. The
// third-level group is attached first so it ends up inside its parent and
// both pending arrays are reset afterwards.
function flushLevel2() {
    flushLevel3();
    if (level2.length > 0) {
        allList.push(level2);
        level2 = [];
    }
}

// NOTE(review): .filter() is used here only to iterate, as in the question's
// code; the callback returns nothing.
var newElements = $(".elem b").filter(function () {
    var label = $(this).text();

    if (regex1.test(label)) {
        // A new top-level label closes every pending deeper group.
        flushLevel2();
        allList.push(label);
    } else if (regex2.test(label)) {
        // A new second-level label closes the pending third-level group.
        flushLevel3();
        level2.push(label);
    } else if (regex3.test(label)) {
        level3.push(label);
    }
});

// Flush whatever is still pending when the input does not end on a
// top-level label.
flushLevel2();
console.log(JSON.stringify(allList));

Hi伙计们,这是我的解决方案,它将直接将 a 样式数组转换为 b 样式数组。

// a: flat list of labels in document order (input).
var a = ["a. ", "1. ", "2. ", "b. ", "1. ", "(a).", "(b).", "2. ", "3.", "c. "];
// b: the nesting expected after converting a.
var b = ["a. ", ["1. ", "2. "], "b. ", ["1. ", ["(a).", "(b)."], "2. ", "3."], "c. "];
/**
 * Classify an outline label by the first pattern it matches.
 *
 * @param {*} o - Label to classify (tested as a string by the patterns).
 * @returns {number} 1 for "a. ", 2 for "1.", 3 for "(a)", 4 for "(1)",
 *     or 0 when no pattern matches.
 */
var level = function(o) {
    // Ordered from outermost to innermost; position + 1 is the returned rank.
    var patterns = [
        /[a-z]\.\s/,
        /[0-9]\./,
        /\([a-z]\)/,
        /\([0-9]\)/
    ];
    for (var idx = 0; idx < patterns.length; idx += 1) {
        if (patterns[idx].test(o)) {
            return idx + 1;
        }
    }
    return 0;
};
/**
 * Build a parallel array holding the level() rank of every entry of `arr`.
 *
 * @param {Array} arr - Labels to classify.
 * @returns {Array<number>} New array with one rank per index of `arr`.
 */
var shadow = function(arr) {
    var ranks = [];
    var total = arr.length;
    var pos = 0;
    while (pos < total) {
        ranks.push(level(arr[pos]));
        pos += 1;
    }
    return ranks;
};
/**
 * Convert a flat, ordered list of outline labels into nested arrays, in place.
 *
 * The rank of each label comes from shadow(); the first ranked label sets
 * the rank of the outermost list. A label ranked deeper than the open group
 * starts a nested array, and a label ranked shallower closes every deeper
 * group first. This no longer assumes that every label is followed by a
 * child group or that each group is closed by a later same-rank label, so
 * consecutive siblings and trailing children are nested correctly.
 *
 * Entries that are already arrays, and labels ranked 0 (no pattern matched),
 * are appended to the currently open group unchanged.
 *
 * @param {Array} arr - Labels in document order; its contents are replaced.
 * @returns {Array} The same `arr` instance, now holding the nested structure.
 */
var convert = function(arr) {
    if (arr.length < 2) {
        return arr;
    }

    var ranks = shadow(arr);
    var root = [];
    // Each frame pairs a rank with the array that collects labels of that
    // rank. The root's rank is filled in by the first ranked label.
    var frames = [{ rank: null, list: root }];

    arr.forEach(function(item, index) {
        var top = frames[frames.length - 1];
        var rank = ranks[index];

        if (Array.isArray(item) || rank === 0) {
            top.list.push(item);
            return;
        }
        if (frames[0].rank === null) {
            frames[0].rank = rank;
        }

        // Close every group deeper than this label, but never the root.
        while (frames.length > 1 && top.rank > rank) {
            frames.pop();
            top = frames[frames.length - 1];
        }
        if (rank > top.rank) {
            var child = [];
            top.list.push(child);
            top = { rank: rank, list: child };
            frames.push(top);
        }
        top.list.push(item);
    });

    // Keep the original contract: the input array itself is rewritten.
    arr.length = 0;
    Array.prototype.push.apply(arr, root);
    return arr;
};

测试:

// Convert the flat sample, log the result as JSON and compare it with the
// expected nesting. `ut` and `joCompare` are not defined in this snippet;
// presumably they are the author's own test helpers.
let x = convert(a);
let txt = JSON.stringify(x);
ut.writeLog(txt);
ut.assertEqual(x, b, joCompare);

测试开始 - (1) --
["a. ",["1. ","2. "],"b. ",["1. ",["(a).","(b)."],"2. ","3."],"c. "]
1. [通过]:(...) ==> (Equal) - (Equal) 预期

-测试结束-(1)--

总计:1,通过:1,失败:0,统计:100% 通过