从 JavaScript 中的自定义语法创建 AST

Question

我正在构建一个小型框架，它可以读取我自定义的 HTML 语法并将其转换为 HTML 代码。但是，我卡在了对代码进行标记化（tokenize）并创建 AST 这一步。我知道该算法需要使用递归，但我不知道如何正确实现。

这是我在 app.txt 文件中的自定义代码：

View {
    Heading {
        
    }

    Text {

    }
}

到目前为止，这是我的递归解析器：

/**
 * Scans `source` character by character and builds a `{ tag, children }` node.
 *
 * - Every character other than "{" and "}" (whitespace included) is appended
 *   to `tag`.
 * - On "{", the text after the brace is parsed recursively and the result is
 *   pushed onto `children`. The scan position in this call is not moved past
 *   the text the recursive call looked at, so those characters are visited
 *   again here.
 * - On "}", the node collected so far is returned.
 *
 * @param {string} source - Text written in the custom syntax.
 * @returns {{tag: string, children: Array}|undefined} The node, or `undefined`
 *     when the end of `source` is reached without encountering "}".
 */
function parse(source) {
    const children = [];
    let tag = "";
    let i = 0;

    while (i < source.length) {
        const char = source[i];

        switch (char) {
            case "{":
                // Recurse on everything after the opening brace.
                children.push(parse(source.substring(i + 1)));
                break;
            case "}":
                return { tag, children };
            default:
                tag += char;
        }

        i += 1;
    }

    // Ran out of input before seeing a closing brace.
    return undefined;
}

预计解析会产生这样的结果（并且应该能够达到任何深度）：

{
    tag: "View",
    children: [
        {
            tag: "Heading",
            children: []
        },
        {
            tag: "Text",
            children: []
        }
    ]
}

我做错了什么？如果有任何帮助，我将不胜感激。

Answer 1

让我们或多或少正式地写下你的语法：

tag := name '{' children '}'
name := letter | letter name
children := ε | tag children
letter := [a-zA-Z]

然后，让我们为语法中的每条规则编写一个解析函数。我们需要两个辅助函数：getsym，它返回输入中第一个有意义的（非空白）符号；以及 nextsym，它消费（移除）该符号。

工作示例：

/**
 * Recursive-descent parser for the grammar
 *
 *     tag      := name '{' children '}'
 *     name     := one or more ASCII letters (either case)
 *     children := zero or more tags
 *
 * Whitespace may appear between symbols but not inside a name. Anything that
 * follows the root tag's closing brace is ignored.
 *
 * @param {string} text - Source text in the custom syntax.
 * @returns {{name: string, children: Array}} Root node of the tree; every
 *     child has the same `{name, children}` shape.
 * @throws {SyntaxError} If a tag name, '{' or '}' is missing where the
 *     grammar requires one.
 */
function parse(text) {
    const WHITESPACE = /\s/;
    const LETTER = /[a-z]/i;

    // Spread splits the string by code point; a cursor is advanced over the
    // array instead of shifting elements off the front, so reading a symbol
    // costs O(1).
    const chars = [...text];
    let pos = 0;

    // Skips whitespace and returns the next symbol without consuming it
    // ('' at end of input).
    function getsym() {
        while (pos < chars.length && WHITESPACE.test(chars[pos])) {
            pos += 1;
        }
        return pos < chars.length ? chars[pos] : '';
    }

    // Consumes and returns the symbol under the cursor.
    function nextsym() {
        const sym = chars[pos];
        pos += 1;
        return sym;
    }

    // Raises a SyntaxError describing what was expected at the cursor.
    function fail(expected) {
        const found = pos < chars.length ? `'${chars[pos]}'` : 'end of input';
        throw new SyntaxError(
            `Expected ${expected} but found ${found} at character ${pos}`
        );
    }

    // Consumes `sym` (after optional whitespace) or fails.
    function expect(sym) {
        if (getsym() !== sym) {
            fail(`'${sym}'`);
        }
        nextsym();
    }

    // name := letter+  (leading whitespace is skipped, but the letters
    // themselves must be contiguous so "Vi ew" is not read as "View").
    function name() {
        getsym();
        const start = pos;
        while (pos < chars.length && LETTER.test(chars[pos])) {
            pos += 1;
        }
        if (pos === start) {
            fail('a tag name');
        }
        return chars.slice(start, pos).join('');
    }

    // children := tag*  (collected iteratively so that a long run of
    // siblings does not deepen the call stack).
    function children() {
        const list = [];
        while (getsym() !== '}') {
            if (pos >= chars.length) {
                fail("'}'");
            }
            list.push(tag());
        }
        return list;
    }

    // tag := name '{' children '}'
    function tag() {
        const n = name();
        expect('{');
        const c = children();
        expect('}');
        return {name: n, children: c};
    }

    return tag();
}

///

// Sample input: declared with `const` so the snippet also runs in strict mode
// and ES modules, where assigning to an undeclared name throws.
const text = ` View {
    Heading {
        Content {
            One {}
            Two {}
            Three {}
        }
    }
    Text {
        More {}
    }
}
`;

console.log(parse(text));

也就是说，如果您计划使用更复杂的语法，更实用的选择是使用像 peg.js 这样的解析器生成器。

从 JavaScript 中的自定义语法创建 AST

Creating AST from custom syntax in JavaScript

javascript

parsing

abstract-syntax-tree