将标准查询解析为 JSON 子句树

Parsing a Standard Query into JSON Clause Tree

我想为我的系统构建一个查询,外部系统可以根据条件使用该查询进行配置。

在后端,我发现很容易有一个 JSON 子句树,它可以递归计算。

[
  "AND",
  [
    {
      "operator": "eq",
      "field": "section1.fieldabc",
      "value": "value1"
    },
    [
      "OR",
      {
        "operator": "lt",
        "field": "section2.fieldxyz",
        "value": 5000
      },
      {
        "operator": "gt",
        "field": "section2.fieldxyz",
        "value": 1000
      }
    ]
  ]
]

或类似的东西。 (上面我把它表示成一个 s 表达式树)

问题是我希望它作为后端的 JSON 子句树,但我不希望用户需要编写这样的东西。 如果我可以创建类似 JQL(Jira 查询语言)之类的查询,那就太好了。 但我不想花很多精力为要转换的语言制作一个完整的证明解析器。

有没有标准化的方法来实现这个?也许是使用库转换的标准化查询语言(在 JS 或 Java 中)。

从最终用户的角度来看,我希望上面的查询像

section1.fieldabc == value1 AND (section2.fieldxyz<5000 OR section2.fieldxyz>10000)

用 TypeScript 编写了一个(相对)简单的解析器,可以解析二元运算符(具有正确的操作顺序)和常量、处理括号、全局变量和简单的字段访问。源代码可在 my GitHub (将来可能会或可能不会更新) 上找到,而这里是一个带有 JS 版本的片段:

const BINARY_OPERATORS = {
    // AND/OR
    'AND': 1,
    'OR': 0,
    // Equal stuff
    '==': 2,
    '!=': 2,
    '<': 2,
    '<=': 2,
    '>': 2,
    '>=': 2,
}

function parseConstant(input) {
    // Numbers (including floats, octals and hexadecimals)
    let match = input.match(/^\s*((?:0[xo]|\d*\.)?\d+)/);
    if (match) {
        const [{ length }, digits] = match;
        if (digits.includes('.')) {
            return [length, { type: 'constant', value: parseFloat(digits) }];
        }
        return [length, { type: 'constant', value: parseInt(digits) }];
    }
    // Strings
    match = input.match(/^(\s*)(["'])/);
    if (match) {
        const [, white, quote] = match;
        let value = '';
        let escape = false;
        for (let i = white.length; i < input.length; i++) {
            const ch = input[i];
            if (ch === '\' && !escape) {
                escape = true;
            } else if (escape) {
                escape = false;
                value += ch;
            } else if (ch === quote) {
                return [i + 1, { type: 'constant', value }];
            } else {
                value += ch;
            }
        }
        return [white.length];
    }
    // Booleans
    match = input.match(/^\s*(true|false)/);
    if (match) {
        const [{ length }, bool] = match;
        return [length, { type: 'constant', value: bool === 'true' }];
    }
    return [0];
}

function parseVariable(input) {
    const match = input.match(/^\s*(\w+[\w\d]*)/);
    if (!match) return [0];
    return [match[0].length, { type: 'variable', name: match[1] }];
}

function orderBinaryOperations(expr) {
    const { left, right } = expr;
    const priority = BINARY_OPERATORS[expr.operator];
    if (left.type == 'binop' && BINARY_OPERATORS[left.operator] < priority) {
        // LOP < EXP
        // (leftL LOP leftR) EXP exprR) => leftL LOP (leftR EXP exprR)
        return orderBinaryOperations({
            type: 'binop',
            operator: left.operator,
            left: left.left,
            right: {
                type: 'binop',
                operator: expr.operator,
                left: left.right,
                right: expr.right,
            },
        });
    } else if (right.type === 'binop' && BINARY_OPERATORS[right.operator] <= priority) {
        // EXP >= ROP
        // exprL EXP (rightL ROP rightR) => (exprL EXP rightL) ROP rightR
        return orderBinaryOperations({
            type: 'binop',
            operator: right.operator,
            left: {
                type: 'binop',
                operator: expr.operator,
                left: expr.left,
                right: right.left,
            },
            right: right.right,
        });
    }
    return expr;
}

function parsePostExpression(expr, input) {
    if (!expr[1]) return expr;
    const trimmed = input.trimLeft();
    const white = input.length - trimmed.length;
    // Binary operation
    for (const operator in BINARY_OPERATORS) {
        if (trimmed.startsWith(operator)) {
            const offset = expr[0] + white + operator.length;
            const rightResult = parseExpression(trimmed.slice(operator.length));
            if (!rightResult[1]) throw new Error(`Missing right-hand side expression for ${operator}`);
            return parsePostExpression([
                offset + rightResult[0],
                orderBinaryOperations({
                    type: 'binop',
                    operator,
                    left: expr[1],
                    right: rightResult[1],
                })
            ], trimmed.slice(rightResult[0]));
        }
    }
    // Field access
    const match = input.match(/^\.(\w+[\w\d]*)/);
    if (match) {
        const [{ length }, field] = match;
        return parsePostExpression([
            expr[0] + white + length,
            { type: 'field', object: expr[1], field }
        ], trimmed.slice(length));
    }
    return expr;
}

function parseExpression(input) {
    // Constants
    let result = parseConstant(input);
    // Variables
    if (!result[1]) result = parseVariable(input);
    // Brackets
    if (!result[1]) {
        const match = input.match(/^\s*\(/);
        if (match) {
            const [{ length }] = match;
            const brackets = parseExpression(input.slice(length));
            if (brackets[1]) {
                const offset = brackets[0] + length;
                const endBracket = input.slice(offset).match(/^\s*\)/);
                if (!endBracket) throw new Error(`Missing ')' in '${input}'`);
                result = [offset + endBracket[0].length, {
                    type: 'brackets', expr: brackets[1]
                }];
            }
        }
    }
    return parsePostExpression(result, input.slice(result[0]));
}

function parse(input) {
    const [length, expr] = parseExpression(input);
    if (length === input.length) {
        if (expr) return expr;
        throw new Error(`Unfinished expression`);
    }
    if (!expr) throw new Error(`Unexpected character at ${length}`);
    throw new Error(`Unexpected character at ${length}`);
}

const parsed = parse('(section2.fieldxyz<5000 OR section2.fieldxyz>10000) AND section1.fieldabc == value1');
console.log(JSON.stringify(parsed, null, 4));

function formatExpression(expr) {
    if (expr.type === 'binop') {
        // Wrapping in [] so the order of operations is clearly visible
        return `[${formatExpression(expr.left)} ${expr.operator} ${formatExpression(expr.right)}]`;
    } else if (expr.type === 'brackets') {
        return `(${formatExpression(expr.expr)})`;
    } else if (expr.type === 'constant') {
        return JSON.stringify(expr.value);
    } else if (expr.type === 'field') {
        return `${formatExpression(expr.object)}.${expr.field}`;
    } else if (expr.type === 'variable') {
        return expr.name;
    }
    throw new Error(`Unexpected expression type '${expr.type}'`);
}

console.log('=>', formatExpression(parsed));

转换为 JSON 时的示例输出:

{
    "type": "binop",
    "operator": "AND",
    "left": {
        "type": "brackets",
        "expr": {
            "type": "binop",
            "operator": "OR",
            "left": {
                "type": "binop",
                "operator": "<",
                "left": {
                    "type": "field",
                    "object": {
                        "type": "variable",
                        "name": "section2"
                    },
                    "field": "fieldxyz"
                },
                "right": {
                    "type": "constant",
                    "value": 5000
                }
            },
            "right": {
                "type": "binop",
                "operator": ">",
                "left": {
                    "type": "field",
                    "object": {
                        "type": "variable",
                        "name": "section2"
                    },
                    "field": "fieldxyz"
                },
                "right": {
                    "type": "constant",
                    "value": 10000
                }
            }
        }
    },
    "right": {
        "type": "binop",
        "operator": "==",
        "left": {
            "type": "field",
            "object": {
                "type": "variable",
                "name": "section1"
            },
            "field": "fieldabc"
        },
        "right": {
            "type": "variable",
            "name": "value1"
        }
    }
}

我一直使用带有 type 字段的对象,尽管您仍然可以将 binop 对象转换为例如['AND', expr1, expr2]。而不是简单地让二元运算总是在一个只是 a.b.c.etc 字符串的字段上,我的更高级一些。不过,仍然可以添加限制,至少基础工作在那里。

我已经解决了这个问题,因为我喜欢写这类东西。实际上,我建议采纳 Chandan 关于使用 jQuery QueryBuilder or react-query-builder 的建议,以使其对您的用户更加轻松和友好。

如果您更针对喜欢 SQL 语法的“超级用户”,我的代码可以提供帮助。不过,可能有许多更好的库可以帮助解决这个问题,例如,它们可能更强大。报告语法错误或试图访问不存在的 variables/fields。再说一次,由于我的代码只有大约 150 行(如果包含类型,则为 200 行)并且写得不是太奇怪,如果更适合您的话,根据您的需要进行调整应该不会太难。