将标准查询解析为 JSON 子句树
Parsing a Standard Query into JSON Clause Tree
我想为我的系统构建一个查询,外部系统可以根据条件使用该查询进行配置。
在后端,我发现很容易有一个 JSON 子句树,它可以递归计算。
[
"AND",
[
{
"operator": "eq",
"field": "section1.fieldabc",
"value": "value1"
},
[
"OR",
{
"operator": "lt",
"field": "section2.fieldxyz",
"value": 5000
},
{
"operator": "gt",
"field": "section2.fieldxyz",
"value": 1000
}
]
]
]
或类似的东西。 (上面我把它表示成一个 s 表达式树)
问题是我希望它作为后端的 JSON 子句树,但我不希望用户需要编写这样的东西。
如果我可以创建类似 JQL(Jira 查询语言)之类的查询,那就太好了。
但我不想花很多精力为要转换的语言制作一个完整的证明解析器。
有没有标准化的方法来实现这个?也许是使用库转换的标准化查询语言(在 JS 或 Java 中)。
从最终用户的角度来看,我希望上面的查询像
section1.fieldabc == value1 AND (section2.fieldxyz<5000 OR section2.fieldxyz>10000)
用 TypeScript 编写了一个(相对)简单的解析器,可以解析二元运算符(具有正确的操作顺序)和常量、处理括号、全局变量和简单的字段访问。源代码可在 my GitHub (将来可能会或可能不会更新) 上找到,而这里是一个带有 JS 版本的片段:
const BINARY_OPERATORS = {
// AND/OR
'AND': 1,
'OR': 0,
// Equal stuff
'==': 2,
'!=': 2,
'<': 2,
'<=': 2,
'>': 2,
'>=': 2,
}
function parseConstant(input) {
// Numbers (including floats, octals and hexadecimals)
let match = input.match(/^\s*((?:0[xo]|\d*\.)?\d+)/);
if (match) {
const [{ length }, digits] = match;
if (digits.includes('.')) {
return [length, { type: 'constant', value: parseFloat(digits) }];
}
return [length, { type: 'constant', value: parseInt(digits) }];
}
// Strings
match = input.match(/^(\s*)(["'])/);
if (match) {
const [, white, quote] = match;
let value = '';
let escape = false;
for (let i = white.length; i < input.length; i++) {
const ch = input[i];
if (ch === '\' && !escape) {
escape = true;
} else if (escape) {
escape = false;
value += ch;
} else if (ch === quote) {
return [i + 1, { type: 'constant', value }];
} else {
value += ch;
}
}
return [white.length];
}
// Booleans
match = input.match(/^\s*(true|false)/);
if (match) {
const [{ length }, bool] = match;
return [length, { type: 'constant', value: bool === 'true' }];
}
return [0];
}
function parseVariable(input) {
const match = input.match(/^\s*(\w+[\w\d]*)/);
if (!match) return [0];
return [match[0].length, { type: 'variable', name: match[1] }];
}
function orderBinaryOperations(expr) {
const { left, right } = expr;
const priority = BINARY_OPERATORS[expr.operator];
if (left.type == 'binop' && BINARY_OPERATORS[left.operator] < priority) {
// LOP < EXP
// (leftL LOP leftR) EXP exprR) => leftL LOP (leftR EXP exprR)
return orderBinaryOperations({
type: 'binop',
operator: left.operator,
left: left.left,
right: {
type: 'binop',
operator: expr.operator,
left: left.right,
right: expr.right,
},
});
} else if (right.type === 'binop' && BINARY_OPERATORS[right.operator] <= priority) {
// EXP >= ROP
// exprL EXP (rightL ROP rightR) => (exprL EXP rightL) ROP rightR
return orderBinaryOperations({
type: 'binop',
operator: right.operator,
left: {
type: 'binop',
operator: expr.operator,
left: expr.left,
right: right.left,
},
right: right.right,
});
}
return expr;
}
function parsePostExpression(expr, input) {
if (!expr[1]) return expr;
const trimmed = input.trimLeft();
const white = input.length - trimmed.length;
// Binary operation
for (const operator in BINARY_OPERATORS) {
if (trimmed.startsWith(operator)) {
const offset = expr[0] + white + operator.length;
const rightResult = parseExpression(trimmed.slice(operator.length));
if (!rightResult[1]) throw new Error(`Missing right-hand side expression for ${operator}`);
return parsePostExpression([
offset + rightResult[0],
orderBinaryOperations({
type: 'binop',
operator,
left: expr[1],
right: rightResult[1],
})
], trimmed.slice(rightResult[0]));
}
}
// Field access
const match = input.match(/^\.(\w+[\w\d]*)/);
if (match) {
const [{ length }, field] = match;
return parsePostExpression([
expr[0] + white + length,
{ type: 'field', object: expr[1], field }
], trimmed.slice(length));
}
return expr;
}
function parseExpression(input) {
// Constants
let result = parseConstant(input);
// Variables
if (!result[1]) result = parseVariable(input);
// Brackets
if (!result[1]) {
const match = input.match(/^\s*\(/);
if (match) {
const [{ length }] = match;
const brackets = parseExpression(input.slice(length));
if (brackets[1]) {
const offset = brackets[0] + length;
const endBracket = input.slice(offset).match(/^\s*\)/);
if (!endBracket) throw new Error(`Missing ')' in '${input}'`);
result = [offset + endBracket[0].length, {
type: 'brackets', expr: brackets[1]
}];
}
}
}
return parsePostExpression(result, input.slice(result[0]));
}
function parse(input) {
const [length, expr] = parseExpression(input);
if (length === input.length) {
if (expr) return expr;
throw new Error(`Unfinished expression`);
}
if (!expr) throw new Error(`Unexpected character at ${length}`);
throw new Error(`Unexpected character at ${length}`);
}
const parsed = parse('(section2.fieldxyz<5000 OR section2.fieldxyz>10000) AND section1.fieldabc == value1');
console.log(JSON.stringify(parsed, null, 4));
function formatExpression(expr) {
if (expr.type === 'binop') {
// Wrapping in [] so the order of operations is clearly visible
return `[${formatExpression(expr.left)} ${expr.operator} ${formatExpression(expr.right)}]`;
} else if (expr.type === 'brackets') {
return `(${formatExpression(expr.expr)})`;
} else if (expr.type === 'constant') {
return JSON.stringify(expr.value);
} else if (expr.type === 'field') {
return `${formatExpression(expr.object)}.${expr.field}`;
} else if (expr.type === 'variable') {
return expr.name;
}
throw new Error(`Unexpected expression type '${expr.type}'`);
}
console.log('=>', formatExpression(parsed));
转换为 JSON 时的示例输出:
{
"type": "binop",
"operator": "AND",
"left": {
"type": "brackets",
"expr": {
"type": "binop",
"operator": "OR",
"left": {
"type": "binop",
"operator": "<",
"left": {
"type": "field",
"object": {
"type": "variable",
"name": "section2"
},
"field": "fieldxyz"
},
"right": {
"type": "constant",
"value": 5000
}
},
"right": {
"type": "binop",
"operator": ">",
"left": {
"type": "field",
"object": {
"type": "variable",
"name": "section2"
},
"field": "fieldxyz"
},
"right": {
"type": "constant",
"value": 10000
}
}
}
},
"right": {
"type": "binop",
"operator": "==",
"left": {
"type": "field",
"object": {
"type": "variable",
"name": "section1"
},
"field": "fieldabc"
},
"right": {
"type": "variable",
"name": "value1"
}
}
}
我一直使用带有 type
字段的对象,尽管您仍然可以将 binop
对象转换为例如['AND', expr1, expr2]
。而不是简单地让二元运算总是在一个只是 a.b.c.etc
字符串的字段上,我的更高级一些。不过,仍然可以添加限制,至少基础工作在那里。
我已经解决了这个问题,因为我喜欢写这类东西。实际上,我建议采纳 Chandan 关于使用 jQuery QueryBuilder or react-query-builder 的建议,以使其对您的用户更加轻松和友好。
如果您更针对喜欢 SQL 语法的“超级用户”,我的代码可以提供帮助。不过,可能有许多更好的库可以帮助解决这个问题,例如,它们可能更强大。报告语法错误或试图访问不存在的 variables/fields。再说一次,由于我的代码只有大约 150 行(如果包含类型,则为 200 行)并且写得不是太奇怪,如果更适合您的话,根据您的需要进行调整应该不会太难。
我想为我的系统构建一个查询,外部系统可以根据条件使用该查询进行配置。
在后端,我发现很容易有一个 JSON 子句树,它可以递归计算。
[
"AND",
[
{
"operator": "eq",
"field": "section1.fieldabc",
"value": "value1"
},
[
"OR",
{
"operator": "lt",
"field": "section2.fieldxyz",
"value": 5000
},
{
"operator": "gt",
"field": "section2.fieldxyz",
"value": 1000
}
]
]
]
或类似的东西。 (上面我把它表示成一个 s 表达式树)
问题是我希望它作为后端的 JSON 子句树,但我不希望用户需要编写这样的东西。 如果我可以创建类似 JQL(Jira 查询语言)之类的查询,那就太好了。 但我不想花很多精力为要转换的语言制作一个完整的证明解析器。
有没有标准化的方法来实现这个?也许是使用库转换的标准化查询语言(在 JS 或 Java 中)。
从最终用户的角度来看,我希望上面的查询像
section1.fieldabc == value1 AND (section2.fieldxyz<5000 OR section2.fieldxyz>10000)
用 TypeScript 编写了一个(相对)简单的解析器,可以解析二元运算符(具有正确的操作顺序)和常量、处理括号、全局变量和简单的字段访问。源代码可在 my GitHub (将来可能会或可能不会更新) 上找到,而这里是一个带有 JS 版本的片段:
const BINARY_OPERATORS = {
// AND/OR
'AND': 1,
'OR': 0,
// Equal stuff
'==': 2,
'!=': 2,
'<': 2,
'<=': 2,
'>': 2,
'>=': 2,
}
function parseConstant(input) {
// Numbers (including floats, octals and hexadecimals)
let match = input.match(/^\s*((?:0[xo]|\d*\.)?\d+)/);
if (match) {
const [{ length }, digits] = match;
if (digits.includes('.')) {
return [length, { type: 'constant', value: parseFloat(digits) }];
}
return [length, { type: 'constant', value: parseInt(digits) }];
}
// Strings
match = input.match(/^(\s*)(["'])/);
if (match) {
const [, white, quote] = match;
let value = '';
let escape = false;
for (let i = white.length; i < input.length; i++) {
const ch = input[i];
if (ch === '\' && !escape) {
escape = true;
} else if (escape) {
escape = false;
value += ch;
} else if (ch === quote) {
return [i + 1, { type: 'constant', value }];
} else {
value += ch;
}
}
return [white.length];
}
// Booleans
match = input.match(/^\s*(true|false)/);
if (match) {
const [{ length }, bool] = match;
return [length, { type: 'constant', value: bool === 'true' }];
}
return [0];
}
function parseVariable(input) {
const match = input.match(/^\s*(\w+[\w\d]*)/);
if (!match) return [0];
return [match[0].length, { type: 'variable', name: match[1] }];
}
function orderBinaryOperations(expr) {
const { left, right } = expr;
const priority = BINARY_OPERATORS[expr.operator];
if (left.type == 'binop' && BINARY_OPERATORS[left.operator] < priority) {
// LOP < EXP
// (leftL LOP leftR) EXP exprR) => leftL LOP (leftR EXP exprR)
return orderBinaryOperations({
type: 'binop',
operator: left.operator,
left: left.left,
right: {
type: 'binop',
operator: expr.operator,
left: left.right,
right: expr.right,
},
});
} else if (right.type === 'binop' && BINARY_OPERATORS[right.operator] <= priority) {
// EXP >= ROP
// exprL EXP (rightL ROP rightR) => (exprL EXP rightL) ROP rightR
return orderBinaryOperations({
type: 'binop',
operator: right.operator,
left: {
type: 'binop',
operator: expr.operator,
left: expr.left,
right: right.left,
},
right: right.right,
});
}
return expr;
}
function parsePostExpression(expr, input) {
if (!expr[1]) return expr;
const trimmed = input.trimLeft();
const white = input.length - trimmed.length;
// Binary operation
for (const operator in BINARY_OPERATORS) {
if (trimmed.startsWith(operator)) {
const offset = expr[0] + white + operator.length;
const rightResult = parseExpression(trimmed.slice(operator.length));
if (!rightResult[1]) throw new Error(`Missing right-hand side expression for ${operator}`);
return parsePostExpression([
offset + rightResult[0],
orderBinaryOperations({
type: 'binop',
operator,
left: expr[1],
right: rightResult[1],
})
], trimmed.slice(rightResult[0]));
}
}
// Field access
const match = input.match(/^\.(\w+[\w\d]*)/);
if (match) {
const [{ length }, field] = match;
return parsePostExpression([
expr[0] + white + length,
{ type: 'field', object: expr[1], field }
], trimmed.slice(length));
}
return expr;
}
function parseExpression(input) {
// Constants
let result = parseConstant(input);
// Variables
if (!result[1]) result = parseVariable(input);
// Brackets
if (!result[1]) {
const match = input.match(/^\s*\(/);
if (match) {
const [{ length }] = match;
const brackets = parseExpression(input.slice(length));
if (brackets[1]) {
const offset = brackets[0] + length;
const endBracket = input.slice(offset).match(/^\s*\)/);
if (!endBracket) throw new Error(`Missing ')' in '${input}'`);
result = [offset + endBracket[0].length, {
type: 'brackets', expr: brackets[1]
}];
}
}
}
return parsePostExpression(result, input.slice(result[0]));
}
function parse(input) {
const [length, expr] = parseExpression(input);
if (length === input.length) {
if (expr) return expr;
throw new Error(`Unfinished expression`);
}
if (!expr) throw new Error(`Unexpected character at ${length}`);
throw new Error(`Unexpected character at ${length}`);
}
const parsed = parse('(section2.fieldxyz<5000 OR section2.fieldxyz>10000) AND section1.fieldabc == value1');
console.log(JSON.stringify(parsed, null, 4));
function formatExpression(expr) {
if (expr.type === 'binop') {
// Wrapping in [] so the order of operations is clearly visible
return `[${formatExpression(expr.left)} ${expr.operator} ${formatExpression(expr.right)}]`;
} else if (expr.type === 'brackets') {
return `(${formatExpression(expr.expr)})`;
} else if (expr.type === 'constant') {
return JSON.stringify(expr.value);
} else if (expr.type === 'field') {
return `${formatExpression(expr.object)}.${expr.field}`;
} else if (expr.type === 'variable') {
return expr.name;
}
throw new Error(`Unexpected expression type '${expr.type}'`);
}
console.log('=>', formatExpression(parsed));
转换为 JSON 时的示例输出:
{
"type": "binop",
"operator": "AND",
"left": {
"type": "brackets",
"expr": {
"type": "binop",
"operator": "OR",
"left": {
"type": "binop",
"operator": "<",
"left": {
"type": "field",
"object": {
"type": "variable",
"name": "section2"
},
"field": "fieldxyz"
},
"right": {
"type": "constant",
"value": 5000
}
},
"right": {
"type": "binop",
"operator": ">",
"left": {
"type": "field",
"object": {
"type": "variable",
"name": "section2"
},
"field": "fieldxyz"
},
"right": {
"type": "constant",
"value": 10000
}
}
}
},
"right": {
"type": "binop",
"operator": "==",
"left": {
"type": "field",
"object": {
"type": "variable",
"name": "section1"
},
"field": "fieldabc"
},
"right": {
"type": "variable",
"name": "value1"
}
}
}
我一直使用带有 type
字段的对象,尽管您仍然可以将 binop
对象转换为例如['AND', expr1, expr2]
。而不是简单地让二元运算总是在一个只是 a.b.c.etc
字符串的字段上,我的更高级一些。不过,仍然可以添加限制,至少基础工作在那里。
我已经解决了这个问题,因为我喜欢写这类东西。实际上,我建议采纳 Chandan 关于使用 jQuery QueryBuilder or react-query-builder 的建议,以使其对您的用户更加轻松和友好。
如果您更针对喜欢 SQL 语法的“超级用户”,我的代码可以提供帮助。不过,可能有许多更好的库可以帮助解决这个问题,例如,它们可能更强大。报告语法错误或试图访问不存在的 variables/fields。再说一次,由于我的代码只有大约 150 行(如果包含类型,则为 200 行)并且写得不是太奇怪,如果更适合您的话,根据您的需要进行调整应该不会太难。