解析自定义过滤器语法的最佳方式
Best way to parse custom Filtersyntax
我有一个程序允许用户在 DataGridView 的 header 列的文本框中输入过滤器。
然后将此文本解析为 FilterOperations 列表。
目前我将字符串标记化,然后在一个巨大的 for 循环中构建列表。
我可以使用哪些设计模式来摆脱巨大的构造?
我可以采取任何其他措施来改进设计吗?
在当前状态下,很难添加对其他运算符、数据类型的支持或构建过滤器列表之外的其他内容。可以说我需要用构建表达式(很快就会出现这种情况)或构建 SQL Where 子句来替换过滤器列表。
过滤器语法
过滤器遵循此语法并且对字符串、数字和日期时间有效:
范围运算符
下限 .. 上限
29..52 将被解析为过滤器列表中的两个元素 "x >= 29" 和 "x <=52"
低于
.. 上限
..52 将被解析为 "x < 52"
大于
下限 ..
29.. 将被解析为 "x > 29"
通配符
*someText*
等于 SQL
中的 x LIKE "%someText%"
字符串文字
' 运算符,如 .. 或 * 在单引号之间被忽略 '
标记(Token)
所以我定义了三个Token
RangeOperator 用于 ..
Wildcard 用于 *
Text 用于纯值和单引号中的值
我构建列表的丑陋代码
/// <summary>
/// Parses the filter text entered for one column into a list of filter operations.
/// </summary>
/// <remarks>
/// Recognised forms, based on the tokens returned by <c>tokenize</c>/<c>cleanUp</c>:
/// <c>a..b</c> produces two entries (GreaterOrEqual a, LessOrEqual b), <c>..b</c> produces
/// Less b, <c>a..</c> produces Greater a. Without a range operator the Text tokens are
/// escaped and concatenated, every Wildcard token becomes <c>%</c>, and the result is a
/// Like entry for string columns or an Equal entry for float/DateTime columns.
/// </remarks>
/// <typeparam name="T">Element type forwarded to <c>FilterList</c>.</typeparam>
/// <param name="filter">Raw filter text; handed to <c>tokenize</c> unchanged.</param>
/// <param name="columnname">Column name stored in every produced entry.</param>
/// <param name="dataType">Column type; only float, DateTime and string are accepted.</param>
/// <returns>The list of filter entries; never null.</returns>
/// <exception cref="NotSupportedException"><paramref name="dataType"/> is not float, DateTime or string.</exception>
/// <exception cref="FilterException">The token sequence is malformed or a float operand does not parse.</exception>
public static FilterList<T> Parse<T>(string filter, string columnname, Type dataType) where T : class
{
    if (dataType != typeof(float) && dataType != typeof(DateTime) && dataType != typeof(string))
    {
        throw new NotSupportedException(String.Format("Data Type is not supported '{0}'", dataType));
    }

    Token[] filterParts = cleanUp(tokenize(filter));
    StringBuilder pattern = new StringBuilder();
    for (int i = 0; i < filterParts.Length; i++)
    {
        Token currentToken = filterParts[i];

        // A range operator decides the whole filter: it either yields a result or throws.
        if (currentToken.TokenType == TokenType.RangeOperator)
        {
            return parseRangeFilter<T>(filterParts, i, columnname, dataType);
        }

        if (currentToken.TokenType == TokenType.Wildcard)
        {
            // Wildcards only make sense for pattern matching on strings.
            if (dataType != typeof(string))
            {
                throw new FilterException("Operator not allowed with this Data Type");
            }
            pattern.Append("%");
        }
        else if (currentToken.TokenType == TokenType.Text)
        {
            pattern.Append(escape(currentToken.Text));
        }
    }

    // No range operator present: match the collected text as a whole.
    FilterOperator comparison = dataType == typeof(string) ? FilterOperator.Like : FilterOperator.Equal;
    return buildSingleFilter<T>(columnname, comparison, pattern.ToString(), dataType,
        "parameter has wrong format '{0}'");
}

// Handles the three range forms ("..b", "a..", "a..b"). 'rangeIndex' is the position of the
// range operator inside 'filterParts'. Always returns a list or throws.
private static FilterList<T> parseRangeFilter<T>(Token[] filterParts, int rangeIndex, string columnname, Type dataType) where T : class
{
    if (filterParts.Length < 2)
    {
        throw new FilterException("Missing argument for RangeOperator");
    }
    if (filterParts.Length > 3)
    {
        throw new FilterException("RangeOperator can't be mixed with other operators");
    }

    if (rangeIndex == 0)
    {
        // "..b": upper bound only.
        if (filterParts.Length != 2)
        {
            throw new FilterException("too many arguments");
        }
        string upperOnly = requireOperandText(filterParts[1], "Text must have value");
        // Ordinal comparison: this is a syntactic check for a literal dot, not a linguistic one.
        if (upperOnly.StartsWith(".", StringComparison.Ordinal))
        {
            throw new FilterException("Text starting with a dot is not valid");
        }
        return buildSingleFilter<T>(columnname, FilterOperator.Less, upperOnly, dataType,
            "right parameter has wrong format '{0}'");
    }

    if (rangeIndex != 1)
    {
        // The operator may only be the first or the second token.
        throw new FilterException("unexpected parameter");
    }

    if (filterParts.Length == 2)
    {
        // "a..": lower bound only.
        string lowerOnly = requireOperandText(filterParts[0], "Argument must have value");
        return buildSingleFilter<T>(columnname, FilterOperator.Greater, lowerOnly, dataType,
            "left parameter has wrong format '{0}'");
    }

    // "a..b": inclusive range, reported as two entries.
    string lowerText = requireOperandText(filterParts[0], "parameter must have value");
    string upperText = requireOperandText(filterParts[2], "parameter must have value");

    if (dataType == typeof(string))
    {
        return new FilterList<T>
        {
            { columnname, FilterOperator.GreaterOrEqual, lowerText },
            { columnname, FilterOperator.LessOrEqual, upperText }
        };
    }
    if (dataType == typeof(float))
    {
        // The upper bound is converted first so that the first reported error stays the same.
        float upperNumber = parseFloatOperand(upperText, "right parameter has wrong format '{0}'");
        float lowerNumber = parseFloatOperand(lowerText, "left parameter has wrong format '{0}'");
        return new FilterList<T>
        {
            { columnname, FilterOperator.GreaterOrEqual, lowerNumber },
            { columnname, FilterOperator.LessOrEqual, upperNumber }
        };
    }
    if (dataType == typeof(DateTime))
    {
        // Same conversion order as for floats: upper bound first.
        DateTime upperDate = parseDateTime(upperText);
        DateTime lowerDate = parseDateTime(lowerText);
        return new FilterList<T>
        {
            { columnname, FilterOperator.GreaterOrEqual, lowerDate },
            { columnname, FilterOperator.LessOrEqual, upperDate }
        };
    }
    throw new NotSupportedException(String.Format("Data Type is not supported '{0}'", dataType));
}

// Builds a one-entry list comparing the column with 'text' converted to 'dataType'.
// 'formatError' is the String.Format template used when a float operand does not parse.
// NOTE(review): assumes FilterOperator can be used as a parameter type (e.g. an enum) - confirm.
private static FilterList<T> buildSingleFilter<T>(string columnname, FilterOperator comparison, string text, Type dataType, string formatError) where T : class
{
    if (dataType == typeof(string))
    {
        return new FilterList<T> { { columnname, comparison, text } };
    }
    if (dataType == typeof(float))
    {
        float number = parseFloatOperand(text, formatError);
        return new FilterList<T> { { columnname, comparison, number } };
    }
    if (dataType == typeof(DateTime))
    {
        DateTime moment = parseDateTime(text);
        return new FilterList<T> { { columnname, comparison, moment } };
    }
    // Parse validates dataType up front, so this only guards against future misuse.
    throw new NotSupportedException(String.Format("Data Type is not supported '{0}'", dataType));
}

// Returns the text of an operand token; rejects non-Text tokens and empty text.
private static string requireOperandText(Token operand, string emptyMessage)
{
    if (operand.TokenType != TokenType.Text)
    {
        throw new FilterException("TextToken expected");
    }
    if (String.IsNullOrEmpty(operand.Text))
    {
        throw new FilterException(emptyMessage);
    }
    return operand.Text;
}

// Converts an operand to float or throws a FilterException built from 'formatError'.
// No culture is passed, so float.TryParse applies the current culture.
// NOTE(review): presumably intended because the text is typed by the user - confirm.
private static float parseFloatOperand(string text, string formatError)
{
    float number;
    if (!float.TryParse(text, out number))
    {
        throw new FilterException(String.Format(formatError, text));
    }
    return number;
}
您可以使用 Gold Parser 来创建您的语法树或以任何其他方式拥有它。
这是 link http://goldparser.org/
除此之外,您还可以使用访问者设计模式来生成您的过滤器列表。
https://en.wikipedia.org/wiki/Visitor_pattern
有了这两个,您就可以做出一个相当可扩展的解决方案。
您需要找到一个基于 Parsing Expression Grammars 的 C# 代码生成器。它允许您定义一个语法,然后由生成器将其转换为代码。然后代码将能够解析符合您期望的语法的文本。
一个非常快速的 google-fu 表明 peg-sharp 可以工作。
为了学习使用 PEG,您可以尝试 the online version of PEG.js,它几乎可以按照您最终使用的工作流程工作:
- 键入 PEG 声明(左 window)
- javascript 动态更新解析器(右上window)
- 解析器解析您的输入并产生结果(右下 window)
作为概念证明,这里是您可以复制粘贴到 PEG.js 中的语法的暂定实现(我想可以设法将其嵌入到 Stack Overflow 的代码片段小部件中):
语法如下:
// Entry rule: the whole input is a list of filters.
start
= filters
// One filter, a single space, then more filters -> an AND node (right-recursive);
// otherwise just one filter.
filters
= left:filter " " right:filters { return {filter: left, operation: "AND", filters: right};}
/ filter
// Alternatives are tried in this order: range, wildcard, plain word, quoted sentence.
filter
= applicableRange:range {return {type: "range", range: applicableRange};}
/ openWord:wildcard {return {type: "wildcard", word: openWord};}
/ simpleWord:word {return simpleWord;}
/ sentence:sentence {return sentence;}
// Quoted text; digits, ASCII letters, space, '*' and '.' are allowed between the quotes.
// NOTE(review): the '+' after the opening quote accepts one or more quotes - probably
// a single quote was intended; confirm.
sentence
= "'" + letters:[0-9a-zA-Z *.]* "'" {return {type: "sentence", value: letters.join("")};}
// One or more ASCII alphanumerics; "aword" is the rule's display name.
word "aword"
= letters:[0-9a-zA-Z]+ { return {type: "word", value: letters.join("")}; }
// A word with '*' on both sides, only in front, or only behind.
wildcard
=
"*" word:word "*" {return {type: "wildcardBoth", value: word};}
/ "*" word:word {return {type: "wildcardStart", value: word};}
/ word:word "*" {return {type: "wildcardEnd", value: word};}
// Optional lower bound, "..", optional upper bound; both bounds are optional words.
range "range"
= left:word? ".." right:word? {return {from: left, to: right};}
基本上,语法可以让您定义语言的组成部分,以及它们之间的关系。例如,过滤器可以是一个范围、一个通配符、一个词、一个句子或什么都不是(至少这是我在定义语法时所追求的;最后一个选项是结束过滤器中的递归)。
与这些块一起,您可以定义遇到这些块时的输出。在这种情况下,我输出一个 JSON 对象,它表示应该进行哪种过滤,以及过滤器将具有哪些参数。
如果您使用以下输入测试语法:
'testing range' 123..456 123.. ..abc 'and testing wildcards' word1* *word2 *word3* cool heh
你会得到一个描述应该根据语法构建的过滤器的结构:
{
"filter": {
"type": "sentence",
"value": "testing range"
},
"operation": "AND",
"filters": {
"filter": {
"type": "range",
"range": {
"from": {
"type": "word",
"value": "123"
},
"to": {
"type": "word",
"value": "456"
}
}
},
"operation": "AND",
"filters": {
"filter": {
"type": "range",
"range": {
"from": {
"type": "word",
"value": "123"
},
"to": null
}
},
"operation": "AND",
"filters": {
"filter": {
"type": "range",
"range": {
"from": null,
"to": {
"type": "word",
"value": "abc"
}
}
},
"operation": "AND",
"filters": {
"filter": {
"type": "sentence",
"value": "and testing wildcards"
},
"operation": "AND",
"filters": {
"filter": {
"type": "wildcard",
"word": {
"type": "wildcardEnd",
"value": {
"type": "word",
"value": "word1"
}
}
},
"operation": "AND",
"filters": {
"filter": {
"type": "wildcard",
"word": {
"type": "wildcardStart",
"value": {
"type": "word",
"value": "word2"
}
}
},
"operation": "AND",
"filters": {
"filter": {
"type": "wildcard",
"word": {
"type": "wildcardBoth",
"value": {
"type": "word",
"value": "word3"
}
}
},
"operation": "AND",
"filters": {
"filter": {
"type": "word",
"value": "cool"
},
"operation": "AND",
"filters": {
"type": "word",
"value": "heh"
}
}
}
}
}
}
}
}
}
}
C# 生成器的原理是相同的:将语法编译成一些能够解析您的输入的 C# 代码,并定义当解析遇到这个或那个块时应该发生什么。
如果发生更改,您将需要重新编译语法(尽管它可以很容易地包含在您的构建步骤中),但您将能够生成一个结构来表示已解析的过滤器并使用它来过滤您的搜索结果。
PEG 的一个巨大优势是该格式广为人知,并且有大量在线资源可供学习,因此这些知识可以转移到其他语言/用途
我有一个程序允许用户在 DataGridView 的 header 列的文本框中输入过滤器。 然后将此文本解析为 FilterOperations 列表。
目前我将字符串标记化,然后在一个巨大的 for 循环中构建列表。
我可以使用哪些设计模式来摆脱巨大的构造?
我可以采取任何其他措施来改进设计吗?
在当前状态下,很难添加对其他运算符、数据类型的支持或构建过滤器列表之外的其他内容。可以说我需要用构建表达式(很快就会出现这种情况)或构建 SQL Where 子句来替换过滤器列表。
过滤器语法
过滤器遵循此语法并且对字符串、数字和日期时间有效:
范围运算符
下限 .. 上限
29..52 将被解析为过滤器列表中的两个元素 "x >= 29" 和 "x <=52"
低于
.. 上限
..52 将被解析为 "x < 52"
大于
下限 ..
29.. 将被解析为 "x > 29"
通配符
*someText*
等于 SQL 中的 x LIKE "%someText%"
字符串文字
' 运算符,如 .. 或 * 在单引号之间被忽略 '
标记(Token)
所以我定义了三个Token
RangeOperator 用于 ..
Wildcard 用于 *
Text 用于纯值和单引号中的值
我构建列表的丑陋代码
/// <summary>
/// Parses the filter text entered for one column into a list of filter operations.
/// </summary>
/// <remarks>
/// Recognised forms, based on the tokens returned by <c>tokenize</c>/<c>cleanUp</c>:
/// <c>a..b</c> produces two entries (GreaterOrEqual a, LessOrEqual b), <c>..b</c> produces
/// Less b, <c>a..</c> produces Greater a. Without a range operator the Text tokens are
/// escaped and concatenated, every Wildcard token becomes <c>%</c>, and the result is a
/// Like entry for string columns or an Equal entry for float/DateTime columns.
/// </remarks>
/// <typeparam name="T">Element type forwarded to <c>FilterList</c>.</typeparam>
/// <param name="filter">Raw filter text; handed to <c>tokenize</c> unchanged.</param>
/// <param name="columnname">Column name stored in every produced entry.</param>
/// <param name="dataType">Column type; only float, DateTime and string are accepted.</param>
/// <returns>The list of filter entries; never null.</returns>
/// <exception cref="NotSupportedException"><paramref name="dataType"/> is not float, DateTime or string.</exception>
/// <exception cref="FilterException">The token sequence is malformed or a float operand does not parse.</exception>
public static FilterList<T> Parse<T>(string filter, string columnname, Type dataType) where T : class
{
    if (dataType != typeof(float) && dataType != typeof(DateTime) && dataType != typeof(string))
    {
        throw new NotSupportedException(String.Format("Data Type is not supported '{0}'", dataType));
    }

    Token[] filterParts = cleanUp(tokenize(filter));
    StringBuilder pattern = new StringBuilder();
    for (int i = 0; i < filterParts.Length; i++)
    {
        Token currentToken = filterParts[i];

        // A range operator decides the whole filter: it either yields a result or throws.
        if (currentToken.TokenType == TokenType.RangeOperator)
        {
            return parseRangeFilter<T>(filterParts, i, columnname, dataType);
        }

        if (currentToken.TokenType == TokenType.Wildcard)
        {
            // Wildcards only make sense for pattern matching on strings.
            if (dataType != typeof(string))
            {
                throw new FilterException("Operator not allowed with this Data Type");
            }
            pattern.Append("%");
        }
        else if (currentToken.TokenType == TokenType.Text)
        {
            pattern.Append(escape(currentToken.Text));
        }
    }

    // No range operator present: match the collected text as a whole.
    FilterOperator comparison = dataType == typeof(string) ? FilterOperator.Like : FilterOperator.Equal;
    return buildSingleFilter<T>(columnname, comparison, pattern.ToString(), dataType,
        "parameter has wrong format '{0}'");
}

// Handles the three range forms ("..b", "a..", "a..b"). 'rangeIndex' is the position of the
// range operator inside 'filterParts'. Always returns a list or throws.
private static FilterList<T> parseRangeFilter<T>(Token[] filterParts, int rangeIndex, string columnname, Type dataType) where T : class
{
    if (filterParts.Length < 2)
    {
        throw new FilterException("Missing argument for RangeOperator");
    }
    if (filterParts.Length > 3)
    {
        throw new FilterException("RangeOperator can't be mixed with other operators");
    }

    if (rangeIndex == 0)
    {
        // "..b": upper bound only.
        if (filterParts.Length != 2)
        {
            throw new FilterException("too many arguments");
        }
        string upperOnly = requireOperandText(filterParts[1], "Text must have value");
        // Ordinal comparison: this is a syntactic check for a literal dot, not a linguistic one.
        if (upperOnly.StartsWith(".", StringComparison.Ordinal))
        {
            throw new FilterException("Text starting with a dot is not valid");
        }
        return buildSingleFilter<T>(columnname, FilterOperator.Less, upperOnly, dataType,
            "right parameter has wrong format '{0}'");
    }

    if (rangeIndex != 1)
    {
        // The operator may only be the first or the second token.
        throw new FilterException("unexpected parameter");
    }

    if (filterParts.Length == 2)
    {
        // "a..": lower bound only.
        string lowerOnly = requireOperandText(filterParts[0], "Argument must have value");
        return buildSingleFilter<T>(columnname, FilterOperator.Greater, lowerOnly, dataType,
            "left parameter has wrong format '{0}'");
    }

    // "a..b": inclusive range, reported as two entries.
    string lowerText = requireOperandText(filterParts[0], "parameter must have value");
    string upperText = requireOperandText(filterParts[2], "parameter must have value");

    if (dataType == typeof(string))
    {
        return new FilterList<T>
        {
            { columnname, FilterOperator.GreaterOrEqual, lowerText },
            { columnname, FilterOperator.LessOrEqual, upperText }
        };
    }
    if (dataType == typeof(float))
    {
        // The upper bound is converted first so that the first reported error stays the same.
        float upperNumber = parseFloatOperand(upperText, "right parameter has wrong format '{0}'");
        float lowerNumber = parseFloatOperand(lowerText, "left parameter has wrong format '{0}'");
        return new FilterList<T>
        {
            { columnname, FilterOperator.GreaterOrEqual, lowerNumber },
            { columnname, FilterOperator.LessOrEqual, upperNumber }
        };
    }
    if (dataType == typeof(DateTime))
    {
        // Same conversion order as for floats: upper bound first.
        DateTime upperDate = parseDateTime(upperText);
        DateTime lowerDate = parseDateTime(lowerText);
        return new FilterList<T>
        {
            { columnname, FilterOperator.GreaterOrEqual, lowerDate },
            { columnname, FilterOperator.LessOrEqual, upperDate }
        };
    }
    throw new NotSupportedException(String.Format("Data Type is not supported '{0}'", dataType));
}

// Builds a one-entry list comparing the column with 'text' converted to 'dataType'.
// 'formatError' is the String.Format template used when a float operand does not parse.
// NOTE(review): assumes FilterOperator can be used as a parameter type (e.g. an enum) - confirm.
private static FilterList<T> buildSingleFilter<T>(string columnname, FilterOperator comparison, string text, Type dataType, string formatError) where T : class
{
    if (dataType == typeof(string))
    {
        return new FilterList<T> { { columnname, comparison, text } };
    }
    if (dataType == typeof(float))
    {
        float number = parseFloatOperand(text, formatError);
        return new FilterList<T> { { columnname, comparison, number } };
    }
    if (dataType == typeof(DateTime))
    {
        DateTime moment = parseDateTime(text);
        return new FilterList<T> { { columnname, comparison, moment } };
    }
    // Parse validates dataType up front, so this only guards against future misuse.
    throw new NotSupportedException(String.Format("Data Type is not supported '{0}'", dataType));
}

// Returns the text of an operand token; rejects non-Text tokens and empty text.
private static string requireOperandText(Token operand, string emptyMessage)
{
    if (operand.TokenType != TokenType.Text)
    {
        throw new FilterException("TextToken expected");
    }
    if (String.IsNullOrEmpty(operand.Text))
    {
        throw new FilterException(emptyMessage);
    }
    return operand.Text;
}

// Converts an operand to float or throws a FilterException built from 'formatError'.
// No culture is passed, so float.TryParse applies the current culture.
// NOTE(review): presumably intended because the text is typed by the user - confirm.
private static float parseFloatOperand(string text, string formatError)
{
    float number;
    if (!float.TryParse(text, out number))
    {
        throw new FilterException(String.Format(formatError, text));
    }
    return number;
}
您可以使用 Gold Parser 来创建您的语法树或以任何其他方式拥有它。 这是 link http://goldparser.org/
除此之外,您还可以使用访问者设计模式来生成您的过滤器列表。 https://en.wikipedia.org/wiki/Visitor_pattern
有了这两个,您就可以做出一个相当可扩展的解决方案。
您需要找到一个基于 Parsing Expression Grammars 的 C# 代码生成器。它允许您定义一个语法,然后由生成器将其转换为代码。然后代码将能够解析符合您期望的语法的文本。
一个非常快速的 google-fu 表明 peg-sharp 可以工作。
为了学习使用 PEG,您可以尝试 the online version of PEG.js,它几乎可以按照您最终使用的工作流程工作:
- 键入 PEG 声明(左 window)
- javascript 动态更新解析器(右上window)
- 解析器解析您的输入并产生结果(右下 window)
作为概念证明,这里是您可以复制粘贴到 PEG.js 中的语法的暂定实现(我想可以设法将其嵌入到 Stack Overflow 的代码片段小部件中):
语法如下:
// Entry rule: the whole input is a list of filters.
start
= filters
// One filter, a single space, then more filters -> an AND node (right-recursive);
// otherwise just one filter.
filters
= left:filter " " right:filters { return {filter: left, operation: "AND", filters: right};}
/ filter
// Alternatives are tried in this order: range, wildcard, plain word, quoted sentence.
filter
= applicableRange:range {return {type: "range", range: applicableRange};}
/ openWord:wildcard {return {type: "wildcard", word: openWord};}
/ simpleWord:word {return simpleWord;}
/ sentence:sentence {return sentence;}
// Quoted text; digits, ASCII letters, space, '*' and '.' are allowed between the quotes.
// NOTE(review): the '+' after the opening quote accepts one or more quotes - probably
// a single quote was intended; confirm.
sentence
= "'" + letters:[0-9a-zA-Z *.]* "'" {return {type: "sentence", value: letters.join("")};}
// One or more ASCII alphanumerics; "aword" is the rule's display name.
word "aword"
= letters:[0-9a-zA-Z]+ { return {type: "word", value: letters.join("")}; }
// A word with '*' on both sides, only in front, or only behind.
wildcard
=
"*" word:word "*" {return {type: "wildcardBoth", value: word};}
/ "*" word:word {return {type: "wildcardStart", value: word};}
/ word:word "*" {return {type: "wildcardEnd", value: word};}
// Optional lower bound, "..", optional upper bound; both bounds are optional words.
range "range"
= left:word? ".." right:word? {return {from: left, to: right};}
基本上,语法可以让您定义语言的组成部分,以及它们之间的关系。例如,过滤器可以是一个范围、一个通配符、一个词、一个句子或什么都不是(至少这是我在定义语法时所追求的;最后一个选项是结束过滤器中的递归)。
与这些块一起,您可以定义遇到这些块时的输出。在这种情况下,我输出一个 JSON 对象,它表示应该进行哪种过滤,以及过滤器将具有哪些参数。
如果您使用以下输入测试语法:
'testing range' 123..456 123.. ..abc 'and testing wildcards' word1* *word2 *word3* cool heh
你会得到一个描述应该根据语法构建的过滤器的结构:
{
"filter": {
"type": "sentence",
"value": "testing range"
},
"operation": "AND",
"filters": {
"filter": {
"type": "range",
"range": {
"from": {
"type": "word",
"value": "123"
},
"to": {
"type": "word",
"value": "456"
}
}
},
"operation": "AND",
"filters": {
"filter": {
"type": "range",
"range": {
"from": {
"type": "word",
"value": "123"
},
"to": null
}
},
"operation": "AND",
"filters": {
"filter": {
"type": "range",
"range": {
"from": null,
"to": {
"type": "word",
"value": "abc"
}
}
},
"operation": "AND",
"filters": {
"filter": {
"type": "sentence",
"value": "and testing wildcards"
},
"operation": "AND",
"filters": {
"filter": {
"type": "wildcard",
"word": {
"type": "wildcardEnd",
"value": {
"type": "word",
"value": "word1"
}
}
},
"operation": "AND",
"filters": {
"filter": {
"type": "wildcard",
"word": {
"type": "wildcardStart",
"value": {
"type": "word",
"value": "word2"
}
}
},
"operation": "AND",
"filters": {
"filter": {
"type": "wildcard",
"word": {
"type": "wildcardBoth",
"value": {
"type": "word",
"value": "word3"
}
}
},
"operation": "AND",
"filters": {
"filter": {
"type": "word",
"value": "cool"
},
"operation": "AND",
"filters": {
"type": "word",
"value": "heh"
}
}
}
}
}
}
}
}
}
}
C# 生成器的原理是相同的:将语法编译成一些能够解析您的输入的 C# 代码,并定义当解析遇到这个或那个块时应该发生什么。
如果发生更改,您将需要重新编译语法(尽管它可以很容易地包含在您的构建步骤中),但您将能够生成一个结构来表示已解析的过滤器并使用它来过滤您的搜索结果。
PEG 的一个巨大优势是该格式广为人知,并且有大量在线资源可供学习,因此这些知识可以转移到其他语言/用途