将 XML 解析为带有嵌套括号的字符串
Parse an XML into a string with nested parentheses
我正在尝试使用正则表达式遍历 XML 字符串,以便从中构建有意义的字符串。
XML表示一个嵌套的布尔表达式。
我目前已经可以提取出属于各个等式的值,但不知道如何获取 AND/OR 运算符,也不知道如何在最终结果中加上所需的括号。
这就是 XML 的样子:
<applic id="TCTO_709_PRE_ALL">
<displayText><simplePara>All Aircraft without Extended Range Capability</simplePara></displayText>
<!--BEGIN OR-->
<evaluate andOr="or">
<!-- ( -->
<assert applicPropertyIdent="partno" applicPropertyType="prodattr" applicPropertyValues="UHK97000-15" />
<!--BEGIN AND-->
<evaluate andOr="and">
<!-- ( -->
<!--BEGIN OR-->
<evaluate andOr="or">
<!-- ( -->
<assert applicPropertyIdent="partno" applicPropertyType="prodattr" applicPropertyValues="UHK97000-10" />
<assert applicPropertyIdent="partno" applicPropertyType="prodattr" applicPropertyValues="UHK97000-12" />
<!-- ) -->
</evaluate>
<!--BEGIN OR-->
<evaluate andOr="or">
<!-- ( -->
<assert applicPropertyIdent="TCTO_1Q-9A-709" applicPropertyType="condition" applicPropertyValues="PRE" />
<assert applicPropertyIdent="TCTO_1Q-9A-709" applicPropertyType="condition" applicPropertyValues="NOI" />
<!-- ) -->
</evaluate>
<!-- ) -->
</evaluate>
<!-- ) -->
</evaluate>
</applic>
所有 <assert> 元素都包含在 AND 或 OR 类型的 <evaluate> 元素中。
这是 XML 的预期结果:
(partno="UHK97000-15" or ((partno="UHK97000-10" or partno="UHK97000-12") and (TCTO_1Q-9A-709="PRE" or TCTO_1Q-9A-709="NOI")))
这是我正在尝试的脚本:
// Sample applicability XML flattened onto one line. The nested <evaluate>
// elements carry the and/or operators; the <assert> elements are the leaves.
var sApplic = '<applic id="TCTO_709_PRE_ALL"><displayText><simplePara>All Aircraft without Extended Range Capability</simplePara></displayText><evaluate andOr="or"><assert applicPropertyIdent="partno" applicPropertyType="prodattr" applicPropertyValues="UHK97000-15"></assert><evaluate andOr="and"><evaluate andOr="or"><assert applicPropertyIdent="partno" applicPropertyType="prodattr" applicPropertyValues="UHK97000-10"></assert><assert applicPropertyIdent="partno" applicPropertyType="prodattr" applicPropertyValues="UHK97000-12"></assert></evaluate><evaluate andOr="or"><assert applicPropertyIdent="TCTO_1Q-9A-709" applicPropertyType="condition" applicPropertyValues="PRE"></assert><assert applicPropertyIdent="TCTO_1Q-9A-709" applicPropertyType="condition" applicPropertyValues="NOI"></assert></evaluate></evaluate></evaluate></applic>';
// Group 1 captures the applicPropertyIdent value, group 2 the quoted
// applicPropertyValues value of each <assert>. The pattern only matches
// <assert> tags, so <evaluate> operators and nesting are not captured here.
var sRegXEval = /<assert applicPropertyIdent="(.*?)" applicPropertyType=".*?" applicPropertyValues=(".*?")(\/>|<\/assert>)?/g;
// Running index of the current match, used only to label the output lines.
// It must be initialised: reading an undeclared `i` throws a ReferenceError.
var i = 0;
// With the /g flag each exec() call resumes after the previous match and
// returns null once no further <assert> is found.
var sMatch = sRegXEval.exec(sApplic);
while (sMatch !== null) {
    var sFirst = sMatch[1] + "=" + sMatch[2];
    document.write("<p>sMatch[" + i +"]" + sFirst);
    sMatch = sRegXEval.exec(sApplic);
    i++;
}
</script>
以下是该脚本的结果,与预期结果相去甚远:
sMatch[0]partno="UHK97000-15"
sMatch[1]partno="UHK97000-10"
sMatch[2]partno="UHK97000-12"
sMatch[3]TCTO_1Q-9A-709="PRE"
sMatch[4]TCTO_1Q-9A-709="NOI"
如何改进代码以获得所需的结果?
已更新
XML 字符串已更改为
// Updated sample: the <applic> is now wrapped in a <datamodule> root that also
// holds a <file> element, and the <assert> sits directly under <applic>
// without any enclosing <evaluate>.
var sApplic = '<datamodule><file>CClasic.sgm</file><applic><displayText><simplePara>Cooking Classics</simplePara></displayText><assert applicPropertyIdent="author" applicPropertyType="prodattr" applicPropertyValues="Crocker"/></applic></datamodule>';
自从我做了这一改动后,@trincot 给我的代码就不再有效了。如何改进脚本,使其能够处理这个新字符串,并让我能够显示 <file> 元素中的文件名?
您不应尝试使用正则表达式解析 XML:它们不适合此类任务。
应改用所有主流浏览器的 Web API 都提供的 DOM 解析器(DOMParser),并配合一个负责插入布尔运算符和括号的递归函数:
/**
 * Recursively renders an element subtree as a boolean expression string.
 *
 * Each `assert` child becomes `ident="value"`, each `evaluate` child becomes
 * its own rendering wrapped in parentheses, and any other child is rendered
 * transparently (without parentheses). Children that render to an empty
 * string are dropped; the remaining parts are joined with the node's
 * `andOr` attribute value surrounded by single spaces.
 *
 * @param {Element} node - Element whose element children are rendered.
 * @returns {string} The expression text, or '' when nothing below `node`
 *   produces output.
 */
function parse(node) {
    const parts = [];
    for (const child of Array.from(node.children)) {
        let rendered;
        if (child.tagName === 'assert') {
            // Leaf condition: identifier and its quoted value.
            const ident = child.getAttribute('applicPropertyIdent');
            const value = child.getAttribute('applicPropertyValues');
            rendered = ident + '="' + value + '"';
        } else if (child.tagName === 'evaluate') {
            // A nested group always gets its own pair of parentheses.
            rendered = '(' + parse(child) + ')';
        } else {
            // Wrapper elements contribute only what their descendants render.
            rendered = parse(child);
        }
        if (rendered) {
            parts.push(rendered);
        }
    }
    // NOTE(review): the separator only matters when two or more parts exist;
    // presumably that happens only under <evaluate>, which carries andOr.
    const separator = ' ' + node.getAttribute('andOr') + ' ';
    return parts.join(separator);
}
// Sample input: a <datamodule> root holding a <file> element and the <applic>
// whose nested <evaluate>/<assert> elements encode the boolean expression.
const sApplic = `<datamodule>
<file>CClasic.sgm</file>
<applic id="TCTO_709_PRE_ALL">
<displayText>
<simplePara>All Aircraft without Extended Range Capability</simplePara>
</displayText>
<evaluate andOr="or">
<assert applicPropertyIdent="partno" applicPropertyType="prodattr" applicPropertyValues="UHK97000-15"></assert>
<evaluate andOr="and">
<evaluate andOr="or">
<assert applicPropertyIdent="partno" applicPropertyType="prodattr" applicPropertyValues="UHK97000-10"></assert>
<assert applicPropertyIdent="partno" applicPropertyType="prodattr" applicPropertyValues="UHK97000-12"></assert>
</evaluate>
<evaluate andOr="or">
<assert applicPropertyIdent="TCTO_1Q-9A-709" applicPropertyType="condition" applicPropertyValues="PRE"></assert>
<assert applicPropertyIdent="TCTO_1Q-9A-709" applicPropertyType="condition" applicPropertyValues="NOI"></assert>
</evaluate>
</evaluate>
</evaluate>
</applic>
</datamodule>`;
// Parse the string as XML (not HTML) using the browser's DOMParser.
const xml = ( new window.DOMParser() ).parseFromString(sApplic, "text/xml");
// Render the expression starting from the document's root element.
const result = parse(xml.documentElement);
console.log(result);
我正在尝试使用正则表达式遍历 XML 字符串,以便从中构建有意义的字符串。
XML表示一个嵌套的布尔表达式。
我目前已经可以提取出属于各个等式的值,但不知道如何获取 AND/OR 运算符,也不知道如何在最终结果中加上所需的括号。
这就是 XML 的样子:
<applic id="TCTO_709_PRE_ALL">
<displayText><simplePara>All Aircraft without Extended Range Capability</simplePara></displayText>
<!--BEGIN OR-->
<evaluate andOr="or">
<!-- ( -->
<assert applicPropertyIdent="partno" applicPropertyType="prodattr" applicPropertyValues="UHK97000-15" />
<!--BEGIN AND-->
<evaluate andOr="and">
<!-- ( -->
<!--BEGIN OR-->
<evaluate andOr="or">
<!-- ( -->
<assert applicPropertyIdent="partno" applicPropertyType="prodattr" applicPropertyValues="UHK97000-10" />
<assert applicPropertyIdent="partno" applicPropertyType="prodattr" applicPropertyValues="UHK97000-12" />
<!-- ) -->
</evaluate>
<!--BEGIN OR-->
<evaluate andOr="or">
<!-- ( -->
<assert applicPropertyIdent="TCTO_1Q-9A-709" applicPropertyType="condition" applicPropertyValues="PRE" />
<assert applicPropertyIdent="TCTO_1Q-9A-709" applicPropertyType="condition" applicPropertyValues="NOI" />
<!-- ) -->
</evaluate>
<!-- ) -->
</evaluate>
<!-- ) -->
</evaluate>
</applic>
所有 <assert> 元素都包含在 AND 或 OR 类型的 <evaluate> 元素中。
这是 XML 的预期结果:
(partno="UHK97000-15" or ((partno="UHK97000-10" or partno="UHK97000-12") and (TCTO_1Q-9A-709="PRE" or TCTO_1Q-9A-709="NOI")))
这是我正在尝试的脚本:
// Sample applicability XML flattened onto one line. The nested <evaluate>
// elements carry the and/or operators; the <assert> elements are the leaves.
var sApplic = '<applic id="TCTO_709_PRE_ALL"><displayText><simplePara>All Aircraft without Extended Range Capability</simplePara></displayText><evaluate andOr="or"><assert applicPropertyIdent="partno" applicPropertyType="prodattr" applicPropertyValues="UHK97000-15"></assert><evaluate andOr="and"><evaluate andOr="or"><assert applicPropertyIdent="partno" applicPropertyType="prodattr" applicPropertyValues="UHK97000-10"></assert><assert applicPropertyIdent="partno" applicPropertyType="prodattr" applicPropertyValues="UHK97000-12"></assert></evaluate><evaluate andOr="or"><assert applicPropertyIdent="TCTO_1Q-9A-709" applicPropertyType="condition" applicPropertyValues="PRE"></assert><assert applicPropertyIdent="TCTO_1Q-9A-709" applicPropertyType="condition" applicPropertyValues="NOI"></assert></evaluate></evaluate></evaluate></applic>';
// Group 1 captures the applicPropertyIdent value, group 2 the quoted
// applicPropertyValues value of each <assert>. The pattern only matches
// <assert> tags, so <evaluate> operators and nesting are not captured here.
var sRegXEval = /<assert applicPropertyIdent="(.*?)" applicPropertyType=".*?" applicPropertyValues=(".*?")(\/>|<\/assert>)?/g;
// Running index of the current match, used only to label the output lines.
// It must be initialised: reading an undeclared `i` throws a ReferenceError.
var i = 0;
// With the /g flag each exec() call resumes after the previous match and
// returns null once no further <assert> is found.
var sMatch = sRegXEval.exec(sApplic);
while (sMatch !== null) {
    var sFirst = sMatch[1] + "=" + sMatch[2];
    document.write("<p>sMatch[" + i +"]" + sFirst);
    sMatch = sRegXEval.exec(sApplic);
    i++;
}
</script>
以下是该脚本的结果,与预期结果相去甚远:
sMatch[0]partno="UHK97000-15"
sMatch[1]partno="UHK97000-10"
sMatch[2]partno="UHK97000-12"
sMatch[3]TCTO_1Q-9A-709="PRE"
sMatch[4]TCTO_1Q-9A-709="NOI"
如何改进代码以获得所需的结果?
已更新 XML 字符串已更改为
// Updated sample: the <applic> is now wrapped in a <datamodule> root that also
// holds a <file> element, and the <assert> sits directly under <applic>
// without any enclosing <evaluate>.
var sApplic = '<datamodule><file>CClasic.sgm</file><applic><displayText><simplePara>Cooking Classics</simplePara></displayText><assert applicPropertyIdent="author" applicPropertyType="prodattr" applicPropertyValues="Crocker"/></applic></datamodule>';
自从我做了这一改动后,@trincot 给我的代码就不再有效了。如何改进脚本,使其能够处理这个新字符串,并让我能够显示 <file> 元素中的文件名?
您不应尝试使用正则表达式解析 XML:它们不适合此类任务。
应改用所有主流浏览器的 Web API 都提供的 DOM 解析器(DOMParser),并配合一个负责插入布尔运算符和括号的递归函数:
/**
 * Recursively renders an element subtree as a boolean expression string.
 *
 * Each `assert` child becomes `ident="value"`, each `evaluate` child becomes
 * its own rendering wrapped in parentheses, and any other child is rendered
 * transparently (without parentheses). Children that render to an empty
 * string are dropped; the remaining parts are joined with the node's
 * `andOr` attribute value surrounded by single spaces.
 *
 * @param {Element} node - Element whose element children are rendered.
 * @returns {string} The expression text, or '' when nothing below `node`
 *   produces output.
 */
function parse(node) {
    const parts = [];
    for (const child of Array.from(node.children)) {
        let rendered;
        if (child.tagName === 'assert') {
            // Leaf condition: identifier and its quoted value.
            const ident = child.getAttribute('applicPropertyIdent');
            const value = child.getAttribute('applicPropertyValues');
            rendered = ident + '="' + value + '"';
        } else if (child.tagName === 'evaluate') {
            // A nested group always gets its own pair of parentheses.
            rendered = '(' + parse(child) + ')';
        } else {
            // Wrapper elements contribute only what their descendants render.
            rendered = parse(child);
        }
        if (rendered) {
            parts.push(rendered);
        }
    }
    // NOTE(review): the separator only matters when two or more parts exist;
    // presumably that happens only under <evaluate>, which carries andOr.
    const separator = ' ' + node.getAttribute('andOr') + ' ';
    return parts.join(separator);
}
// Sample input: a <datamodule> root holding a <file> element and the <applic>
// whose nested <evaluate>/<assert> elements encode the boolean expression.
const sApplic = `<datamodule>
<file>CClasic.sgm</file>
<applic id="TCTO_709_PRE_ALL">
<displayText>
<simplePara>All Aircraft without Extended Range Capability</simplePara>
</displayText>
<evaluate andOr="or">
<assert applicPropertyIdent="partno" applicPropertyType="prodattr" applicPropertyValues="UHK97000-15"></assert>
<evaluate andOr="and">
<evaluate andOr="or">
<assert applicPropertyIdent="partno" applicPropertyType="prodattr" applicPropertyValues="UHK97000-10"></assert>
<assert applicPropertyIdent="partno" applicPropertyType="prodattr" applicPropertyValues="UHK97000-12"></assert>
</evaluate>
<evaluate andOr="or">
<assert applicPropertyIdent="TCTO_1Q-9A-709" applicPropertyType="condition" applicPropertyValues="PRE"></assert>
<assert applicPropertyIdent="TCTO_1Q-9A-709" applicPropertyType="condition" applicPropertyValues="NOI"></assert>
</evaluate>
</evaluate>
</evaluate>
</applic>
</datamodule>`;
// Parse the string as XML (not HTML) using the browser's DOMParser.
const xml = ( new window.DOMParser() ).parseFromString(sApplic, "text/xml");
// Render the expression starting from the document's root element.
const result = parse(xml.documentElement);
console.log(result);