Java 以字符串作为输出的 Saxon xPath 示例

Java Saxon xPath example with String as output

我正在尝试编写使用 Saxon XPath 的 Java 代码。我遇到了两个问题:

  1. 我不太擅长 Java
  2. 我不确定将 net.sf.saxon.om.NodeInfo 转换为字符串的最佳方法是什么。

有人可以帮忙吗?我知道 http://www.saxonica.com/download/download_page.xml 上有一些很好的示例代码,但这还不够。

我看到了一个类似的 SO 讨论:“XPath processor output as string”。但是在我的场景中,我想使用 Saxon,而它使用的是 NodeInfo。

<pre>
<!-- language: java --> 
public class helloSaxon {
    /**
     * Runs five sample XPath statements against one in-memory XML document and prints,
     * for each of them, the result string followed by the match flag.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        final String xml = "<root><a version = '1.0' encoding = 'UTF-8'>#BBB#</a><a>#CCC#</a><b><a>#DDD#</a></b></root>";

        // The author used the following XPath tester for the scenarios:
        // https://www.freeformatter.com/xpath-tester.html#ad-output
        // The comment above each statement is the outcome the author wants to obtain.
        final String[] statements = {
            // Test #1 - wanted xPathResult: "<a version = '1.0' encoding = 'UTF-8'>#BBB#</a><a>#CCC#</a>", xPathResultMatch: true
            "/root/a",
            // Test #2 - wanted xPathResult: "<a version = '1.0' encoding = 'UTF-8'>#BBB#</a><a>#CCC#</a><a>#DDD#</a>", xPathResultMatch: true
            "//a",
            // Test #3 - wanted xPathResult: "#BBB#", xPathResultMatch: true
            "/root/a[1]/text()",
            // Test #4 - wanted xPathResult: "", xPathResultMatch: false
            "/a/root/a/text()",
            // Test #5 - wanted xPathResult: "<root><a version = '1.0' encoding = 'UTF-8'>#BBB#</a><a>#CCC#</a><b><a>#DDD#</a></b></root>", xPathResultMatch: true
            "/root"
        };

        SaxonXPath evaluation = null;
        for (int index = 0; index < statements.length; index++) {
            // The first statement goes through the constructor; every later one reuses
            // the same instance through Reset.
            if (evaluation == null) {
                evaluation = new SaxonXPath(xml, statements[index]);
            } else {
                evaluation.Reset(xml, statements[index]);
            }

            final String label = "Test #" + (index + 1);
            String xPathResult = evaluation.getxPathResult();
            System.out.println(label + " xPathResult - " + xPathResult);
            Boolean xPathResultMatch = evaluation.getxPathResultMatch();
            System.out.println(label + " xPathResultMatch - " + xPathResultMatch);
        }
    }
    /**
     * Evaluates one XPath statement against one XML string through the JAXP XPath API
     * backed by Saxon, and keeps the outcome for later retrieval.
     *
     * <p>The result string is the concatenation of the string values of all selected
     * nodes; the match flag tells whether at least one node was selected. Any exception
     * raised during evaluation is caught and its stack trace printed, leaving the fields
     * with whatever values they had at that moment (after a reset: an empty result and a
     * {@code null} match flag).
     */
    static class SaxonXPath{
        private String xml;
        private String xPathStatement;
        private String xPathResult;
        private Boolean xPathResultMatch;

        /**
         * Stores the inputs and evaluates them right away.
         *
         * @param xml            the XML document as text
         * @param xPathStatement the XPath expression as text
         */
        public SaxonXPath(String xml, String xPathStatement){
            Reset(xml, xPathStatement);
        }

        /**
         * Replaces the inputs, clears the previous outcome and evaluates again.
         *
         * @param xml            the XML document as text
         * @param xPathStatement the XPath expression as text
         */
        public void Reset(String xml, String xPathStatement){
            this.xPathResult = "";
            this.xPathResultMatch = null;
            this.xml = xml;
            this.xPathStatement = xPathStatement;
            Evaluate();
        }

        /**
         * Builds the document from the stored XML, runs the stored XPath statement
         * against it and records the result string and the match flag.
         */
        public void Evaluate(){
            try{
                // Register Saxon's factory for its own object model, then look it up.
                final String factoryProperty = "javax.xml.xpath.XPathFactory:" + NamespaceConstant.OBJECT_MODEL_SAXON;
                System.setProperty(factoryProperty, "net.sf.saxon.xpath.XPathFactoryImpl");
                XPathFactory factory = XPathFactory.newInstance(NamespaceConstant.OBJECT_MODEL_SAXON);
                XPath evaluator = factory.newXPath();

                // Parse the XML text into a Saxon document using the factory's configuration.
                SAXSource source = new SAXSource(new InputSource(new StringReader(this.xml)));
                Configuration configuration = ((XPathFactoryImpl) factory).getConfiguration();
                DocumentInfo tree = configuration.buildDocument(source);

                XPathExpression compiled = evaluator.compile(this.xPathStatement);
                List selected = (List) compiled.evaluate(tree, XPathConstants.NODESET);

                if (selected == null || selected.isEmpty()) {
                    this.xPathResultMatch = false;
                    return;
                }

                this.xPathResultMatch = true;
                for (Object item : selected) {
                    NodeInfo node = (NodeInfo) item;

                    // Open point noted by the author: the content of "node" needs to be
                    // converted to a string here; for now its string value is appended.
                    this.xPathResult += node.getStringValue();
                }
            } catch(Exception failure){
                failure.printStackTrace();
            }
        }

        /** @return the result string recorded by the last evaluation */
        public String getxPathResult(){
            return this.xPathResult;
        }

        /** @return the match flag recorded by the last evaluation; may be {@code null} */
        public Boolean getxPathResultMatch(){
            return this.xPathResultMatch;
        }
    }
}
</pre>

输入如下:

  1. XML(字符串)
  2. XPath 表达式(字符串)

输出如下:

  3. XPath 求值结果(字符串)
  4. XPath 是否匹配到结果(布尔值)

我还在代码注释中添加了一些测试示例,以便您更好地理解我正在尝试做的事情。

首先,我建议为此使用 s9api 接口而不是 JAXP XPath 接口。原因有很多,特别是:

  • JAXP 接口基本上是面向 XPath 1.0 设计的,例如它只识别字符串、数字、布尔值和节点集这几种数据类型,而 XPath 2.0 拥有丰富得多的类型系统

  • JAXP 接口与其对象模型 DOM 相关联,尽管它对使用其他模型的可能性做出了让步(并且 Saxon 实现通过支持 NodeInfo 利用了这一点, 这是 XDM 节点的实现)

  • JAXP 接口几乎没有类型安全;它广泛使用 Object 作为参数和结果类型,并且不使用 Java 泛型

  • 使用标准 API 所带来的可移植性优势其实是虚假的,因为 (a) 除 Saxon 之外的所有已知实现都只支持 XPath 1.0,以及 (b) 对于那些声明为接受 Object 的接口,可以传入的值的种类因产品而异。

每次计算 XPath 表达式时,您的代码都会创建一个新的 XPathFactory。创建 XPathFactory 是一项非常昂贵的操作,因为它涉及搜索类路径并检查许多不同的 JAR 文件以查看哪个包含合适的 XPath 引擎。

此外,每次计算 XPath 表达式时,您的代码都会从头开始构建源文档。同样,这非常昂贵。

话虽如此,使用 JAXP 返回字符串和布尔值并不困难。您只需要将指明预期结果类型的参数从 XPathConstants.NODESET 改为 XPathConstants.STRING 或 XPathConstants.BOOLEAN,evaluate() 调用就会返回一个字符串或一个布尔值,而不是节点列表。但是,如果您想返回日期或持续时间,就无能为力了,因为 JAXP 不支持这些类型。

我只是想根据@MichaelKay 的输入添加经过编辑的代码。我仍然为每个调用做 buildDocumentTree,尽管它是昂贵的操作,因为我会有不同的 XML。我希望其他人也会发现它有用或会给出好的评论以提高性能:)

import java.io.StringReader;
import java.util.Iterator;
import java.util.List;
import javax.xml.transform.sax.SAXSource;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import javax.xml.xpath.XPathFactoryConfigurationException;
import net.sf.saxon.Configuration;
import net.sf.saxon.lib.NamespaceConstant;
import net.sf.saxon.om.NodeInfo;
import net.sf.saxon.om.TreeInfo;
import net.sf.saxon.xpath.XPathFactoryImpl;
import org.xml.sax.InputSource;

public class helloSaxon {

    /**
     * Demo entry point: evaluates five XPath statements against a fixed sample document
     * and prints the result string and the match flag of each evaluation.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        String xml = "<root version = '1.0' encoding = 'UTF-8' xmlns:bar='http://www.smth.org/'><bar:a>#BBB#</bar:a><a>#CCC#</a><b><a>#DDD#</a></b></root>";

        // The author used the following XPath tester for the scenarios:
        // https://www.freeformatter.com/xpath-tester.html#ad-output
        //
        // Expectations noted by the author, per test:
        //   #1 xPathResult "<a version = '1.0' encoding = 'UTF-8'>#BBB#</a><a>#CCC#</a>", xPathResultMatch true
        //   #2 xPathResult "<a version = '1.0' encoding = 'UTF-8'>#BBB#</a><a>#CCC#</a><a>#DDD#</a>", xPathResultMatch true
        //   #3 xPathResult "#BBB#", xPathResultMatch true
        //   #4 xPathResult "", xPathResultMatch false
        //   #5 xPathResult "<root><a version = '1.0' encoding = 'UTF-8'>#BBB#</a><a>#CCC#</a><b><a>#DDD#</a></b></root>", xPathResultMatch true
        // NOTE(review): these expectations do not line up with the sample document above
        // (there the attributes sit on <root> and the first child is <bar:a>) - re-verify.
        String[] xPathStatements = {
            "/root/a",
            "//a",
            "/root/a[1]/text()",
            "/a/root/a/text()",
            "/root"
        };

        // Test #1 goes through the constructor; the remaining tests reuse the same
        // instance through Reset.
        SaxonXPath xPathEvaluation = new SaxonXPath(xml, xPathStatements[0]);
        printOutcome(1, xPathEvaluation);
        for (int testNumber = 2; testNumber <= xPathStatements.length; testNumber++) {
            xPathEvaluation.Reset(xml, xPathStatements[testNumber - 1]);
            printOutcome(testNumber, xPathEvaluation);
        }
    }

    /** Prints the result string and then the match flag of one evaluation. */
    private static void printOutcome(int testNumber, SaxonXPath evaluation) {
        String xPathResult = evaluation.getxPathResult();
        System.out.println("Test #" + testNumber + " xPathResult - " + xPathResult);
        Boolean xPathResultMatch = evaluation.getxPathResultMatch();
        System.out.println("Test #" + testNumber + " xPathResultMatch - " + xPathResultMatch);
    }
    /**
     * Evaluates an XPath statement against an XML string through the JAXP XPath API
     * backed by Saxon, and keeps the outcome for later retrieval.
     *
     * <p>The XPath factory and the XPath object are created once per instance and reused
     * by every {@link #Reset(String, String)} call; the source document is rebuilt on each
     * call. The result string is the concatenation of the serialized form of every selected
     * node; the match flag tells whether at least one node was selected.
     */
    static class SaxonXPath{
        private String xml;
        private String xPathStatement;
        private String xPathResult;
        private Boolean xPathResultMatch;
        private final XPathFactory xPathFactory;
        private final XPath xPath;

        /**
         * Creates the Saxon-backed XPath factory and evaluates the given inputs right away.
         *
         * @param xml            the XML document as text
         * @param xPathStatement the XPath expression as text
         * @throws IllegalStateException if no XPath factory for the Saxon object model
         *                               can be created
         */
        public SaxonXPath(String xml, String xPathStatement){
            System.setProperty("javax.xml.xpath.XPathFactory:" + NamespaceConstant.OBJECT_MODEL_SAXON, "net.sf.saxon.xpath.XPathFactoryImpl");
            XPathFactory factory;
            try {
                factory = XPathFactory.newInstance(NamespaceConstant.OBJECT_MODEL_SAXON);
            } catch (XPathFactoryConfigurationException e) {
                // Fail with the real cause instead of continuing with a null factory,
                // which would only surface as a NullPointerException on the next line.
                throw new IllegalStateException("Unable to create an XPathFactory for object model " + NamespaceConstant.OBJECT_MODEL_SAXON, e);
            }
            this.xPathFactory = factory;
            this.xPath = factory.newXPath();
            this.Reset(xml, xPathStatement);
        }

        /**
         * Replaces the inputs, clears the previous outcome and evaluates again.
         *
         * <p>If anything goes wrong while parsing, compiling, evaluating or serializing,
         * the stack trace is printed and the outcome stays in its cleared state: an empty
         * result string and a {@code null} match flag.
         *
         * @param xml            the XML document as text
         * @param xPathStatement the XPath expression as text
         */
        public void Reset(String xml, String xPathStatement){
            this.xml = xml;
            this.xPathStatement = xPathStatement;
            this.xPathResult = "";
            this.xPathResultMatch = null;
            try{
                InputSource inputSource = new InputSource(new StringReader(this.xml));
                SAXSource saxSource = new SAXSource(inputSource);
                Configuration config = ((XPathFactoryImpl) this.xPathFactory).getConfiguration();
                TreeInfo document = config.buildDocumentTree(saxSource);
                XPathExpression xPathExpression = this.xPath.compile(this.xPathStatement);

                // evaluate() is declared to return Object; the elements are cast to
                // NodeInfo exactly as the original code did.
                @SuppressWarnings("unchecked")
                List<NodeInfo> matches = (List<NodeInfo>) xPathExpression.evaluate(document, XPathConstants.NODESET);

                StringBuilder serialized = new StringBuilder();
                if (matches != null) {
                    for (NodeInfo node : matches) {
                        serialized.append(net.sf.saxon.query.QueryResult.serialize(node));
                    }
                }

                // Publish the outcome only once every node has been serialized, so a
                // failure part-way through cannot leave a partial result behind.
                this.xPathResult = serialized.toString();
                this.xPathResultMatch = matches != null && !matches.isEmpty();
            } catch(Exception e){
                // Best effort by design: report the problem and keep the cleared state.
                e.printStackTrace();
            }
        }

        /** @return the result string recorded by the last evaluation; empty if nothing matched or evaluation failed */
        public String getxPathResult(){
            return this.xPathResult;
        }

        /** @return {@code true}/{@code false} for match/no match, or {@code null} if the last evaluation failed */
        public Boolean getxPathResultMatch(){
            return this.xPathResultMatch;
        }
    }
}