将 Apache DTM 转换为纯文本(HTML)以便进行 CSS 查询

Apache DTM to plain text for CSS querying

我正在编写一个 XPath 函数表达式来通过 CSS 查询查找元素。

这是我目前的代码:

import javax.xml.transform.TransformerException;    
import org.apache.xml.dtm.ref.dom2dtm.DOM2DTM;
import org.apache.xpath.XPathContext;
import org.apache.xpath.functions.FunctionOneArg;
import org.apache.xpath.objects.XObject;
import org.apache.xpath.objects.XString;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Prototype XPath function that takes a CSS selector as its single argument,
 * runs it through jsoup and builds a positional path from the ancestors of
 * the first match. The path is not used yet; a fixed string is returned.
 */
public class CSSFinder extends FunctionOneArg {
    @Override
    public XObject execute(XPathContext c) throws TransformerException {
        final DOM2DTM model = (DOM2DTM) c.getDTM(c.getContextNode());
        // Known problem: toString() does not return the document's HTML.
        final String markup = model.toString();
        final Document html = Jsoup.parse(markup);
        final String selector = getArg0().toString();
        final Element hit = html.select(selector).first();
        final Elements ancestors = hit.parents();
        final StringBuilder path = new StringBuilder("/");
        for (final Element ancestor : ancestors) {
            path.append("/*[").append(ancestor.elementSiblingIndex()).append("]");
        }
        // Placeholder result, to be replaced later; the path built above is unused.
        return new XString("sc-login");
    }
}

问题是 toString 返回的并不是 HTML。怎样才能得到完整解析后的 HTML?

找到解决方案:

import java.io.IOException;
import java.io.StringWriter;
import java.util.Collections;
import java.util.List;

import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.xml.dtm.ref.dom2dtm.DOM2DTM;
import org.apache.xpath.XPathContext;
import org.apache.xpath.functions.FunctionOneArg;
import org.apache.xpath.objects.XNodeSetForDOM;
import org.apache.xpath.objects.XObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.w3c.dom.Node;

import com.gargoylesoftware.htmlunit.html.xpath.XPathUtils;

/**
 * XPath extension function that locates a node by CSS selector.
 *
 * <p>The document owning the context node is serialized to markup, parsed
 * with jsoup, and the selector given as the function's single argument is
 * evaluated there. The first match is translated into a positional XPath
 * ({@code /*[i]/*[j]/...}) which is then resolved against the original DOM.
 *
 * <p>NOTE(review): jsoup normalizes markup while parsing, so element
 * positions in the jsoup tree may differ from the source DOM for documents
 * that are not already well-formed HTML; confirm against real pages.
 */
public class CSSFinder extends FunctionOneArg {

    /**
     * Evaluates the CSS selector against the current document.
     *
     * @param c the XPath runtime context supplying the context node
     * @return a node set wrapping the DOM node that corresponds to the first
     *         element matched by the selector
     * @throws TransformerException if serialization fails, the selector
     *         matches nothing, or the computed path resolves to no node
     */
    @Override
    public XObject execute(XPathContext c) throws TransformerException {
        final int contextNode = c.getContextNode();
        final DOM2DTM dtm = (DOM2DTM) c.getDTM(contextNode);
        final Node root = dtm.getNode(dtm.getDocumentRoot(contextNode));

        // Evaluate the argument expression instead of relying on
        // Expression.toString(), which only works for string literals.
        final String selector = getArg0().execute(c).str();

        final Element match = Jsoup.parse(serialize(root)).select(selector).first();
        if (match == null) {
            throw new TransformerException("CSS selector '" + selector + "' matched no element");
        }

        final String xpath = positionalXPath(match);
        final List<?> nodes = XPathUtils.getByXPath(root, xpath);
        if (nodes.isEmpty()) {
            throw new TransformerException("CSS selector '" + selector
                    + "' resolved to path '" + xpath + "', which matches no node in the document");
        }
        return new XNodeSetForDOM((Node) nodes.get(0), c.getDTMManager());
    }

    /** Serializes the DOM tree below {@code root} to a markup string. */
    private static String serialize(Node root) throws TransformerException {
        // Writing to a character stream avoids decoding the transformer's
        // bytes with the platform default charset.
        final StringWriter markup = new StringWriter();
        final Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.transform(new DOMSource(root), new StreamResult(markup));
        return markup.toString();
    }

    /** Builds a root-to-element path of 1-based positional steps for {@code element}. */
    private static String positionalXPath(Element element) {
        final Elements ancestors = element.parents();
        // parents() lists the closest ancestor first; the path needs root first.
        Collections.reverse(ancestors);
        final StringBuilder xpath = new StringBuilder();
        for (final Element ancestor : ancestors) {
            appendStep(xpath, ancestor);
        }
        appendStep(xpath, element);
        return xpath.toString();
    }

    /** Appends one {@code /*[n]} step; XPath positions are 1-based, jsoup indexes 0-based. */
    private static void appendStep(StringBuilder xpath, Element element) {
        xpath.append("/*[").append(element.elementSiblingIndex() + 1).append(']');
    }
}

另外,我还必须注册这个函数:

// Presumably makes CSSFinder callable as the XPath function "css" in the
// http://webtest.canoo.com namespace - confirm against the XPathHelper API.
XPathHelper.registerGlobalFunction("http://webtest.canoo.com", "css", CSSFinder.class);

现在就可以在 Canoo WebTest 中这样使用:

<clickElement xpath="wt:css('button.sc-login')" />