将 Apache DTM 转换为纯文本(HTML)以进行 CSS 查询
Converting Apache's DTM to plain text (HTML) for CSS querying
我正在编写一个 XPath 扩展函数,用来通过 CSS 选择器查找元素。
这是我目前的代码:
import javax.xml.transform.TransformerException;
import org.apache.xml.dtm.ref.dom2dtm.DOM2DTM;
import org.apache.xpath.XPathContext;
import org.apache.xpath.functions.FunctionOneArg;
import org.apache.xpath.objects.XObject;
import org.apache.xpath.objects.XString;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * First attempt at an XPath extension function that resolves a CSS selector.
 *
 * <p>Known problem (the reason this snippet is shown): the text handed to jsoup comes from
 * {@code toString()} on the DTM, which does not return HTML, so the selector has nothing
 * useful to match against. The return value is still a hard-coded placeholder.
 */
public class CSSFinder extends FunctionOneArg {

    /**
     * Parses the DTM's string form with jsoup, selects the first element matching the
     * function argument and walks its ancestors to assemble a path string.
     *
     * @param c the XPath evaluation context supplying the context node and its DTM
     * @return for now always the placeholder string {@code "sc-login"}
     * @throws TransformerException declared by the overridden method
     */
    @Override
    public XObject execute(XPathContext c) throws TransformerException {
        final int contextHandle = c.getContextNode();
        final DOM2DTM model = (DOM2DTM) c.getDTM(contextHandle);

        // toString() does not return HTML -- this is the open problem.
        final String markup = model.toString();
        final Document page = Jsoup.parse(markup);

        final String selector = getArg0().toString();
        final Element hit = page.select(selector).first();
        final Elements ancestors = hit.parents();

        // Assemble one "/*[index]" step per ancestor; the result is not used yet.
        final StringBuilder path = new StringBuilder("/");
        for (final Element ancestor : ancestors) {
            path.append("/*[").append(ancestor.elementSiblingIndex()).append("]");
        }

        // Placeholder result, to be replaced later.
        return new XString("sc-login");
    }
}
问题是 toString() 返回的并不是 HTML。如何才能得到完整解析后的 HTML?
找到解决方案:
import java.io.IOException;
import java.util.Collections;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.xml.dtm.ref.dom2dtm.DOM2DTM;
import org.apache.xpath.XPathContext;
import org.apache.xpath.functions.FunctionOneArg;
import org.apache.xpath.objects.XNodeSetForDOM;
import org.apache.xpath.objects.XObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.w3c.dom.Node;
import com.gargoylesoftware.htmlunit.html.xpath.XPathUtils;
/**
 * XPath extension function that finds an element through a CSS selector.
 *
 * <p>The DOM behind the current context node is serialized, re-parsed with jsoup so that the
 * selector can be evaluated, and the first match is translated into a positional XPath (one
 * {@code *[n]} step per nesting level) which is then resolved against the original DOM.
 *
 * <p>NOTE(review): the positional path is computed on jsoup's tree; if jsoup restructures the
 * markup while parsing, positions may differ from the original DOM -- verify for your pages.
 */
public class CSSFinder extends FunctionOneArg {

    /**
     * Evaluates the single function argument as a CSS selector and returns the matching node.
     *
     * @param c the XPath evaluation context supplying the context node, its DTM and the
     *          DTM manager
     * @return a node set wrapping the DOM node of the first element matching the selector
     * @throws TransformerException if serialization fails, the selector matches no element,
     *                              or the computed path cannot be resolved in the DOM
     */
    @Override
    public XObject execute(XPathContext c) throws TransformerException {
        final int contextNode = c.getContextNode();
        final DOM2DTM dtm = (DOM2DTM) c.getDTM(contextNode);
        final Node root = dtm.getNode(dtm.getDocumentRoot(contextNode));

        // Evaluate the argument rather than calling toString() on the expression object, so
        // that non-literal arguments (variables, concat(...), ...) yield their string value.
        final String selector = getArg0().execute(c).str();

        final Document parsed = Jsoup.parse(serialize(root));
        final Element match = parsed.select(selector).first();
        if (match == null) {
            throw new TransformerException("CSS selector matched no element: " + selector);
        }

        final String xpath = positionalXPath(match);
        final java.util.List<?> nodes = XPathUtils.getByXPath(root, xpath);
        if (nodes.isEmpty()) {
            throw new TransformerException("No DOM node at " + xpath
                    + " for CSS selector: " + selector);
        }
        return new XNodeSetForDOM((Node) nodes.get(0), c.getDTMManager());
    }

    /**
     * Serializes the given DOM node with an identity transformation.
     *
     * <p>Writing to a character stream avoids decoding the output with the platform default
     * charset, which is what {@code ByteArrayOutputStream.toString()} would do.
     */
    private static String serialize(Node root) throws TransformerException {
        final Transformer transformer = TransformerFactory.newInstance().newTransformer();
        final java.io.StringWriter markup = new java.io.StringWriter();
        transformer.transform(new DOMSource(root), new StreamResult(markup));
        return markup.toString();
    }

    /** Builds the positional XPath leading from the document root to the given element. */
    private static String positionalXPath(Element target) {
        final Elements ancestors = target.parents();
        // parents() lists the nearest ancestor first; the path needs the outermost first.
        Collections.reverse(ancestors);

        final StringBuilder path = new StringBuilder();
        for (final Element ancestor : ancestors) {
            appendStep(path, ancestor);
        }
        appendStep(path, target);
        return path.toString();
    }

    /** Appends one step for the element; XPath positions are 1-based, jsoup's are 0-based. */
    private static void appendStep(StringBuilder path, Element element) {
        path.append("/*[").append(element.elementSiblingIndex() + 1).append(']');
    }
}
此外,还必须注册这个函数:
// Registers CSSFinder under the function name "css" for the namespace URI
// http://webtest.canoo.com, which is how the XPath expression below reaches it.
// NOTE(review): presumably this must run before any XPath using the function is evaluated -- confirm.
XPathHelper.registerGlobalFunction("http://webtest.canoo.com", "css", CSSFinder.class);
现在就可以在 Canoo WebTest 中这样使用:
<!-- Passes the CSS selector 'button.sc-login' to the css() extension function via the wt: prefix
     (assumed to be bound to http://webtest.canoo.com; confirm against the WebTest setup). -->
<clickElement xpath="wt:css('button.sc-login')" />
我正在编写一个 XPath 扩展函数,用来通过 CSS 选择器查找元素。
这是我目前的代码:
import javax.xml.transform.TransformerException;
import org.apache.xml.dtm.ref.dom2dtm.DOM2DTM;
import org.apache.xpath.XPathContext;
import org.apache.xpath.functions.FunctionOneArg;
import org.apache.xpath.objects.XObject;
import org.apache.xpath.objects.XString;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * First attempt at an XPath extension function that resolves a CSS selector.
 *
 * <p>Known problem (the reason this snippet is shown): the text handed to jsoup comes from
 * {@code toString()} on the DTM, which does not return HTML, so the selector has nothing
 * useful to match against. The return value is still a hard-coded placeholder.
 */
public class CSSFinder extends FunctionOneArg {

    /**
     * Parses the DTM's string form with jsoup, selects the first element matching the
     * function argument and walks its ancestors to assemble a path string.
     *
     * @param c the XPath evaluation context supplying the context node and its DTM
     * @return for now always the placeholder string {@code "sc-login"}
     * @throws TransformerException declared by the overridden method
     */
    @Override
    public XObject execute(XPathContext c) throws TransformerException {
        final int contextHandle = c.getContextNode();
        final DOM2DTM model = (DOM2DTM) c.getDTM(contextHandle);

        // toString() does not return HTML -- this is the open problem.
        final String markup = model.toString();
        final Document page = Jsoup.parse(markup);

        final String selector = getArg0().toString();
        final Element hit = page.select(selector).first();
        final Elements ancestors = hit.parents();

        // Assemble one "/*[index]" step per ancestor; the result is not used yet.
        final StringBuilder path = new StringBuilder("/");
        for (final Element ancestor : ancestors) {
            path.append("/*[").append(ancestor.elementSiblingIndex()).append("]");
        }

        // Placeholder result, to be replaced later.
        return new XString("sc-login");
    }
}
问题是 toString() 返回的并不是 HTML。如何才能得到完整解析后的 HTML?
找到解决方案:
import java.io.IOException;
import java.util.Collections;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.xml.dtm.ref.dom2dtm.DOM2DTM;
import org.apache.xpath.XPathContext;
import org.apache.xpath.functions.FunctionOneArg;
import org.apache.xpath.objects.XNodeSetForDOM;
import org.apache.xpath.objects.XObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.w3c.dom.Node;
import com.gargoylesoftware.htmlunit.html.xpath.XPathUtils;
/**
 * XPath extension function that finds an element through a CSS selector.
 *
 * <p>The DOM behind the current context node is serialized, re-parsed with jsoup so that the
 * selector can be evaluated, and the first match is translated into a positional XPath (one
 * {@code *[n]} step per nesting level) which is then resolved against the original DOM.
 *
 * <p>NOTE(review): the positional path is computed on jsoup's tree; if jsoup restructures the
 * markup while parsing, positions may differ from the original DOM -- verify for your pages.
 */
public class CSSFinder extends FunctionOneArg {

    /**
     * Evaluates the single function argument as a CSS selector and returns the matching node.
     *
     * @param c the XPath evaluation context supplying the context node, its DTM and the
     *          DTM manager
     * @return a node set wrapping the DOM node of the first element matching the selector
     * @throws TransformerException if serialization fails, the selector matches no element,
     *                              or the computed path cannot be resolved in the DOM
     */
    @Override
    public XObject execute(XPathContext c) throws TransformerException {
        final int contextNode = c.getContextNode();
        final DOM2DTM dtm = (DOM2DTM) c.getDTM(contextNode);
        final Node root = dtm.getNode(dtm.getDocumentRoot(contextNode));

        // Evaluate the argument rather than calling toString() on the expression object, so
        // that non-literal arguments (variables, concat(...), ...) yield their string value.
        final String selector = getArg0().execute(c).str();

        final Document parsed = Jsoup.parse(serialize(root));
        final Element match = parsed.select(selector).first();
        if (match == null) {
            throw new TransformerException("CSS selector matched no element: " + selector);
        }

        final String xpath = positionalXPath(match);
        final java.util.List<?> nodes = XPathUtils.getByXPath(root, xpath);
        if (nodes.isEmpty()) {
            throw new TransformerException("No DOM node at " + xpath
                    + " for CSS selector: " + selector);
        }
        return new XNodeSetForDOM((Node) nodes.get(0), c.getDTMManager());
    }

    /**
     * Serializes the given DOM node with an identity transformation.
     *
     * <p>Writing to a character stream avoids decoding the output with the platform default
     * charset, which is what {@code ByteArrayOutputStream.toString()} would do.
     */
    private static String serialize(Node root) throws TransformerException {
        final Transformer transformer = TransformerFactory.newInstance().newTransformer();
        final java.io.StringWriter markup = new java.io.StringWriter();
        transformer.transform(new DOMSource(root), new StreamResult(markup));
        return markup.toString();
    }

    /** Builds the positional XPath leading from the document root to the given element. */
    private static String positionalXPath(Element target) {
        final Elements ancestors = target.parents();
        // parents() lists the nearest ancestor first; the path needs the outermost first.
        Collections.reverse(ancestors);

        final StringBuilder path = new StringBuilder();
        for (final Element ancestor : ancestors) {
            appendStep(path, ancestor);
        }
        appendStep(path, target);
        return path.toString();
    }

    /** Appends one step for the element; XPath positions are 1-based, jsoup's are 0-based. */
    private static void appendStep(StringBuilder path, Element element) {
        path.append("/*[").append(element.elementSiblingIndex() + 1).append(']');
    }
}
此外,还必须注册这个函数:
// Registers CSSFinder under the function name "css" for the namespace URI
// http://webtest.canoo.com, which is how the XPath expression below reaches it.
// NOTE(review): presumably this must run before any XPath using the function is evaluated -- confirm.
XPathHelper.registerGlobalFunction("http://webtest.canoo.com", "css", CSSFinder.class);
现在就可以在 Canoo WebTest 中这样使用:
<!-- Passes the CSS selector 'button.sc-login' to the css() extension function via the wt: prefix
     (assumed to be bound to http://webtest.canoo.com; confirm against the WebTest setup). -->
<clickElement xpath="wt:css('button.sc-login')" />