使用dom4j定位有行号的节点
Use dom4j to locate the node with line number
我用dom4j解析了一个xml文件,得到了一个带行号的文档,我想用行号定位所有带行号的节点并操作这些节点。
有没有办法使用 dom4j 或其他 DOM API 来实现这个?
您的检查程序可以在有问题的节点上设置一个属性,把该节点标记为待修复。
这样就不必再依赖文件行号来定位。之后只需查找带有这个特定属性的节点并对其进行重构即可。
<?xml version="1.0" encoding="UTF-8"?>
<tests>
<test>okay</test>
<test>good</test>
<test>error</test>
</tests>
现在解析xml文件并标记要修复的最后一个测试节点
/**
 * Reads {@code test.xml}, flags every child element of the root whose text is
 * {@code "error"} with a {@code todo="fix"} attribute (unless it already has a
 * {@code todo} attribute), and writes the document back to {@code test.xml}.
 *
 * @throws org.dom4j.DocumentException if {@code test.xml} cannot be parsed
 * @throws java.io.IOException if the updated document cannot be written
 */
public void parse() throws org.dom4j.DocumentException, java.io.IOException {
    SAXReader reader = new SAXReader();
    Document document = reader.read("test.xml");
    Element root = document.getRootElement();
    for (Iterator<Element> iterator = root.elementIterator(); iterator.hasNext();) {
        Element element = iterator.next();
        // Check for the error and set the fix flag only if it has not already been set,
        // so running this repeatedly leaves the file unchanged.
        if ("error".equals(element.getText()) && element.attributeValue("todo") == null) {
            element.addAttribute("todo", "fix");
        }
    }
    // Update the XML file. Writing through an OutputStream lets XMLWriter do the
    // encoding itself (UTF-8 by default), so the bytes match the encoding named in
    // the XML declaration instead of depending on the platform default charset.
    XMLWriter xmlWriter = new XMLWriter(new java.io.FileOutputStream("test.xml"));
    try {
        xmlWriter.write(document);
    } finally {
        // Always release the file handle, even when writing fails.
        xmlWriter.close();
    }
}
输出xml
<?xml version="1.0" encoding="UTF-8"?>
<tests>
<test>okay</test>
<test>good</test>
<test todo="fix">error</test>
</tests>
可以扩展 DOM4J 使元素带有行号信息,但这有点繁琐,而且并非 100% 准确(实际得到的是开始标签中“>”字符所在的行号)。
可能更可靠的方法是报告每个有问题的元素的 XPath 表达式,然后使用这些表达式来修复它。
但是,为了好玩,这里有一个完整的示例,说明如何在 DOM4J 中包含行号信息:
/**
 * Example showing how to attach source line numbers to dom4j elements.
 *
 * <p>A custom {@code SAXReader} installs a content handler that hands the SAX
 * {@code Locator} to a custom {@code DocumentFactory}; the factory then stamps
 * each element it creates with the line number the locator reports at that moment.
 */
public class LineNumber {

    /**
     * Parses a small in-memory document and prints {@code <line>: <name>} for
     * every element.
     *
     * @param args ignored
     * @throws Exception if parsing fails
     */
    public static void main(String[] args) throws Exception {
        SAXReader reader = new MySAXReader();
        reader.setDocumentFactory(new LocatorAwareDocumentFactory());
        Document doc = reader
                .read(new StringReader("<root foo='bar' > \n<alfa/>\n<beta/>\n<gamma/>\n</root>\n"));
        doc.accept(new VisitorSupport() {
            @Override
            public void visit(Element node) {
                // Elements that were not created by LocatorAwareDocumentFactory carry
                // no position; report -1 for them instead of failing on the cast.
                int line = -1;
                if (node instanceof LocationAwareElement) {
                    line = ((LocationAwareElement) node).getLineNumber();
                }
                System.out.printf("%d: %s\n", line, node.getName());
            }
        });
    }

    /** A {@code SAXReader} whose content handler forwards the SAX locator to the document factory. */
    static class MySAXReader extends SAXReader {
        @Override
        protected SAXContentHandler createContentHandler(XMLReader reader) {
            return new MySAXContentHandler(getDocumentFactory(),
                    getDispatchHandler());
        }
    }

    /**
     * Content handler that remembers the document locator and passes it on to a
     * {@link LocatorAwareDocumentFactory}, if that is the factory in use.
     */
    static class MySAXContentHandler extends SAXContentHandler {
        private Locator locator;
        // this is already in SAXContentHandler, but private
        private final DocumentFactory documentFactory;

        public MySAXContentHandler(DocumentFactory documentFactory,
                ElementHandler elementHandler) {
            super(documentFactory, elementHandler);
            this.documentFactory = documentFactory;
        }

        @Override
        public void setDocumentLocator(Locator documentLocator) {
            super.setDocumentLocator(documentLocator);
            this.locator = documentLocator;
            if (documentFactory instanceof LocatorAwareDocumentFactory) {
                ((LocatorAwareDocumentFactory) documentFactory).setLocator(documentLocator);
            }
        }

        @Override
        public void endDocument() throws org.xml.sax.SAXException {
            super.endDocument();
            // A SAX locator is only meaningful during parse callbacks. Detach it from
            // the factory so elements created after parsing get the "unknown" line
            // number (-1) instead of whatever a stale locator happens to return.
            if (documentFactory instanceof LocatorAwareDocumentFactory) {
                ((LocatorAwareDocumentFactory) documentFactory).setLocator(null);
            }
        }

        /** @return the locator last passed to {@link #setDocumentLocator(Locator)}, or {@code null} */
        public Locator getLocator() {
            return locator;
        }
    }

    /**
     * Document factory that creates {@link LocationAwareElement}s and records on each
     * one the line number currently reported by the locator, when a locator is set.
     */
    static class LocatorAwareDocumentFactory extends DocumentFactory {
        // transient: DocumentFactory is Serializable, a SAX Locator generally is not.
        private transient Locator locator;

        public LocatorAwareDocumentFactory() {
            super();
        }

        /**
         * @param locator the locator to read line numbers from, or {@code null} to
         *        stop recording line numbers
         */
        public void setLocator(Locator locator) {
            this.locator = locator;
        }

        @Override
        public Element createElement(QName qname) {
            LocationAwareElement element = new LocationAwareElement(qname);
            if (locator != null) {
                element.setLineNumber(locator.getLineNumber());
            }
            return element;
        }
    }

    /**
     * An Element that is aware of its location (line number) in the source document.
     * The line number is -1 until {@link #setLineNumber(int)} is called.
     */
    static class LocationAwareElement extends DefaultElement {
        private int lineNumber = -1;

        public LocationAwareElement(QName qname) {
            super(qname);
        }

        public LocationAwareElement(QName qname, int attributeCount) {
            super(qname, attributeCount);
        }

        public LocationAwareElement(String name, Namespace namespace) {
            super(name, namespace);
        }

        public LocationAwareElement(String name) {
            super(name);
        }

        /** @return the recorded line number, or -1 if none was recorded */
        public int getLineNumber() {
            return lineNumber;
        }

        /** @param lineNumber the line number to record for this element */
        public void setLineNumber(int lineNumber) {
            this.lineNumber = lineNumber;
        }
    }
}
我用dom4j解析了一个xml文件,得到了一个带行号的文档,我想用行号定位所有带行号的节点并操作这些节点。
有没有办法使用 dom4j 或其他 DOM API 来实现这个?
您的检查程序可以在有问题的节点上设置一个属性,把该节点标记为待修复。 这样就不必再依赖文件行号来定位。之后只需查找带有这个特定属性的节点并对其进行重构即可。
<?xml version="1.0" encoding="UTF-8"?>
<tests>
<test>okay</test>
<test>good</test>
<test>error</test>
</tests>
现在解析xml文件并标记要修复的最后一个测试节点
/**
 * Reads {@code test.xml}, flags every child element of the root whose text is
 * {@code "error"} with a {@code todo="fix"} attribute (unless it already has a
 * {@code todo} attribute), and writes the document back to {@code test.xml}.
 *
 * @throws org.dom4j.DocumentException if {@code test.xml} cannot be parsed
 * @throws java.io.IOException if the updated document cannot be written
 */
public void parse() throws org.dom4j.DocumentException, java.io.IOException {
    SAXReader reader = new SAXReader();
    Document document = reader.read("test.xml");
    Element root = document.getRootElement();
    for (Iterator<Element> iterator = root.elementIterator(); iterator.hasNext();) {
        Element element = iterator.next();
        // Check for the error and set the fix flag only if it has not already been set,
        // so running this repeatedly leaves the file unchanged.
        if ("error".equals(element.getText()) && element.attributeValue("todo") == null) {
            element.addAttribute("todo", "fix");
        }
    }
    // Update the XML file. Writing through an OutputStream lets XMLWriter do the
    // encoding itself (UTF-8 by default), so the bytes match the encoding named in
    // the XML declaration instead of depending on the platform default charset.
    XMLWriter xmlWriter = new XMLWriter(new java.io.FileOutputStream("test.xml"));
    try {
        xmlWriter.write(document);
    } finally {
        // Always release the file handle, even when writing fails.
        xmlWriter.close();
    }
}
输出xml
<?xml version="1.0" encoding="UTF-8"?>
<tests>
<test>okay</test>
<test>good</test>
<test todo="fix">error</test>
</tests>
可以扩展 DOM4J 使元素带有行号信息,但这有点繁琐,而且并非 100% 准确(实际得到的是开始标签中“>”字符所在的行号)。
可能更可靠的方法是报告每个有问题的元素的 XPath 表达式,然后使用这些表达式来修复它。
但是,为了好玩,这里有一个完整的示例,说明如何在 DOM4J 中包含行号信息:
/**
 * Example showing how to attach source line numbers to dom4j elements.
 *
 * <p>A custom {@code SAXReader} installs a content handler that hands the SAX
 * {@code Locator} to a custom {@code DocumentFactory}; the factory then stamps
 * each element it creates with the line number the locator reports at that moment.
 */
public class LineNumber {

    /**
     * Parses a small in-memory document and prints {@code <line>: <name>} for
     * every element.
     *
     * @param args ignored
     * @throws Exception if parsing fails
     */
    public static void main(String[] args) throws Exception {
        SAXReader reader = new MySAXReader();
        reader.setDocumentFactory(new LocatorAwareDocumentFactory());
        Document doc = reader
                .read(new StringReader("<root foo='bar' > \n<alfa/>\n<beta/>\n<gamma/>\n</root>\n"));
        doc.accept(new VisitorSupport() {
            @Override
            public void visit(Element node) {
                // Elements that were not created by LocatorAwareDocumentFactory carry
                // no position; report -1 for them instead of failing on the cast.
                int line = -1;
                if (node instanceof LocationAwareElement) {
                    line = ((LocationAwareElement) node).getLineNumber();
                }
                System.out.printf("%d: %s\n", line, node.getName());
            }
        });
    }

    /** A {@code SAXReader} whose content handler forwards the SAX locator to the document factory. */
    static class MySAXReader extends SAXReader {
        @Override
        protected SAXContentHandler createContentHandler(XMLReader reader) {
            return new MySAXContentHandler(getDocumentFactory(),
                    getDispatchHandler());
        }
    }

    /**
     * Content handler that remembers the document locator and passes it on to a
     * {@link LocatorAwareDocumentFactory}, if that is the factory in use.
     */
    static class MySAXContentHandler extends SAXContentHandler {
        private Locator locator;
        // this is already in SAXContentHandler, but private
        private final DocumentFactory documentFactory;

        public MySAXContentHandler(DocumentFactory documentFactory,
                ElementHandler elementHandler) {
            super(documentFactory, elementHandler);
            this.documentFactory = documentFactory;
        }

        @Override
        public void setDocumentLocator(Locator documentLocator) {
            super.setDocumentLocator(documentLocator);
            this.locator = documentLocator;
            if (documentFactory instanceof LocatorAwareDocumentFactory) {
                ((LocatorAwareDocumentFactory) documentFactory).setLocator(documentLocator);
            }
        }

        @Override
        public void endDocument() throws org.xml.sax.SAXException {
            super.endDocument();
            // A SAX locator is only meaningful during parse callbacks. Detach it from
            // the factory so elements created after parsing get the "unknown" line
            // number (-1) instead of whatever a stale locator happens to return.
            if (documentFactory instanceof LocatorAwareDocumentFactory) {
                ((LocatorAwareDocumentFactory) documentFactory).setLocator(null);
            }
        }

        /** @return the locator last passed to {@link #setDocumentLocator(Locator)}, or {@code null} */
        public Locator getLocator() {
            return locator;
        }
    }

    /**
     * Document factory that creates {@link LocationAwareElement}s and records on each
     * one the line number currently reported by the locator, when a locator is set.
     */
    static class LocatorAwareDocumentFactory extends DocumentFactory {
        // transient: DocumentFactory is Serializable, a SAX Locator generally is not.
        private transient Locator locator;

        public LocatorAwareDocumentFactory() {
            super();
        }

        /**
         * @param locator the locator to read line numbers from, or {@code null} to
         *        stop recording line numbers
         */
        public void setLocator(Locator locator) {
            this.locator = locator;
        }

        @Override
        public Element createElement(QName qname) {
            LocationAwareElement element = new LocationAwareElement(qname);
            if (locator != null) {
                element.setLineNumber(locator.getLineNumber());
            }
            return element;
        }
    }

    /**
     * An Element that is aware of its location (line number) in the source document.
     * The line number is -1 until {@link #setLineNumber(int)} is called.
     */
    static class LocationAwareElement extends DefaultElement {
        private int lineNumber = -1;

        public LocationAwareElement(QName qname) {
            super(qname);
        }

        public LocationAwareElement(QName qname, int attributeCount) {
            super(qname, attributeCount);
        }

        public LocationAwareElement(String name, Namespace namespace) {
            super(name, namespace);
        }

        public LocationAwareElement(String name) {
            super(name);
        }

        /** @return the recorded line number, or -1 if none was recorded */
        public int getLineNumber() {
            return lineNumber;
        }

        /** @param lineNumber the line number to record for this element */
        public void setLineNumber(int lineNumber) {
            this.lineNumber = lineNumber;
        }
    }
}