Java : 使用 SAX/XPATH 解析 xml 文件
Java : Parsing xml file using SAX/XPATH
我有一个 xml 文件,如下所述:
<?xml version="1.0" encoding="UTF-8"?>
<Workbook>
<ExcelWorkbook
xmlns="urn:schemas-microsoft-com:office:excel"/>
<Worksheet ss:Name="Table 1">
<Table>
<Row ss:Index="7" ss:AutoFitHeight="0" ss:Height="12">
<Cell ss:Index="1" ss:StyleID="s05">
<ss:Data ss:Type="String"
xmlns="http://www.w3.org/TR/REC-html40">
<Font html:Size="9" html:Face="Times New Roman" x:Family="Roman" html:Color="#000000">
ABCD
</Font>
</ss:Data>
</Cell>
</Row>
如何在 Java 中使用 SAX 或 XPATH 提取数据,"ABCD"?
编辑 1:
这是XML-
<Table>
<Row ss:Index="74" ss:AutoFitHeight="0" ss:Height="14">
<Cell ss:Index="1" ss:MergeAcross="3" ss:StyleID="s29">
<ss:Data ss:Type="Number" xmlns="http://www.w3.org/TR/REC-html40">
0.00
</ss:Data>
</Cell>
<Cell ss:Index="15" ss:MergeAcross="5" ss:StyleID="s29">
<ss:Data ss:Type="Number" xmlns="http://www.w3.org/TR/REC-html40">
4.57
</ss:Data>
</Cell>
</Row>
解决方案假定问题是如何根据行号和列号获取任何单元格的文本。
由于输入文档中使用了名称空间,我花了一段时间才得到解决方案。显然,如果没有名称空间处理器,xpath 就无法解析带限定名的元素和属性,而且必须自己实现用于此目的的接口 NamespaceContext(似乎没有默认实现?),所以我找到了一个基于映射的实现(here)并使用了它。
因此,假设您的源代码树中有 link 中的 class,则以下代码有效。为了清楚起见,我将搜索模式分解为几个变量
/**
 * Returns the trimmed text of the string-typed cell at the given row and column.
 *
 * <p>The lookup is an XPath query built from four parts: any {@code Table} element,
 * a child {@code Row} whose {@code ss:Index} equals {@code rowIdx}, a child {@code Cell}
 * whose {@code ss:Index} equals {@code colIdx}, and finally the text of a grandchild
 * element whose parent carries {@code ss:Type='String'}.
 *
 * @param filename path of the XML file to query
 * @param rowIdx   value the row's {@code ss:Index} attribute must have
 * @param colIdx   value the cell's {@code ss:Index} attribute must have
 * @return the trimmed text of the first matching text node, or {@code null} when nothing
 *         matches or when reading/evaluating fails (the stack trace is printed in that case)
 */
public static String getCellValue(String filename, int rowIdx, int colIdx) {
    // search for Table element anywhere in the source
    String tableElementPattern = "//*[name()='Table']";
    // search for Row element with given number
    String rowPattern = String.format("/*[name()='Row' and @ss:Index='%d']", rowIdx);
    // search for Cell element with given column number
    String cellPattern = String.format("/*[name()='Cell' and @ss:Index='%d']", colIdx);
    // search for the element that has the ss:Type="String" attribute, then for a child
    // element that contains text, and select that text node
    String cellStringContent = "/*[@ss:Type='String']/*[text()]/text()";
    String completePattern = tableElementPattern + rowPattern + cellPattern + cellStringContent;
    // Hand the parser a byte stream rather than a Reader: a FileReader decodes with the
    // platform default charset, whereas a byte stream lets the XML parser honour the
    // encoding named in the document's own XML declaration.
    try (java.io.InputStream in = new java.io.FileInputStream(filename)) {
        XPath xPath = getXpathProcessor();
        Node n = (Node) xPath.compile(completePattern)
                .evaluate(new InputSource(in), XPathConstants.NODE);
        // evaluate() yields null when no node matches; that is "not found", not an error.
        if (n != null && n.getNodeType() == Node.TEXT_NODE) {
            return n.getNodeValue().trim();
        }
    } catch (Exception e) {
        // Best-effort contract kept from the original: report the failure and return null.
        e.printStackTrace();
    }
    return null;
}
/**
 * Creates an {@link XPath} evaluator whose namespace context knows the
 * {@code html}, {@code xsl}, {@code o}, {@code x} and {@code ss} prefixes.
 *
 * @return a new XPath instance with the prefix-to-URI bindings installed
 */
private static XPath getXpathProcessor() {
    XPath processor = XPathFactory.newInstance().newXPath();
    // The custom NamespaceContext implementation takes prefix/URI pairs in sequence.
    processor.setNamespaceContext(new NamespaceContextMap(
            "html", "http://www.w3.org/TR/REC-html40",
            "xsl", "http://www.w3.org/1999/XSL/Transform",
            "o", "urn:schemas-microsoft-com:office:office",
            "x", "urn:schemas-microsoft-com:office:excel",
            "ss", "urn:schemas-microsoft-com:office:spreadsheet"));
    return processor;
}
调用:
System.out.println(getCellValue("C://Temp/xx.xml", 7, 1));
生成所需的输出
下面是使用 vtd-xml...
查询您的 XML 的代码
import com.ximpleware.*;
public class queryXML{
public static void main(String[] s) throws VTDException{
VTDGen vg = new VTDGen();
vg.selectLcDepth(5);
if (!vg.parseFile("d:\xml\test11.xml", false))
return;
VTDNav vn = vg.getNav();
AutoPilot ap = new AutoPilot(vn);
ap.declareNameSpace("ss","urn:schemas-microsoft-com:office:spreadsheet");
ap.selectXPath("/Workbook/ExcelWorkbook/Worksheet/Table/Cell/ss:data/font/text()");
int i=0;
while((i=ap.evalXPath())!=-1){
System.out.println(" data content ==>"+vn.toString(i);
}
}
}
我有一个 xml 文件,如下所述:
<?xml version="1.0" encoding="UTF-8"?>
<Workbook>
<ExcelWorkbook
xmlns="urn:schemas-microsoft-com:office:excel"/>
<Worksheet ss:Name="Table 1">
<Table>
<Row ss:Index="7" ss:AutoFitHeight="0" ss:Height="12">
<Cell ss:Index="1" ss:StyleID="s05">
<ss:Data ss:Type="String"
xmlns="http://www.w3.org/TR/REC-html40">
<Font html:Size="9" html:Face="Times New Roman" x:Family="Roman" html:Color="#000000">
ABCD
</Font>
</ss:Data>
</Cell>
</Row>
如何在 Java 中使用 SAX 或 XPATH 提取数据,"ABCD"?
编辑 1:
这是XML-
<Table>
<Row ss:Index="74" ss:AutoFitHeight="0" ss:Height="14">
<Cell ss:Index="1" ss:MergeAcross="3" ss:StyleID="s29">
<ss:Data ss:Type="Number" xmlns="http://www.w3.org/TR/REC-html40">
0.00
</ss:Data>
</Cell>
<Cell ss:Index="15" ss:MergeAcross="5" ss:StyleID="s29">
<ss:Data ss:Type="Number" xmlns="http://www.w3.org/TR/REC-html40">
4.57
</ss:Data>
</Cell>
</Row>
解决方案假定问题是如何根据行号和列号获取任何单元格的文本。
由于输入文档中使用了名称空间,我花了一段时间才得到解决方案。显然,如果没有名称空间处理器,xpath 就无法解析带限定名的元素和属性,而且必须自己实现用于此目的的接口 NamespaceContext(似乎没有默认实现?),所以我找到了一个基于映射的实现(here)并使用了它。
因此,假设您的源代码树中有 link 中的 class,则以下代码有效。为了清楚起见,我将搜索模式分解为几个变量
/**
 * Returns the trimmed text of the string-typed cell at the given row and column.
 *
 * <p>The lookup is an XPath query built from four parts: any {@code Table} element,
 * a child {@code Row} whose {@code ss:Index} equals {@code rowIdx}, a child {@code Cell}
 * whose {@code ss:Index} equals {@code colIdx}, and finally the text of a grandchild
 * element whose parent carries {@code ss:Type='String'}.
 *
 * @param filename path of the XML file to query
 * @param rowIdx   value the row's {@code ss:Index} attribute must have
 * @param colIdx   value the cell's {@code ss:Index} attribute must have
 * @return the trimmed text of the first matching text node, or {@code null} when nothing
 *         matches or when reading/evaluating fails (the stack trace is printed in that case)
 */
public static String getCellValue(String filename, int rowIdx, int colIdx) {
    // search for Table element anywhere in the source
    String tableElementPattern = "//*[name()='Table']";
    // search for Row element with given number
    String rowPattern = String.format("/*[name()='Row' and @ss:Index='%d']", rowIdx);
    // search for Cell element with given column number
    String cellPattern = String.format("/*[name()='Cell' and @ss:Index='%d']", colIdx);
    // search for the element that has the ss:Type="String" attribute, then for a child
    // element that contains text, and select that text node
    String cellStringContent = "/*[@ss:Type='String']/*[text()]/text()";
    String completePattern = tableElementPattern + rowPattern + cellPattern + cellStringContent;
    // Hand the parser a byte stream rather than a Reader: a FileReader decodes with the
    // platform default charset, whereas a byte stream lets the XML parser honour the
    // encoding named in the document's own XML declaration.
    try (java.io.InputStream in = new java.io.FileInputStream(filename)) {
        XPath xPath = getXpathProcessor();
        Node n = (Node) xPath.compile(completePattern)
                .evaluate(new InputSource(in), XPathConstants.NODE);
        // evaluate() yields null when no node matches; that is "not found", not an error.
        if (n != null && n.getNodeType() == Node.TEXT_NODE) {
            return n.getNodeValue().trim();
        }
    } catch (Exception e) {
        // Best-effort contract kept from the original: report the failure and return null.
        e.printStackTrace();
    }
    return null;
}
/**
 * Creates an {@link XPath} evaluator whose namespace context knows the
 * {@code html}, {@code xsl}, {@code o}, {@code x} and {@code ss} prefixes.
 *
 * @return a new XPath instance with the prefix-to-URI bindings installed
 */
private static XPath getXpathProcessor() {
    XPath processor = XPathFactory.newInstance().newXPath();
    // The custom NamespaceContext implementation takes prefix/URI pairs in sequence.
    processor.setNamespaceContext(new NamespaceContextMap(
            "html", "http://www.w3.org/TR/REC-html40",
            "xsl", "http://www.w3.org/1999/XSL/Transform",
            "o", "urn:schemas-microsoft-com:office:office",
            "x", "urn:schemas-microsoft-com:office:excel",
            "ss", "urn:schemas-microsoft-com:office:spreadsheet"));
    return processor;
}
调用:
System.out.println(getCellValue("C://Temp/xx.xml", 7, 1));
生成所需的输出
下面是使用 vtd-xml...
查询您的 XML 的代码
import com.ximpleware.*;
public class queryXML{
public static void main(String[] s) throws VTDException{
VTDGen vg = new VTDGen();
vg.selectLcDepth(5);
if (!vg.parseFile("d:\xml\test11.xml", false))
return;
VTDNav vn = vg.getNav();
AutoPilot ap = new AutoPilot(vn);
ap.declareNameSpace("ss","urn:schemas-microsoft-com:office:spreadsheet");
ap.selectXPath("/Workbook/ExcelWorkbook/Worksheet/Table/Cell/ss:data/font/text()");
int i=0;
while((i=ap.evalXPath())!=-1){
System.out.println(" data content ==>"+vn.toString(i);
}
}
}