获取 XML 中子节点的名称时返回 #text(C++)
Getting the name of child node in xml returns #text c++
我正在尝试检索 xml 文档中子节点标记的名称。我的 xml 文档看起来像这样:
<?xml version="1.0" encoding="utf-8"?>
<Parent>
<child1>
<grandchild1>someinfo1</grandchild1>
<grandchild2>someinfo2</grandchild2>
</child1>
<child2>
<grandchild3>someinfo3</grandchild3>
<grandchild4>someinfo4</grandchild4>
</child2>
</Parent>
我需要循环查找标签名称,例如 child1 grandchild1 等
我执行以下操作的代码如下:
// Question snippet (abridged by its author): select every element directly
// under <Parent> and try to read the name of each one's first child.
IXMLDOMDocument *pXMLDom = NULL;
IXMLDOMNodeList *pNodes = NULL;
IXMLDOMNode *pNode = NULL;
pXMLDom->put_async(VARIANT_FALSE);
pXMLDom->put_validateOnParse(VARIANT_TRUE);
pXMLDom->put_resolveExternals(VARIANT_FALSE);
// With this set to VARIANT_TRUE, whitespace between elements is kept in the
// tree as extra text nodes.
pXMLDom->put_preserveWhiteSpace(VARIANT_TRUE);
// The "*" wildcard matches element nodes only.
BSTR parentNode = SysAllocString(L"//Parent/*");
pXMLDom->selectNodes(parentNode, &pNodes);
// NOTE(review): `length` is not declared anywhere in this abridged snippet.
pNodes->get_length(&length);
for (int i = 0; i < length; i++)
{
pNodes->get_item(i, &pNode);
BSTR temp = NULL;
pNode->get_xml(&temp);
printf("Node (%d), <%S>:\n", i, temp); // works fine until this point
IXMLDOMNode *firstChild;
pNode->get_firstChild(&firstChild);
IXMLDOMNodeList *childNodes;
pNode->get_childNodes(&childNodes);
// Because whitespace is preserved, the first child here is presumably the
// whitespace text node rather than an element, hence the "#text" name.
firstChild->get_nodeName(&temp); // Does not work
firstChild->get_baseName(&temp); // Does not work
}
请注意,为了简单起见,我只提供了代码的极简版本。如果需要任何额外的说明或代码,我将很乐意提供。任何指向正确方向的指示都会有所帮助。大部分代码都是借助msdn编写的。
发布问题后,我得到了我要找的东西!
不保留空格有效:
pXMLDom->put_preserveWhiteSpace(VARIANT_FALSE);
XML由节点组成,节点有很多种(元素、属性、文本、命名空间、处理指令、注释、文档等)。
包含文本内容的 XML 元素节点会有一个名为 #text 的子节点,这是由 XML 规范规定的。因此,在您的示例中,grandchild1、grandchild2、grandchild3 和 grandchild4 各有一个 #text 子节点,例如:
Document
 |
 |_ PI: <?xml version="1.0" encoding="utf-8"?>
 |
 |_ Element: "Parent"
     |
     |_ Element: "child1"
     |   |
     |   |_ Element: "grandchild1"
     |   |   |
     |   |   |_ #text "someinfo1"
     |   |
     |   |_ Element: "grandchild2"
     |       |
     |       |_ #text "someinfo2"
     |
     |_ Element: "child2"
         |
         |_ Element: "grandchild3"
         |   |
         |   |_ #text: "someinfo3"
         |
         |_ Element: "grandchild4"
             |
             |_ #text: "someinfo4"
即使是元素之间的空格,即使只是换行符,也会作为额外的文本节点存储(因为您将 preserveWhiteSpace
选项设置为 true),例如:
Document
 |
 |_ PI: <?xml version="1.0" encoding="utf-8"?>
 |
 |_ #text "\r\n"
 |
 |_ Element: "Parent"
     |
     |_ #text "\r\n "
     |
     |_ Element: "child1"
     |   |
     |   |_ #text "\r\n "
     |   |
     |   |_ Element: "grandchild1"
     |   |   |
     |   |   |_ #text "someinfo1"
     |   |
     |   |_ #text "\r\n "
     |   |
     |   |_ Element: "grandchild2"
     |       |
     |       |_ #text "someinfo2"
     |
     |_ #text "\r\n "
     |
     |_ Element: "child2"
     |   |
     |   |_ #text "\r\n "
     |   |
     |   |_ Element: "grandchild3"
     |   |   |
     |   |   |_ #text: "someinfo3"
     |   |
     |   |_ #text "\r\n "
     |   |
     |   |_ Element: "grandchild4"
     |   |   |
     |   |   |_ #text: "someinfo4"
     |   |
     |   |_ #text "\r\n "
     |
     |_ #text "\r\n"
XPath 会搜索所有节点,但 * 通配符只匹配元素节点。然而,您是在手动遍历所找到元素的子节点,因此会遇到 #text 节点。对于您要做的事情,请关闭空白保留以去掉不需要的空白文本节点,然后只关注元素类型的子节点,例如:
// Prints the name of every element directly under <Parent> and, for each of
// them, the names of its own element children (two levels in total).
IXMLDOMDocument *pXMLDom = NULL;
IXMLDOMNodeList *pNodes = NULL;
IXMLDOMNode *pNode = NULL;
long length = 0;

// create pXMLDom as needed ...

pXMLDom->put_async(VARIANT_FALSE);
pXMLDom->put_validateOnParse(VARIANT_TRUE);
pXMLDom->put_resolveExternals(VARIANT_FALSE);
pXMLDom->put_preserveWhiteSpace(VARIANT_FALSE); // <--

BSTR parentNode = SysAllocString(L"//Parent/*");
HRESULT hRes = pXMLDom->selectNodes(parentNode, &pNodes);
SysFreeString(parentNode); // the query string is no longer needed once the call returns

if (SUCCEEDED(hRes))
{
    pNodes->get_length(&length);
    for (int i = 0; i < length; ++i)
    {
        hRes = pNodes->get_item(i, &pNode);
        if (SUCCEEDED(hRes))
        {
            BSTR name = NULL;
            hRes = pNode->get_nodeName(&name);
            if (SUCCEEDED(hRes))
            {
                printf("Node (%d), <%S>:\n", i, name);
                SysFreeString(name);
                name = NULL;
            }

            IXMLDOMNode *pChild = NULL;
            hRes = pNode->get_firstChild(&pChild);
            if (hRes == S_OK) // anything else (e.g. S_FALSE) means there is no child to walk
            {
                do
                {
                    DOMNodeType type;
                    hRes = pChild->get_nodeType(&type);
                    // Skip text, comment and other non-element children.
                    if (SUCCEEDED(hRes) && (type == NODE_ELEMENT))
                    {
                        // Ask the child for its name, not the parent pNode.
                        hRes = pChild->get_nodeName(&name);
                        if (SUCCEEDED(hRes))
                        {
                            printf(" %S\n", name);
                            SysFreeString(name);
                            name = NULL;
                        }
                    }

                    IXMLDOMNode *pSibling = NULL;
                    hRes = pChild->get_nextSibling(&pSibling);
                    if (hRes != S_OK) break; // no further sibling; pChild is released below
                    pChild->Release();
                    pChild = pSibling;
                }
                while (true);
                pChild->Release();
            }
            pNode->Release();
        }
    }
    pNodes->Release();
}

// ...

pXMLDom->Release();
如果你需要超过 2 级深度,你应该设置一个递归循环,例如:
// Prints the name of pNode, then recurses into each of its element children
// so that the whole element subtree is listed depth-first.
//
// pNode: node to print; the caller keeps ownership of the reference and
//        remains responsible for releasing it. A NULL pointer is ignored.
void processNode(IXMLDOMNode *pNode)
{
    if (pNode == NULL)
        return;

    BSTR name = NULL;
    HRESULT hRes = pNode->get_nodeName(&name);
    if (SUCCEEDED(hRes))
    {
        printf("%S\n", name);
        SysFreeString(name);
    }

    IXMLDOMNode *pChild = NULL;
    hRes = pNode->get_firstChild(&pChild);
    if (hRes == S_OK) // anything else (e.g. S_FALSE) means there is no child to walk
    {
        do
        {
            DOMNodeType type;
            hRes = pChild->get_nodeType(&type);
            // Only element children are descended into; text nodes are skipped.
            if (SUCCEEDED(hRes) && (type == NODE_ELEMENT))
                processNode(pChild);

            IXMLDOMNode *pSibling = NULL;
            hRes = pChild->get_nextSibling(&pSibling);
            if (hRes != S_OK) break; // no further sibling; pChild is released below
            pChild->Release();
            pChild = pSibling;
        }
        while (true);
        pChild->Release();
    }
}
...
// Driver for the recursive variant: selects every element directly under
// <Parent> and hands each one to processNode().
IXMLDOMDocument *pXMLDom = NULL;
IXMLDOMNodeList *pNodes = NULL;
IXMLDOMNode *pNode = NULL;
long count = 0;

// create pXMLDom as needed ...

pXMLDom->put_async(VARIANT_FALSE);
pXMLDom->put_validateOnParse(VARIANT_TRUE);
pXMLDom->put_resolveExternals(VARIANT_FALSE);
pXMLDom->put_preserveWhiteSpace(VARIANT_FALSE); // <--

BSTR query = SysAllocString(L"//Parent/*");
HRESULT hRes = pXMLDom->selectNodes(query, &pNodes);
SysFreeString(query);

if (SUCCEEDED(hRes))
{
    pNodes->get_length(&count);

    int idx = 0;
    while (idx < count)
    {
        hRes = pNodes->get_item(idx, &pNode);
        ++idx;
        if (FAILED(hRes))
            continue; // nothing was fetched for this index, so nothing to release

        processNode(pNode);
        pNode->Release();
    }

    pNodes->Release();
}

// ...

pXMLDom->Release();
我正在尝试检索 xml 文档中子节点标记的名称。我的 xml 文档看起来像这样:
<?xml version="1.0" encoding="utf-8"?>
<Parent>
<child1>
<grandchild1>someinfo1</grandchild1>
<grandchild2>someinfo2</grandchild2>
</child1>
<child2>
<grandchild3>someinfo3</grandchild3>
<grandchild4>someinfo4</grandchild4>
</child2>
</Parent>
我需要循环查找标签名称,例如 child1 grandchild1 等
我执行以下操作的代码如下:
// Question snippet (abridged by its author): select every element directly
// under <Parent> and try to read the name of each one's first child.
IXMLDOMDocument *pXMLDom = NULL;
IXMLDOMNodeList *pNodes = NULL;
IXMLDOMNode *pNode = NULL;
pXMLDom->put_async(VARIANT_FALSE);
pXMLDom->put_validateOnParse(VARIANT_TRUE);
pXMLDom->put_resolveExternals(VARIANT_FALSE);
// With this set to VARIANT_TRUE, whitespace between elements is kept in the
// tree as extra text nodes.
pXMLDom->put_preserveWhiteSpace(VARIANT_TRUE);
// The "*" wildcard matches element nodes only.
BSTR parentNode = SysAllocString(L"//Parent/*");
pXMLDom->selectNodes(parentNode, &pNodes);
// NOTE(review): `length` is not declared anywhere in this abridged snippet.
pNodes->get_length(&length);
for (int i = 0; i < length; i++)
{
pNodes->get_item(i, &pNode);
BSTR temp = NULL;
pNode->get_xml(&temp);
printf("Node (%d), <%S>:\n", i, temp); // works fine until this point
IXMLDOMNode *firstChild;
pNode->get_firstChild(&firstChild);
IXMLDOMNodeList *childNodes;
pNode->get_childNodes(&childNodes);
// Because whitespace is preserved, the first child here is presumably the
// whitespace text node rather than an element, hence the "#text" name.
firstChild->get_nodeName(&temp); // Does not work
firstChild->get_baseName(&temp); // Does not work
}
请注意,为了简单起见,我只提供了代码的极简版本。如果需要任何额外的说明或代码,我将很乐意提供。任何指向正确方向的指示都会有所帮助。大部分代码都是借助msdn编写的。
发布问题后,我得到了我要找的东西!
不保留空格有效:
pXMLDom->put_preserveWhiteSpace(VARIANT_FALSE);
XML由节点组成,节点有很多种(元素、属性、文本、命名空间、处理指令、注释、文档等)。
包含文本内容的 XML 元素节点会有一个名为 #text 的子节点,这是由 XML 规范规定的。因此,在您的示例中,grandchild1、grandchild2、grandchild3 和 grandchild4 各有一个 #text 子节点,例如:
Document
 |
 |_ PI: <?xml version="1.0" encoding="utf-8"?>
 |
 |_ Element: "Parent"
     |
     |_ Element: "child1"
     |   |
     |   |_ Element: "grandchild1"
     |   |   |
     |   |   |_ #text "someinfo1"
     |   |
     |   |_ Element: "grandchild2"
     |       |
     |       |_ #text "someinfo2"
     |
     |_ Element: "child2"
         |
         |_ Element: "grandchild3"
         |   |
         |   |_ #text: "someinfo3"
         |
         |_ Element: "grandchild4"
             |
             |_ #text: "someinfo4"
即使是元素之间的空格,即使只是换行符,也会作为额外的文本节点存储(因为您将 preserveWhiteSpace
选项设置为 true),例如:
Document
 |
 |_ PI: <?xml version="1.0" encoding="utf-8"?>
 |
 |_ #text "\r\n"
 |
 |_ Element: "Parent"
     |
     |_ #text "\r\n "
     |
     |_ Element: "child1"
     |   |
     |   |_ #text "\r\n "
     |   |
     |   |_ Element: "grandchild1"
     |   |   |
     |   |   |_ #text "someinfo1"
     |   |
     |   |_ #text "\r\n "
     |   |
     |   |_ Element: "grandchild2"
     |       |
     |       |_ #text "someinfo2"
     |
     |_ #text "\r\n "
     |
     |_ Element: "child2"
     |   |
     |   |_ #text "\r\n "
     |   |
     |   |_ Element: "grandchild3"
     |   |   |
     |   |   |_ #text: "someinfo3"
     |   |
     |   |_ #text "\r\n "
     |   |
     |   |_ Element: "grandchild4"
     |   |   |
     |   |   |_ #text: "someinfo4"
     |   |
     |   |_ #text "\r\n "
     |
     |_ #text "\r\n"
XPath 会搜索所有节点,但 * 通配符只匹配元素节点。然而,您是在手动遍历所找到元素的子节点,因此会遇到 #text 节点。对于您要做的事情,请关闭空白保留以去掉不需要的空白文本节点,然后只关注元素类型的子节点,例如:
// Prints the name of every element directly under <Parent> and, for each of
// them, the names of its own element children (two levels in total).
IXMLDOMDocument *pXMLDom = NULL;
IXMLDOMNodeList *pNodes = NULL;
IXMLDOMNode *pNode = NULL;
long length = 0;

// create pXMLDom as needed ...

pXMLDom->put_async(VARIANT_FALSE);
pXMLDom->put_validateOnParse(VARIANT_TRUE);
pXMLDom->put_resolveExternals(VARIANT_FALSE);
pXMLDom->put_preserveWhiteSpace(VARIANT_FALSE); // <--

BSTR parentNode = SysAllocString(L"//Parent/*");
HRESULT hRes = pXMLDom->selectNodes(parentNode, &pNodes);
SysFreeString(parentNode); // the query string is no longer needed once the call returns

if (SUCCEEDED(hRes))
{
    pNodes->get_length(&length);
    for (int i = 0; i < length; ++i)
    {
        hRes = pNodes->get_item(i, &pNode);
        if (SUCCEEDED(hRes))
        {
            BSTR name = NULL;
            hRes = pNode->get_nodeName(&name);
            if (SUCCEEDED(hRes))
            {
                printf("Node (%d), <%S>:\n", i, name);
                SysFreeString(name);
                name = NULL;
            }

            IXMLDOMNode *pChild = NULL;
            hRes = pNode->get_firstChild(&pChild);
            if (hRes == S_OK) // anything else (e.g. S_FALSE) means there is no child to walk
            {
                do
                {
                    DOMNodeType type;
                    hRes = pChild->get_nodeType(&type);
                    // Skip text, comment and other non-element children.
                    if (SUCCEEDED(hRes) && (type == NODE_ELEMENT))
                    {
                        // Ask the child for its name, not the parent pNode.
                        hRes = pChild->get_nodeName(&name);
                        if (SUCCEEDED(hRes))
                        {
                            printf(" %S\n", name);
                            SysFreeString(name);
                            name = NULL;
                        }
                    }

                    IXMLDOMNode *pSibling = NULL;
                    hRes = pChild->get_nextSibling(&pSibling);
                    if (hRes != S_OK) break; // no further sibling; pChild is released below
                    pChild->Release();
                    pChild = pSibling;
                }
                while (true);
                pChild->Release();
            }
            pNode->Release();
        }
    }
    pNodes->Release();
}

// ...

pXMLDom->Release();
如果你需要超过 2 级深度,你应该设置一个递归循环,例如:
// Prints the name of pNode, then recurses into each of its element children
// so that the whole element subtree is listed depth-first.
//
// pNode: node to print; the caller keeps ownership of the reference and
//        remains responsible for releasing it. A NULL pointer is ignored.
void processNode(IXMLDOMNode *pNode)
{
    if (pNode == NULL)
        return;

    BSTR name = NULL;
    HRESULT hRes = pNode->get_nodeName(&name);
    if (SUCCEEDED(hRes))
    {
        printf("%S\n", name);
        SysFreeString(name);
    }

    IXMLDOMNode *pChild = NULL;
    hRes = pNode->get_firstChild(&pChild);
    if (hRes == S_OK) // anything else (e.g. S_FALSE) means there is no child to walk
    {
        do
        {
            DOMNodeType type;
            hRes = pChild->get_nodeType(&type);
            // Only element children are descended into; text nodes are skipped.
            if (SUCCEEDED(hRes) && (type == NODE_ELEMENT))
                processNode(pChild);

            IXMLDOMNode *pSibling = NULL;
            hRes = pChild->get_nextSibling(&pSibling);
            if (hRes != S_OK) break; // no further sibling; pChild is released below
            pChild->Release();
            pChild = pSibling;
        }
        while (true);
        pChild->Release();
    }
}
...
// Driver for the recursive variant: selects every element directly under
// <Parent> and hands each one to processNode().
IXMLDOMDocument *pXMLDom = NULL;
IXMLDOMNodeList *pNodes = NULL;
IXMLDOMNode *pNode = NULL;
long count = 0;

// create pXMLDom as needed ...

pXMLDom->put_async(VARIANT_FALSE);
pXMLDom->put_validateOnParse(VARIANT_TRUE);
pXMLDom->put_resolveExternals(VARIANT_FALSE);
pXMLDom->put_preserveWhiteSpace(VARIANT_FALSE); // <--

BSTR query = SysAllocString(L"//Parent/*");
HRESULT hRes = pXMLDom->selectNodes(query, &pNodes);
SysFreeString(query);

if (SUCCEEDED(hRes))
{
    pNodes->get_length(&count);

    int idx = 0;
    while (idx < count)
    {
        hRes = pNodes->get_item(idx, &pNode);
        ++idx;
        if (FAILED(hRes))
            continue; // nothing was fetched for this index, so nothing to release

        processNode(pNode);
        pNode->Release();
    }

    pNodes->Release();
}

// ...

pXMLDom->Release();