xml_find_all 返回 {xml_nodeset (0)}
xml_find_all 返回 {xml_nodeset (0)}
我最近从这个地图(map)下载了 KML 文件,并尝试使用 xml2 包来提取露营地的信息,例如地理位置、营地周围的设施等;但最后我得到的却是 {xml_nodeset (0)}。
以下是我用过的代码,
library(xml2)
campsites <- read_xml("file_path")
xml_find_all(campsites, ".//Placemark")
这是 KML 文件的结构(你也可以试试 xml_structure(campsites)):
> library(magrittr)
> campsites
{xml_document}
<kml>
[1] <Document>\n<description><![CDATA[powered by <a href="http://www.wordpress.org">WordPress</a> & <a href="https://www.mapsmarker.com">MapsMarker.com</a>]] ...
>
> campsites %>% xml_children %>% xml_children %>% xml_children
{xml_nodeset (55)}
[1] <IconStyle>\n <Icon>\n <href>http://www.mountaineering-lohas.org/wp-content/uploads/leaflet-maps-marker-icons/tents.png</href>\n </Icon>\n</IconStyle>
[2] <IconStyle>\n <Icon>\n <href>http://www.mountaineering-lohas.org/wp-content/uploads/leaflet-maps-marker-icons/tents-1.png</href>\n </Icon>\n</IconStyle>
[3] <IconStyle>\n <Icon>\n <href>http://www.mountaineering-lohas.org/wp-content/uploads/leaflet-maps-marker-icons/tents1.png</href>\n </Icon>\n</IconStyle>
[4] <name>香港營地 Hong Kong Camp Site</name>
[5] <Placemark id="marker-1">\n<styleUrl>#tents</styleUrl>\n<name>流水響營地 ( Lau Shui Heung Camp Site )</name>\n<TimeStamp><when>2013-02-21T04:02:29+08: ...
[6] <Placemark id="marker-2">\n<styleUrl>#tents</styleUrl>\n<name>鶴藪營地(Hok Tau Camp Site)</name>\n<TimeStamp><when>2013-02-21T04:02:18+08:00</when></Tim ...
[7] <Placemark id="marker-3">\n<styleUrl>#tents</styleUrl>\n<name>涌背營地(Chung Pui Camp Site)</name>\n<TimeStamp><when>2013-02-22T11:02:02+08:00</when></T ...
[8] <Placemark id="marker-4">\n<styleUrl>#tents</styleUrl>\n<name>東平洲營地 (Tung Ping Chau Campsite)</name>\n<TimeStamp><when>2013-02-22T11:02:39+08:00</ ...
[9] <Placemark id="marker-5">\n<styleUrl>#tents</styleUrl>\n<name>灣仔南營地(Wan Tsai Peninsula South Campsite)</name>\n<TimeStamp><when>2013-02-22T11:02:2 ...
[10] <Placemark id="marker-6">\n<styleUrl>#tents</styleUrl>\n<name>灣仔西營地 (Wan Tsai Peninsula West Campsite)</name>\n<TimeStamp><when>2013-02-22T11:02:3 ...
...
如你所见,文件中确实有名为 "Placemark" 的节点,为什么我用 xml_find_all 却找不到这些节点?我的代码有没有错误?
谢谢!
您的文档似乎定义了多个命名空间。KML 元素属于默认命名空间(xml2 将其标记为 d1),因此不带前缀的 XPath 匹配不到任何节点;只要在 XPath 中加上命名空间前缀,就可以得到节点集。
xml_ns(campsites)
# d1 <-> http://www.opengis.net/kml/2.2
# atom <-> http://www.w3.org/2005/Atom
# gx <-> http://www.google.com/kml/ext/2.2
xml_find_all(campsites, ".//d1:Placemark", xml_ns(campsites))
# {xml_nodeset (45)}
# [1] <Placemark id="marker-1">\n<styleUrl>#tents</styleUrl>\n<name>流水響營地 ( La ...
# [2] <Placemark id="marker-2">\n<styleUrl>#tents</styleUrl>\n<name>鶴藪營地(Hok T ...
# ...
要获取 cdata 中的文本,您可以使用
xml_text(xml_find_all(campsites, "//d1:description", xml_ns(campsites)))
# or "//d1:description/text()"
我最近从这个地图(map)下载了 KML 文件,并尝试使用 xml2 包来提取露营地的信息,例如地理位置、营地周围的设施等;但最后我得到的却是 {xml_nodeset (0)}。
以下是我用过的代码,
library(xml2)
campsites <- read_xml("file_path")
xml_find_all(campsites, ".//Placemark")
这是 KML 文件的结构(你也可以试试 xml_structure(campsites)):
> library(magrittr)
> campsites
{xml_document}
<kml>
[1] <Document>\n<description><![CDATA[powered by <a href="http://www.wordpress.org">WordPress</a> & <a href="https://www.mapsmarker.com">MapsMarker.com</a>]] ...
>
> campsites %>% xml_children %>% xml_children %>% xml_children
{xml_nodeset (55)}
[1] <IconStyle>\n <Icon>\n <href>http://www.mountaineering-lohas.org/wp-content/uploads/leaflet-maps-marker-icons/tents.png</href>\n </Icon>\n</IconStyle>
[2] <IconStyle>\n <Icon>\n <href>http://www.mountaineering-lohas.org/wp-content/uploads/leaflet-maps-marker-icons/tents-1.png</href>\n </Icon>\n</IconStyle>
[3] <IconStyle>\n <Icon>\n <href>http://www.mountaineering-lohas.org/wp-content/uploads/leaflet-maps-marker-icons/tents1.png</href>\n </Icon>\n</IconStyle>
[4] <name>香港營地 Hong Kong Camp Site</name>
[5] <Placemark id="marker-1">\n<styleUrl>#tents</styleUrl>\n<name>流水響營地 ( Lau Shui Heung Camp Site )</name>\n<TimeStamp><when>2013-02-21T04:02:29+08: ...
[6] <Placemark id="marker-2">\n<styleUrl>#tents</styleUrl>\n<name>鶴藪營地(Hok Tau Camp Site)</name>\n<TimeStamp><when>2013-02-21T04:02:18+08:00</when></Tim ...
[7] <Placemark id="marker-3">\n<styleUrl>#tents</styleUrl>\n<name>涌背營地(Chung Pui Camp Site)</name>\n<TimeStamp><when>2013-02-22T11:02:02+08:00</when></T ...
[8] <Placemark id="marker-4">\n<styleUrl>#tents</styleUrl>\n<name>東平洲營地 (Tung Ping Chau Campsite)</name>\n<TimeStamp><when>2013-02-22T11:02:39+08:00</ ...
[9] <Placemark id="marker-5">\n<styleUrl>#tents</styleUrl>\n<name>灣仔南營地(Wan Tsai Peninsula South Campsite)</name>\n<TimeStamp><when>2013-02-22T11:02:2 ...
[10] <Placemark id="marker-6">\n<styleUrl>#tents</styleUrl>\n<name>灣仔西營地 (Wan Tsai Peninsula West Campsite)</name>\n<TimeStamp><when>2013-02-22T11:02:3 ...
...
如你所见,文件中确实有名为 "Placemark" 的节点,为什么我用 xml_find_all 却找不到这些节点?我的代码有没有错误?
谢谢!
您的文档似乎定义了多个命名空间。KML 元素属于默认命名空间(xml2 将其标记为 d1),因此不带前缀的 XPath 匹配不到任何节点;只要在 XPath 中加上命名空间前缀,就可以得到节点集。
xml_ns(campsites)
# d1 <-> http://www.opengis.net/kml/2.2
# atom <-> http://www.w3.org/2005/Atom
# gx <-> http://www.google.com/kml/ext/2.2
xml_find_all(campsites, ".//d1:Placemark", xml_ns(campsites))
# {xml_nodeset (45)}
# [1] <Placemark id="marker-1">\n<styleUrl>#tents</styleUrl>\n<name>流水響營地 ( La ...
# [2] <Placemark id="marker-2">\n<styleUrl>#tents</styleUrl>\n<name>鶴藪營地(Hok T ...
# ...
要获取 cdata 中的文本,您可以使用
xml_text(xml_find_all(campsites, "//d1:description", xml_ns(campsites)))
# or "//d1:description/text()"