如何在 RStudio 中根据从 XML 提取的数据框创建分级统计图(choropleth map)
How to create choropleth map in R studio from a data frame extracted from XML
(我完全是 R 新手)我下载了一个 XML 文件,想在 R 中根据其中的数据创建分级统计图(choropleth map)。我使用的是美国流感数据。根据我查到的资料,我需要先把 XML 文件转换成 R 可以读取的数据框,于是我这样做了。但当我查看这个数据框时,看到的仍然全是 XML 格式的内容。我的问题是:如何从中提取出所需的信息来创建地图?目前我连绘制数据都会报错。我到处查找过相关资料,但至今还没有找到答案。
setwd("C:/Users/Steven/Downloads/Map_Final")
> library (XML)
> library(ggplot2)
> library(maps)
> library(plyr)
> library(mapproj)
> map('state')
>
> xmlfile=xmlParse("flu.xml")
>
> class(xmlfile)
[1] "XMLInternalDocument" "XMLAbstractDocument"
> ggplot(xmlfile)
Error: ggplot2 doesn't know how to deal with data of class XMLInternalDocumentXMLAbstractDocument
> xmltop = xmlRoot(xmlfile) #gives content of root
>
> class(xmltop)#"XMLInternalElementNode" "XMLInternalNode" "XMLAbstractNode"
[1] "XMLInternalElementNode" "XMLInternalNode" "XMLAbstractNode"
>
> xmlName(xmltop) #give name of node, PubmedArticleSet
[1] "timeperiod"
>
> xmlSize(xmltop) #how many children in node, 19
[1] 54
>
> xmlName(xmltop[[1]]) #name of root's children
[1] "state"
>
> xmltop[[1]]
<state>
<abbrev>ME</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
>
> xmltop[[2]]
<state>
<abbrev>NH</abbrev>
<color>Local Activity</color>
<label>Local Activity</label>
</state>
>
> ggplot(xmltop)
Error: ggplot2 doesn't know how to deal with data of class XMLInternalElementNodeXMLInternalNodeXMLAbstractNode
> xmltop[[2]]
<state>
<abbrev>NH</abbrev>
<color>Local Activity</color>
<label>Local Activity</label>
</state>
>
> xmltop[[2]]
<state>
<abbrev>NH</abbrev>
<color>Local Activity</color>
<label>Local Activity</label>
</state>
>
> birdflu=ldply(xmlToList("flu.xml"), data.frame)
> ggplot(birdflu)
Error: No layers in plot
> View(birdflu)
XML 文件:
<timeperiod number="40" year="2014" subtitle="Week Ending October 11, 2014- Week 40">
<state>
<abbrev>ME</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>NH</abbrev>
<color>Local Activity</color>
<label>Local Activity</label>
</state>
<state>
<abbrev>VT</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>MA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>RI</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>CT</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>NY</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>NJ</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>PR</abbrev>
<color>Regional</color>
<label>Regional</label>
</state>
<state>
<abbrev>VI</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>PA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>DE</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>MD</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>DC</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>VA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>WV</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>NC</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>SC</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>GA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>FL</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>KY</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>TN</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>AL</abbrev>
<color>Local Activity</color>
<label>Local Activity</label>
</state>
<state>
<abbrev>MS</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>OH</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>IN</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>IL</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>MI</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>WI</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>MN</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>AR</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>LA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>OK</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>TX</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>NM</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>IA</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>MO</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>NE</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>KS</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>ND</abbrev>
<color>Local Activity</color>
<label>Local Activity</label>
</state>
<state>
<abbrev>SD</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>MT</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>WY</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>CO</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>UT</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>AZ</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>NV</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>CA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>HI</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>GU</abbrev>
<color>Widespread</color>
<label>Widespread</label>
</state>
<state>
<abbrev>ID</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>WA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>OR</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>AK</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
</timeperiod>
下面是一个使用内置(ggplot)美国地图数据的带注释的基础示例。如果您还需要显示海外领地(那里的疫情似乎更严重),则需要参考 Stack Overflow 上的其他示例(这样的示例有很多)。
library(xml2)
library(dplyr)
library(ggplot2)

# Build a choropleth of flu activity per US state from "flu.xml".
# Input: an XML document whose <state> nodes each carry <abbrev> (postal
# abbreviation) and <color> (activity level) children.
# Output: a ggplot object `gg`, printed at the end of the script.

# Read in the XML file.
flu <- read_xml("flu.xml")

# Extract one row per <state> node. Looking both children up relative to the
# same parent keeps abbrev/value aligned even if a node lacks one of them
# (xml_find_first yields a missing node, which xml_text turns into NA).
states <- xml_find_all(flu, "//state")
flu_dat <- data.frame(
  id = xml_text(xml_find_first(states, "./abbrev")),
  value = xml_text(xml_find_first(states, "./color")),
  stringsAsFactors = FALSE
)

# The built-in map data is keyed by LOWER-CASE full state names, so the
# abbreviation lookup must produce lower-case names as well. DC is not part
# of state.abb/state.name but is present in the base map, so add it by hand.
state_name <- setNames(
  tolower(c(state.name, "District of Columbia")),
  c(state.abb, "DC")
)

us <- map_data("state")

# Convert abbreviation to region name, make the level an ordered factor and
# keep only regions the built-in base map can draw. Territories (PR, VI, GU)
# and AK/HI are dropped here; use another base map if they are needed.
# NOTE(review): levels follow the CDC geographic-spread scale as I recall it,
# where "Sporadic" ranks below "Local Activity" - confirm against CDC docs.
flu_dat <- flu_dat %>%
  mutate(
    id = unname(state_name[id]),
    Level = factor(
      value,
      levels = c("No Activity", "Sporadic", "Local Activity",
                 "Regional", "Widespread"),
      ordered = TRUE
    )
  ) %>%
  filter(id %in% unique(us$region))

# For the theme_map() convenience function.
# NOTE(review): this downloads and executes remote code and needs devtools
# plus network access; ggplot2's own theme_void() is a dependency-free
# alternative if that is a concern.
devtools::source_gist("33baa3a79c5cfef0f6df")

gg <- ggplot()

# Plot state outlines. geom_map() only consumes map_id; the plot extent is
# set explicitly from the polygon coordinates via expand_limits().
gg <- gg + geom_map(data = us, map = us,
                    aes(map_id = region),
                    fill = "#ffffff", color = "#7f7f7f", size = 0.25)
gg <- gg + expand_limits(x = us$long, y = us$lat)

# Plot fills based on the flu data.
gg <- gg + geom_map(data = flu_dat, map = us,
                    aes(fill = Level, map_id = id),
                    color = "#7f7f7f", size = 0.25)

# Manual fill scale; drop = FALSE keeps every possible level on the legend
# even when it does not occur in this week's data.
gg <- gg + scale_fill_manual(values = c("#f2f0f7", "#dadaeb", "#bcbddc",
                                        "#9e9ac8", "#756bb1"),
                             drop = FALSE)

# A proper US projection (requires the mapproj package to be installed).
gg <- gg + coord_map("albers", lat0 = 39, lat1 = 45)
gg <- gg + theme_map()
gg <- gg + theme(legend.position = "right")

# Explicit print so the map is also drawn when the script is source()d.
print(gg)
(我完全是 R 新手)我下载了一个 XML 文件,想在 R 中根据其中的数据创建分级统计图(choropleth map)。我使用的是美国流感数据。根据我查到的资料,我需要先把 XML 文件转换成 R 可以读取的数据框,于是我这样做了。但当我查看这个数据框时,看到的仍然全是 XML 格式的内容。我的问题是:如何从中提取出所需的信息来创建地图?目前我连绘制数据都会报错。我到处查找过相关资料,但至今还没有找到答案。
setwd("C:/Users/Steven/Downloads/Map_Final")
> library (XML)
> library(ggplot2)
> library(maps)
> library(plyr)
> library(mapproj)
> map('state')
>
> xmlfile=xmlParse("flu.xml")
>
> class(xmlfile)
[1] "XMLInternalDocument" "XMLAbstractDocument"
> ggplot(xmlfile)
Error: ggplot2 doesn't know how to deal with data of class XMLInternalDocumentXMLAbstractDocument
> xmltop = xmlRoot(xmlfile) #gives content of root
>
> class(xmltop)#"XMLInternalElementNode" "XMLInternalNode" "XMLAbstractNode"
[1] "XMLInternalElementNode" "XMLInternalNode" "XMLAbstractNode"
>
> xmlName(xmltop) #give name of node, PubmedArticleSet
[1] "timeperiod"
>
> xmlSize(xmltop) #how many children in node, 19
[1] 54
>
> xmlName(xmltop[[1]]) #name of root's children
[1] "state"
>
> xmltop[[1]]
<state>
<abbrev>ME</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
>
> xmltop[[2]]
<state>
<abbrev>NH</abbrev>
<color>Local Activity</color>
<label>Local Activity</label>
</state>
>
> ggplot(xmltop)
Error: ggplot2 doesn't know how to deal with data of class XMLInternalElementNodeXMLInternalNodeXMLAbstractNode
> xmltop[[2]]
<state>
<abbrev>NH</abbrev>
<color>Local Activity</color>
<label>Local Activity</label>
</state>
>
> xmltop[[2]]
<state>
<abbrev>NH</abbrev>
<color>Local Activity</color>
<label>Local Activity</label>
</state>
>
> birdflu=ldply(xmlToList("flu.xml"), data.frame)
> ggplot(birdflu)
Error: No layers in plot
> View(birdflu)
XML 文件:
<timeperiod number="40" year="2014" subtitle="Week Ending October 11, 2014- Week 40">
<state>
<abbrev>ME</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>NH</abbrev>
<color>Local Activity</color>
<label>Local Activity</label>
</state>
<state>
<abbrev>VT</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>MA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>RI</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>CT</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>NY</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>NJ</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>PR</abbrev>
<color>Regional</color>
<label>Regional</label>
</state>
<state>
<abbrev>VI</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>PA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>DE</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>MD</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>DC</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>VA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>WV</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>NC</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>SC</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>GA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>FL</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>KY</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>TN</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>AL</abbrev>
<color>Local Activity</color>
<label>Local Activity</label>
</state>
<state>
<abbrev>MS</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>OH</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>IN</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>IL</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>MI</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>WI</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>MN</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>AR</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>LA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>OK</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>TX</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>NM</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>IA</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>MO</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>NE</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>KS</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>ND</abbrev>
<color>Local Activity</color>
<label>Local Activity</label>
</state>
<state>
<abbrev>SD</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>MT</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>WY</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>CO</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>UT</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>AZ</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>NV</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>CA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>HI</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>GU</abbrev>
<color>Widespread</color>
<label>Widespread</label>
</state>
<state>
<abbrev>ID</abbrev>
<color>No Activity</color>
<label>No Activity</label>
</state>
<state>
<abbrev>WA</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>OR</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
<state>
<abbrev>AK</abbrev>
<color>Sporadic</color>
<label>Sporadic</label>
</state>
</timeperiod>
下面是一个使用内置(ggplot)美国地图数据的带注释的基础示例。如果您还需要显示海外领地(那里的疫情似乎更严重),则需要参考 Stack Overflow 上的其他示例(这样的示例有很多)。
library(xml2)
library(dplyr)
library(ggplot2)

# Build a choropleth of flu activity per US state from "flu.xml".
# Input: an XML document whose <state> nodes each carry <abbrev> (postal
# abbreviation) and <color> (activity level) children.
# Output: a ggplot object `gg`, printed at the end of the script.

# Read in the XML file.
flu <- read_xml("flu.xml")

# Extract one row per <state> node. Looking both children up relative to the
# same parent keeps abbrev/value aligned even if a node lacks one of them
# (xml_find_first yields a missing node, which xml_text turns into NA).
states <- xml_find_all(flu, "//state")
flu_dat <- data.frame(
  id = xml_text(xml_find_first(states, "./abbrev")),
  value = xml_text(xml_find_first(states, "./color")),
  stringsAsFactors = FALSE
)

# The built-in map data is keyed by LOWER-CASE full state names, so the
# abbreviation lookup must produce lower-case names as well. DC is not part
# of state.abb/state.name but is present in the base map, so add it by hand.
state_name <- setNames(
  tolower(c(state.name, "District of Columbia")),
  c(state.abb, "DC")
)

us <- map_data("state")

# Convert abbreviation to region name, make the level an ordered factor and
# keep only regions the built-in base map can draw. Territories (PR, VI, GU)
# and AK/HI are dropped here; use another base map if they are needed.
# NOTE(review): levels follow the CDC geographic-spread scale as I recall it,
# where "Sporadic" ranks below "Local Activity" - confirm against CDC docs.
flu_dat <- flu_dat %>%
  mutate(
    id = unname(state_name[id]),
    Level = factor(
      value,
      levels = c("No Activity", "Sporadic", "Local Activity",
                 "Regional", "Widespread"),
      ordered = TRUE
    )
  ) %>%
  filter(id %in% unique(us$region))

# For the theme_map() convenience function.
# NOTE(review): this downloads and executes remote code and needs devtools
# plus network access; ggplot2's own theme_void() is a dependency-free
# alternative if that is a concern.
devtools::source_gist("33baa3a79c5cfef0f6df")

gg <- ggplot()

# Plot state outlines. geom_map() only consumes map_id; the plot extent is
# set explicitly from the polygon coordinates via expand_limits().
gg <- gg + geom_map(data = us, map = us,
                    aes(map_id = region),
                    fill = "#ffffff", color = "#7f7f7f", size = 0.25)
gg <- gg + expand_limits(x = us$long, y = us$lat)

# Plot fills based on the flu data.
gg <- gg + geom_map(data = flu_dat, map = us,
                    aes(fill = Level, map_id = id),
                    color = "#7f7f7f", size = 0.25)

# Manual fill scale; drop = FALSE keeps every possible level on the legend
# even when it does not occur in this week's data.
gg <- gg + scale_fill_manual(values = c("#f2f0f7", "#dadaeb", "#bcbddc",
                                        "#9e9ac8", "#756bb1"),
                             drop = FALSE)

# A proper US projection (requires the mapproj package to be installed).
gg <- gg + coord_map("albers", lat0 = 39, lat1 = 45)
gg <- gg + theme_map()
gg <- gg + theme(legend.position = "right")

# Explicit print so the map is also drawn when the script is source()d.
print(gg)