R & xml2:将 xml 文档值解析为向量或 data.frame
R & xml2: parsing an xml document values to vector or data.frame
我正在尝试解析下面 xml 中的变量名称、索引和值。对变量进行子集化是可行的,但是从每个变量中获取实际值有点困难。有人能给我指出正确的方向吗?
# Attach xml2; library() stops with an error right here if the package is
# missing, whereas require() would only return FALSE and let later calls fail.
library(xml2)
# CPLEX solution document embedded as a single string. Double quotes that
# belong to an attribute value are written as &quot; so the text is
# well-formed XML (a raw " inside name="..." would end the attribute early).
xml_file <- '<?xml version = "1.0" encoding="UTF-8" standalone="yes"?>
<CPLEXSolution version="1.2">
<header
problemName="Oil-blending.lp"
objectiveValue="287750"
solutionTypeValue="1"
solutionTypeString="basic"
solutionStatusValue="1"
solutionStatusString="optimal"
solutionMethodString="dual"
primalFeasible="1"
dualFeasible="1"
simplexIterations="14"
writeLevel="1"/>
<quality
epRHS="1e-06"
epOpt="1e-06"
maxPrimalInfeas="0"
maxDualInfeas="0"
maxPrimalResidual="9.66338120633736e-13"
maxDualResidual="7.105427357601e-15"
maxX="7500"
maxPi="57.25"
maxSlack="4000"
maxRedCost="40.9"
kappa="83.7880434782609"/>
<linearConstraints>
<constraint name="ct_demand({&quot;Super&quot;})" index="0" status="LL" slack="0" dual="-20.8"/>
<constraint name="ct_demand({&quot;Regular&quot;})" index="1" status="LL" slack="0" dual="0.1"/>
<constraint name="ct_demand({&quot;Diesel&quot;})" index="2" status="LL" slack="0" dual="-40.8"/>
<constraint name="ct_capacity({&quot;Crude1&quot;})" index="3" status="LL" slack="0" dual="57.25"/>
<constraint name="ct_capacity({&quot;Crude2&quot;})" index="4" status="LL" slack="0" dual="20.9"/>
<constraint name="ct_capacity({&quot;Crude3&quot;})" index="5" status="BS" slack="1500" dual="0"/>
<constraint name="ct_total_max_prod" index="6" status="BS" slack="499.999999999997" dual="0"/>
<constraint name="ct_octane_min({&quot;Super&quot;})" index="7" status="BS" slack="-2000" dual="-0"/>
<constraint name="ct_octane_min({&quot;Regular&quot;})" index="8" status="LL" slack="0" dual="-1.77635683940025e-15"/>
<constraint name="ct_octane_min({&quot;Diesel&quot;})" index="9" status="BS" slack="-4000" dual="-0"/>
<constraint name="ct_lead_max({&quot;Super&quot;})" index="10" status="LL" slack="0" dual="30.9"/>
<constraint name="ct_lead_max({&quot;Regular&quot;})" index="11" status="LL" slack="0" dual="30.9"/>
<constraint name="ct_lead_max({&quot;Diesel&quot;})" index="12" status="LL" slack="0" dual="30.9"/>
</linearConstraints>
<variables>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Super&quot;})" index="0" status="BS" value="2222.22222222222" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Super&quot;})" index="1" status="BS" value="444.444444444444" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Super&quot;})" index="2" status="BS" value="333.333333333333" reducedCost="-0"/>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Regular&quot;})" index="3" status="BS" value="2111.11111111111" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Regular&quot;})" index="4" status="BS" value="4222.22222222222" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Regular&quot;})" index="5" status="BS" value="3166.66666666667" reducedCost="-0"/>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Diesel&quot;})" index="6" status="BS" value="666.666666666667" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Diesel&quot;})" index="7" status="BS" value="333.333333333333" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Diesel&quot;})" index="8" status="LL" value="0" reducedCost="-7.105427357601e-15"/>
<variable name="Inventory({&quot;Super&quot;})" index="9" status="LL" value="0" reducedCost="-20.9"/>
<variable name="Inventory({&quot;Regular&quot;})" index="10" status="BS" value="7500" reducedCost="-0"/>
<variable name="Inventory({&quot;Diesel&quot;})" index="11" status="LL" value="0" reducedCost="-40.9"/>
<variable name="x13" index="12" status="UL" value="0" reducedCost="1"/>
</variables>
</CPLEXSolution>'
# Parse the XML string held in xml_file into an xml2 document object.
x <- read_xml(xml_file)
# Select every <variables> element anywhere in the document (absolute XPath).
vars <- xml_find_all(x, "//variables")
使用 stringr 库:
# CPLEX solution document embedded as a single string. Double quotes that
# belong to an attribute value are written as &quot; so the text is
# well-formed XML (a raw " inside name="..." would end the attribute early).
xml_file <- '<?xml version = "1.0" encoding="UTF-8" standalone="yes"?>
<CPLEXSolution version="1.2">
<header
problemName="Oil-blending.lp"
objectiveValue="287750"
solutionTypeValue="1"
solutionTypeString="basic"
solutionStatusValue="1"
solutionStatusString="optimal"
solutionMethodString="dual"
primalFeasible="1"
dualFeasible="1"
simplexIterations="14"
writeLevel="1"/>
<quality
epRHS="1e-06"
epOpt="1e-06"
maxPrimalInfeas="0"
maxDualInfeas="0"
maxPrimalResidual="9.66338120633736e-13"
maxDualResidual="7.105427357601e-15"
maxX="7500"
maxPi="57.25"
maxSlack="4000"
maxRedCost="40.9"
kappa="83.7880434782609"/>
<linearConstraints>
<constraint name="ct_demand({&quot;Super&quot;})" index="0" status="LL" slack="0" dual="-20.8"/>
<constraint name="ct_demand({&quot;Regular&quot;})" index="1" status="LL" slack="0" dual="0.1"/>
<constraint name="ct_demand({&quot;Diesel&quot;})" index="2" status="LL" slack="0" dual="-40.8"/>
<constraint name="ct_capacity({&quot;Crude1&quot;})" index="3" status="LL" slack="0" dual="57.25"/>
<constraint name="ct_capacity({&quot;Crude2&quot;})" index="4" status="LL" slack="0" dual="20.9"/>
<constraint name="ct_capacity({&quot;Crude3&quot;})" index="5" status="BS" slack="1500" dual="0"/>
<constraint name="ct_total_max_prod" index="6" status="BS" slack="499.999999999997" dual="0"/>
<constraint name="ct_octane_min({&quot;Super&quot;})" index="7" status="BS" slack="-2000" dual="-0"/>
<constraint name="ct_octane_min({&quot;Regular&quot;})" index="8" status="LL" slack="0" dual="-1.77635683940025e-15"/>
<constraint name="ct_octane_min({&quot;Diesel&quot;})" index="9" status="BS" slack="-4000" dual="-0"/>
<constraint name="ct_lead_max({&quot;Super&quot;})" index="10" status="LL" slack="0" dual="30.9"/>
<constraint name="ct_lead_max({&quot;Regular&quot;})" index="11" status="LL" slack="0" dual="30.9"/>
<constraint name="ct_lead_max({&quot;Diesel&quot;})" index="12" status="LL" slack="0" dual="30.9"/>
</linearConstraints>
<variables>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Super&quot;})" index="0" status="BS" value="2222.22222222222" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Super&quot;})" index="1" status="BS" value="444.444444444444" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Super&quot;})" index="2" status="BS" value="333.333333333333" reducedCost="-0"/>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Regular&quot;})" index="3" status="BS" value="2111.11111111111" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Regular&quot;})" index="4" status="BS" value="4222.22222222222" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Regular&quot;})" index="5" status="BS" value="3166.66666666667" reducedCost="-0"/>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Diesel&quot;})" index="6" status="BS" value="666.666666666667" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Diesel&quot;})" index="7" status="BS" value="333.333333333333" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Diesel&quot;})" index="8" status="LL" value="0" reducedCost="-7.105427357601e-15"/>
<variable name="Inventory({&quot;Super&quot;})" index="9" status="LL" value="0" reducedCost="-20.9"/>
<variable name="Inventory({&quot;Regular&quot;})" index="10" status="BS" value="7500" reducedCost="-0"/>
<variable name="Inventory({&quot;Diesel&quot;})" index="11" status="LL" value="0" reducedCost="-40.9"/>
<variable name="x13" index="12" status="UL" value="0" reducedCost="1"/>
</variables>
</CPLEXSolution>'
library(stringr)
# Capture everything between value=" and the closing quote that is followed by
# reducedCost. Matching "anything but a quote" keeps negative and
# scientific-notation numbers, which a leading [0-9]+ would silently drop.
as.numeric(str_extract_all(xml_file, "(?<=value=\")[^\"]+(?=\" reducedCost)")[[1]])
[1] 2222.2222 444.4444 333.3333 2111.1111 4222.2222 3166.6667 666.6667 333.3333 0.0000 0.0000
[11] 7500.0000 0.0000 0.0000
xml2 包是解决这类问题的好选择。上面的起始代码已经很接近了,您只需要解析出 "variable" 子节点,并从感兴趣的属性中提取文本。
library(xml2)
# Parse the XML string held in xml_file into a document object.
x <- read_xml(xml_file)
# Read the parent node <variables>.
vars <- xml_find_all(x, "//variables")
# Parse the child <variable> nodes. The leading "." makes the XPath relative to
# `vars`; a bare "//variable" would search the whole document and ignore `vars`.
variable <- xml_find_all(vars, ".//variable")
# Pull the attributes of interest as text and convert index/value to numbers.
# vnames is not part of the printed table below; add it as a column with
# data.frame(name = vnames, index, values) if the names are needed.
vnames <- xml_attr(variable, "name")
index <- as.integer(xml_attr(variable, "index"))
values <- as.numeric(xml_attr(variable, "value"))
data.frame(index, values)
示例输出:
data.frame(index, values)
index values
1 0 2222.2222
2 1 444.4444
3 2 333.3333
4 3 2111.1111
5 4 4222.2222
6 5 3166.6667
7 6 666.6667
8 7 333.3333
9 8 0.0000
10 9 0.0000
11 10 7500.0000
12 11 0.0000
13 12 0.0000
我正在尝试解析下面 xml 中的变量名称、索引和值。对变量进行子集化是可行的,但是从每个变量中获取实际值有点困难。有人能给我指出正确的方向吗?
# Attach xml2; library() stops with an error right here if the package is
# missing, whereas require() would only return FALSE and let later calls fail.
library(xml2)
# CPLEX solution document embedded as a single string. Double quotes that
# belong to an attribute value are written as &quot; so the text is
# well-formed XML (a raw " inside name="..." would end the attribute early).
xml_file <- '<?xml version = "1.0" encoding="UTF-8" standalone="yes"?>
<CPLEXSolution version="1.2">
<header
problemName="Oil-blending.lp"
objectiveValue="287750"
solutionTypeValue="1"
solutionTypeString="basic"
solutionStatusValue="1"
solutionStatusString="optimal"
solutionMethodString="dual"
primalFeasible="1"
dualFeasible="1"
simplexIterations="14"
writeLevel="1"/>
<quality
epRHS="1e-06"
epOpt="1e-06"
maxPrimalInfeas="0"
maxDualInfeas="0"
maxPrimalResidual="9.66338120633736e-13"
maxDualResidual="7.105427357601e-15"
maxX="7500"
maxPi="57.25"
maxSlack="4000"
maxRedCost="40.9"
kappa="83.7880434782609"/>
<linearConstraints>
<constraint name="ct_demand({&quot;Super&quot;})" index="0" status="LL" slack="0" dual="-20.8"/>
<constraint name="ct_demand({&quot;Regular&quot;})" index="1" status="LL" slack="0" dual="0.1"/>
<constraint name="ct_demand({&quot;Diesel&quot;})" index="2" status="LL" slack="0" dual="-40.8"/>
<constraint name="ct_capacity({&quot;Crude1&quot;})" index="3" status="LL" slack="0" dual="57.25"/>
<constraint name="ct_capacity({&quot;Crude2&quot;})" index="4" status="LL" slack="0" dual="20.9"/>
<constraint name="ct_capacity({&quot;Crude3&quot;})" index="5" status="BS" slack="1500" dual="0"/>
<constraint name="ct_total_max_prod" index="6" status="BS" slack="499.999999999997" dual="0"/>
<constraint name="ct_octane_min({&quot;Super&quot;})" index="7" status="BS" slack="-2000" dual="-0"/>
<constraint name="ct_octane_min({&quot;Regular&quot;})" index="8" status="LL" slack="0" dual="-1.77635683940025e-15"/>
<constraint name="ct_octane_min({&quot;Diesel&quot;})" index="9" status="BS" slack="-4000" dual="-0"/>
<constraint name="ct_lead_max({&quot;Super&quot;})" index="10" status="LL" slack="0" dual="30.9"/>
<constraint name="ct_lead_max({&quot;Regular&quot;})" index="11" status="LL" slack="0" dual="30.9"/>
<constraint name="ct_lead_max({&quot;Diesel&quot;})" index="12" status="LL" slack="0" dual="30.9"/>
</linearConstraints>
<variables>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Super&quot;})" index="0" status="BS" value="2222.22222222222" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Super&quot;})" index="1" status="BS" value="444.444444444444" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Super&quot;})" index="2" status="BS" value="333.333333333333" reducedCost="-0"/>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Regular&quot;})" index="3" status="BS" value="2111.11111111111" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Regular&quot;})" index="4" status="BS" value="4222.22222222222" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Regular&quot;})" index="5" status="BS" value="3166.66666666667" reducedCost="-0"/>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Diesel&quot;})" index="6" status="BS" value="666.666666666667" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Diesel&quot;})" index="7" status="BS" value="333.333333333333" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Diesel&quot;})" index="8" status="LL" value="0" reducedCost="-7.105427357601e-15"/>
<variable name="Inventory({&quot;Super&quot;})" index="9" status="LL" value="0" reducedCost="-20.9"/>
<variable name="Inventory({&quot;Regular&quot;})" index="10" status="BS" value="7500" reducedCost="-0"/>
<variable name="Inventory({&quot;Diesel&quot;})" index="11" status="LL" value="0" reducedCost="-40.9"/>
<variable name="x13" index="12" status="UL" value="0" reducedCost="1"/>
</variables>
</CPLEXSolution>'
# Parse the XML string held in xml_file into an xml2 document object.
x <- read_xml(xml_file)
# Select every <variables> element anywhere in the document (absolute XPath).
vars <- xml_find_all(x, "//variables")
使用 stringr 库:
# CPLEX solution document embedded as a single string. Double quotes that
# belong to an attribute value are written as &quot; so the text is
# well-formed XML (a raw " inside name="..." would end the attribute early).
xml_file <- '<?xml version = "1.0" encoding="UTF-8" standalone="yes"?>
<CPLEXSolution version="1.2">
<header
problemName="Oil-blending.lp"
objectiveValue="287750"
solutionTypeValue="1"
solutionTypeString="basic"
solutionStatusValue="1"
solutionStatusString="optimal"
solutionMethodString="dual"
primalFeasible="1"
dualFeasible="1"
simplexIterations="14"
writeLevel="1"/>
<quality
epRHS="1e-06"
epOpt="1e-06"
maxPrimalInfeas="0"
maxDualInfeas="0"
maxPrimalResidual="9.66338120633736e-13"
maxDualResidual="7.105427357601e-15"
maxX="7500"
maxPi="57.25"
maxSlack="4000"
maxRedCost="40.9"
kappa="83.7880434782609"/>
<linearConstraints>
<constraint name="ct_demand({&quot;Super&quot;})" index="0" status="LL" slack="0" dual="-20.8"/>
<constraint name="ct_demand({&quot;Regular&quot;})" index="1" status="LL" slack="0" dual="0.1"/>
<constraint name="ct_demand({&quot;Diesel&quot;})" index="2" status="LL" slack="0" dual="-40.8"/>
<constraint name="ct_capacity({&quot;Crude1&quot;})" index="3" status="LL" slack="0" dual="57.25"/>
<constraint name="ct_capacity({&quot;Crude2&quot;})" index="4" status="LL" slack="0" dual="20.9"/>
<constraint name="ct_capacity({&quot;Crude3&quot;})" index="5" status="BS" slack="1500" dual="0"/>
<constraint name="ct_total_max_prod" index="6" status="BS" slack="499.999999999997" dual="0"/>
<constraint name="ct_octane_min({&quot;Super&quot;})" index="7" status="BS" slack="-2000" dual="-0"/>
<constraint name="ct_octane_min({&quot;Regular&quot;})" index="8" status="LL" slack="0" dual="-1.77635683940025e-15"/>
<constraint name="ct_octane_min({&quot;Diesel&quot;})" index="9" status="BS" slack="-4000" dual="-0"/>
<constraint name="ct_lead_max({&quot;Super&quot;})" index="10" status="LL" slack="0" dual="30.9"/>
<constraint name="ct_lead_max({&quot;Regular&quot;})" index="11" status="LL" slack="0" dual="30.9"/>
<constraint name="ct_lead_max({&quot;Diesel&quot;})" index="12" status="LL" slack="0" dual="30.9"/>
</linearConstraints>
<variables>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Super&quot;})" index="0" status="BS" value="2222.22222222222" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Super&quot;})" index="1" status="BS" value="444.444444444444" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Super&quot;})" index="2" status="BS" value="333.333333333333" reducedCost="-0"/>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Regular&quot;})" index="3" status="BS" value="2111.11111111111" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Regular&quot;})" index="4" status="BS" value="4222.22222222222" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Regular&quot;})" index="5" status="BS" value="3166.66666666667" reducedCost="-0"/>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Diesel&quot;})" index="6" status="BS" value="666.666666666667" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Diesel&quot;})" index="7" status="BS" value="333.333333333333" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Diesel&quot;})" index="8" status="LL" value="0" reducedCost="-7.105427357601e-15"/>
<variable name="Inventory({&quot;Super&quot;})" index="9" status="LL" value="0" reducedCost="-20.9"/>
<variable name="Inventory({&quot;Regular&quot;})" index="10" status="BS" value="7500" reducedCost="-0"/>
<variable name="Inventory({&quot;Diesel&quot;})" index="11" status="LL" value="0" reducedCost="-40.9"/>
<variable name="x13" index="12" status="UL" value="0" reducedCost="1"/>
</variables>
</CPLEXSolution>'
library(stringr)
# Capture everything between value=" and the closing quote that is followed by
# reducedCost. Matching "anything but a quote" keeps negative and
# scientific-notation numbers, which a leading [0-9]+ would silently drop.
as.numeric(str_extract_all(xml_file, "(?<=value=\")[^\"]+(?=\" reducedCost)")[[1]])
[1] 2222.2222 444.4444 333.3333 2111.1111 4222.2222 3166.6667 666.6667 333.3333 0.0000 0.0000
[11] 7500.0000 0.0000 0.0000
xml2 包是解决这类问题的好选择。上面的起始代码已经很接近了,您只需要解析出 "variable" 子节点,并从感兴趣的属性中提取文本。
library(xml2)
# Parse the XML string held in xml_file into a document object.
x <- read_xml(xml_file)
# Read the parent node <variables>.
vars <- xml_find_all(x, "//variables")
# Parse the child <variable> nodes. The leading "." makes the XPath relative to
# `vars`; a bare "//variable" would search the whole document and ignore `vars`.
variable <- xml_find_all(vars, ".//variable")
# Pull the attributes of interest as text and convert index/value to numbers.
# vnames is not part of the printed table below; add it as a column with
# data.frame(name = vnames, index, values) if the names are needed.
vnames <- xml_attr(variable, "name")
index <- as.integer(xml_attr(variable, "index"))
values <- as.numeric(xml_attr(variable, "value"))
data.frame(index, values)
示例输出:
data.frame(index, values)
index values
1 0 2222.2222
2 1 444.4444
3 2 333.3333
4 3 2111.1111
5 4 4222.2222
6 5 3166.6667
7 6 666.6667
8 7 333.3333
9 8 0.0000
10 9 0.0000
11 10 7500.0000
12 11 0.0000
13 12 0.0000