R & xml2:将 xml 文档值解析为向量或 data.frame

R & xml2: parsing an xml document values to vector or data.frame

我正在尝试解析下面 XML 中各个变量的名称、索引和值。我已经能够选取出 variables 节点,但在从每个 variable 中提取实际的值时遇到了困难。有人能给我指出正确的方向吗?

# Attach xml2 with library(): unlike require(), it raises an error right away
# when the package is not installed instead of returning FALSE and continuing.
library(xml2)
# Sample XML document stored as one single-quoted string literal. Its root
# element is <CPLEXSolution>, which holds a <header>, a <quality> element,
# 13 <constraint> entries under <linearConstraints> and 13 <variable> entries
# under <variables>. Each <variable> carries name, index, status, value and
# reducedCost attributes.
xml_file <- '<?xml version = "1.0" encoding="UTF-8" standalone="yes"?>
<CPLEXSolution version="1.2">
 <header
   problemName="Oil-blending.lp"
   objectiveValue="287750"
   solutionTypeValue="1"
   solutionTypeString="basic"
   solutionStatusValue="1"
   solutionStatusString="optimal"
   solutionMethodString="dual"
   primalFeasible="1"
   dualFeasible="1"
   simplexIterations="14"
   writeLevel="1"/>
 <quality
   epRHS="1e-06"
   epOpt="1e-06"
   maxPrimalInfeas="0"
   maxDualInfeas="0"
   maxPrimalResidual="9.66338120633736e-13"
   maxDualResidual="7.105427357601e-15"
   maxX="7500"
   maxPi="57.25"
   maxSlack="4000"
   maxRedCost="40.9"
   kappa="83.7880434782609"/>
 <linearConstraints>
  <constraint name="ct_demand({&quot;Super&quot;})" index="0" status="LL" slack="0" dual="-20.8"/>
  <constraint name="ct_demand({&quot;Regular&quot;})" index="1" status="LL" slack="0" dual="0.1"/>
  <constraint name="ct_demand({&quot;Diesel&quot;})" index="2" status="LL" slack="0" dual="-40.8"/>
  <constraint name="ct_capacity({&quot;Crude1&quot;})" index="3" status="LL" slack="0" dual="57.25"/>
  <constraint name="ct_capacity({&quot;Crude2&quot;})" index="4" status="LL" slack="0" dual="20.9"/>
  <constraint name="ct_capacity({&quot;Crude3&quot;})" index="5" status="BS" slack="1500" dual="0"/>
  <constraint name="ct_total_max_prod" index="6" status="BS" slack="499.999999999997" dual="0"/>
  <constraint name="ct_octane_min({&quot;Super&quot;})" index="7" status="BS" slack="-2000" dual="-0"/>
  <constraint name="ct_octane_min({&quot;Regular&quot;})" index="8" status="LL" slack="0" dual="-1.77635683940025e-15"/>
  <constraint name="ct_octane_min({&quot;Diesel&quot;})" index="9" status="BS" slack="-4000" dual="-0"/>
  <constraint name="ct_lead_max({&quot;Super&quot;})" index="10" status="LL" slack="0" dual="30.9"/>
  <constraint name="ct_lead_max({&quot;Regular&quot;})" index="11" status="LL" slack="0" dual="30.9"/>
  <constraint name="ct_lead_max({&quot;Diesel&quot;})" index="12" status="LL" slack="0" dual="30.9"/>
 </linearConstraints>
 <variables>
  <variable name="Blend({&quot;Crude1&quot;})({&quot;Super&quot;})" index="0" status="BS" value="2222.22222222222" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude2&quot;})({&quot;Super&quot;})" index="1" status="BS" value="444.444444444444" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude3&quot;})({&quot;Super&quot;})" index="2" status="BS" value="333.333333333333" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude1&quot;})({&quot;Regular&quot;})" index="3" status="BS" value="2111.11111111111" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude2&quot;})({&quot;Regular&quot;})" index="4" status="BS" value="4222.22222222222" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude3&quot;})({&quot;Regular&quot;})" index="5" status="BS" value="3166.66666666667" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude1&quot;})({&quot;Diesel&quot;})" index="6" status="BS" value="666.666666666667" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude2&quot;})({&quot;Diesel&quot;})" index="7" status="BS" value="333.333333333333" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude3&quot;})({&quot;Diesel&quot;})" index="8" status="LL" value="0" reducedCost="-7.105427357601e-15"/>
  <variable name="Inventory({&quot;Super&quot;})" index="9" status="LL" value="0" reducedCost="-20.9"/>
  <variable name="Inventory({&quot;Regular&quot;})" index="10" status="BS" value="7500" reducedCost="-0"/>
  <variable name="Inventory({&quot;Diesel&quot;})" index="11" status="LL" value="0" reducedCost="-40.9"/>
  <variable name="x13" index="12" status="UL" value="0" reducedCost="1"/>
 </variables>
</CPLEXSolution>'

# Parse the XML text held in `xml_file` into an xml2 document.
x <- read_xml(x = xml_file)
# Select every <variables> element found anywhere in the document.
vars <- xml_find_all(x = x, xpath = "//variables")

使用 stringr

# Sample XML document stored as one single-quoted string literal, written
# without any leading indentation. Its root element is <CPLEXSolution>, which
# holds a <header>, a <quality> element, 13 <constraint> entries under
# <linearConstraints> and 13 <variable> entries under <variables>. Each
# <variable> sits on its own line and carries name, index, status, value and
# reducedCost attributes.
xml_file <- '<?xml version = "1.0" encoding="UTF-8" standalone="yes"?>
<CPLEXSolution version="1.2">
<header
problemName="Oil-blending.lp"
objectiveValue="287750"
solutionTypeValue="1"
solutionTypeString="basic"
solutionStatusValue="1"
solutionStatusString="optimal"
solutionMethodString="dual"
primalFeasible="1"
dualFeasible="1"
simplexIterations="14"
writeLevel="1"/>
<quality
epRHS="1e-06"
epOpt="1e-06"
maxPrimalInfeas="0"
maxDualInfeas="0"
maxPrimalResidual="9.66338120633736e-13"
maxDualResidual="7.105427357601e-15"
maxX="7500"
maxPi="57.25"
maxSlack="4000"
maxRedCost="40.9"
kappa="83.7880434782609"/>
<linearConstraints>
<constraint name="ct_demand({&quot;Super&quot;})" index="0" status="LL" slack="0" dual="-20.8"/>
<constraint name="ct_demand({&quot;Regular&quot;})" index="1" status="LL" slack="0" dual="0.1"/>
<constraint name="ct_demand({&quot;Diesel&quot;})" index="2" status="LL" slack="0" dual="-40.8"/>
<constraint name="ct_capacity({&quot;Crude1&quot;})" index="3" status="LL" slack="0" dual="57.25"/>
<constraint name="ct_capacity({&quot;Crude2&quot;})" index="4" status="LL" slack="0" dual="20.9"/>
<constraint name="ct_capacity({&quot;Crude3&quot;})" index="5" status="BS" slack="1500" dual="0"/>
<constraint name="ct_total_max_prod" index="6" status="BS" slack="499.999999999997" dual="0"/>
<constraint name="ct_octane_min({&quot;Super&quot;})" index="7" status="BS" slack="-2000" dual="-0"/>
<constraint name="ct_octane_min({&quot;Regular&quot;})" index="8" status="LL" slack="0" dual="-1.77635683940025e-15"/>
<constraint name="ct_octane_min({&quot;Diesel&quot;})" index="9" status="BS" slack="-4000" dual="-0"/>
<constraint name="ct_lead_max({&quot;Super&quot;})" index="10" status="LL" slack="0" dual="30.9"/>
<constraint name="ct_lead_max({&quot;Regular&quot;})" index="11" status="LL" slack="0" dual="30.9"/>
<constraint name="ct_lead_max({&quot;Diesel&quot;})" index="12" status="LL" slack="0" dual="30.9"/>
</linearConstraints>
<variables>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Super&quot;})" index="0" status="BS" value="2222.22222222222" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Super&quot;})" index="1" status="BS" value="444.444444444444" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Super&quot;})" index="2" status="BS" value="333.333333333333" reducedCost="-0"/>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Regular&quot;})" index="3" status="BS" value="2111.11111111111" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Regular&quot;})" index="4" status="BS" value="4222.22222222222" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Regular&quot;})" index="5" status="BS" value="3166.66666666667" reducedCost="-0"/>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Diesel&quot;})" index="6" status="BS" value="666.666666666667" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Diesel&quot;})" index="7" status="BS" value="333.333333333333" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Diesel&quot;})" index="8" status="LL" value="0" reducedCost="-7.105427357601e-15"/>
<variable name="Inventory({&quot;Super&quot;})" index="9" status="LL" value="0" reducedCost="-20.9"/>
<variable name="Inventory({&quot;Regular&quot;})" index="10" status="BS" value="7500" reducedCost="-0"/>
<variable name="Inventory({&quot;Diesel&quot;})" index="11" status="LL" value="0" reducedCost="-40.9"/>
<variable name="x13" index="12" status="UL" value="0" reducedCost="1"/>
</variables>
</CPLEXSolution>'

library(stringr)
# Extract the text of every `value="..."` attribute that is immediately
# followed by ` reducedCost`, then convert it to numeric.
# The capture is `[^\"]*` (everything up to the closing quote) rather than
# `[0-9]+.*[0-9]*`: the old pattern required a leading digit, so negative
# values such as "-5" were silently skipped, and its unescaped greedy `.*`
# could run across several attributes when two elements share a line.
as.numeric(
  str_extract_all(xml_file, "(?<=value=\")[^\"]*(?=\" reducedCost)")[[1]]
)
[1] 2222.2222  444.4444  333.3333 2111.1111 4222.2222 3166.6667  666.6667  333.3333    0.0000    0.0000
[11] 7500.0000    0.0000    0.0000

xml2 包是解决这类问题的好选择。上面的起始代码已经很接近了,您只需要解析出各个 "variable" 子节点,并从感兴趣的属性中提取文本。

library(xml2)
x <- read_xml(xml_file)
# Locate the parent <variables> node.
vars <- xml_find_all(x, "//variables")

# Select the <variable> nodes beneath `vars`. The leading "." makes the XPath
# relative to `vars`; a bare "//variable" is evaluated from the document root
# and ignores the node set it is called on.
variable <- xml_find_all(vars, ".//variable")
# Read the "name", "index" and "value" attributes. Attributes come back as
# text, so index and value are converted to integer and numeric.
vnames <- xml_attr(variable, "name")
index <- as.integer(xml_attr(variable, "index"))
values <- as.numeric(xml_attr(variable, "value"))

# `vnames` is not part of the result below; use
# data.frame(name = vnames, index, values) if the names are wanted as well.
data.frame(index, values)

示例输出:

data.frame(index, values)
   index    values
1      0 2222.2222
2      1  444.4444
3      2  333.3333
4      3 2111.1111
5      4 4222.2222
6      5 3166.6667
7      6  666.6667
8      7  333.3333
9      8    0.0000
10     9    0.0000
11    10 7500.0000
12    11    0.0000
13    12    0.0000