在不了解 DOM 结构的情况下展平复杂的 XDocument
Flatten complex XDocument without knowing DOM
样本XML:
<Pricing>
<PriceGuide id="e4c3db5c">
<Name>Price Guide A</Name>
<Products>
<Product id="1">
<Name>Product 1</Name>
<Prices>
<Price>
<Region id="40">Chicago</Region>
<PriceLow>48</PriceLow>
<PriceHigh>52</PriceHigh>
<UnitOfMeasure>MT</UnitOfMeasure>
</Price>
<Price>
<Region id="71">Dallas</Region>
<PriceLow>45.5</PriceLow>
<PriceHigh>47</PriceHigh>
<UnitOfMeasure>MT</UnitOfMeasure>
</Price>
</Prices>
</Product>
<Product id="2">
<Name>Product 2</Name>
<Prices>
<Price>
<Region id="40">Chicago</Region>
<PriceLow>48</PriceLow>
<PriceHigh>49</PriceHigh>
<UnitOfMeasure>MT</UnitOfMeasure>
</Price>
<Price>
<Region id="101">Los Angeles </Region>
<PriceLow>43</PriceLow>
<PriceHigh>45</PriceHigh>
<UnitOfMeasure>MT</UnitOfMeasure>
</Price>
<Price>
<Region id="71">Dallas</Region>
<PriceLow>45.5</PriceLow>
<PriceHigh>48.5</PriceHigh>
<UnitOfMeasure>MT</UnitOfMeasure>
</Price>
</Prices>
</Product>
</Products>
</PriceGuide>
</Pricing>
预期结果:(数据写入 CSV 文件或转储到 DataTable)
Price Guide A, Product 1, Chicago, 48, 52, MT
Price Guide A, Product 1, Dallas, 45.5, 47, MT
Price Guide A, Product 2, Chicago, 48, 49, MT
Price Guide A, Product 2, Los Angeles, 43, 45, MT
Price Guide A, Product 2, Dallas, 45.5, 48.5, MT
主要问题:
我基本上会拿到一个结构未知的 XML 文件,并且必须将其显示为一张平面表格。
这只是我可能需要处理的众多文件中的一个示例。我事先不知道 DOM 的结构,所以无法使用给定的节点名称直接编写 LINQ 查询。我尝试过遍历 DOM,但在递归过程中很难判断何时应该输出一条记录。
加分项:
从示例中,有时节点上有属性。如果有属性 "id" 我想在输出中包含该属性的值。在这种情况下,我的输出将是:
e4c3db5c, Price Guide A, 1, Product 1, 40, Chicago, 48, 52, MT
e4c3db5c, Price Guide A, 1, Product 1, 71, Dallas, 45.5, 47, MT
e4c3db5c, Price Guide A, 2, Product 2, 40, Chicago, 48, 49, MT
e4c3db5c, Price Guide A, 2, Product 2, 101, Los Angeles, 43, 45, MT
e4c3db5c, Price Guide A, 2, Product 2, 71, Dallas, 45.5, 48.5, MT
提前致谢。
编辑:
以下代码可以工作,但需要我事先知道 XML 结构。我希望把这段代码通用化:
// Flattens the known Pricing/PriceGuide/Products/Product/Prices/Price layout
// into one row per <Price> element.
//
// Each `from` clause must query the element that actually CONTAINS the
// requested child: <Name> and <Products> are siblings under <PriceGuide>,
// and <Name> and <Prices> are siblings under <Product>. Chaining them off one
// another (Name -> Products, Name -> Prices) matches nothing and yields no rows.
var details =
    from priceGuide in _xmlDoc.Root.Elements("PriceGuide")
    from guideName in priceGuide.Elements("Name")
    from products in priceGuide.Elements("Products")
    from product in products.Elements("Product")
    from productName in product.Elements("Name")
    from prices in product.Elements("Prices")
    from price in prices.Elements("Price")
    from region in price.Elements("Region")
    from priceLow in price.Elements("PriceLow")
    from priceHigh in price.Elements("PriceHigh")
    from unitOfMeasure in price.Elements("UnitOfMeasure")
    select new
    {
        PriceGuideId = (string)priceGuide.Attribute("id"),
        PriceGuideName = guideName.Value,
        // The "id" attribute lives on <Product>, not on the <Products> wrapper.
        ProductId = (string)product.Attribute("id"),
        // The product's display name is the text of its <Name> child.
        ProductName = productName.Value,
        RegionId = (string)region.Attribute("id"),
        RegionName = region.Value,
        PriceLow = priceLow.Value,
        PriceHigh = priceHigh.Value,
        UnitOfMeasure = unitOfMeasure.Value,
    };
我知道这没什么用。
我不知道如何用 LINQ 实现。下面是一段可以工作的、快速但粗糙的代码:
// Load the sample document, flatten it, and print one CSV line per record.
XmlDocument dom = new XmlDocument();
dom.LoadXml("<Pricing><PriceGuide id=\"e4c3db5c\"><Name>Price Guide A</Name><Products><Product id=\"1\"><Name>Product 1</Name><Prices><Price><Region id=\"40\">Chicago</Region><PriceLow>48</PriceLow><PriceHigh>52</PriceHigh><UnitOfMeasure>MT</UnitOfMeasure></Price><Price><Region id=\"71\">Dallas</Region><PriceLow>45.5</PriceLow><PriceHigh>47</PriceHigh><UnitOfMeasure>MT</UnitOfMeasure></Price></Prices></Product><Product id=\"2\"><Name>Product 2</Name><Prices><Price><Region id=\"40\">Chicago</Region><PriceLow>48</PriceLow><PriceHigh>49</PriceHigh><UnitOfMeasure>MT</UnitOfMeasure></Price><Price><Region id=\"101\">Los Angeles </Region><PriceLow>43</PriceLow><PriceHigh>45</PriceHigh><UnitOfMeasure>MT</UnitOfMeasure></Price><Price><Region id=\"71\">Dallas</Region><PriceLow>45.5</PriceLow><PriceHigh>48.5</PriceHigh><UnitOfMeasure>MT</UnitOfMeasure></Price></Prices></Product></Products></PriceGuide></Pricing>");
List<KeyValuePair<int, String>> result = FlattenXML(dom.DocumentElement, "", 0);

// Compute the highest key once; evaluating Max() inside the Where predicate
// would rescan the whole list for every element.
int maxLevel = result.Max(b => b.Key);

// Keep only the entries carrying the highest key and strip the trailing comma.
// An empty value (document without any text) is passed through unchanged
// because Substring(0, -1) would throw.
var q = result
    .Where(c => c.Key == maxLevel)
    .Select(b => b.Value.Length > 0 ? b.Value.Substring(0, b.Value.Length - 1) : b.Value)
    .ToArray();
Console.WriteLine(String.Join(System.Environment.NewLine, q));
/// <summary>
/// Recursively flattens an XML element into comma-terminated rows.
/// </summary>
/// <param name="node">Element to flatten.</param>
/// <param name="parent">Row prefix accumulated from the ancestors; each value is followed by a comma.</param>
/// <param name="level">Number of leaf values already counted on the path to <paramref name="node"/>.</param>
/// <returns>
/// One entry for <paramref name="node"/> itself plus the entries of every nested
/// container. The key is the number of leaf values counted for that row, the value
/// is the row text (every field followed by a comma).
/// </returns>
/// <remarks>
/// A "leaf" is an element without child elements and with non-empty text. A leaf
/// contributes its "id" attribute (when present) followed by its text. A container
/// contributes its "id" attribute only and is descended into.
/// </remarks>
private List<KeyValuePair<int, String>> FlattenXML(XmlElement node, String parent, int level)
{
    List<KeyValuePair<int, String>> result = new List<KeyValuePair<int, String>>();
    String detail = "";

    if (node.HasAttribute("id"))
    {
        parent += node.GetAttribute("id") + ",";
    }

    // Only element children matter. Iterating ChildNodes as XmlElement directly
    // throws InvalidCastException on comments, text and CDATA nodes.
    List<XmlElement> children = node.ChildNodes.OfType<XmlElement>().ToList();

    // The node itself is a leaf (only reachable for the initial call).
    if (children.Count == 0 && node.InnerText != "")
    {
        parent += node.InnerText + ",";
    }

    foreach (XmlElement child in children)
    {
        // Decide by structure, not by comparing InnerText with InnerXml: the
        // latter differs for escaped text such as "&amp;". Comparing
        // FirstChild with LastChild would skip containers holding a single element.
        bool childHasElements = child.ChildNodes.OfType<XmlElement>().Any();

        if (childHasElements)
        {
            result.AddRange(FlattenXML(child, parent + detail, level));
        }
        else if (child.InnerText != "")
        {
            // Leaf elements are never recursed into, so their "id" has to be
            // emitted here, ahead of the text value.
            if (child.HasAttribute("id"))
            {
                detail += child.GetAttribute("id") + ",";
            }

            detail += child.InnerText + ",";
            level++;
        }
    }

    result.Add(new KeyValuePair<int, String>(level, parent + detail));
    return result;
}
样本XML:
<Pricing>
<PriceGuide id="e4c3db5c">
<Name>Price Guide A</Name>
<Products>
<Product id="1">
<Name>Product 1</Name>
<Prices>
<Price>
<Region id="40">Chicago</Region>
<PriceLow>48</PriceLow>
<PriceHigh>52</PriceHigh>
<UnitOfMeasure>MT</UnitOfMeasure>
</Price>
<Price>
<Region id="71">Dallas</Region>
<PriceLow>45.5</PriceLow>
<PriceHigh>47</PriceHigh>
<UnitOfMeasure>MT</UnitOfMeasure>
</Price>
</Prices>
</Product>
<Product id="2">
<Name>Product 2</Name>
<Prices>
<Price>
<Region id="40">Chicago</Region>
<PriceLow>48</PriceLow>
<PriceHigh>49</PriceHigh>
<UnitOfMeasure>MT</UnitOfMeasure>
</Price>
<Price>
<Region id="101">Los Angeles </Region>
<PriceLow>43</PriceLow>
<PriceHigh>45</PriceHigh>
<UnitOfMeasure>MT</UnitOfMeasure>
</Price>
<Price>
<Region id="71">Dallas</Region>
<PriceLow>45.5</PriceLow>
<PriceHigh>48.5</PriceHigh>
<UnitOfMeasure>MT</UnitOfMeasure>
</Price>
</Prices>
</Product>
</Products>
</PriceGuide>
</Pricing>
预期结果:(数据写入 CSV 文件或转储到 DataTable)
Price Guide A, Product 1, Chicago, 48, 52, MT
Price Guide A, Product 1, Dallas, 45.5, 47, MT
Price Guide A, Product 2, Chicago, 48, 49, MT
Price Guide A, Product 2, Los Angeles, 43, 45, MT
Price Guide A, Product 2, Dallas, 45.5, 48.5, MT
主要问题:
我基本上会拿到一个结构未知的 XML 文件,并且必须将其显示为一张平面表格。
这只是我可能需要处理的众多文件中的一个示例。我事先不知道 DOM 的结构,所以无法使用给定的节点名称直接编写 LINQ 查询。我尝试过遍历 DOM,但在递归过程中很难判断何时应该输出一条记录。
加分项:
从示例中,有时节点上有属性。如果有属性 "id" 我想在输出中包含该属性的值。在这种情况下,我的输出将是:
e4c3db5c, Price Guide A, 1, Product 1, 40, Chicago, 48, 52, MT
e4c3db5c, Price Guide A, 1, Product 1, 71, Dallas, 45.5, 47, MT
e4c3db5c, Price Guide A, 2, Product 2, 40, Chicago, 48, 49, MT
e4c3db5c, Price Guide A, 2, Product 2, 101, Los Angeles, 43, 45, MT
e4c3db5c, Price Guide A, 2, Product 2, 71, Dallas, 45.5, 48.5, MT
提前致谢。
编辑:
以下代码可以工作,但需要我事先知道 XML 结构。我希望把这段代码通用化:
// Flattens the known Pricing/PriceGuide/Products/Product/Prices/Price layout
// into one row per <Price> element.
//
// Each `from` clause must query the element that actually CONTAINS the
// requested child: <Name> and <Products> are siblings under <PriceGuide>,
// and <Name> and <Prices> are siblings under <Product>. Chaining them off one
// another (Name -> Products, Name -> Prices) matches nothing and yields no rows.
var details =
    from priceGuide in _xmlDoc.Root.Elements("PriceGuide")
    from guideName in priceGuide.Elements("Name")
    from products in priceGuide.Elements("Products")
    from product in products.Elements("Product")
    from productName in product.Elements("Name")
    from prices in product.Elements("Prices")
    from price in prices.Elements("Price")
    from region in price.Elements("Region")
    from priceLow in price.Elements("PriceLow")
    from priceHigh in price.Elements("PriceHigh")
    from unitOfMeasure in price.Elements("UnitOfMeasure")
    select new
    {
        PriceGuideId = (string)priceGuide.Attribute("id"),
        PriceGuideName = guideName.Value,
        // The "id" attribute lives on <Product>, not on the <Products> wrapper.
        ProductId = (string)product.Attribute("id"),
        // The product's display name is the text of its <Name> child.
        ProductName = productName.Value,
        RegionId = (string)region.Attribute("id"),
        RegionName = region.Value,
        PriceLow = priceLow.Value,
        PriceHigh = priceHigh.Value,
        UnitOfMeasure = unitOfMeasure.Value,
    };
我知道这没什么用。
我不知道如何用 LINQ 实现。下面是一段可以工作的、快速但粗糙的代码:
// Load the sample document, flatten it, and print one CSV line per record.
XmlDocument dom = new XmlDocument();
dom.LoadXml("<Pricing><PriceGuide id=\"e4c3db5c\"><Name>Price Guide A</Name><Products><Product id=\"1\"><Name>Product 1</Name><Prices><Price><Region id=\"40\">Chicago</Region><PriceLow>48</PriceLow><PriceHigh>52</PriceHigh><UnitOfMeasure>MT</UnitOfMeasure></Price><Price><Region id=\"71\">Dallas</Region><PriceLow>45.5</PriceLow><PriceHigh>47</PriceHigh><UnitOfMeasure>MT</UnitOfMeasure></Price></Prices></Product><Product id=\"2\"><Name>Product 2</Name><Prices><Price><Region id=\"40\">Chicago</Region><PriceLow>48</PriceLow><PriceHigh>49</PriceHigh><UnitOfMeasure>MT</UnitOfMeasure></Price><Price><Region id=\"101\">Los Angeles </Region><PriceLow>43</PriceLow><PriceHigh>45</PriceHigh><UnitOfMeasure>MT</UnitOfMeasure></Price><Price><Region id=\"71\">Dallas</Region><PriceLow>45.5</PriceLow><PriceHigh>48.5</PriceHigh><UnitOfMeasure>MT</UnitOfMeasure></Price></Prices></Product></Products></PriceGuide></Pricing>");
List<KeyValuePair<int, String>> result = FlattenXML(dom.DocumentElement, "", 0);

// Compute the highest key once; evaluating Max() inside the Where predicate
// would rescan the whole list for every element.
int maxLevel = result.Max(b => b.Key);

// Keep only the entries carrying the highest key and strip the trailing comma.
// An empty value (document without any text) is passed through unchanged
// because Substring(0, -1) would throw.
var q = result
    .Where(c => c.Key == maxLevel)
    .Select(b => b.Value.Length > 0 ? b.Value.Substring(0, b.Value.Length - 1) : b.Value)
    .ToArray();
Console.WriteLine(String.Join(System.Environment.NewLine, q));
/// <summary>
/// Recursively flattens an XML element into comma-terminated rows.
/// </summary>
/// <param name="node">Element to flatten.</param>
/// <param name="parent">Row prefix accumulated from the ancestors; each value is followed by a comma.</param>
/// <param name="level">Number of leaf values already counted on the path to <paramref name="node"/>.</param>
/// <returns>
/// One entry for <paramref name="node"/> itself plus the entries of every nested
/// container. The key is the number of leaf values counted for that row, the value
/// is the row text (every field followed by a comma).
/// </returns>
/// <remarks>
/// A "leaf" is an element without child elements and with non-empty text. A leaf
/// contributes its "id" attribute (when present) followed by its text. A container
/// contributes its "id" attribute only and is descended into.
/// </remarks>
private List<KeyValuePair<int, String>> FlattenXML(XmlElement node, String parent, int level)
{
    List<KeyValuePair<int, String>> result = new List<KeyValuePair<int, String>>();
    String detail = "";

    if (node.HasAttribute("id"))
    {
        parent += node.GetAttribute("id") + ",";
    }

    // Only element children matter. Iterating ChildNodes as XmlElement directly
    // throws InvalidCastException on comments, text and CDATA nodes.
    List<XmlElement> children = node.ChildNodes.OfType<XmlElement>().ToList();

    // The node itself is a leaf (only reachable for the initial call).
    if (children.Count == 0 && node.InnerText != "")
    {
        parent += node.InnerText + ",";
    }

    foreach (XmlElement child in children)
    {
        // Decide by structure, not by comparing InnerText with InnerXml: the
        // latter differs for escaped text such as "&amp;". Comparing
        // FirstChild with LastChild would skip containers holding a single element.
        bool childHasElements = child.ChildNodes.OfType<XmlElement>().Any();

        if (childHasElements)
        {
            result.AddRange(FlattenXML(child, parent + detail, level));
        }
        else if (child.InnerText != "")
        {
            // Leaf elements are never recursed into, so their "id" has to be
            // emitted here, ahead of the text value.
            if (child.HasAttribute("id"))
            {
                detail += child.GetAttribute("id") + ",";
            }

            detail += child.InnerText + ",";
            level++;
        }
    }

    result.Add(new KeyValuePair<int, String>(level, parent + detail));
    return result;
}