向段落添加 Run 元素
Adding Runs to Paragraphs
我正在尝试将带有标签的 xml 格式转换为 DOCX 文件。我不是在生成新文档,而是在模板文档中插入文本。
<p id="_fab91699-6d85-4ce5-b0b5-a17197520a7f">This document is amongst a series of International Standards dealing with the conversion of systems of writing produced by Technical Committee ISO/TC 46, <em>Information and documentation</em>, WG 3 <em>Conversion of written languages</em>.</p>
我将文本片段收集在一个数组中,然后尝试用这样的代码来处理它们:
// Finds the bookmark named "ForewordText" and inserts the collected text fragments
// relative to the element that contains that bookmark.
// ForewordArray is read as [0, y] = style tag ("Normal" or anything else) and
// [1, y] = fragment text — presumably a two-row string array; confirm against its declaration.
foreach (var bkmkStart in wordDoc.MainDocumentPart.RootElement.Descendants<BookmarkStart>())
{
if (bkmkStart.Name == "ForewordText")
{
// Element that owns the bookmark; every InsertBeforeSelf below adds a sibling in front of it.
forewordbkmkParent = bkmkStart.Parent;
// Array.Length counts all cells of a multi-dimensional array, so Length / 2 - 1 is the
// last column index only when the array has exactly two rows (assumption — TODO confirm).
for (var y = 0; y <= ForewordArray.Length / (double)2 - 1; y++)
{
if (ForewordArray[0, y] == "Normal")
{
// Plain fragment that is not the last one.
if (y < ForewordArray.Length / (double)2 - 1)
{
if (ForewordArray[0, y + 1] == "Normal")
{
// Next fragment is plain too: emit this one as its own paragraph.
forewordbkmkParent.InsertBeforeSelf(new Paragraph(new Run(new Text(ForewordArray[1, y]))));
}
else
{
// Next fragment is formatted: same insertion, but remember the paragraph in fPara.
fPara = forewordbkmkParent.InsertBeforeSelf(new Paragraph(new Run(new Text(ForewordArray[1, y]))));
}
}
else
{
// Last fragment: add a run to fPara directly after its first run.
// NOTE(review): fPara is only assigned in the branch above; if that branch never ran,
// this dereferences whatever fPara held before the loop — confirm it cannot be null.
fPara.InsertAfter(new Run(new Text(ForewordArray[1, y])), fPara.GetFirstChild<Run>());
}
}
else
{
// Non-"Normal" fragment: build an italic run.
// NOTE(review): InsertBeforeSelf places this Run as a sibling in front of
// forewordbkmkParent, i.e. next to the paragraphs inserted above rather than inside one.
NewRun = forewordbkmkParent.InsertBeforeSelf(new Run());
NewRunProps = new RunProperties();
NewRunProps.AppendChild<Italic>(new Italic());
NewRun.AppendChild<RunProperties>(NewRunProps);
NewRun.AppendChild(new Text(ForewordArray[1, y]));
}
}
}
}
但我最终得到了格式错误的 XML,因为 Run 元素(w:r)被插入到了段落之后,而不是段落内部:
<w:p>
<w:r>
<w:t>This document is amongst a series of International Standards dealing with the conversion of systems of writing produced by Technical Committee ISO/TC 46, </w:t>
</w:r>
</w:p>
<w:r>
<w:rPr>
<w:i />
</w:rPr>
<w:t>Information and documentation</w:t>
</w:r>
<w:p>
<w:r>
<w:t>, WG 3 </w:t>
</w:r>
<w:r>
<w:t>.</w:t>
</w:r>
</w:p>
<w:r>
<w:rPr>
<w:i />
</w:rPr>
<w:t>Conversion of written languages</w:t>
</w:r>
最好能使用 SDK 以正确的方式完成这一操作。作为替代方案,我可以用正则表达式生成一个包含全部正确 XML 和文本的字符串,但我在 WordprocessingDocument 中找不到能把该字符串转换为可插入的 XML 片段的方法。
此类问题的解决方案是进行纯函数转换,如下代码示例所示。
代码示例使用问题中给出的示例 XML 元素 <p>(请参阅下面的 Xml 常量),并将其转换为相应的 Open XML w:p 元素,即 Open XML SDK 提供的强类型类 Paragraph 的一个实例。w:p(即 Paragraph)的预期外部 XML 由 OuterXml 常量定义。
using System;
using System.Linq;
using System.Xml.Linq;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Wordprocessing;
using Xunit;
namespace CodeSnippets.Tests.OpenXml.Wordprocessing
{
    /// <summary>
    /// Demonstrates a pure functional transformation of a simple XML paragraph
    /// (a <c>p</c> element with text and optional <c>em</c>/<c>b</c> children)
    /// into an Open XML <see cref="Paragraph"/>.
    /// </summary>
    public class XmlTransformationTests
    {
        // Input: the sample paragraph from the question.
        private const string Xml =
            @"<p id=""_fab91699-6d85-4ce5-b0b5-a17197520a7f"">" +
            @"This document is amongst a series of International Standards dealing with the conversion of systems of writing produced by Technical Committee ISO/TC 46, " +
            @"<em>Information and documentation</em>" +
            @", WG 3 " +
            @"<em>Conversion of written languages</em>" +
            @"." +
            @"</p>";

        // Expected outer XML of the resulting w:p element: one run per text node or
        // formatting element, all of them children of the paragraph.
        private const string OuterXml =
            @"<w:p xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">" +
            @"<w:r><w:t xml:space=""preserve"">This document is amongst a series of International Standards dealing with the conversion of systems of writing produced by Technical Committee ISO/TC 46, </w:t></w:r>" +
            @"<w:r><w:rPr><w:i /></w:rPr><w:t>Information and documentation</w:t></w:r>" +
            @"<w:r><w:t xml:space=""preserve"">, WG 3 </w:t></w:r>" +
            @"<w:r><w:rPr><w:i /></w:rPr><w:t>Conversion of written languages</w:t></w:r>" +
            @"<w:r><w:t>.</w:t></w:r>" +
            @"</w:p>";

        [Fact]
        public void CanTransformXmlToOpenXml()
        {
            // Arrange, creating an XElement based on the given XML.
            var xmlParagraph = XElement.Parse(Xml);

            // Act, transforming the XML into Open XML.
            var paragraph = (Paragraph) TransformElementToOpenXml(xmlParagraph);

            // Assert, demonstrating that we have indeed created an Open XML Paragraph instance.
            Assert.Equal(OuterXml, paragraph.OuterXml);
        }

        /// <summary>
        /// Maps a source XML element to its Open XML counterpart.
        /// </summary>
        /// <param name="element">A <c>p</c>, <c>em</c> or <c>b</c> element.</param>
        /// <returns>
        /// A <see cref="Paragraph"/> for <c>p</c>; an italic or bold <see cref="Run"/>
        /// for <c>em</c> or <c>b</c>.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The element's local name is not one of the supported names.
        /// </exception>
        private static OpenXmlElement TransformElementToOpenXml(XElement element)
        {
            return element.Name.LocalName switch
            {
                // Every child node (text or element) becomes one child run of the paragraph.
                "p" => new Paragraph(element.Nodes().Select(TransformNodeToOpenXml)),

                // element.Value is the concatenated text of the element, so any nested
                // formatting elements are flattened into a single run.
                "em" => new Run(new RunProperties(new Italic()), CreateText(element.Value)),
                "b" => new Run(new RunProperties(new Bold()), CreateText(element.Value)),

                // Report which element was rejected instead of throwing a bare exception.
                _ => throw new ArgumentOutOfRangeException(
                    nameof(element),
                    element.Name.LocalName,
                    "Unsupported XML element; expected 'p', 'em' or 'b'.")
            };
        }

        /// <summary>
        /// Maps a child node of a paragraph to an Open XML element.
        /// </summary>
        /// <param name="node">An element node or a text node.</param>
        /// <returns>
        /// The transformed element for an <see cref="XElement"/>, or an unformatted
        /// <see cref="Run"/> for an <see cref="XText"/>.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The node is neither an element nor a text node.
        /// </exception>
        private static OpenXmlElement TransformNodeToOpenXml(XNode node)
        {
            return node switch
            {
                XElement element => TransformElementToOpenXml(element),
                XText text => new Run(CreateText(text.Value)),

                // Covers null and every other node kind (comments, processing instructions, ...).
                _ => throw new ArgumentOutOfRangeException(
                    nameof(node),
                    node?.NodeType,
                    "Unsupported XML node; expected an element or a text node.")
            };
        }

        /// <summary>
        /// Creates a <see cref="Text"/> element, marking it <c>xml:space="preserve"</c>
        /// when the text starts or ends with whitespace.
        /// </summary>
        /// <param name="text">The text content; must not be <c>null</c>.</param>
        /// <returns>The new <see cref="Text"/> element.</returns>
        private static Text CreateText(string text)
        {
            var hasOuterWhitespace =
                text.Length > 0 && (char.IsWhiteSpace(text[0]) || char.IsWhiteSpace(text[^1]));

            return new Text(text)
            {
                // Leaving Space null omits the attribute, as the expected OuterXml requires
                // for runs without leading or trailing whitespace.
                Space = hasOuterWhitespace
                    ? new EnumValue<SpaceProcessingModeValues>(SpaceProcessingModeValues.Preserve)
                    : null
            };
        }
    }
}
以上示例涉及 <p>(段落)、<em>(强调/斜体)和 <b>(粗体)元素。添加更多格式元素(例如下划线)很容易。
请注意,示例代码做了一个简化假设:<em>、<b> 以及其他可能的格式化元素不会相互嵌套。要支持这些元素的嵌套,示例代码会稍微复杂一些(但这显然是可以做到的)。
我正在尝试将带有标签的 xml 格式转换为 DOCX 文件。我不是在生成新文档,而是在模板文档中插入文本。
<p id="_fab91699-6d85-4ce5-b0b5-a17197520a7f">This document is amongst a series of International Standards dealing with the conversion of systems of writing produced by Technical Committee ISO/TC 46, <em>Information and documentation</em>, WG 3 <em>Conversion of written languages</em>.</p>
我将文本片段收集在一个数组中,然后尝试用这样的代码来处理它们:
// Finds the bookmark named "ForewordText" and inserts the collected text fragments
// relative to the element that contains that bookmark.
// ForewordArray is read as [0, y] = style tag ("Normal" or anything else) and
// [1, y] = fragment text — presumably a two-row string array; confirm against its declaration.
foreach (var bkmkStart in wordDoc.MainDocumentPart.RootElement.Descendants<BookmarkStart>())
{
if (bkmkStart.Name == "ForewordText")
{
// Element that owns the bookmark; every InsertBeforeSelf below adds a sibling in front of it.
forewordbkmkParent = bkmkStart.Parent;
// Array.Length counts all cells of a multi-dimensional array, so Length / 2 - 1 is the
// last column index only when the array has exactly two rows (assumption — TODO confirm).
for (var y = 0; y <= ForewordArray.Length / (double)2 - 1; y++)
{
if (ForewordArray[0, y] == "Normal")
{
// Plain fragment that is not the last one.
if (y < ForewordArray.Length / (double)2 - 1)
{
if (ForewordArray[0, y + 1] == "Normal")
{
// Next fragment is plain too: emit this one as its own paragraph.
forewordbkmkParent.InsertBeforeSelf(new Paragraph(new Run(new Text(ForewordArray[1, y]))));
}
else
{
// Next fragment is formatted: same insertion, but remember the paragraph in fPara.
fPara = forewordbkmkParent.InsertBeforeSelf(new Paragraph(new Run(new Text(ForewordArray[1, y]))));
}
}
else
{
// Last fragment: add a run to fPara directly after its first run.
// NOTE(review): fPara is only assigned in the branch above; if that branch never ran,
// this dereferences whatever fPara held before the loop — confirm it cannot be null.
fPara.InsertAfter(new Run(new Text(ForewordArray[1, y])), fPara.GetFirstChild<Run>());
}
}
else
{
// Non-"Normal" fragment: build an italic run.
// NOTE(review): InsertBeforeSelf places this Run as a sibling in front of
// forewordbkmkParent, i.e. next to the paragraphs inserted above rather than inside one.
NewRun = forewordbkmkParent.InsertBeforeSelf(new Run());
NewRunProps = new RunProperties();
NewRunProps.AppendChild<Italic>(new Italic());
NewRun.AppendChild<RunProperties>(NewRunProps);
NewRun.AppendChild(new Text(ForewordArray[1, y]));
}
}
}
}
但我最终得到了格式错误的 XML,因为 Run 元素(w:r)被插入到了段落之后,而不是段落内部:
<w:p>
<w:r>
<w:t>This document is amongst a series of International Standards dealing with the conversion of systems of writing produced by Technical Committee ISO/TC 46, </w:t>
</w:r>
</w:p>
<w:r>
<w:rPr>
<w:i />
</w:rPr>
<w:t>Information and documentation</w:t>
</w:r>
<w:p>
<w:r>
<w:t>, WG 3 </w:t>
</w:r>
<w:r>
<w:t>.</w:t>
</w:r>
</w:p>
<w:r>
<w:rPr>
<w:i />
</w:rPr>
<w:t>Conversion of written languages</w:t>
</w:r>
最好能使用 SDK 以正确的方式完成这一操作。作为替代方案,我可以用正则表达式生成一个包含全部正确 XML 和文本的字符串,但我在 WordprocessingDocument 中找不到能把该字符串转换为可插入的 XML 片段的方法。
此类问题的解决方案是进行纯函数转换,如下代码示例所示。
代码示例使用问题中给出的示例 XML 元素 <p>(请参阅下面的 Xml 常量),并将其转换为相应的 Open XML w:p 元素,即 Open XML SDK 提供的强类型类 Paragraph 的一个实例。w:p(即 Paragraph)的预期外部 XML 由 OuterXml 常量定义。
using System;
using System.Linq;
using System.Xml.Linq;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Wordprocessing;
using Xunit;
namespace CodeSnippets.Tests.OpenXml.Wordprocessing
{
    /// <summary>
    /// Demonstrates a pure functional transformation of a simple XML paragraph
    /// (a <c>p</c> element with text and optional <c>em</c>/<c>b</c> children)
    /// into an Open XML <see cref="Paragraph"/>.
    /// </summary>
    public class XmlTransformationTests
    {
        // Input: the sample paragraph from the question.
        private const string Xml =
            @"<p id=""_fab91699-6d85-4ce5-b0b5-a17197520a7f"">" +
            @"This document is amongst a series of International Standards dealing with the conversion of systems of writing produced by Technical Committee ISO/TC 46, " +
            @"<em>Information and documentation</em>" +
            @", WG 3 " +
            @"<em>Conversion of written languages</em>" +
            @"." +
            @"</p>";

        // Expected outer XML of the resulting w:p element: one run per text node or
        // formatting element, all of them children of the paragraph.
        private const string OuterXml =
            @"<w:p xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">" +
            @"<w:r><w:t xml:space=""preserve"">This document is amongst a series of International Standards dealing with the conversion of systems of writing produced by Technical Committee ISO/TC 46, </w:t></w:r>" +
            @"<w:r><w:rPr><w:i /></w:rPr><w:t>Information and documentation</w:t></w:r>" +
            @"<w:r><w:t xml:space=""preserve"">, WG 3 </w:t></w:r>" +
            @"<w:r><w:rPr><w:i /></w:rPr><w:t>Conversion of written languages</w:t></w:r>" +
            @"<w:r><w:t>.</w:t></w:r>" +
            @"</w:p>";

        [Fact]
        public void CanTransformXmlToOpenXml()
        {
            // Arrange, creating an XElement based on the given XML.
            var xmlParagraph = XElement.Parse(Xml);

            // Act, transforming the XML into Open XML.
            var paragraph = (Paragraph) TransformElementToOpenXml(xmlParagraph);

            // Assert, demonstrating that we have indeed created an Open XML Paragraph instance.
            Assert.Equal(OuterXml, paragraph.OuterXml);
        }

        /// <summary>
        /// Maps a source XML element to its Open XML counterpart.
        /// </summary>
        /// <param name="element">A <c>p</c>, <c>em</c> or <c>b</c> element.</param>
        /// <returns>
        /// A <see cref="Paragraph"/> for <c>p</c>; an italic or bold <see cref="Run"/>
        /// for <c>em</c> or <c>b</c>.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The element's local name is not one of the supported names.
        /// </exception>
        private static OpenXmlElement TransformElementToOpenXml(XElement element)
        {
            return element.Name.LocalName switch
            {
                // Every child node (text or element) becomes one child run of the paragraph.
                "p" => new Paragraph(element.Nodes().Select(TransformNodeToOpenXml)),

                // element.Value is the concatenated text of the element, so any nested
                // formatting elements are flattened into a single run.
                "em" => new Run(new RunProperties(new Italic()), CreateText(element.Value)),
                "b" => new Run(new RunProperties(new Bold()), CreateText(element.Value)),

                // Report which element was rejected instead of throwing a bare exception.
                _ => throw new ArgumentOutOfRangeException(
                    nameof(element),
                    element.Name.LocalName,
                    "Unsupported XML element; expected 'p', 'em' or 'b'.")
            };
        }

        /// <summary>
        /// Maps a child node of a paragraph to an Open XML element.
        /// </summary>
        /// <param name="node">An element node or a text node.</param>
        /// <returns>
        /// The transformed element for an <see cref="XElement"/>, or an unformatted
        /// <see cref="Run"/> for an <see cref="XText"/>.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The node is neither an element nor a text node.
        /// </exception>
        private static OpenXmlElement TransformNodeToOpenXml(XNode node)
        {
            return node switch
            {
                XElement element => TransformElementToOpenXml(element),
                XText text => new Run(CreateText(text.Value)),

                // Covers null and every other node kind (comments, processing instructions, ...).
                _ => throw new ArgumentOutOfRangeException(
                    nameof(node),
                    node?.NodeType,
                    "Unsupported XML node; expected an element or a text node.")
            };
        }

        /// <summary>
        /// Creates a <see cref="Text"/> element, marking it <c>xml:space="preserve"</c>
        /// when the text starts or ends with whitespace.
        /// </summary>
        /// <param name="text">The text content; must not be <c>null</c>.</param>
        /// <returns>The new <see cref="Text"/> element.</returns>
        private static Text CreateText(string text)
        {
            var hasOuterWhitespace =
                text.Length > 0 && (char.IsWhiteSpace(text[0]) || char.IsWhiteSpace(text[^1]));

            return new Text(text)
            {
                // Leaving Space null omits the attribute, as the expected OuterXml requires
                // for runs without leading or trailing whitespace.
                Space = hasOuterWhitespace
                    ? new EnumValue<SpaceProcessingModeValues>(SpaceProcessingModeValues.Preserve)
                    : null
            };
        }
    }
}
以上示例涉及 <p>(段落)、<em>(强调/斜体)和 <b>(粗体)元素。添加更多格式元素(例如下划线)很容易。
请注意,示例代码做了一个简化假设:<em>、<b> 以及其他可能的格式化元素不会相互嵌套。要支持这些元素的嵌套,示例代码会稍微复杂一些(但这显然是可以做到的)。