使用 PHP Word 阅读 MS word 文档
Read MS word document with PHP Word
我已经在 PHPStorm (IDE) 上安装并设置了 PHPWord。我正在尝试使用 PHP 从下面名为 'helloWorld.docx' 的 Word 文档中读取“从昨天学习,为今天而活,对明天抱有希望...”这一行文字。
到目前为止,这是我加载和阅读文档的代码:
<?php
// Loads helloWorld.docx with PHPWord and dumps its first section for inspection.
require_once 'PHPWord/bootstrap.php';

// Create a reader for the "Word2007" format and parse the document.
$objReader = \PhpOffice\PhpWord\IOFactory::createReader("Word2007");
$phpWord = $objReader->load("helloWorld.docx");

// getSection(0) yields one Section object (index 0), not a collection.
$section = $phpWord->getSection(0);

// var_dump() prints directly and returns nothing, so wrapping it in echo is redundant.
var_dump($section);
输出:
/usr/bin/php7.2 /home/wade/PhpstormProjects/getWord/readDoc.php
object(PhpOffice\PhpWord\Element\Section)#21 (21) {
["container":protected]=>
string(7) "Section"
["style":"PhpOffice\PhpWord\Element\Section":private]=>
object(PhpOffice\PhpWord\Style\Section)#22 (32) {
["orientation":"PhpOffice\PhpWord\Style\Section":private]=>
string(8) "portrait"
["paper":"PhpOffice\PhpWord\Style\Section":private]=>
object(PhpOffice\PhpWord\Style\Paper)#14 (8) {
["sizes":"PhpOffice\PhpWord\Style\Paper":private]=>
array(7) {
["A3"]=>
array(3) {
[0]=>
int(297)
[1]=>
int(420)
[2]=>
string(2) "mm"
}
["A4"]=>
array(3) {
[0]=>
int(210)
[1]=>
int(297)
[2]=>
string(2) "mm"
}
["A5"]=>
array(3) {
[0]=>
int(148)
[1]=>
int(210)
[2]=>
string(2) "mm"
}
["B5"]=>
array(3) {
[0]=>
int(176)
[1]=>
int(250)
[2]=>
string(2) "mm"
}
["Folio"]=>
array(3) {
[0]=>
float(8.5)
[1]=>
int(13)
[2]=>
string(2) "in"
}
["Legal"]=>
array(3) {
[0]=>
float(8.5)
[1]=>
int(14)
[2]=>
string(2) "in"
}
["Letter"]=>
array(3) {
[0]=>
float(8.5)
[1]=>
int(11)
[2]=>
string(2) "in"
}
}
["size":"PhpOffice\PhpWord\Style\Paper":private]=>
string(2) "A4"
["width":"PhpOffice\PhpWord\Style\Paper":private]=>
float(11905.511811024)
["height":"PhpOffice\PhpWord\Style\Paper":private]=>
float(16837.795275591)
["styleName":protected]=>
NULL
["index":protected]=>
NULL
["aliases":protected]=>
array(0) {
}
["isAuto":"PhpOffice\PhpWord\Style\AbstractStyle":private]=>
bool(false)
}
["pageSizeW":"PhpOffice\PhpWord\Style\Section":private]=>
string(15) "11905.511811024"
["pageSizeH":"PhpOffice\PhpWord\Style\Section":private]=>
string(15) "16837.795275591"
["marginTop":"PhpOffice\PhpWord\Style\Section":private]=>
string(4) "1440"
["marginLeft":"PhpOffice\PhpWord\Style\Section":private]=>
string(4) "1440"
["marginRight":"PhpOffice\PhpWord\Style\Section":private]=>
string(4) "1440"
["marginBottom":"PhpOffice\PhpWord\Style\Section":private]=>
string(4) "1440"
["gutter":"PhpOffice\PhpWord\Style\Section":private]=>
string(1) "0"
["headerHeight":"PhpOffice\PhpWord\Style\Section":private]=>
string(3) "720"
["footerHeight":"PhpOffice\PhpWord\Style\Section":private]=>
string(3) "720"
["pageNumberingStart":"PhpOffice\PhpWord\Style\Section":private]=>
NULL
["colsNum":"PhpOffice\PhpWord\Style\Section":private]=>
int(1)
["colsSpace":"PhpOffice\PhpWord\Style\Section":private]=>
string(3) "720"
["breakType":"PhpOffice\PhpWord\Style\Section":private]=>
NULL
["lineNumbering":"PhpOffice\PhpWord\Style\Section":private]=>
NULL
["borderTopSize":protected]=>
NULL
["borderTopColor":protected]=>
NULL
["borderTopStyle":protected]=>
NULL
["borderLeftSize":protected]=>
NULL
["borderLeftColor":protected]=>
NULL
["borderLeftStyle":protected]=>
NULL
["borderRightSize":protected]=>
NULL
["borderRightColor":protected]=>
NULL
["borderRightStyle":protected]=>
NULL
["borderBottomSize":protected]=>
NULL
["borderBottomColor":protected]=>
NULL
["borderBottomStyle":protected]=>
NULL
["styleName":protected]=>
NULL
["index":protected]=>
NULL
["aliases":protected]=>
array(0) {
}
["isAuto":"PhpOffice\PhpWord\Style\AbstractStyle":private]=>
bool(false)
}
["headers":"PhpOffice\PhpWord\Element\Section":private]=>
array(0) {
}
["footers":"PhpOffice\PhpWord\Element\Section":private]=>
array(0) {
}
["footnoteProperties":"PhpOffice\PhpWord\Element\Section":private]=>
NULL
["elements":protected]=>
array(4) {
[0]=>
object(PhpOffice\PhpWord\Element\TextRun)#34 (18) {
["container":protected]=>
string(7) "TextRun"
["paragraphStyle":protected]=>
object(PhpOffice\PhpWord\Style\Paragraph)#35 (34) {
["aliases":protected]=>
array(1) {
["line-height"]=>
string(10) "lineHeight"
}
["basedOn":"PhpOffice\PhpWord\Style\Paragraph":private]=>
string(6) "Normal"
["next":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["alignment":"PhpOffice\PhpWord\Style\Paragraph":private]=>
string(0) ""
["indentation":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["spacing":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["lineHeight":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["widowControl":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(true)
["keepNext":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["keepLines":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["pageBreakBefore":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["numStyle":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["numLevel":"PhpOffice\PhpWord\Style\Paragraph":private]=>
int(0)
["tabs":"PhpOffice\PhpWord\Style\Paragraph":private]=>
array(0) {
}
["shading":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["contextualSpacing":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["bidi":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["textAlignment":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["suppressAutoHyphens":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["borderTopSize":protected]=>
NULL
["borderTopColor":protected]=>
NULL
["borderTopStyle":protected]=>
NULL
["borderLeftSize":protected]=>
NULL
["borderLeftColor":protected]=>
NULL
["borderLeftStyle":protected]=>
NULL
["borderRightSize":protected]=>
NULL
["borderRightColor":protected]=>
NULL
["borderRightStyle":protected]=>
NULL
["borderBottomSize":protected]=>
NULL
["borderBottomColor":protected]=>
NULL
["borderBottomStyle":protected]=>
NULL
["styleName":protected]=>
NULL
["index":protected]=>
NULL
["isAuto":"PhpOffice\PhpWord\Style\AbstractStyle":private]=>
bool(false)
}
["elements":protected]=>
array(1) {
[0]=>
object(PhpOffice\PhpWord\Element\Text)#41 (18) {
["text":protected]=>
string(134) ""Learn from yesterday, live for today, hope for tomorrow. The important thing is not to stop questioning." (Albert Einstein)"
["fontStyle":protected]=>
object(PhpOffice\PhpWord\Style\Font)#43 (28) {
["aliases":protected]=>
array(1) {
["line-height"]=>
string(10) "lineHeight"
}
["type":"PhpOffice\PhpWord\Style\Font":private]=>
string(4) "text"
["name":"PhpOffice\PhpWord\Style\Font":private]=>
string(15) "Times New Roman"
["hint":"PhpOffice\PhpWord\Style\Font":private]=>
NULL
["size":"PhpOffice\PhpWord\Style\Font":private]=>
int(20)
["color":"PhpOffice\PhpWord\Style\Font":private]=>
NULL
["bold":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["italic":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["underline":"PhpOffice\PhpWord\Style\Font":private]=>
string(4) "none"
["superScript":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["subScript":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["strikethrough":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["doubleStrikethrough":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["smallCaps":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["allCaps":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["fgColor":"PhpOffice\PhpWord\Style\Font":private]=>
NULL
["scale":"PhpOffice\PhpWord\Style\Font":private]=>
NULL
["spacing":"PhpOffice\PhpWord\Style\Font":private]=>
NULL
["kerning":"PhpOffice\PhpWord\Style\Font":private]=>
NULL
["paragraph":"PhpOffice\PhpWord\Style\Font":private]=>
object(PhpOffice\PhpWord\Style\Paragraph)#42 (34) {
["aliases":protected]=>
array(1) {
["line-height"]=>
string(10) "lineHeight"
}
["basedOn":"PhpOffice\PhpWord\Style\Paragraph":private]=>
string(6) "Normal"
["next":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["alignment":"PhpOffice\PhpWord\Style\Paragraph":private]=>
string(0) ""
["indentation":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["spacing":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["lineHeight":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["widowControl":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(true)
["keepNext":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["keepLines":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["pageBreakBefore":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
完整输出太长,无法全部贴出,但如果在这段输出中向下滚动一段,您就可以看到我要查找的字符串。
我的主要问题是“有没有办法在不使用 var_dump 和搜索大量输出的情况下找到这个字符串?”
文本信息位于 [text] 属性中,而这些属性又嵌套在 [elements] 属性中。只需使用浏览器的“在页面中查找”功能,在浏览器里显示的对象输出中搜索它们,即可看到您要找的文本。
这两个属性是受保护的 (protected),因此您必须将它们改为 public,才能访问并提取其中的值。
在 PHPWord 库中定义这些属性的位置:
将它们改为 public 之后,您就可以开始逐层剥离所得到的对象,直到得到一个 [elements]->[text] 属性仅比它低一层的对象。
所以,算法是1)找到这些[text]
属性,2)查看持有这些属性的对象的路径,3)逐层切断更高级别的对象和数组,4)获取一个对象,其中 [elements]->[text]
属性只是第 2 级,5) 将 [text]
属性的所有值收集在一个数组中。
不要尝试使用 foreach 循环、递归函数等方式来访问文本。生成的对象非常庞大,您不会有足够的内存或时间去迭代、展平、归约 (reduce) 如此庞大的多维关联数组。
或者,您可以对 PHPWord 库文件做某些修改,这样在把 Word 文件加载到 PHPWord 时,得到的结果对象中就不会包含不必要的属性和值(样式、段落信息等)。
在 PHPSpreadsheet 中,他们实现了一种仅从 Excel 文件中获取实际数据的方法(去除格式、样式信息等)。另一方面,PHPWord 也声明了 $readDataOnly
属性,但他们就此止步,并且出于某种原因没有实现仅读取实际文本数据的机制。
这是从 docx 文件中检索文本内容的示例代码。
// Collects the text of a .docx file into one space-separated string and prints it.
// Only two levels are walked: each section's elements and, for elements that
// expose getElements(), their direct children. Anything nested deeper is not visited.
require_once __DIR__ . '/includes/phpoffice/vendor/autoload.php';

$content = '';

// File names are case-sensitive on Linux; this must match the document on disk.
$phpWord = \PhpOffice\PhpWord\IOFactory::load('helloWorld.docx');

foreach ($phpWord->getSections() as $section) {
    foreach ($section->getElements() as $element) {
        if (method_exists($element, 'getElements')) {
            // Container element (e.g. a TextRun): read the text of each child.
            foreach ($element->getElements() as $childElement) {
                if (method_exists($childElement, 'getText')) {
                    $content .= $childElement->getText() . ' ';
                } elseif (method_exists($childElement, 'getContent')) {
                    $content .= $childElement->getContent() . ' ';
                }
            }
        } elseif (method_exists($element, 'getText')) {
            // Element that carries its own text and has no children to walk.
            $content .= $element->getText() . ' ';
        }
    }
}

echo $content;
我已经在 PHPStorm (IDE) 上安装并设置了 PHPWord。我正在尝试使用 PHP 从下面名为 'helloWorld.docx' 的 Word 文档中读取“从昨天学习,为今天而活,对明天抱有希望...”这一行文字。
到目前为止,这是我加载和阅读文档的代码:
<?php
// Loads helloWorld.docx with PHPWord and dumps its first section for inspection.
require_once 'PHPWord/bootstrap.php';

// Create a reader for the "Word2007" format and parse the document.
$objReader = \PhpOffice\PhpWord\IOFactory::createReader("Word2007");
$phpWord = $objReader->load("helloWorld.docx");

// getSection(0) yields one Section object (index 0), not a collection.
$section = $phpWord->getSection(0);

// var_dump() prints directly and returns nothing, so wrapping it in echo is redundant.
var_dump($section);
输出:
/usr/bin/php7.2 /home/wade/PhpstormProjects/getWord/readDoc.php
object(PhpOffice\PhpWord\Element\Section)#21 (21) {
["container":protected]=>
string(7) "Section"
["style":"PhpOffice\PhpWord\Element\Section":private]=>
object(PhpOffice\PhpWord\Style\Section)#22 (32) {
["orientation":"PhpOffice\PhpWord\Style\Section":private]=>
string(8) "portrait"
["paper":"PhpOffice\PhpWord\Style\Section":private]=>
object(PhpOffice\PhpWord\Style\Paper)#14 (8) {
["sizes":"PhpOffice\PhpWord\Style\Paper":private]=>
array(7) {
["A3"]=>
array(3) {
[0]=>
int(297)
[1]=>
int(420)
[2]=>
string(2) "mm"
}
["A4"]=>
array(3) {
[0]=>
int(210)
[1]=>
int(297)
[2]=>
string(2) "mm"
}
["A5"]=>
array(3) {
[0]=>
int(148)
[1]=>
int(210)
[2]=>
string(2) "mm"
}
["B5"]=>
array(3) {
[0]=>
int(176)
[1]=>
int(250)
[2]=>
string(2) "mm"
}
["Folio"]=>
array(3) {
[0]=>
float(8.5)
[1]=>
int(13)
[2]=>
string(2) "in"
}
["Legal"]=>
array(3) {
[0]=>
float(8.5)
[1]=>
int(14)
[2]=>
string(2) "in"
}
["Letter"]=>
array(3) {
[0]=>
float(8.5)
[1]=>
int(11)
[2]=>
string(2) "in"
}
}
["size":"PhpOffice\PhpWord\Style\Paper":private]=>
string(2) "A4"
["width":"PhpOffice\PhpWord\Style\Paper":private]=>
float(11905.511811024)
["height":"PhpOffice\PhpWord\Style\Paper":private]=>
float(16837.795275591)
["styleName":protected]=>
NULL
["index":protected]=>
NULL
["aliases":protected]=>
array(0) {
}
["isAuto":"PhpOffice\PhpWord\Style\AbstractStyle":private]=>
bool(false)
}
["pageSizeW":"PhpOffice\PhpWord\Style\Section":private]=>
string(15) "11905.511811024"
["pageSizeH":"PhpOffice\PhpWord\Style\Section":private]=>
string(15) "16837.795275591"
["marginTop":"PhpOffice\PhpWord\Style\Section":private]=>
string(4) "1440"
["marginLeft":"PhpOffice\PhpWord\Style\Section":private]=>
string(4) "1440"
["marginRight":"PhpOffice\PhpWord\Style\Section":private]=>
string(4) "1440"
["marginBottom":"PhpOffice\PhpWord\Style\Section":private]=>
string(4) "1440"
["gutter":"PhpOffice\PhpWord\Style\Section":private]=>
string(1) "0"
["headerHeight":"PhpOffice\PhpWord\Style\Section":private]=>
string(3) "720"
["footerHeight":"PhpOffice\PhpWord\Style\Section":private]=>
string(3) "720"
["pageNumberingStart":"PhpOffice\PhpWord\Style\Section":private]=>
NULL
["colsNum":"PhpOffice\PhpWord\Style\Section":private]=>
int(1)
["colsSpace":"PhpOffice\PhpWord\Style\Section":private]=>
string(3) "720"
["breakType":"PhpOffice\PhpWord\Style\Section":private]=>
NULL
["lineNumbering":"PhpOffice\PhpWord\Style\Section":private]=>
NULL
["borderTopSize":protected]=>
NULL
["borderTopColor":protected]=>
NULL
["borderTopStyle":protected]=>
NULL
["borderLeftSize":protected]=>
NULL
["borderLeftColor":protected]=>
NULL
["borderLeftStyle":protected]=>
NULL
["borderRightSize":protected]=>
NULL
["borderRightColor":protected]=>
NULL
["borderRightStyle":protected]=>
NULL
["borderBottomSize":protected]=>
NULL
["borderBottomColor":protected]=>
NULL
["borderBottomStyle":protected]=>
NULL
["styleName":protected]=>
NULL
["index":protected]=>
NULL
["aliases":protected]=>
array(0) {
}
["isAuto":"PhpOffice\PhpWord\Style\AbstractStyle":private]=>
bool(false)
}
["headers":"PhpOffice\PhpWord\Element\Section":private]=>
array(0) {
}
["footers":"PhpOffice\PhpWord\Element\Section":private]=>
array(0) {
}
["footnoteProperties":"PhpOffice\PhpWord\Element\Section":private]=>
NULL
["elements":protected]=>
array(4) {
[0]=>
object(PhpOffice\PhpWord\Element\TextRun)#34 (18) {
["container":protected]=>
string(7) "TextRun"
["paragraphStyle":protected]=>
object(PhpOffice\PhpWord\Style\Paragraph)#35 (34) {
["aliases":protected]=>
array(1) {
["line-height"]=>
string(10) "lineHeight"
}
["basedOn":"PhpOffice\PhpWord\Style\Paragraph":private]=>
string(6) "Normal"
["next":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["alignment":"PhpOffice\PhpWord\Style\Paragraph":private]=>
string(0) ""
["indentation":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["spacing":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["lineHeight":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["widowControl":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(true)
["keepNext":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["keepLines":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["pageBreakBefore":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["numStyle":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["numLevel":"PhpOffice\PhpWord\Style\Paragraph":private]=>
int(0)
["tabs":"PhpOffice\PhpWord\Style\Paragraph":private]=>
array(0) {
}
["shading":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["contextualSpacing":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["bidi":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["textAlignment":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["suppressAutoHyphens":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["borderTopSize":protected]=>
NULL
["borderTopColor":protected]=>
NULL
["borderTopStyle":protected]=>
NULL
["borderLeftSize":protected]=>
NULL
["borderLeftColor":protected]=>
NULL
["borderLeftStyle":protected]=>
NULL
["borderRightSize":protected]=>
NULL
["borderRightColor":protected]=>
NULL
["borderRightStyle":protected]=>
NULL
["borderBottomSize":protected]=>
NULL
["borderBottomColor":protected]=>
NULL
["borderBottomStyle":protected]=>
NULL
["styleName":protected]=>
NULL
["index":protected]=>
NULL
["isAuto":"PhpOffice\PhpWord\Style\AbstractStyle":private]=>
bool(false)
}
["elements":protected]=>
array(1) {
[0]=>
object(PhpOffice\PhpWord\Element\Text)#41 (18) {
["text":protected]=>
string(134) ""Learn from yesterday, live for today, hope for tomorrow. The important thing is not to stop questioning." (Albert Einstein)"
["fontStyle":protected]=>
object(PhpOffice\PhpWord\Style\Font)#43 (28) {
["aliases":protected]=>
array(1) {
["line-height"]=>
string(10) "lineHeight"
}
["type":"PhpOffice\PhpWord\Style\Font":private]=>
string(4) "text"
["name":"PhpOffice\PhpWord\Style\Font":private]=>
string(15) "Times New Roman"
["hint":"PhpOffice\PhpWord\Style\Font":private]=>
NULL
["size":"PhpOffice\PhpWord\Style\Font":private]=>
int(20)
["color":"PhpOffice\PhpWord\Style\Font":private]=>
NULL
["bold":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["italic":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["underline":"PhpOffice\PhpWord\Style\Font":private]=>
string(4) "none"
["superScript":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["subScript":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["strikethrough":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["doubleStrikethrough":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["smallCaps":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["allCaps":"PhpOffice\PhpWord\Style\Font":private]=>
bool(false)
["fgColor":"PhpOffice\PhpWord\Style\Font":private]=>
NULL
["scale":"PhpOffice\PhpWord\Style\Font":private]=>
NULL
["spacing":"PhpOffice\PhpWord\Style\Font":private]=>
NULL
["kerning":"PhpOffice\PhpWord\Style\Font":private]=>
NULL
["paragraph":"PhpOffice\PhpWord\Style\Font":private]=>
object(PhpOffice\PhpWord\Style\Paragraph)#42 (34) {
["aliases":protected]=>
array(1) {
["line-height"]=>
string(10) "lineHeight"
}
["basedOn":"PhpOffice\PhpWord\Style\Paragraph":private]=>
string(6) "Normal"
["next":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["alignment":"PhpOffice\PhpWord\Style\Paragraph":private]=>
string(0) ""
["indentation":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["spacing":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["lineHeight":"PhpOffice\PhpWord\Style\Paragraph":private]=>
NULL
["widowControl":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(true)
["keepNext":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["keepLines":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
["pageBreakBefore":"PhpOffice\PhpWord\Style\Paragraph":private]=>
bool(false)
完整输出太长,无法全部贴出,但如果在这段输出中向下滚动一段,您就可以看到我要查找的字符串。
我的主要问题是“有没有办法在不使用 var_dump 和搜索大量输出的情况下找到这个字符串?”
文本信息位于 [text] 属性中,而这些属性又嵌套在 [elements] 属性中。只需使用浏览器的“在页面中查找”功能,在浏览器里显示的对象输出中搜索它们,即可看到您要找的文本。
这两个属性是受保护的 (protected),因此您必须将它们改为 public,才能访问并提取其中的值。
在 PHPWord 库中定义这些属性的位置:
将它们改为 public 之后,您就可以开始逐层剥离所得到的对象,直到得到一个 [elements]->[text] 属性仅比它低一层的对象。
所以,算法是1)找到这些[text]
属性,2)查看持有这些属性的对象的路径,3)逐层切断更高级别的对象和数组,4)获取一个对象,其中 [elements]->[text]
属性只是第 2 级,5) 将 [text]
属性的所有值收集在一个数组中。
不要尝试使用 foreach 循环、递归函数等方式来访问文本。生成的对象非常庞大,您不会有足够的内存或时间去迭代、展平、归约 (reduce) 如此庞大的多维关联数组。
或者,您可以对 PHPWord 库文件做某些修改,这样在把 Word 文件加载到 PHPWord 时,得到的结果对象中就不会包含不必要的属性和值(样式、段落信息等)。
在 PHPSpreadsheet 中,他们实现了一种仅从 Excel 文件中获取实际数据的方法(去除格式、样式信息等)。另一方面,PHPWord 也声明了 $readDataOnly
属性,但他们就此止步,并且出于某种原因没有实现仅读取实际文本数据的机制。
这是从 docx 文件中检索文本内容的示例代码。
// Collects the text of a .docx file into one space-separated string and prints it.
// Only two levels are walked: each section's elements and, for elements that
// expose getElements(), their direct children. Anything nested deeper is not visited.
require_once __DIR__ . '/includes/phpoffice/vendor/autoload.php';

$content = '';

// File names are case-sensitive on Linux; this must match the document on disk.
$phpWord = \PhpOffice\PhpWord\IOFactory::load('helloWorld.docx');

foreach ($phpWord->getSections() as $section) {
    foreach ($section->getElements() as $element) {
        if (method_exists($element, 'getElements')) {
            // Container element (e.g. a TextRun): read the text of each child.
            foreach ($element->getElements() as $childElement) {
                if (method_exists($childElement, 'getText')) {
                    $content .= $childElement->getText() . ' ';
                } elseif (method_exists($childElement, 'getContent')) {
                    $content .= $childElement->getContent() . ' ';
                }
            }
        } elseif (method_exists($element, 'getText')) {
            // Element that carries its own text and has no children to walk.
            $content .= $element->getText() . ' ';
        }
    }
}

echo $content;