如何使用 PHP 按 class 名称从 HTML 中获取元素及其子元素?
How can I get elements and child elements from an HTML by class name using PHP?
我有一个 HTML 文件,其中的曲目以列表形式列出。我想为每个曲目创建一个 PHP 对象,并将所有对象保存到一个 PHP 数组中。
HTML DOM 在我的 test.html 文件中:
<ul>
<li class="track">
<span id="primary-info">
<span class="interpret">Lorem ipsum</span>
<span class="title">dolor sit amet</span>
</span>
<span class="secondary-info">
<span class="playtime">6:00</span>
<span class="label">consetetur</span>
</span>
</li>
<li class="track">
<span id="primary-info">
<span class="interpret">sed diam</span>
<span class="title">nonumy eirmod</span>
</span>
<span class="secondary-info">
<span class="playtime">7:00</span>
<span class="label">invidunt</span>
</span>
</li>
</ul>
我的PHP代码:
<?php
// Question snippet: load test.html, select every track element and try to
// build one Track object per track.
// NOTE(review): the four `-> ?` lines below are placeholders from the
// question, so this snippet is intentionally not valid PHP as written.
$lTracklistArr = [];
// get the html
$HTML = file_get_contents("http://localhost/test.html");
// load the dom
$lDoc = new DOMDocument();
$lDoc->loadHTML($HTML);
// create XPath obj
$XPath = new DOMXPath($lDoc);
// get all tracks: every element whose class attribute is exactly "track"
$lTracks = $XPath->query("//*[@class='track']");
$i = 0;
// item($i) yields no node once $i runs past the end of the list, which ends the loop
while($lTracks->item($i))
{
// How can I get the values from the sub-elements from the DOM?
// One way to fill in the placeholders is a relative XPath query evaluated
// with the track as context node, e.g.
// $XPath->query(".//*[@class='title']", $lTracks->item($i))->item(0)->nodeValue
$lInterpret = $lTracks->item($i)-> ?
$lTitle = $lTracks->item($i)-> ?
$lPlaytime = $lTracks->item($i)-> ?
$lLabel = $lTracks->item($i)-> ?
$lTracklistArr[] = new Track($lInterpret, $lTitle, $lPlaytime, $lLabel);
$i++;
}
// show tracklist
print_r($lTracklistArr);
// PHP class about one track
/**
 * Plain data holder for one track read from the HTML track list.
 *
 * Each property stores the value passed to the constructor for the
 * correspondingly named argument.
 */
class Track
{
    /** @var mixed Value of the track's "interpret" element. */
    public $m_Interpret;
    /** @var mixed Value of the track's "title" element. */
    public $m_Title;
    /** @var mixed Value of the track's "playtime" element. */
    public $m_Playtime;
    /** @var mixed Value of the track's "label" element. */
    public $m_Label;

    /**
     * @param mixed $pInterpret Value stored in $m_Interpret.
     * @param mixed $pTitle     Value stored in $m_Title.
     * @param mixed $pPlaytime  Value stored in $m_Playtime.
     * @param mixed $pLabel     Value stored in $m_Label.
     */
    public function __construct($pInterpret, $pTitle, $pPlaytime, $pLabel)
    {
        // Properties must be addressed through $this; a bare `$m_Title = ...`
        // only creates a local variable and leaves the property null.
        $this->m_Interpret = $pInterpret;
        $this->m_Title = $pTitle;
        $this->m_Playtime = $pPlaytime;
        $this->m_Label = $pLabel;
    }
}
?>
获取曲目本身没有问题,但我无法通过 class 名称从其子元素中取值。
注意:曲目内部 DOM 元素的顺序可能会变化,因此需要通过 class 名称获取元素。
您可以使用 SimpleXML 执行此操作:
<?php
// Build one Track object per <li class="track"> using SimpleXML.
// simplexml_load_string() needs well-formed XML, so this only works while
// test.html is a well-formed fragment with a single root element (the <ul>).
$lTracklistArr = [];
// get the html
$HTML = file_get_contents("http://localhost/test.html");
if ($HTML === false) {
    throw new RuntimeException("Could not read http://localhost/test.html");
}
// class names of the leaf <span> elements that carry the track values
$classes = ["interpret", "title", "playtime", "label"];
$xml = simplexml_load_string($HTML);
if ($xml === false) {
    throw new RuntimeException("test.html could not be parsed as XML");
}
foreach ($xml->li as $item) {
    // skip list items that are not tracks (a missing attribute casts to "")
    if ((string) $item["class"] !== "track") {
        continue;
    }
    // start with null for every expected value so a missing <span> does not
    // cause an undefined-index read below
    $fields = array_fill_keys($classes, null);
    // values sit two levels down: <li> -> grouping <span> -> value <span>
    foreach ($item->span as $group) {
        foreach ($group->span as $leaf) {
            $class = (string) $leaf["class"];
            if (in_array($class, $classes, true)) {
                $fields[$class] = (string) $leaf;
            }
        }
    }
    $lTracklistArr[] = new Track(
        $fields["interpret"],
        $fields["title"],
        $fields["playtime"],
        $fields["label"]
    );
}
// show tracklist
var_dump($lTracklistArr);
// PHP class about one track
/**
 * Simple record for one track; the four public properties hold exactly the
 * values handed to the constructor.
 */
class Track
{
    /** @var mixed First constructor argument. */
    public $m_Interpret;
    /** @var mixed Second constructor argument. */
    public $m_Title;
    /** @var mixed Third constructor argument. */
    public $m_Playtime;
    /** @var mixed Fourth constructor argument. */
    public $m_Label;

    /**
     * Store the given values on the new instance.
     *
     * @param mixed $pInterpret Stored in $m_Interpret.
     * @param mixed $pTitle     Stored in $m_Title.
     * @param mixed $pPlaytime  Stored in $m_Playtime.
     * @param mixed $pLabel     Stored in $m_Label.
     */
    public function __construct($pInterpret, $pTitle, $pPlaytime, $pLabel)
    {
        list(
            $this->m_Interpret,
            $this->m_Title,
            $this->m_Playtime,
            $this->m_Label
        ) = [$pInterpret, $pTitle, $pPlaytime, $pLabel];
    }
}
当我将每个曲目的 DOMElement 转换为 HTML,再把该 HTML 重新加载为 DOMXPath 时,就可以对每个曲目单独使用 XPath:
// Build one Track object per track by re-parsing each track's HTML into its
// own document, so that "//" queries only see that single track.
// NOTE(review): passing the track node as the second (context) argument of
// DOMXPath::query() with a relative ".//" expression would avoid the re-parse.
$lTracklistArr = [];
// get the html
$HTML = file_get_contents("http://localhost/test.html");
if ($HTML === false) {
    throw new RuntimeException("Could not read http://localhost/test.html");
}
$XPath = GetXPathByHTML($HTML);
// get all tracks
$lTracks = $XPath->query("//*[@class='track']");
foreach ($lTracks as $lTrack) {
    // save DOMElement of the track as HTML and convert it back into DOMXPath
    $XPathTrack = GetXPathByHTML($lTrack->ownerDocument->saveHTML($lTrack));
    // read each value by class name; a missing sub-element yields null
    // instead of dereferencing a null node
    $lValues = [];
    foreach (["interpret", "title", "playtime", "label"] as $lClass) {
        $lNode = $XPathTrack->query("//*[@class='" . $lClass . "']")->item(0);
        $lValues[$lClass] = ($lNode !== null) ? $lNode->nodeValue : null;
    }
    $lTracklistArr[] = new Track(
        $lValues["interpret"],
        $lValues["title"],
        $lValues["playtime"],
        $lValues["label"]
    );
}
/**
 * Parse an HTML string and return a DOMXPath bound to the resulting document.
 *
 * Parse warnings are collected by libxml instead of being emitted, then
 * discarded; the caller's libxml error-handling setting is restored afterwards.
 *
 * @param string $pHTML HTML markup to parse. An empty or non-string value is
 *                      not parsed and results in an XPath over an empty document.
 * @return DOMXPath XPath object for the parsed document.
 */
function GetXPathByHTML($pHTML)
{
    // load the dom
    $lDoc = new DOMDocument();
    // suppress warnings, remembering the previous setting so it can be restored
    $lPrevious = libxml_use_internal_errors(true);
    if (is_string($pHTML) && $pHTML !== '') {
        $lDoc->loadHTML($pHTML);
    }
    // drop the buffered parse errors so they do not pile up across calls
    libxml_clear_errors();
    libxml_use_internal_errors($lPrevious);
    // create XPath obj
    return new DOMXPath($lDoc);
}
这对我有用。
print_r($lTracklistArr) 正确显示了结果:
Array ( [0] => Track Object ( [m_Interpret] => Lorem ipsum [m_Title] => dolor sit amet [m_Playtime] => 6:00 [m_Label] => consetetur ) [1] => Track Object ( [m_Interpret] => sed diam [m_Title] => nonumy eirmod [m_Playtime] => 7:00 [m_Label] => invidunt ) )
我有一个 HTML 文件,其中的曲目以列表形式列出。我想为每个曲目创建一个 PHP 对象,并将所有对象保存到一个 PHP 数组中。
HTML DOM 在我的 test.html 文件中:
<ul>
<li class="track">
<span id="primary-info">
<span class="interpret">Lorem ipsum</span>
<span class="title">dolor sit amet</span>
</span>
<span class="secondary-info">
<span class="playtime">6:00</span>
<span class="label">consetetur</span>
</span>
</li>
<li class="track">
<span id="primary-info">
<span class="interpret">sed diam</span>
<span class="title">nonumy eirmod</span>
</span>
<span class="secondary-info">
<span class="playtime">7:00</span>
<span class="label">invidunt</span>
</span>
</li>
</ul>
我的PHP代码:
<?php
// Question snippet: load test.html, select every track element and try to
// build one Track object per track.
// NOTE(review): the four `-> ?` lines below are placeholders from the
// question, so this snippet is intentionally not valid PHP as written.
$lTracklistArr = [];
// get the html
$HTML = file_get_contents("http://localhost/test.html");
// load the dom
$lDoc = new DOMDocument();
$lDoc->loadHTML($HTML);
// create XPath obj
$XPath = new DOMXPath($lDoc);
// get all tracks: every element whose class attribute is exactly "track"
$lTracks = $XPath->query("//*[@class='track']");
$i = 0;
// item($i) yields no node once $i runs past the end of the list, which ends the loop
while($lTracks->item($i))
{
// How can I get the values from the sub-elements from the DOM?
// One way to fill in the placeholders is a relative XPath query evaluated
// with the track as context node, e.g.
// $XPath->query(".//*[@class='title']", $lTracks->item($i))->item(0)->nodeValue
$lInterpret = $lTracks->item($i)-> ?
$lTitle = $lTracks->item($i)-> ?
$lPlaytime = $lTracks->item($i)-> ?
$lLabel = $lTracks->item($i)-> ?
$lTracklistArr[] = new Track($lInterpret, $lTitle, $lPlaytime, $lLabel);
$i++;
}
// show tracklist
print_r($lTracklistArr);
// PHP class about one track
/**
 * Plain data holder for one track read from the HTML track list.
 *
 * Each property stores the value passed to the constructor for the
 * correspondingly named argument.
 */
class Track
{
    /** @var mixed Value of the track's "interpret" element. */
    public $m_Interpret;
    /** @var mixed Value of the track's "title" element. */
    public $m_Title;
    /** @var mixed Value of the track's "playtime" element. */
    public $m_Playtime;
    /** @var mixed Value of the track's "label" element. */
    public $m_Label;

    /**
     * @param mixed $pInterpret Value stored in $m_Interpret.
     * @param mixed $pTitle     Value stored in $m_Title.
     * @param mixed $pPlaytime  Value stored in $m_Playtime.
     * @param mixed $pLabel     Value stored in $m_Label.
     */
    public function __construct($pInterpret, $pTitle, $pPlaytime, $pLabel)
    {
        // Properties must be addressed through $this; a bare `$m_Title = ...`
        // only creates a local variable and leaves the property null.
        $this->m_Interpret = $pInterpret;
        $this->m_Title = $pTitle;
        $this->m_Playtime = $pPlaytime;
        $this->m_Label = $pLabel;
    }
}
?>
获取曲目本身没有问题,但我无法通过 class 名称从其子元素中取值。
注意:曲目内部 DOM 元素的顺序可能会变化,因此需要通过 class 名称获取元素。
您可以使用 SimpleXML 执行此操作:
<?php
// Build one Track object per <li class="track"> using SimpleXML.
// simplexml_load_string() needs well-formed XML, so this only works while
// test.html is a well-formed fragment with a single root element (the <ul>).
$lTracklistArr = [];
// get the html
$HTML = file_get_contents("http://localhost/test.html");
if ($HTML === false) {
    throw new RuntimeException("Could not read http://localhost/test.html");
}
// class names of the leaf <span> elements that carry the track values
$classes = ["interpret", "title", "playtime", "label"];
$xml = simplexml_load_string($HTML);
if ($xml === false) {
    throw new RuntimeException("test.html could not be parsed as XML");
}
foreach ($xml->li as $item) {
    // skip list items that are not tracks (a missing attribute casts to "")
    if ((string) $item["class"] !== "track") {
        continue;
    }
    // start with null for every expected value so a missing <span> does not
    // cause an undefined-index read below
    $fields = array_fill_keys($classes, null);
    // values sit two levels down: <li> -> grouping <span> -> value <span>
    foreach ($item->span as $group) {
        foreach ($group->span as $leaf) {
            $class = (string) $leaf["class"];
            if (in_array($class, $classes, true)) {
                $fields[$class] = (string) $leaf;
            }
        }
    }
    $lTracklistArr[] = new Track(
        $fields["interpret"],
        $fields["title"],
        $fields["playtime"],
        $fields["label"]
    );
}
// show tracklist
var_dump($lTracklistArr);
// PHP class about one track
/**
 * Simple record for one track; the four public properties hold exactly the
 * values handed to the constructor.
 */
class Track
{
    /** @var mixed First constructor argument. */
    public $m_Interpret;
    /** @var mixed Second constructor argument. */
    public $m_Title;
    /** @var mixed Third constructor argument. */
    public $m_Playtime;
    /** @var mixed Fourth constructor argument. */
    public $m_Label;

    /**
     * Store the given values on the new instance.
     *
     * @param mixed $pInterpret Stored in $m_Interpret.
     * @param mixed $pTitle     Stored in $m_Title.
     * @param mixed $pPlaytime  Stored in $m_Playtime.
     * @param mixed $pLabel     Stored in $m_Label.
     */
    public function __construct($pInterpret, $pTitle, $pPlaytime, $pLabel)
    {
        list(
            $this->m_Interpret,
            $this->m_Title,
            $this->m_Playtime,
            $this->m_Label
        ) = [$pInterpret, $pTitle, $pPlaytime, $pLabel];
    }
}
当我将每个曲目的 DOMElement 转换为 HTML,再把该 HTML 重新加载为 DOMXPath 时,就可以对每个曲目单独使用 XPath:
// Build one Track object per track by re-parsing each track's HTML into its
// own document, so that "//" queries only see that single track.
// NOTE(review): passing the track node as the second (context) argument of
// DOMXPath::query() with a relative ".//" expression would avoid the re-parse.
$lTracklistArr = [];
// get the html
$HTML = file_get_contents("http://localhost/test.html");
if ($HTML === false) {
    throw new RuntimeException("Could not read http://localhost/test.html");
}
$XPath = GetXPathByHTML($HTML);
// get all tracks
$lTracks = $XPath->query("//*[@class='track']");
foreach ($lTracks as $lTrack) {
    // save DOMElement of the track as HTML and convert it back into DOMXPath
    $XPathTrack = GetXPathByHTML($lTrack->ownerDocument->saveHTML($lTrack));
    // read each value by class name; a missing sub-element yields null
    // instead of dereferencing a null node
    $lValues = [];
    foreach (["interpret", "title", "playtime", "label"] as $lClass) {
        $lNode = $XPathTrack->query("//*[@class='" . $lClass . "']")->item(0);
        $lValues[$lClass] = ($lNode !== null) ? $lNode->nodeValue : null;
    }
    $lTracklistArr[] = new Track(
        $lValues["interpret"],
        $lValues["title"],
        $lValues["playtime"],
        $lValues["label"]
    );
}
/**
 * Parse an HTML string and return a DOMXPath bound to the resulting document.
 *
 * Parse warnings are collected by libxml instead of being emitted, then
 * discarded; the caller's libxml error-handling setting is restored afterwards.
 *
 * @param string $pHTML HTML markup to parse. An empty or non-string value is
 *                      not parsed and results in an XPath over an empty document.
 * @return DOMXPath XPath object for the parsed document.
 */
function GetXPathByHTML($pHTML)
{
    // load the dom
    $lDoc = new DOMDocument();
    // suppress warnings, remembering the previous setting so it can be restored
    $lPrevious = libxml_use_internal_errors(true);
    if (is_string($pHTML) && $pHTML !== '') {
        $lDoc->loadHTML($pHTML);
    }
    // drop the buffered parse errors so they do not pile up across calls
    libxml_clear_errors();
    libxml_use_internal_errors($lPrevious);
    // create XPath obj
    return new DOMXPath($lDoc);
}
这对我有用。
print_r($lTracklistArr) 正确显示了结果:
Array ( [0] => Track Object ( [m_Interpret] => Lorem ipsum [m_Title] => dolor sit amet [m_Playtime] => 6:00 [m_Label] => consetetur ) [1] => Track Object ( [m_Interpret] => sed diam [m_Title] => nonumy eirmod [m_Playtime] => 7:00 [m_Label] => invidunt ) )