如何使用 php DomDocument 获取某个标签的 html
How to get the html of certain tag using php DomDocument
首先我让它找到元素
// Parse the HTML string into a DOM tree.
$dom= new \DOMDocument();
$dom->loadHTML($html_string);
// item() is zero-based, so item(1) selects the second <p> element in the document.
$only_p = $dom->getElementsByTagName('p')->item(1);
如果我尝试
$only_p->textContent; /* <-- it will only return the text inside the paragraph and even remove all the tags inside of it */
我需要的是
$only_p = $dom->getElementsByTagName('p')->item(1);
$only_p->outerHTML;
那会返回包含标签本身在内的完整 HTML,比如
<p class="something"><a href="link"> this is text </a></p>
而不仅仅是字符串 "this is text"
我就是这样解决的
/**
 * Returns the outer HTML (the tag itself plus everything inside it) of every
 * element with the given tag name found in an HTML string.
 *
 * Each matching element is copied into its own empty DOMDocument and that
 * document is serialized with saveHTML(), so every result contains exactly one
 * element, formatted the way saveHTML() formats a whole document.
 *
 * An empty (or null) $html_string yields an empty result instead of reaching
 * loadHTML(), which cannot parse an empty string.
 *
 * @param string $html_string HTML markup to search.
 * @param string $tagName     Tag name to look for, e.g. 'p'.
 * @param string $return_as   'array' (default) returns one entry per matching element;
 *                            any other value returns all entries concatenated into one string.
 * @return array|string
 */
public function getOuterHTMLFromTagName($html_string,$tagName,$return_as='array')
{
    $html_fragments = array();

    // loadHTML() rejects an empty source (warning on older PHP, ValueError on PHP 8+),
    // so there is nothing to parse and nothing to find in that case.
    if ($html_string !== null && $html_string !== '') {
        // Collect parser complaints internally instead of emitting PHP warnings for
        // markup libxml does not like; the caller's previous setting is restored below.
        $previous_setting = libxml_use_internal_errors(true);

        $dom_doc = new \DOMDocument();
        $dom_doc->loadHTML($html_string);

        // Only discard the collected errors when we enabled collection ourselves;
        // a caller who already collects errors keeps its buffer.
        if (!$previous_setting) {
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previous_setting);

        // Query the document once and walk the resulting node list.
        foreach ($dom_doc->getElementsByTagName($tagName) as $element) {
            // A fresh document that holds nothing but a deep copy of this element.
            $element_doc = new \DOMDocument();
            $element_doc->appendChild($element_doc->importNode($element, true));

            // Serialize once; the same text feeds both return formats.
            $html_fragments[] = $element_doc->saveHTML();
        }
    }

    // Loose comparison is kept so existing callers see the same branch selection.
    if ($return_as == 'array') {
        return $html_fragments;
    }

    return implode('', $html_fragments);
}
首先我让它找到元素
// Parse the HTML string into a DOM tree.
$dom= new \DOMDocument();
$dom->loadHTML($html_string);
// item() is zero-based, so item(1) selects the second <p> element in the document.
$only_p = $dom->getElementsByTagName('p')->item(1);
如果我尝试
$only_p->textContent; /* <-- it will only return the text inside the paragraph and even remove all the tags inside of it */
我需要的是
$only_p = $dom->getElementsByTagName('p')->item(1);
$only_p->outerHTML;
那会返回包含标签本身在内的完整 HTML,比如
<p class="something"><a href="link"> this is text </a></p>
而不仅仅是字符串 "this is text"
我就是这样解决的
/**
 * Returns the outer HTML (the tag itself plus everything inside it) of every
 * element with the given tag name found in an HTML string.
 *
 * Each matching element is copied into its own empty DOMDocument and that
 * document is serialized with saveHTML(), so every result contains exactly one
 * element, formatted the way saveHTML() formats a whole document.
 *
 * An empty (or null) $html_string yields an empty result instead of reaching
 * loadHTML(), which cannot parse an empty string.
 *
 * @param string $html_string HTML markup to search.
 * @param string $tagName     Tag name to look for, e.g. 'p'.
 * @param string $return_as   'array' (default) returns one entry per matching element;
 *                            any other value returns all entries concatenated into one string.
 * @return array|string
 */
public function getOuterHTMLFromTagName($html_string,$tagName,$return_as='array')
{
    $html_fragments = array();

    // loadHTML() rejects an empty source (warning on older PHP, ValueError on PHP 8+),
    // so there is nothing to parse and nothing to find in that case.
    if ($html_string !== null && $html_string !== '') {
        // Collect parser complaints internally instead of emitting PHP warnings for
        // markup libxml does not like; the caller's previous setting is restored below.
        $previous_setting = libxml_use_internal_errors(true);

        $dom_doc = new \DOMDocument();
        $dom_doc->loadHTML($html_string);

        // Only discard the collected errors when we enabled collection ourselves;
        // a caller who already collects errors keeps its buffer.
        if (!$previous_setting) {
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previous_setting);

        // Query the document once and walk the resulting node list.
        foreach ($dom_doc->getElementsByTagName($tagName) as $element) {
            // A fresh document that holds nothing but a deep copy of this element.
            $element_doc = new \DOMDocument();
            $element_doc->appendChild($element_doc->importNode($element, true));

            // Serialize once; the same text feeds both return formats.
            $html_fragments[] = $element_doc->saveHTML();
        }
    }

    // Loose comparison is kept so existing callers see the same branch selection.
    if ($return_as == 'array') {
        return $html_fragments;
    }

    return implode('', $html_fragments);
}