简单 html dom 解析器 - 两行合二为一
Simple html dom parser - two rows in one
我想把一个 table 插入数据库,为此想把 class 相同的两行合并成一行(一个数组)。
谁能帮我吗?
<table>
<tr class="pair"><td>1</td><td>2</td></tr>
<tr class="pair"><td>3</td><td>4</td></tr>
<tr class="unpair"><td>1</td><td>2</td></tr>
<tr class="unpair"><td>3</td><td>4</td></tr>
</table>
<?php
require('simple_html_dom.php');

# NOTE(review): $table is never assigned in this snippet; it is presumably a
# simple_html_dom node for the <table> shown above - confirm against the caller.

# Collect the cell contents of each row whose class is "pair".
# $rowData is re-initialised for every row, so after the loop it only holds
# the cells of the last matching row (the rows are not merged here).
foreach ($table->find('tr[class=pair]') as $rowpair) {
    $rowData = array();
    foreach ($rowpair->find('td') as $cell) {
        $rowData[] = $cell->innertext;
    }
}

# Same collection for each row whose class is "unpair".
foreach ($table->find('tr[class=unpair]') as $rowunpair) {
    $rowData = array();
    foreach ($rowunpair->find('td') as $cell) {
        $rowData[] = $cell->innertext;
    }
}
?>
期望得到:
<table>
<tr class="pair"><td>1</td><td>2</td><td>3</td><td>4</td></tr>
<tr class="unpair"><td>1</td><td>2</td><td>3</td><td>4</td></tr>
</table>
这应该可以按 class 对所有 table 行进行分组。
基本逻辑是循环遍历 table 中的所有行,并判断之前是否已经见过该行的 class。如果没有,就把对该行的引用保存下来,作为该 class 的“规范”(canonical)行;如果之前已经见过该 class,则把这一行的所有子节点转移到规范行中。
此方法适用于一段 HTML 中任意数量的 table 以及任意一组 class 名称。
<?php
# Merge all <tr> elements that share the same class attribute into the first
# row seen with that class (per table), then print the escaped document.
$str = '<table><tr class="pair"><td>1</td><td>2</td></tr><tr class="pair"><td>3</td><td>4</td></tr><tr class="unpair"><td>1</td><td>2</td></tr><tr class="unpair"><td>3</td><td>4</td></tr>
</table>';
$doc = new DOMDocument();
$doc->loadHTML($str);
$tables = $doc->getElementsByTagName('table');
foreach ($tables as $table) {
    # Maps a class attribute value to the first row seen with that class.
    $table_classes = array();
    # The node list is live, so take a static copy before removing rows.
    $row_list = iterator_to_array($table->getElementsByTagName('tr'));
    foreach ($row_list as $row) {
        $row_class = $row->getAttribute('class');
        # Rows with no (or an empty) class belong to no group; leave them
        # alone instead of merging them all under the empty-string key.
        if ($row_class === '') {
            continue;
        }
        if (!array_key_exists($row_class, $table_classes)) {
            # First occurrence of this class: remember this row as the original_row.
            $table_classes[$row_class] = $row;
            continue;
        }
        $original_row = $table_classes[$row_class];
        # Move the children over to the original row; appendChild() relocates
        # a node that is already in the tree, so the loop drains $row.
        while ($row->firstChild !== null) {
            $original_row->appendChild($row->firstChild);
        }
        # Now delete the emptied duplicate row.
        $row->parentNode->removeChild($row);
    }
}
echo htmlspecialchars($doc->saveXML());
?>
Returns:
<table>
<tr class="pair">
<td>1</td>
<td>2</td>
<td>3</td>
<td>4</td>
</tr>
<tr class="unpair">
<td>1</td>
<td>2</td>
<td>3</td>
<td>4</td>
</tr>
</table>
我想把一个 table 插入数据库,为此想把 class 相同的两行合并成一行(一个数组)。谁能帮我吗?
<table>
<tr class="pair"><td>1</td><td>2</td></tr>
<tr class="pair"><td>3</td><td>4</td></tr>
<tr class="unpair"><td>1</td><td>2</td></tr>
<tr class="unpair"><td>3</td><td>4</td></tr>
</table>
<?php
require('simple_html_dom.php');

# NOTE(review): $table is never assigned in this snippet; it is presumably a
# simple_html_dom node for the <table> shown above - confirm against the caller.

# Collect the cell contents of each row whose class is "pair".
# $rowData is re-initialised for every row, so after the loop it only holds
# the cells of the last matching row (the rows are not merged here).
foreach ($table->find('tr[class=pair]') as $rowpair) {
    $rowData = array();
    foreach ($rowpair->find('td') as $cell) {
        $rowData[] = $cell->innertext;
    }
}

# Same collection for each row whose class is "unpair".
foreach ($table->find('tr[class=unpair]') as $rowunpair) {
    $rowData = array();
    foreach ($rowunpair->find('td') as $cell) {
        $rowData[] = $cell->innertext;
    }
}
?>
期望得到:
<table>
<tr class="pair"><td>1</td><td>2</td><td>3</td><td>4</td></tr>
<tr class="unpair"><td>1</td><td>2</td><td>3</td><td>4</td></tr>
</table>
这应该可以按 class 对所有 table 行进行分组。
基本逻辑是循环遍历 table 中的所有行,并判断之前是否已经见过该行的 class。如果没有,就把对该行的引用保存下来,作为该 class 的“规范”(canonical)行;如果之前已经见过该 class,则把这一行的所有子节点转移到规范行中。
此方法适用于一段 HTML 中任意数量的 table 以及任意一组 class 名称。
<?php
# Merge all <tr> elements that share the same class attribute into the first
# row seen with that class (per table), then print the escaped document.
$str = '<table><tr class="pair"><td>1</td><td>2</td></tr><tr class="pair"><td>3</td><td>4</td></tr><tr class="unpair"><td>1</td><td>2</td></tr><tr class="unpair"><td>3</td><td>4</td></tr>
</table>';
$doc = new DOMDocument();
$doc->loadHTML($str);
$tables = $doc->getElementsByTagName('table');
foreach ($tables as $table) {
    # Maps a class attribute value to the first row seen with that class.
    $table_classes = array();
    # The node list is live, so take a static copy before removing rows.
    $row_list = iterator_to_array($table->getElementsByTagName('tr'));
    foreach ($row_list as $row) {
        $row_class = $row->getAttribute('class');
        # Rows with no (or an empty) class belong to no group; leave them
        # alone instead of merging them all under the empty-string key.
        if ($row_class === '') {
            continue;
        }
        if (!array_key_exists($row_class, $table_classes)) {
            # First occurrence of this class: remember this row as the original_row.
            $table_classes[$row_class] = $row;
            continue;
        }
        $original_row = $table_classes[$row_class];
        # Move the children over to the original row; appendChild() relocates
        # a node that is already in the tree, so the loop drains $row.
        while ($row->firstChild !== null) {
            $original_row->appendChild($row->firstChild);
        }
        # Now delete the emptied duplicate row.
        $row->parentNode->removeChild($row);
    }
}
echo htmlspecialchars($doc->saveXML());
?>
Returns:
<table>
<tr class="pair">
<td>1</td>
<td>2</td>
<td>3</td>
<td>4</td>
</tr>
<tr class="unpair">
<td>1</td>
<td>2</td>
<td>3</td>
<td>4</td>
</tr>
</table>