抓取文件以转换为 json

Scraping file to convert to json

我没有像预期的那样从下半部分得到任何输出。我可以抓取顶部 table 的数据,但我也在尝试抓取底部 table 的数据并将它们编码到 json 中。我需要抓取的列是

1. Week、Date、Home、Away、At、Notes(周次、日期、主队、客队、地点、备注)

<?php

// Scrape the league report page and re-encode rows from its "report" tables as JSON.

// Collect libxml parse errors internally instead of emitting a warning for each one.
libxml_use_internal_errors(true);

$doc = new DOMDocument();
$doc->loadHTML(file_get_contents('https://www.leagueleader.net/sharedreport.php?operatorid=98&code=bc155b01-7492-412d-aa75-3c1e357248f1'));
// NOTE(review): this is set after loadHTML() has already run, so it cannot influence that parse.
$doc->strictErrorChecking = false;

// Rows collected so far, grouped by the key computed at the bottom of the row loop.
$pre = [];
// Column names applied to rows of the table at index 1 ($keys) and index 2 ($keys2).
$keys = ['team', 'div', 'team-site-name', 'site-address', 'site-phone'];
$keys2 = ['week', 'date', 'home', 'away', 'at', 'notes'];
// $k is the position among ALL <table> elements in the document, including the
// ones skipped below, so 1 and 2 are not "first/second report table".
foreach ($doc->getElementsByTagName('table') as $k => $table) {

    // Only tables whose class attribute contains "report" are processed.
    if (strpos($table->getAttribute('class'), 'report') === false) {
        continue;
    }
    foreach ($table->getElementsByTagName('tr') as $i => $tr) {
        if ($tr->parentNode->nodeName === 'thead') continue; // skip rows that sit directly inside <thead>
        // Gather the trimmed text of every child node of the row, dropping empty ones.
        // Because blanks are dropped, a row with empty cells yields fewer values.
        $row_values = [];
        foreach ($tr->childNodes as $td) {
            $text = trim($td->nodeValue);
            if ($text === '') continue;
            $row_values[] = $text;
        }

        if($k == 1 ){


            // NOTE(review): array_combine() requires exactly count($keys) values;
            // a row shortened by the blank-cell filter above will not match.
            $row_values = array_combine($keys, $row_values);

        }else   if($k == 2 ){
            // Drop the second collected value before pairing the rest with $keys2.
            unset($row_values[1]);
        // NOTE(review): same count requirement as above, here against $keys2.
        $row_values = array_combine($keys2, $row_values);

        }
          // NOTE(review): neither $keys nor $keys2 contains 'name', so this index is
          // never set and every row ends up grouped under the same empty key.
          $pre[$row_values['name']][] = $row_values;

    }

}
// Flatten each group into a single record.
$combined = [];
foreach($pre as $week => $row){
    // NOTE(review): $name is never defined in this script; the loop key is $week.
    $combined[$name] = [
        "week"=> $week,
        // $row[0] is the first row stored under this group key.
        "team"=> $row[0]['team'],
        "div"=> $row[0]['div'],
        "team-site-name" => $row[0]['team-site-name'],
        "site-address" => $row[0]['site-address'],
        "site-phone" => $row[0]['site-phone'],

        //"week" => $row[1]['week'],
        // $row[1] is the second row stored under this group key.
        // NOTE(review): presumably meant to be a row of the second table — confirm;
        // nothing here guarantees that row carries the $keys2 columns.
        "date" => $row[1]['date'],
        "home" => $row[1]['home'],
        "away" => $row[1]['away'],
        "at" => $row[1]['at'],
        "notes" => $row[1]['notes']
    ];
}

// Emit the result as pretty-printed JSON wrapped in <pre> for browser display.
echo '<pre>'.json_encode($combined, JSON_PRETTY_PRINT).'</pre>';
?>

这是输出

{  
    "": {  
        "week": "",  
        "team": "1",  
        "div": "A",  
        "team-site-name": "Team 01Freer Bar",  
        "site-address": "\u00a07355 Michigan Ave Detroit, MI 48210",  
        "site-phone": "\u00a03138993699",  
        "date": null,  
        "home": null,  
        "away": null,  
        "at": null,  
        "notes": null  
    }  
}  

为了从第二个 table 中获取比赛(match)数据,我把处理方式改成了使用 XPath。它会从第二个带有 class='report' 的 table 的 <tbody> 中提取 <tr> 标签(使用 //table[@class='report'][2]/tbody/tr)。

这样就会返回该 table 正文中的所有行。然后提取每一行中的所有 <td> 元素,并取出该行的各项详细信息。如果该行包含 week/date,就用它覆盖当前的值;如果该行包含比赛详情,就在输出中新增一条记录...

// Extract the fixtures from the body rows selected by the XPath query below.
// Produces one $matches entry per row that names a home team; rows that leave
// week/date blank inherit the values from the most recent row that set them.
$xpath = new DOMXPath($doc);
// NOTE(review): in XPath the [2] predicate is evaluated per parent, i.e. it picks
// a report table that is the second such table among its siblings — confirm this
// matches the page layout. The query also relies on the markup having a <tbody>.
$reportRow = $xpath->query("//table[@class='report'][2]/tbody/tr");
$matches = [];
$week = '';
$date = '';

// Trimmed text of the cell at $index, or '' when the row has no such cell, so a
// short row (fewer than six <td>s) never dereferences null.
$cellText = function (DOMNodeList $cells, $index) {
    $cell = $cells->item($index);
    return $cell === null ? '' : trim($cell->textContent);
};

foreach ($reportRow as $row) {
    $cells = $row->getElementsByTagName("td");

    // Update week and date only when the current row supplies them. Compared
    // against '' explicitly so that a literal "0" is not mistaken for blank.
    $rowWeek = $cellText($cells, 0);
    if ($rowWeek !== '') {
        $week = $rowWeek;
    }
    $rowDate = $cellText($cells, 1);
    if ($rowDate !== '') {
        $date = $rowDate;
    }

    // Extract the other details
    $teamHome = $cellText($cells, 2);
    $teamAway = $cellText($cells, 3);
    $at = $cellText($cells, 4);
    $notes = $cellText($cells, 5);

    // Store the row only if it carries match details (a home team is named).
    if ($teamHome !== '') {
        $matches[] = [
            "week" => $week, "date" => $date,
            "teamHome" => $teamHome, "teamAway" => $teamAway,
            "at" => $at, "notes" => $notes
        ];
    }
}
print_r($matches);

输出如下...

Array
(
    [0] => Array
        (
            [week] => 1
            [date] => 09/10/2019
            [teamHome] => Team 01
            [teamAway] => BYE
            [at] => BYE
            [notes] => 
        )