我正在尝试从 media:content 标签 RSS FEED 中检索图像 URL 并将其存储在 mysql 中,但我已经厌倦了尝试
I am trying to retrieve the image URL from media:content tag RSS FEED and storing it in the mysql, but I am tired of trying
这是 RSS 提要的一部分
<item>
<title>
<![CDATA[
Japan backs coronavirus drug; Moscow keeps lockdown: Live updates
]]>
</title>
<description>
<![CDATA[
<div><img src="https://www.aljazeera.com/mritems/Images/2020/5/9/b738bc043efb400296eff230bcfd444f_18.jpg" style="width: 100%;"><div>Over 275,000 have died worldwide while more than 1.3 million have recovered as countries weigh easing of lockdown rules.</div></div>
]]>
</description>
<link>
https://www.aljazeera.com/news/2020/05/japan-backs-coronavirus-drug-moscow-lockdown-live-updates-200508232257831.html
</link>
<guid isPermaLink="false">592c6747e057269acbb31f383c9c110d</guid>
<dc:creator>
<![CDATA[ AJENews ]]>
</dc:creator>
<pubDate>Sat, 09 May 2020 01:18:50 GMT</pubDate>
<media:content medium="image" url="https://www.aljazeera.com/mritems/Images/2020/5/9/b738bc043efb400296eff230bcfd444f_18.jpg"/>
</item>
此处通过 DOMDocument() 加载 rss 提要内容并定义数组以保存内容并使用 getElementsByTagName
<?php
$rss = new DOMDocument();
$rss->load('https://rss.app/feeds/7e40Pu6EiBz9ourE.xml');
$feed = array();
foreach ($rss->getElementsByTagName('item') as $node) {
$item = array (
'title' => $node->getElementsByTagName('title')->item(0)->nodeValue,
'desc' => $node->getElementsByTagName('description')->item(0)->nodeValue,
'link' => $node->getElementsByTagName('link')->item(0)->nodeValue,
'date' => $node->getElementsByTagName('pubDate')->item(0)->nodeValue,
//'dcUri' => $rss->lookupNamespaceUri('dc'),
//'authors' => $node->getElementsByTagNameNS($item[dcUri], 'creator'),
//'cre' => $item[authors]->item(0)->nodeValue,
);
array_push($feed, $item);
}
在 mysql 数据库中存储数组内容的代码,我已成功保存(标题、des、日期)
$mysqli = new mysqli("localhost","root","","hmsdb");
if (mysqli_connect_errno()) {
printf("Connect failed: %s\n", mysqli_connect_error());
exit();
}
$stmt = $mysqli->prepare("INSERT INTO `rssitems` (`title`, `description`, `link`, `pubDate`) VALUES (?, ?, ?, ?)");
$stmt->bind_param('ssss', $title, $description, $link, $pubDate);
foreach( $feed as $RssItem){
$title = $RssItem["title"];
$description = $RssItem["desc"];
$link = $RssItem["link"];
$pubDate = $RssItem["date"];
//$creator= $RssItem["cre"];
$stmt->execute();
}
这是一个最小的可重现示例,说明如何提取图像 URL。
- 遍历所有节点
- 为所有子节点嵌套循环
- 获取特定类型所需的属性
$rss = new DOMDocument();
$rss->load('https://rss.app/feeds/7e40Pu6EiBz9ourE.xml');
$feed = array();
foreach ($rss->getElementsByTagName('item') as $node) {
foreach($node->childNodes as $childNode) {
if($childNode->tagName == 'media:content') {
var_dump($childNode->getAttribute('url'));
}
}
}
string(90) "https://www.aljazeera.com/mritems/Images/2020/5/10/1024c76c23214805817eff50f9eb45c0_18.jpg"
string(90) "https://www.aljazeera.com/mritems/Images/2020/4/24/d8cba9945a604b88a1dbb413197d0db0_18.jpg"
string(90) "https://www.aljazeera.com/mritems/Images/2020/5/10/bb5f767a4baf4d4098cf766f71a7b347_18.jpg"
string(90) "https://www.aljazeera.com/mritems/Images/2020/5/10/6adcf99eaeda40bea68a519d95ad9fa5_18.jpg"
string(90) "https://www.aljazeera.com/mritems/Images/2020/3/23/4cb97ca92a4543799b3b0c83e8d6df3f_18.jpg"
这是 RSS 提要的一部分
<item>
<title>
<![CDATA[
Japan backs coronavirus drug; Moscow keeps lockdown: Live updates
]]>
</title>
<description>
<![CDATA[
<div><img src="https://www.aljazeera.com/mritems/Images/2020/5/9/b738bc043efb400296eff230bcfd444f_18.jpg" style="width: 100%;"><div>Over 275,000 have died worldwide while more than 1.3 million have recovered as countries weigh easing of lockdown rules.</div></div>
]]>
</description>
<link>
https://www.aljazeera.com/news/2020/05/japan-backs-coronavirus-drug-moscow-lockdown-live-updates-200508232257831.html
</link>
<guid isPermaLink="false">592c6747e057269acbb31f383c9c110d</guid>
<dc:creator>
<![CDATA[ AJENews ]]>
</dc:creator>
<pubDate>Sat, 09 May 2020 01:18:50 GMT</pubDate>
<media:content medium="image" url="https://www.aljazeera.com/mritems/Images/2020/5/9/b738bc043efb400296eff230bcfd444f_18.jpg"/>
</item>
此处通过 DOMDocument() 加载 rss 提要内容并定义数组以保存内容并使用 getElementsByTagName
<?php
$rss = new DOMDocument();
$rss->load('https://rss.app/feeds/7e40Pu6EiBz9ourE.xml');
$feed = array();
foreach ($rss->getElementsByTagName('item') as $node) {
$item = array (
'title' => $node->getElementsByTagName('title')->item(0)->nodeValue,
'desc' => $node->getElementsByTagName('description')->item(0)->nodeValue,
'link' => $node->getElementsByTagName('link')->item(0)->nodeValue,
'date' => $node->getElementsByTagName('pubDate')->item(0)->nodeValue,
//'dcUri' => $rss->lookupNamespaceUri('dc'),
//'authors' => $node->getElementsByTagNameNS($item[dcUri], 'creator'),
//'cre' => $item[authors]->item(0)->nodeValue,
);
array_push($feed, $item);
}
在 mysql 数据库中存储数组内容的代码,我已成功保存(标题、des、日期)
$mysqli = new mysqli("localhost","root","","hmsdb");
if (mysqli_connect_errno()) {
printf("Connect failed: %s\n", mysqli_connect_error());
exit();
}
$stmt = $mysqli->prepare("INSERT INTO `rssitems` (`title`, `description`, `link`, `pubDate`) VALUES (?, ?, ?, ?)");
$stmt->bind_param('ssss', $title, $description, $link, $pubDate);
foreach( $feed as $RssItem){
$title = $RssItem["title"];
$description = $RssItem["desc"];
$link = $RssItem["link"];
$pubDate = $RssItem["date"];
//$creator= $RssItem["cre"];
$stmt->execute();
}
这是一个最小的可重现示例,说明如何提取图像 URL。
- 遍历所有节点
- 为所有子节点嵌套循环
- 获取特定类型所需的属性
$rss = new DOMDocument();
$rss->load('https://rss.app/feeds/7e40Pu6EiBz9ourE.xml');
$feed = array();
foreach ($rss->getElementsByTagName('item') as $node) {
foreach($node->childNodes as $childNode) {
if($childNode->tagName == 'media:content') {
var_dump($childNode->getAttribute('url'));
}
}
}
string(90) "https://www.aljazeera.com/mritems/Images/2020/5/10/1024c76c23214805817eff50f9eb45c0_18.jpg"
string(90) "https://www.aljazeera.com/mritems/Images/2020/4/24/d8cba9945a604b88a1dbb413197d0db0_18.jpg"
string(90) "https://www.aljazeera.com/mritems/Images/2020/5/10/bb5f767a4baf4d4098cf766f71a7b347_18.jpg"
string(90) "https://www.aljazeera.com/mritems/Images/2020/5/10/6adcf99eaeda40bea68a519d95ad9fa5_18.jpg"
string(90) "https://www.aljazeera.com/mritems/Images/2020/3/23/4cb97ca92a4543799b3b0c83e8d6df3f_18.jpg"