使用 curl 解析 XML 时返回 null
Parsing XML using curl returning null
我正在尝试解析 RSS 提要,但我得到的似乎是一个空的 DOM 文档对象。我当前的代码是:
// Attempt 1: download the feed with cURL and parse it with DOMDocument.
$xml_url = "https://thehockeywriters.com/category/san-jose-sharks/feed/";
$curl = curl_init();
// Make curl_exec() return the response body instead of printing it.
curl_setopt( $curl, CURLOPT_RETURNTRANSFER, 1 );
curl_setopt( $curl, CURLOPT_URL, $xml_url );
// NOTE(review): no CURLOPT_USERAGENT is set, and neither the return value of
// curl_exec() nor the HTTP status code is checked before the body is parsed.
$xml = curl_exec( $curl );
curl_close( $curl );
// Encoding clean-up experiments, left disabled.
//$xml = iconv('UTF-8', 'UTF-8//IGNORE', $xml);
//$xml = utf8_encode($xml);
$document = new DOMDocument;
// NOTE(review): the result of loadXML() is not checked, so a failed parse goes unnoticed.
$document->loadXML( $xml );
// Diagnostic: report whether allow_url_fopen is enabled.
if( ini_get('allow_url_fopen') ) {
echo "allow url fopen? Yes";
}
echo "<br />";
var_dump($document);
$items = $document->getElementsByTagName("item");
foreach ($items as $item) {
// NOTE(review): getElementsByTagName() returns a node list, not a string,
// so the echo below cannot print the title text.
$title = $item->getElementsByTagName('title');
echo $title;
}
// Attempt 2: let SimpleXML fetch and parse the URL directly.
$url = 'https://thehockeywriters.com/category/san-jose-sharks/feed/';
// NOTE(review): simplexml_load_file() returns false on failure; the result is not checked.
$xml = simplexml_load_file($url);
// NOTE(review): this loop iterates $items (the DOMDocument node list from
// attempt 1), not the SimpleXML result stored in $xml.
foreach ($items as $item) {
$title = $item->title;
echo $title;
}
// Diagnostic dumps of the SimpleXML result.
print_r($xml);
echo "<br />";
var_dump($xml);
echo "<br />hello?";
这段代码包含了对同一个 URL 的两次独立解析尝试,分别参考了在 Stack Overflow 上找到的以下示例中给出的答案和建议:
我尝试过或查找过的东西:
1.检查以确保允许allow_url_fopen
2.确保有UTF编码
3. 验证 XML
4. 之前链接的 Stack Overflow 帖子中提供的代码示例
这是我当前 var_dump 和 echo 的输出:
allow url fopen? Yes
object(DOMDocument)#2 (34) { ["doctype"]=> NULL ["implementation"]=> string(22) "(object value omitted)"
["documentElement"]=> NULL ["actualEncoding"]=> NULL ["encoding"]=> NULL
["xmlEncoding"]=> NULL ["standalone"]=> bool(true) ["xmlStandalone"]=> bool(true)
["version"]=> string(3) "1.0" ["xmlVersion"]=> string(3) "1.0"
["strictErrorChecking"]=> bool(true) ["documentURI"]=> NULL ["config"]=> NULL
["formatOutput"]=> bool(false) ["validateOnParse"]=> bool(false) ["resolveExternals"]=> bool(false)
["preserveWhiteSpace"]=> bool(true) ["recover"]=> bool(false) ["substituteEntities"]=> bool(false)
["nodeName"]=> string(9) "#document" ["nodeValue"]=> NULL ["nodeType"]=> int(9) ["parentNode"]=> NULL
["childNodes"]=> string(22) "(object value omitted)" ["firstChild"]=> NULL ["lastChild"]=> NULL
["previousSibling"]=> NULL ["attributes"]=> NULL ["ownerDocument"]=> NULL ["namespaceURI"]=> NULL
["prefix"]=> string(0) "" ["localName"]=> NULL ["baseURI"]=> NULL ["textContent"]=> string(0) "" }
bool(false)
hello?
我运行您的代码时遇到的唯一问题是:由于没有定义 user-agent,访问该提要时服务器会返回 403 错误。
今后可以使用 curl_getinfo 提取请求的状态码,并检查它是否等于 200(即 OK),以确认请求没有失败。
// Read the HTTP status code of the finished transfer (call this before curl_close()).
$httpcode = curl_getinfo($curl, CURLINFO_HTTP_CODE);
除此之外,您的循环中还有一些错误。
使用 SimpleXML:
<?php
// Download an RSS feed with cURL and list every item title using SimpleXML.
$url = "https://thehockeywriters.com/category/san-jose-sharks/feed/";
$curl = curl_init();
// Send a browser-like user agent; without one the feed host answers 403.
curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:17.0) Gecko/20100101 Firefox/17.0");
// Return the response body as a string instead of printing it.
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_URL, $url);
$data = curl_exec($curl);
// Read the transport error and status code before the handle is closed.
$curlError = curl_error($curl);
$httpcode = curl_getinfo($curl, CURLINFO_HTTP_CODE);
curl_close($curl);
// curl_exec() returns false on a transport failure (DNS, TLS, timeout, ...);
// in that case there is no meaningful HTTP status, so report the cURL error.
if ($data === false)
{
    echo "Failed to retrieve feed... cURL error: ", htmlspecialchars($curlError, ENT_QUOTES, 'UTF-8');
    die();
}
if ($httpcode !== 200)
{
    echo "Failed to retrieve feed... Error code: $httpcode";
    die();
}
// Collect libxml parse errors internally instead of emitting warnings into the page.
libxml_use_internal_errors(true);
// The SimpleXMLElement constructor throws an Exception when the body is not
// well-formed XML, so catch it rather than dying with an uncaught exception.
try
{
    $feed = new SimpleXMLElement($data);
}
catch (Exception $e)
{
    echo "Failed to parse feed... ", htmlspecialchars($e->getMessage(), ENT_QUOTES, 'UTF-8');
    die();
}
// list all titles...
foreach ($feed->channel->item as $item)
{
    // Titles come from a remote feed: escape them before writing them into HTML.
    echo htmlspecialchars((string)$item->title, ENT_QUOTES, 'UTF-8'), "<br>\n";
}
使用 DOMDocument:
<?php
// Download an RSS feed with cURL and list every item title using DOMDocument.
$url = "https://thehockeywriters.com/category/san-jose-sharks/feed/";
$curl = curl_init();
// Send a browser-like user agent; without one the feed host answers 403.
curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:17.0) Gecko/20100101 Firefox/17.0");
// Return the response body as a string instead of printing it.
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_URL, $url);
$data = curl_exec($curl);
// Read the transport error and status code before the handle is closed.
$curlError = curl_error($curl);
$httpcode = curl_getinfo($curl, CURLINFO_HTTP_CODE);
curl_close($curl);
// curl_exec() returns false on a transport failure (DNS, TLS, timeout, ...).
if ($data === false)
{
    echo "Failed to retrieve feed... cURL error: ", htmlspecialchars($curlError, ENT_QUOTES, 'UTF-8');
    die();
}
if ($httpcode !== 200)
{
    echo "Failed to retrieve feed... Error code: $httpcode";
    die();
}
// Collect libxml parse errors internally instead of emitting warnings into the page.
libxml_use_internal_errors(true);
$xml = new DOMDocument();
// loadXML() rejects an empty string and returns false on malformed XML;
// stop here instead of iterating over an empty document.
if ($data === '' || !$xml->loadXML($data))
{
    echo "Failed to parse feed...";
    die();
}
// list all titles...
foreach ($xml->getElementsByTagName("item") as $item)
{
    foreach ($item->getElementsByTagName("title") as $title)
    {
        // Titles come from a remote feed: escape them before writing them into HTML.
        echo htmlspecialchars($title->nodeValue, ENT_QUOTES, 'UTF-8'), "<br>\n";
    }
}
如果您只想打印所有项目的 title/description:
// Print the title immediately followed by the description of every feed item.
foreach ($feed->channel->item as $entry) {
    echo $entry->title, $entry->description;
    // uncomment the below line to print only the first entry.
    // break;
}
如果您只想要第一个条目,而不使用 foreach:
// Print the title and description of the first feed item only.
$firstItem = $feed->channel->item[0];
echo $firstItem->title, $firstItem->description;
将标题和描述保存到一个数组中供以后使用:
// Collect each item's title and description as plain strings for later use.
$result = [];
foreach ($feed->channel->item as $entry) {
    $entryTitle = (string) $entry->title;
    $entryDescription = (string) $entry->description;
    $result[] = ['title' => $entryTitle, 'description' => $entryDescription];
    // Alternatively, build a title => description map instead of a list:
    // $result[(string)$entry->title] = (string)$entry->description;
}
在 foreach 循环中使用 MySQLi/PDO 预处理语句:
// Insert every feed item through an already-prepared statement held in $stmt.
// MySQLi: bind_param() binds its arguments by reference, so bind two plain
// string variables once and only reassign them on each iteration.
$title = '';
$description = '';
$stmt->bind_param('ss', $title, $description);
foreach ($feed->channel->item as $item)
{
    // $item->title / $item->description are SimpleXMLElement objects; cast
    // them so the statement receives real strings.
    $title = (string)$item->title;
    $description = (string)$item->description;
    $stmt->execute();
    // PDO alternative (use instead of the MySQLi lines above): bindValue()
    // takes the value itself, so the cast string can be passed directly.
    //$stmt->bindValue(':title', (string)$item->title, PDO::PARAM_STR);
    //$stmt->bindValue(':description', (string)$item->description, PDO::PARAM_STR);
    //$stmt->execute();
}
我采纳了 Prix 的回答,因为它指出了需要定义用户代理(user-agent);不过我想出了另一种编写循环的方法,它避免了嵌套循环,并且更容易访问其他节点。这是我正在使用的代码(DOMDocument 方案):
// Download the feed with cURL and print each item's title and description
// using DOMDocument, without a nested loop.
$xml_url = "https://thehockeywriters.com/category/san-jose-sharks/feed/";
$curl = curl_init();
// Return the response body as a string instead of printing it.
curl_setopt( $curl, CURLOPT_RETURNTRANSFER, 1 );
curl_setopt( $curl, CURLOPT_URL, $xml_url );
// Send a browser-like user agent; without one the feed host answers 403.
curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:17.0) Gecko/20100101 Firefox/17.0");
$xml = curl_exec( $curl );
// Read the status code before the handle is closed.
$httpcode = curl_getinfo( $curl, CURLINFO_HTTP_CODE );
curl_close( $curl );
// curl_exec() returns false on a transport failure; also refuse empty bodies
// and non-200 responses so an error page is never parsed as a feed.
if ( $xml === false || $xml === '' || $httpcode !== 200 ) {
    echo "Failed to retrieve feed... Error code: $httpcode";
    die();
}
$document = new DOMDocument;
// loadXML() returns false when the body is not well-formed XML.
if ( ! $document->loadXML( $xml ) ) {
    echo "Failed to parse feed...";
    die();
}
$items = $document->getElementsByTagName("item");
foreach ($items as $item) {
    // item(0) is null when the element is missing; fall back to an empty
    // string instead of reading nodeValue on null.
    $titleNode = $item->getElementsByTagName('title')->item(0);
    $title = $titleNode !== null ? $titleNode->nodeValue : '';
    echo $title;
    $descNode = $item->getElementsByTagName('description')->item(0);
    $desc = $descNode !== null ? $descNode->nodeValue : '';
    echo $desc;
}
我正在尝试解析 RSS 提要,但我得到的似乎是一个空的 DOM 文档对象。我当前的代码是:
// Attempt 1: download the feed with cURL and parse it with DOMDocument.
$xml_url = "https://thehockeywriters.com/category/san-jose-sharks/feed/";
$curl = curl_init();
// Make curl_exec() return the response body instead of printing it.
curl_setopt( $curl, CURLOPT_RETURNTRANSFER, 1 );
curl_setopt( $curl, CURLOPT_URL, $xml_url );
// NOTE(review): no CURLOPT_USERAGENT is set, and neither the return value of
// curl_exec() nor the HTTP status code is checked before the body is parsed.
$xml = curl_exec( $curl );
curl_close( $curl );
// Encoding clean-up experiments, left disabled.
//$xml = iconv('UTF-8', 'UTF-8//IGNORE', $xml);
//$xml = utf8_encode($xml);
$document = new DOMDocument;
// NOTE(review): the result of loadXML() is not checked, so a failed parse goes unnoticed.
$document->loadXML( $xml );
// Diagnostic: report whether allow_url_fopen is enabled.
if( ini_get('allow_url_fopen') ) {
echo "allow url fopen? Yes";
}
echo "<br />";
var_dump($document);
$items = $document->getElementsByTagName("item");
foreach ($items as $item) {
// NOTE(review): getElementsByTagName() returns a node list, not a string,
// so the echo below cannot print the title text.
$title = $item->getElementsByTagName('title');
echo $title;
}
// Attempt 2: let SimpleXML fetch and parse the URL directly.
$url = 'https://thehockeywriters.com/category/san-jose-sharks/feed/';
// NOTE(review): simplexml_load_file() returns false on failure; the result is not checked.
$xml = simplexml_load_file($url);
// NOTE(review): this loop iterates $items (the DOMDocument node list from
// attempt 1), not the SimpleXML result stored in $xml.
foreach ($items as $item) {
$title = $item->title;
echo $title;
}
// Diagnostic dumps of the SimpleXML result.
print_r($xml);
echo "<br />";
var_dump($xml);
echo "<br />hello?";
这段代码包含了对同一个 URL 的两次独立解析尝试,分别参考了在 Stack Overflow 上找到的以下示例中给出的答案和建议:
我尝试过或查找过的东西:
1.检查以确保允许allow_url_fopen
2.确保有UTF编码
3. 验证 XML
4. 之前链接的 Stack Overflow 帖子中提供的代码示例
这是我当前 var_dump 和 echo 的输出:
allow url fopen? Yes
object(DOMDocument)#2 (34) { ["doctype"]=> NULL ["implementation"]=> string(22) "(object value omitted)"
["documentElement"]=> NULL ["actualEncoding"]=> NULL ["encoding"]=> NULL
["xmlEncoding"]=> NULL ["standalone"]=> bool(true) ["xmlStandalone"]=> bool(true)
["version"]=> string(3) "1.0" ["xmlVersion"]=> string(3) "1.0"
["strictErrorChecking"]=> bool(true) ["documentURI"]=> NULL ["config"]=> NULL
["formatOutput"]=> bool(false) ["validateOnParse"]=> bool(false) ["resolveExternals"]=> bool(false)
["preserveWhiteSpace"]=> bool(true) ["recover"]=> bool(false) ["substituteEntities"]=> bool(false)
["nodeName"]=> string(9) "#document" ["nodeValue"]=> NULL ["nodeType"]=> int(9) ["parentNode"]=> NULL
["childNodes"]=> string(22) "(object value omitted)" ["firstChild"]=> NULL ["lastChild"]=> NULL
["previousSibling"]=> NULL ["attributes"]=> NULL ["ownerDocument"]=> NULL ["namespaceURI"]=> NULL
["prefix"]=> string(0) "" ["localName"]=> NULL ["baseURI"]=> NULL ["textContent"]=> string(0) "" }
bool(false)
hello?
我运行您的代码时遇到的唯一问题是:由于没有定义 user-agent,访问该提要时服务器会返回 403 错误。
今后可以使用 curl_getinfo 提取请求的状态码,并检查它是否等于 200(即 OK),以确认请求没有失败。
// Read the HTTP status code of the finished transfer (call this before curl_close()).
$httpcode = curl_getinfo($curl, CURLINFO_HTTP_CODE);
除此之外,您的循环中还有一些错误。
使用 SimpleXML:
<?php
// Download an RSS feed with cURL and list every item title using SimpleXML.
$url = "https://thehockeywriters.com/category/san-jose-sharks/feed/";
$curl = curl_init();
// Send a browser-like user agent; without one the feed host answers 403.
curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:17.0) Gecko/20100101 Firefox/17.0");
// Return the response body as a string instead of printing it.
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_URL, $url);
$data = curl_exec($curl);
// Read the transport error and status code before the handle is closed.
$curlError = curl_error($curl);
$httpcode = curl_getinfo($curl, CURLINFO_HTTP_CODE);
curl_close($curl);
// curl_exec() returns false on a transport failure (DNS, TLS, timeout, ...);
// in that case there is no meaningful HTTP status, so report the cURL error.
if ($data === false)
{
    echo "Failed to retrieve feed... cURL error: ", htmlspecialchars($curlError, ENT_QUOTES, 'UTF-8');
    die();
}
if ($httpcode !== 200)
{
    echo "Failed to retrieve feed... Error code: $httpcode";
    die();
}
// Collect libxml parse errors internally instead of emitting warnings into the page.
libxml_use_internal_errors(true);
// The SimpleXMLElement constructor throws an Exception when the body is not
// well-formed XML, so catch it rather than dying with an uncaught exception.
try
{
    $feed = new SimpleXMLElement($data);
}
catch (Exception $e)
{
    echo "Failed to parse feed... ", htmlspecialchars($e->getMessage(), ENT_QUOTES, 'UTF-8');
    die();
}
// list all titles...
foreach ($feed->channel->item as $item)
{
    // Titles come from a remote feed: escape them before writing them into HTML.
    echo htmlspecialchars((string)$item->title, ENT_QUOTES, 'UTF-8'), "<br>\n";
}
使用 DOMDocument:
<?php
// Download an RSS feed with cURL and list every item title using DOMDocument.
$url = "https://thehockeywriters.com/category/san-jose-sharks/feed/";
$curl = curl_init();
// Send a browser-like user agent; without one the feed host answers 403.
curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:17.0) Gecko/20100101 Firefox/17.0");
// Return the response body as a string instead of printing it.
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_URL, $url);
$data = curl_exec($curl);
// Read the transport error and status code before the handle is closed.
$curlError = curl_error($curl);
$httpcode = curl_getinfo($curl, CURLINFO_HTTP_CODE);
curl_close($curl);
// curl_exec() returns false on a transport failure (DNS, TLS, timeout, ...).
if ($data === false)
{
    echo "Failed to retrieve feed... cURL error: ", htmlspecialchars($curlError, ENT_QUOTES, 'UTF-8');
    die();
}
if ($httpcode !== 200)
{
    echo "Failed to retrieve feed... Error code: $httpcode";
    die();
}
// Collect libxml parse errors internally instead of emitting warnings into the page.
libxml_use_internal_errors(true);
$xml = new DOMDocument();
// loadXML() rejects an empty string and returns false on malformed XML;
// stop here instead of iterating over an empty document.
if ($data === '' || !$xml->loadXML($data))
{
    echo "Failed to parse feed...";
    die();
}
// list all titles...
foreach ($xml->getElementsByTagName("item") as $item)
{
    foreach ($item->getElementsByTagName("title") as $title)
    {
        // Titles come from a remote feed: escape them before writing them into HTML.
        echo htmlspecialchars($title->nodeValue, ENT_QUOTES, 'UTF-8'), "<br>\n";
    }
}
如果您只想打印所有项目的 title/description:
// Print the title immediately followed by the description of every feed item.
foreach ($feed->channel->item as $entry) {
    echo $entry->title, $entry->description;
    // uncomment the below line to print only the first entry.
    // break;
}
如果您只想要第一个条目,而不使用 foreach:
// Print the title and description of the first feed item only.
$firstItem = $feed->channel->item[0];
echo $firstItem->title, $firstItem->description;
将标题和描述保存到一个数组中供以后使用:
// Collect each item's title and description as plain strings for later use.
$result = [];
foreach ($feed->channel->item as $entry) {
    $entryTitle = (string) $entry->title;
    $entryDescription = (string) $entry->description;
    $result[] = ['title' => $entryTitle, 'description' => $entryDescription];
    // Alternatively, build a title => description map instead of a list:
    // $result[(string)$entry->title] = (string)$entry->description;
}
在 foreach 循环中使用 MySQLi/PDO 预处理语句:
// Insert every feed item through an already-prepared statement held in $stmt.
// MySQLi: bind_param() binds its arguments by reference, so bind two plain
// string variables once and only reassign them on each iteration.
$title = '';
$description = '';
$stmt->bind_param('ss', $title, $description);
foreach ($feed->channel->item as $item)
{
    // $item->title / $item->description are SimpleXMLElement objects; cast
    // them so the statement receives real strings.
    $title = (string)$item->title;
    $description = (string)$item->description;
    $stmt->execute();
    // PDO alternative (use instead of the MySQLi lines above): bindValue()
    // takes the value itself, so the cast string can be passed directly.
    //$stmt->bindValue(':title', (string)$item->title, PDO::PARAM_STR);
    //$stmt->bindValue(':description', (string)$item->description, PDO::PARAM_STR);
    //$stmt->execute();
}
我采纳了 Prix 的回答,因为它指出了需要定义用户代理(user-agent);不过我想出了另一种编写循环的方法,它避免了嵌套循环,并且更容易访问其他节点。这是我正在使用的代码(DOMDocument 方案):
// Download the feed with cURL and print each item's title and description
// using DOMDocument, without a nested loop.
$xml_url = "https://thehockeywriters.com/category/san-jose-sharks/feed/";
$curl = curl_init();
// Return the response body as a string instead of printing it.
curl_setopt( $curl, CURLOPT_RETURNTRANSFER, 1 );
curl_setopt( $curl, CURLOPT_URL, $xml_url );
// Send a browser-like user agent; without one the feed host answers 403.
curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:17.0) Gecko/20100101 Firefox/17.0");
$xml = curl_exec( $curl );
// Read the status code before the handle is closed.
$httpcode = curl_getinfo( $curl, CURLINFO_HTTP_CODE );
curl_close( $curl );
// curl_exec() returns false on a transport failure; also refuse empty bodies
// and non-200 responses so an error page is never parsed as a feed.
if ( $xml === false || $xml === '' || $httpcode !== 200 ) {
    echo "Failed to retrieve feed... Error code: $httpcode";
    die();
}
$document = new DOMDocument;
// loadXML() returns false when the body is not well-formed XML.
if ( ! $document->loadXML( $xml ) ) {
    echo "Failed to parse feed...";
    die();
}
$items = $document->getElementsByTagName("item");
foreach ($items as $item) {
    // item(0) is null when the element is missing; fall back to an empty
    // string instead of reading nodeValue on null.
    $titleNode = $item->getElementsByTagName('title')->item(0);
    $title = $titleNode !== null ? $titleNode->nodeValue : '';
    echo $title;
    $descNode = $item->getElementsByTagName('description')->item(0);
    $desc = $descNode !== null ? $descNode->nodeValue : '';
    echo $desc;
}