为什么不能下载文件?
Why is it not downloading the files?
大家好,我制作了这个小程序来抓取一些 html 信息,但它没有下载一些文件...
代码如下:
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApplication2
{
// Console program that loads one article page with HtmlAgilityPack, reads title,
// description, image and icon information from its markup, downloads every img src
// it found, and prints the collected values.
class Program
{
// Entry point. args is not read anywhere in the method.
static void Main(string[] args)
{
using (System.Net.WebClient client = new WebClient())
{
// Holds the img src values copied out of `urls` further down.
List<string> source = new List<string>();
HtmlWeb web = new HtmlWeb();
string url = "http://www.jornaldenegocios.pt/mercados/bolsa/detalhe/wall_street_cada_vez_mais_perto_de_maximo_historico.html";
HtmlDocument document = web.Load(url);
var head = document.DocumentNode.SelectSingleNode("//head");
// NOTE(review): `meta` and `link` are assigned here but never read again in this method.
var meta = head.SelectNodes("//meta").AsEnumerable();
var link = document.DocumentNode.SelectSingleNode("//head").SelectNodes("//link").AsEnumerable();
// Non-empty src attribute values of every img element, exactly as written in the page
// (no resolution against `url` happens here).
var urls = document.DocumentNode.Descendants("img")
.Select(e => e.GetAttributeValue("src", null))
.Where(s => !String.IsNullOrEmpty(s));
var titulo = "";
var descricao = "";
var linkImg = "";
var linkIcon = "";
var linkImgAlt = "";
int length = 0;
Uri myUri = new Uri(url);
string host = myUri.Host;
// Maps the property attribute of each meta element whose property contains "og:" to its
// content attribute; an empty sequence is used when SelectNodes returns null.
var fbProperties = (head.SelectNodes("//meta[contains(@property, 'og:')]") ?? Enumerable.Empty<HtmlNode>())
.ToDictionary(n => n.Attributes["property"].Value, n => n.Attributes["content"].Value);
// Icon: href of the first link whose rel contains "apple-touch-icon", otherwise of the first
// link whose rel contains "icon", otherwise host + "/favicon.ico" (no scheme prefix).
linkIcon = (head.SelectSingleNode("//link[contains(@rel, 'apple-touch-icon')]")?.Attributes["href"]?.Value) ??
(head.SelectSingleNode("//link[contains(@rel, 'icon')]")?.Attributes["href"]?.Value) ??
host + "/favicon.ico";
var title = head.SelectSingleNode("//title")?.InnerText;
// Title: og:title when present, otherwise the title element's text, otherwise the host name.
if (fbProperties.TryGetValue("og:title", out titulo) == false || titulo == null)
{
titulo = (title ?? host);
}
// Description: og:description when present, otherwise the literal "none".
if (fbProperties.TryGetValue("og:description", out descricao) == false || descricao == null)
{
descricao = ("none");
}
// Image: og:image when present. NOTE(review): linkImgAlt is still "" at this point (it is only
// assigned in the loop below), so the fallback produces "" and never reaches "none".
if (fbProperties.TryGetValue("og:image", out linkImg) == false || linkImg == null)
{
linkImg = (linkImgAlt ?? "none");
}
// Copy the lazily evaluated src sequence into the list.
foreach (var node in urls)
{
source.Add(node);
}
// Downloads every src and keeps the byte count of the most recent download in `length`.
// NOTE(review): each src is passed to DownloadData unchanged, so a relative src is not
// resolved against `url` first.
// NOTE(review): the same address is downloaded twice per iteration and `length` is compared
// with the size of that second download, so linkImgAlt is only assigned if the two
// downloads differ in size - confirm this is the intended comparison.
foreach (var links in source)
{
length = client.DownloadData(links).Length;
if (length<client.DownloadData(links).Length)
{
linkImgAlt = links;
}
}
Console.WriteLine("");
Console.WriteLine("Titulo:");
Console.WriteLine(titulo);
Console.WriteLine("");
Console.WriteLine("Descriçao:");
Console.WriteLine(descricao);
Console.WriteLine("");
Console.WriteLine("Link da Imagem:");
Console.WriteLine(linkImg);
Console.WriteLine("");
Console.WriteLine("Link do Icon:");
Console.WriteLine(linkIcon);
Console.WriteLine("");
// Prints `length` (a byte count) under the same "Link da Imagem:" label used above.
Console.WriteLine("Link da Imagem:");
Console.WriteLine(length);
Console.WriteLine("");
Console.WriteLine("Link da Imagem:");
Console.WriteLine(linkImgAlt);
// Keeps the console window open until Enter is pressed.
Console.ReadLine();
}
}
}
}
一切正常,只是这个小部分给我带来了一些问题:
// Copy every collected img src value into the list.
foreach (var node in urls)
{
source.Add(node);
}
// Download each src and remember the byte count of the most recent download.
// NOTE(review): the src is handed to DownloadData unchanged, so a relative src is not
// turned into an absolute URL before the request.
// NOTE(review): the same address is downloaded a second time for the comparison, so the
// condition only holds if the two downloads differ in size.
foreach (var links in source)
{
length = client.DownloadData(links).Length;
if (length<client.DownloadData(links).Length)
{
linkImgAlt = links;
}
}
这是我运行程序时得到的错误:
'System.Net.WebException' 类型的未处理异常发生在 System.dll
附加信息:找不到文件 'C:\i\closePestana.png'。
截图:http://i.imgur.com/C9JPjtk.png
我认为它根本没有下载文件,所以才会出现这条错误消息。你能帮我解决这个问题吗?
谢谢。
我没有得到答案...但如果有人遇到我遇到的问题,这里是解决方案。
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApplication2
{
    /// <summary>
    /// Console scraper for a single article page: reads the title, description, Open Graph
    /// image and icon from the markup, then sends a HEAD request for every image on the page
    /// to find the one reporting the largest Content-Length.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads the page, gathers its metadata, probes the image sizes and prints the results.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            using (var client = new HttpClient())
            {
                string url = "http://www.jornaldenegocios.pt/mercados/bolsa/detalhe/wall_street_cada_vez_mais_perto_de_maximo_historico.html";
                Uri myUri = new Uri(url);
                string host = myUri.Host;

                HtmlWeb web = new HtmlWeb();
                HtmlDocument document = web.Load(url);

                // Every query below starts with "//", which XPath evaluates from the document
                // root, so they run on the document node. That also avoids dereferencing a
                // null <head> on pages that do not have one.
                var root = document.DocumentNode;

                // Open Graph values. Built by hand instead of ToDictionary because pages often
                // repeat a property (several og:image tags) or omit the content attribute, and
                // either case would throw. The first occurrence wins.
                var fbProperties = new Dictionary<string, string>();
                foreach (var node in root.SelectNodes("//meta[contains(@property, 'og:')]") ?? Enumerable.Empty<HtmlNode>())
                {
                    var property = node.GetAttributeValue("property", null);
                    var content = node.GetAttributeValue("content", null);
                    if (property != null && content != null && !fbProperties.ContainsKey(property))
                    {
                        fbProperties.Add(property, content);
                    }
                }

                // Icon: apple-touch-icon, then any rel containing "icon", then the conventional
                // /favicon.ico. The value is resolved against the page URL so the printed link
                // always carries a scheme and host.
                var iconHref = root.SelectSingleNode("//link[contains(@rel, 'apple-touch-icon')]")?.GetAttributeValue("href", null)
                    ?? root.SelectSingleNode("//link[contains(@rel, 'icon')]")?.GetAttributeValue("href", null)
                    ?? "/favicon.ico";
                var linkIcon = ResolveHttpUrl(myUri, iconHref) ?? iconHref;

                // Title: og:title, then the <title> text, then the host name.
                string titulo;
                if (!fbProperties.TryGetValue("og:title", out titulo) || string.IsNullOrWhiteSpace(titulo))
                {
                    var title = root.SelectSingleNode("//title")?.InnerText;
                    titulo = string.IsNullOrWhiteSpace(title) ? host : title.Trim();
                }

                // Description: og:description, otherwise the literal "none".
                string descricao;
                if (!fbProperties.TryGetValue("og:description", out descricao) || string.IsNullOrWhiteSpace(descricao))
                {
                    descricao = "none";
                }

                // Absolute http(s) URLs of all images, each one listed once. SelectNodes
                // returns null (not an empty collection) when nothing matches.
                var source = new List<string>();
                var seen = new HashSet<string>(StringComparer.Ordinal);
                foreach (var img in root.SelectNodes("//img") ?? Enumerable.Empty<HtmlNode>())
                {
                    var resolved = ResolveHttpUrl(myUri, img.GetAttributeValue("src", null));
                    if (resolved != null && seen.Add(resolved))
                    {
                        source.Add(resolved);
                    }
                }

                // Find the image with the largest reported size. HEAD is used so that only
                // the headers are transferred, not the image bytes.
                var linkImgAlt = "";
                var length = 0L;
                foreach (var links in source)
                {
                    try
                    {
                        // Main is synchronous, so the task is awaited by blocking;
                        // GetAwaiter().GetResult() surfaces the original exception instead of
                        // an AggregateException.
                        using (var request = new HttpRequestMessage(HttpMethod.Head, links))
                        using (var response = client.SendAsync(request).GetAwaiter().GetResult())
                        {
                            if (!response.IsSuccessStatusCode)
                            {
                                // The Content-Length of an error reply describes the error
                                // page, not the image, so it must not take part in the ranking.
                                Console.WriteLine($"{links}: HTTP {(int)response.StatusCode}");
                                continue;
                            }

                            var fileLength = response.Content.Headers.ContentLength;
                            Console.WriteLine($"{links}: {fileLength} bytes");
                            if (fileLength.HasValue && fileLength.Value > length)
                            {
                                linkImgAlt = links;
                                length = fileLength.Value;
                            }
                        }
                    }
                    catch (Exception e) when (e is HttpRequestException || e is OperationCanceledException)
                    {
                        // Best effort: one unreachable or timed-out image must not stop the scan.
                        Console.WriteLine($"{links}: {e.Message}");
                    }
                }

                // Image: og:image, otherwise the largest image found above, otherwise "none".
                // This has to run after the scan; evaluated earlier, linkImgAlt is still empty.
                string linkImg;
                if (!fbProperties.TryGetValue("og:image", out linkImg) || string.IsNullOrWhiteSpace(linkImg))
                {
                    linkImg = linkImgAlt.Length > 0 ? linkImgAlt : "none";
                }

                Console.WriteLine("");
                Console.WriteLine("Titulo:");
                Console.WriteLine(titulo);
                Console.WriteLine("");
                Console.WriteLine("Descriçao:");
                Console.WriteLine(descricao);
                Console.WriteLine("");
                Console.WriteLine("Link da Imagem:");
                Console.WriteLine(linkImg);
                Console.WriteLine("");
                Console.WriteLine("Link do Icon:");
                Console.WriteLine(linkIcon);
                Console.WriteLine("");
                Console.WriteLine("Link da Imagem:");
                Console.WriteLine(length);
                Console.WriteLine("");
                Console.WriteLine("Link da Imagem (alt):");
                Console.WriteLine(linkImgAlt);
                Console.ReadLine();
            }
        }

        /// <summary>
        /// Resolves a URL reference taken from the page (absolute, protocol-relative,
        /// root-relative or document-relative) against the page URL.
        /// </summary>
        /// <param name="baseUri">Absolute URL of the page the reference was found on.</param>
        /// <param name="reference">Raw attribute value; may be null or empty.</param>
        /// <returns>
        /// The absolute URL, or null when the reference is empty, cannot be parsed, or does
        /// not use http/https (for example an inline data: image).
        /// </returns>
        private static string ResolveHttpUrl(Uri baseUri, string reference)
        {
            if (string.IsNullOrWhiteSpace(reference))
            {
                return null;
            }

            Uri resolved;
            if (!Uri.TryCreate(baseUri, reference.Trim(), out resolved))
            {
                return null;
            }

            bool isWeb = resolved.Scheme == Uri.UriSchemeHttp || resolved.Scheme == Uri.UriSchemeHttps;
            return isWeb ? resolved.AbsoluteUri : null;
        }
    }
}
大家好,我制作了这个小程序来抓取一些 html 信息,但它没有下载一些文件...
代码如下:
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApplication2
{
// Console program that loads one article page with HtmlAgilityPack, reads title,
// description, image and icon information from its markup, downloads every img src
// it found, and prints the collected values.
class Program
{
// Entry point. args is not read anywhere in the method.
static void Main(string[] args)
{
using (System.Net.WebClient client = new WebClient())
{
// Holds the img src values copied out of `urls` further down.
List<string> source = new List<string>();
HtmlWeb web = new HtmlWeb();
string url = "http://www.jornaldenegocios.pt/mercados/bolsa/detalhe/wall_street_cada_vez_mais_perto_de_maximo_historico.html";
HtmlDocument document = web.Load(url);
var head = document.DocumentNode.SelectSingleNode("//head");
// NOTE(review): `meta` and `link` are assigned here but never read again in this method.
var meta = head.SelectNodes("//meta").AsEnumerable();
var link = document.DocumentNode.SelectSingleNode("//head").SelectNodes("//link").AsEnumerable();
// Non-empty src attribute values of every img element, exactly as written in the page
// (no resolution against `url` happens here).
var urls = document.DocumentNode.Descendants("img")
.Select(e => e.GetAttributeValue("src", null))
.Where(s => !String.IsNullOrEmpty(s));
var titulo = "";
var descricao = "";
var linkImg = "";
var linkIcon = "";
var linkImgAlt = "";
int length = 0;
Uri myUri = new Uri(url);
string host = myUri.Host;
// Maps the property attribute of each meta element whose property contains "og:" to its
// content attribute; an empty sequence is used when SelectNodes returns null.
var fbProperties = (head.SelectNodes("//meta[contains(@property, 'og:')]") ?? Enumerable.Empty<HtmlNode>())
.ToDictionary(n => n.Attributes["property"].Value, n => n.Attributes["content"].Value);
// Icon: href of the first link whose rel contains "apple-touch-icon", otherwise of the first
// link whose rel contains "icon", otherwise host + "/favicon.ico" (no scheme prefix).
linkIcon = (head.SelectSingleNode("//link[contains(@rel, 'apple-touch-icon')]")?.Attributes["href"]?.Value) ??
(head.SelectSingleNode("//link[contains(@rel, 'icon')]")?.Attributes["href"]?.Value) ??
host + "/favicon.ico";
var title = head.SelectSingleNode("//title")?.InnerText;
// Title: og:title when present, otherwise the title element's text, otherwise the host name.
if (fbProperties.TryGetValue("og:title", out titulo) == false || titulo == null)
{
titulo = (title ?? host);
}
// Description: og:description when present, otherwise the literal "none".
if (fbProperties.TryGetValue("og:description", out descricao) == false || descricao == null)
{
descricao = ("none");
}
// Image: og:image when present. NOTE(review): linkImgAlt is still "" at this point (it is only
// assigned in the loop below), so the fallback produces "" and never reaches "none".
if (fbProperties.TryGetValue("og:image", out linkImg) == false || linkImg == null)
{
linkImg = (linkImgAlt ?? "none");
}
// Copy the lazily evaluated src sequence into the list.
foreach (var node in urls)
{
source.Add(node);
}
// Downloads every src and keeps the byte count of the most recent download in `length`.
// NOTE(review): each src is passed to DownloadData unchanged, so a relative src is not
// resolved against `url` first.
// NOTE(review): the same address is downloaded twice per iteration and `length` is compared
// with the size of that second download, so linkImgAlt is only assigned if the two
// downloads differ in size - confirm this is the intended comparison.
foreach (var links in source)
{
length = client.DownloadData(links).Length;
if (length<client.DownloadData(links).Length)
{
linkImgAlt = links;
}
}
Console.WriteLine("");
Console.WriteLine("Titulo:");
Console.WriteLine(titulo);
Console.WriteLine("");
Console.WriteLine("Descriçao:");
Console.WriteLine(descricao);
Console.WriteLine("");
Console.WriteLine("Link da Imagem:");
Console.WriteLine(linkImg);
Console.WriteLine("");
Console.WriteLine("Link do Icon:");
Console.WriteLine(linkIcon);
Console.WriteLine("");
// Prints `length` (a byte count) under the same "Link da Imagem:" label used above.
Console.WriteLine("Link da Imagem:");
Console.WriteLine(length);
Console.WriteLine("");
Console.WriteLine("Link da Imagem:");
Console.WriteLine(linkImgAlt);
// Keeps the console window open until Enter is pressed.
Console.ReadLine();
}
}
}
}
一切正常,只是这个小部分给我带来了一些问题:
// Copy every collected img src value into the list.
foreach (var node in urls)
{
source.Add(node);
}
// Download each src and remember the byte count of the most recent download.
// NOTE(review): the src is handed to DownloadData unchanged, so a relative src is not
// turned into an absolute URL before the request.
// NOTE(review): the same address is downloaded a second time for the comparison, so the
// condition only holds if the two downloads differ in size.
foreach (var links in source)
{
length = client.DownloadData(links).Length;
if (length<client.DownloadData(links).Length)
{
linkImgAlt = links;
}
}
这是我运行程序时得到的错误:
'System.Net.WebException' 类型的未处理异常发生在 System.dll
附加信息:找不到文件 'C:\i\closePestana.png'。
截图:http://i.imgur.com/C9JPjtk.png
我认为它根本没有下载文件,所以才会出现这条错误消息。你能帮我解决这个问题吗?
谢谢。
我没有得到答案...但如果有人遇到我遇到的问题,这里是解决方案。
using HtmlAgilityPack;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApplication2
{
    /// <summary>
    /// Console scraper for a single article page: reads the title, description, Open Graph
    /// image and icon from the markup, then sends a HEAD request for every image on the page
    /// to find the one reporting the largest Content-Length.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads the page, gathers its metadata, probes the image sizes and prints the results.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            using (var client = new HttpClient())
            {
                string url = "http://www.jornaldenegocios.pt/mercados/bolsa/detalhe/wall_street_cada_vez_mais_perto_de_maximo_historico.html";
                Uri myUri = new Uri(url);
                string host = myUri.Host;

                HtmlWeb web = new HtmlWeb();
                HtmlDocument document = web.Load(url);

                // Every query below starts with "//", which XPath evaluates from the document
                // root, so they run on the document node. That also avoids dereferencing a
                // null <head> on pages that do not have one.
                var root = document.DocumentNode;

                // Open Graph values. Built by hand instead of ToDictionary because pages often
                // repeat a property (several og:image tags) or omit the content attribute, and
                // either case would throw. The first occurrence wins.
                var fbProperties = new Dictionary<string, string>();
                foreach (var node in root.SelectNodes("//meta[contains(@property, 'og:')]") ?? Enumerable.Empty<HtmlNode>())
                {
                    var property = node.GetAttributeValue("property", null);
                    var content = node.GetAttributeValue("content", null);
                    if (property != null && content != null && !fbProperties.ContainsKey(property))
                    {
                        fbProperties.Add(property, content);
                    }
                }

                // Icon: apple-touch-icon, then any rel containing "icon", then the conventional
                // /favicon.ico. The value is resolved against the page URL so the printed link
                // always carries a scheme and host.
                var iconHref = root.SelectSingleNode("//link[contains(@rel, 'apple-touch-icon')]")?.GetAttributeValue("href", null)
                    ?? root.SelectSingleNode("//link[contains(@rel, 'icon')]")?.GetAttributeValue("href", null)
                    ?? "/favicon.ico";
                var linkIcon = ResolveHttpUrl(myUri, iconHref) ?? iconHref;

                // Title: og:title, then the <title> text, then the host name.
                string titulo;
                if (!fbProperties.TryGetValue("og:title", out titulo) || string.IsNullOrWhiteSpace(titulo))
                {
                    var title = root.SelectSingleNode("//title")?.InnerText;
                    titulo = string.IsNullOrWhiteSpace(title) ? host : title.Trim();
                }

                // Description: og:description, otherwise the literal "none".
                string descricao;
                if (!fbProperties.TryGetValue("og:description", out descricao) || string.IsNullOrWhiteSpace(descricao))
                {
                    descricao = "none";
                }

                // Absolute http(s) URLs of all images, each one listed once. SelectNodes
                // returns null (not an empty collection) when nothing matches.
                var source = new List<string>();
                var seen = new HashSet<string>(StringComparer.Ordinal);
                foreach (var img in root.SelectNodes("//img") ?? Enumerable.Empty<HtmlNode>())
                {
                    var resolved = ResolveHttpUrl(myUri, img.GetAttributeValue("src", null));
                    if (resolved != null && seen.Add(resolved))
                    {
                        source.Add(resolved);
                    }
                }

                // Find the image with the largest reported size. HEAD is used so that only
                // the headers are transferred, not the image bytes.
                var linkImgAlt = "";
                var length = 0L;
                foreach (var links in source)
                {
                    try
                    {
                        // Main is synchronous, so the task is awaited by blocking;
                        // GetAwaiter().GetResult() surfaces the original exception instead of
                        // an AggregateException.
                        using (var request = new HttpRequestMessage(HttpMethod.Head, links))
                        using (var response = client.SendAsync(request).GetAwaiter().GetResult())
                        {
                            if (!response.IsSuccessStatusCode)
                            {
                                // The Content-Length of an error reply describes the error
                                // page, not the image, so it must not take part in the ranking.
                                Console.WriteLine($"{links}: HTTP {(int)response.StatusCode}");
                                continue;
                            }

                            var fileLength = response.Content.Headers.ContentLength;
                            Console.WriteLine($"{links}: {fileLength} bytes");
                            if (fileLength.HasValue && fileLength.Value > length)
                            {
                                linkImgAlt = links;
                                length = fileLength.Value;
                            }
                        }
                    }
                    catch (Exception e) when (e is HttpRequestException || e is OperationCanceledException)
                    {
                        // Best effort: one unreachable or timed-out image must not stop the scan.
                        Console.WriteLine($"{links}: {e.Message}");
                    }
                }

                // Image: og:image, otherwise the largest image found above, otherwise "none".
                // This has to run after the scan; evaluated earlier, linkImgAlt is still empty.
                string linkImg;
                if (!fbProperties.TryGetValue("og:image", out linkImg) || string.IsNullOrWhiteSpace(linkImg))
                {
                    linkImg = linkImgAlt.Length > 0 ? linkImgAlt : "none";
                }

                Console.WriteLine("");
                Console.WriteLine("Titulo:");
                Console.WriteLine(titulo);
                Console.WriteLine("");
                Console.WriteLine("Descriçao:");
                Console.WriteLine(descricao);
                Console.WriteLine("");
                Console.WriteLine("Link da Imagem:");
                Console.WriteLine(linkImg);
                Console.WriteLine("");
                Console.WriteLine("Link do Icon:");
                Console.WriteLine(linkIcon);
                Console.WriteLine("");
                Console.WriteLine("Link da Imagem:");
                Console.WriteLine(length);
                Console.WriteLine("");
                Console.WriteLine("Link da Imagem (alt):");
                Console.WriteLine(linkImgAlt);
                Console.ReadLine();
            }
        }

        /// <summary>
        /// Resolves a URL reference taken from the page (absolute, protocol-relative,
        /// root-relative or document-relative) against the page URL.
        /// </summary>
        /// <param name="baseUri">Absolute URL of the page the reference was found on.</param>
        /// <param name="reference">Raw attribute value; may be null or empty.</param>
        /// <returns>
        /// The absolute URL, or null when the reference is empty, cannot be parsed, or does
        /// not use http/https (for example an inline data: image).
        /// </returns>
        private static string ResolveHttpUrl(Uri baseUri, string reference)
        {
            if (string.IsNullOrWhiteSpace(reference))
            {
                return null;
            }

            Uri resolved;
            if (!Uri.TryCreate(baseUri, reference.Trim(), out resolved))
            {
                return null;
            }

            bool isWeb = resolved.Scheme == Uri.UriSchemeHttp || resolved.Scheme == Uri.UriSchemeHttps;
            return isWeb ? resolved.AbsoluteUri : null;
        }
    }
}