当 csv 文件中出现 HTTP 错误时跳过图像 url
Skip image url when HTTP Error appears in csv file
我有一个项目,需要从网站上抓取图像。所有 URL 都保存在一个 csv 文件中。其中有些 URL 我没有权限打开(或者它们根本不存在),这时我在 Python 中会收到 HTTP 403 错误。我只想忽略这个错误,继续尝试 csv 文件中的下一个 URL。
import urllib.request
import csv
# Download every image whose URL is in the first column of urls_01.csv and
# save it as images/image_<n>.jpg.
# NOTE: there is deliberately no error handling here -- this is the code the
# question is about. The first URL the server rejects makes urlretrieve raise
# urllib.error.HTTPError, which aborts the whole loop.
with open('urls_01.csv') as csv_file:
    rows = csv.reader(csv_file)
    img_count = 1
    for row in rows:
        urllib.request.urlretrieve(
            row[0], 'images/image_{0}.jpg'.format(img_count))
        img_count += 1
这是错误
Traceback (most recent call last):
File "c:\Users\Heigre\Documents\Phyton\img_test.py", line 8, in <module>
urllib.request.urlretrieve(image[0],
File "C:\Program Files\Python310\lib\urllib\request.py", line 241, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "C:\Program Files\Python310\lib\urllib\request.py", line 216, in urlopen
return opener.open(url, data, timeout)
File "C:\Program Files\Python310\lib\urllib\request.py", line 525, in open
response = meth(req, response)
File "C:\Program Files\Python310\lib\urllib\request.py", line 634, in http_response
response = self.parent.error(
File "C:\Program Files\Python310\lib\urllib\request.py", line 563, in error
return self._call_chain(*args)
File "C:\Program Files\Python310\lib\urllib\request.py", line 496, in _call_chain
result = func(*args)
File "C:\Program Files\Python310\lib\urllib\request.py", line 643, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden
使用 try/except 块捕获 HTTP 错误:
import urllib.request
from urllib.error import HTTPError  # needed so `except HTTPError` resolves
import csv

# Download each URL in the first column of urls_01.csv, skipping the ones the
# server answers with an HTTP error (e.g. 403 Forbidden).
with open('urls_01.csv') as csv_file:
    img_count = 1
    for row in csv.reader(csv_file):
        try:
            urllib.request.urlretrieve(
                row[0], 'images/image_{0}.jpg'.format(img_count))
        except HTTPError:
            # Ignore this URL and move on to the next row.
            continue
        # Only advance the counter after a successful download, so the saved
        # files are numbered consecutively.
        img_count += 1
使用 try/except 块捕获 HTTP 错误:
import urllib.request
from urllib.error import HTTPError  # needed so `except HTTPError` resolves
import csv

# Fetch the image behind every URL in the first column of urls_01.csv.
# URLs that fail with an HTTP error (such as 403 Forbidden) are skipped.
with open('urls_01.csv') as csv_file:
    img_count = 1
    for row in csv.reader(csv_file):
        target = 'images/image_{0}.jpg'.format(img_count)
        try:
            urllib.request.urlretrieve(row[0], target)
        except HTTPError:
            # Nothing was saved for this URL; try the next one.
            pass
        else:
            # The counter moves on only when a file was actually written.
            img_count += 1
我不确定这个导入是否必需,但我根据您的错误输出把它加上了。另外还给出了一条打印语句的思路:如果您只想忽略某些特定的错误,它可能会有所帮助……
import urllib.request
from urllib.error import HTTPError
import csv
# Download each URL in the first column of urls_01.csv. A URL that fails with
# an HTTP error is reported and skipped instead of stopping the script.
with open('urls_01.csv') as csv_file:
    img_count = 1
    for row in csv.reader(csv_file):
        try:
            urllib.request.urlretrieve(
                row[0],
                'images/image_{0}.jpg'.format(img_count)
            )
        except HTTPError as ex:
            # Say which URL was skipped and why rather than hiding it;
            # ex.code is the numeric HTTP status (e.g. 403).
            print('Skipping {0}: HTTP {1}'.format(row[0], ex.code))
            continue
        # Count only successful downloads so file numbers stay consecutive.
        img_count += 1
我有一个项目,需要从网站上抓取图像。所有 URL 都保存在一个 csv 文件中。其中有些 URL 我没有权限打开(或者它们根本不存在),这时我在 Python 中会收到 HTTP 403 错误。我只想忽略这个错误,继续尝试 csv 文件中的下一个 URL。
import urllib.request
import csv
# Download every image whose URL is in the first column of urls_01.csv and
# save it as images/image_<n>.jpg.
# NOTE: there is deliberately no error handling here -- this is the code the
# question is about. The first URL the server rejects makes urlretrieve raise
# urllib.error.HTTPError, which aborts the whole loop.
with open('urls_01.csv') as csv_file:
    rows = csv.reader(csv_file)
    img_count = 1
    for row in rows:
        urllib.request.urlretrieve(
            row[0], 'images/image_{0}.jpg'.format(img_count))
        img_count += 1
这是错误
Traceback (most recent call last):
File "c:\Users\Heigre\Documents\Phyton\img_test.py", line 8, in <module>
urllib.request.urlretrieve(image[0],
File "C:\Program Files\Python310\lib\urllib\request.py", line 241, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "C:\Program Files\Python310\lib\urllib\request.py", line 216, in urlopen
return opener.open(url, data, timeout)
File "C:\Program Files\Python310\lib\urllib\request.py", line 525, in open
response = meth(req, response)
File "C:\Program Files\Python310\lib\urllib\request.py", line 634, in http_response
response = self.parent.error(
File "C:\Program Files\Python310\lib\urllib\request.py", line 563, in error
return self._call_chain(*args)
File "C:\Program Files\Python310\lib\urllib\request.py", line 496, in _call_chain
result = func(*args)
File "C:\Program Files\Python310\lib\urllib\request.py", line 643, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden
使用 try/except 块捕获 HTTP 错误:
import urllib.request
from urllib.error import HTTPError  # needed so `except HTTPError` resolves
import csv

# Download each URL in the first column of urls_01.csv, skipping the ones the
# server answers with an HTTP error (e.g. 403 Forbidden).
with open('urls_01.csv') as csv_file:
    img_count = 1
    for row in csv.reader(csv_file):
        try:
            urllib.request.urlretrieve(
                row[0], 'images/image_{0}.jpg'.format(img_count))
        except HTTPError:
            # Ignore this URL and move on to the next row.
            continue
        # Only advance the counter after a successful download, so the saved
        # files are numbered consecutively.
        img_count += 1
使用 try/except 块捕获 HTTP 错误:
import urllib.request
from urllib.error import HTTPError  # needed so `except HTTPError` resolves
import csv

# Fetch the image behind every URL in the first column of urls_01.csv.
# URLs that fail with an HTTP error (such as 403 Forbidden) are skipped.
with open('urls_01.csv') as csv_file:
    img_count = 1
    for row in csv.reader(csv_file):
        target = 'images/image_{0}.jpg'.format(img_count)
        try:
            urllib.request.urlretrieve(row[0], target)
        except HTTPError:
            # Nothing was saved for this URL; try the next one.
            pass
        else:
            # The counter moves on only when a file was actually written.
            img_count += 1
我不确定这个导入是否必需,但我根据您的错误输出把它加上了。另外还给出了一条打印语句的思路:如果您只想忽略某些特定的错误,它可能会有所帮助……
import urllib.request
from urllib.error import HTTPError
import csv
# Download each URL in the first column of urls_01.csv. A URL that fails with
# an HTTP error is reported and skipped instead of stopping the script.
with open('urls_01.csv') as csv_file:
    img_count = 1
    for row in csv.reader(csv_file):
        try:
            urllib.request.urlretrieve(
                row[0],
                'images/image_{0}.jpg'.format(img_count)
            )
        except HTTPError as ex:
            # Say which URL was skipped and why rather than hiding it;
            # ex.code is the numeric HTTP status (e.g. 403).
            print('Skipping {0}: HTTP {1}'.format(row[0], ex.code))
            continue
        # Count only successful downloads so file numbers stay consecutive.
        img_count += 1