在 python 中下载批量图像
Download bulk images in python
看完一个讲解如何用 python 下载图片的视频后,我照着视频输入了代码,代码如下:
import pandas as pd
import urllib.request
def url_to_jpg(i, url, file_path):
    """Download one image and save it as ``image-<i>.jpg``.

    Args:
        i: Index used to build the output file name.
        url: Location of the image; handed unchanged to
            ``urllib.request.urlretrieve``.
        file_path: Prefix prepended verbatim to the file name. No path
            separator is inserted, so it has to end with one already
            (e.g. ``'Images/'``) when a directory is meant.

    Returns:
        None.
    """
    filename = 'image-{}.jpg'.format(i)
    # Plain string concatenation of the prefix and the file name.
    fullpath = '{}{}'.format(file_path, filename)
    print(fullpath)
    # urlretrieve matches a str regex against ``url`` (see the traceback that
    # follows in this post), so ``url`` has to be a str.
    urllib.request.urlretrieve(url, fullpath)
    print('{} saved.'.format(filename))
    return None
# CSV file that is read for the image URLs, and the prefix for saved files.
FILENAME = 'Images URLs.csv'
FILE_PATH = 'Images/'
urls = pd.read_csv(FILENAME)
# NOTE(review): iterating ``urls.values`` yields one array per CSV row rather
# than a str, so ``url`` is a whole row here. This is the call the traceback
# further down points at; the follow-up in this post passes ``url[0]``.
for i, url in enumerate(urls.values):
    url_to_jpg(i, url, FILE_PATH)
测试代码时,我在这一行遇到了错误
urllib.request.urlretrieve(url, fullpath)
输出和报错信息如下:
Images/image-0.jpg
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-36-d92ed57d1d8e> in <module>
15
16 for i, url in enumerate(urls.values):
---> 17 url_to_jpg(i, url, FILE_PATH)
<ipython-input-36-d92ed57d1d8e> in url_to_jpg(i, url, file_path)
6 fullpath = '{}{}'.format(file_path, filename)
7 print(fullpath)
----> 8 urllib.request.urlretrieve(url, fullpath)
9 print('{} saved.'.format(filename))
10 return None
C:\ProgramData\Anaconda3\lib\urllib\request.py in urlretrieve(url, filename, reporthook, data)
243 data file as well as the resulting HTTPMessage object.
244 """
--> 245 url_type, path = _splittype(url)
246
247 with contextlib.closing(urlopen(url, data)) as fp:
C:\ProgramData\Anaconda3\lib\urllib\parse.py in _splittype(url)
1006 _typeprog = re.compile('([^/:]+):(.*)', re.DOTALL)
1007
-> 1008 match = _typeprog.match(url)
1009 if match:
1010 scheme, data = match.groups()
TypeError: cannot use a string pattern on a bytes-like object
关于这个错误有什么想法吗?
** 我找到了解决办法,就是把这一行修改为:
url_to_jpg(i, url[0], FILE_PATH)
但似乎有些链接不允许访问,因为我又遇到了另一个错误:
HTTPError: HTTP Error 403: Forbidden
我该如何克服这个问题?
** 我尝试按照建议添加 headers(User-Agent),但不知道该如何正确实现。在这种情况下应该如何使用 urlretrieve?
import urllib.request
# Browser-like request headers to send along with the request.
hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
'Accept-Encoding': 'none',
'Accept-Language': 'en-US,en;q=0.8',
'Connection': 'keep-alive'}
# Despite its name, ``response`` is the Request object carrying the headers.
response = urllib.request.Request("http://www.gunnerkrigg.com//comics/00000001.jpg", headers=hdr)
# Prints the response object returned by urlopen, not the image data.
print(urllib.request.urlopen(response))
# NOTE(review): this opens the URL a second time and hands the downloaded
# bytes to urlretrieve as its first argument, where a URL string is expected;
# presumably it fails with the same TypeError shown earlier in this post.
urllib.request.urlretrieve(urllib.request.urlopen(response).read(),'oo.jpg')
# Earlier attempt without custom headers, left disabled by the author:
#urllib.request.urlretrieve("http://www.gunnerkrigg.com//comics/00000001.jpg", "00000001.jpg")
下面的代码可以帮助您解决 HTTPError: HTTP Error 403: Forbidden:先创建一个带有浏览器 User-Agent 请求头的 opener 并全局安装,之后 urlretrieve 发出的每个请求都会带上这个请求头。
这是在您的代码基础上添加了 header 的版本。
import pandas as pd
import urllib.request
# Browser-like User-Agent string sent with every request made through the
# opener below.
_USER_AGENT = (
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) '
    'Gecko/2009021910 Firefox/3.0.7'
)

# Create an opener whose header list holds only that User-Agent, then
# install it globally (once) so later urllib.request calls go through it.
opener = urllib.request.build_opener()
opener.addheaders = [('User-agent', _USER_AGENT)]
urllib.request.install_opener(opener)
def url_to_jpg(i, url, file_path):
    """Download ``url`` and save it as ``image-<i>.jpg`` inside ``file_path``.

    Args:
        i: Index used to build the output file name.
        url: URL of the image, as a ``str``.
        file_path: Destination directory. A trailing path separator is
            optional, and the directory is created when it does not exist
            yet. An empty string means the current working directory.

    Returns:
        None.

    Raises:
        urllib.error.URLError: If the download fails (``HTTPError`` for HTTP
            status errors such as 403).
        OSError: If the directory or the output file cannot be created.
    """
    import os  # local import: the surrounding script does not import os

    filename = 'image-{}.jpg'.format(i)
    # os.path.join adds a separator only when one is missing, so both
    # 'Images' and 'Images/' place the file inside the directory.
    fullpath = os.path.join(file_path, filename)
    if file_path:
        # urlretrieve does not create missing directories itself.
        os.makedirs(file_path, exist_ok=True)
    print(fullpath)
    urllib.request.urlretrieve(url, fullpath)
    print('{} saved.'.format(filename))
# Destination prefix for the saved images and the CSV file that is read
# for the URLs.
FILE_PATH = 'Images/'
FILENAME = 'Images URLs.csv'

urls = pd.read_csv(FILENAME)

# Each item of ``urls.values`` is one CSV row; its first cell is passed on
# as the URL.
for index, row in enumerate(urls.values):
    url_to_jpg(index, row[0], FILE_PATH)
看完一个讲解如何用 python 下载图片的视频后,我照着视频输入了代码,代码如下:
import pandas as pd
import urllib.request
def url_to_jpg(i, url, file_path):
    """Download one image and save it as ``image-<i>.jpg``.

    Args:
        i: Index used to build the output file name.
        url: Location of the image; handed unchanged to
            ``urllib.request.urlretrieve``.
        file_path: Prefix prepended verbatim to the file name. No path
            separator is inserted, so it has to end with one already
            (e.g. ``'Images/'``) when a directory is meant.

    Returns:
        None.
    """
    filename = 'image-{}.jpg'.format(i)
    # Plain string concatenation of the prefix and the file name.
    fullpath = '{}{}'.format(file_path, filename)
    print(fullpath)
    # urlretrieve matches a str regex against ``url`` (see the traceback that
    # follows in this post), so ``url`` has to be a str.
    urllib.request.urlretrieve(url, fullpath)
    print('{} saved.'.format(filename))
    return None
# CSV file that is read for the image URLs, and the prefix for saved files.
FILENAME = 'Images URLs.csv'
FILE_PATH = 'Images/'
urls = pd.read_csv(FILENAME)
# NOTE(review): iterating ``urls.values`` yields one array per CSV row rather
# than a str, so ``url`` is a whole row here. This is the call the traceback
# further down points at; the follow-up in this post passes ``url[0]``.
for i, url in enumerate(urls.values):
    url_to_jpg(i, url, FILE_PATH)
测试代码时,我在这一行遇到了错误
urllib.request.urlretrieve(url, fullpath)
输出和报错信息如下:
Images/image-0.jpg
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-36-d92ed57d1d8e> in <module>
15
16 for i, url in enumerate(urls.values):
---> 17 url_to_jpg(i, url, FILE_PATH)
<ipython-input-36-d92ed57d1d8e> in url_to_jpg(i, url, file_path)
6 fullpath = '{}{}'.format(file_path, filename)
7 print(fullpath)
----> 8 urllib.request.urlretrieve(url, fullpath)
9 print('{} saved.'.format(filename))
10 return None
C:\ProgramData\Anaconda3\lib\urllib\request.py in urlretrieve(url, filename, reporthook, data)
243 data file as well as the resulting HTTPMessage object.
244 """
--> 245 url_type, path = _splittype(url)
246
247 with contextlib.closing(urlopen(url, data)) as fp:
C:\ProgramData\Anaconda3\lib\urllib\parse.py in _splittype(url)
1006 _typeprog = re.compile('([^/:]+):(.*)', re.DOTALL)
1007
-> 1008 match = _typeprog.match(url)
1009 if match:
1010 scheme, data = match.groups()
TypeError: cannot use a string pattern on a bytes-like object
关于这个错误有什么想法吗?
** 我找到了解决办法,就是把这一行修改为:
url_to_jpg(i, url[0], FILE_PATH)
但似乎有些链接不允许访问,因为我又遇到了另一个错误:
HTTPError: HTTP Error 403: Forbidden
我该如何克服这个问题?
** 我尝试按照建议添加 headers(User-Agent),但不知道该如何正确实现。在这种情况下应该如何使用 urlretrieve?
import urllib.request
# Browser-like request headers to send along with the request.
hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
'Accept-Encoding': 'none',
'Accept-Language': 'en-US,en;q=0.8',
'Connection': 'keep-alive'}
# Despite its name, ``response`` is the Request object carrying the headers.
response = urllib.request.Request("http://www.gunnerkrigg.com//comics/00000001.jpg", headers=hdr)
# Prints the response object returned by urlopen, not the image data.
print(urllib.request.urlopen(response))
# NOTE(review): this opens the URL a second time and hands the downloaded
# bytes to urlretrieve as its first argument, where a URL string is expected;
# presumably it fails with the same TypeError shown earlier in this post.
urllib.request.urlretrieve(urllib.request.urlopen(response).read(),'oo.jpg')
# Earlier attempt without custom headers, left disabled by the author:
#urllib.request.urlretrieve("http://www.gunnerkrigg.com//comics/00000001.jpg", "00000001.jpg")
下面的代码可以帮助您解决 HTTPError: HTTP Error 403: Forbidden:先创建一个带有浏览器 User-Agent 请求头的 opener 并全局安装,之后 urlretrieve 发出的每个请求都会带上这个请求头。
这是在您的代码基础上添加了 header 的版本。
import pandas as pd
import urllib.request
# Browser-like User-Agent string sent with every request made through the
# opener below.
_USER_AGENT = (
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) '
    'Gecko/2009021910 Firefox/3.0.7'
)

# Create an opener whose header list holds only that User-Agent, then
# install it globally (once) so later urllib.request calls go through it.
opener = urllib.request.build_opener()
opener.addheaders = [('User-agent', _USER_AGENT)]
urllib.request.install_opener(opener)
def url_to_jpg(i, url, file_path):
    """Download ``url`` and save it as ``image-<i>.jpg`` inside ``file_path``.

    Args:
        i: Index used to build the output file name.
        url: URL of the image, as a ``str``.
        file_path: Destination directory. A trailing path separator is
            optional, and the directory is created when it does not exist
            yet. An empty string means the current working directory.

    Returns:
        None.

    Raises:
        urllib.error.URLError: If the download fails (``HTTPError`` for HTTP
            status errors such as 403).
        OSError: If the directory or the output file cannot be created.
    """
    import os  # local import: the surrounding script does not import os

    filename = 'image-{}.jpg'.format(i)
    # os.path.join adds a separator only when one is missing, so both
    # 'Images' and 'Images/' place the file inside the directory.
    fullpath = os.path.join(file_path, filename)
    if file_path:
        # urlretrieve does not create missing directories itself.
        os.makedirs(file_path, exist_ok=True)
    print(fullpath)
    urllib.request.urlretrieve(url, fullpath)
    print('{} saved.'.format(filename))
# Destination prefix for the saved images and the CSV file that is read
# for the URLs.
FILE_PATH = 'Images/'
FILENAME = 'Images URLs.csv'

urls = pd.read_csv(FILENAME)

# Each item of ``urls.values`` is one CSV row; its first cell is passed on
# as the URL.
for index, row in enumerate(urls.values):
    url_to_jpg(index, row[0], FILE_PATH)