UnicodeDecodeError: 'utf-8' codec can't decode byte 0xff in position 0: invalid start byte when I tried to encode('utf-8')
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xff in position 0: invalid start byte when I tried to encode('utf-8')
当我尝试使用 .encode('utf-8')
时,出现 "UnicodeDecodeError: 'utf-8' codec can't decode byte 0xff in position 0: invalid start byte"
。
我正在尝试抓取一些图片,但有一个问题让我很困惑。我使用 urllib.request.urlopen(url).read().decode('utf-8')
将 html 页面转换为获取图像的 url。后来我想救他们。
但是,我把所有关于urlopen
的代码都放到了一个函数里,所以我不想改变它们。然后我想也许我可以使用 encode('utf-8')
将它们转换回来。在那之后,我不知道为什么但引发了异常。
import urllib.request as ur
import os
def getresponse(url):
head = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'}
req = ur.Request(url, None, head)
response = ur.urlopen(req)
result = response.read().decode('utf-8')
return result
def saveimg(urlimg):
img = getresponse(urlimg).encode('utf-8')
file = open('xx.jpg', 'wb')
file.write(img)
file.close()
def downloadimg(url):
os.chdir('/Users/xxxxxxx/Desktop')
os.mkdir('xx')
os.chdir('xx')
saveimg(url)
downloadimg('https://cn.bing.com/th?id=OHR.WaterperryGardens_ZH-CN5767279278_1920x1080.jpg&rf=LaDigue_1920x1080.jpg&pid=hp')
我只想知道这是什么原因
我可以提出一个解决方案 requests
吗?
import os
import requests # pip install requests
def download(url, headers, dest_folder, new_file_name):
if not os.path.exists(dest_folder):
os.makedirs(dest_folder) # create folder if it does not exist
filename = new_file_name.replace(" ", "_") # be careful with file names
file_path = os.path.join(dest_folder, filename)
r = requests.get(url, headers=headers, stream=True)
if r.ok:
print("saving to", os.path.abspath(file_path))
with open(file_path, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024 * 8):
if chunk:
f.write(chunk)
f.flush()
os.fsync(f.fileno())
else: # HTTP status code 4XX/5XX
print("Download failed: status code {}\n{}".format(r.status_code, r.text))
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'}
url = 'https://cn.bing.com/th?id=OHR.WaterperryGardens_ZH-CN5767279278_1920x1080.jpg&rf=LaDigue_1920x1080.jpg&pid=hp'
dest_folder = '/Users/xxxxxxx/Desktop/yyyy'
new_file_name = 'xx.jpg'
# save file to /Users/xxxxxxx/Desktop/yyyy/xx.jpg
# appropriate folders will be created if they do not exist
download(url, headers, dest_folder, new_file_name)
当我尝试使用 .encode('utf-8')
时,出现 "UnicodeDecodeError: 'utf-8' codec can't decode byte 0xff in position 0: invalid start byte"
。
我正在尝试抓取一些图片,但有一个问题让我很困惑。我使用 urllib.request.urlopen(url).read().decode('utf-8')
将 html 页面转换为获取图像的 url。后来我想救他们。
但是,我把所有关于urlopen
的代码都放到了一个函数里,所以我不想改变它们。然后我想也许我可以使用 encode('utf-8')
将它们转换回来。在那之后,我不知道为什么但引发了异常。
import urllib.request as ur
import os
def getresponse(url):
head = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'}
req = ur.Request(url, None, head)
response = ur.urlopen(req)
result = response.read().decode('utf-8')
return result
def saveimg(urlimg):
img = getresponse(urlimg).encode('utf-8')
file = open('xx.jpg', 'wb')
file.write(img)
file.close()
def downloadimg(url):
os.chdir('/Users/xxxxxxx/Desktop')
os.mkdir('xx')
os.chdir('xx')
saveimg(url)
downloadimg('https://cn.bing.com/th?id=OHR.WaterperryGardens_ZH-CN5767279278_1920x1080.jpg&rf=LaDigue_1920x1080.jpg&pid=hp')
我只想知道这是什么原因
我可以提出一个解决方案 requests
吗?
import os
import requests # pip install requests
def download(url, headers, dest_folder, new_file_name):
if not os.path.exists(dest_folder):
os.makedirs(dest_folder) # create folder if it does not exist
filename = new_file_name.replace(" ", "_") # be careful with file names
file_path = os.path.join(dest_folder, filename)
r = requests.get(url, headers=headers, stream=True)
if r.ok:
print("saving to", os.path.abspath(file_path))
with open(file_path, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024 * 8):
if chunk:
f.write(chunk)
f.flush()
os.fsync(f.fileno())
else: # HTTP status code 4XX/5XX
print("Download failed: status code {}\n{}".format(r.status_code, r.text))
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'}
url = 'https://cn.bing.com/th?id=OHR.WaterperryGardens_ZH-CN5767279278_1920x1080.jpg&rf=LaDigue_1920x1080.jpg&pid=hp'
dest_folder = '/Users/xxxxxxx/Desktop/yyyy'
new_file_name = 'xx.jpg'
# save file to /Users/xxxxxxx/Desktop/yyyy/xx.jpg
# appropriate folders will be created if they do not exist
download(url, headers, dest_folder, new_file_name)