如何使用Python3.6 tarfile 模块从内存中读取?
How to use Python3.6 tarfile module to read from memory?
我想从 `url` 下载一个 tarfile 到内存,然后将其所有内容提取到文件夹 `dst`。我该怎么办?
以下是我的尝试,但未能实现我的计划。
#!/usr/bin/python3.6
# -*- coding: utf-8 -*-
from pathlib import Path
from io import BytesIO
from urllib.request import Request, urlopen
from urllib.error import URLError
from tarfile import TarFile
def get_url_response( url ):
    """Open *url* and return the response object, or ``None`` on failure.

    A ``URLError`` raised by ``urlopen`` (including its ``HTTPError``
    subclass) is reported on stdout instead of being propagated, so
    callers must check for ``None`` before calling ``.read()`` on the
    result.
    """
    req = Request( url )
    try:
        response = urlopen( req )
    except URLError as e:
        # HTTPError is a URLError subclass that carries both ``code`` and
        # ``reason``, so ``code`` has to be tested first; otherwise the
        # HTTP-status branch can never run.
        if hasattr( e, 'code' ):
            print( 'The server couldn\'t fulfill the request.' )
            print( 'Error code: ', e.code )
        elif hasattr( e, 'reason' ):
            print( 'We failed to reach a server.' )
            print( 'Reason: ', e.reason )
        return None
    else:
        # everything is fine
        return response
# Download link for the archive to fetch; the file name ends in .tar.xz.
url = 'https://dl.opendesktop.org/api/files/download/id/1566630595/s/6cf6f74c4016e9b83f062dbb89092a0dfee862472300cebd0125c7a99463b78f4b912b3aaeb23adde33ea796ca9232decdde45bb65a8605bfd8abd05eaee37af/t/1567158438/c/6cf6f74c4016e9b83f062dbb89092a0dfee862472300cebd0125c7a99463b78f4b912b3aaeb23adde33ea796ca9232decdde45bb65a8605bfd8abd05eaee37af/lt/download/Blue-Maia.tar.xz'
# Extraction target: a 'Tar' directory under the current working directory.
dst = Path().cwd() / 'Tar'
response = get_url_response( url )
# The in-memory buffer is passed as TarFile's first positional argument;
# this is the call that raises the TypeError shown in the traceback below.
with TarFile( BytesIO( response.read() ) ) as tfile:
    tfile.extractall( path=dst )
但是,我得到了这个错误:
Traceback (most recent call last):
File "~/test_tar.py", line 31, in <module>
with TarFile( BytesIO( response.read() ) ) as tfile:
File "/usr/lib/python3.6/tarfile.py", line 1434, in __init__
fileobj = bltn_open(name, self._mode)
TypeError: expected str, bytes or os.PathLike object, not _io.BytesIO
我尝试将 `BytesIO` 对象作为 `fileobj` 传递给 `TarFile`:
# Same extraction, but the buffer is now passed via the fileobj keyword;
# this variant fails with the ReadError shown in the traceback below.
with TarFile( fileobj=BytesIO( response.read() ) ) as tfile:
    tfile.extractall( path=dst )
但是还是不行:
Traceback (most recent call last):
File "/usr/lib/python3.6/tarfile.py", line 188, in nti
s = nts(s, "ascii", "strict")
File "/usr/lib/python3.6/tarfile.py", line 172, in nts
return s.decode(encoding, errors)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xd2 in position 0: ordinal not in range(128)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.6/tarfile.py", line 2297, in next
tarinfo = self.tarinfo.fromtarfile(self)
File "/usr/lib/python3.6/tarfile.py", line 1093, in fromtarfile
obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
File "/usr/lib/python3.6/tarfile.py", line 1035, in frombuf
chksum = nti(buf[148:156])
File "/usr/lib/python3.6/tarfile.py", line 191, in nti
raise InvalidHeaderError("invalid header")
tarfile.InvalidHeaderError: invalid header
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "~/test_tar.py", line 31, in <module>
with TarFile( fileobj=BytesIO( response.read() ) ) as tfile:
File "/usr/lib/python3.6/tarfile.py", line 1482, in __init__
self.firstmember = self.next()
File "/usr/lib/python3.6/tarfile.py", line 2309, in next
raise ReadError(str(e))
tarfile.ReadError: invalid header
奇怪的是,我使用 `tarfile.open()` 函数可以使其工作,但直接实例化 `TarFile` 对象却不行。貌似第二种方式的打开模式设置不正确……
无论如何,这有效:
from _io import BytesIO
import tarfile
# Read the whole archive into memory first, so tarfile works on an
# in-memory buffer rather than on the file on disk.
with open('Blue-Maia.tar.xz', 'rb') as f:
    buffer = BytesIO( f.read() )
# mode="r:xz" tells tarfile.open() that the data is xz-compressed.  The
# with-statement closes the archive even if extraction raises.
with tarfile.open(fileobj=buffer, mode="r:xz") as tar:
    tar.extractall( path="test" )
您可以添加 `try...except...finally` 以确保 tar 文件始终关闭。
更新:
在您的代码中:
response = get_url_response( url )
# Open the downloaded bytes as an xz-compressed tar archive; the
# with-statement closes it even if extraction raises.
with tarfile.open(fileobj=BytesIO( response.read() ), mode="r:xz") as tar:
    tar.extractall( path="test" )
这种方法非常接近正确:
# TarFile is instantiated directly here and no mode is given.
with TarFile( fileobj=BytesIO( response.read() ) ) as tfile:
    tfile.extractall( path=dst )
你应该使用 `tarfile.open` 而不是 `TarFile`(参见文档),并告诉它你正在读取一个 xz 文件(`mode='r:xz'`):
# tarfile.open() is used instead of TarFile, with mode='r:xz' stating
# that the buffer holds an xz-compressed archive.
with tarfile.open( fileobj=BytesIO( response.read() ), mode='r:xz' ) as tfile:
    tfile.extractall( path=dst )
但是,您会注意到,这还不够。
根本问题? 您正在从不允许热链接的站点下载。该网站阻止您尝试下载。尝试打印出响应,您会看到一堆垃圾 HTML 而不是 tar.xz 文件。
我想从 `url` 下载一个 tarfile 到内存,然后将其所有内容提取到文件夹 `dst`。我该怎么办?
以下是我的尝试,但未能实现我的计划。
#!/usr/bin/python3.6
# -*- coding: utf-8 -*-
from pathlib import Path
from io import BytesIO
from urllib.request import Request, urlopen
from urllib.error import URLError
from tarfile import TarFile
def get_url_response( url ):
    """Open *url* and return the response object, or ``None`` on failure.

    A ``URLError`` raised by ``urlopen`` (including its ``HTTPError``
    subclass) is reported on stdout instead of being propagated, so
    callers must check for ``None`` before calling ``.read()`` on the
    result.
    """
    req = Request( url )
    try:
        response = urlopen( req )
    except URLError as e:
        # HTTPError is a URLError subclass that carries both ``code`` and
        # ``reason``, so ``code`` has to be tested first; otherwise the
        # HTTP-status branch can never run.
        if hasattr( e, 'code' ):
            print( 'The server couldn\'t fulfill the request.' )
            print( 'Error code: ', e.code )
        elif hasattr( e, 'reason' ):
            print( 'We failed to reach a server.' )
            print( 'Reason: ', e.reason )
        return None
    else:
        # everything is fine
        return response
# Download link for the archive to fetch; the file name ends in .tar.xz.
url = 'https://dl.opendesktop.org/api/files/download/id/1566630595/s/6cf6f74c4016e9b83f062dbb89092a0dfee862472300cebd0125c7a99463b78f4b912b3aaeb23adde33ea796ca9232decdde45bb65a8605bfd8abd05eaee37af/t/1567158438/c/6cf6f74c4016e9b83f062dbb89092a0dfee862472300cebd0125c7a99463b78f4b912b3aaeb23adde33ea796ca9232decdde45bb65a8605bfd8abd05eaee37af/lt/download/Blue-Maia.tar.xz'
# Extraction target: a 'Tar' directory under the current working directory.
dst = Path().cwd() / 'Tar'
response = get_url_response( url )
# The in-memory buffer is passed as TarFile's first positional argument;
# this is the call that raises the TypeError shown in the traceback below.
with TarFile( BytesIO( response.read() ) ) as tfile:
    tfile.extractall( path=dst )
但是,我得到了这个错误:
Traceback (most recent call last):
File "~/test_tar.py", line 31, in <module>
with TarFile( BytesIO( response.read() ) ) as tfile:
File "/usr/lib/python3.6/tarfile.py", line 1434, in __init__
fileobj = bltn_open(name, self._mode)
TypeError: expected str, bytes or os.PathLike object, not _io.BytesIO
我尝试将 `BytesIO` 对象作为 `fileobj` 传递给 `TarFile`:
# Same extraction, but the buffer is now passed via the fileobj keyword;
# this variant fails with the ReadError shown in the traceback below.
with TarFile( fileobj=BytesIO( response.read() ) ) as tfile:
    tfile.extractall( path=dst )
但是还是不行:
Traceback (most recent call last):
File "/usr/lib/python3.6/tarfile.py", line 188, in nti
s = nts(s, "ascii", "strict")
File "/usr/lib/python3.6/tarfile.py", line 172, in nts
return s.decode(encoding, errors)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xd2 in position 0: ordinal not in range(128)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.6/tarfile.py", line 2297, in next
tarinfo = self.tarinfo.fromtarfile(self)
File "/usr/lib/python3.6/tarfile.py", line 1093, in fromtarfile
obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
File "/usr/lib/python3.6/tarfile.py", line 1035, in frombuf
chksum = nti(buf[148:156])
File "/usr/lib/python3.6/tarfile.py", line 191, in nti
raise InvalidHeaderError("invalid header")
tarfile.InvalidHeaderError: invalid header
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "~/test_tar.py", line 31, in <module>
with TarFile( fileobj=BytesIO( response.read() ) ) as tfile:
File "/usr/lib/python3.6/tarfile.py", line 1482, in __init__
self.firstmember = self.next()
File "/usr/lib/python3.6/tarfile.py", line 2309, in next
raise ReadError(str(e))
tarfile.ReadError: invalid header
奇怪的是,我使用 `tarfile.open()` 函数可以使其工作,但直接实例化 `TarFile` 对象却不行。貌似第二种方式的打开模式设置不正确……
无论如何,这有效:
from _io import BytesIO
import tarfile
# Read the whole archive into memory first, so tarfile works on an
# in-memory buffer rather than on the file on disk.
with open('Blue-Maia.tar.xz', 'rb') as f:
    buffer = BytesIO( f.read() )
# mode="r:xz" tells tarfile.open() that the data is xz-compressed.  The
# with-statement closes the archive even if extraction raises.
with tarfile.open(fileobj=buffer, mode="r:xz") as tar:
    tar.extractall( path="test" )
您可以添加 `try...except...finally` 以确保 tar 文件始终关闭。
更新:
在您的代码中:
response = get_url_response( url )
# Open the downloaded bytes as an xz-compressed tar archive; the
# with-statement closes it even if extraction raises.
with tarfile.open(fileobj=BytesIO( response.read() ), mode="r:xz") as tar:
    tar.extractall( path="test" )
这种方法非常接近正确:
# TarFile is instantiated directly here and no mode is given.
with TarFile( fileobj=BytesIO( response.read() ) ) as tfile:
    tfile.extractall( path=dst )
你应该使用 `tarfile.open` 而不是 `TarFile`(参见文档),并告诉它你正在读取一个 xz 文件(`mode='r:xz'`):
# tarfile.open() is used instead of TarFile, with mode='r:xz' stating
# that the buffer holds an xz-compressed archive.
with tarfile.open( fileobj=BytesIO( response.read() ), mode='r:xz' ) as tfile:
    tfile.extractall( path=dst )
但是,您会注意到,这还不够。
根本问题? 您正在从不允许热链接的站点下载。该网站阻止您尝试下载。尝试打印出响应,您会看到一堆垃圾 HTML 而不是 tar.xz 文件。