如何使用 python tarfile 模块将文件附加到 tar 文件?
How to append a file to a tar file using the Python tarfile module?
我想将文件附加到 tar 文件。例如test.tar.gz
中的文件是a.png, b.png, c.png
。我有一个名为 a.png
的新 png 文件,我想将 a.png
附加到 test.tar.gz
并覆盖 test.tar.gz
中的旧文件 a.png
。我的代码:
import tarfile
# 'w:gz' starts a brand-new gzip-compressed archive, so whatever
# test.tar.gz contained before is discarded and only a.png remains.
a = tarfile.open('test.tar.gz', 'w:gz')
a.add('a.png')
a.close()
然后,test.tar.gz
中除了 a.png
以外的所有文件都消失了。如果我将代码更改为:
import tarfile
a = tarfile.open('test.tar.gz', 'a:')  # or 'a:gz' -- neither works on a gzip-compressed archive (see the traceback below)
a.add('a.png')
a.close()
程序崩溃,错误日志:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.7/tarfile.py", line 1678, in open
return func(name, filemode, fileobj, **kwargs)
File "/usr/lib/python2.7/tarfile.py", line 1705, in taropen
return cls(name, mode, fileobj, **kwargs)
File "/usr/lib/python2.7/tarfile.py", line 1588, in __init__
raise ReadError(str(e))
tarfile.ReadError: invalid header
我的错误是什么,我该怎么办?
更新。根据文档,gz
文件无法在 a
模式下打开。如果是这样,在现有存档中添加或更新文件的最佳方式是什么?
Note that 'a:gz'
or 'a:bz2'
is not possible. If mode is not suitable to open a certain (compressed) file for reading, ReadError
is raised. Use mode 'r' to avoid this. If a compression method is not supported, CompressionError
is raised.
所以我猜你应该使用gzip
library解压,在tarfile
中使用a:
模式添加文件,然后使用gzip
再次压缩。
Update. From the documentation, it follows that gz
files cannot be opened in a
mode. If so, what is the best way to add or update files in an existing archive?
简答:
- 解包/解压缩存档
- 替换/添加文件
- 重新打包/压缩存档
我尝试使用 gzip
和 tarfile
的文件/流接口在内存中执行此操作,但未能使其成功运行——无论如何 tarball 都必须重写,因为显然无法就地替换其中的文件。所以最好直接解压整个存档。
另请参阅维基百科上的相关条目(tar、gzip)。
脚本,如果直接 运行,也会尝试生成测试图像 "a.png, b.png, c.png, new.png"(需要 Pillow)和初始存档 "test.tar.gz"(如果它们不存在)。然后它将存档解压缩到临时目录中,用 "new.png" 的内容覆盖 "a.png",并打包所有文件,覆盖原始存档。
以下是单独的文件:
当然,也可以在交互模式下依次执行脚本中的各个函数,以便有机会查看中间生成的文件。假设脚本的文件名是 "t.py":
>>> from t import *
>>> make_images()
>>> make_archive()
>>> replace_file()
Workaround
开始吧(精华部分在replace_file()
):
#!python3
#coding=utf-8
"""
Replace a file in a .tar.gz archive via temporary files
"""
import sys #
import pathlib # https://docs.python.org/3/library/pathlib.html
import tempfile # https://docs.python.org/3/library/tempfile.html
import tarfile # https://docs.python.org/3/library/tarfile.html
#import gzip # https://docs.python.org/3/library/gzip.html
# Archive that the demo creates and later rewrites
gfn = "test.tar.gz"
# File extension shared by all sample images
iext = ".png"
# Name of the archive member whose content gets overwritten
replace = "a"+iext
# File on disk that supplies the new content for that member
replacement = "new"+iext
def make_images():
    """Generate the four sample images with Pillow (http://pillow.readthedocs.io/).

    Writes ``a``, ``b``, ``c`` and ``new`` (each suffixed with the
    module-level ``iext``) into the current directory. Every image is a
    100x100 solid-colour square with its own name drawn in black.

    Best effort: any failure (Pillow not installed, file not writable, ...)
    is reported on stderr instead of being raised.
    """
    try:
        from PIL import Image, ImageDraw, ImageFont
        try:
            font = ImageFont.truetype("arial.ttf", 50)
        except OSError:
            # "arial.ttf" is not available on every system; fall back to
            # Pillow's built-in font rather than producing no images at all.
            font = ImageFont.load_default()
        colours = {"a": "red", "b": "green", "c": "blue", "new": "orange"}
        for label, colour in colours.items():
            img = Image.new('RGB', (100, 100), color=colour)
            ImageDraw.Draw(img).text((0, 0), label, fill=(0, 0, 0), font=font)
            img.save(label + iext)
    except Exception as e:
        print(e, file=sys.stderr)
        print("Could not create image files", file=sys.stderr)
        print("(pip install pillow)", file=sys.stderr)
def make_archive():
    """Create the gzip-compressed sample archive.

    Packs ``a``, ``b`` and ``c`` (each suffixed with ``iext``) from the
    current directory into the file named by ``gfn``. Failures are reported
    on stderr, not raised.
    """
    try:
        # The context manager closes the archive even when add() fails,
        # so no file handle is leaked on error.
        with tarfile.open(gfn, 'w:gz') as archive:
            for stem in 'abc':
                archive.add(stem + iext)
    except Exception as e:
        print(e, file=sys.stderr)
        print("Could not create archive", file=sys.stderr)
def make_files():
    """Create whatever sample input is still missing.

    All images are regenerated if at least one of them is absent; the
    archive is built only when it does not exist yet.
    """
    stems = ('a', 'b', 'c', 'new')
    if any(not pathlib.Path(stem + iext).is_file() for stem in stems):
        make_images()
    archive = pathlib.Path(gfn)
    if not archive.is_file():
        make_archive()
def add_file_not():
    """Show that a gzip-compressed tar cannot be opened for appending.

    The attempt is expected to fail; the error is printed to stderr
    instead of being raised.
    """
    print("Not possible: tarfile with \"a:gz\" - failing now:", file=sys.stderr)
    try:
        # Append mode combined with gzip compression is not possible
        archive = tarfile.open(gfn, 'a:gz')
        archive.add(replacement, arcname=replace)
        archive.close()
    except Exception as err:
        print(err, file=sys.stderr)
def replace_file():
    """Swap one archive member by unpacking, overwriting and repacking.

    The archive ``gfn`` is extracted into a temporary directory, the member
    ``replace`` receives the bytes of the file ``replacement``, and ``gfn``
    is then rewritten from everything found in that directory.
    """
    print("Workaround", file=sys.stderr)
    with tempfile.TemporaryDirectory() as workdir:
        work = pathlib.Path(workdir)
        # unpack the current archive
        with tarfile.open(gfn) as source:
            source.extractall(workdir)
        # overwrite the target member with the new content
        new_content = pathlib.Path(replacement).read_bytes()
        (work / replace).write_bytes(new_content)
        # rebuild the archive from the top-level entries of the work directory
        with tarfile.open(gfn, "w:gz") as target:
            for entry in work.iterdir():
                target.add(entry, arcname=entry.name)
def test():
    """Run the demo end to end: ensure the sample files exist, then replace one."""
    make_files()
    # add_file_not()  # enable to see the append-mode failure
    replace_file()
if __name__ == "__main__":
test()
如果你想添加文件而不是替换它们,显然只需省略替换临时文件的那一行,并将要附加的文件复制到临时目录中。确保 pathlib.Path.iterdir
随后也能“看到”这些新文件,从而把它们加入新存档。
我把它放在了一个更有用的函数中:
def targz_add(targz=None, src=None, dst=None, replace=False):
    """Add <src> file(s) to <targz> file, optionally replacing existing file(s).

    The archive is unpacked into a temporary directory, the source files are
    copied in, and the archive is rebuilt (gzip-compressed) from the
    top-level entries of that directory.

    Args:
        targz: path of an existing tar archive; it is overwritten in place.
        src: one path, or a tuple/list/set of paths, of the file(s) to add.
            Sources that do not exist are reported and skipped.
        dst: name(s) inside the archive. A falsy value keeps each source's
            file name; a single value is reduced to its file name; a
            tuple/list/set supplies the names, paired with the existing
            sources in order; a callable is applied to each source
            ``pathlib.Path``.
        replace: overwrite members that already exist in the archive. When
            false, existing members are kept and only new names are added.

    Returns:
        ``None`` on success; ``1`` if the archive is missing or no usable
        source was given (the reason is written to stderr).

    TODO: complete error handling...
    """
    import sys
    import pathlib
    import tempfile
    import tarfile

    # ensure targz is given and exists
    if not targz:
        sys.stderr.write("No target archive given.\n")
        return 1
    tp = pathlib.Path(targz)
    if not tp.is_file():
        sys.stderr.write("Target '{}' does not exist!\n".format(tp))
        return 1

    # src path(s)
    if not src:
        sys.stderr.write("No files given.\n")
        return 1
    if not isinstance(src, (tuple, list, set)):
        src = [src]
    srcp = []
    for s in src:
        sp = pathlib.Path(s)
        if sp.is_file():
            srcp.append(sp)
        else:
            sys.stderr.write("Source '{}' does not exist.\n".format(sp))
    if not srcp:
        sys.stderr.write("None of the files exist.\n")
        return 1

    # dst path(s) (filenames in archive)
    if not dst:
        # default: use filename only
        dstp = [sp.name for sp in srcp]
    elif callable(dst):
        # map dst over each source Path, ensure results are Path
        dstp = [pathlib.Path(c) for c in map(dst, srcp)]
    elif isinstance(dst, (tuple, list, set)):
        dstp = [pathlib.Path(d) for d in dst]
    else:
        # single destination: keep only its file name
        dstp = [pathlib.Path(dst).name]
    if len(dstp) != len(srcp):
        # zip() below stops at the shorter list; say so instead of
        # dropping the surplus entries silently.
        sys.stderr.write(
            "Warning: {} source(s) but {} destination(s); "
            "surplus entries are ignored.\n".format(len(srcp), len(dstp)))

    with tempfile.TemporaryDirectory() as tempdir:
        tempdirp = pathlib.Path(tempdir)
        # extract original archive to temporary directory
        with tarfile.open(tp) as r:
            r.extractall(tempdirp)
        # copy source(s) to target in temporary directory
        for s, d in zip(srcp, dstp):
            dp = tempdirp / d
            # TODO extend to allow flag individually
            # is_file must be *called*: the bare bound method is always
            # truthy, which used to make every new file get skipped.
            if replace or not dp.is_file():
                sys.stderr.write("Writing '{1}' (from '{0}')\n".format(s, d))
                # a destination may name a sub-directory that does not exist yet
                dp.parent.mkdir(parents=True, exist_ok=True)
                dp.write_bytes(s.read_bytes())
            else:
                sys.stderr.write("Skipping '{1}' (from '{0}')\n".format(s, d))
        # replace original archive with new archive from all files in tempdir
        with tarfile.open(tp, "w:gz") as w:
            for f in tempdirp.iterdir():
                w.add(f, arcname=f.name)
    return None
还有几个"tests"例如:
# targz_add("test.tar.gz", "new.png", "a.png")
# targz_add("test.tar.gz", "new.png", "a.png", replace=True)
# targz_add("test.tar.gz", ["new.png"], "a.png")
# targz_add("test.tar.gz", "new.png", ["a.png"], replace=True)
targz_add("test.tar.gz", "new.png", lambda x:str(x).replace("new","a"), replace=True)
shutil
也支持归档操作,但不支持向现有存档中添加文件:
https://docs.python.org/3/library/shutil.html#archiving-operations
New in version 3.2.
Changed in version 3.5: Added support for the xztar format.
High-level utilities to create and read compressed and archived files are also provided. They rely on the zipfile and tarfile modules.
这里是添加一个文件,方法是使用io.BytesIO提取到内存,添加并压缩:
import io
import gzip
import tarfile
gfn = "test.tar.gz"
replace = "a.png"
replacement = "new.png"

# Decompress the whole archive into an in-memory buffer
print("reading {}".format(gfn))
with gzip.open(gfn) as packed:
    buffer = io.BytesIO(packed.read())

# Work on the plain (uncompressed) tar data in append mode
print("opening tar in memory")
buffer.seek(0)
with tarfile.open(fileobj=buffer, mode="a") as tar:
    tar.list()
    print("adding {} as {}".format(replacement, replace))
    # This appends a further member under the same name; the earlier one
    # stays in the archive (the sample output lists both).
    tar.add(replacement, arcname=replace)
    tar.list()

# Compress the modified tar data back over the original file
print("writing {}".format(gfn))
with gzip.open(gfn, "wb") as packed:
    packed.write(buffer.getvalue())
它打印
reading test.tar.gz
opening tar in memory
?rw-rw-rw- 0/0 877 2018-04-11 07:38:57 a.png
?rw-rw-rw- 0/0 827 2018-04-11 07:38:57 b.png
?rw-rw-rw- 0/0 787 2018-04-11 07:38:57 c.png
adding new.png as a.png
?rw-rw-rw- 0/0 877 2018-04-11 07:38:57 a.png
?rw-rw-rw- 0/0 827 2018-04-11 07:38:57 b.png
?rw-rw-rw- 0/0 787 2018-04-11 07:38:57 c.png
-rw-rw-rw- 0/0 2108 2018-04-11 07:38:57 a.png
writing test.tar.gz
欢迎优化!
我想将文件附加到 tar 文件。例如test.tar.gz
中的文件是a.png, b.png, c.png
。我有一个名为 a.png
的新 png 文件,我想将 a.png
附加到 test.tar.gz
并覆盖 test.tar.gz
中的旧文件 a.png
。我的代码:
import tarfile
# 'w:gz' starts a brand-new gzip-compressed archive, so whatever
# test.tar.gz contained before is discarded and only a.png remains.
a = tarfile.open('test.tar.gz', 'w:gz')
a.add('a.png')
a.close()
然后,test.tar.gz
中除了 a.png
以外的所有文件都消失了。如果我将代码更改为:
import tarfile
a = tarfile.open('test.tar.gz', 'a:')  # or 'a:gz' -- neither works on a gzip-compressed archive (see the traceback below)
a.add('a.png')
a.close()
程序崩溃,错误日志:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.7/tarfile.py", line 1678, in open
return func(name, filemode, fileobj, **kwargs)
File "/usr/lib/python2.7/tarfile.py", line 1705, in taropen
return cls(name, mode, fileobj, **kwargs)
File "/usr/lib/python2.7/tarfile.py", line 1588, in __init__
raise ReadError(str(e))
tarfile.ReadError: invalid header
我的错误是什么,我该怎么办?
更新。根据文档,gz
文件无法在 a
模式下打开。如果是这样,在现有存档中添加或更新文件的最佳方式是什么?
Note that
'a:gz'
or 'a:bz2'
is not possible. If mode is not suitable to open a certain (compressed) file for reading, ReadError
is raised. Use mode 'r' to avoid this. If a compression method is not supported, CompressionError
is raised.
所以我猜你应该使用gzip
library解压,在tarfile
中使用a:
模式添加文件,然后使用gzip
再次压缩。
Update. From the documentation, it follows that
gz
files cannot be opened in a
mode. If so, what is the best way to add or update files in an existing archive?
简答:
- 解包/解压缩存档
- 替换/添加文件
- 重新打包/压缩存档
我尝试使用 gzip
和 tarfile
的文件/流接口在内存中执行此操作,但未能使其成功运行——无论如何 tarball 都必须重写,因为显然无法就地替换其中的文件。所以最好直接解压整个存档。
脚本,如果直接 运行,也会尝试生成测试图像 "a.png, b.png, c.png, new.png"(需要 Pillow)和初始存档 "test.tar.gz"(如果它们不存在)。然后它将存档解压缩到临时目录中,用 "new.png" 的内容覆盖 "a.png",并打包所有文件,覆盖原始存档。 以下是单独的文件:
当然,也可以在交互模式下依次执行脚本中的各个函数,以便有机会查看中间生成的文件。假设脚本的文件名是 "t.py":
>>> from t import *
>>> make_images()
>>> make_archive()
>>> replace_file()
Workaround
开始吧(精华部分在replace_file()
):
#!python3
#coding=utf-8
"""
Replace a file in a .tar.gz archive via temporary files
"""
import sys #
import pathlib # https://docs.python.org/3/library/pathlib.html
import tempfile # https://docs.python.org/3/library/tempfile.html
import tarfile # https://docs.python.org/3/library/tarfile.html
#import gzip # https://docs.python.org/3/library/gzip.html
# Archive that the demo creates and later rewrites
gfn = "test.tar.gz"
# File extension shared by all sample images
iext = ".png"
# Name of the archive member whose content gets overwritten
replace = "a"+iext
# File on disk that supplies the new content for that member
replacement = "new"+iext
def make_images():
    """Generate the four sample images with Pillow (http://pillow.readthedocs.io/).

    Writes ``a``, ``b``, ``c`` and ``new`` (each suffixed with the
    module-level ``iext``) into the current directory. Every image is a
    100x100 solid-colour square with its own name drawn in black.

    Best effort: any failure (Pillow not installed, file not writable, ...)
    is reported on stderr instead of being raised.
    """
    try:
        from PIL import Image, ImageDraw, ImageFont
        try:
            font = ImageFont.truetype("arial.ttf", 50)
        except OSError:
            # "arial.ttf" is not available on every system; fall back to
            # Pillow's built-in font rather than producing no images at all.
            font = ImageFont.load_default()
        colours = {"a": "red", "b": "green", "c": "blue", "new": "orange"}
        for label, colour in colours.items():
            img = Image.new('RGB', (100, 100), color=colour)
            ImageDraw.Draw(img).text((0, 0), label, fill=(0, 0, 0), font=font)
            img.save(label + iext)
    except Exception as e:
        print(e, file=sys.stderr)
        print("Could not create image files", file=sys.stderr)
        print("(pip install pillow)", file=sys.stderr)
def make_archive():
    """Create the gzip-compressed sample archive.

    Packs ``a``, ``b`` and ``c`` (each suffixed with ``iext``) from the
    current directory into the file named by ``gfn``. Failures are reported
    on stderr, not raised.
    """
    try:
        # The context manager closes the archive even when add() fails,
        # so no file handle is leaked on error.
        with tarfile.open(gfn, 'w:gz') as archive:
            for stem in 'abc':
                archive.add(stem + iext)
    except Exception as e:
        print(e, file=sys.stderr)
        print("Could not create archive", file=sys.stderr)
def make_files():
    """Create whatever sample input is still missing.

    All images are regenerated if at least one of them is absent; the
    archive is built only when it does not exist yet.
    """
    stems = ('a', 'b', 'c', 'new')
    if any(not pathlib.Path(stem + iext).is_file() for stem in stems):
        make_images()
    archive = pathlib.Path(gfn)
    if not archive.is_file():
        make_archive()
def add_file_not():
    """Show that a gzip-compressed tar cannot be opened for appending.

    The attempt is expected to fail; the error is printed to stderr
    instead of being raised.
    """
    print("Not possible: tarfile with \"a:gz\" - failing now:", file=sys.stderr)
    try:
        # Append mode combined with gzip compression is not possible
        archive = tarfile.open(gfn, 'a:gz')
        archive.add(replacement, arcname=replace)
        archive.close()
    except Exception as err:
        print(err, file=sys.stderr)
def replace_file():
    """Swap one archive member by unpacking, overwriting and repacking.

    The archive ``gfn`` is extracted into a temporary directory, the member
    ``replace`` receives the bytes of the file ``replacement``, and ``gfn``
    is then rewritten from everything found in that directory.
    """
    print("Workaround", file=sys.stderr)
    with tempfile.TemporaryDirectory() as workdir:
        work = pathlib.Path(workdir)
        # unpack the current archive
        with tarfile.open(gfn) as source:
            source.extractall(workdir)
        # overwrite the target member with the new content
        new_content = pathlib.Path(replacement).read_bytes()
        (work / replace).write_bytes(new_content)
        # rebuild the archive from the top-level entries of the work directory
        with tarfile.open(gfn, "w:gz") as target:
            for entry in work.iterdir():
                target.add(entry, arcname=entry.name)
def test():
    """Run the demo end to end: ensure the sample files exist, then replace one."""
    make_files()
    # add_file_not()  # enable to see the append-mode failure
    replace_file()
if __name__ == "__main__":
test()
如果你想添加文件而不是替换它们,显然只需省略替换临时文件的那一行,并将要附加的文件复制到临时目录中。确保 pathlib.Path.iterdir
随后也能“看到”这些新文件,从而把它们加入新存档。
我把它放在了一个更有用的函数中:
def targz_add(targz=None, src=None, dst=None, replace=False):
    """Add <src> file(s) to <targz> file, optionally replacing existing file(s).

    The archive is unpacked into a temporary directory, the source files are
    copied in, and the archive is rebuilt (gzip-compressed) from the
    top-level entries of that directory.

    Args:
        targz: path of an existing tar archive; it is overwritten in place.
        src: one path, or a tuple/list/set of paths, of the file(s) to add.
            Sources that do not exist are reported and skipped.
        dst: name(s) inside the archive. A falsy value keeps each source's
            file name; a single value is reduced to its file name; a
            tuple/list/set supplies the names, paired with the existing
            sources in order; a callable is applied to each source
            ``pathlib.Path``.
        replace: overwrite members that already exist in the archive. When
            false, existing members are kept and only new names are added.

    Returns:
        ``None`` on success; ``1`` if the archive is missing or no usable
        source was given (the reason is written to stderr).

    TODO: complete error handling...
    """
    import sys
    import pathlib
    import tempfile
    import tarfile

    # ensure targz is given and exists
    if not targz:
        sys.stderr.write("No target archive given.\n")
        return 1
    tp = pathlib.Path(targz)
    if not tp.is_file():
        sys.stderr.write("Target '{}' does not exist!\n".format(tp))
        return 1

    # src path(s)
    if not src:
        sys.stderr.write("No files given.\n")
        return 1
    if not isinstance(src, (tuple, list, set)):
        src = [src]
    srcp = []
    for s in src:
        sp = pathlib.Path(s)
        if sp.is_file():
            srcp.append(sp)
        else:
            sys.stderr.write("Source '{}' does not exist.\n".format(sp))
    if not srcp:
        sys.stderr.write("None of the files exist.\n")
        return 1

    # dst path(s) (filenames in archive)
    if not dst:
        # default: use filename only
        dstp = [sp.name for sp in srcp]
    elif callable(dst):
        # map dst over each source Path, ensure results are Path
        dstp = [pathlib.Path(c) for c in map(dst, srcp)]
    elif isinstance(dst, (tuple, list, set)):
        dstp = [pathlib.Path(d) for d in dst]
    else:
        # single destination: keep only its file name
        dstp = [pathlib.Path(dst).name]
    if len(dstp) != len(srcp):
        # zip() below stops at the shorter list; say so instead of
        # dropping the surplus entries silently.
        sys.stderr.write(
            "Warning: {} source(s) but {} destination(s); "
            "surplus entries are ignored.\n".format(len(srcp), len(dstp)))

    with tempfile.TemporaryDirectory() as tempdir:
        tempdirp = pathlib.Path(tempdir)
        # extract original archive to temporary directory
        with tarfile.open(tp) as r:
            r.extractall(tempdirp)
        # copy source(s) to target in temporary directory
        for s, d in zip(srcp, dstp):
            dp = tempdirp / d
            # TODO extend to allow flag individually
            # is_file must be *called*: the bare bound method is always
            # truthy, which used to make every new file get skipped.
            if replace or not dp.is_file():
                sys.stderr.write("Writing '{1}' (from '{0}')\n".format(s, d))
                # a destination may name a sub-directory that does not exist yet
                dp.parent.mkdir(parents=True, exist_ok=True)
                dp.write_bytes(s.read_bytes())
            else:
                sys.stderr.write("Skipping '{1}' (from '{0}')\n".format(s, d))
        # replace original archive with new archive from all files in tempdir
        with tarfile.open(tp, "w:gz") as w:
            for f in tempdirp.iterdir():
                w.add(f, arcname=f.name)
    return None
还有几个"tests"例如:
# targz_add("test.tar.gz", "new.png", "a.png")
# targz_add("test.tar.gz", "new.png", "a.png", replace=True)
# targz_add("test.tar.gz", ["new.png"], "a.png")
# targz_add("test.tar.gz", "new.png", ["a.png"], replace=True)
targz_add("test.tar.gz", "new.png", lambda x:str(x).replace("new","a"), replace=True)
shutil
也支持归档操作,但不支持向现有存档中添加文件:
https://docs.python.org/3/library/shutil.html#archiving-operations
New in version 3.2.
Changed in version 3.5: Added support for the xztar format.
High-level utilities to create and read compressed and archived files are also provided. They rely on the zipfile and tarfile modules.
这里是添加一个文件,方法是使用io.BytesIO提取到内存,添加并压缩:
import io
import gzip
import tarfile
gfn = "test.tar.gz"
replace = "a.png"
replacement = "new.png"

# Decompress the whole archive into an in-memory buffer
print("reading {}".format(gfn))
with gzip.open(gfn) as packed:
    buffer = io.BytesIO(packed.read())

# Work on the plain (uncompressed) tar data in append mode
print("opening tar in memory")
buffer.seek(0)
with tarfile.open(fileobj=buffer, mode="a") as tar:
    tar.list()
    print("adding {} as {}".format(replacement, replace))
    # This appends a further member under the same name; the earlier one
    # stays in the archive (the sample output lists both).
    tar.add(replacement, arcname=replace)
    tar.list()

# Compress the modified tar data back over the original file
print("writing {}".format(gfn))
with gzip.open(gfn, "wb") as packed:
    packed.write(buffer.getvalue())
它打印
reading test.tar.gz
opening tar in memory
?rw-rw-rw- 0/0 877 2018-04-11 07:38:57 a.png
?rw-rw-rw- 0/0 827 2018-04-11 07:38:57 b.png
?rw-rw-rw- 0/0 787 2018-04-11 07:38:57 c.png
adding new.png as a.png
?rw-rw-rw- 0/0 877 2018-04-11 07:38:57 a.png
?rw-rw-rw- 0/0 827 2018-04-11 07:38:57 b.png
?rw-rw-rw- 0/0 787 2018-04-11 07:38:57 c.png
-rw-rw-rw- 0/0 2108 2018-04-11 07:38:57 a.png
writing test.tar.gz
欢迎优化!