无法打开 zips 和 gz 中的文件
Unable to open files within zips and gz
我正在尝试读取 zip 中的文件,但我得到了
"IOError: [Errno 2] No such file or directory: "
我怀疑它与 os.path.join 有关。如果我添加 "zname" 的打印语句,则不包括从根目录开始的完整路径。
我创建了一个类似的功能,它非常适合未压缩的文件,但我对如何在 zip 文件中工作感到困惑。
任何帮助将不胜感激。我是 python 的新手,所以如果我遗漏了一些明显的东西,请原谅我。
这是代码和错误消息:
def re_zip():
zcount = []
zpattern = ('.zip','.ZIP')
for root, directories, filenames in os.walk(path):
for filename in filenames:
if filename.endswith(zpattern):
zj = os.path.join(root,filename)
zf = zipfile.ZipFile(zj)
for zname in zf.namelist():
if zname.endswith(ext):
text = open(zname, 'r')
hits = 0
for line in text:
if re.match(regex, line):
hits = hits + 1
zcount.append(hits)
print (zname + " , " + str(hits))
output.write(str(hits) + " , " + zname + "\n")
text.close()
return(sum(zcount))
Traceback (most recent call last):
File "re_count.py", line 80, in <module>
total = re_match() + re_zip() + re_tar()
File "re_count.py", line 45, in re_zip
text = open(zname, 'r')
IOError: [Errno 2] No such file or directory: 'within_zip/folder/myfile.xml'
re_zip() 已通过将 text = open(zname, 'r') 更改为 text = zf.open(zname, 'r')
来修复
现在我正尝试对 tar gz 文件执行相同的操作。 tar 或 tar.gz 我得到同样的错误。我相信 tar 模块应该同时处理这两个问题。
def re_tar():
tcount = []
tars = ('tar','gz','tgz','TAR','GZ')
for root, directories, filenames in os.walk(path):
for filename in filenames:
if filename.endswith(tars):
tj = os.path.join(root,filename)
tf = tarfile.open(tj)
for tarinfo in tf.getmembers():
tname = tarinfo.name
if tname.endswith(ext):
text = tf.open(tname, 'r')
hits = 0
for line in text:
if re.match(regex, line):
hits = hits + 1
tcount.append(hits)
print (tname + " , " + str(hits))
output.write(str(hits) + " , " + tname + "\n")
text.close()
tf.close()
return(sum(tcount))
File "re_count_v2.py", line 68, in re_tar
text = tf.open(tname, 'r')
File "C:\Users\username\AppData\Local\Continuum\Anaconda2\lib\tarfile.py", line 1673, in open
return func(name, "r", fileobj, **kwargs)
File "C:\Users\username\AppData\Local\Continuum\Anaconda2\lib\tarfile.py", line 1738, in gzopen
fileobj = gzip.GzipFile(name, mode, compresslevel, fileobj)
File "C:\Users\username\AppData\Local\Continuum\Anaconda2\lib\gzip.py", line 94, in __init__
fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
IOError: [Errno 2] No such file or directory: 'httpopenaccessikuedutr8080oai_.scol11413101_1.xml'
与 os.listdir()
一样,namelist()
方法 returns 只有文件名,没有根目录。您不能使用通常的 open()
函数,而是使用 zipfile.open()
方法,如下所示:
text = zf.open(zname)
我正在尝试读取 zip 中的文件,但我得到了 "IOError: [Errno 2] No such file or directory: " 我怀疑它与 os.path.join 有关。如果我添加 "zname" 的打印语句,则不包括从根目录开始的完整路径。 我创建了一个类似的功能,它非常适合未压缩的文件,但我对如何在 zip 文件中工作感到困惑。 任何帮助将不胜感激。我是 python 的新手,所以如果我遗漏了一些明显的东西,请原谅我。
这是代码和错误消息:
def re_zip():
zcount = []
zpattern = ('.zip','.ZIP')
for root, directories, filenames in os.walk(path):
for filename in filenames:
if filename.endswith(zpattern):
zj = os.path.join(root,filename)
zf = zipfile.ZipFile(zj)
for zname in zf.namelist():
if zname.endswith(ext):
text = open(zname, 'r')
hits = 0
for line in text:
if re.match(regex, line):
hits = hits + 1
zcount.append(hits)
print (zname + " , " + str(hits))
output.write(str(hits) + " , " + zname + "\n")
text.close()
return(sum(zcount))
Traceback (most recent call last):
File "re_count.py", line 80, in <module>
total = re_match() + re_zip() + re_tar()
File "re_count.py", line 45, in re_zip
text = open(zname, 'r')
IOError: [Errno 2] No such file or directory: 'within_zip/folder/myfile.xml'
re_zip() 已通过将 text = open(zname, 'r') 更改为 text = zf.open(zname, 'r')
来修复现在我正尝试对 tar gz 文件执行相同的操作。 tar 或 tar.gz 我得到同样的错误。我相信 tar 模块应该同时处理这两个问题。
def re_tar():
tcount = []
tars = ('tar','gz','tgz','TAR','GZ')
for root, directories, filenames in os.walk(path):
for filename in filenames:
if filename.endswith(tars):
tj = os.path.join(root,filename)
tf = tarfile.open(tj)
for tarinfo in tf.getmembers():
tname = tarinfo.name
if tname.endswith(ext):
text = tf.open(tname, 'r')
hits = 0
for line in text:
if re.match(regex, line):
hits = hits + 1
tcount.append(hits)
print (tname + " , " + str(hits))
output.write(str(hits) + " , " + tname + "\n")
text.close()
tf.close()
return(sum(tcount))
File "re_count_v2.py", line 68, in re_tar
text = tf.open(tname, 'r')
File "C:\Users\username\AppData\Local\Continuum\Anaconda2\lib\tarfile.py", line 1673, in open
return func(name, "r", fileobj, **kwargs)
File "C:\Users\username\AppData\Local\Continuum\Anaconda2\lib\tarfile.py", line 1738, in gzopen
fileobj = gzip.GzipFile(name, mode, compresslevel, fileobj)
File "C:\Users\username\AppData\Local\Continuum\Anaconda2\lib\gzip.py", line 94, in __init__
fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
IOError: [Errno 2] No such file or directory: 'httpopenaccessikuedutr8080oai_.scol11413101_1.xml'
与 os.listdir()
一样,namelist()
方法 returns 只有文件名,没有根目录。您不能使用通常的 open()
函数,而是使用 zipfile.open()
方法,如下所示:
text = zf.open(zname)