Python 中 zipfile 没有压缩部分 SVG 文件

Zipfile not compressing some SVG files in Python

我一直在编写一个脚本,用来把 epub 中的 LaTeX 代码转换为 SVG 图像。思路是:先把 epub 解压到临时目录,找到其中的 LaTeX 代码并生成对应的 SVG,再把代码替换为指向 SVG 图像的链接,最后把所有内容重新压缩。

除了最后的压缩,一切正常。它压缩了除我新创建的 SVG 之外的所有内容(我检查过,这些 SVG 确实位于解压后的临时 epub 的图像文件夹中)。下面是一个最小的可运行示例:

import zipfile
import os
import shutil

def create_minimal_uncompressed_epub(directory):
    """Create a fresh, unzipped minimal EPUB tree rooted at *directory*.

    An existing *directory* is deleted first (deletion errors are ignored),
    then the following files are written:

        mimetype
        META-INF/container.xml
        OEBPS/content.opf
        OEBPS/toc.xhtml
        OEBPS/toc.ncx
        OEBPS/1_hello.xhtml
        OEBPS/template.css
    """
    def put(text, *relative):
        # Write ``text`` to the file at directory/<relative...>.
        with open(os.path.join(directory, *relative), 'w') as handle:
            handle.write(text)

    # Start from an empty directory so nothing from a previous run survives.
    if os.path.exists(directory):
        shutil.rmtree(directory, ignore_errors=True)
    os.makedirs(directory)

    put('application/epub+zip', 'mimetype')
    for subdir in ('META-INF', 'OEBPS'):
        os.makedirs(os.path.join(directory, subdir))

    # container.xml points at the package document, OEBPS/content.opf.
    container_xml = (
        '<?xml version="1.0"?>'
        '<container version="1.0" xmlns="urn:oasis:names:'
        'tc:opendocument:xmlns:container">'
        '<rootfiles>'
        '<rootfile full-path="OEBPS/content.opf" media-type='
        '"application/oebps-package+xml"/>'
        '</rootfiles>'
        '</container>')
    put(container_xml, 'META-INF', 'container.xml')

    # Package document: metadata, manifest and spine.
    content_opf = (
        '<?xml version="1.0" encoding="UTF-8" ?><package xmlns='
        '"http://www.idpf.org/2007/opf" xmlns:dc="http://purl.o'
        'rg/dc/elements/1.1/" unique-identifier="db-id" version'
        '="3.0"><metadata><dc:title id="t1">Title</dc:title><dc'
        ':identifier id="db-id">isbn</dc:identifier><meta   pro'
        'perty="dcterms:modified">2014-03-27T09:14:09Z</meta><d'
        'c:language>en</dc:language></metadata><manifest><item '
        'id="toc" properties="nav" href="toc.xhtml" media-type='
        '"application/xhtml+xml" /><item id="ncx" href="toc.ncx'
        '" media-type="application/x-dtbncx+xml" /><item id="te'
        'mplate_css" href="template.css" media-type="text/css" '
        '/><item id="hello" href="1_hello.xhtml" media-type="ap'
        'plication/xhtml+xml" /></manifest><spine toc="ncx"><it'
        'emref idref="hello" /></spine></package>')
    put(content_opf, 'OEBPS', 'content.opf')

    toc_xhtml = (
        '<?xml version="1.0" encoding="utf-8"?><html '
        'xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="htt'
        'p://www.idpf.org/2007/ops"><head><title>toc.xhtml</t'
        'itle><link href="template.css" rel="stylesheet" type'
        '="text/css" /></head><body><nav id="toc" epub:type="'
        'toc"><h1 class="frontmatter">Table of Contents</h1><'
        'ol class="contents"><li><a href="1_hello.xhtml">Hell'
        'o</a></li></ol></nav></body></html>')
    put(toc_xhtml, 'OEBPS', 'toc.xhtml')

    toc_ncx = (
        '<?xml version="1.0" encoding="UTF-8" ?><ncx version="2005'
        '-1" xml:lang="en" xmlns="http://www.daisy.org/z3986/2005/'
        'ncx/"><head><meta name="dtb:uid" content="isbn"/><meta na'
        'me="dtb:depth" content="1"/></head><docTitle><text></text'
        '></docTitle><navMap><navPoint id="hello" playOrder="1"><n'
        'avLabel><text>cover</text></navLabel><content src="1_hell'
        'o.xhtml" /></navPoint></navMap></ncx>')
    put(toc_ncx, 'OEBPS', 'toc.ncx')

    hello_xhtml = (
        '<?xml version="1.0" encoding="utf-8"?><html xmlns="http://www.w3.or'
        'g/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops"><head><titl'
        'e>1_hello.xhtml</title><link href="template.css" rel="stylesheet" t'
        'ype="text/css" /></head><body><h1>Hello World!</h1></body></html> ')
    put(hello_xhtml, 'OEBPS', '1_hello.xhtml')

    put('h1 {text-align: center;}', 'OEBPS', 'template.css')

def recursive_zip(zipf, directory, folder=None):
    """Add every file below *directory* to the open archive *zipf*.

    Args:
        zipf: A ``zipfile.ZipFile`` opened for writing.
        directory: Path on disk whose contents are added.
        folder: Archive-side prefix for the entries. ``None`` (the default)
            or ``''`` places them at the archive root.

    Regular files are written with ``ZIP_DEFLATED``; subdirectories are
    descended into recursively. Entries that are neither a regular file nor
    a directory are skipped.
    """
    # os.path.join(None, ...) raises, so the default must become an empty
    # prefix before it is used.
    if folder is None:
        folder = ''
    nodes = os.listdir(directory)
    # Call syntax prints the same on Python 2 and parses on Python 3.
    print(nodes)
    for item in nodes:
        source = os.path.join(directory, item)
        arcname = os.path.join(folder, item)
        if os.path.isfile(source):
            zipf.write(source, arcname, zipfile.ZIP_DEFLATED)
        elif os.path.isdir(source):
            recursive_zip(zipf, source, arcname)

def create_svg():
    """Return the placeholder text written to the SVG file in this example."""
    placeholder = 'code here\n'
    return placeholder

# Driver: build the unzipped book, drop one SVG into OEBPS, then zip the tree
# into an .epub file.
TEMP_DIR = 'minimal_temp_dir'
SVG_FILENAME = 'minimal_svg_filename.svg'
MINIMAL_EPUB = 'minimal_epub.epub'

create_minimal_uncompressed_epub(TEMP_DIR)
with open(os.path.join(TEMP_DIR, 'OEBPS', SVG_FILENAME), 'w') as svgfile:
    svgfile.write(create_svg())

try:
    # The context manager closes the archive (and writes its central
    # directory) even when adding a file fails part-way through.
    with zipfile.ZipFile(MINIMAL_EPUB, 'w') as ZIPF:
        # mimetype is added first and stored uncompressed.
        ZIPF.write(os.path.join(TEMP_DIR, 'mimetype'), 'mimetype', zipfile.ZIP_STORED)
        for item in os.listdir(TEMP_DIR):
            if os.path.isdir(os.path.join(TEMP_DIR, item)):
                recursive_zip(ZIPF, os.path.join(TEMP_DIR, item), item)
except (IOError, OSError) as err:
    # Only file-system failures are reported here; programming errors and
    # KeyboardInterrupt are no longer swallowed by a bare ``except``.
    print('\nError compressing file')
    print(err)

函数 recursive_zip 确实找到了每个文件(注意其中的 'print nodes')。我不知道为什么 SVG 文件会丢失,也没有出现任何错误。用 Sigil 打开时,SVG 文件在临时文件夹中,却不在最终压缩出的版本里。

我终于明白是怎么回事了。我发现图像列表位于 content.opf 文件中名为 manifest 的标签里,而根据 International Digital Publishing Forum(IDPF)的说明:

The required manifest must provide a list of all the files that are part of the publication (e.g. Content Documents, style sheets, image files, any embedded font files, any included schemas).

所以这些文件实际上已被压缩并包含在 zip 文件中;但当它被重命名为 .epub 并用 Sigil 打开时,SVG 图像不会显示,因为它们没有被列在 content.opf 文件的 manifest 中。