如何在 java 中构建大小为 400 GB 的 zip 文件

How to build a zip file with a size of 400 GB in java

我需要从一个包含 400GB 文档的 Alfresco 站点下载所有文档。下面的代码只能创建较小的 zip 文件(大约 1GB),文件再大就会占用太多内存。我不想把 ZipOutputStream 的全部内容保留在内存中,只想为每个正在复制到 zip 文件的文档占用内存,或者使用一个由每个文档依次覆盖的临时文件。

此类问题的最佳实践是什么?

这段代码是从我的main调用的:

// Caller snippet: collects every descendant of the site folder, zips them and
// writes the archive to <download dir>/<yyyy-MM-dd>_<folder name>.zip.
FolderImpl sitoFolder = (FolderImpl) cmisObject;

// Depth -1 is passed to getDescendants to request the whole subtree.
List<Tree<FileableCmisObject>> sitoFolderDescendants = sitoFolder.getDescendants(-1);

// The complete archive is returned as one byte array before anything is written to disk.
byte[] zipFile = createZipFILE(sitoFolderDescendants);
String rootPath = cartella_download_file;
File dir = new File(rootPath + File.separator);
if (!dir.exists()) {
   dir.mkdirs();
}
Date date = new Date();
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
String stringDate = sdf.format(date);
// "\\s" is the regex whitespace class; a single backslash is an illegal escape in a Java string literal.
String nameZipFile = sitoFolder.getName().replaceAll("\\s","");
File serverFile = new File(dir.getAbsolutePath() + File.separator + stringDate+"_"+nameZipFile+".zip");
BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(new FileOutputStream(serverFile));
try {
    IOUtils.write(zipFile, bufferedOutputStream);
} finally {
    // Release the file handle even when the write fails.
    bufferedOutputStream.close();
}

/**
 * Builds a zip archive from the given descendant trees and returns it as a byte array.
 * The archive is assembled in a ByteArrayOutputStream and then copied by
 * toByteArray(), so the whole archive is held on the heap.
 *
 * @param list trees to traverse; each one is handed to traverseTree
 * @return the archive bytes, or an empty array when an IOException was caught while building
 * @throws IOException kept in the signature for existing callers
 */
private byte[] createZipFILE(List<Tree<FileableCmisObject>> list) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ZipOutputStream zos = new ZipOutputStream(baos);
    try {
        ReportDocument reportDocument = new ReportDocument();
        for (Tree<FileableCmisObject> aList : list) {
            traverseTree(aList, zos, reportDocument);
        }
        // The stream must be closed before the bytes are read back so the archive is complete.
        zos.close();
        return baos.toByteArray();
    } catch (IOException exc) {
        reportLog.error(exc.getMessage());
    } finally {
        // Also close on the failure path; errors from this clean-up close are ignored
        // so they cannot replace the result or the exception already in flight.
        try {
            zos.close();
        } catch (IOException ignored) {
            // best-effort clean-up only
        }
    }
    return new byte[0];
}

/**
 * Walks the tree depth-first. Each document child is added to the zip, each
 * folder child reporting zero children is added as an empty folder entry, and
 * every child is then traversed recursively. An IOException raised while adding
 * an entry is logged and the walk continues.
 *
 * @param tree           node whose children are processed
 * @param zos            archive receiving the entries
 * @param reportDocument passed through unchanged to the recursive calls
 */
private void traverseTree(Tree<FileableCmisObject> tree, ZipOutputStream zos, ReportDocument reportDocument) {
    for (Tree<FileableCmisObject> node : tree.getChildren()) {
        FileableCmisObject item = node.getItem();
        if (CmisUtil.isDocument(item)) {
            try {
                addToZipFile((Document) item, zos);
            } catch (IOException e) {
                appLog.error(e.getMessage());
            }
        } else if (CmisUtil.isFolder(item)) {
            Folder emptyCandidate = (Folder) item;
            boolean hasNoChildren = emptyCandidate.getChildren().getTotalNumItems() == 0;
            if (hasNoChildren) {
                try {
                    addToZipFolder(emptyCandidate, zos);
                } catch (IOException e) {
                    appLog.error(e.getMessage());
                }
            }
        }
        // Recurse for every child, whatever its type.
        traverseTree(node, zos, reportDocument);
    }
}

/**
 * Adds one document to the zip file. The entry name is the document's first
 * path with sito_export_path removed.
 *
 * @param document document whose content stream is copied into the archive
 * @param zos      archive receiving the entry
 * @throws IOException if writing the entry or reading the content fails
 */
private void addToZipFile(Document document, ZipOutputStream zos) throws IOException {
    // NOTE(review): getContentStream() may be null for a document without content - confirm against the OpenCMIS API.
    InputStream inputStream = document.getContentStream().getStream();
    try {
        String path = document.getPaths().get(0).replace(sito_export_path, "");
        ZipEntry zipEntry = new ZipEntry(path);
        zos.putNextEntry(zipEntry);
        IOUtils.copy(inputStream, zos, 1024);
        zos.closeEntry();
    } finally {
        // Always release the content stream, including when creating the entry or copying fails.
        inputStream.close();
    }
}

/**
 * Adds an empty folder to the zip file. The entry name is the folder's first
 * path with sito_export_path removed, followed by "/".
 *
 * @param folder folder to record in the archive
 * @param zos    archive receiving the entry
 * @throws IOException if the entry cannot be written
 */
private void addToZipFolder(Folder folder, ZipOutputStream zos) throws IOException {
    String path = folder.getPaths().get(0).replace(sito_export_path, "");
    ZipEntry zipEntry = new ZipEntry(path.concat("/"));
    zos.putNextEntry(zipEntry);
    // Close the entry explicitly, as addToZipFile does, instead of leaving it open for the next put.
    zos.closeEntry();
}

实际上我已经为 Alfresco 3.4.d 版本实现了“下载为 zip”的功能,并使用了以下代码。我没有检查过它能否处理 GB 级的文件,因为我没有那么多数据。它可能对你有帮助。

这是 Java 支持的 WebScript。

/*
 * this class create a zip file base on given(parameter) node
 * */
public class ZipContents extends AbstractWebScript {
    private static Log logger = LogFactory.getLog(ZipContents.class);

    private static final int BUFFER_SIZE = 1024;

    private static final String MIMETYPE_ZIP = "application/zip";
    private static final String TEMP_FILE_PREFIX = "alf";
    private static final String ZIP_EXTENSION = ".zip";

    private ContentService contentService;
    private NodeService nodeService;
    private NamespaceService namespaceService;
    private DictionaryService dictionaryService;
    private StoreRef storeRef;
    private String encoding;

    public void setNodeService(NodeService nodeService) {
        this.nodeService = nodeService;
    }

    public void setContentService(ContentService contentService) {
        this.contentService = contentService;
    }

    public void setNamespaceService(NamespaceService namespaceService) {
        this.namespaceService = namespaceService;
    }

    public void setDictionaryService(DictionaryService dictionaryService) {
        this.dictionaryService = dictionaryService;
    }

    public void setStoreUrl(String url) {
        this.storeRef = new StoreRef(url);
    }

    public void setEncoding(String encoding) {
        this.encoding = encoding;
    }

    public void execute(WebScriptRequest req, WebScriptResponse res) throws IOException {

        String nodes = req.getParameter("nodes");
        if (nodes == null || nodes.length() == 0) {
            throw new WebScriptException(HttpServletResponse.SC_BAD_REQUEST, "nodes");
        }

        List<String> nodeIds = new ArrayList<String>();
        StringTokenizer tokenizer = new StringTokenizer(nodes, ",");
        if (tokenizer.hasMoreTokens()) {
            while (tokenizer.hasMoreTokens()) {
                nodeIds.add(tokenizer.nextToken());
            }
        }

        String filename = req.getParameter("filename");
        if (filename == null || filename.length() == 0) {
            throw new WebScriptException(HttpServletResponse.SC_BAD_REQUEST, "filename");
        }

        String noaccentStr = req.getParameter("noaccent");
        if (noaccentStr == null || noaccentStr.length() == 0) {
            throw new WebScriptException(HttpServletResponse.SC_BAD_REQUEST, "noaccent");
        }

        try {
            res.setContentType(MIMETYPE_ZIP);
            res.setHeader("Content-Transfer-Encoding", "binary");
            res.addHeader("Content-Disposition", "attachment;filename=\"" + unAccent(filename) + ZIP_EXTENSION + "\"");

            res.setHeader("Cache-Control", "must-revalidate, post-check=0, pre-check=0");
            res.setHeader("Pragma", "public");
            res.setHeader("Expires", "0");

            createZipFile(nodeIds, res.getOutputStream(), new Boolean(noaccentStr));
        } catch (RuntimeException e) {
            throw new WebScriptException(HttpServletResponse.SC_BAD_REQUEST, e.getMessage());
        }
    }

    public void createZipFile(List<String> nodeIds, OutputStream os, boolean noaccent) throws IOException {
        File zip = null;

        try {
            if (nodeIds != null && !nodeIds.isEmpty()) {
                zip = TempFileProvider.createTempFile(TEMP_FILE_PREFIX, ZIP_EXTENSION);
                FileOutputStream stream = new FileOutputStream(zip);
                CheckedOutputStream checksum = new CheckedOutputStream(stream, new Adler32());
                BufferedOutputStream buff = new BufferedOutputStream(checksum);
                ZipArchiveOutputStream out = new ZipArchiveOutputStream(buff);
                out.setEncoding(encoding);
                out.setMethod(ZipArchiveOutputStream.DEFLATED);
                out.setLevel(Deflater.BEST_COMPRESSION);

                if (logger.isDebugEnabled()) {
                    logger.debug("Using encoding '" + encoding + "' for zip file.");
                }

                try {
                    for (String nodeId : nodeIds) {
                        NodeRef node = new NodeRef(storeRef, nodeId);
                        addToZip(node, out, noaccent, "");
                    }
                } catch (Exception e) {
                    logger.error(e.getMessage(), e);
                    throw new WebScriptException(HttpServletResponse.SC_BAD_REQUEST, e.getMessage());
                } finally {
                    out.close();
                    buff.close();
                    checksum.close();
                    stream.close();

                    if (nodeIds.size() > 0) {
                        InputStream in = new FileInputStream(zip);
                        try {
                            byte[] buffer = new byte[BUFFER_SIZE];
                            int len;

                            while ((len = in.read(buffer)) > 0) {
                                os.write(buffer, 0, len);
                            }
                        } finally {
                            IOUtils.closeQuietly(in);
                        }
                    }
                }
            }
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
            throw new WebScriptException(HttpServletResponse.SC_BAD_REQUEST, e.getMessage());
        } finally {
            // try and delete the temporary file
            if (zip != null) {
                zip.delete();
            }
        }
    }

    public void addToZip(NodeRef node, ZipArchiveOutputStream out, boolean noaccent, String path) throws IOException {
        QName nodeQnameType = this.nodeService.getType(node);

        // Special case : links
        if (this.dictionaryService.isSubClass(nodeQnameType, ApplicationModel.TYPE_FILELINK)) {
            NodeRef linkDestinationNode = (NodeRef) nodeService.getProperty(node, ContentModel.PROP_LINK_DESTINATION);
            if (linkDestinationNode == null) {
                return;
            }

            // Duplicate entry: check if link is not in the same space of the
            // link destination
            if (nodeService.getPrimaryParent(node).getParentRef().equals(nodeService.getPrimaryParent(linkDestinationNode).getParentRef())) {
                return;
            }

            nodeQnameType = this.nodeService.getType(linkDestinationNode);
            node = linkDestinationNode;
        }

        String nodeName = (String) nodeService.getProperty(node, ContentModel.PROP_NAME);
        nodeName = noaccent ? unAccent(nodeName) : nodeName;

        if (this.dictionaryService.isSubClass(nodeQnameType, ContentModel.TYPE_CONTENT)) {
            ContentReader reader = contentService.getReader(node, ContentModel.PROP_CONTENT);
            if (reader != null) {
                InputStream is = reader.getContentInputStream();

                String filename = path.isEmpty() ? nodeName : path + '/' + nodeName;



                ZipArchiveEntry entry = new ZipArchiveEntry(filename);
                entry.setTime(((Date) nodeService.getProperty(node, ContentModel.PROP_MODIFIED)).getTime());

                entry.setSize(reader.getSize());
                out.putArchiveEntry(entry);

                byte buffer[] = new byte[BUFFER_SIZE];
                while (true) {
                    int nRead = is.read(buffer, 0, buffer.length);
                    if (nRead <= 0) {
                        break;
                    }

                    out.write(buffer, 0, nRead);
                }
                is.close();
                out.closeArchiveEntry();
            } else {
                logger.warn("Could not read : " + nodeName + "content");
            }
        } else if (this.dictionaryService.isSubClass(nodeQnameType, ContentModel.TYPE_FOLDER)
                && !this.dictionaryService.isSubClass(nodeQnameType, ContentModel.TYPE_SYSTEM_FOLDER)) {
            List<ChildAssociationRef> children = nodeService.getChildAssocs(node);
            if (children.isEmpty()) {

                String folderPath = path.isEmpty() ? nodeName + '/' : path + '/' + nodeName + '/';
                ZipArchiveEntry entry = new ZipArchiveEntry(folderPath);
                entry.setSize(0);
                entry.setTime(((Date) nodeService.getProperty(node, ContentModel.PROP_MODIFIED)).getTime());
                out.putArchiveEntry(entry);
                out.closeArchiveEntry();

            } else {
                for (ChildAssociationRef childAssoc : children) {
                    NodeRef childNodeRef = childAssoc.getChildRef();
                    addToZip(childNodeRef, out, noaccent, path.isEmpty() ? nodeName : path + '/' + nodeName);
                }
            }
        } else {
            logger.info("Unmanaged type: " + nodeQnameType.getPrefixedQName(this.namespaceService) + ", filename: " + nodeName);
        }
    }



    /**
     * ZipEntry() does not convert filenames from Unicode to platform (waiting
     * Java 7) http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4244499
     * 
     * @param s
     * @return
     */
    public static String unAccent(String s) {
        String temp = Normalizer.normalize(s, Normalizer.NFD, 0);
        return temp.replaceAll("[^\p{ASCII}]", "");
    }

}

我解决了。我先在服务器上创建了一个目录,然后直接在这个目录下创建了zip文件。

错误在于先把所有文件保存到 ByteArrayOutputStream 中,然后才写入 zip 文件。

// Write the archive straight to the target file instead of buffering it in memory.
File serverFile = new File(dir.getAbsolutePath() + File.separator + stringDate+"_"+nameZipFile+".zip");
FileOutputStream fileOutputStream = new FileOutputStream(serverFile);
try {
   ZipArchiveOutputStream zos = new ZipArchiveOutputStream(fileOutputStream);
   for (Tree<FileableCmisObject> aList : sitoFolderDescendants) {
      traverseTree(aList, zos, reportDocument);
   }
   zos.close();
} finally {
   // Release the file handle even when traversal or finishing the archive fails.
   fileOutputStream.close();
}

在 finally 块中,我关闭了 FileOutputStream。然后我修改了服务方法,改用 ZipArchiveOutputStream 和 ZipArchiveEntry:

/**
 * Records an empty folder in the archive. The entry name is the folder's first
 * path with sito_export_path removed, followed by "/".
 *
 * @param folder folder to record
 * @param zos    archive receiving the entry
 * @throws IOException if the entry cannot be written
 */
private void addToZipFolder(Folder folder, ZipArchiveOutputStream zos) throws IOException {
    final String relativePath = folder.getPaths().get(0).replace(sito_export_path, "");
    final ZipArchiveEntry folderEntry = new ZipArchiveEntry(relativePath + "/");

    appLog.info("aggiungo cartella vuota "+folder.getName()+" al file zip");

    // Open and immediately close the entry: a folder entry carries no data.
    zos.putArchiveEntry(folderEntry);
    zos.closeArchiveEntry();
}

/**
 * Streams one document's content into the archive as a single entry. The entry
 * name is the document's first path with sito_export_path removed.
 *
 * @param document document whose content is copied
 * @param zos      archive receiving the entry
 * @throws IOException if reading the content or writing the entry fails
 */
private void addToZipFile(Document document, ZipArchiveOutputStream zos) throws IOException {
    String path = document.getPaths().get(0).replace(sito_export_path, "");
    ZipArchiveEntry entry = new ZipArchiveEntry(path);

    // Per the OpenCMIS Javadoc the length is -1 when unknown, and ZipArchiveEntry.setSize
    // rejects negative values, so the size is only declared when it is known.
    long length = document.getContentStreamLength();
    if (length >= 0) {
        entry.setSize(length);
    }

    InputStream inputStream = document.getContentStream().getStream();
    try {
        zos.putArchiveEntry(entry);
        byte[] buffer = new byte[1024];
        int nRead;
        while ((nRead = inputStream.read(buffer, 0, buffer.length)) > 0) {
            zos.write(buffer, 0, nRead);
        }
        zos.closeArchiveEntry();
    } finally {
        // Always release the content stream, including when writing the entry fails.
        inputStream.close();
    }
}