如何在 java 中更快地压缩归档文件
How to make compression faster in Java when archiving files
我正在开发一个 Java 应用程序,需要把文件归档为 tar/zip 格式,也就是把 csv 文件打包成 tar/zip。
现在我想找到更快完成这项工作的方法,因为我必须处理数百万个文件。
我目前使用的代码如下,请问怎样才能让它更快?
用到了以下库:
FileUtils,来自 org.apache.commons.io
TarArchiveEntry,来自 org.apache.commons.compress.archivers.tar
// Stages one object as a single-entry tar file (optionally compressed) and
// records its MD5 checksum, timestamp and on-disk size on `objectm`.
// Streams are chained as: tar -> buffer -> MD5 digest -> [compression] -> file.
// Any failure is rethrown as Exception("Exception: Creating tar", cause).
try {
    // Create staging file output stream
    File temp = new File(getFilePath(objectm));
    log.debug("temping " + objectm.getPath());
    outputStream = new FileOutputStream(temp);

    // Create the compression pass-thru stream (only when compression is enabled)
    if (isCompressionEnabled) {
        compressionStream = new CompressionStream(outputStream, getCompressionLevel(objectm));
    }

    // Create MD5 hash; the digest sees the tar bytes before they reach the
    // compression stream (or the file, when compression is disabled)
    final MessageDigest outputDigest = MessageDigest.getInstance("MD5");
    md5OutputStream = new DigestOutputStream(
            isCompressionEnabled ? compressionStream : outputStream, outputDigest);

    // Create tar stream
    tarStream = new TarArchiveOutputStream(new BufferedOutputStream(md5OutputStream));
    tarStream.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);
    tarStream.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_STAR);

    // tar the first object
    TarArchiveEntry entry = new TarArchiveEntry(objectm.getHierarchy());
    entry.setSize(objectm.getOriginalSize());
    entry.setModTime(objectm.getLastModified().getMillis());
    tarStream.putArchiveEntry(entry);
    org.apache.commons.io.IOUtils.copyLarge(inputStream, tarStream);
    // Every putArchiveEntry must be paired with closeArchiveEntry;
    // finish() refuses to complete an archive that still has an open entry.
    tarStream.closeArchiveEntry();
    tarStream.finish();
    // Close before reading the digest and file size: until then, bytes may
    // still sit in the BufferedOutputStream (and the compression stream), so
    // neither the MD5 nor the size on disk would cover the whole archive.
    tarStream.close();

    // Collect properties to return
    String digest = Hex.encodeHexString(outputDigest.digest());
    objectm.setChecksum(digest);
    objectm.setDate(DateTime.now());
    objectm.setCompressSize(FileUtils.sizeOf(temp));
    log.debug("Completed.");
} catch (Exception e) {
    throw new Exception("Exception: Creating tar", e);
} finally {
    // Best-effort cleanup for the failure path; on success the streams are
    // already closed above and these calls are expected to be no-ops.
    org.apache.commons.io.IOUtils.closeQuietly(inputStream);
    org.apache.commons.io.IOUtils.closeQuietly(tarStream);
    org.apache.commons.io.IOUtils.closeQuietly(compressionStream);
    org.apache.commons.io.IOUtils.closeQuietly(md5OutputStream);
    org.apache.commons.io.IOUtils.closeQuietly(outputStream);
}
在第二个方法中,我们把文件从临时位置移动到最终的目标位置。
// Moves the staged file to its destination path and then marks it read-only.
// An IOException from the move is rethrown as
// Exception("Unable to move to destination.", cause).
try {
    final File stagedFile = new File(getFilePath(objectm));
    final File destinationFile = new File(sDestinationFile);
    FileUtils.moveFile(stagedFile, destinationFile);
    // The boolean result of setReadOnly() is not inspected.
    destinationFile.setReadOnly();
} catch (IOException moveFailure) {
    throw new Exception("Unable to move to destination.", moveFailure);
}
可以改用 LZ4,它是目前速度最快的压缩算法之一。
代码示例如下。
// Round-trip example: compress a small byte array with LZ4, then restore it
// with both decompressor flavours.
LZ4Factory factory = LZ4Factory.fastestInstance();

// Use the charset constant rather than the "UTF-8" name: no lookup by name
// and no checked UnsupportedEncodingException to handle.
byte[] data = "12345345234572".getBytes(java.nio.charset.StandardCharsets.UTF_8);
final int decompressedLength = data.length;

// compress data
LZ4Compressor compressor = factory.fastCompressor();
// Size the output buffer for the worst case reported by the compressor.
int maxCompressedLength = compressor.maxCompressedLength(decompressedLength);
byte[] compressed = new byte[maxCompressedLength];
int compressedLength = compressor.compress(data, 0, decompressedLength, compressed, 0, maxCompressedLength);

// decompress data
// - method 1: when the decompressed length is known
LZ4FastDecompressor decompressor = factory.fastDecompressor();
byte[] restored = new byte[decompressedLength];
int compressedLength2 = decompressor.decompress(compressed, 0, restored, 0, decompressedLength);
// compressedLength == compressedLength2

// - method 2: when the compressed length is known (a little slower)
// the destination buffer needs to be over-sized; here `restored` is reused,
// which is already sized to the known decompressed length
LZ4SafeDecompressor decompressor2 = factory.safeDecompressor();
int decompressedLength2 = decompressor2.decompress(compressed, 0, compressedLength, restored, 0);
// decompressedLength == decompressedLength2
我正在开发一个 Java 应用程序,需要把文件归档为 tar/zip 格式,也就是把 csv 文件打包成 tar/zip。现在我想找到更快完成这项工作的方法,因为我必须处理数百万个文件。
我目前使用的代码如下,请问怎样才能让它更快?用到了以下库:
FileUtils,来自 org.apache.commons.io
TarArchiveEntry,来自 org.apache.commons.compress.archivers.tar
// Stages one object as a single-entry tar file (optionally compressed) and
// records its MD5 checksum, timestamp and on-disk size on `objectm`.
// Streams are chained as: tar -> buffer -> MD5 digest -> [compression] -> file.
// Any failure is rethrown as Exception("Exception: Creating tar", cause).
try {
    // Create staging file output stream
    File temp = new File(getFilePath(objectm));
    log.debug("temping " + objectm.getPath());
    outputStream = new FileOutputStream(temp);

    // Create the compression pass-thru stream (only when compression is enabled)
    if (isCompressionEnabled) {
        compressionStream = new CompressionStream(outputStream, getCompressionLevel(objectm));
    }

    // Create MD5 hash; the digest sees the tar bytes before they reach the
    // compression stream (or the file, when compression is disabled)
    final MessageDigest outputDigest = MessageDigest.getInstance("MD5");
    md5OutputStream = new DigestOutputStream(
            isCompressionEnabled ? compressionStream : outputStream, outputDigest);

    // Create tar stream
    tarStream = new TarArchiveOutputStream(new BufferedOutputStream(md5OutputStream));
    tarStream.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);
    tarStream.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_STAR);

    // tar the first object
    TarArchiveEntry entry = new TarArchiveEntry(objectm.getHierarchy());
    entry.setSize(objectm.getOriginalSize());
    entry.setModTime(objectm.getLastModified().getMillis());
    tarStream.putArchiveEntry(entry);
    org.apache.commons.io.IOUtils.copyLarge(inputStream, tarStream);
    // Every putArchiveEntry must be paired with closeArchiveEntry;
    // finish() refuses to complete an archive that still has an open entry.
    tarStream.closeArchiveEntry();
    tarStream.finish();
    // Close before reading the digest and file size: until then, bytes may
    // still sit in the BufferedOutputStream (and the compression stream), so
    // neither the MD5 nor the size on disk would cover the whole archive.
    tarStream.close();

    // Collect properties to return
    String digest = Hex.encodeHexString(outputDigest.digest());
    objectm.setChecksum(digest);
    objectm.setDate(DateTime.now());
    objectm.setCompressSize(FileUtils.sizeOf(temp));
    log.debug("Completed.");
} catch (Exception e) {
    throw new Exception("Exception: Creating tar", e);
} finally {
    // Best-effort cleanup for the failure path; on success the streams are
    // already closed above and these calls are expected to be no-ops.
    org.apache.commons.io.IOUtils.closeQuietly(inputStream);
    org.apache.commons.io.IOUtils.closeQuietly(tarStream);
    org.apache.commons.io.IOUtils.closeQuietly(compressionStream);
    org.apache.commons.io.IOUtils.closeQuietly(md5OutputStream);
    org.apache.commons.io.IOUtils.closeQuietly(outputStream);
}
在第二个方法中,我们把文件从临时位置移动到最终的目标位置。
// Moves the staged file to its destination path and then marks it read-only.
// An IOException from the move is rethrown as
// Exception("Unable to move to destination.", cause).
try {
    final File stagedFile = new File(getFilePath(objectm));
    final File destinationFile = new File(sDestinationFile);
    FileUtils.moveFile(stagedFile, destinationFile);
    // The boolean result of setReadOnly() is not inspected.
    destinationFile.setReadOnly();
} catch (IOException moveFailure) {
    throw new Exception("Unable to move to destination.", moveFailure);
}
可以改用 LZ4,它是目前速度最快的压缩算法之一。代码示例如下。
// Round-trip example: compress a small byte array with LZ4, then restore it
// with both decompressor flavours.
LZ4Factory factory = LZ4Factory.fastestInstance();

// Use the charset constant rather than the "UTF-8" name: no lookup by name
// and no checked UnsupportedEncodingException to handle.
byte[] data = "12345345234572".getBytes(java.nio.charset.StandardCharsets.UTF_8);
final int decompressedLength = data.length;

// compress data
LZ4Compressor compressor = factory.fastCompressor();
// Size the output buffer for the worst case reported by the compressor.
int maxCompressedLength = compressor.maxCompressedLength(decompressedLength);
byte[] compressed = new byte[maxCompressedLength];
int compressedLength = compressor.compress(data, 0, decompressedLength, compressed, 0, maxCompressedLength);

// decompress data
// - method 1: when the decompressed length is known
LZ4FastDecompressor decompressor = factory.fastDecompressor();
byte[] restored = new byte[decompressedLength];
int compressedLength2 = decompressor.decompress(compressed, 0, restored, 0, decompressedLength);
// compressedLength == compressedLength2

// - method 2: when the compressed length is known (a little slower)
// the destination buffer needs to be over-sized; here `restored` is reused,
// which is already sized to the known decompressed length
LZ4SafeDecompressor decompressor2 = factory.safeDecompressor();
int decompressedLength2 = decompressor2.decompress(compressed, 0, compressedLength, restored, 0);
// decompressedLength == decompressedLength2