java.io.UTFDataFormatException 正在读取文件条目名称
java.io.UTFDataFormatException while reading file entry name
我正在尝试使用 DataInputStream / DataOutputStream 在另一个非 jar 文件中“打包”多个文件(以前在 jar 存档中)。
当时的想法是:
First int = number of entries
First UTF is the first entry name
Second Int is entry byte array length (entry size)
Then repeat for every entry.
代码:
public static void main(String[] args) throws Throwable {
test();
System.out.println("========================================================================================");
final DataInputStream dataInputStream = new DataInputStream(new FileInputStream(new File("C:\Users\Admin\Desktop\randomJarOut")));
for (int int1 = dataInputStream.readInt(), i = 0; i < int1; ++i) {
final String utf = dataInputStream.readUTF();
System.out.println("Entry name: " + utf);
final byte[] array = new byte[dataInputStream.readInt()];
for (int j = 0; j < array.length; ++j) {
array[j] = dataInputStream.readByte();
}
System.out.println("Entry bytes length: " + array.length);
}
}
拆开原始包装并包装成新包装:
private static void test() throws Throwable {
JarInputStream stream = new JarInputStream(new FileInputStream(new File("C:\Users\Admin\Desktop\randomJar.jar")));
JarInputStream stream1 = new JarInputStream(new FileInputStream(new File("C:\Users\Admin\Desktop\randomJar.jar")));
final byte[] buffer = new byte[2048];
final DataOutputStream outputStream = new DataOutputStream(new FileOutputStream(new File("C:\Users\Admin\Desktop\randomJarOut")));
int entryCount = 0;
for (ZipEntry entry; (entry = stream.getNextJarEntry()) != null; ) {
entryCount++;
}
outputStream.writeInt(entryCount);
for (JarEntry entry; (entry = stream1.getNextJarEntry()) != null; ) {
int entryRealSize = stream1.read(buffer);
if (!(entryRealSize == -1)) {
System.out.println("Writing: " + entry.getName() + " Length: " + entryRealSize);
outputStream.writeUTF(entry.getName());
outputStream.writeInt(entryRealSize);
for (int len = stream1.read(buffer); len != -1; len = stream1.read(buffer)) {
outputStream.write(buffer, 0, len);
}
}
}
outputStream.flush();
outputStream.close();
}
显然我能够毫无问题地解压第一个条目,第二个和其他条目:
Entry name: META-INF/services/org.jd.gui.spi.ContainerFactory
Entry bytes length: 434
Exception in thread "main" java.io.UTFDataFormatException: malformed input around byte 279
at java.io.DataInputStream.readUTF(DataInputStream.java:656)
at java.io.DataInputStream.readUTF(DataInputStream.java:564)
at it.princekin.esercizio.Bootstrap.main(Bootstrap.java:29)
Disconnected from the target VM, address: '127.0.0.1:54384', transport: 'socket'
Process finished with exit code 1
有谁知道如何解决这个问题?为什么这适用于第一个条目而不适用于其他条目?
问题可能在于您混合的不是互惠 read/write 方法:
- writer方法用
outputStream.writeInt(entryCount)
写入,main方法用dataInputStream.readInt()
读取。没关系。
- writer方法用
outputStream.writeUTF(entry.getName())
写入,main方法用dataInputStream.readUTF()
读取。没关系。
- writer方法用
outputStream.writeInt(entryRealSize)
写入,main方法用dataInputStream.readInt()
读取。没关系。
- writer方法用
outputStream.write(buffer, 0, len)
写,main方法用dataInputStream.readByte()
读几次。错误。
如果用 write(buffer, offset, len)
写入一个字节数组,则必须用 read(buffer, offset, len)
读取它,因为 write(buffer, offset, len)
恰好将 len
个物理字节写入输出流,而 writeByte
(readByte
的对应部分)写了很多关于对象类型的元数据开销,然后是它的状态变量。
writer 方法中的错误
writer 方法还有一个主要错误:它最多调用三次stream1.read(buffer)
,但只使用一次buffer
内容。结果是文件的实际大小实际上写入了输出流元数据,但后面只有一小部分数据。
如果在将输入文件写入输出流之前需要知道输入文件的大小,您有两种选择:
- 要么选择足够大的缓冲区大小(例如 204800),这样您就可以在 一次读取 中读取整个文件,并在 一次写入中写。
- 或者将读取算法与写入算法分开:首先是读取整个文件并将其存储在内存中的方法(例如 byte[]),然后是将 byte[] 写入输出流的另一种方法.
全固定解
我已经修复了你的程序,每个任务都有特定的、解耦的方法。该过程包括将输入文件解析为内存模型,根据您的自定义定义将其写入中间文件,然后再读回。
public static void main(String[] args)
throws Throwable
{
File inputJarFile=new File(args[0]);
File intermediateFile=new File(args[1]);
List<FileData> fileDataEntries=parse(inputJarFile);
write(fileDataEntries, intermediateFile);
read(intermediateFile);
}
public static List<FileData> parse(File inputJarFile)
throws IOException
{
List<FileData> list=new ArrayList<>();
try (JarInputStream stream=new JarInputStream(new FileInputStream(inputJarFile)))
{
for (ZipEntry entry; (entry=stream.getNextJarEntry()) != null;)
{
byte[] data=readAllBytes(stream);
if (data.length > 0)
{
list.add(new FileData(entry.getName(), data));
}
stream.closeEntry();
}
}
return list;
}
public static void write(List<FileData> fileDataEntries, File output)
throws Throwable
{
try (DataOutputStream outputStream=new DataOutputStream(new FileOutputStream(output)))
{
int entryCount=fileDataEntries.size();
outputStream.writeInt(entryCount);
for (FileData fileData : fileDataEntries)
{
int entryRealSize=fileData.getData().length;
{
System.out.println("Writing: " + fileData.getName() + " Length: " + entryRealSize);
outputStream.writeUTF(fileData.getName());
outputStream.writeInt(entryRealSize);
outputStream.write(fileData.getData());
}
}
outputStream.flush();
}
}
public static void read(File intermediateFile)
throws IOException
{
try (DataInputStream dataInputStream=new DataInputStream(new FileInputStream(intermediateFile)))
{
for (int entryCount=dataInputStream.readInt(), i=0; i < entryCount; i++)
{
String utf=dataInputStream.readUTF();
int entrySize=dataInputStream.readInt();
System.out.println("Entry name: " + utf + " size: " + entrySize);
byte[] data=readFixedLengthBuffer(dataInputStream, entrySize);
System.out.println("Entry bytes length: " + data.length);
}
}
}
private static byte[] readAllBytes(InputStream input)
throws IOException
{
byte[] buffer=new byte[4096];
byte[] total=new byte[0];
int len;
do
{
len=input.read(buffer);
if (len > 0)
{
byte[] total0=total;
total=new byte[total0.length + len];
System.arraycopy(total0, 0, total, 0, total0.length);
System.arraycopy(buffer, 0, total, total0.length, len);
}
}
while (len >= 0);
return total;
}
private static byte[] readFixedLengthBuffer(InputStream input, int size)
throws IOException
{
byte[] buffer=new byte[size];
int pos=0;
int len;
do
{
len=input.read(buffer, pos, size - pos);
if (len > 0)
{
pos+=len;
}
}
while (pos < size);
return buffer;
}
private static class FileData
{
private final String name;
private final byte[] data;
public FileData(String name, byte[] data)
{
super();
this.name=name;
this.data=data;
}
public String getName()
{
return this.name;
}
public byte[] getData()
{
return this.data;
}
}
我对此的看法是 jar 文件(实际上是一个 zip 文件)有一个 Central Directory,它只能用 ZipFile(或 JarFile)class 读取。
中央目录包含有关条目的一些数据,例如大小。
我认为 ZipInputStream 不会读取中央目录,因此 ZipEntry 不会包含大小(返回 -1,因为它是未知的),而从 ZipFile class 读取 ZipEntry 会。
因此,如果您首先使用 ZipFile 读取每个条目的大小并将其存储在地图中,则在使用 ZipInputStream 读取数据时可以轻松获取它。
This page 也包括一些很好的例子。
所以我的代码版本是:
import java.io.*;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;
public class JarRepacker {
public static void main(String[] args) throws Throwable {
JarRepacker repacker = new JarRepacker();
repacker.repackJarToMyFileFormat("commons-cli-1.3.1.jar", "randomJarOut.bin");
repacker.readMyFileFormat("randomJarOut.bin");
}
private void repackJarToMyFileFormat(String inputJar, String outputFile) throws Throwable {
int entryCount;
Map<String, Integer> sizeMap = new HashMap<>();
try (ZipFile zipFile = new ZipFile(inputJar)) {
entryCount = zipFile.size();
zipFile.entries().asIterator().forEachRemaining(e -> sizeMap.put(e.getName(), (int) e.getSize()));
}
try (final DataOutputStream outputStream = new DataOutputStream(new FileOutputStream(outputFile))) {
outputStream.writeInt(entryCount);
try (ZipInputStream stream = new ZipInputStream(new BufferedInputStream(new FileInputStream(inputJar)))) {
ZipEntry entry;
final byte[] buffer = new byte[2048];
while ((entry = stream.getNextEntry()) != null) {
final String name = entry.getName();
outputStream.writeUTF(name);
final Integer size = sizeMap.get(name);
outputStream.writeInt(size);
//System.out.println("Writing: " + name + " Size: " + size);
int len;
while ((len = stream.read(buffer)) > 0) {
outputStream.write(buffer, 0, len);
}
}
}
outputStream.flush();
}
}
private void readMyFileFormat(String fileToRead) throws IOException {
try (DataInputStream dataInputStream
= new DataInputStream(new BufferedInputStream(new FileInputStream(fileToRead)))) {
int entries = dataInputStream.readInt();
System.out.println("Entries in file: " + entries);
for (int i = 1; i <= entries; i++) {
final String name = dataInputStream.readUTF();
final int size = dataInputStream.readInt();
System.out.printf("[%3d] Reading: %s of size: %d%n", i, name, size);
final byte[] array = new byte[size];
for (int j = 0; j < array.length; ++j) {
array[j] = dataInputStream.readByte();
}
// Still need to do something with this array...
}
}
}
}
我正在尝试使用 DataInputStream / DataOutputStream 在另一个非 jar 文件中“打包”多个文件(以前在 jar 存档中)。
当时的想法是:
First int = number of entries
First UTF is the first entry name
Second Int is entry byte array length (entry size)
Then repeat for every entry.
代码:
public static void main(String[] args) throws Throwable {
test();
System.out.println("========================================================================================");
final DataInputStream dataInputStream = new DataInputStream(new FileInputStream(new File("C:\Users\Admin\Desktop\randomJarOut")));
for (int int1 = dataInputStream.readInt(), i = 0; i < int1; ++i) {
final String utf = dataInputStream.readUTF();
System.out.println("Entry name: " + utf);
final byte[] array = new byte[dataInputStream.readInt()];
for (int j = 0; j < array.length; ++j) {
array[j] = dataInputStream.readByte();
}
System.out.println("Entry bytes length: " + array.length);
}
}
拆开原始包装并包装成新包装:
private static void test() throws Throwable {
JarInputStream stream = new JarInputStream(new FileInputStream(new File("C:\Users\Admin\Desktop\randomJar.jar")));
JarInputStream stream1 = new JarInputStream(new FileInputStream(new File("C:\Users\Admin\Desktop\randomJar.jar")));
final byte[] buffer = new byte[2048];
final DataOutputStream outputStream = new DataOutputStream(new FileOutputStream(new File("C:\Users\Admin\Desktop\randomJarOut")));
int entryCount = 0;
for (ZipEntry entry; (entry = stream.getNextJarEntry()) != null; ) {
entryCount++;
}
outputStream.writeInt(entryCount);
for (JarEntry entry; (entry = stream1.getNextJarEntry()) != null; ) {
int entryRealSize = stream1.read(buffer);
if (!(entryRealSize == -1)) {
System.out.println("Writing: " + entry.getName() + " Length: " + entryRealSize);
outputStream.writeUTF(entry.getName());
outputStream.writeInt(entryRealSize);
for (int len = stream1.read(buffer); len != -1; len = stream1.read(buffer)) {
outputStream.write(buffer, 0, len);
}
}
}
outputStream.flush();
outputStream.close();
}
显然我能够毫无问题地解压第一个条目,第二个和其他条目:
Entry name: META-INF/services/org.jd.gui.spi.ContainerFactory
Entry bytes length: 434
Exception in thread "main" java.io.UTFDataFormatException: malformed input around byte 279
at java.io.DataInputStream.readUTF(DataInputStream.java:656)
at java.io.DataInputStream.readUTF(DataInputStream.java:564)
at it.princekin.esercizio.Bootstrap.main(Bootstrap.java:29)
Disconnected from the target VM, address: '127.0.0.1:54384', transport: 'socket'
Process finished with exit code 1
有谁知道如何解决这个问题?为什么这适用于第一个条目而不适用于其他条目?
问题可能在于您混合的不是互惠 read/write 方法:
- writer方法用
outputStream.writeInt(entryCount)
写入,main方法用dataInputStream.readInt()
读取。没关系。 - writer方法用
outputStream.writeUTF(entry.getName())
写入,main方法用dataInputStream.readUTF()
读取。没关系。 - writer方法用
outputStream.writeInt(entryRealSize)
写入,main方法用dataInputStream.readInt()
读取。没关系。 - writer方法用
outputStream.write(buffer, 0, len)
写,main方法用dataInputStream.readByte()
读几次。错误。
如果用 write(buffer, offset, len)
写入一个字节数组,则必须用 read(buffer, offset, len)
读取它,因为 write(buffer, offset, len)
恰好将 len
个物理字节写入输出流,而 writeByte
(readByte
的对应部分)写了很多关于对象类型的元数据开销,然后是它的状态变量。
writer 方法中的错误
writer 方法还有一个主要错误:它最多调用三次stream1.read(buffer)
,但只使用一次buffer
内容。结果是文件的实际大小实际上写入了输出流元数据,但后面只有一小部分数据。
如果在将输入文件写入输出流之前需要知道输入文件的大小,您有两种选择:
- 要么选择足够大的缓冲区大小(例如 204800),这样您就可以在 一次读取 中读取整个文件,并在 一次写入中写。
- 或者将读取算法与写入算法分开:首先是读取整个文件并将其存储在内存中的方法(例如 byte[]),然后是将 byte[] 写入输出流的另一种方法.
全固定解
我已经修复了你的程序,每个任务都有特定的、解耦的方法。该过程包括将输入文件解析为内存模型,根据您的自定义定义将其写入中间文件,然后再读回。
public static void main(String[] args)
throws Throwable
{
File inputJarFile=new File(args[0]);
File intermediateFile=new File(args[1]);
List<FileData> fileDataEntries=parse(inputJarFile);
write(fileDataEntries, intermediateFile);
read(intermediateFile);
}
public static List<FileData> parse(File inputJarFile)
throws IOException
{
List<FileData> list=new ArrayList<>();
try (JarInputStream stream=new JarInputStream(new FileInputStream(inputJarFile)))
{
for (ZipEntry entry; (entry=stream.getNextJarEntry()) != null;)
{
byte[] data=readAllBytes(stream);
if (data.length > 0)
{
list.add(new FileData(entry.getName(), data));
}
stream.closeEntry();
}
}
return list;
}
public static void write(List<FileData> fileDataEntries, File output)
throws Throwable
{
try (DataOutputStream outputStream=new DataOutputStream(new FileOutputStream(output)))
{
int entryCount=fileDataEntries.size();
outputStream.writeInt(entryCount);
for (FileData fileData : fileDataEntries)
{
int entryRealSize=fileData.getData().length;
{
System.out.println("Writing: " + fileData.getName() + " Length: " + entryRealSize);
outputStream.writeUTF(fileData.getName());
outputStream.writeInt(entryRealSize);
outputStream.write(fileData.getData());
}
}
outputStream.flush();
}
}
public static void read(File intermediateFile)
throws IOException
{
try (DataInputStream dataInputStream=new DataInputStream(new FileInputStream(intermediateFile)))
{
for (int entryCount=dataInputStream.readInt(), i=0; i < entryCount; i++)
{
String utf=dataInputStream.readUTF();
int entrySize=dataInputStream.readInt();
System.out.println("Entry name: " + utf + " size: " + entrySize);
byte[] data=readFixedLengthBuffer(dataInputStream, entrySize);
System.out.println("Entry bytes length: " + data.length);
}
}
}
private static byte[] readAllBytes(InputStream input)
throws IOException
{
byte[] buffer=new byte[4096];
byte[] total=new byte[0];
int len;
do
{
len=input.read(buffer);
if (len > 0)
{
byte[] total0=total;
total=new byte[total0.length + len];
System.arraycopy(total0, 0, total, 0, total0.length);
System.arraycopy(buffer, 0, total, total0.length, len);
}
}
while (len >= 0);
return total;
}
private static byte[] readFixedLengthBuffer(InputStream input, int size)
throws IOException
{
byte[] buffer=new byte[size];
int pos=0;
int len;
do
{
len=input.read(buffer, pos, size - pos);
if (len > 0)
{
pos+=len;
}
}
while (pos < size);
return buffer;
}
private static class FileData
{
private final String name;
private final byte[] data;
public FileData(String name, byte[] data)
{
super();
this.name=name;
this.data=data;
}
public String getName()
{
return this.name;
}
public byte[] getData()
{
return this.data;
}
}
我对此的看法是 jar 文件(实际上是一个 zip 文件)有一个 Central Directory,它只能用 ZipFile(或 JarFile)class 读取。 中央目录包含有关条目的一些数据,例如大小。
我认为 ZipInputStream 不会读取中央目录,因此 ZipEntry 不会包含大小(返回 -1,因为它是未知的),而从 ZipFile class 读取 ZipEntry 会。
因此,如果您首先使用 ZipFile 读取每个条目的大小并将其存储在地图中,则在使用 ZipInputStream 读取数据时可以轻松获取它。
This page 也包括一些很好的例子。
所以我的代码版本是:
import java.io.*;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;
public class JarRepacker {
public static void main(String[] args) throws Throwable {
JarRepacker repacker = new JarRepacker();
repacker.repackJarToMyFileFormat("commons-cli-1.3.1.jar", "randomJarOut.bin");
repacker.readMyFileFormat("randomJarOut.bin");
}
private void repackJarToMyFileFormat(String inputJar, String outputFile) throws Throwable {
int entryCount;
Map<String, Integer> sizeMap = new HashMap<>();
try (ZipFile zipFile = new ZipFile(inputJar)) {
entryCount = zipFile.size();
zipFile.entries().asIterator().forEachRemaining(e -> sizeMap.put(e.getName(), (int) e.getSize()));
}
try (final DataOutputStream outputStream = new DataOutputStream(new FileOutputStream(outputFile))) {
outputStream.writeInt(entryCount);
try (ZipInputStream stream = new ZipInputStream(new BufferedInputStream(new FileInputStream(inputJar)))) {
ZipEntry entry;
final byte[] buffer = new byte[2048];
while ((entry = stream.getNextEntry()) != null) {
final String name = entry.getName();
outputStream.writeUTF(name);
final Integer size = sizeMap.get(name);
outputStream.writeInt(size);
//System.out.println("Writing: " + name + " Size: " + size);
int len;
while ((len = stream.read(buffer)) > 0) {
outputStream.write(buffer, 0, len);
}
}
}
outputStream.flush();
}
}
private void readMyFileFormat(String fileToRead) throws IOException {
try (DataInputStream dataInputStream
= new DataInputStream(new BufferedInputStream(new FileInputStream(fileToRead)))) {
int entries = dataInputStream.readInt();
System.out.println("Entries in file: " + entries);
for (int i = 1; i <= entries; i++) {
final String name = dataInputStream.readUTF();
final int size = dataInputStream.readInt();
System.out.printf("[%3d] Reading: %s of size: %d%n", i, name, size);
final byte[] array = new byte[size];
for (int j = 0; j < array.length; ++j) {
array[j] = dataInputStream.readByte();
}
// Still need to do something with this array...
}
}
}
}