SevenZipSharp 无法解压某些 tar 档案
SevenZipSharp fails to unpack certain tar archives
我使用 SevenZipSharp 打包到 7z 档案以及从各种档案中解压缩。它多年来一直运作良好。
今天我有一个 .tgz 存档在第二阶段解包失败:
从 .tgz 中提取 .tar 有效,但解压 .tar 失败。受影响的只是这个单一档案。所有其他 .tgz 都可以正常工作。 .tar 本身没有问题,因为用 7-zip 软件解压也可以。
经过大量测试我和同事找到了原因:
我们必须调试 SevenZipSharp DLL 才能找到其中的故障。DLL 通过读取文件的前 16 个字节并将其与签名列表进行比较来检测档案的类型。这对大多数类型的档案是正确的,但对 .tar 档案是错误的,因为 .tar 文件的 header 以文件名开头(参见 TAR @ Wikipedia)。签名“ustar”(如果存在)位于偏移量 257 (0x0101) 处。
SevenZipSharp 知道这一点,也会检查该偏移量处的“ustar”,但只有在前面的签名检测失败时才会这样做。不幸的是,我们的 TAR 档案的名称是“x42202.tar”,而 .dmg 文件 (Apple Disk Image) 的 header 签名只由一个“x”组成(只使用一个字节作为签名,这有多愚蠢??)。因此文件类型检测其实“成功”了,只是检测结果是错误的:这个 .tar 被当成了 .dmg。
(我知道,链接的维基百科说,.dmg header 签名是“koly”,但我用在互联网上找到的下载的 .dmg 文件进行了确认。)
编辑 07.12.2021:签名实际上是“koly”,但这个所谓的 header 长度为 512 字节,并且位于文件的末尾。然而,SevenZipSharp 是在文件开头检查签名的。我测试过的大多数 .dmg 文件(但不是全部!)确实在开头有一个“x”,但我不知道为什么。也许它是一种非官方的 header(“x”似乎来自 MIME 类型“x-apple-diskimage”)。 - 编辑结束。
因此我们修改了 FileSignatureChecker.cs
中的代码以避免对 .tar 档案的错误档案类型检测。
您可以在下面找到原始代码和修改后的代码。
代码基于最新的 SevenZipSharp 版本,可以在 CodePlex 存档 (archive) 中找到。显然该项目已不再处于活跃开发状态:版本号多年来没有改变,而且如果它仍在维护,应该早已在 CodePlex 停用后迁移到别处。
更新2018-11-16
修复了修改后代码中的一个错误:此前即使找到了 enSpecialFormat,也不会返回它。
更新2021-12-16
该错误仍然存在于 github 存储库 https://github.com/squid-box/SevenZipSharp 中,这是 SevenZipSharp 项目的当前位置。已上传对错误代码进行重大返工的拉取请求,正在等待合并。
原码
/// <summary>
/// Determines the archive format of <paramref name="stream"/>. The leading SIGNATURE_SIZE bytes
/// are compared against the keys of Formats.InSignatureFormats first; if that yields nothing,
/// fixed-offset probes (SpecialDetect), an all-zero-tail heuristic for tar and finally a scan
/// for an embedded archive are tried, in that order.
/// </summary>
/// <param name="stream">Stream to inspect. It is repositioned with Seek during detection.</param>
/// <param name="offset">Set to 0, or to the byte position of the embedded archive signature found by the final scan.</param>
/// <param name="isExecutable">Set to true when the leading bytes match a PE signature, otherwise false.</param>
/// <returns>The detected format.</returns>
/// <exception cref="ArgumentException">
/// The stream is not readable, is shorter than SIGNATURE_SIZE, or no signature was found.
/// </exception>
public static InArchiveFormat CheckSignature (Stream stream, out int offset, out bool isExecutable)
{
offset = 0;
if (!stream.CanRead)
{
throw new ArgumentException ("The stream must be readable.");
}
if (stream.Length < SIGNATURE_SIZE)
{
throw new ArgumentException ("The stream is invalid.");
}
#region Get file signature
var signature = new byte[SIGNATURE_SIZE];
int bytesRequired = SIGNATURE_SIZE;
int index = 0;
stream.Seek (0, SeekOrigin.Begin);
// Read until the buffer is full. NOTE(review): the loop only ends when bytesRequired
// reaches 0, so a Read call that returns 0 would keep it spinning.
while (bytesRequired > 0)
{
int bytesRead = stream.Read (signature, index, bytesRequired);
bytesRequired -= bytesRead;
index += bytesRead;
}
// BitConverter.ToString renders the bytes as upper-case hex pairs separated by '-'
// ("XX-XX-..."), i.e. three characters per byte except for the last one.
string actualSignature = BitConverter.ToString (signature);
#endregion
InArchiveFormat suspectedFormat = InArchiveFormat.XZ; // any except PE and Cab
isExecutable = false;
foreach (string expectedSignature in Formats.InSignatureFormats.Keys)
{
// A signature matches at the very start, or - for Lzh only - after skipping the first
// 6 characters of the hex string (two bytes). '&&' binds tighter than '||'.
if (actualSignature.StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase) ||
actualSignature.Substring (6).StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase) &&
Formats.InSignatureFormats[expectedSignature] == InArchiveFormat.Lzh)
{
if (Formats.InSignatureFormats[expectedSignature] == InArchiveFormat.PE)
{
// PE is only remembered here; the embedded-archive scan at the end decides.
suspectedFormat = InArchiveFormat.PE;
isExecutable = true;
}
else
{
// Any other match returns immediately, so none of the checks below (including the
// tar probe at offset 257) run for a stream whose first bytes happen to match.
return Formats.InSignatureFormats[expectedSignature];
}
}
}
// Many Microsoft formats
if (actualSignature.StartsWith ("D0-CF-11-E0-A1-B1-1A-E1", StringComparison.OrdinalIgnoreCase))
{
// Cab is used only as a marker that differs from XZ, which enables the scan at the end.
suspectedFormat = InArchiveFormat.Cab; // != InArchiveFormat.XZ
}
#region SpecialDetect
// SpecialDetect is defined elsewhere; presumably it looks for the given format's signature
// at the given offset - TODO confirm.
// NOTE(review): the result of this tar probe is discarded, so it has no influence on the
// value returned by this method; only an ArgumentException from it is swallowed.
try
{
SpecialDetect (stream, 257, InArchiveFormat.Tar);
}
catch (ArgumentException) { }
if (SpecialDetect (stream, 0x8001, InArchiveFormat.Iso))
{
return InArchiveFormat.Iso;
}
if (SpecialDetect (stream, 0x8801, InArchiveFormat.Iso))
{
return InArchiveFormat.Iso;
}
if (SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
{
return InArchiveFormat.Iso;
}
// NOTE(review): same offset and format as the check directly above.
if (SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
{
return InArchiveFormat.Iso;
}
if (SpecialDetect (stream, 0x400, InArchiveFormat.Hfs))
{
return InArchiveFormat.Hfs;
}
#region Last resort for tar - can mistake
// Heuristic: a stream whose final 1024 bytes are all zero is reported as tar.
if (stream.Length >= 1024)
{
stream.Seek (-1024, SeekOrigin.End);
byte[] buf = new byte[1024];
// NOTE(review): the number of bytes actually read is not checked; bytes that were not
// read stay 0 in the freshly allocated buffer and count as zero bytes below.
stream.Read (buf, 0, 1024);
bool istar = true;
for (int i = 0; i < 1024; i++)
{
istar = istar && buf[i] == 0;
}
if (istar)
{
return InArchiveFormat.Tar;
}
}
#endregion
#endregion
#region Check if it is an SFX archive or a file with an embedded archive.
// Runs only when suspectedFormat was changed above (PE signature or the Microsoft marker).
if (suspectedFormat != InArchiveFormat.XZ)
{
#region Get first Min(stream.Length, SFX_SCAN_LENGTH) bytes
var scanLength = Math.Min (stream.Length, SFX_SCAN_LENGTH);
signature = new byte[scanLength];
bytesRequired = (int)scanLength;
index = 0;
stream.Seek (0, SeekOrigin.Begin);
while (bytesRequired > 0)
{
int bytesRead = stream.Read (signature, index, bytesRequired);
bytesRequired -= bytesRead;
index += bytesRead;
}
actualSignature = BitConverter.ToString (signature);
#endregion
// The formats are tried in this order; the first one whose signature occurs anywhere in
// the scanned bytes wins.
foreach (var format in new InArchiveFormat[]
{
InArchiveFormat.Zip,
InArchiveFormat.SevenZip,
InArchiveFormat.Rar,
InArchiveFormat.Cab,
InArchiveFormat.Arj
})
{
int pos = actualSignature.IndexOf (Formats.InSignatureFormatsReversed[format]);
if (pos > -1)
{
// Three hex-string characters per byte, so this converts the string index to a byte offset.
offset = pos / 3;
return format;
}
}
// Nothing
if (suspectedFormat == InArchiveFormat.PE)
{
return InArchiveFormat.PE;
}
}
#endregion
throw new ArgumentException ("The stream is invalid or no corresponding signature was found.");
}
修改后的代码
/// <summary>
/// Determines the archive format of <paramref name="stream"/> from its content.
/// </summary>
/// <remarks>
/// The leading SIGNATURE_SIZE bytes are compared against the known signatures first. Because a
/// tar header starts with a file name, those bytes can coincide with the signature of any other
/// format; therefore the fixed-offset probes and the tar heuristics are always evaluated, and a
/// tar hint combined with a ".tar" file name overrides the leading-signature match.
/// </remarks>
/// <param name="stream">Readable, seekable stream to inspect. It is repositioned during detection.</param>
/// <param name="offset">
/// Receives the byte offset of an embedded archive found by the SFX scan; 0 in every other case.
/// </param>
/// <param name="isExecutable">Receives true when the stream starts with a PE signature.</param>
/// <returns>The detected format.</returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
/// <exception cref="ArgumentException">
/// The stream is not readable, is shorter than SIGNATURE_SIZE, ends before the expected number
/// of bytes could be read, or no signature was found.
/// </exception>
public static InArchiveFormat CheckSignature (Stream stream, out int offset, out bool isExecutable)
{
    offset = 0;
    isExecutable = false;
    if (stream == null)
    {
        throw new ArgumentNullException ("stream");
    }
    if (!stream.CanRead)
    {
        throw new ArgumentException ("The stream must be readable.");
    }
    if (stream.Length < SIGNATURE_SIZE)
    {
        throw new ArgumentException ("The stream is invalid.");
    }

    #region Get file signature
    var signature = new byte[SIGNATURE_SIZE];
    stream.Seek (0, SeekOrigin.Begin);
    if (!TryFillBuffer (stream, signature))
    {
        throw new ArgumentException ("The stream is invalid.");
    }
    // Hex pairs separated by '-': three characters per byte.
    string actualSignature = BitConverter.ToString (signature);
    #endregion Get file signature

    InArchiveFormat suspectedFormat = InArchiveFormat.XZ; // any except PE and Cab
    InArchiveFormat? enDetectedFormat = null; // match on the leading signature
    InArchiveFormat? enSpecialFormat = null;  // match at a fixed offset inside the stream

    foreach (string expectedSignature in Formats.InSignatureFormats.Keys)
    {
        InArchiveFormat candidate = Formats.InSignatureFormats[expectedSignature];
        bool matchesAtStart = actualSignature.StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase);
        // Lzh only: the signature may also start two bytes (six hex-string characters) in.
        bool matchesLzh = !matchesAtStart
            && candidate == InArchiveFormat.Lzh
            && actualSignature.Substring (6).StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase);
        if (!matchesAtStart && !matchesLzh)
        {
            continue;
        }
        if (candidate == InArchiveFormat.PE)
        {
            // An executable may carry an embedded archive; decided by the SFX scan below.
            suspectedFormat = InArchiveFormat.PE;
            isExecutable = true;
        }
        else
        {
            enDetectedFormat = candidate;
            break;
        }
    }

    // Many Microsoft formats
    if (actualSignature.StartsWith ("D0-CF-11-E0-A1-B1-1A-E1", StringComparison.OrdinalIgnoreCase))
    {
        suspectedFormat = InArchiveFormat.Cab; // != InArchiveFormat.XZ
    }

    #region SpecialDetect
    if (SpecialDetect (stream, 257, InArchiveFormat.Tar))
    {
        enSpecialFormat = InArchiveFormat.Tar;
    }
    else if (SpecialDetect (stream, 0x8001, InArchiveFormat.Iso)
        || SpecialDetect (stream, 0x8801, InArchiveFormat.Iso)
        || SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
    {
        enSpecialFormat = InArchiveFormat.Iso;
    }
    else if (SpecialDetect (stream, 0x400, InArchiveFormat.Hfs))
    {
        enSpecialFormat = InArchiveFormat.Hfs;
    }

    #region Last resort for tar - can mistake
    // Weak hint: the final 1024 bytes of the stream are all zero.
    bool possiblyTar = false;
    if (stream.Length >= 1024)
    {
        var tail = new byte[1024];
        stream.Seek (-1024, SeekOrigin.End);
        // A short read must not count as "all zero": unread bytes of a new array are 0.
        possiblyTar = TryFillBuffer (stream, tail);
        for (int i = 0; possiblyTar && i < tail.Length; i++)
        {
            possiblyTar = tail[i] == 0;
        }
    }

    // TAR header starts with the filename of the archive.
    // The filename can be anything, including the Identifiers of the various archive formats.
    // This means that a TAR can be misinterpreted as any type of archive.
    // When the content hints at tar AND the file is named *.tar, tar wins over the
    // leading-signature match.
    if (enSpecialFormat == InArchiveFormat.Tar || possiblyTar)
    {
        var fileStream = stream as FileStream;
        // Ordinal comparison: a file extension is an identifier, not linguistic text.
        if (fileStream != null
            && fileStream.Name.EndsWith (".tar", StringComparison.OrdinalIgnoreCase))
        {
            enDetectedFormat = InArchiveFormat.Tar;
        }
    }
    #endregion Last resort for tar - can mistake

    if (enDetectedFormat.HasValue)
    {
        return enDetectedFormat.Value;
    }
    if (enSpecialFormat.HasValue)
    {
        return enSpecialFormat.Value;
    }
    #endregion SpecialDetect

    #region Check if it is an SFX archive or a file with an embedded archive.
    if (suspectedFormat != InArchiveFormat.XZ)
    {
        #region Get first Min(stream.Length, SFX_SCAN_LENGTH) bytes
        var scanLength = Math.Min (stream.Length, SFX_SCAN_LENGTH);
        signature = new byte[scanLength];
        stream.Seek (0, SeekOrigin.Begin);
        if (!TryFillBuffer (stream, signature))
        {
            throw new ArgumentException ("The stream is invalid.");
        }
        actualSignature = BitConverter.ToString (signature);
        #endregion Get first Min(stream.Length, SFX_SCAN_LENGTH) bytes

        // Tried in this order; the first signature found anywhere in the scanned bytes wins.
        foreach (var format in new InArchiveFormat[]
        {
            InArchiveFormat.Zip,
            InArchiveFormat.SevenZip,
            InArchiveFormat.Rar,
            InArchiveFormat.Cab,
            InArchiveFormat.Arj
        })
        {
            // Ordinal: the haystack is a machine-generated hex string, not linguistic text.
            int pos = actualSignature.IndexOf (Formats.InSignatureFormatsReversed[format], StringComparison.Ordinal);
            if (pos > -1)
            {
                // Three hex-string characters per byte.
                offset = pos / 3;
                return format;
            }
        }

        // Nothing
        if (suspectedFormat == InArchiveFormat.PE)
        {
            return InArchiveFormat.PE;
        }
    }
    #endregion Check if it is an SFX archive or a file with an embedded archive.

    throw new ArgumentException ("The stream is invalid or no corresponding signature was found.");
}

/// <summary>
/// Reads from the current position of <paramref name="stream"/> until <paramref name="buffer"/>
/// is completely filled.
/// </summary>
/// <returns>true when the buffer was filled; false when the stream ended first.</returns>
private static bool TryFillBuffer (Stream stream, byte[] buffer)
{
    int filled = 0;
    while (filled < buffer.Length)
    {
        int bytesRead = stream.Read (buffer, filled, buffer.Length - filled);
        if (bytesRead <= 0)
        {
            // End of stream: stop instead of looping forever.
            return false;
        }
        filled += bytesRead;
    }
    return true;
}
我使用 SevenZipSharp 打包到 7z 档案以及从各种档案中解压缩。它多年来一直运作良好。
今天我有一个 .tgz 存档在第二阶段解包失败:
从 .tgz 中提取 .tar 有效,但解压 .tar 失败。受影响的只是这个单一档案。所有其他 .tgz 都可以正常工作。 .tar 本身没有问题,因为用 7-zip 软件解压也可以。
经过大量测试我和同事找到了原因:
我们必须调试 SevenZipSharp DLL 才能找到其中的故障。DLL 通过读取文件的前 16 个字节并将其与签名列表进行比较来检测档案的类型。这对大多数类型的档案是正确的,但对 .tar 档案是错误的,因为 .tar 文件的 header 以文件名开头(参见 TAR @ Wikipedia)。签名“ustar”(如果存在)位于偏移量 257 (0x0101) 处。
SevenZipSharp 知道这一点,也会检查该偏移量处的“ustar”,但只有在前面的签名检测失败时才会这样做。不幸的是,我们的 TAR 档案的名称是“x42202.tar”,而 .dmg 文件 (Apple Disk Image) 的 header 签名只由一个“x”组成(只使用一个字节作为签名,这有多愚蠢??)。因此文件类型检测其实“成功”了,只是检测结果是错误的:这个 .tar 被当成了 .dmg。
(我知道,链接的维基百科说,.dmg header 签名是“koly”,但我用在互联网上找到的下载的 .dmg 文件进行了确认。)
编辑 07.12.2021:签名实际上是“koly”,但这个所谓的 header 长度为 512 字节,并且位于文件的末尾。然而,SevenZipSharp 是在文件开头检查签名的。我测试过的大多数 .dmg 文件(但不是全部!)确实在开头有一个“x”,但我不知道为什么。也许它是一种非官方的 header(“x”似乎来自 MIME 类型“x-apple-diskimage”)。 - 编辑结束。
因此我们修改了 FileSignatureChecker.cs
中的代码以避免对 .tar 档案的错误档案类型检测。
您可以在下面找到原始代码和修改后的代码。
代码基于最新的 SevenZipSharp 版本,可以在 CodePlex 存档 (archive) 中找到。显然该项目已不再处于活跃开发状态:版本号多年来没有改变,而且如果它仍在维护,应该早已在 CodePlex 停用后迁移到别处。
更新2018-11-16
修复了修改后代码中的一个错误:此前即使找到了 enSpecialFormat,也不会返回它。
更新2021-12-16
该错误仍然存在于 github 存储库 https://github.com/squid-box/SevenZipSharp 中,这是 SevenZipSharp 项目的当前位置。已上传对错误代码进行重大返工的拉取请求,正在等待合并。
原码
/// <summary>
/// Determines the archive format of <paramref name="stream"/>. The leading SIGNATURE_SIZE bytes
/// are compared against the keys of Formats.InSignatureFormats first; if that yields nothing,
/// fixed-offset probes (SpecialDetect), an all-zero-tail heuristic for tar and finally a scan
/// for an embedded archive are tried, in that order.
/// </summary>
/// <param name="stream">Stream to inspect. It is repositioned with Seek during detection.</param>
/// <param name="offset">Set to 0, or to the byte position of the embedded archive signature found by the final scan.</param>
/// <param name="isExecutable">Set to true when the leading bytes match a PE signature, otherwise false.</param>
/// <returns>The detected format.</returns>
/// <exception cref="ArgumentException">
/// The stream is not readable, is shorter than SIGNATURE_SIZE, or no signature was found.
/// </exception>
public static InArchiveFormat CheckSignature (Stream stream, out int offset, out bool isExecutable)
{
offset = 0;
if (!stream.CanRead)
{
throw new ArgumentException ("The stream must be readable.");
}
if (stream.Length < SIGNATURE_SIZE)
{
throw new ArgumentException ("The stream is invalid.");
}
#region Get file signature
var signature = new byte[SIGNATURE_SIZE];
int bytesRequired = SIGNATURE_SIZE;
int index = 0;
stream.Seek (0, SeekOrigin.Begin);
// Read until the buffer is full. NOTE(review): the loop only ends when bytesRequired
// reaches 0, so a Read call that returns 0 would keep it spinning.
while (bytesRequired > 0)
{
int bytesRead = stream.Read (signature, index, bytesRequired);
bytesRequired -= bytesRead;
index += bytesRead;
}
// BitConverter.ToString renders the bytes as upper-case hex pairs separated by '-'
// ("XX-XX-..."), i.e. three characters per byte except for the last one.
string actualSignature = BitConverter.ToString (signature);
#endregion
InArchiveFormat suspectedFormat = InArchiveFormat.XZ; // any except PE and Cab
isExecutable = false;
foreach (string expectedSignature in Formats.InSignatureFormats.Keys)
{
// A signature matches at the very start, or - for Lzh only - after skipping the first
// 6 characters of the hex string (two bytes). '&&' binds tighter than '||'.
if (actualSignature.StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase) ||
actualSignature.Substring (6).StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase) &&
Formats.InSignatureFormats[expectedSignature] == InArchiveFormat.Lzh)
{
if (Formats.InSignatureFormats[expectedSignature] == InArchiveFormat.PE)
{
// PE is only remembered here; the embedded-archive scan at the end decides.
suspectedFormat = InArchiveFormat.PE;
isExecutable = true;
}
else
{
// Any other match returns immediately, so none of the checks below (including the
// tar probe at offset 257) run for a stream whose first bytes happen to match.
return Formats.InSignatureFormats[expectedSignature];
}
}
}
// Many Microsoft formats
if (actualSignature.StartsWith ("D0-CF-11-E0-A1-B1-1A-E1", StringComparison.OrdinalIgnoreCase))
{
// Cab is used only as a marker that differs from XZ, which enables the scan at the end.
suspectedFormat = InArchiveFormat.Cab; // != InArchiveFormat.XZ
}
#region SpecialDetect
// SpecialDetect is defined elsewhere; presumably it looks for the given format's signature
// at the given offset - TODO confirm.
// NOTE(review): the result of this tar probe is discarded, so it has no influence on the
// value returned by this method; only an ArgumentException from it is swallowed.
try
{
SpecialDetect (stream, 257, InArchiveFormat.Tar);
}
catch (ArgumentException) { }
if (SpecialDetect (stream, 0x8001, InArchiveFormat.Iso))
{
return InArchiveFormat.Iso;
}
if (SpecialDetect (stream, 0x8801, InArchiveFormat.Iso))
{
return InArchiveFormat.Iso;
}
if (SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
{
return InArchiveFormat.Iso;
}
// NOTE(review): same offset and format as the check directly above.
if (SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
{
return InArchiveFormat.Iso;
}
if (SpecialDetect (stream, 0x400, InArchiveFormat.Hfs))
{
return InArchiveFormat.Hfs;
}
#region Last resort for tar - can mistake
// Heuristic: a stream whose final 1024 bytes are all zero is reported as tar.
if (stream.Length >= 1024)
{
stream.Seek (-1024, SeekOrigin.End);
byte[] buf = new byte[1024];
// NOTE(review): the number of bytes actually read is not checked; bytes that were not
// read stay 0 in the freshly allocated buffer and count as zero bytes below.
stream.Read (buf, 0, 1024);
bool istar = true;
for (int i = 0; i < 1024; i++)
{
istar = istar && buf[i] == 0;
}
if (istar)
{
return InArchiveFormat.Tar;
}
}
#endregion
#endregion
#region Check if it is an SFX archive or a file with an embedded archive.
// Runs only when suspectedFormat was changed above (PE signature or the Microsoft marker).
if (suspectedFormat != InArchiveFormat.XZ)
{
#region Get first Min(stream.Length, SFX_SCAN_LENGTH) bytes
var scanLength = Math.Min (stream.Length, SFX_SCAN_LENGTH);
signature = new byte[scanLength];
bytesRequired = (int)scanLength;
index = 0;
stream.Seek (0, SeekOrigin.Begin);
while (bytesRequired > 0)
{
int bytesRead = stream.Read (signature, index, bytesRequired);
bytesRequired -= bytesRead;
index += bytesRead;
}
actualSignature = BitConverter.ToString (signature);
#endregion
// The formats are tried in this order; the first one whose signature occurs anywhere in
// the scanned bytes wins.
foreach (var format in new InArchiveFormat[]
{
InArchiveFormat.Zip,
InArchiveFormat.SevenZip,
InArchiveFormat.Rar,
InArchiveFormat.Cab,
InArchiveFormat.Arj
})
{
int pos = actualSignature.IndexOf (Formats.InSignatureFormatsReversed[format]);
if (pos > -1)
{
// Three hex-string characters per byte, so this converts the string index to a byte offset.
offset = pos / 3;
return format;
}
}
// Nothing
if (suspectedFormat == InArchiveFormat.PE)
{
return InArchiveFormat.PE;
}
}
#endregion
throw new ArgumentException ("The stream is invalid or no corresponding signature was found.");
}
修改后的代码
/// <summary>
/// Determines the archive format of <paramref name="stream"/> from its content.
/// </summary>
/// <remarks>
/// The leading SIGNATURE_SIZE bytes are compared against the known signatures first. Because a
/// tar header starts with a file name, those bytes can coincide with the signature of any other
/// format; therefore the fixed-offset probes and the tar heuristics are always evaluated, and a
/// tar hint combined with a ".tar" file name overrides the leading-signature match.
/// </remarks>
/// <param name="stream">Readable, seekable stream to inspect. It is repositioned during detection.</param>
/// <param name="offset">
/// Receives the byte offset of an embedded archive found by the SFX scan; 0 in every other case.
/// </param>
/// <param name="isExecutable">Receives true when the stream starts with a PE signature.</param>
/// <returns>The detected format.</returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
/// <exception cref="ArgumentException">
/// The stream is not readable, is shorter than SIGNATURE_SIZE, ends before the expected number
/// of bytes could be read, or no signature was found.
/// </exception>
public static InArchiveFormat CheckSignature (Stream stream, out int offset, out bool isExecutable)
{
    offset = 0;
    isExecutable = false;
    if (stream == null)
    {
        throw new ArgumentNullException ("stream");
    }
    if (!stream.CanRead)
    {
        throw new ArgumentException ("The stream must be readable.");
    }
    if (stream.Length < SIGNATURE_SIZE)
    {
        throw new ArgumentException ("The stream is invalid.");
    }

    #region Get file signature
    var signature = new byte[SIGNATURE_SIZE];
    stream.Seek (0, SeekOrigin.Begin);
    if (!TryFillBuffer (stream, signature))
    {
        throw new ArgumentException ("The stream is invalid.");
    }
    // Hex pairs separated by '-': three characters per byte.
    string actualSignature = BitConverter.ToString (signature);
    #endregion Get file signature

    InArchiveFormat suspectedFormat = InArchiveFormat.XZ; // any except PE and Cab
    InArchiveFormat? enDetectedFormat = null; // match on the leading signature
    InArchiveFormat? enSpecialFormat = null;  // match at a fixed offset inside the stream

    foreach (string expectedSignature in Formats.InSignatureFormats.Keys)
    {
        InArchiveFormat candidate = Formats.InSignatureFormats[expectedSignature];
        bool matchesAtStart = actualSignature.StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase);
        // Lzh only: the signature may also start two bytes (six hex-string characters) in.
        bool matchesLzh = !matchesAtStart
            && candidate == InArchiveFormat.Lzh
            && actualSignature.Substring (6).StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase);
        if (!matchesAtStart && !matchesLzh)
        {
            continue;
        }
        if (candidate == InArchiveFormat.PE)
        {
            // An executable may carry an embedded archive; decided by the SFX scan below.
            suspectedFormat = InArchiveFormat.PE;
            isExecutable = true;
        }
        else
        {
            enDetectedFormat = candidate;
            break;
        }
    }

    // Many Microsoft formats
    if (actualSignature.StartsWith ("D0-CF-11-E0-A1-B1-1A-E1", StringComparison.OrdinalIgnoreCase))
    {
        suspectedFormat = InArchiveFormat.Cab; // != InArchiveFormat.XZ
    }

    #region SpecialDetect
    if (SpecialDetect (stream, 257, InArchiveFormat.Tar))
    {
        enSpecialFormat = InArchiveFormat.Tar;
    }
    else if (SpecialDetect (stream, 0x8001, InArchiveFormat.Iso)
        || SpecialDetect (stream, 0x8801, InArchiveFormat.Iso)
        || SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
    {
        enSpecialFormat = InArchiveFormat.Iso;
    }
    else if (SpecialDetect (stream, 0x400, InArchiveFormat.Hfs))
    {
        enSpecialFormat = InArchiveFormat.Hfs;
    }

    #region Last resort for tar - can mistake
    // Weak hint: the final 1024 bytes of the stream are all zero.
    bool possiblyTar = false;
    if (stream.Length >= 1024)
    {
        var tail = new byte[1024];
        stream.Seek (-1024, SeekOrigin.End);
        // A short read must not count as "all zero": unread bytes of a new array are 0.
        possiblyTar = TryFillBuffer (stream, tail);
        for (int i = 0; possiblyTar && i < tail.Length; i++)
        {
            possiblyTar = tail[i] == 0;
        }
    }

    // TAR header starts with the filename of the archive.
    // The filename can be anything, including the Identifiers of the various archive formats.
    // This means that a TAR can be misinterpreted as any type of archive.
    // When the content hints at tar AND the file is named *.tar, tar wins over the
    // leading-signature match.
    if (enSpecialFormat == InArchiveFormat.Tar || possiblyTar)
    {
        var fileStream = stream as FileStream;
        // Ordinal comparison: a file extension is an identifier, not linguistic text.
        if (fileStream != null
            && fileStream.Name.EndsWith (".tar", StringComparison.OrdinalIgnoreCase))
        {
            enDetectedFormat = InArchiveFormat.Tar;
        }
    }
    #endregion Last resort for tar - can mistake

    if (enDetectedFormat.HasValue)
    {
        return enDetectedFormat.Value;
    }
    if (enSpecialFormat.HasValue)
    {
        return enSpecialFormat.Value;
    }
    #endregion SpecialDetect

    #region Check if it is an SFX archive or a file with an embedded archive.
    if (suspectedFormat != InArchiveFormat.XZ)
    {
        #region Get first Min(stream.Length, SFX_SCAN_LENGTH) bytes
        var scanLength = Math.Min (stream.Length, SFX_SCAN_LENGTH);
        signature = new byte[scanLength];
        stream.Seek (0, SeekOrigin.Begin);
        if (!TryFillBuffer (stream, signature))
        {
            throw new ArgumentException ("The stream is invalid.");
        }
        actualSignature = BitConverter.ToString (signature);
        #endregion Get first Min(stream.Length, SFX_SCAN_LENGTH) bytes

        // Tried in this order; the first signature found anywhere in the scanned bytes wins.
        foreach (var format in new InArchiveFormat[]
        {
            InArchiveFormat.Zip,
            InArchiveFormat.SevenZip,
            InArchiveFormat.Rar,
            InArchiveFormat.Cab,
            InArchiveFormat.Arj
        })
        {
            // Ordinal: the haystack is a machine-generated hex string, not linguistic text.
            int pos = actualSignature.IndexOf (Formats.InSignatureFormatsReversed[format], StringComparison.Ordinal);
            if (pos > -1)
            {
                // Three hex-string characters per byte.
                offset = pos / 3;
                return format;
            }
        }

        // Nothing
        if (suspectedFormat == InArchiveFormat.PE)
        {
            return InArchiveFormat.PE;
        }
    }
    #endregion Check if it is an SFX archive or a file with an embedded archive.

    throw new ArgumentException ("The stream is invalid or no corresponding signature was found.");
}

/// <summary>
/// Reads from the current position of <paramref name="stream"/> until <paramref name="buffer"/>
/// is completely filled.
/// </summary>
/// <returns>true when the buffer was filled; false when the stream ended first.</returns>
private static bool TryFillBuffer (Stream stream, byte[] buffer)
{
    int filled = 0;
    while (filled < buffer.Length)
    {
        int bytesRead = stream.Read (buffer, filled, buffer.Length - filled);
        if (bytesRead <= 0)
        {
            // End of stream: stop instead of looping forever.
            return false;
        }
        filled += bytesRead;
    }
    return true;
}