SevenZipSharp 无法解压某些 tar 档案

SevenZipSharp fails to unpack certain tar archives

我使用 SevenZipSharp 打包到 7z 档案以及从各种档案中解压缩。它多年来一直运作良好。

今天我有一个 .tgz 存档在第二阶段解包失败:
从 .tgz 中提取 .tar 有效,但解压 .tar 失败。受影响的只是这个单一档案。所有其他 .tgz 都可以正常工作。 .tar 本身没有问题,因为用 7-zip 软件解压也可以。

经过大量测试我和同事找到了原因:
我们必须调试 SevenZipSharp DLL 才能找到其中的故障。 DLL 通过读取前 16 个字节并将其与签名列表进行比较来检测存档的类型。这对于大多数类型的档案都是正确的,但对于 .tar 档案是错误的,因为 .tar 文件 header starts 与档案的文件名:TAR @ Wikipedia.签名“ustar”(如果存在)位于地址 257 (0x0101)。

SevenZipSharp 知道并检查该地址的“ustar”,但前提是之前的检测失败。不幸的是,我们的 TAR 档案的名称是“x42202.tar”。 .dmg 文件 (Apple Disk Image) 的 header 由一个“x”组成(这有多愚蠢,只使用一个字节作为签名??)。所以其实是检测成功了文件类型,只是检测结果错误
(我知道,链接的维基百科说,.dmg header 签名是“koly”,但我用在互联网上找到的下载的 .dmg 文件进行了确认。)

编辑 07.12.2021:签名实际上是“koly”,但 so-called header 的长度为 512 字节,位于 END 的文件。然而,SevenZipSharp 在开始时会检查签名。我测试过的大多数文件(但不是全部!)确实在开头有一个“x”,但我不知道为什么。也许它是一种非官方的 header(“x”似乎来自 MIME 类型“x-apple-diskimage”)。 - 编辑结束。

因此我们修改了 FileSignatureChecker.cs 中的代码以避免对 .tar 档案的错误档案类型检测。
您可以在下面找到原始代码和修改后的代码。
代码库是最新的 SevenZipSharp 版本,可以在 CodePlex archive 中找到。显然它不再处于活跃开发状态,因为版本号多年来没有改变,如果它仍然活跃,它会在 CodePlex 停用后移动。

更新2018-11-16
修改代码中的错误修复:如果找到,则不返回 enSpecialFormat。

更新2021-12-16
该错误仍然存​​在于 github 存储库 https://github.com/squid-box/SevenZipSharp 中,这是 SevenZipSharp 项目的当前位置。已上传对错误代码进行重大返工的拉取请求,正在等待合并。

原码

public static InArchiveFormat CheckSignature (Stream stream, out int offset, out bool isExecutable)
{
  offset = 0;
  if (!stream.CanRead)
  {
    throw new ArgumentException ("The stream must be readable.");
  }
  if (stream.Length < SIGNATURE_SIZE)
  {
    throw new ArgumentException ("The stream is invalid.");
  }

  #region Get file signature

  var signature = new byte[SIGNATURE_SIZE];
  int bytesRequired = SIGNATURE_SIZE;
  int index = 0;
  stream.Seek (0, SeekOrigin.Begin);
  while (bytesRequired > 0)
  {
    int bytesRead = stream.Read (signature, index, bytesRequired);
    bytesRequired -= bytesRead;
    index += bytesRead;
  }
  string actualSignature = BitConverter.ToString (signature);

  #endregion

  InArchiveFormat suspectedFormat = InArchiveFormat.XZ; // any except PE and Cab
  isExecutable = false;

  foreach (string expectedSignature in Formats.InSignatureFormats.Keys)
  {
    if (actualSignature.StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase) ||
        actualSignature.Substring (6).StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase) &&
        Formats.InSignatureFormats[expectedSignature] == InArchiveFormat.Lzh)
    {
      if (Formats.InSignatureFormats[expectedSignature] == InArchiveFormat.PE)
      {
        suspectedFormat = InArchiveFormat.PE;
        isExecutable = true;
      }
      else
      {
        return Formats.InSignatureFormats[expectedSignature];
      }
    }
  }

  // Many Microsoft formats
  if (actualSignature.StartsWith ("D0-CF-11-E0-A1-B1-1A-E1", StringComparison.OrdinalIgnoreCase))
  {
    suspectedFormat = InArchiveFormat.Cab; // != InArchiveFormat.XZ
  }

  #region SpecialDetect
  try
  {
    SpecialDetect (stream, 257, InArchiveFormat.Tar);
  }
  catch (ArgumentException) { }
  if (SpecialDetect (stream, 0x8001, InArchiveFormat.Iso))
  {
    return InArchiveFormat.Iso;
  }
  if (SpecialDetect (stream, 0x8801, InArchiveFormat.Iso))
  {
    return InArchiveFormat.Iso;
  }
  if (SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
  {
    return InArchiveFormat.Iso;
  }
  if (SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
  {
    return InArchiveFormat.Iso;
  }
  if (SpecialDetect (stream, 0x400, InArchiveFormat.Hfs))
  {
    return InArchiveFormat.Hfs;
  }
  #region Last resort for tar - can mistake
  if (stream.Length >= 1024)
  {
    stream.Seek (-1024, SeekOrigin.End);
    byte[] buf = new byte[1024];
    stream.Read (buf, 0, 1024);
    bool istar = true;
    for (int i = 0; i < 1024; i++)
    {
      istar = istar && buf[i] == 0;
    }
    if (istar)
    {
      return InArchiveFormat.Tar;
    }
  }
  #endregion
  #endregion

  #region Check if it is an SFX archive or a file with an embedded archive.
  if (suspectedFormat != InArchiveFormat.XZ)
  {
    #region Get first Min(stream.Length, SFX_SCAN_LENGTH) bytes
    var scanLength = Math.Min (stream.Length, SFX_SCAN_LENGTH);
    signature = new byte[scanLength];
    bytesRequired = (int)scanLength;
    index = 0;
    stream.Seek (0, SeekOrigin.Begin);
    while (bytesRequired > 0)
    {
      int bytesRead = stream.Read (signature, index, bytesRequired);
      bytesRequired -= bytesRead;
      index += bytesRead;
    }
    actualSignature = BitConverter.ToString (signature);
    #endregion

    foreach (var format in new InArchiveFormat[]
    {
                    InArchiveFormat.Zip,
                    InArchiveFormat.SevenZip,
                    InArchiveFormat.Rar,
                    InArchiveFormat.Cab,
                    InArchiveFormat.Arj
    })
    {
      int pos = actualSignature.IndexOf (Formats.InSignatureFormatsReversed[format]);
      if (pos > -1)
      {
        offset = pos / 3;
        return format;
      }
    }
    // Nothing
    if (suspectedFormat == InArchiveFormat.PE)
    {
      return InArchiveFormat.PE;
    }
  }
  #endregion

  throw new ArgumentException ("The stream is invalid or no corresponding signature was found.");
}

修改后的代码

public static InArchiveFormat CheckSignature (Stream stream, out int offset, out bool isExecutable)
{
  offset = 0;
  if (!stream.CanRead)
  {
    throw new ArgumentException ("The stream must be readable.");
  }
  if (stream.Length < SIGNATURE_SIZE)
  {
    throw new ArgumentException ("The stream is invalid.");
  }

  #region Get file signature

  var signature = new byte[SIGNATURE_SIZE];
  int bytesRequired = SIGNATURE_SIZE;
  int index = 0;
  stream.Seek (0, SeekOrigin.Begin);
  while (bytesRequired > 0)
  {
    int bytesRead = stream.Read (signature, index, bytesRequired);
    bytesRequired -= bytesRead;
    index += bytesRead;
  }
  string actualSignature = BitConverter.ToString (signature);

  #endregion Get file signature

  InArchiveFormat suspectedFormat = InArchiveFormat.XZ; // any except PE and Cab
  isExecutable = false;

  InArchiveFormat enDetectedFormat = (InArchiveFormat)(-1);
  InArchiveFormat enSpecialFormat = (InArchiveFormat)(-1);

  foreach (string expectedSignature in Formats.InSignatureFormats.Keys)
  {
    if (actualSignature.StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase) ||
        actualSignature.Substring (6).StartsWith (expectedSignature, StringComparison.OrdinalIgnoreCase) &&
        Formats.InSignatureFormats[expectedSignature] == InArchiveFormat.Lzh)
    {
      if (Formats.InSignatureFormats[expectedSignature] == InArchiveFormat.PE)
      {
        suspectedFormat = InArchiveFormat.PE;
        isExecutable = true;
      }
      else
      {
        enDetectedFormat = Formats.InSignatureFormats[expectedSignature];
        break;
      }
    }
  }

  // Many Microsoft formats
  if (actualSignature.StartsWith ("D0-CF-11-E0-A1-B1-1A-E1", StringComparison.OrdinalIgnoreCase))
  {
    suspectedFormat = InArchiveFormat.Cab; // != InArchiveFormat.XZ
  }

  #region SpecialDetect

  if (SpecialDetect (stream, 257, InArchiveFormat.Tar))
  {
    enSpecialFormat = InArchiveFormat.Tar;
  }
  else if (SpecialDetect (stream, 0x8001, InArchiveFormat.Iso))
  {
    enSpecialFormat = InArchiveFormat.Iso;
  }
  else if (SpecialDetect (stream, 0x8801, InArchiveFormat.Iso))
  {
    enSpecialFormat = InArchiveFormat.Iso;
  }
  else if (SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
  {
    enSpecialFormat = InArchiveFormat.Iso;
  }
  else if (SpecialDetect (stream, 0x9001, InArchiveFormat.Iso))
  {
    enSpecialFormat = InArchiveFormat.Iso;
  }
  else if (SpecialDetect (stream, 0x400, InArchiveFormat.Hfs))
  {
    enSpecialFormat = InArchiveFormat.Hfs;
  }

  #region Last resort for tar - can mistake

  bool bPossiblyTAR = false;
  if (stream.Length >= 1024)
  {
    stream.Seek (-1024, SeekOrigin.End);
    byte[] buf = new byte[1024];
    stream.Read (buf, 0, 1024);
    bPossiblyTAR = true;
    for (int i = 0; i < 1024; i++)
    {
      bPossiblyTAR = bPossiblyTAR && buf[i] == 0;
    }
  }

  // TAR header starts with the filename of the archive.
  // The filename can be anything, including the Identifiers of the various archive formats.
  // This means that a TAR can be misinterpreted as any type of archive.
  if (enSpecialFormat == InArchiveFormat.Tar
  || bPossiblyTAR)
  {
    var fs = stream as FileStream;
    if (fs != null)
    {
      string sStreamFilename = fs.Name;
      if (sStreamFilename.EndsWith (".tar", StringComparison.InvariantCultureIgnoreCase))
        enDetectedFormat = InArchiveFormat.Tar;
    }
  }

  #endregion Last resort for tar - can mistake

  if (enDetectedFormat != (InArchiveFormat)(-1))
    return enDetectedFormat;
  if (enSpecialFormat != (InArchiveFormat)(-1))
    return enSpecialFormat;

  #endregion SpecialDetect

  #region Check if it is an SFX archive or a file with an embedded archive.

  if (suspectedFormat != InArchiveFormat.XZ)
  {
    #region Get first Min(stream.Length, SFX_SCAN_LENGTH) bytes

    var scanLength = Math.Min (stream.Length, SFX_SCAN_LENGTH);
    signature = new byte[scanLength];
    bytesRequired = (int)scanLength;
    index = 0;
    stream.Seek (0, SeekOrigin.Begin);
    while (bytesRequired > 0)
    {
      int bytesRead = stream.Read (signature, index, bytesRequired);
      bytesRequired -= bytesRead;
      index += bytesRead;
    }
    actualSignature = BitConverter.ToString (signature);

    #endregion Get first Min(stream.Length, SFX_SCAN_LENGTH) bytes

    foreach (var format in new InArchiveFormat[]
    {
                InArchiveFormat.Zip,
                InArchiveFormat.SevenZip,
                InArchiveFormat.Rar,
                InArchiveFormat.Cab,
                InArchiveFormat.Arj
    })
    {
      int pos = actualSignature.IndexOf (Formats.InSignatureFormatsReversed[format]);
      if (pos > -1)
      {
        offset = pos / 3;
        return format;
      }
    }
    // Nothing
    if (suspectedFormat == InArchiveFormat.PE)
    {
      return InArchiveFormat.PE;
    }
  }

  #endregion Check if it is an SFX archive or a file with an embedded archive.

  throw new ArgumentException ("The stream is invalid or no corresponding signature was found.");
}