依次进行 ASCII85 解码和 Flate 解码时出错

Error occurs while ASCII85 decoding and FlateDecoding in a row

我使用以下代码解码 PDF 中的文本流。在某些情况下,流需要依次经过两个过滤器解码,例如:<< /Length 2348 /Filter [ /ASCII85Decode /FlateDecode ] >>。我先用 ASCII85Decode 解码流,再用 FlateDecode 解码。但在某些情况下,经过 FlateDecode 解码后的最终结果为空。对这个问题有什么想法吗?

    /// <summary>
    /// Decompresses <paramref name="encodedInput"/> by reading it through an
    /// <c>InflaterInputStream</c> and returns whatever bytes could be read.
    /// </summary>
    /// <remarks>
    /// The method runs in lenient mode (<c>strict</c> is hard-wired to
    /// <c>false</c>): if reading fails part-way through, the bytes read so far
    /// are returned instead of an error. Consequently an EMPTY result usually
    /// means the very first read failed - e.g. because the input is not the
    /// data the inflater expects - rather than that the stream was empty.
    /// NOTE(review): InflaterInputStream is declared outside this snippet
    /// (presumably SharpZipLib or iText) - confirm which one is in use.
    /// </remarks>
    /// <param name="encodedInput">Compressed bytes to inflate.</param>
    /// <returns>
    /// The decompressed bytes; possibly truncated or empty when the input
    /// could not be read completely.
    /// </returns>
    public byte[] decode(byte[] encodedInput)
    {
        bool strict = false;

        // 'using' guarantees the streams are released on the failure path too.
        using (MemoryStream stream = new MemoryStream(encodedInput))
        using (InflaterInputStream zip = new InflaterInputStream(stream))
        using (MemoryStream output = new MemoryStream())
        {
            // Lenient mode reads one byte at a time so that every byte obtained
            // before a failure has already been copied to the output.
            byte[] b = new byte[strict ? 4092 : 1];
            try
            {
                int n;
                while ((n = zip.Read(b, 0, b.Length)) > 0)
                {
                    output.Write(b, 0, n);
                }
                return output.ToArray();
            }
            catch (Exception)
            {
                // Deliberate best effort: in lenient mode a damaged stream
                // yields the partial result instead of an exception.
                if (strict)
                {
                    return null;
                }
                return output.ToArray();
            }
        }
    }

//ASCII85Decode

/// <summary>
/// Converts between binary data and its ASCII85 (base-85) text form: every
/// group of four bytes is represented by five characters in the range
/// '!' to 'u', and an all-zero group is abbreviated to 'z'.
/// </summary>
/// <remarks>
/// The instance keeps scratch state (current tuple, block buffers, line
/// position) in fields, so one instance must not be used by several threads
/// at the same time.
/// </remarks>
class ASCII85 : IASCII85
{
    /// <summary>
    /// Prefix mark that identifies an encoded ASCII85 string, traditionally '&lt;~'
    /// </summary>
    public string PrefixMark = "<~";
    /// <summary>
    /// Suffix mark that identifies an encoded ASCII85 string, traditionally '~&gt;'
    /// </summary>
    public string SuffixMark = "~>";
    /// <summary>
    /// Maximum line length for encoded ASCII85 string;
    /// set to zero for one unbroken line.
    /// </summary>
    public int LineLength = 75;
    /// <summary>
    /// Add the Prefix and Suffix marks when encoding; when decoding, reject
    /// input that carries neither of the two marks.
    /// </summary>
    public bool EnforceMarks = true;

    private const int _asciiOffset = 33;
    private readonly byte[] _encodedBlock = new byte[5];
    private readonly byte[] _decodedBlock = new byte[4];
    private uint _tuple = 0;
    private int _linePos = 0;
    // Weights of the five base-85 digits, most significant first.
    private static readonly uint[] pow85 = { 85 * 85 * 85 * 85, 85 * 85 * 85, 85 * 85, 85, 1 };

    /// <summary>
    /// Decodes an ASCII85 encoded string into the original binary data
    /// </summary>
    /// <param name="inputString">ASCII85 encoded string</param>
    /// <returns>byte array of decoded binary data</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="inputString"/> is null.
    /// </exception>
    /// <exception cref="FormatException">
    /// <see cref="EnforceMarks"/> is set and both marks are missing, the input
    /// contains a character outside '!'..'u', a 'z' appears inside a group, or
    /// the final group consists of a single character.
    /// </exception>
    public byte[] decode(string inputString)
    {
        if (inputString == null)
        {
            throw new ArgumentNullException("inputString");
        }

        // Only input lacking BOTH marks is rejected; data that carries just
        // one of them (e.g. a PDF stream ending in '~>') is still accepted.
        if (EnforceMarks
            && !inputString.StartsWith(PrefixMark, StringComparison.Ordinal)
            && !inputString.EndsWith(SuffixMark, StringComparison.Ordinal))
        {
            throw new FormatException("ASCII85 encoded data should begin with '" + PrefixMark +
                "' and end with '" + SuffixMark + "'");
        }

        // Strip the configured marks. The suffix is tested only after the
        // prefix has been removed so that the two can never overlap.
        if (inputString.StartsWith(PrefixMark, StringComparison.Ordinal))
        {
            inputString = inputString.Substring(PrefixMark.Length);
        }
        if (inputString.EndsWith(SuffixMark, StringComparison.Ordinal))
        {
            inputString = inputString.Substring(0, inputString.Length - SuffixMark.Length);
        }

        using (MemoryStream ms = new MemoryStream())
        {
            int count = 0;
            // Start from a clean accumulator: an earlier call may have left a
            // value behind (partial final group, or an exception mid-group).
            _tuple = 0;

            foreach (char c in inputString)
            {
                switch (c)
                {
                    case 'z':
                        // 'z' abbreviates four zero bytes and may only appear
                        // between complete groups.
                        if (count != 0)
                        {
                            throw new FormatException("The character 'z' is invalid inside an ASCII85 block.");
                        }
                        Array.Clear(_decodedBlock, 0, _decodedBlock.Length);
                        ms.Write(_decodedBlock, 0, _decodedBlock.Length);
                        continue;
                    case '\n':
                    case '\r':
                    case '\t':
                    case '\0':
                    case '\f':
                    case '\b':
                    case ' ':
                        // White space carries no data and is skipped.
                        continue;
                }

                if (c < '!' || c > 'u')
                {
                    throw new FormatException("Bad character '" + c +
                        "' found. ASCII85 only allows characters '!' to 'u'.");
                }

                _tuple += ((uint)(c - _asciiOffset) * pow85[count]);
                count++;
                if (count == _encodedBlock.Length)
                {
                    DecodeBlock();
                    ms.Write(_decodedBlock, 0, _decodedBlock.Length);
                    _tuple = 0;
                    count = 0;
                }
            }

            // A trailing group of n characters (2..4) stands for n - 1 bytes.
            if (count != 0)
            {
                if (count == 1)
                {
                    throw new FormatException("The last block of ASCII85 data cannot be a single byte.");
                }
                count--;
                // Round the truncated value up so that the leading bytes come
                // out as they were before the group was shortened.
                _tuple += pow85[count];
                DecodeBlock(count);
                ms.Write(_decodedBlock, 0, count);
                _tuple = 0;
            }
            return ms.ToArray();
        }
    }

    /// <summary>
    /// Encodes binary data into a plaintext ASCII85 format string
    /// </summary>
    /// <param name="ba">binary data to encode</param>
    /// <returns>ASCII85 encoded string</returns>
    /// <exception cref="ArgumentNullException"><paramref name="ba"/> is null.</exception>
    public string encode(byte[] ba)
    {
        if (ba == null)
        {
            throw new ArgumentNullException("ba");
        }

        // Five output characters per four input bytes; computed in 64 bits so
        // the estimate cannot overflow for very large inputs.
        long estimate = (long)ba.Length * _encodedBlock.Length / _decodedBlock.Length;
        StringBuilder sb = new StringBuilder((int)Math.Min(int.MaxValue, estimate));
        _linePos = 0;

        if (EnforceMarks)
        {
            AppendString(sb, PrefixMark);
        }

        int count = 0;
        _tuple = 0;
        foreach (byte b in ba)
        {
            if (count >= _decodedBlock.Length - 1)
            {
                // Fourth byte of the group: the tuple is complete.
                _tuple |= b;
                if (_tuple == 0)
                {
                    AppendChar(sb, 'z');
                }
                else
                {
                    EncodeBlock(sb);
                }
                _tuple = 0;
                count = 0;
            }
            else
            {
                _tuple |= (uint)(b << (24 - (count * 8)));
                count++;
            }
        }

        // A trailing group of n bytes (1..3) is written as n + 1 characters.
        if (count > 0)
        {
            EncodeBlock(count + 1, sb);
        }

        if (EnforceMarks)
        {
            AppendString(sb, SuffixMark);
        }
        return sb.ToString();
    }

    /// <summary>Writes the current tuple as a full five-character group.</summary>
    private void EncodeBlock(StringBuilder sb)
    {
        EncodeBlock(_encodedBlock.Length, sb);
    }

    /// <summary>
    /// Converts the current tuple to base-85 digits and appends the first
    /// <paramref name="count"/> of them. The tuple is zero afterwards.
    /// </summary>
    private void EncodeBlock(int count, StringBuilder sb)
    {
        for (int i = _encodedBlock.Length - 1; i >= 0; i--)
        {
            _encodedBlock[i] = (byte)((_tuple % 85) + _asciiOffset);
            _tuple /= 85;
        }

        for (int i = 0; i < count; i++)
        {
            char c = (char)_encodedBlock[i];
            AppendChar(sb, c);
        }
    }

    /// <summary>Unpacks all four bytes of the current tuple.</summary>
    private void DecodeBlock()
    {
        DecodeBlock(_decodedBlock.Length);
    }

    /// <summary>
    /// Unpacks the leading <paramref name="bytes"/> bytes of the current
    /// tuple, most significant byte first, into the decoded-block buffer.
    /// </summary>
    private void DecodeBlock(int bytes)
    {
        for (int i = 0; i < bytes; i++)
        {
            _decodedBlock[i] = (byte)(_tuple >> (24 - (i * 8)));
        }
    }

    /// <summary>
    /// Appends <paramref name="s"/>, starting a new line first when it would
    /// not fit on the current one.
    /// </summary>
    private void AppendString(StringBuilder sb, string s)
    {
        if (LineLength > 0 && (_linePos + s.Length > LineLength))
        {
            _linePos = 0;
            sb.Append('\n');
        }
        else
        {
            _linePos += s.Length;
        }
        sb.Append(s);
    }

    /// <summary>
    /// Appends <paramref name="c"/> and breaks the line once it has reached
    /// <see cref="LineLength"/> characters.
    /// </summary>
    private void AppendChar(StringBuilder sb, char c)
    {
        sb.Append(c);
        _linePos++;
        if (LineLength > 0 && (_linePos >= LineLength))
        {
            _linePos = 0;
            sb.Append('\n');
        }
    }

    /// <summary>Not implemented; always throws.</summary>
    /// <exception cref="NotImplementedException">Always.</exception>
    public string decode(byte[] ba)
    {
        throw new NotImplementedException();
    }
}

我不知道你的 InflaterInputStream,所以我用 System.IO.Compression.DeflateStream 代替。

我从你的示例文件中取出了所有带有 /Filter [ /ASCII85Decode /FlateDecode ] 的流内容,将其放入字符串 rawStreamChars 中,并按如下方式尝试解码:

// Step 1: undo the ASCII85 layer. Mark enforcement is switched off before
// decoding.
ASCII85 ascii85 = new ASCII85();
ascii85.EnforceMarks = false;
byte[] zlibData = ascii85.decode(rawStreamChars);

// Step 2: undo the Flate layer.
using (MemoryStream compressed = new MemoryStream(zlibData))
{
    // Skip the two-byte zlib header before handing the stream to DeflateStream.
    for (int skipped = 0; skipped < 2; skipped++)
    {
        compressed.ReadByte();
    }

    using (DeflateStream inflater = new DeflateStream(compressed, CompressionMode.Decompress))
    using (MemoryStream plain = new MemoryStream())
    {
        inflater.CopyTo(plain);

        // Print the decoded content stream as windows-1252 text.
        byte[] contentBytes = plain.ToArray();
        string contentText = Encoding.GetEncoding("windows-1252").GetString(contentBytes);
        Console.Out.WriteLine(contentText);
    }
}

并且在每种情况下我都获得了相应的内容流。

因此,您要么

  • 从流对象中错误地检索流内容;或
  • 不正确地使用了您的 ASCII85 类(例如设置了 EnforceMarks = true);或
  • 您的 decode 方法有问题。

嗯,或者有一些不太明显的错误在起作用。