ASCII85解码和FlateDecoding连续出错
Error occurs while ASCII85 decoding and FlateDecoding in a row
我使用以下代码解码 pdf 中的文本流。在某些情况下,流需要由 2 个过滤器解码。
<< /Length 2348 /Filter [ /ASCII85Decode /FlateDecode ] >>
我首先通过 ASCII85Decode 解码流,然后再通过 FlateDecode 解码。在某些情况下,经过 FlateDecode 解码后的最终结果为空。对这个问题有什么想法吗?
/// <summary>
/// Inflates (FlateDecode) the given bytes and returns the decompressed data.
/// </summary>
/// <param name="encodedInput">Compressed bytes handed to <c>InflaterInputStream</c>.</param>
/// <returns>
/// The decompressed bytes. Because <c>strict</c> is hard-wired to <c>false</c>, a failure
/// while inflating does not propagate: whatever was decompressed before the failure is
/// returned instead (possibly an empty array).
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="encodedInput"/> is null.</exception>
public byte[] decode(byte[] encodedInput)
{
    // Checked up front so a null argument still surfaces as ArgumentNullException
    // instead of being swallowed by the best-effort catch below.
    if (encodedInput == null)
    {
        throw new ArgumentNullException("encodedInput");
    }

    bool strict = false;

    // Non-strict mode reads one byte at a time; presumably so that every byte inflated
    // before a failure has already been copied to the output when the exception hits.
    byte[] b = new byte[strict ? 4092 : 1];

    using (MemoryStream output = new MemoryStream())
    {
        try
        {
            // 'using' guarantees both streams are released even when inflating throws.
            using (MemoryStream stream = new MemoryStream(encodedInput))
            using (InflaterInputStream zip = new InflaterInputStream(stream))
            {
                int n;
                while ((n = zip.Read(b, 0, b.Length)) > 0)
                {
                    output.Write(b, 0, n);
                }
            }
            return output.ToArray();
        }
        catch
        {
            // Deliberate best effort: strict mode reports failure as null,
            // lenient mode hands back the partial result.
            if (strict)
            {
                return null;
            }
            return output.ToArray();
        }
    }
}
//ASCII85Decode
/// <summary>
/// ASCII85 encoder/decoder: every group of five characters in the range '!'..'u'
/// stands for four bytes of binary data, and 'z' abbreviates four zero bytes.
/// </summary>
/// <remarks>
/// The class keeps scratch state (<c>_tuple</c>, <c>_linePos</c> and the block buffers) in
/// instance fields, so one instance should not be shared between concurrent callers.
/// </remarks>
class ASCII85 : IASCII85
{
    /// <summary>
    /// Prefix mark that identifies an encoded ASCII85 string, traditionally '&lt;~'
    /// </summary>
    public string PrefixMark = "<~";

    /// <summary>
    /// Suffix mark that identifies an encoded ASCII85 string, traditionally '~&gt;'
    /// </summary>
    public string SuffixMark = "~>";

    /// <summary>
    /// Maximum line length for encoded ASCII85 string;
    /// set to zero for one unbroken line.
    /// </summary>
    public int LineLength = 75;

    /// <summary>
    /// Add the Prefix and Suffix marks when encoding, and enforce their presence for decoding
    /// </summary>
    public bool EnforceMarks = true;

    // Value subtracted from / added to a character to map it to a base-85 digit ('!' == 33).
    private const int _asciiOffset = 33;

    // Scratch buffers: one encoded group (5 characters) and one decoded group (4 bytes).
    private byte[] _encodedBlock = new byte[5];
    private byte[] _decodedBlock = new byte[4];

    // 32-bit accumulator for the group currently being encoded or decoded.
    private uint _tuple = 0;

    // Number of characters written on the current output line while encoding.
    private int _linePos = 0;

    // Weight of each of the five base-85 digits, most significant first.
    private uint[] pow85 = { 85 * 85 * 85 * 85, 85 * 85 * 85, 85 * 85, 85, 1 };

    /// <summary>
    /// Decodes an ASCII85 encoded string into the original binary data
    /// </summary>
    /// <param name="inputString">ASCII85 encoded string</param>
    /// <returns>byte array of decoded binary data</returns>
    /// <exception cref="ArgumentNullException"><paramref name="inputString"/> is null.</exception>
    /// <exception cref="Exception">
    /// The marks are enforced and both are missing, a character outside '!'..'u' is found,
    /// 'z' appears inside a group, or the final group consists of a single character.
    /// </exception>
    public byte[] decode(string inputString)
    {
        if (inputString == null)
        {
            throw new ArgumentNullException("inputString");
        }

        if (EnforceMarks)
        {
            // NOTE(review): this only rejects input that lacks BOTH marks, although the
            // message reads as if each one were required. Kept as-is on purpose: tightening
            // it to "either mark missing" would start rejecting data that carries only the
            // suffix mark - confirm with callers before changing.
            if (!inputString.StartsWith(PrefixMark, StringComparison.Ordinal) &&
                !inputString.EndsWith(SuffixMark, StringComparison.Ordinal))
            {
                throw new Exception("ASCII85 encoded data should begin with '" + PrefixMark +
                    "' and end with '" + SuffixMark + "'");
            }
        }

        // Strip the marks when present. The configured marks are used for the test as well
        // as for the length, so customised marks are removed consistently.
        if (inputString.StartsWith(PrefixMark, StringComparison.Ordinal))
        {
            inputString = inputString.Substring(PrefixMark.Length);
        }
        if (inputString.EndsWith(SuffixMark, StringComparison.Ordinal))
        {
            inputString = inputString.Substring(0, inputString.Length - SuffixMark.Length);
        }

        // Start from a clean accumulator: an earlier call that ended with a partial group
        // (or threw midway) would otherwise leak its value into this decode.
        _tuple = 0;

        using (MemoryStream ms = new MemoryStream())
        {
            int count = 0;
            foreach (char c in inputString)
            {
                bool processChar;
                switch (c)
                {
                    case 'z':
                        // 'z' is shorthand for a whole group of four zero bytes.
                        if (count != 0)
                        {
                            throw new Exception("The character 'z' is invalid inside an ASCII85 block.");
                        }
                        _decodedBlock[0] = 0;
                        _decodedBlock[1] = 0;
                        _decodedBlock[2] = 0;
                        _decodedBlock[3] = 0;
                        ms.Write(_decodedBlock, 0, _decodedBlock.Length);
                        processChar = false;
                        break;
                    case '\n':
                    case '\r':
                    case '\t':
                    case '\0':
                    case '\f':
                    case '\b':
                    case ' ':
                        // Whitespace between digits carries no data and is skipped.
                        processChar = false;
                        break;
                    default:
                        if (c < '!' || c > 'u')
                        {
                            throw new Exception("Bad character '" + c + "' found. ASCII85 only allows characters '!' to 'u'.");
                        }
                        processChar = true;
                        break;
                }

                if (processChar)
                {
                    _tuple += ((uint)(c - _asciiOffset) * pow85[count]);
                    count++;
                    if (count == _encodedBlock.Length)
                    {
                        DecodeBlock();
                        ms.Write(_decodedBlock, 0, _decodedBlock.Length);
                        _tuple = 0;
                        count = 0;
                    }
                }
            }

            // A trailing partial group of k characters (2..4) yields k - 1 bytes.
            if (count != 0)
            {
                if (count == 1)
                {
                    throw new Exception("The last block of ASCII85 data cannot be a single byte.");
                }
                count--;
                // Round the truncated value up by one unit of the last supplied digit so
                // the leading bytes come out right once the missing digits are dropped.
                _tuple += pow85[count];
                DecodeBlock(count);
                for (int i = 0; i < count; i++)
                {
                    ms.WriteByte(_decodedBlock[i]);
                }
                _tuple = 0;
            }

            return ms.ToArray();
        }
    }

    /// <summary>
    /// Encodes binary data into a plaintext ASCII85 format string
    /// </summary>
    /// <param name="ba">binary data to encode</param>
    /// <returns>ASCII85 encoded string</returns>
    /// <exception cref="ArgumentNullException"><paramref name="ba"/> is null.</exception>
    public string encode(byte[] ba)
    {
        if (ba == null)
        {
            throw new ArgumentNullException("ba");
        }

        // Capacity hint of roughly 5 output characters per 4 input bytes. Computed in long
        // so that neither the integer division nor the multiplication distorts it.
        long estimated = (long)ba.Length * _encodedBlock.Length / _decodedBlock.Length;
        StringBuilder sb = new StringBuilder((int)Math.Min(int.MaxValue, estimated));

        _linePos = 0;
        if (EnforceMarks)
        {
            AppendString(sb, PrefixMark);
        }

        int count = 0;
        _tuple = 0;
        foreach (byte b in ba)
        {
            if (count >= _decodedBlock.Length - 1)
            {
                // Fourth byte of the group: the tuple is complete.
                _tuple |= b;
                if (_tuple == 0)
                {
                    AppendChar(sb, 'z');
                }
                else
                {
                    EncodeBlock(sb);
                }
                _tuple = 0;
                count = 0;
            }
            else
            {
                // Widen to uint before shifting so a byte >= 0x80 shifted into the top
                // position never passes through a negative int.
                _tuple |= (uint)b << (24 - (count * 8));
                count++;
            }
        }

        // A trailing partial group of n bytes is written as n + 1 characters.
        if (count > 0)
        {
            EncodeBlock(count + 1, sb);
        }

        if (EnforceMarks)
        {
            AppendString(sb, SuffixMark);
        }
        return sb.ToString();
    }

    /// <summary>Encodes the current tuple as a full five-character group.</summary>
    private void EncodeBlock(StringBuilder sb)
    {
        EncodeBlock(_encodedBlock.Length, sb);
    }

    /// <summary>
    /// Converts the current tuple into five base-85 digits and appends the first
    /// <paramref name="count"/> of them to <paramref name="sb"/>.
    /// </summary>
    private void EncodeBlock(int count, StringBuilder sb)
    {
        // Digits are produced least significant first, so fill the buffer from the end.
        for (int i = _encodedBlock.Length - 1; i >= 0; i--)
        {
            _encodedBlock[i] = (byte)((_tuple % 85) + _asciiOffset);
            _tuple /= 85;
        }
        for (int i = 0; i < count; i++)
        {
            char c = (char)_encodedBlock[i];
            AppendChar(sb, c);
        }
    }

    /// <summary>Unpacks the current tuple into all four bytes of the decoded block.</summary>
    private void DecodeBlock()
    {
        DecodeBlock(_decodedBlock.Length);
    }

    /// <summary>
    /// Unpacks the leading <paramref name="bytes"/> bytes of the current tuple,
    /// most significant byte first, into the decoded block.
    /// </summary>
    private void DecodeBlock(int bytes)
    {
        for (int i = 0; i < bytes; i++)
        {
            _decodedBlock[i] = (byte)(_tuple >> (24 - (i * 8)));
        }
    }

    /// <summary>
    /// Appends a string, first starting a new line when it would not fit within
    /// <see cref="LineLength"/>.
    /// </summary>
    private void AppendString(StringBuilder sb, string s)
    {
        if (LineLength > 0 && (_linePos + s.Length > LineLength))
        {
            _linePos = 0;
            sb.Append('\n');
        }
        sb.Append(s);
        // Count the string on whichever line it ended up on, including after a wrap.
        _linePos += s.Length;
    }

    /// <summary>
    /// Appends one character and breaks the line once <see cref="LineLength"/> is reached.
    /// </summary>
    private void AppendChar(StringBuilder sb, char c)
    {
        sb.Append(c);
        _linePos++;
        if (LineLength > 0 && (_linePos >= LineLength))
        {
            _linePos = 0;
            sb.Append('\n');
        }
    }

    /// <summary>
    /// Not implemented; always throws. Presumably present only to satisfy
    /// <c>IASCII85</c> - confirm against the interface.
    /// </summary>
    /// <exception cref="NotImplementedException">Always.</exception>
    public string decode(byte[] ba)
    {
        throw new NotImplementedException();
    }
}
我不了解你使用的 InflaterInputStream,所以我改用 System.IO.Compression.DeflateStream 代替。
我从你的示例文件中提取了所有带有 /Filter [ /ASCII85Decode /FlateDecode ] 的流内容,并把它们作为 String rawStreamChars 按如下方式解码:
// Stage 1: undo the ASCII85 layer, without insisting on the prefix/suffix marks.
ASCII85 decoder = new ASCII85 { EnforceMarks = false };
byte[] ascii85Decoded = decoder.decode(rawStreamChars);

// Stage 2: inflate the result and print it as windows-1252 text.
using (MemoryStream compressed = new MemoryStream(ascii85Decoded))
using (MemoryStream inflated = new MemoryStream())
{
    // Remove 2 bytes zlib header before handing the rest to DeflateStream.
    compressed.ReadByte();
    compressed.ReadByte();

    using (DeflateStream inflater = new DeflateStream(compressed, CompressionMode.Decompress))
    {
        inflater.CopyTo(inflated);
    }

    Encoding windows1252 = Encoding.GetEncoding("windows-1252");
    string text = windows1252.GetString(inflated.ToArray());
    Console.Out.WriteLine(text);
}
并且在每种情况下我都获得了相应的内容流。
因此,问题要么是:
- 你从流对象中提取流内容的方式有误;或者
- 你没有正确使用 ASCII85 类(例如使用了 EnforceMarks = true);或者
- 你的 decode 方法有问题。
嗯,或者有一些不太明显的错误在起作用。
我使用以下代码解码 PDF 中的文本流。在某些情况下,流需要由 2 个过滤器解码。 << /Length 2348 /Filter [ /ASCII85Decode /FlateDecode ] >> 我首先通过 ASCII85Decode 解码流,然后再通过 FlateDecode 解码。在某些情况下,经过 FlateDecode 解码后的最终结果为空。对这个问题有什么想法吗?
/// <summary>
/// Inflates (FlateDecode) the given bytes and returns the decompressed data.
/// </summary>
/// <param name="encodedInput">Compressed bytes handed to <c>InflaterInputStream</c>.</param>
/// <returns>
/// The decompressed bytes. Because <c>strict</c> is hard-wired to <c>false</c>, a failure
/// while inflating does not propagate: whatever was decompressed before the failure is
/// returned instead (possibly an empty array).
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="encodedInput"/> is null.</exception>
public byte[] decode(byte[] encodedInput)
{
    // Checked up front so a null argument still surfaces as ArgumentNullException
    // instead of being swallowed by the best-effort catch below.
    if (encodedInput == null)
    {
        throw new ArgumentNullException("encodedInput");
    }

    bool strict = false;

    // Non-strict mode reads one byte at a time; presumably so that every byte inflated
    // before a failure has already been copied to the output when the exception hits.
    byte[] b = new byte[strict ? 4092 : 1];

    using (MemoryStream output = new MemoryStream())
    {
        try
        {
            // 'using' guarantees both streams are released even when inflating throws.
            using (MemoryStream stream = new MemoryStream(encodedInput))
            using (InflaterInputStream zip = new InflaterInputStream(stream))
            {
                int n;
                while ((n = zip.Read(b, 0, b.Length)) > 0)
                {
                    output.Write(b, 0, n);
                }
            }
            return output.ToArray();
        }
        catch
        {
            // Deliberate best effort: strict mode reports failure as null,
            // lenient mode hands back the partial result.
            if (strict)
            {
                return null;
            }
            return output.ToArray();
        }
    }
}
//ASCII85Decode
/// <summary>
/// ASCII85 encoder/decoder: every group of five characters in the range '!'..'u'
/// stands for four bytes of binary data, and 'z' abbreviates four zero bytes.
/// </summary>
/// <remarks>
/// The class keeps scratch state (<c>_tuple</c>, <c>_linePos</c> and the block buffers) in
/// instance fields, so one instance should not be shared between concurrent callers.
/// </remarks>
class ASCII85 : IASCII85
{
    /// <summary>
    /// Prefix mark that identifies an encoded ASCII85 string, traditionally '&lt;~'
    /// </summary>
    public string PrefixMark = "<~";

    /// <summary>
    /// Suffix mark that identifies an encoded ASCII85 string, traditionally '~&gt;'
    /// </summary>
    public string SuffixMark = "~>";

    /// <summary>
    /// Maximum line length for encoded ASCII85 string;
    /// set to zero for one unbroken line.
    /// </summary>
    public int LineLength = 75;

    /// <summary>
    /// Add the Prefix and Suffix marks when encoding, and enforce their presence for decoding
    /// </summary>
    public bool EnforceMarks = true;

    // Value subtracted from / added to a character to map it to a base-85 digit ('!' == 33).
    private const int _asciiOffset = 33;

    // Scratch buffers: one encoded group (5 characters) and one decoded group (4 bytes).
    private byte[] _encodedBlock = new byte[5];
    private byte[] _decodedBlock = new byte[4];

    // 32-bit accumulator for the group currently being encoded or decoded.
    private uint _tuple = 0;

    // Number of characters written on the current output line while encoding.
    private int _linePos = 0;

    // Weight of each of the five base-85 digits, most significant first.
    private uint[] pow85 = { 85 * 85 * 85 * 85, 85 * 85 * 85, 85 * 85, 85, 1 };

    /// <summary>
    /// Decodes an ASCII85 encoded string into the original binary data
    /// </summary>
    /// <param name="inputString">ASCII85 encoded string</param>
    /// <returns>byte array of decoded binary data</returns>
    /// <exception cref="ArgumentNullException"><paramref name="inputString"/> is null.</exception>
    /// <exception cref="Exception">
    /// The marks are enforced and both are missing, a character outside '!'..'u' is found,
    /// 'z' appears inside a group, or the final group consists of a single character.
    /// </exception>
    public byte[] decode(string inputString)
    {
        if (inputString == null)
        {
            throw new ArgumentNullException("inputString");
        }

        if (EnforceMarks)
        {
            // NOTE(review): this only rejects input that lacks BOTH marks, although the
            // message reads as if each one were required. Kept as-is on purpose: tightening
            // it to "either mark missing" would start rejecting data that carries only the
            // suffix mark - confirm with callers before changing.
            if (!inputString.StartsWith(PrefixMark, StringComparison.Ordinal) &&
                !inputString.EndsWith(SuffixMark, StringComparison.Ordinal))
            {
                throw new Exception("ASCII85 encoded data should begin with '" + PrefixMark +
                    "' and end with '" + SuffixMark + "'");
            }
        }

        // Strip the marks when present. The configured marks are used for the test as well
        // as for the length, so customised marks are removed consistently.
        if (inputString.StartsWith(PrefixMark, StringComparison.Ordinal))
        {
            inputString = inputString.Substring(PrefixMark.Length);
        }
        if (inputString.EndsWith(SuffixMark, StringComparison.Ordinal))
        {
            inputString = inputString.Substring(0, inputString.Length - SuffixMark.Length);
        }

        // Start from a clean accumulator: an earlier call that ended with a partial group
        // (or threw midway) would otherwise leak its value into this decode.
        _tuple = 0;

        using (MemoryStream ms = new MemoryStream())
        {
            int count = 0;
            foreach (char c in inputString)
            {
                bool processChar;
                switch (c)
                {
                    case 'z':
                        // 'z' is shorthand for a whole group of four zero bytes.
                        if (count != 0)
                        {
                            throw new Exception("The character 'z' is invalid inside an ASCII85 block.");
                        }
                        _decodedBlock[0] = 0;
                        _decodedBlock[1] = 0;
                        _decodedBlock[2] = 0;
                        _decodedBlock[3] = 0;
                        ms.Write(_decodedBlock, 0, _decodedBlock.Length);
                        processChar = false;
                        break;
                    case '\n':
                    case '\r':
                    case '\t':
                    case '\0':
                    case '\f':
                    case '\b':
                    case ' ':
                        // Whitespace between digits carries no data and is skipped.
                        processChar = false;
                        break;
                    default:
                        if (c < '!' || c > 'u')
                        {
                            throw new Exception("Bad character '" + c + "' found. ASCII85 only allows characters '!' to 'u'.");
                        }
                        processChar = true;
                        break;
                }

                if (processChar)
                {
                    _tuple += ((uint)(c - _asciiOffset) * pow85[count]);
                    count++;
                    if (count == _encodedBlock.Length)
                    {
                        DecodeBlock();
                        ms.Write(_decodedBlock, 0, _decodedBlock.Length);
                        _tuple = 0;
                        count = 0;
                    }
                }
            }

            // A trailing partial group of k characters (2..4) yields k - 1 bytes.
            if (count != 0)
            {
                if (count == 1)
                {
                    throw new Exception("The last block of ASCII85 data cannot be a single byte.");
                }
                count--;
                // Round the truncated value up by one unit of the last supplied digit so
                // the leading bytes come out right once the missing digits are dropped.
                _tuple += pow85[count];
                DecodeBlock(count);
                for (int i = 0; i < count; i++)
                {
                    ms.WriteByte(_decodedBlock[i]);
                }
                _tuple = 0;
            }

            return ms.ToArray();
        }
    }

    /// <summary>
    /// Encodes binary data into a plaintext ASCII85 format string
    /// </summary>
    /// <param name="ba">binary data to encode</param>
    /// <returns>ASCII85 encoded string</returns>
    /// <exception cref="ArgumentNullException"><paramref name="ba"/> is null.</exception>
    public string encode(byte[] ba)
    {
        if (ba == null)
        {
            throw new ArgumentNullException("ba");
        }

        // Capacity hint of roughly 5 output characters per 4 input bytes. Computed in long
        // so that neither the integer division nor the multiplication distorts it.
        long estimated = (long)ba.Length * _encodedBlock.Length / _decodedBlock.Length;
        StringBuilder sb = new StringBuilder((int)Math.Min(int.MaxValue, estimated));

        _linePos = 0;
        if (EnforceMarks)
        {
            AppendString(sb, PrefixMark);
        }

        int count = 0;
        _tuple = 0;
        foreach (byte b in ba)
        {
            if (count >= _decodedBlock.Length - 1)
            {
                // Fourth byte of the group: the tuple is complete.
                _tuple |= b;
                if (_tuple == 0)
                {
                    AppendChar(sb, 'z');
                }
                else
                {
                    EncodeBlock(sb);
                }
                _tuple = 0;
                count = 0;
            }
            else
            {
                // Widen to uint before shifting so a byte >= 0x80 shifted into the top
                // position never passes through a negative int.
                _tuple |= (uint)b << (24 - (count * 8));
                count++;
            }
        }

        // A trailing partial group of n bytes is written as n + 1 characters.
        if (count > 0)
        {
            EncodeBlock(count + 1, sb);
        }

        if (EnforceMarks)
        {
            AppendString(sb, SuffixMark);
        }
        return sb.ToString();
    }

    /// <summary>Encodes the current tuple as a full five-character group.</summary>
    private void EncodeBlock(StringBuilder sb)
    {
        EncodeBlock(_encodedBlock.Length, sb);
    }

    /// <summary>
    /// Converts the current tuple into five base-85 digits and appends the first
    /// <paramref name="count"/> of them to <paramref name="sb"/>.
    /// </summary>
    private void EncodeBlock(int count, StringBuilder sb)
    {
        // Digits are produced least significant first, so fill the buffer from the end.
        for (int i = _encodedBlock.Length - 1; i >= 0; i--)
        {
            _encodedBlock[i] = (byte)((_tuple % 85) + _asciiOffset);
            _tuple /= 85;
        }
        for (int i = 0; i < count; i++)
        {
            char c = (char)_encodedBlock[i];
            AppendChar(sb, c);
        }
    }

    /// <summary>Unpacks the current tuple into all four bytes of the decoded block.</summary>
    private void DecodeBlock()
    {
        DecodeBlock(_decodedBlock.Length);
    }

    /// <summary>
    /// Unpacks the leading <paramref name="bytes"/> bytes of the current tuple,
    /// most significant byte first, into the decoded block.
    /// </summary>
    private void DecodeBlock(int bytes)
    {
        for (int i = 0; i < bytes; i++)
        {
            _decodedBlock[i] = (byte)(_tuple >> (24 - (i * 8)));
        }
    }

    /// <summary>
    /// Appends a string, first starting a new line when it would not fit within
    /// <see cref="LineLength"/>.
    /// </summary>
    private void AppendString(StringBuilder sb, string s)
    {
        if (LineLength > 0 && (_linePos + s.Length > LineLength))
        {
            _linePos = 0;
            sb.Append('\n');
        }
        sb.Append(s);
        // Count the string on whichever line it ended up on, including after a wrap.
        _linePos += s.Length;
    }

    /// <summary>
    /// Appends one character and breaks the line once <see cref="LineLength"/> is reached.
    /// </summary>
    private void AppendChar(StringBuilder sb, char c)
    {
        sb.Append(c);
        _linePos++;
        if (LineLength > 0 && (_linePos >= LineLength))
        {
            _linePos = 0;
            sb.Append('\n');
        }
    }

    /// <summary>
    /// Not implemented; always throws. Presumably present only to satisfy
    /// <c>IASCII85</c> - confirm against the interface.
    /// </summary>
    /// <exception cref="NotImplementedException">Always.</exception>
    public string decode(byte[] ba)
    {
        throw new NotImplementedException();
    }
}
我不了解你使用的 InflaterInputStream,所以我改用 System.IO.Compression.DeflateStream 代替。
我从你的示例文件中提取了所有带有 /Filter [ /ASCII85Decode /FlateDecode ] 的流内容,并把它们作为 String rawStreamChars 按如下方式解码:
// Stage 1: undo the ASCII85 layer, without insisting on the prefix/suffix marks.
ASCII85 decoder = new ASCII85 { EnforceMarks = false };
byte[] ascii85Decoded = decoder.decode(rawStreamChars);

// Stage 2: inflate the result and print it as windows-1252 text.
using (MemoryStream compressed = new MemoryStream(ascii85Decoded))
using (MemoryStream inflated = new MemoryStream())
{
    // Remove 2 bytes zlib header before handing the rest to DeflateStream.
    compressed.ReadByte();
    compressed.ReadByte();

    using (DeflateStream inflater = new DeflateStream(compressed, CompressionMode.Decompress))
    {
        inflater.CopyTo(inflated);
    }

    Encoding windows1252 = Encoding.GetEncoding("windows-1252");
    string text = windows1252.GetString(inflated.ToArray());
    Console.Out.WriteLine(text);
}
并且在每种情况下我都获得了相应的内容流。
因此,问题要么是:
- 你从流对象中提取流内容的方式有误;或者
- 你没有正确使用 ASCII85 类(例如使用了 EnforceMarks = true);或者
- 你的 decode 方法有问题。
嗯,或者有一些不太明显的错误在起作用。