使用 flate.NewWriterDict 的压缩很奇怪
The compression using flate.NewWriterDict is strange
问题
第一个过程
compressed_1 : []byte{0xf2, 0x48, 0xcd, 0xc9, 0xc9, 0x7, 0x0}
第二个过程(用字典压缩)
compressed_2 : []byte{0x2, 0x13, 0x0, 0x0}
但我预计
compressed_2 : []byte{0xf2, 0x0, 0x11, 0x0, 0x0}
我的期望基于以下内容。
https://www.rfc-editor.org/rfc/rfc7692#section-7.2.3.2
我已经用其他实现(Python)确认过,它们压缩后得到的正是上述预期结果。
https://github.com/aaugustin/websockets
不知道这是 Go 实现本身的原因,还是我的用法有问题。
如果你熟悉zlib和flate,请多多指教...
代码
执行代码
package flate_dict
import (
"bytes"
"compress/flate"
"fmt"
)
// compressWithDict deflates the string "Hello" twice at the given compression
// level and prints both results in Go syntax:
//
//   - "compressed_1": output of a plain flate writer;
//   - "compressed_2": output of a flate writer primed with "Hello" as a
//     preset dictionary.
//
// Each stream is flushed and then closed, and the framing bytes those two
// calls append are stripped before printing, so only the compressed payload
// is shown.
//
// Any error (for example a level that compress/flate rejects) is printed and
// the function returns early instead of dereferencing a nil writer.
func compressWithDict(level int) {
	// Number of trailing bytes removed from each stream: what Flush followed
	// by Close appends after the payload for this input.
	const trailerLen = 9

	src := []byte("Hello")

	// finish writes src through w, flushes and closes it, and returns the
	// bytes collected in buf without the trailing framing.
	finish := func(w *flate.Writer, buf *bytes.Buffer) ([]byte, error) {
		if _, err := w.Write(src); err != nil {
			return nil, err
		}
		if err := w.Flush(); err != nil {
			return nil, err
		}
		if err := w.Close(); err != nil {
			return nil, err
		}
		out := buf.Bytes()
		if len(out) < trailerLen {
			return nil, fmt.Errorf("deflate output too short: %d bytes", len(out))
		}
		return out[:len(out)-trailerLen], nil
	}

	// First pass: no dictionary.
	plainBuf := &bytes.Buffer{}
	plainWriter, err := flate.NewWriter(plainBuf, level)
	if err != nil {
		fmt.Printf("compressWithDict: %v\n", err)
		return
	}
	plain, err := finish(plainWriter, plainBuf)
	if err != nil {
		fmt.Printf("compressWithDict: %v\n", err)
		return
	}
	fmt.Printf("compressed_1 : %#v\n", plain)

	// Second pass: same input, with "Hello" supplied as the preset dictionary.
	dct := []byte{0x48, 0x65, 0x6c, 0x6c, 0x6f}
	primedBuf := &bytes.Buffer{}
	primedWriter, err := flate.NewWriterDict(primedBuf, level, dct)
	if err != nil {
		fmt.Printf("compressWithDict: %v\n", err)
		return
	}
	primed, err := finish(primedWriter, primedBuf)
	if err != nil {
		fmt.Printf("compressWithDict: %v\n", err)
		return
	}
	fmt.Printf("compressed_2 : %#v\n\n", primed)
}
用于调试的测试代码
package flate_dict
import "testing"
// Test_compressWithDict invokes compressWithDict once for every integer level
// from -2 through 9, each in its own named subtest ("01" .. "12"). It makes
// no assertions; it exists so the printed output can be inspected per level.
func Test_compressWithDict(t *testing.T) {
	type levelCase struct {
		name  string
		level int
	}

	cases := []levelCase{
		{name: "01", level: -2},
		{name: "02", level: -1},
		{name: "03", level: 0},
		{name: "04", level: 1},
		{name: "05", level: 2},
		{name: "06", level: 3},
		{name: "07", level: 4},
		{name: "08", level: 5},
		{name: "09", level: 6},
		{name: "10", level: 7},
		{name: "11", level: 8},
		{name: "12", level: 9},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			compressWithDict(tc.level)
		})
	}
}
一般来说,不同的压缩实现并不保证产生完全相同的输出,它们只保证输出可以被另一个符合规范的实现正确解压。另外,您在这里除了 Close 之外还额外调用了 Flush,这会多写入一个同步标记,这就是为什么您必须在末尾切掉 9 个字节而不是 4 个字节才能得到上面的输出。
没有用于比较的示例代码,我们无法准确判断您在比较什么,但输出似乎是您所期望的:
// Deflate "Hello" at level 9 using the bytes of "Hello" as the preset
// dictionary, then inflate the result with the same dictionary.
dict := []byte{0x48, 0x65, 0x6c, 0x6c, 0x6f}
out := &bytes.Buffer{}
cmp, _ := flate.NewWriterDict(out, 9, dict)
cmp.Write([]byte("Hello"))
// Only Flush is called (no Close); the printed bytes below therefore end
// with 0x0, 0x0, 0xff, 0xff.
cmp.Flush()
fmt.Printf("deflated: %#v\n", out.Bytes())
// The reader must be given the same dictionary the writer was primed with.
r := flate.NewReaderDict(out, dict)
// NOTE(review): cmp is never closed, so the stream has no final block;
// ReadAll presumably returns an unexpected-EOF error that is discarded
// here — confirm before reusing this pattern.
orig, _ := ioutil.ReadAll(r)
fmt.Println("inflated:", string(orig))
// deflated: []byte{0x2, 0x13, 0x0, 0x0, 0x0, 0x0, 0xff, 0xff}
// inflated: Hello
问题
第一个过程
compressed_1 : []byte{0xf2, 0x48, 0xcd, 0xc9, 0xc9, 0x7, 0x0}
第二个过程(用字典压缩)
compressed_2 : []byte{0x2, 0x13, 0x0, 0x0}
但我预计
compressed_2 : []byte{0xf2, 0x0, 0x11, 0x0, 0x0}
我的期望基于以下内容。 https://www.rfc-editor.org/rfc/rfc7692#section-7.2.3.2
我已经用其他实现(Python)确认过,它们压缩后得到的正是上述预期结果。 https://github.com/aaugustin/websockets
不知道这是 Go 实现本身的原因,还是我的用法有问题。 如果你熟悉 zlib 和 flate,请多多指教...
代码
执行代码
package flate_dict
import (
"bytes"
"compress/flate"
"fmt"
)
// compressWithDict deflates the string "Hello" twice at the given compression
// level and prints both results in Go syntax:
//
//   - "compressed_1": output of a plain flate writer;
//   - "compressed_2": output of a flate writer primed with "Hello" as a
//     preset dictionary.
//
// Each stream is flushed and then closed, and the framing bytes those two
// calls append are stripped before printing, so only the compressed payload
// is shown.
//
// Any error (for example a level that compress/flate rejects) is printed and
// the function returns early instead of dereferencing a nil writer.
func compressWithDict(level int) {
	// Number of trailing bytes removed from each stream: what Flush followed
	// by Close appends after the payload for this input.
	const trailerLen = 9

	src := []byte("Hello")

	// finish writes src through w, flushes and closes it, and returns the
	// bytes collected in buf without the trailing framing.
	finish := func(w *flate.Writer, buf *bytes.Buffer) ([]byte, error) {
		if _, err := w.Write(src); err != nil {
			return nil, err
		}
		if err := w.Flush(); err != nil {
			return nil, err
		}
		if err := w.Close(); err != nil {
			return nil, err
		}
		out := buf.Bytes()
		if len(out) < trailerLen {
			return nil, fmt.Errorf("deflate output too short: %d bytes", len(out))
		}
		return out[:len(out)-trailerLen], nil
	}

	// First pass: no dictionary.
	plainBuf := &bytes.Buffer{}
	plainWriter, err := flate.NewWriter(plainBuf, level)
	if err != nil {
		fmt.Printf("compressWithDict: %v\n", err)
		return
	}
	plain, err := finish(plainWriter, plainBuf)
	if err != nil {
		fmt.Printf("compressWithDict: %v\n", err)
		return
	}
	fmt.Printf("compressed_1 : %#v\n", plain)

	// Second pass: same input, with "Hello" supplied as the preset dictionary.
	dct := []byte{0x48, 0x65, 0x6c, 0x6c, 0x6f}
	primedBuf := &bytes.Buffer{}
	primedWriter, err := flate.NewWriterDict(primedBuf, level, dct)
	if err != nil {
		fmt.Printf("compressWithDict: %v\n", err)
		return
	}
	primed, err := finish(primedWriter, primedBuf)
	if err != nil {
		fmt.Printf("compressWithDict: %v\n", err)
		return
	}
	fmt.Printf("compressed_2 : %#v\n\n", primed)
}
用于调试的测试代码
package flate_dict
import "testing"
// Test_compressWithDict invokes compressWithDict once for every integer level
// from -2 through 9, each in its own named subtest ("01" .. "12"). It makes
// no assertions; it exists so the printed output can be inspected per level.
func Test_compressWithDict(t *testing.T) {
	type levelCase struct {
		name  string
		level int
	}

	cases := []levelCase{
		{name: "01", level: -2},
		{name: "02", level: -1},
		{name: "03", level: 0},
		{name: "04", level: 1},
		{name: "05", level: 2},
		{name: "06", level: 3},
		{name: "07", level: 4},
		{name: "08", level: 5},
		{name: "09", level: 6},
		{name: "10", level: 7},
		{name: "11", level: 8},
		{name: "12", level: 9},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			compressWithDict(tc.level)
		})
	}
}
一般来说,不同的压缩实现并不保证产生完全相同的输出,它们只保证输出可以被另一个符合规范的实现正确解压。另外,您在这里除了 Close 之外还额外调用了 Flush,这会多写入一个同步标记,这就是为什么您必须在末尾切掉 9 个字节而不是 4 个字节才能得到上面的输出。
没有用于比较的示例代码,我们无法准确判断您在比较什么,但输出似乎是您所期望的:
// Deflate "Hello" at level 9 using the bytes of "Hello" as the preset
// dictionary, then inflate the result with the same dictionary.
dict := []byte{0x48, 0x65, 0x6c, 0x6c, 0x6f}
out := &bytes.Buffer{}
cmp, _ := flate.NewWriterDict(out, 9, dict)
cmp.Write([]byte("Hello"))
// Only Flush is called (no Close); the printed bytes below therefore end
// with 0x0, 0x0, 0xff, 0xff.
cmp.Flush()
fmt.Printf("deflated: %#v\n", out.Bytes())
// The reader must be given the same dictionary the writer was primed with.
r := flate.NewReaderDict(out, dict)
// NOTE(review): cmp is never closed, so the stream has no final block;
// ReadAll presumably returns an unexpected-EOF error that is discarded
// here — confirm before reusing this pattern.
orig, _ := ioutil.ReadAll(r)
fmt.Println("inflated:", string(orig))
// deflated: []byte{0x2, 0x13, 0x0, 0x0, 0x0, 0x0, 0xff, 0xff}
// inflated: Hello