我试图改变来自流的字节因为解码,但它不起作用

I'm trying to mutate bytes coming from a stream because decoding, but it's not working

我正在尝试包装一个 io.ReaderCloser,它在生产中来自请求处理程序,带有一个可以传递到 JSON 解码器的自定义 reader。

我创建了下面的

import (
    "io"
)

// RemoveNull is a stream wrapper that should remove null bytes from the byte stream
type RemoveNull struct {
    Reader io.ReadCloser
}

// NewRemoveNullStream creates a new RemoveNull reader which passes the stream through a null check first
func NewRemoveNullStream(reader io.ReadCloser) RemoveNull {
    return RemoveNull{
        Reader: reader,
    }
}

// Read wraps a Reader to remove null bytes in the stream
func (null RemoveNull) Read(p []byte) (n int, err error) {
    n, err = null.Reader.Read(p)
    if err != nil {
        return n, err
    }

    nn := 0
    for i := range p {
        if p[i] != 0 {
            p[nn] = p[i]
            nn++

        }
    }
    p = p[:nn]
    // fmt.Println(p) i can see the value of p changing and all the null bytes are removed
    return n, nil
}

// Close closes the internal reader
func (null RemoveNull) Close() error {
    return null.Close()
}

当我运行以下时,我可以从打印语句中看到确实所有的空字节都被删除了,并且 len(p) == 所有预期的好字节的大小。我在下面编写了测试以查看代码是否按我的预期工作,而这正是我意识到它不是的地方。

这是完整的测试

import (
    "bytes"
    "encoding/json"
    "fmt"
    "io"
    "io/ioutil"
    "testing"

    "github.com/francoispqt/gojay" // can be replaced with the std json lib, code still doesn't work
)

func TestRemoveNull_Read(t *testing.T) {
    type fields struct {
        Reader io.ReadCloser
    }
    tests := []struct {
        name   string
        fields fields
        want   string
    }{
        {
            name: "should remove null bytes",
            fields: fields{
                Reader: ioutil.NopCloser(bytes.NewReader([]byte{123, 34, 98, 111, 100, 121, 34, 58, 34, 102, 101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 109, 97, 108, 101, 34, 125})),
            },
            want: "female",
        },
    }
    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            reader := tt.fields.Reader
            reader = NewRemoveNullStream(tt.fields.Reader) // wrapper the reader above in the nullByte reader

            // passed the reader into this JSON unmarshaller
            decoder := gojay.BorrowDecoder(reader)
            defer decoder.Release()

            var v _testStruct
            err := decoder.DecodeObject(&v)
            if err != nil {
                t.Fatalf("ReadAll failed %v", err)
            }

            bb, _ := json.Marshal(v)
            fmt.Println(string(bb)) // all the null bytes are still present

            fmt.Println(len(v.Body), len(tt.want))
            if v.Body != tt.want {
                t.Fatalf("DecodeObject() unexpected value, got %s want %s", v.Body, tt.want)
            }
        })
    }
}

type _testStruct struct {
    Body string `json:"body"`
}

func (v *_testStruct) UnmarshalJSONObject(dec *gojay.Decoder, k string) error {

    switch k {
    case "body":
        err := dec.String(&v.Body)
        return err
    }
    return nil
}

// NKeys returns the number of keys to unmarshal
func (v *_testStruct) NKeys() int { return 0 }

从测试中我可以看到解码时所有空字节仍然存在,但在 RemoveNull reader 中我可以看到所有空字节都已从下划线数组中删除。关于哪里出了问题以及如何实现从流中删除字节以避免解码器解码空字节的目标有什么想法吗?

您的读取实现有错误。它在 io.EOF 的情况下提前终止,其中既有错误又有数据。它 returns 读取了错误的字节数。您分配切片的最后一部分也没有意义,因为它不会更新传递给函数的切片。

试试这个:

func (null RemoveNull) Read(p []byte) (n int, err error) {
    n, err = null.Reader.Read(p)
    nn := 0
    for i:=0;i<n;i++ {
        if p[i] != 0 {
            p[nn] = p[i]
            nn++

        }
    }
    return nn, err
}