在 ByteArray 中定位 SubArray

Locating SubArray in ByteArray

摘要

我正在以块的形式从文件中读取字节(块大小尚未确定,大约在 128 到 1024 字节之间),并希望在缓冲区中搜索另一个字节数组给出的签名(模式)。如果在缓冲区末尾只找到了签名的开头部分,则应再从文件中读取接下来的几个字节,以确认是否构成完整匹配。

我试过的

/// <summary>
/// Determines whether <paramref name="signiture"/> occurs inside <paramref name="buffer"/>, or begins at
/// the very end of <paramref name="buffer"/> and is completed by the next bytes read from
/// <paramref name="file"/>.
/// </summary>
/// <param name="buffer">The chunk of bytes to search.</param>
/// <param name="signiture">The byte pattern to look for. An empty pattern is always reported as found.</param>
/// <param name="file">
/// Stream that supplies continuation bytes when the buffer ends with a proper prefix of the signature.
/// Bytes are only read from it in that case; at most <c>signiture.Length - 1</c> bytes are consumed.
/// </param>
/// <returns>
/// <c>true</c> if the signature lies entirely within the buffer, or if a tail of the buffer followed by
/// the bytes read from <paramref name="file"/> forms the signature; otherwise <c>false</c>.
/// </returns>
public static bool Contains(byte[] buffer, byte[] signiture, FileStream file)
{
    var sigLen = signiture.Length;

    // Scan backwards over every offset at which the whole signature fits inside the buffer.
    for (var start = buffer.Length - sigLen; start >= 0; start--)
    {
        var found = true;
        for (var j = 0; j < sigLen && found; j++)
        {
            found = buffer[start + j] == signiture[j];
        }

        if (found)
        {
            return true;
        }
    }

    // A partial match can be no longer than the buffer itself, nor as long as the full signature.
    var longestPartial = sigLen - 1;
    if (longestPartial > buffer.Length)
    {
        longestPartial = buffer.Length;
    }

    // Continuation bytes read from the file so far. They are kept across iterations so that a shorter
    // candidate tail can reuse what a longer (failed) candidate already consumed.
    byte[] lookahead = null;
    var available = 0;

    for (var x = longestPartial; x >= 1; x--)
    {
        // Does the last x bytes of the buffer equal the first x bytes of the signature?
        var tailStart = buffer.Length - x;
        var tailMatches = true;
        for (var k = 0; k < x && tailMatches; k++)
        {
            tailMatches = buffer[tailStart + k] == signiture[k];
        }

        if (!tailMatches)
        {
            continue;
        }

        var needed = sigLen - x;
        if (lookahead == null)
        {
            lookahead = new byte[sigLen - 1];
        }

        // Read may return fewer bytes than requested, so keep reading until we have enough or hit
        // end of file. Shorter tails need even more bytes, so running out here is a definite miss.
        while (available < needed)
        {
            var read = file.Read(lookahead, available, needed - available);
            if (read <= 0)
            {
                return false;
            }

            available += read;
        }

        // The tail already equals the signature's prefix; only the remainder needs comparing.
        var restMatches = true;
        for (var k = 0; k < needed && restMatches; k++)
        {
            restMatches = lookahead[k] == signiture[x + k];
        }

        if (restMatches)
        {
            return true;
        }

        // Not a match for this tail; a shorter overlapping tail may still start a match.
    }

    return false;
}

这行得通,但有人告诉我 LINQ 很慢,应该改用 Array.IndexOf()。我试过了,但不知道该如何实现。

您可以使用 Span<T>、AsSpan 和 MemoryExtensions.SequenceEqual。后者不是 LINQ;它经过优化,尤其是针对字节数组:它会展开循环,并使用不安全代码来执行 memcmp。

如果您使用的框架默认不包含这些类型/方法(.NET Core 2.1+ 和 .NET Standard 2.1 默认包含),可以添加 System.Memory NuGet 包。该包中 SequenceEqual 的实现略有不同(即所谓的 "slow version"),但仍然比 LINQ 的 SequenceEqual 更快。

请注意,您还需要检查 FileStream.Read 的返回值。

/// <summary>
/// Determines whether <paramref name="signiture"/> occurs inside <paramref name="buffer"/>, or begins at
/// the very end of <paramref name="buffer"/> and is completed by the next bytes read from
/// <paramref name="file"/>.
/// </summary>
/// <param name="buffer">The chunk of bytes to search.</param>
/// <param name="signiture">The byte pattern to look for. An empty pattern is always reported as found.</param>
/// <param name="file">
/// Stream that supplies continuation bytes when the buffer ends with a proper prefix of the signature.
/// Bytes are only read from it in that case; at most <c>signiture.Length - 1</c> bytes are consumed.
/// </param>
/// <returns>
/// <c>true</c> if the signature lies entirely within the buffer, or if a tail of the buffer followed by
/// the bytes read from <paramref name="file"/> forms the signature; otherwise <c>false</c>.
/// </returns>
public static bool Contains(byte[] buffer, byte[] signiture, FileStream file)
{
    var sigSpan = signiture.AsSpan();
    var sigLen = signiture.Length;

    // Move backwards through the buffer and check every offset where the full signature fits.
    for (var i = buffer.Length - sigLen; i >= 0; i--)
    {
        if (buffer.AsSpan(i, sigLen).SequenceEqual(sigSpan))
        {
            return true;
        }
    }

    // A partial match cannot be longer than the buffer; without this cap, a signature longer than
    // the buffer would ask AsSpan for a negative start index and throw.
    var longestPartial = Math.Min(sigLen - 1, buffer.Length);

    // Continuation bytes read so far. Rented lazily, and shared across candidate tails so a shorter
    // tail can reuse what a longer (failed) candidate already consumed from the file.
    byte[] lookahead = null;
    var available = 0;

    try
    {
        for (var x = longestPartial; x >= 1; x--)
        {
            // Does the last x bytes of the buffer equal the first x bytes of the signature?
            if (!buffer.AsSpan(buffer.Length - x).SequenceEqual(sigSpan.Slice(0, x)))
            {
                continue;
            }

            var needed = sigLen - x;
            if (lookahead == null)
            {
                lookahead = ArrayPool<byte>.Shared.Rent(sigLen - 1);
            }

            // Read may return fewer bytes than requested, so keep reading until we have enough or
            // hit end of file. Shorter tails need even more bytes, so running out is a definite miss.
            while (available < needed)
            {
                var read = file.Read(lookahead, available, needed - available);
                if (read <= 0)
                {
                    return false;
                }

                available += read;
            }

            // The tail already equals the signature's prefix; only the remainder needs comparing.
            if (sigSpan.Slice(x).SequenceEqual(lookahead.AsSpan(0, needed)))
            {
                return true;
            }

            // Not a match for this tail; a shorter overlapping tail may still start a match.
        }

        return false;
    }
    finally
    {
        // Hand the rented array back even if Read throws.
        if (lookahead != null)
        {
            ArrayPool<byte>.Shared.Return(lookahead);
        }
    }
}