在 ByteArray 中定位 SubArray
Locating SubArray in ByteArray
摘要
我正在以块的形式从文件中读取字节(块大小尚未确定,大约在 128 到 1024 字节之间),并希望在缓冲区中搜索另一个字节数组所表示的签名(模式)。如果只在缓冲区末尾找到了签名的开头部分,则应从文件中再读取后续的几个字节,以确认是否构成完整匹配。
我试过的
/// <summary>
/// Checks whether <paramref name="signiture"/> occurs in <paramref name="buffer"/>, either entirely
/// inside the buffer or starting at the end of the buffer and continuing in the next bytes read
/// from <paramref name="file"/>.
/// </summary>
/// <param name="buffer">Bytes to search.</param>
/// <param name="signiture">Byte pattern to look for.</param>
/// <param name="file">
/// Stream the continuation bytes are read from (at its current position). It is only touched when
/// the buffer ends with a proper prefix of the signature.
/// </param>
/// <returns><c>true</c> when the signature is found; otherwise <c>false</c>.</returns>
/// <remarks>
/// Only the longest signature prefix found at the end of the buffer is followed up, and the bytes
/// read while doing so are consumed from the stream.
/// </remarks>
public static bool Contains(byte[] buffer, byte[] signiture, FileStream file)
{
    // Walk backwards through the buffer; i is the index of the candidate match's last byte.
    for (var i = buffer.Length - 1; i >= signiture.Length - 1; i--)
    {
        var found = true;
        // Compare the signature back to front, stopping at the first mismatch.
        for (var j = signiture.Length - 1; j >= 0 && found; j--)
        {
            found = buffer[i - (signiture.Length - 1 - j)] == signiture[j];
        }

        if (found)
        {
            return true;
        }
    }

    // No complete match: check whether the buffer ends with the first x bytes of the signature.
    for (var x = signiture.Length - 1; x >= 1; x--)
    {
        if (buffer.Skip(buffer.Length - x).Take(x).SequenceEqual(signiture.Take(x)))
        {
            var needed = signiture.Length - x;
            byte[] nextBytes = new byte[needed];

            // Read may return fewer bytes than requested, so keep reading until the
            // remainder is complete or the end of the stream is reached.
            var total = 0;
            while (total < needed)
            {
                var read = file.Read(nextBytes, total, needed - total);
                if (read == 0)
                {
                    break;
                }

                total += read;
            }

            // An incomplete read can never be a match; without this check the zero-filled
            // tail of nextBytes could be mistaken for signature bytes.
            return total == needed && signiture.Skip(x).SequenceEqual(nextBytes);
        }
    }

    return false;
}
这行得通,但有人告诉我 linq 很慢,我应该使用 Array.IndexOf()。我试过了,但不知道如何实现它
您可以使用 Span<T>、AsSpan 和 MemoryExtensions.SequenceEqual。后者不是 LINQ;它经过专门优化,对字节数组尤其如此:它会展开循环,并使用不安全代码执行类似 memcmp 的比较。
如果您使用的框架默认不包含这些类型和方法(.NET Core 2.1+ 和 .NET Standard 2.1 默认包含),可以添加 System.Memory NuGet 包。该包中 SequenceEqual 的实现略有不同(所谓的“慢速版本”),但仍然比 LINQ 的 SequenceEqual 更快。
请注意,您还需要检查 FileStream.Read 的返回值。
/// <summary>
/// Checks whether <paramref name="signiture"/> occurs in <paramref name="buffer"/>, either entirely
/// inside the buffer or starting at the end of the buffer and continuing in the next bytes read
/// from <paramref name="file"/>.
/// </summary>
/// <param name="buffer">Bytes to search.</param>
/// <param name="signiture">Byte pattern to look for.</param>
/// <param name="file">
/// Stream the continuation bytes are read from (at its current position). It is only touched when
/// the buffer ends with a proper prefix of the signature.
/// </param>
/// <returns><c>true</c> when the signature is found; otherwise <c>false</c>.</returns>
/// <remarks>
/// Only the longest signature prefix found at the end of the buffer is followed up, and the bytes
/// read while doing so are consumed from the stream.
/// </remarks>
public static bool Contains(byte[] buffer, byte[] signiture, FileStream file)
{
    var sigSpan = signiture.AsSpan();

    // Move backwards through the buffer and check every window of signature length.
    for (var i = buffer.Length - signiture.Length; i >= 0; i--)
    {
        if (buffer.AsSpan(i, signiture.Length).SequenceEqual(sigSpan))
        {
            return true;
        }
    }

    // Look for a signature prefix of length x at the end of the buffer. A prefix longer than
    // the buffer cannot fit, and clamping keeps the slice start below from going negative.
    for (var x = Math.Min(signiture.Length - 1, buffer.Length); x >= 1; x--)
    {
        if (!buffer.AsSpan(buffer.Length - x).SequenceEqual(sigSpan.Slice(0, x)))
        {
            continue;
        }

        var needed = signiture.Length - x;
        byte[] nextBytes = ArrayPool<byte>.Shared.Rent(needed);
        try
        {
            // Read may return fewer bytes than requested, so keep reading until the
            // remainder is complete or the end of the stream is reached.
            var total = 0;
            while (total < needed)
            {
                var read = file.Read(nextBytes, total, needed - total);
                if (read == 0)
                {
                    break;
                }

                total += read;
            }

            // The buffer tail already equals the first x signature bytes, so only the
            // remainder has to be compared. The rented array may be larger than requested,
            // hence the explicit slice.
            return total == needed
                && sigSpan.Slice(x).SequenceEqual(nextBytes.AsSpan(0, needed));
        }
        finally
        {
            // Hand the array back to the pool even if the read throws.
            ArrayPool<byte>.Shared.Return(nextBytes);
        }
    }

    return false;
}
摘要
我正在以块的形式从文件中读取字节(块大小尚未确定,大约在 128 到 1024 字节之间),并希望在缓冲区中搜索另一个字节数组所表示的签名(模式)。如果只在缓冲区末尾找到了签名的开头部分,则应从文件中再读取后续的几个字节,以确认是否构成完整匹配。
我试过的
/// <summary>
/// Checks whether <paramref name="signiture"/> occurs in <paramref name="buffer"/>, either entirely
/// inside the buffer or starting at the end of the buffer and continuing in the next bytes read
/// from <paramref name="file"/>.
/// </summary>
/// <param name="buffer">Bytes to search.</param>
/// <param name="signiture">Byte pattern to look for.</param>
/// <param name="file">
/// Stream the continuation bytes are read from (at its current position). It is only touched when
/// the buffer ends with a proper prefix of the signature.
/// </param>
/// <returns><c>true</c> when the signature is found; otherwise <c>false</c>.</returns>
/// <remarks>
/// Only the longest signature prefix found at the end of the buffer is followed up, and the bytes
/// read while doing so are consumed from the stream.
/// </remarks>
public static bool Contains(byte[] buffer, byte[] signiture, FileStream file)
{
    // Walk backwards through the buffer; i is the index of the candidate match's last byte.
    for (var i = buffer.Length - 1; i >= signiture.Length - 1; i--)
    {
        var found = true;
        // Compare the signature back to front, stopping at the first mismatch.
        for (var j = signiture.Length - 1; j >= 0 && found; j--)
        {
            found = buffer[i - (signiture.Length - 1 - j)] == signiture[j];
        }

        if (found)
        {
            return true;
        }
    }

    // No complete match: check whether the buffer ends with the first x bytes of the signature.
    for (var x = signiture.Length - 1; x >= 1; x--)
    {
        if (buffer.Skip(buffer.Length - x).Take(x).SequenceEqual(signiture.Take(x)))
        {
            var needed = signiture.Length - x;
            byte[] nextBytes = new byte[needed];

            // Read may return fewer bytes than requested, so keep reading until the
            // remainder is complete or the end of the stream is reached.
            var total = 0;
            while (total < needed)
            {
                var read = file.Read(nextBytes, total, needed - total);
                if (read == 0)
                {
                    break;
                }

                total += read;
            }

            // An incomplete read can never be a match; without this check the zero-filled
            // tail of nextBytes could be mistaken for signature bytes.
            return total == needed && signiture.Skip(x).SequenceEqual(nextBytes);
        }
    }

    return false;
}
这行得通,但有人告诉我 linq 很慢,我应该使用 Array.IndexOf()。我试过了,但不知道如何实现它
您可以使用 Span<T>、AsSpan 和 MemoryExtensions.SequenceEqual。后者不是 LINQ;它经过专门优化,对字节数组尤其如此:它会展开循环,并使用不安全代码执行类似 memcmp 的比较。
如果您使用的框架默认不包含这些类型和方法(.NET Core 2.1+ 和 .NET Standard 2.1 默认包含),可以添加 System.Memory NuGet 包。该包中 SequenceEqual 的实现略有不同(所谓的“慢速版本”),但仍然比 LINQ 的 SequenceEqual 更快。
请注意,您还需要检查 FileStream.Read 的返回值。
/// <summary>
/// Checks whether <paramref name="signiture"/> occurs in <paramref name="buffer"/>, either entirely
/// inside the buffer or starting at the end of the buffer and continuing in the next bytes read
/// from <paramref name="file"/>.
/// </summary>
/// <param name="buffer">Bytes to search.</param>
/// <param name="signiture">Byte pattern to look for.</param>
/// <param name="file">
/// Stream the continuation bytes are read from (at its current position). It is only touched when
/// the buffer ends with a proper prefix of the signature.
/// </param>
/// <returns><c>true</c> when the signature is found; otherwise <c>false</c>.</returns>
/// <remarks>
/// Only the longest signature prefix found at the end of the buffer is followed up, and the bytes
/// read while doing so are consumed from the stream.
/// </remarks>
public static bool Contains(byte[] buffer, byte[] signiture, FileStream file)
{
    var sigSpan = signiture.AsSpan();

    // Move backwards through the buffer and check every window of signature length.
    for (var i = buffer.Length - signiture.Length; i >= 0; i--)
    {
        if (buffer.AsSpan(i, signiture.Length).SequenceEqual(sigSpan))
        {
            return true;
        }
    }

    // Look for a signature prefix of length x at the end of the buffer. A prefix longer than
    // the buffer cannot fit, and clamping keeps the slice start below from going negative.
    for (var x = Math.Min(signiture.Length - 1, buffer.Length); x >= 1; x--)
    {
        if (!buffer.AsSpan(buffer.Length - x).SequenceEqual(sigSpan.Slice(0, x)))
        {
            continue;
        }

        var needed = signiture.Length - x;
        byte[] nextBytes = ArrayPool<byte>.Shared.Rent(needed);
        try
        {
            // Read may return fewer bytes than requested, so keep reading until the
            // remainder is complete or the end of the stream is reached.
            var total = 0;
            while (total < needed)
            {
                var read = file.Read(nextBytes, total, needed - total);
                if (read == 0)
                {
                    break;
                }

                total += read;
            }

            // The buffer tail already equals the first x signature bytes, so only the
            // remainder has to be compared. The rented array may be larger than requested,
            // hence the explicit slice.
            return total == needed
                && sigSpan.Slice(x).SequenceEqual(nextBytes.AsSpan(0, needed));
        }
        finally
        {
            // Hand the array back to the pool even if the read throws.
            ArrayPool<byte>.Shared.Return(nextBytes);
        }
    }

    return false;
}