.NET 如何准确比较字符串?
How does .NET compare strings exactly?
字符串比较的具体工作原理是什么? -这是我一直想知道的事情。
- C# 是否在检查每个字符之前检查每个字符串的长度?
- 还是先开始循环比较每个字符?
- 发现某个字符不同之后,它是继续检查剩下的每个字符,还是立即停止?
- 或者它甚至不直接遍历每个字符?
我知道这是一大堆问题,但我一直找不到任何答案。
如能提供解答或相关文档,不胜感激。谢谢!
根据参考源码 https://referencesource.microsoft.com/#mscorlib/system/string.cs,11648d2d83718c5e 来看:是的,它会先检查长度。长度相同时才进入 EqualsHelper,按块(一次比较多个字符,而不是逐个字符)进行比较,一旦发现不同就立即停止并返回 false。
// Reports whether obj is a String holding the same character sequence as
// this instance. A null or non-String argument yields false.
[ReliabilityContract(Consistency.WillNotCorruptState, Cer.MayFail)]
public override bool Equals(Object obj) {
    // A null 'this' can reach this method through reverse-pinvokes and other
    // callers that do not use the callvirt instruction, so check it explicitly.
    if (this == null) {
        throw new NullReferenceException();
    }

    String other = obj as String;
    if (other == null) {
        return false;
    }

    // The same reference is trivially equal. Otherwise the lengths must match
    // before the character data is compared by EqualsHelper.
    return Object.ReferenceEquals(this, obj)
        || (this.Length == other.Length && EqualsHelper(this, other));
}
// Compares the character data of two strings chunk by chunk through raw
// pointers. Callers must pass two non-null strings of equal length (see the
// Contract.Requires preconditions below). Returns true when every compared
// chunk matches and false as soon as a mismatch is found.
[System.Security.SecuritySafeCritical] // auto-generated
[ReliabilityContract(Consistency.WillNotCorruptState, Cer.MayFail)]
private unsafe static bool EqualsHelper(String strA, String strB)
{
// Preconditions: both strings are non-null and have the same length.
Contract.Requires(strA != null);
Contract.Requires(strB != null);
Contract.Requires(strA.Length == strB.Length);
// Number of chars still to be compared; decremented as chunks are consumed.
int length = strA.Length;
// 'fixed' pins both strings so the char pointers stay valid inside this block.
fixed (char* ap = &strA.m_firstChar) fixed (char* bp = &strB.m_firstChar)
{
// Moving cursors into each string's character data.
char* a = ap;
char* b = bp;
// unroll the loop
#if AMD64
// for AMD64 bit platform we unroll by 12 and
// check 3 qword at a time. This is less code
// than the 32 bit case and is shorter
// pathlength
// Each 'long' read covers 4 chars, so one iteration compares 12 chars.
while (length >= 12)
{
if (*(long*)a != *(long*)b) return false;
if (*(long*)(a+4) != *(long*)(b+4)) return false;
if (*(long*)(a+8) != *(long*)(b+8)) return false;
a += 12; b += 12; length -= 12;
}
#else
// Other platforms: each 'int' read covers 2 chars, so one iteration
// compares 10 chars using five reads.
while (length >= 10)
{
if (*(int*)a != *(int*)b) return false;
if (*(int*)(a+2) != *(int*)(b+2)) return false;
if (*(int*)(a+4) != *(int*)(b+4)) return false;
if (*(int*)(a+6) != *(int*)(b+6)) return false;
if (*(int*)(a+8) != *(int*)(b+8)) return false;
a += 10; b += 10; length -= 10;
}
#endif
// This depends on the fact that the String objects are
// always zero terminated and that the terminating zero is not included
// in the length. For odd string sizes, the last compare will include
// the zero terminator.
// Tail loop: compare the remaining chars two at a time (one int per read).
while (length > 0)
{
// On a mismatch, leave the loop with length still positive.
if (*(int*)a != *(int*)b) break;
a += 2; b += 2; length -= 2;
}
// length is <= 0 only when the tail loop finished without a mismatch
// (it ends at -1 for an odd remainder), so this is true exactly when
// all chunks matched.
return (length <= 0);
}
}
字符串比较的具体工作原理是什么? -这是我一直想知道的事情。
- C# 是否在检查每个字符之前检查每个字符串的长度?
- 还是先开始循环比较每个字符?
- 发现某个字符不同之后,它是继续检查剩下的每个字符,还是立即停止?
- 或者它甚至不直接遍历每个字符?
我知道这是一大堆问题,但我一直找不到任何答案。
如能提供解答或相关文档,不胜感激。谢谢!
根据参考源码 https://referencesource.microsoft.com/#mscorlib/system/string.cs,11648d2d83718c5e 来看:是的,它会先检查长度。长度相同时才进入 EqualsHelper,按块(一次比较多个字符,而不是逐个字符)进行比较,一旦发现不同就立即停止并返回 false。
// Reports whether obj is a String holding the same character sequence as
// this instance. A null or non-String argument yields false.
[ReliabilityContract(Consistency.WillNotCorruptState, Cer.MayFail)]
public override bool Equals(Object obj) {
    // A null 'this' can reach this method through reverse-pinvokes and other
    // callers that do not use the callvirt instruction, so check it explicitly.
    if (this == null) {
        throw new NullReferenceException();
    }

    String other = obj as String;
    if (other == null) {
        return false;
    }

    // The same reference is trivially equal. Otherwise the lengths must match
    // before the character data is compared by EqualsHelper.
    return Object.ReferenceEquals(this, obj)
        || (this.Length == other.Length && EqualsHelper(this, other));
}
// Compares the character data of two strings chunk by chunk through raw
// pointers. Callers must pass two non-null strings of equal length (see the
// Contract.Requires preconditions below). Returns true when every compared
// chunk matches and false as soon as a mismatch is found.
[System.Security.SecuritySafeCritical] // auto-generated
[ReliabilityContract(Consistency.WillNotCorruptState, Cer.MayFail)]
private unsafe static bool EqualsHelper(String strA, String strB)
{
// Preconditions: both strings are non-null and have the same length.
Contract.Requires(strA != null);
Contract.Requires(strB != null);
Contract.Requires(strA.Length == strB.Length);
// Number of chars still to be compared; decremented as chunks are consumed.
int length = strA.Length;
// 'fixed' pins both strings so the char pointers stay valid inside this block.
fixed (char* ap = &strA.m_firstChar) fixed (char* bp = &strB.m_firstChar)
{
// Moving cursors into each string's character data.
char* a = ap;
char* b = bp;
// unroll the loop
#if AMD64
// for AMD64 bit platform we unroll by 12 and
// check 3 qword at a time. This is less code
// than the 32 bit case and is shorter
// pathlength
// Each 'long' read covers 4 chars, so one iteration compares 12 chars.
while (length >= 12)
{
if (*(long*)a != *(long*)b) return false;
if (*(long*)(a+4) != *(long*)(b+4)) return false;
if (*(long*)(a+8) != *(long*)(b+8)) return false;
a += 12; b += 12; length -= 12;
}
#else
// Other platforms: each 'int' read covers 2 chars, so one iteration
// compares 10 chars using five reads.
while (length >= 10)
{
if (*(int*)a != *(int*)b) return false;
if (*(int*)(a+2) != *(int*)(b+2)) return false;
if (*(int*)(a+4) != *(int*)(b+4)) return false;
if (*(int*)(a+6) != *(int*)(b+6)) return false;
if (*(int*)(a+8) != *(int*)(b+8)) return false;
a += 10; b += 10; length -= 10;
}
#endif
// This depends on the fact that the String objects are
// always zero terminated and that the terminating zero is not included
// in the length. For odd string sizes, the last compare will include
// the zero terminator.
// Tail loop: compare the remaining chars two at a time (one int per read).
while (length > 0)
{
// On a mismatch, leave the loop with length still positive.
if (*(int*)a != *(int*)b) break;
a += 2; b += 2; length -= 2;
}
// length is <= 0 only when the tail loop finished without a mismatch
// (it ends at -1 for an odd remainder), so this is true exactly when
// all chunks matched.
return (length <= 0);
}
}