strcmp 实现不适用于特殊字符

strcmp implementation not working with special characters

我正在尝试实现自己的 strcmp 函数,但当字符串中包含特殊字符时,我的 strcmp 返回的结果与标准库的 strcmp 不一致。

#include <stdio.h>
#include <string.h>

/*
 * Compare two NUL-terminated strings character by character.
 *
 * Returns 0 when the strings are equal; otherwise returns the difference
 * between the first pair of characters that differ, computed on plain char
 * values. The signedness of plain char is implementation-defined, so for
 * bytes outside the ASCII range the sign of the result depends on the
 * platform.
 */
int my_strcmp(const char *s1, const char *s2)
{
    const char *lhs = s1;
    const char *rhs = s2;

    for (; *lhs != '\0' && *lhs == *rhs; lhs++, rhs++)
    {
        /* skip the common prefix */
    }
    return *lhs - *rhs;
}

/*
 * Print the result of my_strcmp() next to the library strcmp() for a pair
 * of strings whose first difference involves a non-ASCII character.
 */
int main(void)
{
   /* String literals must not be modified, so refer to them through const. */
   const char *src = "a§bcDef";
   const char *des = "acbcDef";

   printf("%d %d\n", my_strcmp(des, src), strcmp(des, src));
   return 0;
}

输出

161 -95

在许多实现中,char 是有符号类型,因此您的 strcmp 实现会把小于 0 的 char 值当作比大于 0 的值更小。您可能应该改为按无符号值进行比较。

/* View the strings as unsigned bytes; the casts keep the const qualifier. */
const unsigned char *str1 = (const unsigned char *) s1;
const unsigned char *str2 = (const unsigned char *) s2;

以下是标准关于 strcmp 的内容,相关部分以粗体突出显示:

The sign of a non-zero return value shall be determined by the sign of the difference between the values of the first pair of bytes (both interpreted as type unsigned char) that differ in the strings being compared.

您的代码以 char 类型计算两个字节之差;如果 char 是有符号的,得到的结果就与规范不一致。

改为:

/* Convert each byte to unsigned char before subtracting, so that the sign of
   the result follows unsigned byte order as the strcmp specification requires. */
return (unsigned char)(*str1) - (unsigned char)(*str2);

下面是一些测试用例,分别针对原始代码(my_strcmp)、dasblinkenlight 目前被采纳的答案(my_strcmp1)以及本答案(my_strcmp2)。只有 my_strcmp2 通过了测试。

#include <string.h>
#include <stdio.h>

/*
 * Compare two NUL-terminated strings, reading every byte as signed char.
 *
 * Returns 0 for equal strings, otherwise the signed-char difference of the
 * first differing pair. Bytes with the high bit set therefore compare as
 * negative values, which is not the unsigned byte order strcmp() uses.
 */
int my_strcmp(const char *s1, const char *s2) {
    const signed char *lhs = (const signed char *)s1;
    const signed char *rhs = (const signed char *)s2;

    for (;;) {
        /* Stop at the first mismatch or at the shared terminator. */
        if (*lhs != *rhs || *lhs == 0) {
            break;
        }
        lhs++;
        rhs++;
    }
    return *lhs - *rhs;
}

/*
 * Compare two NUL-terminated strings, reading every byte as signed char and
 * narrowing the final difference back to signed char.
 *
 * Returns 0 for equal strings, otherwise the signed-char difference of the
 * first differing pair converted to signed char before being returned.
 */
int my_strcmp1(const char *s1, const char *s2) {
    const signed char *p = (const signed char *)s1;
    const signed char *q = (const signed char *)s2;

    while (*p && *p == *q) {
        ++p;
        ++q;
    }

    int diff = *p - *q;
    return (signed char)diff;
}

/*
 * Compare two NUL-terminated strings; the result is computed on the bytes
 * converted to unsigned char.
 *
 * Returns 0 for equal strings, otherwise the difference between the first
 * differing pair of bytes, each converted to unsigned char first.
 */
int my_strcmp2(const char *s1, const char *s2) {
    const signed char *a = (const signed char *)s1;
    const signed char *b = (const signed char *)s2;

    for (; *a == *b; a++, b++) {
        /* Both strings ended together: nothing left to compare. */
        if (*a == 0) {
            break;
        }
    }

    int left = (unsigned char)*a;
    int right = (unsigned char)*b;
    return left - right;
}


/* Return the sign of a: 1 if positive, -1 if negative, 0 if zero. */
int sgn(int a) {
    if (a > 0) {
        return 1;
    }
    if (a < 0) {
        return -1;
    }
    return 0;
}

/*
 * TEST(sc, a, b): call the comparison function `sc` on strings `a` and `b`
 * and check that its result has the same sign as strcmp(a, b).
 *
 * On a mismatch, prints the function name, both strings, the value obtained
 * and the strcmp() value, then sets the variable `fail` (which must be in
 * scope at the point of use) to 1.
 *
 * Each argument and each comparison is evaluated exactly once, so the
 * values printed are the same ones that were compared.
 */
#define TEST(sc, a, b) do { \
    const char *test_lhs_ = (a); \
    const char *test_rhs_ = (b); \
    int test_got_ = sc(test_lhs_, test_rhs_); \
    int test_want_ = strcmp(test_lhs_, test_rhs_); \
    if (sgn(test_got_) != sgn(test_want_)) { \
        printf("%s(%s, %s) = %d, want %d\n", #sc, test_lhs_, test_rhs_, test_got_, test_want_); \
        fail = 1; \
    } } while (0)

/*
 * Run my_strcmp, my_strcmp1 and my_strcmp2 on every test pair and report,
 * through TEST, each result whose sign disagrees with the library strcmp().
 *
 * Returns 0 when every check passes and 1 when at least one mismatch was
 * reported. The command-line arguments are not used.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    struct {
        const char *a;
        const char *b;
    } cases[] = {
        {"abc", "abc"},
        {"\x01", "\xff"},
        {"\xff", "\x01"},
        {"abc", "abd"},
        {"", ""},
    };
    int fail = 0;

    /* sizeof yields size_t, so index with size_t to avoid a signed/unsigned comparison. */
    for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
        TEST(my_strcmp, cases[i].a, cases[i].b);
        TEST(my_strcmp1, cases[i].a, cases[i].b);
        TEST(my_strcmp2, cases[i].a, cases[i].b);
    }
    return fail;
}

(注意:我在这些实现中加入了显式的 signed,这样即使在 char 默认为无符号类型的编译器上也能重现并测试该问题。)另外,很抱歉使用了宏——这只是为了快速测试而采用的取巧做法!