strcmp 实现不适用于特殊字符
strcmp implementation not working with special characters
我正在尝试实现我自己的 strcmp
函数,当我使用特殊字符时,我的 strcmp
表现不同。
#include <stdio.h>
#include <string.h>
/*
 * Compare two NUL-terminated strings byte by byte.
 *
 * Returns 0 when the strings are equal; otherwise returns the difference
 * between the first pair of differing bytes, computed on plain `char`
 * values. The signedness of `char` is implementation-defined, so for bytes
 * above 0x7f the sign of the result can disagree with the library strcmp.
 */
int my_strcmp(const char *s1, const char *s2)
{
    const char *lhs = s1;
    const char *rhs = s2;

    for (; *lhs != '\0' && *lhs == *rhs; lhs++, rhs++) {
        /* skip the common prefix */
    }
    return *lhs - *rhs;
}
/*
 * Demo: print the result of my_strcmp next to the library strcmp for two
 * strings that first differ where one has 'c' and the other has the
 * non-ASCII character '§'.
 */
int main(void)
{
    /* String literals must not be modified, so refer to them through const. */
    const char *src = "a§bcDef";
    const char *des = "acbcDef";

    printf("%d %d\n", my_strcmp(des, src), strcmp(des, src));
    return 0;
}
输出
161 -95
char
在许多实现中是有符号的,因此您的 strcmp
实现会认为小于 0 的 char
值比大于 0 的值更小。您或许应该改为按无符号值进行比较。
/* View the input bytes as unsigned char; the cast keeps the const qualifier. */
const unsigned char *str1 = (const unsigned char *)s1;
const unsigned char *str2 = (const unsigned char *)s2;
以下是标准关于 strcmp
的内容,相关部分以粗体突出显示:
The sign of a non-zero return value shall be determined by the sign
of the difference between the values of the first pair of bytes (both
interpreted as type unsigned char) that differ in the strings being
compared.
您的代码把两个字节按 char
类型相减;如果 char 是有符号的,结果的符号就会与规范不符。
改为:
/* Convert each byte to unsigned char before subtracting, so the sign of the
 * result follows the rule strcmp is specified to use. */
return (unsigned char)(*str1) - (unsigned char)(*str2);
这里有一些原始代码的测试用例(my_strcmp
),dasblinkenlight 目前接受的答案(my_strcmp1
),以及这个答案(my_strcmp2
)。只有 my_strcmp2
通过了测试。
#include <string.h>
#include <stdio.h>
/*
 * Test-harness copy of the question's implementation.
 *
 * Bytes are read explicitly as signed char, so the result is the signed
 * difference of the first differing pair of bytes (0 if the strings are
 * equal) regardless of whether plain char is signed on this compiler.
 */
int my_strcmp(const char *s1, const char *s2) {
    const signed char *lhs = (const signed char *)s1;
    const signed char *rhs = (const signed char *)s2;

    for (; *lhs != 0 && *lhs == *rhs; ++lhs, ++rhs) {
        /* skip the common prefix */
    }
    return *lhs - *rhs;
}
/*
 * Variant under test: walks both strings as signed char and returns the
 * difference of the first differing pair of bytes converted back to
 * signed char (0 if the strings are equal).
 */
int my_strcmp1(const char *s1, const char *s2) {
    const signed char *a = (const signed char *)s1;
    const signed char *b = (const signed char *)s2;
    size_t i = 0;

    /* Stop at the end of s1 or at the first mismatch. */
    while (a[i] != 0 && a[i] == b[i]) {
        i++;
    }
    return (signed char)(a[i] - b[i]);
}
/*
 * Variant under test: walks both strings as signed char, then converts the
 * first differing pair of bytes to unsigned char before subtracting.
 * Returns 0 if the strings are equal.
 */
int my_strcmp2(const char *s1, const char *s2) {
    const signed char *p = (const signed char *)s1;
    const signed char *q = (const signed char *)s2;

    while (*p != 0) {
        if (*p != *q) {
            break;
        }
        p++;
        q++;
    }

    /* Both operands promote to int, so the difference can be negative. */
    unsigned char left = (unsigned char)*p;
    unsigned char right = (unsigned char)*q;
    return left - right;
}
/* Sign of a: 1 if a is positive, -1 if negative, 0 if zero. */
int sgn(int a) {
    /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
    return (a > 0) - (a < 0);
}
/*
 * TEST(sc, a, b): call comparison function `sc` on strings a and b and check
 * that the sign of its result matches the sign of strcmp(a, b). On a
 * mismatch, print both results and set the caller's `fail` variable to 1.
 *
 * Requires an `int fail` in the calling scope. Each argument and each
 * comparison is evaluated exactly once; the results are kept in locals so
 * the values printed are the ones that were compared.
 */
#define TEST(sc, a, b) do { \
    const char *test_lhs_ = (a); \
    const char *test_rhs_ = (b); \
    int test_got_ = sc(test_lhs_, test_rhs_); \
    int test_want_ = strcmp(test_lhs_, test_rhs_); \
    if (sgn(test_got_) != sgn(test_want_)) { \
        printf("%s(%s, %s) = %d, want %d\n", #sc, test_lhs_, test_rhs_, test_got_, test_want_); \
        fail = 1; \
    } \
} while (0)
/*
 * Run every test case against my_strcmp, my_strcmp1 and my_strcmp2, comparing
 * the sign of each result with the library strcmp via TEST.
 *
 * Returns 0 if every implementation agrees with strcmp on every case, and 1
 * if any mismatch was reported.
 */
int main(int argc, char *argv[]) {
    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    struct {
        const char *a;
        const char *b;
    } cases[] = {
        {"abc", "abc"},
        {"\x01", "\xff"},
        {"\xff", "\x01"},
        {"abc", "abd"},
        {"", ""},
    };
    int fail = 0; /* set to 1 by TEST on any mismatch */

    /* sizeof yields size_t, so index with size_t to avoid a signed/unsigned comparison. */
    for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
        TEST(my_strcmp, cases[i].a, cases[i].b);
        TEST(my_strcmp1, cases[i].a, cases[i].b);
        TEST(my_strcmp2, cases[i].a, cases[i].b);
    }
    return fail;
}
(注意:我在这些实现中显式加上了 signed,
以便在 char 默认为无符号类型的编译器上也能复现该问题并进行测试)。另外,对使用宏感到抱歉 - 这只是为了快速测试而用的小技巧!
我正在尝试实现我自己的 strcmp
函数,当我使用特殊字符时,我的 strcmp
表现不同。
#include <string.h>
/*
 * Compare two NUL-terminated strings byte by byte.
 *
 * Returns 0 when the strings are equal; otherwise returns the difference
 * between the first pair of differing bytes, computed on plain `char`
 * values. The signedness of `char` is implementation-defined, so for bytes
 * above 0x7f the sign of the result can disagree with the library strcmp.
 */
int my_strcmp(const char *s1, const char *s2)
{
    const char *lhs = s1;
    const char *rhs = s2;

    for (; *lhs != '\0' && *lhs == *rhs; lhs++, rhs++) {
        /* skip the common prefix */
    }
    return *lhs - *rhs;
}
/*
 * Demo: print the result of my_strcmp next to the library strcmp for two
 * strings that first differ where one has 'c' and the other has the
 * non-ASCII character '§'.
 */
int main(void)
{
    /* String literals must not be modified, so refer to them through const. */
    const char *src = "a§bcDef";
    const char *des = "acbcDef";

    printf("%d %d\n", my_strcmp(des, src), strcmp(des, src));
    return 0;
}
输出
161 -95
char
在许多实现中是有符号的,因此您的 strcmp
实现会认为小于 0 的 char
值比大于 0 的值更小。您或许应该改为按无符号值进行比较。
/* View the input bytes as unsigned char; the cast keeps the const qualifier. */
const unsigned char *str1 = (const unsigned char *)s1;
const unsigned char *str2 = (const unsigned char *)s2;
以下是标准关于 strcmp
的内容,相关部分以粗体突出显示:
The sign of a non-zero return value shall be determined by the sign of the difference between the values of the first pair of bytes (both interpreted as type unsigned char) that differ in the strings being compared.
您的代码把两个字节按 char
类型相减;如果 char 是有符号的,结果的符号就会与规范不符。
改为:
/* Convert each byte to unsigned char before subtracting, so the sign of the
 * result follows the rule strcmp is specified to use. */
return (unsigned char)(*str1) - (unsigned char)(*str2);
这里有一些原始代码的测试用例(my_strcmp
),dasblinkenlight 目前接受的答案(my_strcmp1
),以及这个答案(my_strcmp2
)。只有 my_strcmp2
通过了测试。
#include <string.h>
#include <stdio.h>
/*
 * Test-harness copy of the question's implementation.
 *
 * Bytes are read explicitly as signed char, so the result is the signed
 * difference of the first differing pair of bytes (0 if the strings are
 * equal) regardless of whether plain char is signed on this compiler.
 */
int my_strcmp(const char *s1, const char *s2) {
    const signed char *lhs = (const signed char *)s1;
    const signed char *rhs = (const signed char *)s2;

    for (; *lhs != 0 && *lhs == *rhs; ++lhs, ++rhs) {
        /* skip the common prefix */
    }
    return *lhs - *rhs;
}
/*
 * Variant under test: walks both strings as signed char and returns the
 * difference of the first differing pair of bytes converted back to
 * signed char (0 if the strings are equal).
 */
int my_strcmp1(const char *s1, const char *s2) {
    const signed char *a = (const signed char *)s1;
    const signed char *b = (const signed char *)s2;
    size_t i = 0;

    /* Stop at the end of s1 or at the first mismatch. */
    while (a[i] != 0 && a[i] == b[i]) {
        i++;
    }
    return (signed char)(a[i] - b[i]);
}
/*
 * Variant under test: walks both strings as signed char, then converts the
 * first differing pair of bytes to unsigned char before subtracting.
 * Returns 0 if the strings are equal.
 */
int my_strcmp2(const char *s1, const char *s2) {
    const signed char *p = (const signed char *)s1;
    const signed char *q = (const signed char *)s2;

    while (*p != 0) {
        if (*p != *q) {
            break;
        }
        p++;
        q++;
    }

    /* Both operands promote to int, so the difference can be negative. */
    unsigned char left = (unsigned char)*p;
    unsigned char right = (unsigned char)*q;
    return left - right;
}
/* Sign of a: 1 if a is positive, -1 if negative, 0 if zero. */
int sgn(int a) {
    /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
    return (a > 0) - (a < 0);
}
/*
 * TEST(sc, a, b): call comparison function `sc` on strings a and b and check
 * that the sign of its result matches the sign of strcmp(a, b). On a
 * mismatch, print both results and set the caller's `fail` variable to 1.
 *
 * Requires an `int fail` in the calling scope. Each argument and each
 * comparison is evaluated exactly once; the results are kept in locals so
 * the values printed are the ones that were compared.
 */
#define TEST(sc, a, b) do { \
    const char *test_lhs_ = (a); \
    const char *test_rhs_ = (b); \
    int test_got_ = sc(test_lhs_, test_rhs_); \
    int test_want_ = strcmp(test_lhs_, test_rhs_); \
    if (sgn(test_got_) != sgn(test_want_)) { \
        printf("%s(%s, %s) = %d, want %d\n", #sc, test_lhs_, test_rhs_, test_got_, test_want_); \
        fail = 1; \
    } \
} while (0)
/*
 * Run every test case against my_strcmp, my_strcmp1 and my_strcmp2, comparing
 * the sign of each result with the library strcmp via TEST.
 *
 * Returns 0 if every implementation agrees with strcmp on every case, and 1
 * if any mismatch was reported.
 */
int main(int argc, char *argv[]) {
    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    struct {
        const char *a;
        const char *b;
    } cases[] = {
        {"abc", "abc"},
        {"\x01", "\xff"},
        {"\xff", "\x01"},
        {"abc", "abd"},
        {"", ""},
    };
    int fail = 0; /* set to 1 by TEST on any mismatch */

    /* sizeof yields size_t, so index with size_t to avoid a signed/unsigned comparison. */
    for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
        TEST(my_strcmp, cases[i].a, cases[i].b);
        TEST(my_strcmp1, cases[i].a, cases[i].b);
        TEST(my_strcmp2, cases[i].a, cases[i].b);
    }
    return fail;
}
(注意:我在这些实现中显式加上了 signed,
以便在 char 默认为无符号类型的编译器上也能复现该问题并进行测试)。另外,对使用宏感到抱歉 - 这只是为了快速测试而用的小技巧!