我想知道这段代码是如何工作的。 ((__ctype_ptr__+sizeof(""[__c]))[(int)(__c)])
I wonder how this code works. ((__ctype_ptr__+sizeof(""[__c]))[(int)(__c)])
#ifndef _CTYPE_H_
#define _CTYPE_H_
#include "_ansi.h"
_BEGIN_STD_C
int _EXFUN(isalnum, (int __c));
int _EXFUN(isalpha, (int __c));
int _EXFUN(iscntrl, (int __c));
int _EXFUN(isdigit, (int __c));
int _EXFUN(isgraph, (int __c));
int _EXFUN(islower, (int __c));
int _EXFUN(isprint, (int __c));
int _EXFUN(ispunct, (int __c));
int _EXFUN(isspace, (int __c));
int _EXFUN(isupper, (int __c));
int _EXFUN(isxdigit,(int __c));
int _EXFUN(tolower, (int __c));
int _EXFUN(toupper, (int __c));
#if !defined(__STRICT_ANSI__) || defined(__cplusplus) || __STDC_VERSION__ >= 199901L
int _EXFUN(isblank, (int __c));
#endif
#ifndef __STRICT_ANSI__
int _EXFUN(isascii, (int __c));
int _EXFUN(toascii, (int __c));
#define _tolower(__c) ((unsigned char)(__c) - 'A' + 'a')
#define _toupper(__c) ((unsigned char)(__c) - 'a' + 'A')
#endif
#define _U 01
#define _L 02
#define _N 04
#define _S 010
#define _P 020
#define _C 040
#define _X 0100
#define _B 0200
#ifndef _MB_CAPABLE
_CONST
#endif
extern __IMPORT char *__ctype_ptr__;
#ifndef __cplusplus
/* These macros are intentionally written in a manner that will trigger
a gcc -Wall warning if the user mistakenly passes a 'char' instead
of an int containing an 'unsigned char'. Note that the sizeof will
always be 1, which is what we want for mapping EOF to __ctype_ptr__[0];
the use of a raw index inside the sizeof triggers the gcc warning if
__c was of type char, and sizeof masks side effects of the extra __c.
Meanwhile, the real index to __ctype_ptr__+1 must be cast to int,
since isalpha(0x100000001LL) must equal isalpha(1), rather than being
an out-of-bounds reference on a 64-bit machine. */
#define __ctype_lookup(__c) ((__ctype_ptr__+sizeof(""[__c]))[(int)(__c)])
#define isalpha(__c) (__ctype_lookup(__c)&(_U|_L))
#define isupper(__c) ((__ctype_lookup(__c)&(_U|_L))==_U)
#define islower(__c) ((__ctype_lookup(__c)&(_U|_L))==_L)
#define isdigit(__c) (__ctype_lookup(__c)&_N)
#define isxdigit(__c) (__ctype_lookup(__c)&(_X|_N))
#define isspace(__c) (__ctype_lookup(__c)&_S)
#define ispunct(__c) (__ctype_lookup(__c)&_P)
#define isalnum(__c) (__ctype_lookup(__c)&(_U|_L|_N))
#define isprint(__c) (__ctype_lookup(__c)&(_P|_U|_L|_N|_B))
#define isgraph(__c) (__ctype_lookup(__c)&(_P|_U|_L|_N))
#define iscntrl(__c) (__ctype_lookup(__c)&_C)
#if defined(__GNUC__) && \
(!defined(__STRICT_ANSI__) || __STDC_VERSION__ >= 199901L)
#define isblank(__c) \
__extension__ ({ __typeof__ (__c) __x = (__c); \
(__ctype_lookup(__x)&_B) || (int) (__x) == '\t';})
#endif
/* Non-gcc versions will get the library versions, and will be
slightly slower. These macros are not NLS-aware so they are
disabled if the system supports the extended character sets. */
# if defined(__GNUC__)
# if !defined (_MB_EXTENDED_CHARSETS_ISO) && !defined (_MB_EXTENDED_CHARSETS_WINDOWS)
# define toupper(__c) \
__extension__ ({ __typeof__ (__c) __x = (__c); \
islower (__x) ? (int) __x - 'a' + 'A' : (int) __x;})
# define tolower(__c) \
__extension__ ({ __typeof__ (__c) __x = (__c); \
isupper (__x) ? (int) __x - 'A' + 'a' : (int) __x;})
# else /* _MB_EXTENDED_CHARSETS* */
/* Allow a gcc warning if the user passed 'char', but defer to the
function. */
# define toupper(__c) \
__extension__ ({ __typeof__ (__c) __x = (__c); \
(void) __ctype_ptr__[__x]; (toupper) (__x);})
# define tolower(__c) \
__extension__ ({ __typeof__ (__c) __x = (__c); \
(void) __ctype_ptr__[__x]; (tolower) (__x);})
# endif /* _MB_EXTENDED_CHARSETS* */
# endif /* __GNUC__ */
#endif /* !__cplusplus */
#ifndef __STRICT_ANSI__
#define isascii(__c) ((unsigned)(__c)<=0177)
#define toascii(__c) ((__c)&0177)
#endif
/* For C++ backward-compatibility only. */
extern __IMPORT _CONST char _ctype_[];
_END_STD_C
#endif /* _CTYPE_H_ */
此代码来自标准库ctype.h。
下面的代码给我留下了深刻的印象:
#define isalpha(__c) (__ctype_lookup(__c)&(_U|_L))
但是怎么办?仅通过该操作如何检查字母表?我想我应该知道 __ctype_lookup 是什么。
然而,__ctype_lookup更奇怪。
#define __ctype_lookup(__c) ((__ctype_ptr__+sizeof(""[__c]))[(int)(__c)])
还好代码上面有注释,可惜看不懂
/* These macros are intentionally written in a manner that will trigger
a gcc -Wall warning if the user mistakenly passes a 'char' instead
of an int containing an 'unsigned char'. Note that the sizeof will
always be 1, which is what we want for mapping EOF to __ctype_ptr__[0];
the use of a raw index inside the sizeof triggers the gcc warning if
__c was of type char, and sizeof masks side effects of the extra __c.
Meanwhile, the real index to __ctype_ptr__+1 must be cast to int,
since isalpha(0x100000001LL) must equal isalpha(1), rather than being
an out-of-bounds reference on a 64-bit machine. */
有人帮帮我!
而下面代码中的__IMPORT只是在某处定义的。重要的话我会找的
extern __IMPORT char *__ctype_ptr__;
参考the chromium implementation of ctype.c(希望有代表性):
__ctype_ptr__
是指向 _ctype_
数组开头的指针:
char *__ctype_ptr__ = (char *) _ctype_;
_ctype_
数组是一个常量字符数组:
_CONST char _ctype_[1 + 256] = {
0,
_CTYPE_DATA_0_127,
_CTYPE_DATA_128_255
};
最后 _CTYPE_DATA_0_127
和 _CTYPE_DATA_128_255
条目实际上是
定义包含几个char
,每个都是一个位域:
#define _CTYPE_DATA_0_127 \
_C, _C, _C, _C, _C, _C, _C, _C, \
_C, _C|_S, _C|_S, _C|_S, _C|_S, _C|_S, _C, _C, \
_C, _C, _C, _C, _C, _C, _C, _C, \
_C, _C, _C, _C, _C, _C, _C, _C, \
_S|_B, _P, _P, _P, _P, _P, _P, _P, \
_P, _P, _P, _P, _P, _P, _P, _P, \
_N, _N, _N, _N, _N, _N, _N, _N, \
_N, _N, _P, _P, _P, _P, _P, _P, \
_P, _U|_X, _U|_X, _U|_X, _U|_X, _U|_X, _U|_X, _U, \
_U, _U, _U, _U, _U, _U, _U, _U, \
_U, _U, _U, _U, _U, _U, _U, _U, \
_U, _U, _U, _P, _P, _P, _P, _P, \
_P, _L|_X, _L|_X, _L|_X, _L|_X, _L|_X, _L|_X, _L, \
_L, _L, _L, _L, _L, _L, _L, _L, \
_L, _L, _L, _L, _L, _L, _L, _L, \
_L, _L, _L, _P, _P, _P, _P, _C
其中每个 _U
、_P
等都是不同的位:
#define _U 01
#define _L 02
#define _N 04
#define _S 010
#define _P 020
#define _C 040
#define _X 0100
#define _B 0200
因此,行:
#define __ctype_lookup(__c) ((__ctype_ptr__+sizeof(""[__c]))[(int)(__c)])
1
只是查找 _ctype_
数组,然后
#define isalpha(__c) (__ctype_lookup(__c)&(_U|_L))
是该查找的按位掩码,具有适当的位掩码(在本例中,大写和小写的 OR)。
1:老实说我不确定 sizeof(""[__c])
实现了什么,我猜这与支持不同的 compilers/architectures 或其他东西有关。它跳过 _ctype_
数组
中的第一个“0”
#ifndef _CTYPE_H_
#define _CTYPE_H_
#include "_ansi.h"
_BEGIN_STD_C
int _EXFUN(isalnum, (int __c));
int _EXFUN(isalpha, (int __c));
int _EXFUN(iscntrl, (int __c));
int _EXFUN(isdigit, (int __c));
int _EXFUN(isgraph, (int __c));
int _EXFUN(islower, (int __c));
int _EXFUN(isprint, (int __c));
int _EXFUN(ispunct, (int __c));
int _EXFUN(isspace, (int __c));
int _EXFUN(isupper, (int __c));
int _EXFUN(isxdigit,(int __c));
int _EXFUN(tolower, (int __c));
int _EXFUN(toupper, (int __c));
#if !defined(__STRICT_ANSI__) || defined(__cplusplus) || __STDC_VERSION__ >= 199901L
int _EXFUN(isblank, (int __c));
#endif
#ifndef __STRICT_ANSI__
int _EXFUN(isascii, (int __c));
int _EXFUN(toascii, (int __c));
#define _tolower(__c) ((unsigned char)(__c) - 'A' + 'a')
#define _toupper(__c) ((unsigned char)(__c) - 'a' + 'A')
#endif
#define _U 01
#define _L 02
#define _N 04
#define _S 010
#define _P 020
#define _C 040
#define _X 0100
#define _B 0200
#ifndef _MB_CAPABLE
_CONST
#endif
extern __IMPORT char *__ctype_ptr__;
#ifndef __cplusplus
/* These macros are intentionally written in a manner that will trigger
a gcc -Wall warning if the user mistakenly passes a 'char' instead
of an int containing an 'unsigned char'. Note that the sizeof will
always be 1, which is what we want for mapping EOF to __ctype_ptr__[0];
the use of a raw index inside the sizeof triggers the gcc warning if
__c was of type char, and sizeof masks side effects of the extra __c.
Meanwhile, the real index to __ctype_ptr__+1 must be cast to int,
since isalpha(0x100000001LL) must equal isalpha(1), rather than being
an out-of-bounds reference on a 64-bit machine. */
#define __ctype_lookup(__c) ((__ctype_ptr__+sizeof(""[__c]))[(int)(__c)])
#define isalpha(__c) (__ctype_lookup(__c)&(_U|_L))
#define isupper(__c) ((__ctype_lookup(__c)&(_U|_L))==_U)
#define islower(__c) ((__ctype_lookup(__c)&(_U|_L))==_L)
#define isdigit(__c) (__ctype_lookup(__c)&_N)
#define isxdigit(__c) (__ctype_lookup(__c)&(_X|_N))
#define isspace(__c) (__ctype_lookup(__c)&_S)
#define ispunct(__c) (__ctype_lookup(__c)&_P)
#define isalnum(__c) (__ctype_lookup(__c)&(_U|_L|_N))
#define isprint(__c) (__ctype_lookup(__c)&(_P|_U|_L|_N|_B))
#define isgraph(__c) (__ctype_lookup(__c)&(_P|_U|_L|_N))
#define iscntrl(__c) (__ctype_lookup(__c)&_C)
#if defined(__GNUC__) && \
(!defined(__STRICT_ANSI__) || __STDC_VERSION__ >= 199901L)
#define isblank(__c) \
__extension__ ({ __typeof__ (__c) __x = (__c); \
(__ctype_lookup(__x)&_B) || (int) (__x) == '\t';})
#endif
/* Non-gcc versions will get the library versions, and will be
slightly slower. These macros are not NLS-aware so they are
disabled if the system supports the extended character sets. */
# if defined(__GNUC__)
# if !defined (_MB_EXTENDED_CHARSETS_ISO) && !defined (_MB_EXTENDED_CHARSETS_WINDOWS)
# define toupper(__c) \
__extension__ ({ __typeof__ (__c) __x = (__c); \
islower (__x) ? (int) __x - 'a' + 'A' : (int) __x;})
# define tolower(__c) \
__extension__ ({ __typeof__ (__c) __x = (__c); \
isupper (__x) ? (int) __x - 'A' + 'a' : (int) __x;})
# else /* _MB_EXTENDED_CHARSETS* */
/* Allow a gcc warning if the user passed 'char', but defer to the
function. */
# define toupper(__c) \
__extension__ ({ __typeof__ (__c) __x = (__c); \
(void) __ctype_ptr__[__x]; (toupper) (__x);})
# define tolower(__c) \
__extension__ ({ __typeof__ (__c) __x = (__c); \
(void) __ctype_ptr__[__x]; (tolower) (__x);})
# endif /* _MB_EXTENDED_CHARSETS* */
# endif /* __GNUC__ */
#endif /* !__cplusplus */
#ifndef __STRICT_ANSI__
#define isascii(__c) ((unsigned)(__c)<=0177)
#define toascii(__c) ((__c)&0177)
#endif
/* For C++ backward-compatibility only. */
extern __IMPORT _CONST char _ctype_[];
_END_STD_C
#endif /* _CTYPE_H_ */
此代码来自标准库ctype.h。
下面的代码给我留下了深刻的印象:
#define isalpha(__c) (__ctype_lookup(__c)&(_U|_L))
但是怎么办?仅通过该操作如何检查字母表?我想我应该知道 __ctype_lookup 是什么。
然而,__ctype_lookup更奇怪。
#define __ctype_lookup(__c) ((__ctype_ptr__+sizeof(""[__c]))[(int)(__c)])
还好代码上面有注释,可惜看不懂
/* These macros are intentionally written in a manner that will trigger a gcc -Wall warning if the user mistakenly passes a 'char' instead of an int containing an 'unsigned char'. Note that the sizeof will always be 1, which is what we want for mapping EOF to __ctype_ptr__[0]; the use of a raw index inside the sizeof triggers the gcc warning if __c was of type char, and sizeof masks side effects of the extra __c. Meanwhile, the real index to __ctype_ptr__+1 must be cast to int, since isalpha(0x100000001LL) must equal isalpha(1), rather than being an out-of-bounds reference on a 64-bit machine. */
有人帮帮我!
而下面代码中的__IMPORT只是在某处定义的。重要的话我会找的
extern __IMPORT char *__ctype_ptr__;
参考the chromium implementation of ctype.c(希望有代表性):
__ctype_ptr__
是指向 _ctype_
数组开头的指针:
char *__ctype_ptr__ = (char *) _ctype_;
_ctype_
数组是一个常量字符数组:
_CONST char _ctype_[1 + 256] = {
0,
_CTYPE_DATA_0_127,
_CTYPE_DATA_128_255
};
最后 _CTYPE_DATA_0_127
和 _CTYPE_DATA_128_255
条目实际上是
定义包含几个char
,每个都是一个位域:
#define _CTYPE_DATA_0_127 \
_C, _C, _C, _C, _C, _C, _C, _C, \
_C, _C|_S, _C|_S, _C|_S, _C|_S, _C|_S, _C, _C, \
_C, _C, _C, _C, _C, _C, _C, _C, \
_C, _C, _C, _C, _C, _C, _C, _C, \
_S|_B, _P, _P, _P, _P, _P, _P, _P, \
_P, _P, _P, _P, _P, _P, _P, _P, \
_N, _N, _N, _N, _N, _N, _N, _N, \
_N, _N, _P, _P, _P, _P, _P, _P, \
_P, _U|_X, _U|_X, _U|_X, _U|_X, _U|_X, _U|_X, _U, \
_U, _U, _U, _U, _U, _U, _U, _U, \
_U, _U, _U, _U, _U, _U, _U, _U, \
_U, _U, _U, _P, _P, _P, _P, _P, \
_P, _L|_X, _L|_X, _L|_X, _L|_X, _L|_X, _L|_X, _L, \
_L, _L, _L, _L, _L, _L, _L, _L, \
_L, _L, _L, _L, _L, _L, _L, _L, \
_L, _L, _L, _P, _P, _P, _P, _C
其中每个 _U
、_P
等都是不同的位:
#define _U 01
#define _L 02
#define _N 04
#define _S 010
#define _P 020
#define _C 040
#define _X 0100
#define _B 0200
因此,行:
#define __ctype_lookup(__c) ((__ctype_ptr__+sizeof(""[__c]))[(int)(__c)])
1
只是查找 _ctype_
数组,然后
#define isalpha(__c) (__ctype_lookup(__c)&(_U|_L))
是该查找的按位掩码,具有适当的位掩码(在本例中,大写和小写的 OR)。
1:老实说我不确定 sizeof(""[__c])
实现了什么,我猜这与支持不同的 compilers/architectures 或其他东西有关。它跳过 _ctype_
数组