strtok()函数的实现
Implementation of strtok() function
我需要编写函数 strtok。下面是我的代码。问题是 - 我无法显示结果字符串。在代码中我使用 strcpy()
然后显示新数组。
是否可以仅使用指针显示字符串 *str
?
#include <stdio.h>
#include <string.h>
/*
 * my_strtok - split a string into tokens, following the contract of the
 * standard strtok().
 *
 * s     String to tokenize on the first call; NULL on later calls to
 *       continue with the string from the previous call.
 * delm  NUL-terminated set of delimiter characters.
 *
 * Returns a pointer to the next token, or NULL when no tokens remain.
 * The token is NOT copied: the delimiter that ends it is overwritten with
 * '\0' inside the caller's buffer and the returned pointer refers to that
 * buffer.  The result can therefore be printed directly through the
 * pointer, and the input must be writable (not a string literal).
 */
char* my_strtok(char* s, char* delm){
    static char *Iterator;      /* where the next call with s == NULL resumes */
    char *token;

    if (s == NULL)
        s = Iterator;
    if (s == NULL || delm == NULL)
        return NULL;            /* nothing to continue, or no delimiter set */

    /* Skip delimiters in front of the token. */
    while (*s != '\0' && strchr(delm, *s) != NULL)
        s++;
    if (*s == '\0') {
        Iterator = s;           /* stay on the terminator: later calls return NULL */
        return NULL;
    }

    /* Walk to the end of the token. */
    token = s;
    while (*s != '\0' && strchr(delm, *s) == NULL)
        s++;

    /* Cut the token off; only step over a delimiter, never over the terminator. */
    if (*s != '\0') {
        *s = '\0';
        s++;
    }
    Iterator = s;
    return token;
}
/*
 * Read one line from standard input and print its first space-separated
 * token.
 */
int main(void){
    char s[100];
    char delm[] = " ";
    char newstr[100];
    char *str;

    /* fgets() bounds the read to the buffer; gets() cannot and was removed in C11. */
    if (fgets(s, sizeof s, stdin) == NULL)
        return 1;
    s[strcspn(s, "\n")] = '\0';     /* drop the newline that fgets() keeps */

    str = my_strtok(s, delm);
    if (str == NULL)                /* no token: nothing to copy or print */
        return 0;
    strcpy(newstr, str);            /* fits: a token is never longer than s */
    printf("%s", newstr);
    return 0;
}
这里已经讨论了 strtok 的内部实现:
How does strtok() split the string into tokens in C?
在您的实现类型中(称其为您的类型,因为它与实际类型有很大不同),您没有为局部变量动态分配任何内存'W'。所以当你 return 它时,不能保证你会正确地接收到字符串,因为在本地分配给 'W' 的内存在调用函数中不再为它保留。除此之外,代码中还使用了许多不必要的变量。而且,它需要适当的重组。为了您更好地理解,我修改了您的代码(尽可能保持您的风格)以完成所需的工作:
#include <stdio.h>
#include <string.h>
#include <malloc.h>
/*
 * my_strtok - return the next token of s as a freshly allocated string.
 *
 * Unlike the standard strtok(), the SAME string s is passed on every call;
 * the scan position is kept in a static index.  Delimiters in front of a
 * token are skipped, so consecutive delimiters do not yield empty tokens.
 *
 * s     String being tokenized (never modified).
 * delm  NUL-terminated set of delimiter characters.
 *
 * Returns a malloc()ed copy of the token which the caller must free(), or
 * NULL when s/delm is NULL, no tokens remain, or allocation fails.  When
 * the end of s is reached the position is reset, so another string can be
 * tokenized afterwards.
 */
char* my_strtok(char* s, char* delm)
{
    static int currIndex = 0;   /* offset in s where the next scan starts */
    size_t start, len;
    char *W;

    if (!s || !delm)
        return NULL;

    /* Skip delimiters that precede the token. */
    start = (size_t)currIndex + strspn(s + currIndex, delm);
    if (s[start] == '\0') {
        currIndex = 0;          /* finished: rewind for the next string */
        return NULL;
    }

    /* Size the copy from the token itself instead of a fixed 100 bytes. */
    len = strcspn(s + start, delm);
    W = malloc(len + 1);
    if (!W)
        return NULL;
    memcpy(W, s + start, len);
    W[len] = '\0';

    /* Step over the delimiter, but never past the string terminator. */
    currIndex = (int)(start + len + (s[start + len] != '\0'));
    return W;
}
/*
 * Print every token of a fixed sentence.  my_strtok() is handed the same
 * string on each call and returns heap copies, which are released here.
 */
int main(void)
{
    char delm[] = " ";
    char s[100] = "my name is khan";
    char *str;

    for (str = my_strtok(s, delm); str; str = my_strtok(s, delm)) {
        printf("%s", str);
        free(str);
    }
    return 0;
}
赏金免责声明
赏金:"Looking for an answer drawing from credible and/or official sources."
我认为 GNU GLibC constitutes a credible and official source, right? You can download GLibC 2.22 here 为 .tar.gz
(25mb)。
提取源后:
string/strtok.c
#include <string.h>
static char *olds;
#undef strtok
#ifndef STRTOK
# define STRTOK strtok
#endif
/* Parse S into tokens separated by characters in DELIM.
   If S is NULL, the last string strtok() was called with is
   used.  For example:
	char s[] = "-abc-=-def";
	x = strtok(s, "-");		// x = "abc"
	x = strtok(NULL, "-=");		// x = "def"
	x = strtok(NULL, "=");		// x = NULL
		// s = "abc\0=-def\0"

   The token is not copied: the delimiter that ends it is overwritten
   with '\0' inside S, and the returned pointer points into S.  The
   resume position is kept in the file-scope variable OLDS.  */
char *
STRTOK (char *s, const char *delim)
{
  char *token;

  if (s == NULL)
    s = olds;

  /* Scan leading delimiters.  */
  s += strspn (s, delim);
  if (*s == '\0')
    {
      /* Only delimiters were left: remember the terminator so that
	 further calls keep returning NULL.  */
      olds = s;
      return NULL;
    }

  /* Find the end of the token.  */
  token = s;
  s = strpbrk (token, delim);
  if (s == NULL)
    /* This token finishes the string.  */
    olds = __rawmemchr (token, '\0');
  else
    {
      /* Terminate the token and make OLDS point past it.  */
      *s = '\0';
      olds = s + 1;
    }
  return token;
}
string/strspn.c
#include <string.h>
#undef strspn
#ifndef STRSPN
#define STRSPN strspn
#endif
/* Return the length of the maximum initial segment
of S which contains only characters in ACCEPT. */
/* Return the length of the maximum initial segment of S which consists
   only of characters in ACCEPT.  Returns 0 when S is empty or its first
   character is not in ACCEPT.  */
size_t
STRSPN (const char *s, const char *accept)
{
  const char *p;
  const char *a;
  size_t count = 0;

  for (p = s; *p != '\0'; ++p)
    {
      /* Look for *P in ACCEPT; A stops on the match or on the terminator.  */
      for (a = accept; *a != '\0'; ++a)
	if (*p == *a)
	  break;
      if (*a == '\0')
	/* *P is not in ACCEPT: the segment ends here.  */
	return count;
      else
	++count;
    }

  return count;
}
libc_hidden_builtin_def (strspn)
string/strpbrk.c
#include <string.h>
#undef strpbrk
#ifndef STRPBRK
#define STRPBRK strpbrk
#endif
/* Find the first occurrence in S of any character in ACCEPT. */
/* Find the first occurrence in S of any character in ACCEPT.
   Returns a pointer to that character inside S, or NULL when S contains
   none of the characters in ACCEPT.  */
char *
STRPBRK (const char *s, const char *accept)
{
  while (*s != '\0')
    {
      const char *a = accept;
      while (*a != '\0')
	if (*a++ == *s)
	  /* The cast drops const, as the standard strpbrk interface does.  */
	  return (char *) s;
      ++s;
    }

  return NULL;
}
libc_hidden_builtin_def (strpbrk)
sysdeps/x86_64/rawmemchr.S
#include <sysdep.h>
.text
/* __rawmemchr: SSE2 search for a byte with no length limit.
   As used below: %rdi holds the start address, the low byte of %rsi is
   the byte searched for, and the address of the first match is returned
   in %rax.  The search is unbounded, so the byte must occur.
   NOTE(review): the immediate operands ($63, $0, $48, $16, $15, $64,
   $0x3f) were lost when this listing was extracted and have been
   restored; confirm them against the upstream glibc file.  */
ENTRY (__rawmemchr)
	/* Broadcast the searched byte into all 16 bytes of %xmm1.  */
	movd	%rsi, %xmm1
	mov	%rdi, %rcx
	punpcklbw %xmm1, %xmm1
	punpcklbw %xmm1, %xmm1
	/* %rcx = offset of the start address within its 64-byte block.  */
	and	$63, %rcx
	pshufd	$0, %xmm1, %xmm1
	/* Above offset 48 an unaligned 16-byte load would cross the block.  */
	cmp	$48, %rcx
	ja	L(crosscache)
	movdqu	(%rdi), %xmm0
	pcmpeqb	%xmm1, %xmm0
	/* Check if there is a match. */
	pmovmskb %xmm0, %eax
	test	%eax, %eax
	jnz	L(matches)
	/* No match: continue from the next 16-byte aligned address.  */
	add	$16, %rdi
	and	$-16, %rdi
	jmp	L(loop_prolog)
	.p2align 4
L(crosscache):
	/* Load the aligned 16 bytes containing the start address.  */
	and	$15, %rcx
	and	$-16, %rdi
	movdqa	(%rdi), %xmm0
	pcmpeqb	%xmm1, %xmm0
	/* Check if there is a match. */
	pmovmskb %xmm0, %eax
	/* Remove the leading bytes. */
	sar	%cl, %eax
	test	%eax, %eax
	je	L(unaligned_no_match)
	/* Check which byte is a match. */
	bsf	%eax, %eax
	add	%rdi, %rax
	add	%rcx, %rax
	ret
	.p2align 4
L(unaligned_no_match):
	add	$16, %rdi
	.p2align 4
L(loop_prolog):
	/* Check four 16-byte chunks one at a time.  */
	movdqa	(%rdi), %xmm0
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %eax
	test	%eax, %eax
	jnz	L(matches)
	movdqa	16(%rdi), %xmm2
	pcmpeqb	%xmm1, %xmm2
	pmovmskb %xmm2, %eax
	test	%eax, %eax
	jnz	L(matches16)
	movdqa	32(%rdi), %xmm3
	pcmpeqb	%xmm1, %xmm3
	pmovmskb %xmm3, %eax
	test	%eax, %eax
	jnz	L(matches32)
	movdqa	48(%rdi), %xmm4
	pcmpeqb	%xmm1, %xmm4
	/* %rdi already advanced by 64; L(matches0) compensates with -16.  */
	add	$64, %rdi
	pmovmskb %xmm4, %eax
	test	%eax, %eax
	jnz	L(matches0)
	/* Already 64-byte aligned?  Then enter the main loop directly.  */
	test	$0x3f, %rdi
	jz	L(align64_loop)
	movdqa	(%rdi), %xmm0
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %eax
	test	%eax, %eax
	jnz	L(matches)
	movdqa	16(%rdi), %xmm2
	pcmpeqb	%xmm1, %xmm2
	pmovmskb %xmm2, %eax
	test	%eax, %eax
	jnz	L(matches16)
	movdqa	32(%rdi), %xmm3
	pcmpeqb	%xmm1, %xmm3
	pmovmskb %xmm3, %eax
	test	%eax, %eax
	jnz	L(matches32)
	movdqa	48(%rdi), %xmm3
	pcmpeqb	%xmm1, %xmm3
	pmovmskb %xmm3, %eax
	add	$64, %rdi
	test	%eax, %eax
	jnz	L(matches0)
	/* Round down to a 64-byte boundary for the main loop.  */
	and	$-64, %rdi
	.p2align 4
L(align64_loop):
	/* Compare 64 bytes per iteration and merge the four results with
	   pmaxub so that a single mask test covers all of them.  */
	movdqa	(%rdi), %xmm0
	movdqa	16(%rdi), %xmm2
	movdqa	32(%rdi), %xmm3
	movdqa	48(%rdi), %xmm4
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm1, %xmm2
	pcmpeqb	%xmm1, %xmm3
	pcmpeqb	%xmm1, %xmm4
	pmaxub	%xmm0, %xmm3
	pmaxub	%xmm2, %xmm4
	pmaxub	%xmm3, %xmm4
	pmovmskb %xmm4, %eax
	add	$64, %rdi
	test	%eax, %eax
	jz	L(align64_loop)
	/* A match lies in the last 64 bytes: step back and locate it.  */
	sub	$64, %rdi
	pmovmskb %xmm0, %eax
	test	%eax, %eax
	jnz	L(matches)
	pmovmskb %xmm2, %eax
	test	%eax, %eax
	jnz	L(matches16)
	/* %xmm3 was overwritten by pmaxub, so compare that chunk again.  */
	movdqa	32(%rdi), %xmm3
	pcmpeqb	%xmm1, %xmm3
	pcmpeqb	48(%rdi), %xmm1
	pmovmskb %xmm3, %eax
	test	%eax, %eax
	jnz	L(matches32)
	pmovmskb %xmm1, %eax
	bsf	%eax, %eax
	lea	48(%rdi, %rax), %rax
	ret
	.p2align 4
L(matches0):
	bsf	%eax, %eax
	lea	-16(%rax, %rdi), %rax
	ret
	.p2align 4
L(matches):
	bsf	%eax, %eax
	add	%rdi, %rax
	ret
	.p2align 4
L(matches16):
	bsf	%eax, %eax
	lea	16(%rax, %rdi), %rax
	ret
	.p2align 4
L(matches32):
	bsf	%eax, %eax
	lea	32(%rax, %rdi), %rax
	ret
	.p2align 4
	/* No jump in this listing targets the label below.  */
L(return_null):
	xor	%rax, %rax
	ret
END (__rawmemchr)
weak_alias (__rawmemchr, rawmemchr)
libc_hidden_builtin_def (__rawmemchr)
实际上这个是我对一般情况的解决方案...当 str 是 char* 时你不能写入它。
这里是:
And here is the link to github资源:
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define DICT_LEN 256

/*
 * create_delim_dict - build a lookup table for a set of delimiter bytes.
 *
 * delim  NUL-terminated delimiter characters; NULL is treated as empty.
 *
 * Returns a heap array of DICT_LEN ints in which entry b is 1 when byte
 * value b (taken as unsigned char) is a delimiter and 0 otherwise, or
 * NULL if allocation fails.  The caller must free() the table.
 */
int *create_delim_dict(char *delim)
{
    const unsigned char *p;
    int *d = calloc(DICT_LEN, sizeof *d);   /* zeroed: no byte is a delimiter yet */

    if (d == NULL || delim == NULL)
        return d;

    /* Index as unsigned char: a plain char may be negative for bytes >= 0x80. */
    for (p = (const unsigned char *)delim; *p != '\0'; p++)
        d[*p] = 1;
    return d;
}
/*
 * my_strtok - strtok() variant that never writes to the caller's string.
 *
 * On a call with a non-NULL str the string is copied to the heap and the
 * copy is tokenized, so str may be a string literal.  Calls with
 * str == NULL continue with that copy.
 *
 * str    String to start tokenizing, or NULL to continue.
 * delim  NUL-terminated set of delimiter characters.
 *
 * Returns a pointer to the next token inside the private copy, or NULL
 * when no tokens remain or an allocation fails.  The copy is released as
 * soon as NULL is returned for "no more tokens" or a new string is
 * started, so tokens must be used or copied before that point.
 */
char *my_strtok(char *str, char *delim)
{
    static char *last, *to_free;    /* scan position / start of the heap copy */
    int *deli_dict = create_delim_dict(delim);

    if (!deli_dict) {
        return NULL;
    }

    if (str) {
        char *copy = malloc(strlen(str) + 1);
        if (!copy) {
            free(deli_dict);
            return NULL;            /* previous state is left untouched */
        }
        strcpy(copy, str);
        /* Release the old copy only after copying: str may point into it. */
        free(to_free);
        to_free = last = copy;
    }

    if (!last) {                    /* nothing to continue */
        free(deli_dict);
        return NULL;
    }

    /* Skip leading delimiters; cast so bytes >= 0x80 index the table correctly. */
    while (*last != '\0' && deli_dict[(unsigned char)*last]) {
        last++;
    }
    str = last;

    if (*last == '\0') {
        free(deli_dict);
        free(to_free);
        last = to_free = NULL;      /* no dangling pointers for later calls */
        return NULL;
    }

    while (*last != '\0' && !deli_dict[(unsigned char)*last]) {
        last++;
    }
    /* Terminate the token; step over a delimiter but never the terminator. */
    if (*last != '\0') {
        *last = '\0';
        last++;
    }

    free(deli_dict);
    return str;
}
/* Tokenize a sample sentence and print one token per line. */
int main()
{
    char *del = " ,.-";
    char * str = "- This, a sample string.";
    char *s;

    for (s = my_strtok(str, del); s; s = my_strtok(NULL, del)) {
        printf("%s\n", s);
    }
    return 0;
}
/*
 * strtok2 - tokenizer that overwrites delimiters in place with '\0'.
 *
 * str    String to start tokenizing (modified in place), or NULL to
 *        continue with the string from the previous call.
 * delim  NUL-terminated set of delimiter characters.
 *
 * Returns a pointer to the next token inside the original buffer, or NULL
 * when the string is exhausted or no string was ever supplied.  Unlike
 * the standard strtok(), delimiters at the very start of the string are
 * not skipped: they produce an empty first token.
 */
char * strtok2(char *str, const char *delim)
{
    static char *nxt;   /* scan position; rests on a '\0' between calls */
    static int size;    /* characters left from nxt to the end of the string */
    /* IMPORTANT: every advance of 'nxt' is paired with a decrement of 'size'. */
    int i;              /* length of the delimiter run found at nxt */

    if (str != NULL)
    {
        /* A new string is supplied: start over. */
        nxt = str;
        size = (int)strlen(str);
    }
    else if (nxt == NULL)
    {
        /* Continuation requested before any string was supplied. */
        return NULL;
    }
    else if (size > 0)
    {
        /* The previous call left nxt on a terminator; the next token follows it. */
        nxt++;
        size--;
        str = nxt;
    }
    /* Otherwise the end was reached: str stays NULL and the loop below
     * does not run because nxt rests on the final terminator. */

    while (*nxt != '\0')
    {
        i = (int)strspn(nxt, delim);
        while (i > 1)   /* blank out all but the last delimiter of a run */
        {
            *nxt = '\0';
            nxt++;
            size--;
            i--;
        }
        if (1 == i)
        {
            *nxt = '\0';
            /* More text follows: stop here with nxt resting on the new
             * terminator.  (Breaking replaces the original step-back /
             * step-forward pair, which could move nxt before the buffer.) */
            if (size > 1)
            {
                break;
            }
        }
        nxt++;
        size--;
    }
    return str;
}
这是一个实现,根据您的需要增加输入缓冲区,并增加对 strtok 的调用次数(str1 为 NULL)以取回更多令牌
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
char *ret_nondelim(char *str1,char *str2);
char *my_strtok(char *str1,char *str2);
char *ret_noofbytes(char *str1,char *str2);
/*
 * Read a string and a delimiter set from standard input, then request
 * three tokens and print each one (or "returned NULL").
 */
int main()
{
    char str1[20];
    char str2[20];
    char *res;

    /* %19s keeps the input inside the 20-byte buffers. */
    printf("enter string one\n");
    if (scanf("%19s", str1) != 1)
        return 1;
    printf("enter string two\n");
    if (scanf("%19s", str2) != 1)
        return 1;

    for (int call = 0; call < 3; call++)
    {
        /* Only the first call passes the string; the rest continue with NULL. */
        res = my_strtok(call == 0 ? str1 : NULL, str2);
        if (res)
            printf("%s\n", res);
        else
            printf("returned NULL\n");
        free(res);  /* tokens are heap copies owned by the caller */
    }
    return 0;
}
/*
 * my_strtok - return the next token as a freshly allocated string.
 *
 * str1  String to start tokenizing, or NULL to continue after the
 *       previous token.  The string itself is never modified.
 * str2  NUL-terminated set of delimiter characters.
 *
 * Returns a malloc()ed copy of the token which the caller must free(), or
 * NULL when no tokens remain, when there is nothing to continue, or when
 * allocation fails.
 */
char *my_strtok(char *str1,char *str2)
{
    static char *str1_cpy;  /* resume position inside the caller's string */
    char *end;
    char *ptr;
    size_t len;

    /* The start and continue cases differ only in where scanning begins. */
    if(str1==NULL)
        str1=str1_cpy;
    if(str1==NULL || str2==NULL)
        return NULL;

    str1_cpy=ret_nondelim(str1,str2);   /* first non-delimiter byte */
    end=ret_noofbytes(str1_cpy,str2);   /* first delimiter byte after it */
    len=(size_t)(end-str1_cpy);
    if(len==0)
        return NULL;                    /* only delimiters were left */

    ptr=malloc(len+1);
    if(ptr==NULL)
        return NULL;                    /* position kept so the call can be retried */
    memcpy(ptr,str1_cpy,len);
    ptr[len]='\0';
    str1_cpy=end;                       /* save position for the next call */
    return ptr;
}
/*
 * ret_nondelim - skip the delimiters at the start of str1.
 *
 * Returns a pointer to the first byte of str1 that is not one of the
 * bytes in str2, or to str1's terminator if every byte is a delimiter.
 */
char *ret_nondelim(char *str1,char *str2)
{
    /* strspn() measures the leading run made up only of bytes from str2. */
    size_t skipped = strspn(str1, str2);

    return str1 + skipped;
}
/*
 * ret_noofbytes - find where the token starting at str1 ends.
 *
 * Returns a pointer to the first byte of str1 that is one of the bytes in
 * str2, or to str1's terminator if there is none.
 */
char *ret_noofbytes(char *str1,char *str2)
{
    /* strcspn() measures the leading run that contains no byte from str2. */
    size_t token_len = strcspn(str1, str2);

    return str1 + token_len;
}
Here is the code which explains how strtok works in C.
#include <stdio.h>
#include <string.h>
#define SIZE_OF_STRING 50
#define SIZE_OF_DELIMITER 5
/* Make the temp_ptr as static, so it will hold the previous pointing address */
static char *temp_ptr = NULL;

/*
 * string_token - teaching version of strtok() that prints a trace line
 * on every call.
 *
 * str        String to start tokenizing (modified in place), or NULL to
 *            continue with the string from the previous call.
 * delimiter  NUL-terminated set of delimiter characters.
 *
 * Returns a pointer to the next token inside the caller's buffer, or NULL
 * when delimiter is NULL, when there is nothing to continue, or after the
 * last token has been returned.  Consecutive delimiters are not merged:
 * they produce empty tokens.
 */
char *string_token(char *str, char *delimiter)
{
    /* Set once the final token has been handed out, so the caller's loop
     * ends; cleared when a new string is supplied. */
    static int flag = 0;
    char *final_ptr = NULL;
    size_t i;

    if (delimiter == NULL) {
        return NULL;
    }

    /* A non-NULL str starts a new tokenization; NULL keeps the saved position. */
    if (str != NULL) {
        temp_ptr = str;
        flag = 0;
    }

    /* The end was already reported, or there is nothing to continue. */
    if (flag == 1 || temp_ptr == NULL) {
        return NULL;
    }

    /* temp_ptr moves on before returning, so remember the token start. */
    final_ptr = temp_ptr;

    /* Trace output: %zu matches size_t, and a NULL str is never passed to %s. */
    printf("%s %d str: %s delimiter: %s length: %zu temp_ptr: %s strlen: %zu"
           " final_ptr: %s \n", __func__, __LINE__, str ? str : "(null)",
           delimiter, strlen(delimiter), temp_ptr, strlen(temp_ptr), final_ptr);

    /* Offset of the first delimiter, or of the terminator if there is none. */
    i = strcspn(temp_ptr, delimiter);
    if (temp_ptr[i] == '\0') {
        /* Last token: without the flag the caller would loop forever. */
        flag = 1;
        return final_ptr;
    }

    /* Cut the token at the delimiter and continue just after it next time. */
    temp_ptr[i] = '\0';
    temp_ptr += i + 1;
    return final_ptr;
}
/* Split a '|'-separated list and print one token per line. */
int main()
{
    char del[SIZE_OF_DELIMITER] = "|";
    char str[SIZE_OF_STRING] = "shirley|Rose|Jasmine";
    char *token = NULL;

    for (token = string_token(str, del); token != NULL;
         token = string_token(NULL, del)) {
        printf("token %s\n", token);
    }
    return 0;
}
https://shirleyanengineer.blogspot.com/2019/11/write-your-own-strtok-in-c.html
@Enzo Ferber 提供的 glibc 解决方案对我不起作用(MS Visual Studio 2019,Visual Micro Release 22.02.18.5 --> Arduino(1.8),avr-gcc 5.4.0)在:
/* 终止令牌并使 OLDS 指向它。 */
*s = '\0';
因为s只是指针的浅拷贝。将 *s = '\0';
更改为:
**((char**)&s) = '\0';
而且效果很好。不确定这是否特定于 gcc 实现,但似乎尝试修改函数中指针的浅表副本在任何 ANSI/ISO C 实现中都会失败。
我需要编写函数 strtok。下面是我的代码。问题是 - 我无法显示结果字符串。在代码中我使用 strcpy()
然后显示新数组。
是否可以仅使用指针显示字符串 *str
?
#include <stdio.h>
#include <string.h>
/*
 * my_strtok - split a string into tokens, following the contract of the
 * standard strtok().
 *
 * s     String to tokenize on the first call; NULL on later calls to
 *       continue with the string from the previous call.
 * delm  NUL-terminated set of delimiter characters.
 *
 * Returns a pointer to the next token, or NULL when no tokens remain.
 * The token is NOT copied: the delimiter that ends it is overwritten with
 * '\0' inside the caller's buffer and the returned pointer refers to that
 * buffer.  The result can therefore be printed directly through the
 * pointer, and the input must be writable (not a string literal).
 */
char* my_strtok(char* s, char* delm){
    static char *Iterator;      /* where the next call with s == NULL resumes */
    char *token;

    if (s == NULL)
        s = Iterator;
    if (s == NULL || delm == NULL)
        return NULL;            /* nothing to continue, or no delimiter set */

    /* Skip delimiters in front of the token. */
    while (*s != '\0' && strchr(delm, *s) != NULL)
        s++;
    if (*s == '\0') {
        Iterator = s;           /* stay on the terminator: later calls return NULL */
        return NULL;
    }

    /* Walk to the end of the token. */
    token = s;
    while (*s != '\0' && strchr(delm, *s) == NULL)
        s++;

    /* Cut the token off; only step over a delimiter, never over the terminator. */
    if (*s != '\0') {
        *s = '\0';
        s++;
    }
    Iterator = s;
    return token;
}
/*
 * Read one line from standard input and print its first space-separated
 * token.
 */
int main(void){
    char s[100];
    char delm[] = " ";
    char newstr[100];
    char *str;

    /* fgets() bounds the read to the buffer; gets() cannot and was removed in C11. */
    if (fgets(s, sizeof s, stdin) == NULL)
        return 1;
    s[strcspn(s, "\n")] = '\0';     /* drop the newline that fgets() keeps */

    str = my_strtok(s, delm);
    if (str == NULL)                /* no token: nothing to copy or print */
        return 0;
    strcpy(newstr, str);            /* fits: a token is never longer than s */
    printf("%s", newstr);
    return 0;
}
这里已经讨论了 strtok 的内部实现:
How does strtok() split the string into tokens in C?
在您的实现类型中(称其为您的类型,因为它与实际类型有很大不同),您没有为局部变量动态分配任何内存'W'。所以当你 return 它时,不能保证你会正确地接收到字符串,因为在本地分配给 'W' 的内存在调用函数中不再为它保留。除此之外,代码中还使用了许多不必要的变量。而且,它需要适当的重组。为了您更好地理解,我修改了您的代码(尽可能保持您的风格)以完成所需的工作:
#include <stdio.h>
#include <string.h>
#include <malloc.h>
/*
 * my_strtok - return the next token of s as a freshly allocated string.
 *
 * Unlike the standard strtok(), the SAME string s is passed on every call;
 * the scan position is kept in a static index.  Delimiters in front of a
 * token are skipped, so consecutive delimiters do not yield empty tokens.
 *
 * s     String being tokenized (never modified).
 * delm  NUL-terminated set of delimiter characters.
 *
 * Returns a malloc()ed copy of the token which the caller must free(), or
 * NULL when s/delm is NULL, no tokens remain, or allocation fails.  When
 * the end of s is reached the position is reset, so another string can be
 * tokenized afterwards.
 */
char* my_strtok(char* s, char* delm)
{
    static int currIndex = 0;   /* offset in s where the next scan starts */
    size_t start, len;
    char *W;

    if (!s || !delm)
        return NULL;

    /* Skip delimiters that precede the token. */
    start = (size_t)currIndex + strspn(s + currIndex, delm);
    if (s[start] == '\0') {
        currIndex = 0;          /* finished: rewind for the next string */
        return NULL;
    }

    /* Size the copy from the token itself instead of a fixed 100 bytes. */
    len = strcspn(s + start, delm);
    W = malloc(len + 1);
    if (!W)
        return NULL;
    memcpy(W, s + start, len);
    W[len] = '\0';

    /* Step over the delimiter, but never past the string terminator. */
    currIndex = (int)(start + len + (s[start + len] != '\0'));
    return W;
}
/*
 * Print every token of a fixed sentence.  my_strtok() is handed the same
 * string on each call and returns heap copies, which are released here.
 */
int main(void)
{
    char delm[] = " ";
    char s[100] = "my name is khan";
    char *str;

    for (str = my_strtok(s, delm); str; str = my_strtok(s, delm)) {
        printf("%s", str);
        free(str);
    }
    return 0;
}
赏金免责声明
赏金:"Looking for an answer drawing from credible and/or official sources."
我认为 GNU GLibC constitutes a credible and official source, right? You can download GLibC 2.22 here 为 .tar.gz
(25mb)。
提取源后:
string/strtok.c
#include <string.h>
static char *olds;
#undef strtok
#ifndef STRTOK
# define STRTOK strtok
#endif
/* Parse S into tokens separated by characters in DELIM.
   If S is NULL, the last string strtok() was called with is
   used.  For example:
	char s[] = "-abc-=-def";
	x = strtok(s, "-");		// x = "abc"
	x = strtok(NULL, "-=");		// x = "def"
	x = strtok(NULL, "=");		// x = NULL
		// s = "abc\0=-def\0"

   The token is not copied: the delimiter that ends it is overwritten
   with '\0' inside S, and the returned pointer points into S.  The
   resume position is kept in the file-scope variable OLDS.  */
char *
STRTOK (char *s, const char *delim)
{
  char *token;

  if (s == NULL)
    s = olds;

  /* Scan leading delimiters.  */
  s += strspn (s, delim);
  if (*s == '\0')
    {
      /* Only delimiters were left: remember the terminator so that
	 further calls keep returning NULL.  */
      olds = s;
      return NULL;
    }

  /* Find the end of the token.  */
  token = s;
  s = strpbrk (token, delim);
  if (s == NULL)
    /* This token finishes the string.  */
    olds = __rawmemchr (token, '\0');
  else
    {
      /* Terminate the token and make OLDS point past it.  */
      *s = '\0';
      olds = s + 1;
    }
  return token;
}
string/strspn.c
#include <string.h>
#undef strspn
#ifndef STRSPN
#define STRSPN strspn
#endif
/* Return the length of the maximum initial segment
of S which contains only characters in ACCEPT. */
/* Return the length of the maximum initial segment of S which consists
   only of characters in ACCEPT.  Returns 0 when S is empty or its first
   character is not in ACCEPT.  */
size_t
STRSPN (const char *s, const char *accept)
{
  const char *p;
  const char *a;
  size_t count = 0;

  for (p = s; *p != '\0'; ++p)
    {
      /* Look for *P in ACCEPT; A stops on the match or on the terminator.  */
      for (a = accept; *a != '\0'; ++a)
	if (*p == *a)
	  break;
      if (*a == '\0')
	/* *P is not in ACCEPT: the segment ends here.  */
	return count;
      else
	++count;
    }

  return count;
}
libc_hidden_builtin_def (strspn)
string/strpbrk.c
#include <string.h>
#undef strpbrk
#ifndef STRPBRK
#define STRPBRK strpbrk
#endif
/* Find the first occurrence in S of any character in ACCEPT. */
/* Find the first occurrence in S of any character in ACCEPT.
   Returns a pointer to that character inside S, or NULL when S contains
   none of the characters in ACCEPT.  */
char *
STRPBRK (const char *s, const char *accept)
{
  while (*s != '\0')
    {
      const char *a = accept;
      while (*a != '\0')
	if (*a++ == *s)
	  /* The cast drops const, as the standard strpbrk interface does.  */
	  return (char *) s;
      ++s;
    }

  return NULL;
}
libc_hidden_builtin_def (strpbrk)
sysdeps/x86_64/rawmemchr.S
#include <sysdep.h>
.text
/* __rawmemchr: SSE2 search for a byte with no length limit.
   As used below: %rdi holds the start address, the low byte of %rsi is
   the byte searched for, and the address of the first match is returned
   in %rax.  The search is unbounded, so the byte must occur.
   NOTE(review): the immediate operands ($63, $0, $48, $16, $15, $64,
   $0x3f) were lost when this listing was extracted and have been
   restored; confirm them against the upstream glibc file.  */
ENTRY (__rawmemchr)
	/* Broadcast the searched byte into all 16 bytes of %xmm1.  */
	movd	%rsi, %xmm1
	mov	%rdi, %rcx
	punpcklbw %xmm1, %xmm1
	punpcklbw %xmm1, %xmm1
	/* %rcx = offset of the start address within its 64-byte block.  */
	and	$63, %rcx
	pshufd	$0, %xmm1, %xmm1
	/* Above offset 48 an unaligned 16-byte load would cross the block.  */
	cmp	$48, %rcx
	ja	L(crosscache)
	movdqu	(%rdi), %xmm0
	pcmpeqb	%xmm1, %xmm0
	/* Check if there is a match. */
	pmovmskb %xmm0, %eax
	test	%eax, %eax
	jnz	L(matches)
	/* No match: continue from the next 16-byte aligned address.  */
	add	$16, %rdi
	and	$-16, %rdi
	jmp	L(loop_prolog)
	.p2align 4
L(crosscache):
	/* Load the aligned 16 bytes containing the start address.  */
	and	$15, %rcx
	and	$-16, %rdi
	movdqa	(%rdi), %xmm0
	pcmpeqb	%xmm1, %xmm0
	/* Check if there is a match. */
	pmovmskb %xmm0, %eax
	/* Remove the leading bytes. */
	sar	%cl, %eax
	test	%eax, %eax
	je	L(unaligned_no_match)
	/* Check which byte is a match. */
	bsf	%eax, %eax
	add	%rdi, %rax
	add	%rcx, %rax
	ret
	.p2align 4
L(unaligned_no_match):
	add	$16, %rdi
	.p2align 4
L(loop_prolog):
	/* Check four 16-byte chunks one at a time.  */
	movdqa	(%rdi), %xmm0
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %eax
	test	%eax, %eax
	jnz	L(matches)
	movdqa	16(%rdi), %xmm2
	pcmpeqb	%xmm1, %xmm2
	pmovmskb %xmm2, %eax
	test	%eax, %eax
	jnz	L(matches16)
	movdqa	32(%rdi), %xmm3
	pcmpeqb	%xmm1, %xmm3
	pmovmskb %xmm3, %eax
	test	%eax, %eax
	jnz	L(matches32)
	movdqa	48(%rdi), %xmm4
	pcmpeqb	%xmm1, %xmm4
	/* %rdi already advanced by 64; L(matches0) compensates with -16.  */
	add	$64, %rdi
	pmovmskb %xmm4, %eax
	test	%eax, %eax
	jnz	L(matches0)
	/* Already 64-byte aligned?  Then enter the main loop directly.  */
	test	$0x3f, %rdi
	jz	L(align64_loop)
	movdqa	(%rdi), %xmm0
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %eax
	test	%eax, %eax
	jnz	L(matches)
	movdqa	16(%rdi), %xmm2
	pcmpeqb	%xmm1, %xmm2
	pmovmskb %xmm2, %eax
	test	%eax, %eax
	jnz	L(matches16)
	movdqa	32(%rdi), %xmm3
	pcmpeqb	%xmm1, %xmm3
	pmovmskb %xmm3, %eax
	test	%eax, %eax
	jnz	L(matches32)
	movdqa	48(%rdi), %xmm3
	pcmpeqb	%xmm1, %xmm3
	pmovmskb %xmm3, %eax
	add	$64, %rdi
	test	%eax, %eax
	jnz	L(matches0)
	/* Round down to a 64-byte boundary for the main loop.  */
	and	$-64, %rdi
	.p2align 4
L(align64_loop):
	/* Compare 64 bytes per iteration and merge the four results with
	   pmaxub so that a single mask test covers all of them.  */
	movdqa	(%rdi), %xmm0
	movdqa	16(%rdi), %xmm2
	movdqa	32(%rdi), %xmm3
	movdqa	48(%rdi), %xmm4
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm1, %xmm2
	pcmpeqb	%xmm1, %xmm3
	pcmpeqb	%xmm1, %xmm4
	pmaxub	%xmm0, %xmm3
	pmaxub	%xmm2, %xmm4
	pmaxub	%xmm3, %xmm4
	pmovmskb %xmm4, %eax
	add	$64, %rdi
	test	%eax, %eax
	jz	L(align64_loop)
	/* A match lies in the last 64 bytes: step back and locate it.  */
	sub	$64, %rdi
	pmovmskb %xmm0, %eax
	test	%eax, %eax
	jnz	L(matches)
	pmovmskb %xmm2, %eax
	test	%eax, %eax
	jnz	L(matches16)
	/* %xmm3 was overwritten by pmaxub, so compare that chunk again.  */
	movdqa	32(%rdi), %xmm3
	pcmpeqb	%xmm1, %xmm3
	pcmpeqb	48(%rdi), %xmm1
	pmovmskb %xmm3, %eax
	test	%eax, %eax
	jnz	L(matches32)
	pmovmskb %xmm1, %eax
	bsf	%eax, %eax
	lea	48(%rdi, %rax), %rax
	ret
	.p2align 4
L(matches0):
	bsf	%eax, %eax
	lea	-16(%rax, %rdi), %rax
	ret
	.p2align 4
L(matches):
	bsf	%eax, %eax
	add	%rdi, %rax
	ret
	.p2align 4
L(matches16):
	bsf	%eax, %eax
	lea	16(%rax, %rdi), %rax
	ret
	.p2align 4
L(matches32):
	bsf	%eax, %eax
	lea	32(%rax, %rdi), %rax
	ret
	.p2align 4
	/* No jump in this listing targets the label below.  */
L(return_null):
	xor	%rax, %rax
	ret
END (__rawmemchr)
weak_alias (__rawmemchr, rawmemchr)
libc_hidden_builtin_def (__rawmemchr)
实际上这个是我对一般情况的解决方案...当 str 是 char* 时你不能写入它。
这里是:
And here is the link to github资源:
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define DICT_LEN 256

/*
 * create_delim_dict - build a lookup table for a set of delimiter bytes.
 *
 * delim  NUL-terminated delimiter characters; NULL is treated as empty.
 *
 * Returns a heap array of DICT_LEN ints in which entry b is 1 when byte
 * value b (taken as unsigned char) is a delimiter and 0 otherwise, or
 * NULL if allocation fails.  The caller must free() the table.
 */
int *create_delim_dict(char *delim)
{
    const unsigned char *p;
    int *d = calloc(DICT_LEN, sizeof *d);   /* zeroed: no byte is a delimiter yet */

    if (d == NULL || delim == NULL)
        return d;

    /* Index as unsigned char: a plain char may be negative for bytes >= 0x80. */
    for (p = (const unsigned char *)delim; *p != '\0'; p++)
        d[*p] = 1;
    return d;
}
/*
 * my_strtok - strtok() variant that never writes to the caller's string.
 *
 * On a call with a non-NULL str the string is copied to the heap and the
 * copy is tokenized, so str may be a string literal.  Calls with
 * str == NULL continue with that copy.
 *
 * str    String to start tokenizing, or NULL to continue.
 * delim  NUL-terminated set of delimiter characters.
 *
 * Returns a pointer to the next token inside the private copy, or NULL
 * when no tokens remain or an allocation fails.  The copy is released as
 * soon as NULL is returned for "no more tokens" or a new string is
 * started, so tokens must be used or copied before that point.
 */
char *my_strtok(char *str, char *delim)
{
    static char *last, *to_free;    /* scan position / start of the heap copy */
    int *deli_dict = create_delim_dict(delim);

    if (!deli_dict) {
        return NULL;
    }

    if (str) {
        char *copy = malloc(strlen(str) + 1);
        if (!copy) {
            free(deli_dict);
            return NULL;            /* previous state is left untouched */
        }
        strcpy(copy, str);
        /* Release the old copy only after copying: str may point into it. */
        free(to_free);
        to_free = last = copy;
    }

    if (!last) {                    /* nothing to continue */
        free(deli_dict);
        return NULL;
    }

    /* Skip leading delimiters; cast so bytes >= 0x80 index the table correctly. */
    while (*last != '\0' && deli_dict[(unsigned char)*last]) {
        last++;
    }
    str = last;

    if (*last == '\0') {
        free(deli_dict);
        free(to_free);
        last = to_free = NULL;      /* no dangling pointers for later calls */
        return NULL;
    }

    while (*last != '\0' && !deli_dict[(unsigned char)*last]) {
        last++;
    }
    /* Terminate the token; step over a delimiter but never the terminator. */
    if (*last != '\0') {
        *last = '\0';
        last++;
    }

    free(deli_dict);
    return str;
}
/* Tokenize a sample sentence and print one token per line. */
int main()
{
    char *del = " ,.-";
    char * str = "- This, a sample string.";
    char *s;

    for (s = my_strtok(str, del); s; s = my_strtok(NULL, del)) {
        printf("%s\n", s);
    }
    return 0;
}
/*
 * strtok2 - tokenizer that overwrites delimiters in place with '\0'.
 *
 * str    String to start tokenizing (modified in place), or NULL to
 *        continue with the string from the previous call.
 * delim  NUL-terminated set of delimiter characters.
 *
 * Returns a pointer to the next token inside the original buffer, or NULL
 * when the string is exhausted or no string was ever supplied.  Unlike
 * the standard strtok(), delimiters at the very start of the string are
 * not skipped: they produce an empty first token.
 */
char * strtok2(char *str, const char *delim)
{
    static char *nxt;   /* scan position; rests on a '\0' between calls */
    static int size;    /* characters left from nxt to the end of the string */
    /* IMPORTANT: every advance of 'nxt' is paired with a decrement of 'size'. */
    int i;              /* length of the delimiter run found at nxt */

    if (str != NULL)
    {
        /* A new string is supplied: start over. */
        nxt = str;
        size = (int)strlen(str);
    }
    else if (nxt == NULL)
    {
        /* Continuation requested before any string was supplied. */
        return NULL;
    }
    else if (size > 0)
    {
        /* The previous call left nxt on a terminator; the next token follows it. */
        nxt++;
        size--;
        str = nxt;
    }
    /* Otherwise the end was reached: str stays NULL and the loop below
     * does not run because nxt rests on the final terminator. */

    while (*nxt != '\0')
    {
        i = (int)strspn(nxt, delim);
        while (i > 1)   /* blank out all but the last delimiter of a run */
        {
            *nxt = '\0';
            nxt++;
            size--;
            i--;
        }
        if (1 == i)
        {
            *nxt = '\0';
            /* More text follows: stop here with nxt resting on the new
             * terminator.  (Breaking replaces the original step-back /
             * step-forward pair, which could move nxt before the buffer.) */
            if (size > 1)
            {
                break;
            }
        }
        nxt++;
        size--;
    }
    return str;
}
这是一个实现,根据您的需要增加输入缓冲区,并增加对 strtok 的调用次数(str1 为 NULL)以取回更多令牌
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
char *ret_nondelim(char *str1,char *str2);
char *my_strtok(char *str1,char *str2);
char *ret_noofbytes(char *str1,char *str2);
/*
 * Read a string and a delimiter set from standard input, then request
 * three tokens and print each one (or "returned NULL").
 */
int main()
{
    char str1[20];
    char str2[20];
    char *res;

    /* %19s keeps the input inside the 20-byte buffers. */
    printf("enter string one\n");
    if (scanf("%19s", str1) != 1)
        return 1;
    printf("enter string two\n");
    if (scanf("%19s", str2) != 1)
        return 1;

    for (int call = 0; call < 3; call++)
    {
        /* Only the first call passes the string; the rest continue with NULL. */
        res = my_strtok(call == 0 ? str1 : NULL, str2);
        if (res)
            printf("%s\n", res);
        else
            printf("returned NULL\n");
        free(res);  /* tokens are heap copies owned by the caller */
    }
    return 0;
}
/*
 * my_strtok - return the next token as a freshly allocated string.
 *
 * str1  String to start tokenizing, or NULL to continue after the
 *       previous token.  The string itself is never modified.
 * str2  NUL-terminated set of delimiter characters.
 *
 * Returns a malloc()ed copy of the token which the caller must free(), or
 * NULL when no tokens remain, when there is nothing to continue, or when
 * allocation fails.
 */
char *my_strtok(char *str1,char *str2)
{
    static char *str1_cpy;  /* resume position inside the caller's string */
    char *end;
    char *ptr;
    size_t len;

    /* The start and continue cases differ only in where scanning begins. */
    if(str1==NULL)
        str1=str1_cpy;
    if(str1==NULL || str2==NULL)
        return NULL;

    str1_cpy=ret_nondelim(str1,str2);   /* first non-delimiter byte */
    end=ret_noofbytes(str1_cpy,str2);   /* first delimiter byte after it */
    len=(size_t)(end-str1_cpy);
    if(len==0)
        return NULL;                    /* only delimiters were left */

    ptr=malloc(len+1);
    if(ptr==NULL)
        return NULL;                    /* position kept so the call can be retried */
    memcpy(ptr,str1_cpy,len);
    ptr[len]='\0';
    str1_cpy=end;                       /* save position for the next call */
    return ptr;
}
/*
 * ret_nondelim - skip the delimiters at the start of str1.
 *
 * Returns a pointer to the first byte of str1 that is not one of the
 * bytes in str2, or to str1's terminator if every byte is a delimiter.
 */
char *ret_nondelim(char *str1,char *str2)
{
    /* strspn() measures the leading run made up only of bytes from str2. */
    size_t skipped = strspn(str1, str2);

    return str1 + skipped;
}
/*
 * ret_noofbytes - find where the token starting at str1 ends.
 *
 * Returns a pointer to the first byte of str1 that is one of the bytes in
 * str2, or to str1's terminator if there is none.
 */
char *ret_noofbytes(char *str1,char *str2)
{
    /* strcspn() measures the leading run that contains no byte from str2. */
    size_t token_len = strcspn(str1, str2);

    return str1 + token_len;
}
Here is the code which explains how strtok works in C.
#include <stdio.h>
#include <string.h>
#define SIZE_OF_STRING 50
#define SIZE_OF_DELIMITER 5
/* Make the temp_ptr as static, so it will hold the previous pointing address */
static char *temp_ptr = NULL;

/*
 * string_token - teaching version of strtok() that prints a trace line
 * on every call.
 *
 * str        String to start tokenizing (modified in place), or NULL to
 *            continue with the string from the previous call.
 * delimiter  NUL-terminated set of delimiter characters.
 *
 * Returns a pointer to the next token inside the caller's buffer, or NULL
 * when delimiter is NULL, when there is nothing to continue, or after the
 * last token has been returned.  Consecutive delimiters are not merged:
 * they produce empty tokens.
 */
char *string_token(char *str, char *delimiter)
{
    /* Set once the final token has been handed out, so the caller's loop
     * ends; cleared when a new string is supplied. */
    static int flag = 0;
    char *final_ptr = NULL;
    size_t i;

    if (delimiter == NULL) {
        return NULL;
    }

    /* A non-NULL str starts a new tokenization; NULL keeps the saved position. */
    if (str != NULL) {
        temp_ptr = str;
        flag = 0;
    }

    /* The end was already reported, or there is nothing to continue. */
    if (flag == 1 || temp_ptr == NULL) {
        return NULL;
    }

    /* temp_ptr moves on before returning, so remember the token start. */
    final_ptr = temp_ptr;

    /* Trace output: %zu matches size_t, and a NULL str is never passed to %s. */
    printf("%s %d str: %s delimiter: %s length: %zu temp_ptr: %s strlen: %zu"
           " final_ptr: %s \n", __func__, __LINE__, str ? str : "(null)",
           delimiter, strlen(delimiter), temp_ptr, strlen(temp_ptr), final_ptr);

    /* Offset of the first delimiter, or of the terminator if there is none. */
    i = strcspn(temp_ptr, delimiter);
    if (temp_ptr[i] == '\0') {
        /* Last token: without the flag the caller would loop forever. */
        flag = 1;
        return final_ptr;
    }

    /* Cut the token at the delimiter and continue just after it next time. */
    temp_ptr[i] = '\0';
    temp_ptr += i + 1;
    return final_ptr;
}
/* Split a '|'-separated list and print one token per line. */
int main()
{
    char del[SIZE_OF_DELIMITER] = "|";
    char str[SIZE_OF_STRING] = "shirley|Rose|Jasmine";
    char *token = NULL;

    for (token = string_token(str, del); token != NULL;
         token = string_token(NULL, del)) {
        printf("token %s\n", token);
    }
    return 0;
}
https://shirleyanengineer.blogspot.com/2019/11/write-your-own-strtok-in-c.html
@Enzo Ferber 提供的 glibc 解决方案对我不起作用(MS Visual Studio 2019,Visual Micro Release 22.02.18.5 --> Arduino(1.8),avr-gcc 5.4.0)在:
/* 终止令牌并使 OLDS 指向它。 */
*s = '\0';
因为s只是指针的浅拷贝。将 *s = '\0';
更改为:
**((char**)&s) = '\0';
而且效果很好。不确定这是否特定于 gcc 实现,但似乎尝试修改函数中指针的浅表副本在任何 ANSI/ISO C 实现中都会失败。