C:按单词分隔符将文件拆分为多个 char * 字符串
C file to multiple char *groups by word delimiter
Really my data is here, and I think its really
cool. Somewhere, i want to break on some really
awesome data. Please let me really explain what is going
on. You are amazing. Something is really awesome.
Please give me the stuffs.
char **字符串:
my data is here, and I think its
cool. Somewhere, i want to break on some
awesome data. Please let me
explain what is going'\n'on. You are amazing. Something is
awesome.'\n'Please give me the stuffs.
/* Attempt from the question: read the file, split it with strtok() and
 * collect strdup()'d pieces in a growable array of char pointers.
 * NOTE(review): the closing braces are absent from this excerpt, and
 * `fileLength`, `end`, `fp`, `text`, `packets` and `s` are not declared
 * anywhere in it. */
char *filedata = malloc(fileLength);
fread(filedata, end, 1, fp); //ABC
size_t stringCount = 8;
size_t idx = 0;
/* NOTE(review): the element size is taken from `*packets`; presumably
 * `sizeof(*data)` was intended -- confirm. */
char **data = malloc(stringCount * sizeof(*packets));
if(!data) {
fprintf(stderr, "There was an error");
return 1;
/* NOTE(review): this reads into the pointer array `data`, not into the
 * `filedata` buffer allocated above. */
fread(data, end, 1, text);
/* NOTE(review): strtok() is handed `data` (a char **), and it treats its
 * second argument as a set of single delimiter characters, so "really"
 * splits on any of 'r', 'e', 'a', 'l', 'y' rather than on the whole word. */
char *stuff = strtok(data, "really");
while(stuff) {
data[idx++] = strdup(stuff);
/* NOTE(review): the next token is stored in `s`, so the loop variable
 * `stuff` is not updated by this line; the delimiter set also changes
 * to "stuff". */
s = strtok(NULL, "stuff");
if(idx >= stringCount) {
stringCount *= 2;
/* NOTE(review): realloc() is applied to the token pointer `stuff` with
 * an element size of sizeof(*stuff); presumably the array `data` and
 * sizeof(*data) were intended -- confirm. */
void *tmp = realloc(stuff, stringCount * sizeof(*stuff));
if(!tmp) {
perror("Unable to make a larger string list");
/* Undo the doubling of stringCount performed above. */
stringCount /= 2;
/* NOTE(review): the reallocated block is assigned to `stuff`, not `data`. */
stuff = tmp;
您要在单词 "really"
上对 "文件" 进行分词的目标存在一些微妙的困难。这些困难是什么?文本文件通常一次读取一行;如果要存储整个文件的各行,则通常保存为多个指针,每个指针指向一行的开头。这意味着,如果采用通用的面向行的方法来读取文件,您的标记(从文件开头开始,或以单词 "really" 开头)可能会跨越多行。
或者,您可以将整个文件读入单个缓冲区,然后使用 strstr
解析分隔符 "really"
, 但是... ,您将需要确保保存文件的缓冲区 nul-terminated 以避免最终调用 strstr
的未定义行为。 (通常将整个文件读入缓冲区不会导致 nul-terminated 缓冲区)
也就是说,即使使用 strstr
,您也必须有效地手动解析文件的内容。您需要维护三个指针:一个指向标记开头的开始指针;一个用于搜索定界符的解析指针,以便处理所找到的定界符只是某个更长单词的子字符串的情况;最后是一个标记结束位置的结束指针。
该方案相当简单:第一个标记从文件的开头开始,每个后续标记都以单词 "really"
开头。因此,您向前扫描以找到 " really"
(注意 " really"
之前的空格),将结束指针设置为 " really"
的起始位置,将标记复制到缓冲区,对标记做所需的处理( /* do stuff with token */
),然后释放它( free (token);
),再将开始指针更新为 "really"
的开头,将通用解析指针移到 "really"
之后,并重复这一过程,直到再也找不到 "really"
为止。退出解析循环后,您仍然需要处理最后一个标记( /* do stuff */
)。
您还可以决定如何处理每个令牌中包含的 '\n'
。为了下面的输出目的,它们只是被 ' '
覆盖。(您可以添加任何其他想要的处理,例如去除因替换换行符而产生的尾随或中间多余空白,这部分留给您自己完成。)
总而言之,您可以执行类似于以下内容的操作,其中将文件内容读入以 nul 结尾的缓冲区的工作由函数 read_file()
处理,其余的分词工作则直接在 main() 中完成:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/*
 * read_file - read a whole file into a newly allocated, nul-terminated buffer.
 *
 * fname   path of the file to read; it is opened in binary mode ("rb").
 * nbytes  on success receives the number of bytes read, not counting the
 *         terminating nul that is appended to the buffer.
 *
 * Returns the malloc()'d buffer, which the caller must free(), or NULL when
 * the file cannot be opened, measured, allocated for, or read completely.
 * On every failure path the stream is closed and the buffer released, and
 * *nbytes is left untouched.
 */
char *read_file (char* fname, size_t *nbytes)
{
    long bytes = 0;
    char *file_content = NULL;
    FILE *file = fopen (fname, "rb");

    if (!file) {                    /* validate file open for reading */
        return NULL;
    }

    /* seek to the end to learn the length, then rewind for the read */
    if (fseek (file, 0, SEEK_END) != 0 ||
        (bytes = ftell (file)) < 0 ||
        fseek (file, 0, SEEK_SET) != 0) {
        fprintf (stderr, "error: unable to determine file length.\n");
        goto fail;
    }

    /* one extra byte so the content can be nul-terminated for strstr */
    if (!(file_content = malloc ((size_t)bytes + 1))) {
        perror ("malloc - virtual memory exhausted");
        goto fail;
    }

    /* read all data in a single call to fread */
    if (fread (file_content, 1, (size_t)bytes, file) != (size_t)bytes) {
        fprintf (stderr, "error: failed to read %ld-bytes from '%s'.\n",
                 bytes, fname);
        goto fail;
    }

    fclose (file);                  /* close file */
    file_content[bytes] = 0;        /* nul terminate - to allow strstr use */
    *nbytes = (size_t)bytes;        /* make the size available to the caller */
    return file_content;

fail:
    free (file_content);            /* free(NULL) is a no-op */
    fclose (file);
    return NULL;
}
/* Replace every '\n' in the nul-terminated string s with ' ', in place. */
static void newlines_to_spaces (char *s)
{
    for (; *s; s++) {
        if (*s == '\n') {
            *s = ' ';
        }
    }
}

/*
 * Read the file named by argv[1] and print it as a sequence of tokens.
 * The first token starts at the beginning of the file; every later token
 * starts at the word "really".  A match of " really" only ends a token when
 * it is followed by whitespace, punctuation or the end of the buffer, so
 * that longer words that merely begin with "really" are not split.
 * Newlines inside a token are printed as spaces.
 *
 * Returns 0 on success, 1 when no filename is given, the file cannot be
 * read, or a token allocation fails.
 */
int main (int argc, char **argv) {
    size_t nbytes;
    char *content;

    if (argc < 2) {     /* validate required argument given */
        fprintf (stderr, "error: insufficient input. filename req'd.\n");
        return 1;
    }

    if (!(content = read_file (argv[1], &nbytes))) {    /* read/validate */
        return 1;
    }

    char *sp = content,     /* start pointer for the current token */
         *p = sp,           /* where the next delimiter search begins */
         *ep;               /* start of the delimiter just found */
    const char *delim = " really";          /* delimiter */
    /* strlen, not sizeof: delim is a pointer, so sizeof yields pointer size */
    const size_t dlen = strlen (delim);

    while ((ep = strstr (p, delim))) {      /* while delimiter found */
        /* <ctype.h> functions require a value representable as unsigned char */
        unsigned char next = (unsigned char)ep[dlen];

        if (next == '\0' || isspace (next) || ispunct (next)) {
            /* whole-word delimiter found */
            size_t tlen = (size_t)(ep - sp);        /* get token length */
            char *token = malloc (tlen + 1);        /* allocate for token */

            if (!token) {                           /* validate allocation */
                perror ("malloc-token");
                free (content);
                return 1;
            }
            memcpy (token, sp, tlen);               /* copy to token */
            token[tlen] = 0;                        /* nul-terminate */
            newlines_to_spaces (token);
            printf ("\ntoken: %s\n", token);        /* output token */
            /* do stuff with token */
            free (token);                           /* free token memory */
            sp = ep + 1;    /* next token starts at the 'r' of "really" */
        }
        /* Resume right after the match: this never steps past the nul
         * terminator and still sees a delimiter that follows immediately. */
        p = ep + dlen;
    }

    newlines_to_spaces (sp);        /* last token lives in the file buffer */
    printf ("\ntoken: %s\n", sp);
    /* do stuff with last token */

    free (content);                 /* free buffer holding file */
    return 0;
}
$ cat dat/breakreally.txt
my data is here, and I think its really
cool. Somewhere, i want to break on some really
awesome data. Please let me really explain what is going
on. You are amazing.
$ ./bin/freadbreakreally dat/breakreally.txt
token: my data is here, and I think its
token: really cool. Somewhere, i want to break on some
token: really awesome data. Please let me
token: really explain what is going on. You are amazing.
Really my data is here, and I think its really
cool. Somewhere, i want to break on some really
awesome data. Please let me really explain what is going
on. You are amazing. Something is really awesome.
Please give me the stuffs.
char **字符串:
my data is here, and I think its
cool. Somewhere, i want to break on some
awesome data. Please let me
explain what is going'\n'on. You are amazing. Something is
awesome.'\n'Please give me the stuffs.
/* Attempt from the question: read the file, split it with strtok() and
 * collect strdup()'d pieces in a growable array of char pointers.
 * NOTE(review): the closing braces are absent from this excerpt, and
 * `fileLength`, `end`, `fp`, `text`, `packets` and `s` are not declared
 * anywhere in it. */
char *filedata = malloc(fileLength);
fread(filedata, end, 1, fp); //ABC
size_t stringCount = 8;
size_t idx = 0;
/* NOTE(review): the element size is taken from `*packets`; presumably
 * `sizeof(*data)` was intended -- confirm. */
char **data = malloc(stringCount * sizeof(*packets));
if(!data) {
fprintf(stderr, "There was an error");
return 1;
/* NOTE(review): this reads into the pointer array `data`, not into the
 * `filedata` buffer allocated above. */
fread(data, end, 1, text);
/* NOTE(review): strtok() is handed `data` (a char **), and it treats its
 * second argument as a set of single delimiter characters, so "really"
 * splits on any of 'r', 'e', 'a', 'l', 'y' rather than on the whole word. */
char *stuff = strtok(data, "really");
while(stuff) {
data[idx++] = strdup(stuff);
/* NOTE(review): the next token is stored in `s`, so the loop variable
 * `stuff` is not updated by this line; the delimiter set also changes
 * to "stuff". */
s = strtok(NULL, "stuff");
if(idx >= stringCount) {
stringCount *= 2;
/* NOTE(review): realloc() is applied to the token pointer `stuff` with
 * an element size of sizeof(*stuff); presumably the array `data` and
 * sizeof(*data) were intended -- confirm. */
void *tmp = realloc(stuff, stringCount * sizeof(*stuff));
if(!tmp) {
perror("Unable to make a larger string list");
/* Undo the doubling of stringCount performed above. */
stringCount /= 2;
/* NOTE(review): the reallocated block is assigned to `stuff`, not `data`. */
stuff = tmp;
您要在单词 "really"
上对 "文件" 进行分词的目标存在一些微妙的困难。这些困难是什么?文本文件通常一次读取一行;如果要存储整个文件的各行,则通常保存为多个指针,每个指针指向一行的开头。这意味着,如果采用通用的面向行的方法来读取文件,您的标记(从文件开头开始,或以单词 "really" 开头)可能会跨越多行。
或者,您可以将整个文件读入单个缓冲区,然后使用 strstr
解析分隔符 "really"
, 但是... ,您将需要确保保存文件的缓冲区 nul-terminated 以避免最终调用 strstr
的未定义行为。 (通常将整个文件读入缓冲区不会导致 nul-terminated 缓冲区)
也就是说,即使使用 strstr
,您也必须有效地手动解析文件的内容。您需要维护三个指针:一个指向标记开头的开始指针;一个用于搜索定界符的解析指针,以便处理所找到的定界符只是某个更长单词的子字符串的情况;最后是一个标记结束位置的结束指针。
该方案相当简单:第一个标记从文件的开头开始,每个后续标记都以单词 "really"
开头。因此,您向前扫描以找到 " really"
(注意 " really"
之前的空格),将结束指针设置为 " really"
的起始位置,将标记复制到缓冲区,对标记做所需的处理( /* do stuff with token */
),然后释放它( free (token);
),再将开始指针更新为 "really"
的开头,将通用解析指针移到 "really"
之后,并重复这一过程,直到再也找不到 "really"
为止。退出解析循环后,您仍然需要处理最后一个标记( /* do stuff */
)。
您还可以决定如何处理每个令牌中包含的 '\n'
。为了下面的输出目的,它们只是被 ' '
覆盖。(您可以添加任何其他想要的处理,例如去除因替换换行符而产生的尾随或中间多余空白,这部分留给您自己完成。)
总而言之,您可以执行类似于以下内容的操作,其中将文件内容读入以 nul 结尾的缓冲区的工作由函数 read_file()
处理,其余的分词工作则直接在 main() 中完成:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/*
 * read_file - read a whole file into a newly allocated, nul-terminated buffer.
 *
 * fname   path of the file to read; it is opened in binary mode ("rb").
 * nbytes  on success receives the number of bytes read, not counting the
 *         terminating nul that is appended to the buffer.
 *
 * Returns the malloc()'d buffer, which the caller must free(), or NULL when
 * the file cannot be opened, measured, allocated for, or read completely.
 * On every failure path the stream is closed and the buffer released, and
 * *nbytes is left untouched.
 */
char *read_file (char* fname, size_t *nbytes)
{
    long bytes = 0;
    char *file_content = NULL;
    FILE *file = fopen (fname, "rb");

    if (!file) {                    /* validate file open for reading */
        return NULL;
    }

    /* seek to the end to learn the length, then rewind for the read */
    if (fseek (file, 0, SEEK_END) != 0 ||
        (bytes = ftell (file)) < 0 ||
        fseek (file, 0, SEEK_SET) != 0) {
        fprintf (stderr, "error: unable to determine file length.\n");
        goto fail;
    }

    /* one extra byte so the content can be nul-terminated for strstr */
    if (!(file_content = malloc ((size_t)bytes + 1))) {
        perror ("malloc - virtual memory exhausted");
        goto fail;
    }

    /* read all data in a single call to fread */
    if (fread (file_content, 1, (size_t)bytes, file) != (size_t)bytes) {
        fprintf (stderr, "error: failed to read %ld-bytes from '%s'.\n",
                 bytes, fname);
        goto fail;
    }

    fclose (file);                  /* close file */
    file_content[bytes] = 0;        /* nul terminate - to allow strstr use */
    *nbytes = (size_t)bytes;        /* make the size available to the caller */
    return file_content;

fail:
    free (file_content);            /* free(NULL) is a no-op */
    fclose (file);
    return NULL;
}
/* Replace every '\n' in the nul-terminated string s with ' ', in place. */
static void newlines_to_spaces (char *s)
{
    for (; *s; s++) {
        if (*s == '\n') {
            *s = ' ';
        }
    }
}

/*
 * Read the file named by argv[1] and print it as a sequence of tokens.
 * The first token starts at the beginning of the file; every later token
 * starts at the word "really".  A match of " really" only ends a token when
 * it is followed by whitespace, punctuation or the end of the buffer, so
 * that longer words that merely begin with "really" are not split.
 * Newlines inside a token are printed as spaces.
 *
 * Returns 0 on success, 1 when no filename is given, the file cannot be
 * read, or a token allocation fails.
 */
int main (int argc, char **argv) {
    size_t nbytes;
    char *content;

    if (argc < 2) {     /* validate required argument given */
        fprintf (stderr, "error: insufficient input. filename req'd.\n");
        return 1;
    }

    if (!(content = read_file (argv[1], &nbytes))) {    /* read/validate */
        return 1;
    }

    char *sp = content,     /* start pointer for the current token */
         *p = sp,           /* where the next delimiter search begins */
         *ep;               /* start of the delimiter just found */
    const char *delim = " really";          /* delimiter */
    /* strlen, not sizeof: delim is a pointer, so sizeof yields pointer size */
    const size_t dlen = strlen (delim);

    while ((ep = strstr (p, delim))) {      /* while delimiter found */
        /* <ctype.h> functions require a value representable as unsigned char */
        unsigned char next = (unsigned char)ep[dlen];

        if (next == '\0' || isspace (next) || ispunct (next)) {
            /* whole-word delimiter found */
            size_t tlen = (size_t)(ep - sp);        /* get token length */
            char *token = malloc (tlen + 1);        /* allocate for token */

            if (!token) {                           /* validate allocation */
                perror ("malloc-token");
                free (content);
                return 1;
            }
            memcpy (token, sp, tlen);               /* copy to token */
            token[tlen] = 0;                        /* nul-terminate */
            newlines_to_spaces (token);
            printf ("\ntoken: %s\n", token);        /* output token */
            /* do stuff with token */
            free (token);                           /* free token memory */
            sp = ep + 1;    /* next token starts at the 'r' of "really" */
        }
        /* Resume right after the match: this never steps past the nul
         * terminator and still sees a delimiter that follows immediately. */
        p = ep + dlen;
    }

    newlines_to_spaces (sp);        /* last token lives in the file buffer */
    printf ("\ntoken: %s\n", sp);
    /* do stuff with last token */

    free (content);                 /* free buffer holding file */
    return 0;
}
$ cat dat/breakreally.txt
my data is here, and I think its really
cool. Somewhere, i want to break on some really
awesome data. Please let me really explain what is going
on. You are amazing.
$ ./bin/freadbreakreally dat/breakreally.txt
token: my data is here, and I think its
token: really cool. Somewhere, i want to break on some
token: really awesome data. Please let me
token: really explain what is going on. You are amazing.