从文本文件中逐行查找特定单词的出现
Finding occurrences of specific word line by line from text file
我试图逐行读出我的文本文件
// Read file.txt one line at a time; each fgets call stores at most 999
// characters plus the terminating NUL in line.
// NOTE(review): the fopen result is not checked for NULL before it is
// handed to fgets and fclose.
FILE *infile;
char line[1000];
infile = fopen("file.txt","r");
while(fgets(line,1000,infile) != NULL)
{
// Per-line processing goes here.
//....
}
fclose(infile);
然后我需要找到一个特定的单词,例如 "the",并且需要查看它出现了多少次以及它还出现在哪几行。
我想我可以用下面的代码来统计该单词出现的次数
// Running total of matches for the word "the".
int wordTimes = 0;
// NOTE(review): currentWord, line_num and line are not declared in this
// snippet; presumably they come from the surrounding read loop (confirm).
// strcmp yields 0 only when currentWord is exactly "the".
if((strcmp("the", currentWord) == 0))
{
// Report the matching line together with its number, then count the hit.
printf("'%s' appears in line %d which is: \n%s\n\n", "the", line_num, line);
wordTimes++;
}
其中 line
是字符串出现的文本行,line_num
是它出现的行号。
然后用下面的代码输出该单词出现的次数:
// Print a summary: the total when at least one match was counted,
// otherwise a message saying the word does not appear.
if(wordTimes > 0)
{
printf("'%s' appears %d times\n", "the", wordTimes);
}
else
{
printf("'%s' does not appear\n", "the");
}
问题是我不确定如何将行中的每个单词与 "the" 进行比较并仍然打印出它所适用的行。
我必须为此使用非常基本的 C,所以这意味着我不能使用 strtok()
或 strstr()
。我只能使用 strlen()
和 strcmp()
.
也许您需要像这样编写一个 strword()
函数。我假设您可以使用 <ctype.h>
中的分类函数(宏),但如果不允许也有解决方法。
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
char *strword(char *haystack, char *needle);
/*
** Find the first occurrence of needle in haystack that stands as a whole
** word: the character before the match (or the start of haystack) and
** the character after it (or the end of haystack) must not be letters.
**
** Returns a pointer to the start of the match inside haystack, or NULL
** when there is no such occurrence.  An empty needle never matches.
*/
char *strword(char *haystack, char *needle)
{
    char *pos = haystack;
    char old_ch = ' ';      /* Start of haystack counts as a word boundary */

    while (*pos != '\0')
    {
        /* The cast keeps isalpha() defined for chars with negative values */
        if (!isalpha((unsigned char)old_ch) && *pos == *needle)
        {
            char *txt = pos + 1;
            char *str = needle + 1;

            while (*txt == *str)
            {
                if (*str == '\0')
                    return pos;     /* Exact match at end of haystack */
                txt++, str++;
            }
            /* Needle exhausted: accept only if the word also ends here */
            if (*str == '\0' && !isalpha((unsigned char)*txt))
                return pos;
        }
        old_ch = *pos++;
    }
    return NULL;
}
/*
** Demonstration driver: counts whole-word occurrences of "the" in a sample
** sentence, printing each hit, then checks a few edge cases with assert().
*/
int main(void)
{
    /*
    ** The sample text also contains 'the' as a word prefix, in the middle
    ** of a word and as a word suffix; none of those are counted.
    ** Adjacent punctuation does not prevent a match.
    */
    char text[] =
        "the way to blithely count the occurrences (tithe)"
        " of 'the' in their line is the";
    char target[] = "the";
    int hits = 0;
    char *match;

    /* Resume each search one character past the previous match */
    for (char *scan = text; (match = strword(scan, target)) != 0; scan = match + 1)
    {
        hits++;
        printf("Found <%s> at [%.20s]\n", target, match);
    }
    printf("Found %d occurrences of <%s> in [%s]\n", hits, target, text);

    assert(strword("the", "the") != 0);
    assert(strword("th", "the") == 0);
    assert(strword("t", "t") != 0);
    assert(strword("", "t") == 0);
    assert(strword("if t fi", "t") != 0);
    assert(strword("if t fi", "") == 0);
    return 0;
}
运行时,会产生如下输出:
Found <the> at [the way to blithely ]
Found <the> at [the occurrences (tit]
Found <the> at [the' in their line i]
Found <the> at [the]
Found 4 occurrences of <the> in [the way to blithely count the occurrences (tithe) of 'the' in their line is the]
Is there a way to do the strword
function without <ctype.h>
?
是的,我在回答开头就已经提到了这一点。由于唯一用到的函数/宏是 isalpha()
,您可以做一些假设(您使用的不是 EBCDIC 系统),即拉丁字母的编码是连续的,于是可以用下面的 is_alpha()
代替 isalpha()
— 并从包含的头文件列表中去掉 <ctype.h>
:
/*
** Return 1 when c is an unaccented Latin letter (A-Z or a-z), else 0.
** Relies on each case of the alphabet being contiguous in the execution
** character set, so it is not suitable for EBCDIC systems.
*/
static inline int is_alpha(int c)
{
    if (c >= 'a' && c <= 'z')
        return 1;
    if (c >= 'A' && c <= 'Z')
        return 1;
    return 0;
}
我试图逐行读出我的文本文件
// Read file.txt one line at a time; each fgets call stores at most 999
// characters plus the terminating NUL in line.
// NOTE(review): the fopen result is not checked for NULL before it is
// handed to fgets and fclose.
FILE *infile;
char line[1000];
infile = fopen("file.txt","r");
while(fgets(line,1000,infile) != NULL)
{
// Per-line processing goes here.
//....
}
fclose(infile);
然后我需要找到一个特定的单词,例如 "the",并且需要查看它出现了多少次以及它还出现在哪几行。
我想我可以用下面的代码来统计该单词出现的次数
// Running total of matches for the word "the".
int wordTimes = 0;
// NOTE(review): currentWord, line_num and line are not declared in this
// snippet; presumably they come from the surrounding read loop (confirm).
// strcmp yields 0 only when currentWord is exactly "the".
if((strcmp("the", currentWord) == 0))
{
// Report the matching line together with its number, then count the hit.
printf("'%s' appears in line %d which is: \n%s\n\n", "the", line_num, line);
wordTimes++;
}
其中 line
是字符串出现的文本行,line_num
是它出现的行号。
然后用下面的代码输出该单词出现的次数:
// Print a summary: the total when at least one match was counted,
// otherwise a message saying the word does not appear.
if(wordTimes > 0)
{
printf("'%s' appears %d times\n", "the", wordTimes);
}
else
{
printf("'%s' does not appear\n", "the");
}
问题是我不确定如何将行中的每个单词与 "the" 进行比较并仍然打印出它所适用的行。
我必须为此使用非常基本的 C,所以这意味着我不能使用 strtok()
或 strstr()
。我只能使用 strlen()
和 strcmp()
.
也许您需要像这样编写一个 strword()
函数。我假设您可以使用 <ctype.h>
中的分类函数(宏),但如果不允许也有解决方法。
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
char *strword(char *haystack, char *needle);
/*
** Find the first occurrence of needle in haystack that stands as a whole
** word: the character before the match (or the start of haystack) and
** the character after it (or the end of haystack) must not be letters.
**
** Returns a pointer to the start of the match inside haystack, or NULL
** when there is no such occurrence.  An empty needle never matches.
*/
char *strword(char *haystack, char *needle)
{
    char *pos = haystack;
    char old_ch = ' ';      /* Start of haystack counts as a word boundary */

    while (*pos != '\0')
    {
        /* The cast keeps isalpha() defined for chars with negative values */
        if (!isalpha((unsigned char)old_ch) && *pos == *needle)
        {
            char *txt = pos + 1;
            char *str = needle + 1;

            while (*txt == *str)
            {
                if (*str == '\0')
                    return pos;     /* Exact match at end of haystack */
                txt++, str++;
            }
            /* Needle exhausted: accept only if the word also ends here */
            if (*str == '\0' && !isalpha((unsigned char)*txt))
                return pos;
        }
        old_ch = *pos++;
    }
    return NULL;
}
/*
** Demonstration driver: counts whole-word occurrences of "the" in a sample
** sentence, printing each hit, then checks a few edge cases with assert().
*/
int main(void)
{
    /*
    ** The sample text also contains 'the' as a word prefix, in the middle
    ** of a word and as a word suffix; none of those are counted.
    ** Adjacent punctuation does not prevent a match.
    */
    char text[] =
        "the way to blithely count the occurrences (tithe)"
        " of 'the' in their line is the";
    char target[] = "the";
    int hits = 0;
    char *match;

    /* Resume each search one character past the previous match */
    for (char *scan = text; (match = strword(scan, target)) != 0; scan = match + 1)
    {
        hits++;
        printf("Found <%s> at [%.20s]\n", target, match);
    }
    printf("Found %d occurrences of <%s> in [%s]\n", hits, target, text);

    assert(strword("the", "the") != 0);
    assert(strword("th", "the") == 0);
    assert(strword("t", "t") != 0);
    assert(strword("", "t") == 0);
    assert(strword("if t fi", "t") != 0);
    assert(strword("if t fi", "") == 0);
    return 0;
}
运行时,会产生如下输出:
Found <the> at [the way to blithely ]
Found <the> at [the occurrences (tit]
Found <the> at [the' in their line i]
Found <the> at [the]
Found 4 occurrences of <the> in [the way to blithely count the occurrences (tithe) of 'the' in their line is the]
Is there a way to do the strword function without <ctype.h>?
是的,我在回答开头就已经提到了这一点。由于唯一用到的函数/宏是 isalpha()
,您可以做一些假设(您使用的不是 EBCDIC 系统),即拉丁字母的编码是连续的,于是可以用下面的 is_alpha()
代替 isalpha()
— 并从包含的头文件列表中去掉 <ctype.h>
:
/*
** Return 1 when c is an unaccented Latin letter (A-Z or a-z), else 0.
** Relies on each case of the alphabet being contiguous in the execution
** character set, so it is not suitable for EBCDIC systems.
*/
static inline int is_alpha(int c)
{
    if (c >= 'a' && c <= 'z')
        return 1;
    if (c >= 'A' && c <= 'Z')
        return 1;
    return 0;
}