我如何在c中解析这个文件
How can I parse this file in C?
如何从词义中拆分单词
1. 猛犸象:大
我的代码:
/*
 * Open "dictionary.txt" and print every word/meaning pair that fscanf
 * can extract, one pair per line.
 * NOTE(review): the closing brace of this function is missing in the snippet.
 */
void ReadFromFile(){
// NOTE(review): the fopen result is used without a NULL check
FILE *dictionary = fopen("dictionary.txt", "r");
char word[20];
char meaning[50];
// Each pass reads everything up to ':' into "word", skips the colon, then
// reads everything up to a tab into "meaning"; the loop ends as soon as
// fscanf converts fewer than two fields.
// NOTE(review): the %[ conversions have no field width, so input longer
// than the buffers would overflow them.
while(fscanf(dictionary, "%[^:]:%[^\t]\t", word, meaning) == 2){
printf("%s %s\n", word, meaning);
}
fclose(dictionary);
假设 word 和 meaning 都不包含数字和点,我的方法如下:
- 首先,以序号中的数字和点为分隔符,将输入行拆分为形如 `word: meaning` 的标记。
- 接下来,用冒号将每个标记分隔为单词和词义。
- 最后,删除开头和结尾的空白字符。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define INFILE "dictionary.txt"
void split(char *str);
void separate(char *str);
char *trim(char *str);
/*
 * Cut "str" into tokens at every run of serial-number characters
 * (digits and '.') and hand each token to separate().
 * WARNING: the array of "str" is modified (strtok writes terminators into it)
 */
void
split(char *str)
{
    static const char serial_chars[] = "0123456789.";
    char *token;

    for (token = strtok(str, serial_chars);
         token != NULL;
         token = strtok(NULL, serial_chars)) {
        separate(token);    // each token is expected to be "word: meaning"
    }
}
/*
 * Separate one "word: meaning" pair at its first colon and print it as
 * "word meaning" on its own line, with surrounding blanks trimmed off
 * both parts.
 * If "str" contains no colon, the token is reported on stderr and the
 * program exits with status 1.
 * WARNING: the array of "str" is modified
 */
void
separate(char *str)
{
    // strchr() from <string.h> is the standard replacement for the
    // legacy index() call
    char *colon = strchr(str, ':');

    if (colon == NULL) {
        fprintf(stderr, "Illegal format: %s\n", str);
        exit(1);
    }
    *colon = '\0';          // terminate the "word" string at the colon
    // colon + 1 is the start of "meaning"
    printf("%s %s\n", trim(str), trim(colon + 1));
}
/*
 * Remove leading and trailing whitespace (space, tab, CR, LF).
 * Returns a pointer into "str" at its first non-blank character; the
 * trailing blanks are chopped off by writing a terminator in place.
 * An empty or all-blank string yields an empty string, and nothing is
 * written past the original terminator. NULL is returned unchanged.
 * WARNING: the array of "str" is modified
 */
char *
trim(char *str)
{
    static const char blanks[] = " \t\r\n";
    size_t len;

    if (str == NULL) {
        return NULL;
    }
    len = strlen(str);
    // walk back over trailing blanks; len never drops below zero, so an
    // empty string is left untouched
    while (len > 0 && strchr(blanks, str[len - 1]) != NULL) {
        len--;
    }
    str[len] = '\0';                    // chop the trailing blanks off
    return str + strspn(str, blanks);   // skip leading blanks
}
/*
 * Open INFILE, pass every line (trimmed) to split(), then close the file.
 * If the file cannot be opened, report it with perror() and exit with
 * status 1.
 */
int
main()
{
    char line[BUFSIZ];
    FILE *in = fopen(INFILE, "r");

    if (in == NULL) {
        perror(INFILE);
        exit(1);
    }
    for (;;) {
        if (fgets(line, BUFSIZ, in) == NULL) {
            break;              // end of file or read error
        }
        split(trim(line));
    }
    fclose(in);
    return 0;
}
输出:
foe enemy
vast huge
purchase buy
drowsy sleepy
absent missing
prank trick
[snip]
[备选]
我认为 C 可能不是最适合这类字符串操作的语言。Python、Perl 或 Ruby 等高级语言可以用更少的代码解决这个问题。下面是一个 Python 示例,它会产生相同的结果:
import re

# Read the whole dictionary file into one string.
with open("dictionary.txt") as f:
    s = f.read()

# Match "<number>. <word>: <meaning>" entries: group 1 is the text up to
# the first colon, group 2 is the run of non-blank characters after it.
for m in re.finditer(r'\d+\.\s*(.+?):\s*(\S+)', s):
    print(m.group(1) + " " + m.group(2))
如何从词义中拆分单词 1. 猛犸象:大
我的代码:
/*
 * Open "dictionary.txt" and print every word/meaning pair that fscanf
 * can extract, one pair per line.
 * NOTE(review): the closing brace of this function is missing in the snippet.
 */
void ReadFromFile(){
// NOTE(review): the fopen result is used without a NULL check
FILE *dictionary = fopen("dictionary.txt", "r");
char word[20];
char meaning[50];
// Each pass reads everything up to ':' into "word", skips the colon, then
// reads everything up to a tab into "meaning"; the loop ends as soon as
// fscanf converts fewer than two fields.
// NOTE(review): the %[ conversions have no field width, so input longer
// than the buffers would overflow them.
while(fscanf(dictionary, "%[^:]:%[^\t]\t", word, meaning) == 2){
printf("%s %s\n", word, meaning);
}
fclose(dictionary);
假设 word 和 meaning 都不包含数字和点,我的方法如下:
- 首先,以序号中的数字和点为分隔符,将输入行拆分为形如 `word: meaning` 的标记。
- 接下来,用冒号将每个标记分隔为单词和词义。
- 最后,删除开头和结尾的空白字符。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define INFILE "dictionary.txt"
void split(char *str);
void separate(char *str);
char *trim(char *str);
/*
 * Cut "str" into tokens at every run of serial-number characters
 * (digits and '.') and hand each token to separate().
 * WARNING: the array of "str" is modified (strtok writes terminators into it)
 */
void
split(char *str)
{
    static const char serial_chars[] = "0123456789.";
    char *token;

    for (token = strtok(str, serial_chars);
         token != NULL;
         token = strtok(NULL, serial_chars)) {
        separate(token);    // each token is expected to be "word: meaning"
    }
}
/*
 * Separate one "word: meaning" pair at its first colon and print it as
 * "word meaning" on its own line, with surrounding blanks trimmed off
 * both parts.
 * If "str" contains no colon, the token is reported on stderr and the
 * program exits with status 1.
 * WARNING: the array of "str" is modified
 */
void
separate(char *str)
{
    // strchr() from <string.h> is the standard replacement for the
    // legacy index() call
    char *colon = strchr(str, ':');

    if (colon == NULL) {
        fprintf(stderr, "Illegal format: %s\n", str);
        exit(1);
    }
    *colon = '\0';          // terminate the "word" string at the colon
    // colon + 1 is the start of "meaning"
    printf("%s %s\n", trim(str), trim(colon + 1));
}
/*
 * Remove leading and trailing whitespace (space, tab, CR, LF).
 * Returns a pointer into "str" at its first non-blank character; the
 * trailing blanks are chopped off by writing a terminator in place.
 * An empty or all-blank string yields an empty string, and nothing is
 * written past the original terminator. NULL is returned unchanged.
 * WARNING: the array of "str" is modified
 */
char *
trim(char *str)
{
    static const char blanks[] = " \t\r\n";
    size_t len;

    if (str == NULL) {
        return NULL;
    }
    len = strlen(str);
    // walk back over trailing blanks; len never drops below zero, so an
    // empty string is left untouched
    while (len > 0 && strchr(blanks, str[len - 1]) != NULL) {
        len--;
    }
    str[len] = '\0';                    // chop the trailing blanks off
    return str + strspn(str, blanks);   // skip leading blanks
}
/*
 * Open INFILE, pass every line (trimmed) to split(), then close the file.
 * If the file cannot be opened, report it with perror() and exit with
 * status 1.
 */
int
main()
{
    char line[BUFSIZ];
    FILE *in = fopen(INFILE, "r");

    if (in == NULL) {
        perror(INFILE);
        exit(1);
    }
    for (;;) {
        if (fgets(line, BUFSIZ, in) == NULL) {
            break;              // end of file or read error
        }
        split(trim(line));
    }
    fclose(in);
    return 0;
}
输出:
foe enemy
vast huge
purchase buy
drowsy sleepy
absent missing
prank trick
[snip]
[备选]
我认为 C 可能不是最适合这类字符串操作的语言。Python、Perl 或 Ruby 等高级语言可以用更少的代码解决这个问题。下面是一个 Python 示例,它会产生相同的结果:
import re

# Read the whole dictionary file into one string.
with open("dictionary.txt") as f:
    s = f.read()

# Match "<number>. <word>: <meaning>" entries: group 1 is the text up to
# the first colon, group 2 is the run of non-blank characters after it.
for m in re.finditer(r'\d+\.\s*(.+?):\s*(\S+)', s):
    print(m.group(1) + " " + m.group(2))