如何在不知道每行字符串长度的情况下使用 fscanf

How to use fscanf without knowing the length of strings in every line

据我了解,有一种方法可以这样解析输入:

A million $ exit $$$ 16
The Cheit and its Punishment $$$ 8
War and Remembrance $$$ 12
Winds of War $$$ 12
How to Play Football $$$ 12
Ultrashort Pulses $$$ 8
Nonlinear Optics $$$ 8
etc..

其中“$$$”分隔数据字段。
我想改进下面这条语句:

sscanf(line, " %200[^$][^$][^$]$$$%ld", name, &copies);

使它也能正确处理示例中的第 1 行(标题中含有 $ 的那一行)。

编辑:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* Size in bytes of the fixed title buffer, including the terminating NUL. */
#define NAME_LENGTH 200
/* Generic failure code; parenthesized so the minus sign cannot fuse with
 * neighbouring tokens when the macro is expanded inside an expression. */
#define ERROR (-1)
/* Minimal boolean type and its two values. */
typedef int BOOL;
#define TRUE 1
#define FALSE 0

/* One title in the library, stored as a node of a singly linked list. */
struct book {
    char name[NAME_LENGTH];  /* NUL-terminated title */
    long copies;             /* copy count for this title */
    struct book *next;       /* following node, or NULL at the tail */
};
typedef struct book Book;

/*
 * Allocate a new list node holding a copy of `name` and the given copy count.
 *
 * The title is copied with a bound of sizeof(Book.name): titles that do not
 * fit are truncated instead of overflowing the fixed-size field.
 *
 * Returns the new node (with next == NULL), or NULL if allocation fails.
 * The caller owns the node and must release it with free() or free_library().
 */
Book* create_book(char name[], long copies){
    Book *new_book = malloc(sizeof *new_book);
    if (new_book != NULL) {
        /* snprintf always NUL-terminates and never writes past the field. */
        snprintf(new_book->name, sizeof new_book->name, "%s", name);
        new_book->next = NULL;
        new_book->copies = copies;
    }
    return new_book;
}

/*
 * Create a node for (name, copies) and link it in front of `head`.
 *
 * Returns the new node, which is the new head of the list, or NULL when the
 * node could not be allocated (the existing list is left untouched).
 */
Book* add_first(Book *head, char name[], long copies){
    Book *node = create_book(name, copies);
    if (node != NULL) {
        node->next = head;
    }
    return node;
}

/*
 * Create a node for (name, copies) and append it after the last node.
 *
 * Returns the head of the list (the new node itself when the list was
 * empty), or NULL when the node could not be allocated.
 */
Book* add_last(Book *head, char name[], long copies){
    Book *node = create_book(name, copies);
    Book **link = &head;

    if (node == NULL) {
        return NULL;
    }
    /* Advance to the NULL link that terminates the list (or to `head`
     * itself when the list is empty) and hang the new node there. */
    while (*link != NULL) {
        link = &(*link)->next;
    }
    *link = node;
    return head;
}

/*
 * Create a node for (name, copies) and insert it so that the list stays in
 * ascending strcmp() order of the titles. A title equal to an existing one
 * is placed in front of it.
 *
 * Returns the head of the list. The head changes when the list was empty or
 * when the new title sorts first, so callers must store the returned pointer.
 * If the node cannot be allocated the list is returned unchanged.
 */
Book* add_sorted(Book *head, char name[], long copies){
    Book *prev;
    Book *new_book = create_book(name, copies);
    if (new_book == NULL)
        return head;
    if (head == NULL)
        return new_book;
    /* New title belongs before the current head. */
    if (strcmp(new_book->name, head->name) <= 0){
        new_book->next = head;
        return new_book;
    }
    /* Stop at the last node whose successor does not sort before the new
     * title; each node is compared by its own name, not the head's. */
    prev = head;
    while ((prev->next != NULL) && (strcmp(prev->next->name, new_book->name) < 0))
        prev = prev->next;
    new_book->next = prev->next;
    prev->next = new_book;
    return head;
}

/* Count the nodes reachable from `head`; an empty list (NULL) has length 0. */
int length(const Book *head){
    int count = 0;
    const Book *node;

    for (node = head; node != NULL; node = node->next) {
        ++count;
    }
    return count;
}

/* Release every node of the list starting at `head_book`; NULL is accepted. */
void free_library(Book *head_book){
    while (head_book != NULL) {
        /* Remember the successor before the current node is released. */
        Book *following = head_book->next;
        free(head_book);
        head_book = following;
    }
}

/*
 * Look up a book by exact title (strcmp() equality).
 *
 * Returns the first node whose name equals `name`, or NULL when the list is
 * empty or no node matches. Every node is examined, not just the head.
 */
Book* find_book(Book *head, char name[]){
    Book *iter;
    for (iter = head; iter != NULL; iter = iter->next){
        if (strcmp(iter->name, name) == 0)
            return iter;
    }
    return NULL;
}

/*
 * Remove and free the first node whose title equals `name`.
 *
 * Returns the head of the list after the removal. The head changes when the
 * first node is the one removed; the list is returned unchanged when it is
 * empty or no node matches.
 */
Book* delete_book(Book *head, char name[]){
    Book *iter = head, *prev = NULL;
    /* Walk until the matching node (compared by its own name) or the end. */
    while ((iter != NULL) && (strcmp(iter->name, name) != 0)){
        prev = iter;
        iter = iter->next;
    }
    if (iter == NULL)
        return head;            /* no such title */
    if (prev == NULL)
        head = iter->next;      /* the match is the head node */
    else
        prev->next = iter->next;
    free(iter);
    return head;
}

/*
 * Split one "<title> $$$ <copies>" line into its two fields.
 *
 * The LAST "$$$" in the line is taken as the delimiter, so '$' characters
 * inside the title are kept. Spaces and tabs around the title are trimmed and
 * a title that does not fit in `name_size` bytes is truncated. The outputs
 * are written only on success.
 *
 * Returns 1 on success; 0 when the delimiter is missing, the title is empty,
 * or no number follows the delimiter.
 */
static int parse_book_line(const char *line, char *name, size_t name_size, long *copies){
    const char *sep = NULL, *scan, *start, *end;
    char *num_end;
    size_t len;
    long value;

    if (name_size == 0)
        return 0;

    for (scan = strstr(line, "$$$"); scan != NULL; scan = strstr(scan + 1, "$$$"))
        sep = scan;
    if (sep == NULL)
        return 0;

    /* strtol skips leading white space; num_end stays at its argument when
     * no digits were converted. */
    value = strtol(sep + 3, &num_end, 10);
    if (num_end == sep + 3)
        return 0;

    start = line;
    while (*start == ' ' || *start == '\t')
        ++start;
    end = sep;
    while (end > start && (end[-1] == ' ' || end[-1] == '\t'))
        --end;
    if (end == start)
        return 0;

    len = (size_t)(end - start);
    if (len >= name_size)
        len = name_size - 1;
    memcpy(name, start, len);
    name[len] = '\0';
    *copies = value;
    return 1;
}

/*
 * Build the book list from a text stream with one "<title> $$$ <copies>"
 * record per line.
 *
 * A title that is already in the list has its copy count increased; a new
 * title is inserted with add_sorted(). After each accepted line the title
 * and its resulting copy count are printed. Lines that cannot be parsed
 * (blank lines included) are skipped. Lines longer than the 255 characters
 * that fit in the read buffer are delivered by fgets() in pieces and are not
 * reassembled.
 *
 * Returns the head of the list, or NULL when `input` is NULL or no book was
 * read. The caller owns the list (see free_library()); the stream is not
 * closed here.
 */
Book* initBooksList(FILE *input){
    Book *head_book = NULL, *existing_book = NULL;
    long copies = 0;
    char line[256] = {0}, name[NAME_LENGTH];
    if (input == NULL){
        printf("File did not open. Exit..\n");
        return NULL;
    }
    /* Loop on the result of fgets() itself: testing feof() first would run
     * the body once more on a stale buffer after the final read fails. */
    while (fgets(line, sizeof line, input) != NULL){
        if (!parse_book_line(line, name, sizeof name, &copies))
            continue;
        existing_book = find_book(head_book, name);
        if (existing_book != NULL){
            existing_book->copies += copies;
            printf("%s\n%ld\n", name, existing_book->copies);
        }
        else{
            /* The head may change, so keep the returned pointer. */
            head_book = add_sorted(head_book, name, copies);
            printf("%s\n%ld\n", name, copies);
        }
    }
    return head_book;
}

/* Placeholder: accepts the list head but has an empty body, so a call does nothing. */
void storeBooks(Book *head_book){

}

/* Placeholder: accepts the list head but has an empty body, so a call does nothing. */
void returnBook(Book *head_book){

}

/* Placeholder: accepts the list head but has an empty body, so a call does nothing. */
void borrowBook(Book *head_book){

}

int main(int argc, char *argv[]){
    int i = 0;
    FILE *ptr;
    printf("%d\n", argc);
    for(i = 0; i < argc; i++)
        printf("argv[%d] = %s\n", i, argv[i]);
    ptr = fopen(argv[1], "r");
    initBooksList(ptr);
    return 0;
}

这应该让您知道您可以做什么:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Demonstration on a fixed record: cut the string at the first '$' to get
 * the title and convert the text that follows "$$$ " to a number.
 * No error checking: the record is assumed to be well formed.
 */
int main()
{
  char record[] = "The Cheit and its Punishment $$$ 8";
  char *delim;
  int price;

  /* Terminate the title where the delimiter begins. */
  delim = strchr(record, '$');
  delim[0] = '\0';

  /* Skip the three '$' characters and the blank that follows them. */
  price = atoi(&delim[4]);

  printf("Title: %s\nPrice: %d\n", record, price);
}

免责声明:没有错误检查,假定该行具有所需的格式。

如果你知道最长的标题是 200 个字符(如你在评论中所说),就可以为它分配一个数组(要为空终止符多留一个字符的空间)。

您可以使用 fscanf() 配合格式字符串 " %200[^$]$$$%d" 来解析文件中的各行。开头的空格告诉 fscanf() 跳过前导空白字符,这些空白字符可能是之前的 I/O 操作留下的。下一个转换说明符是 %200[^$],它告诉 fscanf() 把字符读入字符串,直到遇到 $ 为止;这个 $ 会留在输入流中。请注意,此处指定的最大宽度为 200,以防止缓冲区溢出。格式字符串中接下来的三个字符 $$$ 必须出现在输入中并被匹配,然后才会处理最后的转换说明符 %d。

#include <stdio.h>
#include <stdlib.h>

#define MAX_TITLE  201

/*
 * Read "<title>$$$<number>" records from data.txt directly with fscanf()
 * and print each one. Reading stops at the first record that does not
 * yield both fields.
 */
int main(void)
{
    char title[MAX_TITLE];
    int price;
    FILE *fp;

    /* Open file, and check for success */
    fp = fopen("data.txt", "r");
    if (!fp) {
        perror("Unable to open file");
        exit(EXIT_FAILURE);
    }

    for (;;) {
        /* The leading blank in the format skips the newline left behind
         * by the previous record; the width 200 leaves room for the NUL. */
        int fields = fscanf(fp, " %200[^$]$$$%d", title, &price);
        if (fields != 2) {
            break;
        }
        printf("Title: %s --- Price: $%d\n", title, price);
    }

    fclose(fp);

    return 0;
}

这是针对您的输入文件运行该程序时的输出:

Title: The Cheit and its Punishment  --- Price: $8
Title: War and Remembrance  --- Price: $12
Title: Winds of War  --- Price: $12
Title: How to Play Football  --- Price: $12
Title: Ultrashort Pulses  --- Price: $8
Title: Nonlinear Optics  --- Price: $8

上述代码中对 fscanf() 的调用会在每行最后一个数字之后把空白字符(换行符)留在输入流中;这就是格式字符串需要以空白开头的原因。更好的解决方案是使用 fgets() 读取一行输入,再用 sscanf() 解析该行。应当分配一个缓冲区来保存每次读入的行;这里不妨分配得宽裕一些,因为这样可以降低过长的输入行把字符残留在输入流中的可能性。如果输入行有可能更长,则应添加代码,在下一次调用 fgets() 之前清空输入流。

这种方法的一个优点是:由于读取的是包括 \n 在内的整行,因此不再需要像之前那样跳过前导空白字符。另一个优点是:最后那个数字之后的多余字符可以被忽略,也可以由代码处理;由于整行已经保存下来,可以根据需要多次检查和扫描。而在第一个版本中,最后那个数字之后的字符会造成问题,因为它只能跳过前导空白字符。

#include <stdio.h>
#include <stdlib.h>

#define BUF_SZ     1000
#define MAX_TITLE  201

/*
 * Read data.txt one line at a time with fgets() and split each line into
 * title and number with sscanf(). Lines that do not yield both fields are
 * reported on stderr with their line number and skipped.
 */
int main(void)
{
    char buffer[BUF_SZ];
    char title[MAX_TITLE];
    int price;
    size_t lnum = 0;
    FILE *fp;

    /* Open file, and check for success */
    fp = fopen("data.txt", "r");
    if (!fp) {
        perror("Unable to open file");
        exit(EXIT_FAILURE);
    }

    while (fgets(buffer, sizeof buffer, fp)) {
        int fields = sscanf(buffer, "%200[^$]$$$%d", title, &price);

        ++lnum;
        if (fields != 2) {
            fprintf(stderr, "Format error in line %zu\n", lnum);
            continue;
        }
        printf("Title: %s --- Price: $%d\n", title, price);
    }

    fclose(fp);

    return 0;
}

此处使用 fgets() 可以更灵活地检查输入。要处理 $ 是标题一部分的情况,可以先用 strstr() 找到分隔符 " $$$",然后在一个循环中把分隔符之前的字符复制到 title[] 数组里。由于 strstr() 返回指向所找到字符串的指针,因此可以把这个指针传给 sscanf() 来取出最后的数字。如果没有找到该字符串,strstr() 会返回空指针,这可以用来识别格式有问题的行。请注意,strstr() 声明在 string.h 中:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BUF_SZ     1000
#define MAX_TITLE  201

/*
 * Read data.txt one line at a time and split each line at the delimiter
 * " $$$", so that '$' characters inside a title are preserved. The text
 * before the delimiter becomes the title (truncated to MAX_TITLE - 1
 * characters) and the number after it the copy count. Lines without the
 * delimiter or without a number are reported on stderr and skipped.
 */
int main(void)
{
    /* Open file, and check for success */
    FILE *fp = fopen("data.txt", "r");
    if (fp == NULL) {
        perror("Unable to open file");
        exit(EXIT_FAILURE);
    }

    char buffer[BUF_SZ];
    char title[MAX_TITLE];
    int copies;
    size_t lnum = 0;

    while (fgets(buffer, BUF_SZ, fp) != NULL) {
        ++lnum;

        /* Find delimiter string in buffer */
        char *title_end = strstr(buffer, " $$$");
        if (title_end == NULL) {
            fprintf(stderr, "Format error in line %zu\n", lnum);
            continue;
        }

        /* Copy characters into title until space before delimiter.
         * The bound is MAX_TITLE - 1 so the terminator written after the
         * loop always lands inside the array. */
        char *curr = buffer;
        size_t i = 0;
        while (curr < title_end && i < MAX_TITLE - 1) {
            title[i] = buffer[i];
            ++curr;
            ++i;
        }
        title[i] = '\0';

        if (sscanf(title_end, " $$$%d", &copies) == 1) {
            printf("Title: %s --- Copies: %d\n", title, copies);
        } else {
            fprintf(stderr, "Format error in line %zu\n", lnum);
        }
    }

    fclose(fp);

    return 0;

}

这是修改后的输入文件:

The Cheit and its Punishment $$$ 8
War and Remembrance $$$ 12
Winds of War $$$ 12
A million $ exit $$$ 16
How to Play Football $$$ 12
Ultrashort Pulses $$$ 8
Nonlinear Optics $$$ 8

和结果输出:

Title: The Cheit and its Punishment --- Copies: 8
Title: War and Remembrance --- Copies: 12
Title: Winds of War --- Copies: 12
Title: A million $ exit --- Copies: 16
Title: How to Play Football --- Copies: 12
Title: Ultrashort Pulses --- Copies: 8
Title: Nonlinear Optics --- Copies: 8