Stdin with getc 产生额外的输出,并打开文件导致 C 中的分段错误

Stdin with getc producing additional output, and opening file causing segmentation fault in C

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>

typedef int bool;
#define true 1
#define false 0

#define A 65
#define Z 90
#define a 97
#define z 122
#define NEWLINE 10

int main(int argc, char* argv[])
{
    int noArgReverse();
    int argReverse(int i, char* c[]);
    if (argc == 1){
        if (noArgReverse() == 0)
            return 0;
        else
            return 1;
    }
    if (argc > 1){
        if (argReverse(argc, argv) == 0)
            return 0;
        else
            return 1;
    }
    else{
        fprintf(stderr, "unknown error detected.\n");
        return 1;
    }
}

int noArgReverse()
{
    char charInput[10000];
    int pointerArray[5000];
    int pointerCount = 0;
    bool wordStart = false;
    int indexer;
    int lineLength;
    int parser;
    char currInput;

    pointerArray[0] = 0; // first word would start at 0 be default

    while (currInput != EOF){
        lineLength = 0;
        indexer = 0;
        pointerCount = 0;
        while ((currInput = getc(stdin)) != NEWLINE){
            /*
             * I am implementing a 10,000 char limit, as this seems an
             * unreasonable length.
             */
            if (lineLength == 9999){
                fprintf(stderr, "Line length exceeded 10,000 chars. "
                        "This line and, if in the middle of a word,"
                        "will be split.\n");
                break;
            }

            if (!wordStart){
                if ((currInput >= A && currInput <= Z) || (currInput >= a && currInput <= z)){
                    wordStart = true;
                }
            }

            while (wordStart){
                charInput[lineLength++] = currInput;
                currInput = getc(stdin);
                //if the word has ended
                if ((currInput < A || currInput > Z) && (currInput < a || currInput > z)){
                    wordStart = false;
                    charInput[lineLength++] = '[=10=]';
                    if (pointerCount != 0){ // at least one word has been added
                        ++indexer;
                        pointerArray[indexer] = pointerCount;
                        pointerCount = lineLength;
                    }
                    else //first word of the line to be added
                        pointerCount = lineLength;
                }
            }
        }

        while (indexer >= 0){
            parser = pointerArray[indexer--];
            while (charInput[parser] != '[=10=]')
                fprintf (stdout, "%c", charInput[parser++]);
            fprintf (stdout, " ");
        }
        fprintf (stdout, "\r\n");

        if (lineLength == 0){
            currInput = EOF;
        }
    }
    return 0;
}

int argReverse (int argc, char* argv[])
{
    char charInput[10000];
    int pointerArray[5000];
    int pointerCount = 0;
    bool wordStart = false;
    int indexer;
    int lineLength;
    int parser;
    char currInput;
    FILE *currentFile;

    while (argc > 0){
        currentFile = fopen(argv[argc--], "r");
        while ((currInput = getc(currentFile)) != EOF){
            lineLength = 0;
            indexer = 0;
            pointerCount = 0;
            while (currInput != NEWLINE){
                /*
                 * I am implementing a 10,000 char limit, as this seems an
                 * unreasonable length for a single line.
                 */
                if (lineLength == 9999){
                    fprintf(stderr, "Line length exceeded 10,000 chars. "
                            "This line and, if in the middle of a word, the word, "
                            "will be split.\n");
                    break;
                }

                if (!wordStart){
                    if ((currInput >= A && currInput <= Z) || (currInput >= a && currInput <= z)){
                        wordStart = true;
                    }
                }

                while (wordStart){
                    charInput[lineLength++] = currInput;
                    currInput = getc(currentFile);
                    //if the word has ended
                    if ((currInput < A || currInput > Z) && (currInput < a || currInput > z)){
                        wordStart = false;
                        charInput[lineLength++] = '[=10=]';
                        if (pointerCount != 0){ // at least one word has been added
                            ++indexer;
                            pointerArray[indexer] = pointerCount;
                            pointerCount = lineLength;   
                        }
                        else //first word of the line to be added
                            pointerCount = lineLength;
                    }
                }
            }
        }
        fclose(currentFile);
    }
    return 0;
}

所以对于我的第一个函数,我遇到了一个错误,我在调试时似乎无法深入了解,或者更确切地说,我不确定如何解决。该函数应从 stdin 获取输入,并以相反的顺序打印单词(字符应保持顺序,因此 "This is a sentence" 应为 "sentence a is This")。很简单。但是,当我给出示例输入时,我得到的输出都是错误的。

输入:

This is sample
input for testing

输出:

testing for input sample is This

This

输入有一个return,但是输出的行与行之间多了一个return,并且没有分割行。

因此,它没有在应该打印换行符的时候打印换行符,而是在结束时再次打印第一个输入的单词。

我遇到的第二个问题是在第二组代码中,argReverse 函数。文件打开后,在本例中我使用 test.txt,这是一个包含几行短语和空行的简单文本文件,第一次使用 getc return 是一个分段错误.我读到这是权限或文件打开失败,但我不确定如何解决此问题。我试图首先打开最后一个文件,然后从那里开始工作,显然,这应该能够处理多个文件,但我什至无法打开一个。我不确定该怎么做才能解决这个问题。我试过将 getc 移到 while 循环之外,同样的问题。我猜我在打开文件时做错了什么,但我不知道它是什么。

风格说明:

bool类型,truefalse<stdbool.h>中定义。

使用 'A' 'Z' 'a' 'z' '\n' 等字符常量代替硬编码数字,and/or 使用 <ctype.h> 中的 isalpha 等字符分类函数。

"reverse" 函数在结束时只是 return 0,所以 return 没有任何意义。它们应该声明为 returning void。如果他们做了 return 有用的事情,我会 return 来自 main 的值(消除 if 语句)。例如,

if ( argc == 1 )
    return noArgReverse();

将大型数组放在堆栈上通常不是一个好主意。 (大是主观的,但我根据经验使用 2K 字节。)对于不可重入函数,您可以将数组声明为 static 以将它们从堆栈中取出。对于可重入函数,您可以 malloc 数组,最后 free 它们。

设计注意事项:

fgets 函数将读取一行并将其放入缓冲区。无需一次读取一个字符。

处理命令行参数时,规范循环是

int main( int argc, char *argv[] )
{
    for ( int i = 1; i < argc; i++ )
        printf( "argv[%d] is \"%s\"\n", i, argv[i] );
}

你的段错误的原因是你正在使用 argv[argc],C 规范保证它是 NULL。因此,您将 NULL 传递给 fopen。此外,您应该始终检查 fopen 中的 return 值,因为如果无法打开文件,fopen 将 return NULL

到目前为止,代码中最大的设计问题是重复。你有两个几乎相同的功能,这对调试和维护来说是一场噩梦,因为每次更改都需要进行两次,并测试两次。解决方案是定义一个 reverse 函数,它将文件指针作为输入。 main 函数应该处理 opening/closing 文件,或者可以在没有任何参数时传递 stdin

示例代码:

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>

#define MAXL 10000
#define MAXW  5000

void reverse( FILE *fp );

int main( int argc, char *argv[] )
{
    if ( argc < 2 )
    {
        reverse( stdin );
        return 0;
    }

    FILE *fp;
    for ( int i = 1; i < argc; i++ )
    {
        printf( "----- %s -----\n", argv[i] );
        if ( (fp = fopen( argv[i], "r" )) == NULL )
        {
            printf( "***Error: unable to open file\n" );
        }
        else
        {
            reverse( fp );
            fclose( fp );
        }
    }

    return 0;
}

void reverse( FILE *fp )
{
    static char line[MAXL];   // buffer for the input line
    static char *word[MAXW];  // array of pointers to the words on the line

    while ( fgets( line, MAXL, fp ) != NULL )
    {
        int i = -1;
        int count = 0;        // count of words on the line
        for (;;)
        {
            // skip any non-alpha characters
            for ( i++; line[i]; i++ )
                if ( isalpha( line[i] ) )
                    break;

            // check if we've reached the end of the line
            if ( !line[i] )
                break;

            // add the pointer to the word list
            word[count++] = &line[i];

            // scan till we reach the end of the word
            for ( i++; line[i]; i++ )
                if ( !isalpha( line[i] ) )
                    break;

            // check if we've reached the end of the line
            if ( !line[i] )
                break;

            // terminate the word
            line[i] = '[=12=]';
        }

        // output the words in reverse order
        for ( i = count - 1; i >= 0; i-- )
            printf( "%s ", word[i] );
        printf( "\n" );
    }
}