我对 fgets() 和 strtok() 的使用对于解析多行输入是否不正确？

Question

我正在为数组中的 finding the majority element（即出现次数超过 size/2 的元素）编写摩尔投票算法的实现。代码应该 return 多数元素（如果存在）否则它应该 return -1。现在，如果我在 main() 函数中直接对整数数组进行硬编码并从那里调用它，那么我的 majorityElement(int size, int arr[]) 版本似乎工作得很好。

int majorityElement(int size, int arr[])
{
    int majorityindex = 0;
    int votes = 1;
    int index;
    for (index = 1; index < size; index++)
    {
        if (arr[index] == arr[majorityindex])
            votes++;
        else 
            votes--;
        if (votes == 0)
        {
            majorityindex = index;
            votes = 1;
        }
    }
    int count = 0;
    int i;
    for (i = 0; i < size; i++)
    {
        if(arr[majorityindex] == arr[i])
        count++;
    }
    if (count > (size/2))
        return arr[majorityindex];
    return -1;    
}

但是，如果我尝试读取这样的输入流，我会遇到一些问题：

输入的第一行包含测试用例的数量。测试用例的第一行是数组的大小，第二行是数组的元素。

我尝试从 main() 函数中读取输入流，如下所示：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX 100
int majorityElement(int size, int arr[]);

int main() 
{
   char buf[3];
   fgets(buf, MAX, stdin);
   int n = atoi(buf);
   char a[3];
   char b[MAX];
   int i;
   int count;
   int* num;
   for (i = 0; i < n; i++)
   {
    count = 0; 
    fgets(a, MAX, stdin);
    fgets(b, MAX, stdin);
    int x = atoi(a);
    char* num[x];
    int arr[x];
    int k = 0;
    char* token = strtok(b, " ");
      while (token != NULL)
        { 
          num[k] = token; 
          arr[k] = atoi(num[k]);
          token = strtok(NULL, " "); 
          k++;
        }
    printf("%d\n", majorityElement(x, arr));
    }

   return 1;
}

我在声明期间将 buf[] 和 a[] 的大小设为 3，因为它们必须有足够的 space 用于 \n 读取的字符 fgets() 以及终止 [=29=] 字符。据我所知，atoi() 函数在将字符数组（字符串）转换为整数时会忽略 \n 字符。我尝试将输入的第一个条目（即条目数）存储在字符数组buf中，将其转换为字符串并存储在变量n中。同样，我试图在变量 x 中获取测试数组的大小，在整数数组 arr 中获取测试数组（测试用例的第二行）。虽然 buf 和 n 似乎在所有情况下都能获得正确的值，但我不太确定 arr。我知道 fgets() 会留下一个终端 \n 字符，并且可能在使用 strtok 标记化期间造成一些破坏，尽管我不能指出为什么。我尝试在 GeeksForGeeks 上提交此代码。它为示例测试用例提供了绝对正确的输出：

即

3
-1

但是，当我尝试 "submit" 我的解决方案时，它说：

Possibly your code doesn't work correctly for multiple test-cases (TCs).

The first test case where your code failed:

    Input:
    4
    1 2 2 1

    Its Correct output is:
    -1

    And Your Code's output is:
    1

我似乎无法理解这一点。如果我在 stdin:

中手动写这个

1
4
1 2 2 1

代码输出

-1

这确实是正确的解决方案。这与提交期间声称的输出不匹配，即 1。所以我不确定我哪里出错了。我是否在 main() 函数中错误地使用了 fgets() 或 strtok()？还是别的原因？

根据评论中的建议更新了main()函数。

int main() 
{
   char buf[MAX];
   fgets(buf, MAX, stdin);
   int n = atoi(buf);
   char a[MAX];
   char b[MAX];
   int i;
   int count;
   int* num;
   for (i = 0; i < n; i++)
   {
    count = 0; 
    fgets(a, MAX, stdin);
    fgets(b, sizeof(a), stdin);
    a[sizeof(a)-1] = '[=18=]';
    b[sizeof(b)-1] = '[=18=]';
    int x = atoi(a);
    int arr[x];
    int k = 0;
    char* token = strtok(b, " ");
      while (token != NULL)
        { 
          if (k > x)
          break;
          arr[k] = atoi(token);
          token = strtok(NULL, " "); 
          k++;
        }
    printf("%d\n", majorityElement(x, arr));
    }

   return 1;
}

正如@Vlad 所指出的，我的原始数组中的 MAX 设置得太低了。问题说数组中的条目数上限为 10^7，每个数组条目上限为 10^6（7 位数字）。所以 MAX 需要 10^8 的顺序。根据评论中的建议，我现在使用 动态分配 而不是可变长度数组。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX 10000000

int majorityElement(int size, int arr[])
{
    int majorityindex = 0;
    int votes = 1;
    int index;
    for (index = 1; index < size; index++)
    {
        if (arr[index] == arr[majorityindex])
            votes++;
        else 
            votes--;
        if (votes == 0)
        {
            majorityindex = index;
            votes = 1;
        }
    }
    int count = 0;
    int i;
    for (i = 0; i < size; i++)
    {
        if(arr[majorityindex] == arr[i])
        count++;
    }
    if (count > (size/2))
        return arr[majorityindex];
    return -1;    
}

int main() 
{
   char* buf = calloc (MAX, sizeof(char));
   fgets(buf, MAX, stdin);
   int n = atoi(buf);
   char* a = calloc (MAX, sizeof(char));
   char* b = calloc(MAX, sizeof(char));
   int i;
   for (i = 0; i < n; i++)
   {
    fgets(a, MAX, stdin);
    fgets(b, MAX, stdin);
    a[strlen(a)-1] = '[=19=]';
    b[strlen(b)-1] = '[=19=]';
    int x = atoi(a);
    int *arr = calloc(x, sizeof(int));
    int k = 0;
    char* token = strtok(b, " ");
      while (token != NULL)
        { 
          if (k > x)
          break;
          arr[k] = atoi(token);
          token = strtok(NULL, " "); 
          k++;
        }
    printf("%d\n", majorityElement(x, arr));
    free(arr)
    }
   free(buf);
   free(a);
   free(b);
   return 1;
}

如果我将 MAX 设置为 10^7，那么代码将通过所有测试用例并接受提交。但是，如果我将 MAX 设置为 10^8（根据需要），则会出现分段错误。如何克服这个问题？

Answer 1

您的程序有几个缺点。

例如在函数 main 中有未使用的变量声明为

int count;
int* num;

该函数确实考虑到 -1 可以是数组的有效值。

测试中可指定的元素数量存在问题。这是一个非常大的数字（根据描述1 <= N <= 10000000）。所以 MAX 等于 100 的值太低了。结果，数据可能被错误且不完整地读取。可变长度数组也可能出现问题。

不需要使用函数fgets，因为每个整数都可以使用scanf来读取。

我可以建议以下解决方案。试一试，看看它是否能通过测试。

#include <stdio.h>
#include <stdlib.h>

size_t majorityElement( const int a[], size_t n )
{
    size_t majority_index = 0;

    for ( size_t i = 1, votes = 1; i < n; i++ )
    {
        if ( a[majority_index] == a[i] )
        {
            ++votes;
        }
        else
        {
            --votes;
        }

        if ( votes == 0 )
        {
            majority_index = i;
            ++votes;
        }
    }

    size_t count = 0;

    for ( size_t i = 0; i < n; i++ ) count += a[i] == a[majority_index];

    return n / 2 < count ? majority_index : n;
}

int main(void) 
{
    size_t n = 0;

    scanf( "%zu", &n );

    for ( size_t i = 0; i < n; i++ )
    {
        size_t m = 0;

        scanf( "%zu", &m );

        if ( m != 0 )
        {
            int *a = calloc( m, sizeof( int ) );

            for ( size_t j = 0; j < m; j++ ) scanf( "%d", a + j );

            size_t majority_index = majorityElement( a, m );

            printf( "%d\n", majority_index == m ? -1 : a[majority_index] );

            free( a );
        }           
    }

    return 0;
}

如果它没有通过测试，那么它似乎在测试中存在错误。:)

或者如果函数 return 类型不能改变，那么函数定义可以看起来像

int majorityElement( const int a[], size_t n )
{
    size_t majority_index = 0;

    for ( size_t i = 1, votes = 1; i < n; i++ )
    {
        if ( a[majority_index] == a[i] )
        {
            ++votes;
        }
        else
        {
            --votes;
        }

        if ( votes == 0 )
        {
            majority_index = i;
            ++votes;
        }
    }

    size_t count = 0;

    for ( size_t i = 0; i < n; i++ ) count += a[i] == a[majority_index];

    return n / 2 < count ? a[majority_index] : -1;
}

我对 fgets() 和 strtok() 的使用对于解析多行输入是否不正确？

Is my usage of fgets() and strtok() incorrect for parsing a multi-line input?

c

scanf

fgets

tokenize

strtok