了解由于 getchar 而导致的 c 循环

Question

我正在为以下程序寻求帮助，今天下午我已经为此苦苦挣扎了几个小时。

我想创建一个函数来接收字符数组（属于字母表 A = {a,b,c}）及其 dim 和 return 1 以防字符属于语言 L，如果不是，则为 0。

语言是：a^k b^n c^m，因此 k,m>=0 和 n>0。

我非常努力地尝试了，我打算 post 我已经完成的，但我的方法似乎非常长（除了它缺乏功能）

我想知道如何改进我的代码。

#include <stdio.h>
int array(char v[], int dim) {
   int i, j, k, trovato = 1;
   if (v[0] == 'c') trovato = 0;

   if (v[0] == 'a') {
      for (i = 1; i < dim; i++) {
         while (trovato == 1) {
            if (v[i] == 'c')
               trovato = 0;
            else if (v[i] == 'b') {
               trovato = 1;
               for (j = i + 1; j < dim; j++) {
                  while (trovato == 1) {
                     if (v[j] == 'a') trovato = 0;
                     if (v[j] == 'b')
                        trovato = 1;
                     else if (v[j] == 'c') {
                        trovato = 1;
                        for (k = j + 1; k < dim; k++) {
                           while (trovato == 1) {
                              if (v[k] == 'c')
                                 trovato = 1;
                              else
                                 trovato = 0;
                           }
                        }
                     }
                  }
               }
            }
         }
      }
   }

   if (v[0] == 'b') {
      for (i = 1; i < dim; i++) {
         while (trovato == 1) {
            if (v[i] == 'a') trovato = 0;
            if (v[i] == 'b')
               trovato = 1;
            else if (v[i] == 'c') {
               trovato = 1;
               for (j = i; j < dim; j++) {
                  while (trovato == 1) {
                     if (v[j] != 'c')
                        trovato = 0;
                     else
                        trovato = 1;
                  }
               }
            }
         }
      }
   }

   return trovato;
}

int main() {
   char d;
   int DIM, i = 0, k;
   scanf("%d", &DIM);
   char r[DIM];
   scanf("%c", &d);
   d = getchar();
   while (d != '\n') {
      r[i] = d;
      i++;
      scanf("%c", &d);
      d = getchar();
   }
   k = array(r, DIM);
   printf("%d\n", k);
   return 0;
}

我真正不明白的是输入的原因，就像它在 while 循环中一样。

我认为问题是我对 getchar 和 scanf 字符的理解，所以这些行例如：

scanf("%c",&d);
d=getchar();

向量数组应该如何初始化？

我真正关心的是效率，我担心不能快速正确地改进，这就是为什么我要对我试图完成的工作提出即使是严格但有建设性的批评。

Answer 1

让我们试着让它更简单。我们计算一个指向数组末尾的指针 (v + dim) 这样我们就不需要使用索引和索引变量，而是可以修改 v 指针本身。

int matches(const char *v, size_t dim) {
    const char *end = v + dim;
    size_t k = 0, m = 0, n = 0;

    // count consecutive 'a's.
    // for as long as `v` is positioned before the end
    // and points to an 'a', increment `v` and increment `k`.
    while (v < end && *v == 'a') {
        k ++;
        v ++;
    }

    // count consecutive 'b's
    while (v < end && *v == 'b') {
        m ++;
        v ++;
    }

    // count consecutive 'c's
    while (v < end && *v == 'c') {
        n ++;
        v ++;
    }

    // we didn't meet the end yet, something else was seen!
    if (v < end) {
        // not just aaa...bbbbb....cccc...
        return 0;
    }

    // there were only a's, b's, c's in that order
    else {
        check that k, m, n matches the constraints 
        and return a result based on that.
    }
}

所以第一个循环将指针向前移动，只要它指向 'a'，第二个循环指向 'b'，第三个循环指向 'c' - 然后我们确保我们确实走到了尽头，而不仅仅是 abca 的情况；我们在循环中递增 k、m 和 n，所以当 else 子句被命中时，确实 L = a^k b^m c^n 和你只需要检查 k, m, n.

不需要嵌套循环。

至于输入，

scanf("%c",&d);
d=getchar();

您正在阅读字符两次。你只需要 getchar():

int c;  // getchar returns an *int*

while ((c = getchar()) != EOF && c != '\n') {
    r[i ++] = d;
}

这就是读取输入直到文件末尾或换行符所需的全部内容。

Answer 2

另一种方法是使用 http://re2c.org/ 为您生成它。取自他们的网站，

Its main goal is generating fast lexers: at least as fast as their reasonably optimized hand-coded counterparts.

#include <stdlib.h>
#include <stdio.h>
#include <errno.h>

static int match(const char *const s) {
    const char *t = s, *marker;
    /*!re2c
        re2c:yyfill:enable = 0;
        re2c:define:YYCTYPE = char;
        re2c:define:YYCURSOR = t;
        re2c:define:YYMARKER = marker;

        pattern = 'a'* 'b'+ 'c'*;
        end = '\n'? "\x00";
        *           { return 0; }
        pattern end { return 1; }
    */
}

int main(void) {
    char a[512];
    while(fgets(a, sizeof a, stdin))
        printf("[%s] %s", match(a) ? "passed" : "reject", a);
    if(errno) return perror("input"), EXIT_FAILURE;
    return EXIT_SUCCESS;
}

正在生成 re2c a.re > a.c、

/* Generated by re2c 1.0.3 on Sun Oct 21 18:38:31 2018 */
#line 1 "a.re"
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>

#line 6 "a.re"


static int match(const char *const s) {
    const char *t = s, *marker;

#line 14 "<stdout>"
{
    char yych;
    yych = *t;
    switch (yych) {
    case 'A':
    case 'a':   goto yy4;
    case 'B':
    case 'b':   goto yy5;
    default:    goto yy2;
    }
yy2:
    ++t;
yy3:
#line 18 "a.re"
    { return 0; }
#line 30 "<stdout>"
yy4:
    yych = *(marker = ++t);
    switch (yych) {
    case 'A':
    case 'B':
    case 'a':
    case 'b':   goto yy7;
    default:    goto yy3;
    }
yy5:
    yych = *(marker = ++t);
    switch (yych) {
    case 0x00:
    case '\n':
    case 'B':
    case 'C':
    case 'b':
    case 'c':   goto yy10;
    default:    goto yy3;
    }
yy6:
    yych = *++t;
yy7:
    switch (yych) {
    case 'A':
    case 'a':   goto yy6;
    case 'B':
    case 'b':   goto yy9;
    default:    goto yy8;
    }
yy8:
    t = marker;
    goto yy3;
yy9:
    yych = *++t;
yy10:
    switch (yych) {
    case 0x00:  goto yy11;
    case '\n':  goto yy13;
    case 'B':
    case 'b':   goto yy9;
    case 'C':
    case 'c':   goto yy14;
    default:    goto yy8;
    }
yy11:
    ++t;
#line 19 "a.re"
    { return 1; }
#line 80 "<stdout>"
yy13:
    yych = *++t;
    if (yych <= 0x00) goto yy11;
    goto yy8;
yy14:
    yych = *++t;
    switch (yych) {
    case 0x00:  goto yy11;
    case '\n':  goto yy13;
    case 'C':
    case 'c':   goto yy14;
    default:    goto yy8;
    }
}
#line 20 "a.re"

}

int main(void) {
    char a[512];
    while(fgets(a, sizeof a, stdin))
        printf("[%s] %s", match(a) ? "passed" : "reject", a);
    if(errno) return perror("input"), EXIT_FAILURE;
    return EXIT_SUCCESS;
}

可以获得区分大小写的输出和许多其他选项，请参阅 http://re2c.org/manual/options/options.html。

了解由于 getchar 而导致的 c 循环

Understanding c loops due to getchar

c

arrays

function

stdio

char