Would you please help me find the cause for my Lexical Analyzer Error: Invalid Symbol?

Question

好的，问题是我不知道为什么会收到此错误。

对于 class，我们正在逐个编写编译器。这段代码应该标记输入符号。我写了一系列 if/else 语句，就像一个非常简单的 trie，认为它能够找到所有符号。它对其中一些工作正常，但卡在“<>”。

这是整个函数：

// Process the symbols
void symbol_processor(char *input)
{
    // Initialize symbol_type operator
    int symbol_type = -1;

    printf("Location1: %d\n", input_index);

    // A series of if/else that ape a trie
    if (input[input_index] == '=')
        if (input[input_index + 1] == '=')
        {
            printf("Location2: %d\n", input_index);
            // Set the symbol for "=="
            symbol_type = eqlsym;

            // Move forward two input_index spaces
            input_index += 2;
            printf("Location4: %d\n", input_index);
        }
    else if (input[input_index] == '<')
        if (input[input_index + 1] == '>')
        {
            printf("Location4: %d\n", input_index);
            // Set the symbol for "<>"
            symbol_type = neqsym;

            // Move forward two input_index spaces
            input_index += 2;
        }
        else if (input[input_index + 1] == '=')
        {
            // Set the symbol for "<="
            symbol_type = leqsym;

            // Move forward two input_index spaces
            input_index += 2;
        }
        else
        {
            printf("Location: %d\n", input_index);
            // Set the symbol for "<"
            symbol_type = lessym;

            // Move forward one input_index space
            input_index++;
        }
    else if (input[input_index] == '>')
        if (input[input_index + 1] == '=')
        {
            // Set the symbol for ">="
            symbol_type = geqsym;

            // Move forward two input_index spaces
            input_index += 2;
        }
        else
        {
            // Set the symbol for ">"
            symbol_type = modsym;

            // Move forward one index space
            input_index++;
        }
    else if (input[input_index] == ':')
        if (input[input_index + 1] == '=')
        {
            // Set the symbol for ":="
            symbol_type = becomessym;

            // Move forward two input_index spaces
            input_index += 2;
        }
    // This could cause an issue
    else if (input[input_index] == '/')
        if (input[input_index + 1] == '*')
            comment_error = comment_processor(input);
        else
        {
            
            // Set the symbol for ">"
            symbol_type = slashsym;

            // Move forward one index space
            input_index++;
        }
    else if (input[input_index] == '%')
    {
        // Set the symbol for "%"
        symbol_type = modsym;

        // Move forward one index space
        input_index++;
    }
    else if (input[input_index] == '*')
    {
        // Set the symbol for "*"
        symbol_type = multsym;

        // Move forward one index space
        input_index++;
    }
    else if (input[input_index] == '+')
    {
        // Set the symbol for "+"
        symbol_type = plussym;

        // Move forward one index space
        input_index++;
    }
    else if (input[input_index] == '-')
    {
        // Set the symbol for "-"
        symbol_type = minussym;

        // Move forward one index space
        input_index++;
    }
    else if (input[input_index] == '(')
    {
        // Set the symbol for "("
        symbol_type = lparentsym;

        // Move forward one index space
        input_index++;
    }
    else if (input[input_index] == ')')
    {
        // Set the symbol for ")"
        symbol_type = rparentsym;

        // Move forward one input_index space
        input_index++;
    }
    else if (input[input_index] == ',')
    {
        // Set the symbol for ","
        symbol_type = commasym;

        // Move forward one index space
        input_index++;
    }
    else if (input[input_index] == '.')
    {
        // Set the symbol for "."
        symbol_type = periodsym;

        // Move forward one input_index space
        input_index++;
    }
    else if (input[input_index] == ';')
    {
        // Set the symbol for ";"
        symbol_type = semicolonsym;

        // Move forward one index space
        input_index++;
    }

    // Check to see if an error should be thrown
    if (symbol_type == -1)
        error_processor(1); // Invalid Symbol

    // Append symbol to the list
    list[lex_index].type = symbol_type;
    lex_index++;
}

但我很确定问题出在这里：

else if (input[input_index] == '<')
        if (input[input_index + 1] == '>')
        {
            printf("Location4: %d\n", input_index);
            // Set the symbol for "<>"
            symbol_type = neqsym;

            // Move forward two input_index spaces
            input_index += 2;
        }
        else if (input[input_index + 1] == '=')
        {
            // Set the symbol for "<="
            symbol_type = leqsym;

            // Move forward two input_index spaces
            input_index += 2;
        }
        else
        {
            printf("Location: %d\n", input_index);
            // Set the symbol for "<"
            symbol_type = lessym;

            // Move forward one input_index space
            input_index++;
        }

我就是看不出是什么问题，希望比我更有智慧和经验的程序员能指出来。另外，忽略 printf 语句。我正在使用它们来尝试帮助调试。

这是我输入的完整输入文本，如果有帮助的话。错误在“var”之后的“<”处抛出。

const==var<>procedureend<=if>=then.else;while(do)call:=read,write+124-jalapeno*/comment //

Answer 1

您只接受以“=”开头的符号，因为没有与您的第一个 if 匹配的 else 语句。你的缩进让它看起来像有；但是如果你运行你的程序通过缩进，你会看到你哪里出错了。这个序列：

if (input[input_index] == '=')
    if (input[input_index + 1] == '=')
    {
        printf("Location2: %d\n", input_index);
        // Set the symbol for "=="
        symbol_type = eqlsym;

        // Move forward two input_index spaces
        input_index += 2;
        printf("Location4: %d\n", input_index);
    }
else if (input[input_index] == '<')
    if (input[input_index + 1] == '>')

实际上是：

if (input[input_index] == '=')
    if (input[input_index + 1] == '=')
    {
        printf("Location2: %d\n", input_index);
        // Set the symbol for "=="
        symbol_type = eqlsym;

        // Move forward two input_index spaces
        input_index += 2;
        printf("Location4: %d\n", input_index);
    } else if (input[input_index] == '<')
        if (input[input_index + 1] == '>')

使用 switch 语句通常更符合 C 语言的习惯，特别是对于最外层的条件，因此逻辑看起来更像：

switch (input[input_index]) {
    case '=':
        if (input[input_index + 1] == '=')
        {
        }
        break;
    case '<':
        if (input[input_index + 1] == '>')
        {
        }
        else if (input[input_index + 1] == '=')
        {
        }
        else
        {
        }
        break;
    case '>':
        if (input[input_index + 1] == '=')
        {
        }
        else
        {
        }
        break;

在 reader 看来，这比试图弄清楚 if (input[input_index] == ... 中的每一个是否都使用相同的数组和索引等要容易得多。

Would you please help me find the cause for my Lexical Analyzer Error: Invalid Symbol?

Would you please help me find the cause for my Lexical Analyzer Error: Invalid Symbol?

c

compiler-construction

symbols

if-statement

trie