分词器不工作

Tokenizer not working

我正在尝试对一个字符串进行分词(tokenize),以得到一个字符串数组,但我的代码似乎有错误。

这是我的代码:

/*
 * Tokenises charLine at spaces and commas and returns a heap-allocated
 * asmInstruction whose args entries are set from strtok() results.
 *
 * NOTE(review): countTokens() is called on charLine before the strtok()
 * loop below; the countTokens() shown in this file runs strtok() over its
 * argument.
 */
asmInstruction *tokenizeLine(char *charLine) {
    int words = countTokens(charLine);
    /* NOTE(review): this buffer's address is overwritten by the strtok() result below. */
    char *tokens = (char*) malloc(MAX_LINE_LENGTH);

    /* NOTE(review): the size requested is that of a pointer to asmInstruction, not of the struct. */
    asmInstruction *instr = (asmInstruction*) malloc(sizeof(asmInstruction*));
    /* MAX_LINE_LENGTH bytes are requested for the array of char pointers. */
    instr->args = (char**) malloc(MAX_LINE_LENGTH);

    int count = 1;
    /* The token returned here is never stored: the loop calls strtok() again before storing. */
    tokens = strtok(charLine, " ,");
    while (count <= words) {
        tokens = strtok(NULL, " ,");
        /* The buffer allocated on the next line is replaced by the strtok() pointer right after. */
        instr->args[count - 1] = (char*)malloc(MAX_LINE_LENGTH);
        instr->args[count - 1] = tokens;
        ++count;
    }

    /* tokens holds the last strtok() result here, not the buffer malloc'd at the top. */
    free(tokens);
    return instr;
}

 /* Counts the '\n' characters in file, then rewinds file to its start. */
uint32_t countLines(FILE *file) {
    uint32_t newlines = 0;

    for (int32_t ch = fgetc(file); ch != EOF; ch = fgetc(file)) {
        newlines += (ch == '\n');
    }

    /* Leave the stream positioned at the beginning for the next reader. */
    rewind(file);
    return newlines;
}

和结构:

/* Holds the arguments parsed from one line; tokenizeLine() allocates it and stores a char* per token in args. */
typedef struct {
    char **args; /* Array of char* entries, one per token */
} asmInstruction;

我的 main 函数在这里:

/* Demo driver: tokenises a sample instruction and prints its first argument. */
int main() {
    char sample[] = "ldr r2,r1";
    asmInstruction *parsed = tokenizeLine(sample);
    char *firstArg = parsed->args[0];

    printf("%s", firstArg);
}

/*
 * Returns how many tokens line contains when it is split at spaces and
 * commas. The counting is done with strtok(), so line is modified in place.
 */
uint32_t countTokens(char line[]) {
    uint32_t total = 0;

    for (char *tok = strtok(line, " ,"); tok != NULL; tok = strtok(NULL, " ,")) {
        ++total;
    }
    return total;
}

所以,这应该打印 ldr,但它打印的却是 null。如果我遍历这些 token,打印出来的也都是空的。我期望打印出以下 token:

ldr r2 r1

但只有第一个被打印出来。

似乎 instr->args[count-1] 从未被赋过任何值,因为 tokens 显然也从未被赋过值。

这是为什么? 谢谢。

/*
 * Splits charLine at spaces and commas into a newly allocated
 * asmInstruction.
 *
 * On success instr->args holds one heap-allocated copy of every token, in
 * order, followed by a NULL sentinel, so callers can iterate without a
 * separate count. The caller owns (and must free) each args[i], the args
 * array and the struct itself.
 *
 * charLine is modified: strtok() overwrites the delimiters it finds.
 * Returns NULL if any allocation fails; nothing is leaked in that case.
 */
asmInstruction *tokenizeLine(char *charLine) {
    /*
     * Count the tokens on a scratch copy. The counting pass must leave
     * charLine untouched: if the delimiters in charLine were already
     * replaced by '\0', the strtok() loop below would stop after the first
     * token.
     */
    size_t lineSize = strlen(charLine) + 1;
    char *scratch = malloc(lineSize);
    if (scratch == NULL) {
        return NULL;
    }
    memcpy(scratch, charLine, lineSize);
    size_t words = countTokens(scratch);
    free(scratch);

    /* Size of the struct itself (sizeof *instr), not of a pointer to it. */
    asmInstruction *instr = malloc(sizeof *instr);
    if (instr == NULL) {
        return NULL;
    }

    /* One slot per token plus one for the NULL sentinel. */
    instr->args = malloc((words + 1) * sizeof *instr->args);
    if (instr->args == NULL) {
        free(instr);
        return NULL;
    }

    size_t count = 0;
    /* tokens is only a cursor into charLine; it needs no storage of its own. */
    char *tokens = strtok(charLine, " ,");
    while (tokens != NULL && count < words) {
        /* Copy each token so the result does not alias the caller's buffer. */
        instr->args[count] = malloc(strlen(tokens) + 1);
        if (instr->args[count] == NULL) {
            while (count > 0) {
                free(instr->args[--count]);
            }
            free(instr->args);
            free(instr);
            return NULL;
        }
        strcpy(instr->args[count], tokens);
        ++count;
        tokens = strtok(NULL, " ,");
    }
    instr->args[count] = NULL; /* sentinel marking the end of the array */

    return instr;
}

以下代码:

handles errors
has many/ most of the logic errors corrected
properly defines the struct asmInstruction
performs the functionality indicated in the question.

建议删除 struct asmInstruction,因为并不需要它;只需在 tokenizeLine() 函数中使用 char **args = NULL;,最后 return args 即可。

没有必要、也不应该为 'tokens' 分配内存:每次用 strtok() 的返回值给 'tokens' 赋值时,它原来保存的指针都会被覆盖;如果之前对它做过 malloc,就会造成内存泄漏。

在下面的代码中,在调用 'exit( EXIT_FAILURE );' 之前,还需要补充一些释放已 malloc 的内存并关闭文件的逻辑。
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>


/* Tokenised form of one line; tokenizeLine() allocates it and fills args. */
struct asmInstruction
{
    char **args; /* Array of pointers to heap-allocated token strings */
};

/* Presumably the maximum supported line length; not referenced by the functions in this listing. */
#define MAX_LINE_LENGTH (100)

// prototypes
/* Number of space/comma separated tokens in line. */
uint32_t countTokens(char line[]);
/* Number of '\n' characters in file; rewinds file before returning. */
uint32_t countLines(FILE *file);
/* Splits charLine at spaces and commas into a heap-allocated asmInstruction. */
struct asmInstruction *tokenizeLine(char *charLine);

/* Demo driver: tokenises a sample instruction and prints its first token. */
int main(void)
{
    char sample[] = "ldr r2,r1";
    struct asmInstruction *parsed = tokenizeLine(sample);
    char *firstToken = parsed->args[0];

    printf("%s", firstToken);
    return 0;
}


/*
 * Counts the tokens in line, where tokens are maximal runs of characters
 * separated by spaces and/or commas.
 *
 * The line is only read, never modified, and strtok() is not used, so a
 * caller can tokenise the same buffer afterwards and any strtok() scan
 * already in progress is left undisturbed.
 *
 * Returns 0 for a NULL, empty or delimiter-only line.
 */
uint32_t countTokens(char line[])
{
    static const char delimiters[] = " ,";
    uint32_t numberOfTokens = 0;
    const char *cursor = line;

    if (cursor == NULL)
    {
        return 0;
    }

    for (;;)
    {
        /* Skip the delimiters in front of the next token. */
        cursor += strspn(cursor, delimiters);
        if (*cursor == '\0')
        {
            break;
        }

        numberOfTokens++;

        /* Step over the token itself. */
        cursor += strcspn(cursor, delimiters);
    }

    return numberOfTokens;
} // end function: countTokens


/*
 * Splits charLine at spaces and commas into a newly allocated
 * struct asmInstruction.
 *
 * instr->args holds one heap-allocated copy of every token, in order,
 * followed by a NULL sentinel, so callers can iterate without a separate
 * count. The caller owns (and must free) each args[i], the args array and
 * the struct itself.
 *
 * charLine is modified: strtok() overwrites the delimiters it finds.
 * Any allocation failure is reported with perror() and terminates the
 * program through exit( EXIT_FAILURE ).
 */
struct asmInstruction *tokenizeLine(char *charLine)
{
    /*
     * Count the tokens on a scratch copy. The counting pass must leave
     * charLine untouched: if the delimiters in charLine were already
     * replaced by '\0', the strtok() loop below would stop after the first
     * token.
     */
    size_t lineSize = strlen(charLine) + 1;
    char *scratch = malloc(lineSize);
    if( NULL == scratch )
    { // then malloc failed
        perror( "malloc for scratch copy of line failed" );
        exit( EXIT_FAILURE );
    }

    memcpy( scratch, charLine, lineSize );
    size_t words = countTokens(scratch);
    free( scratch );

    struct asmInstruction *instr = malloc(sizeof *instr);
    if( NULL == instr )
    { // then malloc failed
        perror( "malloc for struct asmInstruction failed" );
        exit( EXIT_FAILURE );
    }

    /*
     * words+1 slots: room for the NULL sentinel, and the request is never
     * zero-sized (malloc(0) may return NULL without being an error).
     */
    instr->args = malloc((words + 1) * sizeof *instr->args);
    if( NULL == instr->args )
    { // then malloc failed
        perror( "malloc for array of char pointers failed:" );
        exit( EXIT_FAILURE );
    }

    /* Start with every slot NULL so the array is always NULL-terminated. */
    for( size_t i = 0; i <= words; ++i )
    {
        instr->args[i] = NULL;
    }

    size_t count = 0;
    /* tokens is only a cursor into charLine; it needs no storage of its own. */
    char *tokens = strtok(charLine, " ,");

    while( tokens != NULL && count < words )
    {
        /* Copy each token so the result does not alias the caller's buffer. */
        if( NULL == (instr->args[count] = malloc(strlen(tokens)+1) ) )
        { // then, malloc failed
            perror( "malloc for arg failed" );
            exit( EXIT_FAILURE );
        }

        strcpy(instr->args[count], tokens );
        ++count;
        tokens = strtok(NULL, " ,");
    } // end while

    return instr;
} // end function: tokenizeLine


 /* Returns the number of newline characters in file and rewinds it to the start. */
uint32_t countLines(FILE *file)
{
    uint32_t total = 0;
    int32_t ch;

    do
    {
        ch = fgetc(file);
        if ('\n' == ch)
        {
            total++;
        }
    } while (EOF != ch);

    /* Put the stream back at the beginning so it can be read again. */
    rewind(file);
    return total;
}