C: 为 hexdump 格式化字符串(char* 到另一个 char*)

C: formatting strings for hexdump (char* to another char*)

我想将一个 char* 指针的 hexdump 写入另一个 char*

为此我采取了 this code snippet:

#include <stdio.h>

void DumpHex(const void* data, size_t size) {
    char ascii[17];
    size_t i, j;
    ascii[16] = '[=10=]';
    for (i = 0; i < size; ++i) {
        printf("%02X ", ((unsigned char*)data)[i]);
        if (((unsigned char*)data)[i] >= ' ' && ((unsigned char*)data)[i] <= '~') {
            ascii[i % 16] = ((unsigned char*)data)[i];
        } else {
            ascii[i % 16] = '.';
        }
        if ((i+1) % 8 == 0 || i+1 == size) {
            printf(" ");
            if ((i+1) % 16 == 0) {
                printf("|  %s \n", ascii);
            } else if (i+1 == size) {
                ascii[(i+1) % 16] = '[=10=]';
                if ((i+1) % 16 <= 8) {
                    printf(" ");
                }
                for (j = (i+1) % 16; j < 16; ++j) {
                    printf("   ");
                }
                printf("|  %s \n", ascii);
            }
        }
    }
}

并修改为:

#include <stdio.h>

char* DumpHex2(const void* data, size_t size) {
    const int symbolSize = 100;
    char* buffer = calloc(10*size, sizeof(char));
    char* symbol = calloc(symbolSize, sizeof(char));

    char ascii[17];
    size_t i, j;
    ascii[16] = '[=11=]';
    for (i = 0; i < size; ++i) {
        snprintf(symbol, symbolSize, "%02X ", ((unsigned char*)data)[i]);
        strcat(buffer, symbol);
        memset(symbol,0,strlen(symbol));
        if (((unsigned char*)data)[i] >= ' ' && ((unsigned char*)data)[i] <= '~') {
            ascii[i % 16] = ((unsigned char*)data)[i];
        } else {
            ascii[i % 16] = '.';
        }
        if ((i+1) % 8 == 0 || i+1 == size) {
            strcat(buffer, " ");
            if ((i+1) % 16 == 0) {
                snprintf(symbol, symbolSize, "|  %s \n", ascii);
                strcat(buffer, symbol);
                memset(symbol,0,strlen(symbol));
            } else if (i+1 == size) {
                ascii[(i+1) % 16] = '[=11=]';
                if ((i+1) % 16 <= 8) {
                    strcat(buffer, " ");
                }
                for (j = (i+1) % 16; j < 16; ++j) {
                    strcat(buffer, "   ");
                }
                snprintf(symbol, symbolSize, "|  %s \n", ascii);
                strcat(buffer, symbol);
                memset(symbol,0,strlen(symbol));
            }
        }
    }

    free(symbol);

    return buffer;
}

有效并且returns输出相同:

int main(int argc, char **argv) {
    char* text = "Hello World! é";

    DumpHex(text, strlen(text));

    char* dump = DumpHex2(text, strlen(text));
    printf("%s", dump);
    free(dump);

    return EXIT_SUCCESS;
}

输出:

48 65 6C 6C 6F 20 57 6F  72 6C 64 21 20 C3 A9     |  Hello World! .. 
48 65 6C 6C 6F 20 57 6F  72 6C 64 21 20 C3 A9     |  Hello World! .. 

然而我的修改,即:

snprintf(symbol, symbolSize, "|  %s \n", ascii);
strcat(buffer, symbol);
memset(symbol,0,strlen(symbol));

我觉得很糟糕(我是 C 语言的新手)。有没有一种方法可以更轻松地格式化和附加字符串?

您不能对未初始化的数据使用 strlen()

char* buffer = malloc(1000000);
memset(buffer,0,strlen(buffer));

strlen() 无法找出分配的内存大小,因为它依赖于终止空字符(0'[=17=]'),这可能会或可能不在缓冲区指向的内存中的某个地方。或者指定在 memset():

中分配的内存大小
memset(buffer, 0, 1000000);

或使用 calloc() 将分配的内存初始化为零:

char buffer = calloc(1000000, sizeof(char));  // or calloc(1000000, 1) since sizeof(char) is 1 by definition.

您的代码中可能还有其他问题。例如,您在 main() 中调用 DumpHex2() 两次 但永远不会释放函数分配的内存。分配给 symbol 的内存也泄漏了。

如果您更新您的问题以包含您希望 DumpHex2() 生成的文本的确切格式,将会更容易回答。

您应该使用 isprint() 来确定字符是否可打印。

更短且恕我直言更易于阅读和理解:

#include <ctype.h>   // isprint()
#include <stddef.h>  // size_t
#include <stdlib.h>  // malloc(), free()
#include <string.h>  // strcat()
#include <stdio.h>   // sprintf()

enum {
    DUMP_BYTES_PER_LINE = 16,
    DUMP_BYTES_GROUP = 8,
    DUMP_CHARS_PER_LINE = DUMP_BYTES_PER_LINE * 4 + DUMP_BYTES_PER_LINE / DUMP_BYTES_GROUP + 4
};

char* DumpHex(const void* data, size_t size)
{
    size_t const num_lines = size / DUMP_BYTES_PER_LINE + ((size % DUMP_BYTES_PER_LINE) > 0);
    size_t const result_length = num_lines * DUMP_CHARS_PER_LINE;

    char *result = malloc((result_length + 1) * sizeof(*result));
    if (!result)
        return NULL;

    memset(result, ' ', result_length);
    result[result_length] = '[=13=]';

    char *dump_pos = result;
    char *plain_pos = result + DUMP_BYTES_PER_LINE * 3 + DUMP_BYTES_PER_LINE / DUMP_BYTES_GROUP + 3;
    char unsigned const *src = data;

    for (size_t i = 0; i < size; ++i, dump_pos += 3, ++plain_pos) {

        sprintf(dump_pos, "%02x ", (int)src[i]);
        dump_pos[3] = ' ';
        *plain_pos = isprint(src[i]) ? src[i] : '.';

        if ((i + 1) % DUMP_BYTES_PER_LINE == 0 || i + 1 == size) {
            *++plain_pos = '\n';

            size_t const bytes_per_line_left = (i + 1) % DUMP_BYTES_PER_LINE;
            plain_pos[bytes_per_line_left ? -(long long)bytes_per_line_left - 3 : -DUMP_BYTES_PER_LINE - 3] = '|';

            dump_pos = plain_pos + 1 - 3;
            plain_pos = dump_pos + DUMP_BYTES_PER_LINE * 3 + DUMP_BYTES_PER_LINE / DUMP_BYTES_GROUP + 5;
        }
        else if ((i + 1) % DUMP_BYTES_GROUP == 0) {
            ++dump_pos;
        }
    }

    return result;
}