C: 为 hexdump 格式化字符串(char* 到另一个 char*)
C: formatting strings for hexdump (char* to another char*)
我想将一个 char*
指针的 hexdump 写入另一个 char*
。
为此我采取了 this code snippet:
#include <stdio.h>
void DumpHex(const void* data, size_t size) {
char ascii[17];
size_t i, j;
ascii[16] = '[=10=]';
for (i = 0; i < size; ++i) {
printf("%02X ", ((unsigned char*)data)[i]);
if (((unsigned char*)data)[i] >= ' ' && ((unsigned char*)data)[i] <= '~') {
ascii[i % 16] = ((unsigned char*)data)[i];
} else {
ascii[i % 16] = '.';
}
if ((i+1) % 8 == 0 || i+1 == size) {
printf(" ");
if ((i+1) % 16 == 0) {
printf("| %s \n", ascii);
} else if (i+1 == size) {
ascii[(i+1) % 16] = '[=10=]';
if ((i+1) % 16 <= 8) {
printf(" ");
}
for (j = (i+1) % 16; j < 16; ++j) {
printf(" ");
}
printf("| %s \n", ascii);
}
}
}
}
并修改为:
#include <stdio.h>
char* DumpHex2(const void* data, size_t size) {
const int symbolSize = 100;
char* buffer = calloc(10*size, sizeof(char));
char* symbol = calloc(symbolSize, sizeof(char));
char ascii[17];
size_t i, j;
ascii[16] = '[=11=]';
for (i = 0; i < size; ++i) {
snprintf(symbol, symbolSize, "%02X ", ((unsigned char*)data)[i]);
strcat(buffer, symbol);
memset(symbol,0,strlen(symbol));
if (((unsigned char*)data)[i] >= ' ' && ((unsigned char*)data)[i] <= '~') {
ascii[i % 16] = ((unsigned char*)data)[i];
} else {
ascii[i % 16] = '.';
}
if ((i+1) % 8 == 0 || i+1 == size) {
strcat(buffer, " ");
if ((i+1) % 16 == 0) {
snprintf(symbol, symbolSize, "| %s \n", ascii);
strcat(buffer, symbol);
memset(symbol,0,strlen(symbol));
} else if (i+1 == size) {
ascii[(i+1) % 16] = '[=11=]';
if ((i+1) % 16 <= 8) {
strcat(buffer, " ");
}
for (j = (i+1) % 16; j < 16; ++j) {
strcat(buffer, " ");
}
snprintf(symbol, symbolSize, "| %s \n", ascii);
strcat(buffer, symbol);
memset(symbol,0,strlen(symbol));
}
}
}
free(symbol);
return buffer;
}
有效并且returns输出相同:
int main(int argc, char **argv) {
char* text = "Hello World! é";
DumpHex(text, strlen(text));
char* dump = DumpHex2(text, strlen(text));
printf("%s", dump);
free(dump);
return EXIT_SUCCESS;
}
输出:
48 65 6C 6C 6F 20 57 6F 72 6C 64 21 20 C3 A9 | Hello World! ..
48 65 6C 6C 6F 20 57 6F 72 6C 64 21 20 C3 A9 | Hello World! ..
然而我的修改,即:
snprintf(symbol, symbolSize, "| %s \n", ascii);
strcat(buffer, symbol);
memset(symbol,0,strlen(symbol));
我觉得很糟糕(我是 C 语言的新手)。有没有一种方法可以更轻松地格式化和附加字符串?
您不能对未初始化的数据使用 strlen()
:
char* buffer = malloc(1000000);
memset(buffer,0,strlen(buffer));
strlen()
无法找出分配的内存大小,因为它依赖于终止空字符(0
、'[=17=]'
),这可能会或可能不在缓冲区指向的内存中的某个地方。或者指定在 memset()
:
中分配的内存大小
memset(buffer, 0, 1000000);
或使用 calloc()
将分配的内存初始化为零:
char buffer = calloc(1000000, sizeof(char)); // or calloc(1000000, 1) since sizeof(char) is 1 by definition.
您的代码中可能还有其他问题。例如,您在 main()
中调用 DumpHex2()
两次 但永远不会释放函数分配的内存。分配给 symbol
的内存也泄漏了。
如果您更新您的问题以包含您希望 DumpHex2()
生成的文本的确切格式,将会更容易回答。
您应该使用 isprint()
来确定字符是否可打印。
更短且恕我直言更易于阅读和理解:
#include <ctype.h> // isprint()
#include <stddef.h> // size_t
#include <stdlib.h> // malloc(), free()
#include <string.h> // strcat()
#include <stdio.h> // sprintf()
enum {
DUMP_BYTES_PER_LINE = 16,
DUMP_BYTES_GROUP = 8,
DUMP_CHARS_PER_LINE = DUMP_BYTES_PER_LINE * 4 + DUMP_BYTES_PER_LINE / DUMP_BYTES_GROUP + 4
};
char* DumpHex(const void* data, size_t size)
{
size_t const num_lines = size / DUMP_BYTES_PER_LINE + ((size % DUMP_BYTES_PER_LINE) > 0);
size_t const result_length = num_lines * DUMP_CHARS_PER_LINE;
char *result = malloc((result_length + 1) * sizeof(*result));
if (!result)
return NULL;
memset(result, ' ', result_length);
result[result_length] = '[=13=]';
char *dump_pos = result;
char *plain_pos = result + DUMP_BYTES_PER_LINE * 3 + DUMP_BYTES_PER_LINE / DUMP_BYTES_GROUP + 3;
char unsigned const *src = data;
for (size_t i = 0; i < size; ++i, dump_pos += 3, ++plain_pos) {
sprintf(dump_pos, "%02x ", (int)src[i]);
dump_pos[3] = ' ';
*plain_pos = isprint(src[i]) ? src[i] : '.';
if ((i + 1) % DUMP_BYTES_PER_LINE == 0 || i + 1 == size) {
*++plain_pos = '\n';
size_t const bytes_per_line_left = (i + 1) % DUMP_BYTES_PER_LINE;
plain_pos[bytes_per_line_left ? -(long long)bytes_per_line_left - 3 : -DUMP_BYTES_PER_LINE - 3] = '|';
dump_pos = plain_pos + 1 - 3;
plain_pos = dump_pos + DUMP_BYTES_PER_LINE * 3 + DUMP_BYTES_PER_LINE / DUMP_BYTES_GROUP + 5;
}
else if ((i + 1) % DUMP_BYTES_GROUP == 0) {
++dump_pos;
}
}
return result;
}
我想将一个 char*
指针的 hexdump 写入另一个 char*
。
为此我采取了 this code snippet:
#include <stdio.h>
void DumpHex(const void* data, size_t size) {
char ascii[17];
size_t i, j;
ascii[16] = '[=10=]';
for (i = 0; i < size; ++i) {
printf("%02X ", ((unsigned char*)data)[i]);
if (((unsigned char*)data)[i] >= ' ' && ((unsigned char*)data)[i] <= '~') {
ascii[i % 16] = ((unsigned char*)data)[i];
} else {
ascii[i % 16] = '.';
}
if ((i+1) % 8 == 0 || i+1 == size) {
printf(" ");
if ((i+1) % 16 == 0) {
printf("| %s \n", ascii);
} else if (i+1 == size) {
ascii[(i+1) % 16] = '[=10=]';
if ((i+1) % 16 <= 8) {
printf(" ");
}
for (j = (i+1) % 16; j < 16; ++j) {
printf(" ");
}
printf("| %s \n", ascii);
}
}
}
}
并修改为:
#include <stdio.h>
char* DumpHex2(const void* data, size_t size) {
const int symbolSize = 100;
char* buffer = calloc(10*size, sizeof(char));
char* symbol = calloc(symbolSize, sizeof(char));
char ascii[17];
size_t i, j;
ascii[16] = '[=11=]';
for (i = 0; i < size; ++i) {
snprintf(symbol, symbolSize, "%02X ", ((unsigned char*)data)[i]);
strcat(buffer, symbol);
memset(symbol,0,strlen(symbol));
if (((unsigned char*)data)[i] >= ' ' && ((unsigned char*)data)[i] <= '~') {
ascii[i % 16] = ((unsigned char*)data)[i];
} else {
ascii[i % 16] = '.';
}
if ((i+1) % 8 == 0 || i+1 == size) {
strcat(buffer, " ");
if ((i+1) % 16 == 0) {
snprintf(symbol, symbolSize, "| %s \n", ascii);
strcat(buffer, symbol);
memset(symbol,0,strlen(symbol));
} else if (i+1 == size) {
ascii[(i+1) % 16] = '[=11=]';
if ((i+1) % 16 <= 8) {
strcat(buffer, " ");
}
for (j = (i+1) % 16; j < 16; ++j) {
strcat(buffer, " ");
}
snprintf(symbol, symbolSize, "| %s \n", ascii);
strcat(buffer, symbol);
memset(symbol,0,strlen(symbol));
}
}
}
free(symbol);
return buffer;
}
有效并且returns输出相同:
int main(int argc, char **argv) {
char* text = "Hello World! é";
DumpHex(text, strlen(text));
char* dump = DumpHex2(text, strlen(text));
printf("%s", dump);
free(dump);
return EXIT_SUCCESS;
}
输出:
48 65 6C 6C 6F 20 57 6F 72 6C 64 21 20 C3 A9 | Hello World! ..
48 65 6C 6C 6F 20 57 6F 72 6C 64 21 20 C3 A9 | Hello World! ..
然而我的修改,即:
snprintf(symbol, symbolSize, "| %s \n", ascii);
strcat(buffer, symbol);
memset(symbol,0,strlen(symbol));
我觉得很糟糕(我是 C 语言的新手)。有没有一种方法可以更轻松地格式化和附加字符串?
您不能对未初始化的数据使用 strlen()
:
char* buffer = malloc(1000000); memset(buffer,0,strlen(buffer));
strlen()
无法找出分配的内存大小,因为它依赖于终止空字符(0
、'[=17=]'
),这可能会或可能不在缓冲区指向的内存中的某个地方。或者指定在 memset()
:
memset(buffer, 0, 1000000);
或使用 calloc()
将分配的内存初始化为零:
char buffer = calloc(1000000, sizeof(char)); // or calloc(1000000, 1) since sizeof(char) is 1 by definition.
您的代码中可能还有其他问题。例如,您在 main()
中调用 DumpHex2()
两次 但永远不会释放函数分配的内存。分配给 symbol
的内存也泄漏了。
如果您更新您的问题以包含您希望 DumpHex2()
生成的文本的确切格式,将会更容易回答。
您应该使用 isprint()
来确定字符是否可打印。
更短且恕我直言更易于阅读和理解:
#include <ctype.h> // isprint()
#include <stddef.h> // size_t
#include <stdlib.h> // malloc(), free()
#include <string.h> // strcat()
#include <stdio.h> // sprintf()
enum {
DUMP_BYTES_PER_LINE = 16,
DUMP_BYTES_GROUP = 8,
DUMP_CHARS_PER_LINE = DUMP_BYTES_PER_LINE * 4 + DUMP_BYTES_PER_LINE / DUMP_BYTES_GROUP + 4
};
char* DumpHex(const void* data, size_t size)
{
size_t const num_lines = size / DUMP_BYTES_PER_LINE + ((size % DUMP_BYTES_PER_LINE) > 0);
size_t const result_length = num_lines * DUMP_CHARS_PER_LINE;
char *result = malloc((result_length + 1) * sizeof(*result));
if (!result)
return NULL;
memset(result, ' ', result_length);
result[result_length] = '[=13=]';
char *dump_pos = result;
char *plain_pos = result + DUMP_BYTES_PER_LINE * 3 + DUMP_BYTES_PER_LINE / DUMP_BYTES_GROUP + 3;
char unsigned const *src = data;
for (size_t i = 0; i < size; ++i, dump_pos += 3, ++plain_pos) {
sprintf(dump_pos, "%02x ", (int)src[i]);
dump_pos[3] = ' ';
*plain_pos = isprint(src[i]) ? src[i] : '.';
if ((i + 1) % DUMP_BYTES_PER_LINE == 0 || i + 1 == size) {
*++plain_pos = '\n';
size_t const bytes_per_line_left = (i + 1) % DUMP_BYTES_PER_LINE;
plain_pos[bytes_per_line_left ? -(long long)bytes_per_line_left - 3 : -DUMP_BYTES_PER_LINE - 3] = '|';
dump_pos = plain_pos + 1 - 3;
plain_pos = dump_pos + DUMP_BYTES_PER_LINE * 3 + DUMP_BYTES_PER_LINE / DUMP_BYTES_GROUP + 5;
}
else if ((i + 1) % DUMP_BYTES_GROUP == 0) {
++dump_pos;
}
}
return result;
}