读取二进制文件并确定文件类型的程序
Program to read binary and determine filetype
我写了这段代码，它读取一个二进制文件并判断其文件类型（目前只针对几种测试用的文件扩展名）。它对 PDF、MP3 有效，但对 jpg 无效。
问题出在哪里？对于 jpg，printf("%s[%d]: %x\n", "Buffer", j, buffer[j]); 这一行
输出了多个字节（即输出 ffffff 开头的值，而不是单个字节）。
#include <stdio.h>
/*
 * Leading-byte signatures, one row per file type. Row i is reported under
 * the name filetype[i]. Shorter signatures are padded with 0x00, and main()
 * treats the first 0x00 it meets in a row as the end of that signature.
 * NOTE: the first row fills all 8 columns, so it contains no 0x00 terminator.
 */
const int header[6][8] = {
{0x89,0x50,0x4E,0x47,0x0D,0x0A,0x1A,0x0A}, /* filetype[0] */
{0xFF,0xD8,0x00,0x00,0x00,0x00,0x00,0x00}, /* filetype[1] */
{0xFF,0xFB,0x00,0x00,0x00,0x00,0x00,0x00}, /* filetype[2] */
{0x49,0x44,0x33,0x00,0x00,0x00,0x00,0x00}, /* filetype[3] */
{0x25,0x50,0x44,0x46,0x2D,0x00,0x00,0x00}, /* filetype[4] */
{0x42,0x4C,0x45,0x4E,0x44,0x45,0x52,0x00}  /* filetype[5] */
};
/* Display names, index-aligned with the rows of header[]. */
const char* filetype[6] = {"PNG","JPG","MP3","MP3v2","PDF","Blender"};
/*
 * Reads the first 8 bytes of a hard-coded file and walks the rows of
 * header[] looking for one whose leading bytes match.
 * Returns -1 if the file cannot be opened, 1 as soon as a row is reported
 * as a match, and 0 if no row matches.
 */
int main()
{
FILE *fd;
/* Plain char: if char is signed on this platform, bytes >= 0x80 become
   negative when promoted to int, so %x prints them sign-extended (e.g.
   ffffffd8 for 0xD8 with a 32-bit int) and they never compare equal to the
   positive int entries of header[]. */
char buffer[8];
/* NOTE: the backslashes in this path literal are not escaped. */
if ((fd = fopen("C:\Users\***\Desktop\Unnamed.jpg", "rb")) == NULL) {
return -1;
}
//fread(buffer, sizeof(char), 8, fd);
/* Reads one item of sizeof(buffer) bytes; the return value is not checked
   and fd is never closed. */
fread(buffer, sizeof(buffer), 1, fd);
for (int i = 0; i < 6; i++) {
for(int j = 0; j < 8; j++){
/* Debug trace of the table byte and the file byte about to be compared. */
printf("%s[%d][%d]: %x\n","Header",i,j,header[i][j]);
printf("%s[%d]: %x\n", "Buffer", j, buffer[j]);
/* A 0x00 table entry ends row i's signature; every byte before it matched,
   so filetype[i] is reported. A row without any 0x00 can never reach this. */
if (header[i][j] == 0x00) {
printf("%s: %s","Found file type",filetype[i]);
return 1;
}
/* Mismatch: abandon this row and move on to the next one. */
if (header[i][j] != buffer[j]) {
break;
}
}
}
printf("%s", "Couldn't determine filetype - Not in library");
return 0;
}
您忘记把缓冲区声明为 unsigned char，因此读入的值被符号扩展了。
显然，内置的魔数签名表也应该声明为 const unsigned char，
并且应该通过一次 memcmp() 调用来完成比较。
请把 buffer 声明为 unsigned char（header 也可以这样声明），
以避免下面的问题：如果您平台上的 char 是有符号的，读到的字节会变成负值，
而您是在把它与 0x89 这类第 7 位被置位的 int 值进行比较。
PNG 也有问题,因为 PNG 的值为:
{0x89,0x50,0x4E,0x47,0x0D,0x0A,0x1A,0x0A},
它不像其他几行那样以 0 结尾，而以 0 结尾是必需的，因为您的算法要遇到 0 才认为匹配成功：
if (header[i][j] == 0x00) {
printf("%s: %s","Found file type",filetype[i]);
return 1;
}
只需增加一列，让 PNG 那一行也以 0 结尾即可。
最终代码：
/*
 * Signature table: each row holds the leading bytes of one file type and is
 * named by the entry at the same index in filetype[]. Only the meaningful
 * bytes are spelled out; the remaining columns are zero-initialized, so every
 * row (including the full 8-byte first one) ends with at least one 0x00.
 */
const unsigned char header[6][9] = {
    { 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a },
    { 0xff, 0xd8 },
    { 0xff, 0xfb },
    { 0x49, 0x44, 0x33 },
    { 0x25, 0x50, 0x44, 0x46, 0x2d },
    { 0x42, 0x4c, 0x45, 0x4e, 0x44, 0x45, 0x52 },
};

/* Display names, index-aligned with the rows of header[]. */
const char *filetype[6] = {
    "PNG",
    "JPG",
    "MP3",
    "MP3v2",
    "PDF",
    "Blender",
};
/*
 * Reads the leading bytes of a hard-coded file and reports which signature
 * row of header[] they start with.
 *
 * Returns 1 when a signature matches (its name from filetype[] is printed),
 * 0 when no signature matches, and -1 when the file cannot be opened or read.
 */
int main(void)
{
    /* Both loop bounds come from the table itself, so adding a row or a
       column to header[] needs no change here. */
    const int type_count = (int)(sizeof(header) / sizeof(header[0]));
    const int row_len = (int)sizeof(header[0]);

    /* Zero-filled so that a file shorter than one row is seen as 0x00 padding
       rather than indeterminate bytes; unsigned so bytes >= 0x80 are not
       sign-extended when promoted for printing and comparison. */
    unsigned char buffer[sizeof(header[0])] = {0};

    /* Backslashes have to be escaped inside a C string literal. */
    FILE *fd = fopen("C:\\Users\\***\\Desktop\\Unnamed.jpg", "rb");
    if (fd == NULL) {
        return -1;
    }

    /* Byte-granular read: a short file still delivers the bytes it has. */
    size_t got = fread(buffer, 1, sizeof(buffer), fd);
    int read_failed = (got < sizeof(buffer)) && ferror(fd);
    fclose(fd);
    if (read_failed) {
        return -1;
    }

    for (int i = 0; i < type_count; i++) {
        for (int j = 0; j < row_len; j++) {
            /* Debug trace of the table byte and the file byte being compared. */
            printf("%s[%d][%d]: %x\n", "Header", i, j, (unsigned int)header[i][j]);
            printf("%s[%d]: %x\n", "Buffer", j, (unsigned int)buffer[j]);

            /* 0x00 terminates a signature: every byte before it matched. */
            if (header[i][j] == 0x00) {
                printf("%s: %s", "Found file type", filetype[i]);
                return 1;
            }
            /* Mismatch: give up on this row and try the next one. */
            if (header[i][j] != buffer[j]) {
                break;
            }
        }
    }

    printf("%s", "Couldn't determine filetype - Not in library");
    return 0;
}
我写了这段代码，它读取一个二进制文件并判断其文件类型（目前只针对几种测试用的文件扩展名）。它对 PDF、MP3 有效，但对 jpg 无效。
问题出在哪里？对于 jpg，printf("%s[%d]: %x\n", "Buffer", j, buffer[j]); 这一行
输出了多个字节（即输出 ffffff 开头的值，而不是单个字节）。
#include <stdio.h>
/*
 * Leading-byte signatures, one row per file type. Row i is reported under
 * the name filetype[i]. Shorter signatures are padded with 0x00, and main()
 * treats the first 0x00 it meets in a row as the end of that signature.
 * NOTE: the first row fills all 8 columns, so it contains no 0x00 terminator.
 */
const int header[6][8] = {
{0x89,0x50,0x4E,0x47,0x0D,0x0A,0x1A,0x0A}, /* filetype[0] */
{0xFF,0xD8,0x00,0x00,0x00,0x00,0x00,0x00}, /* filetype[1] */
{0xFF,0xFB,0x00,0x00,0x00,0x00,0x00,0x00}, /* filetype[2] */
{0x49,0x44,0x33,0x00,0x00,0x00,0x00,0x00}, /* filetype[3] */
{0x25,0x50,0x44,0x46,0x2D,0x00,0x00,0x00}, /* filetype[4] */
{0x42,0x4C,0x45,0x4E,0x44,0x45,0x52,0x00}  /* filetype[5] */
};
/* Display names, index-aligned with the rows of header[]. */
const char* filetype[6] = {"PNG","JPG","MP3","MP3v2","PDF","Blender"};
/*
 * Reads the first 8 bytes of a hard-coded file and walks the rows of
 * header[] looking for one whose leading bytes match.
 * Returns -1 if the file cannot be opened, 1 as soon as a row is reported
 * as a match, and 0 if no row matches.
 */
int main()
{
FILE *fd;
/* Plain char: if char is signed on this platform, bytes >= 0x80 become
   negative when promoted to int, so %x prints them sign-extended (e.g.
   ffffffd8 for 0xD8 with a 32-bit int) and they never compare equal to the
   positive int entries of header[]. */
char buffer[8];
/* NOTE: the backslashes in this path literal are not escaped. */
if ((fd = fopen("C:\Users\***\Desktop\Unnamed.jpg", "rb")) == NULL) {
return -1;
}
//fread(buffer, sizeof(char), 8, fd);
/* Reads one item of sizeof(buffer) bytes; the return value is not checked
   and fd is never closed. */
fread(buffer, sizeof(buffer), 1, fd);
for (int i = 0; i < 6; i++) {
for(int j = 0; j < 8; j++){
/* Debug trace of the table byte and the file byte about to be compared. */
printf("%s[%d][%d]: %x\n","Header",i,j,header[i][j]);
printf("%s[%d]: %x\n", "Buffer", j, buffer[j]);
/* A 0x00 table entry ends row i's signature; every byte before it matched,
   so filetype[i] is reported. A row without any 0x00 can never reach this. */
if (header[i][j] == 0x00) {
printf("%s: %s","Found file type",filetype[i]);
return 1;
}
/* Mismatch: abandon this row and move on to the next one. */
if (header[i][j] != buffer[j]) {
break;
}
}
}
printf("%s", "Couldn't determine filetype - Not in library");
return 0;
}
您忘记把缓冲区声明为 unsigned char，因此读入的值被符号扩展了。
显然，内置的魔数签名表也应该声明为 const unsigned char，
并且应该通过一次 memcmp() 调用来完成比较。
请把 buffer 声明为 unsigned char（header 也可以这样声明），
以避免下面的问题：如果您平台上的 char 是有符号的，读到的字节会变成负值，
而您是在把它与 0x89 这类第 7 位被置位的 int 值进行比较。
PNG 也有问题,因为 PNG 的值为:
{0x89,0x50,0x4E,0x47,0x0D,0x0A,0x1A,0x0A},
它不像其他几行那样以 0 结尾，而以 0 结尾是必需的，因为您的算法要遇到 0 才认为匹配成功：
if (header[i][j] == 0x00) {
printf("%s: %s","Found file type",filetype[i]);
return 1;
}
只需增加一列，让 PNG 那一行也以 0 结尾即可。
最终代码：
/*
 * Signature table: each row holds the leading bytes of one file type and is
 * named by the entry at the same index in filetype[]. Only the meaningful
 * bytes are spelled out; the remaining columns are zero-initialized, so every
 * row (including the full 8-byte first one) ends with at least one 0x00.
 */
const unsigned char header[6][9] = {
    { 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a },
    { 0xff, 0xd8 },
    { 0xff, 0xfb },
    { 0x49, 0x44, 0x33 },
    { 0x25, 0x50, 0x44, 0x46, 0x2d },
    { 0x42, 0x4c, 0x45, 0x4e, 0x44, 0x45, 0x52 },
};

/* Display names, index-aligned with the rows of header[]. */
const char *filetype[6] = {
    "PNG",
    "JPG",
    "MP3",
    "MP3v2",
    "PDF",
    "Blender",
};
/*
 * Reads the leading bytes of a hard-coded file and reports which signature
 * row of header[] they start with.
 *
 * Returns 1 when a signature matches (its name from filetype[] is printed),
 * 0 when no signature matches, and -1 when the file cannot be opened or read.
 */
int main(void)
{
    /* Both loop bounds come from the table itself, so adding a row or a
       column to header[] needs no change here. */
    const int type_count = (int)(sizeof(header) / sizeof(header[0]));
    const int row_len = (int)sizeof(header[0]);

    /* Zero-filled so that a file shorter than one row is seen as 0x00 padding
       rather than indeterminate bytes; unsigned so bytes >= 0x80 are not
       sign-extended when promoted for printing and comparison. */
    unsigned char buffer[sizeof(header[0])] = {0};

    /* Backslashes have to be escaped inside a C string literal. */
    FILE *fd = fopen("C:\\Users\\***\\Desktop\\Unnamed.jpg", "rb");
    if (fd == NULL) {
        return -1;
    }

    /* Byte-granular read: a short file still delivers the bytes it has. */
    size_t got = fread(buffer, 1, sizeof(buffer), fd);
    int read_failed = (got < sizeof(buffer)) && ferror(fd);
    fclose(fd);
    if (read_failed) {
        return -1;
    }

    for (int i = 0; i < type_count; i++) {
        for (int j = 0; j < row_len; j++) {
            /* Debug trace of the table byte and the file byte being compared. */
            printf("%s[%d][%d]: %x\n", "Header", i, j, (unsigned int)header[i][j]);
            printf("%s[%d]: %x\n", "Buffer", j, (unsigned int)buffer[j]);

            /* 0x00 terminates a signature: every byte before it matched. */
            if (header[i][j] == 0x00) {
                printf("%s: %s", "Found file type", filetype[i]);
                return 1;
            }
            /* Mismatch: give up on this row and try the next one. */
            if (header[i][j] != buffer[j]) {
                break;
            }
        }
    }

    printf("%s", "Couldn't determine filetype - Not in library");
    return 0;
}