我的程序计算出的 git 对象的 SHA1 散列与来自 git 的对象不匹配
SHA1 hash of git object calculated by my program not matching the one from git
我正在编写一个 C 程序来复制 git 给出的 SHA1 哈希值,但它给出的哈希值一直在变化,永远不会匹配 git 中的哈希值。我认为我的 SHA1 实现没有任何问题。我尝试用另一个实现替换我的实现,但仍然得到相同的结果。这是我的代码:
sha1.h:
#ifndef SHA1_H
#define SHA1_H
#include <stdint.h>
typedef struct {
uint32_t state[5];
uint32_t count[2];
unsigned char buffer[64];
} SHA1_CTX;
void SHA1Transform(uint32_t state[5], const unsigned char buffer[64]);
void SHA1Init(SHA1_CTX *context);
void SHA1Update(SHA1_CTX *context, const unsigned char *data, uint32_t len);
void SHA1Final(unsigned char digest[20], SHA1_CTX *context);
void SHA1(char *hash_out, const char *str, int len);
#endif /* _SHA1_H */
sha1.c:
#if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_WARNINGS)
#define _CRT_SECURE_NO_WARNINGS
#endif
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sha1.h"
#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))
/* blk0() and blk() perform the initial expand. */
#if BYTE_ORDER == LITTLE_ENDIAN
#define blk0(i) (block->l[i] = (rol(block->l[i], 24) & 0xFF00FF00) | (rol(block->l[i], 8) & 0x00FF00FF))
#elif BYTE_ORDER == BIG_ENDIAN
#define blk0(i) block->l[i]
#else
#error "Endianness not defined!"
#endif
#define blk(i) (block->l[i & 15] = rol(block->l[(i + 13) & 15] ^ block->l[(i + 8) & 15] ^ block->l[(i + 2) & 15] ^ block->l[i & 15], 1))
/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
#define R0(v, w, x, y, z, i) \
z += ((w & (x ^ y)) ^ y) + blk0(i) + 0x5A827999 + rol(v, 5); \
w = rol(w, 30);
#define R1(v, w, x, y, z, i) \
z += ((w & (x ^ y)) ^ y) + blk(i) + 0x5A827999 + rol(v, 5); \
w = rol(w, 30);
#define R2(v, w, x, y, z, i) \
z += (w ^ x ^ y) + blk(i) + 0x6ED9EBA1 + rol(v, 5); \
w = rol(w, 30);
#define R3(v, w, x, y, z, i) \
z += (((w | x) & y) | (w & x)) + blk(i) + 0x8F1BBCDC + rol(v, 5); \
w = rol(w, 30);
#define R4(v, w, x, y, z, i) \
z += (w ^ x ^ y) + blk(i) + 0xCA62C1D6 + rol(v, 5); \
w = rol(w, 30);
void SHA1Transform(uint32_t state[5], const unsigned char buffer[64])
{
uint32_t a, b, c, d, e;
typedef union {
unsigned char c[64];
uint32_t l[16];
} CHAR64LONG16;
CHAR64LONG16 block[1]; /* use array to appear as a pointer */
memcpy(block, buffer, 64);
/* Copy context->state[] to working vars */
a = state[0];
b = state[1];
c = state[2];
d = state[3];
e = state[4];
/* 4 rounds of 20 operations each. Loop unrolled. */
R0(a, b, c, d, e, 0);
R0(e, a, b, c, d, 1);
R0(d, e, a, b, c, 2);
R0(c, d, e, a, b, 3);
R0(b, c, d, e, a, 4);
R0(a, b, c, d, e, 5);
R0(e, a, b, c, d, 6);
R0(d, e, a, b, c, 7);
R0(c, d, e, a, b, 8);
R0(b, c, d, e, a, 9);
R0(a, b, c, d, e, 10);
R0(e, a, b, c, d, 11);
R0(d, e, a, b, c, 12);
R0(c, d, e, a, b, 13);
R0(b, c, d, e, a, 14);
R0(a, b, c, d, e, 15);
R1(e, a, b, c, d, 16);
R1(d, e, a, b, c, 17);
R1(c, d, e, a, b, 18);
R1(b, c, d, e, a, 19);
R2(a, b, c, d, e, 20);
R2(e, a, b, c, d, 21);
R2(d, e, a, b, c, 22);
R2(c, d, e, a, b, 23);
R2(b, c, d, e, a, 24);
R2(a, b, c, d, e, 25);
R2(e, a, b, c, d, 26);
R2(d, e, a, b, c, 27);
R2(c, d, e, a, b, 28);
R2(b, c, d, e, a, 29);
R2(a, b, c, d, e, 30);
R2(e, a, b, c, d, 31);
R2(d, e, a, b, c, 32);
R2(c, d, e, a, b, 33);
R2(b, c, d, e, a, 34);
R2(a, b, c, d, e, 35);
R2(e, a, b, c, d, 36);
R2(d, e, a, b, c, 37);
R2(c, d, e, a, b, 38);
R2(b, c, d, e, a, 39);
R3(a, b, c, d, e, 40);
R3(e, a, b, c, d, 41);
R3(d, e, a, b, c, 42);
R3(c, d, e, a, b, 43);
R3(b, c, d, e, a, 44);
R3(a, b, c, d, e, 45);
R3(e, a, b, c, d, 46);
R3(d, e, a, b, c, 47);
R3(c, d, e, a, b, 48);
R3(b, c, d, e, a, 49);
R3(a, b, c, d, e, 50);
R3(e, a, b, c, d, 51);
R3(d, e, a, b, c, 52);
R3(c, d, e, a, b, 53);
R3(b, c, d, e, a, 54);
R3(a, b, c, d, e, 55);
R3(e, a, b, c, d, 56);
R3(d, e, a, b, c, 57);
R3(c, d, e, a, b, 58);
R3(b, c, d, e, a, 59);
R4(a, b, c, d, e, 60);
R4(e, a, b, c, d, 61);
R4(d, e, a, b, c, 62);
R4(c, d, e, a, b, 63);
R4(b, c, d, e, a, 64);
R4(a, b, c, d, e, 65);
R4(e, a, b, c, d, 66);
R4(d, e, a, b, c, 67);
R4(c, d, e, a, b, 68);
R4(b, c, d, e, a, 69);
R4(a, b, c, d, e, 70);
R4(e, a, b, c, d, 71);
R4(d, e, a, b, c, 72);
R4(c, d, e, a, b, 73);
R4(b, c, d, e, a, 74);
R4(a, b, c, d, e, 75);
R4(e, a, b, c, d, 76);
R4(d, e, a, b, c, 77);
R4(c, d, e, a, b, 78);
R4(b, c, d, e, a, 79);
/* Add the working vars back into context.state[] */
state[0] += a;
state[1] += b;
state[2] += c;
state[3] += d;
state[4] += e;
/* Wipe variables */
a = b = c = d = e = 0;
memset(block, '[=13=]', sizeof(block));
}
void SHA1Init(SHA1_CTX *context)
{
/* SHA1 initialization constants */
context->state[0] = 0x67452301;
context->state[1] = 0xEFCDAB89;
context->state[2] = 0x98BADCFE;
context->state[3] = 0x10325476;
context->state[4] = 0xC3D2E1F0;
context->count[0] = context->count[1] = 0;
}
void SHA1Update(SHA1_CTX *context, const unsigned char *data, uint32_t len)
{
uint32_t i;
uint32_t j;
j = context->count[0];
if ((context->count[0] += len << 3) < j)
context->count[1]++;
context->count[1] += (len >> 29);
j = (j >> 3) & 63;
if ((j + len) > 63) {
memcpy(&context->buffer[j], data, (i = 64 - j));
SHA1Transform(context->state, context->buffer);
for (; i + 63 < len; i += 64) {
SHA1Transform(context->state, &data[i]);
}
j = 0;
} else {
i = 0;
}
memcpy(&context->buffer[j], &data[i], len - i);
}
void SHA1Final(unsigned char digest[20], SHA1_CTX *context)
{
unsigned i;
unsigned char finalcount[8];
unsigned char c;
for (i = 0; i < 8; i++)
finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 0 : 1)] >> ((3 - (i & 3)) * 8)) & 255); /* Endian independent */
c = 0200;
SHA1Update(context, &c, 1);
while ((context->count[0] & 504) != 448) {
c = 0000;
SHA1Update(context, &c, 1);
}
SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */
for (i = 0; i < 20; i++)
digest[i] = (unsigned char)((context->state[i >> 2] >> ((3 - (i & 3)) * 8)) & 255);
/* Wipe variables */
memset(context, '[=13=]', sizeof(*context));
memset(&finalcount, '[=13=]', sizeof(finalcount));
}
void SHA1(char *hash_out, const char *str, int len)
{
SHA1_CTX ctx;
unsigned char *hash = malloc(20);
SHA1Init(&ctx);
for (int i = 0; i < len; i += 1)
SHA1Update(&ctx, (const unsigned char *)str + i, 1);
SHA1Final(hash, &ctx);
/* Convert to hex */
for (int i = 0; i < 20; i++)
sprintf(hash_out + i * 2, "%02x", hash[i]);
}
对象哈希函数:
char *object_hash(struct object *object)
{
/* Get size of object */
int size = object->size;
/* Size to string */
char *sizeStr = malloc(20);
sprintf(sizeStr, "%d", size);
size += strlen(objectTypeStrings[object->type]);
size += strlen(sizeStr);
size += 2;
char *format = malloc(size + 1);
sprintf(format, "%s %s[=14=]%s", objectTypeStrings[object->type], sizeStr, object->data);
/* Get hash of object */
char *hash = calloc(41, sizeof(char));
SHA1(hash, format, size);
free(format);
free(sizeStr);
return hash;
}
其他相关内容:
enum objectType {
commit,
tree,
blob,
tag
};
struct object {
enum objectType type;
int size;
char *data;
};
char *objectTypeStrings[] = {
"commit",
"tree",
"blob",
"tag"
};
你的格式字符串中有一个文字 NUL ('[=10=]'
),所以你的最后一个参数(数据)没有被添加(因为 C 字符串在第一个 NUL 结束)。
如果你想 sprintf
附加 NUL,你必须使用 %c
并将 '[=10=]'
作为参数传递(尽管 sprintf
已经把字符串末尾的空值,您实际上不需要)。
您还应注意,任意文件的数据中可能包含 NUL 字符,当作为 %s
传递时,这些字符将被 sprintf
截断。
我建议您 memcpy
您的数据,而不是为此目的使用 sprintf
。
(此外,您可以避免在尺寸字符串上调用 strlen
,因为 sprintf
returns 打印的字符数,不包括终止 NUL)
我还要补充一点,如@Nickolay Olshevsky 所示,您实际上不需要将数据复制到同一个缓冲区中,您只需将哈希值与 header 一起提供,然后是数据。
更可靠的解决方案是对数据使用 SHA1Init() 和后续 SHA1Update() 调用,以 SHA1Finish() 结束。当前的方法会很慢,因为在 SHA1() 函数调用中逐字节散列数据。
IE。类似于以下内容:
char *object_hash(struct object *object)
{
SHA1_CTX ctx;
SHA1Init(&ctx);
/* Size to string */
char sizeStr[20] = {0};
snprintf(sizeStr, sizeof(sizeStr), "%d", object->size);
SHA1Update(&ctx, (const uint8_t *) objectTypeStrings[object->type], strlen(objectTypeStrings[object->type]);
SHA1Update(&ctx, (const uint8_t *) " ", 1);
SHA1Update(&ctx, (const uint8_t *) sizeStr, strlen(sizeStr));
SHA1Update(&ctx, (const uint8_t *) "", 1);
SHA1Update(&ctx, (const uint8_t *) object->data, object->size);
/* Get hash of object */
unsigned char hash[20] = {0};
SHA1Final(hash, &ctx);
/* Convert to hex */
char *hash_out = calloc(41, sizeof(char));
for (size_t i = 0; i < 20; i++)
sprintf(hash_out + i * 2, "%02x", hash[i]);
return hash_out;
}
我正在编写一个 C 程序来复制 git 给出的 SHA1 哈希值,但它给出的哈希值一直在变化,永远不会匹配 git 中的哈希值。我认为我的 SHA1 实现没有任何问题。我尝试用另一个实现替换我的实现,但仍然得到相同的结果。这是我的代码:
sha1.h:
#ifndef SHA1_H
#define SHA1_H
#include <stdint.h>
typedef struct {
uint32_t state[5];
uint32_t count[2];
unsigned char buffer[64];
} SHA1_CTX;
void SHA1Transform(uint32_t state[5], const unsigned char buffer[64]);
void SHA1Init(SHA1_CTX *context);
void SHA1Update(SHA1_CTX *context, const unsigned char *data, uint32_t len);
void SHA1Final(unsigned char digest[20], SHA1_CTX *context);
void SHA1(char *hash_out, const char *str, int len);
#endif /* _SHA1_H */
sha1.c:
#if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_WARNINGS)
#define _CRT_SECURE_NO_WARNINGS
#endif
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sha1.h"
#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))
/* blk0() and blk() perform the initial expand. */
#if BYTE_ORDER == LITTLE_ENDIAN
#define blk0(i) (block->l[i] = (rol(block->l[i], 24) & 0xFF00FF00) | (rol(block->l[i], 8) & 0x00FF00FF))
#elif BYTE_ORDER == BIG_ENDIAN
#define blk0(i) block->l[i]
#else
#error "Endianness not defined!"
#endif
#define blk(i) (block->l[i & 15] = rol(block->l[(i + 13) & 15] ^ block->l[(i + 8) & 15] ^ block->l[(i + 2) & 15] ^ block->l[i & 15], 1))
/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
#define R0(v, w, x, y, z, i) \
z += ((w & (x ^ y)) ^ y) + blk0(i) + 0x5A827999 + rol(v, 5); \
w = rol(w, 30);
#define R1(v, w, x, y, z, i) \
z += ((w & (x ^ y)) ^ y) + blk(i) + 0x5A827999 + rol(v, 5); \
w = rol(w, 30);
#define R2(v, w, x, y, z, i) \
z += (w ^ x ^ y) + blk(i) + 0x6ED9EBA1 + rol(v, 5); \
w = rol(w, 30);
#define R3(v, w, x, y, z, i) \
z += (((w | x) & y) | (w & x)) + blk(i) + 0x8F1BBCDC + rol(v, 5); \
w = rol(w, 30);
#define R4(v, w, x, y, z, i) \
z += (w ^ x ^ y) + blk(i) + 0xCA62C1D6 + rol(v, 5); \
w = rol(w, 30);
void SHA1Transform(uint32_t state[5], const unsigned char buffer[64])
{
uint32_t a, b, c, d, e;
typedef union {
unsigned char c[64];
uint32_t l[16];
} CHAR64LONG16;
CHAR64LONG16 block[1]; /* use array to appear as a pointer */
memcpy(block, buffer, 64);
/* Copy context->state[] to working vars */
a = state[0];
b = state[1];
c = state[2];
d = state[3];
e = state[4];
/* 4 rounds of 20 operations each. Loop unrolled. */
R0(a, b, c, d, e, 0);
R0(e, a, b, c, d, 1);
R0(d, e, a, b, c, 2);
R0(c, d, e, a, b, 3);
R0(b, c, d, e, a, 4);
R0(a, b, c, d, e, 5);
R0(e, a, b, c, d, 6);
R0(d, e, a, b, c, 7);
R0(c, d, e, a, b, 8);
R0(b, c, d, e, a, 9);
R0(a, b, c, d, e, 10);
R0(e, a, b, c, d, 11);
R0(d, e, a, b, c, 12);
R0(c, d, e, a, b, 13);
R0(b, c, d, e, a, 14);
R0(a, b, c, d, e, 15);
R1(e, a, b, c, d, 16);
R1(d, e, a, b, c, 17);
R1(c, d, e, a, b, 18);
R1(b, c, d, e, a, 19);
R2(a, b, c, d, e, 20);
R2(e, a, b, c, d, 21);
R2(d, e, a, b, c, 22);
R2(c, d, e, a, b, 23);
R2(b, c, d, e, a, 24);
R2(a, b, c, d, e, 25);
R2(e, a, b, c, d, 26);
R2(d, e, a, b, c, 27);
R2(c, d, e, a, b, 28);
R2(b, c, d, e, a, 29);
R2(a, b, c, d, e, 30);
R2(e, a, b, c, d, 31);
R2(d, e, a, b, c, 32);
R2(c, d, e, a, b, 33);
R2(b, c, d, e, a, 34);
R2(a, b, c, d, e, 35);
R2(e, a, b, c, d, 36);
R2(d, e, a, b, c, 37);
R2(c, d, e, a, b, 38);
R2(b, c, d, e, a, 39);
R3(a, b, c, d, e, 40);
R3(e, a, b, c, d, 41);
R3(d, e, a, b, c, 42);
R3(c, d, e, a, b, 43);
R3(b, c, d, e, a, 44);
R3(a, b, c, d, e, 45);
R3(e, a, b, c, d, 46);
R3(d, e, a, b, c, 47);
R3(c, d, e, a, b, 48);
R3(b, c, d, e, a, 49);
R3(a, b, c, d, e, 50);
R3(e, a, b, c, d, 51);
R3(d, e, a, b, c, 52);
R3(c, d, e, a, b, 53);
R3(b, c, d, e, a, 54);
R3(a, b, c, d, e, 55);
R3(e, a, b, c, d, 56);
R3(d, e, a, b, c, 57);
R3(c, d, e, a, b, 58);
R3(b, c, d, e, a, 59);
R4(a, b, c, d, e, 60);
R4(e, a, b, c, d, 61);
R4(d, e, a, b, c, 62);
R4(c, d, e, a, b, 63);
R4(b, c, d, e, a, 64);
R4(a, b, c, d, e, 65);
R4(e, a, b, c, d, 66);
R4(d, e, a, b, c, 67);
R4(c, d, e, a, b, 68);
R4(b, c, d, e, a, 69);
R4(a, b, c, d, e, 70);
R4(e, a, b, c, d, 71);
R4(d, e, a, b, c, 72);
R4(c, d, e, a, b, 73);
R4(b, c, d, e, a, 74);
R4(a, b, c, d, e, 75);
R4(e, a, b, c, d, 76);
R4(d, e, a, b, c, 77);
R4(c, d, e, a, b, 78);
R4(b, c, d, e, a, 79);
/* Add the working vars back into context.state[] */
state[0] += a;
state[1] += b;
state[2] += c;
state[3] += d;
state[4] += e;
/* Wipe variables */
a = b = c = d = e = 0;
memset(block, '[=13=]', sizeof(block));
}
void SHA1Init(SHA1_CTX *context)
{
/* SHA1 initialization constants */
context->state[0] = 0x67452301;
context->state[1] = 0xEFCDAB89;
context->state[2] = 0x98BADCFE;
context->state[3] = 0x10325476;
context->state[4] = 0xC3D2E1F0;
context->count[0] = context->count[1] = 0;
}
void SHA1Update(SHA1_CTX *context, const unsigned char *data, uint32_t len)
{
uint32_t i;
uint32_t j;
j = context->count[0];
if ((context->count[0] += len << 3) < j)
context->count[1]++;
context->count[1] += (len >> 29);
j = (j >> 3) & 63;
if ((j + len) > 63) {
memcpy(&context->buffer[j], data, (i = 64 - j));
SHA1Transform(context->state, context->buffer);
for (; i + 63 < len; i += 64) {
SHA1Transform(context->state, &data[i]);
}
j = 0;
} else {
i = 0;
}
memcpy(&context->buffer[j], &data[i], len - i);
}
void SHA1Final(unsigned char digest[20], SHA1_CTX *context)
{
unsigned i;
unsigned char finalcount[8];
unsigned char c;
for (i = 0; i < 8; i++)
finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 0 : 1)] >> ((3 - (i & 3)) * 8)) & 255); /* Endian independent */
c = 0200;
SHA1Update(context, &c, 1);
while ((context->count[0] & 504) != 448) {
c = 0000;
SHA1Update(context, &c, 1);
}
SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */
for (i = 0; i < 20; i++)
digest[i] = (unsigned char)((context->state[i >> 2] >> ((3 - (i & 3)) * 8)) & 255);
/* Wipe variables */
memset(context, '[=13=]', sizeof(*context));
memset(&finalcount, '[=13=]', sizeof(finalcount));
}
void SHA1(char *hash_out, const char *str, int len)
{
SHA1_CTX ctx;
unsigned char *hash = malloc(20);
SHA1Init(&ctx);
for (int i = 0; i < len; i += 1)
SHA1Update(&ctx, (const unsigned char *)str + i, 1);
SHA1Final(hash, &ctx);
/* Convert to hex */
for (int i = 0; i < 20; i++)
sprintf(hash_out + i * 2, "%02x", hash[i]);
}
对象哈希函数:
char *object_hash(struct object *object)
{
/* Get size of object */
int size = object->size;
/* Size to string */
char *sizeStr = malloc(20);
sprintf(sizeStr, "%d", size);
size += strlen(objectTypeStrings[object->type]);
size += strlen(sizeStr);
size += 2;
char *format = malloc(size + 1);
sprintf(format, "%s %s[=14=]%s", objectTypeStrings[object->type], sizeStr, object->data);
/* Get hash of object */
char *hash = calloc(41, sizeof(char));
SHA1(hash, format, size);
free(format);
free(sizeStr);
return hash;
}
其他相关内容:
enum objectType {
commit,
tree,
blob,
tag
};
struct object {
enum objectType type;
int size;
char *data;
};
char *objectTypeStrings[] = {
"commit",
"tree",
"blob",
"tag"
};
你的格式字符串中有一个文字 NUL ('[=10=]'
),所以你的最后一个参数(数据)没有被添加(因为 C 字符串在第一个 NUL 结束)。
如果你想 sprintf
附加 NUL,你必须使用 %c
并将 '[=10=]'
作为参数传递(尽管 sprintf
已经把字符串末尾的空值,您实际上不需要)。
您还应注意,任意文件的数据中可能包含 NUL 字符,当作为 %s
传递时,这些字符将被 sprintf
截断。
我建议您 memcpy
您的数据,而不是为此目的使用 sprintf
。
(此外,您可以避免在尺寸字符串上调用 strlen
,因为 sprintf
returns 打印的字符数,不包括终止 NUL)
我还要补充一点,如@Nickolay Olshevsky 所示,您实际上不需要将数据复制到同一个缓冲区中,您只需将哈希值与 header 一起提供,然后是数据。
更可靠的解决方案是对数据使用 SHA1Init() 和后续 SHA1Update() 调用,以 SHA1Finish() 结束。当前的方法会很慢,因为在 SHA1() 函数调用中逐字节散列数据。 IE。类似于以下内容:
char *object_hash(struct object *object)
{
SHA1_CTX ctx;
SHA1Init(&ctx);
/* Size to string */
char sizeStr[20] = {0};
snprintf(sizeStr, sizeof(sizeStr), "%d", object->size);
SHA1Update(&ctx, (const uint8_t *) objectTypeStrings[object->type], strlen(objectTypeStrings[object->type]);
SHA1Update(&ctx, (const uint8_t *) " ", 1);
SHA1Update(&ctx, (const uint8_t *) sizeStr, strlen(sizeStr));
SHA1Update(&ctx, (const uint8_t *) "", 1);
SHA1Update(&ctx, (const uint8_t *) object->data, object->size);
/* Get hash of object */
unsigned char hash[20] = {0};
SHA1Final(hash, &ctx);
/* Convert to hex */
char *hash_out = calloc(41, sizeof(char));
for (size_t i = 0; i < 20; i++)
sprintf(hash_out + i * 2, "%02x", hash[i]);
return hash_out;
}