我的程序计算出的 git 对象的 SHA1 散列与来自 git 的对象不匹配

Question

我正在编写一个 C 程序来复制 git 给出的 SHA1 哈希值，但它给出的哈希值一直在变化，永远不会匹配 git 中的哈希值。我认为我的 SHA1 实现没有任何问题。我尝试用另一个实现替换我的实现，但仍然得到相同的结果。这是我的代码：

sha1.h:

#ifndef SHA1_H
#define SHA1_H

#include <stdint.h>

typedef struct {
    uint32_t state[5];
    uint32_t count[2];
    unsigned char buffer[64];
} SHA1_CTX;

void SHA1Transform(uint32_t state[5], const unsigned char buffer[64]);

void SHA1Init(SHA1_CTX *context);

void SHA1Update(SHA1_CTX *context, const unsigned char *data, uint32_t len);

void SHA1Final(unsigned char digest[20], SHA1_CTX *context);

void SHA1(char *hash_out, const char *str, int len);

#endif /* _SHA1_H */

sha1.c:

#if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_WARNINGS)
#define _CRT_SECURE_NO_WARNINGS
#endif

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sha1.h"

#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))

/* blk0() and blk() perform the initial expand. */
#if BYTE_ORDER == LITTLE_ENDIAN
#define blk0(i) (block->l[i] = (rol(block->l[i], 24) & 0xFF00FF00) | (rol(block->l[i], 8) & 0x00FF00FF))
#elif BYTE_ORDER == BIG_ENDIAN
#define blk0(i) block->l[i]
#else
#error "Endianness not defined!"
#endif
#define blk(i) (block->l[i & 15] = rol(block->l[(i + 13) & 15] ^ block->l[(i + 8) & 15] ^ block->l[(i + 2) & 15] ^ block->l[i & 15], 1))

/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
#define R0(v, w, x, y, z, i)                                     \
    z += ((w & (x ^ y)) ^ y) + blk0(i) + 0x5A827999 + rol(v, 5); \
    w = rol(w, 30);
#define R1(v, w, x, y, z, i)                                    \
    z += ((w & (x ^ y)) ^ y) + blk(i) + 0x5A827999 + rol(v, 5); \
    w = rol(w, 30);
#define R2(v, w, x, y, z, i)                            \
    z += (w ^ x ^ y) + blk(i) + 0x6ED9EBA1 + rol(v, 5); \
    w = rol(w, 30);
#define R3(v, w, x, y, z, i)                                          \
    z += (((w | x) & y) | (w & x)) + blk(i) + 0x8F1BBCDC + rol(v, 5); \
    w = rol(w, 30);
#define R4(v, w, x, y, z, i)                            \
    z += (w ^ x ^ y) + blk(i) + 0xCA62C1D6 + rol(v, 5); \
    w = rol(w, 30);

void SHA1Transform(uint32_t state[5], const unsigned char buffer[64])
{
    uint32_t a, b, c, d, e;

    typedef union {
        unsigned char c[64];
        uint32_t l[16];
    } CHAR64LONG16;

    CHAR64LONG16 block[1]; /* use array to appear as a pointer */

    memcpy(block, buffer, 64);
    /* Copy context->state[] to working vars */
    a = state[0];
    b = state[1];
    c = state[2];
    d = state[3];
    e = state[4];
    /* 4 rounds of 20 operations each. Loop unrolled. */
    R0(a, b, c, d, e, 0);
    R0(e, a, b, c, d, 1);
    R0(d, e, a, b, c, 2);
    R0(c, d, e, a, b, 3);
    R0(b, c, d, e, a, 4);
    R0(a, b, c, d, e, 5);
    R0(e, a, b, c, d, 6);
    R0(d, e, a, b, c, 7);
    R0(c, d, e, a, b, 8);
    R0(b, c, d, e, a, 9);
    R0(a, b, c, d, e, 10);
    R0(e, a, b, c, d, 11);
    R0(d, e, a, b, c, 12);
    R0(c, d, e, a, b, 13);
    R0(b, c, d, e, a, 14);
    R0(a, b, c, d, e, 15);
    R1(e, a, b, c, d, 16);
    R1(d, e, a, b, c, 17);
    R1(c, d, e, a, b, 18);
    R1(b, c, d, e, a, 19);
    R2(a, b, c, d, e, 20);
    R2(e, a, b, c, d, 21);
    R2(d, e, a, b, c, 22);
    R2(c, d, e, a, b, 23);
    R2(b, c, d, e, a, 24);
    R2(a, b, c, d, e, 25);
    R2(e, a, b, c, d, 26);
    R2(d, e, a, b, c, 27);
    R2(c, d, e, a, b, 28);
    R2(b, c, d, e, a, 29);
    R2(a, b, c, d, e, 30);
    R2(e, a, b, c, d, 31);
    R2(d, e, a, b, c, 32);
    R2(c, d, e, a, b, 33);
    R2(b, c, d, e, a, 34);
    R2(a, b, c, d, e, 35);
    R2(e, a, b, c, d, 36);
    R2(d, e, a, b, c, 37);
    R2(c, d, e, a, b, 38);
    R2(b, c, d, e, a, 39);
    R3(a, b, c, d, e, 40);
    R3(e, a, b, c, d, 41);
    R3(d, e, a, b, c, 42);
    R3(c, d, e, a, b, 43);
    R3(b, c, d, e, a, 44);
    R3(a, b, c, d, e, 45);
    R3(e, a, b, c, d, 46);
    R3(d, e, a, b, c, 47);
    R3(c, d, e, a, b, 48);
    R3(b, c, d, e, a, 49);
    R3(a, b, c, d, e, 50);
    R3(e, a, b, c, d, 51);
    R3(d, e, a, b, c, 52);
    R3(c, d, e, a, b, 53);
    R3(b, c, d, e, a, 54);
    R3(a, b, c, d, e, 55);
    R3(e, a, b, c, d, 56);
    R3(d, e, a, b, c, 57);
    R3(c, d, e, a, b, 58);
    R3(b, c, d, e, a, 59);
    R4(a, b, c, d, e, 60);
    R4(e, a, b, c, d, 61);
    R4(d, e, a, b, c, 62);
    R4(c, d, e, a, b, 63);
    R4(b, c, d, e, a, 64);
    R4(a, b, c, d, e, 65);
    R4(e, a, b, c, d, 66);
    R4(d, e, a, b, c, 67);
    R4(c, d, e, a, b, 68);
    R4(b, c, d, e, a, 69);
    R4(a, b, c, d, e, 70);
    R4(e, a, b, c, d, 71);
    R4(d, e, a, b, c, 72);
    R4(c, d, e, a, b, 73);
    R4(b, c, d, e, a, 74);
    R4(a, b, c, d, e, 75);
    R4(e, a, b, c, d, 76);
    R4(d, e, a, b, c, 77);
    R4(c, d, e, a, b, 78);
    R4(b, c, d, e, a, 79);
    /* Add the working vars back into context.state[] */
    state[0] += a;
    state[1] += b;
    state[2] += c;
    state[3] += d;
    state[4] += e;
    /* Wipe variables */
    a = b = c = d = e = 0;
    memset(block, '[=13=]', sizeof(block));
}

void SHA1Init(SHA1_CTX *context)
{
    /* SHA1 initialization constants */
    context->state[0] = 0x67452301;
    context->state[1] = 0xEFCDAB89;
    context->state[2] = 0x98BADCFE;
    context->state[3] = 0x10325476;
    context->state[4] = 0xC3D2E1F0;
    context->count[0] = context->count[1] = 0;
}

void SHA1Update(SHA1_CTX *context, const unsigned char *data, uint32_t len)
{
    uint32_t i;

    uint32_t j;

    j = context->count[0];
    if ((context->count[0] += len << 3) < j)
        context->count[1]++;
    context->count[1] += (len >> 29);
    j = (j >> 3) & 63;
    if ((j + len) > 63) {
        memcpy(&context->buffer[j], data, (i = 64 - j));
        SHA1Transform(context->state, context->buffer);
        for (; i + 63 < len; i += 64) {
            SHA1Transform(context->state, &data[i]);
        }
        j = 0;
    } else {
        i = 0;
    }
    memcpy(&context->buffer[j], &data[i], len - i);
}

void SHA1Final(unsigned char digest[20], SHA1_CTX *context)
{
    unsigned i;

    unsigned char finalcount[8];

    unsigned char c;

    for (i = 0; i < 8; i++)
        finalcount[i] = (unsigned char)((context->count[(i >= 4 ? 0 : 1)] >> ((3 - (i & 3)) * 8)) & 255); /* Endian independent */
    c = 0200;
    SHA1Update(context, &c, 1);
    while ((context->count[0] & 504) != 448) {
        c = 0000;
        SHA1Update(context, &c, 1);
    }
    SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */
    for (i = 0; i < 20; i++)
        digest[i] = (unsigned char)((context->state[i >> 2] >> ((3 - (i & 3)) * 8)) & 255);
    /* Wipe variables */
    memset(context, '[=13=]', sizeof(*context));
    memset(&finalcount, '[=13=]', sizeof(finalcount));
}

void SHA1(char *hash_out, const char *str, int len)
{
    SHA1_CTX ctx;
    unsigned char *hash = malloc(20);

    SHA1Init(&ctx);
    for (int i = 0; i < len; i += 1)
        SHA1Update(&ctx, (const unsigned char *)str + i, 1);
    SHA1Final(hash, &ctx);

    /* Convert to hex */
    for (int i = 0; i < 20; i++)
        sprintf(hash_out + i * 2, "%02x", hash[i]);
}

对象哈希函数：

char *object_hash(struct object *object)
{
    /* Get size of object */
    int size = object->size;
    /* Size to string */
    char *sizeStr = malloc(20);
    sprintf(sizeStr, "%d", size);
    size += strlen(objectTypeStrings[object->type]);
    size += strlen(sizeStr);
    size += 2;
    char *format = malloc(size + 1);
    sprintf(format, "%s %s[=14=]%s", objectTypeStrings[object->type], sizeStr, object->data);
    /* Get hash of object */
    char *hash = calloc(41, sizeof(char));
    SHA1(hash, format, size);
    free(format);
    free(sizeStr);
    return hash;
}

其他相关内容：

enum objectType {
    commit,
    tree,
    blob,
    tag
};

struct object {
    enum objectType type;
    int size;
    char *data;
};

char *objectTypeStrings[] = {
    "commit",
    "tree",
    "blob",
    "tag"
};

此处所有代码：https://github.com/arnavbhate/avcs

Answer 1

你的格式字符串中有一个文字 NUL ('[=10=]')，所以你的最后一个参数（数据）没有被添加（因为 C 字符串在第一个 NUL 结束）。

如果你想 sprintf 附加 NUL，你必须使用 %c 并将 '[=10=]' 作为参数传递（尽管 sprintf 已经把字符串末尾的空值，您实际上不需要）。

您还应注意，任意文件的数据中可能包含 NUL 字符，当作为 %s 传递时，这些字符将被 sprintf 截断。我建议您 memcpy 您的数据，而不是为此目的使用 sprintf。

（此外，您可以避免在尺寸字符串上调用 strlen，因为 sprintf returns 打印的字符数，不包括终止 NUL）

我还要补充一点，如@Nickolay Olshevsky 所示，您实际上不需要将数据复制到同一个缓冲区中，您只需将哈希值与 header 一起提供，然后是数据。

Answer 2

更可靠的解决方案是对数据使用 SHA1Init() 和后续 SHA1Update() 调用，以 SHA1Finish() 结束。当前的方法会很慢，因为在 SHA1() 函数调用中逐字节散列数据。 IE。类似于以下内容：


char *object_hash(struct object *object)
{
    SHA1_CTX ctx;
    SHA1Init(&ctx);
    /* Size to string */
    char sizeStr[20] = {0};
    snprintf(sizeStr, sizeof(sizeStr), "%d", object->size);
    SHA1Update(&ctx, (const uint8_t *) objectTypeStrings[object->type], strlen(objectTypeStrings[object->type]);
    SHA1Update(&ctx, (const uint8_t *) " ", 1);
    SHA1Update(&ctx, (const uint8_t *) sizeStr, strlen(sizeStr));
    SHA1Update(&ctx, (const uint8_t *) "", 1);
    SHA1Update(&ctx, (const uint8_t *) object->data, object->size);

    /* Get hash of object */
    unsigned char hash[20] = {0};
    SHA1Final(hash, &ctx);

    /* Convert to hex */
    char *hash_out = calloc(41, sizeof(char));
    for (size_t i = 0; i < 20; i++)
        sprintf(hash_out + i * 2, "%02x", hash[i]);
    return hash_out;
}

我的程序计算出的 git 对象的 SHA1 散列与来自 git 的对象不匹配

SHA1 hash of git object calculated by my program not matching the one from git

c

sha1