错误的 SHA-1 哈希

Wrong SHA-1 hash

我打算将 AVR-Crypto 的 SHA-1 implementation 用于 HMAC。但是,我似乎无法生成正确的 SHA-1 总和。

例如,如果我用下面的代码调用函数

  unsigned char sha1sum[20];
  char *msg = "FFFFFFFFFF";

  sha1( sha1sum, msg, strlen(msg));

我得到 000000000000000000002C002312290000000029 而不是预期的 c1bb92851109fe950a2655fa1d4ba1d04719f6fb。有谁知道可能出了什么问题?这是 AVR-Crypto 的实现

#include <string.h> /* memcpy & co */
#include <stdint.h>
#include "config.h"
#include "debug.h"
#include "sha1.h"

#ifdef DEBUG
#  undef DEBUG
#endif

#include "cli.h"

#define LITTLE_ENDIAN

/********************************************************************************************************/

/**
 * \brief initialises given SHA-1 context
 *
 */
void sha1_init(sha1_ctx_t *state){
    /* the five initial chaining words, in h[0]..h[4] order */
    static const uint32_t initial_h[5] = {
        0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0
    };
    uint8_t idx;

    DEBUG_S("\r\nSHA1_INIT");
    for(idx=0; idx<5; ++idx){
        state->h[idx] = initial_h[idx];
    }
    /* nothing has been hashed yet */
    state->length = 0;
}

/********************************************************************************************************/
/* some helping functions */
/**
 * \brief rotate a 32-bit word to the left
 *
 * The rotation count is reduced modulo 32, so a count of 0 (or any multiple
 * of 32) returns n unchanged. Without the reduction, a count of 0 would shift
 * right by the full 32-bit width, which is undefined behaviour in C.
 * \param n    word to rotate
 * \param bits number of bit positions to rotate by
 * \return n rotated left by (bits mod 32) positions
 */
uint32_t rotl32(uint32_t n, uint8_t bits){
    uint8_t left  = bits & 31u;
    uint8_t right = (uint8_t)((32u - left) & 31u); /* 0 when left is 0 */
    return ((n<<left) | (n>>right));
}

/* reverse the order of the four bytes of a 32-bit word */
uint32_t change_endian32(uint32_t x){
    uint32_t lowest  =  x        & 0x000000ff;
    uint32_t low_mid = (x >>  8) & 0x000000ff;
    uint32_t top_mid = (x >> 16) & 0x000000ff;
    uint32_t highest = (x >> 24) & 0x000000ff;

    return (lowest << 24) | (low_mid << 16) | (top_mid << 8) | highest;
}


/* three SHA-1 inner functions */
/* "choose": each bit of x selects the matching bit of y (x bit set) or z (x bit clear) */
uint32_t ch(uint32_t x, uint32_t y, uint32_t z){
    DEBUG_S("\r\nCH");
    return (z ^ (x & (y ^ z)));
}

/* "majority": a result bit is set when at least two of the three input bits are set */
uint32_t maj(uint32_t x, uint32_t y, uint32_t z){
    DEBUG_S("\r\nMAJ");
    return ((x & y) | (z & (x | y)));
}

/* bitwise parity (exclusive-or) of the three inputs */
uint32_t parity(uint32_t x, uint32_t y, uint32_t z){
    uint32_t acc = z;
    DEBUG_S("\r\nPARITY");
    acc ^= y;
    acc ^= x;
    return acc;
}

/********************************************************************************************************/
/**
 * \brief "add" a block to the hash
 * This is the core function of the hash algorithm. To understand how it works
 * and what those variables do, take a look at FIPS 180 (Secure Hash Standard).
 * This is an "alternative" implementation
 */

#define MASK 0x0000000f

typedef uint32_t (*pf_t)(uint32_t x, uint32_t y, uint32_t z);

/*
 * Mixes one 64-byte message block into the chaining value state->h[] by
 * running 80 rounds over a working copy, then adds 512 to state->length.
 *
 * state  context whose h[] words and length field are updated
 * block  message block; it is read as 16 uint32_t words through a pointer cast
 */
void sha1_nextBlock (sha1_ctx_t *state, const void *block){
    uint32_t a[5];   /* working copy of the five state words (a,b,c,d,e) */
    uint32_t w[16];  /* message schedule, used as a 16-entry ring via "& MASK" */
    uint32_t temp;   /* new value of a[0] computed in each round */
    uint8_t t,s,fi, fib; /* t: round number, s: ring slot, fi: stage 0..3, fib: round within the stage */
    pf_t f[] = {ch,parity,maj,parity}; /* round function of each 20-round stage */
    uint32_t k[4]={ 0x5a827999,
                    0x6ed9eba1,
                    0x8f1bbcdc,
                    0xca62c1d6}; /* additive constant of each stage */

    /* load the w array; every input word is byte-swapped here, unconditionally */
    for(t=0; t<16; ++t){
        w[t] = change_endian32(((uint32_t*)block)[t]);
    }

#if DEBUG
    /* debug only: dump the 16 loaded words */
    uint8_t dbgi;
    for(dbgi=0; dbgi<16; ++dbgi){
        /*
        DEBUG_S("\n\rBlock:");
        DEBUG_B(dbgi);
        DEBUG_C(':');
        */
        cli_putstr_P(PSTR("\r\nBlock:"));
        cli_hexdump(&dbgi, 1);
        cli_putc(':');
        cli_hexdump(&(w[dbgi]) ,4);
    }
#endif

    /* load the state */
    memcpy(a, state->h, 5*sizeof(uint32_t));


    /* the fun stuff: 80 rounds, four stages of 20 rounds each */
    for(fi=0,fib=0,t=0; t<=79; ++t){
        s = t & MASK; /* ring slot belonging to round t */
        if(t>=16){
            /* expand the schedule in place: w[s] still holds the word of round
             * t-16, and slots s+13, s+8, s+2 hold those of rounds t-3, t-8, t-14 */
            #if DEBUG
             DEBUG_S("\r\n ws = "); cli_hexdump(&(w[s]), 4);
            #endif
            w[s] = rotl32( w[(s+13)&MASK] ^ w[(s+8)&MASK] ^
                 w[(s+ 2)&MASK] ^ w[s] ,1);
            /* NOTE(review): this guard is "#ifdef" while the other debug
             * sections of this function use "#if" - confirm which is intended */
            #ifdef DEBUG
             DEBUG_S(" --> ws = "); cli_hexdump(&(w[s]), 4);
            #endif
        }

        /* dtemp keeps the round-function result so the debug dump below can print it */
        uint32_t dtemp;
        temp = rotl32(a[0],5) + (dtemp=f[fi](a[1],a[2],a[3])) + a[4] + k[fi] + w[s];
        memmove(&(a[1]), &(a[0]), 4*sizeof(uint32_t)); /* e=d; d=c; c=b; b=a; */
        a[0] = temp;
        a[2] = rotl32(a[2],30); /* a[2] now holds the old b; we might also do rotr32(c,2) */
        /* after 20 rounds move on to the next stage's function and constant */
        fib++;
        if(fib==20){
            fib=0;
            fi = (fi+1)%4;
        }
        #if DEBUG
        /* debug dump */
        DEBUG_S("\r\nt = "); DEBUG_B(t);
        DEBUG_S("; a[]: ");
         cli_hexdump(a, 5*4);
        DEBUG_S("; k = ");
         cli_hexdump(&(k[t/20]), 4);
        DEBUG_S("; f(b,c,d) = ");
         cli_hexdump(&dtemp, 4);
        #endif
    }

    /* update the state: add the working words onto the chaining value */
    for(t=0; t<5; ++t){
        state->h[t] += a[t];
    }
    /* account for the 512 bits just processed */
    state->length += 512;
}

/********************************************************************************************************/

/*
 * Processes the remaining message data and finalizes the context.
 *
 * Whole 512-bit blocks still contained in the input are handed to
 * sha1_nextBlock() first. The remaining bits are copied into a zeroed local
 * block, a single 1 bit is appended, the length field is stored in the last
 * 8 bytes of the block, and that block is processed as well.
 *
 * state   context to update
 * block   remaining message data
 * length  number of remaining message bits
 */
void sha1_lastBlock(sha1_ctx_t *state, const void *block, uint16_t length){
    uint8_t lb[SHA1_BLOCK_BYTES]; /* local block that receives the padded tail */
    /* consume any complete blocks that are still in the input */
    while(length>=SHA1_BLOCK_BITS){
        sha1_nextBlock(state, block);
        length -= SHA1_BLOCK_BITS;
        block = (uint8_t*)block + SHA1_BLOCK_BYTES;
    }
    /* add the leftover bits to the running length */
    state->length += length;
    memset(lb, 0, SHA1_BLOCK_BYTES);
    /* copy the leftover bits, rounded up to whole bytes */
    memcpy (lb, block, (length+7)>>3);

    /* set the final one bit, directly after the last message bit */
    lb[length>>3] |= 0x80>>(length & 0x07);

    if (length>512-64-1){ /* not enough space left for the 64-bit length value */
        sha1_nextBlock(state, lb);
        /* sha1_nextBlock() added 512 for this padding block; take it back */
        state->length -= 512;
        memset(lb, 0, SHA1_BLOCK_BYTES);
    }
    /* store the 64-bit length value in bytes 56..63 */
#if defined LITTLE_ENDIAN
        /* this is now rolled up */
    /* copies 8 bytes starting at &state->length in reversed order.
     * NOTE(review): this reads 8 bytes, but the sha1_ctx_t in the header
     * declares length as uint8_t - confirm the field width */
    uint8_t i;
    for (i=0; i<8; ++i){
        lb[56+i] = ((uint8_t*)&(state->length))[7-i];
    }
#elif defined BIG_ENDIAN
    /* NOTE(review): the cast below is to uint64_t, not to a pointer type; this
     * branch is not compiled while LITTLE_ENDIAN is defined - verify before use */
    *((uint64_t)&(lb[56])) = state->length;
#endif
    sha1_nextBlock(state, lb);
}

/********************************************************************************************************/

/*
 * Writes the hash value held in the context to dest.
 *
 * dest   destination buffer; five 32-bit words (SHA1_HASH_BITS/8 bytes) are written
 * state  context to read the five h[] words from
 */
void sha1_ctx2hash (void *dest, sha1_ctx_t *state){
#if defined LITTLE_ENDIAN
    /* byte-swap each state word and store it through a uint32_t pointer cast */
    uint8_t i;
    for(i=0; i<5; ++i){
        ((uint32_t*)dest)[i] = change_endian32(state->h[i]);
    }
/* NOTE(review): this tests the value of BIG_ENDIAN, whereas sha1_lastBlock
 * uses "#elif defined BIG_ENDIAN" - confirm which form is intended */
#elif BIG_ENDIAN
    /* state words are copied as they are; skipped when dest already is state->h */
    if (dest != state->h)
        memcpy(dest, state->h, SHA1_HASH_BITS/8);
#else
# error unsupported endian type!
#endif
}

/********************************************************************************************************/
/**
 *
 *
 */
/*
 * One-call hashing of a message held in memory: initialises a context,
 * feeds every complete 512-bit block to sha1_nextBlock(), passes the rest to
 * sha1_lastBlock() and writes the resulting hash to dest.
 * The length argument counts bits.
 */
void sha1 (void *dest, const void *msg, uint32_t length){
    sha1_ctx_t ctx;
    const uint8_t *cursor = (const uint8_t*)msg;

    DEBUG_S("\r\nBLA BLUB");
    sha1_init(&ctx);
    /* one iteration per complete block */
    for( ; length >= SHA1_BLOCK_BITS; length -= SHA1_BLOCK_BITS){
        DEBUG_S("\r\none block");
        sha1_nextBlock(&ctx, cursor);
        cursor += SHA1_BLOCK_BITS/8; /* step to the next block */
    }
    /* fewer than 512 bits remain at this point */
    sha1_lastBlock(&ctx, cursor, length);
    sha1_ctx2hash(dest, &ctx);
}

这是header:

#ifndef SHA1_H_
#define SHA1_H_

#include "stdint.h"
/** \def SHA1_HASH_BITS
 * defines the size of a SHA-1 hash in bits
 */

/** \def SHA1_HASH_BYTES
 * defines the size of a SHA-1 hash in bytes
 */

/** \def SHA1_BLOCK_BITS
 * defines the size of a SHA-1 input block in bits
 */

/** \def SHA1_BLOCK_BYTES
 * defines the size of a SHA-1 input block in bytes
 */
#define SHA1_HASH_BITS  160
#define SHA1_HASH_BYTES (SHA1_HASH_BITS/8)
#define SHA1_BLOCK_BITS 512
#define SHA1_BLOCK_BYTES (SHA1_BLOCK_BITS/8)

/** \typedef sha1_ctx_t
 * \brief SHA-1 context type
 * 
 * A variable of this type may hold the state of a SHA-1 hashing process
 */
typedef struct {
    uint32_t h[5];      /* the five 32-bit words of the running hash value */
//  uint64_t length;
    /* NOTE(review): sha1_nextBlock adds 512 to this field per block and
     * sha1_lastBlock copies 8 bytes starting at its address; an 8-bit field
     * accommodates neither - confirm the intended width */
    uint8_t length;     /* running message length counter */
} sha1_ctx_t;

/** \typedef sha1_hash_t
 * \brief hash value type
 * A variable of this type may hold a SHA-1 hash value 
 */
/*
typedef uint8_t sha1_hash_t[SHA1_HASH_BITS/8];
*/

/** \fn sha1_init(sha1_ctx_t *state)
 * \brief initializes a SHA-1 context
 * This function sets a ::sha1_ctx_t variable to the initialization vector
 * for SHA-1 hashing.
 * \param state pointer to the SHA-1 context variable
 */
void sha1_init(sha1_ctx_t *state);

/** \fn sha1_nextBlock(sha1_ctx_t *state, const void *block)
 *  \brief process one input block
 * This function processes one input block and updates the hash context 
 * accordingly
 * \param state pointer to the state variable to update
 * \param block pointer to the message block to process
 */
void sha1_nextBlock (sha1_ctx_t *state, const void *block);

/** \fn sha1_lastBlock(sha1_ctx_t *state, const void *block, uint16_t length_b)
 * \brief processes the given block and finalizes the context
 * This function processes the last block in a SHA-1 hashing process.
 * The block should have a maximum length of a single input block.
 * \param state pointer to the state variable to update and finalize
 * \param block pointer to the message block to process
 * \param length_b length of the message block in bits  
 */
void sha1_lastBlock (sha1_ctx_t *state, const void *block, uint16_t length_b);

/** \fn sha1_ctx2hash(sha1_hash_t *dest, sha1_ctx_t *state)
 * \brief convert a state variable into an actual hash value
 * Writes the hash value corresponding to the state to the memory pointed by dest.
 * \param dest pointer to the hash value destination
 * \param state pointer to the hash context
 */ 
void sha1_ctx2hash (void *dest, sha1_ctx_t *state);

/** \fn sha1(sha1_hash_t *dest, const void *msg, uint32_t length_b)
 * \brief hashing a message which is located entirely in RAM
 * This function automatically hashes a message which is entirely in RAM with
 * the SHA-1 hashing algorithm.
 * \param dest pointer to the hash value destination
 * \param msg  pointer to the message which should be hashed
 * \param length_b length of the message in bits
 */ 
void sha1(void *dest, const void *msg, uint32_t length_b);



#endif /*SHA1_H_*/

更新:如果我用 unsigned char sha1sum[20] = 0; 初始化 sha1sum,得到的校验和全是 0x00。

问题的代码中至少有两个错误(详见下文),但它们都无法解释所显示的结果,也无法解释为什么在调用代码中写 unsigned char sha1sum[20] = {0} 会改变结果。从我们读到的 C 源代码到机器代码的翻译过程出了问题!很有可能 sha1_ctx2hash 没有把结果写到它应该写的地方。

问题可能出在问题中没有贴出的某个头文件里,也可能是编译器的 bug……由于我们使用的是 8051,问题也可能出在指针类型上,特别是指针转换:转换必须发生在大小相同的指针之间。

此外,能确定这个 8051 编译器是 little-endian 吗?常见的 Keil C51 似乎采用 big-endian 约定。这是编译器及其支持库的任意选择,因为原始 8051 上没有任何处理多字节数据的指令;最接近的是 LCALL,它压栈的顺序是 little-endian,而 LJMP 和 MOV DPTR,# 的指令编码是 big-endian。更新:我们得知编译器来自 IAR。根据 IAR 的文档,第 5 版是 big-endian,第 6 版改成了 little-endian。

更新:我们发现了另一个关键问题(除了可能不安全的指针转换和下面讨论的两个错误之外)。在排查过程中的某个时刻,我们用一个不依赖字节顺序、也没有指针转换的单一函数替换了原代码,输出变为 0000eb1700007f3d000004f0000059290000fc21,这表明本应是 32 位的值被截断成了 16 位。事实上,提问者透露:

I have this in my stdint.h:   typedef unsigned uint32_t;

这仅在 unsigned int 恰好为 32 位的编译器上才正确,而 C 标准给出的唯一保证是它至少有 16 位;大多数面向不足 32 位 CPU 的 C 编译器都采用这个最小值(出于效率原因;有些编译器甚至提供选项来禁用字节操作数到整数的提升,并且乐于让 80+80+96 的结果变成 0)。


测试代码中的错误:sha1( sha1sum, msg, strlen(msg))应该是sha1( sha1sum, msg, strlen(msg)*8)之类的,因为长度参数是以位为单位的。

sha1_lastBlock 中与头文件相关的错误:代码写的是

for (i=0; i<8; ++i){
    lb[56+i] = ((uint8_t*)&(state->length))[7-i];
}

假设state->length是8个字节,其实不是,因为uint64_t length在header中被改成了uint8_t length(常见的是uint64_t 在 8051 编译器上不可用)。 big-endian 案例的代码(目前未编译)也受到影响。

如果确实使用 uint8_t length,并且由此带来的最多 31 个字节的长度限制是可以接受的,那么 little-endian 和 big-endian 两种情况都可以简化为 lb[SHA1_BLOCK_BYTES-1] = state->length;(不需要循环)。

或者,对于 length 可能使用的任何无符号类型和字节序:

for (i = SHA1_BLOCK_BYTES; state->length != 0; state->length >>= 8)
    lb[--i] = (uint8_t)(state->length);

注意:代码 *((uint64_t*)&(lb[56])) = state->length 会把 length 的 8 个字节写入数组 lb[] 的末尾,但只有在 uint64_t 实现正确的 big-endian 机器上结果才正确。


代码在 (length+7)%8 < 6 时还有一个潜在的额外问题:要散列的最后一个字节中至少有一位没有被屏蔽掉,如果该位被置位,它就会进入散列并导致结果错误。在散列完整字节的用例中,这不会造成危害。


原始代码可能是正确的(除了上述潜在的额外问题),但考虑到目标是通过一次调用对内存中的数据进行散列(sha1 做的正是这件事),它既不紧凑也不易读。除其他问题外:

  • sha1_lastBlock 中有(正确的)块循环,因此头文件中“块的最大长度应为单个输入块”这一限制性措辞并不成立;
  • 这使得 sha1 中的另一个块循环变得多余;
  • 如果使用 uint8_t length 或以其他方式散列小于 56 字节,则可以删除这两个循环;
  • 16 字节的 memmove,以及通过索引表(函数指针数组)进行的函数调用,都可能拖慢循环;
  • little-endian 情况下的字节序转换效率很低;
  • sha1_ctx2hash 中,#elif BIG_ENDIAN 在我的心理编译器中触发了一个错误,因为 BIG_ENDIAN 似乎未定义,而 #elif 应该有一个参数;应该是 #elif defined BIG_ENDIAN(如上面几行所用);
  • pf_t f[] = {ch,parity,maj,parity}; 很适合声明为 const,或许还应加上 static:我用过的每一个 8051 C 编译器都不会识别出数组在初始化之后不再改变、因而可以固化在代码中;
  • 对于这样的编译器,不必要地使用函数指针(如上所述)是一种屡试不爽的损害性能的方法,甚至更糟;至少它会妨碍调用树分析,而这种分析是把自动变量以覆盖(overlay)方式分配到静态地址所必需的,后者又能显著改善性能和代码大小。

如果您追求的是速度,那么您的起始代码是不够的,而且没有什么能与汇编语言相媲美。就像二十年前,我为一些 8051 工具链编写了 SHA-1,与仅使用 C 相比,汇编调优节省了大量成本(IIRC:主要是因为 32 位旋转从性能的角度来看很糟糕)。


已更新:下面是散列短消息的示例代码,它与字节序无关,没有任何指针转换,也不依赖 <stdint.h>(事实证明所用编译器的 <stdint.h> 并不合格)。请注意,length 参数以字节(而不是位)为单位,并且限制为 55 个字节,因此无法在其之上实现 HMAC-SHA-1。这是为了保持代码简单:超过这个限制,就需要多次迭代压缩函数,于是要么出现大量重复代码,要么至少需要两个函数,要么需要某种状态机。

#include <limits.h> // for UCHAR_MAX, UINT_MAX, ULONG_MAX

// One-shot SHA-1 for a short message that fits a single 64-byte block.
//   hash   : receives the 20-byte digest; it may overlap msg, because msg is
//            read completely before the first digest byte is stored
//   msg    : message bytes
//   length : message size in bytes, 55 at most
// CAUTION: for length>55 the digest is wrong, and for length>59 second-preimage
// resistance (and with it collision resistance) is lost.
void sha1upto55bytes(
          unsigned char *hash,  // result, 20 bytes
    const unsigned char *msg,   // bytes to hash
          unsigned char length  // length of msg in bytes, maximum 55
    )
{
    // Local stand-ins for the <stdint.h> types, so that the routine also
    // builds with old embedded compilers that lack a usable <stdint.h>.
#if 255==UCHAR_MAX
    typedef unsigned char uint8_t;
#endif
#if 16383==UINT_MAX>>9>>9
    typedef unsigned int uint32_t;
#elif  16383==ULONG_MAX>>9>>9
    typedef unsigned long uint32_t;
#endif

    // Message schedule: 16 words, later reused as a ring buffer.
    // Its bound doubles as a compile-time check: it turns into -1, which
    // should stop compilation, unless uint8_t has 8 bits, uint32_t has
    // 32 bits and integer promotion takes place.
    uint32_t sched[
        99==(uint8_t)355              &&
        4303==(uint32_t)(-1)/999u/999 &&
        440==(uint8_t)55<<3
        ? 16 : -1];

    // Wrapping the five words in a struct allows plain struct assignment
    typedef struct state_t { uint32_t q[5]; } state_t;

    // Initial chaining value; kept constant because it is needed again for
    // the final additions
    const state_t iv = {{ 0x67452301,0xefcdab89,0x98badcfe,0x10325476,0xc3d2e1f0 }};

    // Working variables a..e live in v.q[0..4]
    // (on 8051, best placed in internal RAM)
    state_t v = iv;

    uint32_t acc;   // scratch accumulator (on 8051, best placed in internal RAM)
    uint32_t wt;    // schedule word of the current round
    uint8_t  i;     // byte / round / word counter

    // Assemble bytes 0..59 of the block, most significant byte first within
    // each word: message bytes, then one 0x80 padding byte, then zeroes.
    // Working byte by byte keeps this independent of endianness and of the
    // alignment of msg.
    acc = 0;
    for (i = 0; i != 60; ++i) {
        acc <<= 8;
        if (i < length) {
            acc |= *msg++;          // message byte
        } else if (i == length) {
            acc |= 0x80;            // padding byte
        }
        if ((i & 3) == 3) {
            sched[i >> 2] = acc;    // a word is complete
        }
    }
    // Low word of the bit length; relies on integer promotion once length>31
    sched[15] = length << 3;

    // The 80 rounds of the compression function
    for (i = 0; i != 80; ++i) {
        // Schedule word for this round; from round 16 on it is derived from
        // earlier words and written back into the ring
        wt = sched[i & 15];
        if (i >= 16) {
            wt ^= sched[(i + 2) & 15];
            wt ^= sched[(i + 8) & 15];
            wt ^= sched[(i + 13) & 15];
            wt = (wt << 1) | (wt >> 31);
            sched[i & 15] = wt;
        }

        // Round function plus round constant, chosen by groups of 20 rounds
        if (i < 20) {
            acc = (((v.q[2] ^ v.q[3]) & v.q[1]) ^ v.q[3]) + 0x5A827999;
        } else if (i < 40) {
            acc = (v.q[1] ^ v.q[2] ^ v.q[3]) + 0x6ED9EBA1;
        } else if (i < 60) {
            acc = (((v.q[1] | v.q[2]) & v.q[3]) | (v.q[1] & v.q[2])) + 0x8F1BBCDC;
        } else {
            acc = (v.q[1] ^ v.q[2] ^ v.q[3]) + 0xCA62C1D6;
        }

        // Common part of every round
        acc += (v.q[0] << 5) | (v.q[0] >> 27);
        acc += v.q[4];
        acc += wt;

        // Shift the working variables along: e=d; d=c; c=rotl(b,30); b=a; a=new
        v.q[4] = v.q[3];
        v.q[3] = v.q[2];
        v.q[2] = (v.q[1] << 30) | (v.q[1] >> 2);
        v.q[1] = v.q[0];
        v.q[0] = acc;
    }

    // Add the initial value back in (taken from the constant rather than from
    // a RAM copy) and emit each word most significant byte first; this is
    // again independent of endianness and of the alignment of hash.
    for (i = 0; i != 5; ++i) {
        acc = v.q[i] + iv.q[i];
        hash[0] = (uint8_t)(acc >> 24);
        hash[1] = (uint8_t)(acc >> 16);
        hash[2] = (uint8_t)(acc >>  8);
        hash[3] = (uint8_t)(acc      );
        hash += 4;
    }
}