#include "blake2s.h"

#include <endian.h>
#include <stdio.h> // TODO remove
#include <string.h>

/* Size in bytes of one BLAKE2s input block (the capacity of ctx->b). */
enum { BLAKE2S_IMPL_BLOCK_LEN = 64 };

/* BLAKE2s initialization vector. */
static const uint32_t IV[8] = {0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
                               0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19};

/* Message word schedule: row i gives the order in which the 16 message
 * words are fed to G() during round i. */
const uint8_t SIGMA[10][16] = {
    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
    {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
    {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
    {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
    {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
    {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
    {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
    {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
    {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
    {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}};

/*
 * Rotate a 32-bit word right by c bits.
 *
 * The count is reduced modulo 32 so that c == 0 (or a multiple of 32)
 * never produces a shift by the full word width, which is undefined in C.
 */
uint32_t rotr_u32(uint32_t word, uint8_t c)
{
    const unsigned r = c & 31u;

    return (word >> r) | (word << ((32u - r) & 31u));
}

/*
 * Mixing function: mixes the two message words x and y into the four
 * work-vector words v[a], v[b], v[c], v[d], in place.
 */
void G(uint32_t v[16], unsigned a, unsigned b, unsigned c, unsigned d,
       uint32_t x, uint32_t y)
{
    v[a] = v[a] + v[b] + x;
    v[d] = rotr_u32(v[d] ^ v[a], 16);
    v[c] = v[c] + v[d];
    v[b] = rotr_u32(v[b] ^ v[c], 12);
    v[a] = v[a] + v[b] + y;
    v[d] = rotr_u32(v[d] ^ v[a], 8);
    v[c] = v[c] + v[d];
    v[b] = rotr_u32(v[b] ^ v[c], 7);
}

/*
 * Compression function: folds one block of 16 message words into ctx->h.
 *
 * ctx  hashing context; reads h[0..7] and t[0..1], updates h[0..7]
 * m    the 16 message words of the block, already in host byte order
 * f    non-zero when this is the last block of the message
 */
void F(struct BLAKE2s_ctx *ctx, uint32_t m[16], uint32_t f)
{
    // Local work vector: chaining value in the lower half, IV mixed with the
    // byte counter in the upper half.
    uint32_t v[16] = {ctx->h[0], ctx->h[1], ctx->h[2], ctx->h[3],
                      ctx->h[4], ctx->h[5], ctx->h[6], ctx->h[7],
                      IV[0], IV[1], IV[2], IV[3],
                      IV[4] ^ ctx->t[0], IV[5] ^ ctx->t[1], IV[6], IV[7]};

    if (f) {
        v[14] = ~v[14]; // last block flag: invert all bits
    }

    // Ten rounds of mixing: four column steps followed by four diagonal steps.
    for (unsigned round = 0; round < 10; ++round) {
        const uint8_t *s = SIGMA[round];

        G(v, 0, 4, 8, 12, m[s[0]], m[s[1]]);
        G(v, 1, 5, 9, 13, m[s[2]], m[s[3]]);
        G(v, 2, 6, 10, 14, m[s[4]], m[s[5]]);
        G(v, 3, 7, 11, 15, m[s[6]], m[s[7]]);

        G(v, 0, 5, 10, 15, m[s[8]], m[s[9]]);
        G(v, 1, 6, 11, 12, m[s[10]], m[s[11]]);
        G(v, 2, 7, 8, 13, m[s[12]], m[s[13]]);
        G(v, 3, 4, 9, 14, m[s[14]], m[s[15]]);
    }

    // XOR the two halves of the work vector into the chaining value.
    for (unsigned i = 0; i < 8; ++i) {
        ctx->h[i] ^= (v[i] ^ v[i + 8]);
    }
}

/* Add the number of currently buffered bytes (ctx->c) to the two-word
 * message byte counter ctx->t, propagating the carry into t[1]. */
static void blake2s_count_buffered(struct BLAKE2s_ctx *ctx)
{
    ctx->t[0] += ctx->c;
    if (ctx->t[0] < ctx->c) {
        ctx->t[1] += 1;
    }
}

/*
 * Decode the buffered block ctx->b as 16 little-endian 32-bit words and
 * compress it.
 *
 * The words are assembled byte by byte into a local array, so the result is
 * the same on any host byte order and the byte buffer is never accessed
 * through a uint32_t pointer.
 */
static void blake2s_compress_buffer(struct BLAKE2s_ctx *ctx, uint32_t last)
{
    const uint8_t *p = (const uint8_t *)ctx->b;
    uint32_t m[16];

    for (unsigned i = 0; i < 16; ++i, p += 4) {
        m[i] = (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
               ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
    }
    F(ctx, m, last);
}

/*
 * Initialise a hashing context.
 *
 * ctx     context to initialise; fully overwritten
 * outlen  digest length in bytes, 1..32
 * key     optional key, may be NULL when keylen is 0
 * keylen  key length in bytes, 0..32
 *
 * Returns 0 on success, -1 if outlen or keylen is out of range or if key is
 * NULL while keylen is non-zero.
 */
int BLAKE2s_init(struct BLAKE2s_ctx *ctx, size_t outlen, const void *key,
                 size_t keylen)
{
    if (outlen == 0 || outlen > 32 || keylen > 32) {
        printf("invalid outlen=%zu or keylen=%zu\n", outlen, keylen); // TODO remove
        return -1;
    }
    if (keylen > 0 && key == NULL) {
        return -1;
    }

    memset(ctx, 0, sizeof *ctx);
    ctx->param.outlen = outlen;
    ctx->param.keylen = keylen;
    ctx->param.fanout = 1;
    ctx->param.depth = 1;
    ctx->outlen = outlen;

    // Start from the IV ...
    for (unsigned i = 0; i < 8; ++i) {
        ctx->h[i] = IV[i];
    }

    // ... and mix the first parameter word (depth, fanout, key length,
    // digest length) into h[0].
    ctx->h[0] ^= ((uint32_t)ctx->param.depth << 24) ^
                 ((uint32_t)ctx->param.fanout << 16) ^
                 ((uint32_t)keylen << 8) ^ (uint32_t)outlen;

    if (keylen > 0) {
        // The key becomes the first block. The buffer was zeroed by the
        // memset above, so marking it full pads the key with zero bytes.
        BLAKE2s_update(ctx, key, keylen);
        ctx->c = BLAKE2S_IMPL_BLOCK_LEN;
    }
    return 0;
}

#define MIN(a, b) (((a) < (b)) ? (a) : (b))

/*
 * Absorb dd bytes from d into the hash state.
 *
 * A full buffer is only compressed once more input arrives, so that the
 * final block is always still buffered when BLAKE2s_final() runs.
 */
void BLAKE2s_update(struct BLAKE2s_ctx *ctx, const void *d, size_t dd)
{
    const uint8_t *src = d;

    while (dd > 0) {
        if (ctx->c == BLAKE2S_IMPL_BLOCK_LEN) {
            // Buffer is full and more data follows: consume it as a
            // non-final block.
            blake2s_count_buffered(ctx);
            ctx->c = 0;
            blake2s_compress_buffer(ctx, 0);
        }

        const size_t room = (size_t)BLAKE2S_IMPL_BLOCK_LEN - (size_t)ctx->c;
        const size_t take = MIN(dd, room);

        memcpy((uint8_t *)ctx->b + ctx->c, src, take);
        ctx->c += take;
        src += take;
        dd -= take;
    }
}

/*
 * Finish hashing and write ctx->outlen digest bytes to out.
 *
 * The buffered data is zero-padded to a full block and compressed with the
 * last-block flag set; the chaining value is then serialised little-endian.
 */
void BLAKE2s_final(struct BLAKE2s_ctx *ctx, void *out)
{
    uint8_t *digest = out;

    // Count only the real message bytes, before padding.
    blake2s_count_buffered(ctx);

    if (ctx->c < BLAKE2S_IMPL_BLOCK_LEN) {
        memset((uint8_t *)ctx->b + ctx->c, 0,
               (size_t)BLAKE2S_IMPL_BLOCK_LEN - (size_t)ctx->c);
        ctx->c = BLAKE2S_IMPL_BLOCK_LEN;
    }

    blake2s_compress_buffer(ctx, 1);

    for (size_t i = 0; i < ctx->outlen; ++i) {
        digest[i] = (uint8_t)((ctx->h[i >> 2] >> (8 * (i & 3))) & 0xff);
    }
}

/*
 * One-shot convenience wrapper: hash inlen bytes of in, optionally keyed,
 * into outlen bytes at out.
 *
 * Returns 0 on success, -1 if BLAKE2s_init() rejects the parameters.
 */
int BLAKE2s(void *out, size_t outlen, const void *key, size_t keylen,
            const void *in, size_t inlen)
{
    struct BLAKE2s_ctx ctx;

    if (BLAKE2s_init(&ctx, outlen, key, keylen)) {
        return -1;
    }
    BLAKE2s_update(&ctx, in, inlen);
    BLAKE2s_final(&ctx, out);
    return 0;
}