1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
#include "blake2s.h"
#include <endian.h>
#include <stdlib.h>
#include <string.h>
/*
 * Initialization vector: eight 32-bit words.
 * They seed the chaining value h in BLAKE2s_init() and fill the upper half
 * (words 8..15) of the work vector in F().
 */
const uint32_t IV[8] = {
    0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
    0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
};
/*
 * Message word schedule: one row per round, ten rounds in total.
 * Row r lists the indices of the 16 message words in the order they are
 * handed, two at a time, to the eight G() calls of round r.
 */
const uint8_t SIGMA[10][16] = {
    /* round 0 */ { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15},
    /* round 1 */ {14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3},
    /* round 2 */ {11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4},
    /* round 3 */ { 7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8},
    /* round 4 */ { 9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13},
    /* round 5 */ { 2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9},
    /* round 6 */ {12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11},
    /* round 7 */ {13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10},
    /* round 8 */ { 6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5},
    /* round 9 */ {10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0}
};
/*
 * Rotate a 32-bit word right by c bit positions.
 *
 * word: value to rotate.
 * c:    rotation count; only the low five bits are used, so 0 and 32 both
 *       return word unchanged.
 *
 * Both shift counts are reduced modulo 32 because shifting a 32-bit value by
 * 32 or more is undefined behaviour in C; the unmasked form
 * `word << (32 - c)` hits that case whenever c == 0.
 */
uint32_t
rotr_u32(uint32_t word, uint8_t c)
{
    const unsigned n = c & 31u;

    return (word >> n) | (word << ((32u - n) & 31u));
}
/*
 * Mixing function: stirs two message words into four words of the work
 * vector, in place.
 *
 * v:          16-word work vector, updated in place.
 * a, b, c, d: indices into v of the four words to mix.
 * x, y:       the two message words added during the first and second half.
 *
 * Each half is add / xor-rotate / add / xor-rotate, with rotation counts
 * 16 and 12 in the first half and 8 and 7 in the second.
 */
void
G(uint32_t v[16], unsigned a, unsigned b, unsigned c, unsigned d, uint32_t x, uint32_t y)
{
    /* first half: inject x */
    v[a] += v[b] + x;
    v[d] ^= v[a];
    v[d] = rotr_u32(v[d], 16);
    v[c] += v[d];
    v[b] ^= v[c];
    v[b] = rotr_u32(v[b], 12);

    /* second half: inject y */
    v[a] += v[b] + y;
    v[d] ^= v[a];
    v[d] = rotr_u32(v[d], 8);
    v[c] += v[d];
    v[b] ^= v[c];
    v[b] = rotr_u32(v[b], 7);
}
/*
 * Compression function: folds one 16-word message block into ctx->h.
 *
 * ctx: hashing context; h is read and updated, t[0] and t[1] are read.
 * m:   the message block as sixteen 32-bit words.
 * f:   non-zero when this is the last block; word 14 of the work vector is
 *      then bitwise inverted.
 */
void
F(struct BLAKE2s_ctx *ctx, uint32_t m[16], uint32_t f)
{
    uint32_t v[16];
    unsigned round, k;

    /* work vector: lower half from the chaining value, upper half from IV */
    for (k = 0; k < 8; ++k) {
        v[k] = ctx->h[k];
        v[k + 8] = IV[k];
    }

    /* fold the two counter words into words 12 and 13 */
    v[12] ^= ctx->t[0];
    v[13] ^= ctx->t[1];

    /* last-block flag: invert every bit of word 14 */
    if (f) {
        v[14] = ~v[14];
    }

    /* ten rounds of mixing, each with its own message word order */
    for (round = 0; round < 10; ++round) {
        const uint8_t *s = SIGMA[round];

        /* columns */
        G(v, 0, 4, 8, 12, m[s[0]], m[s[1]]);
        G(v, 1, 5, 9, 13, m[s[2]], m[s[3]]);
        G(v, 2, 6, 10, 14, m[s[4]], m[s[5]]);
        G(v, 3, 7, 11, 15, m[s[6]], m[s[7]]);

        /* diagonals */
        G(v, 0, 5, 10, 15, m[s[8]], m[s[9]]);
        G(v, 1, 6, 11, 12, m[s[10]], m[s[11]]);
        G(v, 2, 7, 8, 13, m[s[12]], m[s[13]]);
        G(v, 3, 4, 9, 14, m[s[14]], m[s[15]]);
    }

    /* fold both halves of the work vector back into the chaining value */
    for (k = 0; k < 8; ++k) {
        ctx->h[k] ^= (v[k] ^ v[k + 8]);
    }
}
/*
 * Prepare a context for hashing, optionally keyed.
 *
 * ctx:    context to initialize; it is zeroed first, so any previous state
 *         is discarded.
 * outlen: digest length in bytes, 1..32.
 * key:    key bytes; only read when keylen > 0.
 * keylen: key length in bytes, 0..32 (0 means unkeyed).
 *
 * Returns 0 on success, or -1 if ctx is NULL, outlen or keylen is out of
 * range, or keylen > 0 while key is NULL.
 */
int
BLAKE2s_init(struct BLAKE2s_ctx *ctx, uint8_t outlen, const void *key, uint8_t keylen)
{
    if (ctx == NULL || outlen == 0 || outlen > 32 || keylen > 32) {
        return -1;
    }
    /* a non-empty key must actually be supplied; it is copied below */
    if (keylen > 0 && key == NULL) {
        return -1;
    }

    memset(ctx, 0, sizeof *ctx);
    ctx->param.outlen = outlen;
    ctx->param.keylen = keylen;
    ctx->param.fanout = 1;
    ctx->param.depth = 1;

    /* chaining value starts out as the IV ... */
    memcpy(ctx->h, IV, sizeof IV);
    /* ... with the first parameter word (depth, fanout, keylen, outlen) xored into h[0] */
    ctx->h[0] ^= ((uint32_t)ctx->param.depth << 24) ^ ((uint32_t)ctx->param.fanout << 16) ^
                 ((uint32_t)keylen << 8) ^ (uint32_t)outlen;

    if (keylen > 0) {
        /*
         * The key becomes the first block. The buffer was zeroed above, so
         * marking it full pads the key with zeroes up to 64 bytes.
         */
        BLAKE2s_update(ctx, key, keylen);
        ctx->c = 64;
    }
    return 0;
}
/*
 * Smaller of two values. Each argument is parenthesized so that operands
 * containing low-precedence operators (e.g. `x | y`) compare correctly.
 * Arguments may be evaluated twice; do not pass expressions with side effects.
 */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
/*
 * Absorb dd bytes of input into the hash state.
 *
 * ctx: context previously set up by BLAKE2s_init().
 * d:   input bytes; not read when dd == 0.
 * dd:  number of input bytes.
 *
 * Input is staged in the 64-byte buffer ctx->b. A full buffer is compressed
 * only when more input arrives, so the most recent block always stays
 * buffered for BLAKE2s_final() to process with the last-block flag.
 */
void
BLAKE2s_update(struct BLAKE2s_ctx *ctx, const void *d, size_t dd)
{
    const uint8_t *src = d;
    size_t off = 0; /* size_t, so inputs larger than UINT_MAX bytes terminate */

    while (off < dd) {
        if (ctx->c == 64) { /* buffer full and more input pending: compress it */
            const uint8_t *p = (const uint8_t *)ctx->b;
            uint32_t m[16];
            unsigned j;

            /* advance the 64-bit byte counter t[1]:t[0], carrying on wrap */
            ctx->t[0] += ctx->c;
            if (ctx->t[0] < ctx->c) {
                ctx->t[1] += 1;
            }
            ctx->c = 0;

            /*
             * Decode the block as little-endian words one byte at a time.
             * This works for any host byte order and any buffer alignment,
             * and avoids reading the byte buffer through a uint32_t pointer.
             */
            for (j = 0; j < 16; ++j) {
                m[j] = (uint32_t)p[4 * j] | ((uint32_t)p[4 * j + 1] << 8) |
                       ((uint32_t)p[4 * j + 2] << 16) | ((uint32_t)p[4 * j + 3] << 24);
            }
            F(ctx, m, 0);
        }

        /* copy as much as fits into what is left of the buffer */
        size_t take = dd - off;
        const size_t room = (size_t)64 - (size_t)ctx->c;
        if (take > room) {
            take = room;
        }
        memcpy(&ctx->b[ctx->c], src + off, take);
        ctx->c += (uint8_t)take; /* take <= 64, so the narrowing is lossless */
        off += take;
    }
}
/*
 * Finish hashing and write the digest.
 *
 * ctx: context that has been fed with BLAKE2s_update(); its counter, buffer
 *      and chaining value are modified by this call.
 * out: destination for the digest; ctx->param.outlen bytes are written.
 *
 * The buffered bytes are counted, zero-padded to a full 64-byte block and
 * compressed with the last-block flag set. The digest is the chaining value
 * serialized as little-endian words, truncated to outlen bytes.
 */
void
BLAKE2s_final(struct BLAKE2s_ctx *ctx, void *out)
{
    uint8_t *digest = out;
    const uint8_t *p;
    uint32_t m[16];
    unsigned i;

    /* count the buffered bytes in the 64-bit counter t[1]:t[0] */
    ctx->t[0] += ctx->c;
    if (ctx->t[0] < ctx->c) {
        ctx->t[1] += 1;
    }

    /* pad the rest of the block with zeroes */
    for (; ctx->c < 64; ++(ctx->c)) {
        ctx->b[ctx->c] = 0;
    }

    /*
     * Decode the block as little-endian words one byte at a time. This works
     * for any host byte order and any buffer alignment, and avoids reading
     * the byte buffer through a uint32_t pointer.
     */
    p = (const uint8_t *)ctx->b;
    for (i = 0; i < 16; ++i) {
        m[i] = (uint32_t)p[4 * i] | ((uint32_t)p[4 * i + 1] << 8) |
               ((uint32_t)p[4 * i + 2] << 16) | ((uint32_t)p[4 * i + 3] << 24);
    }
    F(ctx, m, 1);

    /* emit h byte by byte, least significant byte of each word first */
    for (i = 0; i < ctx->param.outlen; ++i) {
        digest[i] = (uint8_t)((ctx->h[i >> 2] >> (8 * (i & 3))) & 0xff);
    }
}
|