/*
 * path: root/lib/blake2/blake2s.c
 * blob: 9206c89be914fce0edb9842671647370e7e546c1
 */
#include "blake2s.h"
#include <endian.h>
#include <stdlib.h>
#include <string.h>

/*
 * Initialization vector: eight 32-bit words.  BLAKE2s_init copies them into
 * the chaining state h, and F loads them into the upper half of its work
 * vector on every compression.
 */
const uint32_t IV[8] = {
    0x6A09E667u, 0xBB67AE85u, 0x3C6EF372u, 0xA54FF53Au,
    0x510E527Fu, 0x9B05688Cu, 0x1F83D9ABu, 0x5BE0CD19u,
};

/*
 * Message word schedule: row r lists, in order, the indices into the 16-word
 * message block that F hands to the eight G calls of round r (two indices
 * per G call).
 */
const uint8_t SIGMA[10][16] = {
    /* round 0 */ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
    /* round 1 */ {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
    /* round 2 */ {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
    /* round 3 */ {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
    /* round 4 */ {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
    /* round 5 */ {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
    /* round 6 */ {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
    /* round 7 */ {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
    /* round 8 */ {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
    /* round 9 */ {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
};

/*
 * Rotate a 32-bit word right by c bit positions and return the result.
 *
 * The count is reduced modulo 32 before shifting.  Without that, c == 0
 * would evaluate `word << 32` and c >= 32 would evaluate `word >> c`, both of
 * which shift by at least the width of the type and are undefined behaviour
 * in C.  With the mask, a count of 0 (or any multiple of 32) returns word
 * unchanged.
 */
uint32_t
rotr_u32(uint32_t word, uint8_t c)
{
  const unsigned n = c & 31u;

  /* (32 - n) & 31 maps n == 0 to a left shift of 0 instead of 32 */
  return (word >> n) | (word << ((32u - n) & 31u));
}

/*
 * Mixing step on four words of the work vector.
 *
 * v        work vector, updated in place
 * a,b,c,d  indices of the four words to mix
 * x, y     two message words; x is added in the first half, y in the second
 *
 * Each half is add / xor-rotate / add / xor-rotate; the right-rotation
 * amounts are 16 and 12 in the first half, 8 and 7 in the second.
 */
void
G(uint32_t v[16], unsigned a, unsigned b, unsigned c, unsigned d, uint32_t x, uint32_t y)
{
  /* first half: inject x */
  v[a] += v[b] + x;
  v[d] ^= v[a];
  v[d] = rotr_u32(v[d], 16);
  v[c] += v[d];
  v[b] ^= v[c];
  v[b] = rotr_u32(v[b], 12);

  /* second half: inject y */
  v[a] += v[b] + y;
  v[d] ^= v[a];
  v[d] = rotr_u32(v[d], 8);
  v[c] += v[d];
  v[b] ^= v[c];
  v[b] = rotr_u32(v[b], 7);
}

/*
 * Compression function: folds one 16-word message block into ctx->h.
 *
 * ctx  hashing context; reads h and the counter words t[0], t[1], updates h
 * m    the message block as sixteen 32-bit words
 * f    non-zero when this is the last block
 */
void
F(struct BLAKE2s_ctx *ctx, uint32_t m[16], uint32_t f)
{
  uint32_t v[16];
  unsigned round, k;

  /* work vector: lower half is the chaining state, upper half is the IV */
  for (k = 0; k < 8; ++k) {
    v[k] = ctx->h[k];
    v[k + 8] = IV[k];
  }
  /* fold the two counter words into words 12 and 13 */
  v[12] ^= ctx->t[0];
  v[13] ^= ctx->t[1];
  /* last block: invert every bit of word 14 */
  if (f != 0) {
    v[14] = ~v[14];
  }

  /* ten rounds, each with its own message word order taken from SIGMA */
  for (round = 0; round < 10; ++round) {
    const uint8_t *s = SIGMA[round];

    /* column step */
    G(v, 0, 4, 8, 12, m[s[0]], m[s[1]]);
    G(v, 1, 5, 9, 13, m[s[2]], m[s[3]]);
    G(v, 2, 6, 10, 14, m[s[4]], m[s[5]]);
    G(v, 3, 7, 11, 15, m[s[6]], m[s[7]]);

    /* diagonal step */
    G(v, 0, 5, 10, 15, m[s[8]], m[s[9]]);
    G(v, 1, 6, 11, 12, m[s[10]], m[s[11]]);
    G(v, 2, 7, 8, 13, m[s[12]], m[s[13]]);
    G(v, 3, 4, 9, 14, m[s[14]], m[s[15]]);
  }

  /* fold both halves of the work vector back into the chaining state */
  for (k = 0; k < 8; ++k) {
    ctx->h[k] ^= v[k] ^ v[k + 8];
  }
}

/*
 * Initialise a hashing context.
 *
 * ctx     context to set up; it is zeroed and then filled in
 * outlen  digest length in bytes, 1..32
 * key     optional key bytes; may be NULL only when keylen is 0
 * keylen  key length in bytes, 0..32
 *
 * Returns 0 on success.  Returns -1, leaving *ctx untouched, when ctx is
 * NULL, when outlen or keylen is out of range, or when keylen is non-zero
 * but key is NULL.
 */
int
BLAKE2s_init(struct BLAKE2s_ctx *ctx, uint8_t outlen, const void *key, uint8_t keylen)
{
  if (ctx == NULL || outlen == 0 || outlen > 32 || keylen > 32) {
    return -1;
  }
  /* a key length without key bytes would make memcpy read through NULL */
  if (keylen > 0 && key == NULL) {
    return -1;
  }

  memset(ctx, 0, sizeof *ctx);
  ctx->param.outlen = outlen;
  ctx->param.keylen = keylen;
  ctx->param.fanout = 1;
  ctx->param.depth = 1;

  /* copy IV into state vector h */
  memcpy(ctx->h, IV, 32);
  /* xor the first parameter word (depth, fanout, keylen, outlen) into h[0] */
  ctx->h[0] ^= (ctx->param.depth << 24) ^ (ctx->param.fanout << 16) ^ (keylen << 8) ^ outlen;

  if (keylen > 0) {
    /*
     * The key becomes the first block.  The buffer was zeroed by the memset
     * above, so the bytes after the key are zero padding; marking the buffer
     * as full (c = 64) makes that whole block get consumed before any data.
     */
    memcpy(ctx->b, key, keylen);
    ctx->c = 64;
  }

  return 0;
}

/*
 * Smaller of two values.  Every argument use is parenthesized so that
 * operands containing low-precedence operators (|, ?:, ...) expand correctly.
 * Each argument is still evaluated twice; do not pass expressions with side
 * effects.
 */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))

/*
 * Feed dd bytes starting at d into the hash.
 *
 * Input is staged in the 64-byte buffer ctx->b.  A full buffer is only
 * compressed when more input arrives, so the most recent block always stays
 * buffered for BLAKE2s_final to process as the last block.
 *
 * ctx  hashing context set up by BLAKE2s_init
 * d    input bytes; not dereferenced when dd is 0
 * dd   number of input bytes
 */
void
BLAKE2s_update(struct BLAKE2s_ctx *ctx, const void *d, size_t dd)
{
  const uint8_t *in = (const uint8_t *)d;
  size_t i = 0; /* size_t, so inputs larger than UINT_MAX cannot wrap the index */

  while (i < dd) {
    if (ctx->c == 64) { /* buffer is full and more data follows: consume it */
      uint32_t m[16];
      unsigned j;

      /* advance the byte counter, carrying into t[1] when t[0] wraps */
      ctx->t[0] += ctx->c;
      if (ctx->t[0] < ctx->c) ctx->t[1] += 1;
      ctx->c = 0; /* reset fill level */

      /*
       * Decode the block as sixteen little-endian words.  The words are
       * assembled byte by byte into a local array instead of reading the
       * buffer through a uint32_t pointer, so the result does not depend on
       * host byte order or on the buffer's alignment.
       */
      for (j = 0; j < 16; ++j) {
        const uint8_t *p = (const uint8_t *)ctx->b + 4 * j;
        m[j] = (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
      }
      F(ctx, m, 0);
    }

    /* copy as much as fits into the remaining buffer space */
    const size_t room = (size_t)(64 - ctx->c);
    const size_t left = dd - i;
    const uint8_t len = (uint8_t)(left < room ? left : room);
    memcpy(&ctx->b[ctx->c], &in[i], len);
    ctx->c += len;
    i += len;
  }
}

/*
 * Finish the hash and write the digest.
 *
 * Counts the bytes still buffered, zero-pads the buffer to 64 bytes,
 * compresses it with the last-block flag set, and writes ctx->param.outlen
 * digest bytes to out.
 *
 * ctx  hashing context that has been initialised and fed with data
 * out  destination for the digest; must hold ctx->param.outlen bytes
 */
void
BLAKE2s_final(struct BLAKE2s_ctx *ctx, void *out)
{
  uint8_t *digest = (uint8_t *)out;
  uint32_t m[16];
  unsigned i;

  /* advance the byte counter, carrying into t[1] when t[0] wraps */
  ctx->t[0] += ctx->c;
  if (ctx->t[0] < ctx->c) ctx->t[1] += 1;

  /* fill up the rest of the block with zeroes */
  if (ctx->c < 64) {
    memset(&ctx->b[ctx->c], 0, (size_t)(64 - ctx->c));
    ctx->c = 64;
  }

  /*
   * Decode the block as sixteen little-endian words.  The words are
   * assembled byte by byte into a local array instead of reading the buffer
   * through a uint32_t pointer, so the result does not depend on host byte
   * order or on the buffer's alignment.
   */
  for (i = 0; i < 16; ++i) {
    const uint8_t *p = (const uint8_t *)ctx->b + 4 * i;
    m[i] = (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
  }
  F(ctx, m, 1);

  /* serialise h, least significant byte of each word first */
  for (i = 0; i < ctx->param.outlen; ++i) {
    digest[i] = (uint8_t)((ctx->h[i >> 2] >> (8 * (i & 3))) & 0xff);
  }
}