argon2.c raw
1 /*
2 Based on Golang's Argon2 implementation from crypto package
3
Written for hash-wasm by Dani Biró
5 */
6
7 #include "hash-wasm.h"
8
// Size in bytes of one WebAssembly linear-memory page; the memory grow/size
// builtins used below count in pages of this size.
#define BYTES_PER_PAGE 65536

// Start of the working buffer inside linear memory; NULL until
// Hash_GetBuffer() has set it.
uint8_t *B = NULL;
// Number of bytes added to linear memory so far by Hash_SetMemorySize()
// (always a whole number of pages).
uint64_t B_size = 0;
13
14 WASM_EXPORT
15 int8_t Hash_SetMemorySize(uint32_t total_bytes) {
16 uint32_t bytes_required = total_bytes - B_size;
17
18 if (bytes_required > 0) {
19 uint32_t blocks = bytes_required / BYTES_PER_PAGE;
20 if (blocks * BYTES_PER_PAGE < bytes_required) {
21 blocks += 1;
22 }
23
24 if (__builtin_wasm_memory_grow(0, blocks) == -1) {
25 return -1;
26 }
27
28 B_size += blocks * BYTES_PER_PAGE;
29 }
30
31 return 0;
32 }
33
34 WASM_EXPORT
35 uint8_t *Hash_GetBuffer() {
36 if (B == NULL) {
37 // start of new memory
38 B = (uint8_t *)(__builtin_wasm_memory_size(0) * BYTES_PER_PAGE);
39 if (Hash_SetMemorySize(512 * 1024) == -1) { // always preallocate 16kb to not cause problems with the other hashes
40 return NULL;
41 }
42 }
43
44 return B;
45 }
46
/*
 * Rotates the 64-bit value `w` right by `c` bit positions.
 *
 * The count is reduced modulo 64, so c == 0 (and any multiple of 64) returns
 * `w` unchanged. Both shift amounts are masked to 0..63 because shifting a
 * 64-bit value by 64 is undefined behavior in C.
 */
static __inline__ uint64_t rotr64(const uint64_t w, const unsigned c) {
  return (w >> (c & 63)) | (w << (-c & 63));
}
50
/* Low 32 bits of a 64-bit word. */
#define ARGON2_LO32(v) ((v) & 0xFFFFFFFF)

/* x + y + 2 * lo32(x) * lo32(y), evaluated in wrapping 64-bit arithmetic. */
#define ARGON2_MULADD(x, y) ((x) + (y) + 2 * ARGON2_LO32(x) * ARGON2_LO32(y))

/*
 * Quarter-round used by P(): mixes four 64-bit words in place. It has the
 * shape of the BLAKE2b G function (rotations 32, 24, 16, 63) with each
 * addition replaced by the multiply-add above.
 *
 * a, b, c, d must be modifiable uint64_t lvalues; every argument is
 * evaluated more than once, so pass only side-effect-free expressions.
 */
#define G(a, b, c, d)              \
  do {                             \
    (a) = ARGON2_MULADD(a, b);     \
    (d) = rotr64((d) ^ (a), 32);   \
    (c) = ARGON2_MULADD(c, d);     \
    (b) = rotr64((b) ^ (c), 24);   \
    (a) = ARGON2_MULADD(a, b);     \
    (d) = rotr64((d) ^ (a), 16);   \
    (c) = ARGON2_MULADD(c, d);     \
    (b) = rotr64((b) ^ (c), 63);   \
  } while (0)
62
/*
 * Permutes sixteen 64-bit words in place.
 *
 * The words are treated as a 4x4 matrix in row-major order (a0..a3 is the
 * first row). G is applied to the four columns first and then to the four
 * wrapped diagonals, in the same order as writing the eight calls out by
 * hand.
 */
void P(
  uint64_t *a0, uint64_t *a1, uint64_t *a2, uint64_t *a3,
  uint64_t *a4, uint64_t *a5, uint64_t *a6, uint64_t *a7,
  uint64_t *a8, uint64_t *a9, uint64_t *a10, uint64_t *a11,
  uint64_t *a12, uint64_t *a13, uint64_t *a14, uint64_t *a15
) {
  uint64_t *const v[16] = {
    a0,  a1,  a2,  a3,
    a4,  a5,  a6,  a7,
    a8,  a9,  a10, a11,
    a12, a13, a14, a15
  };

  // Column step: (0,4,8,12), (1,5,9,13), (2,6,10,14), (3,7,11,15).
  for (unsigned col = 0; col < 4; col++) {
    uint64_t *va = v[col];
    uint64_t *vb = v[4 + col];
    uint64_t *vc = v[8 + col];
    uint64_t *vd = v[12 + col];
    G(*va, *vb, *vc, *vd);
  }

  // Diagonal step: (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14).
  for (unsigned col = 0; col < 4; col++) {
    uint64_t *va = v[col];
    uint64_t *vb = v[4 + (col + 1) % 4];
    uint64_t *vc = v[8 + (col + 2) % 4];
    uint64_t *vd = v[12 + (col + 3) % 4];
    G(*va, *vb, *vc, *vd);
  }
}
78
/*
 * Selects the reference block for the block currently being generated.
 *
 * rand:        64-bit pseudo-random word; the high 32 bits pick the lane,
 *              the low 32 bits pick the position inside the window
 * lanes:       modulus for the in-lane position and stride between lanes
 *              in the returned index
 * segments:    number of blocks per segment
 * parallelism: number of lanes the high half of `rand` is reduced by
 * k:           pass number (0 for the first pass)
 * slice:       slice number within the pass (0..3)
 * lane:        lane currently being filled
 * index:       position of the current block inside its segment
 *
 * Returns the block index refLane * lanes + position.
 */
uint32_t indexAlpha(
  uint64_t rand, uint32_t lanes, uint32_t segments,
  uint32_t parallelism, uint32_t k, uint32_t slice,
  uint32_t lane, uint32_t index
) {
  // In the very first slice of the first pass only the current lane is used.
  uint32_t refLane = (uint32_t)(rand >> 32) % parallelism;
  if (k == 0 && slice == 0) {
    refLane = lane;
  }
  const int sameLane = (lane == refLane);

  // Candidate window: `windowSize` blocks counted from `windowStart`.
  uint32_t windowStart;
  uint32_t windowSize;
  if (k == 0) {
    // First pass: only the slices already produced in this pass.
    windowStart = 0;
    windowSize = slice * segments;
    if (slice == 0 || sameLane) {
      windowSize += index;
    }
  } else {
    // Later passes: the three other slices, starting right after this one.
    windowStart = ((slice + 1) % 4) * segments;
    windowSize = segments * 3;
    if (sameLane) {
      windowSize += index;
    }
  }

  if (index == 0 || sameLane) {
    windowSize--;
  }

  // Square the low half of `rand` and scale it by the window size, keeping
  // the high 32 bits of each product; the result is subtracted from the end
  // of the window.
  uint64_t skew = rand & 0xFFFFFFFF;
  skew = (skew * skew) >> 32;
  skew = (skew * windowSize) >> 32;

  const uint32_t position =
    (uint32_t)((windowStart + windowSize - 1 - skew) % (uint64_t)lanes);

  return refLane * lanes + position;
}
116
// Scratch block used by block() to hold the permuted state.
uint64_t t[128];

/*
 * Compression step over 1024-byte blocks (128 words of 64 bits).
 *
 * Computes R = a ^ b, permutes a copy of R with P (first along the eight
 * 16-word rows, then along the eight columns of word pairs), and combines
 * the permuted copy with R again.
 *
 * z:   destination block; it may be the same block as `a`
 * a:   first input block
 * b:   second input block
 * xor: non-zero to XOR the result into z, zero to overwrite z
 */
void block(uint64_t *z, uint64_t *a, uint64_t *b, int32_t xor) {
  #pragma clang loop unroll(full)
  for (int w = 0; w < 128; w++) {
    t[w] = a[w] ^ b[w];
  }

  // Row-wise: eight groups of sixteen consecutive words.
  #pragma clang loop unroll(full)
  for (int row = 0; row < 8; row++) {
    uint64_t *r = &t[row * 16];
    P(
      r, r + 1, r + 2, r + 3, r + 4, r + 5, r + 6, r + 7,
      r + 8, r + 9, r + 10, r + 11, r + 12, r + 13, r + 14, r + 15
    );
  }

  // Column-wise: eight groups made of one word pair from every row.
  #pragma clang loop unroll(full)
  for (int col = 0; col < 8; col++) {
    uint64_t *c = &t[col * 2];
    P(
      c, c + 1, c + 16, c + 17, c + 32, c + 33, c + 48, c + 49,
      c + 64, c + 65, c + 80, c + 81, c + 96, c + 97, c + 112, c + 113
    );
  }

  // Feed-forward: a ^ b is recomputed per word because t has been overwritten.
  if (!xor) {
    for (int w = 0; w < 128; w++) {
      z[w] = a[w] ^ b[w] ^ t[w];
    }
  } else {
    for (int w = 0; w < 128; w++) {
      z[w] ^= a[w] ^ b[w] ^ t[w];
    }
  }
}
151
152 uint64_t addresses[128];
153 uint64_t zero[128];
154 uint64_t in[128];
155
156 WASM_EXPORT
157 void Hash_Calculate(uint32_t length, uint32_t memorySize) {
158 uint32_t *initVector = (uint32_t *)(B + 1024 * memorySize);
159 uint32_t parallelism = initVector[0];
160 uint32_t hashLength = initVector[1];
161 uint32_t memorySize2 = initVector[2];
162 uint32_t iterations = initVector[3];
163 uint32_t version = initVector[4];
164 uint32_t hashType = initVector[5];
165 if (memorySize2 != memorySize) {
166 return;
167 }
168
169 uint32_t segments = memorySize / (parallelism * 4);
170 memorySize = segments * parallelism * 4;
171 uint32_t lanes = segments * 4;
172
173 in[3] = memorySize;
174 in[4] = iterations;
175 in[5] = hashType;
176
177 for (uint32_t k = 0; k < iterations; k++) {
178 in[0] = k;
179 for (uint8_t slice = 0; slice < 4; slice++) {
180 in[2] = slice;
181 for (uint32_t lane = 0; lane < parallelism; lane++) {
182 in[1] = lane;
183 in[6] = 0;
184 uint32_t index = 0;
185 if (k == 0 && slice == 0) {
186 index = 2;
187 if (hashType == 1 || hashType == 2) {
188 in[6]++;
189 block(addresses, in, zero, 0);
190 block(addresses, addresses, zero, 0);
191 }
192 }
193 uint32_t offset = lane * lanes + slice * segments + index;
194 while (index < segments) {
195 uint32_t prev = offset - 1;
196 if (index == 0 && slice == 0) {
197 prev += lanes;
198 }
199
200 uint64_t rand;
201 if (hashType == 1 || (hashType == 2 && k == 0 && slice < 2)) {
202 if (index % 128 == 0) {
203 in[6]++;
204 block(addresses, in, zero, 0);
205 block(addresses, addresses, zero, 0);
206 }
207 rand = addresses[index % 128];
208 } else {
209 rand = *(uint64_t *)(B + prev * 1024);
210 }
211 uint32_t newOffset = indexAlpha(rand, lanes, segments, parallelism, k, slice, lane, index);
212
213 block(
214 (uint64_t *)&B[offset * 1024],
215 (uint64_t *)&B[prev * 1024],
216 (uint64_t *)&B[newOffset * 1024],
217 1
218 );
219 index++;
220 offset++;
221 }
222 }
223 }
224 }
225
226 uint32_t destIndex = (memorySize - 1) * 1024;
227 for (uint32_t lane = 0; lane < parallelism - 1; lane++) {
228 uint32_t sourceIndex = (lane * lanes + lanes - 1) * 1024;
229 for (uint32_t i = 0; i < 1024; i += 8) {
230 *(uint64_t *)&B[destIndex + i] ^= *(uint64_t *)&B[sourceIndex + i];
231 }
232 }
233
234 for (uint16_t i = 0; i < 1024; i += 8) {
235 *(uint64_t *)&B[i] = *(uint64_t *)&B[destIndex + i];
236 }
237 }
238