OLD | NEW |
| (Empty) |
1 /* ==================================================================== | |
2 * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
3 * | |
4 * Redistribution and use in source and binary forms, with or without | |
5 * modification, are permitted provided that the following conditions | |
6 * are met: | |
7 * | |
8 * 1. Redistributions of source code must retain the above copyright | |
9 * notice, this list of conditions and the following disclaimer. | |
10 * | |
11 * 2. Redistributions in binary form must reproduce the above copyright | |
12 * notice, this list of conditions and the following disclaimer in | |
13 * the documentation and/or other materials provided with the | |
14 * distribution. | |
15 * | |
16 * 3. All advertising materials mentioning features or use of this | |
17 * software must display the following acknowledgment: | |
18 * "This product includes software developed by the OpenSSL Project | |
19 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
20 * | |
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
22 * endorse or promote products derived from this software without | |
23 * prior written permission. For written permission, please contact | |
24 * licensing@OpenSSL.org. | |
25 * | |
26 * 5. Products derived from this software may not be called "OpenSSL" | |
27 * nor may "OpenSSL" appear in their names without prior written | |
28 * permission of the OpenSSL Project. | |
29 * | |
30 * 6. Redistributions of any form whatsoever must retain the following | |
31 * acknowledgment: | |
32 * "This product includes software developed by the OpenSSL Project | |
33 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
34 * | |
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
46 * OF THE POSSIBILITY OF SUCH DAMAGE. | |
47 * ==================================================================== | |
48 */ | |
49 | |
50 /* This implementation was taken from the public domain, neon2 version in | |
51 * SUPERCOP by D. J. Bernstein and Peter Schwabe. */ | |
52 | |
53 #include <stdint.h> | |
54 #include <string.h> | |
55 | |
56 #include <openssl/poly1305.h> | |
57 | |
58 #if !defined(OPENSSL_NO_POLY1305) | |
59 | |
/*
 * A pair of field elements stored side by side.
 *
 * Each element is split into five limbs; the code in this file keeps the
 * first element in v[0], v[2], v[4], v[6], v[8] and the second one in
 * v[1], v[3], v[5], v[7], v[9].  Limbs are masked to 26 bits when they are
 * normalised.  v[10] and v[11] are never touched here: they only pad the
 * structure (12 * 4 = 48 bytes) for alignment purposes.
 */
typedef struct {
	uint32_t v[12];	/* 10 limbs in use, 2 words of padding */
} fe1305x2;
63 | |
64 #define addmulmod openssl_poly1305_neon2_addmulmod | |
65 #define blocks openssl_poly1305_neon2_blocks | |
66 | |
67 extern void addmulmod(fe1305x2 *r, const fe1305x2 *x, const fe1305x2 *y, const f
e1305x2 *c); | |
68 | |
69 extern int blocks(fe1305x2 *h, const fe1305x2 *precomp, const unsigned char *in,
unsigned int inlen); | |
70 | |
71 static void freeze(fe1305x2 *r) | |
72 { | |
73 int i; | |
74 | |
75 uint32_t x0 = r->v[0]; | |
76 uint32_t x1 = r->v[2]; | |
77 uint32_t x2 = r->v[4]; | |
78 uint32_t x3 = r->v[6]; | |
79 uint32_t x4 = r->v[8]; | |
80 uint32_t y0; | |
81 uint32_t y1; | |
82 uint32_t y2; | |
83 uint32_t y3; | |
84 uint32_t y4; | |
85 uint32_t swap; | |
86 | |
87 for (i = 0;i < 3;++i) | |
88 { | |
89 x1 += x0 >> 26; x0 &= 0x3ffffff; | |
90 x2 += x1 >> 26; x1 &= 0x3ffffff; | |
91 x3 += x2 >> 26; x2 &= 0x3ffffff; | |
92 x4 += x3 >> 26; x3 &= 0x3ffffff; | |
93 x0 += 5*(x4 >> 26); x4 &= 0x3ffffff; | |
94 } | |
95 | |
96 y0 = x0 + 5; | |
97 y1 = x1 + (y0 >> 26); y0 &= 0x3ffffff; | |
98 y2 = x2 + (y1 >> 26); y1 &= 0x3ffffff; | |
99 y3 = x3 + (y2 >> 26); y2 &= 0x3ffffff; | |
100 y4 = x4 + (y3 >> 26); y3 &= 0x3ffffff; | |
101 swap = -(y4 >> 26); y4 &= 0x3ffffff; | |
102 | |
103 y0 ^= x0; | |
104 y1 ^= x1; | |
105 y2 ^= x2; | |
106 y3 ^= x3; | |
107 y4 ^= x4; | |
108 | |
109 y0 &= swap; | |
110 y1 &= swap; | |
111 y2 &= swap; | |
112 y3 &= swap; | |
113 y4 &= swap; | |
114 | |
115 y0 ^= x0; | |
116 y1 ^= x1; | |
117 y2 ^= x2; | |
118 y3 ^= x3; | |
119 y4 ^= x4; | |
120 | |
121 r->v[0] = y0; | |
122 r->v[2] = y1; | |
123 r->v[4] = y2; | |
124 r->v[6] = y3; | |
125 r->v[8] = y4; | |
126 } | |
127 | |
128 static void fe1305x2_tobytearray(unsigned char *r, fe1305x2 *x) | |
129 { | |
130 uint32_t x0 = x->v[0]; | |
131 uint32_t x1 = x->v[2]; | |
132 uint32_t x2 = x->v[4]; | |
133 uint32_t x3 = x->v[6]; | |
134 uint32_t x4 = x->v[8]; | |
135 | |
136 x1 += x0 >> 26; | |
137 x0 &= 0x3ffffff; | |
138 x2 += x1 >> 26; | |
139 x1 &= 0x3ffffff; | |
140 x3 += x2 >> 26; | |
141 x2 &= 0x3ffffff; | |
142 x4 += x3 >> 26; | |
143 x3 &= 0x3ffffff; | |
144 | |
145 *(uint32_t *) r = x0 + (x1 << 26); | |
146 *(uint32_t *) (r + 4) = (x1 >> 6) + (x2 << 20); | |
147 *(uint32_t *) (r + 8) = (x2 >> 12) + (x3 << 14); | |
148 *(uint32_t *) (r + 12) = (x3 >> 18) + (x4 << 8); | |
149 } | |
150 | |
/* load32 reads a 32-bit word in host byte order from the four bytes at |t|.
 * Going through memcpy keeps the access valid for any alignment of |t| and
 * avoids breaking strict aliasing rules in callers such as
 * fe1305x2_frombytearray.  |t| is only read, so it is const-qualified and
 * may point at read-only data. */
static uint32_t load32(const unsigned char *t)
	{
	uint32_t tmp;
	memcpy(&tmp, t, sizeof(tmp));
	return tmp;
	}
159 | |
160 static void fe1305x2_frombytearray(fe1305x2 *r, const unsigned char *x, unsigned
long long xlen) | |
161 { | |
162 int i; | |
163 unsigned char t[17]; | |
164 | |
165 for (i = 0; (i < 16) && (i < xlen); i++) | |
166 t[i] = x[i]; | |
167 xlen -= i; | |
168 x += i; | |
169 t[i++] = 1; | |
170 for (; i<17; i++) | |
171 t[i] = 0; | |
172 | |
173 r->v[0] = 0x3ffffff & load32(t); | |
174 r->v[2] = 0x3ffffff & (load32(t + 3) >> 2); | |
175 r->v[4] = 0x3ffffff & (load32(t + 6) >> 4); | |
176 r->v[6] = 0x3ffffff & (load32(t + 9) >> 6); | |
177 r->v[8] = load32(t + 13); | |
178 | |
179 if (xlen) | |
180 { | |
181 for (i = 0; (i < 16) && (i < xlen); i++) | |
182 t[i] = x[i]; | |
183 t[i++] = 1; | |
184 for (; i<17; i++) | |
185 t[i] = 0; | |
186 | |
187 r->v[1] = 0x3ffffff & load32(t); | |
188 r->v[3] = 0x3ffffff & (load32(t + 3) >> 2); | |
189 r->v[5] = 0x3ffffff & (load32(t + 6) >> 4); | |
190 r->v[7] = 0x3ffffff & (load32(t + 9) >> 6); | |
191 r->v[9] = load32(t + 13); | |
192 } | |
193 else | |
194 r->v[1] = r->v[3] = r->v[5] = r->v[7] = r->v[9] = 0; | |
195 } | |
196 | |
197 static const fe1305x2 zero __attribute__ ((aligned (16))); | |
198 | |
199 struct poly1305_state_st { | |
200 unsigned char data[sizeof(fe1305x2[5]) + 128]; | |
201 unsigned char buf[32]; | |
202 unsigned int buf_used; | |
203 unsigned char key[16]; | |
204 }; | |
205 | |
206 void CRYPTO_poly1305_init_neon(poly1305_state *state, | |
207 const unsigned char key[32]) | |
208 { | |
209 struct poly1305_state_st *st = (struct poly1305_state_st*) (state); | |
210 fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); | |
211 fe1305x2 *const h = r + 1; | |
212 fe1305x2 *const c = h + 1; | |
213 fe1305x2 *const precomp = c + 1; | |
214 unsigned int j; | |
215 | |
216 r->v[1] = r->v[0] = 0x3ffffff & *(uint32_t *) key; | |
217 r->v[3] = r->v[2] = 0x3ffff03 & ((*(uint32_t *) (key + 3)) >> 2); | |
218 r->v[5] = r->v[4] = 0x3ffc0ff & ((*(uint32_t *) (key + 6)) >> 4); | |
219 r->v[7] = r->v[6] = 0x3f03fff & ((*(uint32_t *) (key + 9)) >> 6); | |
220 r->v[9] = r->v[8] = 0x00fffff & ((*(uint32_t *) (key + 12)) >> 8); | |
221 | |
222 for (j = 0; j < 10; j++) | |
223 h->v[j] = 0; /* XXX: should fast-forward a bit */ | |
224 | |
225 addmulmod(precomp,r,r,&zero); /* precompute r^2 */ | |
226 addmulmod(precomp + 1,precomp,precomp,&zero); /* precompute r^4 */ | |
227 | |
228 memcpy(st->key, key + 16, 16); | |
229 st->buf_used = 0; | |
230 } | |
231 | |
232 void CRYPTO_poly1305_update_neon(poly1305_state *state, const unsigned char *in, | |
233 size_t in_len) | |
234 { | |
235 struct poly1305_state_st *st = (struct poly1305_state_st*) (state); | |
236 fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); | |
237 fe1305x2 *const h = r + 1; | |
238 fe1305x2 *const c = h + 1; | |
239 fe1305x2 *const precomp = c + 1; | |
240 unsigned int i; | |
241 | |
242 if (st->buf_used) | |
243 { | |
244 unsigned int todo = 32 - st->buf_used; | |
245 if (todo > in_len) | |
246 todo = in_len; | |
247 for (i = 0; i < todo; i++) | |
248 st->buf[st->buf_used + i] = in[i]; | |
249 st->buf_used += todo; | |
250 in_len -= todo; | |
251 in += todo; | |
252 | |
253 if (st->buf_used == sizeof(st->buf) && in_len) | |
254 { | |
255 addmulmod(h,h,precomp,&zero); | |
256 fe1305x2_frombytearray(c, st->buf, sizeof(st->buf)); | |
257 for (i = 0; i < 10; i++) | |
258 h->v[i] += c->v[i]; | |
259 st->buf_used = 0; | |
260 } | |
261 } | |
262 | |
263 while (in_len > 32) | |
264 { | |
265 unsigned int tlen = 1048576; | |
266 if (in_len < tlen) | |
267 tlen = in_len; | |
268 tlen -= blocks(h, precomp, in, tlen); | |
269 in_len -= tlen; | |
270 in += tlen; | |
271 } | |
272 | |
273 if (in_len) | |
274 { | |
275 for (i = 0; i < in_len; i++) | |
276 st->buf[i] = in[i]; | |
277 st->buf_used = in_len; | |
278 } | |
279 } | |
280 | |
281 void CRYPTO_poly1305_finish_neon(poly1305_state* state, unsigned char mac[16]) | |
282 { | |
283 struct poly1305_state_st *st = (struct poly1305_state_st*) (state); | |
284 fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); | |
285 fe1305x2 *const h = r + 1; | |
286 fe1305x2 *const c = h + 1; | |
287 fe1305x2 *const precomp = c + 1; | |
288 | |
289 addmulmod(h,h,precomp,&zero); | |
290 | |
291 if (st->buf_used > 16) | |
292 { | |
293 fe1305x2_frombytearray(c, st->buf, st->buf_used); | |
294 precomp->v[1] = r->v[1]; | |
295 precomp->v[3] = r->v[3]; | |
296 precomp->v[5] = r->v[5]; | |
297 precomp->v[7] = r->v[7]; | |
298 precomp->v[9] = r->v[9]; | |
299 addmulmod(h,h,precomp,c); | |
300 } | |
301 else if (st->buf_used > 0) | |
302 { | |
303 fe1305x2_frombytearray(c, st->buf, st->buf_used); | |
304 r->v[1] = 1; | |
305 r->v[3] = 0; | |
306 r->v[5] = 0; | |
307 r->v[7] = 0; | |
308 r->v[9] = 0; | |
309 addmulmod(h,h,r,c); | |
310 } | |
311 | |
312 h->v[0] += h->v[1]; | |
313 h->v[2] += h->v[3]; | |
314 h->v[4] += h->v[5]; | |
315 h->v[6] += h->v[7]; | |
316 h->v[8] += h->v[9]; | |
317 freeze(h); | |
318 | |
319 fe1305x2_frombytearray(c, st->key, 16); | |
320 c->v[8] ^= (1 << 24); | |
321 | |
322 h->v[0] += c->v[0]; | |
323 h->v[2] += c->v[2]; | |
324 h->v[4] += c->v[4]; | |
325 h->v[6] += c->v[6]; | |
326 h->v[8] += c->v[8]; | |
327 fe1305x2_tobytearray(mac, h); | |
328 } | |
329 | |
330 #endif /* !OPENSSL_NO_POLY1305 */ | |
OLD | NEW |