OLD | NEW |
| (Empty) |
1 diff -burN android-openssl-lhash2/openssl.config android-openssl/openssl.config | |
2 --- android-openssl-lhash2/openssl.config 2013-11-05 14:38:31.187575574 -0
500 | |
3 +++ android-openssl/openssl.config 2013-11-05 15:03:54.661551145 -0500 | |
4 @@ -432,6 +432,7 @@ | |
5 crypto/buffer/buf_err.c \ | |
6 crypto/buffer/buf_str.c \ | |
7 crypto/buffer/buffer.c \ | |
8 +crypto/chacha/chacha_enc.c \ | |
9 crypto/cmac/cm_ameth.c \ | |
10 crypto/cmac/cm_pmeth.c \ | |
11 crypto/cmac/cmac.c \ | |
12 @@ -565,6 +566,7 @@ | |
13 crypto/evp/e_aes.c \ | |
14 crypto/evp/e_aes_cbc_hmac_sha1.c \ | |
15 crypto/evp/e_bf.c \ | |
16 +crypto/evp/e_chacha20poly1305.c \ | |
17 crypto/evp/e_des.c \ | |
18 crypto/evp/e_des3.c \ | |
19 crypto/evp/e_null.c \ | |
20 @@ -576,6 +578,7 @@ | |
21 crypto/evp/e_xcbc_d.c \ | |
22 crypto/evp/encode.c \ | |
23 crypto/evp/evp_acnf.c \ | |
24 +crypto/evp/evp_aead.c \ | |
25 crypto/evp/evp_cnf.c \ | |
26 crypto/evp/evp_enc.c \ | |
27 crypto/evp/evp_err.c \ | |
28 @@ -674,6 +677,7 @@ | |
29 crypto/pkcs7/pk7_smime.c \ | |
30 crypto/pkcs7/pkcs7err.c \ | |
31 crypto/pqueue/pqueue.c \ | |
32 +crypto/poly1305/poly1305.c \ | |
33 crypto/rand/md_rand.c \ | |
34 crypto/rand/rand_egd.c \ | |
35 crypto/rand/rand_err.c \ | |
36 @@ -789,7 +793,10 @@ | |
37 crypto/aes/asm/aes-armv4.S \ | |
38 crypto/bn/asm/armv4-gf2m.S \ | |
39 crypto/bn/asm/armv4-mont.S \ | |
40 +crypto/chacha/chacha_vec_arm.S \ | |
41 crypto/modes/asm/ghash-armv4.S \ | |
42 +crypto/poly1305/poly1305_arm.c \ | |
43 +crypto/poly1305/poly1305_arm_asm.S \ | |
44 crypto/sha/asm/sha1-armv4-large.S \ | |
45 crypto/sha/asm/sha256-armv4.S \ | |
46 crypto/sha/asm/sha512-armv4.S \ | |
47 @@ -852,6 +863,7 @@ | |
48 crypto/bn/asm/x86_64-gf2m.S \ | |
49 crypto/bn/asm/x86_64-mont.S \ | |
50 crypto/bn/asm/x86_64-mont5.S \ | |
51 +crypto/chacha/chacha_vec.c \ | |
52 crypto/md5/asm/md5-x86_64.S \ | |
53 crypto/modes/asm/ghash-x86_64.S \ | |
54 crypto/rc4/asm/rc4-md5-x86_64.S \ | |
55 @@ -859,6 +871,7 @@ | |
56 crypto/sha/asm/sha1-x86_64.S \ | |
57 crypto/sha/asm/sha256-x86_64.S \ | |
58 crypto/sha/asm/sha512-x86_64.S \ | |
59 +crypto/poly1305/poly1305_vec.c \ | |
60 crypto/x86_64cpuid.S \ | |
61 " | |
62 | |
63 @@ -866,7 +879,9 @@ | |
64 crypto/aes/aes_cbc.c \ | |
65 crypto/aes/aes_core.c \ | |
66 crypto/bn/bn_asm.c \ | |
67 +crypto/chacha/chacha_enc.c \ | |
68 crypto/mem_clr.c \ | |
69 +crypto/poly1305/poly1305.c \ | |
70 crypto/rc4/rc4_enc.c \ | |
71 crypto/rc4/rc4_skey.c \ | |
72 " | |
73 @@ -998,6 +1013,12 @@ | |
74 x509_hash_name_algorithm_change.patch \ | |
75 reduce_client_hello_size.patch \ | |
76 fix_lhash_iteration.patch \ | |
77 +tls1_change_cipher_state_rewrite.patch \ | |
78 +aead_support.patch \ | |
79 +aead_ssl_support.patch \ | |
80 +use_aead_for_aes_gcm.patch \ | |
81 +chacha20poly1305.patch \ | |
82 +neon_runtime.patch \ | |
83 " | |
84 | |
85 OPENSSL_PATCHES_progs_SOURCES="\ | |
86 diff -burN android-openssl-lhash2/patches/aead_ssl_support.patch android-openssl
/patches/aead_ssl_support.patch | |
87 --- android-openssl-lhash2/patches/aead_ssl_support.patch 1969-12-31 19:00
:00.000000000 -0500 | |
88 +++ android-openssl/patches/aead_ssl_support.patch 2013-11-05 14:14:34.6312
83497 -0500 | |
89 @@ -0,0 +1,690 @@ | |
90 +From dc8386dbb390f4b867019873cd072a5fe01ba4e9 Mon Sep 17 00:00:00 2001 | |
91 +From: Adam Langley <agl@chromium.org> | |
92 +Date: Thu, 25 Jul 2013 17:35:23 -0400 | |
93 +Subject: [PATCH 41/50] aead_ssl_support. | |
94 + | |
95 +This change allows AEADs to be used in ssl/ to implement SSL/TLS | |
96 +ciphersuites. | |
97 +--- | |
98 + ssl/s2_clnt.c | 2 +- | |
99 + ssl/s2_enc.c | 2 +- | |
100 + ssl/s2_srvr.c | 2 +- | |
101 + ssl/s3_enc.c | 8 +- | |
102 + ssl/s3_pkt.c | 4 +- | |
103 + ssl/ssl.h | 15 +++- | |
104 + ssl/ssl3.h | 1 + | |
105 + ssl/ssl_ciph.c | 70 +++++++++++---- | |
106 + ssl/ssl_err.c | 3 + | |
107 + ssl/ssl_lib.c | 12 +++ | |
108 + ssl/ssl_locl.h | 23 ++++- | |
109 + ssl/ssl_txt.c | 2 +- | |
110 + ssl/t1_enc.c | 262 +++++++++++++++++++++++++++++++++++++++++++++++++++------ | |
111 + 13 files changed, 356 insertions(+), 50 deletions(-) | |
112 + | |
113 +diff --git a/ssl/s2_clnt.c b/ssl/s2_clnt.c | |
114 +index 03b6cf9..32adaf5 100644 | |
115 +--- a/ssl/s2_clnt.c | |
116 ++++ b/ssl/s2_clnt.c | |
117 +@@ -623,7 +623,7 @@ static int client_master_key(SSL *s) | |
118 + if (s->state == SSL2_ST_SEND_CLIENT_MASTER_KEY_A) | |
119 + { | |
120 + | |
121 +- if (!ssl_cipher_get_evp(s->session,&c,&md,NULL,NULL,NULL)) | |
122 ++ if (!ssl_cipher_get_evp(s->session,&c,&md,NULL,NULL)) | |
123 + { | |
124 + ssl2_return_error(s,SSL2_PE_NO_CIPHER); | |
125 + SSLerr(SSL_F_CLIENT_MASTER_KEY,SSL_R_PROBLEMS_MAPPING_CI
PHER_FUNCTIONS); | |
126 +diff --git a/ssl/s2_enc.c b/ssl/s2_enc.c | |
127 +index ff3395f..087c4a2 100644 | |
128 +--- a/ssl/s2_enc.c | |
129 ++++ b/ssl/s2_enc.c | |
130 +@@ -68,7 +68,7 @@ int ssl2_enc_init(SSL *s, int client) | |
131 + const EVP_MD *md; | |
132 + int num; | |
133 + | |
134 +- if (!ssl_cipher_get_evp(s->session,&c,&md,NULL,NULL,NULL)) | |
135 ++ if (!ssl_cipher_get_evp(s->session,&c,&md,NULL,NULL)) | |
136 + { | |
137 + ssl2_return_error(s,SSL2_PE_NO_CIPHER); | |
138 + SSLerr(SSL_F_SSL2_ENC_INIT,SSL_R_PROBLEMS_MAPPING_CIPHER_FUNCTIO
NS); | |
139 +diff --git a/ssl/s2_srvr.c b/ssl/s2_srvr.c | |
140 +index 9b1a6ac..9392921 100644 | |
141 +--- a/ssl/s2_srvr.c | |
142 ++++ b/ssl/s2_srvr.c | |
143 +@@ -452,7 +452,7 @@ static int get_client_master_key(SSL *s) | |
144 + | |
145 + is_export=SSL_C_IS_EXPORT(s->session->cipher); | |
146 + | |
147 +- if (!ssl_cipher_get_evp(s->session,&c,&md,NULL,NULL,NULL)) | |
148 ++ if (!ssl_cipher_get_evp(s->session,&c,&md,NULL,NULL)) | |
149 + { | |
150 + ssl2_return_error(s,SSL2_PE_NO_CIPHER); | |
151 + SSLerr(SSL_F_GET_CLIENT_MASTER_KEY,SSL_R_PROBLEMS_MAPPING_CIPHER
_FUNCTIONS); | |
152 +diff --git a/ssl/s3_enc.c b/ssl/s3_enc.c | |
153 +index e3cd4f0..191b86b 100644 | |
154 +--- a/ssl/s3_enc.c | |
155 ++++ b/ssl/s3_enc.c | |
156 +@@ -397,7 +397,13 @@ int ssl3_setup_key_block(SSL *s) | |
157 + if (s->s3->tmp.key_block_length != 0) | |
158 + return(1); | |
159 + | |
160 +- if (!ssl_cipher_get_evp(s->session,&c,&hash,NULL,NULL,&comp)) | |
161 ++ if (!ssl_cipher_get_comp(s->session, &comp)) | |
162 ++ { | |
163 ++ SSLerr(SSL_F_SSL3_SETUP_KEY_BLOCK,SSL_R_CIPHER_OR_HASH_UNAVAILAB
LE); | |
164 ++ return(0); | |
165 ++ } | |
166 ++ | |
167 ++ if (!ssl_cipher_get_evp(s->session,&c,&hash,NULL,NULL)) | |
168 + { | |
169 + SSLerr(SSL_F_SSL3_SETUP_KEY_BLOCK,SSL_R_CIPHER_OR_HASH_UNAVAILAB
LE); | |
170 + return(0); | |
171 +diff --git a/ssl/s3_pkt.c b/ssl/s3_pkt.c | |
172 +index 33bb78a..5038f6c 100644 | |
173 +--- a/ssl/s3_pkt.c | |
174 ++++ b/ssl/s3_pkt.c | |
175 +@@ -790,7 +790,9 @@ static int do_ssl3_write(SSL *s, int type, const unsigned c
har *buf, | |
176 + else | |
177 + eivlen = 0; | |
178 + } | |
179 +- else | |
180 ++ else if (s->aead_write_ctx != NULL) | |
181 ++ eivlen = s->aead_write_ctx->variable_nonce_len; | |
182 ++ else | |
183 + eivlen = 0; | |
184 + | |
185 + /* lets setup the record stuff. */ | |
186 +diff --git a/ssl/ssl.h b/ssl/ssl.h | |
187 +index 672f3eb..0644cbf 100644 | |
188 +--- a/ssl/ssl.h | |
189 ++++ b/ssl/ssl.h | |
190 +@@ -406,7 +406,9 @@ struct ssl_cipher_st | |
191 + unsigned long algorithm_ssl; /* (major) protocol version */ | |
192 + | |
193 + unsigned long algo_strength; /* strength and export flags */ | |
194 +- unsigned long algorithm2; /* Extra flags */ | |
195 ++ unsigned long algorithm2; /* Extra flags. See SSL2_CF_* in ssl2.h | |
196 ++ and algorithm2 section in | |
197 ++ ssl_locl.h */ | |
198 + int strength_bits; /* Number of bits really used */ | |
199 + int alg_bits; /* Number of bits for algorithm */ | |
200 + }; | |
201 +@@ -748,6 +750,9 @@ int SRP_generate_client_master_secret(SSL *s,unsigned char
*master_key); | |
202 + | |
203 + #endif | |
204 + | |
205 ++struct ssl_aead_ctx_st; | |
206 ++typedef struct ssl_aead_ctx_st SSL_AEAD_CTX; | |
207 ++ | |
208 + #if defined(OPENSSL_SYS_MSDOS) && !defined(OPENSSL_SYS_WIN32) | |
209 + #define SSL_MAX_CERT_LIST_DEFAULT 1024*30 /* 30k max cert list :-) */ | |
210 + #else | |
211 +@@ -1294,6 +1299,9 @@ struct ssl_st | |
212 + /* These are the ones being used, the ones in SSL_SESSION are | |
213 + * the ones to be 'copied' into these ones */ | |
214 + int mac_flags; | |
215 ++ SSL_AEAD_CTX *aead_read_ctx; /* AEAD context. If non-NULL, then | |
216 ++ |enc_read_ctx| and |read_hash| are | |
217 ++ ignored. */ | |
218 + EVP_CIPHER_CTX *enc_read_ctx; /* cryptographic state */ | |
219 + EVP_MD_CTX *read_hash; /* used for mac generation */ | |
220 + #ifndef OPENSSL_NO_COMP | |
221 +@@ -1302,6 +1310,9 @@ struct ssl_st | |
222 + char *expand; | |
223 + #endif | |
224 + | |
225 ++ SSL_AEAD_CTX *aead_write_ctx; /* AEAD context. If non-NULL, then | |
226 ++ |enc_write_ctx| and |write_hash| are | |
227 ++ ignored. */ | |
228 + EVP_CIPHER_CTX *enc_write_ctx; /* cryptographic state */ | |
229 + EVP_MD_CTX *write_hash; /* used for mac generation */ | |
230 + #ifndef OPENSSL_NO_COMP | |
231 +@@ -2437,8 +2448,10 @@ void ERR_load_SSL_strings(void); | |
232 + #define SSL_F_SSL_USE_RSAPRIVATEKEY_FILE 206 | |
233 + #define SSL_F_SSL_VERIFY_CERT_CHAIN 207 | |
234 + #define SSL_F_SSL_WRITE 208 | |
235 ++#define SSL_F_TLS1_AEAD_CTX_INIT 339 | |
236 + #define SSL_F_TLS1_CERT_VERIFY_MAC 286 | |
237 + #define SSL_F_TLS1_CHANGE_CIPHER_STATE 209 | |
238 ++#define SSL_F_TLS1_CHANGE_CIPHER_STATE_AEAD 340 | |
239 + #define SSL_F_TLS1_CHANGE_CIPHER_STATE_CIPHER 338 | |
240 + #define SSL_F_TLS1_CHECK_SERVERHELLO_TLSEXT 274 | |
241 + #define SSL_F_TLS1_ENC 210 | |
242 +diff --git a/ssl/ssl3.h b/ssl/ssl3.h | |
243 +index a4f6d4a..6a5cdbe 100644 | |
244 +--- a/ssl/ssl3.h | |
245 ++++ b/ssl/ssl3.h | |
246 +@@ -517,6 +517,7 @@ typedef struct ssl3_state_st | |
247 + unsigned char *key_block; | |
248 + | |
249 + const EVP_CIPHER *new_sym_enc; | |
250 ++ const EVP_AEAD *new_aead; | |
251 + const EVP_MD *new_hash; | |
252 + int new_mac_pkey_type; | |
253 + int new_mac_secret_size; | |
254 +diff --git a/ssl/ssl_ciph.c b/ssl/ssl_ciph.c | |
255 +index 2966ddf..7e780cd 100644 | |
256 +--- a/ssl/ssl_ciph.c | |
257 ++++ b/ssl/ssl_ciph.c | |
258 +@@ -484,32 +484,66 @@ static void load_builtin_compressions(void) | |
259 + } | |
260 + #endif | |
261 + | |
262 ++/* ssl_cipher_get_comp sets |comp| to the correct SSL_COMP for the given | |
263 ++ * session and returns 1. On error it returns 0. */ | |
264 ++int ssl_cipher_get_comp(const SSL_SESSION *s, SSL_COMP **comp) | |
265 ++ { | |
266 ++ int i; | |
267 ++ | |
268 ++ SSL_COMP ctmp; | |
269 ++#ifndef OPENSSL_NO_COMP | |
270 ++ load_builtin_compressions(); | |
271 ++#endif | |
272 ++ | |
273 ++ *comp=NULL; | |
274 ++ ctmp.id=s->compress_meth; | |
275 ++ if (ssl_comp_methods != NULL) | |
276 ++ { | |
277 ++ i=sk_SSL_COMP_find(ssl_comp_methods,&ctmp); | |
278 ++ if (i >= 0) | |
279 ++ *comp=sk_SSL_COMP_value(ssl_comp_methods,i); | |
280 ++ else | |
281 ++ *comp=NULL; | |
282 ++ } | |
283 ++ | |
284 ++ return 1; | |
285 ++ } | |
286 ++ | |
287 ++/* ssl_cipher_get_evp_aead sets |*aead| to point to the correct EVP_AEAD objec
t | |
288 ++ * for |s->cipher|. It returns 1 on success and 0 on error. */ | |
289 ++int ssl_cipher_get_evp_aead(const SSL_SESSION *s, const EVP_AEAD **aead) | |
290 ++ { | |
291 ++ const SSL_CIPHER *c = s->cipher; | |
292 ++ | |
293 ++ *aead = NULL; | |
294 ++ | |
295 ++ if (c == NULL) | |
296 ++ return 0; | |
297 ++ if ((c->algorithm2 & SSL_CIPHER_ALGORITHM2_AEAD) == 0) | |
298 ++ return 0; | |
299 ++ | |
300 ++#ifndef OPENSSL_NO_AES | |
301 ++ /* There is only one AEAD for now. */ | |
302 ++ *aead = EVP_aead_aes_128_gcm(); | |
303 ++ return 1; | |
304 ++#endif | |
305 ++ | |
306 ++ return 0; | |
307 ++ } | |
308 ++ | |
309 + int ssl_cipher_get_evp(const SSL_SESSION *s, const EVP_CIPHER **enc, | |
310 +- const EVP_MD **md, int *mac_pkey_type, int *mac_secret_size,SSL_COM
P **comp) | |
311 ++ const EVP_MD **md, int *mac_pkey_type, int *mac_secret_size) | |
312 + { | |
313 + int i; | |
314 + const SSL_CIPHER *c; | |
315 + | |
316 + c=s->cipher; | |
317 + if (c == NULL) return(0); | |
318 +- if (comp != NULL) | |
319 +- { | |
320 +- SSL_COMP ctmp; | |
321 +-#ifndef OPENSSL_NO_COMP | |
322 +- load_builtin_compressions(); | |
323 +-#endif | |
324 + | |
325 +- *comp=NULL; | |
326 +- ctmp.id=s->compress_meth; | |
327 +- if (ssl_comp_methods != NULL) | |
328 +- { | |
329 +- i=sk_SSL_COMP_find(ssl_comp_methods,&ctmp); | |
330 +- if (i >= 0) | |
331 +- *comp=sk_SSL_COMP_value(ssl_comp_methods,i); | |
332 +- else | |
333 +- *comp=NULL; | |
334 +- } | |
335 +- } | |
336 ++ /* This function doesn't deal with EVP_AEAD. See | |
337 ++ * |ssl_cipher_get_aead_evp|. */ | |
338 ++ if (c->algorithm2 & SSL_CIPHER_ALGORITHM2_AEAD) | |
339 ++ return(0); | |
340 + | |
341 + if ((enc == NULL) || (md == NULL)) return(0); | |
342 + | |
343 +diff --git a/ssl/ssl_err.c b/ssl/ssl_err.c | |
344 +index 97b2a0d..ad3a7b9 100644 | |
345 +--- a/ssl/ssl_err.c | |
346 ++++ b/ssl/ssl_err.c | |
347 +@@ -280,6 +280,9 @@ static ERR_STRING_DATA SSL_str_functs[]= | |
348 + {ERR_FUNC(SSL_F_SSL_VERIFY_CERT_CHAIN), "SSL_VERIFY_CERT_CHAIN"}, | |
349 + {ERR_FUNC(SSL_F_SSL_WRITE), "SSL_write"}, | |
350 + {ERR_FUNC(SSL_F_TLS1_CERT_VERIFY_MAC), "tls1_cert_verify_mac"}, | |
351 ++{ERR_FUNC(SSL_F_TLS1_AEAD_CTX_INIT), "TLS1_AEAD_CTX_INIT"}, | |
352 ++{ERR_FUNC(SSL_F_TLS1_CHANGE_CIPHER_STATE), "tls1_change_cipher_state"}, | |
353 ++{ERR_FUNC(SSL_F_TLS1_CHANGE_CIPHER_STATE_AEAD), "TLS1_CHANGE_CIPHER_STAT
E_AEAD"}, | |
354 + {ERR_FUNC(SSL_F_TLS1_CHANGE_CIPHER_STATE_CIPHER), "TLS1_CHANGE_CIPHER_STAT
E_CIPHER"}, | |
355 + {ERR_FUNC(SSL_F_TLS1_CHECK_SERVERHELLO_TLSEXT), "TLS1_CHECK_SERVERHELLO_
TLSEXT"}, | |
356 + {ERR_FUNC(SSL_F_TLS1_ENC), "TLS1_ENC"}, | |
357 +diff --git a/ssl/ssl_lib.c b/ssl/ssl_lib.c | |
358 +index 3b264b6..8a0150c 100644 | |
359 +--- a/ssl/ssl_lib.c | |
360 ++++ b/ssl/ssl_lib.c | |
361 +@@ -2881,6 +2881,18 @@ void ssl_clear_cipher_ctx(SSL *s) | |
362 + OPENSSL_free(s->enc_write_ctx); | |
363 + s->enc_write_ctx=NULL; | |
364 + } | |
365 ++ if (s->aead_read_ctx != NULL) | |
366 ++ { | |
367 ++ EVP_AEAD_CTX_cleanup(&s->aead_read_ctx->ctx); | |
368 ++ OPENSSL_free(s->aead_read_ctx); | |
369 ++ s->aead_read_ctx = NULL; | |
370 ++ } | |
371 ++ if (s->aead_write_ctx != NULL) | |
372 ++ { | |
373 ++ EVP_AEAD_CTX_cleanup(&s->aead_write_ctx->ctx); | |
374 ++ OPENSSL_free(s->aead_write_ctx); | |
375 ++ s->aead_write_ctx = NULL; | |
376 ++ } | |
377 + #ifndef OPENSSL_NO_COMP | |
378 + if (s->expand != NULL) | |
379 + { | |
380 +diff --git a/ssl/ssl_locl.h b/ssl/ssl_locl.h | |
381 +index 3d800af..63bc28b 100644 | |
382 +--- a/ssl/ssl_locl.h | |
383 ++++ b/ssl/ssl_locl.h | |
384 +@@ -380,6 +380,14 @@ | |
385 + | |
386 + #define TLSEXT_CHANNEL_ID_SIZE 128 | |
387 + | |
388 ++/* SSL_CIPHER_ALGORITHM2_AEAD is a flag in SSL_CIPHER.algorithm2 which | |
389 ++ * indicates that the cipher is implemented via an EVP_AEAD. */ | |
390 ++#define SSL_CIPHER_ALGORITHM2_AEAD (1<<23) | |
391 ++ | |
392 ++/* SSL_CIPHER_AEAD_FIXED_NONCE_LEN returns the number of bytes of fixed nonce | |
393 ++ * for an SSL_CIPHER* with the SSL_CIPHER_ALGORITHM2_AEAD flag. */ | |
394 ++#define SSL_CIPHER_AEAD_FIXED_NONCE_LEN(ssl_cipher) \ | |
395 ++ (((ssl_cipher->algorithm2 >> 24) & 0xf)*2) | |
396 + | |
397 + /* | |
398 + * Export and cipher strength information. For each cipher we have to decide | |
399 +@@ -588,6 +596,17 @@ typedef struct ssl3_enc_method | |
400 + int use_context); | |
401 + } SSL3_ENC_METHOD; | |
402 + | |
403 ++/* ssl_aead_ctx_st contains information about an AEAD that is being used to | |
404 ++ * encrypt an SSL connection. */ | |
405 ++struct ssl_aead_ctx_st | |
406 ++ { | |
407 ++ EVP_AEAD_CTX ctx; | |
408 ++ /* fixed_nonce contains any bytes of the nonce that are fixed for all | |
409 ++ * records. */ | |
410 ++ unsigned char fixed_nonce[8]; | |
411 ++ unsigned char fixed_nonce_len, variable_nonce_len, tag_len; | |
412 ++ }; | |
413 ++ | |
414 + #ifndef OPENSSL_NO_COMP | |
415 + /* Used for holding the relevant compression methods loaded into SSL_CTX */ | |
416 + typedef struct ssl3_comp_st | |
417 +@@ -834,8 +853,10 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_list(const SSL_MET
HOD *meth, | |
418 + STACK_OF(SSL_CIPHER) **sorted, | |
419 + const char *rule_str); | |
420 + void ssl_update_cache(SSL *s, int mode); | |
421 ++int ssl_cipher_get_comp(const SSL_SESSION *s, SSL_COMP **comp); | |
422 ++int ssl_cipher_get_evp_aead(const SSL_SESSION *s, const EVP_AEAD **aead); | |
423 + int ssl_cipher_get_evp(const SSL_SESSION *s,const EVP_CIPHER **enc, | |
424 +- const EVP_MD **md,int *mac_pkey_type,int *mac_secret_size
, SSL_COMP **comp); | |
425 ++ const EVP_MD **md,int *mac_pkey_type,int *mac_secret_size
); | |
426 + int ssl_get_handshake_digest(int i,long *mask,const EVP_MD **md);
| |
427 + int ssl_verify_cert_chain(SSL *s,STACK_OF(X509) *sk); | |
428 + int ssl_undefined_function(SSL *s); | |
429 +diff --git a/ssl/ssl_txt.c b/ssl/ssl_txt.c | |
430 +index 6479d52..07826d5 100644 | |
431 +--- a/ssl/ssl_txt.c | |
432 ++++ b/ssl/ssl_txt.c | |
433 +@@ -216,7 +216,7 @@ int SSL_SESSION_print(BIO *bp, const SSL_SESSION *x) | |
434 + { | |
435 + SSL_COMP *comp = NULL; | |
436 + | |
437 +- ssl_cipher_get_evp(x,NULL,NULL,NULL,NULL,&comp); | |
438 ++ ssl_cipher_get_comp(x, &comp); | |
439 + if (comp == NULL) | |
440 + { | |
441 + if (BIO_printf(bp,"\n Compression: %d",x->compress_me
th) <= 0) goto err; | |
442 +diff --git a/ssl/t1_enc.c b/ssl/t1_enc.c | |
443 +index e1f91ba..7af1a32 100644 | |
444 +--- a/ssl/t1_enc.c | |
445 ++++ b/ssl/t1_enc.c | |
446 +@@ -316,6 +316,66 @@ static int tls1_generate_key_block(SSL *s, unsigned char *
km, | |
447 + return ret; | |
448 + } | |
449 + | |
450 ++/* tls1_aead_ctx_init allocates |*aead_ctx|, if needed and returns 1. It | |
451 ++ * returns 0 on malloc error. */ | |
452 ++static int tls1_aead_ctx_init(SSL_AEAD_CTX **aead_ctx) | |
453 ++ { | |
454 ++ if (*aead_ctx != NULL) | |
455 ++ EVP_AEAD_CTX_cleanup(&(*aead_ctx)->ctx); | |
456 ++ else | |
457 ++ { | |
458 ++ *aead_ctx = (SSL_AEAD_CTX*) OPENSSL_malloc(sizeof(SSL_AEAD_CTX))
; | |
459 ++ if (*aead_ctx == NULL) | |
460 ++ { | |
461 ++ SSLerr(SSL_F_TLS1_AEAD_CTX_INIT, ERR_R_MALLOC_FAILURE); | |
462 ++ return 0; | |
463 ++ } | |
464 ++ } | |
465 ++ | |
466 ++ return 1; | |
467 ++ } | |
468 ++ | |
469 ++static int tls1_change_cipher_state_aead(SSL *s, char is_read, | |
470 ++ const unsigned char *key, unsigned key_len, | |
471 ++ const unsigned char *iv, unsigned iv_len) | |
472 ++ { | |
473 ++ const EVP_AEAD *aead = s->s3->tmp.new_aead; | |
474 ++ SSL_AEAD_CTX *aead_ctx; | |
475 ++ | |
476 ++ if (is_read) | |
477 ++ { | |
478 ++ if (!tls1_aead_ctx_init(&s->aead_read_ctx)) | |
479 ++ return 0; | |
480 ++ aead_ctx = s->aead_read_ctx; | |
481 ++ } | |
482 ++ else | |
483 ++ { | |
484 ++ if (!tls1_aead_ctx_init(&s->aead_write_ctx)) | |
485 ++ return 0; | |
486 ++ aead_ctx = s->aead_write_ctx; | |
487 ++ } | |
488 ++ | |
489 ++ if (!EVP_AEAD_CTX_init(&aead_ctx->ctx, aead, key, key_len, | |
490 ++ EVP_AEAD_DEFAULT_TAG_LENGTH, NULL /* engine */)) | |
491 ++ return 0; | |
492 ++ if (iv_len > sizeof(aead_ctx->fixed_nonce)) | |
493 ++ { | |
494 ++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE_AEAD, ERR_R_INTERNAL_ERROR
); | |
495 ++ return 0; | |
496 ++ } | |
497 ++ memcpy(aead_ctx->fixed_nonce, iv, iv_len); | |
498 ++ aead_ctx->fixed_nonce_len = iv_len; | |
499 ++ aead_ctx->variable_nonce_len = 8; /* always the case, currently. */ | |
500 ++ if (aead_ctx->variable_nonce_len + aead_ctx->fixed_nonce_len != EVP_AEAD
_nonce_length(aead)) | |
501 ++ { | |
502 ++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE_AEAD, ERR_R_INTERNAL_ERROR
); | |
503 ++ return 0; | |
504 ++ } | |
505 ++ aead_ctx->tag_len = EVP_AEAD_max_overhead(aead); | |
506 ++ | |
507 ++ return 1; | |
508 ++ } | |
509 ++ | |
510 + /* tls1_change_cipher_state_cipher performs the work needed to switch cipher | |
511 + * states when using EVP_CIPHER. The argument |is_read| is true iff this | |
512 + * function is being called due to reading, as opposed to writing, a | |
513 +@@ -494,6 +554,7 @@ int tls1_change_cipher_state(SSL *s, int which) | |
514 + const unsigned char *client_write_key, *server_write_key, *key; | |
515 + const unsigned char *client_write_iv, *server_write_iv, *iv; | |
516 + const EVP_CIPHER *cipher = s->s3->tmp.new_sym_enc; | |
517 ++ const EVP_AEAD *aead = s->s3->tmp.new_aead; | |
518 + unsigned key_len, iv_len, mac_secret_len; | |
519 + const unsigned char *key_data; | |
520 + const char is_export = SSL_C_IS_EXPORT(s->s3->tmp.new_cipher) != 0; | |
521 +@@ -551,14 +612,22 @@ int tls1_change_cipher_state(SSL *s, int which) | |
522 + | |
523 + mac_secret_len = s->s3->tmp.new_mac_secret_size; | |
524 + | |
525 +- key_len = EVP_CIPHER_key_length(cipher); | |
526 +- if (is_export && key_len > SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher)
) | |
527 +- key_len = SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher); | |
528 +- | |
529 +- if (EVP_CIPHER_mode(cipher) == EVP_CIPH_GCM_MODE) | |
530 +- iv_len = EVP_GCM_TLS_FIXED_IV_LEN; | |
531 ++ if (aead != NULL) | |
532 ++ { | |
533 ++ key_len = EVP_AEAD_key_length(aead); | |
534 ++ iv_len = SSL_CIPHER_AEAD_FIXED_NONCE_LEN(s->s3->tmp.new_cipher); | |
535 ++ } | |
536 + else | |
537 +- iv_len = EVP_CIPHER_iv_length(cipher); | |
538 ++ { | |
539 ++ key_len = EVP_CIPHER_key_length(cipher); | |
540 ++ if (is_export && key_len > SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new
_cipher)) | |
541 ++ key_len = SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher); | |
542 ++ | |
543 ++ if (EVP_CIPHER_mode(cipher) == EVP_CIPH_GCM_MODE) | |
544 ++ iv_len = EVP_GCM_TLS_FIXED_IV_LEN; | |
545 ++ else | |
546 ++ iv_len = EVP_CIPHER_iv_length(cipher); | |
547 ++ } | |
548 + | |
549 + key_data = s->s3->tmp.key_block; | |
550 + client_write_mac_secret = key_data; key_data += mac_secret_len; | |
551 +@@ -587,12 +656,20 @@ int tls1_change_cipher_state(SSL *s, int which) | |
552 + return 0; | |
553 + } | |
554 + | |
555 +- if (!tls1_change_cipher_state_cipher(s, is_read, use_client_keys, | |
556 +- mac_secret, mac_secret_len, | |
557 +- key, key_len, | |
558 +- iv, iv_len)) { | |
559 +- return 0; | |
560 +- } | |
561 ++ if (aead != NULL) | |
562 ++ { | |
563 ++ if (!tls1_change_cipher_state_aead(s, is_read, | |
564 ++ key, key_len, iv, iv_len)) | |
565 ++ return 0; | |
566 ++ } | |
567 ++ else | |
568 ++ { | |
569 ++ if (!tls1_change_cipher_state_cipher(s, is_read, use_client_keys
, | |
570 ++ mac_secret, mac_secret_len, | |
571 ++ key, key_len, | |
572 ++ iv, iv_len)) | |
573 ++ return 0; | |
574 ++ } | |
575 + | |
576 + return 1; | |
577 + err: | |
578 +@@ -603,13 +680,14 @@ err: | |
579 + int tls1_setup_key_block(SSL *s) | |
580 + { | |
581 + unsigned char *p1,*p2=NULL; | |
582 +- const EVP_CIPHER *c; | |
583 +- const EVP_MD *hash; | |
584 ++ const EVP_CIPHER *c = NULL; | |
585 ++ const EVP_MD *hash = NULL; | |
586 ++ const EVP_AEAD *aead = NULL; | |
587 + int num; | |
588 + SSL_COMP *comp; | |
589 + int mac_type= NID_undef,mac_secret_size=0; | |
590 + int ret=0; | |
591 +- int iv_len; | |
592 ++ unsigned key_len, iv_len; | |
593 + | |
594 + #ifdef KSSL_DEBUG | |
595 + printf ("tls1_setup_key_block()\n"); | |
596 +@@ -618,22 +696,36 @@ int tls1_setup_key_block(SSL *s) | |
597 + if (s->s3->tmp.key_block_length != 0) | |
598 + return(1); | |
599 + | |
600 +- if (!ssl_cipher_get_evp(s->session,&c,&hash,&mac_type,&mac_secret_size,&
comp)) | |
601 ++ if (!ssl_cipher_get_comp(s->session, &comp)) | |
602 ++ goto cipher_unavailable_err; | |
603 ++ | |
604 ++ if (s->session->cipher && | |
605 ++ (s->session->cipher->algorithm2 & SSL_CIPHER_ALGORITHM2_AEAD)) | |
606 + { | |
607 +- SSLerr(SSL_F_TLS1_SETUP_KEY_BLOCK,SSL_R_CIPHER_OR_HASH_UNAVAILAB
LE); | |
608 +- return(0); | |
609 ++ if (!ssl_cipher_get_evp_aead(s->session, &aead)) | |
610 ++ goto cipher_unavailable_err; | |
611 ++ key_len = EVP_AEAD_key_length(aead); | |
612 ++ iv_len = SSL_CIPHER_AEAD_FIXED_NONCE_LEN(s->session->cipher); | |
613 + } | |
614 +- | |
615 +- if (EVP_CIPHER_mode(c) == EVP_CIPH_GCM_MODE) | |
616 +- iv_len = EVP_GCM_TLS_FIXED_IV_LEN; | |
617 + else | |
618 +- iv_len = EVP_CIPHER_iv_length(c); | |
619 ++ { | |
620 ++ if (!ssl_cipher_get_evp(s->session,&c,&hash,&mac_type,&mac_secre
t_size)) | |
621 ++ goto cipher_unavailable_err; | |
622 ++ key_len = EVP_CIPHER_key_length(c); | |
623 + | |
624 ++ if (EVP_CIPHER_mode(c) == EVP_CIPH_GCM_MODE) | |
625 ++ iv_len = EVP_GCM_TLS_FIXED_IV_LEN; | |
626 ++ else | |
627 ++ iv_len = EVP_CIPHER_iv_length(c); | |
628 ++ } | |
629 ++ | |
630 ++ s->s3->tmp.new_aead=aead; | |
631 + s->s3->tmp.new_sym_enc=c; | |
632 + s->s3->tmp.new_hash=hash; | |
633 + s->s3->tmp.new_mac_pkey_type = mac_type; | |
634 + s->s3->tmp.new_mac_secret_size = mac_secret_size; | |
635 +- num=EVP_CIPHER_key_length(c)+mac_secret_size+iv_len; | |
636 ++ | |
637 ++ num=key_len+mac_secret_size+iv_len; | |
638 + num*=2; | |
639 + | |
640 + ssl3_cleanup_key_block(s); | |
641 +@@ -696,6 +788,10 @@ err: | |
642 + OPENSSL_free(p2); | |
643 + } | |
644 + return(ret); | |
645 ++ | |
646 ++cipher_unavailable_err: | |
647 ++ SSLerr(SSL_F_TLS1_SETUP_KEY_BLOCK,SSL_R_CIPHER_OR_HASH_UNAVAILABLE); | |
648 ++ return 0; | |
649 + } | |
650 + | |
651 + /* tls1_enc encrypts/decrypts the record in |s->wrec| / |s->rrec|, respectivel
y. | |
652 +@@ -714,6 +810,124 @@ int tls1_enc(SSL *s, int send) | |
653 + unsigned long l; | |
654 + int bs,i,j,k,pad=0,ret,mac_size=0; | |
655 + const EVP_CIPHER *enc; | |
656 ++ const SSL_AEAD_CTX *aead; | |
657 ++ | |
658 ++ if (send) | |
659 ++ rec = &s->s3->wrec; | |
660 ++ else | |
661 ++ rec = &s->s3->rrec; | |
662 ++ | |
663 ++ if (send) | |
664 ++ aead = s->aead_write_ctx; | |
665 ++ else | |
666 ++ aead = s->aead_read_ctx; | |
667 ++ | |
668 ++ if (aead) | |
669 ++ { | |
670 ++ unsigned char ad[13], *seq, *in, *out, nonce[16]; | |
671 ++ unsigned nonce_used; | |
672 ++ ssize_t n; | |
673 ++ | |
674 ++ seq = send ? s->s3->write_sequence : s->s3->read_sequence; | |
675 ++ | |
676 ++ if (s->version == DTLS1_VERSION || s->version == DTLS1_BAD_VER) | |
677 ++ { | |
678 ++ unsigned char dtlsseq[9], *p = dtlsseq; | |
679 ++ | |
680 ++ s2n(send ? s->d1->w_epoch : s->d1->r_epoch, p); | |
681 ++ memcpy(p, &seq[2], 6); | |
682 ++ memcpy(ad, dtlsseq, 8); | |
683 ++ } | |
684 ++ else | |
685 ++ { | |
686 ++ memcpy(ad, seq, 8); | |
687 ++ for (i=7; i>=0; i--) /* increment */ | |
688 ++ { | |
689 ++ ++seq[i]; | |
690 ++ if (seq[i] != 0) | |
691 ++ break; | |
692 ++ } | |
693 ++ } | |
694 ++ | |
695 ++ ad[8] = rec->type; | |
696 ++ ad[9] = (unsigned char)(s->version>>8); | |
697 ++ ad[10] = (unsigned char)(s->version); | |
698 ++ | |
699 ++ if (aead->fixed_nonce_len + aead->variable_nonce_len > sizeof(no
nce) || | |
700 ++ aead->variable_nonce_len > 8) | |
701 ++ return -1; /* internal error - should never happen. */ | |
702 ++ | |
703 ++ memcpy(nonce, aead->fixed_nonce, aead->fixed_nonce_len); | |
704 ++ nonce_used = aead->fixed_nonce_len; | |
705 ++ | |
706 ++ if (send) | |
707 ++ { | |
708 ++ size_t len = rec->length; | |
709 ++ in = rec->input; | |
710 ++ out = rec->data; | |
711 ++ | |
712 ++ /* When sending we use the sequence number as the | |
713 ++ * variable part of the nonce. */ | |
714 ++ if (aead->variable_nonce_len > 8) | |
715 ++ return -1; | |
716 ++ memcpy(nonce + nonce_used, ad, aead->variable_nonce_len)
; | |
717 ++ nonce_used += aead->variable_nonce_len; | |
718 ++ | |
719 ++ /* in do_ssl3_write, rec->input is moved forward by | |
720 ++ * variable_nonce_len in order to leave space for the | |
721 ++ * variable nonce. Thus we can copy the sequence number | |
722 ++ * bytes into place without overwriting any of the | |
723 ++ * plaintext. */ | |
724 ++ memcpy(out, ad, aead->variable_nonce_len); | |
725 ++ len -= aead->variable_nonce_len; | |
726 ++ | |
727 ++ ad[11] = len >> 8; | |
728 ++ ad[12] = len & 0xff; | |
729 ++ | |
730 ++ n = EVP_AEAD_CTX_seal(&aead->ctx, | |
731 ++ out + aead->variable_nonce_len, le
n + aead->tag_len, | |
732 ++ nonce, nonce_used, | |
733 ++ in + aead->variable_nonce_len, len
, | |
734 ++ ad, sizeof(ad)); | |
735 ++ if (n >= 0) | |
736 ++ n += aead->variable_nonce_len; | |
737 ++ } | |
738 ++ else | |
739 ++ { | |
740 ++ /* receive */ | |
741 ++ size_t len = rec->length; | |
742 ++ | |
743 ++ if (rec->data != rec->input) | |
744 ++ return -1; /* internal error - should never hap
pen. */ | |
745 ++ out = in = rec->input; | |
746 ++ | |
747 ++ if (len < aead->variable_nonce_len) | |
748 ++ return 0; | |
749 ++ memcpy(nonce + nonce_used, in, aead->variable_nonce_len)
; | |
750 ++ nonce_used += aead->variable_nonce_len; | |
751 ++ | |
752 ++ in += aead->variable_nonce_len; | |
753 ++ len -= aead->variable_nonce_len; | |
754 ++ out += aead->variable_nonce_len; | |
755 ++ | |
756 ++ if (len < aead->tag_len) | |
757 ++ return 0; | |
758 ++ len -= aead->tag_len; | |
759 ++ | |
760 ++ ad[11] = len >> 8; | |
761 ++ ad[12] = len & 0xff; | |
762 ++ | |
763 ++ n = EVP_AEAD_CTX_open(&aead->ctx, out, len, nonce, nonce
_used, | |
764 ++ in, len + aead->tag_len, ad, sizeo
f(ad)); | |
765 ++ | |
766 ++ rec->data = rec->input = out; | |
767 ++ } | |
768 ++ | |
769 ++ if (n == -1) | |
770 ++ return -1; | |
771 ++ rec->length = n; | |
772 ++ return 1; | |
773 ++ } | |
774 + | |
775 + if (send) | |
776 + { | |
777 +-- | |
778 +1.8.4.1 | |
779 + | |
780 diff -burN android-openssl-lhash2/patches/aead_support.patch android-openssl/pat
ches/aead_support.patch | |
781 --- android-openssl-lhash2/patches/aead_support.patch 1969-12-31 19:00:00.0000
00000 -0500 | |
782 +++ android-openssl/patches/aead_support.patch 2013-11-05 14:14:34.631283497 -0
500 | |
783 @@ -0,0 +1,811 @@ | |
784 +From 98f0c6e114f55b4451bea824b05ab29db3351f12 Mon Sep 17 00:00:00 2001 | |
785 +From: Adam Langley <agl@chromium.org> | |
786 +Date: Thu, 25 Jul 2013 16:52:35 -0400 | |
787 +Subject: [PATCH 40/50] aead_support | |
788 + | |
789 +This change adds an AEAD interface to EVP and an AES-GCM implementation | |
790 +suitable for use in TLS. | |
791 +--- | |
792 + crypto/evp/Makefile | 4 +- | |
793 + crypto/evp/e_aes.c | 214 +++++++++++++++++++++++++++++++++++---- | |
794 + crypto/evp/evp.h | 111 ++++++++++++++++++++ | |
795 + crypto/evp/evp_aead.c | 192 +++++++++++++++++++++++++++++++++++ | |
796 + crypto/evp/evp_err.c | 8 ++ | |
797 + crypto/evp/evp_locl.h | 24 +++++ | |
798 + doc/crypto/EVP_AEAD_CTX_init.pod | 96 ++++++++++++++++++ | |
799 + 7 files changed, 626 insertions(+), 23 deletions(-) | |
800 + create mode 100644 crypto/evp/evp_aead.c | |
801 + create mode 100644 doc/crypto/EVP_AEAD_CTX_init.pod | |
802 + | |
803 +diff --git a/crypto/evp/Makefile b/crypto/evp/Makefile | |
804 +index 1e46ceb..b73038d 100644 | |
805 +--- a/crypto/evp/Makefile | |
806 ++++ b/crypto/evp/Makefile | |
807 +@@ -29,7 +29,7 @@ LIBSRC= encode.c digest.c evp_enc.c evp_key.c evp_acnf.c evp_
cnf.c \ | |
808 + c_all.c c_allc.c c_alld.c evp_lib.c bio_ok.c \ | |
809 + evp_pkey.c evp_pbe.c p5_crpt.c p5_crpt2.c \ | |
810 + e_old.c pmeth_lib.c pmeth_fn.c pmeth_gn.c m_sigver.c evp_fips.c \ | |
811 +- e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c | |
812 ++ e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c evp_aead.c | |
813 + | |
814 + LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \ | |
815 + e_des.o e_bf.o e_idea.o e_des3.o e_camellia.o\ | |
816 +@@ -42,7 +42,7 @@ LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o
evp_cnf.o \ | |
817 + c_all.o c_allc.o c_alld.o evp_lib.o bio_ok.o \ | |
818 + evp_pkey.o evp_pbe.o p5_crpt.o p5_crpt2.o \ | |
819 + e_old.o pmeth_lib.o pmeth_fn.o pmeth_gn.o m_sigver.o evp_fips.o \ | |
820 +- e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o | |
821 ++ e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o evp_aead.o | |
822 + | |
823 + SRC= $(LIBSRC) | |
824 + | |
825 +diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c | |
826 +index ef44f63..e4485e4 100644 | |
827 +--- a/crypto/evp/e_aes.c | |
828 ++++ b/crypto/evp/e_aes.c | |
829 +@@ -814,44 +814,45 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int
arg, void *ptr) | |
830 + } | |
831 + } | |
832 + | |
833 +-static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, | |
834 +- const unsigned char *iv, int enc) | |
835 ++static ctr128_f aes_gcm_set_key(AES_KEY *aes_key, GCM128_CONTEXT *gcm_ctx, | |
836 ++ const unsigned char *key, size_t key_len) | |
837 + { | |
838 +- EVP_AES_GCM_CTX *gctx = ctx->cipher_data; | |
839 +- if (!iv && !key) | |
840 +- return 1; | |
841 +- if (key) | |
842 +- { do { | |
843 + #ifdef BSAES_CAPABLE | |
844 + if (BSAES_CAPABLE) | |
845 + { | |
846 +- AES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks); | |
847 +- CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks, | |
848 ++ AES_set_encrypt_key(key,key_len*8,aes_key); | |
849 ++ CRYPTO_gcm128_init(gcm_ctx,aes_key, | |
850 + (block128_f)AES_encrypt); | |
851 +- gctx->ctr = (ctr128_f)bsaes_ctr32_encrypt_blocks; | |
852 +- break; | |
853 ++ return (ctr128_f)bsaes_ctr32_encrypt_blocks; | |
854 + } | |
855 +- else | |
856 + #endif | |
857 + #ifdef VPAES_CAPABLE | |
858 + if (VPAES_CAPABLE) | |
859 + { | |
860 +- vpaes_set_encrypt_key(key,ctx->key_len*8,&gctx->ks); | |
861 +- CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks, | |
862 ++ vpaes_set_encrypt_key(key,key_len*8,aes_key); | |
863 ++ CRYPTO_gcm128_init(gcm_ctx,aes_key, | |
864 + (block128_f)vpaes_encrypt); | |
865 +- gctx->ctr = NULL; | |
866 +- break; | |
867 ++ return NULL; | |
868 + } | |
869 + #endif | |
870 +- AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); | |
871 +- CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f)AES_encryp
t); | |
872 ++ AES_set_encrypt_key(key, key_len*8, aes_key); | |
873 ++ CRYPTO_gcm128_init(gcm_ctx, aes_key, (block128_f)AES_encrypt); | |
874 + #ifdef AES_CTR_ASM | |
875 +- gctx->ctr = (ctr128_f)AES_ctr32_encrypt; | |
876 ++ return (ctr128_f)AES_ctr32_encrypt; | |
877 + #else | |
878 +- gctx->ctr = NULL; | |
879 ++ return NULL; | |
880 + #endif | |
881 +- } while (0); | |
882 ++ } | |
883 + | |
884 ++static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, | |
885 ++ const unsigned char *iv, int enc) | |
886 ++ { | |
887 ++ EVP_AES_GCM_CTX *gctx = ctx->cipher_data; | |
888 ++ if (!iv && !key) | |
889 ++ return 1; | |
890 ++ if (key) | |
891 ++ { | |
892 ++ gctx->ctr = aes_gcm_set_key(&gctx->ks, &gctx->gcm, key, ctx->key
_len); | |
893 + /* If we have an iv can set it directly, otherwise use | |
894 + * saved IV. | |
895 + */ | |
896 +@@ -1310,5 +1311,176 @@ BLOCK_CIPHER_custom(NID_aes,128,1,12,ccm,CCM,EVP_CIPH_F
LAG_FIPS|CUSTOM_FLAGS) | |
897 + BLOCK_CIPHER_custom(NID_aes,192,1,12,ccm,CCM,EVP_CIPH_FLAG_FIPS|CUSTOM_FLAGS) | |
898 + BLOCK_CIPHER_custom(NID_aes,256,1,12,ccm,CCM,EVP_CIPH_FLAG_FIPS|CUSTOM_FLAGS) | |
899 + | |
900 ++#define EVP_AEAD_AES_128_GCM_TAG_LEN 16 | |
901 ++ | |
902 ++struct aead_aes_128_gcm_ctx { | |
903 ++ union { double align; AES_KEY ks; } ks; | |
904 ++ GCM128_CONTEXT gcm; | |
905 ++ ctr128_f ctr; | |
906 ++ unsigned char tag_len; | |
907 ++}; | |
908 ++ | |
909 ++static int aead_aes_128_gcm_init(EVP_AEAD_CTX *ctx, | |
910 ++ const unsigned char *key, size_t key_len, size_t tag_len) | |
911 ++ { | |
912 ++ struct aead_aes_128_gcm_ctx *gcm_ctx; | |
913 ++ | |
914 ++ if (key_len*8 != 128) | |
915 ++ { | |
916 ++ EVPerr(EVP_F_AEAD_AES_128_GCM_INIT, EVP_R_BAD_KEY_LENGTH); | |
917 ++ return 0; /* EVP_AEAD_CTX_init should catch this. */ | |
918 ++ } | |
919 ++ | |
920 ++ if (tag_len == EVP_AEAD_DEFAULT_TAG_LENGTH) | |
921 ++ tag_len = EVP_AEAD_AES_128_GCM_TAG_LEN; | |
922 ++ | |
923 ++ if (tag_len > EVP_AEAD_AES_128_GCM_TAG_LEN) | |
924 ++ { | |
925 ++ EVPerr(EVP_F_AEAD_AES_128_GCM_INIT, EVP_R_TAG_TOO_LARGE); | |
926 ++ return 0; | |
927 ++ } | |
928 ++ | |
929 ++ gcm_ctx = OPENSSL_malloc(sizeof(struct aead_aes_128_gcm_ctx)); | |
930 ++ if (gcm_ctx == NULL) | |
931 ++ return 0; | |
932 ++ | |
933 ++#ifdef AESNI_CAPABLE | |
934 ++ if (AESNI_CAPABLE) | |
935 ++ { | |
936 ++ aesni_set_encrypt_key(key, key_len * 8, &gcm_ctx->ks.ks); | |
937 ++ CRYPTO_gcm128_init(&gcm_ctx->gcm, &gcm_ctx->ks.ks, | |
938 ++ (block128_f)aesni_encrypt); | |
939 ++ gcm_ctx->ctr = (ctr128_f) aesni_ctr32_encrypt_blocks; | |
940 ++ } | |
941 ++ else | |
942 ++#endif | |
943 ++ { | |
944 ++ gcm_ctx->ctr = aes_gcm_set_key(&gcm_ctx->ks.ks, &gcm_ctx->gcm, | |
945 ++ key, key_len); | |
946 ++ } | |
947 ++ gcm_ctx->tag_len = tag_len; | |
948 ++ ctx->aead_state = gcm_ctx; | |
949 ++ | |
950 ++ return 1; | |
951 ++ } | |
952 ++ | |
953 ++static void aead_aes_128_gcm_cleanup(EVP_AEAD_CTX *ctx) | |
954 ++ { | |
955 ++ struct aead_aes_128_gcm_ctx *gcm_ctx = ctx->aead_state; | |
956 ++ OPENSSL_free(gcm_ctx); | |
957 ++ } | |
958 ++ | |
959 ++static ssize_t aead_aes_128_gcm_seal(const EVP_AEAD_CTX *ctx, | |
960 ++ unsigned char *out, size_t max_out_len, | |
961 ++ const unsigned char *nonce, size_t nonce_len, | |
962 ++ const unsigned char *in, size_t in_len, | |
963 ++ const unsigned char *ad, size_t ad_len) | |
964 ++ { | |
965 ++ size_t bulk = 0; | |
966 ++ const struct aead_aes_128_gcm_ctx *gcm_ctx = ctx->aead_state; | |
967 ++ GCM128_CONTEXT gcm; | |
968 ++ | |
969 ++ if (max_out_len < in_len + gcm_ctx->tag_len) | |
970 ++ { | |
971 ++ EVPerr(EVP_F_AEAD_AES_128_GCM_SEAL, EVP_R_BUFFER_TOO_SMALL); | |
972 ++ return -1; | |
973 ++ } | |
974 ++ | |
975 ++ memcpy(&gcm, &gcm_ctx->gcm, sizeof(gcm)); | |
976 ++ CRYPTO_gcm128_setiv(&gcm, nonce, nonce_len); | |
977 ++ | |
978 ++ if (ad_len > 0 && CRYPTO_gcm128_aad(&gcm, ad, ad_len)) | |
979 ++ return -1; | |
980 ++ | |
981 ++ if (gcm_ctx->ctr) | |
982 ++ { | |
983 ++ if (CRYPTO_gcm128_encrypt_ctr32(&gcm, in + bulk, out + bulk, | |
984 ++ in_len - bulk, gcm_ctx->ctr)) | |
985 ++ return -1; | |
986 ++ } | |
987 ++ else | |
988 ++ { | |
989 ++ if (CRYPTO_gcm128_encrypt(&gcm, in + bulk, out + bulk, | |
990 ++ in_len - bulk)) | |
991 ++ return -1; | |
992 ++ } | |
993 ++ | |
994 ++ CRYPTO_gcm128_tag(&gcm, out + in_len, gcm_ctx->tag_len); | |
995 ++ return in_len + gcm_ctx->tag_len; | |
996 ++ } | |
997 ++ | |
998 ++static ssize_t aead_aes_128_gcm_open(const EVP_AEAD_CTX *ctx, | |
999 ++ unsigned char *out, size_t max_out_len, | |
1000 ++ const unsigned char *nonce, size_t nonce_len, | |
1001 ++ const unsigned char *in, size_t in_len, | |
1002 ++ const unsigned char *ad, size_t ad_len) | |
1003 ++ { | |
1004 ++ size_t bulk = 0; | |
1005 ++ const struct aead_aes_128_gcm_ctx *gcm_ctx = ctx->aead_state; | |
1006 ++ unsigned char tag[EVP_AEAD_AES_128_GCM_TAG_LEN]; | |
1007 ++ size_t out_len; | |
1008 ++ GCM128_CONTEXT gcm; | |
1009 ++ | |
1010 ++ if (in_len < gcm_ctx->tag_len) | |
1011 ++ { | |
1012 ++ EVPerr(EVP_F_AEAD_AES_128_GCM_OPEN, EVP_R_BAD_DECRYPT); | |
1013 ++ return -1; | |
1014 ++ } | |
1015 ++ | |
1016 ++ out_len = in_len - gcm_ctx->tag_len; | |
1017 ++ | |
1018 ++ if (max_out_len < out_len) | |
1019 ++ { | |
1020 ++ EVPerr(EVP_F_AEAD_AES_128_GCM_OPEN, EVP_R_BUFFER_TOO_SMALL); | |
1021 ++ return -1; | |
1022 ++ } | |
1023 ++ | |
1024 ++ memcpy(&gcm, &gcm_ctx->gcm, sizeof(gcm)); | |
1025 ++ CRYPTO_gcm128_setiv(&gcm, nonce, nonce_len); | |
1026 ++ | |
1027 ++ if (CRYPTO_gcm128_aad(&gcm, ad, ad_len)) | |
1028 ++ return -1; | |
1029 ++ | |
1030 ++ if (gcm_ctx->ctr) | |
1031 ++ { | |
1032 ++ if (CRYPTO_gcm128_decrypt_ctr32(&gcm, in + bulk, out + bulk, | |
1033 ++ in_len-bulk-gcm_ctx->tag_len, | |
1034 ++ gcm_ctx->ctr)) | |
1035 ++ return -1; | |
1036 ++ } | |
1037 ++ else | |
1038 ++ { | |
1039 ++ if (CRYPTO_gcm128_decrypt(&gcm, in + bulk, out + bulk, | |
1040 ++ in_len - bulk - gcm_ctx->tag_len)) | |
1041 ++ return -1; | |
1042 ++ } | |
1043 ++ | |
1044 ++ CRYPTO_gcm128_tag(&gcm, tag, gcm_ctx->tag_len); | |
1045 ++ if (CRYPTO_memcmp(tag, in + out_len, gcm_ctx->tag_len) != 0) | |
1046 ++ { | |
1047 ++ EVPerr(EVP_F_AEAD_AES_128_GCM_OPEN, EVP_R_BAD_DECRYPT); | |
1048 ++ return -1; | |
1049 ++ } | |
1050 ++ | |
1051 ++ return out_len; | |
1052 ++ } | |
1053 ++ | |
1054 ++static const EVP_AEAD aead_aes_128_gcm = { | |
1055 ++ 16, /* key len */ | |
1056 ++ 12, /* nonce len */ | |
1057 ++ EVP_AEAD_AES_128_GCM_TAG_LEN, /* overhead */ | |
1058 ++ EVP_AEAD_AES_128_GCM_TAG_LEN, /* max tag length */ | |
1059 ++ | |
1060 ++ aead_aes_128_gcm_init, | |
1061 ++ aead_aes_128_gcm_cleanup, | |
1062 ++ aead_aes_128_gcm_seal, | |
1063 ++ aead_aes_128_gcm_open, | |
1064 ++}; | |
1065 ++ | |
1066 ++const EVP_AEAD *EVP_aead_aes_128_gcm() | |
1067 ++ { | |
1068 ++ return &aead_aes_128_gcm; | |
1069 ++ } | |
1070 ++ | |
1071 + #endif | |
1072 + #endif | |
1073 +diff --git a/crypto/evp/evp.h b/crypto/evp/evp.h | |
1074 +index 5f18d4b..bd10642 100644 | |
1075 +--- a/crypto/evp/evp.h | |
1076 ++++ b/crypto/evp/evp.h | |
1077 +@@ -1243,6 +1243,109 @@ void EVP_PKEY_meth_set_ctrl(EVP_PKEY_METHOD *pmeth, | |
1078 + int (*ctrl_str)(EVP_PKEY_CTX *ctx, | |
1079 + const char *type, const char *value)); | |
1080 + | |
1081 ++/* Authenticated Encryption with Additional Data. | |
1082 ++ * | |
1083 ++ * AEAD couples confidentiality and integrity in a single primitive. AEAD | |
1084 ++ * algorithms take a key and then can seal and open individual messages. Each | |
1085 ++ * message has a unique, per-message nonce and, optionally, additional data | |
1086 ++ * which is authenticated but not included in the output. */ | |
1087 ++ | |
1088 ++struct evp_aead_st; | |
1089 ++typedef struct evp_aead_st EVP_AEAD; | |
1090 ++ | |
1091 ++#ifndef OPENSSL_NO_AES | |
1092 ++/* EVP_aead_aes_128_gcm is AES-128 in Galois Counter Mode. */ | |
1093 ++const EVP_AEAD *EVP_aead_aes_128_gcm(void); | |
1094 ++#endif | |
1095 ++ | |
1096 ++/* EVP_AEAD_key_length returns the length, in bytes, of the keys used by | |
1097 ++ * |aead|. */ | |
1098 ++size_t EVP_AEAD_key_length(const EVP_AEAD *aead); | |
1099 ++ | |
1100 ++/* EVP_AEAD_nonce_length returns the length, in bytes, of the per-message nonc
e | |
1101 ++ * for |aead|. */ | |
1102 ++size_t EVP_AEAD_nonce_length(const EVP_AEAD *aead); | |
1103 ++ | |
1104 ++/* EVP_AEAD_max_overhead returns the maximum number of additional bytes added | |
1105 ++ * by the act of sealing data with |aead|. */ | |
1106 ++size_t EVP_AEAD_max_overhead(const EVP_AEAD *aead); | |
1107 ++ | |
1108 ++/* EVP_AEAD_max_tag_len returns the maximum tag length when using |aead|. This | |
1109 ++ * is the largest value that can be passed as |tag_len| to | |
1110 ++ * |EVP_AEAD_CTX_init|. */ | |
1111 ++size_t EVP_AEAD_max_tag_len(const EVP_AEAD *aead); | |
1112 ++ | |
1113 ++/* An EVP_AEAD_CTX represents an AEAD algorithm configured with a specific key | |
1114 ++ * and message-independent IV. */ | |
1115 ++typedef struct evp_aead_ctx_st { | |
1116 ++ const EVP_AEAD *aead; | |
1117 ++ /* aead_state is an opaque pointer to whatever state the AEAD needs to | |
1118 ++ * maintain. */ | |
1119 ++ void *aead_state; | |
1120 ++} EVP_AEAD_CTX; | |
1121 ++ | |
1122 ++#define EVP_AEAD_DEFAULT_TAG_LENGTH 0 | |
1123 ++ | |
1124 ++/* EVP_AEAD_CTX_init initializes |ctx| for the given AEAD algorithm from |impl|. | |
1125 ++ * The |impl| argument may be NULL to choose the default implementation. | |
1126 ++ * Authentication tags may be truncated by passing a size as |tag_len|. A | |
1127 ++ * |tag_len| of zero indicates the default tag length and this is defined as | |
1128 ++ * EVP_AEAD_DEFAULT_TAG_LENGTH for readability. | |
1129 ++ * Returns 1 on success. Otherwise returns 0 and pushes to the error stack. */ | |
1130 ++int EVP_AEAD_CTX_init(EVP_AEAD_CTX *ctx, const EVP_AEAD *aead, | |
1131 ++ const unsigned char *key, size_t key_len, | |
1132 ++ size_t tag_len, ENGINE *impl); | |
1133 ++ | |
1134 ++/* EVP_AEAD_CTX_cleanup frees any data allocated by |ctx|. */ | |
1135 ++void EVP_AEAD_CTX_cleanup(EVP_AEAD_CTX *ctx); | |
1136 ++ | |
1137 ++/* EVP_AEAD_CTX_seal encrypts and authenticates |in_len| bytes from |in| and | |
1138 ++ * authenticates |ad_len| bytes from |ad| and writes the result to |out|, | |
1139 ++ * returning the number of bytes written, or -1 on error. | |
1140 ++ * | |
1141 ++ * This function may be called (with the same EVP_AEAD_CTX) concurrently with | |
1142 ++ * itself or EVP_AEAD_CTX_open. | |
1143 ++ * | |
1144 ++ * At most |max_out_len| bytes are written to |out| and, in order to ensure | |
1145 ++ * success, |max_out_len| should be |in_len| plus the result of | |
1146 ++ * EVP_AEAD_max_overhead. | |
1147 ++ * | |
1148 ++ * The length of |nonce|, |nonce_len|, must be equal to the result of | |
1149 ++ * EVP_AEAD_nonce_length for this AEAD. | |
1150 ++ * | |
1151 ++ * EVP_AEAD_CTX_seal never results in a partial output. If |max_out_len| is | |
1152 ++ * insufficient, -1 will be returned. | |
1153 ++ * | |
1154 ++ * If |in| and |out| alias then |out| must be <= |in|. */ | |
1155 ++ssize_t EVP_AEAD_CTX_seal(const EVP_AEAD_CTX *ctx, | |
1156 ++ unsigned char *out, size_t max_out_len, | |
1157 ++ const unsigned char *nonce, size_t nonce_len, | |
1158 ++ const unsigned char *in, size_t in_len, | |
1159 ++ const unsigned char *ad, size_t ad_len); | |
1160 ++ | |
1161 ++/* EVP_AEAD_CTX_open authenticates |in_len| bytes from |in| and |ad_len| bytes | |
1162 ++ * from |ad| and decrypts at most |in_len| bytes into |out|. It returns the | |
1163 ++ * number of bytes written, or -1 on error. | |
1164 ++ * | |
1165 ++ * This function may be called (with the same EVP_AEAD_CTX) concurrently with | |
1166 ++ * itself or EVP_AEAD_CTX_seal. | |
1167 ++ * | |
1168 ++ * At most |in_len| bytes are written to |out|. In order to ensure success, | |
1169 ++ * |max_out_len| should be at least |in_len|. | |
1170 ++ * | |
1171 ++ * The length of |nonce|, |nonce_len|, must be equal to the result of | |
1172 ++ * EVP_AEAD_nonce_length for this AEAD. | |
1173 ++ * | |
1174 ++ * EVP_AEAD_CTX_open never results in a partial output. If |max_out_len| is | |
1175 ++ * insufficient, -1 will be returned. | |
1176 ++ * | |
1177 ++ * If |in| and |out| alias then |out| must be <= |in|. */ | |
1178 ++ssize_t EVP_AEAD_CTX_open(const EVP_AEAD_CTX *ctx, | |
1179 ++ unsigned char *out, size_t max_out_len, | |
1180 ++ const unsigned char *nonce, size_t nonce_len, | |
1181 ++ const unsigned char *in, size_t in_len, | |
1182 ++ const unsigned char *ad, size_t ad_len); | |
1183 ++ | |
1184 + void EVP_add_alg_module(void); | |
1185 + | |
1186 + /* BEGIN ERROR CODES */ | |
1187 +@@ -1254,6 +1357,11 @@ void ERR_load_EVP_strings(void); | |
1188 + /* Error codes for the EVP functions. */ | |
1189 + | |
1190 + /* Function codes. */ | |
1191 ++#define EVP_F_AEAD_AES_128_GCM_INIT 183 | |
1192 ++#define EVP_F_AEAD_AES_128_GCM_OPEN 181 | |
1193 ++#define EVP_F_AEAD_AES_128_GCM_SEAL 182 | |
1194 ++#define EVP_F_AEAD_CTX_OPEN 185 | |
1195 ++#define EVP_F_AEAD_CTX_SEAL 186 | |
1196 + #define EVP_F_AESNI_INIT_KEY 165 | |
1197 + #define EVP_F_AESNI_XTS_CIPHER 176 | |
1198 + #define EVP_F_AES_INIT_KEY 133 | |
1199 +@@ -1268,6 +1376,7 @@ void ERR_load_EVP_strings(void); | |
1200 + #define EVP_F_DSA_PKEY2PKCS8 135 | |
1201 + #define EVP_F_ECDSA_PKEY2PKCS8 129 | |
1202 + #define EVP_F_ECKEY_PKEY2PKCS8 132 | |
1203 ++#define EVP_F_EVP_AEAD_CTX_INIT 180 | |
1204 + #define EVP_F_EVP_CIPHERINIT_EX 123 | |
1205 + #define EVP_F_EVP_CIPHER_CTX_COPY 163 | |
1206 + #define EVP_F_EVP_CIPHER_CTX_CTRL 124 | |
1207 +@@ -1383,10 +1492,12 @@ void ERR_load_EVP_strings(void); | |
1208 + #define EVP_R_NO_VERIFY_FUNCTION_CONFIGURED 105 | |
1209 + #define EVP_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE 150 | |
1210 + #define EVP_R_OPERATON_NOT_INITIALIZED 151 | |
1211 ++#define EVP_R_OUTPUT_ALIASES_INPUT 170 | |
1212 + #define EVP_R_PKCS8_UNKNOWN_BROKEN_TYPE 117 | |
1213 + #define EVP_R_PRIVATE_KEY_DECODE_ERROR 145 | |
1214 + #define EVP_R_PRIVATE_KEY_ENCODE_ERROR 146 | |
1215 + #define EVP_R_PUBLIC_KEY_NOT_RSA 106 | |
1216 ++#define EVP_R_TAG_TOO_LARGE 171 | |
1217 + #define EVP_R_TOO_LARGE 164 | |
1218 + #define EVP_R_UNKNOWN_CIPHER 160 | |
1219 + #define EVP_R_UNKNOWN_DIGEST 161 | |
1220 +diff --git a/crypto/evp/evp_aead.c b/crypto/evp/evp_aead.c | |
1221 +new file mode 100644 | |
1222 +index 0000000..91da561 | |
1223 +--- /dev/null | |
1224 ++++ b/crypto/evp/evp_aead.c | |
1225 +@@ -0,0 +1,192 @@ | |
1226 ++/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | |
1227 ++ * All rights reserved. | |
1228 ++ * | |
1229 ++ * This package is an SSL implementation written | |
1230 ++ * by Eric Young (eay@cryptsoft.com). | |
1231 ++ * The implementation was written so as to conform with Netscapes SSL. | |
1232 ++ * | |
1233 ++ * This library is free for commercial and non-commercial use as long as | |
1234 ++ * the following conditions are aheared to. The following conditions | |
1235 ++ * apply to all code found in this distribution, be it the RC4, RSA, | |
1236 ++ * lhash, DES, etc., code; not just the SSL code. The SSL documentation | |
1237 ++ * included with this distribution is covered by the same copyright terms | |
1238 ++ * except that the holder is Tim Hudson (tjh@cryptsoft.com). | |
1239 ++ * | |
1240 ++ * Copyright remains Eric Young's, and as such any Copyright notices in | |
1241 ++ * the code are not to be removed. | |
1242 ++ * If this package is used in a product, Eric Young should be given attributio
n | |
1243 ++ * as the author of the parts of the library used. | |
1244 ++ * This can be in the form of a textual message at program startup or | |
1245 ++ * in documentation (online or textual) provided with the package. | |
1246 ++ * | |
1247 ++ * Redistribution and use in source and binary forms, with or without | |
1248 ++ * modification, are permitted provided that the following conditions | |
1249 ++ * are met: | |
1250 ++ * 1. Redistributions of source code must retain the copyright | |
1251 ++ * notice, this list of conditions and the following disclaimer. | |
1252 ++ * 2. Redistributions in binary form must reproduce the above copyright | |
1253 ++ * notice, this list of conditions and the following disclaimer in the | |
1254 ++ * documentation and/or other materials provided with the distribution. | |
1255 ++ * 3. All advertising materials mentioning features or use of this software | |
1256 ++ * must display the following acknowledgement: | |
1257 ++ * "This product includes cryptographic software written by | |
1258 ++ * Eric Young (eay@cryptsoft.com)" | |
1259 ++ * The word 'cryptographic' can be left out if the rouines from the library | |
1260 ++ * being used are not cryptographic related :-). | |
1261 ++ * 4. If you include any Windows specific code (or a derivative thereof) from | |
1262 ++ * the apps directory (application code) you must include an acknowledgemen
t: | |
1263 ++ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com
)" | |
1264 ++ * | |
1265 ++ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND | |
1266 ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
1267 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
1268 ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |
1269 ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
1270 ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
1271 ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
1272 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
1273 ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
1274 ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
1275 ++ * SUCH DAMAGE. | |
1276 ++ * | |
1277 ++ * The licence and distribution terms for any publically available version or | |
1278 ++ * derivative of this code cannot be changed. i.e. this code cannot simply be | |
1279 ++ * copied and put under another distribution licence | |
1280 ++ * [including the GNU Public Licence.] | |
1281 ++ */ | |
1282 ++ | |
1283 ++#include <limits.h> | |
1284 ++#include <string.h> | |
1285 ++ | |
1286 ++#include <openssl/evp.h> | |
1287 ++#include <openssl/err.h> | |
1288 ++ | |
1289 ++#include "evp_locl.h" | |
1290 ++ | |
1291 ++size_t EVP_AEAD_key_length(const EVP_AEAD *aead) | |
1292 ++ { | |
1293 ++ return aead->key_len; | |
1294 ++ } | |
1295 ++ | |
1296 ++size_t EVP_AEAD_nonce_length(const EVP_AEAD *aead) | |
1297 ++ { | |
1298 ++ return aead->nonce_len; | |
1299 ++ } | |
1300 ++ | |
1301 ++size_t EVP_AEAD_max_overhead(const EVP_AEAD *aead) | |
1302 ++ { | |
1303 ++ return aead->overhead; | |
1304 ++ } | |
1305 ++ | |
1306 ++size_t EVP_AEAD_max_tag_len(const EVP_AEAD *aead) | |
1307 ++ { | |
1308 ++ return aead->max_tag_len; | |
1309 ++ } | |
1310 ++ | |
1311 ++int EVP_AEAD_CTX_init(EVP_AEAD_CTX *ctx, const EVP_AEAD *aead, | |
1312 ++ const unsigned char *key, size_t key_len, | |
1313 ++ size_t tag_len, ENGINE *impl) | |
1314 ++ { | |
1315 ++ ctx->aead = aead; | |
1316 ++ if (key_len != aead->key_len) | |
1317 ++ { | |
1318 ++ EVPerr(EVP_F_EVP_AEAD_CTX_INIT,EVP_R_UNSUPPORTED_KEY_SIZE); | |
1319 ++ return 0; | |
1320 ++ } | |
1321 ++ return aead->init(ctx, key, key_len, tag_len); | |
1322 ++ } | |
1323 ++ | |
1324 ++void EVP_AEAD_CTX_cleanup(EVP_AEAD_CTX *ctx) | |
1325 ++ { | |
1326 ++ if (ctx->aead == NULL) | |
1327 ++ return; | |
1328 ++ ctx->aead->cleanup(ctx); | |
1329 ++ ctx->aead = NULL; | |
1330 ++ } | |
1331 ++ | |
1332 ++/* check_alias returns 0 if |out| points within the buffer determined by |in| | |
1333 ++ * and |in_len| and 1 otherwise. | |
1334 ++ * | |
1335 ++ * When processing, there's only an issue if |out| points within in[:in_len] | |
1336 ++ * and isn't equal to |in|. If that's the case then writing the output will | |
1337 ++ * stomp input that hasn't been read yet. | |
1338 ++ * | |
1339 ++ * This function checks for that case. */ | |
1340 ++static int check_alias(const unsigned char *in, size_t in_len, | |
1341 ++ const unsigned char *out) | |
1342 ++ { | |
1343 ++ if (out <= in) | |
1344 ++ return 1; | |
1345 ++ if (in + in_len <= out) | |
1346 ++ return 1; | |
1347 ++ return 0; | |
1348 ++ } | |
1349 ++ | |
1350 ++ssize_t EVP_AEAD_CTX_seal(const EVP_AEAD_CTX *ctx, | |
1351 ++ unsigned char *out, size_t max_out_len, | |
1352 ++ const unsigned char *nonce, size_t nonce_len, | |
1353 ++ const unsigned char *in, size_t in_len, | |
1354 ++ const unsigned char *ad, size_t ad_len) | |
1355 ++ { | |
1356 ++ size_t possible_out_len = in_len + ctx->aead->overhead; | |
1357 ++ ssize_t r; | |
1358 ++ | |
1359 ++ if (possible_out_len < in_len /* overflow */ || | |
1360 ++ possible_out_len > SSIZE_MAX /* return value cannot be | |
1361 ++ represented */) | |
1362 ++ { | |
1363 ++ EVPerr(EVP_F_AEAD_CTX_SEAL, EVP_R_TOO_LARGE); | |
1364 ++ goto error; | |
1365 ++ } | |
1366 ++ | |
1367 ++ if (!check_alias(in, in_len, out)) | |
1368 ++ { | |
1369 ++ EVPerr(EVP_F_AEAD_CTX_SEAL, EVP_R_OUTPUT_ALIASES_INPUT); | |
1370 ++ goto error; | |
1371 ++ } | |
1372 ++ | |
1373 ++ r = ctx->aead->seal(ctx, out, max_out_len, nonce, nonce_len, | |
1374 ++ in, in_len, ad, ad_len); | |
1375 ++ if (r >= 0) | |
1376 ++ return r; | |
1377 ++ | |
1378 ++error: | |
1379 ++ /* In the event of an error, clear the output buffer so that a caller | |
1380 ++ * that doesn't check the return value doesn't send raw data. */ | |
1381 ++ memset(out, 0, max_out_len); | |
1382 ++ return -1; | |
1383 ++ } | |
1384 ++ | |
1385 ++ssize_t EVP_AEAD_CTX_open(const EVP_AEAD_CTX *ctx, | |
1386 ++ unsigned char *out, size_t max_out_len, | |
1387 ++ const unsigned char *nonce, size_t nonce_len, | |
1388 ++ const unsigned char *in, size_t in_len, | |
1389 ++ const unsigned char *ad, size_t ad_len) | |
1390 ++ { | |
1391 ++ ssize_t r; | |
1392 ++ | |
1393 ++ if (in_len > SSIZE_MAX) | |
1394 ++ { | |
1395 ++ EVPerr(EVP_F_AEAD_CTX_OPEN, EVP_R_TOO_LARGE); | |
1396 ++ goto error; /* may not be able to represent return value. */ | |
1397 ++ } | |
1398 ++ | |
1399 ++ if (!check_alias(in, in_len, out)) | |
1400 ++ { | |
1401 ++ EVPerr(EVP_F_AEAD_CTX_OPEN, EVP_R_OUTPUT_ALIASES_INPUT); | |
1402 ++ goto error; | |
1403 ++ } | |
1404 ++ | |
1405 ++ r = ctx->aead->open(ctx, out, max_out_len, nonce, nonce_len, | |
1406 ++ in, in_len, ad, ad_len); | |
1407 ++ | |
1408 ++ if (r >= 0) | |
1409 ++ return r; | |
1410 ++ | |
1411 ++error: | |
1412 ++ /* In the event of an error, clear the output buffer so that a caller | |
1413 ++ * that doesn't check the return value doesn't try and process bad | |
1414 ++ * data. */ | |
1415 ++ memset(out, 0, max_out_len); | |
1416 ++ return -1; | |
1417 ++ } | |
1418 +diff --git a/crypto/evp/evp_err.c b/crypto/evp/evp_err.c | |
1419 +index 08eab98..c47969c 100644 | |
1420 +--- a/crypto/evp/evp_err.c | |
1421 ++++ b/crypto/evp/evp_err.c | |
1422 +@@ -70,6 +70,11 @@ | |
1423 + | |
1424 + static ERR_STRING_DATA EVP_str_functs[]= | |
1425 + { | |
1426 ++{ERR_FUNC(EVP_F_AEAD_AES_128_GCM_INIT), "AEAD_AES_128_GCM_INIT"}, | |
1427 ++{ERR_FUNC(EVP_F_AEAD_AES_128_GCM_OPEN), "AEAD_AES_128_GCM_OPEN"}, | |
1428 ++{ERR_FUNC(EVP_F_AEAD_AES_128_GCM_SEAL), "AEAD_AES_128_GCM_SEAL"}, | |
1429 ++{ERR_FUNC(EVP_F_AEAD_CTX_OPEN), "AEAD_CTX_OPEN"}, | |
1430 ++{ERR_FUNC(EVP_F_AEAD_CTX_SEAL), "AEAD_CTX_SEAL"}, | |
1431 + {ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"}, | |
1432 + {ERR_FUNC(EVP_F_AESNI_XTS_CIPHER), "AESNI_XTS_CIPHER"}, | |
1433 + {ERR_FUNC(EVP_F_AES_INIT_KEY), "AES_INIT_KEY"}, | |
1434 +@@ -84,6 +89,7 @@ static ERR_STRING_DATA EVP_str_functs[]= | |
1435 + {ERR_FUNC(EVP_F_DSA_PKEY2PKCS8), "DSA_PKEY2PKCS8"}, | |
1436 + {ERR_FUNC(EVP_F_ECDSA_PKEY2PKCS8), "ECDSA_PKEY2PKCS8"}, | |
1437 + {ERR_FUNC(EVP_F_ECKEY_PKEY2PKCS8), "ECKEY_PKEY2PKCS8"}, | |
1438 ++{ERR_FUNC(EVP_F_EVP_AEAD_CTX_INIT), "EVP_AEAD_CTX_init"}, | |
1439 + {ERR_FUNC(EVP_F_EVP_CIPHERINIT_EX), "EVP_CipherInit_ex"}, | |
1440 + {ERR_FUNC(EVP_F_EVP_CIPHER_CTX_COPY), "EVP_CIPHER_CTX_copy"}, | |
1441 + {ERR_FUNC(EVP_F_EVP_CIPHER_CTX_CTRL), "EVP_CIPHER_CTX_ctrl"}, | |
1442 +@@ -202,10 +208,12 @@ static ERR_STRING_DATA EVP_str_reasons[]= | |
1443 + {ERR_REASON(EVP_R_NO_VERIFY_FUNCTION_CONFIGURED),"no verify function configure
d"}, | |
1444 + {ERR_REASON(EVP_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE),"operation not sup
ported for this keytype"}, | |
1445 + {ERR_REASON(EVP_R_OPERATON_NOT_INITIALIZED),"operaton not initialized"}, | |
1446 ++{ERR_REASON(EVP_R_OUTPUT_ALIASES_INPUT) ,"output aliases input"}, | |
1447 + {ERR_REASON(EVP_R_PKCS8_UNKNOWN_BROKEN_TYPE),"pkcs8 unknown broken type"}, | |
1448 + {ERR_REASON(EVP_R_PRIVATE_KEY_DECODE_ERROR),"private key decode error"}, | |
1449 + {ERR_REASON(EVP_R_PRIVATE_KEY_ENCODE_ERROR),"private key encode error"}, | |
1450 + {ERR_REASON(EVP_R_PUBLIC_KEY_NOT_RSA) ,"public key not rsa"}, | |
1451 ++{ERR_REASON(EVP_R_TAG_TOO_LARGE) ,"tag too large"}, | |
1452 + {ERR_REASON(EVP_R_TOO_LARGE) ,"too large"}, | |
1453 + {ERR_REASON(EVP_R_UNKNOWN_CIPHER) ,"unknown cipher"}, | |
1454 + {ERR_REASON(EVP_R_UNKNOWN_DIGEST) ,"unknown digest"}, | |
1455 +diff --git a/crypto/evp/evp_locl.h b/crypto/evp/evp_locl.h | |
1456 +index 08c0a66..c0f9fdf 100644 | |
1457 +--- a/crypto/evp/evp_locl.h | |
1458 ++++ b/crypto/evp/evp_locl.h | |
1459 +@@ -348,6 +348,30 @@ int PKCS5_v2_PBKDF2_keyivgen(EVP_CIPHER_CTX *ctx, const ch
ar *pass, int passlen, | |
1460 + ASN1_TYPE *param, | |
1461 + const EVP_CIPHER *c, const EVP_MD *md, int en_de); | |
1462 + | |
1463 ++/* EVP_AEAD represents a specific AEAD algorithm. */ | |
1464 ++struct evp_aead_st { | |
1465 ++ unsigned char key_len; | |
1466 ++ unsigned char nonce_len; | |
1467 ++ unsigned char overhead; | |
1468 ++ unsigned char max_tag_len; | |
1469 ++ | |
1470 ++ int (*init) (struct evp_aead_ctx_st*, const unsigned char *key, | |
1471 ++ size_t key_len, size_t tag_len); | |
1472 ++ void (*cleanup) (struct evp_aead_ctx_st*); | |
1473 ++ | |
1474 ++ ssize_t (*seal) (const struct evp_aead_ctx_st *ctx, | |
1475 ++ unsigned char *out, size_t max_out_len, | |
1476 ++ const unsigned char *nonce, size_t nonce_len, | |
1477 ++ const unsigned char *in, size_t in_len, | |
1478 ++ const unsigned char *ad, size_t ad_len); | |
1479 ++ | |
1480 ++ ssize_t (*open) (const struct evp_aead_ctx_st *ctx, | |
1481 ++ unsigned char *out, size_t max_out_len, | |
1482 ++ const unsigned char *nonce, size_t nonce_len, | |
1483 ++ const unsigned char *in, size_t in_len, | |
1484 ++ const unsigned char *ad, size_t ad_len); | |
1485 ++}; | |
1486 ++ | |
1487 + #ifdef OPENSSL_FIPS | |
1488 + | |
1489 + #ifdef OPENSSL_DOING_MAKEDEPEND | |
1490 +diff --git a/doc/crypto/EVP_AEAD_CTX_init.pod b/doc/crypto/EVP_AEAD_CTX_init.po
d | |
1491 +new file mode 100644 | |
1492 +index 0000000..20e455d | |
1493 +--- /dev/null | |
1494 ++++ b/doc/crypto/EVP_AEAD_CTX_init.pod | |
1495 +@@ -0,0 +1,96 @@ | |
1496 ++=pod | |
1497 ++ | |
1498 ++=head1 NAME | |
1499 ++ | |
1500 ++EVP_AEAD_CTX_init, EVP_AEAD_CTX_cleanup, EVP_AEAD_CTX_seal, EVP_AEAD_CTX_open
- authenticated encryption functions. | |
1501 ++ | |
1502 ++=head1 SYNOPSIS | |
1503 ++ | |
1504 ++ #include <openssl/evp.h> | |
1505 ++ | |
1506 ++ int EVP_AEAD_CTX_init(EVP_AEAD_CTX *ctx, const EVP_AEAD *aead, | |
1507 ++ const unsigned char *key, size_t key_len, | |
1508 ++ size_t tag_len, ENGINE *impl); | |
1509 ++ void EVP_AEAD_CTX_cleanup(EVP_AEAD_CTX *ctx); | |
1510 ++ ssize_t EVP_AEAD_CTX_seal(const EVP_AEAD_CTX *ctx, | |
1511 ++ unsigned char *out, size_t max_out_len, | |
1512 ++ const unsigned char *nonce, size_t nonce_len, | |
1513 ++ const unsigned char *in, size_t in_len, | |
1514 ++ const unsigned char *ad, size_t ad_len); | |
1515 ++ ssize_t EVP_AEAD_CTX_open(const EVP_AEAD_CTX *ctx, | |
1516 ++ unsigned char *out, size_t max_out_len, | |
1517 ++ const unsigned char *nonce, size_t nonce_len, | |
1518 ++ const unsigned char *in, size_t in_len, | |
1519 ++ const unsigned char *ad, size_t ad_len); | |
1520 ++ | |
1521 ++=head1 DESCRIPTION | |
1522 ++ | |
1523 ++The EVP_AEAD_CTX_init() function initialises an B<EVP_AEAD_CTX> structure and | |
1524 ++performs any precomputation needed to use B<aead> with B<key>. The length of | |
1525 ++the key, B<key_len>, is given in bytes. | |
1526 ++ | |
1527 ++The B<tag_len> argument contains the length of the tags, in bytes, and allows | |
1528 ++for the processing of truncated authenticators. A zero value indicates that th
e | |
1529 ++default tag length should be used and this is defined as | |
1530 ++C<EVP_AEAD_DEFAULT_TAG_LENGTH> in order to make the code clear. Using truncate
d | |
1531 ++tags increases an attacker's chance of creating a valid forgery. Be aware that | |
1532 ++the attacker's chance may increase more than exponentially as would naively be | |
1533 ++expected. | |
1534 ++ | |
1535 ++When no longer needed, the initialised B<EVP_AEAD_CTX> structure must be passe
d | |
1536 ++to EVP_AEAD_CTX_cleanup(), which will deallocate any memory used. | |
1537 ++ | |
1538 ++With an B<EVP_AEAD_CTX> in hand, one can seal and open messages. These | |
1539 ++operations are intended to meet the standard notions of privacy and | |
1540 ++authenticity for authenticated encryption. For formal definitions see I<Bellar
e | |
1541 ++and Namprempre>, "Authenticated encryption: relations among notions and | |
1542 ++analysis of the generic composition paradigm," Lecture Notes in Computer | |
1543 ++Science B<1976> (2000), 531–545, | |
1544 ++L<http://www-cse.ucsd.edu/~mihir/papers/oem.html>. | |
1545 ++ | |
1546 ++When sealing messages, a nonce must be given. The length of the nonce is fixed | |
1547 ++by the AEAD in use and is returned by EVP_AEAD_nonce_length(). I<The nonce mus
t | |
1548 ++be unique for all messages with the same key>. This is critically important - | |
1549 ++nonce reuse may completely undermine the security of the AEAD. Nonces may be | |
1550 ++predictable and public, so long as they are unique. Uniqueness may be achieved | |
1551 ++with a simple counter or, if long enough, may be generated randomly. The nonce | |
1552 ++must be passed into the "open" operation by the receiver so must either be | |
1553 ++implicit (e.g. a counter), or must be transmitted along with the sealed messag
e. | |
1554 ++ | |
1555 ++The "seal" and "open" operations are atomic - an entire message must be | |
1556 ++encrypted or decrypted in a single call. Large messages may have to be split u
p | |
1557 ++in order to accommodate this. When doing so, be mindful of the need not to | |
1558 ++repeat nonces and the possibility that an attacker could duplicate, reorder or | |
1559 ++drop message chunks. For example, using a single key for a given (large) | |
1560 ++message and sealing chunks with nonces counting from zero would be secure as | |
1561 ++long as the number of chunks was securely transmitted. (Otherwise an attacker | |
1562 ++could truncate the message by dropping chunks from the end.) | |
1563 ++ | |
1564 ++The number of chunks could be transmitted by prefixing it to the plaintext, fo
r | |
1565 ++example. This also assumes that no other message would ever use the same key | |
1566 ++otherwise the rule that nonces must be unique for a given key would be | |
1567 ++violated. | |
1568 ++ | |
1569 ++The "seal" and "open" operations also permit additional data to be | |
1570 ++authenticated via the B<ad> parameter. This data is not included in the | |
1571 ++ciphertext and must be identical for both the "seal" and "open" call. This | |
1572 ++permits implicit context to be authenticated but may be C<NULL> if not needed. | |
1573 ++ | |
1573 ++The "seal" and "open" operations may work in place if the B<out> and B<in> | |
1575 ++arguments are equal. They may also be used to shift the data left inside the | |
1576 ++same buffer if B<out> is less than B<in>. However, B<out> may not point inside | |
1577 ++the input data otherwise the input may be overwritten before it has been read. | |
1578 ++This case will cause an error. | |
1579 ++ | |
1580 ++=head1 RETURN VALUES | |
1581 ++ | |
1582 ++The "seal" and "open" operations return an C<ssize_t> with value -1 on error, | |
1583 ++otherwise they return the number of output bytes written. An error will be | |
1584 ++returned if the input length is large enough that the output size exceeds the | |
1585 ++range of a C<ssize_t>. | |
1586 ++ | |
1587 ++=head1 HISTORY | |
1588 ++ | |
1589 ++These functions were first added to OpenSSL 1.0.2. | |
1590 ++ | |
1591 ++=cut | |
1592 +-- | |
1593 +1.8.4.1 | |
1594 + | |
1595 diff -burN android-openssl-lhash2/patches/chacha20poly1305.patch android-openssl
/patches/chacha20poly1305.patch | |
1596 --- android-openssl-lhash2/patches/chacha20poly1305.patch 1969-12-31 19:00
:00.000000000 -0500 | |
1597 +++ android-openssl/patches/chacha20poly1305.patch 2013-11-05 15:15:28.4544
80948 -0500 | |
1598 @@ -0,0 +1,5740 @@ | |
1599 +From 2688f00904e4ffd647afcff69bb8fe6df8c5902b Mon Sep 17 00:00:00 2001 | |
1600 +From: Adam Langley <agl@chromium.org> | |
1601 +Date: Mon, 9 Sep 2013 12:13:24 -0400 | |
1602 +Subject: [PATCH 43/52] chacha20poly1305 | |
1603 + | |
1604 +Add support for Chacha20 + Poly1305. | |
1605 +--- | |
1606 + .gitignore | 1 + | |
1607 + Configure | 56 +- | |
1608 + Makefile.org | 6 +- | |
1609 + apps/speed.c | 64 +- | |
1610 + crypto/chacha/Makefile | 80 ++ | |
1611 + crypto/chacha/chacha.h | 85 ++ | |
1612 + crypto/chacha/chacha_enc.c | 167 +++ | |
1613 + crypto/chacha/chacha_vec.c | 345 +++++++ | |
1614 + crypto/chacha/chachatest.c | 211 ++++ | |
1615 + crypto/evp/Makefile | 35 +- | |
1616 + crypto/evp/e_chacha20poly1305.c | 261 +++++ | |
1617 + crypto/evp/evp.h | 8 + | |
1618 + crypto/evp/evp_err.c | 3 + | |
1619 + crypto/poly1305/Makefile | 81 ++ | |
1620 + crypto/poly1305/poly1305.c | 320 ++++++ | |
1621 + crypto/poly1305/poly1305.h | 88 ++ | |
1622 + crypto/poly1305/poly1305_arm.c | 335 ++++++ | |
1623 + crypto/poly1305/poly1305_arm_asm.s | 2009 ++++++++++++++++++++++++++++++++++++ | |
1624 + crypto/poly1305/poly1305_vec.c | 733 +++++++++++++ | |
1625 + crypto/poly1305/poly1305test.c | 166 +++ | |
1626 + ssl/s3_lib.c | 75 +- | |
1627 + ssl/s3_pkt.c | 5 +- | |
1628 + ssl/ssl.h | 1 + | |
1629 + ssl/ssl_ciph.c | 16 +- | |
1630 + ssl/ssl_locl.h | 10 + | |
1631 + ssl/t1_enc.c | 30 +- | |
1632 + ssl/tls1.h | 8 + | |
1633 + test/Makefile | 23 +- | |
1634 + 28 files changed, 5166 insertions(+), 56 deletions(-) | |
1635 + create mode 100644 crypto/chacha/Makefile | |
1636 + create mode 100644 crypto/chacha/chacha.h | |
1637 + create mode 100644 crypto/chacha/chacha_enc.c | |
1638 + create mode 100644 crypto/chacha/chacha_vec.c | |
1639 + create mode 100644 crypto/chacha/chachatest.c | |
1640 + create mode 100644 crypto/evp/e_chacha20poly1305.c | |
1641 + create mode 100644 crypto/poly1305/Makefile | |
1642 + create mode 100644 crypto/poly1305/poly1305.c | |
1643 + create mode 100644 crypto/poly1305/poly1305.h | |
1644 + create mode 100644 crypto/poly1305/poly1305_arm.c | |
1645 + create mode 100644 crypto/poly1305/poly1305_arm_asm.s | |
1646 + create mode 100644 crypto/poly1305/poly1305_vec.c | |
1647 + create mode 100644 crypto/poly1305/poly1305test.c | |
1648 + | |
1649 +diff --git a/openssl/ssl/ssl_ciph.c b/openssl/ssl/ssl_ciph.c | |
1650 +index db85b29..cebb18a 100644 | |
1651 +--- a/ssl/ssl_ciph.c | |
1652 ++++ b/ssl/ssl_ciph.c | |
1653 +@@ -1442,7 +1442,9 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_list(const SSL_ME
THOD *ssl_method, | |
1654 + ssl_cipher_apply_rule(0, SSL_kEECDH, 0, 0, 0, 0, 0, CIPHER_ADD, -1, &hea
d, &tail); | |
1655 + ssl_cipher_apply_rule(0, SSL_kEECDH, 0, 0, 0, 0, 0, CIPHER_DEL, -1, &hea
d, &tail); | |
1656 + | |
1657 +- /* AES is our preferred symmetric cipher */ | |
1658 ++ /* CHACHA20 is fast and safe on all hardware and is thus our preferred | |
1659 ++ * symmetric cipher, with AES second. */ | |
1660 ++ ssl_cipher_apply_rule(0, 0, 0, SSL_CHACHA20POLY1305, 0, 0, 0, CIPHER_ADD
, -1, &head, &tail); | |
1661 + ssl_cipher_apply_rule(0, 0, 0, SSL_AES, 0, 0, 0, CIPHER_ADD, -1, &head,
&tail); | |
1662 + | |
1663 + /* Temporarily enable everything else for sorting */ | |
1664 +diff --git a/Configure b/Configure | |
1665 +index 9c803dc..1b95384 100755 | |
1666 +--- a/Configure | |
1667 ++++ b/Configure | |
1668 +@@ -124,24 +124,24 @@ my $tlib="-lnsl -lsocket"; | |
1669 + my $bits1="THIRTY_TWO_BIT "; | |
1670 + my $bits2="SIXTY_FOUR_BIT "; | |
1671 + | |
1672 +-my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o cryp
t586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-58
6.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cm
ll-x86.o:ghash-x86.o:"; | |
1673 ++my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o cryp
t586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-58
6.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cm
ll-x86.o:ghash-x86.o:::"; | |
1674 + | |
1675 + my $x86_elf_asm="$x86_asm:elf"; | |
1676 + | |
1677 +-my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64
-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86
_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86
_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:gha
sh-x86_64.o:"; | |
1678 +-my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64
.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::
::ghash-ia64.o::void"; | |
1679 +-my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9
a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sp
arcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; | |
1680 +-my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void"; | |
1681 +-my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash
-alpha.o::void"; | |
1682 +-my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::
::::::"; | |
1683 +-my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha
256-mips.o sha512-mips.o::::::::"; | |
1684 +-my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::a
es-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4
-s390x.o:::::ghash-s390x.o:"; | |
1685 +-my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_c
bc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-
armv4.o::void"; | |
1686 +-my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-
parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash
-parisc.o::32"; | |
1687 +-my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o a
es-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::gh
ash-parisc.o::64"; | |
1688 +-my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.
o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::"; | |
1689 +-my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.
o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::"; | |
1690 +-my $no_asm=":::::::::::::::void"; | |
1691 ++my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64
-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86
_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86
_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:gha
sh-x86_64.o::chacha_vec.o:poly1305_vec.o"; | |
1692 ++my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64
.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::
::ghash-ia64.o::::void"; | |
1693 ++my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9
a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sp
arcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::::void"; | |
1694 ++my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::::void"; | |
1695 ++my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash
-alpha.o::::void"; | |
1696 ++my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::
::::::::"; | |
1697 ++my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha
256-mips.o sha512-mips.o::::::::::"; | |
1698 ++my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::a
es-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4
-s390x.o:::::::ghash-s390x.o:"; | |
1699 ++my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_c
bc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-
armv4.o::chacha_vec.o:poly1305_arm.o poly1305_arm_asm.o:void"; | |
1700 ++my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-
parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash
-parisc.o::::32"; | |
1701 ++my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o a
es-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::gh
ash-parisc.o::::64"; | |
1702 ++my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.
o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::::"; | |
1703 ++my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.
o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::::"; | |
1704 ++my $no_asm=":::::::::::::::::void"; | |
1705 + | |
1706 + # As for $BSDthreads. Idea is to maintain "collective" set of flags, | |
1707 + # which would cover all BSD flavors. -pthread applies to them all, | |
1708 +@@ -152,7 +152,7 @@ my $no_asm=":::::::::::::::void"; | |
1709 + # seems to be sufficient? | |
1710 + my $BSDthreads="-pthread -D_THREAD_SAFE -D_REENTRANT"; | |
1711 + | |
1712 +-#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lfl
ags : $bn_ops : $cpuid_obj : $bn_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj
: $sha1_obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_ob
j : $modes_obj : $engines_obj : $dso_scheme : $shared_target : $shared_cflag : $
shared_ldflag : $shared_extension : $ranlib : $arflags : $multilib | |
1713 ++#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lfl
ags : $bn_ops : $cpuid_obj : $bn_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj
: $sha1_obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_ob
j : $modes_obj : $engines_obj : $chacha_obj : $poly1305_obj : $dso_scheme : $sha
red_target : $shared_cflag : $shared_ldflag : $shared_extension : $ranlib : $arf
lags : $multilib : | |
1714 + | |
1715 + my %table=( | |
1716 + # File 'TABLE' (created by 'make TABLE') contains the data from this list, | |
1717 +@@ -647,6 +647,8 @@ my $idx_wp_obj = $idx++; | |
1718 + my $idx_cmll_obj = $idx++; | |
1719 + my $idx_modes_obj = $idx++; | |
1720 + my $idx_engines_obj = $idx++; | |
1721 ++my $idx_chacha_obj = $idx++; | |
1722 ++my $idx_poly1305_obj = $idx++; | |
1723 + my $idx_perlasm_scheme = $idx++; | |
1724 + my $idx_dso_scheme = $idx++; | |
1725 + my $idx_shared_target = $idx++; | |
1726 +@@ -692,6 +694,8 @@ my $aes_enc="aes_core.o aes_cbc.o"; | |
1727 + my $bf_enc ="bf_enc.o"; | |
1728 + my $cast_enc="c_enc.o"; | |
1729 + my $rc4_enc="rc4_enc.o rc4_skey.o"; | |
1730 ++my $chacha_enc="chacha_enc.o"; | |
1731 ++my $poly1305 ="poly1305.o"; | |
1732 + my $rc5_enc="rc5_enc.o"; | |
1733 + my $md5_obj=""; | |
1734 + my $sha1_obj=""; | |
1735 +@@ -1144,7 +1148,7 @@ $openssldir=$prefix . "/" . $openssldir if $openssldir !~
/(^\/|^[a-zA-Z]:[\\\/] | |
1736 + | |
1737 + print "IsMK1MF=$IsMK1MF\n"; | |
1738 + | |
1739 +-my @fields = split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); | |
1740 ++my @fields = split(/\s*:\s*/,$table{$target} . ":" x 31 , -1); | |
1741 + my $cc = $fields[$idx_cc]; | |
1742 + # Allow environment CC to override compiler... | |
1743 + if($ENV{CC}) { | |
1744 +@@ -1181,6 +1185,8 @@ my $ranlib = $ENV{'RANLIB'} || $fields[$idx_ranlib]; | |
1745 + my $ar = $ENV{'AR'} || "ar"; | |
1746 + my $arflags = $fields[$idx_arflags]; | |
1747 + my $multilib = $fields[$idx_multilib]; | |
1748 ++my $chacha_obj = $fields[$idx_chacha_obj]; | |
1749 ++my $poly1305_obj = $fields[$idx_poly1305_obj]; | |
1750 + | |
1751 + # if $prefix/lib$multilib is not an existing directory, then | |
1752 + # assume that it's not searched by linker automatically, in | |
1753 +@@ -1477,6 +1483,8 @@ $des_obj=$des_enc unless ($des_obj =~ /\.o$/); | |
1754 + $bf_obj=$bf_enc unless ($bf_obj =~ /\.o$/); | |
1755 + $cast_obj=$cast_enc unless ($cast_obj =~ /\.o$/); | |
1756 + $rc4_obj=$rc4_enc unless ($rc4_obj =~ /\.o$/); | |
1757 ++$chacha_obj=$chacha_enc unless ($chacha_obj =~ /\.o$/); | |
1758 ++$poly1305_obj=$poly1305 unless ($poly1305_obj =~ /\.o$/); | |
1759 + $rc5_obj=$rc5_enc unless ($rc5_obj =~ /\.o$/); | |
1760 + if ($sha1_obj =~ /\.o$/) | |
1761 + { | |
1762 +@@ -1637,6 +1645,8 @@ while (<IN>) | |
1763 + s/^BF_ENC=.*$/BF_ENC= $bf_obj/; | |
1764 + s/^CAST_ENC=.*$/CAST_ENC= $cast_obj/; | |
1765 + s/^RC4_ENC=.*$/RC4_ENC= $rc4_obj/; | |
1766 ++ s/^CHACHA_ENC=.*$/CHACHA_ENC= $chacha_obj/; | |
1767 ++ s/^POLY1305=.*$/POLY1305= $poly1305_obj/; | |
1768 + s/^RC5_ENC=.*$/RC5_ENC= $rc5_obj/; | |
1769 + s/^MD5_ASM_OBJ=.*$/MD5_ASM_OBJ= $md5_obj/; | |
1770 + s/^SHA1_ASM_OBJ=.*$/SHA1_ASM_OBJ= $sha1_obj/; | |
1771 +@@ -1698,6 +1708,8 @@ print "AES_ENC =$aes_obj\n"; | |
1772 + print "BF_ENC =$bf_obj\n"; | |
1773 + print "CAST_ENC =$cast_obj\n"; | |
1774 + print "RC4_ENC =$rc4_obj\n"; | |
1775 ++print "CHACHA_ENC =$chacha_obj\n"; | |
1776 ++print "POLY1305 =$poly1305_obj\n"; | |
1777 + print "RC5_ENC =$rc5_obj\n"; | |
1778 + print "MD5_OBJ_ASM =$md5_obj\n"; | |
1779 + print "SHA1_OBJ_ASM =$sha1_obj\n"; | |
1780 +@@ -2096,11 +2108,11 @@ sub print_table_entry | |
1781 + | |
1782 + (my $cc,my $cflags,my $unistd,my $thread_cflag,my $sys_id,my $lflags, | |
1783 + my $bn_ops,my $cpuid_obj,my $bn_obj,my $des_obj,my $aes_obj, my $bf_obj, | |
1784 +- my $md5_obj,my $sha1_obj,my $cast_obj,my $rc4_obj,my $rmd160_obj, | |
1785 +- my $rc5_obj,my $wp_obj,my $cmll_obj,my $modes_obj, my $engines_obj, | |
1786 ++ my $md5_obj,my $sha1_obj,my $cast_obj,my $rc4_obj,my $chacha_obj,my $pol
y1305_obj, | |
1787 ++ my $rmd160_obj, my $rc5_obj,my $wp_obj,my $cmll_obj,my $modes_obj, my $e
ngines_obj, | |
1788 + my $perlasm_scheme,my $dso_scheme,my $shared_target,my $shared_cflag, | |
1789 + my $shared_ldflag,my $shared_extension,my $ranlib,my $arflags,my $multil
ib)= | |
1790 +- split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); | |
1791 ++ split(/\s*:\s*/,$table{$target} . ":" x 31 , -1); | |
1792 + | |
1793 + print <<EOF | |
1794 + | |
1795 +@@ -2121,6 +2133,8 @@ sub print_table_entry | |
1796 + \$sha1_obj = $sha1_obj | |
1797 + \$cast_obj = $cast_obj | |
1798 + \$rc4_obj = $rc4_obj | |
1799 ++\$chacha_obj = $chacha_obj | |
1800 ++\$poly1305_obj = $poly1305_obj | |
1801 + \$rmd160_obj = $rmd160_obj | |
1802 + \$rc5_obj = $rc5_obj | |
1803 + \$wp_obj = $wp_obj | |
1804 +@@ -2150,7 +2164,7 @@ sub test_sanity | |
1805 + | |
1806 + foreach $target (sort keys %table) | |
1807 + { | |
1808 +- @fields = split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); | |
1809 ++ @fields = split(/\s*:\s*/,$table{$target} . ":" x 31 , -1); | |
1810 + | |
1811 + if ($fields[$idx_dso_scheme-1] =~ /^(beos|dl|dlfcn|win32|vms)$/) | |
1812 + { | |
1813 +diff --git a/Makefile.org b/Makefile.org | |
1814 +index 2db31ea..919466d 100644 | |
1815 +--- a/Makefile.org | |
1816 ++++ b/Makefile.org | |
1817 +@@ -94,6 +94,8 @@ BF_ENC= bf_enc.o | |
1818 + CAST_ENC= c_enc.o | |
1819 + RC4_ENC= rc4_enc.o | |
1820 + RC5_ENC= rc5_enc.o | |
1821 ++CHACHA_ENC= chacha_enc.o | |
1822 ++POLY1305= poly1305.o | |
1823 + MD5_ASM_OBJ= | |
1824 + SHA1_ASM_OBJ= | |
1825 + RMD160_ASM_OBJ= | |
1826 +@@ -147,7 +149,7 @@ SDIRS= \ | |
1827 + bn ec rsa dsa ecdsa dh ecdh dso engine \ | |
1828 + buffer bio stack lhash rand err \ | |
1829 + evp asn1 pem x509 x509v3 conf txt_db pkcs7 pkcs12 comp ocsp ui krb5 \ | |
1830 +- cms pqueue ts jpake srp store cmac | |
1831 ++ cms pqueue ts jpake srp store cmac poly1305 chacha | |
1832 + # keep in mind that the above list is adjusted by ./Configure | |
1833 + # according to no-xxx arguments... | |
1834 + | |
1835 +@@ -232,6 +234,8 @@ BUILDENV= PLATFORM='$(PLATFORM)' PROCESSOR='$(PROCESSOR)'
\ | |
1836 + WP_ASM_OBJ='$(WP_ASM_OBJ)' \ | |
1837 + MODES_ASM_OBJ='$(MODES_ASM_OBJ)' \ | |
1838 + ENGINES_ASM_OBJ='$(ENGINES_ASM_OBJ)' \ | |
1839 ++ CHACHA_ENC='$(CHACHA_ENC)' \ | |
1840 ++ POLY1305='$(POLY1305)' \ | |
1841 + PERLASM_SCHEME='$(PERLASM_SCHEME)' \ | |
1842 + FIPSLIBDIR='${FIPSLIBDIR}' \ | |
1843 + FIPSDIR='${FIPSDIR}' \ | |
1844 +diff --git a/crypto/chacha/Makefile b/crypto/chacha/Makefile | |
1845 +new file mode 100644 | |
1846 +index 0000000..289933b | |
1847 +--- /dev/null | |
1848 ++++ b/crypto/chacha/Makefile | |
1849 +@@ -0,0 +1,80 @@ | |
1850 ++# | |
1851 ++# OpenSSL/crypto/chacha/Makefile | |
1852 ++# | |
1853 ++ | |
1854 ++DIR= chacha | |
1855 ++TOP= ../.. | |
1856 ++CC= cc | |
1857 ++CPP= $(CC) -E | |
1858 ++INCLUDES= | |
1859 ++CFLAG=-g | |
1860 ++AR= ar r | |
1861 ++ | |
1862 ++CFLAGS= $(INCLUDES) $(CFLAG) | |
1863 ++ASFLAGS= $(INCLUDES) $(ASFLAG) | |
1864 ++AFLAGS= $(ASFLAGS) | |
1865 ++ | |
1866 ++CHACHA_ENC=chacha_enc.o | |
1867 ++ | |
1868 ++GENERAL=Makefile | |
1869 ++TEST=chachatest.o | |
1870 ++APPS= | |
1871 ++ | |
1872 ++LIB=$(TOP)/libcrypto.a | |
1873 ++LIBSRC= | |
1874 ++LIBOBJ=$(CHACHA_ENC) | |
1875 ++ | |
1876 ++SRC= $(LIBSRC) | |
1877 ++ | |
1878 ++EXHEADER=chacha.h | |
1879 ++HEADER= $(EXHEADER) | |
1880 ++ | |
1881 ++ALL= $(GENERAL) $(SRC) $(HEADER) | |
1882 ++ | |
1883 ++top: | |
1884 ++ (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all) | |
1885 ++ | |
1886 ++all: lib | |
1887 ++ | |
1888 ++lib: $(LIBOBJ) | |
1889 ++ $(AR) $(LIB) $(LIBOBJ) | |
1890 ++ $(RANLIB) $(LIB) || echo Never mind. | |
1891 ++ @touch lib | |
1892 ++ | |
1893 ++files: | |
1894 ++ $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO | |
1895 ++ | |
1896 ++links: | |
1897 ++ @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER) | |
1898 ++ @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST) | |
1899 ++ @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS) | |
1900 ++ | |
1901 ++install: | |
1902 ++ @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile... | |
1903 ++ @headerlist="$(EXHEADER)"; for i in $$headerlist ; \ | |
1904 ++ do \ | |
1905 ++ (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \ | |
1906 ++ chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \ | |
1907 ++ done; | |
1908 ++ | |
1909 ++tags: | |
1910 ++ ctags $(SRC) | |
1911 ++ | |
1912 ++tests: | |
1913 ++ | |
1914 ++lint: | |
1915 ++ lint -DLINT $(INCLUDES) $(SRC)>fluff | |
1916 ++ | |
1917 ++depend: | |
1918 ++ @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... | |
1919 ++ $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) | |
1920 ++ | |
1921 ++dclean: | |
1922 ++ $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKE
FILE) >Makefile.new | |
1923 ++ mv -f Makefile.new $(MAKEFILE) | |
1924 ++ | |
1925 ++clean: | |
1926 ++ rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff | |
1927 ++ | |
1928 ++# DO NOT DELETE THIS LINE -- make depend depends on it. | |
1929 ++ | |
1930 +diff --git a/crypto/chacha/chacha.h b/crypto/chacha/chacha.h | |
1931 +new file mode 100644 | |
1932 +index 0000000..d56519d | |
1933 +--- /dev/null | |
1934 ++++ b/crypto/chacha/chacha.h | |
1935 +@@ -0,0 +1,85 @@ | |
1936 ++/* | |
1937 ++ * Chacha stream algorithm. | |
1938 ++ * | |
1939 ++ * Created on: Jun, 2013 | |
1940 ++ * Author: Elie Bursztein (elieb@google.com) | |
1941 ++ * | |
1942 ++ * Adapted from the estream code by D. Bernstein. | |
1943 ++ */ | |
1944 ++/* ==================================================================== | |
1945 ++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
1946 ++ * | |
1947 ++ * Redistribution and use in source and binary forms, with or without | |
1948 ++ * modification, are permitted provided that the following conditions | |
1949 ++ * are met: | |
1950 ++ * | |
1951 ++ * 1. Redistributions of source code must retain the above copyright | |
1952 ++ * notice, this list of conditions and the following disclaimer. | |
1953 ++ * | |
1954 ++ * 2. Redistributions in binary form must reproduce the above copyright | |
1955 ++ * notice, this list of conditions and the following disclaimer in | |
1956 ++ * the documentation and/or other materials provided with the | |
1957 ++ * distribution. | |
1958 ++ * | |
1959 ++ * 3. All advertising materials mentioning features or use of this | |
1960 ++ * software must display the following acknowledgment: | |
1961 ++ * "This product includes software developed by the OpenSSL Project | |
1962 ++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
1963 ++ * | |
1964 ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
1965 ++ * endorse or promote products derived from this software without | |
1966 ++ * prior written permission. For written permission, please contact | |
1967 ++ * licensing@OpenSSL.org. | |
1968 ++ * | |
1969 ++ * 5. Products derived from this software may not be called "OpenSSL" | |
1970 ++ * nor may "OpenSSL" appear in their names without prior written | |
1971 ++ * permission of the OpenSSL Project. | |
1972 ++ * | |
1973 ++ * 6. Redistributions of any form whatsoever must retain the following | |
1974 ++ * acknowledgment: | |
1975 ++ * "This product includes software developed by the OpenSSL Project | |
1976 ++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
1977 ++ * | |
1978 ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
1979 ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
1980 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
1981 ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
1982 ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
1983 ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
1984 ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
1985 ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
1986 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
1987 ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
1988 ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
1989 ++ * OF THE POSSIBILITY OF SUCH DAMAGE. | |
1990 ++ * ==================================================================== | |
1991 ++ */ | |
1992 ++#ifndef HEADER_CHACHA_H | |
1993 ++#define HEADER_CHACHA_H | |
1994 ++ | |
1995 ++#include <openssl/opensslconf.h> | |
1996 ++ | |
1997 ++#if defined(OPENSSL_NO_CHACHA) | |
1998 ++#error ChaCha support is disabled. | |
1999 ++#endif | |
2000 ++ | |
2001 ++#include <stddef.h> | |
2002 ++ | |
2003 ++#ifdef __cplusplus | |
2004 ++extern "C" { | |
2005 ++#endif | |
2006 ++ | |
2007 ++/* CRYPTO_chacha_20 encrypts |in_len| bytes from |in| with the given key and | |
2008 ++ * nonce and writes the result to |out|, which may be equal to |in|. The | |
2009 ++ * initial block counter is specified by |counter|. */ | |
2010 ++void CRYPTO_chacha_20(unsigned char *out, | |
2011 ++ const unsigned char *in, size_t in_len, | |
2012 ++ const unsigned char key[32], | |
2013 ++ const unsigned char nonce[8], | |
2014 ++ size_t counter); | |
2015 ++ | |
2016 ++#ifdef __cplusplus | |
2017 ++} | |
2018 ++#endif | |
2019 ++ | |
2020 ++#endif | |
2021 +diff --git a/crypto/chacha/chacha_enc.c b/crypto/chacha/chacha_enc.c | |
2022 +new file mode 100644 | |
2023 +index 0000000..54d1ca3 | |
2024 +--- /dev/null | |
2025 ++++ b/crypto/chacha/chacha_enc.c | |
2026 +@@ -0,0 +1,167 @@ | |
2027 ++/* | |
2028 ++ * Chacha stream algorithm. | |
2029 ++ * | |
2030 ++ * Created on: Jun, 2013 | |
2031 ++ * Author: Elie Bursztein (elieb@google.com) | |
2032 ++ * | |
2033 ++ * Adapted from the estream code by D. Bernstein. | |
2034 ++ */ | |
2035 ++/* ==================================================================== | |
2036 ++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
2037 ++ * | |
2038 ++ * Redistribution and use in source and binary forms, with or without | |
2039 ++ * modification, are permitted provided that the following conditions | |
2040 ++ * are met: | |
2041 ++ * | |
2042 ++ * 1. Redistributions of source code must retain the above copyright | |
2043 ++ * notice, this list of conditions and the following disclaimer. | |
2044 ++ * | |
2045 ++ * 2. Redistributions in binary form must reproduce the above copyright | |
2046 ++ * notice, this list of conditions and the following disclaimer in | |
2047 ++ * the documentation and/or other materials provided with the | |
2048 ++ * distribution. | |
2049 ++ * | |
2050 ++ * 3. All advertising materials mentioning features or use of this | |
2051 ++ * software must display the following acknowledgment: | |
2052 ++ * "This product includes software developed by the OpenSSL Project | |
2053 ++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
2054 ++ * | |
2055 ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
2056 ++ * endorse or promote products derived from this software without | |
2057 ++ * prior written permission. For written permission, please contact | |
2058 ++ * licensing@OpenSSL.org. | |
2059 ++ * | |
2060 ++ * 5. Products derived from this software may not be called "OpenSSL" | |
2061 ++ * nor may "OpenSSL" appear in their names without prior written | |
2062 ++ * permission of the OpenSSL Project. | |
2063 ++ * | |
2064 ++ * 6. Redistributions of any form whatsoever must retain the following | |
2065 ++ * acknowledgment: | |
2066 ++ * "This product includes software developed by the OpenSSL Project | |
2067 ++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
2068 ++ * | |
2069 ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
2070 ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
2071 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
2072 ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
2073 ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
2074 ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
2075 ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
2076 ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
2077 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
2078 ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
2079 ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
2080 ++ * OF THE POSSIBILITY OF SUCH DAMAGE. | |
2081 ++ * ==================================================================== | |
2082 ++ */ | |
2083 ++ | |
2084 ++#include <stdint.h> | |
2085 ++#include <string.h> | |
2086 ++#include <openssl/opensslconf.h> | |
2087 ++ | |
2088 ++#if !defined(OPENSSL_NO_CHACHA) | |
2089 ++ | |
2090 ++#include <openssl/chacha.h> | |
2091 ++ | |
2092 ++/* sigma contains the ChaCha constants, which happen to be an ASCII string. */ | |
2093 ++static const char sigma[16] = "expand 32-byte k"; | |
2094 ++ | |
2095 ++#define ROTATE(v, n) (((v) << (n)) | ((v) >> (32 - (n)))) | |
2096 ++#define XOR(v, w) ((v) ^ (w)) | |
2097 ++#define PLUS(x, y) ((x) + (y)) | |
2098 ++#define PLUSONE(v) (PLUS((v), 1)) | |
2099 ++ | |
2100 ++#define U32TO8_LITTLE(p, v) \ | |
2101 ++ { (p)[0] = (v >> 0) & 0xff; (p)[1] = (v >> 8) & 0xff; \ | |
2102 ++ (p)[2] = (v >> 16) & 0xff; (p)[3] = (v >> 24) & 0xff; } | |
2103 ++#define U8TO32_LITTLE(p) \ | |
2104 ++ (((uint32_t)((p)[0]) ) | ((uint32_t)((p)[1]) << 8) | \ | |
2105 ++ ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24) ) | |
2106 ++ | |
2107 ++/* QUARTERROUND updates a, b, c, d with a ChaCha "quarter" round. */ | |
2108 ++#define QUARTERROUND(a,b,c,d) \ | |
2109 ++ x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \ | |
2110 ++ x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \ | |
2111 ++ x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \ | |
2112 ++ x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7); | |
2113 ++ | |
2114 ++typedef unsigned int uint32_t; | |
2115 ++ | |
2116 ++/* chacha_core performs |num_rounds| rounds of ChaCha20 on the input words in | |
2117 ++ * |input| and writes the 64 output bytes to |output|. */ | |
2118 ++static void chacha_core(unsigned char output[64], const uint32_t input[16], | |
2119 ++ int num_rounds) | |
2120 ++ { | |
2121 ++ uint32_t x[16]; | |
2122 ++ int i; | |
2123 ++ | |
2124 ++ memcpy(x, input, sizeof(uint32_t) * 16); | |
2125 ++ for (i = 20; i > 0; i -= 2) | |
2126 ++ { | |
2127 ++ QUARTERROUND( 0, 4, 8,12) | |
2128 ++ QUARTERROUND( 1, 5, 9,13) | |
2129 ++ QUARTERROUND( 2, 6,10,14) | |
2130 ++ QUARTERROUND( 3, 7,11,15) | |
2131 ++ QUARTERROUND( 0, 5,10,15) | |
2132 ++ QUARTERROUND( 1, 6,11,12) | |
2133 ++ QUARTERROUND( 2, 7, 8,13) | |
2134 ++ QUARTERROUND( 3, 4, 9,14) | |
2135 ++ } | |
2136 ++ | |
2137 ++ for (i = 0; i < 16; ++i) | |
2138 ++ x[i] = PLUS(x[i], input[i]); | |
2139 ++ for (i = 0; i < 16; ++i) | |
2140 ++ U32TO8_LITTLE(output + 4 * i, x[i]); | |
2141 ++ } | |
2142 ++ | |
2143 ++void CRYPTO_chacha_20(unsigned char *out, | |
2144 ++ const unsigned char *in, size_t in_len, | |
2145 ++ const unsigned char key[32], | |
2146 ++ const unsigned char nonce[8], | |
2147 ++ size_t counter) | |
2148 ++ { | |
2149 ++ uint32_t input[16]; | |
2150 ++ unsigned char buf[64]; | |
2151 ++ size_t todo, i; | |
2152 ++ | |
2153 ++ input[0] = U8TO32_LITTLE(sigma + 0); | |
2154 ++ input[1] = U8TO32_LITTLE(sigma + 4); | |
2155 ++ input[2] = U8TO32_LITTLE(sigma + 8); | |
2156 ++ input[3] = U8TO32_LITTLE(sigma + 12); | |
2157 ++ | |
2158 ++ input[4] = U8TO32_LITTLE(key + 0); | |
2159 ++ input[5] = U8TO32_LITTLE(key + 4); | |
2160 ++ input[6] = U8TO32_LITTLE(key + 8); | |
2161 ++ input[7] = U8TO32_LITTLE(key + 12); | |
2162 ++ | |
2163 ++ input[8] = U8TO32_LITTLE(key + 16); | |
2164 ++ input[9] = U8TO32_LITTLE(key + 20); | |
2165 ++ input[10] = U8TO32_LITTLE(key + 24); | |
2166 ++ input[11] = U8TO32_LITTLE(key + 28); | |
2167 ++ | |
2168 ++ input[12] = counter; | |
2169 ++ input[13] = ((uint64_t) counter) >> 32; | |
2170 ++ input[14] = U8TO32_LITTLE(nonce + 0); | |
2171 ++ input[15] = U8TO32_LITTLE(nonce + 4); | |
2172 ++ | |
2173 ++ while (in_len > 0) | |
2174 ++ { | |
2175 ++ todo = sizeof(buf); | |
2176 ++ if (in_len < todo) | |
2177 ++ todo = in_len; | |
2178 ++ | |
2179 ++ chacha_core(buf, input, 20); | |
2180 ++ for (i = 0; i < todo; i++) | |
2181 ++ out[i] = in[i] ^ buf[i]; | |
2182 ++ | |
2183 ++ out += todo; | |
2184 ++ in += todo; | |
2185 ++ in_len -= todo; | |
2186 ++ | |
2187 ++ input[12]++; | |
2188 ++ if (input[12] == 0) | |
2189 ++ input[13]++; | |
2190 ++ } | |
2191 ++ } | |
2192 ++ | |
2193 ++#endif /* !OPENSSL_NO_CHACHA */ | |
2194 +diff --git a/crypto/chacha/chacha_vec.c b/crypto/chacha/chacha_vec.c | |
2195 +new file mode 100644 | |
2196 +index 0000000..33b2238 | |
2197 +--- /dev/null | |
2198 ++++ b/crypto/chacha/chacha_vec.c | |
2199 +@@ -0,0 +1,345 @@ | |
2200 ++/* ==================================================================== | |
2201 ++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
2202 ++ * | |
2203 ++ * Redistribution and use in source and binary forms, with or without | |
2204 ++ * modification, are permitted provided that the following conditions | |
2205 ++ * are met: | |
2206 ++ * | |
2207 ++ * 1. Redistributions of source code must retain the above copyright | |
2208 ++ * notice, this list of conditions and the following disclaimer. | |
2209 ++ * | |
2210 ++ * 2. Redistributions in binary form must reproduce the above copyright | |
2211 ++ * notice, this list of conditions and the following disclaimer in | |
2212 ++ * the documentation and/or other materials provided with the | |
2213 ++ * distribution. | |
2214 ++ * | |
2215 ++ * 3. All advertising materials mentioning features or use of this | |
2216 ++ * software must display the following acknowledgment: | |
2217 ++ * "This product includes software developed by the OpenSSL Project | |
2218 ++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
2219 ++ * | |
2220 ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
2221 ++ * endorse or promote products derived from this software without | |
2222 ++ * prior written permission. For written permission, please contact | |
2223 ++ * licensing@OpenSSL.org. | |
2224 ++ * | |
2225 ++ * 5. Products derived from this software may not be called "OpenSSL" | |
2226 ++ * nor may "OpenSSL" appear in their names without prior written | |
2227 ++ * permission of the OpenSSL Project. | |
2228 ++ * | |
2229 ++ * 6. Redistributions of any form whatsoever must retain the following | |
2230 ++ * acknowledgment: | |
2231 ++ * "This product includes software developed by the OpenSSL Project | |
2232 ++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
2233 ++ * | |
2234 ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
2235 ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
2236 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
2237 ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
2238 ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
2239 ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
2240 ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
2241 ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
2242 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
2243 ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
2244 ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
2245 ++ * OF THE POSSIBILITY OF SUCH DAMAGE. | |
2246 ++ * ==================================================================== | |
2247 ++ */ | |
2248 ++ | |
2249 ++/* This implementation is by Ted Krovetz and was submitted to SUPERCOP and | |
2250 ++ * marked as public domain. It was been altered to allow for non-aligned input
s | |
2251 ++ * and to allow the block counter to be passed in specifically. */ | |
2252 ++ | |
2253 ++#include <string.h> | |
2254 ++#include <stdint.h> | |
2255 ++#include <openssl/opensslconf.h> | |
2256 ++ | |
2257 ++#if !defined(OPENSSL_NO_CHACHA) | |
2258 ++ | |
2259 ++#include <openssl/chacha.h> | |
2260 ++ | |
2261 ++#ifndef CHACHA_RNDS | |
2262 ++#define CHACHA_RNDS 20 /* 8 (high speed), 20 (conservative), 12 (middle) */ | |
2263 ++#endif | |
2264 ++ | |
2265 ++/* Architecture-neutral way to specify 16-byte vector of ints */ | |
2266 ++typedef unsigned vec __attribute__ ((vector_size (16))); | |
2267 ++ | |
2268 ++/* This implementation is designed for Neon, SSE and AltiVec machines. The | |
2269 ++ * following specify how to do certain vector operations efficiently on | |
2270 ++ * each architecture, using intrinsics. | |
2271 ++ * This implementation supports parallel processing of multiple blocks, | |
2272 ++ * including potentially using general-purpose registers. | |
2273 ++ */ | |
2274 ++#if __ARM_NEON__ | |
2275 ++#include <arm_neon.h> | |
2276 ++#define GPR_TOO 1 | |
2277 ++#define VBPI 2 | |
2278 ++#define ONE (vec)vsetq_lane_u32(1,vdupq_n_u32(0),0) | |
2279 ++#define LOAD(m) (vec)(*((vec*)(m))) | |
2280 ++#define STORE(m,r) (*((vec*)(m))) = (r) | |
2281 ++#define ROTV1(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,1) | |
2282 ++#define ROTV2(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,2) | |
2283 ++#define ROTV3(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,3) | |
2284 ++#define ROTW16(x) (vec)vrev32q_u16((uint16x8_t)x) | |
2285 ++#if __clang__ | |
2286 ++#define ROTW7(x) (x << ((vec){ 7, 7, 7, 7})) ^ (x >> ((vec){25,25,25,25})) | |
2287 ++#define ROTW8(x) (x << ((vec){ 8, 8, 8, 8})) ^ (x >> ((vec){24,24,24,24})) | |
2288 ++#define ROTW12(x) (x << ((vec){12,12,12,12})) ^ (x >> ((vec){20,20,20,20})) | |
2289 ++#else | |
2290 ++#define ROTW7(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,7),(uint32x4_t)x,
25) | |
2291 ++#define ROTW8(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,8),(uint32x4_t)x,
24) | |
2292 ++#define ROTW12(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,12),(uint32x4_t)x
,20) | |
2293 ++#endif | |
2294 ++#elif __SSE2__ | |
2295 ++#include <emmintrin.h> | |
2296 ++#define GPR_TOO 0 | |
2297 ++#if __clang__ | |
2298 ++#define VBPI 4 | |
2299 ++#else | |
2300 ++#define VBPI 3 | |
2301 ++#endif | |
2302 ++#define ONE (vec)_mm_set_epi32(0,0,0,1) | |
2303 ++#define LOAD(m) (vec)_mm_loadu_si128((__m128i*)(m)) | |
2304 ++#define STORE(m,r) _mm_storeu_si128((__m128i*)(m), (__m128i) (r)) | |
2305 ++#define ROTV1(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(0,3,2,1)) | |
2306 ++#define ROTV2(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(1,0,3,2)) | |
2307 ++#define ROTV3(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(2,1,0,3)) | |
2308 ++#define ROTW7(x) (vec)(_mm_slli_epi32((__m128i)x, 7) ^ _mm_srli_epi32((__m128
i)x,25)) | |
2309 ++#define ROTW12(x) (vec)(_mm_slli_epi32((__m128i)x,12) ^ _mm_srli_epi32((__m128
i)x,20)) | |
2310 ++#if __SSSE3__ | |
2311 ++#include <tmmintrin.h> | |
2312 ++#define ROTW8(x) (vec)_mm_shuffle_epi8((__m128i)x,_mm_set_epi8(14,13,12,15,10
,9,8,11,6,5,4,7,2,1,0,3)) | |
2313 ++#define ROTW16(x) (vec)_mm_shuffle_epi8((__m128i)x,_mm_set_epi8(13,12,15,14,9,
8,11,10,5,4,7,6,1,0,3,2)) | |
2314 ++#else | |
2315 ++#define ROTW8(x) (vec)(_mm_slli_epi32((__m128i)x, 8) ^ _mm_srli_epi32((__m128
i)x,24)) | |
2316 ++#define ROTW16(x) (vec)(_mm_slli_epi32((__m128i)x,16) ^ _mm_srli_epi32((__m128
i)x,16)) | |
2317 ++#endif | |
2318 ++#else | |
2319 ++#error -- Implementation supports only machines with neon or SSE2 | |
2320 ++#endif | |
2321 ++ | |
2322 ++#ifndef REVV_BE | |
2323 ++#define REVV_BE(x) (x) | |
2324 ++#endif | |
2325 ++ | |
2326 ++#ifndef REVW_BE | |
2327 ++#define REVW_BE(x) (x) | |
2328 ++#endif | |
2329 ++ | |
2330 ++#define BPI (VBPI + GPR_TOO) /* Blocks computed per loop iteration */ | |
2331 ++ | |
2332 ++#define DQROUND_VECTORS(a,b,c,d) \ | |
2333 ++ a += b; d ^= a; d = ROTW16(d); \ | |
2334 ++ c += d; b ^= c; b = ROTW12(b); \ | |
2335 ++ a += b; d ^= a; d = ROTW8(d); \ | |
2336 ++ c += d; b ^= c; b = ROTW7(b); \ | |
2337 ++ b = ROTV1(b); c = ROTV2(c); d = ROTV3(d); \ | |
2338 ++ a += b; d ^= a; d = ROTW16(d); \ | |
2339 ++ c += d; b ^= c; b = ROTW12(b); \ | |
2340 ++ a += b; d ^= a; d = ROTW8(d); \ | |
2341 ++ c += d; b ^= c; b = ROTW7(b); \ | |
2342 ++ b = ROTV3(b); c = ROTV2(c); d = ROTV1(d); | |
2343 ++ | |
2344 ++#define QROUND_WORDS(a,b,c,d) \ | |
2345 ++ a = a+b; d ^= a; d = d<<16 | d>>16; \ | |
2346 ++ c = c+d; b ^= c; b = b<<12 | b>>20; \ | |
2347 ++ a = a+b; d ^= a; d = d<< 8 | d>>24; \ | |
2348 ++ c = c+d; b ^= c; b = b<< 7 | b>>25; | |
2349 ++ | |
2350 ++#define WRITE_XOR(in, op, d, v0, v1, v2, v3) \ | |
2351 ++ STORE(op + d + 0, LOAD(in + d + 0) ^ REVV_BE(v0)); \ | |
2352 ++ STORE(op + d + 4, LOAD(in + d + 4) ^ REVV_BE(v1)); \ | |
2353 ++ STORE(op + d + 8, LOAD(in + d + 8) ^ REVV_BE(v2)); \ | |
2354 ++ STORE(op + d +12, LOAD(in + d +12) ^ REVV_BE(v3)); | |
2355 ++ | |
2356 ++void CRYPTO_chacha_20( | |
2357 ++ unsigned char *out, | |
2358 ++ const unsigned char *in, | |
2359 ++ size_t inlen, | |
2360 ++ const unsigned char key[32], | |
2361 ++ const unsigned char nonce[8], | |
2362 ++ size_t counter) | |
2363 ++ { | |
2364 ++ unsigned iters, i, *op=(unsigned *)out, *ip=(unsigned *)in, *kp; | |
2365 ++#if defined(__ARM_NEON__) | |
2366 ++ unsigned *np; | |
2367 ++#endif | |
2368 ++ vec s0, s1, s2, s3; | |
2369 ++#if !defined(__ARM_NEON__) && !defined(__SSE2__) | |
2370 ++ __attribute__ ((aligned (16))) unsigned key[8], nonce[4]; | |
2371 ++#endif | |
2372 ++ __attribute__ ((aligned (16))) unsigned chacha_const[] = | |
2373 ++ {0x61707865,0x3320646E,0x79622D32,0x6B206574}; | |
2374 ++#if defined(__ARM_NEON__) || defined(__SSE2__) | |
2375 ++ kp = (unsigned *)key; | |
2376 ++#else | |
2377 ++ ((vec *)key)[0] = REVV_BE(((vec *)key)[0]); | |
2378 ++ ((vec *)key)[1] = REVV_BE(((vec *)key)[1]); | |
2379 ++ nonce[0] = REVW_BE(((unsigned *)nonce)[0]); | |
2380 ++ nonce[1] = REVW_BE(((unsigned *)nonce)[1]); | |
2381 ++ nonce[2] = REVW_BE(((unsigned *)nonce)[2]); | |
2382 ++ nonce[3] = REVW_BE(((unsigned *)nonce)[3]); | |
2383 ++ kp = (unsigned *)key; | |
2384 ++ np = (unsigned *)nonce; | |
2385 ++#endif | |
2386 ++#if defined(__ARM_NEON__) | |
2387 ++ np = (unsigned*) nonce; | |
2388 ++#endif | |
2389 ++ s0 = LOAD(chacha_const); | |
2390 ++ s1 = LOAD(&((vec*)kp)[0]); | |
2391 ++ s2 = LOAD(&((vec*)kp)[1]); | |
2392 ++ s3 = (vec){ | |
2393 ++ counter & 0xffffffff, | |
2394 ++#if __ARM_NEON__ | |
2395 ++ 0, /* can't right-shift 32 bits on a 32-bit system. */ | |
2396 ++#else | |
2397 ++ counter >> 32, | |
2398 ++#endif | |
2399 ++ ((uint32_t*)nonce)[0], | |
2400 ++ ((uint32_t*)nonce)[1] | |
2401 ++ }; | |
2402 ++ | |
2403 ++ for (iters = 0; iters < inlen/(BPI*64); iters++) | |
2404 ++ { | |
2405 ++#if GPR_TOO | |
2406 ++ register unsigned x0, x1, x2, x3, x4, x5, x6, x7, x8, | |
2407 ++ x9, x10, x11, x12, x13, x14, x15; | |
2408 ++#endif | |
2409 ++#if VBPI > 2 | |
2410 ++ vec v8,v9,v10,v11; | |
2411 ++#endif | |
2412 ++#if VBPI > 3 | |
2413 ++ vec v12,v13,v14,v15; | |
2414 ++#endif | |
2415 ++ | |
2416 ++ vec v0,v1,v2,v3,v4,v5,v6,v7; | |
2417 ++ v4 = v0 = s0; v5 = v1 = s1; v6 = v2 = s2; v3 = s3; | |
2418 ++ v7 = v3 + ONE; | |
2419 ++#if VBPI > 2 | |
2420 ++ v8 = v4; v9 = v5; v10 = v6; | |
2421 ++ v11 = v7 + ONE; | |
2422 ++#endif | |
2423 ++#if VBPI > 3 | |
2424 ++ v12 = v8; v13 = v9; v14 = v10; | |
2425 ++ v15 = v11 + ONE; | |
2426 ++#endif | |
2427 ++#if GPR_TOO | |
2428 ++ x0 = chacha_const[0]; x1 = chacha_const[1]; | |
2429 ++ x2 = chacha_const[2]; x3 = chacha_const[3]; | |
2430 ++ x4 = kp[0]; x5 = kp[1]; x6 = kp[2]; x7 = kp[3]; | |
2431 ++ x8 = kp[4]; x9 = kp[5]; x10 = kp[6]; x11 = kp[7]; | |
2432 ++ x12 = counter+BPI*iters+(BPI-1); x13 = 0; | |
2433 ++ x14 = np[0]; x15 = np[1]; | |
2434 ++#endif | |
2435 ++ for (i = CHACHA_RNDS/2; i; i--) | |
2436 ++ { | |
2437 ++ DQROUND_VECTORS(v0,v1,v2,v3) | |
2438 ++ DQROUND_VECTORS(v4,v5,v6,v7) | |
2439 ++#if VBPI > 2 | |
2440 ++ DQROUND_VECTORS(v8,v9,v10,v11) | |
2441 ++#endif | |
2442 ++#if VBPI > 3 | |
2443 ++ DQROUND_VECTORS(v12,v13,v14,v15) | |
2444 ++#endif | |
2445 ++#if GPR_TOO | |
2446 ++ QROUND_WORDS( x0, x4, x8,x12) | |
2447 ++ QROUND_WORDS( x1, x5, x9,x13) | |
2448 ++ QROUND_WORDS( x2, x6,x10,x14) | |
2449 ++ QROUND_WORDS( x3, x7,x11,x15) | |
2450 ++ QROUND_WORDS( x0, x5,x10,x15) | |
2451 ++ QROUND_WORDS( x1, x6,x11,x12) | |
2452 ++ QROUND_WORDS( x2, x7, x8,x13) | |
2453 ++ QROUND_WORDS( x3, x4, x9,x14) | |
2454 ++#endif | |
2455 ++ } | |
2456 ++ | |
2457 ++ WRITE_XOR(ip, op, 0, v0+s0, v1+s1, v2+s2, v3+s3) | |
2458 ++ s3 += ONE; | |
2459 ++ WRITE_XOR(ip, op, 16, v4+s0, v5+s1, v6+s2, v7+s3) | |
2460 ++ s3 += ONE; | |
2461 ++#if VBPI > 2 | |
2462 ++ WRITE_XOR(ip, op, 32, v8+s0, v9+s1, v10+s2, v11+s3) | |
2463 ++ s3 += ONE; | |
2464 ++#endif | |
2465 ++#if VBPI > 3 | |
2466 ++ WRITE_XOR(ip, op, 48, v12+s0, v13+s1, v14+s2, v15+s3) | |
2467 ++ s3 += ONE; | |
2468 ++#endif | |
2469 ++ ip += VBPI*16; | |
2470 ++ op += VBPI*16; | |
2471 ++#if GPR_TOO | |
2472 ++ op[0] = REVW_BE(REVW_BE(ip[0]) ^ (x0 + chacha_const[0])); | |
2473 ++ op[1] = REVW_BE(REVW_BE(ip[1]) ^ (x1 + chacha_const[1])); | |
2474 ++ op[2] = REVW_BE(REVW_BE(ip[2]) ^ (x2 + chacha_const[2])); | |
2475 ++ op[3] = REVW_BE(REVW_BE(ip[3]) ^ (x3 + chacha_const[3])); | |
2476 ++ op[4] = REVW_BE(REVW_BE(ip[4]) ^ (x4 + kp[0])); | |
2477 ++ op[5] = REVW_BE(REVW_BE(ip[5]) ^ (x5 + kp[1])); | |
2478 ++ op[6] = REVW_BE(REVW_BE(ip[6]) ^ (x6 + kp[2])); | |
2479 ++ op[7] = REVW_BE(REVW_BE(ip[7]) ^ (x7 + kp[3])); | |
2480 ++ op[8] = REVW_BE(REVW_BE(ip[8]) ^ (x8 + kp[4])); | |
2481 ++ op[9] = REVW_BE(REVW_BE(ip[9]) ^ (x9 + kp[5])); | |
2482 ++ op[10] = REVW_BE(REVW_BE(ip[10]) ^ (x10 + kp[6])); | |
2483 ++ op[11] = REVW_BE(REVW_BE(ip[11]) ^ (x11 + kp[7])); | |
2484 ++ op[12] = REVW_BE(REVW_BE(ip[12]) ^ (x12 + counter+BPI*iters+(BPI
-1))); | |
2485 ++ op[13] = REVW_BE(REVW_BE(ip[13]) ^ (x13)); | |
2486 ++ op[14] = REVW_BE(REVW_BE(ip[14]) ^ (x14 + np[0])); | |
2487 ++ op[15] = REVW_BE(REVW_BE(ip[15]) ^ (x15 + np[1])); | |
2488 ++ s3 += ONE; | |
2489 ++ ip += 16; | |
2490 ++ op += 16; | |
2491 ++#endif | |
2492 ++ } | |
2493 ++ | |
2494 ++ for (iters = inlen%(BPI*64)/64; iters != 0; iters--) | |
2495 ++ { | |
2496 ++ vec v0 = s0, v1 = s1, v2 = s2, v3 = s3; | |
2497 ++ for (i = CHACHA_RNDS/2; i; i--) | |
2498 ++ { | |
2499 ++ DQROUND_VECTORS(v0,v1,v2,v3); | |
2500 ++ } | |
2501 ++ WRITE_XOR(ip, op, 0, v0+s0, v1+s1, v2+s2, v3+s3) | |
2502 ++ s3 += ONE; | |
2503 ++ ip += 16; | |
2504 ++ op += 16; | |
2505 ++ } | |
2506 ++ | |
2507 ++ inlen = inlen % 64; | |
2508 ++ if (inlen) | |
2509 ++ { | |
2510 ++ __attribute__ ((aligned (16))) vec buf[4]; | |
2511 ++ vec v0,v1,v2,v3; | |
2512 ++ v0 = s0; v1 = s1; v2 = s2; v3 = s3; | |
2513 ++ for (i = CHACHA_RNDS/2; i; i--) | |
2514 ++ { | |
2515 ++ DQROUND_VECTORS(v0,v1,v2,v3); | |
2516 ++ } | |
2517 ++ | |
2518 ++ if (inlen >= 16) | |
2519 ++ { | |
2520 ++ STORE(op + 0, LOAD(ip + 0) ^ REVV_BE(v0 + s0)); | |
2521 ++ if (inlen >= 32) | |
2522 ++ { | |
2523 ++ STORE(op + 4, LOAD(ip + 4) ^ REVV_BE(v1 + s1)); | |
2524 ++ if (inlen >= 48) | |
2525 ++ { | |
2526 ++ STORE(op + 8, LOAD(ip + 8) ^ | |
2527 ++ REVV_BE(v2 + s2)); | |
2528 ++ buf[3] = REVV_BE(v3 + s3); | |
2529 ++ } | |
2530 ++ else | |
2531 ++ buf[2] = REVV_BE(v2 + s2); | |
2532 ++ } | |
2533 ++ else | |
2534 ++ buf[1] = REVV_BE(v1 + s1); | |
2535 ++ } | |
2536 ++ else | |
2537 ++ buf[0] = REVV_BE(v0 + s0); | |
2538 ++ | |
2539 ++ for (i=inlen & ~15; i<inlen; i++) | |
2540 ++ ((char *)op)[i] = ((char *)ip)[i] ^ ((char *)buf)[i]; | |
2541 ++ } | |
2542 ++ } | |
2543 ++ | |
2544 ++#endif /* !OPENSSL_NO_CHACHA */ | |
2545 +diff --git a/crypto/chacha/chachatest.c b/crypto/chacha/chachatest.c | |
2546 +new file mode 100644 | |
2547 +index 0000000..b2a9389 | |
2548 +--- /dev/null | |
2549 ++++ b/crypto/chacha/chachatest.c | |
2550 +@@ -0,0 +1,211 @@ | |
2551 ++/* | |
2552 ++ * Chacha stream algorithm. | |
2553 ++ * | |
2554 ++ * Created on: Jun, 2013 | |
2555 ++ * Author: Elie Bursztein (elieb@google.com) | |
2556 ++ * | |
2557 ++ * Adapted from the estream code by D. Bernstein. | |
2558 ++ */ | |
2559 ++/* ==================================================================== | |
2560 ++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
2561 ++ * | |
2562 ++ * Redistribution and use in source and binary forms, with or without | |
2563 ++ * modification, are permitted provided that the following conditions | |
2564 ++ * are met: | |
2565 ++ * | |
2566 ++ * 1. Redistributions of source code must retain the above copyright | |
2567 ++ * notice, this list of conditions and the following disclaimer. | |
2568 ++ * | |
2569 ++ * 2. Redistributions in binary form must reproduce the above copyright | |
2570 ++ * notice, this list of conditions and the following disclaimer in | |
2571 ++ * the documentation and/or other materials provided with the | |
2572 ++ * distribution. | |
2573 ++ * | |
2574 ++ * 3. All advertising materials mentioning features or use of this | |
2575 ++ * software must display the following acknowledgment: | |
2576 ++ * "This product includes software developed by the OpenSSL Project | |
2577 ++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
2578 ++ * | |
2579 ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
2580 ++ * endorse or promote products derived from this software without | |
2581 ++ * prior written permission. For written permission, please contact | |
2582 ++ * licensing@OpenSSL.org. | |
2583 ++ * | |
2584 ++ * 5. Products derived from this software may not be called "OpenSSL" | |
2585 ++ * nor may "OpenSSL" appear in their names without prior written | |
2586 ++ * permission of the OpenSSL Project. | |
2587 ++ * | |
2588 ++ * 6. Redistributions of any form whatsoever must retain the following | |
2589 ++ * acknowledgment: | |
2590 ++ * "This product includes software developed by the OpenSSL Project | |
2591 ++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
2592 ++ * | |
2593 ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
2594 ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
2595 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
2596 ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
2597 ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
2598 ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
2599 ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
2600 ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
2601 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
2602 ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
2603 ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
2604 ++ * OF THE POSSIBILITY OF SUCH DAMAGE. | |
2605 ++ * ==================================================================== | |
2606 ++ */ | |
2607 ++ | |
2608 ++#include <stdio.h> | |
2609 ++#include <stdlib.h> | |
2610 ++#include <string.h> | |
2611 ++#include <stdint.h> | |
2612 ++ | |
2613 ++#include <openssl/chacha.h> | |
2614 ++ | |
2615 ++struct chacha_test { | |
2616 ++ const char *keyhex; | |
2617 ++ const char *noncehex; | |
2618 ++ const char *outhex; | |
2619 ++}; | |
2620 ++ | |
2621 ++static const struct chacha_test chacha_tests[] = { | |
2622 ++ { | |
2623 ++ "000000000000000000000000000000000000000000000000000000000000000
0", | |
2624 ++ "0000000000000000", | |
2625 ++ "76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc
7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586", | |
2626 ++ }, | |
2627 ++ { | |
2628 ++ "000000000000000000000000000000000000000000000000000000000000000
1", | |
2629 ++ "0000000000000000", | |
2630 ++ "4540f05a9f1fb296d7736e7b208e3c96eb4fe1834688d2604f450952ed432d4
1bbe2a0b6ea7566d2a5d1e7e20d42af2c53d792b1c43fea817e9ad275ae546963", | |
2631 ++ }, | |
2632 ++ { | |
2633 ++ "000000000000000000000000000000000000000000000000000000000000000
0", | |
2634 ++ "0000000000000001", | |
2635 ++ "de9cba7bf3d69ef5e786dc63973f653a0b49e015adbff7134fcb7df13782103
1e85a050278a7084527214f73efc7fa5b5277062eb7a0433e445f41e31afab757", | |
2636 ++ }, | |
2637 ++ { | |
2638 ++ "000000000000000000000000000000000000000000000000000000000000000
0", | |
2639 ++ "0100000000000000", | |
2640 ++ "ef3fdfd6c61578fbf5cf35bd3dd33b8009631634d21e42ac33960bd138e50d3
2111e4caf237ee53ca8ad6426194a88545ddc497a0b466e7d6bbdb0041b2f586b", | |
2641 ++ }, | |
2642 ++ { | |
2643 ++ "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1
f", | |
2644 ++ "0001020304050607", | |
2645 ++ "f798a189f195e66982105ffb640bb7757f579da31602fc93ec01ac56f85ac3c
134a4547b733b46413042c9440049176905d3be59ea1c53f15916155c2be8241a38008b9a26bc359
41e2444177c8ade6689de95264986d95889fb60e84629c9bd9a5acb1cc118be563eb9b3a4a472f82
e09a7e778492b562ef7130e88dfe031c79db9d4f7c7a899151b9a475032b63fc385245fe054e3dd5
a97a5f576fe064025d3ce042c566ab2c507b138db853e3d6959660996546cc9c4a6eafdc777c040d
70eaf46f76dad3979e5c5360c3317166a1c894c94a371876a94df7628fe4eaaf2ccb27d5aaae0ad7
ad0f9d4b6ad3b54098746d4524d38407a6deb", | |
2646 ++ }, | |
2647 ++}; | |
2648 ++ | |
2649 ++static unsigned char hex_digit(char h) | |
2650 ++ { | |
2651 ++ if (h >= '0' && h <= '9') | |
2652 ++ return h - '0'; | |
2653 ++ else if (h >= 'a' && h <= 'f') | |
2654 ++ return h - 'a' + 10; | |
2655 ++ else if (h >= 'A' && h <= 'F') | |
2656 ++ return h - 'A' + 10; | |
2657 ++ else | |
2658 ++ abort(); | |
2659 ++ } | |
2660 ++ | |
2661 ++static void hex_decode(unsigned char *out, const char* hex) | |
2662 ++ { | |
2663 ++ size_t j = 0; | |
2664 ++ | |
2665 ++ while (*hex != 0) | |
2666 ++ { | |
2667 ++ unsigned char v = hex_digit(*hex++); | |
2668 ++ v <<= 4; | |
2669 ++ v |= hex_digit(*hex++); | |
2670 ++ out[j++] = v; | |
2671 ++ } | |
2672 ++ } | |
2673 ++ | |
2674 ++static void hexdump(unsigned char *a, size_t len) | |
2675 ++ { | |
2676 ++ size_t i; | |
2677 ++ | |
2678 ++ for (i = 0; i < len; i++) | |
2679 ++ printf("%02x", a[i]); | |
2680 ++ } | |
2681 ++ | |
2682 ++/* misalign returns a pointer that points 0 to 15 bytes into |in| such that th
e | |
2683 ++ * returned pointer has alignment 1 mod 16. */ | |
2684 ++static void* misalign(void* in) | |
2685 ++ { | |
2686 ++ intptr_t x = (intptr_t) in; | |
2687 ++ x += (17 - (x % 16)) % 16; | |
2688 ++ return (void*) x; | |
2689 ++ } | |
2690 ++ | |
2691 ++int main() | |
2692 ++ { | |
2693 ++ static const unsigned num_tests = | |
2694 ++ sizeof(chacha_tests) / sizeof(struct chacha_test); | |
2695 ++ unsigned i; | |
2696 ++ unsigned char key_bytes[32 + 16]; | |
2697 ++ unsigned char nonce_bytes[8 + 16] = {0}; | |
2698 ++ | |
2699 ++ unsigned char *key = misalign(key_bytes); | |
2700 ++ unsigned char *nonce = misalign(nonce_bytes); | |
2701 ++ | |
2702 ++ for (i = 0; i < num_tests; i++) | |
2703 ++ { | |
2704 ++ const struct chacha_test *test = &chacha_tests[i]; | |
2705 ++ unsigned char *expected, *out_bytes, *zero_bytes, *out, *zeros; | |
2706 ++ size_t len = strlen(test->outhex); | |
2707 ++ | |
2708 ++ if (strlen(test->keyhex) != 32*2 || | |
2709 ++ strlen(test->noncehex) != 8*2 || | |
2710 ++ (len & 1) == 1) | |
2711 ++ return 1; | |
2712 ++ | |
2713 ++ len /= 2; | |
2714 ++ | |
2715 ++ hex_decode(key, test->keyhex); | |
2716 ++ hex_decode(nonce, test->noncehex); | |
2717 ++ | |
2718 ++ expected = malloc(len); | |
2719 ++ out_bytes = malloc(len+16); | |
2720 ++ zero_bytes = malloc(len+16); | |
2721 ++ /* Attempt to test unaligned inputs. */ | |
2722 ++ out = misalign(out_bytes); | |
2723 ++ zeros = misalign(zero_bytes); | |
2724 ++ memset(zeros, 0, len); | |
2725 ++ | |
2726 ++ hex_decode(expected, test->outhex); | |
2727 ++ CRYPTO_chacha_20(out, zeros, len, key, nonce, 0); | |
2728 ++ | |
2729 ++ if (memcmp(out, expected, len) != 0) | |
2730 ++ { | |
2731 ++ printf("ChaCha20 test #%d failed.\n", i); | |
2732 ++ printf("got: "); | |
2733 ++ hexdump(out, len); | |
2734 ++ printf("\nexpected: "); | |
2735 ++ hexdump(expected, len); | |
2736 ++ printf("\n"); | |
2737 ++ return 1; | |
2738 ++ } | |
2739 ++ | |
2740 ++ /* The last test has a large output. We test whether the | |
2741 ++ * counter works as expected by skipping the first 64 bytes of | |
2742 ++ * it. */ | |
2743 ++ if (i == num_tests - 1) | |
2744 ++ { | |
2745 ++ CRYPTO_chacha_20(out, zeros, len - 64, key, nonce, 1); | |
2746 ++ if (memcmp(out, expected + 64, len - 64) != 0) | |
2747 ++ { | |
2748 ++ printf("ChaCha20 skip test failed.\n"); | |
2749 ++ return 1; | |
2750 ++ } | |
2751 ++ } | |
2752 ++ | |
2753 ++ free(expected); | |
2754 ++ free(zero_bytes); | |
2755 ++ free(out_bytes); | |
2756 ++ } | |
2757 ++ | |
2758 ++ | |
2759 ++ printf("PASS\n"); | |
2760 ++ return 0; | |
2761 ++ } | |
2762 +diff --git a/crypto/evp/Makefile b/crypto/evp/Makefile | |
2763 +index b73038d..86b0504 100644 | |
2764 +--- a/crypto/evp/Makefile | |
2765 ++++ b/crypto/evp/Makefile | |
2766 +@@ -29,7 +29,8 @@ LIBSRC= encode.c digest.c evp_enc.c evp_key.c evp_acnf.c evp_
cnf.c \ | |
2767 + c_all.c c_allc.c c_alld.c evp_lib.c bio_ok.c \ | |
2768 + evp_pkey.c evp_pbe.c p5_crpt.c p5_crpt2.c \ | |
2769 + e_old.c pmeth_lib.c pmeth_fn.c pmeth_gn.c m_sigver.c evp_fips.c \ | |
2770 +- e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c evp_aead.c | |
2771 ++ e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c evp_aead.c \ | |
2772 ++ e_chacha20poly1305.c | |
2773 + | |
2774 + LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \ | |
2775 + e_des.o e_bf.o e_idea.o e_des3.o e_camellia.o\ | |
2776 +@@ -42,7 +43,7 @@ LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o
evp_cnf.o \ | |
2777 + c_all.o c_allc.o c_alld.o evp_lib.o bio_ok.o \ | |
2778 + evp_pkey.o evp_pbe.o p5_crpt.o p5_crpt2.o \ | |
2779 + e_old.o pmeth_lib.o pmeth_fn.o pmeth_gn.o m_sigver.o evp_fips.o \ | |
2780 +- e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o evp_aead.o | |
2781 ++ e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o evp_aead.o e_chacha20poly1305.o | |
2782 + | |
2783 + SRC= $(LIBSRC) | |
2784 + | |
2785 +@@ -239,6 +240,21 @@ e_cast.o: ../../include/openssl/objects.h ../../include/op
enssl/opensslconf.h | |
2786 + e_cast.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h | |
2787 + e_cast.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h | |
2788 + e_cast.o: ../../include/openssl/symhacks.h ../cryptlib.h e_cast.c evp_locl.h | |
2789 ++e_chacha20poly1305.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h | |
2790 ++e_chacha20poly1305.o: ../../include/openssl/chacha.h | |
2791 ++e_chacha20poly1305.o: ../../include/openssl/crypto.h | |
2792 ++e_chacha20poly1305.o: ../../include/openssl/e_os2.h ../../include/openssl/err.
h | |
2793 ++e_chacha20poly1305.o: ../../include/openssl/evp.h ../../include/openssl/lhash.
h | |
2794 ++e_chacha20poly1305.o: ../../include/openssl/obj_mac.h | |
2795 ++e_chacha20poly1305.o: ../../include/openssl/objects.h | |
2796 ++e_chacha20poly1305.o: ../../include/openssl/opensslconf.h | |
2797 ++e_chacha20poly1305.o: ../../include/openssl/opensslv.h | |
2798 ++e_chacha20poly1305.o: ../../include/openssl/ossl_typ.h | |
2799 ++e_chacha20poly1305.o: ../../include/openssl/poly1305.h | |
2800 ++e_chacha20poly1305.o: ../../include/openssl/safestack.h | |
2801 ++e_chacha20poly1305.o: ../../include/openssl/stack.h | |
2802 ++e_chacha20poly1305.o: ../../include/openssl/symhacks.h e_chacha20poly1305.c | |
2803 ++e_chacha20poly1305.o: evp_locl.h | |
2804 + e_des.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h | |
2805 + e_des.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | |
2806 + e_des.o: ../../include/openssl/des.h ../../include/openssl/des_old.h | |
2807 +@@ -258,9 +274,10 @@ e_des3.o: ../../include/openssl/evp.h ../../include/openss
l/lhash.h | |
2808 + e_des3.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h | |
2809 + e_des3.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h | |
2810 + e_des3.o: ../../include/openssl/ossl_typ.h ../../include/openssl/rand.h | |
2811 +-e_des3.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h | |
2812 +-e_des3.o: ../../include/openssl/symhacks.h ../../include/openssl/ui.h | |
2813 +-e_des3.o: ../../include/openssl/ui_compat.h ../cryptlib.h e_des3.c evp_locl.h | |
2814 ++e_des3.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h | |
2815 ++e_des3.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h | |
2816 ++e_des3.o: ../../include/openssl/ui.h ../../include/openssl/ui_compat.h | |
2817 ++e_des3.o: ../cryptlib.h e_des3.c evp_locl.h | |
2818 + e_idea.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.
h | |
2819 + e_idea.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | |
2820 + e_idea.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h | |
2821 +@@ -356,6 +373,14 @@ evp_acnf.o: ../../include/openssl/opensslconf.h | |
2822 + evp_acnf.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h | |
2823 + evp_acnf.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h | |
2824 + evp_acnf.o: ../../include/openssl/symhacks.h ../cryptlib.h evp_acnf.c | |
2825 ++evp_aead.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h | |
2826 ++evp_aead.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h | |
2827 ++evp_aead.o: ../../include/openssl/err.h ../../include/openssl/evp.h | |
2828 ++evp_aead.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h | |
2829 ++evp_aead.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.
h | |
2830 ++evp_aead.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h | |
2831 ++evp_aead.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h | |
2832 ++evp_aead.o: ../../include/openssl/symhacks.h evp_aead.c | |
2833 + evp_cnf.o: ../../e_os.h ../../include/openssl/asn1.h | |
2834 + evp_cnf.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h | |
2835 + evp_cnf.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h | |
2836 +diff --git a/crypto/evp/e_chacha20poly1305.c b/crypto/evp/e_chacha20poly1305.c | |
2837 +new file mode 100644 | |
2838 +index 0000000..1c0c0fb | |
2839 +--- /dev/null | |
2840 ++++ b/crypto/evp/e_chacha20poly1305.c | |
2841 +@@ -0,0 +1,267 @@ | |
2842 ++/* ==================================================================== | |
2843 ++ * Copyright (c) 2013 The OpenSSL Project. All rights reserved. | |
2844 ++ * | |
2845 ++ * Redistribution and use in source and binary forms, with or without | |
2846 ++ * modification, are permitted provided that the following conditions | |
2847 ++ * are met: | |
2848 ++ * | |
2849 ++ * 1. Redistributions of source code must retain the above copyright | |
2850 ++ * notice, this list of conditions and the following disclaimer. | |
2851 ++ * | |
2852 ++ * 2. Redistributions in binary form must reproduce the above copyright | |
2853 ++ * notice, this list of conditions and the following disclaimer in | |
2854 ++ * the documentation and/or other materials provided with the | |
2855 ++ * distribution. | |
2856 ++ * | |
2857 ++ * 3. All advertising materials mentioning features or use of this | |
2858 ++ * software must display the following acknowledgment: | |
2859 ++ * "This product includes software developed by the OpenSSL Project | |
2860 ++ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" | |
2861 ++ * | |
2862 ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
2863 ++ * endorse or promote products derived from this software without | |
2864 ++ * prior written permission. For written permission, please contact | |
2865 ++ * openssl-core@openssl.org. | |
2866 ++ * | |
2867 ++ * 5. Products derived from this software may not be called "OpenSSL" | |
2868 ++ * nor may "OpenSSL" appear in their names without prior written | |
2869 ++ * permission of the OpenSSL Project. | |
2870 ++ * | |
2871 ++ * 6. Redistributions of any form whatsoever must retain the following | |
2872 ++ * acknowledgment: | |
2873 ++ * "This product includes software developed by the OpenSSL Project | |
2874 ++ * for use in the OpenSSL Toolkit (http://www.openssl.org/)" | |
2875 ++ * | |
2876 ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
2877 ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
2878 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
2879 ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
2880 ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
2881 ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
2882 ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
2883 ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
2884 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
2885 ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
2886 ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
2887 ++ * OF THE POSSIBILITY OF SUCH DAMAGE. | |
2888 ++ * ==================================================================== | |
2889 ++ * | |
2890 ++ */ | |
2891 ++ | |
2892 ++#include <stdint.h> | |
2893 ++#include <string.h> | |
2894 ++#include <openssl/opensslconf.h> | |
2895 ++ | |
2896 ++#if !defined(OPENSSL_NO_CHACHA) && !defined(OPENSSL_NO_POLY1305) | |
2897 ++ | |
2898 ++#include <openssl/chacha.h> | |
2899 ++#include <openssl/poly1305.h> | |
2900 ++#include <openssl/evp.h> | |
2901 ++#include <openssl/err.h> | |
2902 ++#include "evp_locl.h" | |
2903 ++ | |
2904 ++#define POLY1305_TAG_LEN 16 | |
2905 ++#define CHACHA20_NONCE_LEN 8 | |
2906 ++ | |
2907 ++struct aead_chacha20_poly1305_ctx | |
2908 ++ { | |
2909 ++ unsigned char key[32]; | |
2910 ++ unsigned char tag_len; | |
2911 ++ }; | |
2912 ++ | |
2913 ++static int aead_chacha20_poly1305_init(EVP_AEAD_CTX *ctx, const unsigned char
*key, size_t key_len, size_t tag_len) | |
2914 ++ { | |
2915 ++ struct aead_chacha20_poly1305_ctx *c20_ctx; | |
2916 ++ | |
2917 ++ if (tag_len == 0) | |
2918 ++ tag_len = POLY1305_TAG_LEN; | |
2919 ++ | |
2920 ++ if (tag_len > POLY1305_TAG_LEN) | |
2921 ++ { | |
2922 ++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_INIT, EVP_R_TOO_LARGE); | |
2923 ++ return 0; | |
2924 ++ } | |
2925 ++ | |
2926 ++ if (key_len != sizeof(c20_ctx->key)) | |
2927 ++ return 0; /* internal error - EVP_AEAD_CTX_init should catch th
is. */ | |
2928 ++ | |
2929 ++ c20_ctx = OPENSSL_malloc(sizeof(struct aead_chacha20_poly1305_ctx)); | |
2930 ++ if (c20_ctx == NULL) | |
2931 ++ return 0; | |
2932 ++ | |
2933 ++ memcpy(&c20_ctx->key[0], key, key_len); | |
2934 ++ c20_ctx->tag_len = tag_len; | |
2935 ++ ctx->aead_state = c20_ctx; | |
2936 ++ | |
2937 ++ return 1; | |
2938 ++ } | |
2939 ++ | |
2940 ++static void aead_chacha20_poly1305_cleanup(EVP_AEAD_CTX *ctx) | |
2941 ++ { | |
2942 ++ struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state; | |
2943 ++ OPENSSL_cleanse(c20_ctx->key, sizeof(c20_ctx->key)); | |
2944 ++ OPENSSL_free(c20_ctx); | |
2945 ++ } | |
2946 ++ | |
2947 ++static void poly1305_update_with_length(poly1305_state *poly1305, | |
2948 ++ const unsigned char *data, size_t data_len) | |
2949 ++ { | |
2950 ++ size_t j = data_len; | |
2951 ++ unsigned char length_bytes[8]; | |
2952 ++ unsigned i; | |
2953 ++ | |
2954 ++ for (i = 0; i < sizeof(length_bytes); i++) | |
2955 ++ { | |
2956 ++ length_bytes[i] = j; | |
2957 ++ j >>= 8; | |
2958 ++ } | |
2959 ++ | |
2960 ++ CRYPTO_poly1305_update(poly1305, data, data_len); | |
2961 ++ CRYPTO_poly1305_update(poly1305, length_bytes, sizeof(length_bytes)); | |
2962 ++} | |
2963 ++ | |
2964 ++#if __arm__ | |
2965 ++#define ALIGNED __attribute__((aligned(16))) | |
2966 ++#else | |
2967 ++#define ALIGNED | |
2968 ++#endif | |
2969 ++ | |
2970 ++static ssize_t aead_chacha20_poly1305_seal(const EVP_AEAD_CTX *ctx, | |
2971 ++ unsigned char *out, size_t max_out_len, | |
2972 ++ const unsigned char *nonce, size_t nonce_len, | |
2973 ++ const unsigned char *in, size_t in_len, | |
2974 ++ const unsigned char *ad, size_t ad_len) | |
2975 ++ { | |
2976 ++ const struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state; | |
2977 ++ unsigned char poly1305_key[32] ALIGNED; | |
2978 ++ poly1305_state poly1305; | |
2979 ++ const uint64_t in_len_64 = in_len; | |
2980 ++ | |
2981 ++ /* The underlying ChaCha implementation may not overflow the block | |
2982 ++ * counter into the second counter word. Therefore we disallow | |
2983 ++ * individual operations that work on more than 2TB at a time. | |
2984 ++ * |in_len_64| is needed because, on 32-bit platforms, size_t is only | |
2985 ++ * 32-bits and this produces a warning because it's always false. | |
2986 ++ * Casting to uint64_t inside the conditional is not sufficient to stop | |
2987 ++ * the warning. */ | |
2988 ++ if (in_len_64 >= (1ull << 32)*64-64) | |
2989 ++ { | |
2990 ++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_TOO_LARGE); | |
2991 ++ return -1; | |
2992 ++ } | |
2993 ++ | |
2994 ++ if (max_out_len < in_len + c20_ctx->tag_len) | |
2995 ++ { | |
2996 ++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_BUFFER_TOO_SMALL
); | |
2997 ++ return -1; | |
2998 ++ } | |
2999 ++ | |
3000 ++ if (nonce_len != CHACHA20_NONCE_LEN) | |
3001 ++ { | |
3002 ++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_IV_TOO_LARGE); | |
3003 ++ return -1; | |
3004 ++ } | |
3005 ++ | |
3006 ++ memset(poly1305_key, 0, sizeof(poly1305_key)); | |
3007 ++ CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key), c20_c
tx->key, nonce, 0); | |
3008 ++ | |
3009 ++ CRYPTO_poly1305_init(&poly1305, poly1305_key); | |
3010 ++ poly1305_update_with_length(&poly1305, ad, ad_len); | |
3011 ++ CRYPTO_chacha_20(out, in, in_len, c20_ctx->key, nonce, 1); | |
3012 ++ poly1305_update_with_length(&poly1305, out, in_len); | |
3013 ++ | |
3014 ++ if (c20_ctx->tag_len != POLY1305_TAG_LEN) | |
3015 ++ { | |
3016 ++ unsigned char tag[POLY1305_TAG_LEN]; | |
3017 ++ CRYPTO_poly1305_finish(&poly1305, tag); | |
3018 ++ memcpy(out + in_len, tag, c20_ctx->tag_len); | |
3019 ++ return in_len + c20_ctx->tag_len; | |
3020 ++ } | |
3021 ++ | |
3022 ++ CRYPTO_poly1305_finish(&poly1305, out + in_len); | |
3023 ++ return in_len + POLY1305_TAG_LEN; | |
3024 ++ } | |
3025 ++ | |
3026 ++static ssize_t aead_chacha20_poly1305_open(const EVP_AEAD_CTX *ctx, | |
3027 ++ unsigned char *out, size_t max_out_len, | |
3028 ++ const unsigned char *nonce, size_t nonce_len, | |
3029 ++ const unsigned char *in, size_t in_len, | |
3030 ++ const unsigned char *ad, size_t ad_len) | |
3031 ++ { | |
3032 ++ const struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state; | |
3033 ++ unsigned char mac[POLY1305_TAG_LEN]; | |
3034 ++ unsigned char poly1305_key[32] ALIGNED; | |
3035 ++ size_t out_len; | |
3036 ++ poly1305_state poly1305; | |
3037 ++ const uint64_t in_len_64 = in_len; | |
3038 ++ | |
3039 ++ if (in_len < c20_ctx->tag_len) | |
3040 ++ { | |
3041 ++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BAD_DECRYPT); | |
3042 ++ return -1; | |
3043 ++ } | |
3044 ++ | |
3045 ++ /* The underlying ChaCha implementation may not overflow the block | |
3046 ++ * counter into the second counter word. Therefore we disallow | |
3047 ++ * individual operations that work on more than 2TB at a time. | |
3048 ++ * |in_len_64| is needed because, on 32-bit platforms, size_t is only | |
3049 ++ * 32-bits and this produces a warning because it's always false. | |
3050 ++ * Casting to uint64_t inside the conditional is not sufficient to stop | |
3051 ++ * the warning. */ | |
3052 ++ if (in_len_64 >= (1ull << 32)*64-64) | |
3053 ++ { | |
3054 ++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_TOO_LARGE); | |
3055 ++ return -1; | |
3056 ++ } | |
3057 ++ | |
3058 ++ if (nonce_len != CHACHA20_NONCE_LEN) | |
3059 ++ { | |
3060 ++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_IV_TOO_LARGE); | |
3061 ++ return -1; | |
3062 ++ } | |
3063 ++ | |
3064 ++ out_len = in_len - c20_ctx->tag_len; | |
3065 ++ | |
3066 ++ if (max_out_len < out_len) | |
3067 ++ { | |
3068 ++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BUFFER_TOO_SMALL
); | |
3069 ++ return -1; | |
3070 ++ } | |
3071 ++ | |
3072 ++ memset(poly1305_key, 0, sizeof(poly1305_key)); | |
3073 ++ CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key), c20_c
tx->key, nonce, 0); | |
3074 ++ | |
3075 ++ CRYPTO_poly1305_init(&poly1305, poly1305_key); | |
3076 ++ poly1305_update_with_length(&poly1305, ad, ad_len); | |
3077 ++ poly1305_update_with_length(&poly1305, in, out_len); | |
3078 ++ CRYPTO_poly1305_finish(&poly1305, mac); | |
3079 ++ | |
3080 ++ if (CRYPTO_memcmp(mac, in + out_len, c20_ctx->tag_len) != 0) | |
3081 ++ { | |
3082 ++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BAD_DECRYPT); | |
3083 ++ return -1; | |
3084 ++ } | |
3085 ++ | |
3086 ++ CRYPTO_chacha_20(out, in, out_len, c20_ctx->key, nonce, 1); | |
3087 ++ return out_len; | |
3088 ++ } | |
3089 ++ | |
3090 ++static const EVP_AEAD aead_chacha20_poly1305 = | |
3091 ++ { | |
3092 ++ 32, /* key len */ | |
3093 ++ CHACHA20_NONCE_LEN, /* nonce len */ | |
3094 ++ POLY1305_TAG_LEN, /* overhead */ | |
3095 ++ POLY1305_TAG_LEN, /* max tag length */ | |
3096 ++ | |
3097 ++ aead_chacha20_poly1305_init, | |
3098 ++ aead_chacha20_poly1305_cleanup, | |
3099 ++ aead_chacha20_poly1305_seal, | |
3100 ++ aead_chacha20_poly1305_open, | |
3101 ++ }; | |
3102 ++ | |
3103 ++const EVP_AEAD *EVP_aead_chacha20_poly1305() | |
3104 ++ { | |
3105 ++ return &aead_chacha20_poly1305; | |
3106 ++ } | |
3107 ++ | |
3108 ++#endif /* !OPENSSL_NO_CHACHA && !OPENSSL_NO_POLY1305 */ | |
3109 +diff --git a/crypto/evp/evp.h b/crypto/evp/evp.h | |
3110 +index bd10642..7dc1656 100644 | |
3111 +--- a/crypto/evp/evp.h | |
3112 ++++ b/crypto/evp/evp.h | |
3113 +@@ -1258,6 +1258,11 @@ typedef struct evp_aead_st EVP_AEAD; | |
3114 + const EVP_AEAD *EVP_aead_aes_128_gcm(void); | |
3115 + #endif | |
3116 + | |
3117 ++#if !defined(OPENSSL_NO_CHACHA) && !defined(OPENSSL_NO_POLY1305) | |
3118 ++/* EVP_aead_chacha20_poly1305 is ChaCha20 with a Poly1305 authenticator. */ | |
3119 ++const EVP_AEAD *EVP_aead_chacha20_poly1305(void); | |
3120 ++#endif | |
3121 ++ | |
3122 + /* EVP_AEAD_key_length returns the length, in bytes, of the keys used by | |
3123 + * |aead|. */ | |
3124 + size_t EVP_AEAD_key_length(const EVP_AEAD *aead); | |
3125 +@@ -1360,6 +1365,9 @@ void ERR_load_EVP_strings(void); | |
3126 + #define EVP_F_AEAD_AES_128_GCM_INIT 183 | |
3127 + #define EVP_F_AEAD_AES_128_GCM_OPEN 181 | |
3128 + #define EVP_F_AEAD_AES_128_GCM_SEAL 182 | |
3129 ++#define EVP_F_AEAD_CHACHA20_POLY1305_INIT 187 | |
3130 ++#define EVP_F_AEAD_CHACHA20_POLY1305_OPEN 184 | |
3131 ++#define EVP_F_AEAD_CHACHA20_POLY1305_SEAL 183 | |
3132 + #define EVP_F_AEAD_CTX_OPEN 185 | |
3133 + #define EVP_F_AEAD_CTX_SEAL 186 | |
3134 + #define EVP_F_AESNI_INIT_KEY 165 | |
3135 +diff --git a/crypto/evp/evp_err.c b/crypto/evp/evp_err.c | |
3136 +index c47969c..fb747e5 100644 | |
3137 +--- a/crypto/evp/evp_err.c | |
3138 ++++ b/crypto/evp/evp_err.c | |
3139 +@@ -73,6 +73,9 @@ static ERR_STRING_DATA EVP_str_functs[]= | |
3140 + {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_INIT), "AEAD_AES_128_GCM_INIT"}, | |
3141 + {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_OPEN), "AEAD_AES_128_GCM_OPEN"}, | |
3142 + {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_SEAL), "AEAD_AES_128_GCM_SEAL"}, | |
3143 ++{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_INIT), "AEAD_CHACHA20_POLY1305_INIT"}, | |
3144 ++{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_OPEN), "AEAD_CHACHA20_POLY1305_OPEN"}, | |
3145 ++{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_SEAL), "AEAD_CHACHA20_POLY1305_SEAL"}, | |
3146 + {ERR_FUNC(EVP_F_AEAD_CTX_OPEN), "AEAD_CTX_OPEN"}, | |
3147 + {ERR_FUNC(EVP_F_AEAD_CTX_SEAL), "AEAD_CTX_SEAL"}, | |
3148 + {ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"}, | |
3149 +diff --git a/crypto/poly1305/Makefile b/crypto/poly1305/Makefile | |
3150 +new file mode 100644 | |
3151 +index 0000000..397d7cd | |
3152 +--- /dev/null | |
3153 ++++ b/crypto/poly1305/Makefile | |
3154 +@@ -0,0 +1,81 @@ | |
3155 ++# | |
3156 ++# OpenSSL/crypto/poly1305/Makefile | |
3157 ++# | |
3158 ++ | |
3159 ++DIR= poly1305 | |
3160 ++TOP= ../.. | |
3161 ++CC= cc | |
3162 ++CPP= $(CC) -E | |
3163 ++INCLUDES= | |
3164 ++CFLAG=-g | |
3165 ++AR= ar r | |
3166 ++ | |
3167 ++POLY1305=poly1305_vec.o | |
3168 ++ | |
3169 ++CFLAGS= $(INCLUDES) $(CFLAG) | |
3170 ++ASFLAGS= $(INCLUDES) $(ASFLAG) | |
3171 ++AFLAGS= $(ASFLAGS) | |
3172 ++ | |
3173 ++GENERAL=Makefile | |
3174 ++TEST= | |
3175 ++APPS= | |
3176 ++ | |
3177 ++LIB=$(TOP)/libcrypto.a | |
3178 ++LIBSRC=poly1305_vec.c | |
3179 ++LIBOBJ=$(POLY1305) | |
3180 ++ | |
3181 ++SRC= $(LIBSRC) | |
3182 ++ | |
3183 ++EXHEADER=poly1305.h | |
3184 ++HEADER= $(EXHEADER) | |
3185 ++ | |
3186 ++ALL= $(GENERAL) $(SRC) $(HEADER) | |
3187 ++ | |
3188 ++top: | |
3189 ++ (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all) | |
3190 ++ | |
3191 ++all: lib | |
3192 ++ | |
3193 ++lib: $(LIBOBJ) | |
3194 ++ $(AR) $(LIB) $(LIBOBJ) | |
3195 ++ $(RANLIB) $(LIB) || echo Never mind. | |
3196 ++ @touch lib | |
3197 ++ | |
3198 ++files: | |
3199 ++ $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO | |
3200 ++ | |
3201 ++links: | |
3202 ++ @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER) | |
3203 ++ @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST) | |
3204 ++ @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS) | |
3205 ++ | |
3206 ++install: | |
3207 ++ @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile... | |
3208 ++ @headerlist="$(EXHEADER)"; for i in $$headerlist ; \ | |
3209 ++ do \ | |
3210 ++ (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \ | |
3211 ++ chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \ | |
3212 ++ done; | |
3213 ++ | |
3214 ++tags: | |
3215 ++ ctags $(SRC) | |
3216 ++ | |
3217 ++tests: | |
3218 ++ | |
3219 ++lint: | |
3220 ++ lint -DLINT $(INCLUDES) $(SRC)>fluff | |
3221 ++ | |
3222 ++depend: | |
3223 ++ @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... | |
3224 ++ $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) | |
3225 ++ | |
3226 ++dclean: | |
3227 ++ $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKE
FILE) >Makefile.new | |
3228 ++ mv -f Makefile.new $(MAKEFILE) | |
3229 ++ | |
3230 ++clean: | |
3231 ++ rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff | |
3232 ++ | |
3233 ++# DO NOT DELETE THIS LINE -- make depend depends on it. | |
3234 ++ | |
3235 ++poly1305_vec.o: ../../include/openssl/poly1305.h poly1305_vec.c | |
3236 +diff --git a/crypto/poly1305/poly1305.c b/crypto/poly1305/poly1305.c | |
3237 +new file mode 100644 | |
3238 +index 0000000..2e5621d | |
3239 +--- /dev/null | |
3240 ++++ b/crypto/poly1305/poly1305.c | |
3241 +@@ -0,0 +1,321 @@ | |
3242 ++/* ==================================================================== | |
3243 ++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
3244 ++ * | |
3245 ++ * Redistribution and use in source and binary forms, with or without | |
3246 ++ * modification, are permitted provided that the following conditions | |
3247 ++ * are met: | |
3248 ++ * | |
3249 ++ * 1. Redistributions of source code must retain the above copyright | |
3250 ++ * notice, this list of conditions and the following disclaimer. | |
3251 ++ * | |
3252 ++ * 2. Redistributions in binary form must reproduce the above copyright | |
3253 ++ * notice, this list of conditions and the following disclaimer in | |
3254 ++ * the documentation and/or other materials provided with the | |
3255 ++ * distribution. | |
3256 ++ * | |
3257 ++ * 3. All advertising materials mentioning features or use of this | |
3258 ++ * software must display the following acknowledgment: | |
3259 ++ * "This product includes software developed by the OpenSSL Project | |
3260 ++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
3261 ++ * | |
3262 ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
3263 ++ * endorse or promote products derived from this software without | |
3264 ++ * prior written permission. For written permission, please contact | |
3265 ++ * licensing@OpenSSL.org. | |
3266 ++ * | |
3267 ++ * 5. Products derived from this software may not be called "OpenSSL" | |
3268 ++ * nor may "OpenSSL" appear in their names without prior written | |
3269 ++ * permission of the OpenSSL Project. | |
3270 ++ * | |
3271 ++ * 6. Redistributions of any form whatsoever must retain the following | |
3272 ++ * acknowledgment: | |
3273 ++ * "This product includes software developed by the OpenSSL Project | |
3274 ++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
3275 ++ * | |
3276 ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
3277 ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
3278 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
3279 ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
3280 ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
3281 ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
3282 ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
3283 ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
3284 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
3285 ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
3286 ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
3287 ++ * OF THE POSSIBILITY OF SUCH DAMAGE. | |
3288 ++ * ==================================================================== | |
3289 ++ */ | |
3290 ++ | |
3291 ++/* This implementation of poly1305 is by Andrew Moon | |
3292 ++ * (https://github.com/floodyberry/poly1305-donna) and released as public | |
3293 ++ * domain. */ | |
3294 ++ | |
3295 ++#include <string.h> | |
3296 ++#include <stdint.h> | |
3297 ++#include <openssl/opensslconf.h> | |
3298 ++ | |
3299 ++#if !defined(OPENSSL_NO_POLY1305) | |
3300 ++ | |
3301 ++#include <openssl/poly1305.h> | |
3302 ++#include <openssl/crypto.h> | |
3303 ++ | |
3304 ++#if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86
_64__) | |
3305 ++/* We can assume little-endian. */ | |
3306 ++static uint32_t U8TO32_LE(const unsigned char *m) | |
3307 ++ { | |
3308 ++ uint32_t r; | |
3309 ++ memcpy(&r, m, sizeof(r)); | |
3310 ++ return r; | |
3311 ++ } | |
3312 ++ | |
3313 ++static void U32TO8_LE(unsigned char *m, uint32_t v) | |
3314 ++ { | |
3315 ++ memcpy(m, &v, sizeof(v)); | |
3316 ++ } | |
3317 ++#else | |
3318 ++static uint32_t U8TO32_LE(const unsigned char *m) | |
3319 ++ { | |
3320 ++ return (uint32_t)m[0] | | |
3321 ++ (uint32_t)m[1] << 8 | | |
3322 ++ (uint32_t)m[2] << 16 | | |
3323 ++ (uint32_t)m[3] << 24; | |
3324 ++ } | |
3325 ++ | |
3326 ++static void U32TO8_LE(unsigned char *m, uint32_t v) | |
3327 ++ { | |
3328 ++ m[0] = v; | |
3329 ++ m[1] = v >> 8; | |
3330 ++ m[2] = v >> 16; | |
3331 ++ m[3] = v >> 24; | |
3332 ++ } | |
3333 ++#endif | |
3334 ++ | |
3335 ++static uint64_t | |
3336 ++mul32x32_64(uint32_t a, uint32_t b) | |
3337 ++ { | |
3338 ++ return (uint64_t)a * b; | |
3339 ++ } | |
3340 ++ | |
3341 ++ | |
3342 ++struct poly1305_state_st | |
3343 ++ { | |
3344 ++ uint32_t r0,r1,r2,r3,r4; | |
3345 ++ uint32_t s1,s2,s3,s4; | |
3346 ++ uint32_t h0,h1,h2,h3,h4; | |
3347 ++ unsigned char buf[16]; | |
3348 ++ unsigned int buf_used; | |
3349 ++ unsigned char key[16]; | |
3350 ++ }; | |
3351 ++ | |
3352 ++/* poly1305_blocks updates |state| given some amount of input data. This | |
3353 ++ * function may only be called with a |len| that is not a multiple of 16 at th
e | |
3354 ++ * end of the data. Otherwise the input must be buffered into 16 byte blocks. | |
3355 ++ * */ | |
3356 ++static void poly1305_update(struct poly1305_state_st *state, | |
3357 ++ const unsigned char *in, size_t len) | |
3358 ++ { | |
3359 ++ uint32_t t0,t1,t2,t3; | |
3360 ++ uint64_t t[5]; | |
3361 ++ uint32_t b; | |
3362 ++ uint64_t c; | |
3363 ++ size_t j; | |
3364 ++ unsigned char mp[16]; | |
3365 ++ | |
3366 ++ if (len < 16) | |
3367 ++ goto poly1305_donna_atmost15bytes; | |
3368 ++ | |
3369 ++poly1305_donna_16bytes: | |
3370 ++ t0 = U8TO32_LE(in); | |
3371 ++ t1 = U8TO32_LE(in+4); | |
3372 ++ t2 = U8TO32_LE(in+8); | |
3373 ++ t3 = U8TO32_LE(in+12); | |
3374 ++ | |
3375 ++ in += 16; | |
3376 ++ len -= 16; | |
3377 ++ | |
3378 ++ state->h0 += t0 & 0x3ffffff; | |
3379 ++ state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; | |
3380 ++ state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; | |
3381 ++ state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; | |
3382 ++ state->h4 += (t3 >> 8) | (1 << 24); | |
3383 ++ | |
3384 ++poly1305_donna_mul: | |
3385 ++ t[0] = mul32x32_64(state->h0,state->r0) + | |
3386 ++ mul32x32_64(state->h1,state->s4) + | |
3387 ++ mul32x32_64(state->h2,state->s3) + | |
3388 ++ mul32x32_64(state->h3,state->s2) + | |
3389 ++ mul32x32_64(state->h4,state->s1); | |
3390 ++ t[1] = mul32x32_64(state->h0,state->r1) + | |
3391 ++ mul32x32_64(state->h1,state->r0) + | |
3392 ++ mul32x32_64(state->h2,state->s4) + | |
3393 ++ mul32x32_64(state->h3,state->s3) + | |
3394 ++ mul32x32_64(state->h4,state->s2); | |
3395 ++ t[2] = mul32x32_64(state->h0,state->r2) + | |
3396 ++ mul32x32_64(state->h1,state->r1) + | |
3397 ++ mul32x32_64(state->h2,state->r0) + | |
3398 ++ mul32x32_64(state->h3,state->s4) + | |
3399 ++ mul32x32_64(state->h4,state->s3); | |
3400 ++ t[3] = mul32x32_64(state->h0,state->r3) + | |
3401 ++ mul32x32_64(state->h1,state->r2) + | |
3402 ++ mul32x32_64(state->h2,state->r1) + | |
3403 ++ mul32x32_64(state->h3,state->r0) + | |
3404 ++ mul32x32_64(state->h4,state->s4); | |
3405 ++ t[4] = mul32x32_64(state->h0,state->r4) + | |
3406 ++ mul32x32_64(state->h1,state->r3) + | |
3407 ++ mul32x32_64(state->h2,state->r2) + | |
3408 ++ mul32x32_64(state->h3,state->r1) + | |
3409 ++ mul32x32_64(state->h4,state->r0); | |
3410 ++ | |
3411 ++ state->h0 = (uint32_t)t[0] & 0x3ffffff; c = (t[0] >
> 26); | |
3412 ++ t[1] += c; state->h1 = (uint32_t)t[1] & 0x3ffffff; b = (uint32_t)(t[1] >
> 26); | |
3413 ++ t[2] += b; state->h2 = (uint32_t)t[2] & 0x3ffffff; b = (uint32_t)(t[2] >
> 26); | |
3414 ++ t[3] += b; state->h3 = (uint32_t)t[3] & 0x3ffffff; b = (uint32_t)(t[3] >
> 26); | |
3415 ++ t[4] += b; state->h4 = (uint32_t)t[4] & 0x3ffffff; b = (uint32_t)(t[4] >
> 26); | |
3416 ++ state->h0 += b * 5; | |
3417 ++ | |
3418 ++ if (len >= 16) | |
3419 ++ goto poly1305_donna_16bytes; | |
3420 ++ | |
3421 ++ /* final bytes */ | |
3422 ++poly1305_donna_atmost15bytes: | |
3423 ++ if (!len) | |
3424 ++ return; | |
3425 ++ | |
3426 ++ for (j = 0; j < len; j++) | |
3427 ++ mp[j] = in[j]; | |
3428 ++ mp[j++] = 1; | |
3429 ++ for (; j < 16; j++) | |
3430 ++ mp[j] = 0; | |
3431 ++ len = 0; | |
3432 ++ | |
3433 ++ t0 = U8TO32_LE(mp+0); | |
3434 ++ t1 = U8TO32_LE(mp+4); | |
3435 ++ t2 = U8TO32_LE(mp+8); | |
3436 ++ t3 = U8TO32_LE(mp+12); | |
3437 ++ | |
3438 ++ state->h0 += t0 & 0x3ffffff; | |
3439 ++ state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; | |
3440 ++ state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; | |
3441 ++ state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; | |
3442 ++ state->h4 += (t3 >> 8); | |
3443 ++ | |
3444 ++ goto poly1305_donna_mul; | |
3445 ++ } | |
3446 ++ | |
3447 ++void CRYPTO_poly1305_init(poly1305_state *statep, const unsigned char key[32]) | |
3448 ++ { | |
3449 ++ struct poly1305_state_st *state = (struct poly1305_state_st*) statep; | |
3450 ++ uint32_t t0,t1,t2,t3; | |
3451 ++ | |
3452 ++ t0 = U8TO32_LE(key+0); | |
3453 ++ t1 = U8TO32_LE(key+4); | |
3454 ++ t2 = U8TO32_LE(key+8); | |
3455 ++ t3 = U8TO32_LE(key+12); | |
3456 ++ | |
3457 ++ /* precompute multipliers */ | |
3458 ++ state->r0 = t0 & 0x3ffffff; t0 >>= 26; t0 |= t1 << 6; | |
3459 ++ state->r1 = t0 & 0x3ffff03; t1 >>= 20; t1 |= t2 << 12; | |
3460 ++ state->r2 = t1 & 0x3ffc0ff; t2 >>= 14; t2 |= t3 << 18; | |
3461 ++ state->r3 = t2 & 0x3f03fff; t3 >>= 8; | |
3462 ++ state->r4 = t3 & 0x00fffff; | |
3463 ++ | |
3464 ++ state->s1 = state->r1 * 5; | |
3465 ++ state->s2 = state->r2 * 5; | |
3466 ++ state->s3 = state->r3 * 5; | |
3467 ++ state->s4 = state->r4 * 5; | |
3468 ++ | |
3469 ++ /* init state */ | |
3470 ++ state->h0 = 0; | |
3471 ++ state->h1 = 0; | |
3472 ++ state->h2 = 0; | |
3473 ++ state->h3 = 0; | |
3474 ++ state->h4 = 0; | |
3475 ++ | |
3476 ++ state->buf_used = 0; | |
3477 ++ memcpy(state->key, key + 16, sizeof(state->key)); | |
3478 ++ } | |
3479 ++ | |
3480 ++void CRYPTO_poly1305_update(poly1305_state *statep, const unsigned char *in, | |
3481 ++ size_t in_len) | |
3482 ++ { | |
3483 ++ unsigned int i; | |
3484 ++ struct poly1305_state_st *state = (struct poly1305_state_st*) statep; | |
3485 ++ | |
3486 ++ if (state->buf_used) | |
3487 ++ { | |
3488 ++ unsigned int todo = 16 - state->buf_used; | |
3489 ++ if (todo > in_len) | |
3490 ++ todo = in_len; | |
3491 ++ for (i = 0; i < todo; i++) | |
3492 ++ state->buf[state->buf_used + i] = in[i]; | |
3493 ++ state->buf_used += todo; | |
3494 ++ in_len -= todo; | |
3495 ++ in += todo; | |
3496 ++ | |
3497 ++ if (state->buf_used == 16) | |
3498 ++ { | |
3499 ++ poly1305_update(state, state->buf, 16); | |
3500 ++ state->buf_used = 0; | |
3501 ++ } | |
3502 ++ } | |
3503 ++ | |
3504 ++ if (in_len >= 16) | |
3505 ++ { | |
3506 ++ size_t todo = in_len & ~0xf; | |
3507 ++ poly1305_update(state, in, todo); | |
3508 ++ in += todo; | |
3509 ++ in_len &= 0xf; | |
3510 ++ } | |
3511 ++ | |
3512 ++ if (in_len) | |
3513 ++ { | |
3514 ++ for (i = 0; i < in_len; i++) | |
3515 ++ state->buf[i] = in[i]; | |
3516 ++ state->buf_used = in_len; | |
3517 ++ } | |
3518 ++ } | |
3519 ++ | |
3520 ++void CRYPTO_poly1305_finish(poly1305_state *statep, unsigned char mac[16]) | |
3521 ++ { | |
3522 ++ struct poly1305_state_st *state = (struct poly1305_state_st*) statep; | |
3523 ++ uint64_t f0,f1,f2,f3; | |
3524 ++ uint32_t g0,g1,g2,g3,g4; | |
3525 ++ uint32_t b, nb; | |
3526 ++ | |
3527 ++ if (state->buf_used) | |
3528 ++ poly1305_update(state, state->buf, state->buf_used); | |
3529 ++ | |
3530 ++ b = state->h0 >> 26; state->h0 = state->h0 & 0x3ffff
ff; | |
3531 ++ state->h1 += b; b = state->h1 >> 26; state->h1 = state->h1 & 0x3ffff
ff; | |
3532 ++ state->h2 += b; b = state->h2 >> 26; state->h2 = state->h2 & 0x3ffff
ff; | |
3533 ++ state->h3 += b; b = state->h3 >> 26; state->h3 = state->h3 & 0x3ffff
ff; | |
3534 ++ state->h4 += b; b = state->h4 >> 26; state->h4 = state->h4 & 0x3ffff
ff; | |
3535 ++ state->h0 += b * 5; | |
3536 ++ | |
3537 ++ g0 = state->h0 + 5; b = g0 >> 26; g0 &= 0x3ffffff; | |
3538 ++ g1 = state->h1 + b; b = g1 >> 26; g1 &= 0x3ffffff; | |
3539 ++ g2 = state->h2 + b; b = g2 >> 26; g2 &= 0x3ffffff; | |
3540 ++ g3 = state->h3 + b; b = g3 >> 26; g3 &= 0x3ffffff; | |
3541 ++ g4 = state->h4 + b - (1 << 26); | |
3542 ++ | |
3543 ++ b = (g4 >> 31) - 1; | |
3544 ++ nb = ~b; | |
3545 ++ state->h0 = (state->h0 & nb) | (g0 & b); | |
3546 ++ state->h1 = (state->h1 & nb) | (g1 & b); | |
3547 ++ state->h2 = (state->h2 & nb) | (g2 & b); | |
3548 ++ state->h3 = (state->h3 & nb) | (g3 & b); | |
3549 ++ state->h4 = (state->h4 & nb) | (g4 & b); | |
3550 ++ | |
3551 ++ f0 = ((state->h0 ) | (state->h1 << 26)) + (uint64_t)U8TO32_LE(&stat
e->key[0]); | |
3552 ++ f1 = ((state->h1 >> 6) | (state->h2 << 20)) + (uint64_t)U8TO32_LE(&stat
e->key[4]); | |
3553 ++ f2 = ((state->h2 >> 12) | (state->h3 << 14)) + (uint64_t)U8TO32_LE(&stat
e->key[8]); | |
3554 ++ f3 = ((state->h3 >> 18) | (state->h4 << 8)) + (uint64_t)U8TO32_LE(&stat
e->key[12]); | |
3555 ++ | |
3556 ++ U32TO8_LE(&mac[ 0], f0); f1 += (f0 >> 32); | |
3557 ++ U32TO8_LE(&mac[ 4], f1); f2 += (f1 >> 32); | |
3558 ++ U32TO8_LE(&mac[ 8], f2); f3 += (f2 >> 32); | |
3559 ++ U32TO8_LE(&mac[12], f3); | |
3560 ++ } | |
3561 ++ | |
3562 ++#endif /* !OPENSSL_NO_POLY1305 */ | |
3563 +diff --git a/crypto/poly1305/poly1305.h b/crypto/poly1305/poly1305.h | |
3564 +new file mode 100644 | |
3565 +index 0000000..28f85ed | |
3566 +--- /dev/null | |
3567 ++++ b/crypto/poly1305/poly1305.h | |
3568 +@@ -0,0 +1,88 @@ | |
3569 ++/* | |
3570 ++ * Poly1305 | |
3571 ++ * | |
3572 ++ * Created on: Jun, 2013 | |
3573 ++ * Author: Elie Bursztein (elieb@google.com) | |
3574 ++ * | |
3575 ++ * Adapted from the estream code by D. Bernstein. | |
3576 ++ */ | |
3577 ++/* ==================================================================== | |
3578 ++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
3579 ++ * | |
3580 ++ * Redistribution and use in source and binary forms, with or without | |
3581 ++ * modification, are permitted provided that the following conditions | |
3582 ++ * are met: | |
3583 ++ * | |
3584 ++ * 1. Redistributions of source code must retain the above copyright | |
3585 ++ * notice, this list of conditions and the following disclaimer. | |
3586 ++ * | |
3587 ++ * 2. Redistributions in binary form must reproduce the above copyright | |
3588 ++ * notice, this list of conditions and the following disclaimer in | |
3589 ++ * the documentation and/or other materials provided with the | |
3590 ++ * distribution. | |
3591 ++ * | |
3592 ++ * 3. All advertising materials mentioning features or use of this | |
3593 ++ * software must display the following acknowledgment: | |
3594 ++ * "This product includes software developed by the OpenSSL Project | |
3595 ++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
3596 ++ * | |
3597 ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
3598 ++ * endorse or promote products derived from this software without | |
3599 ++ * prior written permission. For written permission, please contact | |
3600 ++ * licensing@OpenSSL.org. | |
3601 ++ * | |
3602 ++ * 5. Products derived from this software may not be called "OpenSSL" | |
3603 ++ * nor may "OpenSSL" appear in their names without prior written | |
3604 ++ * permission of the OpenSSL Project. | |
3605 ++ * | |
3606 ++ * 6. Redistributions of any form whatsoever must retain the following | |
3607 ++ * acknowledgment: | |
3608 ++ * "This product includes software developed by the OpenSSL Project | |
3609 ++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
3610 ++ * | |
3611 ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
3612 ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
3613 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
3614 ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
3615 ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
3616 ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
3617 ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
3618 ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
3619 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
3620 ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
3621 ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
3622 ++ * OF THE POSSIBILITY OF SUCH DAMAGE. | |
3623 ++ * ==================================================================== | |
3624 ++ */ | |
3625 ++ | |
3626 ++#ifndef HEADER_POLY1305_H_ | |
3627 ++#define HEADER_POLY1305_H_ | |
3628 ++ | |
3629 ++#include <stdint.h> | |
3630 ++#include <openssl/opensslconf.h> | |
3631 ++ | |
3632 ++#if defined(OPENSSL_NO_POLY1305) | |
3633 ++#error Poly1305 support is disabled. | |
3634 ++#endif | |
3635 ++ | |
3636 ++typedef unsigned char poly1305_state[512]; | |
3637 ++ | |
3638 ++/* poly1305_init sets up |state| so that it can be used to calculate an | |
3639 ++ * authentication tag with the one-time key |key|. Note that |key| is a | |
3640 ++ * one-time key and therefore there is no `reset' method because that would | |
3641 ++ * enable several messages to be authenticated with the same key. */ | |
3642 ++extern void CRYPTO_poly1305_init(poly1305_state* state, | |
3643 ++ const unsigned char key[32]); | |
3644 ++ | |
3645 ++/* poly1305_update processes |in_len| bytes from |in|. It can be called zero o
r | |
3646 ++ * more times after poly1305_init. */ | |
3647 ++extern void CRYPTO_poly1305_update(poly1305_state* state, | |
3648 ++ const unsigned char *in, | |
3649 ++ size_t in_len); | |
3650 ++ | |
3651 ++/* poly1305_finish completes the poly1305 calculation and writes a 16 byte | |
3652 ++ * authentication tag to |mac|. */ | |
3653 ++extern void CRYPTO_poly1305_finish(poly1305_state* state, | |
3654 ++ unsigned char mac[16]); | |
3655 ++ | |
3656 ++#endif /* HEADER_POLY1305_H_ */ | |
3657 +diff --git a/crypto/poly1305/poly1305_arm.c b/crypto/poly1305/poly1305_arm.c | |
3658 +new file mode 100644 | |
3659 +index 0000000..adcef35 | |
3660 +--- /dev/null | |
3661 ++++ b/crypto/poly1305/poly1305_arm.c | |
3662 +@@ -0,0 +1,327 @@ | |
3663 ++/* ==================================================================== | |
3664 ++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
3665 ++ * | |
3666 ++ * Redistribution and use in source and binary forms, with or without | |
3667 ++ * modification, are permitted provided that the following conditions | |
3668 ++ * are met: | |
3669 ++ * | |
3670 ++ * 1. Redistributions of source code must retain the above copyright | |
3671 ++ * notice, this list of conditions and the following disclaimer. | |
3672 ++ * | |
3673 ++ * 2. Redistributions in binary form must reproduce the above copyright | |
3674 ++ * notice, this list of conditions and the following disclaimer in | |
3675 ++ * the documentation and/or other materials provided with the | |
3676 ++ * distribution. | |
3677 ++ * | |
3678 ++ * 3. All advertising materials mentioning features or use of this | |
3679 ++ * software must display the following acknowledgment: | |
3680 ++ * "This product includes software developed by the OpenSSL Project | |
3681 ++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
3682 ++ * | |
3683 ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
3684 ++ * endorse or promote products derived from this software without | |
3685 ++ * prior written permission. For written permission, please contact | |
3686 ++ * licensing@OpenSSL.org. | |
3687 ++ * | |
3688 ++ * 5. Products derived from this software may not be called "OpenSSL" | |
3689 ++ * nor may "OpenSSL" appear in their names without prior written | |
3690 ++ * permission of the OpenSSL Project. | |
3691 ++ * | |
3692 ++ * 6. Redistributions of any form whatsoever must retain the following | |
3693 ++ * acknowledgment: | |
3694 ++ * "This product includes software developed by the OpenSSL Project | |
3695 ++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
3696 ++ * | |
3697 ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
3698 ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
3699 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
3700 ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
3701 ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
3702 ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
3703 ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
3704 ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
3705 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
3706 ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
3707 ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
3708 ++ * OF THE POSSIBILITY OF SUCH DAMAGE. | |
3709 ++ * ==================================================================== | |
3710 ++ */ | |
3711 ++ | |
3712 ++/* This implementation was taken from the public domain, neon2 version in | |
3713 ++ * SUPERCOP by D. J. Bernstein and Peter Schwabe. */ | |
3714 ++ | |
3715 ++#include <stdint.h> | |
3716 ++ | |
3717 ++#include <openssl/poly1305.h> | |
3718 ++ | |
3719 ++#if !defined(OPENSSL_NO_POLY1305) | |
3720 ++ | |
3721 ++typedef struct { | |
3722 ++ uint32_t v[12]; /* for alignment; only using 10 */ | |
3723 ++} fe1305x2; | |
3724 ++ | |
3725 ++#define addmulmod openssl_poly1305_neon2_addmulmod | |
3726 ++#define blocks openssl_poly1305_neon2_blocks | |
3727 ++ | |
3728 ++extern void addmulmod(fe1305x2 *r, const fe1305x2 *x, const fe1305x2 *y, const
fe1305x2 *c); | |
3729 ++ | |
3730 ++extern int blocks(fe1305x2 *h, const fe1305x2 *precomp, const unsigned char *i
n, unsigned int inlen); | |
3731 ++ | |
3732 ++static void freeze(fe1305x2 *r) | |
3733 ++ { | |
3734 ++ int i; | |
3735 ++ | |
3736 ++ uint32_t x0 = r->v[0]; | |
3737 ++ uint32_t x1 = r->v[2]; | |
3738 ++ uint32_t x2 = r->v[4]; | |
3739 ++ uint32_t x3 = r->v[6]; | |
3740 ++ uint32_t x4 = r->v[8]; | |
3741 ++ uint32_t y0; | |
3742 ++ uint32_t y1; | |
3743 ++ uint32_t y2; | |
3744 ++ uint32_t y3; | |
3745 ++ uint32_t y4; | |
3746 ++ uint32_t swap; | |
3747 ++ | |
3748 ++ for (i = 0;i < 3;++i) | |
3749 ++ { | |
3750 ++ x1 += x0 >> 26; x0 &= 0x3ffffff; | |
3751 ++ x2 += x1 >> 26; x1 &= 0x3ffffff; | |
3752 ++ x3 += x2 >> 26; x2 &= 0x3ffffff; | |
3753 ++ x4 += x3 >> 26; x3 &= 0x3ffffff; | |
3754 ++ x0 += 5*(x4 >> 26); x4 &= 0x3ffffff; | |
3755 ++ } | |
3756 ++ | |
3757 ++ y0 = x0 + 5; | |
3758 ++ y1 = x1 + (y0 >> 26); y0 &= 0x3ffffff; | |
3759 ++ y2 = x2 + (y1 >> 26); y1 &= 0x3ffffff; | |
3760 ++ y3 = x3 + (y2 >> 26); y2 &= 0x3ffffff; | |
3761 ++ y4 = x4 + (y3 >> 26); y3 &= 0x3ffffff; | |
3762 ++ swap = -(y4 >> 26); y4 &= 0x3ffffff; | |
3763 ++ | |
3764 ++ y0 ^= x0; | |
3765 ++ y1 ^= x1; | |
3766 ++ y2 ^= x2; | |
3767 ++ y3 ^= x3; | |
3768 ++ y4 ^= x4; | |
3769 ++ | |
3770 ++ y0 &= swap; | |
3771 ++ y1 &= swap; | |
3772 ++ y2 &= swap; | |
3773 ++ y3 &= swap; | |
3774 ++ y4 &= swap; | |
3775 ++ | |
3776 ++ y0 ^= x0; | |
3777 ++ y1 ^= x1; | |
3778 ++ y2 ^= x2; | |
3779 ++ y3 ^= x3; | |
3780 ++ y4 ^= x4; | |
3781 ++ | |
3782 ++ r->v[0] = y0; | |
3783 ++ r->v[2] = y1; | |
3784 ++ r->v[4] = y2; | |
3785 ++ r->v[6] = y3; | |
3786 ++ r->v[8] = y4; | |
3787 ++ } | |
3788 ++ | |
3789 ++static void fe1305x2_tobytearray(unsigned char *r, fe1305x2 *x) | |
3790 ++ { | |
3791 ++ uint32_t x0 = x->v[0]; | |
3792 ++ uint32_t x1 = x->v[2]; | |
3793 ++ uint32_t x2 = x->v[4]; | |
3794 ++ uint32_t x3 = x->v[6]; | |
3795 ++ uint32_t x4 = x->v[8]; | |
3796 ++ | |
3797 ++ x1 += x0 >> 26; | |
3798 ++ x0 &= 0x3ffffff; | |
3799 ++ x2 += x1 >> 26; | |
3800 ++ x1 &= 0x3ffffff; | |
3801 ++ x3 += x2 >> 26; | |
3802 ++ x2 &= 0x3ffffff; | |
3803 ++ x4 += x3 >> 26; | |
3804 ++ x3 &= 0x3ffffff; | |
3805 ++ | |
3806 ++ *(uint32_t *) r = x0 + (x1 << 26); | |
3807 ++ *(uint32_t *) (r + 4) = (x1 >> 6) + (x2 << 20); | |
3808 ++ *(uint32_t *) (r + 8) = (x2 >> 12) + (x3 << 14); | |
3809 ++ *(uint32_t *) (r + 12) = (x3 >> 18) + (x4 << 8); | |
3810 ++ } | |
3811 ++ | |
3812 ++/* load32 exists to avoid breaking strict aliasing rules in | |
3813 ++ * fe1305x2_frombytearray. */ | |
3814 ++static uint32_t load32(unsigned char *t) | |
3815 ++ { | |
3816 ++ uint32_t tmp; | |
3817 ++ memcpy(&tmp, t, sizeof(tmp)); | |
3818 ++ return tmp; | |
3819 ++ } | |
3820 ++ | |
3821 ++static void fe1305x2_frombytearray(fe1305x2 *r, const unsigned char *x, unsign
ed long long xlen) | |
3822 ++ { | |
3823 ++ int i; | |
3824 ++ unsigned char t[17]; | |
3825 ++ | |
3826 ++ for (i = 0; (i < 16) && (i < xlen); i++) | |
3827 ++ t[i] = x[i]; | |
3828 ++ xlen -= i; | |
3829 ++ x += i; | |
3830 ++ t[i++] = 1; | |
3831 ++ for (; i<17; i++) | |
3832 ++ t[i] = 0; | |
3833 ++ | |
3834 ++ r->v[0] = 0x3ffffff & load32(t); | |
3835 ++ r->v[2] = 0x3ffffff & (load32(t + 3) >> 2); | |
3836 ++ r->v[4] = 0x3ffffff & (load32(t + 6) >> 4); | |
3837 ++ r->v[6] = 0x3ffffff & (load32(t + 9) >> 6); | |
3838 ++ r->v[8] = load32(t + 13); | |
3839 ++ | |
3840 ++ if (xlen) | |
3841 ++ { | |
3842 ++ for (i = 0; (i < 16) && (i < xlen); i++) | |
3843 ++ t[i] = x[i]; | |
3844 ++ t[i++] = 1; | |
3845 ++ for (; i<17; i++) | |
3846 ++ t[i] = 0; | |
3847 ++ | |
3848 ++ r->v[1] = 0x3ffffff & load32(t); | |
3849 ++ r->v[3] = 0x3ffffff & (load32(t + 3) >> 2); | |
3850 ++ r->v[5] = 0x3ffffff & (load32(t + 6) >> 4); | |
3851 ++ r->v[7] = 0x3ffffff & (load32(t + 9) >> 6); | |
3852 ++ r->v[9] = load32(t + 13); | |
3853 ++ } | |
3854 ++ else | |
3855 ++ r->v[1] = r->v[3] = r->v[5] = r->v[7] = r->v[9] = 0; | |
3856 ++ } | |
3857 ++ | |
3858 ++static const fe1305x2 zero __attribute__ ((aligned (16))); | |
3859 ++ | |
3860 ++struct poly1305_state_st { | |
3861 ++ unsigned char data[sizeof(fe1305x2[5]) + 128]; | |
3862 ++ unsigned char buf[32]; | |
3863 ++ unsigned int buf_used; | |
3864 ++ unsigned char key[16]; | |
3865 ++}; | |
3866 ++ | |
3867 ++void CRYPTO_poly1305_init(poly1305_state *state, const unsigned char key[32]) | |
3868 ++ { | |
3869 ++ struct poly1305_state_st *st = (struct poly1305_state_st*) (state); | |
3870 ++ fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); | |
3871 ++ fe1305x2 *const h = r + 1; | |
3872 ++ fe1305x2 *const c = h + 1; | |
3873 ++ fe1305x2 *const precomp = c + 1; | |
3874 ++ unsigned int j; | |
3875 ++ | |
3876 ++ r->v[1] = r->v[0] = 0x3ffffff & *(uint32_t *) key; | |
3877 ++ r->v[3] = r->v[2] = 0x3ffff03 & ((*(uint32_t *) (key + 3)) >> 2); | |
3878 ++ r->v[5] = r->v[4] = 0x3ffc0ff & ((*(uint32_t *) (key + 6)) >> 4); | |
3879 ++ r->v[7] = r->v[6] = 0x3f03fff & ((*(uint32_t *) (key + 9)) >> 6); | |
3880 ++ r->v[9] = r->v[8] = 0x00fffff & ((*(uint32_t *) (key + 12)) >> 8); | |
3881 ++ | |
3882 ++ for (j = 0; j < 10; j++) | |
3883 ++ h->v[j] = 0; /* XXX: should fast-forward a bit */ | |
3884 ++ | |
3885 ++ addmulmod(precomp,r,r,&zero); /* precompute r^2 */ | |
3886 ++ addmulmod(precomp + 1,precomp,precomp,&zero); /* precompute r^4 */ | |
3887 ++ | |
3888 ++ memcpy(st->key, key + 16, 16); | |
3889 ++ st->buf_used = 0; | |
3890 ++ } | |
3891 ++ | |
3892 ++void CRYPTO_poly1305_update(poly1305_state *state, const unsigned char *in, si
ze_t in_len) | |
3893 ++ { | |
3894 ++ struct poly1305_state_st *st = (struct poly1305_state_st*) (state); | |
3895 ++ fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); | |
3896 ++ fe1305x2 *const h = r + 1; | |
3897 ++ fe1305x2 *const c = h + 1; | |
3898 ++ fe1305x2 *const precomp = c + 1; | |
3899 ++ unsigned int i; | |
3900 ++ | |
3901 ++ if (st->buf_used) | |
3902 ++ { | |
3903 ++ unsigned int todo = 32 - st->buf_used; | |
3904 ++ if (todo > in_len) | |
3905 ++ todo = in_len; | |
3906 ++ for (i = 0; i < todo; i++) | |
3907 ++ st->buf[st->buf_used + i] = in[i]; | |
3908 ++ st->buf_used += todo; | |
3909 ++ in_len -= todo; | |
3910 ++ in += todo; | |
3911 ++ | |
3912 ++ if (st->buf_used == sizeof(st->buf) && in_len) | |
3913 ++ { | |
3914 ++ addmulmod(h,h,precomp,&zero); | |
3915 ++ fe1305x2_frombytearray(c, st->buf, sizeof(st->buf)); | |
3916 ++ for (i = 0; i < 10; i++) | |
3917 ++ h->v[i] += c->v[i]; | |
3918 ++ st->buf_used = 0; | |
3919 ++ } | |
3920 ++ } | |
3921 ++ | |
3922 ++ while (in_len > 32) | |
3923 ++ { | |
3924 ++ unsigned int tlen = 1048576; | |
3925 ++ if (in_len < tlen) | |
3926 ++ tlen = in_len; | |
3927 ++ tlen -= blocks(h, precomp, in, tlen); | |
3928 ++ in_len -= tlen; | |
3929 ++ in += tlen; | |
3930 ++ } | |
3931 ++ | |
3932 ++ if (in_len) | |
3933 ++ { | |
3934 ++ for (i = 0; i < in_len; i++) | |
3935 ++ st->buf[i] = in[i]; | |
3936 ++ st->buf_used = in_len; | |
3937 ++ } | |
3938 ++ } | |
3939 ++ | |
3940 ++void CRYPTO_poly1305_finish(poly1305_state* state, unsigned char mac[16]) | |
3941 ++ { | |
3942 ++ struct poly1305_state_st *st = (struct poly1305_state_st*) (state); | |
3943 ++ fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); | |
3944 ++ fe1305x2 *const h = r + 1; | |
3945 ++ fe1305x2 *const c = h + 1; | |
3946 ++ fe1305x2 *const precomp = c + 1; | |
3947 ++ | |
3948 ++ addmulmod(h,h,precomp,&zero); | |
3949 ++ | |
3950 ++ if (st->buf_used > 16) | |
3951 ++ { | |
3952 ++ fe1305x2_frombytearray(c, st->buf, st->buf_used); | |
3953 ++ precomp->v[1] = r->v[1]; | |
3954 ++ precomp->v[3] = r->v[3]; | |
3955 ++ precomp->v[5] = r->v[5]; | |
3956 ++ precomp->v[7] = r->v[7]; | |
3957 ++ precomp->v[9] = r->v[9]; | |
3958 ++ addmulmod(h,h,precomp,c); | |
3959 ++ } | |
3960 ++ else if (st->buf_used > 0) | |
3961 ++ { | |
3962 ++ fe1305x2_frombytearray(c, st->buf, st->buf_used); | |
3963 ++ r->v[1] = 1; | |
3964 ++ r->v[3] = 0; | |
3965 ++ r->v[5] = 0; | |
3966 ++ r->v[7] = 0; | |
3967 ++ r->v[9] = 0; | |
3968 ++ addmulmod(h,h,r,c); | |
3969 ++ } | |
3970 ++ | |
3971 ++ h->v[0] += h->v[1]; | |
3972 ++ h->v[2] += h->v[3]; | |
3973 ++ h->v[4] += h->v[5]; | |
3974 ++ h->v[6] += h->v[7]; | |
3975 ++ h->v[8] += h->v[9]; | |
3976 ++ freeze(h); | |
3977 ++ | |
3978 ++ fe1305x2_frombytearray(c, st->key, 16); | |
3979 ++ c->v[8] ^= (1 << 24); | |
3980 ++ | |
3981 ++ h->v[0] += c->v[0]; | |
3982 ++ h->v[2] += c->v[2]; | |
3983 ++ h->v[4] += c->v[4]; | |
3984 ++ h->v[6] += c->v[6]; | |
3985 ++ h->v[8] += c->v[8]; | |
3986 ++ fe1305x2_tobytearray(mac, h); | |
3987 ++ } | |
3988 ++ | |
3989 ++#endif /* !OPENSSL_NO_POLY1305 */ | |
3990 +diff --git a/crypto/poly1305/poly1305_arm_asm.S b/crypto/poly1305/poly1305_arm_
asm.S | |
3991 +new file mode 100644 | |
3992 +index 0000000..449d16f | |
3993 +--- /dev/null | |
3994 ++++ b/crypto/poly1305/poly1305_arm_asm.S | |
3995 +@@ -0,0 +1,2009 @@ | |
3996 ++# This implementation was taken from the public domain, neon2 version in | |
3997 ++# SUPERCOP by D. J. Bernstein and Peter Schwabe. | |
3998 ++ | |
3999 ++# qhasm: int32 input_0 | |
4000 ++ | |
4001 ++# qhasm: int32 input_1 | |
4002 ++ | |
4003 ++# qhasm: int32 input_2 | |
4004 ++ | |
4005 ++# qhasm: int32 input_3 | |
4006 ++ | |
4007 ++# qhasm: stack32 input_4 | |
4008 ++ | |
4009 ++# qhasm: stack32 input_5 | |
4010 ++ | |
4011 ++# qhasm: stack32 input_6 | |
4012 ++ | |
4013 ++# qhasm: stack32 input_7 | |
4014 ++ | |
4015 ++# qhasm: int32 caller_r4 | |
4016 ++ | |
4017 ++# qhasm: int32 caller_r5 | |
4018 ++ | |
4019 ++# qhasm: int32 caller_r6 | |
4020 ++ | |
4021 ++# qhasm: int32 caller_r7 | |
4022 ++ | |
4023 ++# qhasm: int32 caller_r8 | |
4024 ++ | |
4025 ++# qhasm: int32 caller_r9 | |
4026 ++ | |
4027 ++# qhasm: int32 caller_r10 | |
4028 ++ | |
4029 ++# qhasm: int32 caller_r11 | |
4030 ++ | |
4031 ++# qhasm: int32 caller_r12 | |
4032 ++ | |
4033 ++# qhasm: int32 caller_r14 | |
4034 ++ | |
4035 ++# qhasm: reg128 caller_q4 | |
4036 ++ | |
4037 ++# qhasm: reg128 caller_q5 | |
4038 ++ | |
4039 ++# qhasm: reg128 caller_q6 | |
4040 ++ | |
4041 ++# qhasm: reg128 caller_q7 | |
4042 ++ | |
4043 ++# qhasm: startcode | |
4044 ++.fpu neon | |
4045 ++.text | |
4046 ++ | |
4047 ++# qhasm: reg128 r0 | |
4048 ++ | |
4049 ++# qhasm: reg128 r1 | |
4050 ++ | |
4051 ++# qhasm: reg128 r2 | |
4052 ++ | |
4053 ++# qhasm: reg128 r3 | |
4054 ++ | |
4055 ++# qhasm: reg128 r4 | |
4056 ++ | |
4057 ++# qhasm: reg128 x01 | |
4058 ++ | |
4059 ++# qhasm: reg128 x23 | |
4060 ++ | |
4061 ++# qhasm: reg128 x4 | |
4062 ++ | |
4063 ++# qhasm: reg128 y0 | |
4064 ++ | |
4065 ++# qhasm: reg128 y12 | |
4066 ++ | |
4067 ++# qhasm: reg128 y34 | |
4068 ++ | |
4069 ++# qhasm: reg128 5y12 | |
4070 ++ | |
4071 ++# qhasm: reg128 5y34 | |
4072 ++ | |
4073 ++# qhasm: stack128 y0_stack | |
4074 ++ | |
4075 ++# qhasm: stack128 y12_stack | |
4076 ++ | |
4077 ++# qhasm: stack128 y34_stack | |
4078 ++ | |
4079 ++# qhasm: stack128 5y12_stack | |
4080 ++ | |
4081 ++# qhasm: stack128 5y34_stack | |
4082 ++ | |
4083 ++# qhasm: reg128 z0 | |
4084 ++ | |
4085 ++# qhasm: reg128 z12 | |
4086 ++ | |
4087 ++# qhasm: reg128 z34 | |
4088 ++ | |
4089 ++# qhasm: reg128 5z12 | |
4090 ++ | |
4091 ++# qhasm: reg128 5z34 | |
4092 ++ | |
4093 ++# qhasm: stack128 z0_stack | |
4094 ++ | |
4095 ++# qhasm: stack128 z12_stack | |
4096 ++ | |
4097 ++# qhasm: stack128 z34_stack | |
4098 ++ | |
4099 ++# qhasm: stack128 5z12_stack | |
4100 ++ | |
4101 ++# qhasm: stack128 5z34_stack | |
4102 ++ | |
4103 ++# qhasm: stack128 two24 | |
4104 ++ | |
4105 ++# qhasm: int32 ptr | |
4106 ++ | |
4107 ++# qhasm: reg128 c01 | |
4108 ++ | |
4109 ++# qhasm: reg128 c23 | |
4110 ++ | |
4111 ++# qhasm: reg128 d01 | |
4112 ++ | |
4113 ++# qhasm: reg128 d23 | |
4114 ++ | |
4115 ++# qhasm: reg128 t0 | |
4116 ++ | |
4117 ++# qhasm: reg128 t1 | |
4118 ++ | |
4119 ++# qhasm: reg128 t2 | |
4120 ++ | |
4121 ++# qhasm: reg128 t3 | |
4122 ++ | |
4123 ++# qhasm: reg128 t4 | |
4124 ++ | |
4125 ++# qhasm: reg128 mask | |
4126 ++ | |
4127 ++# qhasm: reg128 u0 | |
4128 ++ | |
4129 ++# qhasm: reg128 u1 | |
4130 ++ | |
4131 ++# qhasm: reg128 u2 | |
4132 ++ | |
4133 ++# qhasm: reg128 u3 | |
4134 ++ | |
4135 ++# qhasm: reg128 u4 | |
4136 ++ | |
4137 ++# qhasm: reg128 v01 | |
4138 ++ | |
4139 ++# qhasm: reg128 mid | |
4140 ++ | |
4141 ++# qhasm: reg128 v23 | |
4142 ++ | |
4143 ++# qhasm: reg128 v4 | |
4144 ++ | |
4145 ++# qhasm: int32 len | |
4146 ++ | |
4147 ++# qhasm: qpushenter crypto_onetimeauth_poly1305_neon2_blocks | |
4148 ++.align 4 | |
4149 ++.global openssl_poly1305_neon2_blocks | |
4150 ++.type openssl_poly1305_neon2_blocks STT_FUNC | |
4151 ++openssl_poly1305_neon2_blocks: | |
4152 ++vpush {q4,q5,q6,q7} | |
4153 ++mov r12,sp | |
4154 ++sub sp,sp,#192 | |
4155 ++and sp,sp,#0xffffffe0 | |
4156 ++ | |
4157 ++# qhasm: len = input_3 | |
4158 ++# asm 1: mov >len=int32#4,<input_3=int32#4 | |
4159 ++# asm 2: mov >len=r3,<input_3=r3 | |
4160 ++mov r3,r3 | |
4161 ++ | |
4162 ++# qhasm: new y0 | |
4163 ++ | |
4164 ++# qhasm: y0 = mem64[input_1]y0[1]; input_1 += 8 | |
4165 ++# asm 1: vld1.8 {<y0=reg128#1%bot},[<input_1=int32#2]! | |
4166 ++# asm 2: vld1.8 {<y0=d0},[<input_1=r1]! | |
4167 ++vld1.8 {d0},[r1]! | |
4168 ++ | |
4169 ++# qhasm: y12 = mem128[input_1]; input_1 += 16 | |
4170 ++# asm 1: vld1.8 {>y12=reg128#2%bot->y12=reg128#2%top},[<input_1=int32#2]! | |
4171 ++# asm 2: vld1.8 {>y12=d2->y12=d3},[<input_1=r1]! | |
4172 ++vld1.8 {d2-d3},[r1]! | |
4173 ++ | |
4174 ++# qhasm: y34 = mem128[input_1]; input_1 += 16 | |
4175 ++# asm 1: vld1.8 {>y34=reg128#3%bot->y34=reg128#3%top},[<input_1=int32#2]! | |
4176 ++# asm 2: vld1.8 {>y34=d4->y34=d5},[<input_1=r1]! | |
4177 ++vld1.8 {d4-d5},[r1]! | |
4178 ++ | |
4179 ++# qhasm: input_1 += 8 | |
4180 ++# asm 1: add >input_1=int32#2,<input_1=int32#2,#8 | |
4181 ++# asm 2: add >input_1=r1,<input_1=r1,#8 | |
4182 ++add r1,r1,#8 | |
4183 ++ | |
4184 ++# qhasm: new z0 | |
4185 ++ | |
4186 ++# qhasm: z0 = mem64[input_1]z0[1]; input_1 += 8 | |
4187 ++# asm 1: vld1.8 {<z0=reg128#4%bot},[<input_1=int32#2]! | |
4188 ++# asm 2: vld1.8 {<z0=d6},[<input_1=r1]! | |
4189 ++vld1.8 {d6},[r1]! | |
4190 ++ | |
4191 ++# qhasm: z12 = mem128[input_1]; input_1 += 16 | |
4192 ++# asm 1: vld1.8 {>z12=reg128#5%bot->z12=reg128#5%top},[<input_1=int32#2]! | |
4193 ++# asm 2: vld1.8 {>z12=d8->z12=d9},[<input_1=r1]! | |
4194 ++vld1.8 {d8-d9},[r1]! | |
4195 ++ | |
4196 ++# qhasm: z34 = mem128[input_1]; input_1 += 16 | |
4197 ++# asm 1: vld1.8 {>z34=reg128#6%bot->z34=reg128#6%top},[<input_1=int32#2]! | |
4198 ++# asm 2: vld1.8 {>z34=d10->z34=d11},[<input_1=r1]! | |
4199 ++vld1.8 {d10-d11},[r1]! | |
4200 ++ | |
4201 ++# qhasm: 2x mask = 0xffffffff | |
4202 ++# asm 1: vmov.i64 >mask=reg128#7,#0xffffffff | |
4203 ++# asm 2: vmov.i64 >mask=q6,#0xffffffff | |
4204 ++vmov.i64 q6,#0xffffffff | |
4205 ++ | |
4206 ++# qhasm: 2x u4 = 0xff | |
4207 ++# asm 1: vmov.i64 >u4=reg128#8,#0xff | |
4208 ++# asm 2: vmov.i64 >u4=q7,#0xff | |
4209 ++vmov.i64 q7,#0xff | |
4210 ++ | |
4211 ++# qhasm: x01 aligned= mem128[input_0];input_0+=16 | |
4212 ++# asm 1: vld1.8 {>x01=reg128#9%bot->x01=reg128#9%top},[<input_0=int32#1,: 128]
! | |
4213 ++# asm 2: vld1.8 {>x01=d16->x01=d17},[<input_0=r0,: 128]! | |
4214 ++vld1.8 {d16-d17},[r0,: 128]! | |
4215 ++ | |
4216 ++# qhasm: x23 aligned= mem128[input_0];input_0+=16 | |
4217 ++# asm 1: vld1.8 {>x23=reg128#10%bot->x23=reg128#10%top},[<input_0=int32#1,: 12
8]! | |
4218 ++# asm 2: vld1.8 {>x23=d18->x23=d19},[<input_0=r0,: 128]! | |
4219 ++vld1.8 {d18-d19},[r0,: 128]! | |
4220 ++ | |
4221 ++# qhasm: x4 aligned= mem64[input_0]x4[1] | |
4222 ++# asm 1: vld1.8 {<x4=reg128#11%bot},[<input_0=int32#1,: 64] | |
4223 ++# asm 2: vld1.8 {<x4=d20},[<input_0=r0,: 64] | |
4224 ++vld1.8 {d20},[r0,: 64] | |
4225 ++ | |
4226 ++# qhasm: input_0 -= 32 | |
4227 ++# asm 1: sub >input_0=int32#1,<input_0=int32#1,#32 | |
4228 ++# asm 2: sub >input_0=r0,<input_0=r0,#32 | |
4229 ++sub r0,r0,#32 | |
4230 ++ | |
4231 ++# qhasm: 2x mask unsigned>>=6 | |
4232 ++# asm 1: vshr.u64 >mask=reg128#7,<mask=reg128#7,#6 | |
4233 ++# asm 2: vshr.u64 >mask=q6,<mask=q6,#6 | |
4234 ++vshr.u64 q6,q6,#6 | |
4235 ++ | |
4236 ++# qhasm: 2x u4 unsigned>>= 7 | |
4237 ++# asm 1: vshr.u64 >u4=reg128#8,<u4=reg128#8,#7 | |
4238 ++# asm 2: vshr.u64 >u4=q7,<u4=q7,#7 | |
4239 ++vshr.u64 q7,q7,#7 | |
4240 ++ | |
4241 ++# qhasm: 4x 5y12 = y12 << 2 | |
4242 ++# asm 1: vshl.i32 >5y12=reg128#12,<y12=reg128#2,#2 | |
4243 ++# asm 2: vshl.i32 >5y12=q11,<y12=q1,#2 | |
4244 ++vshl.i32 q11,q1,#2 | |
4245 ++ | |
4246 ++# qhasm: 4x 5y34 = y34 << 2 | |
4247 ++# asm 1: vshl.i32 >5y34=reg128#13,<y34=reg128#3,#2 | |
4248 ++# asm 2: vshl.i32 >5y34=q12,<y34=q2,#2 | |
4249 ++vshl.i32 q12,q2,#2 | |
4250 ++ | |
4251 ++# qhasm: 4x 5y12 += y12 | |
4252 ++# asm 1: vadd.i32 >5y12=reg128#12,<5y12=reg128#12,<y12=reg128#2 | |
4253 ++# asm 2: vadd.i32 >5y12=q11,<5y12=q11,<y12=q1 | |
4254 ++vadd.i32 q11,q11,q1 | |
4255 ++ | |
4256 ++# qhasm: 4x 5y34 += y34 | |
4257 ++# asm 1: vadd.i32 >5y34=reg128#13,<5y34=reg128#13,<y34=reg128#3 | |
4258 ++# asm 2: vadd.i32 >5y34=q12,<5y34=q12,<y34=q2 | |
4259 ++vadd.i32 q12,q12,q2 | |
4260 ++ | |
4261 ++# qhasm: 2x u4 <<= 24 | |
4262 ++# asm 1: vshl.i64 >u4=reg128#8,<u4=reg128#8,#24 | |
4263 ++# asm 2: vshl.i64 >u4=q7,<u4=q7,#24 | |
4264 ++vshl.i64 q7,q7,#24 | |
4265 ++ | |
4266 ++# qhasm: 4x 5z12 = z12 << 2 | |
4267 ++# asm 1: vshl.i32 >5z12=reg128#14,<z12=reg128#5,#2 | |
4268 ++# asm 2: vshl.i32 >5z12=q13,<z12=q4,#2 | |
4269 ++vshl.i32 q13,q4,#2 | |
4270 ++ | |
4271 ++# qhasm: 4x 5z34 = z34 << 2 | |
4272 ++# asm 1: vshl.i32 >5z34=reg128#15,<z34=reg128#6,#2 | |
4273 ++# asm 2: vshl.i32 >5z34=q14,<z34=q5,#2 | |
4274 ++vshl.i32 q14,q5,#2 | |
4275 ++ | |
4276 ++# qhasm: 4x 5z12 += z12 | |
4277 ++# asm 1: vadd.i32 >5z12=reg128#14,<5z12=reg128#14,<z12=reg128#5 | |
4278 ++# asm 2: vadd.i32 >5z12=q13,<5z12=q13,<z12=q4 | |
4279 ++vadd.i32 q13,q13,q4 | |
4280 ++ | |
4281 ++# qhasm: 4x 5z34 += z34 | |
4282 ++# asm 1: vadd.i32 >5z34=reg128#15,<5z34=reg128#15,<z34=reg128#6 | |
4283 ++# asm 2: vadd.i32 >5z34=q14,<5z34=q14,<z34=q5 | |
4284 ++vadd.i32 q14,q14,q5 | |
4285 ++ | |
4286 ++# qhasm: new two24 | |
4287 ++ | |
4288 ++# qhasm: new y0_stack | |
4289 ++ | |
4290 ++# qhasm: new y12_stack | |
4291 ++ | |
4292 ++# qhasm: new y34_stack | |
4293 ++ | |
4294 ++# qhasm: new 5y12_stack | |
4295 ++ | |
4296 ++# qhasm: new 5y34_stack | |
4297 ++ | |
4298 ++# qhasm: new z0_stack | |
4299 ++ | |
4300 ++# qhasm: new z12_stack | |
4301 ++ | |
4302 ++# qhasm: new z34_stack | |
4303 ++ | |
4304 ++# qhasm: new 5z12_stack | |
4305 ++ | |
4306 ++# qhasm: new 5z34_stack | |
4307 ++ | |
4308 ++# qhasm: ptr = &two24 | |
4309 ++# asm 1: lea >ptr=int32#2,<two24=stack128#1 | |
4310 ++# asm 2: lea >ptr=r1,<two24=[sp,#0] | |
4311 ++add r1,sp,#0 | |
4312 ++ | |
4313 ++# qhasm: mem128[ptr] aligned= u4 | |
4314 ++# asm 1: vst1.8 {<u4=reg128#8%bot-<u4=reg128#8%top},[<ptr=int32#2,: 128] | |
4315 ++# asm 2: vst1.8 {<u4=d14-<u4=d15},[<ptr=r1,: 128] | |
4316 ++vst1.8 {d14-d15},[r1,: 128] | |
4317 ++ | |
4318 ++# qhasm: r4 = u4 | |
4319 ++# asm 1: vmov >r4=reg128#16,<u4=reg128#8 | |
4320 ++# asm 2: vmov >r4=q15,<u4=q7 | |
4321 ++vmov q15,q7 | |
4322 ++ | |
4323 ++# qhasm: r0 = u4 | |
4324 ++# asm 1: vmov >r0=reg128#8,<u4=reg128#8 | |
4325 ++# asm 2: vmov >r0=q7,<u4=q7 | |
4326 ++vmov q7,q7 | |
4327 ++ | |
4328 ++# qhasm: ptr = &y0_stack | |
4329 ++# asm 1: lea >ptr=int32#2,<y0_stack=stack128#2 | |
4330 ++# asm 2: lea >ptr=r1,<y0_stack=[sp,#16] | |
4331 ++add r1,sp,#16 | |
4332 ++ | |
4333 ++# qhasm: mem128[ptr] aligned= y0 | |
4334 ++# asm 1: vst1.8 {<y0=reg128#1%bot-<y0=reg128#1%top},[<ptr=int32#2,: 128] | |
4335 ++# asm 2: vst1.8 {<y0=d0-<y0=d1},[<ptr=r1,: 128] | |
4336 ++vst1.8 {d0-d1},[r1,: 128] | |
4337 ++ | |
4338 ++# qhasm: ptr = &y12_stack | |
4339 ++# asm 1: lea >ptr=int32#2,<y12_stack=stack128#3 | |
4340 ++# asm 2: lea >ptr=r1,<y12_stack=[sp,#32] | |
4341 ++add r1,sp,#32 | |
4342 ++ | |
4343 ++# qhasm: mem128[ptr] aligned= y12 | |
4344 ++# asm 1: vst1.8 {<y12=reg128#2%bot-<y12=reg128#2%top},[<ptr=int32#2,: 128] | |
4345 ++# asm 2: vst1.8 {<y12=d2-<y12=d3},[<ptr=r1,: 128] | |
4346 ++vst1.8 {d2-d3},[r1,: 128] | |
4347 ++ | |
4348 ++# qhasm: ptr = &y34_stack | |
4349 ++# asm 1: lea >ptr=int32#2,<y34_stack=stack128#4 | |
4350 ++# asm 2: lea >ptr=r1,<y34_stack=[sp,#48] | |
4351 ++add r1,sp,#48 | |
4352 ++ | |
4353 ++# qhasm: mem128[ptr] aligned= y34 | |
4354 ++# asm 1: vst1.8 {<y34=reg128#3%bot-<y34=reg128#3%top},[<ptr=int32#2,: 128] | |
4355 ++# asm 2: vst1.8 {<y34=d4-<y34=d5},[<ptr=r1,: 128] | |
4356 ++vst1.8 {d4-d5},[r1,: 128] | |
4357 ++ | |
4358 ++# qhasm: ptr = &z0_stack | |
4359 ++# asm 1: lea >ptr=int32#2,<z0_stack=stack128#7 | |
4360 ++# asm 2: lea >ptr=r1,<z0_stack=[sp,#96] | |
4361 ++add r1,sp,#96 | |
4362 ++ | |
4363 ++# qhasm: mem128[ptr] aligned= z0 | |
4364 ++# asm 1: vst1.8 {<z0=reg128#4%bot-<z0=reg128#4%top},[<ptr=int32#2,: 128] | |
4365 ++# asm 2: vst1.8 {<z0=d6-<z0=d7},[<ptr=r1,: 128] | |
4366 ++vst1.8 {d6-d7},[r1,: 128] | |
4367 ++ | |
4368 ++# qhasm: ptr = &z12_stack | |
4369 ++# asm 1: lea >ptr=int32#2,<z12_stack=stack128#8 | |
4370 ++# asm 2: lea >ptr=r1,<z12_stack=[sp,#112] | |
4371 ++add r1,sp,#112 | |
4372 ++ | |
4373 ++# qhasm: mem128[ptr] aligned= z12 | |
4374 ++# asm 1: vst1.8 {<z12=reg128#5%bot-<z12=reg128#5%top},[<ptr=int32#2,: 128] | |
4375 ++# asm 2: vst1.8 {<z12=d8-<z12=d9},[<ptr=r1,: 128] | |
4376 ++vst1.8 {d8-d9},[r1,: 128] | |
4377 ++ | |
4378 ++# qhasm: ptr = &z34_stack | |
4379 ++# asm 1: lea >ptr=int32#2,<z34_stack=stack128#9 | |
4380 ++# asm 2: lea >ptr=r1,<z34_stack=[sp,#128] | |
4381 ++add r1,sp,#128 | |
4382 ++ | |
4383 ++# qhasm: mem128[ptr] aligned= z34 | |
4384 ++# asm 1: vst1.8 {<z34=reg128#6%bot-<z34=reg128#6%top},[<ptr=int32#2,: 128] | |
4385 ++# asm 2: vst1.8 {<z34=d10-<z34=d11},[<ptr=r1,: 128] | |
4386 ++vst1.8 {d10-d11},[r1,: 128] | |
4387 ++ | |
4388 ++# qhasm: ptr = &5y12_stack | |
4389 ++# asm 1: lea >ptr=int32#2,<5y12_stack=stack128#5 | |
4390 ++# asm 2: lea >ptr=r1,<5y12_stack=[sp,#64] | |
4391 ++add r1,sp,#64 | |
4392 ++ | |
4393 ++# qhasm: mem128[ptr] aligned= 5y12 | |
4394 ++# asm 1: vst1.8 {<5y12=reg128#12%bot-<5y12=reg128#12%top},[<ptr=int32#2,: 128] | |
4395 ++# asm 2: vst1.8 {<5y12=d22-<5y12=d23},[<ptr=r1,: 128] | |
4396 ++vst1.8 {d22-d23},[r1,: 128] | |
4397 ++ | |
4398 ++# qhasm: ptr = &5y34_stack | |
4399 ++# asm 1: lea >ptr=int32#2,<5y34_stack=stack128#6 | |
4400 ++# asm 2: lea >ptr=r1,<5y34_stack=[sp,#80] | |
4401 ++add r1,sp,#80 | |
4402 ++ | |
4403 ++# qhasm: mem128[ptr] aligned= 5y34 | |
4404 ++# asm 1: vst1.8 {<5y34=reg128#13%bot-<5y34=reg128#13%top},[<ptr=int32#2,: 128] | |
4405 ++# asm 2: vst1.8 {<5y34=d24-<5y34=d25},[<ptr=r1,: 128] | |
4406 ++vst1.8 {d24-d25},[r1,: 128] | |
4407 ++ | |
4408 ++# qhasm: ptr = &5z12_stack | |
4409 ++# asm 1: lea >ptr=int32#2,<5z12_stack=stack128#10 | |
4410 ++# asm 2: lea >ptr=r1,<5z12_stack=[sp,#144] | |
4411 ++add r1,sp,#144 | |
4412 ++ | |
4413 ++# qhasm: mem128[ptr] aligned= 5z12 | |
4414 ++# asm 1: vst1.8 {<5z12=reg128#14%bot-<5z12=reg128#14%top},[<ptr=int32#2,: 128] | |
4415 ++# asm 2: vst1.8 {<5z12=d26-<5z12=d27},[<ptr=r1,: 128] | |
4416 ++vst1.8 {d26-d27},[r1,: 128] | |
4417 ++ | |
4418 ++# qhasm: ptr = &5z34_stack | |
4419 ++# asm 1: lea >ptr=int32#2,<5z34_stack=stack128#11 | |
4420 ++# asm 2: lea >ptr=r1,<5z34_stack=[sp,#160] | |
4421 ++add r1,sp,#160 | |
4422 ++ | |
4423 ++# qhasm: mem128[ptr] aligned= 5z34 | |
4424 ++# asm 1: vst1.8 {<5z34=reg128#15%bot-<5z34=reg128#15%top},[<ptr=int32#2,: 128] | |
4425 ++# asm 2: vst1.8 {<5z34=d28-<5z34=d29},[<ptr=r1,: 128] | |
4426 ++vst1.8 {d28-d29},[r1,: 128] | |
4427 ++ | |
4428 ++# qhasm: unsigned>? len - 64 | |
4429 ++# asm 1: cmp <len=int32#4,#64 | |
4430 ++# asm 2: cmp <len=r3,#64 | |
4431 ++cmp r3,#64 | |
4432 ++ | |
4433 ++# qhasm: goto below64bytes if !unsigned> | |
4434 ++bls ._below64bytes | |
4435 ++ | |
4436 ++# qhasm: input_2 += 32 | |
4437 ++# asm 1: add >input_2=int32#2,<input_2=int32#3,#32 | |
4438 ++# asm 2: add >input_2=r1,<input_2=r2,#32 | |
4439 ++add r1,r2,#32 | |
4440 ++ | |
4441 ++# qhasm: mainloop2: | |
4442 ++._mainloop2: | |
4443 ++ | |
4444 ++# qhasm: c01 = mem128[input_2];input_2+=16 | |
4445 ++# asm 1: vld1.8 {>c01=reg128#1%bot->c01=reg128#1%top},[<input_2=int32#2]! | |
4446 ++# asm 2: vld1.8 {>c01=d0->c01=d1},[<input_2=r1]! | |
4447 ++vld1.8 {d0-d1},[r1]! | |
4448 ++ | |
4449 ++# qhasm: c23 = mem128[input_2];input_2+=16 | |
4450 ++# asm 1: vld1.8 {>c23=reg128#2%bot->c23=reg128#2%top},[<input_2=int32#2]! | |
4451 ++# asm 2: vld1.8 {>c23=d2->c23=d3},[<input_2=r1]! | |
4452 ++vld1.8 {d2-d3},[r1]! | |
4453 ++ | |
4454 ++# qhasm: r4[0,1] += x01[0] unsigned* z34[2]; r4[2,3] += x01[1] unsigned* z3
4[3] | |
4455 ++# asm 1: vmlal.u32 <r4=reg128#16,<x01=reg128#9%bot,<z34=reg128#6%top | |
4456 ++# asm 2: vmlal.u32 <r4=q15,<x01=d16,<z34=d11 | |
4457 ++vmlal.u32 q15,d16,d11 | |
4458 ++ | |
4459 ++# qhasm: ptr = &z12_stack | |
4460 ++# asm 1: lea >ptr=int32#3,<z12_stack=stack128#8 | |
4461 ++# asm 2: lea >ptr=r2,<z12_stack=[sp,#112] | |
4462 ++add r2,sp,#112 | |
4463 ++ | |
4464 ++# qhasm: z12 aligned= mem128[ptr] | |
4465 ++# asm 1: vld1.8 {>z12=reg128#3%bot->z12=reg128#3%top},[<ptr=int32#3,: 128] | |
4466 ++# asm 2: vld1.8 {>z12=d4->z12=d5},[<ptr=r2,: 128] | |
4467 ++vld1.8 {d4-d5},[r2,: 128] | |
4468 ++ | |
4469 ++# qhasm: r4[0,1] += x01[2] unsigned* z34[0]; r4[2,3] += x01[3] unsigned* z34[
1] | |
4470 ++# asm 1: vmlal.u32 <r4=reg128#16,<x01=reg128#9%top,<z34=reg128#6%bot | |
4471 ++# asm 2: vmlal.u32 <r4=q15,<x01=d17,<z34=d10 | |
4472 ++vmlal.u32 q15,d17,d10 | |
4473 ++ | |
4474 ++# qhasm: ptr = &z0_stack | |
4475 ++# asm 1: lea >ptr=int32#3,<z0_stack=stack128#7 | |
4476 ++# asm 2: lea >ptr=r2,<z0_stack=[sp,#96] | |
4477 ++add r2,sp,#96 | |
4478 ++ | |
4479 ++# qhasm: z0 aligned= mem128[ptr] | |
4480 ++# asm 1: vld1.8 {>z0=reg128#4%bot->z0=reg128#4%top},[<ptr=int32#3,: 128] | |
4481 ++# asm 2: vld1.8 {>z0=d6->z0=d7},[<ptr=r2,: 128] | |
4482 ++vld1.8 {d6-d7},[r2,: 128] | |
4483 ++ | |
4484 ++# qhasm: r4[0,1] += x23[0] unsigned* z12[2]; r4[2,3] += x23[1] unsigned* z12[
3] | |
4485 ++# asm 1: vmlal.u32 <r4=reg128#16,<x23=reg128#10%bot,<z12=reg128#3%top | |
4486 ++# asm 2: vmlal.u32 <r4=q15,<x23=d18,<z12=d5 | |
4487 ++vmlal.u32 q15,d18,d5 | |
4488 ++ | |
4489 ++# qhasm: c01 c23 = c01[0]c01[1]c01[2]c23[2]c23[0]c23[1]c01[3]c23[3] | |
4490 ++# asm 1: vtrn.32 <c01=reg128#1%top,<c23=reg128#2%top | |
4491 ++# asm 2: vtrn.32 <c01=d1,<c23=d3 | |
4492 ++vtrn.32 d1,d3 | |
4493 ++ | |
4494 ++# qhasm: r4[0,1] += x23[2] unsigned* z12[0]; r4[2,3] += x23[3] unsigned* z12[
1] | |
4495 ++# asm 1: vmlal.u32 <r4=reg128#16,<x23=reg128#10%top,<z12=reg128#3%bot | |
4496 ++# asm 2: vmlal.u32 <r4=q15,<x23=d19,<z12=d4 | |
4497 ++vmlal.u32 q15,d19,d4 | |
4498 ++ | |
4499 ++# qhasm: r4[0,1] += x4[0] unsigned* z0[0]; r4[2,3] += x4[1] unsigned* z0[1] | |
4500 ++# asm 1: vmlal.u32 <r4=reg128#16,<x4=reg128#11%bot,<z0=reg128#4%bot | |
4501 ++# asm 2: vmlal.u32 <r4=q15,<x4=d20,<z0=d6 | |
4502 ++vmlal.u32 q15,d20,d6 | |
4503 ++ | |
4504 ++# qhasm: r3[0,1] = c23[2]<<18; r3[2,3] = c23[3]<<18 | |
4505 ++# asm 1: vshll.u32 >r3=reg128#5,<c23=reg128#2%top,#18 | |
4506 ++# asm 2: vshll.u32 >r3=q4,<c23=d3,#18 | |
4507 ++vshll.u32 q4,d3,#18 | |
4508 ++ | |
4509 ++# qhasm: c01 c23 = c01[0]c23[0]c01[2]c01[3]c01[1]c23[1]c23[2]c23[3] | |
4510 ++# asm 1: vtrn.32 <c01=reg128#1%bot,<c23=reg128#2%bot | |
4511 ++# asm 2: vtrn.32 <c01=d0,<c23=d2 | |
4512 ++vtrn.32 d0,d2 | |
4513 ++ | |
4514 ++# qhasm: r3[0,1] += x01[0] unsigned* z34[0]; r3[2,3] += x01[1] unsigned* z34
[1] | |
4515 ++# asm 1: vmlal.u32 <r3=reg128#5,<x01=reg128#9%bot,<z34=reg128#6%bot | |
4516 ++# asm 2: vmlal.u32 <r3=q4,<x01=d16,<z34=d10 | |
4517 ++vmlal.u32 q4,d16,d10 | |
4518 ++ | |
4519 ++# qhasm: r3[0,1] += x01[2] unsigned* z12[2]; r3[2,3] += x01[3] unsigned* z12
[3] | |
4520 ++# asm 1: vmlal.u32 <r3=reg128#5,<x01=reg128#9%top,<z12=reg128#3%top | |
4521 ++# asm 2: vmlal.u32 <r3=q4,<x01=d17,<z12=d5 | |
4522 ++vmlal.u32 q4,d17,d5 | |
4523 ++ | |
4524 ++# qhasm: r0 = r0[1]c01[0]r0[2,3] | |
4525 ++# asm 1: vext.32 <r0=reg128#8%bot,<r0=reg128#8%bot,<c01=reg128#1%bot,#1 | |
4526 ++# asm 2: vext.32 <r0=d14,<r0=d14,<c01=d0,#1 | |
4527 ++vext.32 d14,d14,d0,#1 | |
4528 ++ | |
4529 ++# qhasm: r3[0,1] += x23[0] unsigned* z12[0]; r3[2,3] += x23[1] unsigned* z12
[1] | |
4530 ++# asm 1: vmlal.u32 <r3=reg128#5,<x23=reg128#10%bot,<z12=reg128#3%bot | |
4531 ++# asm 2: vmlal.u32 <r3=q4,<x23=d18,<z12=d4 | |
4532 ++vmlal.u32 q4,d18,d4 | |
4533 ++ | |
4534 ++# qhasm: input_2
-= 64 | |
4535 ++# asm 1: sub >input_2=int32#2,<input_2=int32#2,#64 | |
4536 ++# asm 2: sub >input_2=r1,<input_2=r1,#64 | |
4537 ++sub r1,r1,#64 | |
4538 ++ | |
4539 ++# qhasm: r3[0,1] += x23[2] unsigned* z0[0]; r3[2,3] += x23[3] unsigned* z0[1
] | |
4540 ++# asm 1: vmlal.u32 <r3=reg128#5,<x23=reg128#10%top,<z0=reg128#4%bot | |
4541 ++# asm 2: vmlal.u32 <r3=q4,<x23=d19,<z0=d6 | |
4542 ++vmlal.u32 q4,d19,d6 | |
4543 ++ | |
4544 ++# qhasm: ptr = &5z34_stack | |
4545 ++# asm 1: lea >ptr=int32#3,<5z34_stack=stack128#11 | |
4546 ++# asm 2: lea >ptr=r2,<5z34_stack=[sp,#160] | |
4547 ++add r2,sp,#160 | |
4548 ++ | |
4549 ++# qhasm: 5z34 aligned= mem128[ptr] | |
4550 ++# asm 1: vld1.8 {>5z34=reg128#6%bot->5z34=reg128#6%top},[<ptr=int32#3,: 128] | |
4551 ++# asm 2: vld1.8 {>5z34=d10->5z34=d11},[<ptr=r2,: 128] | |
4552 ++vld1.8 {d10-d11},[r2,: 128] | |
4553 ++ | |
4554 ++# qhasm: r3[0,1] += x4[0] unsigned* 5z34[2]; r3[2,3] += x4[1] unsigned* 5z
34[3] | |
4555 ++# asm 1: vmlal.u32 <r3=reg128#5,<x4=reg128#11%bot,<5z34=reg128#6%top | |
4556 ++# asm 2: vmlal.u32 <r3=q4,<x4=d20,<5z34=d11 | |
4557 ++vmlal.u32 q4,d20,d11 | |
4558 ++ | |
4559 ++# qhasm: r0 = r0[1]r0[0]r0[3]r0[2] | |
4560 ++# asm 1: vrev64.i32 >r0=reg128#8,<r0=reg128#8 | |
4561 ++# asm 2: vrev64.i32 >r0=q7,<r0=q7 | |
4562 ++vrev64.i32 q7,q7 | |
4563 ++ | |
4564 ++# qhasm: r2[0,1] = c01[2]<<12; r2[2,3] = c01[3]<<12 | |
4565 ++# asm 1: vshll.u32 >r2=reg128#14,<c01=reg128#1%top,#12 | |
4566 ++# asm 2: vshll.u32 >r2=q13,<c01=d1,#12 | |
4567 ++vshll.u32 q13,d1,#12 | |
4568 ++ | |
4569 ++# qhasm: d01 = mem128[input_2];input_2+=16 | |
4570 ++# asm 1: vld1.8 {>d01=reg128#12%bot->d01=reg128#12%top},[<input_2=int32#2]! | |
4571 ++# asm 2: vld1.8 {>d01=d22->d01=d23},[<input_2=r1]! | |
4572 ++vld1.8 {d22-d23},[r1]! | |
4573 ++ | |
4574 ++# qhasm: r2[0,1] += x01[0] unsigned* z12[2]; r2[2,3] += x01[1] unsigned* z12
[3] | |
4575 ++# asm 1: vmlal.u32 <r2=reg128#14,<x01=reg128#9%bot,<z12=reg128#3%top | |
4576 ++# asm 2: vmlal.u32 <r2=q13,<x01=d16,<z12=d5 | |
4577 ++vmlal.u32 q13,d16,d5 | |
4578 ++ | |
4579 ++# qhasm: r2[0,1] += x01[2] unsigned* z12[0]; r2[2,3] += x01[3] unsigned* z12
[1] | |
4580 ++# asm 1: vmlal.u32 <r2=reg128#14,<x01=reg128#9%top,<z12=reg128#3%bot | |
4581 ++# asm 2: vmlal.u32 <r2=q13,<x01=d17,<z12=d4 | |
4582 ++vmlal.u32 q13,d17,d4 | |
4583 ++ | |
4584 ++# qhasm: r2[0,1] += x23[0] unsigned* z0[0]; r2[2,3] += x23[1] unsigned* z0[1
] | |
4585 ++# asm 1: vmlal.u32 <r2=reg128#14,<x23=reg128#10%bot,<z0=reg128#4%bot | |
4586 ++# asm 2: vmlal.u32 <r2=q13,<x23=d18,<z0=d6 | |
4587 ++vmlal.u32 q13,d18,d6 | |
4588 ++ | |
4589 ++# qhasm: r2[0,1] += x23[2] unsigned* 5z34[2]; r2[2,3] += x23[3] unsigned* 5z
34[3] | |
4590 ++# asm 1: vmlal.u32 <r2=reg128#14,<x23=reg128#10%top,<5z34=reg128#6%top | |
4591 ++# asm 2: vmlal.u32 <r2=q13,<x23=d19,<5z34=d11 | |
4592 ++vmlal.u32 q13,d19,d11 | |
4593 ++ | |
4594 ++# qhasm: r2[0,1] += x4[0] unsigned* 5z34[0]; r2[2,3] += x4[1] unsigned* 5z34
[1] | |
4595 ++# asm 1: vmlal.u32 <r2=reg128#14,<x4=reg128#11%bot,<5z34=reg128#6%bot | |
4596 ++# asm 2: vmlal.u32 <r2=q13,<x4=d20,<5z34=d10 | |
4597 ++vmlal.u32 q13,d20,d10 | |
4598 ++ | |
4599 ++# qhasm: r0 = r0[0,1]c01[1]r0[2] | |
4600 ++# asm 1: vext.32 <r0=reg128#8%top,<c01=reg128#1%bot,<r0=reg128#8%top,#1 | |
4601 ++# asm 2: vext.32 <r0=d15,<c01=d0,<r0=d15,#1 | |
4602 ++vext.32 d15,d0,d15,#1 | |
4603 ++ | |
4604 ++# qhasm: r1[0,1] = c23[0]<<6; r1[2,3] = c23[1]<<6 | |
4605 ++# asm 1: vshll.u32 >r1=reg128#15,<c23=reg128#2%bot,#6 | |
4606 ++# asm 2: vshll.u32 >r1=q14,<c23=d2,#6 | |
4607 ++vshll.u32 q14,d2,#6 | |
4608 ++ | |
4609 ++# qhasm: r1[0,1] += x01[0] unsigned* z12[0]; r1[2,3] += x01[1] unsigned* z12
[1] | |
4610 ++# asm 1: vmlal.u32 <r1=reg128#15,<x01=reg128#9%bot,<z12=reg128#3%bot | |
4611 ++# asm 2: vmlal.u32 <r1=q14,<x01=d16,<z12=d4 | |
4612 ++vmlal.u32 q14,d16,d4 | |
4613 ++ | |
4614 ++# qhasm: r1[0,1] += x01[2] unsigned* z0[0]; r1[2,3] += x01[3] unsigned* z0[1
] | |
4615 ++# asm 1: vmlal.u32 <r1=reg128#15,<x01=reg128#9%top,<z0=reg128#4%bot | |
4616 ++# asm 2: vmlal.u32 <r1=q14,<x01=d17,<z0=d6 | |
4617 ++vmlal.u32 q14,d17,d6 | |
4618 ++ | |
4619 ++# qhasm: r1[0,1] += x23[0] unsigned* 5z34[2]; r1[2,3] += x23[1] unsigned* 5z
34[3] | |
4620 ++# asm 1: vmlal.u32 <r1=reg128#15,<x23=reg128#10%bot,<5z34=reg128#6%top | |
4621 ++# asm 2: vmlal.u32 <r1=q14,<x23=d18,<5z34=d11 | |
4622 ++vmlal.u32 q14,d18,d11 | |
4623 ++ | |
4624 ++# qhasm: r1[0,1] += x23[2] unsigned* 5z34[0]; r1[2,3] += x23[3] unsigned* 5z34
[1] | |
4625 ++# asm 1: vmlal.u32 <r1=reg128#15,<x23=reg128#10%top,<5z34=reg128#6%bot | |
4626 ++# asm 2: vmlal.u32 <r1=q14,<x23=d19,<5z34=d10 | |
4627 ++vmlal.u32 q14,d19,d10 | |
4628 ++ | |
4629 ++# qhasm: ptr = &5z12_stack | |
4630 ++# asm 1: lea >ptr=int32#3,<5z12_stack=stack128#10 | |
4631 ++# asm 2: lea >ptr=r2,<5z12_stack=[sp,#144] | |
4632 ++add r2,sp,#144 | |
4633 ++ | |
4634 ++# qhasm: 5z12 aligned= mem128[ptr] | |
4635 ++# asm 1: vld1.8 {>5z12=reg128#1%bot->5z12=reg128#1%top},[<ptr=int32#3,: 128] | |
4636 ++# asm 2: vld1.8 {>5z12=d0->5z12=d1},[<ptr=r2,: 128] | |
4637 ++vld1.8 {d0-d1},[r2,: 128] | |
4638 ++ | |
4639 ++# qhasm: r1[0,1] += x4[0] unsigned* 5z12[2]; r1[2,3] += x4[1] unsigned* 5z12
[3] | |
4640 ++# asm 1: vmlal.u32 <r1=reg128#15,<x4=reg128#11%bot,<5z12=reg128#1%top | |
4641 ++# asm 2: vmlal.u32 <r1=q14,<x4=d20,<5z12=d1 | |
4642 ++vmlal.u32 q14,d20,d1 | |
4643 ++ | |
4644 ++# qhasm: d23 = mem128[input_2];input_2+=16 | |
4645 ++# asm 1: vld1.8 {>d23=reg128#2%bot->d23=reg128#2%top},[<input_2=int32#2]! | |
4646 ++# asm 2: vld1.8 {>d23=d2->d23=d3},[<input_2=r1]! | |
4647 ++vld1.8 {d2-d3},[r1]! | |
4648 ++ | |
4649 ++# qhasm: input_2 += 32 | |
4650 ++# asm 1: add >input_2=int32#2,<input_2=int32#2,#32 | |
4651 ++# asm 2: add >input_2=r1,<input_2=r1,#32 | |
4652 ++add r1,r1,#32 | |
4653 ++ | |
4654 ++# qhasm: r0[0,1] += x4[0] unsigned* 5z12[0]; r0[2,3] += x4[1] unsigned* 5z12
[1] | |
4655 ++# asm 1: vmlal.u32 <r0=reg128#8,<x4=reg128#11%bot,<5z12=reg128#1%bot | |
4656 ++# asm 2: vmlal.u32 <r0=q7,<x4=d20,<5z12=d0 | |
4657 ++vmlal.u32 q7,d20,d0 | |
4658 ++ | |
4659 ++# qhasm: r0[0,1] += x23[0] unsigned* 5z34[0]; r0[2,3] += x23[1] unsigned* 5z34
[1] | |
4660 ++# asm 1: vmlal.u32 <r0=reg128#8,<x23=reg128#10%bot,<5z34=reg128#6%bot | |
4661 ++# asm 2: vmlal.u32 <r0=q7,<x23=d18,<5z34=d10 | |
4662 ++vmlal.u32 q7,d18,d10 | |
4663 ++ | |
4664 ++# qhasm: d01 d23 = d01[0] d23[0] d01[1] d23[1] | |
4665 ++# asm 1: vswp <d23=reg128#2%bot,<d01=reg128#12%top | |
4666 ++# asm 2: vswp <d23=d2,<d01=d23 | |
4667 ++vswp d2,d23 | |
4668 ++ | |
4669 ++# qhasm: r0[0,1] += x23[2] unsigned* 5z12[2]; r0[2,3] += x23[3] unsigned* 5z12
[3] | |
4670 ++# asm 1: vmlal.u32 <r0=reg128#8,<x23=reg128#10%top,<5z12=reg128#1%top | |
4671 ++# asm 2: vmlal.u32 <r0=q7,<x23=d19,<5z12=d1 | |
4672 ++vmlal.u32 q7,d19,d1 | |
4673 ++ | |
4674 ++# qhasm: r0[0,1] += x01[0] unsigned* z0[0]; r0[2,3] += x01[1] unsigned* z0[1
] | |
4675 ++# asm 1: vmlal.u32 <r0=reg128#8,<x01=reg128#9%bot,<z0=reg128#4%bot | |
4676 ++# asm 2: vmlal.u32 <r0=q7,<x01=d16,<z0=d6 | |
4677 ++vmlal.u32 q7,d16,d6 | |
4678 ++ | |
4679 ++# qhasm: new mid | |
4680 ++ | |
4681 ++# qhasm: 2x v4 = d23 unsigned>> 40 | |
4682 ++# asm 1: vshr.u64 >v4=reg128#4,<d23=reg128#2,#40 | |
4683 ++# asm 2: vshr.u64 >v4=q3,<d23=q1,#40 | |
4684 ++vshr.u64 q3,q1,#40 | |
4685 ++ | |
4686 ++# qhasm: mid = d01[1]d23[0] mid[2,3] | |
4687 ++# asm 1: vext.32 <mid=reg128#1%bot,<d01=reg128#12%bot,<d23=reg128#2%bot,#1 | |
4688 ++# asm 2: vext.32 <mid=d0,<d01=d22,<d23=d2,#1 | |
4689 ++vext.32 d0,d22,d2,#1 | |
4690 ++ | |
4691 ++# qhasm: new v23 | |
4692 ++ | |
4693 ++# qhasm: v23[2] = d23[0,1] unsigned>> 14; v23[3] = d23[2,3] unsig
ned>> 14 | |
4694 ++# asm 1: vshrn.u64 <v23=reg128#10%top,<d23=reg128#2,#14 | |
4695 ++# asm 2: vshrn.u64 <v23=d19,<d23=q1,#14 | |
4696 ++vshrn.u64 d19,q1,#14 | |
4697 ++ | |
4698 ++# qhasm: mid = mid[0,1] d01[3]d23[2] | |
4699 ++# asm 1: vext.32 <mid=reg128#1%top,<d01=reg128#12%top,<d23=reg128#2%top,#1 | |
4700 ++# asm 2: vext.32 <mid=d1,<d01=d23,<d23=d3,#1 | |
4701 ++vext.32 d1,d23,d3,#1 | |
4702 ++ | |
4703 ++# qhasm: new v01 | |
4704 ++ | |
4705 ++# qhasm: v01[2] = d01[0,1] unsigned>> 26; v01[3] = d01[2,3] unsig
ned>> 26 | |
4706 ++# asm 1: vshrn.u64 <v01=reg128#11%top,<d01=reg128#12,#26 | |
4707 ++# asm 2: vshrn.u64 <v01=d21,<d01=q11,#26 | |
4708 ++vshrn.u64 d21,q11,#26 | |
4709 ++ | |
4710 ++# qhasm: v01 = d01[1]d01[0] v01[2,3] | |
4711 ++# asm 1: vext.32 <v01=reg128#11%bot,<d01=reg128#12%bot,<d01=reg128#12%bot,#1 | |
4712 ++# asm 2: vext.32 <v01=d20,<d01=d22,<d01=d22,#1 | |
4713 ++vext.32 d20,d22,d22,#1 | |
4714 ++ | |
4715 ++# qhasm: r0[0,1] += x01[2] unsigned* 5z34[2]; r0[2,3] += x01[3] unsigned* 5z
34[3] | |
4716 ++# asm 1: vmlal.u32 <r0=reg128#8,<x01=reg128#9%top,<5z34=reg128#6%top | |
4717 ++# asm 2: vmlal.u32 <r0=q7,<x01=d17,<5z34=d11 | |
4718 ++vmlal.u32 q7,d17,d11 | |
4719 ++ | |
4720 ++# qhasm: v01 = v01[1]d01[2] v01[2,3] | |
4721 ++# asm 1: vext.32 <v01=reg128#11%bot,<v01=reg128#11%bot,<d01=reg128#12%top,#1 | |
4722 ++# asm 2: vext.32 <v01=d20,<v01=d20,<d01=d23,#1 | |
4723 ++vext.32 d20,d20,d23,#1 | |
4724 ++ | |
4725 ++# qhasm: v23[0] = mid[0,1] unsigned>> 20; v23[1] = mid[2,3] unsig
ned>> 20 | |
4726 ++# asm 1: vshrn.u64 <v23=reg128#10%bot,<mid=reg128#1,#20 | |
4727 ++# asm 2: vshrn.u64 <v23=d18,<mid=q0,#20 | |
4728 ++vshrn.u64 d18,q0,#20 | |
4729 ++ | |
4730 ++# qhasm: v4 = v4[0]v4[2]v4[1]v4[3] | |
4731 ++# asm 1: vtrn.32 <v4=reg128#4%bot,<v4=reg128#4%top | |
4732 ++# asm 2: vtrn.32 <v4=d6,<v4=d7 | |
4733 ++vtrn.32 d6,d7 | |
4734 ++ | |
4735 ++# qhasm: 4x v01 &= 0x03ffffff | |
4736 ++# asm 1: vand.i32 <v01=reg128#11,#0x03ffffff | |
4737 ++# asm 2: vand.i32 <v01=q10,#0x03ffffff | |
4738 ++vand.i32 q10,#0x03ffffff | |
4739 ++ | |
4740 ++# qhasm: ptr = &y34_stack | |
4741 ++# asm 1: lea >ptr=int32#3,<y34_stack=stack128#4 | |
4742 ++# asm 2: lea >ptr=r2,<y34_stack=[sp,#48] | |
4743 ++add r2,sp,#48 | |
4744 ++ | |
4745 ++# qhasm: y34 aligned= mem128[ptr] | |
4746 ++# asm 1: vld1.8 {>y34=reg128#3%bot->y34=reg128#3%top},[<ptr=int32#3,: 128] | |
4747 ++# asm 2: vld1.8 {>y34=d4->y34=d5},[<ptr=r2,: 128] | |
4748 ++vld1.8 {d4-d5},[r2,: 128] | |
4749 ++ | |
4750 ++# qhasm: 4x v23 &= 0x03ffffff | |
4751 ++# asm 1: vand.i32 <v23=reg128#10,#0x03ffffff | |
4752 ++# asm 2: vand.i32 <v23=q9,#0x03ffffff | |
4753 ++vand.i32 q9,#0x03ffffff | |
4754 ++ | |
4755 ++# qhasm: ptr = &y12_stack | |
4756 ++# asm 1: lea >ptr=int32#3,<y12_stack=stack128#3 | |
4757 ++# asm 2: lea >ptr=r2,<y12_stack=[sp,#32] | |
4758 ++add r2,sp,#32 | |
4759 ++ | |
4760 ++# qhasm: y12 aligned= mem128[ptr] | |
4761 ++# asm 1: vld1.8 {>y12=reg128#2%bot->y12=reg128#2%top},[<ptr=int32#3,: 128] | |
4762 ++# asm 2: vld1.8 {>y12=d2->y12=d3},[<ptr=r2,: 128] | |
4763 ++vld1.8 {d2-d3},[r2,: 128] | |
4764 ++ | |
4765 ++# qhasm: 4x v4 |= 0x01000000 | |
4766 ++# asm 1: vorr.i32 <v4=reg128#4,#0x01000000 | |
4767 ++# asm 2: vorr.i32 <v4=q3,#0x01000000 | |
4768 ++vorr.i32 q3,#0x01000000 | |
4769 ++ | |
4770 ++# qhasm: ptr = &y0_stack | |
4771 ++# asm 1: lea >ptr=int32#3,<y0_stack=stack128#2 | |
4772 ++# asm 2: lea >ptr=r2,<y0_stack=[sp,#16] | |
4773 ++add r2,sp,#16 | |
4774 ++ | |
4775 ++# qhasm: y0 aligned= mem128[ptr] | |
4776 ++# asm 1: vld1.8 {>y0=reg128#1%bot->y0=reg128#1%top},[<ptr=int32#3,: 128] | |
4777 ++# asm 2: vld1.8 {>y0=d0->y0=d1},[<ptr=r2,: 128] | |
4778 ++vld1.8 {d0-d1},[r2,: 128] | |
4779 ++ | |
4780 ++# qhasm: r4[0,1] += v01[0] unsigned* y34[2]; r4[2,3] += v01[1] unsigned* y3
4[3] | |
4781 ++# asm 1: vmlal.u32 <r4=reg128#16,<v01=reg128#11%bot,<y34=reg128#3%top | |
4782 ++# asm 2: vmlal.u32 <r4=q15,<v01=d20,<y34=d5 | |
4783 ++vmlal.u32 q15,d20,d5 | |
4784 ++ | |
4785 ++# qhasm: r4[0,1] += v01[2] unsigned* y34[0]; r4[2,3] += v01[3] unsigned* y34[
1] | |
4786 ++# asm 1: vmlal.u32 <r4=reg128#16,<v01=reg128#11%top,<y34=reg128#3%bot | |
4787 ++# asm 2: vmlal.u32 <r4=q15,<v01=d21,<y34=d4 | |
4788 ++vmlal.u32 q15,d21,d4 | |
4789 ++ | |
4790 ++# qhasm: r4[0,1] += v23[0] unsigned* y12[2]; r4[2,3] += v23[1] unsigned* y12[
3] | |
4791 ++# asm 1: vmlal.u32 <r4=reg128#16,<v23=reg128#10%bot,<y12=reg128#2%top | |
4792 ++# asm 2: vmlal.u32 <r4=q15,<v23=d18,<y12=d3 | |
4793 ++vmlal.u32 q15,d18,d3 | |
4794 ++ | |
4795 ++# qhasm: r4[0,1] += v23[2] unsigned* y12[0]; r4[2,3] += v23[3] unsigned* y12[
1] | |
4796 ++# asm 1: vmlal.u32 <r4=reg128#16,<v23=reg128#10%top,<y12=reg128#2%bot | |
4797 ++# asm 2: vmlal.u32 <r4=q15,<v23=d19,<y12=d2 | |
4798 ++vmlal.u32 q15,d19,d2 | |
4799 ++ | |
4800 ++# qhasm: r4[0,1] += v4[0] unsigned* y0[0]; r4[2,3] += v4[1] unsigned* y0[1] | |
4801 ++# asm 1: vmlal.u32 <r4=reg128#16,<v4=reg128#4%bot,<y0=reg128#1%bot | |
4802 ++# asm 2: vmlal.u32 <r4=q15,<v4=d6,<y0=d0 | |
4803 ++vmlal.u32 q15,d6,d0 | |
4804 ++ | |
4805 ++# qhasm: ptr = &5y34_stack | |
4806 ++# asm 1: lea >ptr=int32#3,<5y34_stack=stack128#6 | |
4807 ++# asm 2: lea >ptr=r2,<5y34_stack=[sp,#80] | |
4808 ++add r2,sp,#80 | |
4809 ++ | |
4810 ++# qhasm: 5y34 aligned= mem128[ptr] | |
4811 ++# asm 1: vld1.8 {>5y34=reg128#13%bot->5y34=reg128#13%top},[<ptr=int32#3,: 128] | |
4812 ++# asm 2: vld1.8 {>5y34=d24->5y34=d25},[<ptr=r2,: 128] | |
4813 ++vld1.8 {d24-d25},[r2,: 128] | |
4814 ++ | |
4815 ++# qhasm: r3[0,1] += v01[0] unsigned* y34[0]; r3[2,3] += v01[1] unsigned* y34
[1] | |
4816 ++# asm 1: vmlal.u32 <r3=reg128#5,<v01=reg128#11%bot,<y34=reg128#3%bot | |
4817 ++# asm 2: vmlal.u32 <r3=q4,<v01=d20,<y34=d4 | |
4818 ++vmlal.u32 q4,d20,d4 | |
4819 ++ | |
4820 ++# qhasm: r3[0,1] += v01[2] unsigned* y12[2]; r3[2,3] += v01[3] unsigned* y12
[3] | |
4821 ++# asm 1: vmlal.u32 <r3=reg128#5,<v01=reg128#11%top,<y12=reg128#2%top | |
4822 ++# asm 2: vmlal.u32 <r3=q4,<v01=d21,<y12=d3 | |
4823 ++vmlal.u32 q4,d21,d3 | |
4824 ++ | |
4825 ++# qhasm: r3[0,1] += v23[0] unsigned* y12[0]; r3[2,3] += v23[1] unsigned* y12
[1] | |
4826 ++# asm 1: vmlal.u32 <r3=reg128#5,<v23=reg128#10%bot,<y12=reg128#2%bot | |
4827 ++# asm 2: vmlal.u32 <r3=q4,<v23=d18,<y12=d2 | |
4828 ++vmlal.u32 q4,d18,d2 | |
4829 ++ | |
4830 ++# qhasm: r3[0,1] += v23[2] unsigned* y0[0]; r3[2,3] += v23[3] unsigned* y0[1
] | |
4831 ++# asm 1: vmlal.u32 <r3=reg128#5,<v23=reg128#10%top,<y0=reg128#1%bot | |
4832 ++# asm 2: vmlal.u32 <r3=q4,<v23=d19,<y0=d0 | |
4833 ++vmlal.u32 q4,d19,d0 | |
4834 ++ | |
4835 ++# qhasm: r3[0,1] += v4[0] unsigned* 5y34[2]; r3[2,3] += v4[1] unsigned* 5y
34[3] | |
4836 ++# asm 1: vmlal.u32 <r3=reg128#5,<v4=reg128#4%bot,<5y34=reg128#13%top | |
4837 ++# asm 2: vmlal.u32 <r3=q4,<v4=d6,<5y34=d25 | |
4838 ++vmlal.u32 q4,d6,d25 | |
4839 ++ | |
4840 ++# qhasm: ptr = &5y12_stack | |
4841 ++# asm 1: lea >ptr=int32#3,<5y12_stack=stack128#5 | |
4842 ++# asm 2: lea >ptr=r2,<5y12_stack=[sp,#64] | |
4843 ++add r2,sp,#64 | |
4844 ++ | |
4845 ++# qhasm: 5y12 aligned= mem128[ptr] | |
4846 ++# asm 1: vld1.8 {>5y12=reg128#12%bot->5y12=reg128#12%top},[<ptr=int32#3,: 128] | |
4847 ++# asm 2: vld1.8 {>5y12=d22->5y12=d23},[<ptr=r2,: 128] | |
4848 ++vld1.8 {d22-d23},[r2,: 128] | |
4849 ++ | |
4850 ++# qhasm: r0[0,1] += v4[0] unsigned* 5y12[0]; r0[2,3] += v4[1] unsigned* 5y12
[1] | |
4851 ++# asm 1: vmlal.u32 <r0=reg128#8,<v4=reg128#4%bot,<5y12=reg128#12%bot | |
4852 ++# asm 2: vmlal.u32 <r0=q7,<v4=d6,<5y12=d22 | |
4853 ++vmlal.u32 q7,d6,d22 | |
4854 ++ | |
4855 ++# qhasm: r0[0,1] += v23[0] unsigned* 5y34[0]; r0[2,3] += v23[1] unsigned* 5y34
[1] | |
4856 ++# asm 1: vmlal.u32 <r0=reg128#8,<v23=reg128#10%bot,<5y34=reg128#13%bot | |
4857 ++# asm 2: vmlal.u32 <r0=q7,<v23=d18,<5y34=d24 | |
4858 ++vmlal.u32 q7,d18,d24 | |
4859 ++ | |
4860 ++# qhasm: r0[0,1] += v23[2] unsigned* 5y12[2]; r0[2,3] += v23[3] unsigned* 5y12
[3] | |
4861 ++# asm 1: vmlal.u32 <r0=reg128#8,<v23=reg128#10%top,<5y12=reg128#12%top | |
4862 ++# asm 2: vmlal.u32 <r0=q7,<v23=d19,<5y12=d23 | |
4863 ++vmlal.u32 q7,d19,d23 | |
4864 ++ | |
4865 ++# qhasm: r0[0,1] += v01[0] unsigned* y0[0]; r0[2,3] += v01[1] unsigned* y0[1
] | |
4866 ++# asm 1: vmlal.u32 <r0=reg128#8,<v01=reg128#11%bot,<y0=reg128#1%bot | |
4867 ++# asm 2: vmlal.u32 <r0=q7,<v01=d20,<y0=d0 | |
4868 ++vmlal.u32 q7,d20,d0 | |
4869 ++ | |
4870 ++# qhasm: r0[0,1] += v01[2] unsigned* 5y34[2]; r0[2,3] += v01[3] unsigned* 5y
34[3] | |
4871 ++# asm 1: vmlal.u32 <r0=reg128#8,<v01=reg128#11%top,<5y34=reg128#13%top | |
4872 ++# asm 2: vmlal.u32 <r0=q7,<v01=d21,<5y34=d25 | |
4873 ++vmlal.u32 q7,d21,d25 | |
4874 ++ | |
4875 ++# qhasm: r1[0,1] += v01[0] unsigned* y12[0]; r1[2,3] += v01[1] unsigned* y12
[1] | |
4876 ++# asm 1: vmlal.u32 <r1=reg128#15,<v01=reg128#11%bot,<y12=reg128#2%bot | |
4877 ++# asm 2: vmlal.u32 <r1=q14,<v01=d20,<y12=d2 | |
4878 ++vmlal.u32 q14,d20,d2 | |
4879 ++ | |
4880 ++# qhasm: r1[0,1] += v01[2] unsigned* y0[0]; r1[2,3] += v01[3] unsigned* y0[1
] | |
4881 ++# asm 1: vmlal.u32 <r1=reg128#15,<v01=reg128#11%top,<y0=reg128#1%bot | |
4882 ++# asm 2: vmlal.u32 <r1=q14,<v01=d21,<y0=d0 | |
4883 ++vmlal.u32 q14,d21,d0 | |
4884 ++ | |
4885 ++# qhasm: r1[0,1] += v23[0] unsigned* 5y34[2]; r1[2,3] += v23[1] unsigned* 5y
34[3] | |
4886 ++# asm 1: vmlal.u32 <r1=reg128#15,<v23=reg128#10%bot,<5y34=reg128#13%top | |
4887 ++# asm 2: vmlal.u32 <r1=q14,<v23=d18,<5y34=d25 | |
4888 ++vmlal.u32 q14,d18,d25 | |
4889 ++ | |
4890 ++# qhasm: r1[0,1] += v23[2] unsigned* 5y34[0]; r1[2,3] += v23[3] unsigned* 5y34
[1] | |
4891 ++# asm 1: vmlal.u32 <r1=reg128#15,<v23=reg128#10%top,<5y34=reg128#13%bot | |
4892 ++# asm 2: vmlal.u32 <r1=q14,<v23=d19,<5y34=d24 | |
4893 ++vmlal.u32 q14,d19,d24 | |
4894 ++ | |
4895 ++# qhasm: r1[0,1] += v4[0] unsigned* 5y12[2]; r1[2,3] += v4[1] unsigned* 5y12
[3] | |
4896 ++# asm 1: vmlal.u32 <r1=reg128#15,<v4=reg128#4%bot,<5y12=reg128#12%top | |
4897 ++# asm 2: vmlal.u32 <r1=q14,<v4=d6,<5y12=d23 | |
4898 ++vmlal.u32 q14,d6,d23 | |
4899 ++ | |
4900 ++# qhasm: r2[0,1] += v01[0] unsigned* y12[2]; r2[2,3] += v01[1] unsigned* y12
[3] | |
4901 ++# asm 1: vmlal.u32 <r2=reg128#14,<v01=reg128#11%bot,<y12=reg128#2%top | |
4902 ++# asm 2: vmlal.u32 <r2=q13,<v01=d20,<y12=d3 | |
4903 ++vmlal.u32 q13,d20,d3 | |
4904 ++ | |
4905 ++# qhasm: r2[0,1] += v01[2] unsigned* y12[0]; r2[2,3] += v01[3] unsigned* y12
[1] | |
4906 ++# asm 1: vmlal.u32 <r2=reg128#14,<v01=reg128#11%top,<y12=reg128#2%bot | |
4907 ++# asm 2: vmlal.u32 <r2=q13,<v01=d21,<y12=d2 | |
4908 ++vmlal.u32 q13,d21,d2 | |
4909 ++ | |
4910 ++# qhasm: r2[0,1] += v23[0] unsigned* y0[0]; r2[2,3] += v23[1] unsigned* y0[1
] | |
4911 ++# asm 1: vmlal.u32 <r2=reg128#14,<v23=reg128#10%bot,<y0=reg128#1%bot | |
4912 ++# asm 2: vmlal.u32 <r2=q13,<v23=d18,<y0=d0 | |
4913 ++vmlal.u32 q13,d18,d0 | |
4914 ++ | |
4915 ++# qhasm: r2[0,1] += v23[2] unsigned* 5y34[2]; r2[2,3] += v23[3] unsigned* 5y
34[3] | |
4916 ++# asm 1: vmlal.u32 <r2=reg128#14,<v23=reg128#10%top,<5y34=reg128#13%top | |
4917 ++# asm 2: vmlal.u32 <r2=q13,<v23=d19,<5y34=d25 | |
4918 ++vmlal.u32 q13,d19,d25 | |
4919 ++ | |
4920 ++# qhasm: r2[0,1] += v4[0] unsigned* 5y34[0]; r2[2,3] += v4[1] unsigned* 5y34
[1] | |
4921 ++# asm 1: vmlal.u32 <r2=reg128#14,<v4=reg128#4%bot,<5y34=reg128#13%bot | |
4922 ++# asm 2: vmlal.u32 <r2=q13,<v4=d6,<5y34=d24 | |
4923 ++vmlal.u32 q13,d6,d24 | |
4924 ++ | |
4925 ++# qhasm: ptr = &two24 | |
4926 ++# asm 1: lea >ptr=int32#3,<two24=stack128#1 | |
4927 ++# asm 2: lea >ptr=r2,<two24=[sp,#0] | |
4928 ++add r2,sp,#0 | |
4929 ++ | |
4930 ++# qhasm: 2x t1 = r0 unsigned>> 26 | |
4931 ++# asm 1: vshr.u64 >t1=reg128#4,<r0=reg128#8,#26 | |
4932 ++# asm 2: vshr.u64 >t1=q3,<r0=q7,#26 | |
4933 ++vshr.u64 q3,q7,#26 | |
4934 ++ | |
4935 ++# qhasm: len -= 64 | |
4936 ++# asm 1: sub >len=int32#4,<len=int32#4,#64 | |
4937 ++# asm 2: sub >len=r3,<len=r3,#64 | |
4938 ++sub r3,r3,#64 | |
4939 ++ | |
4940 ++# qhasm: r0 &= mask | |
4941 ++# asm 1: vand >r0=reg128#6,<r0=reg128#8,<mask=reg128#7 | |
4942 ++# asm 2: vand >r0=q5,<r0=q7,<mask=q6 | |
4943 ++vand q5,q7,q6 | |
4944 ++ | |
4945 ++# qhasm: 2x r1 += t1 | |
4946 ++# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#15,<t1=reg128#4 | |
4947 ++# asm 2: vadd.i64 >r1=q3,<r1=q14,<t1=q3 | |
4948 ++vadd.i64 q3,q14,q3 | |
4949 ++ | |
4950 ++# qhasm: 2x t4 = r3 unsigned>> 26 | |
4951 ++# asm 1: vshr.u64 >t4=reg128#8,<r3=reg128#5,#26 | |
4952 ++# asm 2: vshr.u64 >t4=q7,<r3=q4,#26 | |
4953 ++vshr.u64 q7,q4,#26 | |
4954 ++ | |
4955 ++# qhasm: r3 &= mask | |
4956 ++# asm 1: vand >r3=reg128#5,<r3=reg128#5,<mask=reg128#7 | |
4957 ++# asm 2: vand >r3=q4,<r3=q4,<mask=q6 | |
4958 ++vand q4,q4,q6 | |
4959 ++ | |
4960 ++# qhasm: 2x x4 = r4 + t4 | |
4961 ++# asm 1: vadd.i64 >x4=reg128#8,<r4=reg128#16,<t4=reg128#8 | |
4962 ++# asm 2: vadd.i64 >x4=q7,<r4=q15,<t4=q7 | |
4963 ++vadd.i64 q7,q15,q7 | |
4964 ++ | |
4965 ++# qhasm: r4 aligned= mem128[ptr] | |
4966 ++# asm 1: vld1.8 {>r4=reg128#16%bot->r4=reg128#16%top},[<ptr=int32#3,: 128] | |
4967 ++# asm 2: vld1.8 {>r4=d30->r4=d31},[<ptr=r2,: 128] | |
4968 ++vld1.8 {d30-d31},[r2,: 128] | |
4969 ++ | |
4970 ++# qhasm: 2x t2 = r1 unsigned>> 26 | |
4971 ++# asm 1: vshr.u64 >t2=reg128#9,<r1=reg128#4,#26 | |
4972 ++# asm 2: vshr.u64 >t2=q8,<r1=q3,#26 | |
4973 ++vshr.u64 q8,q3,#26 | |
4974 ++ | |
4975 ++# qhasm: r1 &= mask | |
4976 ++# asm 1: vand >r1=reg128#4,<r1=reg128#4,<mask=reg128#7 | |
4977 ++# asm 2: vand >r1=q3,<r1=q3,<mask=q6 | |
4978 ++vand q3,q3,q6 | |
4979 ++ | |
4980 ++# qhasm: 2x t0 = x4 unsigned>> 26 | |
4981 ++# asm 1: vshr.u64 >t0=reg128#10,<x4=reg128#8,#26 | |
4982 ++# asm 2: vshr.u64 >t0=q9,<x4=q7,#26 | |
4983 ++vshr.u64 q9,q7,#26 | |
4984 ++ | |
4985 ++# qhasm: 2x r2 += t2 | |
4986 ++# asm 1: vadd.i64 >r2=reg128#9,<r2=reg128#14,<t2=reg128#9 | |
4987 ++# asm 2: vadd.i64 >r2=q8,<r2=q13,<t2=q8 | |
4988 ++vadd.i64 q8,q13,q8 | |
4989 ++ | |
4990 ++# qhasm: x4 &= mask | |
4991 ++# asm 1: vand >x4=reg128#11,<x4=reg128#8,<mask=reg128#7 | |
4992 ++# asm 2: vand >x4=q10,<x4=q7,<mask=q6 | |
4993 ++vand q10,q7,q6 | |
4994 ++ | |
4995 ++# qhasm: 2x x01 = r0 + t0 | |
4996 ++# asm 1: vadd.i64 >x01=reg128#6,<r0=reg128#6,<t0=reg128#10 | |
4997 ++# asm 2: vadd.i64 >x01=q5,<r0=q5,<t0=q9 | |
4998 ++vadd.i64 q5,q5,q9 | |
4999 ++ | |
5000 ++# qhasm: r0 aligned= mem128[ptr] | |
5001 ++# asm 1: vld1.8 {>r0=reg128#8%bot->r0=reg128#8%top},[<ptr=int32#3,: 128] | |
5002 ++# asm 2: vld1.8 {>r0=d14->r0=d15},[<ptr=r2,: 128] | |
5003 ++vld1.8 {d14-d15},[r2,: 128] | |
5004 ++ | |
5005 ++# qhasm: ptr = &z34_stack | |
5006 ++# asm 1: lea >ptr=int32#3,<z34_stack=stack128#9 | |
5007 ++# asm 2: lea >ptr=r2,<z34_stack=[sp,#128] | |
5008 ++add r2,sp,#128 | |
5009 ++ | |
5010 ++# qhasm: 2x t0 <<= 2 | |
5011 ++# asm 1: vshl.i64 >t0=reg128#10,<t0=reg128#10,#2 | |
5012 ++# asm 2: vshl.i64 >t0=q9,<t0=q9,#2 | |
5013 ++vshl.i64 q9,q9,#2 | |
5014 ++ | |
5015 ++# qhasm: 2x t3 = r2 unsigned>> 26 | |
5016 ++# asm 1: vshr.u64 >t3=reg128#14,<r2=reg128#9,#26 | |
5017 ++# asm 2: vshr.u64 >t3=q13,<r2=q8,#26 | |
5018 ++vshr.u64 q13,q8,#26 | |
5019 ++ | |
5020 ++# qhasm: 2x x01 += t0 | |
5021 ++# asm 1: vadd.i64 >x01=reg128#15,<x01=reg128#6,<t0=reg128#10 | |
5022 ++# asm 2: vadd.i64 >x01=q14,<x01=q5,<t0=q9 | |
5023 ++vadd.i64 q14,q5,q9 | |
5024 ++ | |
5025 ++# qhasm: z34 aligned= mem128[ptr] | |
5026 ++# asm 1: vld1.8 {>z34=reg128#6%bot->z34=reg128#6%top},[<ptr=int32#3,: 128] | |
5027 ++# asm 2: vld1.8 {>z34=d10->z34=d11},[<ptr=r2,: 128] | |
5028 ++vld1.8 {d10-d11},[r2,: 128] | |
5029 ++ | |
5030 ++# qhasm: x23 = r2 & mask | |
5031 ++# asm 1: vand >x23=reg128#10,<r2=reg128#9,<mask=reg128#7 | |
5032 ++# asm 2: vand >x23=q9,<r2=q8,<mask=q6 | |
5033 ++vand q9,q8,q6 | |
5034 ++ | |
5035 ++# qhasm: 2x r3 += t3 | |
5036 ++# asm 1: vadd.i64 >r3=reg128#5,<r3=reg128#5,<t3=reg128#14 | |
5037 ++# asm 2: vadd.i64 >r3=q4,<r3=q4,<t3=q13 | |
5038 ++vadd.i64 q4,q4,q13 | |
5039 ++ | |
5040 ++# qhasm: input_2
+= 32 | |
5041 ++# asm 1: add >input_2=int32#2,<input_2=int32#2,#32 | |
5042 ++# asm 2: add >input_2=r1,<input_2=r1,#32 | |
5043 ++add r1,r1,#32 | |
5044 ++ | |
5045 ++# qhasm: 2x t1 = x01 unsigned>> 26 | |
5046 ++# asm 1: vshr.u64 >t1=reg128#14,<x01=reg128#15,#26 | |
5047 ++# asm 2: vshr.u64 >t1=q13,<x01=q14,#26 | |
5048 ++vshr.u64 q13,q14,#26 | |
5049 ++ | |
5050 ++# qhasm: x23 = x23[0,2,1,3] | |
5051 ++# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top | |
5052 ++# asm 2: vtrn.32 <x23=d18,<x23=d19 | |
5053 ++vtrn.32 d18,d19 | |
5054 ++ | |
5055 ++# qhasm: x01 = x01 & mask | |
5056 ++# asm 1: vand >x01=reg128#9,<x01=reg128#15,<mask=reg128#7 | |
5057 ++# asm 2: vand >x01=q8,<x01=q14,<mask=q6 | |
5058 ++vand q8,q14,q6 | |
5059 ++ | |
5060 ++# qhasm: 2x r1 += t1 | |
5061 ++# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#4,<t1=reg128#14 | |
5062 ++# asm 2: vadd.i64 >r1=q3,<r1=q3,<t1=q13 | |
5063 ++vadd.i64 q3,q3,q13 | |
5064 ++ | |
5065 ++# qhasm: 2x t4 = r3 unsigned>> 26 | |
5066 ++# asm 1: vshr.u64 >t4=reg128#14,<r3=reg128#5,#26 | |
5067 ++# asm 2: vshr.u64 >t4=q13,<r3=q4,#26 | |
5068 ++vshr.u64 q13,q4,#26 | |
5069 ++ | |
5070 ++# qhasm: x01 = x01[0,2,1,3] | |
5071 ++# asm 1: vtrn.32 <x01=reg128#9%bot,<x01=reg128#9%top | |
5072 ++# asm 2: vtrn.32 <x01=d16,<x01=d17 | |
5073 ++vtrn.32 d16,d17 | |
5074 ++ | |
5075 ++# qhasm: r3 &= mask | |
5076 ++# asm 1: vand >r3=reg128#5,<r3=reg128#5,<mask=reg128#7 | |
5077 ++# asm 2: vand >r3=q4,<r3=q4,<mask=q6 | |
5078 ++vand q4,q4,q6 | |
5079 ++ | |
5080 ++# qhasm: r1 = r1[0,2,1,3] | |
5081 ++# asm 1: vtrn.32 <r1=reg128#4%bot,<r1=reg128#4%top | |
5082 ++# asm 2: vtrn.32 <r1=d6,<r1=d7 | |
5083 ++vtrn.32 d6,d7 | |
5084 ++ | |
5085 ++# qhasm: 2x x4 += t4 | |
5086 ++# asm 1: vadd.i64 >x4=reg128#11,<x4=reg128#11,<t4=reg128#14 | |
5087 ++# asm 2: vadd.i64 >x4=q10,<x4=q10,<t4=q13 | |
5088 ++vadd.i64 q10,q10,q13 | |
5089 ++ | |
5090 ++# qhasm: r3 = r3[0,2,1,3] | |
5091 ++# asm 1: vtrn.32 <r3=reg128#5%bot,<r3=reg128#5%top | |
5092 ++# asm 2: vtrn.32 <r3=d8,<r3=d9 | |
5093 ++vtrn.32 d8,d9 | |
5094 ++ | |
5095 ++# qhasm: x01 = x01[0,1] r1[0,1] | |
5096 ++# asm 1: vext.32 <x01=reg128#9%top,<r1=reg128#4%bot,<r1=reg128#4%bot,#0 | |
5097 ++# asm 2: vext.32 <x01=d17,<r1=d6,<r1=d6,#0 | |
5098 ++vext.32 d17,d6,d6,#0 | |
5099 ++ | |
5100 ++# qhasm: x23 = x23[0,1] r3[0,1] | |
5101 ++# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#5%bot,<r3=reg128#5%bot,#0 | |
5102 ++# asm 2: vext.32 <x23=d19,<r3=d8,<r3=d8,#0 | |
5103 ++vext.32 d19,d8,d8,#0 | |
5104 ++ | |
5105 ++# qhasm: x4 = x4[0,2,1,3] | |
5106 ++# asm 1: vtrn.32 <x4=reg128#11%bot,<x4=reg128#11%top | |
5107 ++# asm 2: vtrn.32 <x4=d20,<x4=d21 | |
5108 ++vtrn.32 d20,d21 | |
5109 ++ | |
5110 ++# qhasm: unsigned>? len - 64 | |
5111 ++# asm 1: cmp <len=int32#4,#64 | |
5112 ++# asm 2: cmp <len=r3,#64 | |
5113 ++cmp r3,#64 | |
5114 ++ | |
5115 ++# qhasm: goto mainloop2 if unsigned> | |
5116 ++bhi ._mainloop2 | |
5117 ++ | |
5118 ++# qhasm: input_2 -= 32 | |
5119 ++# asm 1: sub >input_2=int32#3,<input_2=int32#2,#32 | |
5120 ++# asm 2: sub >input_2=r2,<input_2=r1,#32 | |
5121 ++sub r2,r1,#32 | |
5122 ++ | |
5123 ++# qhasm: below64bytes: | |
5124 ++._below64bytes: | |
5125 ++ | |
5126 ++# qhasm: unsigned>? len - 32 | |
5127 ++# asm 1: cmp <len=int32#4,#32 | |
5128 ++# asm 2: cmp <len=r3,#32 | |
5129 ++cmp r3,#32 | |
5130 ++ | |
5131 ++# qhasm: goto end if !unsigned> | |
5132 ++bls ._end | |
5133 ++ | |
5134 ++# qhasm: mainloop: | |
5135 ++._mainloop: | |
5136 ++ | |
5137 ++# qhasm: new r0 | |
5138 ++ | |
5139 ++# qhasm: ptr = &two24 | |
5140 ++# asm 1: lea >ptr=int32#2,<two24=stack128#1 | |
5141 ++# asm 2: lea >ptr=r1,<two24=[sp,#0] | |
5142 ++add r1,sp,#0 | |
5143 ++ | |
5144 ++# qhasm: r4 aligned= mem128[ptr] | |
5145 ++# asm 1: vld1.8 {>r4=reg128#5%bot->r4=reg128#5%top},[<ptr=int32#2,: 128] | |
5146 ++# asm 2: vld1.8 {>r4=d8->r4=d9},[<ptr=r1,: 128] | |
5147 ++vld1.8 {d8-d9},[r1,: 128] | |
5148 ++ | |
5149 ++# qhasm: u4 aligned= mem128[ptr] | |
5150 ++# asm 1: vld1.8 {>u4=reg128#6%bot->u4=reg128#6%top},[<ptr=int32#2,: 128] | |
5151 ++# asm 2: vld1.8 {>u4=d10->u4=d11},[<ptr=r1,: 128] | |
5152 ++vld1.8 {d10-d11},[r1,: 128] | |
5153 ++ | |
5154 ++# qhasm: c01 = mem128[input_2];input_2+=16 | |
5155 ++# asm 1: vld1.8 {>c01=reg128#8%bot->c01=reg128#8%top},[<input_2=int32#3]! | |
5156 ++# asm 2: vld1.8 {>c01=d14->c01=d15},[<input_2=r2]! | |
5157 ++vld1.8 {d14-d15},[r2]! | |
5158 ++ | |
5159 ++# qhasm: r4[0,1] += x01[0] unsigned* y34[2]; r4[2,3] += x01[1] unsigned* y3
4[3] | |
5160 ++# asm 1: vmlal.u32 <r4=reg128#5,<x01=reg128#9%bot,<y34=reg128#3%top | |
5161 ++# asm 2: vmlal.u32 <r4=q4,<x01=d16,<y34=d5 | |
5162 ++vmlal.u32 q4,d16,d5 | |
5163 ++ | |
5164 ++# qhasm: c23 = mem128[input_2];input_2+=16 | |
5165 ++# asm 1: vld1.8 {>c23=reg128#14%bot->c23=reg128#14%top},[<input_2=int32#3]! | |
5166 ++# asm 2: vld1.8 {>c23=d26->c23=d27},[<input_2=r2]! | |
5167 ++vld1.8 {d26-d27},[r2]! | |
5168 ++ | |
5169 ++# qhasm: r4[0,1] += x01[2] unsigned* y34[0]; r4[2,3] += x01[3] unsigned* y34[
1] | |
5170 ++# asm 1: vmlal.u32 <r4=reg128#5,<x01=reg128#9%top,<y34=reg128#3%bot | |
5171 ++# asm 2: vmlal.u32 <r4=q4,<x01=d17,<y34=d4 | |
5172 ++vmlal.u32 q4,d17,d4 | |
5173 ++ | |
5174 ++# qhasm: r0 = u4[1]c01[0]r0[2,3] | |
5175 ++# asm 1: vext.32 <r0=reg128#4%bot,<u4=reg128#6%bot,<c01=reg128#8%bot,#1 | |
5176 ++# asm 2: vext.32 <r0=d6,<u4=d10,<c01=d14,#1 | |
5177 ++vext.32 d6,d10,d14,#1 | |
5178 ++ | |
5179 ++# qhasm: r4[0,1] += x23[0] unsigned* y12[2]; r4[2,3] += x23[1] unsigned* y12[
3] | |
5180 ++# asm 1: vmlal.u32 <r4=reg128#5,<x23=reg128#10%bot,<y12=reg128#2%top | |
5181 ++# asm 2: vmlal.u32 <r4=q4,<x23=d18,<y12=d3 | |
5182 ++vmlal.u32 q4,d18,d3 | |
5183 ++ | |
5184 ++# qhasm: r0 = r0[0,1]u4[1]c23[0] | |
5185 ++# asm 1: vext.32 <r0=reg128#4%top,<u4=reg128#6%bot,<c23=reg128#14%bot,#1 | |
5186 ++# asm 2: vext.32 <r0=d7,<u4=d10,<c23=d26,#1 | |
5187 ++vext.32 d7,d10,d26,#1 | |
5188 ++ | |
5189 ++# qhasm: r4[0,1] += x23[2] unsigned* y12[0]; r4[2,3] += x23[3] unsigned* y12[
1] | |
5190 ++# asm 1: vmlal.u32 <r4=reg128#5,<x23=reg128#10%top,<y12=reg128#2%bot | |
5191 ++# asm 2: vmlal.u32 <r4=q4,<x23=d19,<y12=d2 | |
5192 ++vmlal.u32 q4,d19,d2 | |
5193 ++ | |
5194 ++# qhasm: r0 = r0[1]r0[0]r0[3]r0[2] | |
5195 ++# asm 1: vrev64.i32 >r0=reg128#4,<r0=reg128#4 | |
5196 ++# asm 2: vrev64.i32 >r0=q3,<r0=q3 | |
5197 ++vrev64.i32 q3,q3 | |
5198 ++ | |
5199 ++# qhasm: r4[0,1] += x4[0] unsigned* y0[0]; r4[2,3] += x4[1] unsigned* y0[1] | |
5200 ++# asm 1: vmlal.u32 <r4=reg128#5,<x4=reg128#11%bot,<y0=reg128#1%bot | |
5201 ++# asm 2: vmlal.u32 <r4=q4,<x4=d20,<y0=d0 | |
5202 ++vmlal.u32 q4,d20,d0 | |
5203 ++ | |
5204 ++# qhasm: r0[0,1] += x4[0] unsigned* 5y12[0]; r0[2,3] += x4[1] unsigned* 5y12
[1] | |
5205 ++# asm 1: vmlal.u32 <r0=reg128#4,<x4=reg128#11%bot,<5y12=reg128#12%bot | |
5206 ++# asm 2: vmlal.u32 <r0=q3,<x4=d20,<5y12=d22 | |
5207 ++vmlal.u32 q3,d20,d22 | |
5208 ++ | |
5209 ++# qhasm: r0[0,1] += x23[0] unsigned* 5y34[0]; r0[2,3] += x23[1] unsigned* 5y34
[1] | |
5210 ++# asm 1: vmlal.u32 <r0=reg128#4,<x23=reg128#10%bot,<5y34=reg128#13%bot | |
5211 ++# asm 2: vmlal.u32 <r0=q3,<x23=d18,<5y34=d24 | |
5212 ++vmlal.u32 q3,d18,d24 | |
5213 ++ | |
5214 ++# qhasm: r0[0,1] += x23[2] unsigned* 5y12[2]; r0[2,3] += x23[3] unsigned* 5y12
[3] | |
5215 ++# asm 1: vmlal.u32 <r0=reg128#4,<x23=reg128#10%top,<5y12=reg128#12%top | |
5216 ++# asm 2: vmlal.u32 <r0=q3,<x23=d19,<5y12=d23 | |
5217 ++vmlal.u32 q3,d19,d23 | |
5218 ++ | |
5219 ++# qhasm: c01 c23 = c01[0]c23[0]c01[2]c23[2]c01[1]c23[1]c01[3]c23[3] | |
5220 ++# asm 1: vtrn.32 <c01=reg128#8,<c23=reg128#14 | |
5221 ++# asm 2: vtrn.32 <c01=q7,<c23=q13 | |
5222 ++vtrn.32 q7,q13 | |
5223 ++ | |
5224 ++# qhasm: r0[0,1] += x01[0] unsigned* y0[0]; r0[2,3] += x01[1] unsigned* y0[1
] | |
5225 ++# asm 1: vmlal.u32 <r0=reg128#4,<x01=reg128#9%bot,<y0=reg128#1%bot | |
5226 ++# asm 2: vmlal.u32 <r0=q3,<x01=d16,<y0=d0 | |
5227 ++vmlal.u32 q3,d16,d0 | |
5228 ++ | |
5229 ++# qhasm: r3[0,1] = c23[2]<<18; r3[2,3] = c23[3]<<18 | |
5230 ++# asm 1: vshll.u32 >r3=reg128#6,<c23=reg128#14%top,#18 | |
5231 ++# asm 2: vshll.u32 >r3=q5,<c23=d27,#18 | |
5232 ++vshll.u32 q5,d27,#18 | |
5233 ++ | |
5234 ++# qhasm: r0[0,1] += x01[2] unsigned* 5y34[2]; r0[2,3] += x01[3] unsigned* 5y
34[3] | |
5235 ++# asm 1: vmlal.u32 <r0=reg128#4,<x01=reg128#9%top,<5y34=reg128#13%top | |
5236 ++# asm 2: vmlal.u32 <r0=q3,<x01=d17,<5y34=d25 | |
5237 ++vmlal.u32 q3,d17,d25 | |
5238 ++ | |
5239 ++# qhasm: r3[0,1] += x01[0] unsigned* y34[0]; r3[2,3] += x01[1] unsigned* y34
[1] | |
5240 ++# asm 1: vmlal.u32 <r3=reg128#6,<x01=reg128#9%bot,<y34=reg128#3%bot | |
5241 ++# asm 2: vmlal.u32 <r3=q5,<x01=d16,<y34=d4 | |
5242 ++vmlal.u32 q5,d16,d4 | |
5243 ++ | |
5244 ++# qhasm: r3[0,1] += x01[2] unsigned* y12[2]; r3[2,3] += x01[3] unsigned* y12
[3] | |
5245 ++# asm 1: vmlal.u32 <r3=reg128#6,<x01=reg128#9%top,<y12=reg128#2%top | |
5246 ++# asm 2: vmlal.u32 <r3=q5,<x01=d17,<y12=d3 | |
5247 ++vmlal.u32 q5,d17,d3 | |
5248 ++ | |
5249 ++# qhasm: r3[0,1] += x23[0] unsigned* y12[0]; r3[2,3] += x23[1] unsigned* y12
[1] | |
5250 ++# asm 1: vmlal.u32 <r3=reg128#6,<x23=reg128#10%bot,<y12=reg128#2%bot | |
5251 ++# asm 2: vmlal.u32 <r3=q5,<x23=d18,<y12=d2 | |
5252 ++vmlal.u32 q5,d18,d2 | |
5253 ++ | |
5254 ++# qhasm: r3[0,1] += x23[2] unsigned* y0[0]; r3[2,3] += x23[3] unsigned* y0[1
] | |
5255 ++# asm 1: vmlal.u32 <r3=reg128#6,<x23=reg128#10%top,<y0=reg128#1%bot | |
5256 ++# asm 2: vmlal.u32 <r3=q5,<x23=d19,<y0=d0 | |
5257 ++vmlal.u32 q5,d19,d0 | |
5258 ++ | |
5259 ++# qhasm: r1[0,1] = c23[0]<<6; r1[2,3] = c23[1]<<6 | |
5260 ++# asm 1: vshll.u32 >r1=reg128#14,<c23=reg128#14%bot,#6 | |
5261 ++# asm 2: vshll.u32 >r1=q13,<c23=d26,#6 | |
5262 ++vshll.u32 q13,d26,#6 | |
5263 ++ | |
5264 ++# qhasm: r3[0,1] += x4[0] unsigned* 5y34[2]; r3[2,3] += x4[1] unsigned* 5y
34[3] | |
5265 ++# asm 1: vmlal.u32 <r3=reg128#6,<x4=reg128#11%bot,<5y34=reg128#13%top | |
5266 ++# asm 2: vmlal.u32 <r3=q5,<x4=d20,<5y34=d25 | |
5267 ++vmlal.u32 q5,d20,d25 | |
5268 ++ | |
5269 ++# qhasm: r1[0,1] += x01[0] unsigned* y12[0]; r1[2,3] += x01[1] unsigned* y12
[1] | |
5270 ++# asm 1: vmlal.u32 <r1=reg128#14,<x01=reg128#9%bot,<y12=reg128#2%bot | |
5271 ++# asm 2: vmlal.u32 <r1=q13,<x01=d16,<y12=d2 | |
5272 ++vmlal.u32 q13,d16,d2 | |
5273 ++ | |
5274 ++# qhasm: r1[0,1] += x01[2] unsigned* y0[0]; r1[2,3] += x01[3] unsigned* y0[1
] | |
5275 ++# asm 1: vmlal.u32 <r1=reg128#14,<x01=reg128#9%top,<y0=reg128#1%bot | |
5276 ++# asm 2: vmlal.u32 <r1=q13,<x01=d17,<y0=d0 | |
5277 ++vmlal.u32 q13,d17,d0 | |
5278 ++ | |
5279 ++# qhasm: r1[0,1] += x23[0] unsigned* 5y34[2]; r1[2,3] += x23[1] unsigned* 5y
34[3] | |
5280 ++# asm 1: vmlal.u32 <r1=reg128#14,<x23=reg128#10%bot,<5y34=reg128#13%top | |
5281 ++# asm 2: vmlal.u32 <r1=q13,<x23=d18,<5y34=d25 | |
5282 ++vmlal.u32 q13,d18,d25 | |
5283 ++ | |
5284 ++# qhasm: r1[0,1] += x23[2] unsigned* 5y34[0]; r1[2,3] += x23[3] unsigned* 5y34
[1] | |
5285 ++# asm 1: vmlal.u32 <r1=reg128#14,<x23=reg128#10%top,<5y34=reg128#13%bot | |
5286 ++# asm 2: vmlal.u32 <r1=q13,<x23=d19,<5y34=d24 | |
5287 ++vmlal.u32 q13,d19,d24 | |
5288 ++ | |
5289 ++# qhasm: r2[0,1] = c01[2]<<12; r2[2,3] = c01[3]<<12 | |
5290 ++# asm 1: vshll.u32 >r2=reg128#8,<c01=reg128#8%top,#12 | |
5291 ++# asm 2: vshll.u32 >r2=q7,<c01=d15,#12 | |
5292 ++vshll.u32 q7,d15,#12 | |
5293 ++ | |
5294 ++# qhasm: r1[0,1] += x4[0] unsigned* 5y12[2]; r1[2,3] += x4[1] unsigned* 5y12
[3] | |
5295 ++# asm 1: vmlal.u32 <r1=reg128#14,<x4=reg128#11%bot,<5y12=reg128#12%top | |
5296 ++# asm 2: vmlal.u32 <r1=q13,<x4=d20,<5y12=d23 | |
5297 ++vmlal.u32 q13,d20,d23 | |
5298 ++ | |
5299 ++# qhasm: r2[0,1] += x01[0] unsigned* y12[2]; r2[2,3] += x01[1] unsigned* y12
[3] | |
5300 ++# asm 1: vmlal.u32 <r2=reg128#8,<x01=reg128#9%bot,<y12=reg128#2%top | |
5301 ++# asm 2: vmlal.u32 <r2=q7,<x01=d16,<y12=d3 | |
5302 ++vmlal.u32 q7,d16,d3 | |
5303 ++ | |
5304 ++# qhasm: r2[0,1] += x01[2] unsigned* y12[0]; r2[2,3] += x01[3] unsigned* y12
[1] | |
5305 ++# asm 1: vmlal.u32 <r2=reg128#8,<x01=reg128#9%top,<y12=reg128#2%bot | |
5306 ++# asm 2: vmlal.u32 <r2=q7,<x01=d17,<y12=d2 | |
5307 ++vmlal.u32 q7,d17,d2 | |
5308 ++ | |
5309 ++# qhasm: r2[0,1] += x23[0] unsigned* y0[0]; r2[2,3] += x23[1] unsigned* y0[1
] | |
5310 ++# asm 1: vmlal.u32 <r2=reg128#8,<x23=reg128#10%bot,<y0=reg128#1%bot | |
5311 ++# asm 2: vmlal.u32 <r2=q7,<x23=d18,<y0=d0 | |
5312 ++vmlal.u32 q7,d18,d0 | |
5313 ++ | |
5314 ++# qhasm: r2[0,1] += x23[2] unsigned* 5y34[2]; r2[2,3] += x23[3] unsigned* 5y
34[3] | |
5315 ++# asm 1: vmlal.u32 <r2=reg128#8,<x23=reg128#10%top,<5y34=reg128#13%top | |
5316 ++# asm 2: vmlal.u32 <r2=q7,<x23=d19,<5y34=d25 | |
5317 ++vmlal.u32 q7,d19,d25 | |
5318 ++ | |
5319 ++# qhasm: r2[0,1] += x4[0] unsigned* 5y34[0]; r2[2,3] += x4[1] unsigned* 5y34
[1] | |
5320 ++# asm 1: vmlal.u32 <r2=reg128#8,<x4=reg128#11%bot,<5y34=reg128#13%bot | |
5321 ++# asm 2: vmlal.u32 <r2=q7,<x4=d20,<5y34=d24 | |
5322 ++vmlal.u32 q7,d20,d24 | |
5323 ++ | |
5324 ++# qhasm: 2x t1 = r0 unsigned>> 26 | |
5325 ++# asm 1: vshr.u64 >t1=reg128#9,<r0=reg128#4,#26 | |
5326 ++# asm 2: vshr.u64 >t1=q8,<r0=q3,#26 | |
5327 ++vshr.u64 q8,q3,#26 | |
5328 ++ | |
5329 ++# qhasm: r0 &= mask | |
5330 ++# asm 1: vand >r0=reg128#4,<r0=reg128#4,<mask=reg128#7 | |
5331 ++# asm 2: vand >r0=q3,<r0=q3,<mask=q6 | |
5332 ++vand q3,q3,q6 | |
5333 ++ | |
5334 ++# qhasm: 2x r1 += t1 | |
5335 ++# asm 1: vadd.i64 >r1=reg128#9,<r1=reg128#14,<t1=reg128#9 | |
5336 ++# asm 2: vadd.i64 >r1=q8,<r1=q13,<t1=q8 | |
5337 ++vadd.i64 q8,q13,q8 | |
5338 ++ | |
5339 ++# qhasm: 2x t4 = r3 unsigned>> 26 | |
5340 ++# asm 1: vshr.u64 >t4=reg128#10,<r3=reg128#6,#26 | |
5341 ++# asm 2: vshr.u64 >t4=q9,<r3=q5,#26 | |
5342 ++vshr.u64 q9,q5,#26 | |
5343 ++ | |
5344 ++# qhasm: r3 &= mask | |
5345 ++# asm 1: vand >r3=reg128#6,<r3=reg128#6,<mask=reg128#7 | |
5346 ++# asm 2: vand >r3=q5,<r3=q5,<mask=q6 | |
5347 ++vand q5,q5,q6 | |
5348 ++ | |
5349 ++# qhasm: 2x r4 += t4 | |
5350 ++# asm 1: vadd.i64 >r4=reg128#5,<r4=reg128#5,<t4=reg128#10 | |
5351 ++# asm 2: vadd.i64 >r4=q4,<r4=q4,<t4=q9 | |
5352 ++vadd.i64 q4,q4,q9 | |
5353 ++ | |
5354 ++# qhasm: 2x t2 = r1 unsigned>> 26 | |
5355 ++# asm 1: vshr.u64 >t2=reg128#10,<r1=reg128#9,#26 | |
5356 ++# asm 2: vshr.u64 >t2=q9,<r1=q8,#26 | |
5357 ++vshr.u64 q9,q8,#26 | |
5358 ++ | |
5359 ++# qhasm: r1 &= mask | |
5360 ++# asm 1: vand >r1=reg128#11,<r1=reg128#9,<mask=reg128#7 | |
5361 ++# asm 2: vand >r1=q10,<r1=q8,<mask=q6 | |
5362 ++vand q10,q8,q6 | |
5363 ++ | |
5364 ++# qhasm: 2x t0 = r4 unsigned>> 26 | |
5365 ++# asm 1: vshr.u64 >t0=reg128#9,<r4=reg128#5,#26 | |
5366 ++# asm 2: vshr.u64 >t0=q8,<r4=q4,#26 | |
5367 ++vshr.u64 q8,q4,#26 | |
5368 ++ | |
5369 ++# qhasm: 2x r2 += t2 | |
5370 ++# asm 1: vadd.i64 >r2=reg128#8,<r2=reg128#8,<t2=reg128#10 | |
5371 ++# asm 2: vadd.i64 >r2=q7,<r2=q7,<t2=q9 | |
5372 ++vadd.i64 q7,q7,q9 | |
5373 ++ | |
5374 ++# qhasm: r4 &= mask | |
5375 ++# asm 1: vand >r4=reg128#5,<r4=reg128#5,<mask=reg128#7 | |
5376 ++# asm 2: vand >r4=q4,<r4=q4,<mask=q6 | |
5377 ++vand q4,q4,q6 | |
5378 ++ | |
5379 ++# qhasm: 2x r0 += t0 | |
5380 ++# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#9 | |
5381 ++# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q8 | |
5382 ++vadd.i64 q3,q3,q8 | |
5383 ++ | |
5384 ++# qhasm: 2x t0 <<= 2 | |
5385 ++# asm 1: vshl.i64 >t0=reg128#9,<t0=reg128#9,#2 | |
5386 ++# asm 2: vshl.i64 >t0=q8,<t0=q8,#2 | |
5387 ++vshl.i64 q8,q8,#2 | |
5388 ++ | |
5389 ++# qhasm: 2x t3 = r2 unsigned>> 26 | |
5390 ++# asm 1: vshr.u64 >t3=reg128#14,<r2=reg128#8,#26 | |
5391 ++# asm 2: vshr.u64 >t3=q13,<r2=q7,#26 | |
5392 ++vshr.u64 q13,q7,#26 | |
5393 ++ | |
5394 ++# qhasm: 2x r0 += t0 | |
5395 ++# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#9 | |
5396 ++# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q8 | |
5397 ++vadd.i64 q3,q3,q8 | |
5398 ++ | |
5399 ++# qhasm: x23 = r2 & mask | |
5400 ++# asm 1: vand >x23=reg128#10,<r2=reg128#8,<mask=reg128#7 | |
5401 ++# asm 2: vand >x23=q9,<r2=q7,<mask=q6 | |
5402 ++vand q9,q7,q6 | |
5403 ++ | |
5404 ++# qhasm: 2x r3 += t3 | |
5405 ++# asm 1: vadd.i64 >r3=reg128#6,<r3=reg128#6,<t3=reg128#14 | |
5406 ++# asm 2: vadd.i64 >r3=q5,<r3=q5,<t3=q13 | |
5407 ++vadd.i64 q5,q5,q13 | |
5408 ++ | |
5409 ++# qhasm: 2x t1 = r0 unsigned>> 26 | |
5410 ++# asm 1: vshr.u64 >t1=reg128#8,<r0=reg128#4,#26 | |
5411 ++# asm 2: vshr.u64 >t1=q7,<r0=q3,#26 | |
5412 ++vshr.u64 q7,q3,#26 | |
5413 ++ | |
5414 ++# qhasm: x01 = r0 & mask | |
5415 ++# asm 1: vand >x01=reg128#9,<r0=reg128#4,<mask=reg128#7 | |
5416 ++# asm 2: vand >x01=q8,<r0=q3,<mask=q6 | |
5417 ++vand q8,q3,q6 | |
5418 ++ | |
5419 ++# qhasm: 2x r1 += t1 | |
5420 ++# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#11,<t1=reg128#8 | |
5421 ++# asm 2: vadd.i64 >r1=q3,<r1=q10,<t1=q7 | |
5422 ++vadd.i64 q3,q10,q7 | |
5423 ++ | |
5424 ++# qhasm: 2x t4 = r3 unsigned>> 26 | |
5425 ++# asm 1: vshr.u64 >t4=reg128#8,<r3=reg128#6,#26 | |
5426 ++# asm 2: vshr.u64 >t4=q7,<r3=q5,#26 | |
5427 ++vshr.u64 q7,q5,#26 | |
5428 ++ | |
5429 ++# qhasm: r3 &= mask | |
5430 ++# asm 1: vand >r3=reg128#6,<r3=reg128#6,<mask=reg128#7 | |
5431 ++# asm 2: vand >r3=q5,<r3=q5,<mask=q6 | |
5432 ++vand q5,q5,q6 | |
5433 ++ | |
5434 ++# qhasm: 2x x4 = r4 + t4 | |
5435 ++# asm 1: vadd.i64 >x4=reg128#11,<r4=reg128#5,<t4=reg128#8 | |
5436 ++# asm 2: vadd.i64 >x4=q10,<r4=q4,<t4=q7 | |
5437 ++vadd.i64 q10,q4,q7 | |
5438 ++ | |
5439 ++# qhasm: len -= 32 | |
5440 ++# asm 1: sub >len=int32#4,<len=int32#4,#32 | |
5441 ++# asm 2: sub >len=r3,<len=r3,#32 | |
5442 ++sub r3,r3,#32 | |
5443 ++ | |
5444 ++# qhasm: x01 = x01[0,2,1,3] | |
5445 ++# asm 1: vtrn.32 <x01=reg128#9%bot,<x01=reg128#9%top | |
5446 ++# asm 2: vtrn.32 <x01=d16,<x01=d17 | |
5447 ++vtrn.32 d16,d17 | |
5448 ++ | |
5449 ++# qhasm: x23 = x23[0,2,1,3] | |
5450 ++# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top | |
5451 ++# asm 2: vtrn.32 <x23=d18,<x23=d19 | |
5452 ++vtrn.32 d18,d19 | |
5453 ++ | |
5454 ++# qhasm: r1 = r1[0,2,1,3] | |
5455 ++# asm 1: vtrn.32 <r1=reg128#4%bot,<r1=reg128#4%top | |
5456 ++# asm 2: vtrn.32 <r1=d6,<r1=d7 | |
5457 ++vtrn.32 d6,d7 | |
5458 ++ | |
5459 ++# qhasm: r3 = r3[0,2,1,3] | |
5460 ++# asm 1: vtrn.32 <r3=reg128#6%bot,<r3=reg128#6%top | |
5461 ++# asm 2: vtrn.32 <r3=d10,<r3=d11 | |
5462 ++vtrn.32 d10,d11 | |
5463 ++ | |
5464 ++# qhasm: x4 = x4[0,2,1,3] | |
5465 ++# asm 1: vtrn.32 <x4=reg128#11%bot,<x4=reg128#11%top | |
5466 ++# asm 2: vtrn.32 <x4=d20,<x4=d21 | |
5467 ++vtrn.32 d20,d21 | |
5468 ++ | |
5469 ++# qhasm: x01 = x01[0,1] r1[0,1] | |
5470 ++# asm 1: vext.32 <x01=reg128#9%top,<r1=reg128#4%bot,<r1=reg128#4%bot,#0 | |
5471 ++# asm 2: vext.32 <x01=d17,<r1=d6,<r1=d6,#0 | |
5472 ++vext.32 d17,d6,d6,#0 | |
5473 ++ | |
5474 ++# qhasm: x23 = x23[0,1] r3[0,1] | |
5475 ++# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#6%bot,<r3=reg128#6%bot,#0 | |
5476 ++# asm 2: vext.32 <x23=d19,<r3=d10,<r3=d10,#0 | |
5477 ++vext.32 d19,d10,d10,#0 | |
5478 ++ | |
5479 ++# qhasm: unsigned>? len - 32 | |
5480 ++# asm 1: cmp <len=int32#4,#32 | |
5481 ++# asm 2: cmp <len=r3,#32 | |
5482 ++cmp r3,#32 | |
5483 ++ | |
5484 ++# qhasm: goto mainloop if unsigned> | |
5485 ++bhi ._mainloop | |
5486 ++ | |
5487 ++# qhasm: end: | |
5488 ++._end: | |
5489 ++ | |
5490 ++# qhasm: mem128[input_0] = x01;input_0+=16 | |
5491 ++# asm 1: vst1.8 {<x01=reg128#9%bot-<x01=reg128#9%top},[<input_0=int32#1]! | |
5492 ++# asm 2: vst1.8 {<x01=d16-<x01=d17},[<input_0=r0]! | |
5493 ++vst1.8 {d16-d17},[r0]! | |
5494 ++ | |
5495 ++# qhasm: mem128[input_0] = x23;input_0+=16 | |
5496 ++# asm 1: vst1.8 {<x23=reg128#10%bot-<x23=reg128#10%top},[<input_0=int32#1]! | |
5497 ++# asm 2: vst1.8 {<x23=d18-<x23=d19},[<input_0=r0]! | |
5498 ++vst1.8 {d18-d19},[r0]! | |
5499 ++ | |
5500 ++# qhasm: mem64[input_0] = x4[0] | |
5501 ++# asm 1: vst1.8 <x4=reg128#11%bot,[<input_0=int32#1] | |
5502 ++# asm 2: vst1.8 <x4=d20,[<input_0=r0] | |
5503 ++vst1.8 d20,[r0] | |
5504 ++ | |
5505 ++# qhasm: len = len | |
5506 ++# asm 1: mov >len=int32#1,<len=int32#4 | |
5507 ++# asm 2: mov >len=r0,<len=r3 | |
5508 ++mov r0,r3 | |
5509 ++ | |
5510 ++# qhasm: qpopreturn len | |
5511 ++mov sp,r12 | |
5512 ++vpop {q4,q5,q6,q7} | |
5513 ++bx lr | |
5514 ++ | |
5515 ++# qhasm: int32 input_0 | |
5516 ++ | |
5517 ++# qhasm: int32 input_1 | |
5518 ++ | |
5519 ++# qhasm: int32 input_2 | |
5520 ++ | |
5521 ++# qhasm: int32 input_3 | |
5522 ++ | |
5523 ++# qhasm: stack32 input_4 | |
5524 ++ | |
5525 ++# qhasm: stack32 input_5 | |
5526 ++ | |
5527 ++# qhasm: stack32 input_6 | |
5528 ++ | |
5529 ++# qhasm: stack32 input_7 | |
5530 ++ | |
5531 ++# qhasm: int32 caller_r4 | |
5532 ++ | |
5533 ++# qhasm: int32 caller_r5 | |
5534 ++ | |
5535 ++# qhasm: int32 caller_r6 | |
5536 ++ | |
5537 ++# qhasm: int32 caller_r7 | |
5538 ++ | |
5539 ++# qhasm: int32 caller_r8 | |
5540 ++ | |
5541 ++# qhasm: int32 caller_r9 | |
5542 ++ | |
5543 ++# qhasm: int32 caller_r10 | |
5544 ++ | |
5545 ++# qhasm: int32 caller_r11 | |
5546 ++ | |
5547 ++# qhasm: int32 caller_r12 | |
5548 ++ | |
5549 ++# qhasm: int32 caller_r14 | |
5550 ++ | |
5551 ++# qhasm: reg128 caller_q4 | |
5552 ++ | |
5553 ++# qhasm: reg128 caller_q5 | |
5554 ++ | |
5555 ++# qhasm: reg128 caller_q6 | |
5556 ++ | |
5557 ++# qhasm: reg128 caller_q7 | |
5558 ++ | |
5559 ++# qhasm: reg128 r0 | |
5560 ++ | |
5561 ++# qhasm: reg128 r1 | |
5562 ++ | |
5563 ++# qhasm: reg128 r2 | |
5564 ++ | |
5565 ++# qhasm: reg128 r3 | |
5566 ++ | |
5567 ++# qhasm: reg128 r4 | |
5568 ++ | |
5569 ++# qhasm: reg128 x01 | |
5570 ++ | |
5571 ++# qhasm: reg128 x23 | |
5572 ++ | |
5573 ++# qhasm: reg128 x4 | |
5574 ++ | |
5575 ++# qhasm: reg128 y01 | |
5576 ++ | |
5577 ++# qhasm: reg128 y23 | |
5578 ++ | |
5579 ++# qhasm: reg128 y4 | |
5580 ++ | |
5581 ++# qhasm: reg128 _5y01 | |
5582 ++ | |
5583 ++# qhasm: reg128 _5y23 | |
5584 ++ | |
5585 ++# qhasm: reg128 _5y4 | |
5586 ++ | |
5587 ++# qhasm: reg128 c01 | |
5588 ++ | |
5589 ++# qhasm: reg128 c23 | |
5590 ++ | |
5591 ++# qhasm: reg128 c4 | |
5592 ++ | |
5593 ++# qhasm: reg128 t0 | |
5594 ++ | |
5595 ++# qhasm: reg128 t1 | |
5596 ++ | |
5597 ++# qhasm: reg128 t2 | |
5598 ++ | |
5599 ++# qhasm: reg128 t3 | |
5600 ++ | |
5601 ++# qhasm: reg128 t4 | |
5602 ++ | |
5603 ++# qhasm: reg128 mask | |
5604 ++ | |
5605 ++# qhasm: enter crypto_onetimeauth_poly1305_neon2_addmulmod | |
5606 ++.align 2 | |
5607 ++.global openssl_poly1305_neon2_addmulmod | |
5608 ++.type openssl_poly1305_neon2_addmulmod STT_FUNC | |
5609 ++openssl_poly1305_neon2_addmulmod: | |
5610 ++sub sp,sp,#0 | |
5611 ++ | |
5612 ++# qhasm: 2x mask = 0xffffffff | |
5613 ++# asm 1: vmov.i64 >mask=reg128#1,#0xffffffff | |
5614 ++# asm 2: vmov.i64 >mask=q0,#0xffffffff | |
5615 ++vmov.i64 q0,#0xffffffff | |
5616 ++ | |
5617 ++# qhasm: y01 aligned= mem128[input_2];input_2+=16 | |
5618 ++# asm 1: vld1.8 {>y01=reg128#2%bot->y01=reg128#2%top},[<input_2=int32#3,: 128]
! | |
5619 ++# asm 2: vld1.8 {>y01=d2->y01=d3},[<input_2=r2,: 128]! | |
5620 ++vld1.8 {d2-d3},[r2,: 128]! | |
5621 ++ | |
5622 ++# qhasm: 4x _5y01 = y01 << 2 | |
5623 ++# asm 1: vshl.i32 >_5y01=reg128#3,<y01=reg128#2,#2 | |
5624 ++# asm 2: vshl.i32 >_5y01=q2,<y01=q1,#2 | |
5625 ++vshl.i32 q2,q1,#2 | |
5626 ++ | |
5627 ++# qhasm: y23 aligned= mem128[input_2];input_2+=16 | |
5628 ++# asm 1: vld1.8 {>y23=reg128#4%bot->y23=reg128#4%top},[<input_2=int32#3,: 128]
! | |
5629 ++# asm 2: vld1.8 {>y23=d6->y23=d7},[<input_2=r2,: 128]! | |
5630 ++vld1.8 {d6-d7},[r2,: 128]! | |
5631 ++ | |
5632 ++# qhasm: 4x _5y23 = y23 << 2 | |
5633 ++# asm 1: vshl.i32 >_5y23=reg128#9,<y23=reg128#4,#2 | |
5634 ++# asm 2: vshl.i32 >_5y23=q8,<y23=q3,#2 | |
5635 ++vshl.i32 q8,q3,#2 | |
5636 ++ | |
5637 ++# qhasm: y4 aligned= mem64[input_2]y4[1] | |
5638 ++# asm 1: vld1.8 {<y4=reg128#10%bot},[<input_2=int32#3,: 64] | |
5639 ++# asm 2: vld1.8 {<y4=d18},[<input_2=r2,: 64] | |
5640 ++vld1.8 {d18},[r2,: 64] | |
5641 ++ | |
5642 ++# qhasm: 4x _5y4 = y4 << 2 | |
5643 ++# asm 1: vshl.i32 >_5y4=reg128#11,<y4=reg128#10,#2 | |
5644 ++# asm 2: vshl.i32 >_5y4=q10,<y4=q9,#2 | |
5645 ++vshl.i32 q10,q9,#2 | |
5646 ++ | |
5647 ++# qhasm: x01 aligned= mem128[input_1];input_1+=16 | |
5648 ++# asm 1: vld1.8 {>x01=reg128#12%bot->x01=reg128#12%top},[<input_1=int32#2,: 12
8]! | |
5649 ++# asm 2: vld1.8 {>x01=d22->x01=d23},[<input_1=r1,: 128]! | |
5650 ++vld1.8 {d22-d23},[r1,: 128]! | |
5651 ++ | |
5652 ++# qhasm: 4x _5y01 += y01 | |
5653 ++# asm 1: vadd.i32 >_5y01=reg128#3,<_5y01=reg128#3,<y01=reg128#2 | |
5654 ++# asm 2: vadd.i32 >_5y01=q2,<_5y01=q2,<y01=q1 | |
5655 ++vadd.i32 q2,q2,q1 | |
5656 ++ | |
5657 ++# qhasm: x23 aligned= mem128[input_1];input_1+=16 | |
5658 ++# asm 1: vld1.8 {>x23=reg128#13%bot->x23=reg128#13%top},[<input_1=int32#2,: 12
8]! | |
5659 ++# asm 2: vld1.8 {>x23=d24->x23=d25},[<input_1=r1,: 128]! | |
5660 ++vld1.8 {d24-d25},[r1,: 128]! | |
5661 ++ | |
5662 ++# qhasm: 4x _5y23 += y23 | |
5663 ++# asm 1: vadd.i32 >_5y23=reg128#9,<_5y23=reg128#9,<y23=reg128#4 | |
5664 ++# asm 2: vadd.i32 >_5y23=q8,<_5y23=q8,<y23=q3 | |
5665 ++vadd.i32 q8,q8,q3 | |
5666 ++ | |
5667 ++# qhasm: 4x _5y4 += y4 | |
5668 ++# asm 1: vadd.i32 >_5y4=reg128#11,<_5y4=reg128#11,<y4=reg128#10 | |
5669 ++# asm 2: vadd.i32 >_5y4=q10,<_5y4=q10,<y4=q9 | |
5670 ++vadd.i32 q10,q10,q9 | |
5671 ++ | |
5672 ++# qhasm: c01 aligned= mem128[input_3];input_3+=16 | |
5673 ++# asm 1: vld1.8 {>c01=reg128#14%bot->c01=reg128#14%top},[<input_3=int32#4,: 12
8]! | |
5674 ++# asm 2: vld1.8 {>c01=d26->c01=d27},[<input_3=r3,: 128]! | |
5675 ++vld1.8 {d26-d27},[r3,: 128]! | |
5676 ++ | |
5677 ++# qhasm: 4x x01 += c01 | |
5678 ++# asm 1: vadd.i32 >x01=reg128#12,<x01=reg128#12,<c01=reg128#14 | |
5679 ++# asm 2: vadd.i32 >x01=q11,<x01=q11,<c01=q13 | |
5680 ++vadd.i32 q11,q11,q13 | |
5681 ++ | |
5682 ++# qhasm: c23 aligned= mem128[input_3];input_3+=16 | |
5683 ++# asm 1: vld1.8 {>c23=reg128#14%bot->c23=reg128#14%top},[<input_3=int32#4,: 12
8]! | |
5684 ++# asm 2: vld1.8 {>c23=d26->c23=d27},[<input_3=r3,: 128]! | |
5685 ++vld1.8 {d26-d27},[r3,: 128]! | |
5686 ++ | |
5687 ++# qhasm: 4x x23 += c23 | |
5688 ++# asm 1: vadd.i32 >x23=reg128#13,<x23=reg128#13,<c23=reg128#14 | |
5689 ++# asm 2: vadd.i32 >x23=q12,<x23=q12,<c23=q13 | |
5690 ++vadd.i32 q12,q12,q13 | |
5691 ++ | |
5692 ++# qhasm: x4 aligned= mem64[input_1]x4[1] | |
5693 ++# asm 1: vld1.8 {<x4=reg128#14%bot},[<input_1=int32#2,: 64] | |
5694 ++# asm 2: vld1.8 {<x4=d26},[<input_1=r1,: 64] | |
5695 ++vld1.8 {d26},[r1,: 64] | |
5696 ++ | |
5697 ++# qhasm: 2x mask unsigned>>=6 | |
5698 ++# asm 1: vshr.u64 >mask=reg128#1,<mask=reg128#1,#6 | |
5699 ++# asm 2: vshr.u64 >mask=q0,<mask=q0,#6 | |
5700 ++vshr.u64 q0,q0,#6 | |
5701 ++ | |
5702 ++# qhasm: c4 aligned= mem64[input_3]c4[1] | |
5703 ++# asm 1: vld1.8 {<c4=reg128#15%bot},[<input_3=int32#4,: 64] | |
5704 ++# asm 2: vld1.8 {<c4=d28},[<input_3=r3,: 64] | |
5705 ++vld1.8 {d28},[r3,: 64] | |
5706 ++ | |
5707 ++# qhasm: 4x x4 += c4 | |
5708 ++# asm 1: vadd.i32 >x4=reg128#14,<x4=reg128#14,<c4=reg128#15 | |
5709 ++# asm 2: vadd.i32 >x4=q13,<x4=q13,<c4=q14 | |
5710 ++vadd.i32 q13,q13,q14 | |
5711 ++ | |
5712 ++# qhasm: r0[0,1] = x01[0] unsigned* y01[0]; r0[2,3] = x01[1] unsigned* y01
[1] | |
5713 ++# asm 1: vmull.u32 >r0=reg128#15,<x01=reg128#12%bot,<y01=reg128#2%bot | |
5714 ++# asm 2: vmull.u32 >r0=q14,<x01=d22,<y01=d2 | |
5715 ++vmull.u32 q14,d22,d2 | |
5716 ++ | |
5717 ++# qhasm: r0[0,1] += x01[2] unsigned* _5y4[0]; r0[2,3] += x01[3] unsigned* _5
y4[1] | |
5718 ++# asm 1: vmlal.u32 <r0=reg128#15,<x01=reg128#12%top,<_5y4=reg128#11%bot | |
5719 ++# asm 2: vmlal.u32 <r0=q14,<x01=d23,<_5y4=d20 | |
5720 ++vmlal.u32 q14,d23,d20 | |
5721 ++ | |
5722 ++# qhasm: r0[0,1] += x23[0] unsigned* _5y23[2]; r0[2,3] += x23[1] unsigned* _5y
23[3] | |
5723 ++# asm 1: vmlal.u32 <r0=reg128#15,<x23=reg128#13%bot,<_5y23=reg128#9%top | |
5724 ++# asm 2: vmlal.u32 <r0=q14,<x23=d24,<_5y23=d17 | |
5725 ++vmlal.u32 q14,d24,d17 | |
5726 ++ | |
5727 ++# qhasm: r0[0,1] += x23[2] unsigned* _5y23[0]; r0[2,3] += x23[3] unsigned* _5y
23[1] | |
5728 ++# asm 1: vmlal.u32 <r0=reg128#15,<x23=reg128#13%top,<_5y23=reg128#9%bot | |
5729 ++# asm 2: vmlal.u32 <r0=q14,<x23=d25,<_5y23=d16 | |
5730 ++vmlal.u32 q14,d25,d16 | |
5731 ++ | |
5732 ++# qhasm: r0[0,1] += x4[0] unsigned* _5y01[2]; r0[2,3] += x4[1] unsigned* _5y
01[3] | |
5733 ++# asm 1: vmlal.u32 <r0=reg128#15,<x4=reg128#14%bot,<_5y01=reg128#3%top | |
5734 ++# asm 2: vmlal.u32 <r0=q14,<x4=d26,<_5y01=d5 | |
5735 ++vmlal.u32 q14,d26,d5 | |
5736 ++ | |
5737 ++# qhasm: r1[0,1] = x01[0] unsigned* y01[2]; r1[2,3] = x01[1] unsigned* y01
[3] | |
5738 ++# asm 1: vmull.u32 >r1=reg128#3,<x01=reg128#12%bot,<y01=reg128#2%top | |
5739 ++# asm 2: vmull.u32 >r1=q2,<x01=d22,<y01=d3 | |
5740 ++vmull.u32 q2,d22,d3 | |
5741 ++ | |
5742 ++# qhasm: r1[0,1] += x01[2] unsigned* y01[0]; r1[2,3] += x01[3] unsigned* y01
[1] | |
5743 ++# asm 1: vmlal.u32 <r1=reg128#3,<x01=reg128#12%top,<y01=reg128#2%bot | |
5744 ++# asm 2: vmlal.u32 <r1=q2,<x01=d23,<y01=d2 | |
5745 ++vmlal.u32 q2,d23,d2 | |
5746 ++ | |
5747 ++# qhasm: r1[0,1] += x23[0] unsigned* _5y4[0]; r1[2,3] += x23[1] unsigned* _5
y4[1] | |
5748 ++# asm 1: vmlal.u32 <r1=reg128#3,<x23=reg128#13%bot,<_5y4=reg128#11%bot | |
5749 ++# asm 2: vmlal.u32 <r1=q2,<x23=d24,<_5y4=d20 | |
5750 ++vmlal.u32 q2,d24,d20 | |
5751 ++ | |
5752 ++# qhasm: r1[0,1] += x23[2] unsigned* _5y23[2]; r1[2,3] += x23[3] unsigned* _5y
23[3] | |
5753 ++# asm 1: vmlal.u32 <r1=reg128#3,<x23=reg128#13%top,<_5y23=reg128#9%top | |
5754 ++# asm 2: vmlal.u32 <r1=q2,<x23=d25,<_5y23=d17 | |
5755 ++vmlal.u32 q2,d25,d17 | |
5756 ++ | |
5757 ++# qhasm: r1[0,1] += x4[0] unsigned* _5y23[0]; r1[2,3] += x4[1] unsigned* _5y
23[1] | |
5758 ++# asm 1: vmlal.u32 <r1=reg128#3,<x4=reg128#14%bot,<_5y23=reg128#9%bot | |
5759 ++# asm 2: vmlal.u32 <r1=q2,<x4=d26,<_5y23=d16 | |
5760 ++vmlal.u32 q2,d26,d16 | |
5761 ++ | |
5762 ++# qhasm: r2[0,1] = x01[0] unsigned* y23[0]; r2[2,3] = x01[1] unsigned* y23
[1] | |
5763 ++# asm 1: vmull.u32 >r2=reg128#16,<x01=reg128#12%bot,<y23=reg128#4%bot | |
5764 ++# asm 2: vmull.u32 >r2=q15,<x01=d22,<y23=d6 | |
5765 ++vmull.u32 q15,d22,d6 | |
5766 ++ | |
5767 ++# qhasm: r2[0,1] += x01[2] unsigned* y01[2]; r2[2,3] += x01[3] unsigned* y01
[3] | |
5768 ++# asm 1: vmlal.u32 <r2=reg128#16,<x01=reg128#12%top,<y01=reg128#2%top | |
5769 ++# asm 2: vmlal.u32 <r2=q15,<x01=d23,<y01=d3 | |
5770 ++vmlal.u32 q15,d23,d3 | |
5771 ++ | |
5772 ++# qhasm: r2[0,1] += x23[0] unsigned* y01[0]; r2[2,3] += x23[1] unsigned* y01
[1] | |
5773 ++# asm 1: vmlal.u32 <r2=reg128#16,<x23=reg128#13%bot,<y01=reg128#2%bot | |
5774 ++# asm 2: vmlal.u32 <r2=q15,<x23=d24,<y01=d2 | |
5775 ++vmlal.u32 q15,d24,d2 | |
5776 ++ | |
5777 ++# qhasm: r2[0,1] += x23[2] unsigned* _5y4[0]; r2[2,3] += x23[3] unsigned* _5
y4[1] | |
5778 ++# asm 1: vmlal.u32 <r2=reg128#16,<x23=reg128#13%top,<_5y4=reg128#11%bot | |
5779 ++# asm 2: vmlal.u32 <r2=q15,<x23=d25,<_5y4=d20 | |
5780 ++vmlal.u32 q15,d25,d20 | |
5781 ++ | |
5782 ++# qhasm: r2[0,1] += x4[0] unsigned* _5y23[2]; r2[2,3] += x4[1] unsigned* _5y
23[3] | |
5783 ++# asm 1: vmlal.u32 <r2=reg128#16,<x4=reg128#14%bot,<_5y23=reg128#9%top | |
5784 ++# asm 2: vmlal.u32 <r2=q15,<x4=d26,<_5y23=d17 | |
5785 ++vmlal.u32 q15,d26,d17 | |
5786 ++ | |
5787 ++# qhasm: r3[0,1] = x01[0] unsigned* y23[2]; r3[2,3] = x01[1] unsigned* y23
[3] | |
5788 ++# asm 1: vmull.u32 >r3=reg128#9,<x01=reg128#12%bot,<y23=reg128#4%top | |
5789 ++# asm 2: vmull.u32 >r3=q8,<x01=d22,<y23=d7 | |
5790 ++vmull.u32 q8,d22,d7 | |
5791 ++ | |
5792 ++# qhasm: r3[0,1] += x01[2] unsigned* y23[0]; r3[2,3] += x01[3] unsigned* y23
[1] | |
5793 ++# asm 1: vmlal.u32 <r3=reg128#9,<x01=reg128#12%top,<y23=reg128#4%bot | |
5794 ++# asm 2: vmlal.u32 <r3=q8,<x01=d23,<y23=d6 | |
5795 ++vmlal.u32 q8,d23,d6 | |
5796 ++ | |
5797 ++# qhasm: r3[0,1] += x23[0] unsigned* y01[2]; r3[2,3] += x23[1] unsigned* y01
[3] | |
5798 ++# asm 1: vmlal.u32 <r3=reg128#9,<x23=reg128#13%bot,<y01=reg128#2%top | |
5799 ++# asm 2: vmlal.u32 <r3=q8,<x23=d24,<y01=d3 | |
5800 ++vmlal.u32 q8,d24,d3 | |
5801 ++ | |
5802 ++# qhasm: r3[0,1] += x23[2] unsigned* y01[0]; r3[2,3] += x23[3] unsigned* y01
[1] | |
5803 ++# asm 1: vmlal.u32 <r3=reg128#9,<x23=reg128#13%top,<y01=reg128#2%bot | |
5804 ++# asm 2: vmlal.u32 <r3=q8,<x23=d25,<y01=d2 | |
5805 ++vmlal.u32 q8,d25,d2 | |
5806 ++ | |
5807 ++# qhasm: r3[0,1] += x4[0] unsigned* _5y4[0]; r3[2,3] += x4[1] unsigned* _5
y4[1] | |
5808 ++# asm 1: vmlal.u32 <r3=reg128#9,<x4=reg128#14%bot,<_5y4=reg128#11%bot | |
5809 ++# asm 2: vmlal.u32 <r3=q8,<x4=d26,<_5y4=d20 | |
5810 ++vmlal.u32 q8,d26,d20 | |
5811 ++ | |
5812 ++# qhasm: r4[0,1] = x01[0] unsigned* y4[0]; r4[2,3] = x01[1] unsigned* y4[
1] | |
5813 ++# asm 1: vmull.u32 >r4=reg128#10,<x01=reg128#12%bot,<y4=reg128#10%bot | |
5814 ++# asm 2: vmull.u32 >r4=q9,<x01=d22,<y4=d18 | |
5815 ++vmull.u32 q9,d22,d18 | |
5816 ++ | |
5817 ++# qhasm: r4[0,1] += x01[2] unsigned* y23[2]; r4[2,3] += x01[3] unsigned* y23[
3] | |
5818 ++# asm 1: vmlal.u32 <r4=reg128#10,<x01=reg128#12%top,<y23=reg128#4%top | |
5819 ++# asm 2: vmlal.u32 <r4=q9,<x01=d23,<y23=d7 | |
5820 ++vmlal.u32 q9,d23,d7 | |
5821 ++ | |
5822 ++# qhasm: r4[0,1] += x23[0] unsigned* y23[0]; r4[2,3] += x23[1] unsigned* y23[
1] | |
5823 ++# asm 1: vmlal.u32 <r4=reg128#10,<x23=reg128#13%bot,<y23=reg128#4%bot | |
5824 ++# asm 2: vmlal.u32 <r4=q9,<x23=d24,<y23=d6 | |
5825 ++vmlal.u32 q9,d24,d6 | |
5826 ++ | |
5827 ++# qhasm: r4[0,1] += x23[2] unsigned* y01[2]; r4[2,3] += x23[3] unsigned* y01[
3] | |
5828 ++# asm 1: vmlal.u32 <r4=reg128#10,<x23=reg128#13%top,<y01=reg128#2%top | |
5829 ++# asm 2: vmlal.u32 <r4=q9,<x23=d25,<y01=d3 | |
5830 ++vmlal.u32 q9,d25,d3 | |
5831 ++ | |
5832 ++# qhasm: r4[0,1] += x4[0] unsigned* y01[0]; r4[2,3] += x4[1] unsigned* y01[
1] | |
5833 ++# asm 1: vmlal.u32 <r4=reg128#10,<x4=reg128#14%bot,<y01=reg128#2%bot | |
5834 ++# asm 2: vmlal.u32 <r4=q9,<x4=d26,<y01=d2 | |
5835 ++vmlal.u32 q9,d26,d2 | |
5836 ++ | |
5837 ++# qhasm: 2x t1 = r0 unsigned>> 26 | |
5838 ++# asm 1: vshr.u64 >t1=reg128#2,<r0=reg128#15,#26 | |
5839 ++# asm 2: vshr.u64 >t1=q1,<r0=q14,#26 | |
5840 ++vshr.u64 q1,q14,#26 | |
5841 ++ | |
5842 ++# qhasm: r0 &= mask | |
5843 ++# asm 1: vand >r0=reg128#4,<r0=reg128#15,<mask=reg128#1 | |
5844 ++# asm 2: vand >r0=q3,<r0=q14,<mask=q0 | |
5845 ++vand q3,q14,q0 | |
5846 ++ | |
5847 ++# qhasm: 2x r1 += t1 | |
5848 ++# asm 1: vadd.i64 >r1=reg128#2,<r1=reg128#3,<t1=reg128#2 | |
5849 ++# asm 2: vadd.i64 >r1=q1,<r1=q2,<t1=q1 | |
5850 ++vadd.i64 q1,q2,q1 | |
5851 ++ | |
5852 ++# qhasm: 2x t4 = r3 unsigned>> 26 | |
5853 ++# asm 1: vshr.u64 >t4=reg128#3,<r3=reg128#9,#26 | |
5854 ++# asm 2: vshr.u64 >t4=q2,<r3=q8,#26 | |
5855 ++vshr.u64 q2,q8,#26 | |
5856 ++ | |
5857 ++# qhasm: r3 &= mask | |
5858 ++# asm 1: vand >r3=reg128#9,<r3=reg128#9,<mask=reg128#1 | |
5859 ++# asm 2: vand >r3=q8,<r3=q8,<mask=q0 | |
5860 ++vand q8,q8,q0 | |
5861 ++ | |
5862 ++# qhasm: 2x r4 += t4 | |
5863 ++# asm 1: vadd.i64 >r4=reg128#3,<r4=reg128#10,<t4=reg128#3 | |
5864 ++# asm 2: vadd.i64 >r4=q2,<r4=q9,<t4=q2 | |
5865 ++vadd.i64 q2,q9,q2 | |
5866 ++ | |
5867 ++# qhasm: 2x t2 = r1 unsigned>> 26 | |
5868 ++# asm 1: vshr.u64 >t2=reg128#10,<r1=reg128#2,#26 | |
5869 ++# asm 2: vshr.u64 >t2=q9,<r1=q1,#26 | |
5870 ++vshr.u64 q9,q1,#26 | |
5871 ++ | |
5872 ++# qhasm: r1 &= mask | |
5873 ++# asm 1: vand >r1=reg128#2,<r1=reg128#2,<mask=reg128#1 | |
5874 ++# asm 2: vand >r1=q1,<r1=q1,<mask=q0 | |
5875 ++vand q1,q1,q0 | |
5876 ++ | |
5877 ++# qhasm: 2x t0 = r4 unsigned>> 26 | |
5878 ++# asm 1: vshr.u64 >t0=reg128#11,<r4=reg128#3,#26 | |
5879 ++# asm 2: vshr.u64 >t0=q10,<r4=q2,#26 | |
5880 ++vshr.u64 q10,q2,#26 | |
5881 ++ | |
5882 ++# qhasm: 2x r2 += t2 | |
5883 ++# asm 1: vadd.i64 >r2=reg128#10,<r2=reg128#16,<t2=reg128#10 | |
5884 ++# asm 2: vadd.i64 >r2=q9,<r2=q15,<t2=q9 | |
5885 ++vadd.i64 q9,q15,q9 | |
5886 ++ | |
5887 ++# qhasm: r4 &= mask | |
5888 ++# asm 1: vand >r4=reg128#3,<r4=reg128#3,<mask=reg128#1 | |
5889 ++# asm 2: vand >r4=q2,<r4=q2,<mask=q0 | |
5890 ++vand q2,q2,q0 | |
5891 ++ | |
5892 ++# qhasm: 2x r0 += t0 | |
5893 ++# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#11 | |
5894 ++# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q10 | |
5895 ++vadd.i64 q3,q3,q10 | |
5896 ++ | |
5897 ++# qhasm: 2x t0 <<= 2 | |
5898 ++# asm 1: vshl.i64 >t0=reg128#11,<t0=reg128#11,#2 | |
5899 ++# asm 2: vshl.i64 >t0=q10,<t0=q10,#2 | |
5900 ++vshl.i64 q10,q10,#2 | |
5901 ++ | |
5902 ++# qhasm: 2x t3 = r2 unsigned>> 26 | |
5903 ++# asm 1: vshr.u64 >t3=reg128#12,<r2=reg128#10,#26 | |
5904 ++# asm 2: vshr.u64 >t3=q11,<r2=q9,#26 | |
5905 ++vshr.u64 q11,q9,#26 | |
5906 ++ | |
5907 ++# qhasm: 2x r0 += t0 | |
5908 ++# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#11 | |
5909 ++# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q10 | |
5910 ++vadd.i64 q3,q3,q10 | |
5911 ++ | |
5912 ++# qhasm: x23 = r2 & mask | |
5913 ++# asm 1: vand >x23=reg128#10,<r2=reg128#10,<mask=reg128#1 | |
5914 ++# asm 2: vand >x23=q9,<r2=q9,<mask=q0 | |
5915 ++vand q9,q9,q0 | |
5916 ++ | |
5917 ++# qhasm: 2x r3 += t3 | |
5918 ++# asm 1: vadd.i64 >r3=reg128#9,<r3=reg128#9,<t3=reg128#12 | |
5919 ++# asm 2: vadd.i64 >r3=q8,<r3=q8,<t3=q11 | |
5920 ++vadd.i64 q8,q8,q11 | |
5921 ++ | |
5922 ++# qhasm: 2x t1 = r0 unsigned>> 26 | |
5923 ++# asm 1: vshr.u64 >t1=reg128#11,<r0=reg128#4,#26 | |
5924 ++# asm 2: vshr.u64 >t1=q10,<r0=q3,#26 | |
5925 ++vshr.u64 q10,q3,#26 | |
5926 ++ | |
5927 ++# qhasm: x23 = x23[0,2,1,3] | |
5928 ++# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top | |
5929 ++# asm 2: vtrn.32 <x23=d18,<x23=d19 | |
5930 ++vtrn.32 d18,d19 | |
5931 ++ | |
5932 ++# qhasm: x01 = r0 & mask | |
5933 ++# asm 1: vand >x01=reg128#4,<r0=reg128#4,<mask=reg128#1 | |
5934 ++# asm 2: vand >x01=q3,<r0=q3,<mask=q0 | |
5935 ++vand q3,q3,q0 | |
5936 ++ | |
5937 ++# qhasm: 2x r1 += t1 | |
5938 ++# asm 1: vadd.i64 >r1=reg128#2,<r1=reg128#2,<t1=reg128#11 | |
5939 ++# asm 2: vadd.i64 >r1=q1,<r1=q1,<t1=q10 | |
5940 ++vadd.i64 q1,q1,q10 | |
5941 ++ | |
5942 ++# qhasm: 2x t4 = r3 unsigned>> 26 | |
5943 ++# asm 1: vshr.u64 >t4=reg128#11,<r3=reg128#9,#26 | |
5944 ++# asm 2: vshr.u64 >t4=q10,<r3=q8,#26 | |
5945 ++vshr.u64 q10,q8,#26 | |
5946 ++ | |
5947 ++# qhasm: x01 = x01[0,2,1,3] | |
5948 ++# asm 1: vtrn.32 <x01=reg128#4%bot,<x01=reg128#4%top | |
5949 ++# asm 2: vtrn.32 <x01=d6,<x01=d7 | |
5950 ++vtrn.32 d6,d7 | |
5951 ++ | |
5952 ++# qhasm: r3 &= mask | |
5953 ++# asm 1: vand >r3=reg128#1,<r3=reg128#9,<mask=reg128#1 | |
5954 ++# asm 2: vand >r3=q0,<r3=q8,<mask=q0 | |
5955 ++vand q0,q8,q0 | |
5956 ++ | |
5957 ++# qhasm: r1 = r1[0,2,1,3] | |
5958 ++# asm 1: vtrn.32 <r1=reg128#2%bot,<r1=reg128#2%top | |
5959 ++# asm 2: vtrn.32 <r1=d2,<r1=d3 | |
5960 ++vtrn.32 d2,d3 | |
5961 ++ | |
5962 ++# qhasm: 2x x4 = r4 + t4 | |
5963 ++# asm 1: vadd.i64 >x4=reg128#3,<r4=reg128#3,<t4=reg128#11 | |
5964 ++# asm 2: vadd.i64 >x4=q2,<r4=q2,<t4=q10 | |
5965 ++vadd.i64 q2,q2,q10 | |
5966 ++ | |
5967 ++# qhasm: r3 = r3[0,2,1,3] | |
5968 ++# asm 1: vtrn.32 <r3=reg128#1%bot,<r3=reg128#1%top | |
5969 ++# asm 2: vtrn.32 <r3=d0,<r3=d1 | |
5970 ++vtrn.32 d0,d1 | |
5971 ++ | |
5972 ++# qhasm: x01 = x01[0,1] r1[0,1] | |
5973 ++# asm 1: vext.32 <x01=reg128#4%top,<r1=reg128#2%bot,<r1=reg128#2%bot,#0 | |
5974 ++# asm 2: vext.32 <x01=d7,<r1=d2,<r1=d2,#0 | |
5975 ++vext.32 d7,d2,d2,#0 | |
5976 ++ | |
5977 ++# qhasm: x23 = x23[0,1] r3[0,1] | |
5978 ++# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#1%bot,<r3=reg128#1%bot,#0 | |
5979 ++# asm 2: vext.32 <x23=d19,<r3=d0,<r3=d0,#0 | |
5980 ++vext.32 d19,d0,d0,#0 | |
5981 ++ | |
5982 ++# qhasm: x4 = x4[0,2,1,3] | |
5983 ++# asm 1: vtrn.32 <x4=reg128#3%bot,<x4=reg128#3%top | |
5984 ++# asm 2: vtrn.32 <x4=d4,<x4=d5 | |
5985 ++vtrn.32 d4,d5 | |
5986 ++ | |
5987 ++# qhasm: mem128[input_0] aligned= x01;input_0+=16 | |
5988 ++# asm 1: vst1.8 {<x01=reg128#4%bot-<x01=reg128#4%top},[<input_0=int32#1,: 128]
! | |
5989 ++# asm 2: vst1.8 {<x01=d6-<x01=d7},[<input_0=r0,: 128]! | |
5990 ++vst1.8 {d6-d7},[r0,: 128]! | |
5991 ++ | |
5992 ++# qhasm: mem128[input_0] aligned= x23;input_0+=16 | |
5993 ++# asm 1: vst1.8 {<x23=reg128#10%bot-<x23=reg128#10%top},[<input_0=int32#1,: 12
8]! | |
5994 ++# asm 2: vst1.8 {<x23=d18-<x23=d19},[<input_0=r0,: 128]! | |
5995 ++vst1.8 {d18-d19},[r0,: 128]! | |
5996 ++ | |
5997 ++# qhasm: mem64[input_0] aligned= x4[0] | |
5998 ++# asm 1: vst1.8 <x4=reg128#3%bot,[<input_0=int32#1,: 64] | |
5999 ++# asm 2: vst1.8 <x4=d4,[<input_0=r0,: 64] | |
6000 ++vst1.8 d4,[r0,: 64] | |
6001 ++ | |
6002 ++# qhasm: return | |
6003 ++add sp,sp,#0 | |
6004 ++bx lr | |
6005 +diff --git a/crypto/poly1305/poly1305_vec.c b/crypto/poly1305/poly1305_vec.c | |
6006 +new file mode 100644 | |
6007 +index 0000000..c546200 | |
6008 +--- /dev/null | |
6009 ++++ b/crypto/poly1305/poly1305_vec.c | |
6010 +@@ -0,0 +1,733 @@ | |
6011 ++/* ==================================================================== | |
6012 ++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
6013 ++ * | |
6014 ++ * Redistribution and use in source and binary forms, with or without | |
6015 ++ * modification, are permitted provided that the following conditions | |
6016 ++ * are met: | |
6017 ++ * | |
6018 ++ * 1. Redistributions of source code must retain the above copyright | |
6019 ++ * notice, this list of conditions and the following disclaimer. | |
6020 ++ * | |
6021 ++ * 2. Redistributions in binary form must reproduce the above copyright | |
6022 ++ * notice, this list of conditions and the following disclaimer in | |
6023 ++ * the documentation and/or other materials provided with the | |
6024 ++ * distribution. | |
6025 ++ * | |
6026 ++ * 3. All advertising materials mentioning features or use of this | |
6027 ++ * software must display the following acknowledgment: | |
6028 ++ * "This product includes software developed by the OpenSSL Project | |
6029 ++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
6030 ++ * | |
6031 ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
6032 ++ * endorse or promote products derived from this software without | |
6033 ++ * prior written permission. For written permission, please contact | |
6034 ++ * licensing@OpenSSL.org. | |
6035 ++ * | |
6036 ++ * 5. Products derived from this software may not be called "OpenSSL" | |
6037 ++ * nor may "OpenSSL" appear in their names without prior written | |
6038 ++ * permission of the OpenSSL Project. | |
6039 ++ * | |
6040 ++ * 6. Redistributions of any form whatsoever must retain the following | |
6041 ++ * acknowledgment: | |
6042 ++ * "This product includes software developed by the OpenSSL Project | |
6043 ++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
6044 ++ * | |
6045 ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
6046 ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
6047 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
6048 ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
6049 ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
6050 ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
6051 ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
6052 ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
6053 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
6054 ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
6055 ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
6056 ++ * OF THE POSSIBILITY OF SUCH DAMAGE. | |
6057 ++ * ==================================================================== | |
6058 ++ */ | |
6059 ++ | |
6060 ++/* This implementation of poly1305 is by Andrew Moon | |
6061 ++ * (https://github.com/floodyberry/poly1305-donna) and released as public | |
6062 ++ * domain. It implements SIMD vectorization based on the algorithm described i
n | |
6063 ++ * http://cr.yp.to/papers.html#neoncrypto. Unrolled to 2 powers, i.e. 64 byte | |
6064 ++ * block size | |
6065 ++*/ | |
6066 ++ | |
6067 ++#include <emmintrin.h> | |
6068 ++#include <stdint.h> | |
6069 ++#include <openssl/opensslconf.h> | |
6070 ++ | |
6071 ++#if !defined(OPENSSL_NO_POLY1305) | |
6072 ++ | |
6073 ++#include <openssl/poly1305.h> | |
6074 ++ | |
6075 ++#define ALIGN(x) __attribute__((aligned(x))) | |
6076 ++#define INLINE inline | |
6077 ++#define U8TO64_LE(m) (*(uint64_t*)(m)) | |
6078 ++#define U8TO32_LE(m) (*(uint32_t*)(m)) | |
6079 ++#define U64TO8_LE(m,v) (*(uint64_t*)(m)) = v | |
6080 ++ | |
6081 ++typedef __m128i xmmi; | |
6082 ++typedef unsigned __int128 uint128_t; | |
6083 ++ | |
6084 ++static const uint32_t ALIGN(16) poly1305_x64_sse2_message_mask[4] = | |
6085 ++ {(1 << 26) - 1, 0, (1 << 26) - 1, 0}; | |
6086 ++static const uint32_t ALIGN(16) poly1305_x64_sse2_5[4] = {5, 0, 5, 0}; | |
6087 ++static const uint32_t ALIGN(16) poly1305_x64_sse2_1shl128[4] = | |
6088 ++ {(1 << 24), 0, (1 << 24), 0}; | |
6089 ++ | |
6090 ++static uint128_t INLINE | |
6091 ++add128(uint128_t a, uint128_t b) | |
6092 ++ { | |
6093 ++ return a + b; | |
6094 ++ } | |
6095 ++ | |
6096 ++static uint128_t INLINE | |
6097 ++add128_64(uint128_t a, uint64_t b) | |
6098 ++ { | |
6099 ++ return a + b; | |
6100 ++ } | |
6101 ++ | |
6102 ++static uint128_t INLINE | |
6103 ++mul64x64_128(uint64_t a, uint64_t b) | |
6104 ++ { | |
6105 ++ return (uint128_t)a * b; | |
6106 ++ } | |
6107 ++ | |
6108 ++static uint64_t INLINE | |
6109 ++lo128(uint128_t a) | |
6110 ++ { | |
6111 ++ return (uint64_t)a; | |
6112 ++ } | |
6113 ++ | |
6114 ++static uint64_t INLINE | |
6115 ++shr128(uint128_t v, const int shift) | |
6116 ++ { | |
6117 ++ return (uint64_t)(v >> shift); | |
6118 ++ } | |
6119 ++ | |
6120 ++static uint64_t INLINE | |
6121 ++shr128_pair(uint64_t hi, uint64_t lo, const int shift) | |
6122 ++ { | |
6123 ++ return (uint64_t)((((uint128_t)hi << 64) | lo) >> shift); | |
6124 ++ } | |
6125 ++ | |
6126 ++typedef struct poly1305_power_t | |
6127 ++ { | |
6128 ++ union | |
6129 ++ { | |
6130 ++ xmmi v; | |
6131 ++ uint64_t u[2]; | |
6132 ++ uint32_t d[4]; | |
6133 ++ } R20,R21,R22,R23,R24,S21,S22,S23,S24; | |
6134 ++ } poly1305_power; | |
6135 ++ | |
6136 ++typedef struct poly1305_state_internal_t | |
6137 ++ { | |
6138 ++ poly1305_power P[2]; /* 288 bytes, top 32 bit halves unused = 144 | |
6139 ++ bytes of free storage */ | |
6140 ++ union | |
6141 ++ { | |
6142 ++ xmmi H[5]; /* 80 bytes */ | |
6143 ++ uint64_t HH[10]; | |
6144 ++ }; | |
6145 ++ /* uint64_t r0,r1,r2; [24 bytes] */ | |
6146 ++ /* uint64_t pad0,pad1; [16 bytes] */ | |
6147 ++ uint64_t started; /* 8 bytes */ | |
6148 ++ uint64_t leftover; /* 8 bytes */ | |
6149 ++ uint8_t buffer[64]; /* 64 bytes */ | |
6150 ++ } poly1305_state_internal; /* 448 bytes total + 63 bytes for | |
6151 ++ alignment = 511 bytes raw */ | |
6152 ++ | |
6153 ++static poly1305_state_internal INLINE | |
6154 ++*poly1305_aligned_state(poly1305_state *state) | |
6155 ++ { | |
6156 ++ return (poly1305_state_internal *)(((uint64_t)state + 63) & ~63); | |
6157 ++ } | |
6158 ++ | |
6159 ++/* copy 0-63 bytes */ | |
6160 ++static void INLINE | |
6161 ++poly1305_block_copy(uint8_t *dst, const uint8_t *src, size_t bytes) | |
6162 ++ { | |
6163 ++ size_t offset = src - dst; | |
6164 ++ if (bytes & 32) | |
6165 ++ { | |
6166 ++ _mm_storeu_si128((xmmi *)(dst + 0), _mm_loadu_si128((xmmi *)(dst
+ offset + 0))); | |
6167 ++ _mm_storeu_si128((xmmi *)(dst + 16), _mm_loadu_si128((xmmi *)(ds
t + offset + 16))); | |
6168 ++ dst += 32; | |
6169 ++ } | |
6170 ++ if (bytes & 16) | |
6171 ++ { | |
6172 ++ _mm_storeu_si128((xmmi *)dst, | |
6173 ++ _mm_loadu_si128((xmmi *)(dst + offset))); | |
6174 ++ dst += 16; | |
6175 ++ } | |
6176 ++ if (bytes & 8) | |
6177 ++ { | |
6178 ++ *(uint64_t *)dst = *(uint64_t *)(dst + offset); | |
6179 ++ dst += 8; | |
6180 ++ } | |
6181 ++ if (bytes & 4) | |
6182 ++ { | |
6183 ++ *(uint32_t *)dst = *(uint32_t *)(dst + offset); | |
6184 ++ dst += 4; | |
6185 ++ } | |
6186 ++ if (bytes & 2) | |
6187 ++ { | |
6188 ++ *(uint16_t *)dst = *(uint16_t *)(dst + offset); | |
6189 ++ dst += 2; | |
6190 ++ } | |
6191 ++ if (bytes & 1) | |
6192 ++ { | |
6193 ++ *( uint8_t *)dst = *( uint8_t *)(dst + offset); | |
6194 ++ } | |
6195 ++ } | |
6196 ++ | |
6197 ++/* zero 0-15 bytes */ | |
6198 ++static void INLINE | |
6199 ++poly1305_block_zero(uint8_t *dst, size_t bytes) | |
6200 ++ { | |
6201 ++ if (bytes & 8) { *(uint64_t *)dst = 0; dst += 8; } | |
6202 ++ if (bytes & 4) { *(uint32_t *)dst = 0; dst += 4; } | |
6203 ++ if (bytes & 2) { *(uint16_t *)dst = 0; dst += 2; } | |
6204 ++ if (bytes & 1) { *( uint8_t *)dst = 0; } | |
6205 ++ } | |
6206 ++ | |
6207 ++static size_t INLINE | |
6208 ++poly1305_min(size_t a, size_t b) | |
6209 ++ { | |
6210 ++ return (a < b) ? a : b; | |
6211 ++ } | |
6212 ++ | |
6213 ++void | |
6214 ++CRYPTO_poly1305_init(poly1305_state *state, const unsigned char key[32]) | |
6215 ++ { | |
6216 ++ poly1305_state_internal *st = poly1305_aligned_state(state); | |
6217 ++ poly1305_power *p; | |
6218 ++ uint64_t r0,r1,r2; | |
6219 ++ uint64_t t0,t1; | |
6220 ++ | |
6221 ++ /* clamp key */ | |
6222 ++ t0 = U8TO64_LE(key + 0); | |
6223 ++ t1 = U8TO64_LE(key + 8); | |
6224 ++ r0 = t0 & 0xffc0fffffff; t0 >>= 44; t0 |= t1 << 20; | |
6225 ++ r1 = t0 & 0xfffffc0ffff; t1 >>= 24; | |
6226 ++ r2 = t1 & 0x00ffffffc0f; | |
6227 ++ | |
6228 ++ /* store r in un-used space of st->P[1] */ | |
6229 ++ p = &st->P[1]; | |
6230 ++ p->R20.d[1] = (uint32_t)(r0 ); | |
6231 ++ p->R20.d[3] = (uint32_t)(r0 >> 32); | |
6232 ++ p->R21.d[1] = (uint32_t)(r1 ); | |
6233 ++ p->R21.d[3] = (uint32_t)(r1 >> 32); | |
6234 ++ p->R22.d[1] = (uint32_t)(r2 ); | |
6235 ++ p->R22.d[3] = (uint32_t)(r2 >> 32); | |
6236 ++ | |
6237 ++ /* store pad */ | |
6238 ++ p->R23.d[1] = U8TO32_LE(key + 16); | |
6239 ++ p->R23.d[3] = U8TO32_LE(key + 20); | |
6240 ++ p->R24.d[1] = U8TO32_LE(key + 24); | |
6241 ++ p->R24.d[3] = U8TO32_LE(key + 28); | |
6242 ++ | |
6243 ++ /* H = 0 */ | |
6244 ++ st->H[0] = _mm_setzero_si128(); | |
6245 ++ st->H[1] = _mm_setzero_si128(); | |
6246 ++ st->H[2] = _mm_setzero_si128(); | |
6247 ++ st->H[3] = _mm_setzero_si128(); | |
6248 ++ st->H[4] = _mm_setzero_si128(); | |
6249 ++ | |
6250 ++ st->started = 0; | |
6251 ++ st->leftover = 0; | |
6252 ++ } | |
6253 ++ | |
6254 ++static void | |
6255 ++poly1305_first_block(poly1305_state_internal *st, const uint8_t *m) | |
6256 ++ { | |
6257 ++ const xmmi MMASK = | |
6258 ++ _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); | |
6259 ++ const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5); | |
6260 ++ const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128); | |
6261 ++ xmmi T5,T6; | |
6262 ++ poly1305_power *p; | |
6263 ++ uint128_t d[3]; | |
6264 ++ uint64_t r0,r1,r2; | |
6265 ++ uint64_t r20,r21,r22,s22; | |
6266 ++ uint64_t pad0,pad1; | |
6267 ++ uint64_t c; | |
6268 ++ uint64_t i; | |
6269 ++ | |
6270 ++ /* pull out stored info */ | |
6271 ++ p = &st->P[1]; | |
6272 ++ | |
6273 ++ r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; | |
6274 ++ r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; | |
6275 ++ r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; | |
6276 ++ pad0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1]; | |
6277 ++ pad1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1]; | |
6278 ++ | |
6279 ++ /* compute powers r^2,r^4 */ | |
6280 ++ r20 = r0; | |
6281 ++ r21 = r1; | |
6282 ++ r22 = r2; | |
6283 ++ for (i = 0; i < 2; i++) | |
6284 ++ { | |
6285 ++ s22 = r22 * (5 << 2); | |
6286 ++ | |
6287 ++ d[0] = add128(mul64x64_128(r20, r20), mul64x64_128(r21 * 2, s22)
); | |
6288 ++ d[1] = add128(mul64x64_128(r22, s22), mul64x64_128(r20 * 2, r21)
); | |
6289 ++ d[2] = add128(mul64x64_128(r21, r21), mul64x64_128(r22 * 2, r20)
); | |
6290 ++ | |
6291 ++ r20 = lo128(d[0]) & 0xfffffffffff; c
= shr128(d[0], 44); | |
6292 ++ d[1] = add128_64(d[1], c); r21 = lo128(d[1]) & 0xfffffffffff; c
= shr128(d[1], 44); | |
6293 ++ d[2] = add128_64(d[2], c); r22 = lo128(d[2]) & 0x3ffffffffff; c
= shr128(d[2], 42); | |
6294 ++ r20 += c * 5; c = (r20 >> 44); r20 = r20 & 0xfffffffffff; | |
6295 ++ r21 += c; | |
6296 ++ | |
6297 ++ p->R20.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)( r20
) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); | |
6298 ++ p->R21.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r20 >
> 26) | (r21 << 18)) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); | |
6299 ++ p->R22.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >
> 8) ) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); | |
6300 ++ p->R23.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >
> 34) | (r22 << 10)) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); | |
6301 ++ p->R24.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r22 >
> 16) ) ), _MM_SHUFFLE(1,0,1,0)); | |
6302 ++ p->S21.v = _mm_mul_epu32(p->R21.v, FIVE); | |
6303 ++ p->S22.v = _mm_mul_epu32(p->R22.v, FIVE); | |
6304 ++ p->S23.v = _mm_mul_epu32(p->R23.v, FIVE); | |
6305 ++ p->S24.v = _mm_mul_epu32(p->R24.v, FIVE); | |
6306 ++ p--; | |
6307 ++ } | |
6308 ++ | |
6309 ++ /* put saved info back */ | |
6310 ++ p = &st->P[1]; | |
6311 ++ p->R20.d[1] = (uint32_t)(r0 ); | |
6312 ++ p->R20.d[3] = (uint32_t)(r0 >> 32); | |
6313 ++ p->R21.d[1] = (uint32_t)(r1 ); | |
6314 ++ p->R21.d[3] = (uint32_t)(r1 >> 32); | |
6315 ++ p->R22.d[1] = (uint32_t)(r2 ); | |
6316 ++ p->R22.d[3] = (uint32_t)(r2 >> 32); | |
6317 ++ p->R23.d[1] = (uint32_t)(pad0 ); | |
6318 ++ p->R23.d[3] = (uint32_t)(pad0 >> 32); | |
6319 ++ p->R24.d[1] = (uint32_t)(pad1 ); | |
6320 ++ p->R24.d[3] = (uint32_t)(pad1 >> 32); | |
6321 ++ | |
6322 ++ /* H = [Mx,My] */ | |
6323 ++ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi6
4((xmmi *)(m + 16))); | |
6324 ++ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi6
4((xmmi *)(m + 24))); | |
6325 ++ st->H[0] = _mm_and_si128(MMASK, T5); | |
6326 ++ st->H[1] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
6327 ++ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)); | |
6328 ++ st->H[2] = _mm_and_si128(MMASK, T5); | |
6329 ++ st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
6330 ++ st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); | |
6331 ++ } | |
6332 ++ | |
6333 ++static void | |
6334 ++poly1305_blocks(poly1305_state_internal *st, const uint8_t *m, size_t bytes) | |
6335 ++ { | |
6336 ++ const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask
); | |
6337 ++ const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5); | |
6338 ++ const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128); | |
6339 ++ | |
6340 ++ poly1305_power *p; | |
6341 ++ xmmi H0,H1,H2,H3,H4; | |
6342 ++ xmmi T0,T1,T2,T3,T4,T5,T6; | |
6343 ++ xmmi M0,M1,M2,M3,M4; | |
6344 ++ xmmi C1,C2; | |
6345 ++ | |
6346 ++ H0 = st->H[0]; | |
6347 ++ H1 = st->H[1]; | |
6348 ++ H2 = st->H[2]; | |
6349 ++ H3 = st->H[3]; | |
6350 ++ H4 = st->H[4]; | |
6351 ++ | |
6352 ++ while (bytes >= 64) | |
6353 ++ { | |
6354 ++ /* H *= [r^4,r^4] */ | |
6355 ++ p = &st->P[0]; | |
6356 ++ T0 = _mm_mul_epu32(H0, p->R20.v); | |
6357 ++ T1 = _mm_mul_epu32(H0, p->R21.v); | |
6358 ++ T2 = _mm_mul_epu32(H0, p->R22.v); | |
6359 ++ T3 = _mm_mul_epu32(H0, p->R23.v); | |
6360 ++ T4 = _mm_mul_epu32(H0, p->R24.v); | |
6361 ++ T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
6362 ++ T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
6363 ++ T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
6364 ++ T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
6365 ++ T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
6366 ++ T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
6367 ++ T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
6368 ++ T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
6369 ++ T5 = _mm_mul_epu32(H1, p->R23.v);
T4 = _mm_add_epi64(T4, T5); | |
6370 ++ T5 = _mm_mul_epu32(H2, p->R22.v);
T4 = _mm_add_epi64(T4, T5); | |
6371 ++ T5 = _mm_mul_epu32(H3, p->R21.v);
T4 = _mm_add_epi64(T4, T5); | |
6372 ++ T5 = _mm_mul_epu32(H4, p->R20.v);
T4 = _mm_add_epi64(T4, T5); | |
6373 ++ | |
6374 ++ /* H += [Mx,My]*[r^2,r^2] */ | |
6375 ++ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_lo
adl_epi64((xmmi *)(m + 16))); | |
6376 ++ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_lo
adl_epi64((xmmi *)(m + 24))); | |
6377 ++ M0 = _mm_and_si128(MMASK, T5); | |
6378 ++ M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
6379 ++ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)
); | |
6380 ++ M2 = _mm_and_si128(MMASK, T5); | |
6381 ++ M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
6382 ++ M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); | |
6383 ++ | |
6384 ++ p = &st->P[1]; | |
6385 ++ T5 = _mm_mul_epu32(M0, p->R20.v); T6 = _mm_mul_epu32(M0, p->R21.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
6386 ++ T5 = _mm_mul_epu32(M1, p->S24.v); T6 = _mm_mul_epu32(M1, p->R20.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
6387 ++ T5 = _mm_mul_epu32(M2, p->S23.v); T6 = _mm_mul_epu32(M2, p->S24.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
6388 ++ T5 = _mm_mul_epu32(M3, p->S22.v); T6 = _mm_mul_epu32(M3, p->S23.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
6389 ++ T5 = _mm_mul_epu32(M4, p->S21.v); T6 = _mm_mul_epu32(M4, p->S22.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
6390 ++ T5 = _mm_mul_epu32(M0, p->R22.v); T6 = _mm_mul_epu32(M0, p->R23.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
6391 ++ T5 = _mm_mul_epu32(M1, p->R21.v); T6 = _mm_mul_epu32(M1, p->R22.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
6392 ++ T5 = _mm_mul_epu32(M2, p->R20.v); T6 = _mm_mul_epu32(M2, p->R21.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
6393 ++ T5 = _mm_mul_epu32(M3, p->S24.v); T6 = _mm_mul_epu32(M3, p->R20.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
6394 ++ T5 = _mm_mul_epu32(M4, p->S23.v); T6 = _mm_mul_epu32(M4, p->S24.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
6395 ++ T5 = _mm_mul_epu32(M0, p->R24.v);
T4 = _mm_add_epi64(T4, T5); | |
6396 ++ T5 = _mm_mul_epu32(M1, p->R23.v);
T4 = _mm_add_epi64(T4, T5); | |
6397 ++ T5 = _mm_mul_epu32(M2, p->R22.v);
T4 = _mm_add_epi64(T4, T5); | |
6398 ++ T5 = _mm_mul_epu32(M3, p->R21.v);
T4 = _mm_add_epi64(T4, T5); | |
6399 ++ T5 = _mm_mul_epu32(M4, p->R20.v);
T4 = _mm_add_epi64(T4, T5); | |
6400 ++ | |
6401 ++ /* H += [Mx,My] */ | |
6402 ++ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 32)), _mm_l
oadl_epi64((xmmi *)(m + 48))); | |
6403 ++ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 40)), _mm_l
oadl_epi64((xmmi *)(m + 56))); | |
6404 ++ M0 = _mm_and_si128(MMASK, T5); | |
6405 ++ M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
6406 ++ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)
); | |
6407 ++ M2 = _mm_and_si128(MMASK, T5); | |
6408 ++ M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
6409 ++ M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); | |
6410 ++ | |
6411 ++ T0 = _mm_add_epi64(T0, M0); | |
6412 ++ T1 = _mm_add_epi64(T1, M1); | |
6413 ++ T2 = _mm_add_epi64(T2, M2); | |
6414 ++ T3 = _mm_add_epi64(T3, M3); | |
6415 ++ T4 = _mm_add_epi64(T4, M4); | |
6416 ++ | |
6417 ++ /* reduce */ | |
6418 ++ C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _
mm_and_si128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C
1); T4 = _mm_add_epi64(T4, C2); | |
6419 ++ C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _
mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C
1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); | |
6420 ++ C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _
mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C
1); T1 = _mm_add_epi64(T1, C2); | |
6421 ++ C1 = _mm_srli_epi64(T3, 26); T3 = _
mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C
1); | |
6422 ++ | |
6423 ++ /* H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx,My]) */ | |
6424 ++ H0 = T0; | |
6425 ++ H1 = T1; | |
6426 ++ H2 = T2; | |
6427 ++ H3 = T3; | |
6428 ++ H4 = T4; | |
6429 ++ | |
6430 ++ m += 64; | |
6431 ++ bytes -= 64; | |
6432 ++ } | |
6433 ++ | |
6434 ++ st->H[0] = H0; | |
6435 ++ st->H[1] = H1; | |
6436 ++ st->H[2] = H2; | |
6437 ++ st->H[3] = H3; | |
6438 ++ st->H[4] = H4; | |
6439 ++ } | |
6440 ++ | |
6441 ++static size_t | |
6442 ++poly1305_combine(poly1305_state_internal *st, const uint8_t *m, size_t bytes) | |
6443 ++ { | |
6444 ++ const xmmi MMASK = | |
6445 ++ _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); | |
6446 ++ const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128); | |
6447 ++ const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5); | |
6448 ++ | |
6449 ++ poly1305_power *p; | |
6450 ++ xmmi H0,H1,H2,H3,H4; | |
6451 ++ xmmi M0,M1,M2,M3,M4; | |
6452 ++ xmmi T0,T1,T2,T3,T4,T5,T6; | |
6453 ++ xmmi C1,C2; | |
6454 ++ | |
6455 ++ uint64_t r0,r1,r2; | |
6456 ++ uint64_t t0,t1,t2,t3,t4; | |
6457 ++ uint64_t c; | |
6458 ++ size_t consumed = 0; | |
6459 ++ | |
6460 ++ H0 = st->H[0]; | |
6461 ++ H1 = st->H[1]; | |
6462 ++ H2 = st->H[2]; | |
6463 ++ H3 = st->H[3]; | |
6464 ++ H4 = st->H[4]; | |
6465 ++ | |
6466 ++ /* p = [r^2,r^2] */ | |
6467 ++ p = &st->P[1]; | |
6468 ++ | |
6469 ++ if (bytes >= 32) | |
6470 ++ { | |
6471 ++ /* H *= [r^2,r^2] */ | |
6472 ++ T0 = _mm_mul_epu32(H0, p->R20.v); | |
6473 ++ T1 = _mm_mul_epu32(H0, p->R21.v); | |
6474 ++ T2 = _mm_mul_epu32(H0, p->R22.v); | |
6475 ++ T3 = _mm_mul_epu32(H0, p->R23.v); | |
6476 ++ T4 = _mm_mul_epu32(H0, p->R24.v); | |
6477 ++ T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
6478 ++ T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
6479 ++ T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
6480 ++ T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
6481 ++ T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
6482 ++ T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
6483 ++ T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
6484 ++ T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
6485 ++ T5 = _mm_mul_epu32(H1, p->R23.v);
T4 = _mm_add_epi64(T4, T5); | |
6486 ++ T5 = _mm_mul_epu32(H2, p->R22.v);
T4 = _mm_add_epi64(T4, T5); | |
6487 ++ T5 = _mm_mul_epu32(H3, p->R21.v);
T4 = _mm_add_epi64(T4, T5); | |
6488 ++ T5 = _mm_mul_epu32(H4, p->R20.v);
T4 = _mm_add_epi64(T4, T5); | |
6489 ++ | |
6490 ++ /* H += [Mx,My] */ | |
6491 ++ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_lo
adl_epi64((xmmi *)(m + 16))); | |
6492 ++ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_lo
adl_epi64((xmmi *)(m + 24))); | |
6493 ++ M0 = _mm_and_si128(MMASK, T5); | |
6494 ++ M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
6495 ++ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)
); | |
6496 ++ M2 = _mm_and_si128(MMASK, T5); | |
6497 ++ M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
6498 ++ M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); | |
6499 ++ | |
6500 ++ T0 = _mm_add_epi64(T0, M0); | |
6501 ++ T1 = _mm_add_epi64(T1, M1); | |
6502 ++ T2 = _mm_add_epi64(T2, M2); | |
6503 ++ T3 = _mm_add_epi64(T3, M3); | |
6504 ++ T4 = _mm_add_epi64(T4, M4); | |
6505 ++ | |
6506 ++ /* reduce */ | |
6507 ++ C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _
mm_and_si128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C
1); T4 = _mm_add_epi64(T4, C2); | |
6508 ++ C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _
mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C
1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); | |
6509 ++ C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _
mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C
1); T1 = _mm_add_epi64(T1, C2); | |
6510 ++ C1 = _mm_srli_epi64(T3, 26); T3 = _
mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C
1); | |
6511 ++ | |
6512 ++ /* H = (H*[r^2,r^2] + [Mx,My]) */ | |
6513 ++ H0 = T0; | |
6514 ++ H1 = T1; | |
6515 ++ H2 = T2; | |
6516 ++ H3 = T3; | |
6517 ++ H4 = T4; | |
6518 ++ | |
6519 ++ consumed = 32; | |
6520 ++ } | |
6521 ++ | |
6522 ++ /* finalize, H *= [r^2,r] */ | |
6523 ++ r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; | |
6524 ++ r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; | |
6525 ++ r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; | |
6526 ++ | |
6527 ++ p->R20.d[2] = (uint32_t)( r0 ) & 0x3ffffff; | |
6528 ++ p->R21.d[2] = (uint32_t)((r0 >> 26) | (r1 << 18)) & 0x3ffffff; | |
6529 ++ p->R22.d[2] = (uint32_t)((r1 >> 8) ) & 0x3ffffff; | |
6530 ++ p->R23.d[2] = (uint32_t)((r1 >> 34) | (r2 << 10)) & 0x3ffffff; | |
6531 ++ p->R24.d[2] = (uint32_t)((r2 >> 16) ) ; | |
6532 ++ p->S21.d[2] = p->R21.d[2] * 5; | |
6533 ++ p->S22.d[2] = p->R22.d[2] * 5; | |
6534 ++ p->S23.d[2] = p->R23.d[2] * 5; | |
6535 ++ p->S24.d[2] = p->R24.d[2] * 5; | |
6536 ++ | |
6537 ++ /* H *= [r^2,r] */ | |
6538 ++ T0 = _mm_mul_epu32(H0, p->R20.v); | |
6539 ++ T1 = _mm_mul_epu32(H0, p->R21.v); | |
6540 ++ T2 = _mm_mul_epu32(H0, p->R22.v); | |
6541 ++ T3 = _mm_mul_epu32(H0, p->R23.v); | |
6542 ++ T4 = _mm_mul_epu32(H0, p->R24.v); | |
6543 ++ T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20.v); T0 =
_mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
6544 ++ T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24.v); T0 =
_mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
6545 ++ T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23.v); T0 =
_mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
6546 ++ T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22.v); T0 =
_mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
6547 ++ T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22.v); T2 =
_mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
6548 ++ T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21.v); T2 =
_mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
6549 ++ T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20.v); T2 =
_mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
6550 ++ T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24.v); T2 =
_mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
6551 ++ T5 = _mm_mul_epu32(H1, p->R23.v); T4 =
_mm_add_epi64(T4, T5); | |
6552 ++ T5 = _mm_mul_epu32(H2, p->R22.v); T4 =
_mm_add_epi64(T4, T5); | |
6553 ++ T5 = _mm_mul_epu32(H3, p->R21.v); T4 =
_mm_add_epi64(T4, T5); | |
6554 ++ T5 = _mm_mul_epu32(H4, p->R20.v); T4 =
_mm_add_epi64(T4, T5); | |
6555 ++ | |
6556 ++ C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _mm_and_s
i128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C1); T4 =
_mm_add_epi64(T4, C2); | |
6557 ++ C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _mm_and_s
i128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C1); T0 =
_mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); | |
6558 ++ C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _mm_and_s
i128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C1); T1 =
_mm_add_epi64(T1, C2); | |
6559 ++ C1 = _mm_srli_epi64(T3, 26); T3 = _mm_and_s
i128(T3, MMASK); T4 = _mm_add_epi64(T4, C1); | |
6560 ++ | |
6561 ++ /* H = H[0]+H[1] */ | |
6562 ++ H0 = _mm_add_epi64(T0, _mm_srli_si128(T0, 8)); | |
6563 ++ H1 = _mm_add_epi64(T1, _mm_srli_si128(T1, 8)); | |
6564 ++ H2 = _mm_add_epi64(T2, _mm_srli_si128(T2, 8)); | |
6565 ++ H3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8)); | |
6566 ++ H4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8)); | |
6567 ++ | |
6568 ++ t0 = _mm_cvtsi128_si32(H0) ; c = (t0 >> 26); t0 &= 0x3ffffff; | |
6569 ++ t1 = _mm_cvtsi128_si32(H1) + c; c = (t1 >> 26); t1 &= 0x3ffffff; | |
6570 ++ t2 = _mm_cvtsi128_si32(H2) + c; c = (t2 >> 26); t2 &= 0x3ffffff; | |
6571 ++ t3 = _mm_cvtsi128_si32(H3) + c; c = (t3 >> 26); t3 &= 0x3ffffff; | |
6572 ++ t4 = _mm_cvtsi128_si32(H4) + c; c = (t4 >> 26); t4 &= 0x3ffffff; | |
6573 ++ t0 = t0 + (c * 5); c = (t0 >> 26); t0 &= 0x3ffffff; | |
6574 ++ t1 = t1 + c; | |
6575 ++ | |
6576 ++ st->HH[0] = ((t0 ) | (t1 << 26) ) & 0xfffffffffffull; | |
6577 ++ st->HH[1] = ((t1 >> 18) | (t2 << 8) | (t3 << 34)) & 0xfffffffffffull; | |
6578 ++ st->HH[2] = ((t3 >> 10) | (t4 << 16) ) & 0x3ffffffffffull; | |
6579 ++ | |
6580 ++ return consumed; | |
6581 ++ } | |
6582 ++ | |
6583 ++void | |
6584 ++CRYPTO_poly1305_update(poly1305_state *state, const unsigned char *m, | |
6585 ++ size_t bytes) | |
6586 ++ { | |
6587 ++ poly1305_state_internal *st = poly1305_aligned_state(state); | |
6588 ++ size_t want; | |
6589 ++ | |
6590 ++ /* need at least 32 initial bytes to start the accelerated branch */ | |
6591 ++ if (!st->started) | |
6592 ++ { | |
6593 ++ if ((st->leftover == 0) && (bytes > 32)) | |
6594 ++ { | |
6595 ++ poly1305_first_block(st, m); | |
6596 ++ m += 32; | |
6597 ++ bytes -= 32; | |
6598 ++ } | |
6599 ++ else | |
6600 ++ { | |
6601 ++ want = poly1305_min(32 - st->leftover, bytes); | |
6602 ++ poly1305_block_copy(st->buffer + st->leftover, m, want); | |
6603 ++ bytes -= want; | |
6604 ++ m += want; | |
6605 ++ st->leftover += want; | |
6606 ++ if ((st->leftover < 32) || (bytes == 0)) | |
6607 ++ return; | |
6608 ++ poly1305_first_block(st, st->buffer); | |
6609 ++ st->leftover = 0; | |
6610 ++ } | |
6611 ++ st->started = 1; | |
6612 ++ } | |
6613 ++ | |
6614 ++ /* handle leftover */ | |
6615 ++ if (st->leftover) | |
6616 ++ { | |
6617 ++ want = poly1305_min(64 - st->leftover, bytes); | |
6618 ++ poly1305_block_copy(st->buffer + st->leftover, m, want); | |
6619 ++ bytes -= want; | |
6620 ++ m += want; | |
6621 ++ st->leftover += want; | |
6622 ++ if (st->leftover < 64) | |
6623 ++ return; | |
6624 ++ poly1305_blocks(st, st->buffer, 64); | |
6625 ++ st->leftover = 0; | |
6626 ++ } | |
6627 ++ | |
6628 ++ /* process 64 byte blocks */ | |
6629 ++ if (bytes >= 64) | |
6630 ++ { | |
6631 ++ want = (bytes & ~63); | |
6632 ++ poly1305_blocks(st, m, want); | |
6633 ++ m += want; | |
6634 ++ bytes -= want; | |
6635 ++ } | |
6636 ++ | |
6637 ++ if (bytes) | |
6638 ++ { | |
6639 ++ poly1305_block_copy(st->buffer + st->leftover, m, bytes); | |
6640 ++ st->leftover += bytes; | |
6641 ++ } | |
6642 ++ } | |
6643 ++ | |
6644 ++void | |
6645 ++CRYPTO_poly1305_finish(poly1305_state *state, unsigned char mac[16]) | |
6646 ++ { | |
6647 ++ poly1305_state_internal *st = poly1305_aligned_state(state); | |
6648 ++ size_t leftover = st->leftover; | |
6649 ++ uint8_t *m = st->buffer; | |
6650 ++ uint128_t d[3]; | |
6651 ++ uint64_t h0,h1,h2; | |
6652 ++ uint64_t t0,t1; | |
6653 ++ uint64_t g0,g1,g2,c,nc; | |
6654 ++ uint64_t r0,r1,r2,s1,s2; | |
6655 ++ poly1305_power *p; | |
6656 ++ | |
6657 ++ if (st->started) | |
6658 ++ { | |
6659 ++ size_t consumed = poly1305_combine(st, m, leftover); | |
6660 ++ leftover -= consumed; | |
6661 ++ m += consumed; | |
6662 ++ } | |
6663 ++ | |
6664 ++ /* st->HH will either be 0 or have the combined result */ | |
6665 ++ h0 = st->HH[0]; | |
6666 ++ h1 = st->HH[1]; | |
6667 ++ h2 = st->HH[2]; | |
6668 ++ | |
6669 ++ p = &st->P[1]; | |
6670 ++ r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; | |
6671 ++ r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; | |
6672 ++ r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; | |
6673 ++ s1 = r1 * (5 << 2); | |
6674 ++ s2 = r2 * (5 << 2); | |
6675 ++ | |
6676 ++ if (leftover < 16) | |
6677 ++ goto poly1305_donna_atmost15bytes; | |
6678 ++ | |
6679 ++poly1305_donna_atleast16bytes: | |
6680 ++ t0 = U8TO64_LE(m + 0); | |
6681 ++ t1 = U8TO64_LE(m + 8); | |
6682 ++ h0 += t0 & 0xfffffffffff; | |
6683 ++ t0 = shr128_pair(t1, t0, 44); | |
6684 ++ h1 += t0 & 0xfffffffffff; | |
6685 ++ h2 += (t1 >> 24) | ((uint64_t)1 << 40); | |
6686 ++ | |
6687 ++poly1305_donna_mul: | |
6688 ++ d[0] = add128(add128(mul64x64_128(h0, r0), mul64x64_128(h1, s2)), mul64x
64_128(h2, s1)); | |
6689 ++ d[1] = add128(add128(mul64x64_128(h0, r1), mul64x64_128(h1, r0)), mul64x
64_128(h2, s2)); | |
6690 ++ d[2] = add128(add128(mul64x64_128(h0, r2), mul64x64_128(h1, r1)), mul64x
64_128(h2, r0)); | |
6691 ++ h0 = lo128(d[0]) & 0xfffffffffff; c = shr128(
d[0], 44); | |
6692 ++ d[1] = add128_64(d[1], c); h1 = lo128(d[1]) & 0xfffffffffff; c = shr128(
d[1], 44); | |
6693 ++ d[2] = add128_64(d[2], c); h2 = lo128(d[2]) & 0x3ffffffffff; c = shr128(
d[2], 42); | |
6694 ++ h0 += c * 5; | |
6695 ++ | |
6696 ++ m += 16; | |
6697 ++ leftover -= 16; | |
6698 ++ if (leftover >= 16) goto poly1305_donna_atleast16bytes; | |
6699 ++ | |
6700 ++ /* final bytes */ | |
6701 ++poly1305_donna_atmost15bytes: | |
6702 ++ if (!leftover) goto poly1305_donna_finish; | |
6703 ++ | |
6704 ++ m[leftover++] = 1; | |
6705 ++ poly1305_block_zero(m + leftover, 16 - leftover); | |
6706 ++ leftover = 16; | |
6707 ++ | |
6708 ++ t0 = U8TO64_LE(m+0); | |
6709 ++ t1 = U8TO64_LE(m+8); | |
6710 ++ h0 += t0 & 0xfffffffffff; t0 = shr128_pair(t1, t0, 44); | |
6711 ++ h1 += t0 & 0xfffffffffff; | |
6712 ++ h2 += (t1 >> 24); | |
6713 ++ | |
6714 ++ goto poly1305_donna_mul; | |
6715 ++ | |
6716 ++poly1305_donna_finish: | |
6717 ++ c = (h0 >> 44); h0 &= 0xfffffffffff; | |
6718 ++ h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff; | |
6719 ++ h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff; | |
6720 ++ h0 += c * 5; | |
6721 ++ | |
6722 ++ g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff; | |
6723 ++ g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff; | |
6724 ++ g2 = h2 + c - ((uint64_t)1 << 42); | |
6725 ++ | |
6726 ++ c = (g2 >> 63) - 1; | |
6727 ++ nc = ~c; | |
6728 ++ h0 = (h0 & nc) | (g0 & c); | |
6729 ++ h1 = (h1 & nc) | (g1 & c); | |
6730 ++ h2 = (h2 & nc) | (g2 & c); | |
6731 ++ | |
6732 ++ /* pad */ | |
6733 ++ t0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1]; | |
6734 ++ t1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1]; | |
6735 ++ h0 += (t0 & 0xfffffffffff) ; c = (h0 >> 44); h0 &= 0xfffffffffff; t0
= shr128_pair(t1, t0, 44); | |
6736 ++ h1 += (t0 & 0xfffffffffff) + c; c = (h1 >> 44); h1 &= 0xfffffffffff; t1
= (t1 >> 24); | |
6737 ++ h2 += (t1 ) + c; | |
6738 ++ | |
6739 ++ U64TO8_LE(mac + 0, ((h0 ) | (h1 << 44))); | |
6740 ++ U64TO8_LE(mac + 8, ((h1 >> 20) | (h2 << 24))); | |
6741 ++ } | |
6742 ++ | |
6743 ++#endif /* !OPENSSL_NO_POLY1305 */ | |
6744 +diff --git a/crypto/poly1305/poly1305test.c b/crypto/poly1305/poly1305test.c | |
6745 +new file mode 100644 | |
6746 +index 0000000..8dd26af | |
6747 +--- /dev/null | |
6748 ++++ b/crypto/poly1305/poly1305test.c | |
6749 +@@ -0,0 +1,166 @@ | |
6750 ++/* ==================================================================== | |
6751 ++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
6752 ++ * | |
6753 ++ * Redistribution and use in source and binary forms, with or without | |
6754 ++ * modification, are permitted provided that the following conditions | |
6755 ++ * are met: | |
6756 ++ * | |
6757 ++ * 1. Redistributions of source code must retain the above copyright | |
6758 ++ * notice, this list of conditions and the following disclaimer. | |
6759 ++ * | |
6760 ++ * 2. Redistributions in binary form must reproduce the above copyright | |
6761 ++ * notice, this list of conditions and the following disclaimer in | |
6762 ++ * the documentation and/or other materials provided with the | |
6763 ++ * distribution. | |
6764 ++ * | |
6765 ++ * 3. All advertising materials mentioning features or use of this | |
6766 ++ * software must display the following acknowledgment: | |
6767 ++ * "This product includes software developed by the OpenSSL Project | |
6768 ++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
6769 ++ * | |
6770 ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
6771 ++ * endorse or promote products derived from this software without | |
6772 ++ * prior written permission. For written permission, please contact | |
6773 ++ * licensing@OpenSSL.org. | |
6774 ++ * | |
6775 ++ * 5. Products derived from this software may not be called "OpenSSL" | |
6776 ++ * nor may "OpenSSL" appear in their names without prior written | |
6777 ++ * permission of the OpenSSL Project. | |
6778 ++ * | |
6779 ++ * 6. Redistributions of any form whatsoever must retain the following | |
6780 ++ * acknowledgment: | |
6781 ++ * "This product includes software developed by the OpenSSL Project | |
6782 ++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
6783 ++ * | |
6784 ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
6785 ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
6786 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
6787 ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
6788 ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
6789 ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
6790 ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
6791 ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
6792 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
6793 ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
6794 ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
6795 ++ * OF THE POSSIBILITY OF SUCH DAMAGE. | |
6796 ++ * ==================================================================== | |
6797 ++ */ | |
6798 ++ | |
6799 ++#include <stdio.h> | |
6800 ++#include <stdlib.h> | |
6801 ++#include <string.h> | |
6802 ++ | |
6803 ++#include <openssl/poly1305.h> | |
6804 ++ | |
6805 ++struct poly1305_test | |
6806 ++ { | |
6807 ++ const char *inputhex; | |
6808 ++ const char *keyhex; | |
6809 ++ const char *outhex; | |
6810 ++ }; | |
6811 ++ | |
6812 ++static const struct poly1305_test poly1305_tests[] = { | |
6813 ++ { | |
6814 ++ "", | |
6815 ++ "c8afaac331ee372cd6082de134943b174710130e9f6fea8d72293850a667d86
c", | |
6816 ++ "4710130e9f6fea8d72293850a667d86c", | |
6817 ++ }, | |
6818 ++ { | |
6819 ++ "48656c6c6f20776f726c6421", | |
6820 ++ "746869732069732033322d62797465206b657920666f7220506f6c793133303
5", | |
6821 ++ "a6f745008f81c916a20dcc74eef2b2f0", | |
6822 ++ }, | |
6823 ++ { | |
6824 ++ "000000000000000000000000000000000000000000000000000000000000000
0", | |
6825 ++ "746869732069732033322d62797465206b657920666f7220506f6c793133303
5", | |
6826 ++ "49ec78090e481ec6c26b33b91ccc0307", | |
6827 ++ }, | |
6828 ++ { | |
6829 ++ "000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000", | |
6830 ++ "746869732069732033322d62797465206b657920666f7220506f6c793133303
5", | |
6831 ++ "da84bcab02676c38cdb015604274c2aa", | |
6832 ++ }, | |
6833 ++}; | |
6834 ++ | |
6835 ++static unsigned char hex_digit(char h) | |
6836 ++ { | |
6837 ++ if (h >= '0' && h <= '9') | |
6838 ++ return h - '0'; | |
6839 ++ else if (h >= 'a' && h <= 'f') | |
6840 ++ return h - 'a' + 10; | |
6841 ++ else if (h >= 'A' && h <= 'F') | |
6842 ++ return h - 'A' + 10; | |
6843 ++ else | |
6844 ++ abort(); | |
6845 ++ } | |
6846 ++ | |
6847 ++static void hex_decode(unsigned char *out, const char* hex) | |
6848 ++ { | |
6849 ++ size_t j = 0; | |
6850 ++ | |
6851 ++ while (*hex != 0) | |
6852 ++ { | |
6853 ++ unsigned char v = hex_digit(*hex++); | |
6854 ++ v <<= 4; | |
6855 ++ v |= hex_digit(*hex++); | |
6856 ++ out[j++] = v; | |
6857 ++ } | |
6858 ++ } | |
6859 ++ | |
6860 ++static void hexdump(unsigned char *a, size_t len) | |
6861 ++ { | |
6862 ++ size_t i; | |
6863 ++ | |
6864 ++ for (i = 0; i < len; i++) | |
6865 ++ printf("%02x", a[i]); | |
6866 ++ } | |
6867 ++ | |
6868 ++int main() | |
6869 ++ { | |
6870 ++ static const unsigned num_tests = | |
6871 ++ sizeof(poly1305_tests) / sizeof(struct poly1305_test); | |
6872 ++ unsigned i; | |
6873 ++ unsigned char key[32], out[16], expected[16]; | |
6874 ++ poly1305_state poly1305; | |
6875 ++ | |
6876 ++ for (i = 0; i < num_tests; i++) | |
6877 ++ { | |
6878 ++ const struct poly1305_test *test = &poly1305_tests[i]; | |
6879 ++ unsigned char *in; | |
6880 ++ size_t inlen = strlen(test->inputhex); | |
6881 ++ | |
6882 ++ if (strlen(test->keyhex) != sizeof(key)*2 || | |
6883 ++ strlen(test->outhex) != sizeof(out)*2 || | |
6884 ++ (inlen & 1) == 1) | |
6885 ++ return 1; | |
6886 ++ | |
6887 ++ inlen /= 2; | |
6888 ++ | |
6889 ++ hex_decode(key, test->keyhex); | |
6890 ++ hex_decode(expected, test->outhex); | |
6891 ++ | |
6892 ++ in = malloc(inlen); | |
6893 ++ | |
6894 ++ hex_decode(in, test->inputhex); | |
6895 ++ CRYPTO_poly1305_init(&poly1305, key); | |
6896 ++ CRYPTO_poly1305_update(&poly1305, in, inlen); | |
6897 ++ CRYPTO_poly1305_finish(&poly1305, out); | |
6898 ++ | |
6899 ++ if (memcmp(out, expected, sizeof(expected)) != 0) | |
6900 ++ { | |
6901 ++ printf("Poly1305 test #%d failed.\n", i); | |
6902 ++ printf("got: "); | |
6903 ++ hexdump(out, sizeof(out)); | |
6904 ++ printf("\nexpected: "); | |
6905 ++ hexdump(expected, sizeof(expected)); | |
6906 ++ printf("\n"); | |
6907 ++ return 1; | |
6908 ++ } | |
6909 ++ | |
6910 ++ free(in); | |
6911 ++ } | |
6912 ++ | |
6913 ++ printf("PASS\n"); | |
6914 ++ return 0; | |
6915 ++ } | |
6916 +diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c | |
6917 +index 75b6560..a042b8d 100644 | |
6918 +--- a/ssl/s3_lib.c | |
6919 ++++ b/ssl/s3_lib.c | |
6920 +@@ -1841,7 +1841,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
6921 + SSL_AEAD, | |
6922 + SSL_TLSV1_2, | |
6923 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
6924 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
6925 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
6926 ++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
6927 + 128, | |
6928 + 128, | |
6929 + }, | |
6930 +@@ -1873,7 +1874,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
6931 + SSL_AEAD, | |
6932 + SSL_TLSV1_2, | |
6933 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
6934 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
6935 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
6936 ++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
6937 + 128, | |
6938 + 128, | |
6939 + }, | |
6940 +@@ -1905,7 +1907,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
6941 + SSL_AEAD, | |
6942 + SSL_TLSV1_2, | |
6943 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
6944 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
6945 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
6946 ++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
6947 + 128, | |
6948 + 128, | |
6949 + }, | |
6950 +@@ -1937,7 +1940,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
6951 + SSL_AEAD, | |
6952 + SSL_TLSV1_2, | |
6953 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
6954 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
6955 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
6956 ++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
6957 + 128, | |
6958 + 128, | |
6959 + }, | |
6960 +@@ -1969,7 +1973,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
6961 + SSL_AEAD, | |
6962 + SSL_TLSV1_2, | |
6963 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
6964 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
6965 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
6966 ++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
6967 + 128, | |
6968 + 128, | |
6969 + }, | |
6970 +@@ -2001,7 +2006,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
6971 + SSL_AEAD, | |
6972 + SSL_TLSV1_2, | |
6973 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
6974 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
6975 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
6976 ++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
6977 + 128, | |
6978 + 128, | |
6979 + }, | |
6980 +@@ -2714,7 +2720,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
6981 + SSL_AEAD, | |
6982 + SSL_TLSV1_2, | |
6983 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
6984 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
6985 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
6986 ++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
6987 + 128, | |
6988 + 128, | |
6989 + }, | |
6990 +@@ -2746,7 +2753,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
6991 + SSL_AEAD, | |
6992 + SSL_TLSV1_2, | |
6993 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
6994 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
6995 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
6996 ++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
6997 + 128, | |
6998 + 128, | |
6999 + }, | |
7000 +@@ -2778,7 +2786,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
7001 + SSL_AEAD, | |
7002 + SSL_TLSV1_2, | |
7003 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
7004 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
7005 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
7006 ++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
7007 + 128, | |
7008 + 128, | |
7009 + }, | |
7010 +@@ -2810,7 +2819,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
7011 + SSL_AEAD, | |
7012 + SSL_TLSV1_2, | |
7013 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
7014 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
7015 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
7016 ++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
7017 + 128, | |
7018 + 128, | |
7019 + }, | |
7020 +@@ -2894,6 +2904,51 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
7021 + }, | |
7022 + #endif | |
7023 + | |
7024 ++ { | |
7025 ++ 1, | |
7026 ++ TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305, | |
7027 ++ TLS1_CK_ECDHE_RSA_CHACHA20_POLY1305, | |
7028 ++ SSL_kEECDH, | |
7029 ++ SSL_aRSA, | |
7030 ++ SSL_CHACHA20POLY1305, | |
7031 ++ SSL_AEAD, | |
7032 ++ SSL_TLSV1_2, | |
7033 ++ SSL_NOT_EXP|SSL_HIGH, | |
7034 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(0), | |
7035 ++ 256, | |
7036 ++ 0, | |
7037 ++ }, | |
7038 ++ | |
7039 ++ { | |
7040 ++ 1, | |
7041 ++ TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, | |
7042 ++ TLS1_CK_ECDHE_ECDSA_CHACHA20_POLY1305, | |
7043 ++ SSL_kEECDH, | |
7044 ++ SSL_aECDSA, | |
7045 ++ SSL_CHACHA20POLY1305, | |
7046 ++ SSL_AEAD, | |
7047 ++ SSL_TLSV1_2, | |
7048 ++ SSL_NOT_EXP|SSL_HIGH, | |
7049 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(0), | |
7050 ++ 256, | |
7051 ++ 0, | |
7052 ++ }, | |
7053 ++ | |
7054 ++ { | |
7055 ++ 1, | |
7056 ++ TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305, | |
7057 ++ TLS1_CK_DHE_RSA_CHACHA20_POLY1305, | |
7058 ++ SSL_kEDH, | |
7059 ++ SSL_aRSA, | |
7060 ++ SSL_CHACHA20POLY1305, | |
7061 ++ SSL_AEAD, | |
7062 ++ SSL_TLSV1_2, | |
7063 ++ SSL_NOT_EXP|SSL_HIGH, | |
7064 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(0), | |
7065 ++ 256, | |
7066 ++ 0, | |
7067 ++ }, | |
7068 ++ | |
7069 + /* end of list */ | |
7070 + }; | |
7071 + | |
7072 +diff --git a/ssl/s3_pkt.c b/ssl/s3_pkt.c | |
7073 +index 5038f6c..04b474d 100644 | |
7074 +--- a/ssl/s3_pkt.c | |
7075 ++++ b/ssl/s3_pkt.c | |
7076 +@@ -790,8 +790,11 @@ static int do_ssl3_write(SSL *s, int type, const unsigned
char *buf, | |
7077 + else | |
7078 + eivlen = 0; | |
7079 + } | |
7080 +- else if (s->aead_write_ctx != NULL) | |
7081 ++ else if (s->aead_write_ctx != NULL && | |
7082 ++ s->aead_write_ctx->variable_nonce_included_in_record) | |
7083 ++ { | |
7084 + eivlen = s->aead_write_ctx->variable_nonce_len; | |
7085 ++ } | |
7086 + else | |
7087 + eivlen = 0; | |
7088 + | |
7089 +diff --git a/ssl/ssl.h b/ssl/ssl.h | |
7090 +index 0644cbf..d782a98 100644 | |
7091 +--- a/ssl/ssl.h | |
7092 ++++ b/ssl/ssl.h | |
7093 +@@ -291,6 +291,7 @@ extern "C" { | |
7094 + #define SSL_TXT_CAMELLIA128 "CAMELLIA128" | |
7095 + #define SSL_TXT_CAMELLIA256 "CAMELLIA256" | |
7096 + #define SSL_TXT_CAMELLIA "CAMELLIA" | |
7097 ++#define SSL_TXT_CHACHA20 "CHACHA20" | |
7098 + | |
7099 + #define SSL_TXT_MD5 "MD5" | |
7100 + #define SSL_TXT_SHA1 "SHA1" | |
7101 +diff --git a/ssl/ssl_ciph.c b/ssl/ssl_ciph.c | |
7102 +index 7e780cd..b6370bd 100644 | |
7103 +--- a/ssl/ssl_ciph.c | |
7104 ++++ b/ssl/ssl_ciph.c | |
7105 +@@ -298,6 +298,7 @@ static const SSL_CIPHER cipher_aliases[]={ | |
7106 + {0,SSL_TXT_CAMELLIA128,0,0,0,SSL_CAMELLIA128,0,0,0,0,0,0}, | |
7107 + {0,SSL_TXT_CAMELLIA256,0,0,0,SSL_CAMELLIA256,0,0,0,0,0,0}, | |
7108 + {0,SSL_TXT_CAMELLIA ,0,0,0,SSL_CAMELLIA128|SSL_CAMELLIA256,0,0,0,0,0,0
}, | |
7109 ++ {0,SSL_TXT_CHACHA20 ,0,0,0,SSL_CHACHA20POLY1305,0,0,0,0,0,0}, | |
7110 + | |
7111 + /* MAC aliases */ | |
7112 + {0,SSL_TXT_MD5,0, 0,0,0,SSL_MD5, 0,0,0,0,0}, | |
7113 +@@ -523,9 +524,15 @@ int ssl_cipher_get_evp_aead(const SSL_SESSION *s, const EV
P_AEAD **aead) | |
7114 + return 0; | |
7115 + | |
7116 + #ifndef OPENSSL_NO_AES | |
7117 +- /* There is only one AEAD for now. */ | |
7118 +- *aead = EVP_aead_aes_128_gcm(); | |
7119 +- return 1; | |
7120 ++ switch (c->algorithm_enc) | |
7121 ++ { | |
7122 ++ case SSL_AES128GCM: | |
7123 ++ *aead = EVP_aead_aes_128_gcm(); | |
7124 ++ return 1; | |
7125 ++ case SSL_CHACHA20POLY1305: | |
7126 ++ *aead = EVP_aead_chacha20_poly1305(); | |
7127 ++ return 1; | |
7128 ++ } | |
7129 + #endif | |
7130 + | |
7131 + return 0; | |
7132 +@@ -1715,6 +1722,9 @@ char *SSL_CIPHER_description(const SSL_CIPHER *cipher, ch
ar *buf, int len) | |
7133 + case SSL_SEED: | |
7134 + enc="SEED(128)"; | |
7135 + break; | |
7136 ++ case SSL_CHACHA20POLY1305: | |
7137 ++ enc="ChaCha20-Poly1305"; | |
7138 ++ break; | |
7139 + default: | |
7140 + enc="unknown"; | |
7141 + break; | |
7142 +diff --git a/ssl/ssl_locl.h b/ssl/ssl_locl.h | |
7143 +index 63bc28b..b83d8cd 100644 | |
7144 +--- a/ssl/ssl_locl.h | |
7145 ++++ b/ssl/ssl_locl.h | |
7146 +@@ -328,6 +328,7 @@ | |
7147 + #define SSL_SEED 0x00000800L | |
7148 + #define SSL_AES128GCM 0x00001000L | |
7149 + #define SSL_AES256GCM 0x00002000L | |
7150 ++#define SSL_CHACHA20POLY1305 0x00004000L | |
7151 + | |
7152 + #define SSL_AES (SSL_AES128|SSL_AES256|SSL_AES128GCM|SSL
_AES256GCM) | |
7153 + #define SSL_CAMELLIA (SSL_CAMELLIA128|SSL_CAMELLIA256) | |
7154 +@@ -389,6 +390,12 @@ | |
7155 + #define SSL_CIPHER_AEAD_FIXED_NONCE_LEN(ssl_cipher) \ | |
7156 + (((ssl_cipher->algorithm2 >> 24) & 0xf)*2) | |
7157 + | |
7158 ++/* SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD is a flag in | |
7159 ++ * SSL_CIPHER.algorithm2 which indicates that the variable part of the nonce i
s | |
7160 ++ * included as a prefix of the record. (AES-GCM, for example, does with with a
n | |
7161 ++ * 8-byte variable nonce.) */ | |
7162 ++#define SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD (1<<22) | |
7163 ++ | |
7164 + /* | |
7165 + * Export and cipher strength information. For each cipher we have to decide | |
7166 + * whether it is exportable or not. This information is likely to change | |
7167 +@@ -605,6 +612,9 @@ struct ssl_aead_ctx_st | |
7168 + * records. */ | |
7169 + unsigned char fixed_nonce[8]; | |
7170 + unsigned char fixed_nonce_len, variable_nonce_len, tag_len; | |
7171 ++ /* variable_nonce_included_in_record is non-zero if the variable nonce | |
7172 ++ * for a record is included as a prefix before the ciphertext. */ | |
7173 ++ char variable_nonce_included_in_record; | |
7174 + }; | |
7175 + | |
7176 + #ifndef OPENSSL_NO_COMP | |
7177 +diff --git a/ssl/t1_enc.c b/ssl/t1_enc.c | |
7178 +index 7af1a32..15800af 100644 | |
7179 +--- a/ssl/t1_enc.c | |
7180 ++++ b/ssl/t1_enc.c | |
7181 +@@ -366,6 +366,8 @@ static int tls1_change_cipher_state_aead(SSL *s, char is_re
ad, | |
7182 + memcpy(aead_ctx->fixed_nonce, iv, iv_len); | |
7183 + aead_ctx->fixed_nonce_len = iv_len; | |
7184 + aead_ctx->variable_nonce_len = 8; /* always the case, currently. */ | |
7185 ++ aead_ctx->variable_nonce_included_in_record = | |
7186 ++ (s->s3->tmp.new_cipher->algorithm2 & SSL_CIPHER_ALGORITHM2_VARIA
BLE_NONCE_INCLUDED_IN_RECORD) != 0; | |
7187 + if (aead_ctx->variable_nonce_len + aead_ctx->fixed_nonce_len != EVP_AEAD
_nonce_length(aead)) | |
7188 + { | |
7189 + SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE_AEAD, ERR_R_INTERNAL_ERROR
); | |
7190 +@@ -863,6 +865,7 @@ int tls1_enc(SSL *s, int send) | |
7191 + if (send) | |
7192 + { | |
7193 + size_t len = rec->length; | |
7194 ++ size_t eivlen = 0; | |
7195 + in = rec->input; | |
7196 + out = rec->data; | |
7197 + | |
7198 +@@ -878,18 +881,22 @@ int tls1_enc(SSL *s, int send) | |
7199 + * variable nonce. Thus we can copy the sequence number | |
7200 + * bytes into place without overwriting any of the | |
7201 + * plaintext. */ | |
7202 +- memcpy(out, ad, aead->variable_nonce_len); | |
7203 +- len -= aead->variable_nonce_len; | |
7204 ++ if (aead->variable_nonce_included_in_record) | |
7205 ++ { | |
7206 ++ memcpy(out, ad, aead->variable_nonce_len); | |
7207 ++ len -= aead->variable_nonce_len; | |
7208 ++ eivlen = aead->variable_nonce_len; | |
7209 ++ } | |
7210 + | |
7211 + ad[11] = len >> 8; | |
7212 + ad[12] = len & 0xff; | |
7213 + | |
7214 + n = EVP_AEAD_CTX_seal(&aead->ctx, | |
7215 +- out + aead->variable_nonce_len, le
n + aead->tag_len, | |
7216 ++ out + eivlen, len + aead->tag_len, | |
7217 + nonce, nonce_used, | |
7218 +- in + aead->variable_nonce_len, len
, | |
7219 ++ in + eivlen, len, | |
7220 + ad, sizeof(ad)); | |
7221 +- if (n >= 0) | |
7222 ++ if (n >= 0 && aead->variable_nonce_included_in_record) | |
7223 + n += aead->variable_nonce_len; | |
7224 + } | |
7225 + else | |
7226 +@@ -903,12 +910,17 @@ int tls1_enc(SSL *s, int send) | |
7227 + | |
7228 + if (len < aead->variable_nonce_len) | |
7229 + return 0; | |
7230 +- memcpy(nonce + nonce_used, in, aead->variable_nonce_len)
; | |
7231 ++ memcpy(nonce + nonce_used, | |
7232 ++ aead->variable_nonce_included_in_record ? in : ad
, | |
7233 ++ aead->variable_nonce_len); | |
7234 + nonce_used += aead->variable_nonce_len; | |
7235 + | |
7236 +- in += aead->variable_nonce_len; | |
7237 +- len -= aead->variable_nonce_len; | |
7238 +- out += aead->variable_nonce_len; | |
7239 ++ if (aead->variable_nonce_included_in_record) | |
7240 ++ { | |
7241 ++ in += aead->variable_nonce_len; | |
7242 ++ len -= aead->variable_nonce_len; | |
7243 ++ out += aead->variable_nonce_len; | |
7244 ++ } | |
7245 + | |
7246 + if (len < aead->tag_len) | |
7247 + return 0; | |
7248 +diff --git a/ssl/tls1.h b/ssl/tls1.h | |
7249 +index 8cac7df..3cbcb83 100644 | |
7250 +--- a/ssl/tls1.h | |
7251 ++++ b/ssl/tls1.h | |
7252 +@@ -526,6 +526,10 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_C
B,(void (*)(void))cb) | |
7253 + #define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256 0x0300C031 | |
7254 + #define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384 0x0300C032 | |
7255 + | |
7256 ++#define TLS1_CK_ECDHE_RSA_CHACHA20_POLY1305 0x0300CC13 | |
7257 ++#define TLS1_CK_ECDHE_ECDSA_CHACHA20_POLY1305 0x0300CC14 | |
7258 ++#define TLS1_CK_DHE_RSA_CHACHA20_POLY1305 0x0300CC15 | |
7259 ++ | |
7260 + /* XXX | |
7261 + * Inconsistency alert: | |
7262 + * The OpenSSL names of ciphers with ephemeral DH here include the string | |
7263 +@@ -677,6 +681,10 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_C
B,(void (*)(void))cb) | |
7264 + #define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256 "ECDH-RSA-AES128-GCM-S
HA256" | |
7265 + #define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384 "ECDH-RSA-AES256-GCM-S
HA384" | |
7266 + | |
7267 ++#define TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305 "ECDHE-RSA-CHACHA20-POLY
1305" | |
7268 ++#define TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 "ECDHE-ECDSA-CHACHA20-PO
LY1305" | |
7269 ++#define TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305 "DHE-RSA-CHACHA2
0-POLY1305" | |
7270 ++ | |
7271 + #define TLS_CT_RSA_SIGN 1 | |
7272 + #define TLS_CT_DSS_SIGN 2 | |
7273 + #define TLS_CT_RSA_FIXED_DH 3 | |
7274 +diff --git a/test/Makefile b/test/Makefile | |
7275 +index 4c9eabc..4790aa8 100644 | |
7276 +--- a/test/Makefile | |
7277 ++++ b/test/Makefile | |
7278 +@@ -86,7 +86,9 @@ OBJ= $(BNTEST).o $(ECTEST).o $(ECDSATEST).o $(ECDHTEST).o $(
IDEATEST).o \ | |
7279 + $(MDC2TEST).o $(RMDTEST).o \ | |
7280 + $(RANDTEST).o $(DHTEST).o $(ENGINETEST).o $(CASTTEST).o \ | |
7281 + $(BFTEST).o $(SSLTEST).o $(DSATEST).o $(EXPTEST).o $(RSATEST).o \ | |
7282 +- $(EVPTEST).o $(IGETEST).o $(JPAKETEST).o $(ASN1TEST).o | |
7283 ++ $(EVPTEST).o $(IGETEST).o $(JPAKETEST).o $(ASN1TEST).o $(CHACHATEST).o \ | |
7284 ++ $(POLY1305TEST).o | |
7285 ++ | |
7286 + SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $(IDEATEST).c \ | |
7287 + $(MD2TEST).c $(MD4TEST).c $(MD5TEST).c \ | |
7288 + $(HMACTEST).c $(WPTEST).c \ | |
7289 +@@ -94,7 +96,8 @@ SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $(
IDEATEST).c \ | |
7290 + $(DESTEST).c $(SHATEST).c $(SHA1TEST).c $(MDC2TEST).c $(RMDTEST).c \ | |
7291 + $(RANDTEST).c $(DHTEST).c $(ENGINETEST).c $(CASTTEST).c \ | |
7292 + $(BFTEST).c $(SSLTEST).c $(DSATEST).c $(EXPTEST).c $(RSATEST).c \ | |
7293 +- $(EVPTEST).c $(IGETEST).c $(JPAKETEST).c $(SRPTEST).c $(ASN1TEST).c | |
7294 ++ $(EVPTEST).c $(IGETEST).c $(JPAKETEST).c $(SRPTEST).c $(ASN1TEST).c \ | |
7295 ++ $(CHACHATEST).c $(POLY1305TEST).c | |
7296 + | |
7297 + EXHEADER= | |
7298 + HEADER= $(EXHEADER) | |
7299 +@@ -137,7 +140,7 @@ alltests: \ | |
7300 + test_enc test_x509 test_rsa test_crl test_sid \ | |
7301 + test_gen test_req test_pkcs7 test_verify test_dh test_dsa \ | |
7302 + test_ss test_ca test_engine test_evp test_ssl test_tsa test_ige \ | |
7303 +- test_jpake test_srp test_cms | |
7304 ++ test_jpake test_srp test_cms test_chacha test_poly1305 | |
7305 + | |
7306 + test_evp: | |
7307 + ../util/shlib_wrap.sh ./$(EVPTEST) evptests.txt | |
7308 +@@ -318,6 +321,14 @@ test_srp: $(SRPTEST)$(EXE_EXT) | |
7309 + @echo "Test SRP" | |
7310 + ../util/shlib_wrap.sh ./srptest | |
7311 + | |
7312 ++test_chacha: $(CHACHATEST)$(EXE_EXT) | |
7313 ++ @echo "Test ChaCha" | |
7314 ++ ../util/shlib_wrap.sh ./$(CHACHATEST) | |
7315 ++ | |
7316 ++test_poly1305: $(POLY1305TEST)$(EXE_EXT) | |
7317 ++ @echo "Test Poly1305" | |
7318 ++ ../util/shlib_wrap.sh ./$(POLY1305TEST) | |
7319 ++ | |
7320 + lint: | |
7321 + lint -DLINT $(INCLUDES) $(SRC)>fluff | |
7322 + | |
7323 +@@ -394,6 +405,12 @@ $(SHA256TEST)$(EXE_EXT): $(SHA256TEST).o $(DLIBCRYPTO) | |
7324 + $(SHA512TEST)$(EXE_EXT): $(SHA512TEST).o $(DLIBCRYPTO) | |
7325 + @target=$(SHA512TEST); $(BUILD_CMD) | |
7326 + | |
7327 ++$(CHACHATEST)$(EXE_EXT): $(CHACHATEST).o $(DLIBCRYPTO) | |
7328 ++ @target=$(CHACHATEST); $(BUILD_CMD) | |
7329 ++ | |
7330 ++$(POLY1305TEST)$(EXE_EXT): $(POLY1305TEST).o $(DLIBCRYPTO) | |
8331 ++	@target=$(POLY1305TEST); $(BUILD_CMD) | |
7332 ++ | |
7333 + $(RMDTEST)$(EXE_EXT): $(RMDTEST).o $(DLIBCRYPTO) | |
7334 + @target=$(RMDTEST); $(BUILD_CMD) | |
7335 + | |
7336 +-- | |
7337 +1.8.4.1 | |
7338 + | |
7339 diff -burN android-openssl-lhash2/patches/neon_runtime.patch android-openssl/pat
ches/neon_runtime.patch | |
7340 --- android-openssl-lhash2/patches/neon_runtime.patch 1969-12-31 19:00:00.0000
00000 -0500 | |
7341 +++ android-openssl/patches/neon_runtime.patch 2013-11-05 16:51:28.668287449 -0
500 | |
7342 @@ -0,0 +1,1123 @@ | |
7343 +From aea47606333cfd3e7a09cab3e42e488c79a416af Mon Sep 17 00:00:00 2001 | |
7344 +From: Adam Langley <agl@chromium.org> | |
7345 +Date: Tue, 5 Nov 2013 13:10:11 -0500 | |
7346 +Subject: [PATCH 52/52] Optional NEON support on ARM. | |
7347 + | |
7348 +This patch causes ARM to build both the NEON and generic versions of | |
7349 +ChaCha20 and Poly1305. The NEON code can be enabled at run-time by | |
7350 +calling CRYPTO_set_NEON_capable(1). | |
7351 +--- | |
7352 + .gitignore | 1 + | |
7353 + Configure | 2 +- | |
7354 + apps/speed.c | 5 + | |
7355 + crypto/chacha/chacha_enc.c | 18 + | |
7356 + crypto/chacha/chacha_vec.c | 7 + | |
7357 + crypto/chacha/chacha_vec_arm.s | 846 +++++++++++++++++++++++++++++++++++++++++ | |
7358 + crypto/cryptlib.c | 14 + | |
7359 + crypto/crypto.h | 8 + | |
7360 + crypto/poly1305/poly1305.c | 35 ++ | |
7361 + crypto/poly1305/poly1305_arm.c | 9 +- | |
7362 + 10 files changed, 941 insertions(+), 4 deletions(-) | |
7363 + create mode 100644 crypto/chacha/chacha_vec_arm.s | |
7364 + | |
7365 +diff --git a/Configure b/Configure | |
7366 +index 1b95384..18b7af0 100755 | |
7367 +--- a/Configure | |
7368 ++++ b/Configure | |
7369 +@@ -136,7 +136,7 @@ my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-
alpha.o:::::::ghash-a | |
7370 + my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::
::::::::"; | |
7371 + my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha
256-mips.o sha512-mips.o::::::::::"; | |
7372 + my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::a
es-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4
-s390x.o:::::::ghash-s390x.o:"; | |
7373 +-my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_c
bc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-
armv4.o::chacha_vec.o:poly1305_arm.o poly1305_arm_asm.o:void"; | |
7374 ++my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_c
bc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-
armv4.o::chacha_vec_arm.o chacha_enc.o:poly1305.o poly1305_arm.o poly1305_arm_as
m.o:void"; | |
7375 + my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-
parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash
-parisc.o::::32"; | |
7376 + my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o a
es-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::gh
ash-parisc.o::::64"; | |
7377 + my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.
o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::::"; | |
7378 +diff --git a/crypto/chacha/chacha_enc.c b/crypto/chacha/chacha_enc.c | |
7379 +index 54d1ca3..e4b648f 100644 | |
7380 +--- a/crypto/chacha/chacha_enc.c | |
7381 ++++ b/crypto/chacha/chacha_enc.c | |
7382 +@@ -61,6 +61,7 @@ | |
7383 + | |
7384 + #if !defined(OPENSSL_NO_CHACHA) | |
7385 + | |
7386 ++#include <openssl/crypto.h> | |
7387 + #include <openssl/chacha.h> | |
7388 + | |
7389 + /* sigma contains the ChaCha constants, which happen to be an ASCII string. */ | |
7390 +@@ -87,6 +88,15 @@ static const char sigma[16] = "expand 32-byte k"; | |
7391 + | |
7392 + typedef unsigned int uint32_t; | |
7393 + | |
7394 ++#if __arm__ | |
7395 ++/* Defined in chacha_vec.c */ | |
7396 ++void CRYPTO_chacha_20_neon(unsigned char *out, | |
7397 ++ const unsigned char *in, size_t in_len, | |
7398 ++ const unsigned char key[32], | |
7399 ++ const unsigned char nonce[8], | |
7400 ++ size_t counter); | |
7401 ++#endif | |
7402 ++ | |
7403 + /* chacha_core performs |num_rounds| rounds of ChaCha20 on the input words in | |
7404 + * |input| and writes the 64 output bytes to |output|. */ | |
7405 + static void chacha_core(unsigned char output[64], const uint32_t input[16], | |
7406 +@@ -124,6 +134,16 @@ void CRYPTO_chacha_20(unsigned char *out, | |
7407 + unsigned char buf[64]; | |
7408 + size_t todo, i; | |
7409 + | |
7410 ++#if __arm__ | |
7411 ++ if (CRYPTO_is_NEON_capable() && | |
7412 ++ ((intptr_t)in & 15) == 0 && | |
7413 ++ ((intptr_t)out & 15) == 0) | |
7414 ++ { | |
7415 ++ CRYPTO_chacha_20_neon(out, in, in_len, key, nonce, counter); | |
7416 ++ return; | |
7417 ++ } | |
7418 ++#endif | |
7419 ++ | |
7420 + input[0] = U8TO32_LITTLE(sigma + 0); | |
7421 + input[1] = U8TO32_LITTLE(sigma + 4); | |
7422 + input[2] = U8TO32_LITTLE(sigma + 8); | |
7423 +diff --git a/crypto/chacha/chacha_vec.c b/crypto/chacha/chacha_vec.c | |
7424 +index 33b2238..1226c39 100644 | |
7425 +--- a/crypto/chacha/chacha_vec.c | |
7426 ++++ b/crypto/chacha/chacha_vec.c | |
7427 +@@ -154,7 +154,14 @@ typedef unsigned vec __attribute__ ((vector_size (16))); | |
7428 + STORE(op + d + 8, LOAD(in + d + 8) ^ REVV_BE(v2)); \ | |
7429 + STORE(op + d +12, LOAD(in + d +12) ^ REVV_BE(v3)); | |
7430 + | |
7431 ++#if __ARM_NEON__ | |
7432 ++/* For ARM, we can't depend on NEON support, so this function is compiled with | |
7433 ++ * a different name, along with the generic code, and can be enabled at | |
7434 ++ * run-time. */ | |
7435 ++void CRYPTO_chacha_20_neon( | |
7436 ++#else | |
7437 + void CRYPTO_chacha_20( | |
7438 ++#endif | |
7439 + unsigned char *out, | |
7440 + const unsigned char *in, | |
7441 + size_t inlen, | |
7442 +diff --git a/crypto/chacha/chacha_vec_arm.S b/crypto/chacha/chacha_vec_arm.S | |
7443 +new file mode 100644 | |
7444 +index 0000000..24a5050 | |
7445 +--- /dev/null | |
7446 ++++ b/crypto/chacha/chacha_vec_arm.S | |
7447 +@@ -0,0 +1,863 @@ | |
7448 ++# This file contains a pre-compiled version of chacha_vec.c for ARM. This is | |
7449 ++# needed to support switching on NEON code at runtime. If the whole of OpenSSL | |
7450 ++# were to be compiled with the needed flags to build chacha_vec.c, then it | |
7451 ++# wouldn't be possible to run on non-NEON systems. | |
7452 ++# | |
7453 ++# This file was generated by: | |
7454 ++# | |
7455 ++# /opt/gcc-linaro-arm-linux-gnueabihf-4.7-2012.10-20121022_linux/bin/arm-l
inux-gnueabihf-gcc -O3 -mcpu=cortex-a8 -mfpu=neon -S chacha_vec.c -I ../../inclu
de -fpic -o chacha_vec_arm.S | |
7456 ++# | |
7457 ++# And then EABI attribute 28 was set to zero to allow linking with soft-float | |
7458 ++# code. | |
7459 ++ | |
7460 ++ .syntax unified | |
7461 ++ .cpu cortex-a8 | |
7462 ++ .eabi_attribute 27, 3 | |
7463 ++ .eabi_attribute 28, 0 | |
7464 ++ .fpu neon | |
7465 ++ .eabi_attribute 20, 1 | |
7466 ++ .eabi_attribute 21, 1 | |
7467 ++ .eabi_attribute 23, 3 | |
7468 ++ .eabi_attribute 24, 1 | |
7469 ++ .eabi_attribute 25, 1 | |
7470 ++ .eabi_attribute 26, 2 | |
7471 ++ .eabi_attribute 30, 2 | |
7472 ++ .eabi_attribute 34, 1 | |
7473 ++ .eabi_attribute 18, 4 | |
7474 ++ .thumb | |
7475 ++ .file "chacha_vec.c" | |
7476 ++ .text | |
7477 ++ .align 2 | |
7478 ++ .global CRYPTO_chacha_20_neon | |
7479 ++ .thumb | |
7480 ++ .thumb_func | |
7481 ++ .type CRYPTO_chacha_20_neon, %function | |
7482 ++CRYPTO_chacha_20_neon: | |
7483 ++ @ args = 8, pretend = 0, frame = 304 | |
7484 ++ @ frame_needed = 1, uses_anonymous_args = 0 | |
7485 ++ @ link register save eliminated. | |
7486 ++ push {r4, r5, r6, r7, r8, r9, sl, fp} | |
7487 ++ fstmfdd sp!, {d8, d9, d10, d11, d12, d13, d14, d15} | |
7488 ++ sub sp, sp, #304 | |
7489 ++ add r7, sp, #0 | |
7490 ++ movw ip, #43691 | |
7491 ++ movt ip, 43690 | |
7492 ++ str r2, [r7, #196] | |
7493 ++ sub sp, sp, #96 | |
7494 ++ ldr r4, [r7, #196] | |
7495 ++ ldr r6, [r7, #400] | |
7496 ++ ldr r2, .L38+16 | |
7497 ++ umull r4, ip, ip, r4 | |
7498 ++ ldr r6, [r6, #0] | |
7499 ++ ldr r8, [r7, #400] | |
7500 ++.LPIC24: | |
7501 ++ add r2, pc | |
7502 ++ add r4, sp, #15 | |
7503 ++ str r3, [r7, #244] | |
7504 ++ str r6, [r7, #176] | |
7505 ++ bic r4, r4, #15 | |
7506 ++ str r0, [r7, #188] | |
7507 ++ str r4, [r7, #200] | |
7508 ++ lsrs ip, ip, #7 | |
7509 ++ str r1, [r7, #184] | |
7510 ++ ldmia r2, {r0, r1, r2, r3} | |
7511 ++ ldr r4, [r8, #4] | |
7512 ++ ldr r5, [r7, #244] | |
7513 ++ vld1.64 {d24-d25}, [r5:64] | |
7514 ++ vldr d26, [r5, #16] | |
7515 ++ vldr d27, [r5, #24] | |
7516 ++ ldr r9, [r7, #200] | |
7517 ++ ldr r8, [r7, #404] | |
7518 ++ ldr r5, [r7, #176] | |
7519 ++ add r6, r9, #64 | |
7520 ++ str r4, [r7, #300] | |
7521 ++ mov r4, #0 | |
7522 ++ str r8, [r7, #288] | |
7523 ++ str r5, [r7, #296] | |
7524 ++ str r4, [r7, #292] | |
7525 ++ stmia r6, {r0, r1, r2, r3} | |
7526 ++ vldr d22, [r9, #64] | |
7527 ++ vldr d23, [r9, #72] | |
7528 ++ vldr d20, [r7, #288] | |
7529 ++ vldr d21, [r7, #296] | |
7530 ++ str ip, [r7, #192] | |
7531 ++ beq .L20 | |
7532 ++ lsl r6, ip, #1 | |
7533 ++ ldr r1, [r9, #68] | |
7534 ++ add r3, r6, ip | |
7535 ++ str r6, [r7, #180] | |
7536 ++ ldr r2, [r9, #72] | |
7537 ++ add r8, r8, #2 | |
7538 ++ ldr r5, [r9, #76] | |
7539 ++ vldr d18, .L38 | |
7540 ++ vldr d19, .L38+8 | |
7541 ++ str r4, [r7, #240] | |
7542 ++ ldr r6, [r7, #184] | |
7543 ++ ldr r4, [r7, #188] | |
7544 ++ str r0, [r7, #224] | |
7545 ++ str r1, [r7, #220] | |
7546 ++ str r8, [r7, #208] | |
7547 ++ str r2, [r7, #216] | |
7548 ++ str r3, [r7, #204] | |
7549 ++ str r5, [r7, #212] | |
7550 ++ str r6, [r7, #252] | |
7551 ++ str r4, [r7, #248] | |
7552 ++.L4: | |
7553 ++ ldr r2, [r7, #244] | |
7554 ++ add r9, r7, #216 | |
7555 ++ ldr r3, [r7, #244] | |
7556 ++ vadd.i32 q8, q10, q9 | |
7557 ++ ldr r6, [r7, #208] | |
7558 ++ vmov q15, q13 @ v4si | |
7559 ++ ldr r5, [r7, #240] | |
7560 ++ vmov q3, q12 @ v4si | |
7561 ++ ldr r4, [r7, #244] | |
7562 ++ vmov q2, q11 @ v4si | |
7563 ++ adds r5, r5, r6 | |
7564 ++ ldr r2, [r2, #8] | |
7565 ++ ldr r6, [r7, #400] | |
7566 ++ vmov q5, q10 @ v4si | |
7567 ++ ldr r3, [r3, #12] | |
7568 ++ vmov q1, q13 @ v4si | |
7569 ++ ldr r0, [r7, #244] | |
7570 ++ vmov q0, q12 @ v4si | |
7571 ++ ldr r1, [r7, #244] | |
7572 ++ vmov q4, q11 @ v4si | |
7573 ++ ldmia r9, {r9, sl, fp} | |
7574 ++ str r5, [r7, #228] | |
7575 ++ ldr r5, [r4, #24] | |
7576 ++ ldr r0, [r0, #0] | |
7577 ++ ldr r1, [r1, #4] | |
7578 ++ str r2, [r7, #264] | |
7579 ++ str r3, [r7, #236] | |
7580 ++ ldr r2, [r6, #4] | |
7581 ++ ldr r3, [r4, #28] | |
7582 ++ str r5, [r7, #280] | |
7583 ++ ldr r5, [r6, #0] | |
7584 ++ movs r6, #0 | |
7585 ++ ldr ip, [r7, #228] | |
7586 ++ ldr r8, [r7, #212] | |
7587 ++ str r0, [r7, #232] | |
7588 ++ str r1, [r7, #268] | |
7589 ++ ldr r0, [r4, #16] | |
7590 ++ ldr r1, [r4, #20] | |
7591 ++ movs r4, #10 | |
7592 ++ str r2, [r7, #24] | |
7593 ++ str r3, [r7, #284] | |
7594 ++ str r4, [r7, #256] | |
7595 ++ ldr r2, [r7, #264] | |
7596 ++ str r9, [r7, #276] | |
7597 ++ mov r9, r6 | |
7598 ++ ldr r6, [r7, #280] | |
7599 ++ str r8, [r7, #260] | |
7600 ++ mov r8, sl | |
7601 ++ str r1, [r7, #272] | |
7602 ++ mov sl, ip | |
7603 ++ str r6, [r7, #264] | |
7604 ++ mov r6, r5 | |
7605 ++ ldr r3, [r7, #236] | |
7606 ++ mov r5, r0 | |
7607 ++ ldr ip, [r7, #24] | |
7608 ++ ldr r1, [r7, #268] | |
7609 ++ ldr r0, [r7, #232] | |
7610 ++ b .L39 | |
7611 ++.L40: | |
7612 ++ .align 3 | |
7613 ++.L38: | |
7614 ++ .word 1 | |
7615 ++ .word 0 | |
7616 ++ .word 0 | |
7617 ++ .word 0 | |
7618 ++ .word .LANCHOR0-(.LPIC24+4) | |
7619 ++.L39: | |
7620 ++.L3: | |
7621 ++ vadd.i32 q4, q4, q0 | |
7622 ++ add r8, r8, r1 | |
7623 ++ vadd.i32 q2, q2, q3 | |
7624 ++ str r8, [r7, #268] | |
7625 ++ veor q5, q5, q4 | |
7626 ++ ldr r8, [r7, #276] | |
7627 ++ veor q8, q8, q2 | |
7628 ++ add fp, fp, r0 | |
7629 ++ str fp, [r7, #280] | |
7630 ++ add r8, r8, r2 | |
7631 ++ vrev32.16 q5, q5 | |
7632 ++ str r8, [r7, #276] | |
7633 ++ vrev32.16 q8, q8 | |
7634 ++ vadd.i32 q1, q1, q5 | |
7635 ++ vadd.i32 q15, q15, q8 | |
7636 ++ ldr r8, [r7, #280] | |
7637 ++ veor q0, q1, q0 | |
7638 ++ ldr r4, [r7, #260] | |
7639 ++ veor q3, q15, q3 | |
7640 ++ eor sl, sl, r8 | |
7641 ++ ldr r8, [r7, #276] | |
7642 ++ add fp, r4, r3 | |
7643 ++ vshl.i32 q7, q0, #12 | |
7644 ++ ldr r4, [r7, #268] | |
7645 ++ vshl.i32 q6, q3, #12 | |
7646 ++ eor r6, r6, r8 | |
7647 ++ eor r9, r9, r4 | |
7648 ++ ldr r4, [r7, #272] | |
7649 ++ vsri.32 q7, q0, #20 | |
7650 ++ ror r8, r6, #16 | |
7651 ++ ldr r6, [r7, #264] | |
7652 ++ eor ip, ip, fp | |
7653 ++ vsri.32 q6, q3, #20 | |
7654 ++ ror sl, sl, #16 | |
7655 ++ ror r9, r9, #16 | |
7656 ++ add r5, r5, sl | |
7657 ++ vadd.i32 q4, q4, q7 | |
7658 ++ str r5, [r7, #236] | |
7659 ++ vadd.i32 q2, q2, q6 | |
7660 ++ add r5, r4, r9 | |
7661 ++ add r4, r6, r8 | |
7662 ++ ldr r6, [r7, #284] | |
7663 ++ ror ip, ip, #16 | |
7664 ++ veor q5, q4, q5 | |
7665 ++ veor q8, q2, q8 | |
7666 ++ add r6, r6, ip | |
7667 ++ str r6, [r7, #264] | |
7668 ++ eors r1, r1, r5 | |
7669 ++ ldr r6, [r7, #236] | |
7670 ++ vshl.i32 q3, q5, #8 | |
7671 ++ vshl.i32 q14, q8, #8 | |
7672 ++ eors r2, r2, r4 | |
7673 ++ eors r0, r0, r6 | |
7674 ++ ldr r6, [r7, #264] | |
7675 ++ vsri.32 q3, q5, #24 | |
7676 ++ ror r1, r1, #20 | |
7677 ++ eors r3, r3, r6 | |
7678 ++ ldr r6, [r7, #280] | |
7679 ++ ror r0, r0, #20 | |
7680 ++ vsri.32 q14, q8, #24 | |
7681 ++ adds r6, r0, r6 | |
7682 ++ str r6, [r7, #284] | |
7683 ++ ldr r6, [r7, #268] | |
7684 ++ vadd.i32 q1, q1, q3 | |
7685 ++ vadd.i32 q15, q15, q14 | |
7686 ++ ror r2, r2, #20 | |
7687 ++ adds r6, r1, r6 | |
7688 ++ str r6, [r7, #260] | |
7689 ++ ldr r6, [r7, #276] | |
7690 ++ veor q6, q15, q6 | |
7691 ++ veor q7, q1, q7 | |
7692 ++ ror r3, r3, #20 | |
7693 ++ adds r6, r2, r6 | |
7694 ++ str r6, [r7, #280] | |
7695 ++ ldr r6, [r7, #284] | |
7696 ++ vshl.i32 q0, q6, #7 | |
7697 ++ vshl.i32 q5, q7, #7 | |
7698 ++ add fp, r3, fp | |
7699 ++ eor sl, r6, sl | |
7700 ++ ldr r6, [r7, #260] | |
7701 ++ eor ip, fp, ip | |
7702 ++ vsri.32 q0, q6, #25 | |
7703 ++ eor r9, r6, r9 | |
7704 ++ ldr r6, [r7, #280] | |
7705 ++ ror sl, sl, #24 | |
7706 ++ vsri.32 q5, q7, #25 | |
7707 ++ eor r8, r6, r8 | |
7708 ++ ldr r6, [r7, #236] | |
7709 ++ ror r9, r9, #24 | |
7710 ++ ror ip, ip, #24 | |
7711 ++ add r6, sl, r6 | |
7712 ++ str r6, [r7, #276] | |
7713 ++ ldr r6, [r7, #264] | |
7714 ++ add r5, r9, r5 | |
7715 ++ str r5, [r7, #272] | |
7716 ++ vext.32 q5, q5, q5, #1 | |
7717 ++ add r5, ip, r6 | |
7718 ++ ldr r6, [r7, #276] | |
7719 ++ vext.32 q0, q0, q0, #1 | |
7720 ++ vadd.i32 q4, q4, q5 | |
7721 ++ eors r0, r0, r6 | |
7722 ++ ldr r6, [r7, #272] | |
7723 ++ vadd.i32 q2, q2, q0 | |
7724 ++ vext.32 q3, q3, q3, #3 | |
7725 ++ ror r8, r8, #24 | |
7726 ++ eors r1, r1, r6 | |
7727 ++ vext.32 q14, q14, q14, #3 | |
7728 ++ add r4, r8, r4 | |
7729 ++ ldr r6, [r7, #284] | |
7730 ++ veor q3, q4, q3 | |
7731 ++ veor q14, q2, q14 | |
7732 ++ eors r2, r2, r4 | |
7733 ++ ror r1, r1, #25 | |
7734 ++ vext.32 q1, q1, q1, #2 | |
7735 ++ adds r6, r1, r6 | |
7736 ++ str r6, [r7, #284] | |
7737 ++ vext.32 q15, q15, q15, #2 | |
7738 ++ ldr r6, [r7, #260] | |
7739 ++ eors r3, r3, r5 | |
7740 ++ ror r2, r2, #25 | |
7741 ++ vrev32.16 q8, q14 | |
7742 ++ adds r6, r2, r6 | |
7743 ++ vrev32.16 q3, q3 | |
7744 ++ str r6, [r7, #268] | |
7745 ++ vadd.i32 q1, q1, q3 | |
7746 ++ ldr r6, [r7, #280] | |
7747 ++ vadd.i32 q15, q15, q8 | |
7748 ++ ror r3, r3, #25 | |
7749 ++ veor q5, q1, q5 | |
7750 ++ adds r6, r3, r6 | |
7751 ++ veor q0, q15, q0 | |
7752 ++ str r6, [r7, #264] | |
7753 ++ ldr r6, [r7, #268] | |
7754 ++ ror r0, r0, #25 | |
7755 ++ add fp, r0, fp | |
7756 ++ vshl.i32 q6, q5, #12 | |
7757 ++ eor sl, r6, sl | |
7758 ++ ldr r6, [r7, #284] | |
7759 ++ vshl.i32 q14, q0, #12 | |
7760 ++ eor r8, fp, r8 | |
7761 ++ eor ip, r6, ip | |
7762 ++ ldr r6, [r7, #264] | |
7763 ++ vsri.32 q6, q5, #20 | |
7764 ++ ror sl, sl, #16 | |
7765 ++ eor r9, r6, r9 | |
7766 ++ ror r6, r8, #16 | |
7767 ++ vsri.32 q14, q0, #20 | |
7768 ++ ldr r8, [r7, #272] | |
7769 ++ ror ip, ip, #16 | |
7770 ++ add r5, sl, r5 | |
7771 ++ add r8, r6, r8 | |
7772 ++ add r4, ip, r4 | |
7773 ++ str r4, [r7, #236] | |
7774 ++ eor r0, r8, r0 | |
7775 ++ str r5, [r7, #280] | |
7776 ++ vadd.i32 q4, q4, q6 | |
7777 ++ ldr r5, [r7, #236] | |
7778 ++ vadd.i32 q2, q2, q14 | |
7779 ++ ldr r4, [r7, #276] | |
7780 ++ ror r0, r0, #20 | |
7781 ++ veor q3, q4, q3 | |
7782 ++ eors r1, r1, r5 | |
7783 ++ veor q0, q2, q8 | |
7784 ++ str r8, [r7, #272] | |
7785 ++ str r0, [r7, #24] | |
7786 ++ add fp, r0, fp | |
7787 ++ ldr r8, [r7, #280] | |
7788 ++ ror r9, r9, #16 | |
7789 ++ ldr r0, [r7, #284] | |
7790 ++ add r4, r9, r4 | |
7791 ++ str fp, [r7, #260] | |
7792 ++ ror r1, r1, #20 | |
7793 ++ add fp, r1, r0 | |
7794 ++ eor r2, r8, r2 | |
7795 ++ ldr r0, [r7, #260] | |
7796 ++ eors r3, r3, r4 | |
7797 ++ vshl.i32 q5, q3, #8 | |
7798 ++ str r4, [r7, #232] | |
7799 ++ vshl.i32 q8, q0, #8 | |
7800 ++ ldr r4, [r7, #268] | |
7801 ++ ldr r5, [r7, #264] | |
7802 ++ ror r2, r2, #20 | |
7803 ++ ror r3, r3, #20 | |
7804 ++ eors r6, r6, r0 | |
7805 ++ adds r5, r3, r5 | |
7806 ++ add r8, r2, r4 | |
7807 ++ vsri.32 q5, q3, #24 | |
7808 ++ ldr r4, [r7, #272] | |
7809 ++ eor r9, r5, r9 | |
7810 ++ eor ip, fp, ip | |
7811 ++ vsri.32 q8, q0, #24 | |
7812 ++ eor sl, r8, sl | |
7813 ++ ror r6, r6, #24 | |
7814 ++ ldr r0, [r7, #280] | |
7815 ++ str r5, [r7, #276] | |
7816 ++ adds r4, r6, r4 | |
7817 ++ ldr r5, [r7, #236] | |
7818 ++ vadd.i32 q1, q1, q5 | |
7819 ++ str r4, [r7, #272] | |
7820 ++ vadd.i32 q15, q15, q8 | |
7821 ++ ldr r4, [r7, #232] | |
7822 ++ ror ip, ip, #24 | |
7823 ++ ror sl, sl, #24 | |
7824 ++ ror r9, r9, #24 | |
7825 ++ add r5, ip, r5 | |
7826 ++ add r0, sl, r0 | |
7827 ++ str r5, [r7, #264] | |
7828 ++ add r5, r9, r4 | |
7829 ++ str r0, [r7, #284] | |
7830 ++ veor q6, q1, q6 | |
7831 ++ ldr r4, [r7, #24] | |
7832 ++ veor q14, q15, q14 | |
7833 ++ ldr r0, [r7, #272] | |
7834 ++ eors r3, r3, r5 | |
7835 ++ vshl.i32 q0, q6, #7 | |
7836 ++ vext.32 q1, q1, q1, #2 | |
7837 ++ eors r0, r0, r4 | |
7838 ++ ldr r4, [r7, #284] | |
7839 ++ str r0, [r7, #280] | |
7840 ++ vshl.i32 q3, q14, #7 | |
7841 ++ eors r2, r2, r4 | |
7842 ++ ldr r4, [r7, #280] | |
7843 ++ ldr r0, [r7, #264] | |
7844 ++ vsri.32 q0, q6, #25 | |
7845 ++ ror r2, r2, #25 | |
7846 ++ ror r3, r3, #25 | |
7847 ++ eors r1, r1, r0 | |
7848 ++ vsri.32 q3, q14, #25 | |
7849 ++ ror r0, r4, #25 | |
7850 ++ ldr r4, [r7, #256] | |
7851 ++ ror r1, r1, #25 | |
7852 ++ vext.32 q5, q5, q5, #1 | |
7853 ++ subs r4, r4, #1 | |
7854 ++ str r4, [r7, #256] | |
7855 ++ vext.32 q15, q15, q15, #2 | |
7856 ++ vext.32 q8, q8, q8, #1 | |
7857 ++ vext.32 q0, q0, q0, #3 | |
7858 ++ vext.32 q3, q3, q3, #3 | |
7859 ++ bne .L3 | |
7860 ++ ldr r4, [r7, #264] | |
7861 ++ vadd.i32 q14, q10, q9 | |
7862 ++ str r2, [r7, #264] | |
7863 ++ vadd.i32 q10, q10, q5 | |
7864 ++ ldr r2, [r7, #252] | |
7865 ++ vld1.64 {d12-d13}, [r2:64] | |
7866 ++ ldr r2, [r7, #220] | |
7867 ++ vadd.i32 q4, q11, q4 | |
7868 ++ str ip, [r7, #24] | |
7869 ++ mov ip, sl | |
7870 ++ mov sl, r8 | |
7871 ++ ldr r8, [r7, #260] | |
7872 ++ add sl, sl, r2 | |
7873 ++ ldr r2, [r7, #212] | |
7874 ++ str r4, [r7, #280] | |
7875 ++ vadd.i32 q0, q12, q0 | |
7876 ++ ldr r4, [r7, #224] | |
7877 ++ add r8, r8, r2 | |
7878 ++ ldr r2, [r7, #240] | |
7879 ++ vadd.i32 q1, q13, q1 | |
7880 ++ str r0, [r7, #232] | |
7881 ++ add fp, fp, r4 | |
7882 ++ mov r0, r5 | |
7883 ++ ldr r4, [r7, #216] | |
7884 ++ mov r5, r6 | |
7885 ++ mov r6, r9 | |
7886 ++ ldr r9, [r7, #276] | |
7887 ++ adds r2, r2, #3 | |
7888 ++ str r2, [r7, #240] | |
7889 ++ vadd.i32 q2, q11, q2 | |
7890 ++ ldr r2, [r7, #252] | |
7891 ++ add r9, r9, r4 | |
7892 ++ vadd.i32 q3, q12, q3 | |
7893 ++ ldr r4, [r7, #228] | |
7894 ++ vadd.i32 q15, q13, q15 | |
7895 ++ str r1, [r7, #268] | |
7896 ++ vadd.i32 q8, q14, q8 | |
7897 ++ str r3, [r7, #236] | |
7898 ++ veor q4, q4, q6 | |
7899 ++ ldr r3, [r7, #284] | |
7900 ++ ldr r1, [r7, #272] | |
7901 ++ add ip, r4, ip | |
7902 ++ ldr r4, [r7, #248] | |
7903 ++ vst1.64 {d8-d9}, [r4:64] | |
7904 ++ vldr d8, [r2, #16] | |
7905 ++ vldr d9, [r2, #24] | |
7906 ++ veor q0, q0, q4 | |
7907 ++ vstr d0, [r4, #16] | |
7908 ++ vstr d1, [r4, #24] | |
7909 ++ vldr d0, [r2, #32] | |
7910 ++ vldr d1, [r2, #40] | |
7911 ++ veor q1, q1, q0 | |
7912 ++ vstr d2, [r4, #32] | |
7913 ++ vstr d3, [r4, #40] | |
7914 ++ vldr d2, [r2, #48] | |
7915 ++ vldr d3, [r2, #56] | |
7916 ++ veor q10, q10, q1 | |
7917 ++ vstr d20, [r4, #48] | |
7918 ++ vstr d21, [r4, #56] | |
7919 ++ vldr d8, [r2, #64] | |
7920 ++ vldr d9, [r2, #72] | |
7921 ++ veor q2, q2, q4 | |
7922 ++ vstr d4, [r4, #64] | |
7923 ++ vstr d5, [r4, #72] | |
7924 ++ vldr d10, [r2, #80] | |
7925 ++ vldr d11, [r2, #88] | |
7926 ++ veor q3, q3, q5 | |
7927 ++ vstr d6, [r4, #80] | |
7928 ++ vstr d7, [r4, #88] | |
7929 ++ vldr d12, [r2, #96] | |
7930 ++ vldr d13, [r2, #104] | |
7931 ++ veor q15, q15, q6 | |
7932 ++ vstr d30, [r4, #96] | |
7933 ++ vstr d31, [r4, #104] | |
7934 ++ vldr d20, [r2, #112] | |
7935 ++ vldr d21, [r2, #120] | |
7936 ++ veor q8, q8, q10 | |
7937 ++ vstr d16, [r4, #112] | |
7938 ++ vstr d17, [r4, #120] | |
7939 ++ ldr r4, [r2, #128] | |
7940 ++ ldr r2, [r7, #248] | |
7941 ++ vadd.i32 q10, q14, q9 | |
7942 ++ eor r4, fp, r4 | |
7943 ++ vadd.i32 q10, q10, q9 | |
7944 ++ str r4, [r2, #128] | |
7945 ++ ldr r4, [r7, #252] | |
7946 ++ ldr r2, [r4, #132] | |
7947 ++ eor r2, sl, r2 | |
7948 ++ ldr sl, [r7, #248] | |
7949 ++ str r2, [sl, #132] | |
7950 ++ ldr r2, [r4, #136] | |
7951 ++ eor r2, r9, r2 | |
7952 ++ str r2, [sl, #136] | |
7953 ++ ldr r2, [r4, #140] | |
7954 ++ eor r2, r8, r2 | |
7955 ++ str r2, [sl, #140] | |
7956 ++ ldr r2, [r7, #244] | |
7957 ++ ldr r4, [r4, #144] | |
7958 ++ ldr r2, [r2, #0] | |
7959 ++ str r4, [r7, #44] | |
7960 ++ ldr r4, [r7, #232] | |
7961 ++ add r8, r4, r2 | |
7962 ++ ldr r2, [r7, #44] | |
7963 ++ ldr r4, [r7, #244] | |
7964 ++ eor r8, r8, r2 | |
7965 ++ ldr r2, [r7, #252] | |
7966 ++ str r8, [sl, #144] | |
7967 ++ ldr r4, [r4, #4] | |
7968 ++ ldr r2, [r2, #148] | |
7969 ++ str r2, [r7, #40] | |
7970 ++ ldr r2, [r7, #268] | |
7971 ++ add r8, r2, r4 | |
7972 ++ ldr r4, [r7, #40] | |
7973 ++ ldr r2, [r7, #244] | |
7974 ++ eor r8, r8, r4 | |
7975 ++ ldr r4, [r7, #252] | |
7976 ++ str r8, [sl, #148] | |
7977 ++ ldr r2, [r2, #8] | |
7978 ++ ldr r4, [r4, #152] | |
7979 ++ str r4, [r7, #36] | |
7980 ++ ldr r4, [r7, #264] | |
7981 ++ add r8, r4, r2 | |
7982 ++ ldr r2, [r7, #36] | |
7983 ++ eor r8, r8, r2 | |
7984 ++ str r8, [sl, #152] | |
7985 ++ ldr r2, [r7, #252] | |
7986 ++ ldr r4, [r7, #244] | |
7987 ++ ldr r2, [r2, #156] | |
7988 ++ ldr r4, [r4, #12] | |
7989 ++ str r2, [r7, #32] | |
7990 ++ ldr r2, [r7, #236] | |
7991 ++ add r8, r2, r4 | |
7992 ++ ldr r4, [r7, #32] | |
7993 ++ ldr r2, [r7, #252] | |
7994 ++ eor r8, r8, r4 | |
7995 ++ str r8, [sl, #156] | |
7996 ++ ldr r8, [r7, #244] | |
7997 ++ ldr r2, [r2, #160] | |
7998 ++ ldr r4, [r8, #16] | |
7999 ++ adds r0, r0, r4 | |
8000 ++ ldr r4, [r7, #252] | |
8001 ++ eors r0, r0, r2 | |
8002 ++ str r0, [sl, #160] | |
8003 ++ ldr r0, [r8, #20] | |
8004 ++ ldr r2, [r4, #164] | |
8005 ++ adds r1, r1, r0 | |
8006 ++ ldr r0, [r7, #280] | |
8007 ++ eors r1, r1, r2 | |
8008 ++ str r1, [sl, #164] | |
8009 ++ ldr r2, [r8, #24] | |
8010 ++ ldr r1, [r4, #168] | |
8011 ++ adds r2, r0, r2 | |
8012 ++ eors r2, r2, r1 | |
8013 ++ str r2, [sl, #168] | |
8014 ++ ldr r1, [r8, #28] | |
8015 ++ ldr r2, [r4, #172] | |
8016 ++ adds r3, r3, r1 | |
8017 ++ eors r3, r3, r2 | |
8018 ++ str r3, [sl, #172] | |
8019 ++ ldr r3, [r4, #176] | |
8020 ++ eor r3, ip, r3 | |
8021 ++ str r3, [sl, #176] | |
8022 ++ ldr r3, [r4, #180] | |
8023 ++ ldr r4, [r7, #400] | |
8024 ++ eors r6, r6, r3 | |
8025 ++ str r6, [sl, #180] | |
8026 ++ ldr r6, [r7, #252] | |
8027 ++ ldr r2, [r4, #0] | |
8028 ++ ldr r3, [r6, #184] | |
8029 ++ adds r5, r5, r2 | |
8030 ++ eors r5, r5, r3 | |
8031 ++ str r5, [sl, #184] | |
8032 ++ ldr r2, [r6, #188] | |
8033 ++ adds r6, r6, #192 | |
8034 ++ ldr r3, [r4, #4] | |
8035 ++ str r6, [r7, #252] | |
8036 ++ ldr r0, [r7, #24] | |
8037 ++ ldr r1, [r7, #240] | |
8038 ++ adds r4, r0, r3 | |
8039 ++ eors r4, r4, r2 | |
8040 ++ ldr r2, [r7, #204] | |
8041 ++ str r4, [sl, #188] | |
8042 ++ add sl, sl, #192 | |
8043 ++ cmp r1, r2 | |
8044 ++ str sl, [r7, #248] | |
8045 ++ bne .L4 | |
8046 ++ ldr r4, [r7, #192] | |
8047 ++ ldr r3, [r7, #180] | |
8048 ++ ldr r6, [r7, #188] | |
8049 ++ adds r5, r3, r4 | |
8050 ++ ldr r8, [r7, #184] | |
8051 ++ lsls r5, r5, #6 | |
8052 ++ adds r4, r6, r5 | |
8053 ++ add r5, r8, r5 | |
8054 ++.L2: | |
8055 ++ ldr r9, [r7, #196] | |
8056 ++ movw r3, #43691 | |
8057 ++ movt r3, 43690 | |
8058 ++ ldr sl, [r7, #196] | |
8059 ++ umull r9, r3, r3, r9 | |
8060 ++ lsrs r3, r3, #7 | |
8061 ++ add r3, r3, r3, lsl #1 | |
8062 ++ sub r3, sl, r3, lsl #6 | |
8063 ++ lsrs r6, r3, #6 | |
8064 ++ beq .L5 | |
8065 ++ add r1, r5, #16 | |
8066 ++ add r2, r4, #16 | |
8067 ++ mov r0, r6 | |
8068 ++ vldr d30, .L41 | |
8069 ++ vldr d31, .L41+8 | |
8070 ++.L6: | |
8071 ++ vmov q8, q10 @ v4si | |
8072 ++ movs r3, #10 | |
8073 ++ vmov q1, q13 @ v4si | |
8074 ++ vmov q14, q12 @ v4si | |
8075 ++ vmov q3, q11 @ v4si | |
8076 ++.L7: | |
8077 ++ vadd.i32 q3, q3, q14 | |
8078 ++ subs r3, r3, #1 | |
8079 ++ veor q2, q8, q3 | |
8080 ++ vrev32.16 q2, q2 | |
8081 ++ vadd.i32 q8, q1, q2 | |
8082 ++ veor q9, q8, q14 | |
8083 ++ vshl.i32 q14, q9, #12 | |
8084 ++ vsri.32 q14, q9, #20 | |
8085 ++ vadd.i32 q3, q3, q14 | |
8086 ++ veor q2, q3, q2 | |
8087 ++ vshl.i32 q9, q2, #8 | |
8088 ++ vsri.32 q9, q2, #24 | |
8089 ++ vadd.i32 q8, q8, q9 | |
8090 ++ vext.32 q9, q9, q9, #3 | |
8091 ++ veor q14, q8, q14 | |
8092 ++ vext.32 q1, q8, q8, #2 | |
8093 ++ vshl.i32 q8, q14, #7 | |
8094 ++ vsri.32 q8, q14, #25 | |
8095 ++ vext.32 q8, q8, q8, #1 | |
8096 ++ vadd.i32 q3, q3, q8 | |
8097 ++ veor q2, q3, q9 | |
8098 ++ vrev32.16 q2, q2 | |
8099 ++ vadd.i32 q9, q1, q2 | |
8100 ++ veor q8, q9, q8 | |
8101 ++ vshl.i32 q14, q8, #12 | |
8102 ++ vsri.32 q14, q8, #20 | |
8103 ++ vadd.i32 q3, q3, q14 | |
8104 ++ veor q2, q3, q2 | |
8105 ++ vshl.i32 q8, q2, #8 | |
8106 ++ vsri.32 q8, q2, #24 | |
8107 ++ vadd.i32 q9, q9, q8 | |
8108 ++ vext.32 q8, q8, q8, #1 | |
8109 ++ veor q14, q9, q14 | |
8110 ++ vext.32 q1, q9, q9, #2 | |
8111 ++ vshl.i32 q9, q14, #7 | |
8112 ++ vsri.32 q9, q14, #25 | |
8113 ++ vext.32 q14, q9, q9, #3 | |
8114 ++ bne .L7 | |
8115 ++ vadd.i32 q8, q10, q8 | |
8116 ++ subs r0, r0, #1 | |
8117 ++ vadd.i32 q3, q11, q3 | |
8118 ++ vldr d0, [r1, #-16] | |
8119 ++ vldr d1, [r1, #-8] | |
8120 ++ vadd.i32 q14, q12, q14 | |
8121 ++ vadd.i32 q1, q13, q1 | |
8122 ++ veor q3, q3, q0 | |
8123 ++ vstr d6, [r2, #-16] | |
8124 ++ vstr d7, [r2, #-8] | |
8125 ++ vadd.i32 q10, q10, q15 | |
8126 ++ vld1.64 {d8-d9}, [r1:64] | |
8127 ++ veor q14, q14, q4 | |
8128 ++ vst1.64 {d28-d29}, [r2:64] | |
8129 ++ vldr d10, [r1, #16] | |
8130 ++ vldr d11, [r1, #24] | |
8131 ++ veor q1, q1, q5 | |
8132 ++ vstr d2, [r2, #16] | |
8133 ++ vstr d3, [r2, #24] | |
8134 ++ vldr d18, [r1, #32] | |
8135 ++ vldr d19, [r1, #40] | |
8136 ++ add r1, r1, #64 | |
8137 ++ veor q8, q8, q9 | |
8138 ++ vstr d16, [r2, #32] | |
8139 ++ vstr d17, [r2, #40] | |
8140 ++ add r2, r2, #64 | |
8141 ++ bne .L6 | |
8142 ++ lsls r6, r6, #6 | |
8143 ++ adds r4, r4, r6 | |
8144 ++ adds r5, r5, r6 | |
8145 ++.L5: | |
8146 ++ ldr r6, [r7, #196] | |
8147 ++ ands ip, r6, #63 | |
8148 ++ beq .L1 | |
8149 ++ vmov q8, q10 @ v4si | |
8150 ++ movs r3, #10 | |
8151 ++ vmov q14, q13 @ v4si | |
8152 ++ vmov q9, q12 @ v4si | |
8153 ++ vmov q15, q11 @ v4si | |
8154 ++.L10: | |
8155 ++ vadd.i32 q15, q15, q9 | |
8156 ++ subs r3, r3, #1 | |
8157 ++ veor q8, q8, q15 | |
8158 ++ vrev32.16 q8, q8 | |
8159 ++ vadd.i32 q3, q14, q8 | |
8160 ++ veor q9, q3, q9 | |
8161 ++ vshl.i32 q14, q9, #12 | |
8162 ++ vsri.32 q14, q9, #20 | |
8163 ++ vadd.i32 q15, q15, q14 | |
8164 ++ veor q9, q15, q8 | |
8165 ++ vshl.i32 q8, q9, #8 | |
8166 ++ vsri.32 q8, q9, #24 | |
8167 ++ vadd.i32 q9, q3, q8 | |
8168 ++ vext.32 q8, q8, q8, #3 | |
8169 ++ veor q2, q9, q14 | |
8170 ++ vext.32 q14, q9, q9, #2 | |
8171 ++ vshl.i32 q9, q2, #7 | |
8172 ++ vsri.32 q9, q2, #25 | |
8173 ++ vext.32 q9, q9, q9, #1 | |
8174 ++ vadd.i32 q15, q15, q9 | |
8175 ++ veor q3, q15, q8 | |
8176 ++ vrev32.16 q3, q3 | |
8177 ++ vadd.i32 q14, q14, q3 | |
8178 ++ veor q8, q14, q9 | |
8179 ++ vshl.i32 q9, q8, #12 | |
8180 ++ vsri.32 q9, q8, #20 | |
8181 ++ vadd.i32 q15, q15, q9 | |
8182 ++ veor q3, q15, q3 | |
8183 ++ vshl.i32 q8, q3, #8 | |
8184 ++ vsri.32 q8, q3, #24 | |
8185 ++ vadd.i32 q14, q14, q8 | |
8186 ++ vext.32 q8, q8, q8, #1 | |
8187 ++ veor q3, q14, q9 | |
8188 ++ vext.32 q14, q14, q14, #2 | |
8189 ++ vshl.i32 q9, q3, #7 | |
8190 ++ vsri.32 q9, q3, #25 | |
8191 ++ vext.32 q9, q9, q9, #3 | |
8192 ++ bne .L10 | |
8193 ++ cmp ip, #15 | |
8194 ++ vadd.i32 q11, q11, q15 | |
8195 ++ bhi .L37 | |
8196 ++ ldr r9, [r7, #200] | |
8197 ++ vst1.64 {d22-d23}, [r9:128] | |
8198 ++.L14: | |
8199 ++ ldr sl, [r7, #196] | |
8200 ++ and r3, sl, #48 | |
8201 ++ cmp ip, r3 | |
8202 ++ bls .L1 | |
8203 ++ adds r0, r5, r3 | |
8204 ++ adds r1, r4, r3 | |
8205 ++ add r2, r0, #16 | |
8206 ++ add r6, r1, #16 | |
8207 ++ cmp r1, r2 | |
8208 ++ it cc | |
8209 ++ cmpcc r0, r6 | |
8210 ++ rsb r9, r3, ip | |
8211 ++ ite cc | |
8212 ++ movcc r2, #0 | |
8213 ++ movcs r2, #1 | |
8214 ++ cmp r9, #15 | |
8215 ++ ite ls | |
8216 ++ movls r2, #0 | |
8217 ++ andhi r2, r2, #1 | |
8218 ++ lsr r8, r9, #4 | |
8219 ++ eor r2, r2, #1 | |
8220 ++ cmp r8, #0 | |
8221 ++ it eq | |
8222 ++ orreq r2, r2, #1 | |
8223 ++ lsl sl, r8, #4 | |
8224 ++ cbnz r2, .L35 | |
8225 ++ ldr fp, [r7, #200] | |
8226 ++ add r6, fp, r3 | |
8227 ++.L17: | |
8228 ++ vld1.8 {q8}, [r0]! | |
8229 ++ adds r2, r2, #1 | |
8230 ++ cmp r8, r2 | |
8231 ++ vld1.8 {q9}, [r6]! | |
8232 ++ veor q8, q9, q8 | |
8233 ++ vst1.8 {q8}, [r1]! | |
8234 ++ bhi .L17 | |
8235 ++ cmp r9, sl | |
8236 ++ add r3, r3, sl | |
8237 ++ beq .L1 | |
8238 ++.L35: | |
8239 ++ ldr r0, [r7, #200] | |
8240 ++.L25: | |
8241 ++ ldrb r2, [r5, r3] @ zero_extendqisi2 | |
8242 ++ ldrb r1, [r3, r0] @ zero_extendqisi2 | |
8243 ++ eors r2, r2, r1 | |
8244 ++ strb r2, [r4, r3] | |
8245 ++ adds r3, r3, #1 | |
8246 ++ cmp ip, r3 | |
8247 ++ bhi .L25 | |
8248 ++.L1: | |
8249 ++ add r7, r7, #304 | |
8250 ++ mov sp, r7 | |
8251 ++ fldmfdd sp!, {d8, d9, d10, d11, d12, d13, d14, d15} | |
8252 ++ pop {r4, r5, r6, r7, r8, r9, sl, fp} | |
8253 ++ bx lr | |
8254 ++.L37: | |
8255 ++ cmp ip, #31 | |
8256 ++ vld1.64 {d0-d1}, [r5:64] | |
8257 ++ vadd.i32 q9, q12, q9 | |
8258 ++ veor q11, q11, q0 | |
8259 ++ vst1.64 {d22-d23}, [r4:64] | |
8260 ++ bls .L12 | |
8261 ++ cmp ip, #47 | |
8262 ++ vldr d2, [r5, #16] | |
8263 ++ vldr d3, [r5, #24] | |
8264 ++ vadd.i32 q13, q13, q14 | |
8265 ++ veor q9, q9, q1 | |
8266 ++ vstr d18, [r4, #16] | |
8267 ++ vstr d19, [r4, #24] | |
8268 ++ bls .L13 | |
8269 ++ vadd.i32 q8, q8, q10 | |
8270 ++ vldr d0, [r5, #32] | |
8271 ++ vldr d1, [r5, #40] | |
8272 ++ ldr r6, [r7, #200] | |
8273 ++ vstr d16, [r6, #48] | |
8274 ++ vstr d17, [r6, #56] | |
8275 ++ veor q8, q13, q0 | |
8276 ++ vstr d16, [r4, #32] | |
8277 ++ vstr d17, [r4, #40] | |
8278 ++ b .L14 | |
8279 ++.L12: | |
8280 ++ ldr r8, [r7, #200] | |
8281 ++ vstr d18, [r8, #16] | |
8282 ++ vstr d19, [r8, #24] | |
8283 ++ b .L14 | |
8284 ++.L20: | |
8285 ++ ldr r5, [r7, #184] | |
8286 ++ ldr r4, [r7, #188] | |
8287 ++ b .L2 | |
8288 ++.L13: | |
8289 ++ ldr r6, [r7, #200] | |
8290 ++ vstr d26, [r6, #32] | |
8291 ++ vstr d27, [r6, #40] | |
8292 ++ b .L14 | |
8293 ++.L42: | |
8294 ++ .align 3 | |
8295 ++.L41: | |
8296 ++ .word 1 | |
8297 ++ .word 0 | |
8298 ++ .word 0 | |
8299 ++ .word 0 | |
8300 ++ .size CRYPTO_chacha_20_neon, .-CRYPTO_chacha_20_neon | |
8301 ++ .section .rodata | |
8302 ++ .align 3 | |
8303 ++.LANCHOR0 = . + 0 | |
8304 ++.LC0: | |
8305 ++ .word 1634760805 | |
8306 ++ .word 857760878 | |
8307 ++ .word 2036477234 | |
8308 ++ .word 1797285236 | |
8309 ++ .ident "GCC: (crosstool-NG linaro-1.13.1-4.7-2012.10-20121022 - Linaro
GCC 2012.10) 4.7.3 20121001 (prerelease)" | |
8310 ++ .section .note.GNU-stack,"",%progbits | |
8311 +diff --git a/crypto/cryptlib.c b/crypto/cryptlib.c | |
8312 +index 7bef015..3b6ab1d 100644 | |
8313 +--- a/crypto/cryptlib.c | |
8314 ++++ b/crypto/cryptlib.c | |
8315 +@@ -661,6 +661,20 @@ const char *CRYPTO_get_lock_name(int type) | |
8316 + return(sk_OPENSSL_STRING_value(app_locks,type-CRYPTO_NUM_LOCKS))
; | |
8317 + } | |
8318 + | |
8319 ++#if __arm__ | |
8320 ++static int global_arm_neon_enabled = 0; | |
8321 ++ | |
8322 ++void CRYPTO_set_NEON_capable(int on) | |
8323 ++ { | |
8324 ++ global_arm_neon_enabled = on != 0; | |
8325 ++ } | |
8326 ++ | |
8327 ++int CRYPTO_is_NEON_capable() | |
8328 ++ { | |
8329 ++ return global_arm_neon_enabled; | |
8330 ++ } | |
8331 ++#endif | |
8332 ++ | |
8333 + #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ | |
8334 + defined(__INTEL__) || \ | |
8335 + defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined
(_M_X64) | |
8336 +diff --git a/crypto/crypto.h b/crypto/crypto.h | |
8337 +index e11ac73..db339c3 100644 | |
8338 +--- a/crypto/crypto.h | |
8339 ++++ b/crypto/crypto.h | |
8340 +@@ -414,6 +414,14 @@ void CRYPTO_cleanup_all_ex_data(void); | |
8341 + | |
8342 + int CRYPTO_get_new_lockid(char *name); | |
8343 + | |
8344 ++/* CRYPTO_set_NEON_capable enables any NEON (ARM vector) dependent code. This | |
8345 ++ * code should be called before any non-init functions. */ | |
8346 ++void CRYPTO_set_NEON_capable(int on); | |
8347 ++ | |
8348 ++/* CRYPTO_is_NEON_capable returns the last value given to | |
8349 ++ * CRYPTO_set_NEON_capable, or else zero if it has never been called. */ | |
8350 ++int CRYPTO_is_NEON_capable(); | |
8351 ++ | |
8352 + int CRYPTO_num_locks(void); /* return CRYPTO_NUM_LOCKS (shared libs!) */ | |
8353 + void CRYPTO_lock(int mode, int type,const char *file,int line); | |
8354 + void CRYPTO_set_locking_callback(void (*func)(int mode,int type, | |
8355 +diff --git a/crypto/poly1305/poly1305.c b/crypto/poly1305/poly1305.c | |
8356 +index 2e5621d..00d53bf 100644 | |
8357 +--- a/crypto/poly1305/poly1305.c | |
8358 ++++ b/crypto/poly1305/poly1305.c | |
8359 +@@ -90,6 +90,17 @@ static void U32TO8_LE(unsigned char *m, uint32_t v) | |
8360 + } | |
8361 + #endif | |
8362 + | |
8363 ++#if __arm__ | |
8364 ++void CRYPTO_poly1305_init_neon(poly1305_state* state, | |
8365 ++ const unsigned char key[32]); | |
8366 ++ | |
8367 ++void CRYPTO_poly1305_update_neon(poly1305_state* state, | |
8368 ++ const unsigned char *in, | |
8369 ++ size_t in_len); | |
8370 ++ | |
8371 ++void CRYPTO_poly1305_finish_neon(poly1305_state* state, unsigned char mac[16])
; | |
8372 ++#endif | |
8373 ++ | |
8374 + static uint64_t | |
8375 + mul32x32_64(uint32_t a, uint32_t b) | |
8376 + { | |
8377 +@@ -207,6 +218,16 @@ void CRYPTO_poly1305_init(poly1305_state *statep, const un
signed char key[32]) | |
8378 + struct poly1305_state_st *state = (struct poly1305_state_st*) statep; | |
8379 + uint32_t t0,t1,t2,t3; | |
8380 + | |
8381 ++#if 0 /* Disabled because of crbug.com/341598 */ | |
8382 ++#if __arm__ | |
8383 ++ if (CRYPTO_is_NEON_capable()) | |
8384 ++ { | |
8385 ++ CRYPTO_poly1305_init_neon(statep, key); | |
8386 ++ return; | |
8387 ++ } | |
8388 ++#endif | |
8389 ++#endif | |
8390 ++ | |
8391 + t0 = U8TO32_LE(key+0); | |
8392 + t1 = U8TO32_LE(key+4); | |
8393 + t2 = U8TO32_LE(key+8); | |
8394 +@@ -241,6 +260,16 @@ void CRYPTO_poly1305_update(poly1305_state *statep, const
unsigned char *in, | |
8395 + unsigned int i; | |
8396 + struct poly1305_state_st *state = (struct poly1305_state_st*) statep; | |
8397 + | |
8398 ++#if 0 /* Disabled because of crbug.com/341598 */ | |
8399 ++#if __arm__ | |
8400 ++ if (CRYPTO_is_NEON_capable()) | |
8401 ++ { | |
8402 ++ CRYPTO_poly1305_update_neon(statep, in, in_len); | |
8403 ++ return; | |
8404 ++ } | |
8405 ++#endif | |
8406 ++#endif | |
8407 ++ | |
8408 + if (state->buf_used) | |
8409 + { | |
8410 + unsigned int todo = 16 - state->buf_used; | |
8411 +@@ -282,6 +309,16 @@ void CRYPTO_poly1305_finish(poly1305_state *statep, unsign
ed char mac[16]) | |
8412 + uint32_t g0,g1,g2,g3,g4; | |
8413 + uint32_t b, nb; | |
8414 + | |
8415 ++#if 0 /* Disabled because of crbug.com/341598 */ | |
8416 ++#if __arm__ | |
8417 ++ if (CRYPTO_is_NEON_capable()) | |
8418 ++ { | |
8419 ++ CRYPTO_poly1305_finish_neon(statep, mac); | |
8420 ++ return; | |
8421 ++ } | |
8422 ++#endif | |
8423 ++#endif | |
8424 ++ | |
8425 + if (state->buf_used) | |
8426 + poly1305_update(state, state->buf, state->buf_used); | |
8427 + | |
8428 +diff --git a/crypto/poly1305/poly1305_arm.c b/crypto/poly1305/poly1305_arm.c | |
8429 +index adcef35..34e339d 100644 | |
8430 +--- a/crypto/poly1305/poly1305_arm.c | |
8431 ++++ b/crypto/poly1305/poly1305_arm.c | |
8432 +@@ -51,6 +51,7 @@ | |
8433 + * SUPERCOP by D. J. Bernstein and Peter Schwabe. */ | |
8434 + | |
8435 + #include <stdint.h> | |
8436 ++#include <string.h> | |
8437 + | |
8438 + #include <openssl/poly1305.h> | |
8439 + | |
8440 +@@ -202,7 +203,8 @@ struct poly1305_state_st { | |
8441 + unsigned char key[16]; | |
8442 + }; | |
8443 + | |
8444 +-void CRYPTO_poly1305_init(poly1305_state *state, const unsigned char key[32]) | |
8445 ++void CRYPTO_poly1305_init_neon(poly1305_state *state, | |
8446 ++ const unsigned char key[32]) | |
8447 + { | |
8448 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state); | |
8449 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); | |
8450 +@@ -227,7 +229,8 @@ void CRYPTO_poly1305_init(poly1305_state *state, const unsi
gned char key[32]) | |
8451 + st->buf_used = 0; | |
8452 + } | |
8453 + | |
8454 +-void CRYPTO_poly1305_update(poly1305_state *state, const unsigned char *in, si
ze_t in_len) | |
8455 ++void CRYPTO_poly1305_update_neon(poly1305_state *state, const unsigned char *i
n, | |
8456 ++ size_t in_len) | |
8457 + { | |
8458 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state); | |
8459 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); | |
8460 +@@ -285,7 +288,7 @@ void CRYPTO_poly1305_update(poly1305_state *state, const un
signed char *in, size | |
8461 + } | |
8462 + } | |
8463 + | |
8464 +-void CRYPTO_poly1305_finish(poly1305_state* state, unsigned char mac[16]) | |
8465 ++void CRYPTO_poly1305_finish_neon(poly1305_state* state, unsigned char mac[16]) | |
8466 + { | |
8467 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state); | |
8468 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); | |
8469 +-- | |
8470 +1.8.4.1 | |
8471 + | |
8472 diff -burN android-openssl-lhash2/patches/tls1_change_cipher_state_rewrite.patch
android-openssl/patches/tls1_change_cipher_state_rewrite.patch | |
8473 --- android-openssl-lhash2/patches/tls1_change_cipher_state_rewrite.patch
1969-12-31 19:00:00.000000000 -0500 | |
8474 +++ android-openssl/patches/tls1_change_cipher_state_rewrite.patch 2013-11-
05 14:14:34.631283497 -0500 | |
8475 @@ -0,0 +1,567 @@ | |
8476 +From d7f9af2d2682bc41e7bf1d669cda60f04630b04d Mon Sep 17 00:00:00 2001 | |
8477 +From: Adam Langley <agl@chromium.org> | |
8478 +Date: Thu, 25 Jul 2013 14:57:38 -0400 | |
8479 +Subject: [PATCH 39/50] tls1_change_cipher_state_rewrite | |
8480 + | |
8481 +The previous version of the function made adding AEAD changes very | |
8482 +difficult. This change should be a semantic no-op - it should be purely | |
8483 +a cleanup. | |
8484 +--- | |
8485 + ssl/ssl.h | 1 + | |
8486 + ssl/ssl_err.c | 2 +- | |
8487 + ssl/t1_enc.c | 445 +++++++++++++++++++++++++++++++--------------------------- | |
8488 + 3 files changed, 240 insertions(+), 208 deletions(-) | |
8489 + | |
8490 +diff --git a/ssl/ssl.h b/ssl/ssl.h | |
8491 +index 68e5648..672f3eb 100644 | |
8492 +--- a/ssl/ssl.h | |
8493 ++++ b/ssl/ssl.h | |
8494 +@@ -2439,6 +2439,7 @@ void ERR_load_SSL_strings(void); | |
8495 + #define SSL_F_SSL_WRITE 208 | |
8496 + #define SSL_F_TLS1_CERT_VERIFY_MAC 286 | |
8497 + #define SSL_F_TLS1_CHANGE_CIPHER_STATE 209 | |
8498 ++#define SSL_F_TLS1_CHANGE_CIPHER_STATE_CIPHER 338 | |
8499 + #define SSL_F_TLS1_CHECK_SERVERHELLO_TLSEXT 274 | |
8500 + #define SSL_F_TLS1_ENC 210 | |
8501 + #define SSL_F_TLS1_EXPORT_KEYING_MATERIAL 314 | |
8502 +diff --git a/ssl/ssl_err.c b/ssl/ssl_err.c | |
8503 +index fc98e6c..97b2a0d 100644 | |
8504 +--- a/ssl/ssl_err.c | |
8505 ++++ b/ssl/ssl_err.c | |
8506 +@@ -280,7 +280,7 @@ static ERR_STRING_DATA SSL_str_functs[]= | |
8507 + {ERR_FUNC(SSL_F_SSL_VERIFY_CERT_CHAIN), "SSL_VERIFY_CERT_CHAIN"}, | |
8508 + {ERR_FUNC(SSL_F_SSL_WRITE), "SSL_write"}, | |
8509 + {ERR_FUNC(SSL_F_TLS1_CERT_VERIFY_MAC), "tls1_cert_verify_mac"}, | |
8510 +-{ERR_FUNC(SSL_F_TLS1_CHANGE_CIPHER_STATE), "TLS1_CHANGE_CIPHER_STATE"}, | |
8511 ++{ERR_FUNC(SSL_F_TLS1_CHANGE_CIPHER_STATE_CIPHER), "TLS1_CHANGE_CIPHER_STAT
E_CIPHER"}, | |
8512 + {ERR_FUNC(SSL_F_TLS1_CHECK_SERVERHELLO_TLSEXT), "TLS1_CHECK_SERVERHELLO_
TLSEXT"}, | |
8513 + {ERR_FUNC(SSL_F_TLS1_ENC), "TLS1_ENC"}, | |
8514 + {ERR_FUNC(SSL_F_TLS1_EXPORT_KEYING_MATERIAL), "TLS1_EXPORT_KEYING_MATERIAL"}, | |
8515 +diff --git a/ssl/t1_enc.c b/ssl/t1_enc.c | |
8516 +index 3649544..e1f91ba 100644 | |
8517 +--- a/ssl/t1_enc.c | |
8518 ++++ b/ssl/t1_enc.c | |
8519 +@@ -316,56 +316,30 @@ static int tls1_generate_key_block(SSL *s, unsigned char
*km, | |
8520 + return ret; | |
8521 + } | |
8522 + | |
8523 +-int tls1_change_cipher_state(SSL *s, int which) | |
8524 ++/* tls1_change_cipher_state_cipher performs the work needed to switch cipher | |
8525 ++ * states when using EVP_CIPHER. The argument |is_read| is true iff this | |
8526 ++ * function is being called due to reading, as opposed to writing, a | |
8527 ++ * ChangeCipherSpec message. In order to support export ciphersuites, | |
8528 ++ * use_client_keys indicates whether the key material provided is in the | |
8529 ++ * "client write" direction. */ | |
8530 ++static int tls1_change_cipher_state_cipher( | |
8531 ++ SSL *s, char is_read, char use_client_keys, | |
8532 ++ const unsigned char *mac_secret, unsigned mac_secret_len, | |
8533 ++ const unsigned char *key, unsigned key_len, | |
8534 ++ const unsigned char *iv, unsigned iv_len) | |
8535 + { | |
8536 +- static const unsigned char empty[]=""; | |
8537 +- unsigned char *p,*mac_secret; | |
8538 +- unsigned char *exp_label; | |
8539 +- unsigned char tmp1[EVP_MAX_KEY_LENGTH]; | |
8540 +- unsigned char tmp2[EVP_MAX_KEY_LENGTH]; | |
8541 +- unsigned char iv1[EVP_MAX_IV_LENGTH*2]; | |
8542 +- unsigned char iv2[EVP_MAX_IV_LENGTH*2]; | |
8543 +- unsigned char *ms,*key,*iv; | |
8544 +- int client_write; | |
8545 +- EVP_CIPHER_CTX *dd; | |
8546 +- const EVP_CIPHER *c; | |
8547 +-#ifndef OPENSSL_NO_COMP | |
8548 +- const SSL_COMP *comp; | |
8549 +-#endif | |
8550 +- const EVP_MD *m; | |
8551 +- int mac_type; | |
8552 +- int *mac_secret_size; | |
8553 ++ const EVP_CIPHER *cipher = s->s3->tmp.new_sym_enc; | |
8554 ++ const char is_export = SSL_C_IS_EXPORT(s->s3->tmp.new_cipher) != 0; | |
8555 ++ EVP_CIPHER_CTX *cipher_ctx; | |
8556 + EVP_MD_CTX *mac_ctx; | |
8557 +- EVP_PKEY *mac_key; | |
8558 +- int is_export,n,i,j,k,exp_label_len,cl; | |
8559 +- int reuse_dd = 0; | |
8560 ++ char is_aead_cipher; | |
8561 + | |
8562 +- is_export=SSL_C_IS_EXPORT(s->s3->tmp.new_cipher); | |
8563 +- c=s->s3->tmp.new_sym_enc; | |
8564 +- m=s->s3->tmp.new_hash; | |
8565 +- mac_type = s->s3->tmp.new_mac_pkey_type; | |
8566 +-#ifndef OPENSSL_NO_COMP | |
8567 +- comp=s->s3->tmp.new_compression; | |
8568 +-#endif | |
8569 ++ unsigned char export_tmp1[EVP_MAX_KEY_LENGTH]; | |
8570 ++ unsigned char export_tmp2[EVP_MAX_KEY_LENGTH]; | |
8571 ++ unsigned char export_iv1[EVP_MAX_IV_LENGTH * 2]; | |
8572 ++ unsigned char export_iv2[EVP_MAX_IV_LENGTH * 2]; | |
8573 + | |
8574 +-#ifdef KSSL_DEBUG | |
8575 +- printf("tls1_change_cipher_state(which= %d) w/\n", which); | |
8576 +- printf("\talg= %ld/%ld, comp= %p\n", | |
8577 +- s->s3->tmp.new_cipher->algorithm_mkey, | |
8578 +- s->s3->tmp.new_cipher->algorithm_auth, | |
8579 +- comp); | |
8580 +- printf("\tevp_cipher == %p ==? &d_cbc_ede_cipher3\n", c); | |
8581 +- printf("\tevp_cipher: nid, blksz= %d, %d, keylen=%d, ivlen=%d\n", | |
8582 +- c->nid,c->block_size,c->key_len,c->iv_len); | |
8583 +- printf("\tkey_block: len= %d, data= ", s->s3->tmp.key_block_length); | |
8584 +- { | |
8585 +- int i; | |
8586 +- for (i=0; i<s->s3->tmp.key_block_length; i++) | |
8587 +- printf("%02x", s->s3->tmp.key_block[i]); printf("\n"); | |
8588 +- } | |
8589 +-#endif /* KSSL_DEBUG */ | |
8590 +- | |
8591 +- if (which & SSL3_CC_READ) | |
8592 ++ if (is_read) | |
8593 + { | |
8594 + if (s->s3->tmp.new_cipher->algorithm2 & TLS1_STREAM_MAC) | |
8595 + s->mac_flags |= SSL_MAC_FLAG_READ_MAC_STREAM; | |
8596 +@@ -373,206 +347,257 @@ int tls1_change_cipher_state(SSL *s, int which) | |
8597 + s->mac_flags &= ~SSL_MAC_FLAG_READ_MAC_STREAM; | |
8598 + | |
8599 + if (s->enc_read_ctx != NULL) | |
8600 +- reuse_dd = 1; | |
8601 ++ EVP_CIPHER_CTX_cleanup(s->enc_read_ctx); | |
8602 + else if ((s->enc_read_ctx=OPENSSL_malloc(sizeof(EVP_CIPHER_CTX))
) == NULL) | |
8603 + goto err; | |
8604 + else | |
8605 + /* make sure it's intialized in case we exit later with
an error */ | |
8606 + EVP_CIPHER_CTX_init(s->enc_read_ctx); | |
8607 +- dd= s->enc_read_ctx; | |
8608 +- mac_ctx=ssl_replace_hash(&s->read_hash,NULL); | |
8609 +-#ifndef OPENSSL_NO_COMP | |
8610 +- if (s->expand != NULL) | |
8611 +- { | |
8612 +- COMP_CTX_free(s->expand); | |
8613 +- s->expand=NULL; | |
8614 +- } | |
8615 +- if (comp != NULL) | |
8616 +- { | |
8617 +- s->expand=COMP_CTX_new(comp->method); | |
8618 +- if (s->expand == NULL) | |
8619 +- { | |
8620 +- SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,SSL_R_COMP
RESSION_LIBRARY_ERROR); | |
8621 +- goto err2; | |
8622 +- } | |
8623 +- if (s->s3->rrec.comp == NULL) | |
8624 +- s->s3->rrec.comp=(unsigned char *) | |
8625 +- OPENSSL_malloc(SSL3_RT_MAX_ENCRYPTED_LEN
GTH); | |
8626 +- if (s->s3->rrec.comp == NULL) | |
8627 +- goto err; | |
8628 +- } | |
8629 +-#endif | |
8630 +- /* this is done by dtls1_reset_seq_numbers for DTLS1_VERSION */ | |
8631 +- if (s->version != DTLS1_VERSION) | |
8632 +- memset(&(s->s3->read_sequence[0]),0,8); | |
8633 +- mac_secret= &(s->s3->read_mac_secret[0]); | |
8634 +- mac_secret_size=&(s->s3->read_mac_secret_size); | |
8635 ++ | |
8636 ++ cipher_ctx = s->enc_read_ctx; | |
8637 ++ mac_ctx = ssl_replace_hash(&s->read_hash, NULL); | |
8638 ++ | |
8639 ++ memcpy(s->s3->read_mac_secret, mac_secret, mac_secret_len); | |
8640 ++ s->s3->read_mac_secret_size = mac_secret_len; | |
8641 + } | |
8642 + else | |
8643 + { | |
8644 + if (s->s3->tmp.new_cipher->algorithm2 & TLS1_STREAM_MAC) | |
8645 + s->mac_flags |= SSL_MAC_FLAG_WRITE_MAC_STREAM; | |
8646 +- else | |
8647 ++ else | |
8648 + s->mac_flags &= ~SSL_MAC_FLAG_WRITE_MAC_STREAM; | |
8649 ++ | |
8650 + if (s->enc_write_ctx != NULL) | |
8651 +- reuse_dd = 1; | |
8652 ++ EVP_CIPHER_CTX_cleanup(s->enc_write_ctx); | |
8653 + else if ((s->enc_write_ctx=OPENSSL_malloc(sizeof(EVP_CIPHER_CTX)
)) == NULL) | |
8654 + goto err; | |
8655 + else | |
8656 + /* make sure it's intialized in case we exit later with
an error */ | |
8657 + EVP_CIPHER_CTX_init(s->enc_write_ctx); | |
8658 +- dd= s->enc_write_ctx; | |
8659 +- mac_ctx = ssl_replace_hash(&s->write_hash,NULL); | |
8660 +-#ifndef OPENSSL_NO_COMP | |
8661 +- if (s->compress != NULL) | |
8662 +- { | |
8663 +- COMP_CTX_free(s->compress); | |
8664 +- s->compress=NULL; | |
8665 +- } | |
8666 +- if (comp != NULL) | |
8667 +- { | |
8668 +- s->compress=COMP_CTX_new(comp->method); | |
8669 +- if (s->compress == NULL) | |
8670 +- { | |
8671 +- SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,SSL_R_COMP
RESSION_LIBRARY_ERROR); | |
8672 +- goto err2; | |
8673 +- } | |
8674 +- } | |
8675 +-#endif | |
8676 +- /* this is done by dtls1_reset_seq_numbers for DTLS1_VERSION */ | |
8677 +- if (s->version != DTLS1_VERSION) | |
8678 +- memset(&(s->s3->write_sequence[0]),0,8); | |
8679 +- mac_secret= &(s->s3->write_mac_secret[0]); | |
8680 +- mac_secret_size = &(s->s3->write_mac_secret_size); | |
8681 +- } | |
8682 +- | |
8683 +- if (reuse_dd) | |
8684 +- EVP_CIPHER_CTX_cleanup(dd); | |
8685 + | |
8686 +- p=s->s3->tmp.key_block; | |
8687 +- i=*mac_secret_size=s->s3->tmp.new_mac_secret_size; | |
8688 ++ cipher_ctx = s->enc_write_ctx; | |
8689 ++ mac_ctx = ssl_replace_hash(&s->write_hash, NULL); | |
8690 + | |
8691 +- cl=EVP_CIPHER_key_length(c); | |
8692 +- j=is_export ? (cl < SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher) ? | |
8693 +- cl : SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher)) : cl; | |
8694 +- /* Was j=(exp)?5:EVP_CIPHER_key_length(c); */ | |
8695 +- /* If GCM mode only part of IV comes from PRF */ | |
8696 +- if (EVP_CIPHER_mode(c) == EVP_CIPH_GCM_MODE) | |
8697 +- k = EVP_GCM_TLS_FIXED_IV_LEN; | |
8698 +- else | |
8699 +- k=EVP_CIPHER_iv_length(c); | |
8700 +- if ( (which == SSL3_CHANGE_CIPHER_CLIENT_WRITE) || | |
8701 +- (which == SSL3_CHANGE_CIPHER_SERVER_READ)) | |
8702 +- { | |
8703 +- ms= &(p[ 0]); n=i+i; | |
8704 +- key= &(p[ n]); n+=j+j; | |
8705 +- iv= &(p[ n]); n+=k+k; | |
8706 +- exp_label=(unsigned char *)TLS_MD_CLIENT_WRITE_KEY_CONST; | |
8707 +- exp_label_len=TLS_MD_CLIENT_WRITE_KEY_CONST_SIZE; | |
8708 +- client_write=1; | |
8709 +- } | |
8710 +- else | |
8711 +- { | |
8712 +- n=i; | |
8713 +- ms= &(p[ n]); n+=i+j; | |
8714 +- key= &(p[ n]); n+=j+k; | |
8715 +- iv= &(p[ n]); n+=k; | |
8716 +- exp_label=(unsigned char *)TLS_MD_SERVER_WRITE_KEY_CONST; | |
8717 +- exp_label_len=TLS_MD_SERVER_WRITE_KEY_CONST_SIZE; | |
8718 +- client_write=0; | |
8719 ++ memcpy(s->s3->write_mac_secret, mac_secret, mac_secret_len); | |
8720 ++ s->s3->write_mac_secret_size = mac_secret_len; | |
8721 + } | |
8722 + | |
8723 +- if (n > s->s3->tmp.key_block_length) | |
8724 +- { | |
8725 +- SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,ERR_R_INTERNAL_ERROR); | |
8726 +- goto err2; | |
8727 +- } | |
8728 +- | |
8729 +- memcpy(mac_secret,ms,i); | |
8730 +- | |
8731 +- if (!(EVP_CIPHER_flags(c)&EVP_CIPH_FLAG_AEAD_CIPHER)) | |
8732 +- { | |
8733 +- mac_key = EVP_PKEY_new_mac_key(mac_type, NULL, | |
8734 +- mac_secret,*mac_secret_size); | |
8735 +- EVP_DigestSignInit(mac_ctx,NULL,m,NULL,mac_key); | |
8736 +- EVP_PKEY_free(mac_key); | |
8737 +- } | |
8738 +-#ifdef TLS_DEBUG | |
8739 +-printf("which = %04X\nmac key=",which); | |
8740 +-{ int z; for (z=0; z<i; z++) printf("%02X%c",ms[z],((z+1)%16)?' ':'\n'); } | |
8741 +-#endif | |
8742 + if (is_export) | |
8743 + { | |
8744 + /* In here I set both the read and write key/iv to the | |
8745 + * same value since only the correct one will be used :-). | |
8746 + */ | |
8747 ++ const unsigned char *label; | |
8748 ++ unsigned label_len; | |
8749 ++ | |
8750 ++ if (use_client_keys) | |
8751 ++ { | |
8752 ++ label = (const unsigned char*) TLS_MD_CLIENT_WRITE_KEY_C
ONST; | |
8753 ++ label_len = TLS_MD_CLIENT_WRITE_KEY_CONST_SIZE; | |
8754 ++ } | |
8755 ++ else | |
8756 ++ { | |
8757 ++ label = (const unsigned char*) TLS_MD_SERVER_WRITE_KEY_C
ONST; | |
8758 ++ label_len = TLS_MD_SERVER_WRITE_KEY_CONST_SIZE; | |
8759 ++ } | |
8760 ++ | |
8761 + if (!tls1_PRF(ssl_get_algorithm2(s), | |
8762 +- exp_label,exp_label_len, | |
8763 +- s->s3->client_random,SSL3_RANDOM_SIZE, | |
8764 +- s->s3->server_random,SSL3_RANDOM_SIZE, | |
8765 +- NULL,0,NULL,0, | |
8766 +- key,j,tmp1,tmp2,EVP_CIPHER_key_length(c))) | |
8767 +- goto err2; | |
8768 +- key=tmp1; | |
8769 ++ label, label_len, | |
8770 ++ s->s3->client_random, SSL3_RANDOM_SIZE, | |
8771 ++ s->s3->server_random, SSL3_RANDOM_SIZE, | |
8772 ++ NULL, 0, NULL, 0, | |
8773 ++ key /* secret */, key_len /* secret length */, | |
8774 ++ export_tmp1 /* output */, | |
8775 ++ export_tmp2 /* scratch space */, | |
8776 ++ EVP_CIPHER_key_length(s->s3->tmp.new_sym_enc) /*
output length */)) | |
8777 ++ return 0; | |
8778 ++ key = export_tmp1; | |
8779 + | |
8780 +- if (k > 0) | |
8781 ++ if (iv_len > 0) | |
8782 + { | |
8783 ++ static const unsigned char empty[] = ""; | |
8784 ++ | |
8785 + if (!tls1_PRF(ssl_get_algorithm2(s), | |
8786 +- TLS_MD_IV_BLOCK_CONST,TLS_MD_IV_BLOCK_CO
NST_SIZE, | |
8787 +- s->s3->client_random,SSL3_RANDOM_SIZE, | |
8788 +- s->s3->server_random,SSL3_RANDOM_SIZE, | |
8789 +- NULL,0,NULL,0, | |
8790 +- empty,0,iv1,iv2,k*2)) | |
8791 +- goto err2; | |
8792 +- if (client_write) | |
8793 +- iv=iv1; | |
8794 ++ TLS_MD_IV_BLOCK_CONST, TLS_MD_IV_BLOCK_C
ONST_SIZE, | |
8795 ++ s->s3->client_random, SSL3_RANDOM_SIZE, | |
8796 ++ s->s3->server_random, SSL3_RANDOM_SIZE, | |
8797 ++ NULL, 0, NULL, 0, | |
8798 ++ empty /* secret */ ,0 /* secret length *
/, | |
8799 ++ export_iv1 /* output */, | |
8800 ++ export_iv2 /* scratch space */, | |
8801 ++ iv_len * 2 /* output length */)) | |
8802 ++ return 0; | |
8803 ++ | |
8804 ++ if (use_client_keys) | |
8805 ++ iv = export_iv1; | |
8806 + else | |
8807 +- iv= &(iv1[k]); | |
8808 ++ iv = &export_iv1[iv_len]; | |
8809 + } | |
8810 + } | |
8811 + | |
8812 +- s->session->key_arg_length=0; | |
8813 +-#ifdef KSSL_DEBUG | |
8814 +- { | |
8815 +- int i; | |
8816 +- printf("EVP_CipherInit_ex(dd,c,key=,iv=,which)\n"); | |
8817 +- printf("\tkey= "); for (i=0; i<c->key_len; i++) printf("%02x", key[i]); | |
8818 +- printf("\n"); | |
8819 +- printf("\t iv= "); for (i=0; i<c->iv_len; i++) printf("%02x", iv[i]); | |
8820 +- printf("\n"); | |
8821 +- } | |
8822 +-#endif /* KSSL_DEBUG */ | |
8823 ++ /* is_aead_cipher indicates whether the EVP_CIPHER implements an AEAD | |
8824 ++ * interface. This is different from the newer EVP_AEAD interface. */ | |
8825 ++ is_aead_cipher = (EVP_CIPHER_flags(cipher) & EVP_CIPH_FLAG_AEAD_CIPHER)
!= 0; | |
8826 + | |
8827 +- if (EVP_CIPHER_mode(c) == EVP_CIPH_GCM_MODE) | |
8828 ++ if (!is_aead_cipher) | |
8829 + { | |
8830 +- EVP_CipherInit_ex(dd,c,NULL,key,NULL,(which & SSL3_CC_WRITE)); | |
8831 +- EVP_CIPHER_CTX_ctrl(dd, EVP_CTRL_GCM_SET_IV_FIXED, k, iv); | |
8832 ++ EVP_PKEY *mac_key = | |
8833 ++ EVP_PKEY_new_mac_key(s->s3->tmp.new_mac_pkey_type, | |
8834 ++ NULL, mac_secret, mac_secret_len); | |
8835 ++ if (!mac_key) | |
8836 ++ return 0; | |
8837 ++ EVP_DigestSignInit(mac_ctx, NULL, s->s3->tmp.new_hash, NULL, mac
_key); | |
8838 ++ EVP_PKEY_free(mac_key); | |
8839 + } | |
8840 +- else | |
8841 +- EVP_CipherInit_ex(dd,c,NULL,key,iv,(which & SSL3_CC_WRITE)); | |
8842 ++ | |
8843 ++ if (EVP_CIPHER_mode(cipher) == EVP_CIPH_GCM_MODE) | |
8844 ++ { | |
8845 ++ EVP_CipherInit_ex(cipher_ctx, cipher, NULL /* engine */, key, | |
8846 ++ NULL /* iv */, !is_read); | |
8847 ++ EVP_CIPHER_CTX_ctrl(cipher_ctx, EVP_CTRL_GCM_SET_IV_FIXED, iv_le
n, (void*) iv); | |
8848 ++ } | |
8849 ++ else | |
8850 ++ EVP_CipherInit_ex(cipher_ctx, cipher, NULL /* engine */, key, iv
, !is_read); | |
8851 + | |
8852 + /* Needed for "composite" AEADs, such as RC4-HMAC-MD5 */ | |
8853 +- if ((EVP_CIPHER_flags(c)&EVP_CIPH_FLAG_AEAD_CIPHER) && *mac_secret_size) | |
8854 +- EVP_CIPHER_CTX_ctrl(dd,EVP_CTRL_AEAD_SET_MAC_KEY, | |
8855 +- *mac_secret_size,mac_secret); | |
8856 +- | |
8857 +-#ifdef TLS_DEBUG | |
8858 +-printf("which = %04X\nkey=",which); | |
8859 +-{ int z; for (z=0; z<EVP_CIPHER_key_length(c); z++) printf("%02X%c",key[z],((z
+1)%16)?' ':'\n'); } | |
8860 +-printf("\niv="); | |
8861 +-{ int z; for (z=0; z<k; z++) printf("%02X%c",iv[z],((z+1)%16)?' ':'\n'); } | |
8862 +-printf("\n"); | |
8863 +-#endif | |
8864 +- | |
8865 +- OPENSSL_cleanse(tmp1,sizeof(tmp1)); | |
8866 +- OPENSSL_cleanse(tmp2,sizeof(tmp1)); | |
8867 +- OPENSSL_cleanse(iv1,sizeof(iv1)); | |
8868 +- OPENSSL_cleanse(iv2,sizeof(iv2)); | |
8869 +- return(1); | |
8870 ++ if (is_aead_cipher && mac_secret_len > 0) | |
8871 ++ EVP_CIPHER_CTX_ctrl(cipher_ctx, EVP_CTRL_AEAD_SET_MAC_KEY, | |
8872 ++ mac_secret_len, (void*) mac_secret); | |
8873 ++ | |
8874 ++ if (is_export) | |
8875 ++ { | |
8876 ++ OPENSSL_cleanse(export_tmp1, sizeof(export_tmp1)); | |
8877 ++ OPENSSL_cleanse(export_tmp2, sizeof(export_tmp1)); | |
8878 ++ OPENSSL_cleanse(export_iv1, sizeof(export_iv1)); | |
8879 ++ OPENSSL_cleanse(export_iv2, sizeof(export_iv2)); | |
8880 ++ } | |
8881 ++ | |
8882 ++ return 1; | |
8883 ++ | |
8884 ++err: | |
8885 ++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE_CIPHER, ERR_R_MALLOC_FAILURE); | |
8886 ++ return 0; | |
8887 ++ } | |
8888 ++ | |
8889 ++int tls1_change_cipher_state(SSL *s, int which) | |
8890 ++ { | |
8891 ++ /* is_read is true if we have just read a ChangeCipherSpec message - | |
8892 ++ * i.e. we need to update the read cipherspec. Otherwise we have just | |
8893 ++ * written one. */ | |
8894 ++ const char is_read = (which & SSL3_CC_READ) != 0; | |
8895 ++ /* use_client_keys is true if we wish to use the keys for the "client | |
8896 ++ * write" direction. This is the case if we're a client sending a | |
8897 ++ * ChangeCipherSpec, or a server reading a client's ChangeCipherSpec. */ | |
8898 ++ const char use_client_keys = which == SSL3_CHANGE_CIPHER_CLIENT_WRITE || | |
8899 ++ which == SSL3_CHANGE_CIPHER_SERVER_READ; | |
8900 ++ const unsigned char *client_write_mac_secret, *server_write_mac_secret,
*mac_secret; | |
8901 ++ const unsigned char *client_write_key, *server_write_key, *key; | |
8902 ++ const unsigned char *client_write_iv, *server_write_iv, *iv; | |
8903 ++ const EVP_CIPHER *cipher = s->s3->tmp.new_sym_enc; | |
8904 ++ unsigned key_len, iv_len, mac_secret_len; | |
8905 ++ const unsigned char *key_data; | |
8906 ++ const char is_export = SSL_C_IS_EXPORT(s->s3->tmp.new_cipher) != 0; | |
8907 ++ | |
8908 ++ /* Update compression contexts. */ | |
8909 ++#ifndef OPENSSL_NO_COMP | |
8910 ++ const SSL_COMP *comp = s->s3->tmp.new_compression; | |
8911 ++ | |
8912 ++ if (is_read) | |
8913 ++ { | |
8914 ++ if (s->expand != NULL) | |
8915 ++ { | |
8916 ++ COMP_CTX_free(s->expand); | |
8917 ++ s->expand = NULL; | |
8918 ++ } | |
8919 ++ if (comp != NULL) | |
8920 ++ { | |
8921 ++ s->expand=COMP_CTX_new(comp->method); | |
8922 ++ if (s->expand == NULL) | |
8923 ++ { | |
8924 ++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,SSL_R_COMP
RESSION_LIBRARY_ERROR); | |
8925 ++ return 0; | |
8926 ++ } | |
8927 ++ if (s->s3->rrec.comp == NULL) | |
8928 ++ s->s3->rrec.comp = | |
8929 ++ (unsigned char *)OPENSSL_malloc(SSL3_RT_
MAX_ENCRYPTED_LENGTH); | |
8930 ++ if (s->s3->rrec.comp == NULL) | |
8931 ++ goto err; | |
8932 ++ } | |
8933 ++ } | |
8934 ++ else | |
8935 ++ { | |
8936 ++ if (s->compress != NULL) | |
8937 ++ { | |
8938 ++ COMP_CTX_free(s->compress); | |
8939 ++ s->compress = NULL; | |
8940 ++ } | |
8941 ++ if (comp != NULL) | |
8942 ++ { | |
8943 ++ s->compress = COMP_CTX_new(comp->method); | |
8944 ++ if (s->compress == NULL) | |
8945 ++ { | |
8946 ++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,SSL_R_COMP
RESSION_LIBRARY_ERROR); | |
8947 ++ return 0; | |
8948 ++ } | |
8949 ++ } | |
8950 ++ } | |
8951 ++#endif /* OPENSSL_NO_COMP */ | |
8952 ++ | |
8953 ++ /* Reset sequence number to zero. */ | |
8954 ++ memset(is_read ? s->s3->read_sequence : s->s3->write_sequence, 0, 8); | |
8955 ++ | |
8956 ++ /* key_arg is used for SSLv2. We don't need it for TLS. */ | |
8957 ++ s->session->key_arg_length = 0; | |
8958 ++ | |
8959 ++ mac_secret_len = s->s3->tmp.new_mac_secret_size; | |
8960 ++ | |
8961 ++ key_len = EVP_CIPHER_key_length(cipher); | |
8962 ++ if (is_export && key_len > SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher)
) | |
8963 ++ key_len = SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher); | |
8964 ++ | |
8965 ++ if (EVP_CIPHER_mode(cipher) == EVP_CIPH_GCM_MODE) | |
8966 ++ iv_len = EVP_GCM_TLS_FIXED_IV_LEN; | |
8967 ++ else | |
8968 ++ iv_len = EVP_CIPHER_iv_length(cipher); | |
8969 ++ | |
8970 ++ key_data = s->s3->tmp.key_block; | |
8971 ++ client_write_mac_secret = key_data; key_data += mac_secret_len; | |
8972 ++ server_write_mac_secret = key_data; key_data += mac_secret_len; | |
8973 ++ client_write_key = key_data; key_data += key_len; | |
8974 ++ server_write_key = key_data; key_data += key_len; | |
8975 ++ client_write_iv = key_data; key_data += iv_len; | |
8976 ++ server_write_iv = key_data; key_data += iv_len; | |
8977 ++ | |
8978 ++ if (use_client_keys) | |
8979 ++ { | |
8980 ++ mac_secret = client_write_mac_secret; | |
8981 ++ key = client_write_key; | |
8982 ++ iv = client_write_iv; | |
8983 ++ } | |
8984 ++ else | |
8985 ++ { | |
8986 ++ mac_secret = server_write_mac_secret; | |
8987 ++ key = server_write_key; | |
8988 ++ iv = server_write_iv; | |
8989 ++ } | |
8990 ++ | |
8991 ++ if (key_data - s->s3->tmp.key_block != s->s3->tmp.key_block_length) | |
8992 ++ { | |
8993 ++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,ERR_R_INTERNAL_ERROR); | |
8994 ++ return 0; | |
8995 ++ } | |
8996 ++ | |
8997 ++ if (!tls1_change_cipher_state_cipher(s, is_read, use_client_keys, | |
8998 ++ mac_secret, mac_secret_len, | |
8999 ++ key, key_len, | |
9000 ++ iv, iv_len)) { | |
9001 ++ return 0; | |
9002 ++ } | |
9003 ++ | |
9004 ++ return 1; | |
9005 + err: | |
9006 +- SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,ERR_R_MALLOC_FAILURE); | |
9007 +-err2: | |
9008 +- return(0); | |
9009 ++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE, ERR_R_MALLOC_FAILURE); | |
9010 ++ return 0; | |
9011 + } | |
9012 + | |
9013 + int tls1_setup_key_block(SSL *s) | |
9014 +@@ -584,6 +609,7 @@ int tls1_setup_key_block(SSL *s) | |
9015 + SSL_COMP *comp; | |
9016 + int mac_type= NID_undef,mac_secret_size=0; | |
9017 + int ret=0; | |
9018 ++ int iv_len; | |
9019 + | |
9020 + #ifdef KSSL_DEBUG | |
9021 + printf ("tls1_setup_key_block()\n"); | |
9022 +@@ -598,11 +624,16 @@ int tls1_setup_key_block(SSL *s) | |
9023 + return(0); | |
9024 + } | |
9025 + | |
9026 ++ if (EVP_CIPHER_mode(c) == EVP_CIPH_GCM_MODE) | |
9027 ++ iv_len = EVP_GCM_TLS_FIXED_IV_LEN; | |
9028 ++ else | |
9029 ++ iv_len = EVP_CIPHER_iv_length(c); | |
9030 ++ | |
9031 + s->s3->tmp.new_sym_enc=c; | |
9032 + s->s3->tmp.new_hash=hash; | |
9033 + s->s3->tmp.new_mac_pkey_type = mac_type; | |
9034 + s->s3->tmp.new_mac_secret_size = mac_secret_size; | |
9035 +- num=EVP_CIPHER_key_length(c)+mac_secret_size+EVP_CIPHER_iv_length(c); | |
9036 ++ num=EVP_CIPHER_key_length(c)+mac_secret_size+iv_len; | |
9037 + num*=2; | |
9038 + | |
9039 + ssl3_cleanup_key_block(s); | |
9040 +-- | |
9041 +1.8.4.1 | |
9042 + | |
9043 diff -burN android-openssl-lhash2/patches/use_aead_for_aes_gcm.patch android-ope
nssl/patches/use_aead_for_aes_gcm.patch | |
9044 --- android-openssl-lhash2/patches/use_aead_for_aes_gcm.patch 1969-12-31 19:00
:00.000000000 -0500 | |
9045 +++ android-openssl/patches/use_aead_for_aes_gcm.patch 2013-11-05 14:14:34.6312
83497 -0500 | |
9046 @@ -0,0 +1,119 @@ | |
9047 +From 7156ca9ce97c1084d7fd010146c522633ad73e7a Mon Sep 17 00:00:00 2001 | |
9048 +From: Adam Langley <agl@chromium.org> | |
9049 +Date: Wed, 4 Sep 2013 12:21:12 -0400 | |
9050 +Subject: [PATCH 42/50] use_aead_for_aes_gcm. | |
9051 + | |
9052 +Switches AES-GCM ciphersuites to use AEAD interfaces. | |
9053 +--- | |
9054 + ssl/s3_lib.c | 25 +++++++++++++++---------- | |
9055 + 1 file changed, 15 insertions(+), 10 deletions(-) | |
9056 + | |
9057 +diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c | |
9058 +index 2cd1654..75b6560 100644 | |
9059 +--- a/ssl/s3_lib.c | |
9060 ++++ b/ssl/s3_lib.c | |
9061 +@@ -166,6 +166,11 @@ const char ssl3_version_str[]="SSLv3" OPENSSL_VERSION_PTEX
T; | |
9062 + | |
9063 + #define SSL3_NUM_CIPHERS (sizeof(ssl3_ciphers)/sizeof(SSL_CIPHER)) | |
9064 + | |
9065 ++/* FIXED_NONCE_LEN is a macro that results in the correct value to set the | |
9066 ++ * fixed nonce length in SSL_CIPHER.algorithms2. It's the inverse of | |
9067 ++ * SSL_CIPHER_AEAD_FIXED_NONCE_LEN. */ | |
9068 ++#define FIXED_NONCE_LEN(x) ((x/2)<<24) | |
9069 ++ | |
9070 + /* list of available SSLv3 ciphers (sorted by id) */ | |
9071 + OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
9072 + | |
9073 +@@ -1836,7 +1841,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
9074 + SSL_AEAD, | |
9075 + SSL_TLSV1_2, | |
9076 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
9077 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, | |
9078 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
9079 + 128, | |
9080 + 128, | |
9081 + }, | |
9082 +@@ -1868,7 +1873,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
9083 + SSL_AEAD, | |
9084 + SSL_TLSV1_2, | |
9085 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
9086 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, | |
9087 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
9088 + 128, | |
9089 + 128, | |
9090 + }, | |
9091 +@@ -1900,7 +1905,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
9092 + SSL_AEAD, | |
9093 + SSL_TLSV1_2, | |
9094 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
9095 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, | |
9096 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
9097 + 128, | |
9098 + 128, | |
9099 + }, | |
9100 +@@ -1932,7 +1937,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
9101 + SSL_AEAD, | |
9102 + SSL_TLSV1_2, | |
9103 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
9104 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, | |
9105 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
9106 + 128, | |
9107 + 128, | |
9108 + }, | |
9109 +@@ -1964,7 +1969,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
9110 + SSL_AEAD, | |
9111 + SSL_TLSV1_2, | |
9112 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
9113 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, | |
9114 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
9115 + 128, | |
9116 + 128, | |
9117 + }, | |
9118 +@@ -1996,7 +2001,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
9119 + SSL_AEAD, | |
9120 + SSL_TLSV1_2, | |
9121 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
9122 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, | |
9123 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
9124 + 128, | |
9125 + 128, | |
9126 + }, | |
9127 +@@ -2709,7 +2714,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
9128 + SSL_AEAD, | |
9129 + SSL_TLSV1_2, | |
9130 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
9131 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, | |
9132 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
9133 + 128, | |
9134 + 128, | |
9135 + }, | |
9136 +@@ -2741,7 +2746,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
9137 + SSL_AEAD, | |
9138 + SSL_TLSV1_2, | |
9139 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
9140 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, | |
9141 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
9142 + 128, | |
9143 + 128, | |
9144 + }, | |
9145 +@@ -2773,7 +2778,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
9146 + SSL_AEAD, | |
9147 + SSL_TLSV1_2, | |
9148 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
9149 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, | |
9150 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
9151 + 128, | |
9152 + 128, | |
9153 + }, | |
9154 +@@ -2805,7 +2810,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
9155 + SSL_AEAD, | |
9156 + SSL_TLSV1_2, | |
9157 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
9158 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, | |
9159 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
9160 + 128, | |
9161 + 128, | |
9162 + }, | |
9163 +-- | |
9164 +1.8.4.1 | |
9165 + | |
OLD | NEW |