OLD | NEW |
(Empty) | |
| 1 diff -burN android-openssl-lhash2/openssl.config android-openssl/openssl.config |
| 2 --- android-openssl-lhash2/openssl.config 2013-11-05 14:38:31.187575574 -0
500 |
| 3 +++ android-openssl/openssl.config 2013-11-05 15:03:54.661551145 -0500 |
| 4 @@ -432,6 +432,7 @@ |
| 5 crypto/buffer/buf_err.c \ |
| 6 crypto/buffer/buf_str.c \ |
| 7 crypto/buffer/buffer.c \ |
| 8 +crypto/chacha/chacha_enc.c \ |
| 9 crypto/cmac/cm_ameth.c \ |
| 10 crypto/cmac/cm_pmeth.c \ |
| 11 crypto/cmac/cmac.c \ |
| 12 @@ -565,6 +566,7 @@ |
| 13 crypto/evp/e_aes.c \ |
| 14 crypto/evp/e_aes_cbc_hmac_sha1.c \ |
| 15 crypto/evp/e_bf.c \ |
| 16 +crypto/evp/e_chacha20poly1305.c \ |
| 17 crypto/evp/e_des.c \ |
| 18 crypto/evp/e_des3.c \ |
| 19 crypto/evp/e_null.c \ |
| 20 @@ -576,6 +578,7 @@ |
| 21 crypto/evp/e_xcbc_d.c \ |
| 22 crypto/evp/encode.c \ |
| 23 crypto/evp/evp_acnf.c \ |
| 24 +crypto/evp/evp_aead.c \ |
| 25 crypto/evp/evp_cnf.c \ |
| 26 crypto/evp/evp_enc.c \ |
| 27 crypto/evp/evp_err.c \ |
| 28 @@ -674,6 +677,7 @@ |
| 29 crypto/pkcs7/pk7_smime.c \ |
| 30 crypto/pkcs7/pkcs7err.c \ |
| 31 crypto/pqueue/pqueue.c \ |
| 32 +crypto/poly1305/poly1305.c \ |
| 33 crypto/rand/md_rand.c \ |
| 34 crypto/rand/rand_egd.c \ |
| 35 crypto/rand/rand_err.c \ |
| 36 @@ -789,7 +793,10 @@ |
| 37 crypto/aes/asm/aes-armv4.S \ |
| 38 crypto/bn/asm/armv4-gf2m.S \ |
| 39 crypto/bn/asm/armv4-mont.S \ |
| 40 +crypto/chacha/chacha_vec_arm.s \ |
| 41 crypto/modes/asm/ghash-armv4.S \ |
| 42 +crypto/poly1305/poly1305_arm.c \ |
| 43 +crypto/poly1305/poly1305_arm_asm.s \ |
| 44 crypto/sha/asm/sha1-armv4-large.S \ |
| 45 crypto/sha/asm/sha256-armv4.S \ |
| 46 crypto/sha/asm/sha512-armv4.S \ |
| 47 @@ -821,10 +828,12 @@ |
| 48 crypto/bn/asm/co-586.S \ |
| 49 crypto/bn/asm/x86-gf2m.S \ |
| 50 crypto/bn/asm/x86-mont.S \ |
| 51 +crypto/chacha/chacha_vec.c \ |
| 52 crypto/des/asm/crypt586.S \ |
| 53 crypto/des/asm/des-586.S \ |
| 54 crypto/md5/asm/md5-586.S \ |
| 55 crypto/modes/asm/ghash-x86.S \ |
| 56 +crypto/poly1305/poly1305_vec.c \ |
| 57 crypto/sha/asm/sha1-586.S \ |
| 58 crypto/sha/asm/sha256-586.S \ |
| 59 crypto/sha/asm/sha512-586.S \ |
| 60 @@ -836,9 +845,11 @@ |
| 61 crypto/aes/aes_cbc.c \ |
| 62 crypto/bf/bf_enc.c \ |
| 63 crypto/bn/bn_asm.c \ |
| 64 +crypto/chacha/chacha_enc.c \ |
| 65 crypto/des/des_enc.c \ |
| 66 crypto/des/fcrypt_b.c \ |
| 67 crypto/mem_clr.c \ |
| 68 +crypto/poly1305/poly1305.c \ |
| 69 " |
| 70 |
| 71 OPENSSL_CRYPTO_SOURCES_x86_64="\ |
| 72 @@ -852,6 +863,7 @@ |
| 73 crypto/bn/asm/x86_64-gf2m.S \ |
| 74 crypto/bn/asm/x86_64-mont.S \ |
| 75 crypto/bn/asm/x86_64-mont5.S \ |
| 76 +crypto/chacha/chacha_vec.c \ |
| 77 crypto/md5/asm/md5-x86_64.S \ |
| 78 crypto/modes/asm/ghash-x86_64.S \ |
| 79 crypto/rc4/asm/rc4-md5-x86_64.S \ |
| 80 @@ -859,6 +871,7 @@ |
| 81 crypto/sha/asm/sha1-x86_64.S \ |
| 82 crypto/sha/asm/sha256-x86_64.S \ |
| 83 crypto/sha/asm/sha512-x86_64.S \ |
| 84 +crypto/poly1305/poly1305_vec.c \ |
| 85 crypto/x86_64cpuid.S \ |
| 86 " |
| 87 |
| 88 @@ -866,7 +879,9 @@ |
| 89 crypto/aes/aes_cbc.c \ |
| 90 crypto/aes/aes_core.c \ |
| 91 crypto/bn/bn_asm.c \ |
| 92 +crypto/chacha/chacha_enc.c \ |
| 93 crypto/mem_clr.c \ |
| 94 +crypto/poly1305/poly1305.c \ |
| 95 crypto/rc4/rc4_enc.c \ |
| 96 crypto/rc4/rc4_skey.c \ |
| 97 " |
| 98 @@ -998,6 +1013,12 @@ |
| 99 x509_hash_name_algorithm_change.patch \ |
| 100 reduce_client_hello_size.patch \ |
| 101 fix_lhash_iteration.patch \ |
| 102 +tls1_change_cipher_state_rewrite.patch \ |
| 103 +aead_support.patch \ |
| 104 +aead_ssl_support.patch \ |
| 105 +use_aead_for_aes_gcm.patch \ |
| 106 +chacha20poly1305.patch \ |
| 107 +neon_runtime.patch \ |
| 108 " |
| 109 |
| 110 OPENSSL_PATCHES_progs_SOURCES="\ |
| 111 diff -burN android-openssl-lhash2/patches/aead_ssl_support.patch android-openssl
/patches/aead_ssl_support.patch |
| 112 --- android-openssl-lhash2/patches/aead_ssl_support.patch 1969-12-31 19:00
:00.000000000 -0500 |
| 113 +++ android-openssl/patches/aead_ssl_support.patch 2013-11-05 14:14:34.6312
83497 -0500 |
| 114 @@ -0,0 +1,690 @@ |
| 115 +From dc8386dbb390f4b867019873cd072a5fe01ba4e9 Mon Sep 17 00:00:00 2001 |
| 116 +From: Adam Langley <agl@chromium.org> |
| 117 +Date: Thu, 25 Jul 2013 17:35:23 -0400 |
| 118 +Subject: [PATCH 41/50] aead_ssl_support. |
| 119 + |
| 120 +This change allows AEADs to be used in ssl/ to implement SSL/TLS |
| 121 +ciphersuites. |
| 122 +--- |
| 123 + ssl/s2_clnt.c | 2 +- |
| 124 + ssl/s2_enc.c | 2 +- |
| 125 + ssl/s2_srvr.c | 2 +- |
| 126 + ssl/s3_enc.c | 8 +- |
| 127 + ssl/s3_pkt.c | 4 +- |
| 128 + ssl/ssl.h | 15 +++- |
| 129 + ssl/ssl3.h | 1 + |
| 130 + ssl/ssl_ciph.c | 70 +++++++++++---- |
| 131 + ssl/ssl_err.c | 3 + |
| 132 + ssl/ssl_lib.c | 12 +++ |
| 133 + ssl/ssl_locl.h | 23 ++++- |
| 134 + ssl/ssl_txt.c | 2 +- |
| 135 + ssl/t1_enc.c | 262 +++++++++++++++++++++++++++++++++++++++++++++++++++------ |
| 136 + 13 files changed, 356 insertions(+), 50 deletions(-) |
| 137 + |
| 138 +diff --git a/ssl/s2_clnt.c b/ssl/s2_clnt.c |
| 139 +index 03b6cf9..32adaf5 100644 |
| 140 +--- a/ssl/s2_clnt.c |
| 141 ++++ b/ssl/s2_clnt.c |
| 142 +@@ -623,7 +623,7 @@ static int client_master_key(SSL *s) |
| 143 + if (s->state == SSL2_ST_SEND_CLIENT_MASTER_KEY_A) |
| 144 + { |
| 145 + |
| 146 +- if (!ssl_cipher_get_evp(s->session,&c,&md,NULL,NULL,NULL)) |
| 147 ++ if (!ssl_cipher_get_evp(s->session,&c,&md,NULL,NULL)) |
| 148 + { |
| 149 + ssl2_return_error(s,SSL2_PE_NO_CIPHER); |
| 150 + SSLerr(SSL_F_CLIENT_MASTER_KEY,SSL_R_PROBLEMS_MAPPING_CI
PHER_FUNCTIONS); |
| 151 +diff --git a/ssl/s2_enc.c b/ssl/s2_enc.c |
| 152 +index ff3395f..087c4a2 100644 |
| 153 +--- a/ssl/s2_enc.c |
| 154 ++++ b/ssl/s2_enc.c |
| 155 +@@ -68,7 +68,7 @@ int ssl2_enc_init(SSL *s, int client) |
| 156 + const EVP_MD *md; |
| 157 + int num; |
| 158 + |
| 159 +- if (!ssl_cipher_get_evp(s->session,&c,&md,NULL,NULL,NULL)) |
| 160 ++ if (!ssl_cipher_get_evp(s->session,&c,&md,NULL,NULL)) |
| 161 + { |
| 162 + ssl2_return_error(s,SSL2_PE_NO_CIPHER); |
| 163 + SSLerr(SSL_F_SSL2_ENC_INIT,SSL_R_PROBLEMS_MAPPING_CIPHER_FUNCTIO
NS); |
| 164 +diff --git a/ssl/s2_srvr.c b/ssl/s2_srvr.c |
| 165 +index 9b1a6ac..9392921 100644 |
| 166 +--- a/ssl/s2_srvr.c |
| 167 ++++ b/ssl/s2_srvr.c |
| 168 +@@ -452,7 +452,7 @@ static int get_client_master_key(SSL *s) |
| 169 + |
| 170 + is_export=SSL_C_IS_EXPORT(s->session->cipher); |
| 171 + |
| 172 +- if (!ssl_cipher_get_evp(s->session,&c,&md,NULL,NULL,NULL)) |
| 173 ++ if (!ssl_cipher_get_evp(s->session,&c,&md,NULL,NULL)) |
| 174 + { |
| 175 + ssl2_return_error(s,SSL2_PE_NO_CIPHER); |
| 176 + SSLerr(SSL_F_GET_CLIENT_MASTER_KEY,SSL_R_PROBLEMS_MAPPING_CIPHER
_FUNCTIONS); |
| 177 +diff --git a/ssl/s3_enc.c b/ssl/s3_enc.c |
| 178 +index e3cd4f0..191b86b 100644 |
| 179 +--- a/ssl/s3_enc.c |
| 180 ++++ b/ssl/s3_enc.c |
| 181 +@@ -397,7 +397,13 @@ int ssl3_setup_key_block(SSL *s) |
| 182 + if (s->s3->tmp.key_block_length != 0) |
| 183 + return(1); |
| 184 + |
| 185 +- if (!ssl_cipher_get_evp(s->session,&c,&hash,NULL,NULL,&comp)) |
| 186 ++ if (!ssl_cipher_get_comp(s->session, &comp)) |
| 187 ++ { |
| 188 ++ SSLerr(SSL_F_SSL3_SETUP_KEY_BLOCK,SSL_R_CIPHER_OR_HASH_UNAVAILAB
LE); |
| 189 ++ return(0); |
| 190 ++ } |
| 191 ++ |
| 192 ++ if (!ssl_cipher_get_evp(s->session,&c,&hash,NULL,NULL)) |
| 193 + { |
| 194 + SSLerr(SSL_F_SSL3_SETUP_KEY_BLOCK,SSL_R_CIPHER_OR_HASH_UNAVAILAB
LE); |
| 195 + return(0); |
| 196 +diff --git a/ssl/s3_pkt.c b/ssl/s3_pkt.c |
| 197 +index 33bb78a..5038f6c 100644 |
| 198 +--- a/ssl/s3_pkt.c |
| 199 ++++ b/ssl/s3_pkt.c |
| 200 +@@ -790,7 +790,9 @@ static int do_ssl3_write(SSL *s, int type, const unsigned c
har *buf, |
| 201 + else |
| 202 + eivlen = 0; |
| 203 + } |
| 204 +- else |
| 205 ++ else if (s->aead_write_ctx != NULL) |
| 206 ++ eivlen = s->aead_write_ctx->variable_nonce_len; |
| 207 ++ else |
| 208 + eivlen = 0; |
| 209 + |
| 210 + /* lets setup the record stuff. */ |
| 211 +diff --git a/ssl/ssl.h b/ssl/ssl.h |
| 212 +index 672f3eb..0644cbf 100644 |
| 213 +--- a/ssl/ssl.h |
| 214 ++++ b/ssl/ssl.h |
| 215 +@@ -406,7 +406,9 @@ struct ssl_cipher_st |
| 216 + unsigned long algorithm_ssl; /* (major) protocol version */ |
| 217 + |
| 218 + unsigned long algo_strength; /* strength and export flags */ |
| 219 +- unsigned long algorithm2; /* Extra flags */ |
| 220 ++ unsigned long algorithm2; /* Extra flags. See SSL2_CF_* in ssl2.h |
| 221 ++ and algorithm2 section in |
| 222 ++ ssl_locl.h */ |
| 223 + int strength_bits; /* Number of bits really used */ |
| 224 + int alg_bits; /* Number of bits for algorithm */ |
| 225 + }; |
| 226 +@@ -748,6 +750,9 @@ int SRP_generate_client_master_secret(SSL *s,unsigned char
*master_key); |
| 227 + |
| 228 + #endif |
| 229 + |
| 230 ++struct ssl_aead_ctx_st; |
| 231 ++typedef struct ssl_aead_ctx_st SSL_AEAD_CTX; |
| 232 ++ |
| 233 + #if defined(OPENSSL_SYS_MSDOS) && !defined(OPENSSL_SYS_WIN32) |
| 234 + #define SSL_MAX_CERT_LIST_DEFAULT 1024*30 /* 30k max cert list :-) */ |
| 235 + #else |
| 236 +@@ -1294,6 +1299,9 @@ struct ssl_st |
| 237 + /* These are the ones being used, the ones in SSL_SESSION are |
| 238 + * the ones to be 'copied' into these ones */ |
| 239 + int mac_flags; |
| 240 ++ SSL_AEAD_CTX *aead_read_ctx; /* AEAD context. If non-NULL, then |
| 241 ++ |enc_read_ctx| and |read_hash| are |
| 242 ++ ignored. */ |
| 243 + EVP_CIPHER_CTX *enc_read_ctx; /* cryptographic state */ |
| 244 + EVP_MD_CTX *read_hash; /* used for mac generation */ |
| 245 + #ifndef OPENSSL_NO_COMP |
| 246 +@@ -1302,6 +1310,9 @@ struct ssl_st |
| 247 + char *expand; |
| 248 + #endif |
| 249 + |
| 250 ++ SSL_AEAD_CTX *aead_write_ctx; /* AEAD context. If non-NULL, then |
| 251 ++ |enc_write_ctx| and |write_hash| are |
| 252 ++ ignored. */ |
| 253 + EVP_CIPHER_CTX *enc_write_ctx; /* cryptographic state */ |
| 254 + EVP_MD_CTX *write_hash; /* used for mac generation */ |
| 255 + #ifndef OPENSSL_NO_COMP |
| 256 +@@ -2437,8 +2448,10 @@ void ERR_load_SSL_strings(void); |
| 257 + #define SSL_F_SSL_USE_RSAPRIVATEKEY_FILE 206 |
| 258 + #define SSL_F_SSL_VERIFY_CERT_CHAIN 207 |
| 259 + #define SSL_F_SSL_WRITE 208 |
| 260 ++#define SSL_F_TLS1_AEAD_CTX_INIT 339 |
| 261 + #define SSL_F_TLS1_CERT_VERIFY_MAC 286 |
| 262 + #define SSL_F_TLS1_CHANGE_CIPHER_STATE 209 |
| 263 ++#define SSL_F_TLS1_CHANGE_CIPHER_STATE_AEAD 340 |
| 264 + #define SSL_F_TLS1_CHANGE_CIPHER_STATE_CIPHER 338 |
| 265 + #define SSL_F_TLS1_CHECK_SERVERHELLO_TLSEXT 274 |
| 266 + #define SSL_F_TLS1_ENC 210 |
| 267 +diff --git a/ssl/ssl3.h b/ssl/ssl3.h |
| 268 +index a4f6d4a..6a5cdbe 100644 |
| 269 +--- a/ssl/ssl3.h |
| 270 ++++ b/ssl/ssl3.h |
| 271 +@@ -517,6 +517,7 @@ typedef struct ssl3_state_st |
| 272 + unsigned char *key_block; |
| 273 + |
| 274 + const EVP_CIPHER *new_sym_enc; |
| 275 ++ const EVP_AEAD *new_aead; |
| 276 + const EVP_MD *new_hash; |
| 277 + int new_mac_pkey_type; |
| 278 + int new_mac_secret_size; |
| 279 +diff --git a/ssl/ssl_ciph.c b/ssl/ssl_ciph.c |
| 280 +index 2966ddf..7e780cd 100644 |
| 281 +--- a/ssl/ssl_ciph.c |
| 282 ++++ b/ssl/ssl_ciph.c |
| 283 +@@ -484,32 +484,66 @@ static void load_builtin_compressions(void) |
| 284 + } |
| 285 + #endif |
| 286 + |
| 287 ++/* ssl_cipher_get_comp sets |comp| to the correct SSL_COMP for the given |
| 288 ++ * session and returns 1. On error it returns 0. */ |
| 289 ++int ssl_cipher_get_comp(const SSL_SESSION *s, SSL_COMP **comp) |
| 290 ++ { |
| 291 ++ int i; |
| 292 ++ |
| 293 ++ SSL_COMP ctmp; |
| 294 ++#ifndef OPENSSL_NO_COMP |
| 295 ++ load_builtin_compressions(); |
| 296 ++#endif |
| 297 ++ |
| 298 ++ *comp=NULL; |
| 299 ++ ctmp.id=s->compress_meth; |
| 300 ++ if (ssl_comp_methods != NULL) |
| 301 ++ { |
| 302 ++ i=sk_SSL_COMP_find(ssl_comp_methods,&ctmp); |
| 303 ++ if (i >= 0) |
| 304 ++ *comp=sk_SSL_COMP_value(ssl_comp_methods,i); |
| 305 ++ else |
| 306 ++ *comp=NULL; |
| 307 ++ } |
| 308 ++ |
| 309 ++ return 1; |
| 310 ++ } |
| 311 ++ |
| 312 ++/* ssl_cipher_get_evp_aead sets |*aead| to point to the correct EVP_AEAD objec
t |
| 313 ++ * for |s->cipher|. It returns 1 on success and 0 on error. */ |
| 314 ++int ssl_cipher_get_evp_aead(const SSL_SESSION *s, const EVP_AEAD **aead) |
| 315 ++ { |
| 316 ++ const SSL_CIPHER *c = s->cipher; |
| 317 ++ |
| 318 ++ *aead = NULL; |
| 319 ++ |
| 320 ++ if (c == NULL) |
| 321 ++ return 0; |
| 322 ++ if ((c->algorithm2 & SSL_CIPHER_ALGORITHM2_AEAD) == 0) |
| 323 ++ return 0; |
| 324 ++ |
| 325 ++#ifndef OPENSSL_NO_AES |
| 326 ++ /* There is only one AEAD for now. */ |
| 327 ++ *aead = EVP_aead_aes_128_gcm(); |
| 328 ++ return 1; |
| 329 ++#endif |
| 330 ++ |
| 331 ++ return 0; |
| 332 ++ } |
| 333 ++ |
| 334 + int ssl_cipher_get_evp(const SSL_SESSION *s, const EVP_CIPHER **enc, |
| 335 +- const EVP_MD **md, int *mac_pkey_type, int *mac_secret_size,SSL_COM
P **comp) |
| 336 ++ const EVP_MD **md, int *mac_pkey_type, int *mac_secret_size) |
| 337 + { |
| 338 + int i; |
| 339 + const SSL_CIPHER *c; |
| 340 + |
| 341 + c=s->cipher; |
| 342 + if (c == NULL) return(0); |
| 343 +- if (comp != NULL) |
| 344 +- { |
| 345 +- SSL_COMP ctmp; |
| 346 +-#ifndef OPENSSL_NO_COMP |
| 347 +- load_builtin_compressions(); |
| 348 +-#endif |
| 349 + |
| 350 +- *comp=NULL; |
| 351 +- ctmp.id=s->compress_meth; |
| 352 +- if (ssl_comp_methods != NULL) |
| 353 +- { |
| 354 +- i=sk_SSL_COMP_find(ssl_comp_methods,&ctmp); |
| 355 +- if (i >= 0) |
| 356 +- *comp=sk_SSL_COMP_value(ssl_comp_methods,i); |
| 357 +- else |
| 358 +- *comp=NULL; |
| 359 +- } |
| 360 +- } |
| 361 ++ /* This function doesn't deal with EVP_AEAD. See |
| 362 ++ * |ssl_cipher_get_aead_evp|. */ |
| 363 ++ if (c->algorithm2 & SSL_CIPHER_ALGORITHM2_AEAD) |
| 364 ++ return(0); |
| 365 + |
| 366 + if ((enc == NULL) || (md == NULL)) return(0); |
| 367 + |
| 368 +diff --git a/ssl/ssl_err.c b/ssl/ssl_err.c |
| 369 +index 97b2a0d..ad3a7b9 100644 |
| 370 +--- a/ssl/ssl_err.c |
| 371 ++++ b/ssl/ssl_err.c |
| 372 +@@ -280,6 +280,9 @@ static ERR_STRING_DATA SSL_str_functs[]= |
| 373 + {ERR_FUNC(SSL_F_SSL_VERIFY_CERT_CHAIN), "SSL_VERIFY_CERT_CHAIN"}, |
| 374 + {ERR_FUNC(SSL_F_SSL_WRITE), "SSL_write"}, |
| 375 + {ERR_FUNC(SSL_F_TLS1_CERT_VERIFY_MAC), "tls1_cert_verify_mac"}, |
| 376 ++{ERR_FUNC(SSL_F_TLS1_AEAD_CTX_INIT), "TLS1_AEAD_CTX_INIT"}, |
| 377 ++{ERR_FUNC(SSL_F_TLS1_CHANGE_CIPHER_STATE), "tls1_change_cipher_state"}, |
| 378 ++{ERR_FUNC(SSL_F_TLS1_CHANGE_CIPHER_STATE_AEAD), "TLS1_CHANGE_CIPHER_STAT
E_AEAD"}, |
| 379 + {ERR_FUNC(SSL_F_TLS1_CHANGE_CIPHER_STATE_CIPHER), "TLS1_CHANGE_CIPHER_STAT
E_CIPHER"}, |
| 380 + {ERR_FUNC(SSL_F_TLS1_CHECK_SERVERHELLO_TLSEXT), "TLS1_CHECK_SERVERHELLO_
TLSEXT"}, |
| 381 + {ERR_FUNC(SSL_F_TLS1_ENC), "TLS1_ENC"}, |
| 382 +diff --git a/ssl/ssl_lib.c b/ssl/ssl_lib.c |
| 383 +index 3b264b6..8a0150c 100644 |
| 384 +--- a/ssl/ssl_lib.c |
| 385 ++++ b/ssl/ssl_lib.c |
| 386 +@@ -2881,6 +2881,18 @@ void ssl_clear_cipher_ctx(SSL *s) |
| 387 + OPENSSL_free(s->enc_write_ctx); |
| 388 + s->enc_write_ctx=NULL; |
| 389 + } |
| 390 ++ if (s->aead_read_ctx != NULL) |
| 391 ++ { |
| 392 ++ EVP_AEAD_CTX_cleanup(&s->aead_read_ctx->ctx); |
| 393 ++ OPENSSL_free(s->aead_read_ctx); |
| 394 ++ s->aead_read_ctx = NULL; |
| 395 ++ } |
| 396 ++ if (s->aead_write_ctx != NULL) |
| 397 ++ { |
| 398 ++ EVP_AEAD_CTX_cleanup(&s->aead_write_ctx->ctx); |
| 399 ++ OPENSSL_free(s->aead_write_ctx); |
| 400 ++ s->aead_write_ctx = NULL; |
| 401 ++ } |
| 402 + #ifndef OPENSSL_NO_COMP |
| 403 + if (s->expand != NULL) |
| 404 + { |
| 405 +diff --git a/ssl/ssl_locl.h b/ssl/ssl_locl.h |
| 406 +index 3d800af..63bc28b 100644 |
| 407 +--- a/ssl/ssl_locl.h |
| 408 ++++ b/ssl/ssl_locl.h |
| 409 +@@ -380,6 +380,14 @@ |
| 410 + |
| 411 + #define TLSEXT_CHANNEL_ID_SIZE 128 |
| 412 + |
| 413 ++/* SSL_CIPHER_ALGORITHM2_AEAD is a flag in SSL_CIPHER.algorithm2 which |
| 414 ++ * indicates that the cipher is implemented via an EVP_AEAD. */ |
| 415 ++#define SSL_CIPHER_ALGORITHM2_AEAD (1<<23) |
| 416 ++ |
| 417 ++/* SSL_CIPHER_AEAD_FIXED_NONCE_LEN returns the number of bytes of fixed nonce |
| 418 ++ * for an SSL_CIPHER* with the SSL_CIPHER_ALGORITHM2_AEAD flag. */ |
| 419 ++#define SSL_CIPHER_AEAD_FIXED_NONCE_LEN(ssl_cipher) \ |
| 420 ++ (((ssl_cipher->algorithm2 >> 24) & 0xf)*2) |
| 421 + |
| 422 + /* |
| 423 + * Export and cipher strength information. For each cipher we have to decide |
| 424 +@@ -588,6 +596,17 @@ typedef struct ssl3_enc_method |
| 425 + int use_context); |
| 426 + } SSL3_ENC_METHOD; |
| 427 + |
| 428 ++/* ssl_aead_ctx_st contains information about an AEAD that is being used to |
| 429 ++ * encrypt an SSL connection. */ |
| 430 ++struct ssl_aead_ctx_st |
| 431 ++ { |
| 432 ++ EVP_AEAD_CTX ctx; |
| 433 ++ /* fixed_nonce contains any bytes of the nonce that are fixed for all |
| 434 ++ * records. */ |
| 435 ++ unsigned char fixed_nonce[8]; |
| 436 ++ unsigned char fixed_nonce_len, variable_nonce_len, tag_len; |
| 437 ++ }; |
| 438 ++ |
| 439 + #ifndef OPENSSL_NO_COMP |
| 440 + /* Used for holding the relevant compression methods loaded into SSL_CTX */ |
| 441 + typedef struct ssl3_comp_st |
| 442 +@@ -834,8 +853,10 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_list(const SSL_MET
HOD *meth, |
| 443 + STACK_OF(SSL_CIPHER) **sorted, |
| 444 + const char *rule_str); |
| 445 + void ssl_update_cache(SSL *s, int mode); |
| 446 ++int ssl_cipher_get_comp(const SSL_SESSION *s, SSL_COMP **comp); |
| 447 ++int ssl_cipher_get_evp_aead(const SSL_SESSION *s, const EVP_AEAD **aead); |
| 448 + int ssl_cipher_get_evp(const SSL_SESSION *s,const EVP_CIPHER **enc, |
| 449 +- const EVP_MD **md,int *mac_pkey_type,int *mac_secret_size
, SSL_COMP **comp); |
| 450 ++ const EVP_MD **md,int *mac_pkey_type,int *mac_secret_size
); |
| 451 + int ssl_get_handshake_digest(int i,long *mask,const EVP_MD **md);
|
| 452 + int ssl_verify_cert_chain(SSL *s,STACK_OF(X509) *sk); |
| 453 + int ssl_undefined_function(SSL *s); |
| 454 +diff --git a/ssl/ssl_txt.c b/ssl/ssl_txt.c |
| 455 +index 6479d52..07826d5 100644 |
| 456 +--- a/ssl/ssl_txt.c |
| 457 ++++ b/ssl/ssl_txt.c |
| 458 +@@ -216,7 +216,7 @@ int SSL_SESSION_print(BIO *bp, const SSL_SESSION *x) |
| 459 + { |
| 460 + SSL_COMP *comp = NULL; |
| 461 + |
| 462 +- ssl_cipher_get_evp(x,NULL,NULL,NULL,NULL,&comp); |
| 463 ++ ssl_cipher_get_comp(x, &comp); |
| 464 + if (comp == NULL) |
| 465 + { |
| 466 + if (BIO_printf(bp,"\n Compression: %d",x->compress_me
th) <= 0) goto err; |
| 467 +diff --git a/ssl/t1_enc.c b/ssl/t1_enc.c |
| 468 +index e1f91ba..7af1a32 100644 |
| 469 +--- a/ssl/t1_enc.c |
| 470 ++++ b/ssl/t1_enc.c |
| 471 +@@ -316,6 +316,66 @@ static int tls1_generate_key_block(SSL *s, unsigned char *
km, |
| 472 + return ret; |
| 473 + } |
| 474 + |
| 475 ++/* tls1_aead_ctx_init allocates |*aead_ctx|, if needed and returns 1. It |
| 476 ++ * returns 0 on malloc error. */ |
| 477 ++static int tls1_aead_ctx_init(SSL_AEAD_CTX **aead_ctx) |
| 478 ++ { |
| 479 ++ if (*aead_ctx != NULL) |
| 480 ++ EVP_AEAD_CTX_cleanup(&(*aead_ctx)->ctx); |
| 481 ++ else |
| 482 ++ { |
| 483 ++ *aead_ctx = (SSL_AEAD_CTX*) OPENSSL_malloc(sizeof(SSL_AEAD_CTX))
; |
| 484 ++ if (*aead_ctx == NULL) |
| 485 ++ { |
| 486 ++ SSLerr(SSL_F_TLS1_AEAD_CTX_INIT, ERR_R_MALLOC_FAILURE); |
| 487 ++ return 0; |
| 488 ++ } |
| 489 ++ } |
| 490 ++ |
| 491 ++ return 1; |
| 492 ++ } |
| 493 ++ |
| 494 ++static int tls1_change_cipher_state_aead(SSL *s, char is_read, |
| 495 ++ const unsigned char *key, unsigned key_len, |
| 496 ++ const unsigned char *iv, unsigned iv_len) |
| 497 ++ { |
| 498 ++ const EVP_AEAD *aead = s->s3->tmp.new_aead; |
| 499 ++ SSL_AEAD_CTX *aead_ctx; |
| 500 ++ |
| 501 ++ if (is_read) |
| 502 ++ { |
| 503 ++ if (!tls1_aead_ctx_init(&s->aead_read_ctx)) |
| 504 ++ return 0; |
| 505 ++ aead_ctx = s->aead_read_ctx; |
| 506 ++ } |
| 507 ++ else |
| 508 ++ { |
| 509 ++ if (!tls1_aead_ctx_init(&s->aead_write_ctx)) |
| 510 ++ return 0; |
| 511 ++ aead_ctx = s->aead_write_ctx; |
| 512 ++ } |
| 513 ++ |
| 514 ++ if (!EVP_AEAD_CTX_init(&aead_ctx->ctx, aead, key, key_len, |
| 515 ++ EVP_AEAD_DEFAULT_TAG_LENGTH, NULL /* engine */)) |
| 516 ++ return 0; |
| 517 ++ if (iv_len > sizeof(aead_ctx->fixed_nonce)) |
| 518 ++ { |
| 519 ++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE_AEAD, ERR_R_INTERNAL_ERROR
); |
| 520 ++ return 0; |
| 521 ++ } |
| 522 ++ memcpy(aead_ctx->fixed_nonce, iv, iv_len); |
| 523 ++ aead_ctx->fixed_nonce_len = iv_len; |
| 524 ++ aead_ctx->variable_nonce_len = 8; /* always the case, currently. */ |
| 525 ++ if (aead_ctx->variable_nonce_len + aead_ctx->fixed_nonce_len != EVP_AEAD
_nonce_length(aead)) |
| 526 ++ { |
| 527 ++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE_AEAD, ERR_R_INTERNAL_ERROR
); |
| 528 ++ return 0; |
| 529 ++ } |
| 530 ++ aead_ctx->tag_len = EVP_AEAD_max_overhead(aead); |
| 531 ++ |
| 532 ++ return 1; |
| 533 ++ } |
| 534 ++ |
| 535 + /* tls1_change_cipher_state_cipher performs the work needed to switch cipher |
| 536 + * states when using EVP_CIPHER. The argument |is_read| is true iff this |
| 537 + * function is being called due to reading, as opposed to writing, a |
| 538 +@@ -494,6 +554,7 @@ int tls1_change_cipher_state(SSL *s, int which) |
| 539 + const unsigned char *client_write_key, *server_write_key, *key; |
| 540 + const unsigned char *client_write_iv, *server_write_iv, *iv; |
| 541 + const EVP_CIPHER *cipher = s->s3->tmp.new_sym_enc; |
| 542 ++ const EVP_AEAD *aead = s->s3->tmp.new_aead; |
| 543 + unsigned key_len, iv_len, mac_secret_len; |
| 544 + const unsigned char *key_data; |
| 545 + const char is_export = SSL_C_IS_EXPORT(s->s3->tmp.new_cipher) != 0; |
| 546 +@@ -551,14 +612,22 @@ int tls1_change_cipher_state(SSL *s, int which) |
| 547 + |
| 548 + mac_secret_len = s->s3->tmp.new_mac_secret_size; |
| 549 + |
| 550 +- key_len = EVP_CIPHER_key_length(cipher); |
| 551 +- if (is_export && key_len > SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher)
) |
| 552 +- key_len = SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher); |
| 553 +- |
| 554 +- if (EVP_CIPHER_mode(cipher) == EVP_CIPH_GCM_MODE) |
| 555 +- iv_len = EVP_GCM_TLS_FIXED_IV_LEN; |
| 556 ++ if (aead != NULL) |
| 557 ++ { |
| 558 ++ key_len = EVP_AEAD_key_length(aead); |
| 559 ++ iv_len = SSL_CIPHER_AEAD_FIXED_NONCE_LEN(s->s3->tmp.new_cipher); |
| 560 ++ } |
| 561 + else |
| 562 +- iv_len = EVP_CIPHER_iv_length(cipher); |
| 563 ++ { |
| 564 ++ key_len = EVP_CIPHER_key_length(cipher); |
| 565 ++ if (is_export && key_len > SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new
_cipher)) |
| 566 ++ key_len = SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher); |
| 567 ++ |
| 568 ++ if (EVP_CIPHER_mode(cipher) == EVP_CIPH_GCM_MODE) |
| 569 ++ iv_len = EVP_GCM_TLS_FIXED_IV_LEN; |
| 570 ++ else |
| 571 ++ iv_len = EVP_CIPHER_iv_length(cipher); |
| 572 ++ } |
| 573 + |
| 574 + key_data = s->s3->tmp.key_block; |
| 575 + client_write_mac_secret = key_data; key_data += mac_secret_len; |
| 576 +@@ -587,12 +656,20 @@ int tls1_change_cipher_state(SSL *s, int which) |
| 577 + return 0; |
| 578 + } |
| 579 + |
| 580 +- if (!tls1_change_cipher_state_cipher(s, is_read, use_client_keys, |
| 581 +- mac_secret, mac_secret_len, |
| 582 +- key, key_len, |
| 583 +- iv, iv_len)) { |
| 584 +- return 0; |
| 585 +- } |
| 586 ++ if (aead != NULL) |
| 587 ++ { |
| 588 ++ if (!tls1_change_cipher_state_aead(s, is_read, |
| 589 ++ key, key_len, iv, iv_len)) |
| 590 ++ return 0; |
| 591 ++ } |
| 592 ++ else |
| 593 ++ { |
| 594 ++ if (!tls1_change_cipher_state_cipher(s, is_read, use_client_keys
, |
| 595 ++ mac_secret, mac_secret_len, |
| 596 ++ key, key_len, |
| 597 ++ iv, iv_len)) |
| 598 ++ return 0; |
| 599 ++ } |
| 600 + |
| 601 + return 1; |
| 602 + err: |
| 603 +@@ -603,13 +680,14 @@ err: |
| 604 + int tls1_setup_key_block(SSL *s) |
| 605 + { |
| 606 + unsigned char *p1,*p2=NULL; |
| 607 +- const EVP_CIPHER *c; |
| 608 +- const EVP_MD *hash; |
| 609 ++ const EVP_CIPHER *c = NULL; |
| 610 ++ const EVP_MD *hash = NULL; |
| 611 ++ const EVP_AEAD *aead = NULL; |
| 612 + int num; |
| 613 + SSL_COMP *comp; |
| 614 + int mac_type= NID_undef,mac_secret_size=0; |
| 615 + int ret=0; |
| 616 +- int iv_len; |
| 617 ++ unsigned key_len, iv_len; |
| 618 + |
| 619 + #ifdef KSSL_DEBUG |
| 620 + printf ("tls1_setup_key_block()\n"); |
| 621 +@@ -618,22 +696,36 @@ int tls1_setup_key_block(SSL *s) |
| 622 + if (s->s3->tmp.key_block_length != 0) |
| 623 + return(1); |
| 624 + |
| 625 +- if (!ssl_cipher_get_evp(s->session,&c,&hash,&mac_type,&mac_secret_size,&
comp)) |
| 626 ++ if (!ssl_cipher_get_comp(s->session, &comp)) |
| 627 ++ goto cipher_unavailable_err; |
| 628 ++ |
| 629 ++ if (s->session->cipher && |
| 630 ++ (s->session->cipher->algorithm2 & SSL_CIPHER_ALGORITHM2_AEAD)) |
| 631 + { |
| 632 +- SSLerr(SSL_F_TLS1_SETUP_KEY_BLOCK,SSL_R_CIPHER_OR_HASH_UNAVAILAB
LE); |
| 633 +- return(0); |
| 634 ++ if (!ssl_cipher_get_evp_aead(s->session, &aead)) |
| 635 ++ goto cipher_unavailable_err; |
| 636 ++ key_len = EVP_AEAD_key_length(aead); |
| 637 ++ iv_len = SSL_CIPHER_AEAD_FIXED_NONCE_LEN(s->session->cipher); |
| 638 + } |
| 639 +- |
| 640 +- if (EVP_CIPHER_mode(c) == EVP_CIPH_GCM_MODE) |
| 641 +- iv_len = EVP_GCM_TLS_FIXED_IV_LEN; |
| 642 + else |
| 643 +- iv_len = EVP_CIPHER_iv_length(c); |
| 644 ++ { |
| 645 ++ if (!ssl_cipher_get_evp(s->session,&c,&hash,&mac_type,&mac_secre
t_size)) |
| 646 ++ goto cipher_unavailable_err; |
| 647 ++ key_len = EVP_CIPHER_key_length(c); |
| 648 + |
| 649 ++ if (EVP_CIPHER_mode(c) == EVP_CIPH_GCM_MODE) |
| 650 ++ iv_len = EVP_GCM_TLS_FIXED_IV_LEN; |
| 651 ++ else |
| 652 ++ iv_len = EVP_CIPHER_iv_length(c); |
| 653 ++ } |
| 654 ++ |
| 655 ++ s->s3->tmp.new_aead=aead; |
| 656 + s->s3->tmp.new_sym_enc=c; |
| 657 + s->s3->tmp.new_hash=hash; |
| 658 + s->s3->tmp.new_mac_pkey_type = mac_type; |
| 659 + s->s3->tmp.new_mac_secret_size = mac_secret_size; |
| 660 +- num=EVP_CIPHER_key_length(c)+mac_secret_size+iv_len; |
| 661 ++ |
| 662 ++ num=key_len+mac_secret_size+iv_len; |
| 663 + num*=2; |
| 664 + |
| 665 + ssl3_cleanup_key_block(s); |
| 666 +@@ -696,6 +788,10 @@ err: |
| 667 + OPENSSL_free(p2); |
| 668 + } |
| 669 + return(ret); |
| 670 ++ |
| 671 ++cipher_unavailable_err: |
| 672 ++ SSLerr(SSL_F_TLS1_SETUP_KEY_BLOCK,SSL_R_CIPHER_OR_HASH_UNAVAILABLE); |
| 673 ++ return 0; |
| 674 + } |
| 675 + |
| 676 + /* tls1_enc encrypts/decrypts the record in |s->wrec| / |s->rrec|, respectivel
y. |
| 677 +@@ -714,6 +810,124 @@ int tls1_enc(SSL *s, int send) |
| 678 + unsigned long l; |
| 679 + int bs,i,j,k,pad=0,ret,mac_size=0; |
| 680 + const EVP_CIPHER *enc; |
| 681 ++ const SSL_AEAD_CTX *aead; |
| 682 ++ |
| 683 ++ if (send) |
| 684 ++ rec = &s->s3->wrec; |
| 685 ++ else |
| 686 ++ rec = &s->s3->rrec; |
| 687 ++ |
| 688 ++ if (send) |
| 689 ++ aead = s->aead_write_ctx; |
| 690 ++ else |
| 691 ++ aead = s->aead_read_ctx; |
| 692 ++ |
| 693 ++ if (aead) |
| 694 ++ { |
| 695 ++ unsigned char ad[13], *seq, *in, *out, nonce[16]; |
| 696 ++ unsigned nonce_used; |
| 697 ++ ssize_t n; |
| 698 ++ |
| 699 ++ seq = send ? s->s3->write_sequence : s->s3->read_sequence; |
| 700 ++ |
| 701 ++ if (s->version == DTLS1_VERSION || s->version == DTLS1_BAD_VER) |
| 702 ++ { |
| 703 ++ unsigned char dtlsseq[9], *p = dtlsseq; |
| 704 ++ |
| 705 ++ s2n(send ? s->d1->w_epoch : s->d1->r_epoch, p); |
| 706 ++ memcpy(p, &seq[2], 6); |
| 707 ++ memcpy(ad, dtlsseq, 8); |
| 708 ++ } |
| 709 ++ else |
| 710 ++ { |
| 711 ++ memcpy(ad, seq, 8); |
| 712 ++ for (i=7; i>=0; i--) /* increment */ |
| 713 ++ { |
| 714 ++ ++seq[i]; |
| 715 ++ if (seq[i] != 0) |
| 716 ++ break; |
| 717 ++ } |
| 718 ++ } |
| 719 ++ |
| 720 ++ ad[8] = rec->type; |
| 721 ++ ad[9] = (unsigned char)(s->version>>8); |
| 722 ++ ad[10] = (unsigned char)(s->version); |
| 723 ++ |
| 724 ++ if (aead->fixed_nonce_len + aead->variable_nonce_len > sizeof(no
nce) || |
| 725 ++ aead->variable_nonce_len > 8) |
| 726 ++ return -1; /* internal error - should never happen. */ |
| 727 ++ |
| 728 ++ memcpy(nonce, aead->fixed_nonce, aead->fixed_nonce_len); |
| 729 ++ nonce_used = aead->fixed_nonce_len; |
| 730 ++ |
| 731 ++ if (send) |
| 732 ++ { |
| 733 ++ size_t len = rec->length; |
| 734 ++ in = rec->input; |
| 735 ++ out = rec->data; |
| 736 ++ |
| 737 ++ /* When sending we use the sequence number as the |
| 738 ++ * variable part of the nonce. */ |
| 739 ++ if (aead->variable_nonce_len > 8) |
| 740 ++ return -1; |
| 741 ++ memcpy(nonce + nonce_used, ad, aead->variable_nonce_len)
; |
| 742 ++ nonce_used += aead->variable_nonce_len; |
| 743 ++ |
| 744 ++ /* in do_ssl3_write, rec->input is moved forward by |
| 745 ++ * variable_nonce_len in order to leave space for the |
| 746 ++ * variable nonce. Thus we can copy the sequence number |
| 747 ++ * bytes into place without overwriting any of the |
| 748 ++ * plaintext. */ |
| 749 ++ memcpy(out, ad, aead->variable_nonce_len); |
| 750 ++ len -= aead->variable_nonce_len; |
| 751 ++ |
| 752 ++ ad[11] = len >> 8; |
| 753 ++ ad[12] = len & 0xff; |
| 754 ++ |
| 755 ++ n = EVP_AEAD_CTX_seal(&aead->ctx, |
| 756 ++ out + aead->variable_nonce_len, le
n + aead->tag_len, |
| 757 ++ nonce, nonce_used, |
| 758 ++ in + aead->variable_nonce_len, len
, |
| 759 ++ ad, sizeof(ad)); |
| 760 ++ if (n >= 0) |
| 761 ++ n += aead->variable_nonce_len; |
| 762 ++ } |
| 763 ++ else |
| 764 ++ { |
| 765 ++ /* receive */ |
| 766 ++ size_t len = rec->length; |
| 767 ++ |
| 768 ++ if (rec->data != rec->input) |
| 769 ++ return -1; /* internal error - should never hap
pen. */ |
| 770 ++ out = in = rec->input; |
| 771 ++ |
| 772 ++ if (len < aead->variable_nonce_len) |
| 773 ++ return 0; |
| 774 ++ memcpy(nonce + nonce_used, in, aead->variable_nonce_len)
; |
| 775 ++ nonce_used += aead->variable_nonce_len; |
| 776 ++ |
| 777 ++ in += aead->variable_nonce_len; |
| 778 ++ len -= aead->variable_nonce_len; |
| 779 ++ out += aead->variable_nonce_len; |
| 780 ++ |
| 781 ++ if (len < aead->tag_len) |
| 782 ++ return 0; |
| 783 ++ len -= aead->tag_len; |
| 784 ++ |
| 785 ++ ad[11] = len >> 8; |
| 786 ++ ad[12] = len & 0xff; |
| 787 ++ |
| 788 ++ n = EVP_AEAD_CTX_open(&aead->ctx, out, len, nonce, nonce
_used, |
| 789 ++ in, len + aead->tag_len, ad, sizeo
f(ad)); |
| 790 ++ |
| 791 ++ rec->data = rec->input = out; |
| 792 ++ } |
| 793 ++ |
| 794 ++ if (n == -1) |
| 795 ++ return -1; |
| 796 ++ rec->length = n; |
| 797 ++ return 1; |
| 798 ++ } |
| 799 + |
| 800 + if (send) |
| 801 + { |
| 802 +-- |
| 803 +1.8.4.1 |
| 804 + |
| 805 diff -burN android-openssl-lhash2/patches/aead_support.patch android-openssl/pat
ches/aead_support.patch |
| 806 --- android-openssl-lhash2/patches/aead_support.patch 1969-12-31 19:00:00.0000
00000 -0500 |
| 807 +++ android-openssl/patches/aead_support.patch 2013-11-05 14:14:34.631283497 -0
500 |
| 808 @@ -0,0 +1,811 @@ |
| 809 +From 98f0c6e114f55b4451bea824b05ab29db3351f12 Mon Sep 17 00:00:00 2001 |
| 810 +From: Adam Langley <agl@chromium.org> |
| 811 +Date: Thu, 25 Jul 2013 16:52:35 -0400 |
| 812 +Subject: [PATCH 40/50] aead_support |
| 813 + |
| 814 +This change adds an AEAD interface to EVP and an AES-GCM implementation |
| 815 +suitable for use in TLS. |
| 816 +--- |
| 817 + crypto/evp/Makefile | 4 +- |
| 818 + crypto/evp/e_aes.c | 214 +++++++++++++++++++++++++++++++++++---- |
| 819 + crypto/evp/evp.h | 111 ++++++++++++++++++++ |
| 820 + crypto/evp/evp_aead.c | 192 +++++++++++++++++++++++++++++++++++ |
| 821 + crypto/evp/evp_err.c | 8 ++ |
| 822 + crypto/evp/evp_locl.h | 24 +++++ |
| 823 + doc/crypto/EVP_AEAD_CTX_init.pod | 96 ++++++++++++++++++ |
| 824 + 7 files changed, 626 insertions(+), 23 deletions(-) |
| 825 + create mode 100644 crypto/evp/evp_aead.c |
| 826 + create mode 100644 doc/crypto/EVP_AEAD_CTX_init.pod |
| 827 + |
| 828 +diff --git a/crypto/evp/Makefile b/crypto/evp/Makefile |
| 829 +index 1e46ceb..b73038d 100644 |
| 830 +--- a/crypto/evp/Makefile |
| 831 ++++ b/crypto/evp/Makefile |
| 832 +@@ -29,7 +29,7 @@ LIBSRC= encode.c digest.c evp_enc.c evp_key.c evp_acnf.c evp_
cnf.c \ |
| 833 + c_all.c c_allc.c c_alld.c evp_lib.c bio_ok.c \ |
| 834 + evp_pkey.c evp_pbe.c p5_crpt.c p5_crpt2.c \ |
| 835 + e_old.c pmeth_lib.c pmeth_fn.c pmeth_gn.c m_sigver.c evp_fips.c \ |
| 836 +- e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c |
| 837 ++ e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c evp_aead.c |
| 838 + |
| 839 + LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \ |
| 840 + e_des.o e_bf.o e_idea.o e_des3.o e_camellia.o\ |
| 841 +@@ -42,7 +42,7 @@ LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o
evp_cnf.o \ |
| 842 + c_all.o c_allc.o c_alld.o evp_lib.o bio_ok.o \ |
| 843 + evp_pkey.o evp_pbe.o p5_crpt.o p5_crpt2.o \ |
| 844 + e_old.o pmeth_lib.o pmeth_fn.o pmeth_gn.o m_sigver.o evp_fips.o \ |
| 845 +- e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o |
| 846 ++ e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o evp_aead.o |
| 847 + |
| 848 + SRC= $(LIBSRC) |
| 849 + |
| 850 +diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c |
| 851 +index ef44f63..e4485e4 100644 |
| 852 +--- a/crypto/evp/e_aes.c |
| 853 ++++ b/crypto/evp/e_aes.c |
| 854 +@@ -814,44 +814,45 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int
arg, void *ptr) |
| 855 + } |
| 856 + } |
| 857 + |
| 858 +-static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
| 859 +- const unsigned char *iv, int enc) |
| 860 ++static ctr128_f aes_gcm_set_key(AES_KEY *aes_key, GCM128_CONTEXT *gcm_ctx, |
| 861 ++ const unsigned char *key, size_t key_len) |
| 862 + { |
| 863 +- EVP_AES_GCM_CTX *gctx = ctx->cipher_data; |
| 864 +- if (!iv && !key) |
| 865 +- return 1; |
| 866 +- if (key) |
| 867 +- { do { |
| 868 + #ifdef BSAES_CAPABLE |
| 869 + if (BSAES_CAPABLE) |
| 870 + { |
| 871 +- AES_set_encrypt_key(key,ctx->key_len*8,&gctx->ks); |
| 872 +- CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks, |
| 873 ++ AES_set_encrypt_key(key,key_len*8,aes_key); |
| 874 ++ CRYPTO_gcm128_init(gcm_ctx,aes_key, |
| 875 + (block128_f)AES_encrypt); |
| 876 +- gctx->ctr = (ctr128_f)bsaes_ctr32_encrypt_blocks; |
| 877 +- break; |
| 878 ++ return (ctr128_f)bsaes_ctr32_encrypt_blocks; |
| 879 + } |
| 880 +- else |
| 881 + #endif |
| 882 + #ifdef VPAES_CAPABLE |
| 883 + if (VPAES_CAPABLE) |
| 884 + { |
| 885 +- vpaes_set_encrypt_key(key,ctx->key_len*8,&gctx->ks); |
| 886 +- CRYPTO_gcm128_init(&gctx->gcm,&gctx->ks, |
| 887 ++ vpaes_set_encrypt_key(key,key_len*8,aes_key); |
| 888 ++ CRYPTO_gcm128_init(gcm_ctx,aes_key, |
| 889 + (block128_f)vpaes_encrypt); |
| 890 +- gctx->ctr = NULL; |
| 891 +- break; |
| 892 ++ return NULL; |
| 893 + } |
| 894 + #endif |
| 895 +- AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks); |
| 896 +- CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks, (block128_f)AES_encryp
t); |
| 897 ++ AES_set_encrypt_key(key, key_len*8, aes_key); |
| 898 ++ CRYPTO_gcm128_init(gcm_ctx, aes_key, (block128_f)AES_encrypt); |
| 899 + #ifdef AES_CTR_ASM |
| 900 +- gctx->ctr = (ctr128_f)AES_ctr32_encrypt; |
| 901 ++ return (ctr128_f)AES_ctr32_encrypt; |
| 902 + #else |
| 903 +- gctx->ctr = NULL; |
| 904 ++ return NULL; |
| 905 + #endif |
| 906 +- } while (0); |
| 907 ++ } |
| 908 + |
| 909 ++static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
| 910 ++ const unsigned char *iv, int enc) |
| 911 ++ { |
| 912 ++ EVP_AES_GCM_CTX *gctx = ctx->cipher_data; |
| 913 ++ if (!iv && !key) |
| 914 ++ return 1; |
| 915 ++ if (key) |
| 916 ++ { |
| 917 ++ gctx->ctr = aes_gcm_set_key(&gctx->ks, &gctx->gcm, key, ctx->key
_len); |
| 918 + /* If we have an iv can set it directly, otherwise use |
| 919 + * saved IV. |
| 920 + */ |
| 921 +@@ -1310,5 +1311,176 @@ BLOCK_CIPHER_custom(NID_aes,128,1,12,ccm,CCM,EVP_CIPH_F
LAG_FIPS|CUSTOM_FLAGS) |
| 922 + BLOCK_CIPHER_custom(NID_aes,192,1,12,ccm,CCM,EVP_CIPH_FLAG_FIPS|CUSTOM_FLAGS) |
| 923 + BLOCK_CIPHER_custom(NID_aes,256,1,12,ccm,CCM,EVP_CIPH_FLAG_FIPS|CUSTOM_FLAGS) |
| 924 + |
| 925 ++#define EVP_AEAD_AES_128_GCM_TAG_LEN 16 |
| 926 ++ |
| 927 ++struct aead_aes_128_gcm_ctx { |
| 928 ++ union { double align; AES_KEY ks; } ks; |
| 929 ++ GCM128_CONTEXT gcm; |
| 930 ++ ctr128_f ctr; |
| 931 ++ unsigned char tag_len; |
| 932 ++}; |
| 933 ++ |
| 934 ++static int aead_aes_128_gcm_init(EVP_AEAD_CTX *ctx, |
| 935 ++ const unsigned char *key, size_t key_len, size_t tag_len) |
| 936 ++ { |
| 937 ++ struct aead_aes_128_gcm_ctx *gcm_ctx; |
| 938 ++ |
| 939 ++ if (key_len*8 != 128) |
| 940 ++ { |
| 941 ++ EVPerr(EVP_F_AEAD_AES_128_GCM_INIT, EVP_R_BAD_KEY_LENGTH); |
| 942 ++ return 0; /* EVP_AEAD_CTX_init should catch this. */ |
| 943 ++ } |
| 944 ++ |
| 945 ++ if (tag_len == EVP_AEAD_DEFAULT_TAG_LENGTH) |
| 946 ++ tag_len = EVP_AEAD_AES_128_GCM_TAG_LEN; |
| 947 ++ |
| 948 ++ if (tag_len > EVP_AEAD_AES_128_GCM_TAG_LEN) |
| 949 ++ { |
| 950 ++ EVPerr(EVP_F_AEAD_AES_128_GCM_INIT, EVP_R_TAG_TOO_LARGE); |
| 951 ++ return 0; |
| 952 ++ } |
| 953 ++ |
| 954 ++ gcm_ctx = OPENSSL_malloc(sizeof(struct aead_aes_128_gcm_ctx)); |
| 955 ++ if (gcm_ctx == NULL) |
| 956 ++ return 0; |
| 957 ++ |
| 958 ++#ifdef AESNI_CAPABLE |
| 959 ++ if (AESNI_CAPABLE) |
| 960 ++ { |
| 961 ++ aesni_set_encrypt_key(key, key_len * 8, &gcm_ctx->ks.ks); |
| 962 ++ CRYPTO_gcm128_init(&gcm_ctx->gcm, &gcm_ctx->ks.ks, |
| 963 ++ (block128_f)aesni_encrypt); |
| 964 ++ gcm_ctx->ctr = (ctr128_f) aesni_ctr32_encrypt_blocks; |
| 965 ++ } |
| 966 ++ else |
| 967 ++#endif |
| 968 ++ { |
| 969 ++ gcm_ctx->ctr = aes_gcm_set_key(&gcm_ctx->ks.ks, &gcm_ctx->gcm, |
| 970 ++ key, key_len); |
| 971 ++ } |
| 972 ++ gcm_ctx->tag_len = tag_len; |
| 973 ++ ctx->aead_state = gcm_ctx; |
| 974 ++ |
| 975 ++ return 1; |
| 976 ++ } |
| 977 ++ |
| 978 ++static void aead_aes_128_gcm_cleanup(EVP_AEAD_CTX *ctx) |
| 979 ++ { |
| 980 ++ struct aead_aes_128_gcm_ctx *gcm_ctx = ctx->aead_state; |
| 981 ++ OPENSSL_free(gcm_ctx); |
| 982 ++ } |
| 983 ++ |
| 984 ++static ssize_t aead_aes_128_gcm_seal(const EVP_AEAD_CTX *ctx, |
| 985 ++ unsigned char *out, size_t max_out_len, |
| 986 ++ const unsigned char *nonce, size_t nonce_len, |
| 987 ++ const unsigned char *in, size_t in_len, |
| 988 ++ const unsigned char *ad, size_t ad_len) |
| 989 ++ { |
| 990 ++ size_t bulk = 0; |
| 991 ++ const struct aead_aes_128_gcm_ctx *gcm_ctx = ctx->aead_state; |
| 992 ++ GCM128_CONTEXT gcm; |
| 993 ++ |
| 994 ++ if (max_out_len < in_len + gcm_ctx->tag_len) |
| 995 ++ { |
| 996 ++ EVPerr(EVP_F_AEAD_AES_128_GCM_SEAL, EVP_R_BUFFER_TOO_SMALL); |
| 997 ++ return -1; |
| 998 ++ } |
| 999 ++ |
| 1000 ++ memcpy(&gcm, &gcm_ctx->gcm, sizeof(gcm)); |
| 1001 ++ CRYPTO_gcm128_setiv(&gcm, nonce, nonce_len); |
| 1002 ++ |
| 1003 ++ if (ad_len > 0 && CRYPTO_gcm128_aad(&gcm, ad, ad_len)) |
| 1004 ++ return -1; |
| 1005 ++ |
| 1006 ++ if (gcm_ctx->ctr) |
| 1007 ++ { |
| 1008 ++ if (CRYPTO_gcm128_encrypt_ctr32(&gcm, in + bulk, out + bulk, |
| 1009 ++ in_len - bulk, gcm_ctx->ctr)) |
| 1010 ++ return -1; |
| 1011 ++ } |
| 1012 ++ else |
| 1013 ++ { |
| 1014 ++ if (CRYPTO_gcm128_encrypt(&gcm, in + bulk, out + bulk, |
| 1015 ++ in_len - bulk)) |
| 1016 ++ return -1; |
| 1017 ++ } |
| 1018 ++ |
| 1019 ++ CRYPTO_gcm128_tag(&gcm, out + in_len, gcm_ctx->tag_len); |
| 1020 ++ return in_len + gcm_ctx->tag_len; |
| 1021 ++ } |
| 1022 ++ |
| 1023 ++static ssize_t aead_aes_128_gcm_open(const EVP_AEAD_CTX *ctx, |
| 1024 ++ unsigned char *out, size_t max_out_len, |
| 1025 ++ const unsigned char *nonce, size_t nonce_len, |
| 1026 ++ const unsigned char *in, size_t in_len, |
| 1027 ++ const unsigned char *ad, size_t ad_len) |
| 1028 ++ { |
| 1029 ++ size_t bulk = 0; |
| 1030 ++ const struct aead_aes_128_gcm_ctx *gcm_ctx = ctx->aead_state; |
| 1031 ++ unsigned char tag[EVP_AEAD_AES_128_GCM_TAG_LEN]; |
| 1032 ++ size_t out_len; |
| 1033 ++ GCM128_CONTEXT gcm; |
| 1034 ++ |
| 1035 ++ if (in_len < gcm_ctx->tag_len) |
| 1036 ++ { |
| 1037 ++ EVPerr(EVP_F_AEAD_AES_128_GCM_OPEN, EVP_R_BAD_DECRYPT); |
| 1038 ++ return -1; |
| 1039 ++ } |
| 1040 ++ |
| 1041 ++ out_len = in_len - gcm_ctx->tag_len; |
| 1042 ++ |
| 1043 ++ if (max_out_len < out_len) |
| 1044 ++ { |
| 1045 ++ EVPerr(EVP_F_AEAD_AES_128_GCM_OPEN, EVP_R_BUFFER_TOO_SMALL); |
| 1046 ++ return -1; |
| 1047 ++ } |
| 1048 ++ |
| 1049 ++ memcpy(&gcm, &gcm_ctx->gcm, sizeof(gcm)); |
| 1050 ++ CRYPTO_gcm128_setiv(&gcm, nonce, nonce_len); |
| 1051 ++ |
| 1052 ++ if (CRYPTO_gcm128_aad(&gcm, ad, ad_len)) |
| 1053 ++ return -1; |
| 1054 ++ |
| 1055 ++ if (gcm_ctx->ctr) |
| 1056 ++ { |
| 1057 ++ if (CRYPTO_gcm128_decrypt_ctr32(&gcm, in + bulk, out + bulk, |
| 1058 ++ in_len-bulk-gcm_ctx->tag_len, |
| 1059 ++ gcm_ctx->ctr)) |
| 1060 ++ return -1; |
| 1061 ++ } |
| 1062 ++ else |
| 1063 ++ { |
| 1064 ++ if (CRYPTO_gcm128_decrypt(&gcm, in + bulk, out + bulk, |
| 1065 ++ in_len - bulk - gcm_ctx->tag_len)) |
| 1066 ++ return -1; |
| 1067 ++ } |
| 1068 ++ |
| 1069 ++ CRYPTO_gcm128_tag(&gcm, tag, gcm_ctx->tag_len); |
| 1070 ++ if (CRYPTO_memcmp(tag, in + out_len, gcm_ctx->tag_len) != 0) |
| 1071 ++ { |
| 1072 ++ EVPerr(EVP_F_AEAD_AES_128_GCM_OPEN, EVP_R_BAD_DECRYPT); |
| 1073 ++ return -1; |
| 1074 ++ } |
| 1075 ++ |
| 1076 ++ return out_len; |
| 1077 ++ } |
| 1078 ++ |
| 1079 ++static const EVP_AEAD aead_aes_128_gcm = { |
| 1080 ++ 16, /* key len */ |
| 1081 ++ 12, /* nonce len */ |
| 1082 ++ EVP_AEAD_AES_128_GCM_TAG_LEN, /* overhead */ |
| 1083 ++ EVP_AEAD_AES_128_GCM_TAG_LEN, /* max tag length */ |
| 1084 ++ |
| 1085 ++ aead_aes_128_gcm_init, |
| 1086 ++ aead_aes_128_gcm_cleanup, |
| 1087 ++ aead_aes_128_gcm_seal, |
| 1088 ++ aead_aes_128_gcm_open, |
| 1089 ++}; |
| 1090 ++ |
| 1091 ++const EVP_AEAD *EVP_aead_aes_128_gcm() |
| 1092 ++ { |
| 1093 ++ return &aead_aes_128_gcm; |
| 1094 ++ } |
| 1095 ++ |
| 1096 + #endif |
| 1097 + #endif |
| 1098 +diff --git a/crypto/evp/evp.h b/crypto/evp/evp.h |
| 1099 +index 5f18d4b..bd10642 100644 |
| 1100 +--- a/crypto/evp/evp.h |
| 1101 ++++ b/crypto/evp/evp.h |
| 1102 +@@ -1243,6 +1243,109 @@ void EVP_PKEY_meth_set_ctrl(EVP_PKEY_METHOD *pmeth, |
| 1103 + int (*ctrl_str)(EVP_PKEY_CTX *ctx, |
| 1104 + const char *type, const char *value)); |
| 1105 + |
| 1106 ++/* Authenticated Encryption with Additional Data. |
| 1107 ++ * |
| 1108 ++ * AEAD couples confidentiality and integrity in a single primtive. AEAD |
| 1109 ++ * algorithms take a key and then can seal and open individual messages. Each |
| 1110 ++ * message has a unique, per-message nonce and, optionally, additional data |
| 1111 ++ * which is authenticated but not included in the output. */ |
| 1112 ++ |
| 1113 ++struct evp_aead_st; |
| 1114 ++typedef struct evp_aead_st EVP_AEAD; |
| 1115 ++ |
| 1116 ++#ifndef OPENSSL_NO_AES |
| 1117 ++/* EVP_aes_128_gcm is AES-128 in Galois Counter Mode. */ |
| 1118 ++const EVP_AEAD *EVP_aead_aes_128_gcm(void); |
| 1119 ++#endif |
| 1120 ++ |
| 1121 ++/* EVP_AEAD_key_length returns the length, in bytes, of the keys used by |
| 1122 ++ * |aead|. */ |
| 1123 ++size_t EVP_AEAD_key_length(const EVP_AEAD *aead); |
| 1124 ++ |
| 1125 ++/* EVP_AEAD_nonce_length returns the length, in bytes, of the per-message nonc
e |
| 1126 ++ * for |aead|. */ |
| 1127 ++size_t EVP_AEAD_nonce_length(const EVP_AEAD *aead); |
| 1128 ++ |
| 1129 ++/* EVP_AEAD_max_overhead returns the maximum number of additional bytes added |
| 1130 ++ * by the act of sealing data with |aead|. */ |
| 1131 ++size_t EVP_AEAD_max_overhead(const EVP_AEAD *aead); |
| 1132 ++ |
| 1133 ++/* EVP_AEAD_max_tag_len returns the maximum tag length when using |aead|. This |
| 1134 ++ * is the largest value that can be passed as |tag_len| to |
| 1135 ++ * |EVP_AEAD_CTX_init|. */ |
| 1136 ++size_t EVP_AEAD_max_tag_len(const EVP_AEAD *aead); |
| 1137 ++ |
| 1138 ++/* An EVP_AEAD_CTX represents an AEAD algorithm configured with a specific key |
| 1139 ++ * and message-independent IV. */ |
| 1140 ++typedef struct evp_aead_ctx_st { |
| 1141 ++ const EVP_AEAD *aead; |
| 1142 ++ /* aead_state is an opaque pointer to whatever state the AEAD needs to |
| 1143 ++ * maintain. */ |
| 1144 ++ void *aead_state; |
| 1145 ++} EVP_AEAD_CTX; |
| 1146 ++ |
| 1147 ++#define EVP_AEAD_DEFAULT_TAG_LENGTH 0 |
| 1148 ++ |
| 1149 ++/* EVP_AEAD_init initializes |ctx| for the given AEAD algorithm from |impl|. |
| 1150 ++ * The |impl| argument may be NULL to choose the default implementation. |
| 1151 ++ * Authentication tags may be truncated by passing a size as |tag_len|. A |
| 1152 ++ * |tag_len| of zero indicates the default tag length and this is defined as |
| 1153 ++ * EVP_AEAD_DEFAULT_TAG_LENGTH for readability. |
| 1154 ++ * Returns 1 on success. Otherwise returns 0 and pushes to the error stack. */ |
| 1155 ++int EVP_AEAD_CTX_init(EVP_AEAD_CTX *ctx, const EVP_AEAD *aead, |
| 1156 ++ const unsigned char *key, size_t key_len, |
| 1157 ++ size_t tag_len, ENGINE *impl); |
| 1158 ++ |
| 1159 ++/* EVP_AEAD_CTX_cleanup frees any data allocated by |ctx|. */ |
| 1160 ++void EVP_AEAD_CTX_cleanup(EVP_AEAD_CTX *ctx); |
| 1161 ++ |
| 1162 ++/* EVP_AEAD_CTX_seal encrypts and authenticates |in_len| bytes from |in| and |
| 1163 ++ * authenticates |ad_len| bytes from |ad| and writes the result to |out|, |
| 1164 ++ * returning the number of bytes written, or -1 on error. |
| 1165 ++ * |
| 1166 ++ * This function may be called (with the same EVP_AEAD_CTX) concurrently with |
| 1167 ++ * itself or EVP_AEAD_CTX_open. |
| 1168 ++ * |
| 1169 ++ * At most |max_out_len| bytes are written to |out| and, in order to ensure |
| 1170 ++ * success, |max_out_len| should be |in_len| plus the result of |
| 1171 ++ * EVP_AEAD_overhead. |
| 1172 ++ * |
| 1173 ++ * The length of |nonce|, |nonce_len|, must be equal to the result of |
| 1174 ++ * EVP_AEAD_nonce_length for this AEAD. |
| 1175 ++ * |
| 1176 ++ * EVP_AEAD_CTX_seal never results in a partial output. If |max_out_len| is |
| 1177 ++ * insufficient, -1 will be returned. |
| 1178 ++ * |
| 1179 ++ * If |in| and |out| alias then |out| must be <= |in|. */ |
| 1180 ++ssize_t EVP_AEAD_CTX_seal(const EVP_AEAD_CTX *ctx, |
| 1181 ++ unsigned char *out, size_t max_out_len, |
| 1182 ++ const unsigned char *nonce, size_t nonce_len, |
| 1183 ++ const unsigned char *in, size_t in_len, |
| 1184 ++ const unsigned char *ad, size_t ad_len); |
| 1185 ++ |
| 1186 ++/* EVP_AEAD_CTX_open authenticates |in_len| bytes from |in| and |ad_len| bytes |
| 1187 ++ * from |ad| and decrypts at most |in_len| bytes into |out|. It returns the |
| 1188 ++ * number of bytes written, or -1 on error. |
| 1189 ++ * |
| 1190 ++ * This function may be called (with the same EVP_AEAD_CTX) concurrently with |
| 1191 ++ * itself or EVP_AEAD_CTX_seal. |
| 1192 ++ * |
| 1193 ++ * At most |in_len| bytes are written to |out|. In order to ensure success, |
| 1194 ++ * |max_out_len| should be at least |in_len|. |
| 1195 ++ * |
| 1196 ++ * The length of |nonce|, |nonce_len|, must be equal to the result of |
| 1197 ++ * EVP_AEAD_nonce_length for this AEAD. |
| 1198 ++ * |
| 1199 ++ * EVP_AEAD_CTX_open never results in a partial output. If |max_out_len| is |
| 1200 ++ * insufficient, -1 will be returned. |
| 1201 ++ * |
| 1202 ++ * If |in| and |out| alias then |out| must be <= |in|. */ |
| 1203 ++ssize_t EVP_AEAD_CTX_open(const EVP_AEAD_CTX *ctx, |
| 1204 ++ unsigned char *out, size_t max_out_len, |
| 1205 ++ const unsigned char *nonce, size_t nonce_len, |
| 1206 ++ const unsigned char *in, size_t in_len, |
| 1207 ++ const unsigned char *ad, size_t ad_len); |
| 1208 ++ |
| 1209 + void EVP_add_alg_module(void); |
| 1210 + |
| 1211 + /* BEGIN ERROR CODES */ |
| 1212 +@@ -1254,6 +1357,11 @@ void ERR_load_EVP_strings(void); |
| 1213 + /* Error codes for the EVP functions. */ |
| 1214 + |
| 1215 + /* Function codes. */ |
| 1216 ++#define EVP_F_AEAD_AES_128_GCM_INIT 183 |
| 1217 ++#define EVP_F_AEAD_AES_128_GCM_OPEN 181 |
| 1218 ++#define EVP_F_AEAD_AES_128_GCM_SEAL 182 |
| 1219 ++#define EVP_F_AEAD_CTX_OPEN 185 |
| 1220 ++#define EVP_F_AEAD_CTX_SEAL 186 |
| 1221 + #define EVP_F_AESNI_INIT_KEY 165 |
| 1222 + #define EVP_F_AESNI_XTS_CIPHER 176 |
| 1223 + #define EVP_F_AES_INIT_KEY 133 |
| 1224 +@@ -1268,6 +1376,7 @@ void ERR_load_EVP_strings(void); |
| 1225 + #define EVP_F_DSA_PKEY2PKCS8 135 |
| 1226 + #define EVP_F_ECDSA_PKEY2PKCS8 129 |
| 1227 + #define EVP_F_ECKEY_PKEY2PKCS8 132 |
| 1228 ++#define EVP_F_EVP_AEAD_CTX_INIT 180 |
| 1229 + #define EVP_F_EVP_CIPHERINIT_EX 123 |
| 1230 + #define EVP_F_EVP_CIPHER_CTX_COPY 163 |
| 1231 + #define EVP_F_EVP_CIPHER_CTX_CTRL 124 |
| 1232 +@@ -1383,10 +1492,12 @@ void ERR_load_EVP_strings(void); |
| 1233 + #define EVP_R_NO_VERIFY_FUNCTION_CONFIGURED 105 |
| 1234 + #define EVP_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE 150 |
| 1235 + #define EVP_R_OPERATON_NOT_INITIALIZED 151 |
| 1236 ++#define EVP_R_OUTPUT_ALIASES_INPUT 170 |
| 1237 + #define EVP_R_PKCS8_UNKNOWN_BROKEN_TYPE 117 |
| 1238 + #define EVP_R_PRIVATE_KEY_DECODE_ERROR 145 |
| 1239 + #define EVP_R_PRIVATE_KEY_ENCODE_ERROR 146 |
| 1240 + #define EVP_R_PUBLIC_KEY_NOT_RSA 106 |
| 1241 ++#define EVP_R_TAG_TOO_LARGE 171 |
| 1242 + #define EVP_R_TOO_LARGE 164 |
| 1243 + #define EVP_R_UNKNOWN_CIPHER 160 |
| 1244 + #define EVP_R_UNKNOWN_DIGEST 161 |
| 1245 +diff --git a/crypto/evp/evp_aead.c b/crypto/evp/evp_aead.c |
| 1246 +new file mode 100644 |
| 1247 +index 0000000..91da561 |
| 1248 +--- /dev/null |
| 1249 ++++ b/crypto/evp/evp_aead.c |
| 1250 +@@ -0,0 +1,192 @@ |
| 1251 ++/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 1252 ++ * All rights reserved. |
| 1253 ++ * |
| 1254 ++ * This package is an SSL implementation written |
| 1255 ++ * by Eric Young (eay@cryptsoft.com). |
| 1256 ++ * The implementation was written so as to conform with Netscapes SSL. |
| 1257 ++ * |
| 1258 ++ * This library is free for commercial and non-commercial use as long as |
| 1259 ++ * the following conditions are aheared to. The following conditions |
| 1260 ++ * apply to all code found in this distribution, be it the RC4, RSA, |
| 1261 ++ * lhash, DES, etc., code; not just the SSL code. The SSL documentation |
| 1262 ++ * included with this distribution is covered by the same copyright terms |
| 1263 ++ * except that the holder is Tim Hudson (tjh@cryptsoft.com). |
| 1264 ++ * |
| 1265 ++ * Copyright remains Eric Young's, and as such any Copyright notices in |
| 1266 ++ * the code are not to be removed. |
| 1267 ++ * If this package is used in a product, Eric Young should be given attributio
n |
| 1268 ++ * as the author of the parts of the library used. |
| 1269 ++ * This can be in the form of a textual message at program startup or |
| 1270 ++ * in documentation (online or textual) provided with the package. |
| 1271 ++ * |
| 1272 ++ * Redistribution and use in source and binary forms, with or without |
| 1273 ++ * modification, are permitted provided that the following conditions |
| 1274 ++ * are met: |
| 1275 ++ * 1. Redistributions of source code must retain the copyright |
| 1276 ++ * notice, this list of conditions and the following disclaimer. |
| 1277 ++ * 2. Redistributions in binary form must reproduce the above copyright |
| 1278 ++ * notice, this list of conditions and the following disclaimer in the |
| 1279 ++ * documentation and/or other materials provided with the distribution. |
| 1280 ++ * 3. All advertising materials mentioning features or use of this software |
| 1281 ++ * must display the following acknowledgement: |
| 1282 ++ * "This product includes cryptographic software written by |
| 1283 ++ * Eric Young (eay@cryptsoft.com)" |
| 1284 ++ * The word 'cryptographic' can be left out if the rouines from the library |
| 1285 ++ * being used are not cryptographic related :-). |
| 1286 ++ * 4. If you include any Windows specific code (or a derivative thereof) from |
| 1287 ++ * the apps directory (application code) you must include an acknowledgemen
t: |
| 1288 ++ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com
)" |
| 1289 ++ * |
| 1290 ++ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND |
| 1291 ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 1292 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 1293 ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
| 1294 ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 1295 ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 1296 ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 1297 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 1298 ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 1299 ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 1300 ++ * SUCH DAMAGE. |
| 1301 ++ * |
| 1302 ++ * The licence and distribution terms for any publically available version or |
| 1303 ++ * derivative of this code cannot be changed. i.e. this code cannot simply be |
| 1304 ++ * copied and put under another distribution licence |
| 1305 ++ * [including the GNU Public Licence.] |
| 1306 ++ */ |
| 1307 ++ |
| 1308 ++#include <limits.h> |
| 1309 ++#include <string.h> |
| 1310 ++ |
| 1311 ++#include <openssl/evp.h> |
| 1312 ++#include <openssl/err.h> |
| 1313 ++ |
| 1314 ++#include "evp_locl.h" |
| 1315 ++ |
| 1316 ++size_t EVP_AEAD_key_length(const EVP_AEAD *aead) |
| 1317 ++ { |
| 1318 ++ return aead->key_len; |
| 1319 ++ } |
| 1320 ++ |
| 1321 ++size_t EVP_AEAD_nonce_length(const EVP_AEAD *aead) |
| 1322 ++ { |
| 1323 ++ return aead->nonce_len; |
| 1324 ++ } |
| 1325 ++ |
| 1326 ++size_t EVP_AEAD_max_overhead(const EVP_AEAD *aead) |
| 1327 ++ { |
| 1328 ++ return aead->overhead; |
| 1329 ++ } |
| 1330 ++ |
| 1331 ++size_t EVP_AEAD_max_tag_len(const EVP_AEAD *aead) |
| 1332 ++ { |
| 1333 ++ return aead->max_tag_len; |
| 1334 ++ } |
| 1335 ++ |
| 1336 ++int EVP_AEAD_CTX_init(EVP_AEAD_CTX *ctx, const EVP_AEAD *aead, |
| 1337 ++ const unsigned char *key, size_t key_len, |
| 1338 ++ size_t tag_len, ENGINE *impl) |
| 1339 ++ { |
| 1340 ++ ctx->aead = aead; |
| 1341 ++ if (key_len != aead->key_len) |
| 1342 ++ { |
| 1343 ++ EVPerr(EVP_F_EVP_AEAD_CTX_INIT,EVP_R_UNSUPPORTED_KEY_SIZE); |
| 1344 ++ return 0; |
| 1345 ++ } |
| 1346 ++ return aead->init(ctx, key, key_len, tag_len); |
| 1347 ++ } |
| 1348 ++ |
| 1349 ++void EVP_AEAD_CTX_cleanup(EVP_AEAD_CTX *ctx) |
| 1350 ++ { |
| 1351 ++ if (ctx->aead == NULL) |
| 1352 ++ return; |
| 1353 ++ ctx->aead->cleanup(ctx); |
| 1354 ++ ctx->aead = NULL; |
| 1355 ++ } |
| 1356 ++ |
| 1357 ++/* check_alias returns 0 if |out| points within the buffer determined by |in| |
| 1358 ++ * and |in_len| and 1 otherwise. |
| 1359 ++ * |
| 1360 ++ * When processing, there's only an issue if |out| points within in[:in_len] |
| 1361 ++ * and isn't equal to |in|. If that's the case then writing the output will |
| 1362 ++ * stomp input that hasn't been read yet. |
| 1363 ++ * |
| 1364 ++ * This function checks for that case. */ |
| 1365 ++static int check_alias(const unsigned char *in, size_t in_len, |
| 1366 ++ const unsigned char *out) |
| 1367 ++ { |
| 1368 ++ if (out <= in) |
| 1369 ++ return 1; |
| 1370 ++ if (in + in_len <= out) |
| 1371 ++ return 1; |
| 1372 ++ return 0; |
| 1373 ++ } |
| 1374 ++ |
| 1375 ++ssize_t EVP_AEAD_CTX_seal(const EVP_AEAD_CTX *ctx, |
| 1376 ++ unsigned char *out, size_t max_out_len, |
| 1377 ++ const unsigned char *nonce, size_t nonce_len, |
| 1378 ++ const unsigned char *in, size_t in_len, |
| 1379 ++ const unsigned char *ad, size_t ad_len) |
| 1380 ++ { |
| 1381 ++ size_t possible_out_len = in_len + ctx->aead->overhead; |
| 1382 ++ ssize_t r; |
| 1383 ++ |
| 1384 ++ if (possible_out_len < in_len /* overflow */ || |
| 1385 ++ possible_out_len > SSIZE_MAX /* return value cannot be |
| 1386 ++ represented */) |
| 1387 ++ { |
| 1388 ++ EVPerr(EVP_F_AEAD_CTX_SEAL, EVP_R_TOO_LARGE); |
| 1389 ++ goto error; |
| 1390 ++ } |
| 1391 ++ |
| 1392 ++ if (!check_alias(in, in_len, out)) |
| 1393 ++ { |
| 1394 ++ EVPerr(EVP_F_AEAD_CTX_SEAL, EVP_R_OUTPUT_ALIASES_INPUT); |
| 1395 ++ goto error; |
| 1396 ++ } |
| 1397 ++ |
| 1398 ++ r = ctx->aead->seal(ctx, out, max_out_len, nonce, nonce_len, |
| 1399 ++ in, in_len, ad, ad_len); |
| 1400 ++ if (r >= 0) |
| 1401 ++ return r; |
| 1402 ++ |
| 1403 ++error: |
| 1404 ++ /* In the event of an error, clear the output buffer so that a caller |
| 1405 ++ * that doesn't check the return value doesn't send raw data. */ |
| 1406 ++ memset(out, 0, max_out_len); |
| 1407 ++ return -1; |
| 1408 ++ } |
| 1409 ++ |
| 1410 ++ssize_t EVP_AEAD_CTX_open(const EVP_AEAD_CTX *ctx, |
| 1411 ++ unsigned char *out, size_t max_out_len, |
| 1412 ++ const unsigned char *nonce, size_t nonce_len, |
| 1413 ++ const unsigned char *in, size_t in_len, |
| 1414 ++ const unsigned char *ad, size_t ad_len) |
| 1415 ++ { |
| 1416 ++ ssize_t r; |
| 1417 ++ |
| 1418 ++ if (in_len > SSIZE_MAX) |
| 1419 ++ { |
| 1420 ++ EVPerr(EVP_F_AEAD_CTX_OPEN, EVP_R_TOO_LARGE); |
| 1421 ++ goto error; /* may not be able to represent return value. */ |
| 1422 ++ } |
| 1423 ++ |
| 1424 ++ if (!check_alias(in, in_len, out)) |
| 1425 ++ { |
| 1426 ++ EVPerr(EVP_F_AEAD_CTX_OPEN, EVP_R_OUTPUT_ALIASES_INPUT); |
| 1427 ++ goto error; |
| 1428 ++ } |
| 1429 ++ |
| 1430 ++ r = ctx->aead->open(ctx, out, max_out_len, nonce, nonce_len, |
| 1431 ++ in, in_len, ad, ad_len); |
| 1432 ++ |
| 1433 ++ if (r >= 0) |
| 1434 ++ return r; |
| 1435 ++ |
| 1436 ++error: |
| 1437 ++ /* In the event of an error, clear the output buffer so that a caller |
| 1438 ++ * that doesn't check the return value doesn't try and process bad |
| 1439 ++ * data. */ |
| 1440 ++ memset(out, 0, max_out_len); |
| 1441 ++ return -1; |
| 1442 ++ } |
| 1443 +diff --git a/crypto/evp/evp_err.c b/crypto/evp/evp_err.c |
| 1444 +index 08eab98..c47969c 100644 |
| 1445 +--- a/crypto/evp/evp_err.c |
| 1446 ++++ b/crypto/evp/evp_err.c |
| 1447 +@@ -70,6 +70,11 @@ |
| 1448 + |
| 1449 + static ERR_STRING_DATA EVP_str_functs[]= |
| 1450 + { |
| 1451 ++{ERR_FUNC(EVP_F_AEAD_AES_128_GCM_INIT), "AEAD_AES_128_GCM_INIT"}, |
| 1452 ++{ERR_FUNC(EVP_F_AEAD_AES_128_GCM_OPEN), "AEAD_AES_128_GCM_OPEN"}, |
| 1453 ++{ERR_FUNC(EVP_F_AEAD_AES_128_GCM_SEAL), "AEAD_AES_128_GCM_SEAL"}, |
| 1454 ++{ERR_FUNC(EVP_F_AEAD_CTX_OPEN), "AEAD_CTX_OPEN"}, |
| 1455 ++{ERR_FUNC(EVP_F_AEAD_CTX_SEAL), "AEAD_CTX_SEAL"}, |
| 1456 + {ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"}, |
| 1457 + {ERR_FUNC(EVP_F_AESNI_XTS_CIPHER), "AESNI_XTS_CIPHER"}, |
| 1458 + {ERR_FUNC(EVP_F_AES_INIT_KEY), "AES_INIT_KEY"}, |
| 1459 +@@ -84,6 +89,7 @@ static ERR_STRING_DATA EVP_str_functs[]= |
| 1460 + {ERR_FUNC(EVP_F_DSA_PKEY2PKCS8), "DSA_PKEY2PKCS8"}, |
| 1461 + {ERR_FUNC(EVP_F_ECDSA_PKEY2PKCS8), "ECDSA_PKEY2PKCS8"}, |
| 1462 + {ERR_FUNC(EVP_F_ECKEY_PKEY2PKCS8), "ECKEY_PKEY2PKCS8"}, |
| 1463 ++{ERR_FUNC(EVP_F_EVP_AEAD_CTX_INIT), "EVP_AEAD_CTX_init"}, |
| 1464 + {ERR_FUNC(EVP_F_EVP_CIPHERINIT_EX), "EVP_CipherInit_ex"}, |
| 1465 + {ERR_FUNC(EVP_F_EVP_CIPHER_CTX_COPY), "EVP_CIPHER_CTX_copy"}, |
| 1466 + {ERR_FUNC(EVP_F_EVP_CIPHER_CTX_CTRL), "EVP_CIPHER_CTX_ctrl"}, |
| 1467 +@@ -202,10 +208,12 @@ static ERR_STRING_DATA EVP_str_reasons[]= |
| 1468 + {ERR_REASON(EVP_R_NO_VERIFY_FUNCTION_CONFIGURED),"no verify function configure
d"}, |
| 1469 + {ERR_REASON(EVP_R_OPERATION_NOT_SUPPORTED_FOR_THIS_KEYTYPE),"operation not sup
ported for this keytype"}, |
| 1470 + {ERR_REASON(EVP_R_OPERATON_NOT_INITIALIZED),"operaton not initialized"}, |
| 1471 ++{ERR_REASON(EVP_R_OUTPUT_ALIASES_INPUT) ,"output aliases input"}, |
| 1472 + {ERR_REASON(EVP_R_PKCS8_UNKNOWN_BROKEN_TYPE),"pkcs8 unknown broken type"}, |
| 1473 + {ERR_REASON(EVP_R_PRIVATE_KEY_DECODE_ERROR),"private key decode error"}, |
| 1474 + {ERR_REASON(EVP_R_PRIVATE_KEY_ENCODE_ERROR),"private key encode error"}, |
| 1475 + {ERR_REASON(EVP_R_PUBLIC_KEY_NOT_RSA) ,"public key not rsa"}, |
| 1476 ++{ERR_REASON(EVP_R_TAG_TOO_LARGE) ,"tag too large"}, |
| 1477 + {ERR_REASON(EVP_R_TOO_LARGE) ,"too large"}, |
| 1478 + {ERR_REASON(EVP_R_UNKNOWN_CIPHER) ,"unknown cipher"}, |
| 1479 + {ERR_REASON(EVP_R_UNKNOWN_DIGEST) ,"unknown digest"}, |
| 1480 +diff --git a/crypto/evp/evp_locl.h b/crypto/evp/evp_locl.h |
| 1481 +index 08c0a66..c0f9fdf 100644 |
| 1482 +--- a/crypto/evp/evp_locl.h |
| 1483 ++++ b/crypto/evp/evp_locl.h |
| 1484 +@@ -348,6 +348,30 @@ int PKCS5_v2_PBKDF2_keyivgen(EVP_CIPHER_CTX *ctx, const ch
ar *pass, int passlen, |
| 1485 + ASN1_TYPE *param, |
| 1486 + const EVP_CIPHER *c, const EVP_MD *md, int en_de); |
| 1487 + |
| 1488 ++/* EVP_AEAD represents a specific AEAD algorithm. */ |
| 1489 ++struct evp_aead_st { |
| 1490 ++ unsigned char key_len; |
| 1491 ++ unsigned char nonce_len; |
| 1492 ++ unsigned char overhead; |
| 1493 ++ unsigned char max_tag_len; |
| 1494 ++ |
| 1495 ++ int (*init) (struct evp_aead_ctx_st*, const unsigned char *key, |
| 1496 ++ size_t key_len, size_t tag_len); |
| 1497 ++ void (*cleanup) (struct evp_aead_ctx_st*); |
| 1498 ++ |
| 1499 ++ ssize_t (*seal) (const struct evp_aead_ctx_st *ctx, |
| 1500 ++ unsigned char *out, size_t max_out_len, |
| 1501 ++ const unsigned char *nonce, size_t nonce_len, |
| 1502 ++ const unsigned char *in, size_t in_len, |
| 1503 ++ const unsigned char *ad, size_t ad_len); |
| 1504 ++ |
| 1505 ++ ssize_t (*open) (const struct evp_aead_ctx_st *ctx, |
| 1506 ++ unsigned char *out, size_t max_out_len, |
| 1507 ++ const unsigned char *nonce, size_t nonce_len, |
| 1508 ++ const unsigned char *in, size_t in_len, |
| 1509 ++ const unsigned char *ad, size_t ad_len); |
| 1510 ++}; |
| 1511 ++ |
| 1512 + #ifdef OPENSSL_FIPS |
| 1513 + |
| 1514 + #ifdef OPENSSL_DOING_MAKEDEPEND |
| 1515 +diff --git a/doc/crypto/EVP_AEAD_CTX_init.pod b/doc/crypto/EVP_AEAD_CTX_init.po
d |
| 1516 +new file mode 100644 |
| 1517 +index 0000000..20e455d |
| 1518 +--- /dev/null |
| 1519 ++++ b/doc/crypto/EVP_AEAD_CTX_init.pod |
| 1520 +@@ -0,0 +1,96 @@ |
| 1521 ++=pod |
| 1522 ++ |
| 1523 ++=head1 NAME |
| 1524 ++ |
| 1525 ++EVP_AEAD_CTX_init, EVP_AEAD_CTX_cleanup, EVP_AEAD_CTX_seal, EVP_AEAD_CTX_open
- authenticated encryption functions. |
| 1526 ++ |
| 1527 ++=head1 SYNOPSIS |
| 1528 ++ |
| 1529 ++ #include <openssl/evp.h> |
| 1530 ++ |
| 1531 ++ int EVP_AEAD_CTX_init(EVP_AEAD_CTX *ctx, const EVP_AEAD *aead, |
| 1532 ++ const unsigned char *key, size_t key_len, |
| 1533 ++ size_t tag_len, ENGINE *impl); |
| 1534 ++ void EVP_AEAD_CTX_cleanup(EVP_AEAD_CTX *ctx); |
| 1535 ++ ssize_t EVP_AEAD_CTX_seal(const EVP_AEAD_CTX *ctx, |
| 1536 ++ unsigned char *out, size_t max_out_len, |
| 1537 ++ const unsigned char *nonce, size_t nonce_len, |
| 1538 ++ const unsigned char *in, size_t in_len, |
| 1539 ++ const unsigned char *ad, size_t ad_len); |
| 1540 ++ ssize_t EVP_AEAD_CTX_open(const EVP_AEAD_CTX *ctx, |
| 1541 ++ unsigned char *out, size_t max_out_len, |
| 1542 ++ const unsigned char *nonce, size_t nonce_len, |
| 1543 ++ const unsigned char *in, size_t in_len, |
| 1544 ++ const unsigned char *ad, size_t ad_len); |
| 1545 ++ |
| 1546 ++=head1 DESCRIPTION |
| 1547 ++ |
| 1548 ++The EVP_AEAD_CTX_init() function initialises an B<EVP_AEAD_CTX> structure and |
| 1549 ++performs any precomputation needed to use B<aead> with B<key>. The length of |
| 1550 ++the key, B<key_len>, is given in bytes. |
| 1551 ++ |
| 1552 ++The B<tag_len> argument contains the length of the tags, in bytes, and allows |
| 1553 ++for the processing of truncated authenticators. A zero value indicates that th
e |
| 1554 ++default tag length should be used and this is defined as |
| 1555 ++C<EVP_AEAD_DEFAULT_TAG_LENGTH> in order to make the code clear. Using truncate
d |
| 1556 ++tags increases an attacker's chance of creating a valid forgery. Be aware that |
| 1557 ++the attacker's chance may increase more than exponentially as would naively be |
| 1558 ++expected. |
| 1559 ++ |
| 1560 ++When no longer needed, the initialised B<EVP_AEAD_CTX> structure must be passe
d |
| 1561 ++to EVP_AEAD_CTX_cleanup(), which will deallocate any memory used. |
| 1562 ++ |
| 1563 ++With an B<EVP_AEAD_CTX> in hand, one can seal and open messages. These |
| 1564 ++operations are intended to meet the standard notions of privacy and |
| 1565 ++authenticity for authenticated encryption. For formal definitions see I<Bellar
e |
| 1566 ++and Namprempre>, "Authenticated encryption: relations among notions and |
| 1567 ++analysis of the generic composition paradigm," Lecture Notes in Computer |
| 1568 ++Science B<1976> (2000), 531–545, |
| 1569 ++L<http://www-cse.ucsd.edu/~mihir/papers/oem.html>. |
| 1570 ++ |
| 1571 ++When sealing messages, a nonce must be given. The length of the nonce is fixed |
| 1572 ++by the AEAD in use and is returned by EVP_AEAD_nonce_length(). I<The nonce mus
t |
| 1573 ++be unique for all messages with the same key>. This is critically important - |
| 1574 ++nonce reuse may completely undermine the security of the AEAD. Nonces may be |
| 1575 ++predictable and public, so long as they are unique. Uniqueness may be achieved |
| 1576 ++with a simple counter or, if long enough, may be generated randomly. The nonce |
| 1577 ++must be passed into the "open" operation by the receiver so must either be |
| 1578 ++implicit (e.g. a counter), or must be transmitted along with the sealed messag
e. |
| 1579 ++ |
| 1580 ++The "seal" and "open" operations are atomic - an entire message must be |
| 1581 ++encrypted or decrypted in a single call. Large messages may have to be split u
p |
| 1582 ++in order to accomodate this. When doing so, be mindful of the need not to |
| 1583 ++repeat nonces and the possibility that an attacker could duplicate, reorder or |
| 1584 ++drop message chunks. For example, using a single key for a given (large) |
| 1585 ++message and sealing chunks with nonces counting from zero would be secure as |
| 1586 ++long as the number of chunks was securely transmitted. (Otherwise an attacker |
| 1587 ++could truncate the message by dropping chunks from the end.) |
| 1588 ++ |
| 1589 ++The number of chunks could be transmitted by prefixing it to the plaintext, fo
r |
| 1590 ++example. This also assumes that no other message would ever use the same key |
| 1591 ++otherwise the rule that nonces must be unique for a given key would be |
| 1592 ++violated. |
| 1593 ++ |
| 1594 ++The "seal" and "open" operations also permit additional data to be |
| 1595 ++authenticated via the B<ad> parameter. This data is not included in the |
| 1596 ++ciphertext and must be identical for both the "seal" and "open" call. This |
| 1597 ++permits implicit context to be authenticated but may be C<NULL> if not needed. |
| 1598 ++ |
| 1599 ++The "seal" and "open" operations may work inplace if the B<out> and B<in> |
| 1600 ++arguments are equal. They may also be used to shift the data left inside the |
| 1601 ++same buffer if B<out> is less than B<in>. However, B<out> may not point inside |
| 1602 ++the input data otherwise the input may be overwritten before it has been read. |
| 1603 ++This case will cause an error. |
| 1604 ++ |
| 1605 ++=head1 RETURN VALUES |
| 1606 ++ |
| 1607 ++The "seal" and "open" operations return an C<ssize_t> with value -1 on error, |
| 1608 ++otherwise they return the number of output bytes written. An error will be |
| 1609 ++returned if the input length is large enough that the output size exceeds the |
| 1610 ++range of a C<ssize_t>. |
| 1611 ++ |
| 1612 ++=head1 HISTORY |
| 1613 ++ |
| 1614 ++These functions were first added to OpenSSL 1.0.2. |
| 1615 ++ |
| 1616 ++=cut |
| 1617 +-- |
| 1618 +1.8.4.1 |
| 1619 + |
| 1620 diff -burN android-openssl-lhash2/patches/chacha20poly1305.patch android-openssl
/patches/chacha20poly1305.patch |
| 1621 --- android-openssl-lhash2/patches/chacha20poly1305.patch 1969-12-31 19:00
:00.000000000 -0500 |
| 1622 +++ android-openssl/patches/chacha20poly1305.patch 2013-11-05 15:15:28.4544
80948 -0500 |
| 1623 @@ -0,0 +1,5726 @@ |
| 1624 +From 2688f00904e4ffd647afcff69bb8fe6df8c5902b Mon Sep 17 00:00:00 2001 |
| 1625 +From: Adam Langley <agl@chromium.org> |
| 1626 +Date: Mon, 9 Sep 2013 12:13:24 -0400 |
| 1627 +Subject: [PATCH 43/52] chacha20poly1305 |
| 1628 + |
| 1629 +Add support for Chacha20 + Poly1305. |
| 1630 +--- |
| 1631 + .gitignore | 1 + |
| 1632 + Configure | 56 +- |
| 1633 + Makefile.org | 6 +- |
| 1634 + apps/speed.c | 64 +- |
| 1635 + crypto/chacha/Makefile | 80 ++ |
| 1636 + crypto/chacha/chacha.h | 85 ++ |
| 1637 + crypto/chacha/chacha_enc.c | 167 +++ |
| 1638 + crypto/chacha/chacha_vec.c | 345 +++++++ |
| 1639 + crypto/chacha/chachatest.c | 211 ++++ |
| 1640 + crypto/evp/Makefile | 35 +- |
| 1641 + crypto/evp/e_chacha20poly1305.c | 261 +++++ |
| 1642 + crypto/evp/evp.h | 8 + |
| 1643 + crypto/evp/evp_err.c | 3 + |
| 1644 + crypto/poly1305/Makefile | 81 ++ |
| 1645 + crypto/poly1305/poly1305.c | 320 ++++++ |
| 1646 + crypto/poly1305/poly1305.h | 88 ++ |
| 1647 + crypto/poly1305/poly1305_arm.c | 335 ++++++ |
| 1648 + crypto/poly1305/poly1305_arm_asm.s | 2009 ++++++++++++++++++++++++++++++++++++ |
| 1649 + crypto/poly1305/poly1305_vec.c | 733 +++++++++++++ |
| 1650 + crypto/poly1305/poly1305test.c | 166 +++ |
| 1651 + ssl/s3_lib.c | 75 +- |
| 1652 + ssl/s3_pkt.c | 5 +- |
| 1653 + ssl/ssl.h | 1 + |
| 1654 + ssl/ssl_ciph.c | 16 +- |
| 1655 + ssl/ssl_locl.h | 10 + |
| 1656 + ssl/t1_enc.c | 30 +- |
| 1657 + ssl/tls1.h | 8 + |
| 1658 + test/Makefile | 23 +- |
| 1659 + 28 files changed, 5166 insertions(+), 56 deletions(-) |
| 1660 + create mode 100644 crypto/chacha/Makefile |
| 1661 + create mode 100644 crypto/chacha/chacha.h |
| 1662 + create mode 100644 crypto/chacha/chacha_enc.c |
| 1663 + create mode 100644 crypto/chacha/chacha_vec.c |
| 1664 + create mode 100644 crypto/chacha/chachatest.c |
| 1665 + create mode 100644 crypto/evp/e_chacha20poly1305.c |
| 1666 + create mode 100644 crypto/poly1305/Makefile |
| 1667 + create mode 100644 crypto/poly1305/poly1305.c |
| 1668 + create mode 100644 crypto/poly1305/poly1305.h |
| 1669 + create mode 100644 crypto/poly1305/poly1305_arm.c |
| 1670 + create mode 100644 crypto/poly1305/poly1305_arm_asm.s |
| 1671 + create mode 100644 crypto/poly1305/poly1305_vec.c |
| 1672 + create mode 100644 crypto/poly1305/poly1305test.c |
| 1673 + |
| 1674 +diff --git a/Configure b/Configure |
| 1675 +index 9c803dc..1b95384 100755 |
| 1676 +--- a/Configure |
| 1677 ++++ b/Configure |
| 1678 +@@ -124,24 +124,24 @@ my $tlib="-lnsl -lsocket"; |
| 1679 + my $bits1="THIRTY_TWO_BIT "; |
| 1680 + my $bits2="SIXTY_FOUR_BIT "; |
| 1681 + |
| 1682 +-my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o cryp
t586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-58
6.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cm
ll-x86.o:ghash-x86.o:"; |
| 1683 ++my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o cryp
t586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-58
6.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cm
ll-x86.o:ghash-x86.o:::"; |
| 1684 + |
| 1685 + my $x86_elf_asm="$x86_asm:elf"; |
| 1686 + |
| 1687 +-my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64
-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86
_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86
_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:gha
sh-x86_64.o:"; |
| 1688 +-my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64
.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::
::ghash-ia64.o::void"; |
| 1689 +-my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9
a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sp
arcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; |
| 1690 +-my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void"; |
| 1691 +-my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash
-alpha.o::void"; |
| 1692 +-my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::
::::::"; |
| 1693 +-my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha
256-mips.o sha512-mips.o::::::::"; |
| 1694 +-my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::a
es-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4
-s390x.o:::::ghash-s390x.o:"; |
| 1695 +-my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_c
bc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-
armv4.o::void"; |
| 1696 +-my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-
parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash
-parisc.o::32"; |
| 1697 +-my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o a
es-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::gh
ash-parisc.o::64"; |
| 1698 +-my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.
o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::"; |
| 1699 +-my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.
o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::"; |
| 1700 +-my $no_asm=":::::::::::::::void"; |
| 1701 ++my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64
-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86
_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86
_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:gha
sh-x86_64.o::chacha_vec.o:poly1305_vec.o"; |
| 1702 ++my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64
.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::
::ghash-ia64.o::::void"; |
| 1703 ++my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9
a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sp
arcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::::void"; |
| 1704 ++my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::::void"; |
| 1705 ++my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash
-alpha.o::::void"; |
| 1706 ++my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::
::::::::"; |
| 1707 ++my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha
256-mips.o sha512-mips.o::::::::::"; |
| 1708 ++my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::a
es-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4
-s390x.o:::::::ghash-s390x.o:"; |
| 1709 ++my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_c
bc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-
armv4.o::chacha_vec.o:poly1305_arm.o poly1305_arm_asm.o:void"; |
| 1710 ++my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-
parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash
-parisc.o::::32"; |
| 1711 ++my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o a
es-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::gh
ash-parisc.o::::64"; |
| 1712 ++my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.
o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::::"; |
| 1713 ++my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.
o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::::"; |
| 1714 ++my $no_asm=":::::::::::::::::void"; |
| 1715 + |
| 1716 + # As for $BSDthreads. Idea is to maintain "collective" set of flags, |
| 1717 + # which would cover all BSD flavors. -pthread applies to them all, |
| 1718 +@@ -152,7 +152,7 @@ my $no_asm=":::::::::::::::void"; |
| 1719 + # seems to be sufficient? |
| 1720 + my $BSDthreads="-pthread -D_THREAD_SAFE -D_REENTRANT"; |
| 1721 + |
| 1722 +-#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lfl
ags : $bn_ops : $cpuid_obj : $bn_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj
: $sha1_obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_ob
j : $modes_obj : $engines_obj : $dso_scheme : $shared_target : $shared_cflag : $
shared_ldflag : $shared_extension : $ranlib : $arflags : $multilib |
| 1723 ++#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lfl
ags : $bn_ops : $cpuid_obj : $bn_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj
: $sha1_obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_ob
j : $modes_obj : $engines_obj : $chacha_obj : $poly1305_obj : $dso_scheme : $sha
red_target : $shared_cflag : $shared_ldflag : $shared_extension : $ranlib : $arf
lags : $multilib : |
| 1724 + |
| 1725 + my %table=( |
| 1726 + # File 'TABLE' (created by 'make TABLE') contains the data from this list, |
| 1727 +@@ -647,6 +647,8 @@ my $idx_wp_obj = $idx++; |
| 1728 + my $idx_cmll_obj = $idx++; |
| 1729 + my $idx_modes_obj = $idx++; |
| 1730 + my $idx_engines_obj = $idx++; |
| 1731 ++my $idx_chacha_obj = $idx++; |
| 1732 ++my $idx_poly1305_obj = $idx++; |
| 1733 + my $idx_perlasm_scheme = $idx++; |
| 1734 + my $idx_dso_scheme = $idx++; |
| 1735 + my $idx_shared_target = $idx++; |
| 1736 +@@ -692,6 +694,8 @@ my $aes_enc="aes_core.o aes_cbc.o"; |
| 1737 + my $bf_enc ="bf_enc.o"; |
| 1738 + my $cast_enc="c_enc.o"; |
| 1739 + my $rc4_enc="rc4_enc.o rc4_skey.o"; |
| 1740 ++my $chacha_enc="chacha_enc.o"; |
| 1741 ++my $poly1305 ="poly1305.o"; |
| 1742 + my $rc5_enc="rc5_enc.o"; |
| 1743 + my $md5_obj=""; |
| 1744 + my $sha1_obj=""; |
| 1745 +@@ -1144,7 +1148,7 @@ $openssldir=$prefix . "/" . $openssldir if $openssldir !~
/(^\/|^[a-zA-Z]:[\\\/] |
| 1746 + |
| 1747 + print "IsMK1MF=$IsMK1MF\n"; |
| 1748 + |
| 1749 +-my @fields = split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); |
| 1750 ++my @fields = split(/\s*:\s*/,$table{$target} . ":" x 31 , -1); |
| 1751 + my $cc = $fields[$idx_cc]; |
| 1752 + # Allow environment CC to override compiler... |
| 1753 + if($ENV{CC}) { |
| 1754 +@@ -1181,6 +1185,8 @@ my $ranlib = $ENV{'RANLIB'} || $fields[$idx_ranlib]; |
| 1755 + my $ar = $ENV{'AR'} || "ar"; |
| 1756 + my $arflags = $fields[$idx_arflags]; |
| 1757 + my $multilib = $fields[$idx_multilib]; |
| 1758 ++my $chacha_obj = $fields[$idx_chacha_obj]; |
| 1759 ++my $poly1305_obj = $fields[$idx_poly1305_obj]; |
| 1760 + |
| 1761 + # if $prefix/lib$multilib is not an existing directory, then |
| 1762 + # assume that it's not searched by linker automatically, in |
| 1763 +@@ -1477,6 +1483,8 @@ $des_obj=$des_enc unless ($des_obj =~ /\.o$/); |
| 1764 + $bf_obj=$bf_enc unless ($bf_obj =~ /\.o$/); |
| 1765 + $cast_obj=$cast_enc unless ($cast_obj =~ /\.o$/); |
| 1766 + $rc4_obj=$rc4_enc unless ($rc4_obj =~ /\.o$/); |
| 1767 ++$chacha_obj=$chacha_enc unless ($chacha_obj =~ /\.o$/); |
| 1768 ++$poly1305_obj=$poly1305 unless ($poly1305_obj =~ /\.o$/); |
| 1769 + $rc5_obj=$rc5_enc unless ($rc5_obj =~ /\.o$/); |
| 1770 + if ($sha1_obj =~ /\.o$/) |
| 1771 + { |
| 1772 +@@ -1637,6 +1645,8 @@ while (<IN>) |
| 1773 + s/^BF_ENC=.*$/BF_ENC= $bf_obj/; |
| 1774 + s/^CAST_ENC=.*$/CAST_ENC= $cast_obj/; |
| 1775 + s/^RC4_ENC=.*$/RC4_ENC= $rc4_obj/; |
| 1776 ++ s/^CHACHA_ENC=.*$/CHACHA_ENC= $chacha_obj/; |
| 1777 ++ s/^POLY1305=.*$/POLY1305= $poly1305_obj/; |
| 1778 + s/^RC5_ENC=.*$/RC5_ENC= $rc5_obj/; |
| 1779 + s/^MD5_ASM_OBJ=.*$/MD5_ASM_OBJ= $md5_obj/; |
| 1780 + s/^SHA1_ASM_OBJ=.*$/SHA1_ASM_OBJ= $sha1_obj/; |
| 1781 +@@ -1698,6 +1708,8 @@ print "AES_ENC =$aes_obj\n"; |
| 1782 + print "BF_ENC =$bf_obj\n"; |
| 1783 + print "CAST_ENC =$cast_obj\n"; |
| 1784 + print "RC4_ENC =$rc4_obj\n"; |
| 1785 ++print "CHACHA_ENC =$chacha_obj\n"; |
| 1786 ++print "POLY1305 =$poly1305_obj\n"; |
| 1787 + print "RC5_ENC =$rc5_obj\n"; |
| 1788 + print "MD5_OBJ_ASM =$md5_obj\n"; |
| 1789 + print "SHA1_OBJ_ASM =$sha1_obj\n"; |
| 1790 +@@ -2096,11 +2108,11 @@ sub print_table_entry |
| 1791 + |
| 1792 + (my $cc,my $cflags,my $unistd,my $thread_cflag,my $sys_id,my $lflags, |
| 1793 + my $bn_ops,my $cpuid_obj,my $bn_obj,my $des_obj,my $aes_obj, my $bf_obj, |
| 1794 +- my $md5_obj,my $sha1_obj,my $cast_obj,my $rc4_obj,my $rmd160_obj, |
| 1795 +- my $rc5_obj,my $wp_obj,my $cmll_obj,my $modes_obj, my $engines_obj, |
| 1796 ++ my $md5_obj,my $sha1_obj,my $cast_obj,my $rc4_obj,my $chacha_obj,my $pol
y1305_obj, |
| 1797 ++ my $rmd160_obj, my $rc5_obj,my $wp_obj,my $cmll_obj,my $modes_obj, my $e
ngines_obj, |
| 1798 + my $perlasm_scheme,my $dso_scheme,my $shared_target,my $shared_cflag, |
| 1799 + my $shared_ldflag,my $shared_extension,my $ranlib,my $arflags,my $multil
ib)= |
| 1800 +- split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); |
| 1801 ++ split(/\s*:\s*/,$table{$target} . ":" x 31 , -1); |
| 1802 + |
| 1803 + print <<EOF |
| 1804 + |
| 1805 +@@ -2121,6 +2133,8 @@ sub print_table_entry |
| 1806 + \$sha1_obj = $sha1_obj |
| 1807 + \$cast_obj = $cast_obj |
| 1808 + \$rc4_obj = $rc4_obj |
| 1809 ++\$chacha_obj = $chacha_obj |
| 1810 ++\$poly1305_obj = $poly1305_obj |
| 1811 + \$rmd160_obj = $rmd160_obj |
| 1812 + \$rc5_obj = $rc5_obj |
| 1813 + \$wp_obj = $wp_obj |
| 1814 +@@ -2150,7 +2164,7 @@ sub test_sanity |
| 1815 + |
| 1816 + foreach $target (sort keys %table) |
| 1817 + { |
| 1818 +- @fields = split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); |
| 1819 ++ @fields = split(/\s*:\s*/,$table{$target} . ":" x 31 , -1); |
| 1820 + |
| 1821 + if ($fields[$idx_dso_scheme-1] =~ /^(beos|dl|dlfcn|win32|vms)$/) |
| 1822 + { |
| 1823 +diff --git a/Makefile.org b/Makefile.org |
| 1824 +index 2db31ea..919466d 100644 |
| 1825 +--- a/Makefile.org |
| 1826 ++++ b/Makefile.org |
| 1827 +@@ -94,6 +94,8 @@ BF_ENC= bf_enc.o |
| 1828 + CAST_ENC= c_enc.o |
| 1829 + RC4_ENC= rc4_enc.o |
| 1830 + RC5_ENC= rc5_enc.o |
| 1831 ++CHACHA_ENC= chacha_enc.o |
| 1832 ++POLY1305= poly1305.o |
| 1833 + MD5_ASM_OBJ= |
| 1834 + SHA1_ASM_OBJ= |
| 1835 + RMD160_ASM_OBJ= |
| 1836 +@@ -147,7 +149,7 @@ SDIRS= \ |
| 1837 + bn ec rsa dsa ecdsa dh ecdh dso engine \ |
| 1838 + buffer bio stack lhash rand err \ |
| 1839 + evp asn1 pem x509 x509v3 conf txt_db pkcs7 pkcs12 comp ocsp ui krb5 \ |
| 1840 +- cms pqueue ts jpake srp store cmac |
| 1841 ++ cms pqueue ts jpake srp store cmac poly1305 chacha |
| 1842 + # keep in mind that the above list is adjusted by ./Configure |
| 1843 + # according to no-xxx arguments... |
| 1844 + |
| 1845 +@@ -232,6 +234,8 @@ BUILDENV= PLATFORM='$(PLATFORM)' PROCESSOR='$(PROCESSOR)'
\ |
| 1846 + WP_ASM_OBJ='$(WP_ASM_OBJ)' \ |
| 1847 + MODES_ASM_OBJ='$(MODES_ASM_OBJ)' \ |
| 1848 + ENGINES_ASM_OBJ='$(ENGINES_ASM_OBJ)' \ |
| 1849 ++ CHACHA_ENC='$(CHACHA_ENC)' \ |
| 1850 ++ POLY1305='$(POLY1305)' \ |
| 1851 + PERLASM_SCHEME='$(PERLASM_SCHEME)' \ |
| 1852 + FIPSLIBDIR='${FIPSLIBDIR}' \ |
| 1853 + FIPSDIR='${FIPSDIR}' \ |
| 1854 +diff --git a/crypto/chacha/Makefile b/crypto/chacha/Makefile |
| 1855 +new file mode 100644 |
| 1856 +index 0000000..289933b |
| 1857 +--- /dev/null |
| 1858 ++++ b/crypto/chacha/Makefile |
| 1859 +@@ -0,0 +1,80 @@ |
| 1860 ++# |
| 1861 ++# OpenSSL/crypto/chacha/Makefile |
| 1862 ++# |
| 1863 ++ |
| 1864 ++DIR= chacha |
| 1865 ++TOP= ../.. |
| 1866 ++CC= cc |
| 1867 ++CPP= $(CC) -E |
| 1868 ++INCLUDES= |
| 1869 ++CFLAG=-g |
| 1870 ++AR= ar r |
| 1871 ++ |
| 1872 ++CFLAGS= $(INCLUDES) $(CFLAG) |
| 1873 ++ASFLAGS= $(INCLUDES) $(ASFLAG) |
| 1874 ++AFLAGS= $(ASFLAGS) |
| 1875 ++ |
| 1876 ++CHACHA_ENC=chacha_enc.o |
| 1877 ++ |
| 1878 ++GENERAL=Makefile |
| 1879 ++TEST=chachatest.o |
| 1880 ++APPS= |
| 1881 ++ |
| 1882 ++LIB=$(TOP)/libcrypto.a |
| 1883 ++LIBSRC= |
| 1884 ++LIBOBJ=$(CHACHA_ENC) |
| 1885 ++ |
| 1886 ++SRC= $(LIBSRC) |
| 1887 ++ |
| 1888 ++EXHEADER=chacha.h |
| 1889 ++HEADER= $(EXHEADER) |
| 1890 ++ |
| 1891 ++ALL= $(GENERAL) $(SRC) $(HEADER) |
| 1892 ++ |
| 1893 ++top: |
| 1894 ++ (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all) |
| 1895 ++ |
| 1896 ++all: lib |
| 1897 ++ |
| 1898 ++lib: $(LIBOBJ) |
| 1899 ++ $(AR) $(LIB) $(LIBOBJ) |
| 1900 ++ $(RANLIB) $(LIB) || echo Never mind. |
| 1901 ++ @touch lib |
| 1902 ++ |
| 1903 ++files: |
| 1904 ++ $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO |
| 1905 ++ |
| 1906 ++links: |
| 1907 ++ @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER) |
| 1908 ++ @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST) |
| 1909 ++ @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS) |
| 1910 ++ |
| 1911 ++install: |
| 1912 ++ @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile... |
| 1913 ++ @headerlist="$(EXHEADER)"; for i in $$headerlist ; \ |
| 1914 ++ do \ |
| 1915 ++ (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \ |
| 1916 ++ chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \ |
| 1917 ++ done; |
| 1918 ++ |
| 1919 ++tags: |
| 1920 ++ ctags $(SRC) |
| 1921 ++ |
| 1922 ++tests: |
| 1923 ++ |
| 1924 ++lint: |
| 1925 ++ lint -DLINT $(INCLUDES) $(SRC)>fluff |
| 1926 ++ |
| 1927 ++depend: |
| 1928 ++ @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... |
| 1929 ++ $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) |
| 1930 ++ |
| 1931 ++dclean: |
| 1932 ++ $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKE
FILE) >Makefile.new |
| 1933 ++ mv -f Makefile.new $(MAKEFILE) |
| 1934 ++ |
| 1935 ++clean: |
| 1936 ++ rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff |
| 1937 ++ |
| 1938 ++# DO NOT DELETE THIS LINE -- make depend depends on it. |
| 1939 ++ |
| 1940 +diff --git a/crypto/chacha/chacha.h b/crypto/chacha/chacha.h |
| 1941 +new file mode 100644 |
| 1942 +index 0000000..d56519d |
| 1943 +--- /dev/null |
| 1944 ++++ b/crypto/chacha/chacha.h |
| 1945 +@@ -0,0 +1,85 @@ |
| 1946 ++/* |
| 1947 ++ * Chacha stream algorithm. |
| 1948 ++ * |
| 1949 ++ * Created on: Jun, 2013 |
| 1950 ++ * Author: Elie Bursztein (elieb@google.com) |
| 1951 ++ * |
| 1952 ++ * Adapted from the estream code by D. Bernstein. |
| 1953 ++ */ |
| 1954 ++/* ==================================================================== |
| 1955 ++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. |
| 1956 ++ * |
| 1957 ++ * Redistribution and use in source and binary forms, with or without |
| 1958 ++ * modification, are permitted provided that the following conditions |
| 1959 ++ * are met: |
| 1960 ++ * |
| 1961 ++ * 1. Redistributions of source code must retain the above copyright |
| 1962 ++ * notice, this list of conditions and the following disclaimer. |
| 1963 ++ * |
| 1964 ++ * 2. Redistributions in binary form must reproduce the above copyright |
| 1965 ++ * notice, this list of conditions and the following disclaimer in |
| 1966 ++ * the documentation and/or other materials provided with the |
| 1967 ++ * distribution. |
| 1968 ++ * |
| 1969 ++ * 3. All advertising materials mentioning features or use of this |
| 1970 ++ * software must display the following acknowledgment: |
| 1971 ++ * "This product includes software developed by the OpenSSL Project |
| 1972 ++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" |
| 1973 ++ * |
| 1974 ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 1975 ++ * endorse or promote products derived from this software without |
| 1976 ++ * prior written permission. For written permission, please contact |
| 1977 ++ * licensing@OpenSSL.org. |
| 1978 ++ * |
| 1979 ++ * 5. Products derived from this software may not be called "OpenSSL" |
| 1980 ++ * nor may "OpenSSL" appear in their names without prior written |
| 1981 ++ * permission of the OpenSSL Project. |
| 1982 ++ * |
| 1983 ++ * 6. Redistributions of any form whatsoever must retain the following |
| 1984 ++ * acknowledgment: |
| 1985 ++ * "This product includes software developed by the OpenSSL Project |
| 1986 ++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" |
| 1987 ++ * |
| 1988 ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 1989 ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 1990 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 1991 ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 1992 ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 1993 ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 1994 ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 1995 ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 1996 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 1997 ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 1998 ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 1999 ++ * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 2000 ++ * ==================================================================== |
| 2001 ++ */ |
| 2002 ++#ifndef HEADER_CHACHA_H |
| 2003 ++#define HEADER_CHACHA_H |
| 2004 ++ |
| 2005 ++#include <openssl/opensslconf.h> |
| 2006 ++ |
| 2007 ++#if defined(OPENSSL_NO_CHACHA) |
| 2008 ++#error ChaCha support is disabled. |
| 2009 ++#endif |
| 2010 ++ |
| 2011 ++#include <stddef.h> |
| 2012 ++ |
| 2013 ++#ifdef __cplusplus |
| 2014 ++extern "C" { |
| 2015 ++#endif |
| 2016 ++ |
| 2017 ++/* CRYPTO_chacha_20 encrypts |in_len| bytes from |in| with the given key and |
| 2018 ++ * nonce and writes the result to |out|, which may be equal to |in|. The |
| 2019 ++ * initial block counter is specified by |counter|. */ |
| 2020 ++void CRYPTO_chacha_20(unsigned char *out, |
| 2021 ++ const unsigned char *in, size_t in_len, |
| 2022 ++ const unsigned char key[32], |
| 2023 ++ const unsigned char nonce[8], |
| 2024 ++ size_t counter); |
| 2025 ++ |
| 2026 ++#ifdef __cplusplus |
| 2027 ++} |
| 2028 ++#endif |
| 2029 ++ |
| 2030 ++#endif |
| 2031 +diff --git a/crypto/chacha/chacha_enc.c b/crypto/chacha/chacha_enc.c |
| 2032 +new file mode 100644 |
| 2033 +index 0000000..54d1ca3 |
| 2034 +--- /dev/null |
| 2035 ++++ b/crypto/chacha/chacha_enc.c |
| 2036 +@@ -0,0 +1,167 @@ |
| 2037 ++/* |
| 2038 ++ * Chacha stream algorithm. |
| 2039 ++ * |
| 2040 ++ * Created on: Jun, 2013 |
| 2041 ++ * Author: Elie Bursztein (elieb@google.com) |
| 2042 ++ * |
| 2043 ++ * Adapted from the estream code by D. Bernstein. |
| 2044 ++ */ |
| 2045 ++/* ==================================================================== |
| 2046 ++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. |
| 2047 ++ * |
| 2048 ++ * Redistribution and use in source and binary forms, with or without |
| 2049 ++ * modification, are permitted provided that the following conditions |
| 2050 ++ * are met: |
| 2051 ++ * |
| 2052 ++ * 1. Redistributions of source code must retain the above copyright |
| 2053 ++ * notice, this list of conditions and the following disclaimer. |
| 2054 ++ * |
| 2055 ++ * 2. Redistributions in binary form must reproduce the above copyright |
| 2056 ++ * notice, this list of conditions and the following disclaimer in |
| 2057 ++ * the documentation and/or other materials provided with the |
| 2058 ++ * distribution. |
| 2059 ++ * |
| 2060 ++ * 3. All advertising materials mentioning features or use of this |
| 2061 ++ * software must display the following acknowledgment: |
| 2062 ++ * "This product includes software developed by the OpenSSL Project |
| 2063 ++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" |
| 2064 ++ * |
| 2065 ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 2066 ++ * endorse or promote products derived from this software without |
| 2067 ++ * prior written permission. For written permission, please contact |
| 2068 ++ * licensing@OpenSSL.org. |
| 2069 ++ * |
| 2070 ++ * 5. Products derived from this software may not be called "OpenSSL" |
| 2071 ++ * nor may "OpenSSL" appear in their names without prior written |
| 2072 ++ * permission of the OpenSSL Project. |
| 2073 ++ * |
| 2074 ++ * 6. Redistributions of any form whatsoever must retain the following |
| 2075 ++ * acknowledgment: |
| 2076 ++ * "This product includes software developed by the OpenSSL Project |
| 2077 ++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" |
| 2078 ++ * |
| 2079 ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 2080 ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 2081 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 2082 ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 2083 ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 2084 ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 2085 ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 2086 ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 2087 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 2088 ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 2089 ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 2090 ++ * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 2091 ++ * ==================================================================== |
| 2092 ++ */ |
| 2093 ++ |
| 2094 ++#include <stdint.h> |
| 2095 ++#include <string.h> |
| 2096 ++#include <openssl/opensslconf.h> |
| 2097 ++ |
| 2098 ++#if !defined(OPENSSL_NO_CHACHA) |
| 2099 ++ |
| 2100 ++#include <openssl/chacha.h> |
| 2101 ++ |
| 2102 ++/* sigma contains the ChaCha constants, which happen to be an ASCII string. */ |
| 2103 ++static const char sigma[16] = "expand 32-byte k"; |
| 2104 ++ |
| 2105 ++#define ROTATE(v, n) (((v) << (n)) | ((v) >> (32 - (n)))) |
| 2106 ++#define XOR(v, w) ((v) ^ (w)) |
| 2107 ++#define PLUS(x, y) ((x) + (y)) |
| 2108 ++#define PLUSONE(v) (PLUS((v), 1)) |
| 2109 ++ |
| 2110 ++#define U32TO8_LITTLE(p, v) \ |
| 2111 ++ { (p)[0] = (v >> 0) & 0xff; (p)[1] = (v >> 8) & 0xff; \ |
| 2112 ++ (p)[2] = (v >> 16) & 0xff; (p)[3] = (v >> 24) & 0xff; } |
| 2113 ++#define U8TO32_LITTLE(p) \ |
| 2114 ++ (((uint32_t)((p)[0]) ) | ((uint32_t)((p)[1]) << 8) | \ |
| 2115 ++ ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24) ) |
| 2116 ++ |
| 2117 ++/* QUARTERROUND updates a, b, c, d with a ChaCha "quarter" round. */ |
| 2118 ++#define QUARTERROUND(a,b,c,d) \ |
| 2119 ++ x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \ |
| 2120 ++ x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \ |
| 2121 ++ x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \ |
| 2122 ++ x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7); |
| 2123 ++ |
| 2124 ++typedef unsigned int uint32_t; |
| 2125 ++ |
| 2126 ++/* chacha_core performs |num_rounds| rounds of ChaCha20 on the input words in |
| 2127 ++ * |input| and writes the 64 output bytes to |output|. */ |
| 2128 ++static void chacha_core(unsigned char output[64], const uint32_t input[16], |
| 2129 ++ int num_rounds) |
| 2130 ++ { |
| 2131 ++ uint32_t x[16]; |
| 2132 ++ int i; |
| 2133 ++ |
| 2134 ++ memcpy(x, input, sizeof(uint32_t) * 16); |
| 2135 ++ for (i = 20; i > 0; i -= 2) |
| 2136 ++ { |
| 2137 ++ QUARTERROUND( 0, 4, 8,12) |
| 2138 ++ QUARTERROUND( 1, 5, 9,13) |
| 2139 ++ QUARTERROUND( 2, 6,10,14) |
| 2140 ++ QUARTERROUND( 3, 7,11,15) |
| 2141 ++ QUARTERROUND( 0, 5,10,15) |
| 2142 ++ QUARTERROUND( 1, 6,11,12) |
| 2143 ++ QUARTERROUND( 2, 7, 8,13) |
| 2144 ++ QUARTERROUND( 3, 4, 9,14) |
| 2145 ++ } |
| 2146 ++ |
| 2147 ++ for (i = 0; i < 16; ++i) |
| 2148 ++ x[i] = PLUS(x[i], input[i]); |
| 2149 ++ for (i = 0; i < 16; ++i) |
| 2150 ++ U32TO8_LITTLE(output + 4 * i, x[i]); |
| 2151 ++ } |
| 2152 ++ |
| 2153 ++void CRYPTO_chacha_20(unsigned char *out, |
| 2154 ++ const unsigned char *in, size_t in_len, |
| 2155 ++ const unsigned char key[32], |
| 2156 ++ const unsigned char nonce[8], |
| 2157 ++ size_t counter) |
| 2158 ++ { |
| 2159 ++ uint32_t input[16]; |
| 2160 ++ unsigned char buf[64]; |
| 2161 ++ size_t todo, i; |
| 2162 ++ |
| 2163 ++ input[0] = U8TO32_LITTLE(sigma + 0); |
| 2164 ++ input[1] = U8TO32_LITTLE(sigma + 4); |
| 2165 ++ input[2] = U8TO32_LITTLE(sigma + 8); |
| 2166 ++ input[3] = U8TO32_LITTLE(sigma + 12); |
| 2167 ++ |
| 2168 ++ input[4] = U8TO32_LITTLE(key + 0); |
| 2169 ++ input[5] = U8TO32_LITTLE(key + 4); |
| 2170 ++ input[6] = U8TO32_LITTLE(key + 8); |
| 2171 ++ input[7] = U8TO32_LITTLE(key + 12); |
| 2172 ++ |
| 2173 ++ input[8] = U8TO32_LITTLE(key + 16); |
| 2174 ++ input[9] = U8TO32_LITTLE(key + 20); |
| 2175 ++ input[10] = U8TO32_LITTLE(key + 24); |
| 2176 ++ input[11] = U8TO32_LITTLE(key + 28); |
| 2177 ++ |
| 2178 ++ input[12] = counter; |
| 2179 ++ input[13] = ((uint64_t) counter) >> 32; |
| 2180 ++ input[14] = U8TO32_LITTLE(nonce + 0); |
| 2181 ++ input[15] = U8TO32_LITTLE(nonce + 4); |
| 2182 ++ |
| 2183 ++ while (in_len > 0) |
| 2184 ++ { |
| 2185 ++ todo = sizeof(buf); |
| 2186 ++ if (in_len < todo) |
| 2187 ++ todo = in_len; |
| 2188 ++ |
| 2189 ++ chacha_core(buf, input, 20); |
| 2190 ++ for (i = 0; i < todo; i++) |
| 2191 ++ out[i] = in[i] ^ buf[i]; |
| 2192 ++ |
| 2193 ++ out += todo; |
| 2194 ++ in += todo; |
| 2195 ++ in_len -= todo; |
| 2196 ++ |
| 2197 ++ input[12]++; |
| 2198 ++ if (input[12] == 0) |
| 2199 ++ input[13]++; |
| 2200 ++ } |
| 2201 ++ } |
| 2202 ++ |
| 2203 ++#endif /* !OPENSSL_NO_CHACHA */ |
| 2204 +diff --git a/crypto/chacha/chacha_vec.c b/crypto/chacha/chacha_vec.c |
| 2205 +new file mode 100644 |
| 2206 +index 0000000..33b2238 |
| 2207 +--- /dev/null |
| 2208 ++++ b/crypto/chacha/chacha_vec.c |
| 2209 +@@ -0,0 +1,345 @@ |
| 2210 ++/* ==================================================================== |
| 2211 ++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. |
| 2212 ++ * |
| 2213 ++ * Redistribution and use in source and binary forms, with or without |
| 2214 ++ * modification, are permitted provided that the following conditions |
| 2215 ++ * are met: |
| 2216 ++ * |
| 2217 ++ * 1. Redistributions of source code must retain the above copyright |
| 2218 ++ * notice, this list of conditions and the following disclaimer. |
| 2219 ++ * |
| 2220 ++ * 2. Redistributions in binary form must reproduce the above copyright |
| 2221 ++ * notice, this list of conditions and the following disclaimer in |
| 2222 ++ * the documentation and/or other materials provided with the |
| 2223 ++ * distribution. |
| 2224 ++ * |
| 2225 ++ * 3. All advertising materials mentioning features or use of this |
| 2226 ++ * software must display the following acknowledgment: |
| 2227 ++ * "This product includes software developed by the OpenSSL Project |
| 2228 ++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" |
| 2229 ++ * |
| 2230 ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 2231 ++ * endorse or promote products derived from this software without |
| 2232 ++ * prior written permission. For written permission, please contact |
| 2233 ++ * licensing@OpenSSL.org. |
| 2234 ++ * |
| 2235 ++ * 5. Products derived from this software may not be called "OpenSSL" |
| 2236 ++ * nor may "OpenSSL" appear in their names without prior written |
| 2237 ++ * permission of the OpenSSL Project. |
| 2238 ++ * |
| 2239 ++ * 6. Redistributions of any form whatsoever must retain the following |
| 2240 ++ * acknowledgment: |
| 2241 ++ * "This product includes software developed by the OpenSSL Project |
| 2242 ++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" |
| 2243 ++ * |
| 2244 ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 2245 ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 2246 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 2247 ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 2248 ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 2249 ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 2250 ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 2251 ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 2252 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 2253 ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 2254 ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 2255 ++ * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 2256 ++ * ==================================================================== |
| 2257 ++ */ |
| 2258 ++ |
| 2259 ++/* This implementation is by Ted Krovetz and was submitted to SUPERCOP and |
| 2260 ++ * marked as public domain. It has been altered to allow for non-aligned input
s |
| 2261 ++ * and to allow the block counter to be passed in specifically. */ |
| 2262 ++ |
| 2263 ++#include <string.h> |
| 2264 ++#include <stdint.h> |
| 2265 ++#include <openssl/opensslconf.h> |
| 2266 ++ |
| 2267 ++#if !defined(OPENSSL_NO_CHACHA) |
| 2268 ++ |
| 2269 ++#include <openssl/chacha.h> |
| 2270 ++ |
| 2271 ++#ifndef CHACHA_RNDS |
| 2272 ++#define CHACHA_RNDS 20 /* 8 (high speed), 20 (conservative), 12 (middle) */ |
| 2273 ++#endif |
| 2274 ++ |
| 2275 ++/* Architecture-neutral way to specify 16-byte vector of ints */ |
| 2276 ++typedef unsigned vec __attribute__ ((vector_size (16))); |
| 2277 ++ |
| 2278 ++/* This implementation is designed for Neon, SSE and AltiVec machines. The |
| 2279 ++ * following specify how to do certain vector operations efficiently on |
| 2280 ++ * each architecture, using intrinsics. |
| 2281 ++ * This implementation supports parallel processing of multiple blocks, |
| 2282 ++ * including potentially using general-purpose registers. |
| 2283 ++ */ |
| 2284 ++#if __ARM_NEON__ |
| 2285 ++#include <arm_neon.h> |
| 2286 ++#define GPR_TOO 1 |
| 2287 ++#define VBPI 2 |
| 2288 ++#define ONE (vec)vsetq_lane_u32(1,vdupq_n_u32(0),0) |
| 2289 ++#define LOAD(m) (vec)(*((vec*)(m))) |
| 2290 ++#define STORE(m,r) (*((vec*)(m))) = (r) |
| 2291 ++#define ROTV1(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,1) |
| 2292 ++#define ROTV2(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,2) |
| 2293 ++#define ROTV3(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,3) |
| 2294 ++#define ROTW16(x) (vec)vrev32q_u16((uint16x8_t)x) |
| 2295 ++#if __clang__ |
| 2296 ++#define ROTW7(x) (x << ((vec){ 7, 7, 7, 7})) ^ (x >> ((vec){25,25,25,25})) |
| 2297 ++#define ROTW8(x) (x << ((vec){ 8, 8, 8, 8})) ^ (x >> ((vec){24,24,24,24})) |
| 2298 ++#define ROTW12(x) (x << ((vec){12,12,12,12})) ^ (x >> ((vec){20,20,20,20})) |
| 2299 ++#else |
| 2300 ++#define ROTW7(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,7),(uint32x4_t)x,
25) |
| 2301 ++#define ROTW8(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,8),(uint32x4_t)x,
24) |
| 2302 ++#define ROTW12(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,12),(uint32x4_t)x
,20) |
| 2303 ++#endif |
| 2304 ++#elif __SSE2__ |
| 2305 ++#include <emmintrin.h> |
| 2306 ++#define GPR_TOO 0 |
| 2307 ++#if __clang__ |
| 2308 ++#define VBPI 4 |
| 2309 ++#else |
| 2310 ++#define VBPI 3 |
| 2311 ++#endif |
| 2312 ++#define ONE (vec)_mm_set_epi32(0,0,0,1) |
| 2313 ++#define LOAD(m) (vec)_mm_loadu_si128((__m128i*)(m)) |
| 2314 ++#define STORE(m,r) _mm_storeu_si128((__m128i*)(m), (__m128i) (r)) |
| 2315 ++#define ROTV1(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(0,3,2,1)) |
| 2316 ++#define ROTV2(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(1,0,3,2)) |
| 2317 ++#define ROTV3(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(2,1,0,3)) |
| 2318 ++#define ROTW7(x) (vec)(_mm_slli_epi32((__m128i)x, 7) ^ _mm_srli_epi32((__m128
i)x,25)) |
| 2319 ++#define ROTW12(x) (vec)(_mm_slli_epi32((__m128i)x,12) ^ _mm_srli_epi32((__m128
i)x,20)) |
| 2320 ++#if __SSSE3__ |
| 2321 ++#include <tmmintrin.h> |
| 2322 ++#define ROTW8(x) (vec)_mm_shuffle_epi8((__m128i)x,_mm_set_epi8(14,13,12,15,10
,9,8,11,6,5,4,7,2,1,0,3)) |
| 2323 ++#define ROTW16(x) (vec)_mm_shuffle_epi8((__m128i)x,_mm_set_epi8(13,12,15,14,9,
8,11,10,5,4,7,6,1,0,3,2)) |
| 2324 ++#else |
| 2325 ++#define ROTW8(x) (vec)(_mm_slli_epi32((__m128i)x, 8) ^ _mm_srli_epi32((__m128
i)x,24)) |
| 2326 ++#define ROTW16(x) (vec)(_mm_slli_epi32((__m128i)x,16) ^ _mm_srli_epi32((__m128
i)x,16)) |
| 2327 ++#endif |
| 2328 ++#else |
| 2329 ++#error -- Implementation supports only machines with neon or SSE2 |
| 2330 ++#endif |
| 2331 ++ |
| 2332 ++#ifndef REVV_BE |
| 2333 ++#define REVV_BE(x) (x) |
| 2334 ++#endif |
| 2335 ++ |
| 2336 ++#ifndef REVW_BE |
| 2337 ++#define REVW_BE(x) (x) |
| 2338 ++#endif |
| 2339 ++ |
| 2340 ++#define BPI (VBPI + GPR_TOO) /* Blocks computed per loop iteration */ |
| 2341 ++ |
| 2342 ++#define DQROUND_VECTORS(a,b,c,d) \ |
| 2343 ++ a += b; d ^= a; d = ROTW16(d); \ |
| 2344 ++ c += d; b ^= c; b = ROTW12(b); \ |
| 2345 ++ a += b; d ^= a; d = ROTW8(d); \ |
| 2346 ++ c += d; b ^= c; b = ROTW7(b); \ |
| 2347 ++ b = ROTV1(b); c = ROTV2(c); d = ROTV3(d); \ |
| 2348 ++ a += b; d ^= a; d = ROTW16(d); \ |
| 2349 ++ c += d; b ^= c; b = ROTW12(b); \ |
| 2350 ++ a += b; d ^= a; d = ROTW8(d); \ |
| 2351 ++ c += d; b ^= c; b = ROTW7(b); \ |
| 2352 ++ b = ROTV3(b); c = ROTV2(c); d = ROTV1(d); |
| 2353 ++ |
| 2354 ++#define QROUND_WORDS(a,b,c,d) \ |
| 2355 ++ a = a+b; d ^= a; d = d<<16 | d>>16; \ |
| 2356 ++ c = c+d; b ^= c; b = b<<12 | b>>20; \ |
| 2357 ++ a = a+b; d ^= a; d = d<< 8 | d>>24; \ |
| 2358 ++ c = c+d; b ^= c; b = b<< 7 | b>>25; |
| 2359 ++ |
| 2360 ++#define WRITE_XOR(in, op, d, v0, v1, v2, v3) \ |
| 2361 ++ STORE(op + d + 0, LOAD(in + d + 0) ^ REVV_BE(v0)); \ |
| 2362 ++ STORE(op + d + 4, LOAD(in + d + 4) ^ REVV_BE(v1)); \ |
| 2363 ++ STORE(op + d + 8, LOAD(in + d + 8) ^ REVV_BE(v2)); \ |
| 2364 ++ STORE(op + d +12, LOAD(in + d +12) ^ REVV_BE(v3)); |
| 2365 ++ |
| 2366 ++void CRYPTO_chacha_20( |
| 2367 ++ unsigned char *out, |
| 2368 ++ const unsigned char *in, |
| 2369 ++ size_t inlen, |
| 2370 ++ const unsigned char key[32], |
| 2371 ++ const unsigned char nonce[8], |
| 2372 ++ size_t counter) |
| 2373 ++ { |
| 2374 ++ unsigned iters, i, *op=(unsigned *)out, *ip=(unsigned *)in, *kp; |
| 2375 ++#if defined(__ARM_NEON__) |
| 2376 ++ unsigned *np; |
| 2377 ++#endif |
| 2378 ++ vec s0, s1, s2, s3; |
| 2379 ++#if !defined(__ARM_NEON__) && !defined(__SSE2__) |
| 2380 ++ __attribute__ ((aligned (16))) unsigned key[8], nonce[4]; |
| 2381 ++#endif |
| 2382 ++ __attribute__ ((aligned (16))) unsigned chacha_const[] = |
| 2383 ++ {0x61707865,0x3320646E,0x79622D32,0x6B206574}; |
| 2384 ++#if defined(__ARM_NEON__) || defined(__SSE2__) |
| 2385 ++ kp = (unsigned *)key; |
| 2386 ++#else |
| 2387 ++ ((vec *)key)[0] = REVV_BE(((vec *)key)[0]); |
| 2388 ++ ((vec *)key)[1] = REVV_BE(((vec *)key)[1]); |
| 2389 ++ nonce[0] = REVW_BE(((unsigned *)nonce)[0]); |
| 2390 ++ nonce[1] = REVW_BE(((unsigned *)nonce)[1]); |
| 2391 ++ nonce[2] = REVW_BE(((unsigned *)nonce)[2]); |
| 2392 ++ nonce[3] = REVW_BE(((unsigned *)nonce)[3]); |
| 2393 ++ kp = (unsigned *)key; |
| 2394 ++ np = (unsigned *)nonce; |
| 2395 ++#endif |
| 2396 ++#if defined(__ARM_NEON__) |
| 2397 ++ np = (unsigned*) nonce; |
| 2398 ++#endif |
| 2399 ++ s0 = LOAD(chacha_const); |
| 2400 ++ s1 = LOAD(&((vec*)kp)[0]); |
| 2401 ++ s2 = LOAD(&((vec*)kp)[1]); |
| 2402 ++ s3 = (vec){ |
| 2403 ++ counter & 0xffffffff, |
| 2404 ++#if __ARM_NEON__ |
| 2405 ++ 0, /* can't right-shift 32 bits on a 32-bit system. */ |
| 2406 ++#else |
| 2407 ++ counter >> 32, |
| 2408 ++#endif |
| 2409 ++ ((uint32_t*)nonce)[0], |
| 2410 ++ ((uint32_t*)nonce)[1] |
| 2411 ++ }; |
| 2412 ++ |
| 2413 ++ for (iters = 0; iters < inlen/(BPI*64); iters++) |
| 2414 ++ { |
| 2415 ++#if GPR_TOO |
| 2416 ++ register unsigned x0, x1, x2, x3, x4, x5, x6, x7, x8, |
| 2417 ++ x9, x10, x11, x12, x13, x14, x15; |
| 2418 ++#endif |
| 2419 ++#if VBPI > 2 |
| 2420 ++ vec v8,v9,v10,v11; |
| 2421 ++#endif |
| 2422 ++#if VBPI > 3 |
| 2423 ++ vec v12,v13,v14,v15; |
| 2424 ++#endif |
| 2425 ++ |
| 2426 ++ vec v0,v1,v2,v3,v4,v5,v6,v7; |
| 2427 ++ v4 = v0 = s0; v5 = v1 = s1; v6 = v2 = s2; v3 = s3; |
| 2428 ++ v7 = v3 + ONE; |
| 2429 ++#if VBPI > 2 |
| 2430 ++ v8 = v4; v9 = v5; v10 = v6; |
| 2431 ++ v11 = v7 + ONE; |
| 2432 ++#endif |
| 2433 ++#if VBPI > 3 |
| 2434 ++ v12 = v8; v13 = v9; v14 = v10; |
| 2435 ++ v15 = v11 + ONE; |
| 2436 ++#endif |
| 2437 ++#if GPR_TOO |
| 2438 ++ x0 = chacha_const[0]; x1 = chacha_const[1]; |
| 2439 ++ x2 = chacha_const[2]; x3 = chacha_const[3]; |
| 2440 ++ x4 = kp[0]; x5 = kp[1]; x6 = kp[2]; x7 = kp[3]; |
| 2441 ++ x8 = kp[4]; x9 = kp[5]; x10 = kp[6]; x11 = kp[7]; |
| 2442 ++ x12 = counter+BPI*iters+(BPI-1); x13 = 0; |
| 2443 ++ x14 = np[0]; x15 = np[1]; |
| 2444 ++#endif |
| 2445 ++ for (i = CHACHA_RNDS/2; i; i--) |
| 2446 ++ { |
| 2447 ++ DQROUND_VECTORS(v0,v1,v2,v3) |
| 2448 ++ DQROUND_VECTORS(v4,v5,v6,v7) |
| 2449 ++#if VBPI > 2 |
| 2450 ++ DQROUND_VECTORS(v8,v9,v10,v11) |
| 2451 ++#endif |
| 2452 ++#if VBPI > 3 |
| 2453 ++ DQROUND_VECTORS(v12,v13,v14,v15) |
| 2454 ++#endif |
| 2455 ++#if GPR_TOO |
| 2456 ++ QROUND_WORDS( x0, x4, x8,x12) |
| 2457 ++ QROUND_WORDS( x1, x5, x9,x13) |
| 2458 ++ QROUND_WORDS( x2, x6,x10,x14) |
| 2459 ++ QROUND_WORDS( x3, x7,x11,x15) |
| 2460 ++ QROUND_WORDS( x0, x5,x10,x15) |
| 2461 ++ QROUND_WORDS( x1, x6,x11,x12) |
| 2462 ++ QROUND_WORDS( x2, x7, x8,x13) |
| 2463 ++ QROUND_WORDS( x3, x4, x9,x14) |
| 2464 ++#endif |
| 2465 ++ } |
| 2466 ++ |
| 2467 ++ WRITE_XOR(ip, op, 0, v0+s0, v1+s1, v2+s2, v3+s3) |
| 2468 ++ s3 += ONE; |
| 2469 ++ WRITE_XOR(ip, op, 16, v4+s0, v5+s1, v6+s2, v7+s3) |
| 2470 ++ s3 += ONE; |
| 2471 ++#if VBPI > 2 |
| 2472 ++ WRITE_XOR(ip, op, 32, v8+s0, v9+s1, v10+s2, v11+s3) |
| 2473 ++ s3 += ONE; |
| 2474 ++#endif |
| 2475 ++#if VBPI > 3 |
| 2476 ++ WRITE_XOR(ip, op, 48, v12+s0, v13+s1, v14+s2, v15+s3) |
| 2477 ++ s3 += ONE; |
| 2478 ++#endif |
| 2479 ++ ip += VBPI*16; |
| 2480 ++ op += VBPI*16; |
| 2481 ++#if GPR_TOO |
| 2482 ++ op[0] = REVW_BE(REVW_BE(ip[0]) ^ (x0 + chacha_const[0])); |
| 2483 ++ op[1] = REVW_BE(REVW_BE(ip[1]) ^ (x1 + chacha_const[1])); |
| 2484 ++ op[2] = REVW_BE(REVW_BE(ip[2]) ^ (x2 + chacha_const[2])); |
| 2485 ++ op[3] = REVW_BE(REVW_BE(ip[3]) ^ (x3 + chacha_const[3])); |
| 2486 ++ op[4] = REVW_BE(REVW_BE(ip[4]) ^ (x4 + kp[0])); |
| 2487 ++ op[5] = REVW_BE(REVW_BE(ip[5]) ^ (x5 + kp[1])); |
| 2488 ++ op[6] = REVW_BE(REVW_BE(ip[6]) ^ (x6 + kp[2])); |
| 2489 ++ op[7] = REVW_BE(REVW_BE(ip[7]) ^ (x7 + kp[3])); |
| 2490 ++ op[8] = REVW_BE(REVW_BE(ip[8]) ^ (x8 + kp[4])); |
| 2491 ++ op[9] = REVW_BE(REVW_BE(ip[9]) ^ (x9 + kp[5])); |
| 2492 ++ op[10] = REVW_BE(REVW_BE(ip[10]) ^ (x10 + kp[6])); |
| 2493 ++ op[11] = REVW_BE(REVW_BE(ip[11]) ^ (x11 + kp[7])); |
| 2494 ++ op[12] = REVW_BE(REVW_BE(ip[12]) ^ (x12 + BPI*iters+(BPI-1))); |
| 2495 ++ op[13] = REVW_BE(REVW_BE(ip[13]) ^ (x13)); |
| 2496 ++ op[14] = REVW_BE(REVW_BE(ip[14]) ^ (x14 + np[0])); |
| 2497 ++ op[15] = REVW_BE(REVW_BE(ip[15]) ^ (x15 + np[1])); |
| 2498 ++ s3 += ONE; |
| 2499 ++ ip += 16; |
| 2500 ++ op += 16; |
| 2501 ++#endif |
| 2502 ++ } |
| 2503 ++ |
| 2504 ++ for (iters = inlen%(BPI*64)/64; iters != 0; iters--) |
| 2505 ++ { |
| 2506 ++ vec v0 = s0, v1 = s1, v2 = s2, v3 = s3; |
| 2507 ++ for (i = CHACHA_RNDS/2; i; i--) |
| 2508 ++ { |
| 2509 ++ DQROUND_VECTORS(v0,v1,v2,v3); |
| 2510 ++ } |
| 2511 ++ WRITE_XOR(ip, op, 0, v0+s0, v1+s1, v2+s2, v3+s3) |
| 2512 ++ s3 += ONE; |
| 2513 ++ ip += 16; |
| 2514 ++ op += 16; |
| 2515 ++ } |
| 2516 ++ |
| 2517 ++ inlen = inlen % 64; |
| 2518 ++ if (inlen) |
| 2519 ++ { |
| 2520 ++ __attribute__ ((aligned (16))) vec buf[4]; |
| 2521 ++ vec v0,v1,v2,v3; |
| 2522 ++ v0 = s0; v1 = s1; v2 = s2; v3 = s3; |
| 2523 ++ for (i = CHACHA_RNDS/2; i; i--) |
| 2524 ++ { |
| 2525 ++ DQROUND_VECTORS(v0,v1,v2,v3); |
| 2526 ++ } |
| 2527 ++ |
| 2528 ++ if (inlen >= 16) |
| 2529 ++ { |
| 2530 ++ STORE(op + 0, LOAD(ip + 0) ^ REVV_BE(v0 + s0)); |
| 2531 ++ if (inlen >= 32) |
| 2532 ++ { |
| 2533 ++ STORE(op + 4, LOAD(ip + 4) ^ REVV_BE(v1 + s1)); |
| 2534 ++ if (inlen >= 48) |
| 2535 ++ { |
| 2536 ++ STORE(op + 8, LOAD(ip + 8) ^ |
| 2537 ++ REVV_BE(v2 + s2)); |
| 2538 ++ buf[3] = REVV_BE(v3 + s3); |
| 2539 ++ } |
| 2540 ++ else |
| 2541 ++ buf[2] = REVV_BE(v2 + s2); |
| 2542 ++ } |
| 2543 ++ else |
| 2544 ++ buf[1] = REVV_BE(v1 + s1); |
| 2545 ++ } |
| 2546 ++ else |
| 2547 ++ buf[0] = REVV_BE(v0 + s0); |
| 2548 ++ |
| 2549 ++ for (i=inlen & ~15; i<inlen; i++) |
| 2550 ++ ((char *)op)[i] = ((char *)ip)[i] ^ ((char *)buf)[i]; |
| 2551 ++ } |
| 2552 ++ } |
| 2553 ++ |
| 2554 ++#endif /* !OPENSSL_NO_CHACHA */ |
| 2555 +diff --git a/crypto/chacha/chachatest.c b/crypto/chacha/chachatest.c |
| 2556 +new file mode 100644 |
| 2557 +index 0000000..b2a9389 |
| 2558 +--- /dev/null |
| 2559 ++++ b/crypto/chacha/chachatest.c |
| 2560 +@@ -0,0 +1,211 @@ |
| 2561 ++/* |
| 2562 ++ * Chacha stream algorithm. |
| 2563 ++ * |
| 2564 ++ * Created on: Jun, 2013 |
| 2565 ++ * Author: Elie Bursztein (elieb@google.com) |
| 2566 ++ * |
| 2567 ++ * Adapted from the estream code by D. Bernstein. |
| 2568 ++ */ |
| 2569 ++/* ==================================================================== |
| 2570 ++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. |
| 2571 ++ * |
| 2572 ++ * Redistribution and use in source and binary forms, with or without |
| 2573 ++ * modification, are permitted provided that the following conditions |
| 2574 ++ * are met: |
| 2575 ++ * |
| 2576 ++ * 1. Redistributions of source code must retain the above copyright |
| 2577 ++ * notice, this list of conditions and the following disclaimer. |
| 2578 ++ * |
| 2579 ++ * 2. Redistributions in binary form must reproduce the above copyright |
| 2580 ++ * notice, this list of conditions and the following disclaimer in |
| 2581 ++ * the documentation and/or other materials provided with the |
| 2582 ++ * distribution. |
| 2583 ++ * |
| 2584 ++ * 3. All advertising materials mentioning features or use of this |
| 2585 ++ * software must display the following acknowledgment: |
| 2586 ++ * "This product includes software developed by the OpenSSL Project |
| 2587 ++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" |
| 2588 ++ * |
| 2589 ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 2590 ++ * endorse or promote products derived from this software without |
| 2591 ++ * prior written permission. For written permission, please contact |
| 2592 ++ * licensing@OpenSSL.org. |
| 2593 ++ * |
| 2594 ++ * 5. Products derived from this software may not be called "OpenSSL" |
| 2595 ++ * nor may "OpenSSL" appear in their names without prior written |
| 2596 ++ * permission of the OpenSSL Project. |
| 2597 ++ * |
| 2598 ++ * 6. Redistributions of any form whatsoever must retain the following |
| 2599 ++ * acknowledgment: |
| 2600 ++ * "This product includes software developed by the OpenSSL Project |
| 2601 ++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" |
| 2602 ++ * |
| 2603 ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 2604 ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 2605 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 2606 ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 2607 ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 2608 ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 2609 ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 2610 ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 2611 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 2612 ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 2613 ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 2614 ++ * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 2615 ++ * ==================================================================== |
| 2616 ++ */ |
| 2617 ++ |
| 2618 ++#include <stdio.h> |
| 2619 ++#include <stdlib.h> |
| 2620 ++#include <string.h> |
| 2621 ++#include <stdint.h> |
| 2622 ++ |
| 2623 ++#include <openssl/chacha.h> |
| 2624 ++ |
| 2625 ++struct chacha_test { |
| 2626 ++ const char *keyhex; |
| 2627 ++ const char *noncehex; |
| 2628 ++ const char *outhex; |
| 2629 ++}; |
| 2630 ++ |
| 2631 ++static const struct chacha_test chacha_tests[] = { |
| 2632 ++ { |
| 2633 ++ "000000000000000000000000000000000000000000000000000000000000000
0", |
| 2634 ++ "0000000000000000", |
| 2635 ++ "76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc
7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586", |
| 2636 ++ }, |
| 2637 ++ { |
| 2638 ++ "000000000000000000000000000000000000000000000000000000000000000
1", |
| 2639 ++ "0000000000000000", |
| 2640 ++ "4540f05a9f1fb296d7736e7b208e3c96eb4fe1834688d2604f450952ed432d4
1bbe2a0b6ea7566d2a5d1e7e20d42af2c53d792b1c43fea817e9ad275ae546963", |
| 2641 ++ }, |
| 2642 ++ { |
| 2643 ++ "000000000000000000000000000000000000000000000000000000000000000
0", |
| 2644 ++ "0000000000000001", |
| 2645 ++ "de9cba7bf3d69ef5e786dc63973f653a0b49e015adbff7134fcb7df13782103
1e85a050278a7084527214f73efc7fa5b5277062eb7a0433e445f41e31afab757", |
| 2646 ++ }, |
| 2647 ++ { |
| 2648 ++ "000000000000000000000000000000000000000000000000000000000000000
0", |
| 2649 ++ "0100000000000000", |
| 2650 ++ "ef3fdfd6c61578fbf5cf35bd3dd33b8009631634d21e42ac33960bd138e50d3
2111e4caf237ee53ca8ad6426194a88545ddc497a0b466e7d6bbdb0041b2f586b", |
| 2651 ++ }, |
| 2652 ++ { |
| 2653 ++ "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1
f", |
| 2654 ++ "0001020304050607", |
| 2655 ++ "f798a189f195e66982105ffb640bb7757f579da31602fc93ec01ac56f85ac3c
134a4547b733b46413042c9440049176905d3be59ea1c53f15916155c2be8241a38008b9a26bc359
41e2444177c8ade6689de95264986d95889fb60e84629c9bd9a5acb1cc118be563eb9b3a4a472f82
e09a7e778492b562ef7130e88dfe031c79db9d4f7c7a899151b9a475032b63fc385245fe054e3dd5
a97a5f576fe064025d3ce042c566ab2c507b138db853e3d6959660996546cc9c4a6eafdc777c040d
70eaf46f76dad3979e5c5360c3317166a1c894c94a371876a94df7628fe4eaaf2ccb27d5aaae0ad7
ad0f9d4b6ad3b54098746d4524d38407a6deb", |
| 2656 ++ }, |
| 2657 ++}; |
| 2658 ++ |
| 2659 ++static unsigned char hex_digit(char h) |
| 2660 ++ { |
| 2661 ++ if (h >= '0' && h <= '9') |
| 2662 ++ return h - '0'; |
| 2663 ++ else if (h >= 'a' && h <= 'f') |
| 2664 ++ return h - 'a' + 10; |
| 2665 ++ else if (h >= 'A' && h <= 'F') |
| 2666 ++ return h - 'A' + 10; |
| 2667 ++ else |
| 2668 ++ abort(); |
| 2669 ++ } |
| 2670 ++ |
| 2671 ++static void hex_decode(unsigned char *out, const char* hex) |
| 2672 ++ { |
| 2673 ++ size_t j = 0; |
| 2674 ++ |
| 2675 ++ while (*hex != 0) |
| 2676 ++ { |
| 2677 ++ unsigned char v = hex_digit(*hex++); |
| 2678 ++ v <<= 4; |
| 2679 ++ v |= hex_digit(*hex++); |
| 2680 ++ out[j++] = v; |
| 2681 ++ } |
| 2682 ++ } |
| 2683 ++ |
| 2684 ++static void hexdump(unsigned char *a, size_t len) |
| 2685 ++ { |
| 2686 ++ size_t i; |
| 2687 ++ |
| 2688 ++ for (i = 0; i < len; i++) |
| 2689 ++ printf("%02x", a[i]); |
| 2690 ++ } |
| 2691 ++ |
| 2692 ++/* misalign returns a pointer that points 0 to 15 bytes into |in| such that th
e |
| 2693 ++ * returned pointer has alignment 1 mod 16. */ |
| 2694 ++static void* misalign(void* in) |
| 2695 ++ { |
| 2696 ++ intptr_t x = (intptr_t) in; |
| 2697 ++ x += (17 - (x % 16)) % 16; |
| 2698 ++ return (void*) x; |
| 2699 ++ } |
| 2700 ++ |
| 2701 ++int main() |
| 2702 ++ { |
| 2703 ++ static const unsigned num_tests = |
| 2704 ++ sizeof(chacha_tests) / sizeof(struct chacha_test); |
| 2705 ++ unsigned i; |
| 2706 ++ unsigned char key_bytes[32 + 16]; |
| 2707 ++ unsigned char nonce_bytes[8 + 16] = {0}; |
| 2708 ++ |
| 2709 ++ unsigned char *key = misalign(key_bytes); |
| 2710 ++ unsigned char *nonce = misalign(nonce_bytes); |
| 2711 ++ |
| 2712 ++ for (i = 0; i < num_tests; i++) |
| 2713 ++ { |
| 2714 ++ const struct chacha_test *test = &chacha_tests[i]; |
| 2715 ++ unsigned char *expected, *out_bytes, *zero_bytes, *out, *zeros; |
| 2716 ++ size_t len = strlen(test->outhex); |
| 2717 ++ |
| 2718 ++ if (strlen(test->keyhex) != 32*2 || |
| 2719 ++ strlen(test->noncehex) != 8*2 || |
| 2720 ++ (len & 1) == 1) |
| 2721 ++ return 1; |
| 2722 ++ |
| 2723 ++ len /= 2; |
| 2724 ++ |
| 2725 ++ hex_decode(key, test->keyhex); |
| 2726 ++ hex_decode(nonce, test->noncehex); |
| 2727 ++ |
| 2728 ++ expected = malloc(len); |
| 2729 ++ out_bytes = malloc(len+16); |
| 2730 ++ zero_bytes = malloc(len+16); |
| 2731 ++ /* Attempt to test unaligned inputs. */ |
| 2732 ++ out = misalign(out_bytes); |
| 2733 ++ zeros = misalign(zero_bytes); |
| 2734 ++ memset(zeros, 0, len); |
| 2735 ++ |
| 2736 ++ hex_decode(expected, test->outhex); |
| 2737 ++ CRYPTO_chacha_20(out, zeros, len, key, nonce, 0); |
| 2738 ++ |
| 2739 ++ if (memcmp(out, expected, len) != 0) |
| 2740 ++ { |
| 2741 ++ printf("ChaCha20 test #%d failed.\n", i); |
| 2742 ++ printf("got: "); |
| 2743 ++ hexdump(out, len); |
| 2744 ++ printf("\nexpected: "); |
| 2745 ++ hexdump(expected, len); |
| 2746 ++ printf("\n"); |
| 2747 ++ return 1; |
| 2748 ++ } |
| 2749 ++ |
| 2750 ++ /* The last test has a large output. We test whether the |
| 2751 ++ * counter works as expected by skipping the first 64 bytes of |
| 2752 ++ * it. */ |
| 2753 ++ if (i == num_tests - 1) |
| 2754 ++ { |
| 2755 ++ CRYPTO_chacha_20(out, zeros, len - 64, key, nonce, 1); |
| 2756 ++ if (memcmp(out, expected + 64, len - 64) != 0) |
| 2757 ++ { |
| 2758 ++ printf("ChaCha20 skip test failed.\n"); |
| 2759 ++ return 1; |
| 2760 ++ } |
| 2761 ++ } |
| 2762 ++ |
| 2763 ++ free(expected); |
| 2764 ++ free(zero_bytes); |
| 2765 ++ free(out_bytes); |
| 2766 ++ } |
| 2767 ++ |
| 2768 ++ |
| 2769 ++ printf("PASS\n"); |
| 2770 ++ return 0; |
| 2771 ++ } |
| 2772 +diff --git a/crypto/evp/Makefile b/crypto/evp/Makefile |
| 2773 +index b73038d..86b0504 100644 |
| 2774 +--- a/crypto/evp/Makefile |
| 2775 ++++ b/crypto/evp/Makefile |
| 2776 +@@ -29,7 +29,8 @@ LIBSRC= encode.c digest.c evp_enc.c evp_key.c evp_acnf.c evp_
cnf.c \ |
| 2777 + c_all.c c_allc.c c_alld.c evp_lib.c bio_ok.c \ |
| 2778 + evp_pkey.c evp_pbe.c p5_crpt.c p5_crpt2.c \ |
| 2779 + e_old.c pmeth_lib.c pmeth_fn.c pmeth_gn.c m_sigver.c evp_fips.c \ |
| 2780 +- e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c evp_aead.c |
| 2781 ++ e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c evp_aead.c \ |
| 2782 ++ e_chacha20poly1305.c |
| 2783 + |
| 2784 + LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \ |
| 2785 + e_des.o e_bf.o e_idea.o e_des3.o e_camellia.o\ |
| 2786 +@@ -42,7 +43,7 @@ LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o
evp_cnf.o \ |
| 2787 + c_all.o c_allc.o c_alld.o evp_lib.o bio_ok.o \ |
| 2788 + evp_pkey.o evp_pbe.o p5_crpt.o p5_crpt2.o \ |
| 2789 + e_old.o pmeth_lib.o pmeth_fn.o pmeth_gn.o m_sigver.o evp_fips.o \ |
| 2790 +- e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o evp_aead.o |
| 2791 ++ e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o evp_aead.o e_chacha20poly1305.o |
| 2792 + |
| 2793 + SRC= $(LIBSRC) |
| 2794 + |
| 2795 +@@ -239,6 +240,21 @@ e_cast.o: ../../include/openssl/objects.h ../../include/op
enssl/opensslconf.h |
| 2796 + e_cast.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h |
| 2797 + e_cast.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
| 2798 + e_cast.o: ../../include/openssl/symhacks.h ../cryptlib.h e_cast.c evp_locl.h |
| 2799 ++e_chacha20poly1305.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h |
| 2800 ++e_chacha20poly1305.o: ../../include/openssl/chacha.h |
| 2801 ++e_chacha20poly1305.o: ../../include/openssl/crypto.h |
| 2802 ++e_chacha20poly1305.o: ../../include/openssl/e_os2.h ../../include/openssl/err.
h |
| 2803 ++e_chacha20poly1305.o: ../../include/openssl/evp.h ../../include/openssl/lhash.
h |
| 2804 ++e_chacha20poly1305.o: ../../include/openssl/obj_mac.h |
| 2805 ++e_chacha20poly1305.o: ../../include/openssl/objects.h |
| 2806 ++e_chacha20poly1305.o: ../../include/openssl/opensslconf.h |
| 2807 ++e_chacha20poly1305.o: ../../include/openssl/opensslv.h |
| 2808 ++e_chacha20poly1305.o: ../../include/openssl/ossl_typ.h |
| 2809 ++e_chacha20poly1305.o: ../../include/openssl/poly1305.h |
| 2810 ++e_chacha20poly1305.o: ../../include/openssl/safestack.h |
| 2811 ++e_chacha20poly1305.o: ../../include/openssl/stack.h |
| 2812 ++e_chacha20poly1305.o: ../../include/openssl/symhacks.h e_chacha20poly1305.c |
| 2813 ++e_chacha20poly1305.o: evp_locl.h |
| 2814 + e_des.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h |
| 2815 + e_des.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
| 2816 + e_des.o: ../../include/openssl/des.h ../../include/openssl/des_old.h |
| 2817 +@@ -258,9 +274,10 @@ e_des3.o: ../../include/openssl/evp.h ../../include/openss
l/lhash.h |
| 2818 + e_des3.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h |
| 2819 + e_des3.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h |
| 2820 + e_des3.o: ../../include/openssl/ossl_typ.h ../../include/openssl/rand.h |
| 2821 +-e_des3.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
| 2822 +-e_des3.o: ../../include/openssl/symhacks.h ../../include/openssl/ui.h |
| 2823 +-e_des3.o: ../../include/openssl/ui_compat.h ../cryptlib.h e_des3.c evp_locl.h |
| 2824 ++e_des3.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h |
| 2825 ++e_des3.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h |
| 2826 ++e_des3.o: ../../include/openssl/ui.h ../../include/openssl/ui_compat.h |
| 2827 ++e_des3.o: ../cryptlib.h e_des3.c evp_locl.h |
| 2828 + e_idea.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.
h |
| 2829 + e_idea.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
| 2830 + e_idea.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h |
| 2831 +@@ -356,6 +373,14 @@ evp_acnf.o: ../../include/openssl/opensslconf.h |
| 2832 + evp_acnf.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h |
| 2833 + evp_acnf.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
| 2834 + evp_acnf.o: ../../include/openssl/symhacks.h ../cryptlib.h evp_acnf.c |
| 2835 ++evp_aead.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h |
| 2836 ++evp_aead.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h |
| 2837 ++evp_aead.o: ../../include/openssl/err.h ../../include/openssl/evp.h |
| 2838 ++evp_aead.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h |
| 2839 ++evp_aead.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.
h |
| 2840 ++evp_aead.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h |
| 2841 ++evp_aead.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
| 2842 ++evp_aead.o: ../../include/openssl/symhacks.h evp_aead.c |
| 2843 + evp_cnf.o: ../../e_os.h ../../include/openssl/asn1.h |
| 2844 + evp_cnf.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h |
| 2845 + evp_cnf.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h |
| 2846 +diff --git a/crypto/evp/e_chacha20poly1305.c b/crypto/evp/e_chacha20poly1305.c |
| 2847 +new file mode 100644 |
| 2848 +index 0000000..1c0c0fb |
| 2849 +--- /dev/null |
| 2850 ++++ b/crypto/evp/e_chacha20poly1305.c |
| 2851 +@@ -0,0 +1,261 @@ |
| 2852 ++/* ==================================================================== |
| 2853 ++ * Copyright (c) 2013 The OpenSSL Project. All rights reserved. |
| 2854 ++ * |
| 2855 ++ * Redistribution and use in source and binary forms, with or without |
| 2856 ++ * modification, are permitted provided that the following conditions |
| 2857 ++ * are met: |
| 2858 ++ * |
| 2859 ++ * 1. Redistributions of source code must retain the above copyright |
| 2860 ++ * notice, this list of conditions and the following disclaimer. |
| 2861 ++ * |
| 2862 ++ * 2. Redistributions in binary form must reproduce the above copyright |
| 2863 ++ * notice, this list of conditions and the following disclaimer in |
| 2864 ++ * the documentation and/or other materials provided with the |
| 2865 ++ * distribution. |
| 2866 ++ * |
| 2867 ++ * 3. All advertising materials mentioning features or use of this |
| 2868 ++ * software must display the following acknowledgment: |
| 2869 ++ * "This product includes software developed by the OpenSSL Project |
| 2870 ++ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" |
| 2871 ++ * |
| 2872 ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 2873 ++ * endorse or promote products derived from this software without |
| 2874 ++ * prior written permission. For written permission, please contact |
| 2875 ++ * openssl-core@openssl.org. |
| 2876 ++ * |
| 2877 ++ * 5. Products derived from this software may not be called "OpenSSL" |
| 2878 ++ * nor may "OpenSSL" appear in their names without prior written |
| 2879 ++ * permission of the OpenSSL Project. |
| 2880 ++ * |
| 2881 ++ * 6. Redistributions of any form whatsoever must retain the following |
| 2882 ++ * acknowledgment: |
| 2883 ++ * "This product includes software developed by the OpenSSL Project |
| 2884 ++ * for use in the OpenSSL Toolkit (http://www.openssl.org/)" |
| 2885 ++ * |
| 2886 ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 2887 ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 2888 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 2889 ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 2890 ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 2891 ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 2892 ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 2893 ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 2894 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 2895 ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 2896 ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 2897 ++ * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 2898 ++ * ==================================================================== |
| 2899 ++ * |
| 2900 ++ */ |
| 2901 ++ |
| 2902 ++#include <stdint.h> |
| 2903 ++#include <string.h> |
| 2904 ++#include <openssl/opensslconf.h> |
| 2905 ++ |
| 2906 ++#if !defined(OPENSSL_NO_CHACHA) && !defined(OPENSSL_NO_POLY1305) |
| 2907 ++ |
| 2908 ++#include <openssl/chacha.h> |
| 2909 ++#include <openssl/poly1305.h> |
| 2910 ++#include <openssl/evp.h> |
| 2911 ++#include <openssl/err.h> |
| 2912 ++#include "evp_locl.h" |
| 2913 ++ |
| 2914 ++#define POLY1305_TAG_LEN 16 |
| 2915 ++#define CHACHA20_NONCE_LEN 8 |
| 2916 ++ |
| 2917 ++struct aead_chacha20_poly1305_ctx |
| 2918 ++ { |
| 2919 ++ unsigned char key[32]; |
| 2920 ++ unsigned char tag_len; |
| 2921 ++ }; |
| 2922 ++ |
| 2923 ++static int aead_chacha20_poly1305_init(EVP_AEAD_CTX *ctx, const unsigned char
*key, size_t key_len, size_t tag_len) |
| 2924 ++ { |
| 2925 ++ struct aead_chacha20_poly1305_ctx *c20_ctx; |
| 2926 ++ |
| 2927 ++ if (tag_len == 0) |
| 2928 ++ tag_len = POLY1305_TAG_LEN; |
| 2929 ++ |
| 2930 ++ if (tag_len > POLY1305_TAG_LEN) |
| 2931 ++ { |
| 2932 ++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_INIT, EVP_R_TOO_LARGE); |
| 2933 ++ return 0; |
| 2934 ++ } |
| 2935 ++ |
| 2936 ++ if (key_len != sizeof(c20_ctx->key)) |
| 2937 ++ return 0; /* internal error - EVP_AEAD_CTX_init should catch th
is. */ |
| 2938 ++ |
| 2939 ++ c20_ctx = OPENSSL_malloc(sizeof(struct aead_chacha20_poly1305_ctx)); |
| 2940 ++ if (c20_ctx == NULL) |
| 2941 ++ return 0; |
| 2942 ++ |
| 2943 ++ memcpy(&c20_ctx->key[0], key, key_len); |
| 2944 ++ c20_ctx->tag_len = tag_len; |
| 2945 ++ ctx->aead_state = c20_ctx; |
| 2946 ++ |
| 2947 ++ return 1; |
| 2948 ++ } |
| 2949 ++ |
| 2950 ++static void aead_chacha20_poly1305_cleanup(EVP_AEAD_CTX *ctx) |
| 2951 ++ { |
| 2952 ++ struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state; |
| 2953 ++ OPENSSL_cleanse(c20_ctx->key, sizeof(c20_ctx->key)); |
| 2954 ++ OPENSSL_free(c20_ctx); |
| 2955 ++ } |
| 2956 ++ |
| 2957 ++static void poly1305_update_with_length(poly1305_state *poly1305, |
| 2958 ++ const unsigned char *data, size_t data_len) |
| 2959 ++ { |
| 2960 ++ size_t j = data_len; |
| 2961 ++ unsigned char length_bytes[8]; |
| 2962 ++ unsigned i; |
| 2963 ++ |
| 2964 ++ for (i = 0; i < sizeof(length_bytes); i++) |
| 2965 ++ { |
| 2966 ++ length_bytes[i] = j; |
| 2967 ++ j >>= 8; |
| 2968 ++ } |
| 2969 ++ |
| 2970 ++ CRYPTO_poly1305_update(poly1305, data, data_len); |
| 2971 ++ CRYPTO_poly1305_update(poly1305, length_bytes, sizeof(length_bytes)); |
| 2972 ++} |
| 2973 ++ |
| 2974 ++static ssize_t aead_chacha20_poly1305_seal(const EVP_AEAD_CTX *ctx, |
| 2975 ++ unsigned char *out, size_t max_out_len, |
| 2976 ++ const unsigned char *nonce, size_t nonce_len, |
| 2977 ++ const unsigned char *in, size_t in_len, |
| 2978 ++ const unsigned char *ad, size_t ad_len) |
| 2979 ++ { |
| 2980 ++ const struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state; |
| 2981 ++ unsigned char poly1305_key[32]; |
| 2982 ++ poly1305_state poly1305; |
| 2983 ++ const uint64_t in_len_64 = in_len; |
| 2984 ++ |
| 2985 ++ /* The underlying ChaCha implementation may not overflow the block |
| 2986 ++ * counter into the second counter word. Therefore we disallow |
| 2987 ++ * individual operations that work on more than 2TB at a time. |
| 2988 ++ * |in_len_64| is needed because, on 32-bit platforms, size_t is only |
| 2989 ++ * 32-bits and this produces a warning because it's always false. |
| 2990 ++ * Casting to uint64_t inside the conditional is not sufficient to stop |
| 2991 ++ * the warning. */ |
| 2992 ++ if (in_len_64 >= (1ull << 32)*64-64) |
| 2993 ++ { |
| 2994 ++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_TOO_LARGE); |
| 2995 ++ return -1; |
| 2996 ++ } |
| 2997 ++ |
| 2998 ++ if (max_out_len < in_len + c20_ctx->tag_len) |
| 2999 ++ { |
| 3000 ++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_BUFFER_TOO_SMALL
); |
| 3001 ++ return -1; |
| 3002 ++ } |
| 3003 ++ |
| 3004 ++ if (nonce_len != CHACHA20_NONCE_LEN) |
| 3005 ++ { |
| 3006 ++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_IV_TOO_LARGE); |
| 3007 ++ return -1; |
| 3008 ++ } |
| 3009 ++ |
| 3010 ++ memset(poly1305_key, 0, sizeof(poly1305_key)); |
| 3011 ++ CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key), c20_c
tx->key, nonce, 0); |
| 3012 ++ |
| 3013 ++ CRYPTO_poly1305_init(&poly1305, poly1305_key); |
| 3014 ++ poly1305_update_with_length(&poly1305, ad, ad_len); |
| 3015 ++ CRYPTO_chacha_20(out, in, in_len, c20_ctx->key, nonce, 1); |
| 3016 ++ poly1305_update_with_length(&poly1305, out, in_len); |
| 3017 ++ |
| 3018 ++ if (c20_ctx->tag_len != POLY1305_TAG_LEN) |
| 3019 ++ { |
| 3020 ++ unsigned char tag[POLY1305_TAG_LEN]; |
| 3021 ++ CRYPTO_poly1305_finish(&poly1305, tag); |
| 3022 ++ memcpy(out + in_len, tag, c20_ctx->tag_len); |
| 3023 ++ return in_len + c20_ctx->tag_len; |
| 3024 ++ } |
| 3025 ++ |
| 3026 ++ CRYPTO_poly1305_finish(&poly1305, out + in_len); |
| 3027 ++ return in_len + POLY1305_TAG_LEN; |
| 3028 ++ } |
| 3029 ++ |
| 3030 ++static ssize_t aead_chacha20_poly1305_open(const EVP_AEAD_CTX *ctx, |
| 3031 ++ unsigned char *out, size_t max_out_len, |
| 3032 ++ const unsigned char *nonce, size_t nonce_len, |
| 3033 ++ const unsigned char *in, size_t in_len, |
| 3034 ++ const unsigned char *ad, size_t ad_len) |
| 3035 ++ { |
| 3036 ++ const struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state; |
| 3037 ++ unsigned char mac[POLY1305_TAG_LEN]; |
| 3038 ++ unsigned char poly1305_key[32]; |
| 3039 ++ size_t out_len; |
| 3040 ++ poly1305_state poly1305; |
| 3041 ++ const uint64_t in_len_64 = in_len; |
| 3042 ++ |
| 3043 ++ if (in_len < c20_ctx->tag_len) |
| 3044 ++ { |
| 3045 ++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BAD_DECRYPT); |
| 3046 ++ return -1; |
| 3047 ++ } |
| 3048 ++ |
| 3049 ++ /* The underlying ChaCha implementation may not overflow the block |
| 3050 ++ * counter into the second counter word. Therefore we disallow |
| 3051 ++ * individual operations that work on more than 2TB at a time. |
| 3052 ++ * |in_len_64| is needed because, on 32-bit platforms, size_t is only |
| 3053 ++ * 32-bits and this produces a warning because it's always false. |
| 3054 ++ * Casting to uint64_t inside the conditional is not sufficient to stop |
| 3055 ++ * the warning. */ |
| 3056 ++ if (in_len_64 >= (1ull << 32)*64-64) |
| 3057 ++ { |
| 3058 ++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_TOO_LARGE); |
| 3059 ++ return -1; |
| 3060 ++ } |
| 3061 ++ |
| 3062 ++ if (nonce_len != CHACHA20_NONCE_LEN) |
| 3063 ++ { |
| 3064 ++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_IV_TOO_LARGE); |
| 3065 ++ return -1; |
| 3066 ++ } |
| 3067 ++ |
| 3068 ++ out_len = in_len - c20_ctx->tag_len; |
| 3069 ++ |
| 3070 ++ if (max_out_len < out_len) |
| 3071 ++ { |
| 3072 ++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BUFFER_TOO_SMALL
); |
| 3073 ++ return -1; |
| 3074 ++ } |
| 3075 ++ |
| 3076 ++ memset(poly1305_key, 0, sizeof(poly1305_key)); |
| 3077 ++ CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key), c20_c
tx->key, nonce, 0); |
| 3078 ++ |
| 3079 ++ CRYPTO_poly1305_init(&poly1305, poly1305_key); |
| 3080 ++ poly1305_update_with_length(&poly1305, ad, ad_len); |
| 3081 ++ poly1305_update_with_length(&poly1305, in, out_len); |
| 3082 ++ CRYPTO_poly1305_finish(&poly1305, mac); |
| 3083 ++ |
| 3084 ++ if (CRYPTO_memcmp(mac, in + out_len, c20_ctx->tag_len) != 0) |
| 3085 ++ { |
| 3086 ++ EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BAD_DECRYPT); |
| 3087 ++ return -1; |
| 3088 ++ } |
| 3089 ++ |
| 3090 ++ CRYPTO_chacha_20(out, in, out_len, c20_ctx->key, nonce, 1); |
| 3091 ++ return out_len; |
| 3092 ++ } |
| 3093 ++ |
| 3094 ++static const EVP_AEAD aead_chacha20_poly1305 = |
| 3095 ++ { |
| 3096 ++ 32, /* key len */ |
| 3097 ++ CHACHA20_NONCE_LEN, /* nonce len */ |
| 3098 ++ POLY1305_TAG_LEN, /* overhead */ |
| 3099 ++ POLY1305_TAG_LEN, /* max tag length */ |
| 3100 ++ |
| 3101 ++ aead_chacha20_poly1305_init, |
| 3102 ++ aead_chacha20_poly1305_cleanup, |
| 3103 ++ aead_chacha20_poly1305_seal, |
| 3104 ++ aead_chacha20_poly1305_open, |
| 3105 ++ }; |
| 3106 ++ |
| 3107 ++const EVP_AEAD *EVP_aead_chacha20_poly1305() |
| 3108 ++ { |
| 3109 ++ return &aead_chacha20_poly1305; |
| 3110 ++ } |
| 3111 ++ |
| 3112 ++#endif /* !OPENSSL_NO_CHACHA && !OPENSSL_NO_POLY1305 */ |
| 3113 +diff --git a/crypto/evp/evp.h b/crypto/evp/evp.h |
| 3114 +index bd10642..7dc1656 100644 |
| 3115 +--- a/crypto/evp/evp.h |
| 3116 ++++ b/crypto/evp/evp.h |
| 3117 +@@ -1258,6 +1258,11 @@ typedef struct evp_aead_st EVP_AEAD; |
| 3118 + const EVP_AEAD *EVP_aead_aes_128_gcm(void); |
| 3119 + #endif |
| 3120 + |
| 3121 ++#if !defined(OPENSSL_NO_CHACHA) && !defined(OPENSSL_NO_POLY1305) |
| 3122 ++/* EVP_aead_chacha20_poly1305 is ChaCha20 with a Poly1305 authenticator. */ |
| 3123 ++const EVP_AEAD *EVP_aead_chacha20_poly1305(void); |
| 3124 ++#endif |
| 3125 ++ |
| 3126 + /* EVP_AEAD_key_length returns the length, in bytes, of the keys used by |
| 3127 + * |aead|. */ |
| 3128 + size_t EVP_AEAD_key_length(const EVP_AEAD *aead); |
| 3129 +@@ -1360,6 +1365,9 @@ void ERR_load_EVP_strings(void); |
| 3130 + #define EVP_F_AEAD_AES_128_GCM_INIT 183 |
| 3131 + #define EVP_F_AEAD_AES_128_GCM_OPEN 181 |
| 3132 + #define EVP_F_AEAD_AES_128_GCM_SEAL 182 |
| 3133 ++#define EVP_F_AEAD_CHACHA20_POLY1305_INIT 187 |
| 3134 ++#define EVP_F_AEAD_CHACHA20_POLY1305_OPEN 184 |
| 3135 ++#define EVP_F_AEAD_CHACHA20_POLY1305_SEAL 183 |
| 3136 + #define EVP_F_AEAD_CTX_OPEN 185 |
| 3137 + #define EVP_F_AEAD_CTX_SEAL 186 |
| 3138 + #define EVP_F_AESNI_INIT_KEY 165 |
| 3139 +diff --git a/crypto/evp/evp_err.c b/crypto/evp/evp_err.c |
| 3140 +index c47969c..fb747e5 100644 |
| 3141 +--- a/crypto/evp/evp_err.c |
| 3142 ++++ b/crypto/evp/evp_err.c |
| 3143 +@@ -73,6 +73,9 @@ static ERR_STRING_DATA EVP_str_functs[]= |
| 3144 + {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_INIT), "AEAD_AES_128_GCM_INIT"}, |
| 3145 + {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_OPEN), "AEAD_AES_128_GCM_OPEN"}, |
| 3146 + {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_SEAL), "AEAD_AES_128_GCM_SEAL"}, |
| 3147 ++{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_INIT), "AEAD_CHACHA20_POLY1305_INIT"}, |
| 3148 ++{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_OPEN), "AEAD_CHACHA20_POLY1305_OPEN"}, |
| 3149 ++{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_SEAL), "AEAD_CHACHA20_POLY1305_SEAL"}, |
| 3150 + {ERR_FUNC(EVP_F_AEAD_CTX_OPEN), "AEAD_CTX_OPEN"}, |
| 3151 + {ERR_FUNC(EVP_F_AEAD_CTX_SEAL), "AEAD_CTX_SEAL"}, |
| 3152 + {ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"}, |
| 3153 +diff --git a/crypto/poly1305/Makefile b/crypto/poly1305/Makefile |
| 3154 +new file mode 100644 |
| 3155 +index 0000000..397d7cd |
| 3156 +--- /dev/null |
| 3157 ++++ b/crypto/poly1305/Makefile |
| 3158 +@@ -0,0 +1,81 @@ |
| 3159 ++# |
| 3160 ++# OpenSSL/crypto/poly1305/Makefile |
| 3161 ++# |
| 3162 ++ |
| 3163 ++DIR= poly1305 |
| 3164 ++TOP= ../.. |
| 3165 ++CC= cc |
| 3166 ++CPP= $(CC) -E |
| 3167 ++INCLUDES= |
| 3168 ++CFLAG=-g |
| 3169 ++AR= ar r |
| 3170 ++ |
| 3171 ++POLY1305=poly1305_vec.o |
| 3172 ++ |
| 3173 ++CFLAGS= $(INCLUDES) $(CFLAG) |
| 3174 ++ASFLAGS= $(INCLUDES) $(ASFLAG) |
| 3175 ++AFLAGS= $(ASFLAGS) |
| 3176 ++ |
| 3177 ++GENERAL=Makefile |
| 3178 ++TEST= |
| 3179 ++APPS= |
| 3180 ++ |
| 3181 ++LIB=$(TOP)/libcrypto.a |
| 3182 ++LIBSRC=poly1305_vec.c |
| 3183 ++LIBOBJ=$(POLY1305) |
| 3184 ++ |
| 3185 ++SRC= $(LIBSRC) |
| 3186 ++ |
| 3187 ++EXHEADER=poly1305.h |
| 3188 ++HEADER= $(EXHEADER) |
| 3189 ++ |
| 3190 ++ALL= $(GENERAL) $(SRC) $(HEADER) |
| 3191 ++ |
| 3192 ++top: |
| 3193 ++ (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all) |
| 3194 ++ |
| 3195 ++all: lib |
| 3196 ++ |
| 3197 ++lib: $(LIBOBJ) |
| 3198 ++ $(AR) $(LIB) $(LIBOBJ) |
| 3199 ++ $(RANLIB) $(LIB) || echo Never mind. |
| 3200 ++ @touch lib |
| 3201 ++ |
| 3202 ++files: |
| 3203 ++ $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO |
| 3204 ++ |
| 3205 ++links: |
| 3206 ++ @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER) |
| 3207 ++ @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST) |
| 3208 ++ @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS) |
| 3209 ++ |
| 3210 ++install: |
| 3211 ++ @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile... |
| 3212 ++ @headerlist="$(EXHEADER)"; for i in $$headerlist ; \ |
| 3213 ++ do \ |
| 3214 ++ (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \ |
| 3215 ++ chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \ |
| 3216 ++ done; |
| 3217 ++ |
| 3218 ++tags: |
| 3219 ++ ctags $(SRC) |
| 3220 ++ |
| 3221 ++tests: |
| 3222 ++ |
| 3223 ++lint: |
| 3224 ++ lint -DLINT $(INCLUDES) $(SRC)>fluff |
| 3225 ++ |
| 3226 ++depend: |
| 3227 ++ @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... |
| 3228 ++ $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) |
| 3229 ++ |
| 3230 ++dclean: |
| 3231 ++ $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKE
FILE) >Makefile.new |
| 3232 ++ mv -f Makefile.new $(MAKEFILE) |
| 3233 ++ |
| 3234 ++clean: |
| 3235 ++ rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff |
| 3236 ++ |
| 3237 ++# DO NOT DELETE THIS LINE -- make depend depends on it. |
| 3238 ++ |
| 3239 ++poly1305_vec.o: ../../include/openssl/poly1305.h poly1305_vec.c |
| 3240 +diff --git a/crypto/poly1305/poly1305.c b/crypto/poly1305/poly1305.c |
| 3241 +new file mode 100644 |
| 3242 +index 0000000..2e5621d |
| 3243 +--- /dev/null |
| 3244 ++++ b/crypto/poly1305/poly1305.c |
| 3245 +@@ -0,0 +1,320 @@ |
| 3246 ++/* ==================================================================== |
| 3247 ++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. |
| 3248 ++ * |
| 3249 ++ * Redistribution and use in source and binary forms, with or without |
| 3250 ++ * modification, are permitted provided that the following conditions |
| 3251 ++ * are met: |
| 3252 ++ * |
| 3253 ++ * 1. Redistributions of source code must retain the above copyright |
| 3254 ++ * notice, this list of conditions and the following disclaimer. |
| 3255 ++ * |
| 3256 ++ * 2. Redistributions in binary form must reproduce the above copyright |
| 3257 ++ * notice, this list of conditions and the following disclaimer in |
| 3258 ++ * the documentation and/or other materials provided with the |
| 3259 ++ * distribution. |
| 3260 ++ * |
| 3261 ++ * 3. All advertising materials mentioning features or use of this |
| 3262 ++ * software must display the following acknowledgment: |
| 3263 ++ * "This product includes software developed by the OpenSSL Project |
| 3264 ++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" |
| 3265 ++ * |
| 3266 ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 3267 ++ * endorse or promote products derived from this software without |
| 3268 ++ * prior written permission. For written permission, please contact |
| 3269 ++ * licensing@OpenSSL.org. |
| 3270 ++ * |
| 3271 ++ * 5. Products derived from this software may not be called "OpenSSL" |
| 3272 ++ * nor may "OpenSSL" appear in their names without prior written |
| 3273 ++ * permission of the OpenSSL Project. |
| 3274 ++ * |
| 3275 ++ * 6. Redistributions of any form whatsoever must retain the following |
| 3276 ++ * acknowledgment: |
| 3277 ++ * "This product includes software developed by the OpenSSL Project |
| 3278 ++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" |
| 3279 ++ * |
| 3280 ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 3281 ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 3282 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 3283 ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 3284 ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 3285 ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 3286 ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 3287 ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 3288 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 3289 ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 3290 ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 3291 ++ * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 3292 ++ * ==================================================================== |
| 3293 ++ */ |
| 3294 ++ |
| 3295 ++/* This implementation of poly1305 is by Andrew Moon |
| 3296 ++ * (https://github.com/floodyberry/poly1305-donna) and released as public |
| 3297 ++ * domain. */ |
| 3298 ++ |
| 3299 ++#include <string.h> |
| 3300 ++#include <stdint.h> |
| 3301 ++#include <openssl/opensslconf.h> |
| 3302 ++ |
| 3303 ++#if !defined(OPENSSL_NO_POLY1305) |
| 3304 ++ |
| 3305 ++#include <openssl/poly1305.h> |
| 3306 ++ |
| 3307 ++#if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86
_64__) |
| 3308 ++/* We can assume little-endian. */ |
| 3309 ++static uint32_t U8TO32_LE(const unsigned char *m) |
| 3310 ++ { |
| 3311 ++ uint32_t r; |
| 3312 ++ memcpy(&r, m, sizeof(r)); |
| 3313 ++ return r; |
| 3314 ++ } |
| 3315 ++ |
| 3316 ++static void U32TO8_LE(unsigned char *m, uint32_t v) |
| 3317 ++ { |
| 3318 ++ memcpy(m, &v, sizeof(v)); |
| 3319 ++ } |
| 3320 ++#else |
| 3321 ++static uint32_t U8TO32_LE(const unsigned char *m) |
| 3322 ++ { |
| 3323 ++ return (uint32_t)m[0] | |
| 3324 ++ (uint32_t)m[1] << 8 | |
| 3325 ++ (uint32_t)m[2] << 16 | |
| 3326 ++ (uint32_t)m[3] << 24; |
| 3327 ++ } |
| 3328 ++ |
| 3329 ++static void U32TO8_LE(unsigned char *m, uint32_t v) |
| 3330 ++ { |
| 3331 ++ m[0] = v; |
| 3332 ++ m[1] = v >> 8; |
| 3333 ++ m[2] = v >> 16; |
| 3334 ++ m[3] = v >> 24; |
| 3335 ++ } |
| 3336 ++#endif |
| 3337 ++ |
| 3338 ++static uint64_t |
| 3339 ++mul32x32_64(uint32_t a, uint32_t b) |
| 3340 ++ { |
| 3341 ++ return (uint64_t)a * b; |
| 3342 ++ } |
| 3343 ++ |
| 3344 ++ |
| 3345 ++struct poly1305_state_st |
| 3346 ++ { |
| 3347 ++ uint32_t r0,r1,r2,r3,r4; |
| 3348 ++ uint32_t s1,s2,s3,s4; |
| 3349 ++ uint32_t h0,h1,h2,h3,h4; |
| 3350 ++ unsigned char buf[16]; |
| 3351 ++ unsigned int buf_used; |
| 3352 ++ unsigned char key[16]; |
| 3353 ++ }; |
| 3354 ++ |
| 3355 ++/* poly1305_blocks updates |state| given some amount of input data. This |
| 3356 ++ * function may only be called with a |len| that is not a multiple of 16 at th
e |
| 3357 ++ * end of the data. Otherwise the input must be buffered into 16 byte blocks. |
| 3358 ++ * */ |
| 3359 ++static void poly1305_update(struct poly1305_state_st *state, |
| 3360 ++ const unsigned char *in, size_t len) |
| 3361 ++ { |
| 3362 ++ uint32_t t0,t1,t2,t3; |
| 3363 ++ uint64_t t[5]; |
| 3364 ++ uint32_t b; |
| 3365 ++ uint64_t c; |
| 3366 ++ size_t j; |
| 3367 ++ unsigned char mp[16]; |
| 3368 ++ |
| 3369 ++ if (len < 16) |
| 3370 ++ goto poly1305_donna_atmost15bytes; |
| 3371 ++ |
| 3372 ++poly1305_donna_16bytes: |
| 3373 ++ t0 = U8TO32_LE(in); |
| 3374 ++ t1 = U8TO32_LE(in+4); |
| 3375 ++ t2 = U8TO32_LE(in+8); |
| 3376 ++ t3 = U8TO32_LE(in+12); |
| 3377 ++ |
| 3378 ++ in += 16; |
| 3379 ++ len -= 16; |
| 3380 ++ |
| 3381 ++ state->h0 += t0 & 0x3ffffff; |
| 3382 ++ state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; |
| 3383 ++ state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; |
| 3384 ++ state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; |
| 3385 ++ state->h4 += (t3 >> 8) | (1 << 24); |
| 3386 ++ |
| 3387 ++poly1305_donna_mul: |
| 3388 ++ t[0] = mul32x32_64(state->h0,state->r0) + |
| 3389 ++ mul32x32_64(state->h1,state->s4) + |
| 3390 ++ mul32x32_64(state->h2,state->s3) + |
| 3391 ++ mul32x32_64(state->h3,state->s2) + |
| 3392 ++ mul32x32_64(state->h4,state->s1); |
| 3393 ++ t[1] = mul32x32_64(state->h0,state->r1) + |
| 3394 ++ mul32x32_64(state->h1,state->r0) + |
| 3395 ++ mul32x32_64(state->h2,state->s4) + |
| 3396 ++ mul32x32_64(state->h3,state->s3) + |
| 3397 ++ mul32x32_64(state->h4,state->s2); |
| 3398 ++ t[2] = mul32x32_64(state->h0,state->r2) + |
| 3399 ++ mul32x32_64(state->h1,state->r1) + |
| 3400 ++ mul32x32_64(state->h2,state->r0) + |
| 3401 ++ mul32x32_64(state->h3,state->s4) + |
| 3402 ++ mul32x32_64(state->h4,state->s3); |
| 3403 ++ t[3] = mul32x32_64(state->h0,state->r3) + |
| 3404 ++ mul32x32_64(state->h1,state->r2) + |
| 3405 ++ mul32x32_64(state->h2,state->r1) + |
| 3406 ++ mul32x32_64(state->h3,state->r0) + |
| 3407 ++ mul32x32_64(state->h4,state->s4); |
| 3408 ++ t[4] = mul32x32_64(state->h0,state->r4) + |
| 3409 ++ mul32x32_64(state->h1,state->r3) + |
| 3410 ++ mul32x32_64(state->h2,state->r2) + |
| 3411 ++ mul32x32_64(state->h3,state->r1) + |
| 3412 ++ mul32x32_64(state->h4,state->r0); |
| 3413 ++ |
| 3414 ++ state->h0 = (uint32_t)t[0] & 0x3ffffff; c = (t[0] >
> 26); |
| 3415 ++ t[1] += c; state->h1 = (uint32_t)t[1] & 0x3ffffff; b = (uint32_t)(t[1] >
> 26); |
| 3416 ++ t[2] += b; state->h2 = (uint32_t)t[2] & 0x3ffffff; b = (uint32_t)(t[2] >
> 26); |
| 3417 ++ t[3] += b; state->h3 = (uint32_t)t[3] & 0x3ffffff; b = (uint32_t)(t[3] >
> 26); |
| 3418 ++ t[4] += b; state->h4 = (uint32_t)t[4] & 0x3ffffff; b = (uint32_t)(t[4] >
> 26); |
| 3419 ++ state->h0 += b * 5; |
| 3420 ++ |
| 3421 ++ if (len >= 16) |
| 3422 ++ goto poly1305_donna_16bytes; |
| 3423 ++ |
| 3424 ++ /* final bytes */ |
| 3425 ++poly1305_donna_atmost15bytes: |
| 3426 ++ if (!len) |
| 3427 ++ return; |
| 3428 ++ |
| 3429 ++ for (j = 0; j < len; j++) |
| 3430 ++ mp[j] = in[j]; |
| 3431 ++ mp[j++] = 1; |
| 3432 ++ for (; j < 16; j++) |
| 3433 ++ mp[j] = 0; |
| 3434 ++ len = 0; |
| 3435 ++ |
| 3436 ++ t0 = U8TO32_LE(mp+0); |
| 3437 ++ t1 = U8TO32_LE(mp+4); |
| 3438 ++ t2 = U8TO32_LE(mp+8); |
| 3439 ++ t3 = U8TO32_LE(mp+12); |
| 3440 ++ |
| 3441 ++ state->h0 += t0 & 0x3ffffff; |
| 3442 ++ state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; |
| 3443 ++ state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; |
| 3444 ++ state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; |
| 3445 ++ state->h4 += (t3 >> 8); |
| 3446 ++ |
| 3447 ++ goto poly1305_donna_mul; |
| 3448 ++ } |
| 3449 ++ |
| 3450 ++void CRYPTO_poly1305_init(poly1305_state *statep, const unsigned char key[32]) |
| 3451 ++ { |
| 3452 ++ struct poly1305_state_st *state = (struct poly1305_state_st*) statep; |
| 3453 ++ uint32_t t0,t1,t2,t3; |
| 3454 ++ |
| 3455 ++ t0 = U8TO32_LE(key+0); |
| 3456 ++ t1 = U8TO32_LE(key+4); |
| 3457 ++ t2 = U8TO32_LE(key+8); |
| 3458 ++ t3 = U8TO32_LE(key+12); |
| 3459 ++ |
| 3460 ++ /* precompute multipliers */ |
| 3461 ++ state->r0 = t0 & 0x3ffffff; t0 >>= 26; t0 |= t1 << 6; |
| 3462 ++ state->r1 = t0 & 0x3ffff03; t1 >>= 20; t1 |= t2 << 12; |
| 3463 ++ state->r2 = t1 & 0x3ffc0ff; t2 >>= 14; t2 |= t3 << 18; |
| 3464 ++ state->r3 = t2 & 0x3f03fff; t3 >>= 8; |
| 3465 ++ state->r4 = t3 & 0x00fffff; |
| 3466 ++ |
| 3467 ++ state->s1 = state->r1 * 5; |
| 3468 ++ state->s2 = state->r2 * 5; |
| 3469 ++ state->s3 = state->r3 * 5; |
| 3470 ++ state->s4 = state->r4 * 5; |
| 3471 ++ |
| 3472 ++ /* init state */ |
| 3473 ++ state->h0 = 0; |
| 3474 ++ state->h1 = 0; |
| 3475 ++ state->h2 = 0; |
| 3476 ++ state->h3 = 0; |
| 3477 ++ state->h4 = 0; |
| 3478 ++ |
| 3479 ++ state->buf_used = 0; |
| 3480 ++ memcpy(state->key, key + 16, sizeof(state->key)); |
| 3481 ++ } |
| 3482 ++ |
| 3483 ++void CRYPTO_poly1305_update(poly1305_state *statep, const unsigned char *in, |
| 3484 ++ size_t in_len) |
| 3485 ++ { |
| 3486 ++ unsigned int i; |
| 3487 ++ struct poly1305_state_st *state = (struct poly1305_state_st*) statep; |
| 3488 ++ |
| 3489 ++ if (state->buf_used) |
| 3490 ++ { |
| 3491 ++ unsigned int todo = 16 - state->buf_used; |
| 3492 ++ if (todo > in_len) |
| 3493 ++ todo = in_len; |
| 3494 ++ for (i = 0; i < todo; i++) |
| 3495 ++ state->buf[state->buf_used + i] = in[i]; |
| 3496 ++ state->buf_used += todo; |
| 3497 ++ in_len -= todo; |
| 3498 ++ in += todo; |
| 3499 ++ |
| 3500 ++ if (state->buf_used == 16) |
| 3501 ++ { |
| 3502 ++ poly1305_update(state, state->buf, 16); |
| 3503 ++ state->buf_used = 0; |
| 3504 ++ } |
| 3505 ++ } |
| 3506 ++ |
| 3507 ++ if (in_len >= 16) |
| 3508 ++ { |
| 3509 ++ size_t todo = in_len & ~0xf; |
| 3510 ++ poly1305_update(state, in, todo); |
| 3511 ++ in += todo; |
| 3512 ++ in_len &= 0xf; |
| 3513 ++ } |
| 3514 ++ |
| 3515 ++ if (in_len) |
| 3516 ++ { |
| 3517 ++ for (i = 0; i < in_len; i++) |
| 3518 ++ state->buf[i] = in[i]; |
| 3519 ++ state->buf_used = in_len; |
| 3520 ++ } |
| 3521 ++ } |
| 3522 ++ |
| 3523 ++void CRYPTO_poly1305_finish(poly1305_state *statep, unsigned char mac[16]) |
| 3524 ++ { |
| 3525 ++ struct poly1305_state_st *state = (struct poly1305_state_st*) statep; |
| 3526 ++ uint64_t f0,f1,f2,f3; |
| 3527 ++ uint32_t g0,g1,g2,g3,g4; |
| 3528 ++ uint32_t b, nb; |
| 3529 ++ |
| 3530 ++ if (state->buf_used) |
| 3531 ++ poly1305_update(state, state->buf, state->buf_used); |
| 3532 ++ |
| 3533 ++ b = state->h0 >> 26; state->h0 = state->h0 & 0x3ffff
ff; |
| 3534 ++ state->h1 += b; b = state->h1 >> 26; state->h1 = state->h1 & 0x3ffff
ff; |
| 3535 ++ state->h2 += b; b = state->h2 >> 26; state->h2 = state->h2 & 0x3ffff
ff; |
| 3536 ++ state->h3 += b; b = state->h3 >> 26; state->h3 = state->h3 & 0x3ffff
ff; |
| 3537 ++ state->h4 += b; b = state->h4 >> 26; state->h4 = state->h4 & 0x3ffff
ff; |
| 3538 ++ state->h0 += b * 5; |
| 3539 ++ |
| 3540 ++ g0 = state->h0 + 5; b = g0 >> 26; g0 &= 0x3ffffff; |
| 3541 ++ g1 = state->h1 + b; b = g1 >> 26; g1 &= 0x3ffffff; |
| 3542 ++ g2 = state->h2 + b; b = g2 >> 26; g2 &= 0x3ffffff; |
| 3543 ++ g3 = state->h3 + b; b = g3 >> 26; g3 &= 0x3ffffff; |
| 3544 ++ g4 = state->h4 + b - (1 << 26); |
| 3545 ++ |
| 3546 ++ b = (g4 >> 31) - 1; |
| 3547 ++ nb = ~b; |
| 3548 ++ state->h0 = (state->h0 & nb) | (g0 & b); |
| 3549 ++ state->h1 = (state->h1 & nb) | (g1 & b); |
| 3550 ++ state->h2 = (state->h2 & nb) | (g2 & b); |
| 3551 ++ state->h3 = (state->h3 & nb) | (g3 & b); |
| 3552 ++ state->h4 = (state->h4 & nb) | (g4 & b); |
| 3553 ++ |
| 3554 ++ f0 = ((state->h0 ) | (state->h1 << 26)) + (uint64_t)U8TO32_LE(&stat
e->key[0]); |
| 3555 ++ f1 = ((state->h1 >> 6) | (state->h2 << 20)) + (uint64_t)U8TO32_LE(&stat
e->key[4]); |
| 3556 ++ f2 = ((state->h2 >> 12) | (state->h3 << 14)) + (uint64_t)U8TO32_LE(&stat
e->key[8]); |
| 3557 ++ f3 = ((state->h3 >> 18) | (state->h4 << 8)) + (uint64_t)U8TO32_LE(&stat
e->key[12]); |
| 3558 ++ |
| 3559 ++ U32TO8_LE(&mac[ 0], f0); f1 += (f0 >> 32); |
| 3560 ++ U32TO8_LE(&mac[ 4], f1); f2 += (f1 >> 32); |
| 3561 ++ U32TO8_LE(&mac[ 8], f2); f3 += (f2 >> 32); |
| 3562 ++ U32TO8_LE(&mac[12], f3); |
| 3563 ++ } |
| 3564 ++ |
| 3565 ++#endif /* !OPENSSL_NO_POLY1305 */ |
| 3566 +diff --git a/crypto/poly1305/poly1305.h b/crypto/poly1305/poly1305.h |
| 3567 +new file mode 100644 |
| 3568 +index 0000000..28f85ed |
| 3569 +--- /dev/null |
| 3570 ++++ b/crypto/poly1305/poly1305.h |
| 3571 +@@ -0,0 +1,88 @@ |
| 3572 ++/* |
| 3573 ++ * Poly1305 |
| 3574 ++ * |
| 3575 ++ * Created on: Jun, 2013 |
| 3576 ++ * Author: Elie Bursztein (elieb@google.com) |
| 3577 ++ * |
| 3578 ++ * Adapted from the estream code by D. Bernstein. |
| 3579 ++ */ |
| 3580 ++/* ==================================================================== |
| 3581 ++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. |
| 3582 ++ * |
| 3583 ++ * Redistribution and use in source and binary forms, with or without |
| 3584 ++ * modification, are permitted provided that the following conditions |
| 3585 ++ * are met: |
| 3586 ++ * |
| 3587 ++ * 1. Redistributions of source code must retain the above copyright |
| 3588 ++ * notice, this list of conditions and the following disclaimer. |
| 3589 ++ * |
| 3590 ++ * 2. Redistributions in binary form must reproduce the above copyright |
| 3591 ++ * notice, this list of conditions and the following disclaimer in |
| 3592 ++ * the documentation and/or other materials provided with the |
| 3593 ++ * distribution. |
| 3594 ++ * |
| 3595 ++ * 3. All advertising materials mentioning features or use of this |
| 3596 ++ * software must display the following acknowledgment: |
| 3597 ++ * "This product includes software developed by the OpenSSL Project |
| 3598 ++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" |
| 3599 ++ * |
| 3600 ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 3601 ++ * endorse or promote products derived from this software without |
| 3602 ++ * prior written permission. For written permission, please contact |
| 3603 ++ * licensing@OpenSSL.org. |
| 3604 ++ * |
| 3605 ++ * 5. Products derived from this software may not be called "OpenSSL" |
| 3606 ++ * nor may "OpenSSL" appear in their names without prior written |
| 3607 ++ * permission of the OpenSSL Project. |
| 3608 ++ * |
| 3609 ++ * 6. Redistributions of any form whatsoever must retain the following |
| 3610 ++ * acknowledgment: |
| 3611 ++ * "This product includes software developed by the OpenSSL Project |
| 3612 ++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" |
| 3613 ++ * |
| 3614 ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 3615 ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 3616 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 3617 ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 3618 ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 3619 ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 3620 ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 3621 ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 3622 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 3623 ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 3624 ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 3625 ++ * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 3626 ++ * ==================================================================== |
| 3627 ++ */ |
| 3628 ++ |
| 3629 ++#ifndef HEADER_POLY1305_H_ |
| 3630 ++#define HEADER_POLY1305_H_ |
| 3631 ++ |
| 3632 ++#include <stdint.h> |
| 3633 ++#include <openssl/opensslconf.h> |
| 3634 ++ |
| 3635 ++#if defined(OPENSSL_NO_POLY1305) |
| 3636 ++#error Poly1305 support is disabled. |
| 3637 ++#endif |
| 3638 ++ |
| 3639 ++typedef unsigned char poly1305_state[512]; |
| 3640 ++ |
| 3641 ++/* poly1305_init sets up |state| so that it can be used to calculate an |
| 3642 ++ * authentication tag with the one-time key |key|. Note that |key| is a |
| 3643 ++ * one-time key and therefore there is no `reset' method because that would |
| 3644 ++ * enable several messages to be authenticated with the same key. */ |
| 3645 ++extern void CRYPTO_poly1305_init(poly1305_state* state, |
| 3646 ++ const unsigned char key[32]); |
| 3647 ++ |
| 3648 ++/* poly1305_update processes |in_len| bytes from |in|. It can be called zero o
r |
| 3649 ++ * more times after poly1305_init. */ |
| 3650 ++extern void CRYPTO_poly1305_update(poly1305_state* state, |
| 3651 ++ const unsigned char *in, |
| 3652 ++ size_t in_len); |
| 3653 ++ |
| 3654 ++/* poly1305_finish completes the poly1305 calculation and writes a 16 byte |
| 3655 ++ * authentication tag to |mac|. */ |
| 3656 ++extern void CRYPTO_poly1305_finish(poly1305_state* state, |
| 3657 ++ unsigned char mac[16]); |
| 3658 ++ |
| 3659 ++#endif /* HEADER_POLY1305_H_ */ |
| 3660 +diff --git a/crypto/poly1305/poly1305_arm.c b/crypto/poly1305/poly1305_arm.c |
| 3661 +new file mode 100644 |
| 3662 +index 0000000..adcef35 |
| 3663 +--- /dev/null |
| 3664 ++++ b/crypto/poly1305/poly1305_arm.c |
| 3665 +@@ -0,0 +1,335 @@ |
| 3666 ++/* ==================================================================== |
| 3667 ++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. |
| 3668 ++ * |
| 3669 ++ * Redistribution and use in source and binary forms, with or without |
| 3670 ++ * modification, are permitted provided that the following conditions |
| 3671 ++ * are met: |
| 3672 ++ * |
| 3673 ++ * 1. Redistributions of source code must retain the above copyright |
| 3674 ++ * notice, this list of conditions and the following disclaimer. |
| 3675 ++ * |
| 3676 ++ * 2. Redistributions in binary form must reproduce the above copyright |
| 3677 ++ * notice, this list of conditions and the following disclaimer in |
| 3678 ++ * the documentation and/or other materials provided with the |
| 3679 ++ * distribution. |
| 3680 ++ * |
| 3681 ++ * 3. All advertising materials mentioning features or use of this |
| 3682 ++ * software must display the following acknowledgment: |
| 3683 ++ * "This product includes software developed by the OpenSSL Project |
| 3684 ++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" |
| 3685 ++ * |
| 3686 ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 3687 ++ * endorse or promote products derived from this software without |
| 3688 ++ * prior written permission. For written permission, please contact |
| 3689 ++ * licensing@OpenSSL.org. |
| 3690 ++ * |
| 3691 ++ * 5. Products derived from this software may not be called "OpenSSL" |
| 3692 ++ * nor may "OpenSSL" appear in their names without prior written |
| 3693 ++ * permission of the OpenSSL Project. |
| 3694 ++ * |
| 3695 ++ * 6. Redistributions of any form whatsoever must retain the following |
| 3696 ++ * acknowledgment: |
| 3697 ++ * "This product includes software developed by the OpenSSL Project |
| 3698 ++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" |
| 3699 ++ * |
| 3700 ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 3701 ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 3702 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 3703 ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 3704 ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 3705 ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 3706 ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 3707 ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 3708 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 3709 ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 3710 ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 3711 ++ * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 3712 ++ * ==================================================================== |
| 3713 ++ */ |
| 3714 ++ |
| 3715 ++/* This implementation was taken from the public domain, neon2 version in |
| 3716 ++ * SUPERCOP by D. J. Bernstein and Peter Schwabe. */ |
| 3717 ++ |
| 3718 ++#include <stdint.h> |
| 3719 ++ |
| 3720 ++#include <openssl/poly1305.h> |
| 3721 ++ |
| 3722 ++#if !defined(OPENSSL_NO_POLY1305) |
| 3723 ++ |
| 3724 ++typedef struct { |
| 3725 ++ uint32_t v[12]; /* for alignment; only using 10 */ |
| 3726 ++} fe1305x2; |
| 3727 ++ |
| 3728 ++#define addmulmod openssl_poly1305_neon2_addmulmod |
| 3729 ++#define blocks openssl_poly1305_neon2_blocks |
| 3730 ++ |
| 3731 ++extern void addmulmod(fe1305x2 *r, const fe1305x2 *x, const fe1305x2 *y, const
fe1305x2 *c); |
| 3732 ++ |
| 3733 ++extern int blocks(fe1305x2 *h, const fe1305x2 *precomp, const unsigned char *i
n, unsigned int inlen); |
| 3734 ++ |
| 3735 ++static void freeze(fe1305x2 *r) |
| 3736 ++ { |
| 3737 ++ int i; |
| 3738 ++ |
| 3739 ++ uint32_t x0 = r->v[0]; |
| 3740 ++ uint32_t x1 = r->v[2]; |
| 3741 ++ uint32_t x2 = r->v[4]; |
| 3742 ++ uint32_t x3 = r->v[6]; |
| 3743 ++ uint32_t x4 = r->v[8]; |
| 3744 ++ uint32_t y0; |
| 3745 ++ uint32_t y1; |
| 3746 ++ uint32_t y2; |
| 3747 ++ uint32_t y3; |
| 3748 ++ uint32_t y4; |
| 3749 ++ uint32_t swap; |
| 3750 ++ |
| 3751 ++ for (i = 0;i < 3;++i) |
| 3752 ++ { |
| 3753 ++ x1 += x0 >> 26; x0 &= 0x3ffffff; |
| 3754 ++ x2 += x1 >> 26; x1 &= 0x3ffffff; |
| 3755 ++ x3 += x2 >> 26; x2 &= 0x3ffffff; |
| 3756 ++ x4 += x3 >> 26; x3 &= 0x3ffffff; |
| 3757 ++ x0 += 5*(x4 >> 26); x4 &= 0x3ffffff; |
| 3758 ++ } |
| 3759 ++ |
| 3760 ++ y0 = x0 + 5; |
| 3761 ++ y1 = x1 + (y0 >> 26); y0 &= 0x3ffffff; |
| 3762 ++ y2 = x2 + (y1 >> 26); y1 &= 0x3ffffff; |
| 3763 ++ y3 = x3 + (y2 >> 26); y2 &= 0x3ffffff; |
| 3764 ++ y4 = x4 + (y3 >> 26); y3 &= 0x3ffffff; |
| 3765 ++ swap = -(y4 >> 26); y4 &= 0x3ffffff; |
| 3766 ++ |
| 3767 ++ y0 ^= x0; |
| 3768 ++ y1 ^= x1; |
| 3769 ++ y2 ^= x2; |
| 3770 ++ y3 ^= x3; |
| 3771 ++ y4 ^= x4; |
| 3772 ++ |
| 3773 ++ y0 &= swap; |
| 3774 ++ y1 &= swap; |
| 3775 ++ y2 &= swap; |
| 3776 ++ y3 &= swap; |
| 3777 ++ y4 &= swap; |
| 3778 ++ |
| 3779 ++ y0 ^= x0; |
| 3780 ++ y1 ^= x1; |
| 3781 ++ y2 ^= x2; |
| 3782 ++ y3 ^= x3; |
| 3783 ++ y4 ^= x4; |
| 3784 ++ |
| 3785 ++ r->v[0] = y0; |
| 3786 ++ r->v[2] = y1; |
| 3787 ++ r->v[4] = y2; |
| 3788 ++ r->v[6] = y3; |
| 3789 ++ r->v[8] = y4; |
| 3790 ++ } |
| 3791 ++ |
| 3792 ++static void fe1305x2_tobytearray(unsigned char *r, fe1305x2 *x) |
| 3793 ++ { |
| 3794 ++ uint32_t x0 = x->v[0]; |
| 3795 ++ uint32_t x1 = x->v[2]; |
| 3796 ++ uint32_t x2 = x->v[4]; |
| 3797 ++ uint32_t x3 = x->v[6]; |
| 3798 ++ uint32_t x4 = x->v[8]; |
| 3799 ++ |
| 3800 ++ x1 += x0 >> 26; |
| 3801 ++ x0 &= 0x3ffffff; |
| 3802 ++ x2 += x1 >> 26; |
| 3803 ++ x1 &= 0x3ffffff; |
| 3804 ++ x3 += x2 >> 26; |
| 3805 ++ x2 &= 0x3ffffff; |
| 3806 ++ x4 += x3 >> 26; |
| 3807 ++ x3 &= 0x3ffffff; |
| 3808 ++ |
| 3809 ++ *(uint32_t *) r = x0 + (x1 << 26); |
| 3810 ++ *(uint32_t *) (r + 4) = (x1 >> 6) + (x2 << 20); |
| 3811 ++ *(uint32_t *) (r + 8) = (x2 >> 12) + (x3 << 14); |
| 3812 ++ *(uint32_t *) (r + 12) = (x3 >> 18) + (x4 << 8); |
| 3813 ++ } |
| 3814 ++ |
| 3815 ++/* load32 exists to avoid breaking strict aliasing rules in |
| 3816 ++ * fe1305x2_frombytearray. */ |
| 3817 ++static uint32_t load32(unsigned char *t) |
| 3818 ++ { |
| 3819 ++ uint32_t tmp; |
| 3820 ++ memcpy(&tmp, t, sizeof(tmp)); |
| 3821 ++ return tmp; |
| 3822 ++ } |
| 3823 ++ |
| 3824 ++static void fe1305x2_frombytearray(fe1305x2 *r, const unsigned char *x, unsign
ed long long xlen) |
| 3825 ++ { |
| 3826 ++ int i; |
| 3827 ++ unsigned char t[17]; |
| 3828 ++ |
| 3829 ++ for (i = 0; (i < 16) && (i < xlen); i++) |
| 3830 ++ t[i] = x[i]; |
| 3831 ++ xlen -= i; |
| 3832 ++ x += i; |
| 3833 ++ t[i++] = 1; |
| 3834 ++ for (; i<17; i++) |
| 3835 ++ t[i] = 0; |
| 3836 ++ |
| 3837 ++ r->v[0] = 0x3ffffff & load32(t); |
| 3838 ++ r->v[2] = 0x3ffffff & (load32(t + 3) >> 2); |
| 3839 ++ r->v[4] = 0x3ffffff & (load32(t + 6) >> 4); |
| 3840 ++ r->v[6] = 0x3ffffff & (load32(t + 9) >> 6); |
| 3841 ++ r->v[8] = load32(t + 13); |
| 3842 ++ |
| 3843 ++ if (xlen) |
| 3844 ++ { |
| 3845 ++ for (i = 0; (i < 16) && (i < xlen); i++) |
| 3846 ++ t[i] = x[i]; |
| 3847 ++ t[i++] = 1; |
| 3848 ++ for (; i<17; i++) |
| 3849 ++ t[i] = 0; |
| 3850 ++ |
| 3851 ++ r->v[1] = 0x3ffffff & load32(t); |
| 3852 ++ r->v[3] = 0x3ffffff & (load32(t + 3) >> 2); |
| 3853 ++ r->v[5] = 0x3ffffff & (load32(t + 6) >> 4); |
| 3854 ++ r->v[7] = 0x3ffffff & (load32(t + 9) >> 6); |
| 3855 ++ r->v[9] = load32(t + 13); |
| 3856 ++ } |
| 3857 ++ else |
| 3858 ++ r->v[1] = r->v[3] = r->v[5] = r->v[7] = r->v[9] = 0; |
| 3859 ++ } |
| 3860 ++ |
| 3861 ++static const fe1305x2 zero __attribute__ ((aligned (16))); |
| 3862 ++ |
| 3863 ++struct poly1305_state_st { |
| 3864 ++ unsigned char data[sizeof(fe1305x2[5]) + 128]; |
| 3865 ++ unsigned char buf[32]; |
| 3866 ++ unsigned int buf_used; |
| 3867 ++ unsigned char key[16]; |
| 3868 ++}; |
| 3869 ++ |
| 3870 ++void CRYPTO_poly1305_init(poly1305_state *state, const unsigned char key[32]) |
| 3871 ++ { |
| 3872 ++ struct poly1305_state_st *st = (struct poly1305_state_st*) (state); |
| 3873 ++ fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); |
| 3874 ++ fe1305x2 *const h = r + 1; |
| 3875 ++ fe1305x2 *const c = h + 1; |
| 3876 ++ fe1305x2 *const precomp = c + 1; |
| 3877 ++ unsigned int j; |
| 3878 ++ |
| 3879 ++ r->v[1] = r->v[0] = 0x3ffffff & *(uint32_t *) key; |
| 3880 ++ r->v[3] = r->v[2] = 0x3ffff03 & ((*(uint32_t *) (key + 3)) >> 2); |
| 3881 ++ r->v[5] = r->v[4] = 0x3ffc0ff & ((*(uint32_t *) (key + 6)) >> 4); |
| 3882 ++ r->v[7] = r->v[6] = 0x3f03fff & ((*(uint32_t *) (key + 9)) >> 6); |
| 3883 ++ r->v[9] = r->v[8] = 0x00fffff & ((*(uint32_t *) (key + 12)) >> 8); |
| 3884 ++ |
| 3885 ++ for (j = 0; j < 10; j++) |
| 3886 ++ h->v[j] = 0; /* XXX: should fast-forward a bit */ |
| 3887 ++ |
| 3888 ++ addmulmod(precomp,r,r,&zero); /* precompute r^2 */ |
| 3889 ++ addmulmod(precomp + 1,precomp,precomp,&zero); /* precompute r^4 */ |
| 3890 ++ |
| 3891 ++ memcpy(st->key, key + 16, 16); |
| 3892 ++ st->buf_used = 0; |
| 3893 ++ } |
| 3894 ++ |
| 3895 ++void CRYPTO_poly1305_update(poly1305_state *state, const unsigned char *in, si
ze_t in_len) |
| 3896 ++ { |
| 3897 ++ struct poly1305_state_st *st = (struct poly1305_state_st*) (state); |
| 3898 ++ fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); |
| 3899 ++ fe1305x2 *const h = r + 1; |
| 3900 ++ fe1305x2 *const c = h + 1; |
| 3901 ++ fe1305x2 *const precomp = c + 1; |
| 3902 ++ unsigned int i; |
| 3903 ++ unsigned char data[sizeof(fe1305x2) + 16]; |
| 3904 ++ fe1305x2 *const r2r = (fe1305x2 *) (data + (15 & (-(int) data))); |
| 3905 ++ |
| 3906 ++ if (st->buf_used) |
| 3907 ++ { |
| 3908 ++ unsigned int todo = 32 - st->buf_used; |
| 3909 ++ if (todo > in_len) |
| 3910 ++ todo = in_len; |
| 3911 ++ for (i = 0; i < todo; i++) |
| 3912 ++ st->buf[st->buf_used + i] = in[i]; |
| 3913 ++ st->buf_used += todo; |
| 3914 ++ in_len -= todo; |
| 3915 ++ in += todo; |
| 3916 ++ |
| 3917 ++ if (st->buf_used == sizeof(st->buf)) |
| 3918 ++ { |
| 3919 ++ fe1305x2_frombytearray(c, st->buf, sizeof(st->buf)); |
| 3920 ++ r2r->v[0] = precomp->v[0]; |
| 3921 ++ r2r->v[2] = precomp->v[2]; |
| 3922 ++ r2r->v[4] = precomp->v[4]; |
| 3923 ++ r2r->v[6] = precomp->v[6]; |
| 3924 ++ r2r->v[8] = precomp->v[8]; |
| 3925 ++ r2r->v[1] = r->v[1]; |
| 3926 ++ r2r->v[3] = r->v[3]; |
| 3927 ++ r2r->v[5] = r->v[5]; |
| 3928 ++ r2r->v[7] = r->v[7]; |
| 3929 ++ r2r->v[9] = r->v[9]; |
| 3930 ++ addmulmod(h,h,r2r,c); |
| 3931 ++ st->buf_used = 0; |
| 3932 ++ } |
| 3933 ++ } |
| 3934 ++ |
| 3935 ++ while (in_len > 32) |
| 3936 ++ { |
| 3937 ++ unsigned int tlen = 1048576; |
| 3938 ++ if (in_len < 1048576) |
| 3939 ++ tlen = in_len; |
| 3940 ++ tlen -= blocks(h, precomp, in, tlen); |
| 3941 ++ in_len -= tlen; |
| 3942 ++ in += tlen; |
| 3943 ++ } |
| 3944 ++ |
| 3945 ++ if (in_len) |
| 3946 ++ { |
| 3947 ++ for (i = 0; i < in_len; i++) |
| 3948 ++ st->buf[i] = in[i]; |
| 3949 ++ st->buf_used = in_len; |
| 3950 ++ } |
| 3951 ++ } |
| 3952 ++ |
| 3953 ++void CRYPTO_poly1305_finish(poly1305_state* state, unsigned char mac[16]) |
| 3954 ++ { |
| 3955 ++ struct poly1305_state_st *st = (struct poly1305_state_st*) (state); |
| 3956 ++ fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); |
| 3957 ++ fe1305x2 *const h = r + 1; |
| 3958 ++ fe1305x2 *const c = h + 1; |
| 3959 ++ fe1305x2 *const precomp = c + 1; |
| 3960 ++ |
| 3961 ++ if (st->buf_used > 16) |
| 3962 ++ { |
| 3963 ++ fe1305x2_frombytearray(c, st->buf, st->buf_used); |
| 3964 ++ precomp->v[1] = r->v[1]; |
| 3965 ++ precomp->v[3] = r->v[3]; |
| 3966 ++ precomp->v[5] = r->v[5]; |
| 3967 ++ precomp->v[7] = r->v[7]; |
| 3968 ++ precomp->v[9] = r->v[9]; |
| 3969 ++ addmulmod(h,h,precomp,c); |
| 3970 ++ } |
| 3971 ++ else if (st->buf_used > 0) |
| 3972 ++ { |
| 3973 ++ fe1305x2_frombytearray(c, st->buf, st->buf_used); |
| 3974 ++ r->v[1] = 1; |
| 3975 ++ r->v[3] = 0; |
| 3976 ++ r->v[5] = 0; |
| 3977 ++ r->v[7] = 0; |
| 3978 ++ r->v[9] = 0; |
| 3979 ++ addmulmod(h,h,r,c); |
| 3980 ++ } |
| 3981 ++ |
| 3982 ++ h->v[0] += h->v[1]; |
| 3983 ++ h->v[2] += h->v[3]; |
| 3984 ++ h->v[4] += h->v[5]; |
| 3985 ++ h->v[6] += h->v[7]; |
| 3986 ++ h->v[8] += h->v[9]; |
| 3987 ++ freeze(h); |
| 3988 ++ |
| 3989 ++ fe1305x2_frombytearray(c, st->key, 16); |
| 3990 ++ c->v[8] ^= (1 << 24); |
| 3991 ++ |
| 3992 ++ h->v[0] += c->v[0]; |
| 3993 ++ h->v[2] += c->v[2]; |
| 3994 ++ h->v[4] += c->v[4]; |
| 3995 ++ h->v[6] += c->v[6]; |
| 3996 ++ h->v[8] += c->v[8]; |
| 3997 ++ fe1305x2_tobytearray(mac, h); |
| 3998 ++ } |
| 3999 ++ |
| 4000 ++#endif /* !OPENSSL_NO_POLY1305 */ |
| 4001 +diff --git a/crypto/poly1305/poly1305_arm_asm.s b/crypto/poly1305/poly1305_arm_
asm.s |
| 4002 +new file mode 100644 |
| 4003 +index 0000000..449d16f |
| 4004 +--- /dev/null |
| 4005 ++++ b/crypto/poly1305/poly1305_arm_asm.s |
| 4006 +@@ -0,0 +1,2009 @@ |
| 4007 ++# This implementation was taken from the public domain, neon2 version in |
| 4008 ++# SUPERCOP by D. J. Bernstein and Peter Schwabe. |
| 4009 ++ |
| 4010 ++# qhasm: int32 input_0 |
| 4011 ++ |
| 4012 ++# qhasm: int32 input_1 |
| 4013 ++ |
| 4014 ++# qhasm: int32 input_2 |
| 4015 ++ |
| 4016 ++# qhasm: int32 input_3 |
| 4017 ++ |
| 4018 ++# qhasm: stack32 input_4 |
| 4019 ++ |
| 4020 ++# qhasm: stack32 input_5 |
| 4021 ++ |
| 4022 ++# qhasm: stack32 input_6 |
| 4023 ++ |
| 4024 ++# qhasm: stack32 input_7 |
| 4025 ++ |
| 4026 ++# qhasm: int32 caller_r4 |
| 4027 ++ |
| 4028 ++# qhasm: int32 caller_r5 |
| 4029 ++ |
| 4030 ++# qhasm: int32 caller_r6 |
| 4031 ++ |
| 4032 ++# qhasm: int32 caller_r7 |
| 4033 ++ |
| 4034 ++# qhasm: int32 caller_r8 |
| 4035 ++ |
| 4036 ++# qhasm: int32 caller_r9 |
| 4037 ++ |
| 4038 ++# qhasm: int32 caller_r10 |
| 4039 ++ |
| 4040 ++# qhasm: int32 caller_r11 |
| 4041 ++ |
| 4042 ++# qhasm: int32 caller_r12 |
| 4043 ++ |
| 4044 ++# qhasm: int32 caller_r14 |
| 4045 ++ |
| 4046 ++# qhasm: reg128 caller_q4 |
| 4047 ++ |
| 4048 ++# qhasm: reg128 caller_q5 |
| 4049 ++ |
| 4050 ++# qhasm: reg128 caller_q6 |
| 4051 ++ |
| 4052 ++# qhasm: reg128 caller_q7 |
| 4053 ++ |
| 4054 ++# qhasm: startcode |
| 4055 ++.fpu neon |
| 4056 ++.text |
| 4057 ++ |
| 4058 ++# qhasm: reg128 r0 |
| 4059 ++ |
| 4060 ++# qhasm: reg128 r1 |
| 4061 ++ |
| 4062 ++# qhasm: reg128 r2 |
| 4063 ++ |
| 4064 ++# qhasm: reg128 r3 |
| 4065 ++ |
| 4066 ++# qhasm: reg128 r4 |
| 4067 ++ |
| 4068 ++# qhasm: reg128 x01 |
| 4069 ++ |
| 4070 ++# qhasm: reg128 x23 |
| 4071 ++ |
| 4072 ++# qhasm: reg128 x4 |
| 4073 ++ |
| 4074 ++# qhasm: reg128 y0 |
| 4075 ++ |
| 4076 ++# qhasm: reg128 y12 |
| 4077 ++ |
| 4078 ++# qhasm: reg128 y34 |
| 4079 ++ |
| 4080 ++# qhasm: reg128 5y12 |
| 4081 ++ |
| 4082 ++# qhasm: reg128 5y34 |
| 4083 ++ |
| 4084 ++# qhasm: stack128 y0_stack |
| 4085 ++ |
| 4086 ++# qhasm: stack128 y12_stack |
| 4087 ++ |
| 4088 ++# qhasm: stack128 y34_stack |
| 4089 ++ |
| 4090 ++# qhasm: stack128 5y12_stack |
| 4091 ++ |
| 4092 ++# qhasm: stack128 5y34_stack |
| 4093 ++ |
| 4094 ++# qhasm: reg128 z0 |
| 4095 ++ |
| 4096 ++# qhasm: reg128 z12 |
| 4097 ++ |
| 4098 ++# qhasm: reg128 z34 |
| 4099 ++ |
| 4100 ++# qhasm: reg128 5z12 |
| 4101 ++ |
| 4102 ++# qhasm: reg128 5z34 |
| 4103 ++ |
| 4104 ++# qhasm: stack128 z0_stack |
| 4105 ++ |
| 4106 ++# qhasm: stack128 z12_stack |
| 4107 ++ |
| 4108 ++# qhasm: stack128 z34_stack |
| 4109 ++ |
| 4110 ++# qhasm: stack128 5z12_stack |
| 4111 ++ |
| 4112 ++# qhasm: stack128 5z34_stack |
| 4113 ++ |
| 4114 ++# qhasm: stack128 two24 |
| 4115 ++ |
| 4116 ++# qhasm: int32 ptr |
| 4117 ++ |
| 4118 ++# qhasm: reg128 c01 |
| 4119 ++ |
| 4120 ++# qhasm: reg128 c23 |
| 4121 ++ |
| 4122 ++# qhasm: reg128 d01 |
| 4123 ++ |
| 4124 ++# qhasm: reg128 d23 |
| 4125 ++ |
| 4126 ++# qhasm: reg128 t0 |
| 4127 ++ |
| 4128 ++# qhasm: reg128 t1 |
| 4129 ++ |
| 4130 ++# qhasm: reg128 t2 |
| 4131 ++ |
| 4132 ++# qhasm: reg128 t3 |
| 4133 ++ |
| 4134 ++# qhasm: reg128 t4 |
| 4135 ++ |
| 4136 ++# qhasm: reg128 mask |
| 4137 ++ |
| 4138 ++# qhasm: reg128 u0 |
| 4139 ++ |
| 4140 ++# qhasm: reg128 u1 |
| 4141 ++ |
| 4142 ++# qhasm: reg128 u2 |
| 4143 ++ |
| 4144 ++# qhasm: reg128 u3 |
| 4145 ++ |
| 4146 ++# qhasm: reg128 u4 |
| 4147 ++ |
| 4148 ++# qhasm: reg128 v01 |
| 4149 ++ |
| 4150 ++# qhasm: reg128 mid |
| 4151 ++ |
| 4152 ++# qhasm: reg128 v23 |
| 4153 ++ |
| 4154 ++# qhasm: reg128 v4 |
| 4155 ++ |
| 4156 ++# qhasm: int32 len |
| 4157 ++ |
| 4158 ++# qhasm: qpushenter crypto_onetimeauth_poly1305_neon2_blocks |
| 4159 ++.align 4 |
| 4160 ++.global openssl_poly1305_neon2_blocks |
| 4161 ++.type openssl_poly1305_neon2_blocks STT_FUNC |
| 4162 ++openssl_poly1305_neon2_blocks: |
| 4163 ++vpush {q4,q5,q6,q7} |
| 4164 ++mov r12,sp |
| 4165 ++sub sp,sp,#192 |
| 4166 ++and sp,sp,#0xffffffe0 |
| 4167 ++ |
| 4168 ++# qhasm: len = input_3 |
| 4169 ++# asm 1: mov >len=int32#4,<input_3=int32#4 |
| 4170 ++# asm 2: mov >len=r3,<input_3=r3 |
| 4171 ++mov r3,r3 |
| 4172 ++ |
| 4173 ++# qhasm: new y0 |
| 4174 ++ |
| 4175 ++# qhasm: y0 = mem64[input_1]y0[1]; input_1 += 8 |
| 4176 ++# asm 1: vld1.8 {<y0=reg128#1%bot},[<input_1=int32#2]! |
| 4177 ++# asm 2: vld1.8 {<y0=d0},[<input_1=r1]! |
| 4178 ++vld1.8 {d0},[r1]! |
| 4179 ++ |
| 4180 ++# qhasm: y12 = mem128[input_1]; input_1 += 16 |
| 4181 ++# asm 1: vld1.8 {>y12=reg128#2%bot->y12=reg128#2%top},[<input_1=int32#2]! |
| 4182 ++# asm 2: vld1.8 {>y12=d2->y12=d3},[<input_1=r1]! |
| 4183 ++vld1.8 {d2-d3},[r1]! |
| 4184 ++ |
| 4185 ++# qhasm: y34 = mem128[input_1]; input_1 += 16 |
| 4186 ++# asm 1: vld1.8 {>y34=reg128#3%bot->y34=reg128#3%top},[<input_1=int32#2]! |
| 4187 ++# asm 2: vld1.8 {>y34=d4->y34=d5},[<input_1=r1]! |
| 4188 ++vld1.8 {d4-d5},[r1]! |
| 4189 ++ |
| 4190 ++# qhasm: input_1 += 8 |
| 4191 ++# asm 1: add >input_1=int32#2,<input_1=int32#2,#8 |
| 4192 ++# asm 2: add >input_1=r1,<input_1=r1,#8 |
| 4193 ++add r1,r1,#8 |
| 4194 ++ |
| 4195 ++# qhasm: new z0 |
| 4196 ++ |
| 4197 ++# qhasm: z0 = mem64[input_1]z0[1]; input_1 += 8 |
| 4198 ++# asm 1: vld1.8 {<z0=reg128#4%bot},[<input_1=int32#2]! |
| 4199 ++# asm 2: vld1.8 {<z0=d6},[<input_1=r1]! |
| 4200 ++vld1.8 {d6},[r1]! |
| 4201 ++ |
| 4202 ++# qhasm: z12 = mem128[input_1]; input_1 += 16 |
| 4203 ++# asm 1: vld1.8 {>z12=reg128#5%bot->z12=reg128#5%top},[<input_1=int32#2]! |
| 4204 ++# asm 2: vld1.8 {>z12=d8->z12=d9},[<input_1=r1]! |
| 4205 ++vld1.8 {d8-d9},[r1]! |
| 4206 ++ |
| 4207 ++# qhasm: z34 = mem128[input_1]; input_1 += 16 |
| 4208 ++# asm 1: vld1.8 {>z34=reg128#6%bot->z34=reg128#6%top},[<input_1=int32#2]! |
| 4209 ++# asm 2: vld1.8 {>z34=d10->z34=d11},[<input_1=r1]! |
| 4210 ++vld1.8 {d10-d11},[r1]! |
| 4211 ++ |
| 4212 ++# qhasm: 2x mask = 0xffffffff |
| 4213 ++# asm 1: vmov.i64 >mask=reg128#7,#0xffffffff |
| 4214 ++# asm 2: vmov.i64 >mask=q6,#0xffffffff |
| 4215 ++vmov.i64 q6,#0xffffffff |
| 4216 ++ |
| 4217 ++# qhasm: 2x u4 = 0xff |
| 4218 ++# asm 1: vmov.i64 >u4=reg128#8,#0xff |
| 4219 ++# asm 2: vmov.i64 >u4=q7,#0xff |
| 4220 ++vmov.i64 q7,#0xff |
| 4221 ++ |
| 4222 ++# qhasm: x01 aligned= mem128[input_0];input_0+=16 |
| 4223 ++# asm 1: vld1.8 {>x01=reg128#9%bot->x01=reg128#9%top},[<input_0=int32#1,: 128]
! |
| 4224 ++# asm 2: vld1.8 {>x01=d16->x01=d17},[<input_0=r0,: 128]! |
| 4225 ++vld1.8 {d16-d17},[r0,: 128]! |
| 4226 ++ |
| 4227 ++# qhasm: x23 aligned= mem128[input_0];input_0+=16 |
| 4228 ++# asm 1: vld1.8 {>x23=reg128#10%bot->x23=reg128#10%top},[<input_0=int32#1,: 12
8]! |
| 4229 ++# asm 2: vld1.8 {>x23=d18->x23=d19},[<input_0=r0,: 128]! |
| 4230 ++vld1.8 {d18-d19},[r0,: 128]! |
| 4231 ++ |
| 4232 ++# qhasm: x4 aligned= mem64[input_0]x4[1] |
| 4233 ++# asm 1: vld1.8 {<x4=reg128#11%bot},[<input_0=int32#1,: 64] |
| 4234 ++# asm 2: vld1.8 {<x4=d20},[<input_0=r0,: 64] |
| 4235 ++vld1.8 {d20},[r0,: 64] |
| 4236 ++ |
| 4237 ++# qhasm: input_0 -= 32 |
| 4238 ++# asm 1: sub >input_0=int32#1,<input_0=int32#1,#32 |
| 4239 ++# asm 2: sub >input_0=r0,<input_0=r0,#32 |
| 4240 ++sub r0,r0,#32 |
| 4241 ++ |
| 4242 ++# qhasm: 2x mask unsigned>>=6 |
| 4243 ++# asm 1: vshr.u64 >mask=reg128#7,<mask=reg128#7,#6 |
| 4244 ++# asm 2: vshr.u64 >mask=q6,<mask=q6,#6 |
| 4245 ++vshr.u64 q6,q6,#6 |
| 4246 ++ |
| 4247 ++# qhasm: 2x u4 unsigned>>= 7 |
| 4248 ++# asm 1: vshr.u64 >u4=reg128#8,<u4=reg128#8,#7 |
| 4249 ++# asm 2: vshr.u64 >u4=q7,<u4=q7,#7 |
| 4250 ++vshr.u64 q7,q7,#7 |
| 4251 ++ |
| 4252 ++# qhasm: 4x 5y12 = y12 << 2 |
| 4253 ++# asm 1: vshl.i32 >5y12=reg128#12,<y12=reg128#2,#2 |
| 4254 ++# asm 2: vshl.i32 >5y12=q11,<y12=q1,#2 |
| 4255 ++vshl.i32 q11,q1,#2 |
| 4256 ++ |
| 4257 ++# qhasm: 4x 5y34 = y34 << 2 |
| 4258 ++# asm 1: vshl.i32 >5y34=reg128#13,<y34=reg128#3,#2 |
| 4259 ++# asm 2: vshl.i32 >5y34=q12,<y34=q2,#2 |
| 4260 ++vshl.i32 q12,q2,#2 |
| 4261 ++ |
| 4262 ++# qhasm: 4x 5y12 += y12 |
| 4263 ++# asm 1: vadd.i32 >5y12=reg128#12,<5y12=reg128#12,<y12=reg128#2 |
| 4264 ++# asm 2: vadd.i32 >5y12=q11,<5y12=q11,<y12=q1 |
| 4265 ++vadd.i32 q11,q11,q1 |
| 4266 ++ |
| 4267 ++# qhasm: 4x 5y34 += y34 |
| 4268 ++# asm 1: vadd.i32 >5y34=reg128#13,<5y34=reg128#13,<y34=reg128#3 |
| 4269 ++# asm 2: vadd.i32 >5y34=q12,<5y34=q12,<y34=q2 |
| 4270 ++vadd.i32 q12,q12,q2 |
| 4271 ++ |
| 4272 ++# qhasm: 2x u4 <<= 24 |
| 4273 ++# asm 1: vshl.i64 >u4=reg128#8,<u4=reg128#8,#24 |
| 4274 ++# asm 2: vshl.i64 >u4=q7,<u4=q7,#24 |
| 4275 ++vshl.i64 q7,q7,#24 |
| 4276 ++ |
| 4277 ++# qhasm: 4x 5z12 = z12 << 2 |
| 4278 ++# asm 1: vshl.i32 >5z12=reg128#14,<z12=reg128#5,#2 |
| 4279 ++# asm 2: vshl.i32 >5z12=q13,<z12=q4,#2 |
| 4280 ++vshl.i32 q13,q4,#2 |
| 4281 ++ |
| 4282 ++# qhasm: 4x 5z34 = z34 << 2 |
| 4283 ++# asm 1: vshl.i32 >5z34=reg128#15,<z34=reg128#6,#2 |
| 4284 ++# asm 2: vshl.i32 >5z34=q14,<z34=q5,#2 |
| 4285 ++vshl.i32 q14,q5,#2 |
| 4286 ++ |
| 4287 ++# qhasm: 4x 5z12 += z12 |
| 4288 ++# asm 1: vadd.i32 >5z12=reg128#14,<5z12=reg128#14,<z12=reg128#5 |
| 4289 ++# asm 2: vadd.i32 >5z12=q13,<5z12=q13,<z12=q4 |
| 4290 ++vadd.i32 q13,q13,q4 |
| 4291 ++ |
| 4292 ++# qhasm: 4x 5z34 += z34 |
| 4293 ++# asm 1: vadd.i32 >5z34=reg128#15,<5z34=reg128#15,<z34=reg128#6 |
| 4294 ++# asm 2: vadd.i32 >5z34=q14,<5z34=q14,<z34=q5 |
| 4295 ++vadd.i32 q14,q14,q5 |
| 4296 ++ |
| 4297 ++# qhasm: new two24 |
| 4298 ++ |
| 4299 ++# qhasm: new y0_stack |
| 4300 ++ |
| 4301 ++# qhasm: new y12_stack |
| 4302 ++ |
| 4303 ++# qhasm: new y34_stack |
| 4304 ++ |
| 4305 ++# qhasm: new 5y12_stack |
| 4306 ++ |
| 4307 ++# qhasm: new 5y34_stack |
| 4308 ++ |
| 4309 ++# qhasm: new z0_stack |
| 4310 ++ |
| 4311 ++# qhasm: new z12_stack |
| 4312 ++ |
| 4313 ++# qhasm: new z34_stack |
| 4314 ++ |
| 4315 ++# qhasm: new 5z12_stack |
| 4316 ++ |
| 4317 ++# qhasm: new 5z34_stack |
| 4318 ++ |
| 4319 ++# qhasm: ptr = &two24 |
| 4320 ++# asm 1: lea >ptr=int32#2,<two24=stack128#1 |
| 4321 ++# asm 2: lea >ptr=r1,<two24=[sp,#0] |
| 4322 ++add r1,sp,#0 |
| 4323 ++ |
| 4324 ++# qhasm: mem128[ptr] aligned= u4 |
| 4325 ++# asm 1: vst1.8 {<u4=reg128#8%bot-<u4=reg128#8%top},[<ptr=int32#2,: 128] |
| 4326 ++# asm 2: vst1.8 {<u4=d14-<u4=d15},[<ptr=r1,: 128] |
| 4327 ++vst1.8 {d14-d15},[r1,: 128] |
| 4328 ++ |
| 4329 ++# qhasm: r4 = u4 |
| 4330 ++# asm 1: vmov >r4=reg128#16,<u4=reg128#8 |
| 4331 ++# asm 2: vmov >r4=q15,<u4=q7 |
| 4332 ++vmov q15,q7 |
| 4333 ++ |
| 4334 ++# qhasm: r0 = u4 |
| 4335 ++# asm 1: vmov >r0=reg128#8,<u4=reg128#8 |
| 4336 ++# asm 2: vmov >r0=q7,<u4=q7 |
| 4337 ++vmov q7,q7 |
| 4338 ++ |
| 4339 ++# qhasm: ptr = &y0_stack |
| 4340 ++# asm 1: lea >ptr=int32#2,<y0_stack=stack128#2 |
| 4341 ++# asm 2: lea >ptr=r1,<y0_stack=[sp,#16] |
| 4342 ++add r1,sp,#16 |
| 4343 ++ |
| 4344 ++# qhasm: mem128[ptr] aligned= y0 |
| 4345 ++# asm 1: vst1.8 {<y0=reg128#1%bot-<y0=reg128#1%top},[<ptr=int32#2,: 128] |
| 4346 ++# asm 2: vst1.8 {<y0=d0-<y0=d1},[<ptr=r1,: 128] |
| 4347 ++vst1.8 {d0-d1},[r1,: 128] |
| 4348 ++ |
| 4349 ++# qhasm: ptr = &y12_stack |
| 4350 ++# asm 1: lea >ptr=int32#2,<y12_stack=stack128#3 |
| 4351 ++# asm 2: lea >ptr=r1,<y12_stack=[sp,#32] |
| 4352 ++add r1,sp,#32 |
| 4353 ++ |
| 4354 ++# qhasm: mem128[ptr] aligned= y12 |
| 4355 ++# asm 1: vst1.8 {<y12=reg128#2%bot-<y12=reg128#2%top},[<ptr=int32#2,: 128] |
| 4356 ++# asm 2: vst1.8 {<y12=d2-<y12=d3},[<ptr=r1,: 128] |
| 4357 ++vst1.8 {d2-d3},[r1,: 128] |
| 4358 ++ |
| 4359 ++# qhasm: ptr = &y34_stack |
| 4360 ++# asm 1: lea >ptr=int32#2,<y34_stack=stack128#4 |
| 4361 ++# asm 2: lea >ptr=r1,<y34_stack=[sp,#48] |
| 4362 ++add r1,sp,#48 |
| 4363 ++ |
| 4364 ++# qhasm: mem128[ptr] aligned= y34 |
| 4365 ++# asm 1: vst1.8 {<y34=reg128#3%bot-<y34=reg128#3%top},[<ptr=int32#2,: 128] |
| 4366 ++# asm 2: vst1.8 {<y34=d4-<y34=d5},[<ptr=r1,: 128] |
| 4367 ++vst1.8 {d4-d5},[r1,: 128] |
| 4368 ++ |
| 4369 ++# qhasm: ptr = &z0_stack |
| 4370 ++# asm 1: lea >ptr=int32#2,<z0_stack=stack128#7 |
| 4371 ++# asm 2: lea >ptr=r1,<z0_stack=[sp,#96] |
| 4372 ++add r1,sp,#96 |
| 4373 ++ |
| 4374 ++# qhasm: mem128[ptr] aligned= z0 |
| 4375 ++# asm 1: vst1.8 {<z0=reg128#4%bot-<z0=reg128#4%top},[<ptr=int32#2,: 128] |
| 4376 ++# asm 2: vst1.8 {<z0=d6-<z0=d7},[<ptr=r1,: 128] |
| 4377 ++vst1.8 {d6-d7},[r1,: 128] |
| 4378 ++ |
| 4379 ++# qhasm: ptr = &z12_stack |
| 4380 ++# asm 1: lea >ptr=int32#2,<z12_stack=stack128#8 |
| 4381 ++# asm 2: lea >ptr=r1,<z12_stack=[sp,#112] |
| 4382 ++add r1,sp,#112 |
| 4383 ++ |
| 4384 ++# qhasm: mem128[ptr] aligned= z12 |
| 4385 ++# asm 1: vst1.8 {<z12=reg128#5%bot-<z12=reg128#5%top},[<ptr=int32#2,: 128] |
| 4386 ++# asm 2: vst1.8 {<z12=d8-<z12=d9},[<ptr=r1,: 128] |
| 4387 ++vst1.8 {d8-d9},[r1,: 128] |
| 4388 ++ |
| 4389 ++# qhasm: ptr = &z34_stack |
| 4390 ++# asm 1: lea >ptr=int32#2,<z34_stack=stack128#9 |
| 4391 ++# asm 2: lea >ptr=r1,<z34_stack=[sp,#128] |
| 4392 ++add r1,sp,#128 |
| 4393 ++ |
| 4394 ++# qhasm: mem128[ptr] aligned= z34 |
| 4395 ++# asm 1: vst1.8 {<z34=reg128#6%bot-<z34=reg128#6%top},[<ptr=int32#2,: 128] |
| 4396 ++# asm 2: vst1.8 {<z34=d10-<z34=d11},[<ptr=r1,: 128] |
| 4397 ++vst1.8 {d10-d11},[r1,: 128] |
| 4398 ++ |
| 4399 ++# qhasm: ptr = &5y12_stack |
| 4400 ++# asm 1: lea >ptr=int32#2,<5y12_stack=stack128#5 |
| 4401 ++# asm 2: lea >ptr=r1,<5y12_stack=[sp,#64] |
| 4402 ++add r1,sp,#64 |
| 4403 ++ |
| 4404 ++# qhasm: mem128[ptr] aligned= 5y12 |
| 4405 ++# asm 1: vst1.8 {<5y12=reg128#12%bot-<5y12=reg128#12%top},[<ptr=int32#2,: 128] |
| 4406 ++# asm 2: vst1.8 {<5y12=d22-<5y12=d23},[<ptr=r1,: 128] |
| 4407 ++vst1.8 {d22-d23},[r1,: 128] |
| 4408 ++ |
| 4409 ++# qhasm: ptr = &5y34_stack |
| 4410 ++# asm 1: lea >ptr=int32#2,<5y34_stack=stack128#6 |
| 4411 ++# asm 2: lea >ptr=r1,<5y34_stack=[sp,#80] |
| 4412 ++add r1,sp,#80 |
| 4413 ++ |
| 4414 ++# qhasm: mem128[ptr] aligned= 5y34 |
| 4415 ++# asm 1: vst1.8 {<5y34=reg128#13%bot-<5y34=reg128#13%top},[<ptr=int32#2,: 128] |
| 4416 ++# asm 2: vst1.8 {<5y34=d24-<5y34=d25},[<ptr=r1,: 128] |
| 4417 ++vst1.8 {d24-d25},[r1,: 128] |
| 4418 ++ |
| 4419 ++# qhasm: ptr = &5z12_stack |
| 4420 ++# asm 1: lea >ptr=int32#2,<5z12_stack=stack128#10 |
| 4421 ++# asm 2: lea >ptr=r1,<5z12_stack=[sp,#144] |
| 4422 ++add r1,sp,#144 |
| 4423 ++ |
| 4424 ++# qhasm: mem128[ptr] aligned= 5z12 |
| 4425 ++# asm 1: vst1.8 {<5z12=reg128#14%bot-<5z12=reg128#14%top},[<ptr=int32#2,: 128] |
| 4426 ++# asm 2: vst1.8 {<5z12=d26-<5z12=d27},[<ptr=r1,: 128] |
| 4427 ++vst1.8 {d26-d27},[r1,: 128] |
| 4428 ++ |
| 4429 ++# qhasm: ptr = &5z34_stack |
| 4430 ++# asm 1: lea >ptr=int32#2,<5z34_stack=stack128#11 |
| 4431 ++# asm 2: lea >ptr=r1,<5z34_stack=[sp,#160] |
| 4432 ++add r1,sp,#160 |
| 4433 ++ |
| 4434 ++# qhasm: mem128[ptr] aligned= 5z34 |
| 4435 ++# asm 1: vst1.8 {<5z34=reg128#15%bot-<5z34=reg128#15%top},[<ptr=int32#2,: 128] |
| 4436 ++# asm 2: vst1.8 {<5z34=d28-<5z34=d29},[<ptr=r1,: 128] |
| 4437 ++vst1.8 {d28-d29},[r1,: 128] |
| 4438 ++ |
| 4439 ++# qhasm: unsigned>? len - 64 |
| 4440 ++# asm 1: cmp <len=int32#4,#64 |
| 4441 ++# asm 2: cmp <len=r3,#64 |
| 4442 ++cmp r3,#64 |
| 4443 ++ |
| 4444 ++# qhasm: goto below64bytes if !unsigned> |
| 4445 ++bls ._below64bytes |
| 4446 ++ |
| 4447 ++# qhasm: input_2 += 32 |
| 4448 ++# asm 1: add >input_2=int32#2,<input_2=int32#3,#32 |
| 4449 ++# asm 2: add >input_2=r1,<input_2=r2,#32 |
| 4450 ++add r1,r2,#32 |
| 4451 ++ |
| 4452 ++# qhasm: mainloop2: |
| 4453 ++._mainloop2: |
| 4454 ++ |
| 4455 ++# qhasm: c01 = mem128[input_2];input_2+=16 |
| 4456 ++# asm 1: vld1.8 {>c01=reg128#1%bot->c01=reg128#1%top},[<input_2=int32#2]! |
| 4457 ++# asm 2: vld1.8 {>c01=d0->c01=d1},[<input_2=r1]! |
| 4458 ++vld1.8 {d0-d1},[r1]! |
| 4459 ++ |
| 4460 ++# qhasm: c23 = mem128[input_2];input_2+=16 |
| 4461 ++# asm 1: vld1.8 {>c23=reg128#2%bot->c23=reg128#2%top},[<input_2=int32#2]! |
| 4462 ++# asm 2: vld1.8 {>c23=d2->c23=d3},[<input_2=r1]! |
| 4463 ++vld1.8 {d2-d3},[r1]! |
| 4464 ++ |
| 4465 ++# qhasm: r4[0,1] += x01[0] unsigned* z34[2]; r4[2,3] += x01[1] unsigned* z3
4[3] |
| 4466 ++# asm 1: vmlal.u32 <r4=reg128#16,<x01=reg128#9%bot,<z34=reg128#6%top |
| 4467 ++# asm 2: vmlal.u32 <r4=q15,<x01=d16,<z34=d11 |
| 4468 ++vmlal.u32 q15,d16,d11 |
| 4469 ++ |
| 4470 ++# qhasm: ptr = &z12_stack |
| 4471 ++# asm 1: lea >ptr=int32#3,<z12_stack=stack128#8 |
| 4472 ++# asm 2: lea >ptr=r2,<z12_stack=[sp,#112] |
| 4473 ++add r2,sp,#112 |
| 4474 ++ |
| 4475 ++# qhasm: z12 aligned= mem128[ptr] |
| 4476 ++# asm 1: vld1.8 {>z12=reg128#3%bot->z12=reg128#3%top},[<ptr=int32#3,: 128] |
| 4477 ++# asm 2: vld1.8 {>z12=d4->z12=d5},[<ptr=r2,: 128] |
| 4478 ++vld1.8 {d4-d5},[r2,: 128] |
| 4479 ++ |
| 4480 ++# qhasm: r4[0,1] += x01[2] unsigned* z34[0]; r4[2,3] += x01[3] unsigned* z34[
1] |
| 4481 ++# asm 1: vmlal.u32 <r4=reg128#16,<x01=reg128#9%top,<z34=reg128#6%bot |
| 4482 ++# asm 2: vmlal.u32 <r4=q15,<x01=d17,<z34=d10 |
| 4483 ++vmlal.u32 q15,d17,d10 |
| 4484 ++ |
| 4485 ++# qhasm: ptr = &z0_stack |
| 4486 ++# asm 1: lea >ptr=int32#3,<z0_stack=stack128#7 |
| 4487 ++# asm 2: lea >ptr=r2,<z0_stack=[sp,#96] |
| 4488 ++add r2,sp,#96 |
| 4489 ++ |
| 4490 ++# qhasm: z0 aligned= mem128[ptr] |
| 4491 ++# asm 1: vld1.8 {>z0=reg128#4%bot->z0=reg128#4%top},[<ptr=int32#3,: 128] |
| 4492 ++# asm 2: vld1.8 {>z0=d6->z0=d7},[<ptr=r2,: 128] |
| 4493 ++vld1.8 {d6-d7},[r2,: 128] |
| 4494 ++ |
| 4495 ++# qhasm: r4[0,1] += x23[0] unsigned* z12[2]; r4[2,3] += x23[1] unsigned* z12[
3] |
| 4496 ++# asm 1: vmlal.u32 <r4=reg128#16,<x23=reg128#10%bot,<z12=reg128#3%top |
| 4497 ++# asm 2: vmlal.u32 <r4=q15,<x23=d18,<z12=d5 |
| 4498 ++vmlal.u32 q15,d18,d5 |
| 4499 ++ |
| 4500 ++# qhasm: c01 c23 = c01[0]c01[1]c01[2]c23[2]c23[0]c23[1]c01[3]c23[3] |
| 4501 ++# asm 1: vtrn.32 <c01=reg128#1%top,<c23=reg128#2%top |
| 4502 ++# asm 2: vtrn.32 <c01=d1,<c23=d3 |
| 4503 ++vtrn.32 d1,d3 |
| 4504 ++ |
| 4505 ++# qhasm: r4[0,1] += x23[2] unsigned* z12[0]; r4[2,3] += x23[3] unsigned* z12[
1] |
| 4506 ++# asm 1: vmlal.u32 <r4=reg128#16,<x23=reg128#10%top,<z12=reg128#3%bot |
| 4507 ++# asm 2: vmlal.u32 <r4=q15,<x23=d19,<z12=d4 |
| 4508 ++vmlal.u32 q15,d19,d4 |
| 4509 ++ |
| 4510 ++# qhasm: r4[0,1] += x4[0] unsigned* z0[0]; r4[2,3] += x4[1] unsigned* z0[1] |
| 4511 ++# asm 1: vmlal.u32 <r4=reg128#16,<x4=reg128#11%bot,<z0=reg128#4%bot |
| 4512 ++# asm 2: vmlal.u32 <r4=q15,<x4=d20,<z0=d6 |
| 4513 ++vmlal.u32 q15,d20,d6 |
| 4514 ++ |
| 4515 ++# qhasm: r3[0,1] = c23[2]<<18; r3[2,3] = c23[3]<<18 |
| 4516 ++# asm 1: vshll.u32 >r3=reg128#5,<c23=reg128#2%top,#18 |
| 4517 ++# asm 2: vshll.u32 >r3=q4,<c23=d3,#18 |
| 4518 ++vshll.u32 q4,d3,#18 |
| 4519 ++ |
| 4520 ++# qhasm: c01 c23 = c01[0]c23[0]c01[2]c01[3]c01[1]c23[1]c23[2]c23[3] |
| 4521 ++# asm 1: vtrn.32 <c01=reg128#1%bot,<c23=reg128#2%bot |
| 4522 ++# asm 2: vtrn.32 <c01=d0,<c23=d2 |
| 4523 ++vtrn.32 d0,d2 |
| 4524 ++ |
| 4525 ++# qhasm: r3[0,1] += x01[0] unsigned* z34[0]; r3[2,3] += x01[1] unsigned* z34
[1] |
| 4526 ++# asm 1: vmlal.u32 <r3=reg128#5,<x01=reg128#9%bot,<z34=reg128#6%bot |
| 4527 ++# asm 2: vmlal.u32 <r3=q4,<x01=d16,<z34=d10 |
| 4528 ++vmlal.u32 q4,d16,d10 |
| 4529 ++ |
| 4530 ++# qhasm: r3[0,1] += x01[2] unsigned* z12[2]; r3[2,3] += x01[3] unsigned* z12
[3] |
| 4531 ++# asm 1: vmlal.u32 <r3=reg128#5,<x01=reg128#9%top,<z12=reg128#3%top |
| 4532 ++# asm 2: vmlal.u32 <r3=q4,<x01=d17,<z12=d5 |
| 4533 ++vmlal.u32 q4,d17,d5 |
| 4534 ++ |
| 4535 ++# qhasm: r0 = r0[1]c01[0]r0[2,3] |
| 4536 ++# asm 1: vext.32 <r0=reg128#8%bot,<r0=reg128#8%bot,<c01=reg128#1%bot,#1 |
| 4537 ++# asm 2: vext.32 <r0=d14,<r0=d14,<c01=d0,#1 |
| 4538 ++vext.32 d14,d14,d0,#1 |
| 4539 ++ |
| 4540 ++# qhasm: r3[0,1] += x23[0] unsigned* z12[0]; r3[2,3] += x23[1] unsigned* z12
[1] |
| 4541 ++# asm 1: vmlal.u32 <r3=reg128#5,<x23=reg128#10%bot,<z12=reg128#3%bot |
| 4542 ++# asm 2: vmlal.u32 <r3=q4,<x23=d18,<z12=d4 |
| 4543 ++vmlal.u32 q4,d18,d4 |
| 4544 ++ |
| 4545 ++# qhasm: input_2
-= 64 |
| 4546 ++# asm 1: sub >input_2=int32#2,<input_2=int32#2,#64 |
| 4547 ++# asm 2: sub >input_2=r1,<input_2=r1,#64 |
| 4548 ++sub r1,r1,#64 |
| 4549 ++ |
| 4550 ++# qhasm: r3[0,1] += x23[2] unsigned* z0[0]; r3[2,3] += x23[3] unsigned* z0[1
] |
| 4551 ++# asm 1: vmlal.u32 <r3=reg128#5,<x23=reg128#10%top,<z0=reg128#4%bot |
| 4552 ++# asm 2: vmlal.u32 <r3=q4,<x23=d19,<z0=d6 |
| 4553 ++vmlal.u32 q4,d19,d6 |
| 4554 ++ |
| 4555 ++# qhasm: ptr = &5z34_stack |
| 4556 ++# asm 1: lea >ptr=int32#3,<5z34_stack=stack128#11 |
| 4557 ++# asm 2: lea >ptr=r2,<5z34_stack=[sp,#160] |
| 4558 ++add r2,sp,#160 |
| 4559 ++ |
| 4560 ++# qhasm: 5z34 aligned= mem128[ptr] |
| 4561 ++# asm 1: vld1.8 {>5z34=reg128#6%bot->5z34=reg128#6%top},[<ptr=int32#3,: 128] |
| 4562 ++# asm 2: vld1.8 {>5z34=d10->5z34=d11},[<ptr=r2,: 128] |
| 4563 ++vld1.8 {d10-d11},[r2,: 128] |
| 4564 ++ |
| 4565 ++# qhasm: r3[0,1] += x4[0] unsigned* 5z34[2]; r3[2,3] += x4[1] unsigned* 5z
34[3] |
| 4566 ++# asm 1: vmlal.u32 <r3=reg128#5,<x4=reg128#11%bot,<5z34=reg128#6%top |
| 4567 ++# asm 2: vmlal.u32 <r3=q4,<x4=d20,<5z34=d11 |
| 4568 ++vmlal.u32 q4,d20,d11 |
| 4569 ++ |
| 4570 ++# qhasm: r0 = r0[1]r0[0]r0[3]r0[2] |
| 4571 ++# asm 1: vrev64.i32 >r0=reg128#8,<r0=reg128#8 |
| 4572 ++# asm 2: vrev64.i32 >r0=q7,<r0=q7 |
| 4573 ++vrev64.i32 q7,q7 |
| 4574 ++ |
| 4575 ++# qhasm: r2[0,1] = c01[2]<<12; r2[2,3] = c01[3]<<12 |
| 4576 ++# asm 1: vshll.u32 >r2=reg128#14,<c01=reg128#1%top,#12 |
| 4577 ++# asm 2: vshll.u32 >r2=q13,<c01=d1,#12 |
| 4578 ++vshll.u32 q13,d1,#12 |
| 4579 ++ |
| 4580 ++# qhasm: d01 = mem128[input_2];input_2+=16 |
| 4581 ++# asm 1: vld1.8 {>d01=reg128#12%bot->d01=reg128#12%top},[<input_2=int32#2]! |
| 4582 ++# asm 2: vld1.8 {>d01=d22->d01=d23},[<input_2=r1]! |
| 4583 ++vld1.8 {d22-d23},[r1]! |
| 4584 ++ |
| 4585 ++# qhasm: r2[0,1] += x01[0] unsigned* z12[2]; r2[2,3] += x01[1] unsigned* z12
[3] |
| 4586 ++# asm 1: vmlal.u32 <r2=reg128#14,<x01=reg128#9%bot,<z12=reg128#3%top |
| 4587 ++# asm 2: vmlal.u32 <r2=q13,<x01=d16,<z12=d5 |
| 4588 ++vmlal.u32 q13,d16,d5 |
| 4589 ++ |
| 4590 ++# qhasm: r2[0,1] += x01[2] unsigned* z12[0]; r2[2,3] += x01[3] unsigned* z12
[1] |
| 4591 ++# asm 1: vmlal.u32 <r2=reg128#14,<x01=reg128#9%top,<z12=reg128#3%bot |
| 4592 ++# asm 2: vmlal.u32 <r2=q13,<x01=d17,<z12=d4 |
| 4593 ++vmlal.u32 q13,d17,d4 |
| 4594 ++ |
| 4595 ++# qhasm: r2[0,1] += x23[0] unsigned* z0[0]; r2[2,3] += x23[1] unsigned* z0[1
] |
| 4596 ++# asm 1: vmlal.u32 <r2=reg128#14,<x23=reg128#10%bot,<z0=reg128#4%bot |
| 4597 ++# asm 2: vmlal.u32 <r2=q13,<x23=d18,<z0=d6 |
| 4598 ++vmlal.u32 q13,d18,d6 |
| 4599 ++ |
| 4600 ++# qhasm: r2[0,1] += x23[2] unsigned* 5z34[2]; r2[2,3] += x23[3] unsigned* 5z
34[3] |
| 4601 ++# asm 1: vmlal.u32 <r2=reg128#14,<x23=reg128#10%top,<5z34=reg128#6%top |
| 4602 ++# asm 2: vmlal.u32 <r2=q13,<x23=d19,<5z34=d11 |
| 4603 ++vmlal.u32 q13,d19,d11 |
| 4604 ++ |
| 4605 ++# qhasm: r2[0,1] += x4[0] unsigned* 5z34[0]; r2[2,3] += x4[1] unsigned* 5z34
[1] |
| 4606 ++# asm 1: vmlal.u32 <r2=reg128#14,<x4=reg128#11%bot,<5z34=reg128#6%bot |
| 4607 ++# asm 2: vmlal.u32 <r2=q13,<x4=d20,<5z34=d10 |
| 4608 ++vmlal.u32 q13,d20,d10 |
| 4609 ++ |
| 4610 ++# qhasm: r0 = r0[0,1]c01[1]r0[2] |
| 4611 ++# asm 1: vext.32 <r0=reg128#8%top,<c01=reg128#1%bot,<r0=reg128#8%top,#1 |
| 4612 ++# asm 2: vext.32 <r0=d15,<c01=d0,<r0=d15,#1 |
| 4613 ++vext.32 d15,d0,d15,#1 |
| 4614 ++ |
| 4615 ++# qhasm: r1[0,1] = c23[0]<<6; r1[2,3] = c23[1]<<6 |
| 4616 ++# asm 1: vshll.u32 >r1=reg128#15,<c23=reg128#2%bot,#6 |
| 4617 ++# asm 2: vshll.u32 >r1=q14,<c23=d2,#6 |
| 4618 ++vshll.u32 q14,d2,#6 |
| 4619 ++ |
| 4620 ++# qhasm: r1[0,1] += x01[0] unsigned* z12[0]; r1[2,3] += x01[1] unsigned* z12
[1] |
| 4621 ++# asm 1: vmlal.u32 <r1=reg128#15,<x01=reg128#9%bot,<z12=reg128#3%bot |
| 4622 ++# asm 2: vmlal.u32 <r1=q14,<x01=d16,<z12=d4 |
| 4623 ++vmlal.u32 q14,d16,d4 |
| 4624 ++ |
| 4625 ++# qhasm: r1[0,1] += x01[2] unsigned* z0[0]; r1[2,3] += x01[3] unsigned* z0[1
] |
| 4626 ++# asm 1: vmlal.u32 <r1=reg128#15,<x01=reg128#9%top,<z0=reg128#4%bot |
| 4627 ++# asm 2: vmlal.u32 <r1=q14,<x01=d17,<z0=d6 |
| 4628 ++vmlal.u32 q14,d17,d6 |
| 4629 ++ |
| 4630 ++# qhasm: r1[0,1] += x23[0] unsigned* 5z34[2]; r1[2,3] += x23[1] unsigned* 5z
34[3] |
| 4631 ++# asm 1: vmlal.u32 <r1=reg128#15,<x23=reg128#10%bot,<5z34=reg128#6%top |
| 4632 ++# asm 2: vmlal.u32 <r1=q14,<x23=d18,<5z34=d11 |
| 4633 ++vmlal.u32 q14,d18,d11 |
| 4634 ++ |
| 4635 ++# qhasm: r1[0,1] += x23[2] unsigned* 5z34[0]; r1[2,3] += x23[3] unsigned* 5z34
[1] |
| 4636 ++# asm 1: vmlal.u32 <r1=reg128#15,<x23=reg128#10%top,<5z34=reg128#6%bot |
| 4637 ++# asm 2: vmlal.u32 <r1=q14,<x23=d19,<5z34=d10 |
| 4638 ++vmlal.u32 q14,d19,d10 |
| 4639 ++ |
| 4640 ++# qhasm: ptr = &5z12_stack |
| 4641 ++# asm 1: lea >ptr=int32#3,<5z12_stack=stack128#10 |
| 4642 ++# asm 2: lea >ptr=r2,<5z12_stack=[sp,#144] |
| 4643 ++add r2,sp,#144 |
| 4644 ++ |
| 4645 ++# qhasm: 5z12 aligned= mem128[ptr] |
| 4646 ++# asm 1: vld1.8 {>5z12=reg128#1%bot->5z12=reg128#1%top},[<ptr=int32#3,: 128] |
| 4647 ++# asm 2: vld1.8 {>5z12=d0->5z12=d1},[<ptr=r2,: 128] |
| 4648 ++vld1.8 {d0-d1},[r2,: 128] |
| 4649 ++ |
| 4650 ++# qhasm: r1[0,1] += x4[0] unsigned* 5z12[2]; r1[2,3] += x4[1] unsigned* 5z12
[3] |
| 4651 ++# asm 1: vmlal.u32 <r1=reg128#15,<x4=reg128#11%bot,<5z12=reg128#1%top |
| 4652 ++# asm 2: vmlal.u32 <r1=q14,<x4=d20,<5z12=d1 |
| 4653 ++vmlal.u32 q14,d20,d1 |
| 4654 ++ |
| 4655 ++# qhasm: d23 = mem128[input_2];input_2+=16 |
| 4656 ++# asm 1: vld1.8 {>d23=reg128#2%bot->d23=reg128#2%top},[<input_2=int32#2]! |
| 4657 ++# asm 2: vld1.8 {>d23=d2->d23=d3},[<input_2=r1]! |
| 4658 ++vld1.8 {d2-d3},[r1]! |
| 4659 ++ |
| 4660 ++# qhasm: input_2 += 32 |
| 4661 ++# asm 1: add >input_2=int32#2,<input_2=int32#2,#32 |
| 4662 ++# asm 2: add >input_2=r1,<input_2=r1,#32 |
| 4663 ++add r1,r1,#32 |
| 4664 ++ |
| 4665 ++# qhasm: r0[0,1] += x4[0] unsigned* 5z12[0]; r0[2,3] += x4[1] unsigned* 5z12
[1] |
| 4666 ++# asm 1: vmlal.u32 <r0=reg128#8,<x4=reg128#11%bot,<5z12=reg128#1%bot |
| 4667 ++# asm 2: vmlal.u32 <r0=q7,<x4=d20,<5z12=d0 |
| 4668 ++vmlal.u32 q7,d20,d0 |
| 4669 ++ |
| 4670 ++# qhasm: r0[0,1] += x23[0] unsigned* 5z34[0]; r0[2,3] += x23[1] unsigned* 5z34
[1] |
| 4671 ++# asm 1: vmlal.u32 <r0=reg128#8,<x23=reg128#10%bot,<5z34=reg128#6%bot |
| 4672 ++# asm 2: vmlal.u32 <r0=q7,<x23=d18,<5z34=d10 |
| 4673 ++vmlal.u32 q7,d18,d10 |
| 4674 ++ |
| 4675 ++# qhasm: d01 d23 = d01[0] d23[0] d01[1] d23[1] |
| 4676 ++# asm 1: vswp <d23=reg128#2%bot,<d01=reg128#12%top |
| 4677 ++# asm 2: vswp <d23=d2,<d01=d23 |
| 4678 ++vswp d2,d23 |
| 4679 ++ |
| 4680 ++# qhasm: r0[0,1] += x23[2] unsigned* 5z12[2]; r0[2,3] += x23[3] unsigned* 5z12
[3] |
| 4681 ++# asm 1: vmlal.u32 <r0=reg128#8,<x23=reg128#10%top,<5z12=reg128#1%top |
| 4682 ++# asm 2: vmlal.u32 <r0=q7,<x23=d19,<5z12=d1 |
| 4683 ++vmlal.u32 q7,d19,d1 |
| 4684 ++ |
| 4685 ++# qhasm: r0[0,1] += x01[0] unsigned* z0[0]; r0[2,3] += x01[1] unsigned* z0[1
] |
| 4686 ++# asm 1: vmlal.u32 <r0=reg128#8,<x01=reg128#9%bot,<z0=reg128#4%bot |
| 4687 ++# asm 2: vmlal.u32 <r0=q7,<x01=d16,<z0=d6 |
| 4688 ++vmlal.u32 q7,d16,d6 |
| 4689 ++ |
| 4690 ++# qhasm: new mid |
| 4691 ++ |
| 4692 ++# qhasm: 2x v4 = d23 unsigned>> 40 |
| 4693 ++# asm 1: vshr.u64 >v4=reg128#4,<d23=reg128#2,#40 |
| 4694 ++# asm 2: vshr.u64 >v4=q3,<d23=q1,#40 |
| 4695 ++vshr.u64 q3,q1,#40 |
| 4696 ++ |
| 4697 ++# qhasm: mid = d01[1]d23[0] mid[2,3] |
| 4698 ++# asm 1: vext.32 <mid=reg128#1%bot,<d01=reg128#12%bot,<d23=reg128#2%bot,#1 |
| 4699 ++# asm 2: vext.32 <mid=d0,<d01=d22,<d23=d2,#1 |
| 4700 ++vext.32 d0,d22,d2,#1 |
| 4701 ++ |
| 4702 ++# qhasm: new v23 |
| 4703 ++ |
| 4704 ++# qhasm: v23[2] = d23[0,1] unsigned>> 14; v23[3] = d23[2,3] unsig
ned>> 14 |
| 4705 ++# asm 1: vshrn.u64 <v23=reg128#10%top,<d23=reg128#2,#14 |
| 4706 ++# asm 2: vshrn.u64 <v23=d19,<d23=q1,#14 |
| 4707 ++vshrn.u64 d19,q1,#14 |
| 4708 ++ |
| 4709 ++# qhasm: mid = mid[0,1] d01[3]d23[2] |
| 4710 ++# asm 1: vext.32 <mid=reg128#1%top,<d01=reg128#12%top,<d23=reg128#2%top,#1 |
| 4711 ++# asm 2: vext.32 <mid=d1,<d01=d23,<d23=d3,#1 |
| 4712 ++vext.32 d1,d23,d3,#1 |
| 4713 ++ |
| 4714 ++# qhasm: new v01 |
| 4715 ++ |
| 4716 ++# qhasm: v01[2] = d01[0,1] unsigned>> 26; v01[3] = d01[2,3] unsig
ned>> 26 |
| 4717 ++# asm 1: vshrn.u64 <v01=reg128#11%top,<d01=reg128#12,#26 |
| 4718 ++# asm 2: vshrn.u64 <v01=d21,<d01=q11,#26 |
| 4719 ++vshrn.u64 d21,q11,#26 |
| 4720 ++ |
| 4721 ++# qhasm: v01 = d01[1]d01[0] v01[2,3] |
| 4722 ++# asm 1: vext.32 <v01=reg128#11%bot,<d01=reg128#12%bot,<d01=reg128#12%bot,#1 |
| 4723 ++# asm 2: vext.32 <v01=d20,<d01=d22,<d01=d22,#1 |
| 4724 ++vext.32 d20,d22,d22,#1 |
| 4725 ++ |
| 4726 ++# qhasm: r0[0,1] += x01[2] unsigned* 5z34[2]; r0[2,3] += x01[3] unsigned* 5z
34[3] |
| 4727 ++# asm 1: vmlal.u32 <r0=reg128#8,<x01=reg128#9%top,<5z34=reg128#6%top |
| 4728 ++# asm 2: vmlal.u32 <r0=q7,<x01=d17,<5z34=d11 |
| 4729 ++vmlal.u32 q7,d17,d11 |
| 4730 ++ |
| 4731 ++# qhasm: v01 = v01[1]d01[2] v01[2,3] |
| 4732 ++# asm 1: vext.32 <v01=reg128#11%bot,<v01=reg128#11%bot,<d01=reg128#12%top,#1 |
| 4733 ++# asm 2: vext.32 <v01=d20,<v01=d20,<d01=d23,#1 |
| 4734 ++vext.32 d20,d20,d23,#1 |
| 4735 ++ |
| 4736 ++# qhasm: v23[0] = mid[0,1] unsigned>> 20; v23[1] = mid[2,3] unsig
ned>> 20 |
| 4737 ++# asm 1: vshrn.u64 <v23=reg128#10%bot,<mid=reg128#1,#20 |
| 4738 ++# asm 2: vshrn.u64 <v23=d18,<mid=q0,#20 |
| 4739 ++vshrn.u64 d18,q0,#20 |
| 4740 ++ |
| 4741 ++# qhasm: v4 = v4[0]v4[2]v4[1]v4[3] |
| 4742 ++# asm 1: vtrn.32 <v4=reg128#4%bot,<v4=reg128#4%top |
| 4743 ++# asm 2: vtrn.32 <v4=d6,<v4=d7 |
| 4744 ++vtrn.32 d6,d7 |
| 4745 ++ |
| 4746 ++# qhasm: 4x v01 &= 0x03ffffff |
| 4747 ++# asm 1: vand.i32 <v01=reg128#11,#0x03ffffff |
| 4748 ++# asm 2: vand.i32 <v01=q10,#0x03ffffff |
| 4749 ++vand.i32 q10,#0x03ffffff |
| 4750 ++ |
| 4751 ++# qhasm: ptr = &y34_stack |
| 4752 ++# asm 1: lea >ptr=int32#3,<y34_stack=stack128#4 |
| 4753 ++# asm 2: lea >ptr=r2,<y34_stack=[sp,#48] |
| 4754 ++add r2,sp,#48 |
| 4755 ++ |
| 4756 ++# qhasm: y34 aligned= mem128[ptr] |
| 4757 ++# asm 1: vld1.8 {>y34=reg128#3%bot->y34=reg128#3%top},[<ptr=int32#3,: 128] |
| 4758 ++# asm 2: vld1.8 {>y34=d4->y34=d5},[<ptr=r2,: 128] |
| 4759 ++vld1.8 {d4-d5},[r2,: 128] |
| 4760 ++ |
| 4761 ++# qhasm: 4x v23 &= 0x03ffffff |
| 4762 ++# asm 1: vand.i32 <v23=reg128#10,#0x03ffffff |
| 4763 ++# asm 2: vand.i32 <v23=q9,#0x03ffffff |
| 4764 ++vand.i32 q9,#0x03ffffff |
| 4765 ++ |
| 4766 ++# qhasm: ptr = &y12_stack |
| 4767 ++# asm 1: lea >ptr=int32#3,<y12_stack=stack128#3 |
| 4768 ++# asm 2: lea >ptr=r2,<y12_stack=[sp,#32] |
| 4769 ++add r2,sp,#32 |
| 4770 ++ |
| 4771 ++# qhasm: y12 aligned= mem128[ptr] |
| 4772 ++# asm 1: vld1.8 {>y12=reg128#2%bot->y12=reg128#2%top},[<ptr=int32#3,: 128] |
| 4773 ++# asm 2: vld1.8 {>y12=d2->y12=d3},[<ptr=r2,: 128] |
| 4774 ++vld1.8 {d2-d3},[r2,: 128] |
| 4775 ++ |
| 4776 ++# qhasm: 4x v4 |= 0x01000000 |
| 4777 ++# asm 1: vorr.i32 <v4=reg128#4,#0x01000000 |
| 4778 ++# asm 2: vorr.i32 <v4=q3,#0x01000000 |
| 4779 ++vorr.i32 q3,#0x01000000 |
| 4780 ++ |
| 4781 ++# qhasm: ptr = &y0_stack |
| 4782 ++# asm 1: lea >ptr=int32#3,<y0_stack=stack128#2 |
| 4783 ++# asm 2: lea >ptr=r2,<y0_stack=[sp,#16] |
| 4784 ++add r2,sp,#16 |
| 4785 ++ |
| 4786 ++# qhasm: y0 aligned= mem128[ptr] |
| 4787 ++# asm 1: vld1.8 {>y0=reg128#1%bot->y0=reg128#1%top},[<ptr=int32#3,: 128] |
| 4788 ++# asm 2: vld1.8 {>y0=d0->y0=d1},[<ptr=r2,: 128] |
| 4789 ++vld1.8 {d0-d1},[r2,: 128] |
| 4790 ++ |
| 4791 ++# qhasm: r4[0,1] += v01[0] unsigned* y34[2]; r4[2,3] += v01[1] unsigned* y3
4[3] |
| 4792 ++# asm 1: vmlal.u32 <r4=reg128#16,<v01=reg128#11%bot,<y34=reg128#3%top |
| 4793 ++# asm 2: vmlal.u32 <r4=q15,<v01=d20,<y34=d5 |
| 4794 ++vmlal.u32 q15,d20,d5 |
| 4795 ++ |
| 4796 ++# qhasm: r4[0,1] += v01[2] unsigned* y34[0]; r4[2,3] += v01[3] unsigned* y34[
1] |
| 4797 ++# asm 1: vmlal.u32 <r4=reg128#16,<v01=reg128#11%top,<y34=reg128#3%bot |
| 4798 ++# asm 2: vmlal.u32 <r4=q15,<v01=d21,<y34=d4 |
| 4799 ++vmlal.u32 q15,d21,d4 |
| 4800 ++ |
| 4801 ++# qhasm: r4[0,1] += v23[0] unsigned* y12[2]; r4[2,3] += v23[1] unsigned* y12[
3] |
| 4802 ++# asm 1: vmlal.u32 <r4=reg128#16,<v23=reg128#10%bot,<y12=reg128#2%top |
| 4803 ++# asm 2: vmlal.u32 <r4=q15,<v23=d18,<y12=d3 |
| 4804 ++vmlal.u32 q15,d18,d3 |
| 4805 ++ |
| 4806 ++# qhasm: r4[0,1] += v23[2] unsigned* y12[0]; r4[2,3] += v23[3] unsigned* y12[
1] |
| 4807 ++# asm 1: vmlal.u32 <r4=reg128#16,<v23=reg128#10%top,<y12=reg128#2%bot |
| 4808 ++# asm 2: vmlal.u32 <r4=q15,<v23=d19,<y12=d2 |
| 4809 ++vmlal.u32 q15,d19,d2 |
| 4810 ++ |
| 4811 ++# qhasm: r4[0,1] += v4[0] unsigned* y0[0]; r4[2,3] += v4[1] unsigned* y0[1] |
| 4812 ++# asm 1: vmlal.u32 <r4=reg128#16,<v4=reg128#4%bot,<y0=reg128#1%bot |
| 4813 ++# asm 2: vmlal.u32 <r4=q15,<v4=d6,<y0=d0 |
| 4814 ++vmlal.u32 q15,d6,d0 |
| 4815 ++ |
| 4816 ++# qhasm: ptr = &5y34_stack |
| 4817 ++# asm 1: lea >ptr=int32#3,<5y34_stack=stack128#6 |
| 4818 ++# asm 2: lea >ptr=r2,<5y34_stack=[sp,#80] |
| 4819 ++add r2,sp,#80 |
| 4820 ++ |
| 4821 ++# qhasm: 5y34 aligned= mem128[ptr] |
| 4822 ++# asm 1: vld1.8 {>5y34=reg128#13%bot->5y34=reg128#13%top},[<ptr=int32#3,: 128] |
| 4823 ++# asm 2: vld1.8 {>5y34=d24->5y34=d25},[<ptr=r2,: 128] |
| 4824 ++vld1.8 {d24-d25},[r2,: 128] |
| 4825 ++ |
| 4826 ++# qhasm: r3[0,1] += v01[0] unsigned* y34[0]; r3[2,3] += v01[1] unsigned* y34
[1] |
| 4827 ++# asm 1: vmlal.u32 <r3=reg128#5,<v01=reg128#11%bot,<y34=reg128#3%bot |
| 4828 ++# asm 2: vmlal.u32 <r3=q4,<v01=d20,<y34=d4 |
| 4829 ++vmlal.u32 q4,d20,d4 |
| 4830 ++ |
| 4831 ++# qhasm: r3[0,1] += v01[2] unsigned* y12[2]; r3[2,3] += v01[3] unsigned* y12
[3] |
| 4832 ++# asm 1: vmlal.u32 <r3=reg128#5,<v01=reg128#11%top,<y12=reg128#2%top |
| 4833 ++# asm 2: vmlal.u32 <r3=q4,<v01=d21,<y12=d3 |
| 4834 ++vmlal.u32 q4,d21,d3 |
| 4835 ++ |
| 4836 ++# qhasm: r3[0,1] += v23[0] unsigned* y12[0]; r3[2,3] += v23[1] unsigned* y12
[1] |
| 4837 ++# asm 1: vmlal.u32 <r3=reg128#5,<v23=reg128#10%bot,<y12=reg128#2%bot |
| 4838 ++# asm 2: vmlal.u32 <r3=q4,<v23=d18,<y12=d2 |
| 4839 ++vmlal.u32 q4,d18,d2 |
| 4840 ++ |
| 4841 ++# qhasm: r3[0,1] += v23[2] unsigned* y0[0]; r3[2,3] += v23[3] unsigned* y0[1
] |
| 4842 ++# asm 1: vmlal.u32 <r3=reg128#5,<v23=reg128#10%top,<y0=reg128#1%bot |
| 4843 ++# asm 2: vmlal.u32 <r3=q4,<v23=d19,<y0=d0 |
| 4844 ++vmlal.u32 q4,d19,d0 |
| 4845 ++ |
| 4846 ++# qhasm: r3[0,1] += v4[0] unsigned* 5y34[2]; r3[2,3] += v4[1] unsigned* 5y
34[3] |
| 4847 ++# asm 1: vmlal.u32 <r3=reg128#5,<v4=reg128#4%bot,<5y34=reg128#13%top |
| 4848 ++# asm 2: vmlal.u32 <r3=q4,<v4=d6,<5y34=d25 |
| 4849 ++vmlal.u32 q4,d6,d25 |
| 4850 ++ |
| 4851 ++# qhasm: ptr = &5y12_stack |
| 4852 ++# asm 1: lea >ptr=int32#3,<5y12_stack=stack128#5 |
| 4853 ++# asm 2: lea >ptr=r2,<5y12_stack=[sp,#64] |
| 4854 ++add r2,sp,#64 |
| 4855 ++ |
| 4856 ++# qhasm: 5y12 aligned= mem128[ptr] |
| 4857 ++# asm 1: vld1.8 {>5y12=reg128#12%bot->5y12=reg128#12%top},[<ptr=int32#3,: 128] |
| 4858 ++# asm 2: vld1.8 {>5y12=d22->5y12=d23},[<ptr=r2,: 128] |
| 4859 ++vld1.8 {d22-d23},[r2,: 128] |
| 4860 ++ |
| 4861 ++# qhasm: r0[0,1] += v4[0] unsigned* 5y12[0]; r0[2,3] += v4[1] unsigned* 5y12
[1] |
| 4862 ++# asm 1: vmlal.u32 <r0=reg128#8,<v4=reg128#4%bot,<5y12=reg128#12%bot |
| 4863 ++# asm 2: vmlal.u32 <r0=q7,<v4=d6,<5y12=d22 |
| 4864 ++vmlal.u32 q7,d6,d22 |
| 4865 ++ |
| 4866 ++# qhasm: r0[0,1] += v23[0] unsigned* 5y34[0]; r0[2,3] += v23[1] unsigned* 5y34
[1] |
| 4867 ++# asm 1: vmlal.u32 <r0=reg128#8,<v23=reg128#10%bot,<5y34=reg128#13%bot |
| 4868 ++# asm 2: vmlal.u32 <r0=q7,<v23=d18,<5y34=d24 |
| 4869 ++vmlal.u32 q7,d18,d24 |
| 4870 ++ |
| 4871 ++# qhasm: r0[0,1] += v23[2] unsigned* 5y12[2]; r0[2,3] += v23[3] unsigned* 5y12
[3] |
| 4872 ++# asm 1: vmlal.u32 <r0=reg128#8,<v23=reg128#10%top,<5y12=reg128#12%top |
| 4873 ++# asm 2: vmlal.u32 <r0=q7,<v23=d19,<5y12=d23 |
| 4874 ++vmlal.u32 q7,d19,d23 |
| 4875 ++ |
| 4876 ++# qhasm: r0[0,1] += v01[0] unsigned* y0[0]; r0[2,3] += v01[1] unsigned* y0[1
] |
| 4877 ++# asm 1: vmlal.u32 <r0=reg128#8,<v01=reg128#11%bot,<y0=reg128#1%bot |
| 4878 ++# asm 2: vmlal.u32 <r0=q7,<v01=d20,<y0=d0 |
| 4879 ++vmlal.u32 q7,d20,d0 |
| 4880 ++ |
| 4881 ++# qhasm: r0[0,1] += v01[2] unsigned* 5y34[2]; r0[2,3] += v01[3] unsigned* 5y
34[3] |
| 4882 ++# asm 1: vmlal.u32 <r0=reg128#8,<v01=reg128#11%top,<5y34=reg128#13%top |
| 4883 ++# asm 2: vmlal.u32 <r0=q7,<v01=d21,<5y34=d25 |
| 4884 ++vmlal.u32 q7,d21,d25 |
| 4885 ++ |
| 4886 ++# qhasm: r1[0,1] += v01[0] unsigned* y12[0]; r1[2,3] += v01[1] unsigned* y12
[1] |
| 4887 ++# asm 1: vmlal.u32 <r1=reg128#15,<v01=reg128#11%bot,<y12=reg128#2%bot |
| 4888 ++# asm 2: vmlal.u32 <r1=q14,<v01=d20,<y12=d2 |
| 4889 ++vmlal.u32 q14,d20,d2 |
| 4890 ++ |
| 4891 ++# qhasm: r1[0,1] += v01[2] unsigned* y0[0]; r1[2,3] += v01[3] unsigned* y0[1
] |
| 4892 ++# asm 1: vmlal.u32 <r1=reg128#15,<v01=reg128#11%top,<y0=reg128#1%bot |
| 4893 ++# asm 2: vmlal.u32 <r1=q14,<v01=d21,<y0=d0 |
| 4894 ++vmlal.u32 q14,d21,d0 |
| 4895 ++ |
| 4896 ++# qhasm: r1[0,1] += v23[0] unsigned* 5y34[2]; r1[2,3] += v23[1] unsigned* 5y
34[3] |
| 4897 ++# asm 1: vmlal.u32 <r1=reg128#15,<v23=reg128#10%bot,<5y34=reg128#13%top |
| 4898 ++# asm 2: vmlal.u32 <r1=q14,<v23=d18,<5y34=d25 |
| 4899 ++vmlal.u32 q14,d18,d25 |
| 4900 ++ |
| 4901 ++# qhasm: r1[0,1] += v23[2] unsigned* 5y34[0]; r1[2,3] += v23[3] unsigned* 5y34
[1] |
| 4902 ++# asm 1: vmlal.u32 <r1=reg128#15,<v23=reg128#10%top,<5y34=reg128#13%bot |
| 4903 ++# asm 2: vmlal.u32 <r1=q14,<v23=d19,<5y34=d24 |
| 4904 ++vmlal.u32 q14,d19,d24 |
| 4905 ++ |
| 4906 ++# qhasm: r1[0,1] += v4[0] unsigned* 5y12[2]; r1[2,3] += v4[1] unsigned* 5y12
[3] |
| 4907 ++# asm 1: vmlal.u32 <r1=reg128#15,<v4=reg128#4%bot,<5y12=reg128#12%top |
| 4908 ++# asm 2: vmlal.u32 <r1=q14,<v4=d6,<5y12=d23 |
| 4909 ++vmlal.u32 q14,d6,d23 |
| 4910 ++ |
| 4911 ++# qhasm: r2[0,1] += v01[0] unsigned* y12[2]; r2[2,3] += v01[1] unsigned* y12
[3] |
| 4912 ++# asm 1: vmlal.u32 <r2=reg128#14,<v01=reg128#11%bot,<y12=reg128#2%top |
| 4913 ++# asm 2: vmlal.u32 <r2=q13,<v01=d20,<y12=d3 |
| 4914 ++vmlal.u32 q13,d20,d3 |
| 4915 ++ |
| 4916 ++# qhasm: r2[0,1] += v01[2] unsigned* y12[0]; r2[2,3] += v01[3] unsigned* y12
[1] |
| 4917 ++# asm 1: vmlal.u32 <r2=reg128#14,<v01=reg128#11%top,<y12=reg128#2%bot |
| 4918 ++# asm 2: vmlal.u32 <r2=q13,<v01=d21,<y12=d2 |
| 4919 ++vmlal.u32 q13,d21,d2 |
| 4920 ++ |
| 4921 ++# qhasm: r2[0,1] += v23[0] unsigned* y0[0]; r2[2,3] += v23[1] unsigned* y0[1
] |
| 4922 ++# asm 1: vmlal.u32 <r2=reg128#14,<v23=reg128#10%bot,<y0=reg128#1%bot |
| 4923 ++# asm 2: vmlal.u32 <r2=q13,<v23=d18,<y0=d0 |
| 4924 ++vmlal.u32 q13,d18,d0 |
| 4925 ++ |
| 4926 ++# qhasm: r2[0,1] += v23[2] unsigned* 5y34[2]; r2[2,3] += v23[3] unsigned* 5y
34[3] |
| 4927 ++# asm 1: vmlal.u32 <r2=reg128#14,<v23=reg128#10%top,<5y34=reg128#13%top |
| 4928 ++# asm 2: vmlal.u32 <r2=q13,<v23=d19,<5y34=d25 |
| 4929 ++vmlal.u32 q13,d19,d25 |
| 4930 ++ |
| 4931 ++# qhasm: r2[0,1] += v4[0] unsigned* 5y34[0]; r2[2,3] += v4[1] unsigned* 5y34
[1] |
| 4932 ++# asm 1: vmlal.u32 <r2=reg128#14,<v4=reg128#4%bot,<5y34=reg128#13%bot |
| 4933 ++# asm 2: vmlal.u32 <r2=q13,<v4=d6,<5y34=d24 |
| 4934 ++vmlal.u32 q13,d6,d24 |
| 4935 ++ |
| 4936 ++# qhasm: ptr = &two24 |
| 4937 ++# asm 1: lea >ptr=int32#3,<two24=stack128#1 |
| 4938 ++# asm 2: lea >ptr=r2,<two24=[sp,#0] |
| 4939 ++add r2,sp,#0 |
| 4940 ++ |
| 4941 ++# qhasm: 2x t1 = r0 unsigned>> 26 |
| 4942 ++# asm 1: vshr.u64 >t1=reg128#4,<r0=reg128#8,#26 |
| 4943 ++# asm 2: vshr.u64 >t1=q3,<r0=q7,#26 |
| 4944 ++vshr.u64 q3,q7,#26 |
| 4945 ++ |
| 4946 ++# qhasm: len -= 64 |
| 4947 ++# asm 1: sub >len=int32#4,<len=int32#4,#64 |
| 4948 ++# asm 2: sub >len=r3,<len=r3,#64 |
| 4949 ++sub r3,r3,#64 |
| 4950 ++ |
| 4951 ++# qhasm: r0 &= mask |
| 4952 ++# asm 1: vand >r0=reg128#6,<r0=reg128#8,<mask=reg128#7 |
| 4953 ++# asm 2: vand >r0=q5,<r0=q7,<mask=q6 |
| 4954 ++vand q5,q7,q6 |
| 4955 ++ |
| 4956 ++# qhasm: 2x r1 += t1 |
| 4957 ++# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#15,<t1=reg128#4 |
| 4958 ++# asm 2: vadd.i64 >r1=q3,<r1=q14,<t1=q3 |
| 4959 ++vadd.i64 q3,q14,q3 |
| 4960 ++ |
| 4961 ++# qhasm: 2x t4 = r3 unsigned>> 26 |
| 4962 ++# asm 1: vshr.u64 >t4=reg128#8,<r3=reg128#5,#26 |
| 4963 ++# asm 2: vshr.u64 >t4=q7,<r3=q4,#26 |
| 4964 ++vshr.u64 q7,q4,#26 |
| 4965 ++ |
| 4966 ++# qhasm: r3 &= mask |
| 4967 ++# asm 1: vand >r3=reg128#5,<r3=reg128#5,<mask=reg128#7 |
| 4968 ++# asm 2: vand >r3=q4,<r3=q4,<mask=q6 |
| 4969 ++vand q4,q4,q6 |
| 4970 ++ |
| 4971 ++# qhasm: 2x x4 = r4 + t4 |
| 4972 ++# asm 1: vadd.i64 >x4=reg128#8,<r4=reg128#16,<t4=reg128#8 |
| 4973 ++# asm 2: vadd.i64 >x4=q7,<r4=q15,<t4=q7 |
| 4974 ++vadd.i64 q7,q15,q7 |
| 4975 ++ |
| 4976 ++# qhasm: r4 aligned= mem128[ptr] |
| 4977 ++# asm 1: vld1.8 {>r4=reg128#16%bot->r4=reg128#16%top},[<ptr=int32#3,: 128] |
| 4978 ++# asm 2: vld1.8 {>r4=d30->r4=d31},[<ptr=r2,: 128] |
| 4979 ++vld1.8 {d30-d31},[r2,: 128] |
| 4980 ++ |
| 4981 ++# qhasm: 2x t2 = r1 unsigned>> 26 |
| 4982 ++# asm 1: vshr.u64 >t2=reg128#9,<r1=reg128#4,#26 |
| 4983 ++# asm 2: vshr.u64 >t2=q8,<r1=q3,#26 |
| 4984 ++vshr.u64 q8,q3,#26 |
| 4985 ++ |
| 4986 ++# qhasm: r1 &= mask |
| 4987 ++# asm 1: vand >r1=reg128#4,<r1=reg128#4,<mask=reg128#7 |
| 4988 ++# asm 2: vand >r1=q3,<r1=q3,<mask=q6 |
| 4989 ++vand q3,q3,q6 |
| 4990 ++ |
| 4991 ++# qhasm: 2x t0 = x4 unsigned>> 26 |
| 4992 ++# asm 1: vshr.u64 >t0=reg128#10,<x4=reg128#8,#26 |
| 4993 ++# asm 2: vshr.u64 >t0=q9,<x4=q7,#26 |
| 4994 ++vshr.u64 q9,q7,#26 |
| 4995 ++ |
| 4996 ++# qhasm: 2x r2 += t2 |
| 4997 ++# asm 1: vadd.i64 >r2=reg128#9,<r2=reg128#14,<t2=reg128#9 |
| 4998 ++# asm 2: vadd.i64 >r2=q8,<r2=q13,<t2=q8 |
| 4999 ++vadd.i64 q8,q13,q8 |
| 5000 ++ |
| 5001 ++# qhasm: x4 &= mask |
| 5002 ++# asm 1: vand >x4=reg128#11,<x4=reg128#8,<mask=reg128#7 |
| 5003 ++# asm 2: vand >x4=q10,<x4=q7,<mask=q6 |
| 5004 ++vand q10,q7,q6 |
| 5005 ++ |
| 5006 ++# qhasm: 2x x01 = r0 + t0 |
| 5007 ++# asm 1: vadd.i64 >x01=reg128#6,<r0=reg128#6,<t0=reg128#10 |
| 5008 ++# asm 2: vadd.i64 >x01=q5,<r0=q5,<t0=q9 |
| 5009 ++vadd.i64 q5,q5,q9 |
| 5010 ++ |
| 5011 ++# qhasm: r0 aligned= mem128[ptr] |
| 5012 ++# asm 1: vld1.8 {>r0=reg128#8%bot->r0=reg128#8%top},[<ptr=int32#3,: 128] |
| 5013 ++# asm 2: vld1.8 {>r0=d14->r0=d15},[<ptr=r2,: 128] |
| 5014 ++vld1.8 {d14-d15},[r2,: 128] |
| 5015 ++ |
| 5016 ++# qhasm: ptr = &z34_stack |
| 5017 ++# asm 1: lea >ptr=int32#3,<z34_stack=stack128#9 |
| 5018 ++# asm 2: lea >ptr=r2,<z34_stack=[sp,#128] |
| 5019 ++add r2,sp,#128 |
| 5020 ++ |
| 5021 ++# qhasm: 2x t0 <<= 2 |
| 5022 ++# asm 1: vshl.i64 >t0=reg128#10,<t0=reg128#10,#2 |
| 5023 ++# asm 2: vshl.i64 >t0=q9,<t0=q9,#2 |
| 5024 ++vshl.i64 q9,q9,#2 |
| 5025 ++ |
| 5026 ++# qhasm: 2x t3 = r2 unsigned>> 26 |
| 5027 ++# asm 1: vshr.u64 >t3=reg128#14,<r2=reg128#9,#26 |
| 5028 ++# asm 2: vshr.u64 >t3=q13,<r2=q8,#26 |
| 5029 ++vshr.u64 q13,q8,#26 |
| 5030 ++ |
| 5031 ++# qhasm: 2x x01 += t0 |
| 5032 ++# asm 1: vadd.i64 >x01=reg128#15,<x01=reg128#6,<t0=reg128#10 |
| 5033 ++# asm 2: vadd.i64 >x01=q14,<x01=q5,<t0=q9 |
| 5034 ++vadd.i64 q14,q5,q9 |
| 5035 ++ |
| 5036 ++# qhasm: z34 aligned= mem128[ptr] |
| 5037 ++# asm 1: vld1.8 {>z34=reg128#6%bot->z34=reg128#6%top},[<ptr=int32#3,: 128] |
| 5038 ++# asm 2: vld1.8 {>z34=d10->z34=d11},[<ptr=r2,: 128] |
| 5039 ++vld1.8 {d10-d11},[r2,: 128] |
| 5040 ++ |
| 5041 ++# qhasm: x23 = r2 & mask |
| 5042 ++# asm 1: vand >x23=reg128#10,<r2=reg128#9,<mask=reg128#7 |
| 5043 ++# asm 2: vand >x23=q9,<r2=q8,<mask=q6 |
| 5044 ++vand q9,q8,q6 |
| 5045 ++ |
| 5046 ++# qhasm: 2x r3 += t3 |
| 5047 ++# asm 1: vadd.i64 >r3=reg128#5,<r3=reg128#5,<t3=reg128#14 |
| 5048 ++# asm 2: vadd.i64 >r3=q4,<r3=q4,<t3=q13 |
| 5049 ++vadd.i64 q4,q4,q13 |
| 5050 ++ |
| 5051 ++# qhasm: input_2
+= 32 |
| 5052 ++# asm 1: add >input_2=int32#2,<input_2=int32#2,#32 |
| 5053 ++# asm 2: add >input_2=r1,<input_2=r1,#32 |
| 5054 ++add r1,r1,#32 |
| 5055 ++ |
| 5056 ++# qhasm: 2x t1 = x01 unsigned>> 26 |
| 5057 ++# asm 1: vshr.u64 >t1=reg128#14,<x01=reg128#15,#26 |
| 5058 ++# asm 2: vshr.u64 >t1=q13,<x01=q14,#26 |
| 5059 ++vshr.u64 q13,q14,#26 |
| 5060 ++ |
| 5061 ++# qhasm: x23 = x23[0,2,1,3] |
| 5062 ++# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top |
| 5063 ++# asm 2: vtrn.32 <x23=d18,<x23=d19 |
| 5064 ++vtrn.32 d18,d19 |
| 5065 ++ |
| 5066 ++# qhasm: x01 = x01 & mask |
| 5067 ++# asm 1: vand >x01=reg128#9,<x01=reg128#15,<mask=reg128#7 |
| 5068 ++# asm 2: vand >x01=q8,<x01=q14,<mask=q6 |
| 5069 ++vand q8,q14,q6 |
| 5070 ++ |
| 5071 ++# qhasm: 2x r1 += t1 |
| 5072 ++# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#4,<t1=reg128#14 |
| 5073 ++# asm 2: vadd.i64 >r1=q3,<r1=q3,<t1=q13 |
| 5074 ++vadd.i64 q3,q3,q13 |
| 5075 ++ |
| 5076 ++# qhasm: 2x t4 = r3 unsigned>> 26 |
| 5077 ++# asm 1: vshr.u64 >t4=reg128#14,<r3=reg128#5,#26 |
| 5078 ++# asm 2: vshr.u64 >t4=q13,<r3=q4,#26 |
| 5079 ++vshr.u64 q13,q4,#26 |
| 5080 ++ |
| 5081 ++# qhasm: x01 = x01[0,2,1,3] |
| 5082 ++# asm 1: vtrn.32 <x01=reg128#9%bot,<x01=reg128#9%top |
| 5083 ++# asm 2: vtrn.32 <x01=d16,<x01=d17 |
| 5084 ++vtrn.32 d16,d17 |
| 5085 ++ |
| 5086 ++# qhasm: r3 &= mask |
| 5087 ++# asm 1: vand >r3=reg128#5,<r3=reg128#5,<mask=reg128#7 |
| 5088 ++# asm 2: vand >r3=q4,<r3=q4,<mask=q6 |
| 5089 ++vand q4,q4,q6 |
| 5090 ++ |
| 5091 ++# qhasm: r1 = r1[0,2,1,3] |
| 5092 ++# asm 1: vtrn.32 <r1=reg128#4%bot,<r1=reg128#4%top |
| 5093 ++# asm 2: vtrn.32 <r1=d6,<r1=d7 |
| 5094 ++vtrn.32 d6,d7 |
| 5095 ++ |
| 5096 ++# qhasm: 2x x4 += t4 |
| 5097 ++# asm 1: vadd.i64 >x4=reg128#11,<x4=reg128#11,<t4=reg128#14 |
| 5098 ++# asm 2: vadd.i64 >x4=q10,<x4=q10,<t4=q13 |
| 5099 ++vadd.i64 q10,q10,q13 |
| 5100 ++ |
| 5101 ++# qhasm: r3 = r3[0,2,1,3] |
| 5102 ++# asm 1: vtrn.32 <r3=reg128#5%bot,<r3=reg128#5%top |
| 5103 ++# asm 2: vtrn.32 <r3=d8,<r3=d9 |
| 5104 ++vtrn.32 d8,d9 |
| 5105 ++ |
| 5106 ++# qhasm: x01 = x01[0,1] r1[0,1] |
| 5107 ++# asm 1: vext.32 <x01=reg128#9%top,<r1=reg128#4%bot,<r1=reg128#4%bot,#0 |
| 5108 ++# asm 2: vext.32 <x01=d17,<r1=d6,<r1=d6,#0 |
| 5109 ++vext.32 d17,d6,d6,#0 |
| 5110 ++ |
| 5111 ++# qhasm: x23 = x23[0,1] r3[0,1] |
| 5112 ++# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#5%bot,<r3=reg128#5%bot,#0 |
| 5113 ++# asm 2: vext.32 <x23=d19,<r3=d8,<r3=d8,#0 |
| 5114 ++vext.32 d19,d8,d8,#0 |
| 5115 ++ |
| 5116 ++# qhasm: x4 = x4[0,2,1,3] |
| 5117 ++# asm 1: vtrn.32 <x4=reg128#11%bot,<x4=reg128#11%top |
| 5118 ++# asm 2: vtrn.32 <x4=d20,<x4=d21 |
| 5119 ++vtrn.32 d20,d21 |
| 5120 ++ |
| 5121 ++# qhasm: unsigned>? len - 64 |
| 5122 ++# asm 1: cmp <len=int32#4,#64 |
| 5123 ++# asm 2: cmp <len=r3,#64 |
| 5124 ++cmp r3,#64 |
| 5125 ++ |
| 5126 ++# qhasm: goto mainloop2 if unsigned> |
| 5127 ++bhi ._mainloop2 |
| 5128 ++ |
| 5129 ++# qhasm: input_2 -= 32 |
| 5130 ++# asm 1: sub >input_2=int32#3,<input_2=int32#2,#32 |
| 5131 ++# asm 2: sub >input_2=r2,<input_2=r1,#32 |
| 5132 ++sub r2,r1,#32 |
| 5133 ++ |
| 5134 ++# qhasm: below64bytes: |
| 5135 ++._below64bytes: |
| 5136 ++ |
| 5137 ++# qhasm: unsigned>? len - 32 |
| 5138 ++# asm 1: cmp <len=int32#4,#32 |
| 5139 ++# asm 2: cmp <len=r3,#32 |
| 5140 ++cmp r3,#32 |
| 5141 ++ |
| 5142 ++# qhasm: goto end if !unsigned> |
| 5143 ++bls ._end |
| 5144 ++ |
| 5145 ++# qhasm: mainloop: |
| 5146 ++._mainloop: |
| 5147 ++ |
| 5148 ++# qhasm: new r0 |
| 5149 ++ |
| 5150 ++# qhasm: ptr = &two24 |
| 5151 ++# asm 1: lea >ptr=int32#2,<two24=stack128#1 |
| 5152 ++# asm 2: lea >ptr=r1,<two24=[sp,#0] |
| 5153 ++add r1,sp,#0 |
| 5154 ++ |
| 5155 ++# qhasm: r4 aligned= mem128[ptr] |
| 5156 ++# asm 1: vld1.8 {>r4=reg128#5%bot->r4=reg128#5%top},[<ptr=int32#2,: 128] |
| 5157 ++# asm 2: vld1.8 {>r4=d8->r4=d9},[<ptr=r1,: 128] |
| 5158 ++vld1.8 {d8-d9},[r1,: 128] |
| 5159 ++ |
| 5160 ++# qhasm: u4 aligned= mem128[ptr] |
| 5161 ++# asm 1: vld1.8 {>u4=reg128#6%bot->u4=reg128#6%top},[<ptr=int32#2,: 128] |
| 5162 ++# asm 2: vld1.8 {>u4=d10->u4=d11},[<ptr=r1,: 128] |
| 5163 ++vld1.8 {d10-d11},[r1,: 128] |
| 5164 ++ |
| 5165 ++# qhasm: c01 = mem128[input_2];input_2+=16 |
| 5166 ++# asm 1: vld1.8 {>c01=reg128#8%bot->c01=reg128#8%top},[<input_2=int32#3]! |
| 5167 ++# asm 2: vld1.8 {>c01=d14->c01=d15},[<input_2=r2]! |
| 5168 ++vld1.8 {d14-d15},[r2]! |
| 5169 ++ |
| 5170 ++# qhasm: r4[0,1] += x01[0] unsigned* y34[2]; r4[2,3] += x01[1] unsigned* y3
4[3] |
| 5171 ++# asm 1: vmlal.u32 <r4=reg128#5,<x01=reg128#9%bot,<y34=reg128#3%top |
| 5172 ++# asm 2: vmlal.u32 <r4=q4,<x01=d16,<y34=d5 |
| 5173 ++vmlal.u32 q4,d16,d5 |
| 5174 ++ |
| 5175 ++# qhasm: c23 = mem128[input_2];input_2+=16 |
| 5176 ++# asm 1: vld1.8 {>c23=reg128#14%bot->c23=reg128#14%top},[<input_2=int32#3]! |
| 5177 ++# asm 2: vld1.8 {>c23=d26->c23=d27},[<input_2=r2]! |
| 5178 ++vld1.8 {d26-d27},[r2]! |
| 5179 ++ |
| 5180 ++# qhasm: r4[0,1] += x01[2] unsigned* y34[0]; r4[2,3] += x01[3] unsigned* y34[
1] |
| 5181 ++# asm 1: vmlal.u32 <r4=reg128#5,<x01=reg128#9%top,<y34=reg128#3%bot |
| 5182 ++# asm 2: vmlal.u32 <r4=q4,<x01=d17,<y34=d4 |
| 5183 ++vmlal.u32 q4,d17,d4 |
| 5184 ++ |
| 5185 ++# qhasm: r0 = u4[1]c01[0]r0[2,3] |
| 5186 ++# asm 1: vext.32 <r0=reg128#4%bot,<u4=reg128#6%bot,<c01=reg128#8%bot,#1 |
| 5187 ++# asm 2: vext.32 <r0=d6,<u4=d10,<c01=d14,#1 |
| 5188 ++vext.32 d6,d10,d14,#1 |
| 5189 ++ |
| 5190 ++# qhasm: r4[0,1] += x23[0] unsigned* y12[2]; r4[2,3] += x23[1] unsigned* y12[
3] |
| 5191 ++# asm 1: vmlal.u32 <r4=reg128#5,<x23=reg128#10%bot,<y12=reg128#2%top |
| 5192 ++# asm 2: vmlal.u32 <r4=q4,<x23=d18,<y12=d3 |
| 5193 ++vmlal.u32 q4,d18,d3 |
| 5194 ++ |
| 5195 ++# qhasm: r0 = r0[0,1]u4[1]c23[0] |
| 5196 ++# asm 1: vext.32 <r0=reg128#4%top,<u4=reg128#6%bot,<c23=reg128#14%bot,#1 |
| 5197 ++# asm 2: vext.32 <r0=d7,<u4=d10,<c23=d26,#1 |
| 5198 ++vext.32 d7,d10,d26,#1 |
| 5199 ++ |
| 5200 ++# qhasm: r4[0,1] += x23[2] unsigned* y12[0]; r4[2,3] += x23[3] unsigned* y12[
1] |
| 5201 ++# asm 1: vmlal.u32 <r4=reg128#5,<x23=reg128#10%top,<y12=reg128#2%bot |
| 5202 ++# asm 2: vmlal.u32 <r4=q4,<x23=d19,<y12=d2 |
| 5203 ++vmlal.u32 q4,d19,d2 |
| 5204 ++ |
| 5205 ++# qhasm: r0 = r0[1]r0[0]r0[3]r0[2] |
| 5206 ++# asm 1: vrev64.i32 >r0=reg128#4,<r0=reg128#4 |
| 5207 ++# asm 2: vrev64.i32 >r0=q3,<r0=q3 |
| 5208 ++vrev64.i32 q3,q3 |
| 5209 ++ |
| 5210 ++# qhasm: r4[0,1] += x4[0] unsigned* y0[0]; r4[2,3] += x4[1] unsigned* y0[1] |
| 5211 ++# asm 1: vmlal.u32 <r4=reg128#5,<x4=reg128#11%bot,<y0=reg128#1%bot |
| 5212 ++# asm 2: vmlal.u32 <r4=q4,<x4=d20,<y0=d0 |
| 5213 ++vmlal.u32 q4,d20,d0 |
| 5214 ++ |
| 5215 ++# qhasm: r0[0,1] += x4[0] unsigned* 5y12[0]; r0[2,3] += x4[1] unsigned* 5y12
[1] |
| 5216 ++# asm 1: vmlal.u32 <r0=reg128#4,<x4=reg128#11%bot,<5y12=reg128#12%bot |
| 5217 ++# asm 2: vmlal.u32 <r0=q3,<x4=d20,<5y12=d22 |
| 5218 ++vmlal.u32 q3,d20,d22 |
| 5219 ++ |
| 5220 ++# qhasm: r0[0,1] += x23[0] unsigned* 5y34[0]; r0[2,3] += x23[1] unsigned* 5y34
[1] |
| 5221 ++# asm 1: vmlal.u32 <r0=reg128#4,<x23=reg128#10%bot,<5y34=reg128#13%bot |
| 5222 ++# asm 2: vmlal.u32 <r0=q3,<x23=d18,<5y34=d24 |
| 5223 ++vmlal.u32 q3,d18,d24 |
| 5224 ++ |
| 5225 ++# qhasm: r0[0,1] += x23[2] unsigned* 5y12[2]; r0[2,3] += x23[3] unsigned* 5y12
[3] |
| 5226 ++# asm 1: vmlal.u32 <r0=reg128#4,<x23=reg128#10%top,<5y12=reg128#12%top |
| 5227 ++# asm 2: vmlal.u32 <r0=q3,<x23=d19,<5y12=d23 |
| 5228 ++vmlal.u32 q3,d19,d23 |
| 5229 ++ |
| 5230 ++# qhasm: c01 c23 = c01[0]c23[0]c01[2]c23[2]c01[1]c23[1]c01[3]c23[3] |
| 5231 ++# asm 1: vtrn.32 <c01=reg128#8,<c23=reg128#14 |
| 5232 ++# asm 2: vtrn.32 <c01=q7,<c23=q13 |
| 5233 ++vtrn.32 q7,q13 |
| 5234 ++ |
| 5235 ++# qhasm: r0[0,1] += x01[0] unsigned* y0[0]; r0[2,3] += x01[1] unsigned* y0[1
] |
| 5236 ++# asm 1: vmlal.u32 <r0=reg128#4,<x01=reg128#9%bot,<y0=reg128#1%bot |
| 5237 ++# asm 2: vmlal.u32 <r0=q3,<x01=d16,<y0=d0 |
| 5238 ++vmlal.u32 q3,d16,d0 |
| 5239 ++ |
| 5240 ++# qhasm: r3[0,1] = c23[2]<<18; r3[2,3] = c23[3]<<18 |
| 5241 ++# asm 1: vshll.u32 >r3=reg128#6,<c23=reg128#14%top,#18 |
| 5242 ++# asm 2: vshll.u32 >r3=q5,<c23=d27,#18 |
| 5243 ++vshll.u32 q5,d27,#18 |
| 5244 ++ |
| 5245 ++# qhasm: r0[0,1] += x01[2] unsigned* 5y34[2]; r0[2,3] += x01[3] unsigned* 5y
34[3] |
| 5246 ++# asm 1: vmlal.u32 <r0=reg128#4,<x01=reg128#9%top,<5y34=reg128#13%top |
| 5247 ++# asm 2: vmlal.u32 <r0=q3,<x01=d17,<5y34=d25 |
| 5248 ++vmlal.u32 q3,d17,d25 |
| 5249 ++ |
| 5250 ++# qhasm: r3[0,1] += x01[0] unsigned* y34[0]; r3[2,3] += x01[1] unsigned* y34
[1] |
| 5251 ++# asm 1: vmlal.u32 <r3=reg128#6,<x01=reg128#9%bot,<y34=reg128#3%bot |
| 5252 ++# asm 2: vmlal.u32 <r3=q5,<x01=d16,<y34=d4 |
| 5253 ++vmlal.u32 q5,d16,d4 |
| 5254 ++ |
| 5255 ++# qhasm: r3[0,1] += x01[2] unsigned* y12[2]; r3[2,3] += x01[3] unsigned* y12
[3] |
| 5256 ++# asm 1: vmlal.u32 <r3=reg128#6,<x01=reg128#9%top,<y12=reg128#2%top |
| 5257 ++# asm 2: vmlal.u32 <r3=q5,<x01=d17,<y12=d3 |
| 5258 ++vmlal.u32 q5,d17,d3 |
| 5259 ++ |
| 5260 ++# qhasm: r3[0,1] += x23[0] unsigned* y12[0]; r3[2,3] += x23[1] unsigned* y12
[1] |
| 5261 ++# asm 1: vmlal.u32 <r3=reg128#6,<x23=reg128#10%bot,<y12=reg128#2%bot |
| 5262 ++# asm 2: vmlal.u32 <r3=q5,<x23=d18,<y12=d2 |
| 5263 ++vmlal.u32 q5,d18,d2 |
| 5264 ++ |
| 5265 ++# qhasm: r3[0,1] += x23[2] unsigned* y0[0]; r3[2,3] += x23[3] unsigned* y0[1
] |
| 5266 ++# asm 1: vmlal.u32 <r3=reg128#6,<x23=reg128#10%top,<y0=reg128#1%bot |
| 5267 ++# asm 2: vmlal.u32 <r3=q5,<x23=d19,<y0=d0 |
| 5268 ++vmlal.u32 q5,d19,d0 |
| 5269 ++ |
| 5270 ++# qhasm: r1[0,1] = c23[0]<<6; r1[2,3] = c23[1]<<6 |
| 5271 ++# asm 1: vshll.u32 >r1=reg128#14,<c23=reg128#14%bot,#6 |
| 5272 ++# asm 2: vshll.u32 >r1=q13,<c23=d26,#6 |
| 5273 ++vshll.u32 q13,d26,#6 |
| 5274 ++ |
| 5275 ++# qhasm: r3[0,1] += x4[0] unsigned* 5y34[2]; r3[2,3] += x4[1] unsigned* 5y
34[3] |
| 5276 ++# asm 1: vmlal.u32 <r3=reg128#6,<x4=reg128#11%bot,<5y34=reg128#13%top |
| 5277 ++# asm 2: vmlal.u32 <r3=q5,<x4=d20,<5y34=d25 |
| 5278 ++vmlal.u32 q5,d20,d25 |
| 5279 ++ |
| 5280 ++# qhasm: r1[0,1] += x01[0] unsigned* y12[0]; r1[2,3] += x01[1] unsigned* y12
[1] |
| 5281 ++# asm 1: vmlal.u32 <r1=reg128#14,<x01=reg128#9%bot,<y12=reg128#2%bot |
| 5282 ++# asm 2: vmlal.u32 <r1=q13,<x01=d16,<y12=d2 |
| 5283 ++vmlal.u32 q13,d16,d2 |
| 5284 ++ |
| 5285 ++# qhasm: r1[0,1] += x01[2] unsigned* y0[0]; r1[2,3] += x01[3] unsigned* y0[1
] |
| 5286 ++# asm 1: vmlal.u32 <r1=reg128#14,<x01=reg128#9%top,<y0=reg128#1%bot |
| 5287 ++# asm 2: vmlal.u32 <r1=q13,<x01=d17,<y0=d0 |
| 5288 ++vmlal.u32 q13,d17,d0 |
| 5289 ++ |
| 5290 ++# qhasm: r1[0,1] += x23[0] unsigned* 5y34[2]; r1[2,3] += x23[1] unsigned* 5y
34[3] |
| 5291 ++# asm 1: vmlal.u32 <r1=reg128#14,<x23=reg128#10%bot,<5y34=reg128#13%top |
| 5292 ++# asm 2: vmlal.u32 <r1=q13,<x23=d18,<5y34=d25 |
| 5293 ++vmlal.u32 q13,d18,d25 |
| 5294 ++ |
| 5295 ++# qhasm: r1[0,1] += x23[2] unsigned* 5y34[0]; r1[2,3] += x23[3] unsigned* 5y34
[1] |
| 5296 ++# asm 1: vmlal.u32 <r1=reg128#14,<x23=reg128#10%top,<5y34=reg128#13%bot |
| 5297 ++# asm 2: vmlal.u32 <r1=q13,<x23=d19,<5y34=d24 |
| 5298 ++vmlal.u32 q13,d19,d24 |
| 5299 ++ |
| 5300 ++# qhasm: r2[0,1] = c01[2]<<12; r2[2,3] = c01[3]<<12 |
| 5301 ++# asm 1: vshll.u32 >r2=reg128#8,<c01=reg128#8%top,#12 |
| 5302 ++# asm 2: vshll.u32 >r2=q7,<c01=d15,#12 |
| 5303 ++vshll.u32 q7,d15,#12 |
| 5304 ++ |
| 5305 ++# qhasm: r1[0,1] += x4[0] unsigned* 5y12[2]; r1[2,3] += x4[1] unsigned* 5y12
[3] |
| 5306 ++# asm 1: vmlal.u32 <r1=reg128#14,<x4=reg128#11%bot,<5y12=reg128#12%top |
| 5307 ++# asm 2: vmlal.u32 <r1=q13,<x4=d20,<5y12=d23 |
| 5308 ++vmlal.u32 q13,d20,d23 |
| 5309 ++ |
| 5310 ++# qhasm: r2[0,1] += x01[0] unsigned* y12[2]; r2[2,3] += x01[1] unsigned* y12
[3] |
| 5311 ++# asm 1: vmlal.u32 <r2=reg128#8,<x01=reg128#9%bot,<y12=reg128#2%top |
| 5312 ++# asm 2: vmlal.u32 <r2=q7,<x01=d16,<y12=d3 |
| 5313 ++vmlal.u32 q7,d16,d3 |
| 5314 ++ |
| 5315 ++# qhasm: r2[0,1] += x01[2] unsigned* y12[0]; r2[2,3] += x01[3] unsigned* y12
[1] |
| 5316 ++# asm 1: vmlal.u32 <r2=reg128#8,<x01=reg128#9%top,<y12=reg128#2%bot |
| 5317 ++# asm 2: vmlal.u32 <r2=q7,<x01=d17,<y12=d2 |
| 5318 ++vmlal.u32 q7,d17,d2 |
| 5319 ++ |
| 5320 ++# qhasm: r2[0,1] += x23[0] unsigned* y0[0]; r2[2,3] += x23[1] unsigned* y0[1
] |
| 5321 ++# asm 1: vmlal.u32 <r2=reg128#8,<x23=reg128#10%bot,<y0=reg128#1%bot |
| 5322 ++# asm 2: vmlal.u32 <r2=q7,<x23=d18,<y0=d0 |
| 5323 ++vmlal.u32 q7,d18,d0 |
| 5324 ++ |
| 5325 ++# qhasm: r2[0,1] += x23[2] unsigned* 5y34[2]; r2[2,3] += x23[3] unsigned* 5y
34[3] |
| 5326 ++# asm 1: vmlal.u32 <r2=reg128#8,<x23=reg128#10%top,<5y34=reg128#13%top |
| 5327 ++# asm 2: vmlal.u32 <r2=q7,<x23=d19,<5y34=d25 |
| 5328 ++vmlal.u32 q7,d19,d25 |
| 5329 ++ |
| 5330 ++# qhasm: r2[0,1] += x4[0] unsigned* 5y34[0]; r2[2,3] += x4[1] unsigned* 5y34
[1] |
| 5331 ++# asm 1: vmlal.u32 <r2=reg128#8,<x4=reg128#11%bot,<5y34=reg128#13%bot |
| 5332 ++# asm 2: vmlal.u32 <r2=q7,<x4=d20,<5y34=d24 |
| 5333 ++vmlal.u32 q7,d20,d24 |
| 5334 ++ |
| 5335 ++# qhasm: 2x t1 = r0 unsigned>> 26 |
| 5336 ++# asm 1: vshr.u64 >t1=reg128#9,<r0=reg128#4,#26 |
| 5337 ++# asm 2: vshr.u64 >t1=q8,<r0=q3,#26 |
| 5338 ++vshr.u64 q8,q3,#26 |
| 5339 ++ |
| 5340 ++# qhasm: r0 &= mask |
| 5341 ++# asm 1: vand >r0=reg128#4,<r0=reg128#4,<mask=reg128#7 |
| 5342 ++# asm 2: vand >r0=q3,<r0=q3,<mask=q6 |
| 5343 ++vand q3,q3,q6 |
| 5344 ++ |
| 5345 ++# qhasm: 2x r1 += t1 |
| 5346 ++# asm 1: vadd.i64 >r1=reg128#9,<r1=reg128#14,<t1=reg128#9 |
| 5347 ++# asm 2: vadd.i64 >r1=q8,<r1=q13,<t1=q8 |
| 5348 ++vadd.i64 q8,q13,q8 |
| 5349 ++ |
| 5350 ++# qhasm: 2x t4 = r3 unsigned>> 26 |
| 5351 ++# asm 1: vshr.u64 >t4=reg128#10,<r3=reg128#6,#26 |
| 5352 ++# asm 2: vshr.u64 >t4=q9,<r3=q5,#26 |
| 5353 ++vshr.u64 q9,q5,#26 |
| 5354 ++ |
| 5355 ++# qhasm: r3 &= mask |
| 5356 ++# asm 1: vand >r3=reg128#6,<r3=reg128#6,<mask=reg128#7 |
| 5357 ++# asm 2: vand >r3=q5,<r3=q5,<mask=q6 |
| 5358 ++vand q5,q5,q6 |
| 5359 ++ |
| 5360 ++# qhasm: 2x r4 += t4 |
| 5361 ++# asm 1: vadd.i64 >r4=reg128#5,<r4=reg128#5,<t4=reg128#10 |
| 5362 ++# asm 2: vadd.i64 >r4=q4,<r4=q4,<t4=q9 |
| 5363 ++vadd.i64 q4,q4,q9 |
| 5364 ++ |
| 5365 ++# qhasm: 2x t2 = r1 unsigned>> 26 |
| 5366 ++# asm 1: vshr.u64 >t2=reg128#10,<r1=reg128#9,#26 |
| 5367 ++# asm 2: vshr.u64 >t2=q9,<r1=q8,#26 |
| 5368 ++vshr.u64 q9,q8,#26 |
| 5369 ++ |
| 5370 ++# qhasm: r1 &= mask |
| 5371 ++# asm 1: vand >r1=reg128#11,<r1=reg128#9,<mask=reg128#7 |
| 5372 ++# asm 2: vand >r1=q10,<r1=q8,<mask=q6 |
| 5373 ++vand q10,q8,q6 |
| 5374 ++ |
| 5375 ++# qhasm: 2x t0 = r4 unsigned>> 26 |
| 5376 ++# asm 1: vshr.u64 >t0=reg128#9,<r4=reg128#5,#26 |
| 5377 ++# asm 2: vshr.u64 >t0=q8,<r4=q4,#26 |
| 5378 ++vshr.u64 q8,q4,#26 |
| 5379 ++ |
| 5380 ++# qhasm: 2x r2 += t2 |
| 5381 ++# asm 1: vadd.i64 >r2=reg128#8,<r2=reg128#8,<t2=reg128#10 |
| 5382 ++# asm 2: vadd.i64 >r2=q7,<r2=q7,<t2=q9 |
| 5383 ++vadd.i64 q7,q7,q9 |
| 5384 ++ |
| 5385 ++# qhasm: r4 &= mask |
| 5386 ++# asm 1: vand >r4=reg128#5,<r4=reg128#5,<mask=reg128#7 |
| 5387 ++# asm 2: vand >r4=q4,<r4=q4,<mask=q6 |
| 5388 ++vand q4,q4,q6 |
| 5389 ++ |
| 5390 ++# qhasm: 2x r0 += t0 |
| 5391 ++# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#9 |
| 5392 ++# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q8 |
| 5393 ++vadd.i64 q3,q3,q8 |
| 5394 ++ |
| 5395 ++# qhasm: 2x t0 <<= 2 |
| 5396 ++# asm 1: vshl.i64 >t0=reg128#9,<t0=reg128#9,#2 |
| 5397 ++# asm 2: vshl.i64 >t0=q8,<t0=q8,#2 |
| 5398 ++vshl.i64 q8,q8,#2 |
| 5399 ++ |
| 5400 ++# qhasm: 2x t3 = r2 unsigned>> 26 |
| 5401 ++# asm 1: vshr.u64 >t3=reg128#14,<r2=reg128#8,#26 |
| 5402 ++# asm 2: vshr.u64 >t3=q13,<r2=q7,#26 |
| 5403 ++vshr.u64 q13,q7,#26 |
| 5404 ++ |
| 5405 ++# qhasm: 2x r0 += t0 |
| 5406 ++# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#9 |
| 5407 ++# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q8 |
| 5408 ++vadd.i64 q3,q3,q8 |
| 5409 ++ |
| 5410 ++# qhasm: x23 = r2 & mask |
| 5411 ++# asm 1: vand >x23=reg128#10,<r2=reg128#8,<mask=reg128#7 |
| 5412 ++# asm 2: vand >x23=q9,<r2=q7,<mask=q6 |
| 5413 ++vand q9,q7,q6 |
| 5414 ++ |
| 5415 ++# qhasm: 2x r3 += t3 |
| 5416 ++# asm 1: vadd.i64 >r3=reg128#6,<r3=reg128#6,<t3=reg128#14 |
| 5417 ++# asm 2: vadd.i64 >r3=q5,<r3=q5,<t3=q13 |
| 5418 ++vadd.i64 q5,q5,q13 |
| 5419 ++ |
| 5420 ++# qhasm: 2x t1 = r0 unsigned>> 26 |
| 5421 ++# asm 1: vshr.u64 >t1=reg128#8,<r0=reg128#4,#26 |
| 5422 ++# asm 2: vshr.u64 >t1=q7,<r0=q3,#26 |
| 5423 ++vshr.u64 q7,q3,#26 |
| 5424 ++ |
| 5425 ++# qhasm: x01 = r0 & mask |
| 5426 ++# asm 1: vand >x01=reg128#9,<r0=reg128#4,<mask=reg128#7 |
| 5427 ++# asm 2: vand >x01=q8,<r0=q3,<mask=q6 |
| 5428 ++vand q8,q3,q6 |
| 5429 ++ |
| 5430 ++# qhasm: 2x r1 += t1 |
| 5431 ++# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#11,<t1=reg128#8 |
| 5432 ++# asm 2: vadd.i64 >r1=q3,<r1=q10,<t1=q7 |
| 5433 ++vadd.i64 q3,q10,q7 |
| 5434 ++ |
| 5435 ++# qhasm: 2x t4 = r3 unsigned>> 26 |
| 5436 ++# asm 1: vshr.u64 >t4=reg128#8,<r3=reg128#6,#26 |
| 5437 ++# asm 2: vshr.u64 >t4=q7,<r3=q5,#26 |
| 5438 ++vshr.u64 q7,q5,#26 |
| 5439 ++ |
| 5440 ++# qhasm: r3 &= mask |
| 5441 ++# asm 1: vand >r3=reg128#6,<r3=reg128#6,<mask=reg128#7 |
| 5442 ++# asm 2: vand >r3=q5,<r3=q5,<mask=q6 |
| 5443 ++vand q5,q5,q6 |
| 5444 ++ |
| 5445 ++# qhasm: 2x x4 = r4 + t4 |
| 5446 ++# asm 1: vadd.i64 >x4=reg128#11,<r4=reg128#5,<t4=reg128#8 |
| 5447 ++# asm 2: vadd.i64 >x4=q10,<r4=q4,<t4=q7 |
| 5448 ++vadd.i64 q10,q4,q7 |
| 5449 ++ |
| 5450 ++# qhasm: len -= 32 |
| 5451 ++# asm 1: sub >len=int32#4,<len=int32#4,#32 |
| 5452 ++# asm 2: sub >len=r3,<len=r3,#32 |
| 5453 ++sub r3,r3,#32 |
| 5454 ++ |
| 5455 ++# qhasm: x01 = x01[0,2,1,3] |
| 5456 ++# asm 1: vtrn.32 <x01=reg128#9%bot,<x01=reg128#9%top |
| 5457 ++# asm 2: vtrn.32 <x01=d16,<x01=d17 |
| 5458 ++vtrn.32 d16,d17 |
| 5459 ++ |
| 5460 ++# qhasm: x23 = x23[0,2,1,3] |
| 5461 ++# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top |
| 5462 ++# asm 2: vtrn.32 <x23=d18,<x23=d19 |
| 5463 ++vtrn.32 d18,d19 |
| 5464 ++ |
| 5465 ++# qhasm: r1 = r1[0,2,1,3] |
| 5466 ++# asm 1: vtrn.32 <r1=reg128#4%bot,<r1=reg128#4%top |
| 5467 ++# asm 2: vtrn.32 <r1=d6,<r1=d7 |
| 5468 ++vtrn.32 d6,d7 |
| 5469 ++ |
| 5470 ++# qhasm: r3 = r3[0,2,1,3] |
| 5471 ++# asm 1: vtrn.32 <r3=reg128#6%bot,<r3=reg128#6%top |
| 5472 ++# asm 2: vtrn.32 <r3=d10,<r3=d11 |
| 5473 ++vtrn.32 d10,d11 |
| 5474 ++ |
| 5475 ++# qhasm: x4 = x4[0,2,1,3] |
| 5476 ++# asm 1: vtrn.32 <x4=reg128#11%bot,<x4=reg128#11%top |
| 5477 ++# asm 2: vtrn.32 <x4=d20,<x4=d21 |
| 5478 ++vtrn.32 d20,d21 |
| 5479 ++ |
| 5480 ++# qhasm: x01 = x01[0,1] r1[0,1] |
| 5481 ++# asm 1: vext.32 <x01=reg128#9%top,<r1=reg128#4%bot,<r1=reg128#4%bot,#0 |
| 5482 ++# asm 2: vext.32 <x01=d17,<r1=d6,<r1=d6,#0 |
| 5483 ++vext.32 d17,d6,d6,#0 |
| 5484 ++ |
| 5485 ++# qhasm: x23 = x23[0,1] r3[0,1] |
| 5486 ++# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#6%bot,<r3=reg128#6%bot,#0 |
| 5487 ++# asm 2: vext.32 <x23=d19,<r3=d10,<r3=d10,#0 |
| 5488 ++vext.32 d19,d10,d10,#0 |
| 5489 ++ |
| 5490 ++# qhasm: unsigned>? len - 32 |
| 5491 ++# asm 1: cmp <len=int32#4,#32 |
| 5492 ++# asm 2: cmp <len=r3,#32 |
| 5493 ++cmp r3,#32 |
| 5494 ++ |
| 5495 ++# qhasm: goto mainloop if unsigned> |
| 5496 ++bhi ._mainloop |
| 5497 ++ |
| 5498 ++# qhasm: end: |
| 5499 ++._end: |
| 5500 ++ |
| 5501 ++# qhasm: mem128[input_0] = x01;input_0+=16 |
| 5502 ++# asm 1: vst1.8 {<x01=reg128#9%bot-<x01=reg128#9%top},[<input_0=int32#1]! |
| 5503 ++# asm 2: vst1.8 {<x01=d16-<x01=d17},[<input_0=r0]! |
| 5504 ++vst1.8 {d16-d17},[r0]! |
| 5505 ++ |
| 5506 ++# qhasm: mem128[input_0] = x23;input_0+=16 |
| 5507 ++# asm 1: vst1.8 {<x23=reg128#10%bot-<x23=reg128#10%top},[<input_0=int32#1]! |
| 5508 ++# asm 2: vst1.8 {<x23=d18-<x23=d19},[<input_0=r0]! |
| 5509 ++vst1.8 {d18-d19},[r0]! |
| 5510 ++ |
| 5511 ++# qhasm: mem64[input_0] = x4[0] |
| 5512 ++# asm 1: vst1.8 <x4=reg128#11%bot,[<input_0=int32#1] |
| 5513 ++# asm 2: vst1.8 <x4=d20,[<input_0=r0] |
| 5514 ++vst1.8 d20,[r0] |
| 5515 ++ |
| 5516 ++# qhasm: len = len |
| 5517 ++# asm 1: mov >len=int32#1,<len=int32#4 |
| 5518 ++# asm 2: mov >len=r0,<len=r3 |
| 5519 ++mov r0,r3 |
| 5520 ++ |
| 5521 ++# qhasm: qpopreturn len |
| 5522 ++mov sp,r12 |
| 5523 ++vpop {q4,q5,q6,q7} |
| 5524 ++bx lr |
| 5525 ++ |
| 5526 ++# qhasm: int32 input_0 |
| 5527 ++ |
| 5528 ++# qhasm: int32 input_1 |
| 5529 ++ |
| 5530 ++# qhasm: int32 input_2 |
| 5531 ++ |
| 5532 ++# qhasm: int32 input_3 |
| 5533 ++ |
| 5534 ++# qhasm: stack32 input_4 |
| 5535 ++ |
| 5536 ++# qhasm: stack32 input_5 |
| 5537 ++ |
| 5538 ++# qhasm: stack32 input_6 |
| 5539 ++ |
| 5540 ++# qhasm: stack32 input_7 |
| 5541 ++ |
| 5542 ++# qhasm: int32 caller_r4 |
| 5543 ++ |
| 5544 ++# qhasm: int32 caller_r5 |
| 5545 ++ |
| 5546 ++# qhasm: int32 caller_r6 |
| 5547 ++ |
| 5548 ++# qhasm: int32 caller_r7 |
| 5549 ++ |
| 5550 ++# qhasm: int32 caller_r8 |
| 5551 ++ |
| 5552 ++# qhasm: int32 caller_r9 |
| 5553 ++ |
| 5554 ++# qhasm: int32 caller_r10 |
| 5555 ++ |
| 5556 ++# qhasm: int32 caller_r11 |
| 5557 ++ |
| 5558 ++# qhasm: int32 caller_r12 |
| 5559 ++ |
| 5560 ++# qhasm: int32 caller_r14 |
| 5561 ++ |
| 5562 ++# qhasm: reg128 caller_q4 |
| 5563 ++ |
| 5564 ++# qhasm: reg128 caller_q5 |
| 5565 ++ |
| 5566 ++# qhasm: reg128 caller_q6 |
| 5567 ++ |
| 5568 ++# qhasm: reg128 caller_q7 |
| 5569 ++ |
| 5570 ++# qhasm: reg128 r0 |
| 5571 ++ |
| 5572 ++# qhasm: reg128 r1 |
| 5573 ++ |
| 5574 ++# qhasm: reg128 r2 |
| 5575 ++ |
| 5576 ++# qhasm: reg128 r3 |
| 5577 ++ |
| 5578 ++# qhasm: reg128 r4 |
| 5579 ++ |
| 5580 ++# qhasm: reg128 x01 |
| 5581 ++ |
| 5582 ++# qhasm: reg128 x23 |
| 5583 ++ |
| 5584 ++# qhasm: reg128 x4 |
| 5585 ++ |
| 5586 ++# qhasm: reg128 y01 |
| 5587 ++ |
| 5588 ++# qhasm: reg128 y23 |
| 5589 ++ |
| 5590 ++# qhasm: reg128 y4 |
| 5591 ++ |
| 5592 ++# qhasm: reg128 _5y01 |
| 5593 ++ |
| 5594 ++# qhasm: reg128 _5y23 |
| 5595 ++ |
| 5596 ++# qhasm: reg128 _5y4 |
| 5597 ++ |
| 5598 ++# qhasm: reg128 c01 |
| 5599 ++ |
| 5600 ++# qhasm: reg128 c23 |
| 5601 ++ |
| 5602 ++# qhasm: reg128 c4 |
| 5603 ++ |
| 5604 ++# qhasm: reg128 t0 |
| 5605 ++ |
| 5606 ++# qhasm: reg128 t1 |
| 5607 ++ |
| 5608 ++# qhasm: reg128 t2 |
| 5609 ++ |
| 5610 ++# qhasm: reg128 t3 |
| 5611 ++ |
| 5612 ++# qhasm: reg128 t4 |
| 5613 ++ |
| 5614 ++# qhasm: reg128 mask |
| 5615 ++ |
| 5616 ++# qhasm: enter crypto_onetimeauth_poly1305_neon2_addmulmod |
| 5617 ++.align 2 |
| 5618 ++.global openssl_poly1305_neon2_addmulmod |
| 5619 ++.type openssl_poly1305_neon2_addmulmod STT_FUNC |
| 5620 ++openssl_poly1305_neon2_addmulmod: |
| 5621 ++sub sp,sp,#0 |
| 5622 ++ |
| 5623 ++# qhasm: 2x mask = 0xffffffff |
| 5624 ++# asm 1: vmov.i64 >mask=reg128#1,#0xffffffff |
| 5625 ++# asm 2: vmov.i64 >mask=q0,#0xffffffff |
| 5626 ++vmov.i64 q0,#0xffffffff |
| 5627 ++ |
| 5628 ++# qhasm: y01 aligned= mem128[input_2];input_2+=16 |
| 5629 ++# asm 1: vld1.8 {>y01=reg128#2%bot->y01=reg128#2%top},[<input_2=int32#3,: 128]
! |
| 5630 ++# asm 2: vld1.8 {>y01=d2->y01=d3},[<input_2=r2,: 128]! |
| 5631 ++vld1.8 {d2-d3},[r2,: 128]! |
| 5632 ++ |
| 5633 ++# qhasm: 4x _5y01 = y01 << 2 |
| 5634 ++# asm 1: vshl.i32 >_5y01=reg128#3,<y01=reg128#2,#2 |
| 5635 ++# asm 2: vshl.i32 >_5y01=q2,<y01=q1,#2 |
| 5636 ++vshl.i32 q2,q1,#2 |
| 5637 ++ |
| 5638 ++# qhasm: y23 aligned= mem128[input_2];input_2+=16 |
| 5639 ++# asm 1: vld1.8 {>y23=reg128#4%bot->y23=reg128#4%top},[<input_2=int32#3,: 128]
! |
| 5640 ++# asm 2: vld1.8 {>y23=d6->y23=d7},[<input_2=r2,: 128]! |
| 5641 ++vld1.8 {d6-d7},[r2,: 128]! |
| 5642 ++ |
| 5643 ++# qhasm: 4x _5y23 = y23 << 2 |
| 5644 ++# asm 1: vshl.i32 >_5y23=reg128#9,<y23=reg128#4,#2 |
| 5645 ++# asm 2: vshl.i32 >_5y23=q8,<y23=q3,#2 |
| 5646 ++vshl.i32 q8,q3,#2 |
| 5647 ++ |
| 5648 ++# qhasm: y4 aligned= mem64[input_2]y4[1] |
| 5649 ++# asm 1: vld1.8 {<y4=reg128#10%bot},[<input_2=int32#3,: 64] |
| 5650 ++# asm 2: vld1.8 {<y4=d18},[<input_2=r2,: 64] |
| 5651 ++vld1.8 {d18},[r2,: 64] |
| 5652 ++ |
| 5653 ++# qhasm: 4x _5y4 = y4 << 2 |
| 5654 ++# asm 1: vshl.i32 >_5y4=reg128#11,<y4=reg128#10,#2 |
| 5655 ++# asm 2: vshl.i32 >_5y4=q10,<y4=q9,#2 |
| 5656 ++vshl.i32 q10,q9,#2 |
| 5657 ++ |
| 5658 ++# qhasm: x01 aligned= mem128[input_1];input_1+=16 |
| 5659 ++# asm 1: vld1.8 {>x01=reg128#12%bot->x01=reg128#12%top},[<input_1=int32#2,: 12
8]! |
| 5660 ++# asm 2: vld1.8 {>x01=d22->x01=d23},[<input_1=r1,: 128]! |
| 5661 ++vld1.8 {d22-d23},[r1,: 128]! |
| 5662 ++ |
| 5663 ++# qhasm: 4x _5y01 += y01 |
| 5664 ++# asm 1: vadd.i32 >_5y01=reg128#3,<_5y01=reg128#3,<y01=reg128#2 |
| 5665 ++# asm 2: vadd.i32 >_5y01=q2,<_5y01=q2,<y01=q1 |
| 5666 ++vadd.i32 q2,q2,q1 |
| 5667 ++ |
| 5668 ++# qhasm: x23 aligned= mem128[input_1];input_1+=16 |
| 5669 ++# asm 1: vld1.8 {>x23=reg128#13%bot->x23=reg128#13%top},[<input_1=int32#2,: 12
8]! |
| 5670 ++# asm 2: vld1.8 {>x23=d24->x23=d25},[<input_1=r1,: 128]! |
| 5671 ++vld1.8 {d24-d25},[r1,: 128]! |
| 5672 ++ |
| 5673 ++# qhasm: 4x _5y23 += y23 |
| 5674 ++# asm 1: vadd.i32 >_5y23=reg128#9,<_5y23=reg128#9,<y23=reg128#4 |
| 5675 ++# asm 2: vadd.i32 >_5y23=q8,<_5y23=q8,<y23=q3 |
| 5676 ++vadd.i32 q8,q8,q3 |
| 5677 ++ |
| 5678 ++# qhasm: 4x _5y4 += y4 |
| 5679 ++# asm 1: vadd.i32 >_5y4=reg128#11,<_5y4=reg128#11,<y4=reg128#10 |
| 5680 ++# asm 2: vadd.i32 >_5y4=q10,<_5y4=q10,<y4=q9 |
| 5681 ++vadd.i32 q10,q10,q9 |
| 5682 ++ |
| 5683 ++# qhasm: c01 aligned= mem128[input_3];input_3+=16 |
| 5684 ++# asm 1: vld1.8 {>c01=reg128#14%bot->c01=reg128#14%top},[<input_3=int32#4,: 12
8]! |
| 5685 ++# asm 2: vld1.8 {>c01=d26->c01=d27},[<input_3=r3,: 128]! |
| 5686 ++vld1.8 {d26-d27},[r3,: 128]! |
| 5687 ++ |
| 5688 ++# qhasm: 4x x01 += c01 |
| 5689 ++# asm 1: vadd.i32 >x01=reg128#12,<x01=reg128#12,<c01=reg128#14 |
| 5690 ++# asm 2: vadd.i32 >x01=q11,<x01=q11,<c01=q13 |
| 5691 ++vadd.i32 q11,q11,q13 |
| 5692 ++ |
| 5693 ++# qhasm: c23 aligned= mem128[input_3];input_3+=16 |
| 5694 ++# asm 1: vld1.8 {>c23=reg128#14%bot->c23=reg128#14%top},[<input_3=int32#4,: 12
8]! |
| 5695 ++# asm 2: vld1.8 {>c23=d26->c23=d27},[<input_3=r3,: 128]! |
| 5696 ++vld1.8 {d26-d27},[r3,: 128]! |
| 5697 ++ |
| 5698 ++# qhasm: 4x x23 += c23 |
| 5699 ++# asm 1: vadd.i32 >x23=reg128#13,<x23=reg128#13,<c23=reg128#14 |
| 5700 ++# asm 2: vadd.i32 >x23=q12,<x23=q12,<c23=q13 |
| 5701 ++vadd.i32 q12,q12,q13 |
| 5702 ++ |
| 5703 ++# qhasm: x4 aligned= mem64[input_1]x4[1] |
| 5704 ++# asm 1: vld1.8 {<x4=reg128#14%bot},[<input_1=int32#2,: 64] |
| 5705 ++# asm 2: vld1.8 {<x4=d26},[<input_1=r1,: 64] |
| 5706 ++vld1.8 {d26},[r1,: 64] |
| 5707 ++ |
| 5708 ++# qhasm: 2x mask unsigned>>=6 |
| 5709 ++# asm 1: vshr.u64 >mask=reg128#1,<mask=reg128#1,#6 |
| 5710 ++# asm 2: vshr.u64 >mask=q0,<mask=q0,#6 |
| 5711 ++vshr.u64 q0,q0,#6 |
| 5712 ++ |
| 5713 ++# qhasm: c4 aligned= mem64[input_3]c4[1] |
| 5714 ++# asm 1: vld1.8 {<c4=reg128#15%bot},[<input_3=int32#4,: 64] |
| 5715 ++# asm 2: vld1.8 {<c4=d28},[<input_3=r3,: 64] |
| 5716 ++vld1.8 {d28},[r3,: 64] |
| 5717 ++ |
| 5718 ++# qhasm: 4x x4 += c4 |
| 5719 ++# asm 1: vadd.i32 >x4=reg128#14,<x4=reg128#14,<c4=reg128#15 |
| 5720 ++# asm 2: vadd.i32 >x4=q13,<x4=q13,<c4=q14 |
| 5721 ++vadd.i32 q13,q13,q14 |
| 5722 ++ |
| 5723 ++# qhasm: r0[0,1] = x01[0] unsigned* y01[0]; r0[2,3] = x01[1] unsigned* y01
[1] |
| 5724 ++# asm 1: vmull.u32 >r0=reg128#15,<x01=reg128#12%bot,<y01=reg128#2%bot |
| 5725 ++# asm 2: vmull.u32 >r0=q14,<x01=d22,<y01=d2 |
| 5726 ++vmull.u32 q14,d22,d2 |
| 5727 ++ |
| 5728 ++# qhasm: r0[0,1] += x01[2] unsigned* _5y4[0]; r0[2,3] += x01[3] unsigned* _5
y4[1] |
| 5729 ++# asm 1: vmlal.u32 <r0=reg128#15,<x01=reg128#12%top,<_5y4=reg128#11%bot |
| 5730 ++# asm 2: vmlal.u32 <r0=q14,<x01=d23,<_5y4=d20 |
| 5731 ++vmlal.u32 q14,d23,d20 |
| 5732 ++ |
| 5733 ++# qhasm: r0[0,1] += x23[0] unsigned* _5y23[2]; r0[2,3] += x23[1] unsigned* _5y
23[3] |
| 5734 ++# asm 1: vmlal.u32 <r0=reg128#15,<x23=reg128#13%bot,<_5y23=reg128#9%top |
| 5735 ++# asm 2: vmlal.u32 <r0=q14,<x23=d24,<_5y23=d17 |
| 5736 ++vmlal.u32 q14,d24,d17 |
| 5737 ++ |
| 5738 ++# qhasm: r0[0,1] += x23[2] unsigned* _5y23[0]; r0[2,3] += x23[3] unsigned* _5y
23[1] |
| 5739 ++# asm 1: vmlal.u32 <r0=reg128#15,<x23=reg128#13%top,<_5y23=reg128#9%bot |
| 5740 ++# asm 2: vmlal.u32 <r0=q14,<x23=d25,<_5y23=d16 |
| 5741 ++vmlal.u32 q14,d25,d16 |
| 5742 ++ |
| 5743 ++# qhasm: r0[0,1] += x4[0] unsigned* _5y01[2]; r0[2,3] += x4[1] unsigned* _5y
01[3] |
| 5744 ++# asm 1: vmlal.u32 <r0=reg128#15,<x4=reg128#14%bot,<_5y01=reg128#3%top |
| 5745 ++# asm 2: vmlal.u32 <r0=q14,<x4=d26,<_5y01=d5 |
| 5746 ++vmlal.u32 q14,d26,d5 |
| 5747 ++ |
| 5748 ++# qhasm: r1[0,1] = x01[0] unsigned* y01[2]; r1[2,3] = x01[1] unsigned* y01
[3] |
| 5749 ++# asm 1: vmull.u32 >r1=reg128#3,<x01=reg128#12%bot,<y01=reg128#2%top |
| 5750 ++# asm 2: vmull.u32 >r1=q2,<x01=d22,<y01=d3 |
| 5751 ++vmull.u32 q2,d22,d3 |
| 5752 ++ |
| 5753 ++# qhasm: r1[0,1] += x01[2] unsigned* y01[0]; r1[2,3] += x01[3] unsigned* y01
[1] |
| 5754 ++# asm 1: vmlal.u32 <r1=reg128#3,<x01=reg128#12%top,<y01=reg128#2%bot |
| 5755 ++# asm 2: vmlal.u32 <r1=q2,<x01=d23,<y01=d2 |
| 5756 ++vmlal.u32 q2,d23,d2 |
| 5757 ++ |
| 5758 ++# qhasm: r1[0,1] += x23[0] unsigned* _5y4[0]; r1[2,3] += x23[1] unsigned* _5
y4[1] |
| 5759 ++# asm 1: vmlal.u32 <r1=reg128#3,<x23=reg128#13%bot,<_5y4=reg128#11%bot |
| 5760 ++# asm 2: vmlal.u32 <r1=q2,<x23=d24,<_5y4=d20 |
| 5761 ++vmlal.u32 q2,d24,d20 |
| 5762 ++ |
| 5763 ++# qhasm: r1[0,1] += x23[2] unsigned* _5y23[2]; r1[2,3] += x23[3] unsigned* _5y
23[3] |
| 5764 ++# asm 1: vmlal.u32 <r1=reg128#3,<x23=reg128#13%top,<_5y23=reg128#9%top |
| 5765 ++# asm 2: vmlal.u32 <r1=q2,<x23=d25,<_5y23=d17 |
| 5766 ++vmlal.u32 q2,d25,d17 |
| 5767 ++ |
| 5768 ++# qhasm: r1[0,1] += x4[0] unsigned* _5y23[0]; r1[2,3] += x4[1] unsigned* _5y
23[1] |
| 5769 ++# asm 1: vmlal.u32 <r1=reg128#3,<x4=reg128#14%bot,<_5y23=reg128#9%bot |
| 5770 ++# asm 2: vmlal.u32 <r1=q2,<x4=d26,<_5y23=d16 |
| 5771 ++vmlal.u32 q2,d26,d16 |
| 5772 ++ |
| 5773 ++# qhasm: r2[0,1] = x01[0] unsigned* y23[0]; r2[2,3] = x01[1] unsigned* y23
[1] |
| 5774 ++# asm 1: vmull.u32 >r2=reg128#16,<x01=reg128#12%bot,<y23=reg128#4%bot |
| 5775 ++# asm 2: vmull.u32 >r2=q15,<x01=d22,<y23=d6 |
| 5776 ++vmull.u32 q15,d22,d6 |
| 5777 ++ |
| 5778 ++# qhasm: r2[0,1] += x01[2] unsigned* y01[2]; r2[2,3] += x01[3] unsigned* y01
[3] |
| 5779 ++# asm 1: vmlal.u32 <r2=reg128#16,<x01=reg128#12%top,<y01=reg128#2%top |
| 5780 ++# asm 2: vmlal.u32 <r2=q15,<x01=d23,<y01=d3 |
| 5781 ++vmlal.u32 q15,d23,d3 |
| 5782 ++ |
| 5783 ++# qhasm: r2[0,1] += x23[0] unsigned* y01[0]; r2[2,3] += x23[1] unsigned* y01
[1] |
| 5784 ++# asm 1: vmlal.u32 <r2=reg128#16,<x23=reg128#13%bot,<y01=reg128#2%bot |
| 5785 ++# asm 2: vmlal.u32 <r2=q15,<x23=d24,<y01=d2 |
| 5786 ++vmlal.u32 q15,d24,d2 |
| 5787 ++ |
| 5788 ++# qhasm: r2[0,1] += x23[2] unsigned* _5y4[0]; r2[2,3] += x23[3] unsigned* _5
y4[1] |
| 5789 ++# asm 1: vmlal.u32 <r2=reg128#16,<x23=reg128#13%top,<_5y4=reg128#11%bot |
| 5790 ++# asm 2: vmlal.u32 <r2=q15,<x23=d25,<_5y4=d20 |
| 5791 ++vmlal.u32 q15,d25,d20 |
| 5792 ++ |
| 5793 ++# qhasm: r2[0,1] += x4[0] unsigned* _5y23[2]; r2[2,3] += x4[1] unsigned* _5y
23[3] |
| 5794 ++# asm 1: vmlal.u32 <r2=reg128#16,<x4=reg128#14%bot,<_5y23=reg128#9%top |
| 5795 ++# asm 2: vmlal.u32 <r2=q15,<x4=d26,<_5y23=d17 |
| 5796 ++vmlal.u32 q15,d26,d17 |
| 5797 ++ |
| 5798 ++# qhasm: r3[0,1] = x01[0] unsigned* y23[2]; r3[2,3] = x01[1] unsigned* y23
[3] |
| 5799 ++# asm 1: vmull.u32 >r3=reg128#9,<x01=reg128#12%bot,<y23=reg128#4%top |
| 5800 ++# asm 2: vmull.u32 >r3=q8,<x01=d22,<y23=d7 |
| 5801 ++vmull.u32 q8,d22,d7 |
| 5802 ++ |
| 5803 ++# qhasm: r3[0,1] += x01[2] unsigned* y23[0]; r3[2,3] += x01[3] unsigned* y23
[1] |
| 5804 ++# asm 1: vmlal.u32 <r3=reg128#9,<x01=reg128#12%top,<y23=reg128#4%bot |
| 5805 ++# asm 2: vmlal.u32 <r3=q8,<x01=d23,<y23=d6 |
| 5806 ++vmlal.u32 q8,d23,d6 |
| 5807 ++ |
| 5808 ++# qhasm: r3[0,1] += x23[0] unsigned* y01[2]; r3[2,3] += x23[1] unsigned* y01
[3] |
| 5809 ++# asm 1: vmlal.u32 <r3=reg128#9,<x23=reg128#13%bot,<y01=reg128#2%top |
| 5810 ++# asm 2: vmlal.u32 <r3=q8,<x23=d24,<y01=d3 |
| 5811 ++vmlal.u32 q8,d24,d3 |
| 5812 ++ |
| 5813 ++# qhasm: r3[0,1] += x23[2] unsigned* y01[0]; r3[2,3] += x23[3] unsigned* y01
[1] |
| 5814 ++# asm 1: vmlal.u32 <r3=reg128#9,<x23=reg128#13%top,<y01=reg128#2%bot |
| 5815 ++# asm 2: vmlal.u32 <r3=q8,<x23=d25,<y01=d2 |
| 5816 ++vmlal.u32 q8,d25,d2 |
| 5817 ++ |
| 5818 ++# qhasm: r3[0,1] += x4[0] unsigned* _5y4[0]; r3[2,3] += x4[1] unsigned* _5
y4[1] |
| 5819 ++# asm 1: vmlal.u32 <r3=reg128#9,<x4=reg128#14%bot,<_5y4=reg128#11%bot |
| 5820 ++# asm 2: vmlal.u32 <r3=q8,<x4=d26,<_5y4=d20 |
| 5821 ++vmlal.u32 q8,d26,d20 |
| 5822 ++ |
| 5823 ++# qhasm: r4[0,1] = x01[0] unsigned* y4[0]; r4[2,3] = x01[1] unsigned* y4[
1] |
| 5824 ++# asm 1: vmull.u32 >r4=reg128#10,<x01=reg128#12%bot,<y4=reg128#10%bot |
| 5825 ++# asm 2: vmull.u32 >r4=q9,<x01=d22,<y4=d18 |
| 5826 ++vmull.u32 q9,d22,d18 |
| 5827 ++ |
| 5828 ++# qhasm: r4[0,1] += x01[2] unsigned* y23[2]; r4[2,3] += x01[3] unsigned* y23[
3] |
| 5829 ++# asm 1: vmlal.u32 <r4=reg128#10,<x01=reg128#12%top,<y23=reg128#4%top |
| 5830 ++# asm 2: vmlal.u32 <r4=q9,<x01=d23,<y23=d7 |
| 5831 ++vmlal.u32 q9,d23,d7 |
| 5832 ++ |
| 5833 ++# qhasm: r4[0,1] += x23[0] unsigned* y23[0]; r4[2,3] += x23[1] unsigned* y23[
1] |
| 5834 ++# asm 1: vmlal.u32 <r4=reg128#10,<x23=reg128#13%bot,<y23=reg128#4%bot |
| 5835 ++# asm 2: vmlal.u32 <r4=q9,<x23=d24,<y23=d6 |
| 5836 ++vmlal.u32 q9,d24,d6 |
| 5837 ++ |
| 5838 ++# qhasm: r4[0,1] += x23[2] unsigned* y01[2]; r4[2,3] += x23[3] unsigned* y01[
3] |
| 5839 ++# asm 1: vmlal.u32 <r4=reg128#10,<x23=reg128#13%top,<y01=reg128#2%top |
| 5840 ++# asm 2: vmlal.u32 <r4=q9,<x23=d25,<y01=d3 |
| 5841 ++vmlal.u32 q9,d25,d3 |
| 5842 ++ |
| 5843 ++# qhasm: r4[0,1] += x4[0] unsigned* y01[0]; r4[2,3] += x4[1] unsigned* y01[
1] |
| 5844 ++# asm 1: vmlal.u32 <r4=reg128#10,<x4=reg128#14%bot,<y01=reg128#2%bot |
| 5845 ++# asm 2: vmlal.u32 <r4=q9,<x4=d26,<y01=d2 |
| 5846 ++vmlal.u32 q9,d26,d2 |
| 5847 ++ |
| 5848 ++# qhasm: 2x t1 = r0 unsigned>> 26 |
| 5849 ++# asm 1: vshr.u64 >t1=reg128#2,<r0=reg128#15,#26 |
| 5850 ++# asm 2: vshr.u64 >t1=q1,<r0=q14,#26 |
| 5851 ++vshr.u64 q1,q14,#26 |
| 5852 ++ |
| 5853 ++# qhasm: r0 &= mask |
| 5854 ++# asm 1: vand >r0=reg128#4,<r0=reg128#15,<mask=reg128#1 |
| 5855 ++# asm 2: vand >r0=q3,<r0=q14,<mask=q0 |
| 5856 ++vand q3,q14,q0 |
| 5857 ++ |
| 5858 ++# qhasm: 2x r1 += t1 |
| 5859 ++# asm 1: vadd.i64 >r1=reg128#2,<r1=reg128#3,<t1=reg128#2 |
| 5860 ++# asm 2: vadd.i64 >r1=q1,<r1=q2,<t1=q1 |
| 5861 ++vadd.i64 q1,q2,q1 |
| 5862 ++ |
| 5863 ++# qhasm: 2x t4 = r3 unsigned>> 26 |
| 5864 ++# asm 1: vshr.u64 >t4=reg128#3,<r3=reg128#9,#26 |
| 5865 ++# asm 2: vshr.u64 >t4=q2,<r3=q8,#26 |
| 5866 ++vshr.u64 q2,q8,#26 |
| 5867 ++ |
| 5868 ++# qhasm: r3 &= mask |
| 5869 ++# asm 1: vand >r3=reg128#9,<r3=reg128#9,<mask=reg128#1 |
| 5870 ++# asm 2: vand >r3=q8,<r3=q8,<mask=q0 |
| 5871 ++vand q8,q8,q0 |
| 5872 ++ |
| 5873 ++# qhasm: 2x r4 += t4 |
| 5874 ++# asm 1: vadd.i64 >r4=reg128#3,<r4=reg128#10,<t4=reg128#3 |
| 5875 ++# asm 2: vadd.i64 >r4=q2,<r4=q9,<t4=q2 |
| 5876 ++vadd.i64 q2,q9,q2 |
| 5877 ++ |
| 5878 ++# qhasm: 2x t2 = r1 unsigned>> 26 |
| 5879 ++# asm 1: vshr.u64 >t2=reg128#10,<r1=reg128#2,#26 |
| 5880 ++# asm 2: vshr.u64 >t2=q9,<r1=q1,#26 |
| 5881 ++vshr.u64 q9,q1,#26 |
| 5882 ++ |
| 5883 ++# qhasm: r1 &= mask |
| 5884 ++# asm 1: vand >r1=reg128#2,<r1=reg128#2,<mask=reg128#1 |
| 5885 ++# asm 2: vand >r1=q1,<r1=q1,<mask=q0 |
| 5886 ++vand q1,q1,q0 |
| 5887 ++ |
| 5888 ++# qhasm: 2x t0 = r4 unsigned>> 26 |
| 5889 ++# asm 1: vshr.u64 >t0=reg128#11,<r4=reg128#3,#26 |
| 5890 ++# asm 2: vshr.u64 >t0=q10,<r4=q2,#26 |
| 5891 ++vshr.u64 q10,q2,#26 |
| 5892 ++ |
| 5893 ++# qhasm: 2x r2 += t2 |
| 5894 ++# asm 1: vadd.i64 >r2=reg128#10,<r2=reg128#16,<t2=reg128#10 |
| 5895 ++# asm 2: vadd.i64 >r2=q9,<r2=q15,<t2=q9 |
| 5896 ++vadd.i64 q9,q15,q9 |
| 5897 ++ |
| 5898 ++# qhasm: r4 &= mask |
| 5899 ++# asm 1: vand >r4=reg128#3,<r4=reg128#3,<mask=reg128#1 |
| 5900 ++# asm 2: vand >r4=q2,<r4=q2,<mask=q0 |
| 5901 ++vand q2,q2,q0 |
| 5902 ++ |
| 5903 ++# qhasm: 2x r0 += t0 |
| 5904 ++# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#11 |
| 5905 ++# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q10 |
| 5906 ++vadd.i64 q3,q3,q10 |
| 5907 ++ |
| 5908 ++# qhasm: 2x t0 <<= 2 |
| 5909 ++# asm 1: vshl.i64 >t0=reg128#11,<t0=reg128#11,#2 |
| 5910 ++# asm 2: vshl.i64 >t0=q10,<t0=q10,#2 |
| 5911 ++vshl.i64 q10,q10,#2 |
| 5912 ++ |
| 5913 ++# qhasm: 2x t3 = r2 unsigned>> 26 |
| 5914 ++# asm 1: vshr.u64 >t3=reg128#12,<r2=reg128#10,#26 |
| 5915 ++# asm 2: vshr.u64 >t3=q11,<r2=q9,#26 |
| 5916 ++vshr.u64 q11,q9,#26 |
| 5917 ++ |
| 5918 ++# qhasm: 2x r0 += t0 |
| 5919 ++# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#11 |
| 5920 ++# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q10 |
| 5921 ++vadd.i64 q3,q3,q10 |
| 5922 ++ |
| 5923 ++# qhasm: x23 = r2 & mask |
| 5924 ++# asm 1: vand >x23=reg128#10,<r2=reg128#10,<mask=reg128#1 |
| 5925 ++# asm 2: vand >x23=q9,<r2=q9,<mask=q0 |
| 5926 ++vand q9,q9,q0 |
| 5927 ++ |
| 5928 ++# qhasm: 2x r3 += t3 |
| 5929 ++# asm 1: vadd.i64 >r3=reg128#9,<r3=reg128#9,<t3=reg128#12 |
| 5930 ++# asm 2: vadd.i64 >r3=q8,<r3=q8,<t3=q11 |
| 5931 ++vadd.i64 q8,q8,q11 |
| 5932 ++ |
| 5933 ++# qhasm: 2x t1 = r0 unsigned>> 26 |
| 5934 ++# asm 1: vshr.u64 >t1=reg128#11,<r0=reg128#4,#26 |
| 5935 ++# asm 2: vshr.u64 >t1=q10,<r0=q3,#26 |
| 5936 ++vshr.u64 q10,q3,#26 |
| 5937 ++ |
| 5938 ++# qhasm: x23 = x23[0,2,1,3] |
| 5939 ++# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top |
| 5940 ++# asm 2: vtrn.32 <x23=d18,<x23=d19 |
| 5941 ++vtrn.32 d18,d19 |
| 5942 ++ |
| 5943 ++# qhasm: x01 = r0 & mask |
| 5944 ++# asm 1: vand >x01=reg128#4,<r0=reg128#4,<mask=reg128#1 |
| 5945 ++# asm 2: vand >x01=q3,<r0=q3,<mask=q0 |
| 5946 ++vand q3,q3,q0 |
| 5947 ++ |
| 5948 ++# qhasm: 2x r1 += t1 |
| 5949 ++# asm 1: vadd.i64 >r1=reg128#2,<r1=reg128#2,<t1=reg128#11 |
| 5950 ++# asm 2: vadd.i64 >r1=q1,<r1=q1,<t1=q10 |
| 5951 ++vadd.i64 q1,q1,q10 |
| 5952 ++ |
| 5953 ++# qhasm: 2x t4 = r3 unsigned>> 26 |
| 5954 ++# asm 1: vshr.u64 >t4=reg128#11,<r3=reg128#9,#26 |
| 5955 ++# asm 2: vshr.u64 >t4=q10,<r3=q8,#26 |
| 5956 ++vshr.u64 q10,q8,#26 |
| 5957 ++ |
| 5958 ++# qhasm: x01 = x01[0,2,1,3] |
| 5959 ++# asm 1: vtrn.32 <x01=reg128#4%bot,<x01=reg128#4%top |
| 5960 ++# asm 2: vtrn.32 <x01=d6,<x01=d7 |
| 5961 ++vtrn.32 d6,d7 |
| 5962 ++ |
| 5963 ++# qhasm: r3 &= mask |
| 5964 ++# asm 1: vand >r3=reg128#1,<r3=reg128#9,<mask=reg128#1 |
| 5965 ++# asm 2: vand >r3=q0,<r3=q8,<mask=q0 |
| 5966 ++vand q0,q8,q0 |
| 5967 ++ |
| 5968 ++# qhasm: r1 = r1[0,2,1,3] |
| 5969 ++# asm 1: vtrn.32 <r1=reg128#2%bot,<r1=reg128#2%top |
| 5970 ++# asm 2: vtrn.32 <r1=d2,<r1=d3 |
| 5971 ++vtrn.32 d2,d3 |
| 5972 ++ |
| 5973 ++# qhasm: 2x x4 = r4 + t4 |
| 5974 ++# asm 1: vadd.i64 >x4=reg128#3,<r4=reg128#3,<t4=reg128#11 |
| 5975 ++# asm 2: vadd.i64 >x4=q2,<r4=q2,<t4=q10 |
| 5976 ++vadd.i64 q2,q2,q10 |
| 5977 ++ |
| 5978 ++# qhasm: r3 = r3[0,2,1,3] |
| 5979 ++# asm 1: vtrn.32 <r3=reg128#1%bot,<r3=reg128#1%top |
| 5980 ++# asm 2: vtrn.32 <r3=d0,<r3=d1 |
| 5981 ++vtrn.32 d0,d1 |
| 5982 ++ |
| 5983 ++# qhasm: x01 = x01[0,1] r1[0,1] |
| 5984 ++# asm 1: vext.32 <x01=reg128#4%top,<r1=reg128#2%bot,<r1=reg128#2%bot,#0 |
| 5985 ++# asm 2: vext.32 <x01=d7,<r1=d2,<r1=d2,#0 |
| 5986 ++vext.32 d7,d2,d2,#0 |
| 5987 ++ |
| 5988 ++# qhasm: x23 = x23[0,1] r3[0,1] |
| 5989 ++# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#1%bot,<r3=reg128#1%bot,#0 |
| 5990 ++# asm 2: vext.32 <x23=d19,<r3=d0,<r3=d0,#0 |
| 5991 ++vext.32 d19,d0,d0,#0 |
| 5992 ++ |
| 5993 ++# qhasm: x4 = x4[0,2,1,3] |
| 5994 ++# asm 1: vtrn.32 <x4=reg128#3%bot,<x4=reg128#3%top |
| 5995 ++# asm 2: vtrn.32 <x4=d4,<x4=d5 |
| 5996 ++vtrn.32 d4,d5 |
| 5997 ++ |
| 5998 ++# qhasm: mem128[input_0] aligned= x01;input_0+=16 |
| 5999 ++# asm 1: vst1.8 {<x01=reg128#4%bot-<x01=reg128#4%top},[<input_0=int32#1,: 128]
! |
| 6000 ++# asm 2: vst1.8 {<x01=d6-<x01=d7},[<input_0=r0,: 128]! |
| 6001 ++vst1.8 {d6-d7},[r0,: 128]! |
| 6002 ++ |
| 6003 ++# qhasm: mem128[input_0] aligned= x23;input_0+=16 |
| 6004 ++# asm 1: vst1.8 {<x23=reg128#10%bot-<x23=reg128#10%top},[<input_0=int32#1,: 12
8]! |
| 6005 ++# asm 2: vst1.8 {<x23=d18-<x23=d19},[<input_0=r0,: 128]! |
| 6006 ++vst1.8 {d18-d19},[r0,: 128]! |
| 6007 ++ |
| 6008 ++# qhasm: mem64[input_0] aligned= x4[0] |
| 6009 ++# asm 1: vst1.8 <x4=reg128#3%bot,[<input_0=int32#1,: 64] |
| 6010 ++# asm 2: vst1.8 <x4=d4,[<input_0=r0,: 64] |
| 6011 ++vst1.8 d4,[r0,: 64] |
| 6012 ++ |
| 6013 ++# qhasm: return |
| 6014 ++add sp,sp,#0 |
| 6015 ++bx lr |
| 6016 +diff --git a/crypto/poly1305/poly1305_vec.c b/crypto/poly1305/poly1305_vec.c |
| 6017 +new file mode 100644 |
| 6018 +index 0000000..c546200 |
| 6019 +--- /dev/null |
| 6020 ++++ b/crypto/poly1305/poly1305_vec.c |
| 6021 +@@ -0,0 +1,733 @@ |
| 6022 ++/* ==================================================================== |
| 6023 ++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. |
| 6024 ++ * |
| 6025 ++ * Redistribution and use in source and binary forms, with or without |
| 6026 ++ * modification, are permitted provided that the following conditions |
| 6027 ++ * are met: |
| 6028 ++ * |
| 6029 ++ * 1. Redistributions of source code must retain the above copyright |
| 6030 ++ * notice, this list of conditions and the following disclaimer. |
| 6031 ++ * |
| 6032 ++ * 2. Redistributions in binary form must reproduce the above copyright |
| 6033 ++ * notice, this list of conditions and the following disclaimer in |
| 6034 ++ * the documentation and/or other materials provided with the |
| 6035 ++ * distribution. |
| 6036 ++ * |
| 6037 ++ * 3. All advertising materials mentioning features or use of this |
| 6038 ++ * software must display the following acknowledgment: |
| 6039 ++ * "This product includes software developed by the OpenSSL Project |
| 6040 ++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" |
| 6041 ++ * |
| 6042 ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 6043 ++ * endorse or promote products derived from this software without |
| 6044 ++ * prior written permission. For written permission, please contact |
| 6045 ++ * licensing@OpenSSL.org. |
| 6046 ++ * |
| 6047 ++ * 5. Products derived from this software may not be called "OpenSSL" |
| 6048 ++ * nor may "OpenSSL" appear in their names without prior written |
| 6049 ++ * permission of the OpenSSL Project. |
| 6050 ++ * |
| 6051 ++ * 6. Redistributions of any form whatsoever must retain the following |
| 6052 ++ * acknowledgment: |
| 6053 ++ * "This product includes software developed by the OpenSSL Project |
| 6054 ++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" |
| 6055 ++ * |
| 6056 ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 6057 ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 6058 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 6059 ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 6060 ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 6061 ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 6062 ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 6063 ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 6064 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 6065 ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 6066 ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 6067 ++ * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 6068 ++ * ==================================================================== |
| 6069 ++ */ |
| 6070 ++ |
| 6071 ++/* This implementation of poly1305 is by Andrew Moon |
| 6072 ++ * (https://github.com/floodyberry/poly1305-donna) and released as public |
| 6073 ++ * domain. It implements SIMD vectorization based on the algorithm described i
n |
| 6074 ++ * http://cr.yp.to/papers.html#neoncrypto. Unrolled to 2 powers, i.e. 64 byte |
| 6075 ++ * block size |
| 6076 ++*/ |
| 6077 ++ |
| 6078 ++#include <emmintrin.h> |
| 6079 ++#include <stdint.h> |
| 6080 ++#include <openssl/opensslconf.h> |
| 6081 ++ |
| 6082 ++#if !defined(OPENSSL_NO_POLY1305) |
| 6083 ++ |
| 6084 ++#include <openssl/poly1305.h> |
| 6085 ++ |
| 6086 ++#define ALIGN(x) __attribute__((aligned(x))) |
| 6087 ++#define INLINE inline |
| 6088 ++#define U8TO64_LE(m) (*(uint64_t*)(m)) |
| 6089 ++#define U8TO32_LE(m) (*(uint32_t*)(m)) |
| 6090 ++#define U64TO8_LE(m,v) (*(uint64_t*)(m)) = v |
| 6091 ++ |
| 6092 ++typedef __m128i xmmi; |
| 6093 ++typedef unsigned __int128 uint128_t; |
| 6094 ++ |
| 6095 ++static const uint32_t ALIGN(16) poly1305_x64_sse2_message_mask[4] = |
| 6096 ++ {(1 << 26) - 1, 0, (1 << 26) - 1, 0}; |
| 6097 ++static const uint32_t ALIGN(16) poly1305_x64_sse2_5[4] = {5, 0, 5, 0}; |
| 6098 ++static const uint32_t ALIGN(16) poly1305_x64_sse2_1shl128[4] = |
| 6099 ++ {(1 << 24), 0, (1 << 24), 0}; |
| 6100 ++ |
| 6101 ++static uint128_t INLINE |
| 6102 ++add128(uint128_t a, uint128_t b) |
| 6103 ++ { |
| 6104 ++ return a + b; |
| 6105 ++ } |
| 6106 ++ |
| 6107 ++static uint128_t INLINE |
| 6108 ++add128_64(uint128_t a, uint64_t b) |
| 6109 ++ { |
| 6110 ++ return a + b; |
| 6111 ++ } |
| 6112 ++ |
| 6113 ++static uint128_t INLINE |
| 6114 ++mul64x64_128(uint64_t a, uint64_t b) |
| 6115 ++ { |
| 6116 ++ return (uint128_t)a * b; |
| 6117 ++ } |
| 6118 ++ |
| 6119 ++static uint64_t INLINE |
| 6120 ++lo128(uint128_t a) |
| 6121 ++ { |
| 6122 ++ return (uint64_t)a; |
| 6123 ++ } |
| 6124 ++ |
| 6125 ++static uint64_t INLINE |
| 6126 ++shr128(uint128_t v, const int shift) |
| 6127 ++ { |
| 6128 ++ return (uint64_t)(v >> shift); |
| 6129 ++ } |
| 6130 ++ |
| 6131 ++static uint64_t INLINE |
| 6132 ++shr128_pair(uint64_t hi, uint64_t lo, const int shift) |
| 6133 ++ { |
| 6134 ++ return (uint64_t)((((uint128_t)hi << 64) | lo) >> shift); |
| 6135 ++ } |
| 6136 ++ |
| 6137 ++typedef struct poly1305_power_t |
| 6138 ++ { |
| 6139 ++ union |
| 6140 ++ { |
| 6141 ++ xmmi v; |
| 6142 ++ uint64_t u[2]; |
| 6143 ++ uint32_t d[4]; |
| 6144 ++ } R20,R21,R22,R23,R24,S21,S22,S23,S24; |
| 6145 ++ } poly1305_power; |
| 6146 ++ |
| 6147 ++typedef struct poly1305_state_internal_t |
| 6148 ++ { |
| 6149 ++ poly1305_power P[2]; /* 288 bytes, top 32 bit halves unused = 144 |
| 6150 ++ bytes of free storage */ |
| 6151 ++ union |
| 6152 ++ { |
| 6153 ++ xmmi H[5]; /* 80 bytes */ |
| 6154 ++ uint64_t HH[10]; |
| 6155 ++ }; |
| 6156 ++ /* uint64_t r0,r1,r2; [24 bytes] */ |
| 6157 ++ /* uint64_t pad0,pad1; [16 bytes] */ |
| 6158 ++ uint64_t started; /* 8 bytes */ |
| 6159 ++ uint64_t leftover; /* 8 bytes */ |
| 6160 ++ uint8_t buffer[64]; /* 64 bytes */ |
| 6161 ++ } poly1305_state_internal; /* 448 bytes total + 63 bytes for |
| 6162 ++ alignment = 511 bytes raw */ |
| 6163 ++ |
| 6164 ++static poly1305_state_internal INLINE |
| 6165 ++*poly1305_aligned_state(poly1305_state *state) |
| 6166 ++ { |
| 6167 ++ return (poly1305_state_internal *)(((uint64_t)state + 63) & ~63); |
| 6168 ++ } |
| 6169 ++ |
| 6170 ++/* copy 0-63 bytes */ |
| 6171 ++static void INLINE |
| 6172 ++poly1305_block_copy(uint8_t *dst, const uint8_t *src, size_t bytes) |
| 6173 ++ { |
| 6174 ++ size_t offset = src - dst; |
| 6175 ++ if (bytes & 32) |
| 6176 ++ { |
| 6177 ++ _mm_storeu_si128((xmmi *)(dst + 0), _mm_loadu_si128((xmmi *)(dst
+ offset + 0))); |
| 6178 ++ _mm_storeu_si128((xmmi *)(dst + 16), _mm_loadu_si128((xmmi *)(ds
t + offset + 16))); |
| 6179 ++ dst += 32; |
| 6180 ++ } |
| 6181 ++ if (bytes & 16) |
| 6182 ++ { |
| 6183 ++ _mm_storeu_si128((xmmi *)dst, |
| 6184 ++ _mm_loadu_si128((xmmi *)(dst + offset))); |
| 6185 ++ dst += 16; |
| 6186 ++ } |
| 6187 ++ if (bytes & 8) |
| 6188 ++ { |
| 6189 ++ *(uint64_t *)dst = *(uint64_t *)(dst + offset); |
| 6190 ++ dst += 8; |
| 6191 ++ } |
| 6192 ++ if (bytes & 4) |
| 6193 ++ { |
| 6194 ++ *(uint32_t *)dst = *(uint32_t *)(dst + offset); |
| 6195 ++ dst += 4; |
| 6196 ++ } |
| 6197 ++ if (bytes & 2) |
| 6198 ++ { |
| 6199 ++ *(uint16_t *)dst = *(uint16_t *)(dst + offset); |
| 6200 ++ dst += 2; |
| 6201 ++ } |
| 6202 ++ if (bytes & 1) |
| 6203 ++ { |
| 6204 ++ *( uint8_t *)dst = *( uint8_t *)(dst + offset); |
| 6205 ++ } |
| 6206 ++ } |
| 6207 ++ |
| 6208 ++/* zero 0-15 bytes */ |
| 6209 ++static void INLINE |
| 6210 ++poly1305_block_zero(uint8_t *dst, size_t bytes) |
| 6211 ++ { |
| 6212 ++ if (bytes & 8) { *(uint64_t *)dst = 0; dst += 8; } |
| 6213 ++ if (bytes & 4) { *(uint32_t *)dst = 0; dst += 4; } |
| 6214 ++ if (bytes & 2) { *(uint16_t *)dst = 0; dst += 2; } |
| 6215 ++ if (bytes & 1) { *( uint8_t *)dst = 0; } |
| 6216 ++ } |
| 6217 ++ |
| 6218 ++static size_t INLINE |
| 6219 ++poly1305_min(size_t a, size_t b) |
| 6220 ++ { |
| 6221 ++ return (a < b) ? a : b; |
| 6222 ++ } |
| 6223 ++ |
| 6224 ++void |
| 6225 ++CRYPTO_poly1305_init(poly1305_state *state, const unsigned char key[32]) |
| 6226 ++ { |
| 6227 ++ poly1305_state_internal *st = poly1305_aligned_state(state); |
| 6228 ++ poly1305_power *p; |
| 6229 ++ uint64_t r0,r1,r2; |
| 6230 ++ uint64_t t0,t1; |
| 6231 ++ |
| 6232 ++ /* clamp key */ |
| 6233 ++ t0 = U8TO64_LE(key + 0); |
| 6234 ++ t1 = U8TO64_LE(key + 8); |
| 6235 ++ r0 = t0 & 0xffc0fffffff; t0 >>= 44; t0 |= t1 << 20; |
| 6236 ++ r1 = t0 & 0xfffffc0ffff; t1 >>= 24; |
| 6237 ++ r2 = t1 & 0x00ffffffc0f; |
| 6238 ++ |
| 6239 ++ /* store r in un-used space of st->P[1] */ |
| 6240 ++ p = &st->P[1]; |
| 6241 ++ p->R20.d[1] = (uint32_t)(r0 ); |
| 6242 ++ p->R20.d[3] = (uint32_t)(r0 >> 32); |
| 6243 ++ p->R21.d[1] = (uint32_t)(r1 ); |
| 6244 ++ p->R21.d[3] = (uint32_t)(r1 >> 32); |
| 6245 ++ p->R22.d[1] = (uint32_t)(r2 ); |
| 6246 ++ p->R22.d[3] = (uint32_t)(r2 >> 32); |
| 6247 ++ |
| 6248 ++ /* store pad */ |
| 6249 ++ p->R23.d[1] = U8TO32_LE(key + 16); |
| 6250 ++ p->R23.d[3] = U8TO32_LE(key + 20); |
| 6251 ++ p->R24.d[1] = U8TO32_LE(key + 24); |
| 6252 ++ p->R24.d[3] = U8TO32_LE(key + 28); |
| 6253 ++ |
| 6254 ++ /* H = 0 */ |
| 6255 ++ st->H[0] = _mm_setzero_si128(); |
| 6256 ++ st->H[1] = _mm_setzero_si128(); |
| 6257 ++ st->H[2] = _mm_setzero_si128(); |
| 6258 ++ st->H[3] = _mm_setzero_si128(); |
| 6259 ++ st->H[4] = _mm_setzero_si128(); |
| 6260 ++ |
| 6261 ++ st->started = 0; |
| 6262 ++ st->leftover = 0; |
| 6263 ++ } |
| 6264 ++ |
| 6265 ++static void |
| 6266 ++poly1305_first_block(poly1305_state_internal *st, const uint8_t *m) |
| 6267 ++ { |
| 6268 ++ const xmmi MMASK = |
| 6269 ++ _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); |
| 6270 ++ const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5); |
| 6271 ++ const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128); |
| 6272 ++ xmmi T5,T6; |
| 6273 ++ poly1305_power *p; |
| 6274 ++ uint128_t d[3]; |
| 6275 ++ uint64_t r0,r1,r2; |
| 6276 ++ uint64_t r20,r21,r22,s22; |
| 6277 ++ uint64_t pad0,pad1; |
| 6278 ++ uint64_t c; |
| 6279 ++ uint64_t i; |
| 6280 ++ |
| 6281 ++ /* pull out stored info */ |
| 6282 ++ p = &st->P[1]; |
| 6283 ++ |
| 6284 ++ r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; |
| 6285 ++ r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; |
| 6286 ++ r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; |
| 6287 ++ pad0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1]; |
| 6288 ++ pad1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1]; |
| 6289 ++ |
| 6290 ++ /* compute powers r^2,r^4 */ |
| 6291 ++ r20 = r0; |
| 6292 ++ r21 = r1; |
| 6293 ++ r22 = r2; |
| 6294 ++ for (i = 0; i < 2; i++) |
| 6295 ++ { |
| 6296 ++ s22 = r22 * (5 << 2); |
| 6297 ++ |
| 6298 ++ d[0] = add128(mul64x64_128(r20, r20), mul64x64_128(r21 * 2, s22)
); |
| 6299 ++ d[1] = add128(mul64x64_128(r22, s22), mul64x64_128(r20 * 2, r21)
); |
| 6300 ++ d[2] = add128(mul64x64_128(r21, r21), mul64x64_128(r22 * 2, r20)
); |
| 6301 ++ |
| 6302 ++ r20 = lo128(d[0]) & 0xfffffffffff; c
= shr128(d[0], 44); |
| 6303 ++ d[1] = add128_64(d[1], c); r21 = lo128(d[1]) & 0xfffffffffff; c
= shr128(d[1], 44); |
| 6304 ++ d[2] = add128_64(d[2], c); r22 = lo128(d[2]) & 0x3ffffffffff; c
= shr128(d[2], 42); |
| 6305 ++ r20 += c * 5; c = (r20 >> 44); r20 = r20 & 0xfffffffffff; |
| 6306 ++ r21 += c; |
| 6307 ++ |
| 6308 ++ p->R20.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)( r20
) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); |
| 6309 ++ p->R21.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r20 >
> 26) | (r21 << 18)) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); |
| 6310 ++ p->R22.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >
> 8) ) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); |
| 6311 ++ p->R23.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >
> 34) | (r22 << 10)) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); |
| 6312 ++ p->R24.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r22 >
> 16) ) ), _MM_SHUFFLE(1,0,1,0)); |
| 6313 ++ p->S21.v = _mm_mul_epu32(p->R21.v, FIVE); |
| 6314 ++ p->S22.v = _mm_mul_epu32(p->R22.v, FIVE); |
| 6315 ++ p->S23.v = _mm_mul_epu32(p->R23.v, FIVE); |
| 6316 ++ p->S24.v = _mm_mul_epu32(p->R24.v, FIVE); |
| 6317 ++ p--; |
| 6318 ++ } |
| 6319 ++ |
| 6320 ++ /* put saved info back */ |
| 6321 ++ p = &st->P[1]; |
| 6322 ++ p->R20.d[1] = (uint32_t)(r0 ); |
| 6323 ++ p->R20.d[3] = (uint32_t)(r0 >> 32); |
| 6324 ++ p->R21.d[1] = (uint32_t)(r1 ); |
| 6325 ++ p->R21.d[3] = (uint32_t)(r1 >> 32); |
| 6326 ++ p->R22.d[1] = (uint32_t)(r2 ); |
| 6327 ++ p->R22.d[3] = (uint32_t)(r2 >> 32); |
| 6328 ++ p->R23.d[1] = (uint32_t)(pad0 ); |
| 6329 ++ p->R23.d[3] = (uint32_t)(pad0 >> 32); |
| 6330 ++ p->R24.d[1] = (uint32_t)(pad1 ); |
| 6331 ++ p->R24.d[3] = (uint32_t)(pad1 >> 32); |
| 6332 ++ |
| 6333 ++ /* H = [Mx,My] */ |
| 6334 ++ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi6
4((xmmi *)(m + 16))); |
| 6335 ++ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi6
4((xmmi *)(m + 24))); |
| 6336 ++ st->H[0] = _mm_and_si128(MMASK, T5); |
| 6337 ++ st->H[1] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); |
| 6338 ++ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)); |
| 6339 ++ st->H[2] = _mm_and_si128(MMASK, T5); |
| 6340 ++ st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); |
| 6341 ++ st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); |
| 6342 ++ } |
| 6343 ++ |
| 6344 ++static void |
| 6345 ++poly1305_blocks(poly1305_state_internal *st, const uint8_t *m, size_t bytes) |
| 6346 ++ { |
| 6347 ++ const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask
); |
| 6348 ++ const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5); |
| 6349 ++ const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128); |
| 6350 ++ |
| 6351 ++ poly1305_power *p; |
| 6352 ++ xmmi H0,H1,H2,H3,H4; |
| 6353 ++ xmmi T0,T1,T2,T3,T4,T5,T6; |
| 6354 ++ xmmi M0,M1,M2,M3,M4; |
| 6355 ++ xmmi C1,C2; |
| 6356 ++ |
| 6357 ++ H0 = st->H[0]; |
| 6358 ++ H1 = st->H[1]; |
| 6359 ++ H2 = st->H[2]; |
| 6360 ++ H3 = st->H[3]; |
| 6361 ++ H4 = st->H[4]; |
| 6362 ++ |
| 6363 ++ while (bytes >= 64) |
| 6364 ++ { |
| 6365 ++ /* H *= [r^4,r^4] */ |
| 6366 ++ p = &st->P[0]; |
| 6367 ++ T0 = _mm_mul_epu32(H0, p->R20.v); |
| 6368 ++ T1 = _mm_mul_epu32(H0, p->R21.v); |
| 6369 ++ T2 = _mm_mul_epu32(H0, p->R22.v); |
| 6370 ++ T3 = _mm_mul_epu32(H0, p->R23.v); |
| 6371 ++ T4 = _mm_mul_epu32(H0, p->R24.v); |
| 6372 ++ T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 6373 ++ T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 6374 ++ T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 6375 ++ T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 6376 ++ T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 6377 ++ T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 6378 ++ T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 6379 ++ T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 6380 ++ T5 = _mm_mul_epu32(H1, p->R23.v);
T4 = _mm_add_epi64(T4, T5); |
| 6381 ++ T5 = _mm_mul_epu32(H2, p->R22.v);
T4 = _mm_add_epi64(T4, T5); |
| 6382 ++ T5 = _mm_mul_epu32(H3, p->R21.v);
T4 = _mm_add_epi64(T4, T5); |
| 6383 ++ T5 = _mm_mul_epu32(H4, p->R20.v);
T4 = _mm_add_epi64(T4, T5); |
| 6384 ++ |
| 6385 ++ /* H += [Mx,My]*[r^2,r^2] */ |
| 6386 ++ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_lo
adl_epi64((xmmi *)(m + 16))); |
| 6387 ++ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_lo
adl_epi64((xmmi *)(m + 24))); |
| 6388 ++ M0 = _mm_and_si128(MMASK, T5); |
| 6389 ++ M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); |
| 6390 ++ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)
); |
| 6391 ++ M2 = _mm_and_si128(MMASK, T5); |
| 6392 ++ M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); |
| 6393 ++ M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); |
| 6394 ++ |
| 6395 ++ p = &st->P[1]; |
| 6396 ++ T5 = _mm_mul_epu32(M0, p->R20.v); T6 = _mm_mul_epu32(M0, p->R21.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 6397 ++ T5 = _mm_mul_epu32(M1, p->S24.v); T6 = _mm_mul_epu32(M1, p->R20.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 6398 ++ T5 = _mm_mul_epu32(M2, p->S23.v); T6 = _mm_mul_epu32(M2, p->S24.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 6399 ++ T5 = _mm_mul_epu32(M3, p->S22.v); T6 = _mm_mul_epu32(M3, p->S23.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 6400 ++ T5 = _mm_mul_epu32(M4, p->S21.v); T6 = _mm_mul_epu32(M4, p->S22.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 6401 ++ T5 = _mm_mul_epu32(M0, p->R22.v); T6 = _mm_mul_epu32(M0, p->R23.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 6402 ++ T5 = _mm_mul_epu32(M1, p->R21.v); T6 = _mm_mul_epu32(M1, p->R22.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 6403 ++ T5 = _mm_mul_epu32(M2, p->R20.v); T6 = _mm_mul_epu32(M2, p->R21.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 6404 ++ T5 = _mm_mul_epu32(M3, p->S24.v); T6 = _mm_mul_epu32(M3, p->R20.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 6405 ++ T5 = _mm_mul_epu32(M4, p->S23.v); T6 = _mm_mul_epu32(M4, p->S24.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 6406 ++ T5 = _mm_mul_epu32(M0, p->R24.v);
T4 = _mm_add_epi64(T4, T5); |
| 6407 ++ T5 = _mm_mul_epu32(M1, p->R23.v);
T4 = _mm_add_epi64(T4, T5); |
| 6408 ++ T5 = _mm_mul_epu32(M2, p->R22.v);
T4 = _mm_add_epi64(T4, T5); |
| 6409 ++ T5 = _mm_mul_epu32(M3, p->R21.v);
T4 = _mm_add_epi64(T4, T5); |
| 6410 ++ T5 = _mm_mul_epu32(M4, p->R20.v);
T4 = _mm_add_epi64(T4, T5); |
| 6411 ++ |
| 6412 ++ /* H += [Mx,My] */ |
| 6413 ++ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 32)), _mm_l
oadl_epi64((xmmi *)(m + 48))); |
| 6414 ++ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 40)), _mm_l
oadl_epi64((xmmi *)(m + 56))); |
| 6415 ++ M0 = _mm_and_si128(MMASK, T5); |
| 6416 ++ M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); |
| 6417 ++ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)
); |
| 6418 ++ M2 = _mm_and_si128(MMASK, T5); |
| 6419 ++ M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); |
| 6420 ++ M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); |
| 6421 ++ |
| 6422 ++ T0 = _mm_add_epi64(T0, M0); |
| 6423 ++ T1 = _mm_add_epi64(T1, M1); |
| 6424 ++ T2 = _mm_add_epi64(T2, M2); |
| 6425 ++ T3 = _mm_add_epi64(T3, M3); |
| 6426 ++ T4 = _mm_add_epi64(T4, M4); |
| 6427 ++ |
| 6428 ++ /* reduce */ |
| 6429 ++ C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _
mm_and_si128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C
1); T4 = _mm_add_epi64(T4, C2); |
| 6430 ++ C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _
mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C
1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); |
| 6431 ++ C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _
mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C
1); T1 = _mm_add_epi64(T1, C2); |
| 6432 ++ C1 = _mm_srli_epi64(T3, 26); T3 = _
mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C
1); |
| 6433 ++ |
| 6434 ++ /* H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx,My]) */ |
| 6435 ++ H0 = T0; |
| 6436 ++ H1 = T1; |
| 6437 ++ H2 = T2; |
| 6438 ++ H3 = T3; |
| 6439 ++ H4 = T4; |
| 6440 ++ |
| 6441 ++ m += 64; |
| 6442 ++ bytes -= 64; |
| 6443 ++ } |
| 6444 ++ |
| 6445 ++ st->H[0] = H0; |
| 6446 ++ st->H[1] = H1; |
| 6447 ++ st->H[2] = H2; |
| 6448 ++ st->H[3] = H3; |
| 6449 ++ st->H[4] = H4; |
| 6450 ++ } |
| 6451 ++ |
| 6452 ++static size_t |
| 6453 ++poly1305_combine(poly1305_state_internal *st, const uint8_t *m, size_t bytes) |
| 6454 ++ { |
| 6455 ++ const xmmi MMASK = |
| 6456 ++ _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); |
| 6457 ++ const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128); |
| 6458 ++ const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5); |
| 6459 ++ |
| 6460 ++ poly1305_power *p; |
| 6461 ++ xmmi H0,H1,H2,H3,H4; |
| 6462 ++ xmmi M0,M1,M2,M3,M4; |
| 6463 ++ xmmi T0,T1,T2,T3,T4,T5,T6; |
| 6464 ++ xmmi C1,C2; |
| 6465 ++ |
| 6466 ++ uint64_t r0,r1,r2; |
| 6467 ++ uint64_t t0,t1,t2,t3,t4; |
| 6468 ++ uint64_t c; |
| 6469 ++ size_t consumed = 0; |
| 6470 ++ |
| 6471 ++ H0 = st->H[0]; |
| 6472 ++ H1 = st->H[1]; |
| 6473 ++ H2 = st->H[2]; |
| 6474 ++ H3 = st->H[3]; |
| 6475 ++ H4 = st->H[4]; |
| 6476 ++ |
| 6477 ++ /* p = [r^2,r^2] */ |
| 6478 ++ p = &st->P[1]; |
| 6479 ++ |
| 6480 ++ if (bytes >= 32) |
| 6481 ++ { |
| 6482 ++ /* H *= [r^2,r^2] */ |
| 6483 ++ T0 = _mm_mul_epu32(H0, p->R20.v); |
| 6484 ++ T1 = _mm_mul_epu32(H0, p->R21.v); |
| 6485 ++ T2 = _mm_mul_epu32(H0, p->R22.v); |
| 6486 ++ T3 = _mm_mul_epu32(H0, p->R23.v); |
| 6487 ++ T4 = _mm_mul_epu32(H0, p->R24.v); |
| 6488 ++ T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 6489 ++ T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 6490 ++ T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 6491 ++ T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 6492 ++ T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 6493 ++ T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 6494 ++ T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 6495 ++ T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 6496 ++ T5 = _mm_mul_epu32(H1, p->R23.v);
T4 = _mm_add_epi64(T4, T5); |
| 6497 ++ T5 = _mm_mul_epu32(H2, p->R22.v);
T4 = _mm_add_epi64(T4, T5); |
| 6498 ++ T5 = _mm_mul_epu32(H3, p->R21.v);
T4 = _mm_add_epi64(T4, T5); |
| 6499 ++ T5 = _mm_mul_epu32(H4, p->R20.v);
T4 = _mm_add_epi64(T4, T5); |
| 6500 ++ |
| 6501 ++ /* H += [Mx,My] */ |
| 6502 ++ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_lo
adl_epi64((xmmi *)(m + 16))); |
| 6503 ++ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_lo
adl_epi64((xmmi *)(m + 24))); |
| 6504 ++ M0 = _mm_and_si128(MMASK, T5); |
| 6505 ++ M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); |
| 6506 ++ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)
); |
| 6507 ++ M2 = _mm_and_si128(MMASK, T5); |
| 6508 ++ M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); |
| 6509 ++ M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); |
| 6510 ++ |
| 6511 ++ T0 = _mm_add_epi64(T0, M0); |
| 6512 ++ T1 = _mm_add_epi64(T1, M1); |
| 6513 ++ T2 = _mm_add_epi64(T2, M2); |
| 6514 ++ T3 = _mm_add_epi64(T3, M3); |
| 6515 ++ T4 = _mm_add_epi64(T4, M4); |
| 6516 ++ |
| 6517 ++ /* reduce */ |
| 6518 ++ C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _
mm_and_si128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C
1); T4 = _mm_add_epi64(T4, C2); |
| 6519 ++ C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _
mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C
1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); |
| 6520 ++ C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _
mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C
1); T1 = _mm_add_epi64(T1, C2); |
| 6521 ++ C1 = _mm_srli_epi64(T3, 26); T3 = _
mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C
1); |
| 6522 ++ |
| 6523 ++ /* H = (H*[r^2,r^2] + [Mx,My]) */ |
| 6524 ++ H0 = T0; |
| 6525 ++ H1 = T1; |
| 6526 ++ H2 = T2; |
| 6527 ++ H3 = T3; |
| 6528 ++ H4 = T4; |
| 6529 ++ |
| 6530 ++ consumed = 32; |
| 6531 ++ } |
| 6532 ++ |
| 6533 ++ /* finalize, H *= [r^2,r] */ |
| 6534 ++ r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; |
| 6535 ++ r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; |
| 6536 ++ r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; |
| 6537 ++ |
| 6538 ++ p->R20.d[2] = (uint32_t)( r0 ) & 0x3ffffff; |
| 6539 ++ p->R21.d[2] = (uint32_t)((r0 >> 26) | (r1 << 18)) & 0x3ffffff; |
| 6540 ++ p->R22.d[2] = (uint32_t)((r1 >> 8) ) & 0x3ffffff; |
| 6541 ++ p->R23.d[2] = (uint32_t)((r1 >> 34) | (r2 << 10)) & 0x3ffffff; |
| 6542 ++ p->R24.d[2] = (uint32_t)((r2 >> 16) ) ; |
| 6543 ++ p->S21.d[2] = p->R21.d[2] * 5; |
| 6544 ++ p->S22.d[2] = p->R22.d[2] * 5; |
| 6545 ++ p->S23.d[2] = p->R23.d[2] * 5; |
| 6546 ++ p->S24.d[2] = p->R24.d[2] * 5; |
| 6547 ++ |
| 6548 ++ /* H *= [r^2,r] */ |
| 6549 ++ T0 = _mm_mul_epu32(H0, p->R20.v); |
| 6550 ++ T1 = _mm_mul_epu32(H0, p->R21.v); |
| 6551 ++ T2 = _mm_mul_epu32(H0, p->R22.v); |
| 6552 ++ T3 = _mm_mul_epu32(H0, p->R23.v); |
| 6553 ++ T4 = _mm_mul_epu32(H0, p->R24.v); |
| 6554 ++ T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20.v); T0 =
_mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 6555 ++ T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24.v); T0 =
_mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 6556 ++ T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23.v); T0 =
_mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 6557 ++ T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22.v); T0 =
_mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 6558 ++ T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22.v); T2 =
_mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 6559 ++ T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21.v); T2 =
_mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 6560 ++ T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20.v); T2 =
_mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 6561 ++ T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24.v); T2 =
_mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 6562 ++ T5 = _mm_mul_epu32(H1, p->R23.v); T4 =
_mm_add_epi64(T4, T5); |
| 6563 ++ T5 = _mm_mul_epu32(H2, p->R22.v); T4 =
_mm_add_epi64(T4, T5); |
| 6564 ++ T5 = _mm_mul_epu32(H3, p->R21.v); T4 =
_mm_add_epi64(T4, T5); |
| 6565 ++ T5 = _mm_mul_epu32(H4, p->R20.v); T4 =
_mm_add_epi64(T4, T5); |
| 6566 ++ |
| 6567 ++ C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _mm_and_s
i128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C1); T4 =
_mm_add_epi64(T4, C2); |
| 6568 ++ C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _mm_and_s
i128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C1); T0 =
_mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); |
| 6569 ++ C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _mm_and_s
i128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C1); T1 =
_mm_add_epi64(T1, C2); |
| 6570 ++ C1 = _mm_srli_epi64(T3, 26); T3 = _mm_and_s
i128(T3, MMASK); T4 = _mm_add_epi64(T4, C1); |
| 6571 ++ |
| 6572 ++ /* H = H[0]+H[1] */ |
| 6573 ++ H0 = _mm_add_epi64(T0, _mm_srli_si128(T0, 8)); |
| 6574 ++ H1 = _mm_add_epi64(T1, _mm_srli_si128(T1, 8)); |
| 6575 ++ H2 = _mm_add_epi64(T2, _mm_srli_si128(T2, 8)); |
| 6576 ++ H3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8)); |
| 6577 ++ H4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8)); |
| 6578 ++ |
| 6579 ++ t0 = _mm_cvtsi128_si32(H0) ; c = (t0 >> 26); t0 &= 0x3ffffff; |
| 6580 ++ t1 = _mm_cvtsi128_si32(H1) + c; c = (t1 >> 26); t1 &= 0x3ffffff; |
| 6581 ++ t2 = _mm_cvtsi128_si32(H2) + c; c = (t2 >> 26); t2 &= 0x3ffffff; |
| 6582 ++ t3 = _mm_cvtsi128_si32(H3) + c; c = (t3 >> 26); t3 &= 0x3ffffff; |
| 6583 ++ t4 = _mm_cvtsi128_si32(H4) + c; c = (t4 >> 26); t4 &= 0x3ffffff; |
| 6584 ++ t0 = t0 + (c * 5); c = (t0 >> 26); t0 &= 0x3ffffff; |
| 6585 ++ t1 = t1 + c; |
| 6586 ++ |
| 6587 ++ st->HH[0] = ((t0 ) | (t1 << 26) ) & 0xfffffffffffull; |
| 6588 ++ st->HH[1] = ((t1 >> 18) | (t2 << 8) | (t3 << 34)) & 0xfffffffffffull; |
| 6589 ++ st->HH[2] = ((t3 >> 10) | (t4 << 16) ) & 0x3ffffffffffull; |
| 6590 ++ |
| 6591 ++ return consumed; |
| 6592 ++ } |
| 6593 ++ |
| 6594 ++void |
| 6595 ++CRYPTO_poly1305_update(poly1305_state *state, const unsigned char *m, |
| 6596 ++ size_t bytes) |
| 6597 ++ { |
| 6598 ++ poly1305_state_internal *st = poly1305_aligned_state(state); |
| 6599 ++ size_t want; |
| 6600 ++ |
| 6601 ++ /* need at least 32 initial bytes to start the accelerated branch */ |
| 6602 ++ if (!st->started) |
| 6603 ++ { |
| 6604 ++ if ((st->leftover == 0) && (bytes > 32)) |
| 6605 ++ { |
| 6606 ++ poly1305_first_block(st, m); |
| 6607 ++ m += 32; |
| 6608 ++ bytes -= 32; |
| 6609 ++ } |
| 6610 ++ else |
| 6611 ++ { |
| 6612 ++ want = poly1305_min(32 - st->leftover, bytes); |
| 6613 ++ poly1305_block_copy(st->buffer + st->leftover, m, want); |
| 6614 ++ bytes -= want; |
| 6615 ++ m += want; |
| 6616 ++ st->leftover += want; |
| 6617 ++ if ((st->leftover < 32) || (bytes == 0)) |
| 6618 ++ return; |
| 6619 ++ poly1305_first_block(st, st->buffer); |
| 6620 ++ st->leftover = 0; |
| 6621 ++ } |
| 6622 ++ st->started = 1; |
| 6623 ++ } |
| 6624 ++ |
| 6625 ++ /* handle leftover */ |
| 6626 ++ if (st->leftover) |
| 6627 ++ { |
| 6628 ++ want = poly1305_min(64 - st->leftover, bytes); |
| 6629 ++ poly1305_block_copy(st->buffer + st->leftover, m, want); |
| 6630 ++ bytes -= want; |
| 6631 ++ m += want; |
| 6632 ++ st->leftover += want; |
| 6633 ++ if (st->leftover < 64) |
| 6634 ++ return; |
| 6635 ++ poly1305_blocks(st, st->buffer, 64); |
| 6636 ++ st->leftover = 0; |
| 6637 ++ } |
| 6638 ++ |
| 6639 ++ /* process 64 byte blocks */ |
| 6640 ++ if (bytes >= 64) |
| 6641 ++ { |
| 6642 ++ want = (bytes & ~63); |
| 6643 ++ poly1305_blocks(st, m, want); |
| 6644 ++ m += want; |
| 6645 ++ bytes -= want; |
| 6646 ++ } |
| 6647 ++ |
| 6648 ++ if (bytes) |
| 6649 ++ { |
| 6650 ++ poly1305_block_copy(st->buffer + st->leftover, m, bytes); |
| 6651 ++ st->leftover += bytes; |
| 6652 ++ } |
| 6653 ++ } |
| 6654 ++ |
| 6655 ++void |
| 6656 ++CRYPTO_poly1305_finish(poly1305_state *state, unsigned char mac[16]) |
| 6657 ++ { |
| 6658 ++ poly1305_state_internal *st = poly1305_aligned_state(state); |
| 6659 ++ size_t leftover = st->leftover; |
| 6660 ++ uint8_t *m = st->buffer; |
| 6661 ++ uint128_t d[3]; |
| 6662 ++ uint64_t h0,h1,h2; |
| 6663 ++ uint64_t t0,t1; |
| 6664 ++ uint64_t g0,g1,g2,c,nc; |
| 6665 ++ uint64_t r0,r1,r2,s1,s2; |
| 6666 ++ poly1305_power *p; |
| 6667 ++ |
| 6668 ++ if (st->started) |
| 6669 ++ { |
| 6670 ++ size_t consumed = poly1305_combine(st, m, leftover); |
| 6671 ++ leftover -= consumed; |
| 6672 ++ m += consumed; |
| 6673 ++ } |
| 6674 ++ |
| 6675 ++ /* st->HH will either be 0 or have the combined result */ |
| 6676 ++ h0 = st->HH[0]; |
| 6677 ++ h1 = st->HH[1]; |
| 6678 ++ h2 = st->HH[2]; |
| 6679 ++ |
| 6680 ++ p = &st->P[1]; |
| 6681 ++ r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; |
| 6682 ++ r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; |
| 6683 ++ r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; |
| 6684 ++ s1 = r1 * (5 << 2); |
| 6685 ++ s2 = r2 * (5 << 2); |
| 6686 ++ |
| 6687 ++ if (leftover < 16) |
| 6688 ++ goto poly1305_donna_atmost15bytes; |
| 6689 ++ |
| 6690 ++poly1305_donna_atleast16bytes: |
| 6691 ++ t0 = U8TO64_LE(m + 0); |
| 6692 ++ t1 = U8TO64_LE(m + 8); |
| 6693 ++ h0 += t0 & 0xfffffffffff; |
| 6694 ++ t0 = shr128_pair(t1, t0, 44); |
| 6695 ++ h1 += t0 & 0xfffffffffff; |
| 6696 ++ h2 += (t1 >> 24) | ((uint64_t)1 << 40); |
| 6697 ++ |
| 6698 ++poly1305_donna_mul: |
| 6699 ++ d[0] = add128(add128(mul64x64_128(h0, r0), mul64x64_128(h1, s2)), mul64x
64_128(h2, s1)); |
| 6700 ++ d[1] = add128(add128(mul64x64_128(h0, r1), mul64x64_128(h1, r0)), mul64x
64_128(h2, s2)); |
| 6701 ++ d[2] = add128(add128(mul64x64_128(h0, r2), mul64x64_128(h1, r1)), mul64x
64_128(h2, r0)); |
| 6702 ++ h0 = lo128(d[0]) & 0xfffffffffff; c = shr128(
d[0], 44); |
| 6703 ++ d[1] = add128_64(d[1], c); h1 = lo128(d[1]) & 0xfffffffffff; c = shr128(
d[1], 44); |
| 6704 ++ d[2] = add128_64(d[2], c); h2 = lo128(d[2]) & 0x3ffffffffff; c = shr128(
d[2], 42); |
| 6705 ++ h0 += c * 5; |
| 6706 ++ |
| 6707 ++ m += 16; |
| 6708 ++ leftover -= 16; |
| 6709 ++ if (leftover >= 16) goto poly1305_donna_atleast16bytes; |
| 6710 ++ |
| 6711 ++ /* final bytes */ |
| 6712 ++poly1305_donna_atmost15bytes: |
| 6713 ++ if (!leftover) goto poly1305_donna_finish; |
| 6714 ++ |
| 6715 ++ m[leftover++] = 1; |
| 6716 ++ poly1305_block_zero(m + leftover, 16 - leftover); |
| 6717 ++ leftover = 16; |
| 6718 ++ |
| 6719 ++ t0 = U8TO64_LE(m+0); |
| 6720 ++ t1 = U8TO64_LE(m+8); |
| 6721 ++ h0 += t0 & 0xfffffffffff; t0 = shr128_pair(t1, t0, 44); |
| 6722 ++ h1 += t0 & 0xfffffffffff; |
| 6723 ++ h2 += (t1 >> 24); |
| 6724 ++ |
| 6725 ++ goto poly1305_donna_mul; |
| 6726 ++ |
| 6727 ++poly1305_donna_finish: |
| 6728 ++ c = (h0 >> 44); h0 &= 0xfffffffffff; |
| 6729 ++ h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff; |
| 6730 ++ h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff; |
| 6731 ++ h0 += c * 5; |
| 6732 ++ |
| 6733 ++ g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff; |
| 6734 ++ g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff; |
| 6735 ++ g2 = h2 + c - ((uint64_t)1 << 42); |
| 6736 ++ |
| 6737 ++ c = (g2 >> 63) - 1; |
| 6738 ++ nc = ~c; |
| 6739 ++ h0 = (h0 & nc) | (g0 & c); |
| 6740 ++ h1 = (h1 & nc) | (g1 & c); |
| 6741 ++ h2 = (h2 & nc) | (g2 & c); |
| 6742 ++ |
| 6743 ++ /* pad */ |
| 6744 ++ t0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1]; |
| 6745 ++ t1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1]; |
| 6746 ++ h0 += (t0 & 0xfffffffffff) ; c = (h0 >> 44); h0 &= 0xfffffffffff; t0
= shr128_pair(t1, t0, 44); |
| 6747 ++ h1 += (t0 & 0xfffffffffff) + c; c = (h1 >> 44); h1 &= 0xfffffffffff; t1
= (t1 >> 24); |
| 6748 ++ h2 += (t1 ) + c; |
| 6749 ++ |
| 6750 ++ U64TO8_LE(mac + 0, ((h0 ) | (h1 << 44))); |
| 6751 ++ U64TO8_LE(mac + 8, ((h1 >> 20) | (h2 << 24))); |
| 6752 ++ } |
| 6753 ++ |
| 6754 ++#endif /* !OPENSSL_NO_POLY1305 */ |
| 6755 +diff --git a/crypto/poly1305/poly1305test.c b/crypto/poly1305/poly1305test.c |
| 6756 +new file mode 100644 |
| 6757 +index 0000000..8dd26af |
| 6758 +--- /dev/null |
| 6759 ++++ b/crypto/poly1305/poly1305test.c |
| 6760 +@@ -0,0 +1,166 @@ |
| 6761 ++/* ==================================================================== |
| 6762 ++ * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. |
| 6763 ++ * |
| 6764 ++ * Redistribution and use in source and binary forms, with or without |
| 6765 ++ * modification, are permitted provided that the following conditions |
| 6766 ++ * are met: |
| 6767 ++ * |
| 6768 ++ * 1. Redistributions of source code must retain the above copyright |
| 6769 ++ * notice, this list of conditions and the following disclaimer. |
| 6770 ++ * |
| 6771 ++ * 2. Redistributions in binary form must reproduce the above copyright |
| 6772 ++ * notice, this list of conditions and the following disclaimer in |
| 6773 ++ * the documentation and/or other materials provided with the |
| 6774 ++ * distribution. |
| 6775 ++ * |
| 6776 ++ * 3. All advertising materials mentioning features or use of this |
| 6777 ++ * software must display the following acknowledgment: |
| 6778 ++ * "This product includes software developed by the OpenSSL Project |
| 6779 ++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" |
| 6780 ++ * |
| 6781 ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 6782 ++ * endorse or promote products derived from this software without |
| 6783 ++ * prior written permission. For written permission, please contact |
| 6784 ++ * licensing@OpenSSL.org. |
| 6785 ++ * |
| 6786 ++ * 5. Products derived from this software may not be called "OpenSSL" |
| 6787 ++ * nor may "OpenSSL" appear in their names without prior written |
| 6788 ++ * permission of the OpenSSL Project. |
| 6789 ++ * |
| 6790 ++ * 6. Redistributions of any form whatsoever must retain the following |
| 6791 ++ * acknowledgment: |
| 6792 ++ * "This product includes software developed by the OpenSSL Project |
| 6793 ++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" |
| 6794 ++ * |
| 6795 ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 6796 ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 6797 ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 6798 ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 6799 ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 6800 ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 6801 ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 6802 ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 6803 ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 6804 ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 6805 ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 6806 ++ * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 6807 ++ * ==================================================================== |
| 6808 ++ */ |
| 6809 ++ |
| 6810 ++#include <stdio.h> |
| 6811 ++#include <stdlib.h> |
| 6812 ++#include <string.h> |
| 6813 ++ |
| 6814 ++#include <openssl/poly1305.h> |
| 6815 ++ |
| 6816 ++struct poly1305_test |
| 6817 ++ { |
| 6818 ++ const char *inputhex; |
| 6819 ++ const char *keyhex; |
| 6820 ++ const char *outhex; |
| 6821 ++ }; |
| 6822 ++ |
| 6823 ++static const struct poly1305_test poly1305_tests[] = { |
| 6824 ++ { |
| 6825 ++ "", |
| 6826 ++ "c8afaac331ee372cd6082de134943b174710130e9f6fea8d72293850a667d86
c", |
| 6827 ++ "4710130e9f6fea8d72293850a667d86c", |
| 6828 ++ }, |
| 6829 ++ { |
| 6830 ++ "48656c6c6f20776f726c6421", |
| 6831 ++ "746869732069732033322d62797465206b657920666f7220506f6c793133303
5", |
| 6832 ++ "a6f745008f81c916a20dcc74eef2b2f0", |
| 6833 ++ }, |
| 6834 ++ { |
| 6835 ++ "000000000000000000000000000000000000000000000000000000000000000
0", |
| 6836 ++ "746869732069732033322d62797465206b657920666f7220506f6c793133303
5", |
| 6837 ++ "49ec78090e481ec6c26b33b91ccc0307", |
| 6838 ++ }, |
| 6839 ++ { |
| 6840 ++ "000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000", |
| 6841 ++ "746869732069732033322d62797465206b657920666f7220506f6c793133303
5", |
| 6842 ++ "da84bcab02676c38cdb015604274c2aa", |
| 6843 ++ }, |
| 6844 ++}; |
| 6845 ++ |
| 6846 ++static unsigned char hex_digit(char h) |
| 6847 ++ { |
| 6848 ++ if (h >= '0' && h <= '9') |
| 6849 ++ return h - '0'; |
| 6850 ++ else if (h >= 'a' && h <= 'f') |
| 6851 ++ return h - 'a' + 10; |
| 6852 ++ else if (h >= 'A' && h <= 'F') |
| 6853 ++ return h - 'A' + 10; |
| 6854 ++ else |
| 6855 ++ abort(); |
| 6856 ++ } |
| 6857 ++ |
| 6858 ++static void hex_decode(unsigned char *out, const char* hex) |
| 6859 ++ { |
| 6860 ++ size_t j = 0; |
| 6861 ++ |
| 6862 ++ while (*hex != 0) |
| 6863 ++ { |
| 6864 ++ unsigned char v = hex_digit(*hex++); |
| 6865 ++ v <<= 4; |
| 6866 ++ v |= hex_digit(*hex++); |
| 6867 ++ out[j++] = v; |
| 6868 ++ } |
| 6869 ++ } |
| 6870 ++ |
| 6871 ++static void hexdump(unsigned char *a, size_t len) |
| 6872 ++ { |
| 6873 ++ size_t i; |
| 6874 ++ |
| 6875 ++ for (i = 0; i < len; i++) |
| 6876 ++ printf("%02x", a[i]); |
| 6877 ++ } |
| 6878 ++ |
| 6879 ++int main() |
| 6880 ++ { |
| 6881 ++ static const unsigned num_tests = |
| 6882 ++ sizeof(poly1305_tests) / sizeof(struct poly1305_test); |
| 6883 ++ unsigned i; |
| 6884 ++ unsigned char key[32], out[16], expected[16]; |
| 6885 ++ poly1305_state poly1305; |
| 6886 ++ |
| 6887 ++ for (i = 0; i < num_tests; i++) |
| 6888 ++ { |
| 6889 ++ const struct poly1305_test *test = &poly1305_tests[i]; |
| 6890 ++ unsigned char *in; |
| 6891 ++ size_t inlen = strlen(test->inputhex); |
| 6892 ++ |
| 6893 ++ if (strlen(test->keyhex) != sizeof(key)*2 || |
| 6894 ++ strlen(test->outhex) != sizeof(out)*2 || |
| 6895 ++ (inlen & 1) == 1) |
| 6896 ++ return 1; |
| 6897 ++ |
| 6898 ++ inlen /= 2; |
| 6899 ++ |
| 6900 ++ hex_decode(key, test->keyhex); |
| 6901 ++ hex_decode(expected, test->outhex); |
| 6902 ++ |
| 6903 ++ in = malloc(inlen); |
| 6904 ++ |
| 6905 ++ hex_decode(in, test->inputhex); |
| 6906 ++ CRYPTO_poly1305_init(&poly1305, key); |
| 6907 ++ CRYPTO_poly1305_update(&poly1305, in, inlen); |
| 6908 ++ CRYPTO_poly1305_finish(&poly1305, out); |
| 6909 ++ |
| 6910 ++ if (memcmp(out, expected, sizeof(expected)) != 0) |
| 6911 ++ { |
| 6912 ++ printf("Poly1305 test #%d failed.\n", i); |
| 6913 ++ printf("got: "); |
| 6914 ++ hexdump(out, sizeof(out)); |
| 6915 ++ printf("\nexpected: "); |
| 6916 ++ hexdump(expected, sizeof(expected)); |
| 6917 ++ printf("\n"); |
| 6918 ++ return 1; |
| 6919 ++ } |
| 6920 ++ |
| 6921 ++ free(in); |
| 6922 ++ } |
| 6923 ++ |
| 6924 ++ printf("PASS\n"); |
| 6925 ++ return 0; |
| 6926 ++ } |
| 6927 +diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c |
| 6928 +index 75b6560..a042b8d 100644 |
| 6929 +--- a/ssl/s3_lib.c |
| 6930 ++++ b/ssl/s3_lib.c |
| 6931 +@@ -1841,7 +1841,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 6932 + SSL_AEAD, |
| 6933 + SSL_TLSV1_2, |
| 6934 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 6935 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 6936 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| |
| 6937 ++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, |
| 6938 + 128, |
| 6939 + 128, |
| 6940 + }, |
| 6941 +@@ -1873,7 +1874,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 6942 + SSL_AEAD, |
| 6943 + SSL_TLSV1_2, |
| 6944 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 6945 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 6946 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| |
| 6947 ++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, |
| 6948 + 128, |
| 6949 + 128, |
| 6950 + }, |
| 6951 +@@ -1905,7 +1907,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 6952 + SSL_AEAD, |
| 6953 + SSL_TLSV1_2, |
| 6954 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 6955 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 6956 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| |
| 6957 ++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, |
| 6958 + 128, |
| 6959 + 128, |
| 6960 + }, |
| 6961 +@@ -1937,7 +1940,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 6962 + SSL_AEAD, |
| 6963 + SSL_TLSV1_2, |
| 6964 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 6965 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 6966 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| |
| 6967 ++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, |
| 6968 + 128, |
| 6969 + 128, |
| 6970 + }, |
| 6971 +@@ -1969,7 +1973,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 6972 + SSL_AEAD, |
| 6973 + SSL_TLSV1_2, |
| 6974 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 6975 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 6976 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| |
| 6977 ++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, |
| 6978 + 128, |
| 6979 + 128, |
| 6980 + }, |
| 6981 +@@ -2001,7 +2006,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 6982 + SSL_AEAD, |
| 6983 + SSL_TLSV1_2, |
| 6984 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 6985 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 6986 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| |
| 6987 ++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, |
| 6988 + 128, |
| 6989 + 128, |
| 6990 + }, |
| 6991 +@@ -2714,7 +2720,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 6992 + SSL_AEAD, |
| 6993 + SSL_TLSV1_2, |
| 6994 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 6995 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 6996 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| |
| 6997 ++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, |
| 6998 + 128, |
| 6999 + 128, |
| 7000 + }, |
| 7001 +@@ -2746,7 +2753,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 7002 + SSL_AEAD, |
| 7003 + SSL_TLSV1_2, |
| 7004 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 7005 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 7006 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| |
| 7007 ++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, |
| 7008 + 128, |
| 7009 + 128, |
| 7010 + }, |
| 7011 +@@ -2778,7 +2786,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 7012 + SSL_AEAD, |
| 7013 + SSL_TLSV1_2, |
| 7014 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 7015 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 7016 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| |
| 7017 ++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, |
| 7018 + 128, |
| 7019 + 128, |
| 7020 + }, |
| 7021 +@@ -2810,7 +2819,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 7022 + SSL_AEAD, |
| 7023 + SSL_TLSV1_2, |
| 7024 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 7025 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 7026 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| |
| 7027 ++ SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, |
| 7028 + 128, |
| 7029 + 128, |
| 7030 + }, |
| 7031 +@@ -2894,6 +2904,51 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 7032 + }, |
| 7033 + #endif |
| 7034 + |
| 7035 ++ { |
| 7036 ++ 1, |
| 7037 ++ TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305, |
| 7038 ++ TLS1_CK_ECDHE_RSA_CHACHA20_POLY1305, |
| 7039 ++ SSL_kEECDH, |
| 7040 ++ SSL_aRSA, |
| 7041 ++ SSL_CHACHA20POLY1305, |
| 7042 ++ SSL_AEAD, |
| 7043 ++ SSL_TLSV1_2, |
| 7044 ++ SSL_NOT_EXP|SSL_HIGH, |
| 7045 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(0), |
| 7046 ++ 256, |
| 7047 ++ 0, |
| 7048 ++ }, |
| 7049 ++ |
| 7050 ++ { |
| 7051 ++ 1, |
| 7052 ++ TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, |
| 7053 ++ TLS1_CK_ECDHE_ECDSA_CHACHA20_POLY1305, |
| 7054 ++ SSL_kEECDH, |
| 7055 ++ SSL_aECDSA, |
| 7056 ++ SSL_CHACHA20POLY1305, |
| 7057 ++ SSL_AEAD, |
| 7058 ++ SSL_TLSV1_2, |
| 7059 ++ SSL_NOT_EXP|SSL_HIGH, |
| 7060 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(0), |
| 7061 ++ 256, |
| 7062 ++ 0, |
| 7063 ++ }, |
| 7064 ++ |
| 7065 ++ { |
| 7066 ++ 1, |
| 7067 ++ TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305, |
| 7068 ++ TLS1_CK_DHE_RSA_CHACHA20_POLY1305, |
| 7069 ++ SSL_kEDH, |
| 7070 ++ SSL_aRSA, |
| 7071 ++ SSL_CHACHA20POLY1305, |
| 7072 ++ SSL_AEAD, |
| 7073 ++ SSL_TLSV1_2, |
| 7074 ++ SSL_NOT_EXP|SSL_HIGH, |
| 7075 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(0), |
| 7076 ++ 256, |
| 7077 ++ 0, |
| 7078 ++ }, |
| 7079 ++ |
| 7080 + /* end of list */ |
| 7081 + }; |
| 7082 + |
| 7083 +diff --git a/ssl/s3_pkt.c b/ssl/s3_pkt.c |
| 7084 +index 5038f6c..04b474d 100644 |
| 7085 +--- a/ssl/s3_pkt.c |
| 7086 ++++ b/ssl/s3_pkt.c |
| 7087 +@@ -790,8 +790,11 @@ static int do_ssl3_write(SSL *s, int type, const unsigned
char *buf, |
| 7088 + else |
| 7089 + eivlen = 0; |
| 7090 + } |
| 7091 +- else if (s->aead_write_ctx != NULL) |
| 7092 ++ else if (s->aead_write_ctx != NULL && |
| 7093 ++ s->aead_write_ctx->variable_nonce_included_in_record) |
| 7094 ++ { |
| 7095 + eivlen = s->aead_write_ctx->variable_nonce_len; |
| 7096 ++ } |
| 7097 + else |
| 7098 + eivlen = 0; |
| 7099 + |
| 7100 +diff --git a/ssl/ssl.h b/ssl/ssl.h |
| 7101 +index 0644cbf..d782a98 100644 |
| 7102 +--- a/ssl/ssl.h |
| 7103 ++++ b/ssl/ssl.h |
| 7104 +@@ -291,6 +291,7 @@ extern "C" { |
| 7105 + #define SSL_TXT_CAMELLIA128 "CAMELLIA128" |
| 7106 + #define SSL_TXT_CAMELLIA256 "CAMELLIA256" |
| 7107 + #define SSL_TXT_CAMELLIA "CAMELLIA" |
| 7108 ++#define SSL_TXT_CHACHA20 "CHACHA20" |
| 7109 + |
| 7110 + #define SSL_TXT_MD5 "MD5" |
| 7111 + #define SSL_TXT_SHA1 "SHA1" |
| 7112 +diff --git a/ssl/ssl_ciph.c b/ssl/ssl_ciph.c |
| 7113 +index 7e780cd..b6370bd 100644 |
| 7114 +--- a/ssl/ssl_ciph.c |
| 7115 ++++ b/ssl/ssl_ciph.c |
| 7116 +@@ -298,6 +298,7 @@ static const SSL_CIPHER cipher_aliases[]={ |
| 7117 + {0,SSL_TXT_CAMELLIA128,0,0,0,SSL_CAMELLIA128,0,0,0,0,0,0}, |
| 7118 + {0,SSL_TXT_CAMELLIA256,0,0,0,SSL_CAMELLIA256,0,0,0,0,0,0}, |
| 7119 + {0,SSL_TXT_CAMELLIA ,0,0,0,SSL_CAMELLIA128|SSL_CAMELLIA256,0,0,0,0,0,0
}, |
| 7120 ++ {0,SSL_TXT_CHACHA20 ,0,0,0,SSL_CHACHA20POLY1305,0,0,0,0,0,0}, |
| 7121 + |
| 7122 + /* MAC aliases */ |
| 7123 + {0,SSL_TXT_MD5,0, 0,0,0,SSL_MD5, 0,0,0,0,0}, |
| 7124 +@@ -523,9 +524,15 @@ int ssl_cipher_get_evp_aead(const SSL_SESSION *s, const EV
P_AEAD **aead) |
| 7125 + return 0; |
| 7126 + |
| 7127 + #ifndef OPENSSL_NO_AES |
| 7128 +- /* There is only one AEAD for now. */ |
| 7129 +- *aead = EVP_aead_aes_128_gcm(); |
| 7130 +- return 1; |
| 7131 ++ switch (c->algorithm_enc) |
| 7132 ++ { |
| 7133 ++ case SSL_AES128GCM: |
| 7134 ++ *aead = EVP_aead_aes_128_gcm(); |
| 7135 ++ return 1; |
| 7136 ++ case SSL_CHACHA20POLY1305: |
| 7137 ++ *aead = EVP_aead_chacha20_poly1305(); |
| 7138 ++ return 1; |
| 7139 ++ } |
| 7140 + #endif |
| 7141 + |
| 7142 + return 0; |
| 7143 +@@ -1715,6 +1722,9 @@ char *SSL_CIPHER_description(const SSL_CIPHER *cipher, ch
ar *buf, int len) |
| 7144 + case SSL_SEED: |
| 7145 + enc="SEED(128)"; |
| 7146 + break; |
| 7147 ++ case SSL_CHACHA20POLY1305: |
| 7148 ++ enc="ChaCha20-Poly1305"; |
| 7149 ++ break; |
| 7150 + default: |
| 7151 + enc="unknown"; |
| 7152 + break; |
| 7153 +diff --git a/ssl/ssl_locl.h b/ssl/ssl_locl.h |
| 7154 +index 63bc28b..b83d8cd 100644 |
| 7155 +--- a/ssl/ssl_locl.h |
| 7156 ++++ b/ssl/ssl_locl.h |
| 7157 +@@ -328,6 +328,7 @@ |
| 7158 + #define SSL_SEED 0x00000800L |
| 7159 + #define SSL_AES128GCM 0x00001000L |
| 7160 + #define SSL_AES256GCM 0x00002000L |
| 7161 ++#define SSL_CHACHA20POLY1305 0x00004000L |
| 7162 + |
| 7163 + #define SSL_AES (SSL_AES128|SSL_AES256|SSL_AES128GCM|SSL
_AES256GCM) |
| 7164 + #define SSL_CAMELLIA (SSL_CAMELLIA128|SSL_CAMELLIA256) |
| 7165 +@@ -389,6 +390,12 @@ |
| 7166 + #define SSL_CIPHER_AEAD_FIXED_NONCE_LEN(ssl_cipher) \ |
| 7167 + (((ssl_cipher->algorithm2 >> 24) & 0xf)*2) |
| 7168 + |
| 7169 ++/* SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD is a flag in |
| 7170 ++ * SSL_CIPHER.algorithm2 which indicates that the variable part of the nonce i
s |
| 7171 ++ * included as a prefix of the record. (AES-GCM, for example, does with with a
n |
| 7172 ++ * 8-byte variable nonce.) */ |
| 7173 ++#define SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD (1<<22) |
| 7174 ++ |
| 7175 + /* |
| 7176 + * Export and cipher strength information. For each cipher we have to decide |
| 7177 + * whether it is exportable or not. This information is likely to change |
| 7178 +@@ -605,6 +612,9 @@ struct ssl_aead_ctx_st |
| 7179 + * records. */ |
| 7180 + unsigned char fixed_nonce[8]; |
| 7181 + unsigned char fixed_nonce_len, variable_nonce_len, tag_len; |
| 7182 ++ /* variable_nonce_included_in_record is non-zero if the variable nonce |
| 7183 ++ * for a record is included as a prefix before the ciphertext. */ |
| 7184 ++ char variable_nonce_included_in_record; |
| 7185 + }; |
| 7186 + |
| 7187 + #ifndef OPENSSL_NO_COMP |
| 7188 +diff --git a/ssl/t1_enc.c b/ssl/t1_enc.c |
| 7189 +index 7af1a32..15800af 100644 |
| 7190 +--- a/ssl/t1_enc.c |
| 7191 ++++ b/ssl/t1_enc.c |
| 7192 +@@ -366,6 +366,8 @@ static int tls1_change_cipher_state_aead(SSL *s, char is_re
ad, |
| 7193 + memcpy(aead_ctx->fixed_nonce, iv, iv_len); |
| 7194 + aead_ctx->fixed_nonce_len = iv_len; |
| 7195 + aead_ctx->variable_nonce_len = 8; /* always the case, currently. */ |
| 7196 ++ aead_ctx->variable_nonce_included_in_record = |
| 7197 ++ (s->s3->tmp.new_cipher->algorithm2 & SSL_CIPHER_ALGORITHM2_VARIA
BLE_NONCE_INCLUDED_IN_RECORD) != 0; |
| 7198 + if (aead_ctx->variable_nonce_len + aead_ctx->fixed_nonce_len != EVP_AEAD
_nonce_length(aead)) |
| 7199 + { |
| 7200 + SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE_AEAD, ERR_R_INTERNAL_ERROR
); |
| 7201 +@@ -863,6 +865,7 @@ int tls1_enc(SSL *s, int send) |
| 7202 + if (send) |
| 7203 + { |
| 7204 + size_t len = rec->length; |
| 7205 ++ size_t eivlen = 0; |
| 7206 + in = rec->input; |
| 7207 + out = rec->data; |
| 7208 + |
| 7209 +@@ -878,18 +881,22 @@ int tls1_enc(SSL *s, int send) |
| 7210 + * variable nonce. Thus we can copy the sequence number |
| 7211 + * bytes into place without overwriting any of the |
| 7212 + * plaintext. */ |
| 7213 +- memcpy(out, ad, aead->variable_nonce_len); |
| 7214 +- len -= aead->variable_nonce_len; |
| 7215 ++ if (aead->variable_nonce_included_in_record) |
| 7216 ++ { |
| 7217 ++ memcpy(out, ad, aead->variable_nonce_len); |
| 7218 ++ len -= aead->variable_nonce_len; |
| 7219 ++ eivlen = aead->variable_nonce_len; |
| 7220 ++ } |
| 7221 + |
| 7222 + ad[11] = len >> 8; |
| 7223 + ad[12] = len & 0xff; |
| 7224 + |
| 7225 + n = EVP_AEAD_CTX_seal(&aead->ctx, |
| 7226 +- out + aead->variable_nonce_len, le
n + aead->tag_len, |
| 7227 ++ out + eivlen, len + aead->tag_len, |
| 7228 + nonce, nonce_used, |
| 7229 +- in + aead->variable_nonce_len, len
, |
| 7230 ++ in + eivlen, len, |
| 7231 + ad, sizeof(ad)); |
| 7232 +- if (n >= 0) |
| 7233 ++ if (n >= 0 && aead->variable_nonce_included_in_record) |
| 7234 + n += aead->variable_nonce_len; |
| 7235 + } |
| 7236 + else |
| 7237 +@@ -903,12 +910,17 @@ int tls1_enc(SSL *s, int send) |
| 7238 + |
| 7239 + if (len < aead->variable_nonce_len) |
| 7240 + return 0; |
| 7241 +- memcpy(nonce + nonce_used, in, aead->variable_nonce_len)
; |
| 7242 ++ memcpy(nonce + nonce_used, |
| 7243 ++ aead->variable_nonce_included_in_record ? in : ad
, |
| 7244 ++ aead->variable_nonce_len); |
| 7245 + nonce_used += aead->variable_nonce_len; |
| 7246 + |
| 7247 +- in += aead->variable_nonce_len; |
| 7248 +- len -= aead->variable_nonce_len; |
| 7249 +- out += aead->variable_nonce_len; |
| 7250 ++ if (aead->variable_nonce_included_in_record) |
| 7251 ++ { |
| 7252 ++ in += aead->variable_nonce_len; |
| 7253 ++ len -= aead->variable_nonce_len; |
| 7254 ++ out += aead->variable_nonce_len; |
| 7255 ++ } |
| 7256 + |
| 7257 + if (len < aead->tag_len) |
| 7258 + return 0; |
| 7259 +diff --git a/ssl/tls1.h b/ssl/tls1.h |
| 7260 +index 8cac7df..3cbcb83 100644 |
| 7261 +--- a/ssl/tls1.h |
| 7262 ++++ b/ssl/tls1.h |
| 7263 +@@ -526,6 +526,10 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_C
B,(void (*)(void))cb) |
| 7264 + #define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256 0x0300C031 |
| 7265 + #define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384 0x0300C032 |
| 7266 + |
| 7267 ++#define TLS1_CK_ECDHE_RSA_CHACHA20_POLY1305 0x0300CC13 |
| 7268 ++#define TLS1_CK_ECDHE_ECDSA_CHACHA20_POLY1305 0x0300CC14 |
| 7269 ++#define TLS1_CK_DHE_RSA_CHACHA20_POLY1305 0x0300CC15 |
| 7270 ++ |
| 7271 + /* XXX |
| 7272 + * Inconsistency alert: |
| 7273 + * The OpenSSL names of ciphers with ephemeral DH here include the string |
| 7274 +@@ -677,6 +681,10 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_C
B,(void (*)(void))cb) |
| 7275 + #define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256 "ECDH-RSA-AES128-GCM-S
HA256" |
| 7276 + #define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384 "ECDH-RSA-AES256-GCM-S
HA384" |
| 7277 + |
| 7278 ++#define TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305 "ECDHE-RSA-CHACHA20-POLY
1305" |
| 7279 ++#define TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 "ECDHE-ECDSA-CHACHA20-PO
LY1305" |
| 7280 ++#define TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305 "DHE-RSA-CHACHA2
0-POLY1305" |
| 7281 ++ |
| 7282 + #define TLS_CT_RSA_SIGN 1 |
| 7283 + #define TLS_CT_DSS_SIGN 2 |
| 7284 + #define TLS_CT_RSA_FIXED_DH 3 |
| 7285 +diff --git a/test/Makefile b/test/Makefile |
| 7286 +index 4c9eabc..4790aa8 100644 |
| 7287 +--- a/test/Makefile |
| 7288 ++++ b/test/Makefile |
| 7289 +@@ -86,7 +86,9 @@ OBJ= $(BNTEST).o $(ECTEST).o $(ECDSATEST).o $(ECDHTEST).o $(
IDEATEST).o \ |
| 7290 + $(MDC2TEST).o $(RMDTEST).o \ |
| 7291 + $(RANDTEST).o $(DHTEST).o $(ENGINETEST).o $(CASTTEST).o \ |
| 7292 + $(BFTEST).o $(SSLTEST).o $(DSATEST).o $(EXPTEST).o $(RSATEST).o \ |
| 7293 +- $(EVPTEST).o $(IGETEST).o $(JPAKETEST).o $(ASN1TEST).o |
| 7294 ++ $(EVPTEST).o $(IGETEST).o $(JPAKETEST).o $(ASN1TEST).o $(CHACHATEST).o \ |
| 7295 ++ $(POLY1305TEST).o |
| 7296 ++ |
| 7297 + SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $(IDEATEST).c \ |
| 7298 + $(MD2TEST).c $(MD4TEST).c $(MD5TEST).c \ |
| 7299 + $(HMACTEST).c $(WPTEST).c \ |
| 7300 +@@ -94,7 +96,8 @@ SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $(
IDEATEST).c \ |
| 7301 + $(DESTEST).c $(SHATEST).c $(SHA1TEST).c $(MDC2TEST).c $(RMDTEST).c \ |
| 7302 + $(RANDTEST).c $(DHTEST).c $(ENGINETEST).c $(CASTTEST).c \ |
| 7303 + $(BFTEST).c $(SSLTEST).c $(DSATEST).c $(EXPTEST).c $(RSATEST).c \ |
| 7304 +- $(EVPTEST).c $(IGETEST).c $(JPAKETEST).c $(SRPTEST).c $(ASN1TEST).c |
| 7305 ++ $(EVPTEST).c $(IGETEST).c $(JPAKETEST).c $(SRPTEST).c $(ASN1TEST).c \ |
| 7306 ++ $(CHACHATEST).c $(POLY1305TEST).c |
| 7307 + |
| 7308 + EXHEADER= |
| 7309 + HEADER= $(EXHEADER) |
| 7310 +@@ -137,7 +140,7 @@ alltests: \ |
| 7311 + test_enc test_x509 test_rsa test_crl test_sid \ |
| 7312 + test_gen test_req test_pkcs7 test_verify test_dh test_dsa \ |
| 7313 + test_ss test_ca test_engine test_evp test_ssl test_tsa test_ige \ |
| 7314 +- test_jpake test_srp test_cms |
| 7315 ++ test_jpake test_srp test_cms test_chacha test_poly1305 |
| 7316 + |
| 7317 + test_evp: |
| 7318 + ../util/shlib_wrap.sh ./$(EVPTEST) evptests.txt |
| 7319 +@@ -318,6 +321,14 @@ test_srp: $(SRPTEST)$(EXE_EXT) |
| 7320 + @echo "Test SRP" |
| 7321 + ../util/shlib_wrap.sh ./srptest |
| 7322 + |
| 7323 ++test_chacha: $(CHACHATEST)$(EXE_EXT) |
| 7324 ++ @echo "Test ChaCha" |
| 7325 ++ ../util/shlib_wrap.sh ./$(CHACHATEST) |
| 7326 ++ |
| 7327 ++test_poly1305: $(POLY1305TEST)$(EXE_EXT) |
| 7328 ++ @echo "Test Poly1305" |
| 7329 ++ ../util/shlib_wrap.sh ./$(POLY1305TEST) |
| 7330 ++ |
| 7331 + lint: |
| 7332 + lint -DLINT $(INCLUDES) $(SRC)>fluff |
| 7333 + |
| 7334 +@@ -394,6 +405,12 @@ $(SHA256TEST)$(EXE_EXT): $(SHA256TEST).o $(DLIBCRYPTO) |
| 7335 + $(SHA512TEST)$(EXE_EXT): $(SHA512TEST).o $(DLIBCRYPTO) |
| 7336 + @target=$(SHA512TEST); $(BUILD_CMD) |
| 7337 + |
| 7338 ++$(CHACHATEST)$(EXE_EXT): $(CHACHATEST).o $(DLIBCRYPTO) |
| 7339 ++ @target=$(CHACHATEST); $(BUILD_CMD) |
| 7340 ++ |
| 7341 ++$(POLY1305TEST)$(EXE_EXT): $(POLY1305TEST).o $(DLIBCRYPTO) |
| 7342 ++	@target=$(POLY1305TEST); $(BUILD_CMD) |
| 7343 ++ |
| 7344 + $(RMDTEST)$(EXE_EXT): $(RMDTEST).o $(DLIBCRYPTO) |
| 7345 + @target=$(RMDTEST); $(BUILD_CMD) |
| 7346 + |
| 7347 +-- |
| 7348 +1.8.4.1 |
| 7349 + |
| 7350 diff -burN android-openssl-lhash2/patches/neon_runtime.patch android-openssl/pat
ches/neon_runtime.patch |
| 7351 --- android-openssl-lhash2/patches/neon_runtime.patch 1969-12-31 19:00:00.0000
00000 -0500 |
| 7352 +++ android-openssl/patches/neon_runtime.patch 2013-11-05 16:51:28.668287449 -0
500 |
| 7353 @@ -0,0 +1,1104 @@ |
| 7354 +From aea47606333cfd3e7a09cab3e42e488c79a416af Mon Sep 17 00:00:00 2001 |
| 7355 +From: Adam Langley <agl@chromium.org> |
| 7356 +Date: Tue, 5 Nov 2013 13:10:11 -0500 |
| 7357 +Subject: [PATCH 52/52] Optional NEON support on ARM. |
| 7358 + |
| 7359 +This patch causes ARM to build both the NEON and generic versions of |
| 7360 +ChaCha20 and Poly1305. The NEON code can be enabled at run-time by |
| 7361 +calling CRYPTO_set_NEON_capable(1). |
| 7362 +--- |
| 7363 + .gitignore | 1 + |
| 7364 + Configure | 2 +- |
| 7365 + apps/speed.c | 5 + |
| 7366 + crypto/chacha/chacha_enc.c | 18 + |
| 7367 + crypto/chacha/chacha_vec.c | 7 + |
| 7368 + crypto/chacha/chacha_vec_arm.s | 846 +++++++++++++++++++++++++++++++++++++++++ |
| 7369 + crypto/cryptlib.c | 14 + |
| 7370 + crypto/crypto.h | 8 + |
| 7371 + crypto/poly1305/poly1305.c | 35 ++ |
| 7372 + crypto/poly1305/poly1305_arm.c | 9 +- |
| 7373 + 10 files changed, 941 insertions(+), 4 deletions(-) |
| 7374 + create mode 100644 crypto/chacha/chacha_vec_arm.s |
| 7375 + |
| 7376 +diff --git a/Configure b/Configure |
| 7377 +index 1b95384..18b7af0 100755 |
| 7378 +--- a/Configure |
| 7379 ++++ b/Configure |
| 7380 +@@ -136,7 +136,7 @@ my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-
alpha.o:::::::ghash-a |
| 7381 + my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::
::::::::"; |
| 7382 + my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha
256-mips.o sha512-mips.o::::::::::"; |
| 7383 + my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::a
es-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4
-s390x.o:::::::ghash-s390x.o:"; |
| 7384 +-my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_c
bc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-
armv4.o::chacha_vec.o:poly1305_arm.o poly1305_arm_asm.o:void"; |
| 7385 ++my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_c
bc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-
armv4.o::chacha_vec_arm.o chacha_enc.o:poly1305.o poly1305_arm.o poly1305_arm_as
m.o:void"; |
| 7386 + my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-
parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash
-parisc.o::::32"; |
| 7387 + my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o a
es-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::gh
ash-parisc.o::::64"; |
| 7388 + my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.
o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::::"; |
| 7389 +diff --git a/crypto/chacha/chacha_enc.c b/crypto/chacha/chacha_enc.c |
| 7390 +index 54d1ca3..e4b648f 100644 |
| 7391 +--- a/crypto/chacha/chacha_enc.c |
| 7392 ++++ b/crypto/chacha/chacha_enc.c |
| 7393 +@@ -61,6 +61,7 @@ |
| 7394 + |
| 7395 + #if !defined(OPENSSL_NO_CHACHA) |
| 7396 + |
| 7397 ++#include <openssl/crypto.h> |
| 7398 + #include <openssl/chacha.h> |
| 7399 + |
| 7400 + /* sigma contains the ChaCha constants, which happen to be an ASCII string. */ |
| 7401 +@@ -87,6 +88,15 @@ static const char sigma[16] = "expand 32-byte k"; |
| 7402 + |
| 7403 + typedef unsigned int uint32_t; |
| 7404 + |
| 7405 ++#if __arm__ |
| 7406 ++/* Defined in chacha_vec.c */ |
| 7407 ++void CRYPTO_chacha_20_neon(unsigned char *out, |
| 7408 ++ const unsigned char *in, size_t in_len, |
| 7409 ++ const unsigned char key[32], |
| 7410 ++ const unsigned char nonce[8], |
| 7411 ++ size_t counter); |
| 7412 ++#endif |
| 7413 ++ |
| 7414 + /* chacha_core performs |num_rounds| rounds of ChaCha20 on the input words in |
| 7415 + * |input| and writes the 64 output bytes to |output|. */ |
| 7416 + static void chacha_core(unsigned char output[64], const uint32_t input[16], |
| 7417 +@@ -124,6 +134,14 @@ void CRYPTO_chacha_20(unsigned char *out, |
| 7418 + unsigned char buf[64]; |
| 7419 + size_t todo, i; |
| 7420 + |
| 7421 ++#if __arm__ |
| 7422 ++ if (CRYPTO_is_NEON_capable()) |
| 7423 ++ { |
| 7424 ++ CRYPTO_chacha_20_neon(out, in, in_len, key, nonce, counter); |
| 7425 ++ return; |
| 7426 ++ } |
| 7427 ++#endif |
| 7428 ++ |
| 7429 + input[0] = U8TO32_LITTLE(sigma + 0); |
| 7430 + input[1] = U8TO32_LITTLE(sigma + 4); |
| 7431 + input[2] = U8TO32_LITTLE(sigma + 8); |
| 7432 +diff --git a/crypto/chacha/chacha_vec.c b/crypto/chacha/chacha_vec.c |
| 7433 +index 33b2238..1226c39 100644 |
| 7434 +--- a/crypto/chacha/chacha_vec.c |
| 7435 ++++ b/crypto/chacha/chacha_vec.c |
| 7436 +@@ -154,7 +154,14 @@ typedef unsigned vec __attribute__ ((vector_size (16))); |
| 7437 + STORE(op + d + 8, LOAD(in + d + 8) ^ REVV_BE(v2)); \ |
| 7438 + STORE(op + d +12, LOAD(in + d +12) ^ REVV_BE(v3)); |
| 7439 + |
| 7440 ++#if __ARM_NEON__ |
| 7441 ++/* For ARM, we can't depend on NEON support, so this function is compiled with |
| 7442 ++ * a different name, along with the generic code, and can be enabled at |
| 7443 ++ * run-time. */ |
| 7444 ++void CRYPTO_chacha_20_neon( |
| 7445 ++#else |
| 7446 + void CRYPTO_chacha_20( |
| 7447 ++#endif |
| 7448 + unsigned char *out, |
| 7449 + const unsigned char *in, |
| 7450 + size_t inlen, |
| 7451 +diff --git a/crypto/chacha/chacha_vec_arm.s b/crypto/chacha/chacha_vec_arm.s |
| 7452 +new file mode 100644 |
| 7453 +index 0000000..24a5050 |
| 7454 +--- /dev/null |
| 7455 ++++ b/crypto/chacha/chacha_vec_arm.s |
| 7456 +@@ -0,0 +1,846 @@ |
| 7457 ++ .syntax unified |
| 7458 ++ .cpu cortex-a8 |
| 7459 ++ .eabi_attribute 27, 3 |
| 7460 ++ .eabi_attribute 28, 1 |
| 7461 ++ .fpu neon |
| 7462 ++ .eabi_attribute 20, 1 |
| 7463 ++ .eabi_attribute 21, 1 |
| 7464 ++ .eabi_attribute 23, 3 |
| 7465 ++ .eabi_attribute 24, 1 |
| 7466 ++ .eabi_attribute 25, 1 |
| 7467 ++ .eabi_attribute 26, 2 |
| 7468 ++ .eabi_attribute 30, 2 |
| 7469 ++ .eabi_attribute 34, 1 |
| 7470 ++ .eabi_attribute 18, 4 |
| 7471 ++ .thumb |
| 7472 ++ .file "chacha_vec.c" |
| 7473 ++ .text |
| 7474 ++ .align 2 |
| 7475 ++ .global CRYPTO_chacha_20_neon |
| 7476 ++ .thumb |
| 7477 ++ .thumb_func |
| 7478 ++ .type CRYPTO_chacha_20_neon, %function |
| 7479 ++CRYPTO_chacha_20_neon: |
| 7480 ++ @ args = 8, pretend = 0, frame = 296 |
| 7481 ++ @ frame_needed = 1, uses_anonymous_args = 0 |
| 7482 ++ @ link register save eliminated. |
| 7483 ++ push {r4, r5, r6, r7, r8, r9, sl, fp} |
| 7484 ++ fstmfdd sp!, {d8, d9, d10, d11, d12, d13, d14, d15} |
| 7485 ++ sub sp, sp, #296 |
| 7486 ++ add r7, sp, #0 |
| 7487 ++ movw ip, #43691 |
| 7488 ++ movt ip, 43690 |
| 7489 ++ str r2, [r7, #192] |
| 7490 ++ sub sp, sp, #96 |
| 7491 ++ ldr r4, [r7, #192] |
| 7492 ++ ldr r6, [r7, #392] |
| 7493 ++ ldr r2, .L38+16 |
| 7494 ++ umull r4, ip, ip, r4 |
| 7495 ++ ldr r6, [r6, #0] |
| 7496 ++ ldr r8, [r7, #392] |
| 7497 ++ add r4, sp, #15 |
| 7498 ++ str r3, [r7, #236] |
| 7499 ++ bic r4, r4, #15 |
| 7500 ++ str r6, [r7, #172] |
| 7501 ++ str r4, [r7, #196] |
| 7502 ++ str r0, [r7, #184] |
| 7503 ++ lsrs ip, ip, #7 |
| 7504 ++ str r1, [r7, #180] |
| 7505 ++ ldmia r2, {r0, r1, r2, r3} |
| 7506 ++ ldr r4, [r8, #4] |
| 7507 ++ ldr r5, [r7, #236] |
| 7508 ++ vld1.64 {d24-d25}, [r5:64] |
| 7509 ++ vldr d26, [r5, #16] |
| 7510 ++ vldr d27, [r5, #24] |
| 7511 ++ ldr fp, [r7, #196] |
| 7512 ++ ldr r8, [r7, #396] |
| 7513 ++ ldr r5, [r7, #172] |
| 7514 ++ add r6, fp, #64 |
| 7515 ++ str r4, [r7, #292] |
| 7516 ++ mov r4, #0 |
| 7517 ++ str r8, [r7, #280] |
| 7518 ++ str r5, [r7, #288] |
| 7519 ++ str r4, [r7, #284] |
| 7520 ++ stmia r6, {r0, r1, r2, r3} |
| 7521 ++ vldr d22, [fp, #64] |
| 7522 ++ vldr d23, [fp, #72] |
| 7523 ++ vldr d20, [r7, #280] |
| 7524 ++ vldr d21, [r7, #288] |
| 7525 ++ str ip, [r7, #188] |
| 7526 ++ beq .L20 |
| 7527 ++ lsl r6, ip, #1 |
| 7528 ++ ldr r1, [fp, #68] |
| 7529 ++ add r3, r6, ip |
| 7530 ++ str r6, [r7, #176] |
| 7531 ++ ldr r2, [fp, #72] |
| 7532 ++ add r8, r8, #2 |
| 7533 ++ ldr r5, [fp, #76] |
| 7534 ++ vldr d18, .L38 |
| 7535 ++ vldr d19, .L38+8 |
| 7536 ++ str r4, [r7, #232] |
| 7537 ++ ldr r6, [r7, #180] |
| 7538 ++ ldr r4, [r7, #184] |
| 7539 ++ str r0, [r7, #220] |
| 7540 ++ str r1, [r7, #216] |
| 7541 ++ str r8, [r7, #200] |
| 7542 ++ str r2, [r7, #212] |
| 7543 ++ str r3, [r7, #204] |
| 7544 ++ str r5, [r7, #208] |
| 7545 ++ str r6, [r7, #244] |
| 7546 ++ str r4, [r7, #240] |
| 7547 ++.L4: |
| 7548 ++ ldr r6, [r7, #236] |
| 7549 ++ vadd.i32 q8, q10, q9 |
| 7550 ++ ldr r5, [r7, #236] |
| 7551 ++ vmov q15, q13 @ v4si |
| 7552 ++ ldr r8, [r7, #232] |
| 7553 ++ vmov q3, q12 @ v4si |
| 7554 ++ ldr r6, [r6, #4] |
| 7555 ++ vmov q2, q11 @ v4si |
| 7556 ++ ldr fp, [r7, #200] |
| 7557 ++ vmov q5, q10 @ v4si |
| 7558 ++ ldr r4, [r7, #236] |
| 7559 ++ vmov q1, q13 @ v4si |
| 7560 ++ add ip, r8, fp |
| 7561 ++ ldr r5, [r5, #0] |
| 7562 ++ ldr r0, [r7, #236] |
| 7563 ++ add r8, r7, #208 |
| 7564 ++ ldr r1, [r7, #236] |
| 7565 ++ vmov q0, q12 @ v4si |
| 7566 ++ str r6, [r7, #260] |
| 7567 ++ vmov q4, q11 @ v4si |
| 7568 ++ ldr r6, [r7, #392] |
| 7569 ++ ldmia r8, {r8, r9, sl, fp} |
| 7570 ++ ldr r0, [r0, #8] |
| 7571 ++ ldr r1, [r1, #12] |
| 7572 ++ str r5, [r7, #224] |
| 7573 ++ ldr r5, [r4, #24] |
| 7574 ++ ldr r3, [r4, #28] |
| 7575 ++ ldr r2, [r6, #4] |
| 7576 ++ str r0, [r7, #256] |
| 7577 ++ str r1, [r7, #228] |
| 7578 ++ str r5, [r7, #272] |
| 7579 ++ ldr r5, [r6, #0] |
| 7580 ++ movs r6, #0 |
| 7581 ++ ldr r0, [r4, #16] |
| 7582 ++ ldr r1, [r4, #20] |
| 7583 ++ movs r4, #10 |
| 7584 ++ str r2, [r7, #20] |
| 7585 ++ str r3, [r7, #276] |
| 7586 ++ str r9, [r7, #268] |
| 7587 ++ mov r9, r6 |
| 7588 ++ str r4, [r7, #248] |
| 7589 ++ ldr r2, [r7, #256] |
| 7590 ++ ldr r3, [r7, #228] |
| 7591 ++ str r8, [r7, #252] |
| 7592 ++ mov r8, sl |
| 7593 ++ ldr r6, [r7, #272] |
| 7594 ++ mov sl, ip |
| 7595 ++ str r1, [r7, #264] |
| 7596 ++ ldr ip, [r7, #20] |
| 7597 ++ str r6, [r7, #256] |
| 7598 ++ mov r6, r5 |
| 7599 ++ ldr r1, [r7, #260] |
| 7600 ++ mov r5, r0 |
| 7601 ++ ldr r0, [r7, #224] |
| 7602 ++ b .L39 |
| 7603 ++.L40: |
| 7604 ++ .align 3 |
| 7605 ++.L38: |
| 7606 ++ .word 1 |
| 7607 ++ .word 0 |
| 7608 ++ .word 0 |
| 7609 ++ .word 0 |
| 7610 ++ .word .LANCHOR0 |
| 7611 ++.L39: |
| 7612 ++.L3: |
| 7613 ++ vadd.i32 q4, q4, q0 |
| 7614 ++ add r8, r8, r1 |
| 7615 ++ vadd.i32 q2, q2, q3 |
| 7616 ++ str r8, [r7, #260] |
| 7617 ++ veor q5, q5, q4 |
| 7618 ++ ldr r8, [r7, #268] |
| 7619 ++ veor q8, q8, q2 |
| 7620 ++ add fp, fp, r0 |
| 7621 ++ str fp, [r7, #272] |
| 7622 ++ add r8, r8, r2 |
| 7623 ++ vrev32.16 q5, q5 |
| 7624 ++ str r8, [r7, #268] |
| 7625 ++ vrev32.16 q8, q8 |
| 7626 ++ vadd.i32 q1, q1, q5 |
| 7627 ++ vadd.i32 q15, q15, q8 |
| 7628 ++ ldr r8, [r7, #272] |
| 7629 ++ veor q0, q1, q0 |
| 7630 ++ ldr r4, [r7, #252] |
| 7631 ++ veor q3, q15, q3 |
| 7632 ++ eor sl, sl, r8 |
| 7633 ++ ldr r8, [r7, #268] |
| 7634 ++ add fp, r4, r3 |
| 7635 ++ vshl.i32 q7, q0, #12 |
| 7636 ++ ldr r4, [r7, #260] |
| 7637 ++ vshl.i32 q6, q3, #12 |
| 7638 ++ eor r6, r6, r8 |
| 7639 ++ eor r9, r9, r4 |
| 7640 ++ ldr r4, [r7, #264] |
| 7641 ++ vsri.32 q7, q0, #20 |
| 7642 ++ ror r8, r6, #16 |
| 7643 ++ ldr r6, [r7, #256] |
| 7644 ++ eor ip, ip, fp |
| 7645 ++ vsri.32 q6, q3, #20 |
| 7646 ++ ror sl, sl, #16 |
| 7647 ++ ror r9, r9, #16 |
| 7648 ++ add r5, r5, sl |
| 7649 ++ vadd.i32 q4, q4, q7 |
| 7650 ++ str r5, [r7, #228] |
| 7651 ++ vadd.i32 q2, q2, q6 |
| 7652 ++ add r5, r4, r9 |
| 7653 ++ add r4, r6, r8 |
| 7654 ++ ldr r6, [r7, #276] |
| 7655 ++ ror ip, ip, #16 |
| 7656 ++ veor q5, q4, q5 |
| 7657 ++ veor q8, q2, q8 |
| 7658 ++ add r6, r6, ip |
| 7659 ++ str r6, [r7, #256] |
| 7660 ++ eors r1, r1, r5 |
| 7661 ++ ldr r6, [r7, #228] |
| 7662 ++ vshl.i32 q3, q5, #8 |
| 7663 ++ vshl.i32 q14, q8, #8 |
| 7664 ++ eors r2, r2, r4 |
| 7665 ++ eors r0, r0, r6 |
| 7666 ++ ldr r6, [r7, #256] |
| 7667 ++ vsri.32 q3, q5, #24 |
| 7668 ++ ror r1, r1, #20 |
| 7669 ++ eors r3, r3, r6 |
| 7670 ++ ldr r6, [r7, #272] |
| 7671 ++ ror r0, r0, #20 |
| 7672 ++ vsri.32 q14, q8, #24 |
| 7673 ++ adds r6, r0, r6 |
| 7674 ++ str r6, [r7, #276] |
| 7675 ++ ldr r6, [r7, #260] |
| 7676 ++ vadd.i32 q1, q1, q3 |
| 7677 ++ vadd.i32 q15, q15, q14 |
| 7678 ++ ror r2, r2, #20 |
| 7679 ++ adds r6, r1, r6 |
| 7680 ++ str r6, [r7, #252] |
| 7681 ++ ldr r6, [r7, #268] |
| 7682 ++ veor q6, q15, q6 |
| 7683 ++ veor q7, q1, q7 |
| 7684 ++ ror r3, r3, #20 |
| 7685 ++ adds r6, r2, r6 |
| 7686 ++ str r6, [r7, #272] |
| 7687 ++ ldr r6, [r7, #276] |
| 7688 ++ vshl.i32 q0, q6, #7 |
| 7689 ++ vshl.i32 q5, q7, #7 |
| 7690 ++ add fp, r3, fp |
| 7691 ++ eor sl, r6, sl |
| 7692 ++ ldr r6, [r7, #252] |
| 7693 ++ eor ip, fp, ip |
| 7694 ++ vsri.32 q0, q6, #25 |
| 7695 ++ eor r9, r6, r9 |
| 7696 ++ ldr r6, [r7, #272] |
| 7697 ++ ror sl, sl, #24 |
| 7698 ++ vsri.32 q5, q7, #25 |
| 7699 ++ eor r8, r6, r8 |
| 7700 ++ ldr r6, [r7, #228] |
| 7701 ++ ror r9, r9, #24 |
| 7702 ++ ror ip, ip, #24 |
| 7703 ++ add r6, sl, r6 |
| 7704 ++ str r6, [r7, #268] |
| 7705 ++ ldr r6, [r7, #256] |
| 7706 ++ add r5, r9, r5 |
| 7707 ++ str r5, [r7, #264] |
| 7708 ++ vext.32 q5, q5, q5, #1 |
| 7709 ++ add r5, ip, r6 |
| 7710 ++ ldr r6, [r7, #268] |
| 7711 ++ vext.32 q0, q0, q0, #1 |
| 7712 ++ vadd.i32 q4, q4, q5 |
| 7713 ++ eors r0, r0, r6 |
| 7714 ++ ldr r6, [r7, #264] |
| 7715 ++ vadd.i32 q2, q2, q0 |
| 7716 ++ vext.32 q3, q3, q3, #3 |
| 7717 ++ ror r8, r8, #24 |
| 7718 ++ eors r1, r1, r6 |
| 7719 ++ vext.32 q14, q14, q14, #3 |
| 7720 ++ add r4, r8, r4 |
| 7721 ++ ldr r6, [r7, #276] |
| 7722 ++ veor q3, q4, q3 |
| 7723 ++ veor q14, q2, q14 |
| 7724 ++ eors r2, r2, r4 |
| 7725 ++ ror r1, r1, #25 |
| 7726 ++ vext.32 q1, q1, q1, #2 |
| 7727 ++ adds r6, r1, r6 |
| 7728 ++ str r6, [r7, #276] |
| 7729 ++ vext.32 q15, q15, q15, #2 |
| 7730 ++ ldr r6, [r7, #252] |
| 7731 ++ eors r3, r3, r5 |
| 7732 ++ ror r2, r2, #25 |
| 7733 ++ vrev32.16 q8, q14 |
| 7734 ++ adds r6, r2, r6 |
| 7735 ++ vrev32.16 q3, q3 |
| 7736 ++ str r6, [r7, #260] |
| 7737 ++ vadd.i32 q1, q1, q3 |
| 7738 ++ ldr r6, [r7, #272] |
| 7739 ++ vadd.i32 q15, q15, q8 |
| 7740 ++ ror r3, r3, #25 |
| 7741 ++ veor q5, q1, q5 |
| 7742 ++ adds r6, r3, r6 |
| 7743 ++ veor q0, q15, q0 |
| 7744 ++ str r6, [r7, #256] |
| 7745 ++ ldr r6, [r7, #260] |
| 7746 ++ ror r0, r0, #25 |
| 7747 ++ add fp, r0, fp |
| 7748 ++ vshl.i32 q6, q5, #12 |
| 7749 ++ eor sl, r6, sl |
| 7750 ++ ldr r6, [r7, #276] |
| 7751 ++ vshl.i32 q14, q0, #12 |
| 7752 ++ eor r8, fp, r8 |
| 7753 ++ eor ip, r6, ip |
| 7754 ++ ldr r6, [r7, #256] |
| 7755 ++ vsri.32 q6, q5, #20 |
| 7756 ++ ror sl, sl, #16 |
| 7757 ++ eor r9, r6, r9 |
| 7758 ++ ror r6, r8, #16 |
| 7759 ++ vsri.32 q14, q0, #20 |
| 7760 ++ ldr r8, [r7, #264] |
| 7761 ++ ror ip, ip, #16 |
| 7762 ++ add r5, sl, r5 |
| 7763 ++ add r8, r6, r8 |
| 7764 ++ add r4, ip, r4 |
| 7765 ++ str r4, [r7, #228] |
| 7766 ++ eor r0, r8, r0 |
| 7767 ++ str r5, [r7, #272] |
| 7768 ++ vadd.i32 q4, q4, q6 |
| 7769 ++ ldr r5, [r7, #228] |
| 7770 ++ vadd.i32 q2, q2, q14 |
| 7771 ++ ldr r4, [r7, #268] |
| 7772 ++ ror r0, r0, #20 |
| 7773 ++ veor q3, q4, q3 |
| 7774 ++ eors r1, r1, r5 |
| 7775 ++ veor q0, q2, q8 |
| 7776 ++ str r8, [r7, #264] |
| 7777 ++ str r0, [r7, #20] |
| 7778 ++ add fp, r0, fp |
| 7779 ++ ldr r8, [r7, #272] |
| 7780 ++ ror r9, r9, #16 |
| 7781 ++ ldr r0, [r7, #276] |
| 7782 ++ add r4, r9, r4 |
| 7783 ++ str fp, [r7, #252] |
| 7784 ++ ror r1, r1, #20 |
| 7785 ++ add fp, r1, r0 |
| 7786 ++ eor r2, r8, r2 |
| 7787 ++ ldr r0, [r7, #252] |
| 7788 ++ eors r3, r3, r4 |
| 7789 ++ vshl.i32 q5, q3, #8 |
| 7790 ++ str r4, [r7, #224] |
| 7791 ++ vshl.i32 q8, q0, #8 |
| 7792 ++ ldr r4, [r7, #260] |
| 7793 ++ ldr r5, [r7, #256] |
| 7794 ++ ror r2, r2, #20 |
| 7795 ++ ror r3, r3, #20 |
| 7796 ++ eors r6, r6, r0 |
| 7797 ++ adds r5, r3, r5 |
| 7798 ++ add r8, r2, r4 |
| 7799 ++ vsri.32 q5, q3, #24 |
| 7800 ++ ldr r4, [r7, #264] |
| 7801 ++ eor r9, r5, r9 |
| 7802 ++ eor ip, fp, ip |
| 7803 ++ vsri.32 q8, q0, #24 |
| 7804 ++ eor sl, r8, sl |
| 7805 ++ ror r6, r6, #24 |
| 7806 ++ ldr r0, [r7, #272] |
| 7807 ++ str r5, [r7, #268] |
| 7808 ++ adds r4, r6, r4 |
| 7809 ++ ldr r5, [r7, #228] |
| 7810 ++ vadd.i32 q1, q1, q5 |
| 7811 ++ str r4, [r7, #264] |
| 7812 ++ vadd.i32 q15, q15, q8 |
| 7813 ++ ldr r4, [r7, #224] |
| 7814 ++ ror ip, ip, #24 |
| 7815 ++ ror sl, sl, #24 |
| 7816 ++ ror r9, r9, #24 |
| 7817 ++ add r5, ip, r5 |
| 7818 ++ add r0, sl, r0 |
| 7819 ++ str r5, [r7, #256] |
| 7820 ++ add r5, r9, r4 |
| 7821 ++ str r0, [r7, #276] |
| 7822 ++ veor q6, q1, q6 |
| 7823 ++ ldr r4, [r7, #20] |
| 7824 ++ veor q14, q15, q14 |
| 7825 ++ ldr r0, [r7, #264] |
| 7826 ++ eors r3, r3, r5 |
| 7827 ++ vshl.i32 q0, q6, #7 |
| 7828 ++ vext.32 q1, q1, q1, #2 |
| 7829 ++ eors r0, r0, r4 |
| 7830 ++ ldr r4, [r7, #276] |
| 7831 ++ str r0, [r7, #272] |
| 7832 ++ vshl.i32 q3, q14, #7 |
| 7833 ++ eors r2, r2, r4 |
| 7834 ++ ldr r4, [r7, #272] |
| 7835 ++ ldr r0, [r7, #256] |
| 7836 ++ vsri.32 q0, q6, #25 |
| 7837 ++ ror r2, r2, #25 |
| 7838 ++ ror r3, r3, #25 |
| 7839 ++ eors r1, r1, r0 |
| 7840 ++ vsri.32 q3, q14, #25 |
| 7841 ++ ror r0, r4, #25 |
| 7842 ++ ldr r4, [r7, #248] |
| 7843 ++ ror r1, r1, #25 |
| 7844 ++ vext.32 q5, q5, q5, #1 |
| 7845 ++ subs r4, r4, #1 |
| 7846 ++ str r4, [r7, #248] |
| 7847 ++ vext.32 q15, q15, q15, #2 |
| 7848 ++ vext.32 q8, q8, q8, #1 |
| 7849 ++ vext.32 q0, q0, q0, #3 |
| 7850 ++ vext.32 q3, q3, q3, #3 |
| 7851 ++ bne .L3 |
| 7852 ++ ldr r4, [r7, #256] |
| 7853 ++ vadd.i32 q4, q11, q4 |
| 7854 ++ str r2, [r7, #256] |
| 7855 ++ vadd.i32 q14, q10, q9 |
| 7856 ++ ldr r2, [r7, #244] |
| 7857 ++ vld1.64 {d12-d13}, [r2:64] |
| 7858 ++ str r4, [r7, #272] |
| 7859 ++ veor q4, q4, q6 |
| 7860 ++ ldr r4, [r7, #220] |
| 7861 ++ vadd.i32 q10, q10, q5 |
| 7862 ++ ldr r2, [r7, #216] |
| 7863 ++ vadd.i32 q0, q12, q0 |
| 7864 ++ add fp, fp, r4 |
| 7865 ++ str ip, [r7, #20] |
| 7866 ++ ldr r4, [r7, #212] |
| 7867 ++ mov ip, sl |
| 7868 ++ str r0, [r7, #224] |
| 7869 ++ mov sl, r8 |
| 7870 ++ mov r0, r5 |
| 7871 ++ ldr r8, [r7, #252] |
| 7872 ++ mov r5, r6 |
| 7873 ++ add sl, sl, r2 |
| 7874 ++ mov r6, r9 |
| 7875 ++ ldr r2, [r7, #208] |
| 7876 ++ ldr r9, [r7, #268] |
| 7877 ++ vadd.i32 q1, q13, q1 |
| 7878 ++ vadd.i32 q2, q11, q2 |
| 7879 ++ str r1, [r7, #260] |
| 7880 ++ add r9, r9, r4 |
| 7881 ++ add r4, r8, r2 |
| 7882 ++ ldr r8, [r7, #232] |
| 7883 ++ vadd.i32 q3, q12, q3 |
| 7884 ++ vadd.i32 q15, q13, q15 |
| 7885 ++ str r3, [r7, #228] |
| 7886 ++ add r2, r8, #2 |
| 7887 ++ vadd.i32 q8, q14, q8 |
| 7888 ++ add ip, r2, ip |
| 7889 ++ ldr r2, [r7, #240] |
| 7890 ++ vst1.64 {d8-d9}, [r2:64] |
| 7891 ++ ldr r2, [r7, #244] |
| 7892 ++ ldr r3, [r7, #276] |
| 7893 ++ vldr d8, [r2, #16] |
| 7894 ++ vldr d9, [r2, #24] |
| 7895 ++ ldr r1, [r7, #264] |
| 7896 ++ veor q0, q0, q4 |
| 7897 ++ add r8, r8, #3 |
| 7898 ++ str r8, [r7, #232] |
| 7899 ++ ldr r8, [r7, #240] |
| 7900 ++ vstr d0, [r8, #16] |
| 7901 ++ vstr d1, [r8, #24] |
| 7902 ++ vldr d0, [r2, #32] |
| 7903 ++ vldr d1, [r2, #40] |
| 7904 ++ veor q1, q1, q0 |
| 7905 ++ vstr d2, [r8, #32] |
| 7906 ++ vstr d3, [r8, #40] |
| 7907 ++ vldr d2, [r2, #48] |
| 7908 ++ vldr d3, [r2, #56] |
| 7909 ++ veor q10, q10, q1 |
| 7910 ++ vstr d20, [r8, #48] |
| 7911 ++ vstr d21, [r8, #56] |
| 7912 ++ vldr d8, [r2, #64] |
| 7913 ++ vldr d9, [r2, #72] |
| 7914 ++ veor q2, q2, q4 |
| 7915 ++ vstr d4, [r8, #64] |
| 7916 ++ vstr d5, [r8, #72] |
| 7917 ++ vldr d10, [r2, #80] |
| 7918 ++ vldr d11, [r2, #88] |
| 7919 ++ veor q3, q3, q5 |
| 7920 ++ vstr d6, [r8, #80] |
| 7921 ++ vstr d7, [r8, #88] |
| 7922 ++ vldr d12, [r2, #96] |
| 7923 ++ vldr d13, [r2, #104] |
| 7924 ++ veor q15, q15, q6 |
| 7925 ++ vstr d30, [r8, #96] |
| 7926 ++ vstr d31, [r8, #104] |
| 7927 ++ vldr d20, [r2, #112] |
| 7928 ++ vldr d21, [r2, #120] |
| 7929 ++ veor q8, q8, q10 |
| 7930 ++ vstr d16, [r8, #112] |
| 7931 ++ vstr d17, [r8, #120] |
| 7932 ++ mov r8, r2 |
| 7933 ++ ldr r2, [r2, #128] |
| 7934 ++ vadd.i32 q10, q14, q9 |
| 7935 ++ eor r2, fp, r2 |
| 7936 ++ ldr fp, [r7, #240] |
| 7937 ++ vadd.i32 q10, q10, q9 |
| 7938 ++ str r2, [fp, #128] |
| 7939 ++ ldr r2, [r8, #132] |
| 7940 ++ eor r2, sl, r2 |
| 7941 ++ str r2, [fp, #132] |
| 7942 ++ ldr r2, [r8, #136] |
| 7943 ++ eor r2, r9, r2 |
| 7944 ++ str r2, [fp, #136] |
| 7945 ++ ldr r2, [r8, #140] |
| 7946 ++ eors r2, r2, r4 |
| 7947 ++ str r2, [fp, #140] |
| 7948 ++ ldr r2, [r7, #236] |
| 7949 ++ ldr r4, [r8, #144] |
| 7950 ++ ldr r2, [r2, #0] |
| 7951 ++ str r4, [r7, #168] |
| 7952 ++ ldr r4, [r7, #224] |
| 7953 ++ add r8, r4, r2 |
| 7954 ++ ldr r2, [r7, #168] |
| 7955 ++ ldr r4, [r7, #236] |
| 7956 ++ eor r8, r8, r2 |
| 7957 ++ ldr r2, [r7, #244] |
| 7958 ++ str r8, [fp, #144] |
| 7959 ++ ldr r4, [r4, #4] |
| 7960 ++ ldr r2, [r2, #148] |
| 7961 ++ str r2, [r7, #36] |
| 7962 ++ ldr r2, [r7, #260] |
| 7963 ++ add r8, r2, r4 |
| 7964 ++ ldr r4, [r7, #36] |
| 7965 ++ ldr r2, [r7, #236] |
| 7966 ++ eor r8, r8, r4 |
| 7967 ++ ldr r4, [r7, #244] |
| 7968 ++ str r8, [fp, #148] |
| 7969 ++ ldr r2, [r2, #8] |
| 7970 ++ ldr r4, [r4, #152] |
| 7971 ++ str r4, [r7, #32] |
| 7972 ++ ldr r4, [r7, #256] |
| 7973 ++ add r8, r4, r2 |
| 7974 ++ ldr r2, [r7, #32] |
| 7975 ++ eor r8, r8, r2 |
| 7976 ++ str r8, [fp, #152] |
| 7977 ++ ldr r2, [r7, #244] |
| 7978 ++ ldr r4, [r7, #236] |
| 7979 ++ ldr r2, [r2, #156] |
| 7980 ++ ldr r4, [r4, #12] |
| 7981 ++ str r2, [r7, #28] |
| 7982 ++ ldr r2, [r7, #228] |
| 7983 ++ add r8, r2, r4 |
| 7984 ++ ldr r4, [r7, #28] |
| 7985 ++ ldr r2, [r7, #244] |
| 7986 ++ eor r8, r8, r4 |
| 7987 ++ str r8, [fp, #156] |
| 7988 ++ ldr r8, [r7, #236] |
| 7989 ++ ldr r2, [r2, #160] |
| 7990 ++ ldr r4, [r8, #16] |
| 7991 ++ adds r0, r0, r4 |
| 7992 ++ ldr r4, [r7, #244] |
| 7993 ++ eors r0, r0, r2 |
| 7994 ++ str r0, [fp, #160] |
| 7995 ++ ldr r0, [r8, #20] |
| 7996 ++ ldr r2, [r4, #164] |
| 7997 ++ adds r1, r1, r0 |
| 7998 ++ ldr r0, [r7, #272] |
| 7999 ++ eors r1, r1, r2 |
| 8000 ++ str r1, [fp, #164] |
| 8001 ++ ldr r2, [r8, #24] |
| 8002 ++ ldr r1, [r4, #168] |
| 8003 ++ adds r2, r0, r2 |
| 8004 ++ eors r2, r2, r1 |
| 8005 ++ str r2, [fp, #168] |
| 8006 ++ ldr r1, [r8, #28] |
| 8007 ++ ldr r2, [r4, #172] |
| 8008 ++ adds r3, r3, r1 |
| 8009 ++ eors r3, r3, r2 |
| 8010 ++ str r3, [fp, #172] |
| 8011 ++ ldr r3, [r4, #176] |
| 8012 ++ eor r3, ip, r3 |
| 8013 ++ str r3, [fp, #176] |
| 8014 ++ ldr r3, [r4, #180] |
| 8015 ++ ldr r4, [r7, #392] |
| 8016 ++ eors r6, r6, r3 |
| 8017 ++ str r6, [fp, #180] |
| 8018 ++ ldr r6, [r7, #244] |
| 8019 ++ ldr r2, [r4, #0] |
| 8020 ++ ldr r3, [r6, #184] |
| 8021 ++ adds r5, r5, r2 |
| 8022 ++ eors r5, r5, r3 |
| 8023 ++ str r5, [fp, #184] |
| 8024 ++ ldr r2, [r6, #188] |
| 8025 ++ adds r6, r6, #192 |
| 8026 ++ ldr r3, [r4, #4] |
| 8027 ++ str r6, [r7, #244] |
| 8028 ++ ldr r0, [r7, #20] |
| 8029 ++ ldr r1, [r7, #232] |
| 8030 ++ adds r4, r0, r3 |
| 8031 ++ eors r4, r4, r2 |
| 8032 ++ ldr r2, [r7, #204] |
| 8033 ++ str r4, [fp, #188] |
| 8034 ++ add fp, fp, #192 |
| 8035 ++ cmp r1, r2 |
| 8036 ++ str fp, [r7, #240] |
| 8037 ++ bne .L4 |
| 8038 ++ ldr r4, [r7, #188] |
| 8039 ++ ldr r3, [r7, #176] |
| 8040 ++ ldr r6, [r7, #184] |
| 8041 ++ adds r5, r3, r4 |
| 8042 ++ ldr r8, [r7, #180] |
| 8043 ++ lsls r5, r5, #6 |
| 8044 ++ adds r4, r6, r5 |
| 8045 ++ add r5, r8, r5 |
| 8046 ++.L2: |
| 8047 ++ ldr fp, [r7, #192] |
| 8048 ++ movw r3, #43691 |
| 8049 ++ movt r3, 43690 |
| 8050 ++ ldr r6, [r7, #192] |
| 8051 ++ umull fp, r3, r3, fp |
| 8052 ++ lsrs r3, r3, #7 |
| 8053 ++ add r3, r3, r3, lsl #1 |
| 8054 ++ sub r3, r6, r3, lsl #6 |
| 8055 ++ lsrs r6, r3, #6 |
| 8056 ++ beq .L5 |
| 8057 ++ add r1, r5, #16 |
| 8058 ++ add r2, r4, #16 |
| 8059 ++ mov r0, r6 |
| 8060 ++ vldr d30, .L41 |
| 8061 ++ vldr d31, .L41+8 |
| 8062 ++.L6: |
| 8063 ++ vmov q8, q10 @ v4si |
| 8064 ++ movs r3, #10 |
| 8065 ++ vmov q1, q13 @ v4si |
| 8066 ++ vmov q14, q12 @ v4si |
| 8067 ++ vmov q3, q11 @ v4si |
| 8068 ++.L7: |
| 8069 ++ vadd.i32 q3, q3, q14 |
| 8070 ++ subs r3, r3, #1 |
| 8071 ++ veor q2, q8, q3 |
| 8072 ++ vrev32.16 q2, q2 |
| 8073 ++ vadd.i32 q8, q1, q2 |
| 8074 ++ veor q9, q8, q14 |
| 8075 ++ vshl.i32 q14, q9, #12 |
| 8076 ++ vsri.32 q14, q9, #20 |
| 8077 ++ vadd.i32 q3, q3, q14 |
| 8078 ++ veor q2, q3, q2 |
| 8079 ++ vshl.i32 q9, q2, #8 |
| 8080 ++ vsri.32 q9, q2, #24 |
| 8081 ++ vadd.i32 q8, q8, q9 |
| 8082 ++ vext.32 q9, q9, q9, #3 |
| 8083 ++ veor q14, q8, q14 |
| 8084 ++ vext.32 q1, q8, q8, #2 |
| 8085 ++ vshl.i32 q8, q14, #7 |
| 8086 ++ vsri.32 q8, q14, #25 |
| 8087 ++ vext.32 q8, q8, q8, #1 |
| 8088 ++ vadd.i32 q3, q3, q8 |
| 8089 ++ veor q2, q3, q9 |
| 8090 ++ vrev32.16 q2, q2 |
| 8091 ++ vadd.i32 q9, q1, q2 |
| 8092 ++ veor q8, q9, q8 |
| 8093 ++ vshl.i32 q14, q8, #12 |
| 8094 ++ vsri.32 q14, q8, #20 |
| 8095 ++ vadd.i32 q3, q3, q14 |
| 8096 ++ veor q2, q3, q2 |
| 8097 ++ vshl.i32 q8, q2, #8 |
| 8098 ++ vsri.32 q8, q2, #24 |
| 8099 ++ vadd.i32 q9, q9, q8 |
| 8100 ++ vext.32 q8, q8, q8, #1 |
| 8101 ++ veor q14, q9, q14 |
| 8102 ++ vext.32 q1, q9, q9, #2 |
| 8103 ++ vshl.i32 q9, q14, #7 |
| 8104 ++ vsri.32 q9, q14, #25 |
| 8105 ++ vext.32 q14, q9, q9, #3 |
| 8106 ++ bne .L7 |
| 8107 ++ vadd.i32 q8, q10, q8 |
| 8108 ++ subs r0, r0, #1 |
| 8109 ++ vadd.i32 q3, q11, q3 |
| 8110 ++ vldr d0, [r1, #-16] |
| 8111 ++ vldr d1, [r1, #-8] |
| 8112 ++ vadd.i32 q14, q12, q14 |
| 8113 ++ vadd.i32 q1, q13, q1 |
| 8114 ++ veor q3, q3, q0 |
| 8115 ++ vstr d6, [r2, #-16] |
| 8116 ++ vstr d7, [r2, #-8] |
| 8117 ++ vadd.i32 q10, q10, q15 |
| 8118 ++ vld1.64 {d8-d9}, [r1:64] |
| 8119 ++ veor q14, q14, q4 |
| 8120 ++ vst1.64 {d28-d29}, [r2:64] |
| 8121 ++ vldr d10, [r1, #16] |
| 8122 ++ vldr d11, [r1, #24] |
| 8123 ++ veor q1, q1, q5 |
| 8124 ++ vstr d2, [r2, #16] |
| 8125 ++ vstr d3, [r2, #24] |
| 8126 ++ vldr d18, [r1, #32] |
| 8127 ++ vldr d19, [r1, #40] |
| 8128 ++ add r1, r1, #64 |
| 8129 ++ veor q8, q8, q9 |
| 8130 ++ vstr d16, [r2, #32] |
| 8131 ++ vstr d17, [r2, #40] |
| 8132 ++ add r2, r2, #64 |
| 8133 ++ bne .L6 |
| 8134 ++ lsls r6, r6, #6 |
| 8135 ++ adds r4, r4, r6 |
| 8136 ++ adds r5, r5, r6 |
| 8137 ++.L5: |
| 8138 ++ ldr r6, [r7, #192] |
| 8139 ++ ands ip, r6, #63 |
| 8140 ++ beq .L1 |
| 8141 ++ vmov q8, q10 @ v4si |
| 8142 ++ movs r3, #10 |
| 8143 ++ vmov q14, q13 @ v4si |
| 8144 ++ vmov q9, q12 @ v4si |
| 8145 ++ vmov q15, q11 @ v4si |
| 8146 ++.L10: |
| 8147 ++ vadd.i32 q15, q15, q9 |
| 8148 ++ subs r3, r3, #1 |
| 8149 ++ veor q8, q8, q15 |
| 8150 ++ vrev32.16 q8, q8 |
| 8151 ++ vadd.i32 q3, q14, q8 |
| 8152 ++ veor q9, q3, q9 |
| 8153 ++ vshl.i32 q14, q9, #12 |
| 8154 ++ vsri.32 q14, q9, #20 |
| 8155 ++ vadd.i32 q15, q15, q14 |
| 8156 ++ veor q9, q15, q8 |
| 8157 ++ vshl.i32 q8, q9, #8 |
| 8158 ++ vsri.32 q8, q9, #24 |
| 8159 ++ vadd.i32 q9, q3, q8 |
| 8160 ++ vext.32 q8, q8, q8, #3 |
| 8161 ++ veor q2, q9, q14 |
| 8162 ++ vext.32 q14, q9, q9, #2 |
| 8163 ++ vshl.i32 q9, q2, #7 |
| 8164 ++ vsri.32 q9, q2, #25 |
| 8165 ++ vext.32 q9, q9, q9, #1 |
| 8166 ++ vadd.i32 q15, q15, q9 |
| 8167 ++ veor q3, q15, q8 |
| 8168 ++ vrev32.16 q3, q3 |
| 8169 ++ vadd.i32 q14, q14, q3 |
| 8170 ++ veor q8, q14, q9 |
| 8171 ++ vshl.i32 q9, q8, #12 |
| 8172 ++ vsri.32 q9, q8, #20 |
| 8173 ++ vadd.i32 q15, q15, q9 |
| 8174 ++ veor q3, q15, q3 |
| 8175 ++ vshl.i32 q8, q3, #8 |
| 8176 ++ vsri.32 q8, q3, #24 |
| 8177 ++ vadd.i32 q14, q14, q8 |
| 8178 ++ vext.32 q8, q8, q8, #1 |
| 8179 ++ veor q3, q14, q9 |
| 8180 ++ vext.32 q14, q14, q14, #2 |
| 8181 ++ vshl.i32 q9, q3, #7 |
| 8182 ++ vsri.32 q9, q3, #25 |
| 8183 ++ vext.32 q9, q9, q9, #3 |
| 8184 ++ bne .L10 |
| 8185 ++ cmp ip, #15 |
| 8186 ++ vadd.i32 q11, q11, q15 |
| 8187 ++ bhi .L37 |
| 8188 ++ ldr fp, [r7, #196] |
| 8189 ++ vst1.64 {d22-d23}, [fp:128] |
| 8190 ++.L14: |
| 8191 ++ ldr r6, [r7, #192] |
| 8192 ++ and r3, r6, #48 |
| 8193 ++ cmp ip, r3 |
| 8194 ++ bls .L1 |
| 8195 ++ adds r0, r5, r3 |
| 8196 ++ adds r1, r4, r3 |
| 8197 ++ add r2, r0, #16 |
| 8198 ++ add r6, r1, #16 |
| 8199 ++ cmp r1, r2 |
| 8200 ++ it cc |
| 8201 ++ cmpcc r0, r6 |
| 8202 ++ rsb r9, r3, ip |
| 8203 ++ ite cc |
| 8204 ++ movcc r2, #0 |
| 8205 ++ movcs r2, #1 |
| 8206 ++ cmp r9, #15 |
| 8207 ++ ite ls |
| 8208 ++ movls r2, #0 |
| 8209 ++ andhi r2, r2, #1 |
| 8210 ++ lsr r8, r9, #4 |
| 8211 ++ eor r2, r2, #1 |
| 8212 ++ cmp r8, #0 |
| 8213 ++ it eq |
| 8214 ++ orreq r2, r2, #1 |
| 8215 ++ lsl sl, r8, #4 |
| 8216 ++ cbnz r2, .L35 |
| 8217 ++ ldr fp, [r7, #196] |
| 8218 ++ add r6, fp, r3 |
| 8219 ++.L17: |
| 8220 ++ vld1.8 {q8}, [r0]! |
| 8221 ++ adds r2, r2, #1 |
| 8222 ++ cmp r8, r2 |
| 8223 ++ vld1.8 {q9}, [r6]! |
| 8224 ++ veor q8, q9, q8 |
| 8225 ++ vst1.8 {q8}, [r1]! |
| 8226 ++ bhi .L17 |
| 8227 ++ cmp r9, sl |
| 8228 ++ add r3, r3, sl |
| 8229 ++ beq .L1 |
| 8230 ++.L35: |
| 8231 ++ ldr r0, [r7, #196] |
| 8232 ++.L25: |
| 8233 ++ ldrb r2, [r5, r3] @ zero_extendqisi2 |
| 8234 ++ ldrb r1, [r3, r0] @ zero_extendqisi2 |
| 8235 ++ eors r2, r2, r1 |
| 8236 ++ strb r2, [r4, r3] |
| 8237 ++ adds r3, r3, #1 |
| 8238 ++ cmp ip, r3 |
| 8239 ++ bhi .L25 |
| 8240 ++.L1: |
| 8241 ++ add r7, r7, #296 |
| 8242 ++ mov sp, r7 |
| 8243 ++ fldmfdd sp!, {d8, d9, d10, d11, d12, d13, d14, d15} |
| 8244 ++ pop {r4, r5, r6, r7, r8, r9, sl, fp} |
| 8245 ++ bx lr |
| 8246 ++.L37: |
| 8247 ++ cmp ip, #31 |
| 8248 ++ vld1.64 {d0-d1}, [r5:64] |
| 8249 ++ vadd.i32 q9, q12, q9 |
| 8250 ++ veor q11, q11, q0 |
| 8251 ++ vst1.64 {d22-d23}, [r4:64] |
| 8252 ++ bls .L12 |
| 8253 ++ cmp ip, #47 |
| 8254 ++ vldr d2, [r5, #16] |
| 8255 ++ vldr d3, [r5, #24] |
| 8256 ++ vadd.i32 q13, q13, q14 |
| 8257 ++ veor q9, q9, q1 |
| 8258 ++ vstr d18, [r4, #16] |
| 8259 ++ vstr d19, [r4, #24] |
| 8260 ++ bls .L13 |
| 8261 ++ vadd.i32 q8, q8, q10 |
| 8262 ++ vldr d0, [r5, #32] |
| 8263 ++ vldr d1, [r5, #40] |
| 8264 ++ ldr r6, [r7, #196] |
| 8265 ++ vstr d16, [r6, #48] |
| 8266 ++ vstr d17, [r6, #56] |
| 8267 ++ veor q8, q13, q0 |
| 8268 ++ vstr d16, [r4, #32] |
| 8269 ++ vstr d17, [r4, #40] |
| 8270 ++ b .L14 |
| 8271 ++.L12: |
| 8272 ++ ldr r8, [r7, #196] |
| 8273 ++ vstr d18, [r8, #16] |
| 8274 ++ vstr d19, [r8, #24] |
| 8275 ++ b .L14 |
| 8276 ++.L20: |
| 8277 ++ ldr r5, [r7, #180] |
| 8278 ++ ldr r4, [r7, #184] |
| 8279 ++ b .L2 |
| 8280 ++.L13: |
| 8281 ++ ldr r6, [r7, #196] |
| 8282 ++ vstr d26, [r6, #32] |
| 8283 ++ vstr d27, [r6, #40] |
| 8284 ++ b .L14 |
| 8285 ++.L42: |
| 8286 ++ .align 3 |
| 8287 ++.L41: |
| 8288 ++ .word 1 |
| 8289 ++ .word 0 |
| 8290 ++ .word 0 |
| 8291 ++ .word 0 |
| 8292 ++ .size CRYPTO_chacha_20_neon, .-CRYPTO_chacha_20_neon |
| 8293 ++ .section .rodata |
| 8294 ++ .align 3 |
| 8295 ++.LANCHOR0 = . + 0 |
| 8296 ++.LC0: |
| 8297 ++ .word 1634760805 |
| 8298 ++ .word 857760878 |
| 8299 ++ .word 2036477234 |
| 8300 ++ .word 1797285236 |
| 8301 ++ .ident "GCC: (crosstool-NG linaro-1.13.1-4.7-2012.10-20121022 - Linaro
GCC 2012.10) 4.7.3 20121001 (prerelease)" |
| 8302 ++ .section .note.GNU-stack,"",%progbits |
| 8303 +diff --git a/crypto/cryptlib.c b/crypto/cryptlib.c |
| 8304 +index 7bef015..3b6ab1d 100644 |
| 8305 +--- a/crypto/cryptlib.c |
| 8306 ++++ b/crypto/cryptlib.c |
| 8307 +@@ -661,6 +661,20 @@ const char *CRYPTO_get_lock_name(int type) |
| 8308 + return(sk_OPENSSL_STRING_value(app_locks,type-CRYPTO_NUM_LOCKS))
; |
| 8309 + } |
| 8310 + |
| 8311 ++#if __arm__ |
| 8312 ++static int global_arm_neon_enabled = 0; |
| 8313 ++ |
| 8314 ++void CRYPTO_set_NEON_capable(int on) |
| 8315 ++ { |
| 8316 ++ global_arm_neon_enabled = on != 0; |
| 8317 ++ } |
| 8318 ++ |
| 8319 ++int CRYPTO_is_NEON_capable() |
| 8320 ++ { |
| 8321 ++ return global_arm_neon_enabled; |
| 8322 ++ } |
| 8323 ++#endif |
| 8324 ++ |
| 8325 + #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ |
| 8326 + defined(__INTEL__) || \ |
| 8327 + defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined
(_M_X64) |
| 8328 +diff --git a/crypto/crypto.h b/crypto/crypto.h |
| 8329 +index e11ac73..db339c3 100644 |
| 8330 +--- a/crypto/crypto.h |
| 8331 ++++ b/crypto/crypto.h |
| 8332 +@@ -414,6 +414,14 @@ void CRYPTO_cleanup_all_ex_data(void); |
| 8333 + |
| 8334 + int CRYPTO_get_new_lockid(char *name); |
| 8335 + |
| 8336 ++/* CRYPTO_set_NEON_capable enables any NEON (ARM vector) dependent code. This |
| 8337 ++ * code should be called before any non-init functions. */ |
| 8338 ++void CRYPTO_set_NEON_capable(int on); |
| 8339 ++ |
| 8340 ++/* CRYPTO_is_NEON_capable returns the last value given to |
| 8341 ++ * CRYPTO_set_NEON_capable, or else zero if it has never been called. */ |
| 8342 ++int CRYPTO_is_NEON_capable(); |
| 8343 ++ |
| 8344 + int CRYPTO_num_locks(void); /* return CRYPTO_NUM_LOCKS (shared libs!) */ |
| 8345 + void CRYPTO_lock(int mode, int type,const char *file,int line); |
| 8346 + void CRYPTO_set_locking_callback(void (*func)(int mode,int type, |
| 8347 +diff --git a/crypto/poly1305/poly1305.c b/crypto/poly1305/poly1305.c |
| 8348 +index 2e5621d..00d53bf 100644 |
| 8349 +--- a/crypto/poly1305/poly1305.c |
| 8350 ++++ b/crypto/poly1305/poly1305.c |
| 8351 +@@ -90,6 +90,17 @@ static void U32TO8_LE(unsigned char *m, uint32_t v) |
| 8352 + } |
| 8353 + #endif |
| 8354 + |
| 8355 ++#if __arm__ |
| 8356 ++void CRYPTO_poly1305_init_neon(poly1305_state* state, |
| 8357 ++ const unsigned char key[32]); |
| 8358 ++ |
| 8359 ++void CRYPTO_poly1305_update_neon(poly1305_state* state, |
| 8360 ++ const unsigned char *in, |
| 8361 ++ size_t in_len); |
| 8362 ++ |
| 8363 ++void CRYPTO_poly1305_finish_neon(poly1305_state* state, unsigned char mac[16])
; |
| 8364 ++#endif |
| 8365 ++ |
| 8366 + static uint64_t |
| 8367 + mul32x32_64(uint32_t a, uint32_t b) |
| 8368 + { |
| 8369 +@@ -207,6 +218,14 @@ void CRYPTO_poly1305_init(poly1305_state *statep, const un
signed char key[32]) |
| 8370 + struct poly1305_state_st *state = (struct poly1305_state_st*) statep; |
| 8371 + uint32_t t0,t1,t2,t3; |
| 8372 + |
| 8373 ++#if __arm__ |
| 8374 ++ if (CRYPTO_is_NEON_capable()) |
| 8375 ++ { |
| 8376 ++ CRYPTO_poly1305_init_neon(statep, key); |
| 8377 ++ return; |
| 8378 ++ } |
| 8379 ++#endif |
| 8380 ++ |
| 8381 + t0 = U8TO32_LE(key+0); |
| 8382 + t1 = U8TO32_LE(key+4); |
| 8383 + t2 = U8TO32_LE(key+8); |
| 8384 +@@ -241,6 +260,14 @@ void CRYPTO_poly1305_update(poly1305_state *statep, const
unsigned char *in, |
| 8385 + unsigned int i; |
| 8386 + struct poly1305_state_st *state = (struct poly1305_state_st*) statep; |
| 8387 + |
| 8388 ++#if __arm__ |
| 8389 ++ if (CRYPTO_is_NEON_capable()) |
| 8390 ++ { |
| 8391 ++ CRYPTO_poly1305_update_neon(statep, in, in_len); |
| 8392 ++ return; |
| 8393 ++ } |
| 8394 ++#endif |
| 8395 ++ |
| 8396 + if (state->buf_used) |
| 8397 + { |
| 8398 + unsigned int todo = 16 - state->buf_used; |
| 8399 +@@ -282,6 +309,14 @@ void CRYPTO_poly1305_finish(poly1305_state *statep, unsign
ed char mac[16]) |
| 8400 + uint32_t g0,g1,g2,g3,g4; |
| 8401 + uint32_t b, nb; |
| 8402 + |
| 8403 ++#if __arm__ |
| 8404 ++ if (CRYPTO_is_NEON_capable()) |
| 8405 ++ { |
| 8406 ++ CRYPTO_poly1305_finish_neon(statep, mac); |
| 8407 ++ return; |
| 8408 ++ } |
| 8409 ++#endif |
| 8410 ++ |
| 8411 + if (state->buf_used) |
| 8412 + poly1305_update(state, state->buf, state->buf_used); |
| 8413 + |
| 8414 +diff --git a/crypto/poly1305/poly1305_arm.c b/crypto/poly1305/poly1305_arm.c |
| 8415 +index adcef35..34e339d 100644 |
| 8416 +--- a/crypto/poly1305/poly1305_arm.c |
| 8417 ++++ b/crypto/poly1305/poly1305_arm.c |
| 8418 +@@ -51,6 +51,7 @@ |
| 8419 + * SUPERCOP by D. J. Bernstein and Peter Schwabe. */ |
| 8420 + |
| 8421 + #include <stdint.h> |
| 8422 ++#include <string.h> |
| 8423 + |
| 8424 + #include <openssl/poly1305.h> |
| 8425 + |
| 8426 +@@ -202,7 +203,8 @@ struct poly1305_state_st { |
| 8427 + unsigned char key[16]; |
| 8428 + }; |
| 8429 + |
| 8430 +-void CRYPTO_poly1305_init(poly1305_state *state, const unsigned char key[32]) |
| 8431 ++void CRYPTO_poly1305_init_neon(poly1305_state *state, |
| 8432 ++ const unsigned char key[32]) |
| 8433 + { |
| 8434 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state); |
| 8435 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); |
| 8436 +@@ -227,7 +229,8 @@ void CRYPTO_poly1305_init(poly1305_state *state, const unsi
gned char key[32]) |
| 8437 + st->buf_used = 0; |
| 8438 + } |
| 8439 + |
| 8440 +-void CRYPTO_poly1305_update(poly1305_state *state, const unsigned char *in, si
ze_t in_len) |
| 8441 ++void CRYPTO_poly1305_update_neon(poly1305_state *state, const unsigned char *i
n, |
| 8442 ++ size_t in_len) |
| 8443 + { |
| 8444 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state); |
| 8445 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); |
| 8446 +@@ -285,7 +288,7 @@ void CRYPTO_poly1305_update(poly1305_state *state, const un
signed char *in, size |
| 8447 + } |
| 8448 + } |
| 8449 + |
| 8450 +-void CRYPTO_poly1305_finish(poly1305_state* state, unsigned char mac[16]) |
| 8451 ++void CRYPTO_poly1305_finish_neon(poly1305_state* state, unsigned char mac[16]) |
| 8452 + { |
| 8453 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state); |
| 8454 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); |
| 8455 +-- |
| 8456 +1.8.4.1 |
| 8457 + |
| 8458 diff -burN android-openssl-lhash2/patches/tls1_change_cipher_state_rewrite.patch
android-openssl/patches/tls1_change_cipher_state_rewrite.patch |
| 8459 --- android-openssl-lhash2/patches/tls1_change_cipher_state_rewrite.patch
1969-12-31 19:00:00.000000000 -0500 |
| 8460 +++ android-openssl/patches/tls1_change_cipher_state_rewrite.patch 2013-11-
05 14:14:34.631283497 -0500 |
| 8461 @@ -0,0 +1,567 @@ |
| 8462 +From d7f9af2d2682bc41e7bf1d669cda60f04630b04d Mon Sep 17 00:00:00 2001 |
| 8463 +From: Adam Langley <agl@chromium.org> |
| 8464 +Date: Thu, 25 Jul 2013 14:57:38 -0400 |
| 8465 +Subject: [PATCH 39/50] tls1_change_cipher_state_rewrite |
| 8466 + |
| 8467 +The previous version of the function made adding AEAD changes very |
| 8468 +difficult. This change should be a semantic no-op - it should be purely |
| 8469 +a cleanup. |
| 8470 +--- |
| 8471 + ssl/ssl.h | 1 + |
| 8472 + ssl/ssl_err.c | 2 +- |
| 8473 + ssl/t1_enc.c | 445 +++++++++++++++++++++++++++++++--------------------------- |
| 8474 + 3 files changed, 240 insertions(+), 208 deletions(-) |
| 8475 + |
| 8476 +diff --git a/ssl/ssl.h b/ssl/ssl.h |
| 8477 +index 68e5648..672f3eb 100644 |
| 8478 +--- a/ssl/ssl.h |
| 8479 ++++ b/ssl/ssl.h |
| 8480 +@@ -2439,6 +2439,7 @@ void ERR_load_SSL_strings(void); |
| 8481 + #define SSL_F_SSL_WRITE 208 |
| 8482 + #define SSL_F_TLS1_CERT_VERIFY_MAC 286 |
| 8483 + #define SSL_F_TLS1_CHANGE_CIPHER_STATE 209 |
| 8484 ++#define SSL_F_TLS1_CHANGE_CIPHER_STATE_CIPHER 338 |
| 8485 + #define SSL_F_TLS1_CHECK_SERVERHELLO_TLSEXT 274 |
| 8486 + #define SSL_F_TLS1_ENC 210 |
| 8487 + #define SSL_F_TLS1_EXPORT_KEYING_MATERIAL 314 |
| 8488 +diff --git a/ssl/ssl_err.c b/ssl/ssl_err.c |
| 8489 +index fc98e6c..97b2a0d 100644 |
| 8490 +--- a/ssl/ssl_err.c |
| 8491 ++++ b/ssl/ssl_err.c |
| 8492 +@@ -280,7 +280,7 @@ static ERR_STRING_DATA SSL_str_functs[]= |
| 8493 + {ERR_FUNC(SSL_F_SSL_VERIFY_CERT_CHAIN), "SSL_VERIFY_CERT_CHAIN"}, |
| 8494 + {ERR_FUNC(SSL_F_SSL_WRITE), "SSL_write"}, |
| 8495 + {ERR_FUNC(SSL_F_TLS1_CERT_VERIFY_MAC), "tls1_cert_verify_mac"}, |
| 8496 +-{ERR_FUNC(SSL_F_TLS1_CHANGE_CIPHER_STATE), "TLS1_CHANGE_CIPHER_STATE"}, |
| 8497 ++{ERR_FUNC(SSL_F_TLS1_CHANGE_CIPHER_STATE_CIPHER), "TLS1_CHANGE_CIPHER_STAT
E_CIPHER"}, |
| 8498 + {ERR_FUNC(SSL_F_TLS1_CHECK_SERVERHELLO_TLSEXT), "TLS1_CHECK_SERVERHELLO_
TLSEXT"}, |
| 8499 + {ERR_FUNC(SSL_F_TLS1_ENC), "TLS1_ENC"}, |
| 8500 + {ERR_FUNC(SSL_F_TLS1_EXPORT_KEYING_MATERIAL), "TLS1_EXPORT_KEYING_MATERIAL"}, |
| 8501 +diff --git a/ssl/t1_enc.c b/ssl/t1_enc.c |
| 8502 +index 3649544..e1f91ba 100644 |
| 8503 +--- a/ssl/t1_enc.c |
| 8504 ++++ b/ssl/t1_enc.c |
| 8505 +@@ -316,56 +316,30 @@ static int tls1_generate_key_block(SSL *s, unsigned char
*km, |
| 8506 + return ret; |
| 8507 + } |
| 8508 + |
| 8509 +-int tls1_change_cipher_state(SSL *s, int which) |
| 8510 ++/* tls1_change_cipher_state_cipher performs the work needed to switch cipher |
| 8511 ++ * states when using EVP_CIPHER. The argument |is_read| is true iff this |
| 8512 ++ * function is being called due to reading, as opposed to writing, a |
| 8513 ++ * ChangeCipherSpec message. In order to support export ciphersuites, |
| 8514 ++ * use_client_keys indicates whether the key material provided is in the |
| 8515 ++ * "client write" direction. */ |
| 8516 ++static int tls1_change_cipher_state_cipher( |
| 8517 ++ SSL *s, char is_read, char use_client_keys, |
| 8518 ++ const unsigned char *mac_secret, unsigned mac_secret_len, |
| 8519 ++ const unsigned char *key, unsigned key_len, |
| 8520 ++ const unsigned char *iv, unsigned iv_len) |
| 8521 + { |
| 8522 +- static const unsigned char empty[]=""; |
| 8523 +- unsigned char *p,*mac_secret; |
| 8524 +- unsigned char *exp_label; |
| 8525 +- unsigned char tmp1[EVP_MAX_KEY_LENGTH]; |
| 8526 +- unsigned char tmp2[EVP_MAX_KEY_LENGTH]; |
| 8527 +- unsigned char iv1[EVP_MAX_IV_LENGTH*2]; |
| 8528 +- unsigned char iv2[EVP_MAX_IV_LENGTH*2]; |
| 8529 +- unsigned char *ms,*key,*iv; |
| 8530 +- int client_write; |
| 8531 +- EVP_CIPHER_CTX *dd; |
| 8532 +- const EVP_CIPHER *c; |
| 8533 +-#ifndef OPENSSL_NO_COMP |
| 8534 +- const SSL_COMP *comp; |
| 8535 +-#endif |
| 8536 +- const EVP_MD *m; |
| 8537 +- int mac_type; |
| 8538 +- int *mac_secret_size; |
| 8539 ++ const EVP_CIPHER *cipher = s->s3->tmp.new_sym_enc; |
| 8540 ++ const char is_export = SSL_C_IS_EXPORT(s->s3->tmp.new_cipher) != 0; |
| 8541 ++ EVP_CIPHER_CTX *cipher_ctx; |
| 8542 + EVP_MD_CTX *mac_ctx; |
| 8543 +- EVP_PKEY *mac_key; |
| 8544 +- int is_export,n,i,j,k,exp_label_len,cl; |
| 8545 +- int reuse_dd = 0; |
| 8546 ++ char is_aead_cipher; |
| 8547 + |
| 8548 +- is_export=SSL_C_IS_EXPORT(s->s3->tmp.new_cipher); |
| 8549 +- c=s->s3->tmp.new_sym_enc; |
| 8550 +- m=s->s3->tmp.new_hash; |
| 8551 +- mac_type = s->s3->tmp.new_mac_pkey_type; |
| 8552 +-#ifndef OPENSSL_NO_COMP |
| 8553 +- comp=s->s3->tmp.new_compression; |
| 8554 +-#endif |
| 8555 ++ unsigned char export_tmp1[EVP_MAX_KEY_LENGTH]; |
| 8556 ++ unsigned char export_tmp2[EVP_MAX_KEY_LENGTH]; |
| 8557 ++ unsigned char export_iv1[EVP_MAX_IV_LENGTH * 2]; |
| 8558 ++ unsigned char export_iv2[EVP_MAX_IV_LENGTH * 2]; |
| 8559 + |
| 8560 +-#ifdef KSSL_DEBUG |
| 8561 +- printf("tls1_change_cipher_state(which= %d) w/\n", which); |
| 8562 +- printf("\talg= %ld/%ld, comp= %p\n", |
| 8563 +- s->s3->tmp.new_cipher->algorithm_mkey, |
| 8564 +- s->s3->tmp.new_cipher->algorithm_auth, |
| 8565 +- comp); |
| 8566 +- printf("\tevp_cipher == %p ==? &d_cbc_ede_cipher3\n", c); |
| 8567 +- printf("\tevp_cipher: nid, blksz= %d, %d, keylen=%d, ivlen=%d\n", |
| 8568 +- c->nid,c->block_size,c->key_len,c->iv_len); |
| 8569 +- printf("\tkey_block: len= %d, data= ", s->s3->tmp.key_block_length); |
| 8570 +- { |
| 8571 +- int i; |
| 8572 +- for (i=0; i<s->s3->tmp.key_block_length; i++) |
| 8573 +- printf("%02x", s->s3->tmp.key_block[i]); printf("\n"); |
| 8574 +- } |
| 8575 +-#endif /* KSSL_DEBUG */ |
| 8576 +- |
| 8577 +- if (which & SSL3_CC_READ) |
| 8578 ++ if (is_read) |
| 8579 + { |
| 8580 + if (s->s3->tmp.new_cipher->algorithm2 & TLS1_STREAM_MAC) |
| 8581 + s->mac_flags |= SSL_MAC_FLAG_READ_MAC_STREAM; |
| 8582 +@@ -373,206 +347,257 @@ int tls1_change_cipher_state(SSL *s, int which) |
| 8583 + s->mac_flags &= ~SSL_MAC_FLAG_READ_MAC_STREAM; |
| 8584 + |
| 8585 + if (s->enc_read_ctx != NULL) |
| 8586 +- reuse_dd = 1; |
| 8587 ++ EVP_CIPHER_CTX_cleanup(s->enc_read_ctx); |
| 8588 + else if ((s->enc_read_ctx=OPENSSL_malloc(sizeof(EVP_CIPHER_CTX))
) == NULL) |
| 8589 + goto err; |
| 8590 + else |
| 8591 + /* make sure it's intialized in case we exit later with
an error */ |
| 8592 + EVP_CIPHER_CTX_init(s->enc_read_ctx); |
| 8593 +- dd= s->enc_read_ctx; |
| 8594 +- mac_ctx=ssl_replace_hash(&s->read_hash,NULL); |
| 8595 +-#ifndef OPENSSL_NO_COMP |
| 8596 +- if (s->expand != NULL) |
| 8597 +- { |
| 8598 +- COMP_CTX_free(s->expand); |
| 8599 +- s->expand=NULL; |
| 8600 +- } |
| 8601 +- if (comp != NULL) |
| 8602 +- { |
| 8603 +- s->expand=COMP_CTX_new(comp->method); |
| 8604 +- if (s->expand == NULL) |
| 8605 +- { |
| 8606 +- SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,SSL_R_COMP
RESSION_LIBRARY_ERROR); |
| 8607 +- goto err2; |
| 8608 +- } |
| 8609 +- if (s->s3->rrec.comp == NULL) |
| 8610 +- s->s3->rrec.comp=(unsigned char *) |
| 8611 +- OPENSSL_malloc(SSL3_RT_MAX_ENCRYPTED_LEN
GTH); |
| 8612 +- if (s->s3->rrec.comp == NULL) |
| 8613 +- goto err; |
| 8614 +- } |
| 8615 +-#endif |
| 8616 +- /* this is done by dtls1_reset_seq_numbers for DTLS1_VERSION */ |
| 8617 +- if (s->version != DTLS1_VERSION) |
| 8618 +- memset(&(s->s3->read_sequence[0]),0,8); |
| 8619 +- mac_secret= &(s->s3->read_mac_secret[0]); |
| 8620 +- mac_secret_size=&(s->s3->read_mac_secret_size); |
| 8621 ++ |
| 8622 ++ cipher_ctx = s->enc_read_ctx; |
| 8623 ++ mac_ctx = ssl_replace_hash(&s->read_hash, NULL); |
| 8624 ++ |
| 8625 ++ memcpy(s->s3->read_mac_secret, mac_secret, mac_secret_len); |
| 8626 ++ s->s3->read_mac_secret_size = mac_secret_len; |
| 8627 + } |
| 8628 + else |
| 8629 + { |
| 8630 + if (s->s3->tmp.new_cipher->algorithm2 & TLS1_STREAM_MAC) |
| 8631 + s->mac_flags |= SSL_MAC_FLAG_WRITE_MAC_STREAM; |
| 8632 +- else |
| 8633 ++ else |
| 8634 + s->mac_flags &= ~SSL_MAC_FLAG_WRITE_MAC_STREAM; |
| 8635 ++ |
| 8636 + if (s->enc_write_ctx != NULL) |
| 8637 +- reuse_dd = 1; |
| 8638 ++ EVP_CIPHER_CTX_cleanup(s->enc_write_ctx); |
| 8639 + else if ((s->enc_write_ctx=OPENSSL_malloc(sizeof(EVP_CIPHER_CTX)
)) == NULL) |
| 8640 + goto err; |
| 8641 + else |
| 8642 + /* make sure it's intialized in case we exit later with
an error */ |
| 8643 + EVP_CIPHER_CTX_init(s->enc_write_ctx); |
| 8644 +- dd= s->enc_write_ctx; |
| 8645 +- mac_ctx = ssl_replace_hash(&s->write_hash,NULL); |
| 8646 +-#ifndef OPENSSL_NO_COMP |
| 8647 +- if (s->compress != NULL) |
| 8648 +- { |
| 8649 +- COMP_CTX_free(s->compress); |
| 8650 +- s->compress=NULL; |
| 8651 +- } |
| 8652 +- if (comp != NULL) |
| 8653 +- { |
| 8654 +- s->compress=COMP_CTX_new(comp->method); |
| 8655 +- if (s->compress == NULL) |
| 8656 +- { |
| 8657 +- SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,SSL_R_COMP
RESSION_LIBRARY_ERROR); |
| 8658 +- goto err2; |
| 8659 +- } |
| 8660 +- } |
| 8661 +-#endif |
| 8662 +- /* this is done by dtls1_reset_seq_numbers for DTLS1_VERSION */ |
| 8663 +- if (s->version != DTLS1_VERSION) |
| 8664 +- memset(&(s->s3->write_sequence[0]),0,8); |
| 8665 +- mac_secret= &(s->s3->write_mac_secret[0]); |
| 8666 +- mac_secret_size = &(s->s3->write_mac_secret_size); |
| 8667 +- } |
| 8668 +- |
| 8669 +- if (reuse_dd) |
| 8670 +- EVP_CIPHER_CTX_cleanup(dd); |
| 8671 + |
| 8672 +- p=s->s3->tmp.key_block; |
| 8673 +- i=*mac_secret_size=s->s3->tmp.new_mac_secret_size; |
| 8674 ++ cipher_ctx = s->enc_write_ctx; |
| 8675 ++ mac_ctx = ssl_replace_hash(&s->write_hash, NULL); |
| 8676 + |
| 8677 +- cl=EVP_CIPHER_key_length(c); |
| 8678 +- j=is_export ? (cl < SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher) ? |
| 8679 +- cl : SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher)) : cl; |
| 8680 +- /* Was j=(exp)?5:EVP_CIPHER_key_length(c); */ |
| 8681 +- /* If GCM mode only part of IV comes from PRF */ |
| 8682 +- if (EVP_CIPHER_mode(c) == EVP_CIPH_GCM_MODE) |
| 8683 +- k = EVP_GCM_TLS_FIXED_IV_LEN; |
| 8684 +- else |
| 8685 +- k=EVP_CIPHER_iv_length(c); |
| 8686 +- if ( (which == SSL3_CHANGE_CIPHER_CLIENT_WRITE) || |
| 8687 +- (which == SSL3_CHANGE_CIPHER_SERVER_READ)) |
| 8688 +- { |
| 8689 +- ms= &(p[ 0]); n=i+i; |
| 8690 +- key= &(p[ n]); n+=j+j; |
| 8691 +- iv= &(p[ n]); n+=k+k; |
| 8692 +- exp_label=(unsigned char *)TLS_MD_CLIENT_WRITE_KEY_CONST; |
| 8693 +- exp_label_len=TLS_MD_CLIENT_WRITE_KEY_CONST_SIZE; |
| 8694 +- client_write=1; |
| 8695 +- } |
| 8696 +- else |
| 8697 +- { |
| 8698 +- n=i; |
| 8699 +- ms= &(p[ n]); n+=i+j; |
| 8700 +- key= &(p[ n]); n+=j+k; |
| 8701 +- iv= &(p[ n]); n+=k; |
| 8702 +- exp_label=(unsigned char *)TLS_MD_SERVER_WRITE_KEY_CONST; |
| 8703 +- exp_label_len=TLS_MD_SERVER_WRITE_KEY_CONST_SIZE; |
| 8704 +- client_write=0; |
| 8705 ++ memcpy(s->s3->write_mac_secret, mac_secret, mac_secret_len); |
| 8706 ++ s->s3->write_mac_secret_size = mac_secret_len; |
| 8707 + } |
| 8708 + |
| 8709 +- if (n > s->s3->tmp.key_block_length) |
| 8710 +- { |
| 8711 +- SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,ERR_R_INTERNAL_ERROR); |
| 8712 +- goto err2; |
| 8713 +- } |
| 8714 +- |
| 8715 +- memcpy(mac_secret,ms,i); |
| 8716 +- |
| 8717 +- if (!(EVP_CIPHER_flags(c)&EVP_CIPH_FLAG_AEAD_CIPHER)) |
| 8718 +- { |
| 8719 +- mac_key = EVP_PKEY_new_mac_key(mac_type, NULL, |
| 8720 +- mac_secret,*mac_secret_size); |
| 8721 +- EVP_DigestSignInit(mac_ctx,NULL,m,NULL,mac_key); |
| 8722 +- EVP_PKEY_free(mac_key); |
| 8723 +- } |
| 8724 +-#ifdef TLS_DEBUG |
| 8725 +-printf("which = %04X\nmac key=",which); |
| 8726 +-{ int z; for (z=0; z<i; z++) printf("%02X%c",ms[z],((z+1)%16)?' ':'\n'); } |
| 8727 +-#endif |
| 8728 + if (is_export) |
| 8729 + { |
| 8730 + /* In here I set both the read and write key/iv to the |
| 8731 + * same value since only the correct one will be used :-). |
| 8732 + */ |
| 8733 ++ const unsigned char *label; |
| 8734 ++ unsigned label_len; |
| 8735 ++ |
| 8736 ++ if (use_client_keys) |
| 8737 ++ { |
| 8738 ++ label = (const unsigned char*) TLS_MD_CLIENT_WRITE_KEY_C
ONST; |
| 8739 ++ label_len = TLS_MD_CLIENT_WRITE_KEY_CONST_SIZE; |
| 8740 ++ } |
| 8741 ++ else |
| 8742 ++ { |
| 8743 ++ label = (const unsigned char*) TLS_MD_SERVER_WRITE_KEY_C
ONST; |
| 8744 ++ label_len = TLS_MD_SERVER_WRITE_KEY_CONST_SIZE; |
| 8745 ++ } |
| 8746 ++ |
| 8747 + if (!tls1_PRF(ssl_get_algorithm2(s), |
| 8748 +- exp_label,exp_label_len, |
| 8749 +- s->s3->client_random,SSL3_RANDOM_SIZE, |
| 8750 +- s->s3->server_random,SSL3_RANDOM_SIZE, |
| 8751 +- NULL,0,NULL,0, |
| 8752 +- key,j,tmp1,tmp2,EVP_CIPHER_key_length(c))) |
| 8753 +- goto err2; |
| 8754 +- key=tmp1; |
| 8755 ++ label, label_len, |
| 8756 ++ s->s3->client_random, SSL3_RANDOM_SIZE, |
| 8757 ++ s->s3->server_random, SSL3_RANDOM_SIZE, |
| 8758 ++ NULL, 0, NULL, 0, |
| 8759 ++ key /* secret */, key_len /* secret length */, |
| 8760 ++ export_tmp1 /* output */, |
| 8761 ++ export_tmp2 /* scratch space */, |
| 8762 ++ EVP_CIPHER_key_length(s->s3->tmp.new_sym_enc) /*
output length */)) |
| 8763 ++ return 0; |
| 8764 ++ key = export_tmp1; |
| 8765 + |
| 8766 +- if (k > 0) |
| 8767 ++ if (iv_len > 0) |
| 8768 + { |
| 8769 ++ static const unsigned char empty[] = ""; |
| 8770 ++ |
| 8771 + if (!tls1_PRF(ssl_get_algorithm2(s), |
| 8772 +- TLS_MD_IV_BLOCK_CONST,TLS_MD_IV_BLOCK_CO
NST_SIZE, |
| 8773 +- s->s3->client_random,SSL3_RANDOM_SIZE, |
| 8774 +- s->s3->server_random,SSL3_RANDOM_SIZE, |
| 8775 +- NULL,0,NULL,0, |
| 8776 +- empty,0,iv1,iv2,k*2)) |
| 8777 +- goto err2; |
| 8778 +- if (client_write) |
| 8779 +- iv=iv1; |
| 8780 ++ TLS_MD_IV_BLOCK_CONST, TLS_MD_IV_BLOCK_C
ONST_SIZE, |
| 8781 ++ s->s3->client_random, SSL3_RANDOM_SIZE, |
| 8782 ++ s->s3->server_random, SSL3_RANDOM_SIZE, |
| 8783 ++ NULL, 0, NULL, 0, |
| 8784 ++ empty /* secret */ ,0 /* secret length *
/, |
| 8785 ++ export_iv1 /* output */, |
| 8786 ++ export_iv2 /* scratch space */, |
| 8787 ++ iv_len * 2 /* output length */)) |
| 8788 ++ return 0; |
| 8789 ++ |
| 8790 ++ if (use_client_keys) |
| 8791 ++ iv = export_iv1; |
| 8792 + else |
| 8793 +- iv= &(iv1[k]); |
| 8794 ++ iv = &export_iv1[iv_len]; |
| 8795 + } |
| 8796 + } |
| 8797 + |
| 8798 +- s->session->key_arg_length=0; |
| 8799 +-#ifdef KSSL_DEBUG |
| 8800 +- { |
| 8801 +- int i; |
| 8802 +- printf("EVP_CipherInit_ex(dd,c,key=,iv=,which)\n"); |
| 8803 +- printf("\tkey= "); for (i=0; i<c->key_len; i++) printf("%02x", key[i]); |
| 8804 +- printf("\n"); |
| 8805 +- printf("\t iv= "); for (i=0; i<c->iv_len; i++) printf("%02x", iv[i]); |
| 8806 +- printf("\n"); |
| 8807 +- } |
| 8808 +-#endif /* KSSL_DEBUG */ |
| 8809 ++ /* is_aead_cipher indicates whether the EVP_CIPHER implements an AEAD |
| 8810 ++ * interface. This is different from the newer EVP_AEAD interface. */ |
| 8811 ++ is_aead_cipher = (EVP_CIPHER_flags(cipher) & EVP_CIPH_FLAG_AEAD_CIPHER)
!= 0; |
| 8812 + |
| 8813 +- if (EVP_CIPHER_mode(c) == EVP_CIPH_GCM_MODE) |
| 8814 ++ if (!is_aead_cipher) |
| 8815 + { |
| 8816 +- EVP_CipherInit_ex(dd,c,NULL,key,NULL,(which & SSL3_CC_WRITE)); |
| 8817 +- EVP_CIPHER_CTX_ctrl(dd, EVP_CTRL_GCM_SET_IV_FIXED, k, iv); |
| 8818 ++ EVP_PKEY *mac_key = |
| 8819 ++ EVP_PKEY_new_mac_key(s->s3->tmp.new_mac_pkey_type, |
| 8820 ++ NULL, mac_secret, mac_secret_len); |
| 8821 ++ if (!mac_key) |
| 8822 ++ return 0; |
| 8823 ++ EVP_DigestSignInit(mac_ctx, NULL, s->s3->tmp.new_hash, NULL, mac
_key); |
| 8824 ++ EVP_PKEY_free(mac_key); |
| 8825 + } |
| 8826 +- else |
| 8827 +- EVP_CipherInit_ex(dd,c,NULL,key,iv,(which & SSL3_CC_WRITE)); |
| 8828 ++ |
| 8829 ++ if (EVP_CIPHER_mode(cipher) == EVP_CIPH_GCM_MODE) |
| 8830 ++ { |
| 8831 ++ EVP_CipherInit_ex(cipher_ctx, cipher, NULL /* engine */, key, |
| 8832 ++ NULL /* iv */, !is_read); |
| 8833 ++ EVP_CIPHER_CTX_ctrl(cipher_ctx, EVP_CTRL_GCM_SET_IV_FIXED, iv_le
n, (void*) iv); |
| 8834 ++ } |
| 8835 ++ else |
| 8836 ++ EVP_CipherInit_ex(cipher_ctx, cipher, NULL /* engine */, key, iv
, !is_read); |
| 8837 + |
| 8838 + /* Needed for "composite" AEADs, such as RC4-HMAC-MD5 */ |
| 8839 +- if ((EVP_CIPHER_flags(c)&EVP_CIPH_FLAG_AEAD_CIPHER) && *mac_secret_size) |
| 8840 +- EVP_CIPHER_CTX_ctrl(dd,EVP_CTRL_AEAD_SET_MAC_KEY, |
| 8841 +- *mac_secret_size,mac_secret); |
| 8842 +- |
| 8843 +-#ifdef TLS_DEBUG |
| 8844 +-printf("which = %04X\nkey=",which); |
| 8845 +-{ int z; for (z=0; z<EVP_CIPHER_key_length(c); z++) printf("%02X%c",key[z],((z
+1)%16)?' ':'\n'); } |
| 8846 +-printf("\niv="); |
| 8847 +-{ int z; for (z=0; z<k; z++) printf("%02X%c",iv[z],((z+1)%16)?' ':'\n'); } |
| 8848 +-printf("\n"); |
| 8849 +-#endif |
| 8850 +- |
| 8851 +- OPENSSL_cleanse(tmp1,sizeof(tmp1)); |
| 8852 +- OPENSSL_cleanse(tmp2,sizeof(tmp1)); |
| 8853 +- OPENSSL_cleanse(iv1,sizeof(iv1)); |
| 8854 +- OPENSSL_cleanse(iv2,sizeof(iv2)); |
| 8855 +- return(1); |
| 8856 ++ if (is_aead_cipher && mac_secret_len > 0) |
| 8857 ++ EVP_CIPHER_CTX_ctrl(cipher_ctx, EVP_CTRL_AEAD_SET_MAC_KEY, |
| 8858 ++ mac_secret_len, (void*) mac_secret); |
| 8859 ++ |
| 8860 ++ if (is_export) |
| 8861 ++ { |
| 8862 ++ OPENSSL_cleanse(export_tmp1, sizeof(export_tmp1)); |
| 8863 ++ OPENSSL_cleanse(export_tmp2, sizeof(export_tmp1)); |
| 8864 ++ OPENSSL_cleanse(export_iv1, sizeof(export_iv1)); |
| 8865 ++ OPENSSL_cleanse(export_iv2, sizeof(export_iv2)); |
| 8866 ++ } |
| 8867 ++ |
| 8868 ++ return 1; |
| 8869 ++ |
| 8870 ++err: |
| 8871 ++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE_CIPHER, ERR_R_MALLOC_FAILURE); |
| 8872 ++ return 0; |
| 8873 ++ } |
| 8874 ++ |
| 8875 ++int tls1_change_cipher_state(SSL *s, int which) |
| 8876 ++ { |
| 8877 ++ /* is_read is true if we have just read a ChangeCipherSpec message - |
| 8878 ++ * i.e. we need to update the read cipherspec. Otherwise we have just |
| 8879 ++ * written one. */ |
| 8880 ++ const char is_read = (which & SSL3_CC_READ) != 0; |
| 8881 ++ /* use_client_keys is true if we wish to use the keys for the "client |
| 8882 ++ * write" direction. This is the case if we're a client sending a |
| 8883 ++ * ChangeCipherSpec, or a server reading a client's ChangeCipherSpec. */ |
| 8884 ++ const char use_client_keys = which == SSL3_CHANGE_CIPHER_CLIENT_WRITE || |
| 8885 ++ which == SSL3_CHANGE_CIPHER_SERVER_READ; |
| 8886 ++ const unsigned char *client_write_mac_secret, *server_write_mac_secret,
*mac_secret; |
| 8887 ++ const unsigned char *client_write_key, *server_write_key, *key; |
| 8888 ++ const unsigned char *client_write_iv, *server_write_iv, *iv; |
| 8889 ++ const EVP_CIPHER *cipher = s->s3->tmp.new_sym_enc; |
| 8890 ++ unsigned key_len, iv_len, mac_secret_len; |
| 8891 ++ const unsigned char *key_data; |
| 8892 ++ const char is_export = SSL_C_IS_EXPORT(s->s3->tmp.new_cipher) != 0; |
| 8893 ++ |
| 8894 ++ /* Update compression contexts. */ |
| 8895 ++#ifndef OPENSSL_NO_COMP |
| 8896 ++ const SSL_COMP *comp = s->s3->tmp.new_compression; |
| 8897 ++ |
| 8898 ++ if (is_read) |
| 8899 ++ { |
| 8900 ++ if (s->expand != NULL) |
| 8901 ++ { |
| 8902 ++ COMP_CTX_free(s->expand); |
| 8903 ++ s->expand = NULL; |
| 8904 ++ } |
| 8905 ++ if (comp != NULL) |
| 8906 ++ { |
| 8907 ++ s->expand=COMP_CTX_new(comp->method); |
| 8908 ++ if (s->expand == NULL) |
| 8909 ++ { |
| 8910 ++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,SSL_R_COMP
RESSION_LIBRARY_ERROR); |
| 8911 ++ return 0; |
| 8912 ++ } |
| 8913 ++ if (s->s3->rrec.comp == NULL) |
| 8914 ++ s->s3->rrec.comp = |
| 8915 ++ (unsigned char *)OPENSSL_malloc(SSL3_RT_
MAX_ENCRYPTED_LENGTH); |
| 8916 ++ if (s->s3->rrec.comp == NULL) |
| 8917 ++ goto err; |
| 8918 ++ } |
| 8919 ++ } |
| 8920 ++ else |
| 8921 ++ { |
| 8922 ++ if (s->compress != NULL) |
| 8923 ++ { |
| 8924 ++ COMP_CTX_free(s->compress); |
| 8925 ++ s->compress = NULL; |
| 8926 ++ } |
| 8927 ++ if (comp != NULL) |
| 8928 ++ { |
| 8929 ++ s->compress = COMP_CTX_new(comp->method); |
| 8930 ++ if (s->compress == NULL) |
| 8931 ++ { |
| 8932 ++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,SSL_R_COMP
RESSION_LIBRARY_ERROR); |
| 8933 ++ return 0; |
| 8934 ++ } |
| 8935 ++ } |
| 8936 ++ } |
| 8937 ++#endif /* OPENSSL_NO_COMP */ |
| 8938 ++ |
| 8939 ++ /* Reset sequence number to zero. */ |
| 8940 ++ memset(is_read ? s->s3->read_sequence : s->s3->write_sequence, 0, 8); |
| 8941 ++ |
| 8942 ++ /* key_arg is used for SSLv2. We don't need it for TLS. */ |
| 8943 ++ s->session->key_arg_length = 0; |
| 8944 ++ |
| 8945 ++ mac_secret_len = s->s3->tmp.new_mac_secret_size; |
| 8946 ++ |
| 8947 ++ key_len = EVP_CIPHER_key_length(cipher); |
| 8948 ++ if (is_export && key_len > SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher)
) |
| 8949 ++ key_len = SSL_C_EXPORT_KEYLENGTH(s->s3->tmp.new_cipher); |
| 8950 ++ |
| 8951 ++ if (EVP_CIPHER_mode(cipher) == EVP_CIPH_GCM_MODE) |
| 8952 ++ iv_len = EVP_GCM_TLS_FIXED_IV_LEN; |
| 8953 ++ else |
| 8954 ++ iv_len = EVP_CIPHER_iv_length(cipher); |
| 8955 ++ |
| 8956 ++ key_data = s->s3->tmp.key_block; |
| 8957 ++ client_write_mac_secret = key_data; key_data += mac_secret_len; |
| 8958 ++ server_write_mac_secret = key_data; key_data += mac_secret_len; |
| 8959 ++ client_write_key = key_data; key_data += key_len; |
| 8960 ++ server_write_key = key_data; key_data += key_len; |
| 8961 ++ client_write_iv = key_data; key_data += iv_len; |
| 8962 ++ server_write_iv = key_data; key_data += iv_len; |
| 8963 ++ |
| 8964 ++ if (use_client_keys) |
| 8965 ++ { |
| 8966 ++ mac_secret = client_write_mac_secret; |
| 8967 ++ key = client_write_key; |
| 8968 ++ iv = client_write_iv; |
| 8969 ++ } |
| 8970 ++ else |
| 8971 ++ { |
| 8972 ++ mac_secret = server_write_mac_secret; |
| 8973 ++ key = server_write_key; |
| 8974 ++ iv = server_write_iv; |
| 8975 ++ } |
| 8976 ++ |
| 8977 ++ if (key_data - s->s3->tmp.key_block != s->s3->tmp.key_block_length) |
| 8978 ++ { |
| 8979 ++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,ERR_R_INTERNAL_ERROR); |
| 8980 ++ return 0; |
| 8981 ++ } |
| 8982 ++ |
| 8983 ++ if (!tls1_change_cipher_state_cipher(s, is_read, use_client_keys, |
| 8984 ++ mac_secret, mac_secret_len, |
| 8985 ++ key, key_len, |
| 8986 ++ iv, iv_len)) { |
| 8987 ++ return 0; |
| 8988 ++ } |
| 8989 ++ |
| 8990 ++ return 1; |
| 8991 + err: |
| 8992 +- SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE,ERR_R_MALLOC_FAILURE); |
| 8993 +-err2: |
| 8994 +- return(0); |
| 8995 ++ SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE, ERR_R_MALLOC_FAILURE); |
| 8996 ++ return 0; |
| 8997 + } |
| 8998 + |
| 8999 + int tls1_setup_key_block(SSL *s) |
| 9000 +@@ -584,6 +609,7 @@ int tls1_setup_key_block(SSL *s) |
| 9001 + SSL_COMP *comp; |
| 9002 + int mac_type= NID_undef,mac_secret_size=0; |
| 9003 + int ret=0; |
| 9004 ++ int iv_len; |
| 9005 + |
| 9006 + #ifdef KSSL_DEBUG |
| 9007 + printf ("tls1_setup_key_block()\n"); |
| 9008 +@@ -598,11 +624,16 @@ int tls1_setup_key_block(SSL *s) |
| 9009 + return(0); |
| 9010 + } |
| 9011 + |
| 9012 ++ if (EVP_CIPHER_mode(c) == EVP_CIPH_GCM_MODE) |
| 9013 ++ iv_len = EVP_GCM_TLS_FIXED_IV_LEN; |
| 9014 ++ else |
| 9015 ++ iv_len = EVP_CIPHER_iv_length(c); |
| 9016 ++ |
| 9017 + s->s3->tmp.new_sym_enc=c; |
| 9018 + s->s3->tmp.new_hash=hash; |
| 9019 + s->s3->tmp.new_mac_pkey_type = mac_type; |
| 9020 + s->s3->tmp.new_mac_secret_size = mac_secret_size; |
| 9021 +- num=EVP_CIPHER_key_length(c)+mac_secret_size+EVP_CIPHER_iv_length(c); |
| 9022 ++ num=EVP_CIPHER_key_length(c)+mac_secret_size+iv_len; |
| 9023 + num*=2; |
| 9024 + |
| 9025 + ssl3_cleanup_key_block(s); |
| 9026 +-- |
| 9027 +1.8.4.1 |
| 9028 + |
| 9029 diff -burN android-openssl-lhash2/patches/use_aead_for_aes_gcm.patch android-ope
nssl/patches/use_aead_for_aes_gcm.patch |
| 9030 --- android-openssl-lhash2/patches/use_aead_for_aes_gcm.patch 1969-12-31 19:00
:00.000000000 -0500 |
| 9031 +++ android-openssl/patches/use_aead_for_aes_gcm.patch 2013-11-05 14:14:34.6312
83497 -0500 |
| 9032 @@ -0,0 +1,119 @@ |
| 9033 +From 7156ca9ce97c1084d7fd010146c522633ad73e7a Mon Sep 17 00:00:00 2001 |
| 9034 +From: Adam Langley <agl@chromium.org> |
| 9035 +Date: Wed, 4 Sep 2013 12:21:12 -0400 |
| 9036 +Subject: [PATCH 42/50] use_aead_for_aes_gcm. |
| 9037 + |
| 9038 +Switches AES-GCM ciphersuites to use AEAD interfaces. |
| 9039 +--- |
| 9040 + ssl/s3_lib.c | 25 +++++++++++++++---------- |
| 9041 + 1 file changed, 15 insertions(+), 10 deletions(-) |
| 9042 + |
| 9043 +diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c |
| 9044 +index 2cd1654..75b6560 100644 |
| 9045 +--- a/ssl/s3_lib.c |
| 9046 ++++ b/ssl/s3_lib.c |
| 9047 +@@ -166,6 +166,11 @@ const char ssl3_version_str[]="SSLv3" OPENSSL_VERSION_PTEX
T; |
| 9048 + |
| 9049 + #define SSL3_NUM_CIPHERS (sizeof(ssl3_ciphers)/sizeof(SSL_CIPHER)) |
| 9050 + |
| 9051 ++/* FIXED_NONCE_LEN is a macro that results in the correct value to set the |
| 9052 ++ * fixed nonce length in SSL_CIPHER.algorithms2. It's the inverse of |
| 9053 ++ * SSL_CIPHER_AEAD_FIXED_NONCE_LEN. */ |
| 9054 ++#define FIXED_NONCE_LEN(x) ((x/2)<<24) |
| 9055 ++ |
| 9056 + /* list of available SSLv3 ciphers (sorted by id) */ |
| 9057 + OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 9058 + |
| 9059 +@@ -1836,7 +1841,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 9060 + SSL_AEAD, |
| 9061 + SSL_TLSV1_2, |
| 9062 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 9063 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, |
| 9064 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 9065 + 128, |
| 9066 + 128, |
| 9067 + }, |
| 9068 +@@ -1868,7 +1873,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 9069 + SSL_AEAD, |
| 9070 + SSL_TLSV1_2, |
| 9071 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 9072 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, |
| 9073 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 9074 + 128, |
| 9075 + 128, |
| 9076 + }, |
| 9077 +@@ -1900,7 +1905,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 9078 + SSL_AEAD, |
| 9079 + SSL_TLSV1_2, |
| 9080 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 9081 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, |
| 9082 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 9083 + 128, |
| 9084 + 128, |
| 9085 + }, |
| 9086 +@@ -1932,7 +1937,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 9087 + SSL_AEAD, |
| 9088 + SSL_TLSV1_2, |
| 9089 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 9090 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, |
| 9091 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 9092 + 128, |
| 9093 + 128, |
| 9094 + }, |
| 9095 +@@ -1964,7 +1969,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 9096 + SSL_AEAD, |
| 9097 + SSL_TLSV1_2, |
| 9098 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 9099 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, |
| 9100 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 9101 + 128, |
| 9102 + 128, |
| 9103 + }, |
| 9104 +@@ -1996,7 +2001,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 9105 + SSL_AEAD, |
| 9106 + SSL_TLSV1_2, |
| 9107 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 9108 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, |
| 9109 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 9110 + 128, |
| 9111 + 128, |
| 9112 + }, |
| 9113 +@@ -2709,7 +2714,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 9114 + SSL_AEAD, |
| 9115 + SSL_TLSV1_2, |
| 9116 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 9117 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, |
| 9118 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 9119 + 128, |
| 9120 + 128, |
| 9121 + }, |
| 9122 +@@ -2741,7 +2746,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 9123 + SSL_AEAD, |
| 9124 + SSL_TLSV1_2, |
| 9125 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 9126 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, |
| 9127 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 9128 + 128, |
| 9129 + 128, |
| 9130 + }, |
| 9131 +@@ -2773,7 +2778,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 9132 + SSL_AEAD, |
| 9133 + SSL_TLSV1_2, |
| 9134 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 9135 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, |
| 9136 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 9137 + 128, |
| 9138 + 128, |
| 9139 + }, |
| 9140 +@@ -2805,7 +2810,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 9141 + SSL_AEAD, |
| 9142 + SSL_TLSV1_2, |
| 9143 + SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 9144 +- SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256, |
| 9145 ++ SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 9146 + 128, |
| 9147 + 128, |
| 9148 + }, |
| 9149 +-- |
| 9150 +1.8.4.1 |
| 9151 + |
OLD | NEW |