OLD | NEW |
(Empty) | |
| 1 From 2688f00904e4ffd647afcff69bb8fe6df8c5902b Mon Sep 17 00:00:00 2001 |
| 2 From: Adam Langley <agl@chromium.org> |
| 3 Date: Mon, 9 Sep 2013 12:13:24 -0400 |
| 4 Subject: [PATCH 43/52] chacha20poly1305 |
| 5 |
| 6 Add support for Chacha20 + Poly1305. |
| 7 --- |
| 8 .gitignore | 1 + |
| 9 Configure | 56 +- |
| 10 Makefile.org | 6 +- |
| 11 apps/speed.c | 64 +- |
| 12 crypto/chacha/Makefile | 80 ++ |
| 13 crypto/chacha/chacha.h | 85 ++ |
| 14 crypto/chacha/chacha_enc.c | 167 +++ |
| 15 crypto/chacha/chacha_vec.c | 345 +++++++ |
| 16 crypto/chacha/chachatest.c | 211 ++++ |
| 17 crypto/evp/Makefile | 35 +- |
| 18 crypto/evp/e_chacha20poly1305.c | 261 +++++ |
| 19 crypto/evp/evp.h | 8 + |
| 20 crypto/evp/evp_err.c | 3 + |
| 21 crypto/poly1305/Makefile | 81 ++ |
| 22 crypto/poly1305/poly1305.c | 320 ++++++ |
| 23 crypto/poly1305/poly1305.h | 88 ++ |
| 24 crypto/poly1305/poly1305_arm.c | 335 ++++++ |
| 25 crypto/poly1305/poly1305_arm_asm.s | 2009 ++++++++++++++++++++++++++++++++++++ |
| 26 crypto/poly1305/poly1305_vec.c | 733 +++++++++++++ |
| 27 crypto/poly1305/poly1305test.c | 166 +++ |
| 28 ssl/s3_lib.c | 75 +- |
| 29 ssl/s3_pkt.c | 5 +- |
| 30 ssl/ssl.h | 1 + |
| 31 ssl/ssl_ciph.c | 16 +- |
| 32 ssl/ssl_locl.h | 10 + |
| 33 ssl/t1_enc.c | 30 +- |
| 34 ssl/tls1.h | 8 + |
| 35 test/Makefile | 23 +- |
| 36 28 files changed, 5166 insertions(+), 56 deletions(-) |
| 37 create mode 100644 crypto/chacha/Makefile |
| 38 create mode 100644 crypto/chacha/chacha.h |
| 39 create mode 100644 crypto/chacha/chacha_enc.c |
| 40 create mode 100644 crypto/chacha/chacha_vec.c |
| 41 create mode 100644 crypto/chacha/chachatest.c |
| 42 create mode 100644 crypto/evp/e_chacha20poly1305.c |
| 43 create mode 100644 crypto/poly1305/Makefile |
| 44 create mode 100644 crypto/poly1305/poly1305.c |
| 45 create mode 100644 crypto/poly1305/poly1305.h |
| 46 create mode 100644 crypto/poly1305/poly1305_arm.c |
| 47 create mode 100644 crypto/poly1305/poly1305_arm_asm.s |
| 48 create mode 100644 crypto/poly1305/poly1305_vec.c |
| 49 create mode 100644 crypto/poly1305/poly1305test.c |
| 50 |
| 51 diff --git a/Configure b/Configure |
| 52 index 9c803dc..1b95384 100755 |
| 53 --- a/Configure |
| 54 +++ b/Configure |
| 55 @@ -124,24 +124,24 @@ my $tlib="-lnsl -lsocket"; |
| 56 my $bits1="THIRTY_TWO_BIT "; |
| 57 my $bits2="SIXTY_FOUR_BIT "; |
| 58 |
| 59 -my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o crypt
586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586
.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cml
l-x86.o:ghash-x86.o:"; |
| 60 +my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o crypt
586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586
.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cml
l-x86.o:ghash-x86.o:::"; |
| 61 |
| 62 my $x86_elf_asm="$x86_asm:elf"; |
| 63 |
| 64 -my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-
gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_
64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_
64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghas
h-x86_64.o:"; |
| 65 -my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.
o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o::::
:ghash-ia64.o::void"; |
| 66 -my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a
-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-spa
rcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; |
| 67 -my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void"; |
| 68 -my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-
alpha.o::void"; |
| 69 -my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o:::
:::::"; |
| 70 -my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha2
56-mips.o sha512-mips.o::::::::"; |
| 71 -my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::ae
s-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-
s390x.o:::::ghash-s390x.o:"; |
| 72 -my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cb
c.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-a
rmv4.o::void"; |
| 73 -my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-p
arisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-
parisc.o::32"; |
| 74 -my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o ae
s-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::gha
sh-parisc.o::64"; |
| 75 -my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o
aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::"; |
| 76 -my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o
aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::"; |
| 77 -my $no_asm=":::::::::::::::void"; |
| 78 +my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-
gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_
64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_
64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghas
h-x86_64.o::chacha_vec.o:poly1305_vec.o"; |
| 79 +my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.
o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o::::
:ghash-ia64.o::::void"; |
| 80 +my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a
-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-spa
rcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::::void"; |
| 81 +my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::::void"; |
| 82 +my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-
alpha.o::::void"; |
| 83 +my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o:::
:::::::"; |
| 84 +my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha2
56-mips.o sha512-mips.o::::::::::"; |
| 85 +my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::ae
s-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-
s390x.o:::::::ghash-s390x.o:"; |
| 86 +my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cb
c.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-a
rmv4.o::chacha_vec.o:poly1305_arm.o poly1305_arm_asm.o:void"; |
| 87 +my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-p
arisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-
parisc.o::::32"; |
| 88 +my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o ae
s-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::gha
sh-parisc.o::::64"; |
| 89 +my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o
aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::::"; |
| 90 +my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o
aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::::"; |
| 91 +my $no_asm=":::::::::::::::::void"; |
| 92 |
| 93 # As for $BSDthreads. Idea is to maintain "collective" set of flags, |
| 94 # which would cover all BSD flavors. -pthread applies to them all, |
| 95 @@ -152,7 +152,7 @@ my $no_asm=":::::::::::::::void"; |
| 96 # seems to be sufficient? |
| 97 my $BSDthreads="-pthread -D_THREAD_SAFE -D_REENTRANT"; |
| 98 |
| 99 -#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lflags : $b
n_ops : $cpuid_obj : $bn_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj : $sha1_
obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_obj : $mod
es_obj : $engines_obj : $dso_scheme : $shared_target : $shared_cflag : $shared_l
dflag : $shared_extension : $ranlib : $arflags : $multilib |
| 100 +#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lflags : $b
n_ops : $cpuid_obj : $bn_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj : $sha1_
obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_obj : $mod
es_obj : $engines_obj : $chacha_obj : $poly1305_obj : $dso_scheme : $shared_targ
et : $shared_cflag : $shared_ldflag : $shared_extension : $ranlib : $arflags : $
multilib : |
| 101 |
| 102 my %table=( |
| 103 # File 'TABLE' (created by 'make TABLE') contains the data from this list, |
| 104 @@ -647,6 +647,8 @@ my $idx_wp_obj = $idx++; |
| 105 my $idx_cmll_obj = $idx++; |
| 106 my $idx_modes_obj = $idx++; |
| 107 my $idx_engines_obj = $idx++; |
| 108 +my $idx_chacha_obj = $idx++; |
| 109 +my $idx_poly1305_obj = $idx++; |
| 110 my $idx_perlasm_scheme = $idx++; |
| 111 my $idx_dso_scheme = $idx++; |
| 112 my $idx_shared_target = $idx++; |
| 113 @@ -692,6 +694,8 @@ my $aes_enc="aes_core.o aes_cbc.o"; |
| 114 my $bf_enc ="bf_enc.o"; |
| 115 my $cast_enc="c_enc.o"; |
| 116 my $rc4_enc="rc4_enc.o rc4_skey.o"; |
| 117 +my $chacha_enc="chacha_enc.o"; |
| 118 +my $poly1305 ="poly1305.o"; |
| 119 my $rc5_enc="rc5_enc.o"; |
| 120 my $md5_obj=""; |
| 121 my $sha1_obj=""; |
| 122 @@ -1144,7 +1148,7 @@ $openssldir=$prefix . "/" . $openssldir if $openssldir !~
/(^\/|^[a-zA-Z]:[\\\/] |
| 123 |
| 124 print "IsMK1MF=$IsMK1MF\n"; |
| 125 |
| 126 -my @fields = split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); |
| 127 +my @fields = split(/\s*:\s*/,$table{$target} . ":" x 31 , -1); |
| 128 my $cc = $fields[$idx_cc]; |
| 129 # Allow environment CC to override compiler... |
| 130 if($ENV{CC}) { |
| 131 @@ -1181,6 +1185,8 @@ my $ranlib = $ENV{'RANLIB'} || $fields[$idx_ranlib]; |
| 132 my $ar = $ENV{'AR'} || "ar"; |
| 133 my $arflags = $fields[$idx_arflags]; |
| 134 my $multilib = $fields[$idx_multilib]; |
| 135 +my $chacha_obj = $fields[$idx_chacha_obj]; |
| 136 +my $poly1305_obj = $fields[$idx_poly1305_obj]; |
| 137 |
| 138 # if $prefix/lib$multilib is not an existing directory, then |
| 139 # assume that it's not searched by linker automatically, in |
| 140 @@ -1477,6 +1483,8 @@ $des_obj=$des_enc unless ($des_obj =~ /\.o$/); |
| 141 $bf_obj=$bf_enc unless ($bf_obj =~ /\.o$/); |
| 142 $cast_obj=$cast_enc unless ($cast_obj =~ /\.o$/); |
| 143 $rc4_obj=$rc4_enc unless ($rc4_obj =~ /\.o$/); |
| 144 +$chacha_obj=$chacha_enc unless ($chacha_obj =~ /\.o$/); |
| 145 +$poly1305_obj=$poly1305 unless ($poly1305_obj =~ /\.o$/); |
| 146 $rc5_obj=$rc5_enc unless ($rc5_obj =~ /\.o$/); |
| 147 if ($sha1_obj =~ /\.o$/) |
| 148 { |
| 149 @@ -1637,6 +1645,8 @@ while (<IN>) |
| 150 s/^BF_ENC=.*$/BF_ENC= $bf_obj/; |
| 151 s/^CAST_ENC=.*$/CAST_ENC= $cast_obj/; |
| 152 s/^RC4_ENC=.*$/RC4_ENC= $rc4_obj/; |
| 153 + s/^CHACHA_ENC=.*$/CHACHA_ENC= $chacha_obj/; |
| 154 + s/^POLY1305=.*$/POLY1305= $poly1305_obj/; |
| 155 s/^RC5_ENC=.*$/RC5_ENC= $rc5_obj/; |
| 156 s/^MD5_ASM_OBJ=.*$/MD5_ASM_OBJ= $md5_obj/; |
| 157 s/^SHA1_ASM_OBJ=.*$/SHA1_ASM_OBJ= $sha1_obj/; |
| 158 @@ -1698,6 +1708,8 @@ print "AES_ENC =$aes_obj\n"; |
| 159 print "BF_ENC =$bf_obj\n"; |
| 160 print "CAST_ENC =$cast_obj\n"; |
| 161 print "RC4_ENC =$rc4_obj\n"; |
| 162 +print "CHACHA_ENC =$chacha_obj\n"; |
| 163 +print "POLY1305 =$poly1305_obj\n"; |
| 164 print "RC5_ENC =$rc5_obj\n"; |
| 165 print "MD5_OBJ_ASM =$md5_obj\n"; |
| 166 print "SHA1_OBJ_ASM =$sha1_obj\n"; |
| 167 @@ -2096,11 +2108,11 @@ sub print_table_entry |
| 168 |
| 169 (my $cc,my $cflags,my $unistd,my $thread_cflag,my $sys_id,my $lflags, |
| 170 my $bn_ops,my $cpuid_obj,my $bn_obj,my $des_obj,my $aes_obj, my $bf_obj, |
| 171 - my $md5_obj,my $sha1_obj,my $cast_obj,my $rc4_obj,my $rmd160_obj, |
| 172 - my $rc5_obj,my $wp_obj,my $cmll_obj,my $modes_obj, my $engines_obj, |
| 173 + my $md5_obj,my $sha1_obj,my $cast_obj,my $rc4_obj,my $chacha_obj,my $pol
y1305_obj, |
| 174 + my $rmd160_obj, my $rc5_obj,my $wp_obj,my $cmll_obj,my $modes_obj, my $e
ngines_obj, |
| 175 my $perlasm_scheme,my $dso_scheme,my $shared_target,my $shared_cflag, |
| 176 my $shared_ldflag,my $shared_extension,my $ranlib,my $arflags,my $multil
ib)= |
| 177 - split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); |
| 178 + split(/\s*:\s*/,$table{$target} . ":" x 31 , -1); |
| 179 |
| 180 print <<EOF |
| 181 |
| 182 @@ -2121,6 +2133,8 @@ sub print_table_entry |
| 183 \$sha1_obj = $sha1_obj |
| 184 \$cast_obj = $cast_obj |
| 185 \$rc4_obj = $rc4_obj |
| 186 +\$chacha_obj = $chacha_obj |
| 187 +\$poly1305_obj = $poly1305_obj |
| 188 \$rmd160_obj = $rmd160_obj |
| 189 \$rc5_obj = $rc5_obj |
| 190 \$wp_obj = $wp_obj |
| 191 @@ -2150,7 +2164,7 @@ sub test_sanity |
| 192 |
| 193 foreach $target (sort keys %table) |
| 194 { |
| 195 - @fields = split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); |
| 196 + @fields = split(/\s*:\s*/,$table{$target} . ":" x 31 , -1); |
| 197 |
| 198 if ($fields[$idx_dso_scheme-1] =~ /^(beos|dl|dlfcn|win32|vms)$/) |
| 199 { |
| 200 diff --git a/Makefile.org b/Makefile.org |
| 201 index 2db31ea..919466d 100644 |
| 202 --- a/Makefile.org |
| 203 +++ b/Makefile.org |
| 204 @@ -94,6 +94,8 @@ BF_ENC= bf_enc.o |
| 205 CAST_ENC= c_enc.o |
| 206 RC4_ENC= rc4_enc.o |
| 207 RC5_ENC= rc5_enc.o |
| 208 +CHACHA_ENC= chacha_enc.o |
| 209 +POLY1305= poly1305.o |
| 210 MD5_ASM_OBJ= |
| 211 SHA1_ASM_OBJ= |
| 212 RMD160_ASM_OBJ= |
| 213 @@ -147,7 +149,7 @@ SDIRS= \ |
| 214 bn ec rsa dsa ecdsa dh ecdh dso engine \ |
| 215 buffer bio stack lhash rand err \ |
| 216 evp asn1 pem x509 x509v3 conf txt_db pkcs7 pkcs12 comp ocsp ui krb5 \ |
| 217 - cms pqueue ts jpake srp store cmac |
| 218 + cms pqueue ts jpake srp store cmac poly1305 chacha |
| 219 # keep in mind that the above list is adjusted by ./Configure |
| 220 # according to no-xxx arguments... |
| 221 |
| 222 @@ -232,6 +234,8 @@ BUILDENV= PLATFORM='$(PLATFORM)' PROCESSOR='$(PROCESSOR)'
\ |
| 223 WP_ASM_OBJ='$(WP_ASM_OBJ)' \ |
| 224 MODES_ASM_OBJ='$(MODES_ASM_OBJ)' \ |
| 225 ENGINES_ASM_OBJ='$(ENGINES_ASM_OBJ)' \ |
| 226 + CHACHA_ENC='$(CHACHA_ENC)' \ |
| 227 + POLY1305='$(POLY1305)' \ |
| 228 PERLASM_SCHEME='$(PERLASM_SCHEME)' \ |
| 229 FIPSLIBDIR='${FIPSLIBDIR}' \ |
| 230 FIPSDIR='${FIPSDIR}' \ |
| 231 diff --git a/crypto/chacha/Makefile b/crypto/chacha/Makefile |
| 232 new file mode 100644 |
| 233 index 0000000..289933b |
| 234 --- /dev/null |
| 235 +++ b/crypto/chacha/Makefile |
| 236 @@ -0,0 +1,80 @@ |
| 237 +# |
| 238 +# OpenSSL/crypto/chacha/Makefile |
| 239 +# |
| 240 + |
| 241 +DIR= chacha |
| 242 +TOP= ../.. |
| 243 +CC= cc |
| 244 +CPP= $(CC) -E |
| 245 +INCLUDES= |
| 246 +CFLAG=-g |
| 247 +AR= ar r |
| 248 + |
| 249 +CFLAGS= $(INCLUDES) $(CFLAG) |
| 250 +ASFLAGS= $(INCLUDES) $(ASFLAG) |
| 251 +AFLAGS= $(ASFLAGS) |
| 252 + |
| 253 +CHACHA_ENC=chacha_enc.o |
| 254 + |
| 255 +GENERAL=Makefile |
| 256 +TEST=chachatest.o |
| 257 +APPS= |
| 258 + |
| 259 +LIB=$(TOP)/libcrypto.a |
| 260 +LIBSRC= |
| 261 +LIBOBJ=$(CHACHA_ENC) |
| 262 + |
| 263 +SRC= $(LIBSRC) |
| 264 + |
| 265 +EXHEADER=chacha.h |
| 266 +HEADER= $(EXHEADER) |
| 267 + |
| 268 +ALL= $(GENERAL) $(SRC) $(HEADER) |
| 269 + |
| 270 +top: |
| 271 + (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all) |
| 272 + |
| 273 +all: lib |
| 274 + |
| 275 +lib: $(LIBOBJ) |
| 276 + $(AR) $(LIB) $(LIBOBJ) |
| 277 + $(RANLIB) $(LIB) || echo Never mind. |
| 278 + @touch lib |
| 279 + |
| 280 +files: |
| 281 + $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO |
| 282 + |
| 283 +links: |
| 284 + @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER) |
| 285 + @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST) |
| 286 + @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS) |
| 287 + |
| 288 +install: |
| 289 + @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile... |
| 290 + @headerlist="$(EXHEADER)"; for i in $$headerlist ; \ |
| 291 + do \ |
| 292 + (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \ |
| 293 + chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \ |
| 294 + done; |
| 295 + |
| 296 +tags: |
| 297 + ctags $(SRC) |
| 298 + |
| 299 +tests: |
| 300 + |
| 301 +lint: |
| 302 + lint -DLINT $(INCLUDES) $(SRC)>fluff |
| 303 + |
| 304 +depend: |
| 305 + @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... |
| 306 + $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) |
| 307 + |
| 308 +dclean: |
| 309 + $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKE
FILE) >Makefile.new |
| 310 + mv -f Makefile.new $(MAKEFILE) |
| 311 + |
| 312 +clean: |
| 313 + rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff |
| 314 + |
| 315 +# DO NOT DELETE THIS LINE -- make depend depends on it. |
| 316 + |
| 317 diff --git a/crypto/chacha/chacha.h b/crypto/chacha/chacha.h |
| 318 new file mode 100644 |
| 319 index 0000000..d56519d |
| 320 --- /dev/null |
| 321 +++ b/crypto/chacha/chacha.h |
| 322 @@ -0,0 +1,85 @@ |
| 323 +/* |
| 324 + * Chacha stream algorithm. |
| 325 + * |
| 326 + * Created on: Jun, 2013 |
| 327 + * Author: Elie Bursztein (elieb@google.com) |
| 328 + * |
| 329 + * Adapted from the estream code by D. Bernstein. |
| 330 + */ |
| 331 +/* ==================================================================== |
| 332 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. |
| 333 + * |
| 334 + * Redistribution and use in source and binary forms, with or without |
| 335 + * modification, are permitted provided that the following conditions |
| 336 + * are met: |
| 337 + * |
| 338 + * 1. Redistributions of source code must retain the above copyright |
| 339 + * notice, this list of conditions and the following disclaimer. |
| 340 + * |
| 341 + * 2. Redistributions in binary form must reproduce the above copyright |
| 342 + * notice, this list of conditions and the following disclaimer in |
| 343 + * the documentation and/or other materials provided with the |
| 344 + * distribution. |
| 345 + * |
| 346 + * 3. All advertising materials mentioning features or use of this |
| 347 + * software must display the following acknowledgment: |
| 348 + * "This product includes software developed by the OpenSSL Project |
| 349 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" |
| 350 + * |
| 351 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 352 + * endorse or promote products derived from this software without |
| 353 + * prior written permission. For written permission, please contact |
| 354 + * licensing@OpenSSL.org. |
| 355 + * |
| 356 + * 5. Products derived from this software may not be called "OpenSSL" |
| 357 + * nor may "OpenSSL" appear in their names without prior written |
| 358 + * permission of the OpenSSL Project. |
| 359 + * |
| 360 + * 6. Redistributions of any form whatsoever must retain the following |
| 361 + * acknowledgment: |
| 362 + * "This product includes software developed by the OpenSSL Project |
| 363 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" |
| 364 + * |
| 365 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 366 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 367 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 368 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 369 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 370 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 371 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 372 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 373 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 374 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 375 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 376 + * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 377 + * ==================================================================== |
| 378 + */ |
| 379 +#ifndef HEADER_CHACHA_H |
| 380 +#define HEADER_CHACHA_H |
| 381 + |
| 382 +#include <openssl/opensslconf.h> |
| 383 + |
| 384 +#if defined(OPENSSL_NO_CHACHA) |
| 385 +#error ChaCha support is disabled. |
| 386 +#endif |
| 387 + |
| 388 +#include <stddef.h> |
| 389 + |
| 390 +#ifdef __cplusplus |
| 391 +extern "C" { |
| 392 +#endif |
| 393 + |
| 394 +/* CRYPTO_chacha_20 encrypts |in_len| bytes from |in| with the given key and |
| 395 + * nonce and writes the result to |out|, which may be equal to |in|. The |
| 396 + * initial block counter is specified by |counter|. */ |
| 397 +void CRYPTO_chacha_20(unsigned char *out, |
| 398 + const unsigned char *in, size_t in_len, |
| 399 + const unsigned char key[32], |
| 400 + const unsigned char nonce[8], |
| 401 + size_t counter); |
| 402 + |
| 403 +#ifdef __cplusplus |
| 404 +} |
| 405 +#endif |
| 406 + |
| 407 +#endif |
| 408 diff --git a/crypto/chacha/chacha_enc.c b/crypto/chacha/chacha_enc.c |
| 409 new file mode 100644 |
| 410 index 0000000..54d1ca3 |
| 411 --- /dev/null |
| 412 +++ b/crypto/chacha/chacha_enc.c |
| 413 @@ -0,0 +1,167 @@ |
| 414 +/* |
| 415 + * Chacha stream algorithm. |
| 416 + * |
| 417 + * Created on: Jun, 2013 |
| 418 + * Author: Elie Bursztein (elieb@google.com) |
| 419 + * |
| 420 + * Adapted from the estream code by D. Bernstein. |
| 421 + */ |
| 422 +/* ==================================================================== |
| 423 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. |
| 424 + * |
| 425 + * Redistribution and use in source and binary forms, with or without |
| 426 + * modification, are permitted provided that the following conditions |
| 427 + * are met: |
| 428 + * |
| 429 + * 1. Redistributions of source code must retain the above copyright |
| 430 + * notice, this list of conditions and the following disclaimer. |
| 431 + * |
| 432 + * 2. Redistributions in binary form must reproduce the above copyright |
| 433 + * notice, this list of conditions and the following disclaimer in |
| 434 + * the documentation and/or other materials provided with the |
| 435 + * distribution. |
| 436 + * |
| 437 + * 3. All advertising materials mentioning features or use of this |
| 438 + * software must display the following acknowledgment: |
| 439 + * "This product includes software developed by the OpenSSL Project |
| 440 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" |
| 441 + * |
| 442 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 443 + * endorse or promote products derived from this software without |
| 444 + * prior written permission. For written permission, please contact |
| 445 + * licensing@OpenSSL.org. |
| 446 + * |
| 447 + * 5. Products derived from this software may not be called "OpenSSL" |
| 448 + * nor may "OpenSSL" appear in their names without prior written |
| 449 + * permission of the OpenSSL Project. |
| 450 + * |
| 451 + * 6. Redistributions of any form whatsoever must retain the following |
| 452 + * acknowledgment: |
| 453 + * "This product includes software developed by the OpenSSL Project |
| 454 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" |
| 455 + * |
| 456 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 457 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 458 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 459 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 460 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 461 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 462 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 463 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 464 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 465 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 466 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 467 + * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 468 + * ==================================================================== |
| 469 + */ |
| 470 + |
| 471 +#include <stdint.h> |
| 472 +#include <string.h> |
| 473 +#include <openssl/opensslconf.h> |
| 474 + |
| 475 +#if !defined(OPENSSL_NO_CHACHA) |
| 476 + |
| 477 +#include <openssl/chacha.h> |
| 478 + |
| 479 +/* sigma contains the ChaCha constants, which happen to be an ASCII string. */ |
| 480 +static const char sigma[16] = "expand 32-byte k"; |
| 481 + |
| 482 +#define ROTATE(v, n) (((v) << (n)) | ((v) >> (32 - (n)))) |
| 483 +#define XOR(v, w) ((v) ^ (w)) |
| 484 +#define PLUS(x, y) ((x) + (y)) |
| 485 +#define PLUSONE(v) (PLUS((v), 1)) |
| 486 + |
| 487 +#define U32TO8_LITTLE(p, v) \ |
| 488 + { (p)[0] = (v >> 0) & 0xff; (p)[1] = (v >> 8) & 0xff; \ |
| 489 + (p)[2] = (v >> 16) & 0xff; (p)[3] = (v >> 24) & 0xff; } |
| 490 +#define U8TO32_LITTLE(p) \ |
| 491 + (((uint32_t)((p)[0]) ) | ((uint32_t)((p)[1]) << 8) | \ |
| 492 + ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24) ) |
| 493 + |
| 494 +/* QUARTERROUND updates a, b, c, d with a ChaCha "quarter" round. */ |
| 495 +#define QUARTERROUND(a,b,c,d) \ |
| 496 + x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \ |
| 497 + x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \ |
| 498 + x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \ |
| 499 + x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7); |
| 500 + |
| 501 +typedef unsigned int uint32_t; |
| 502 + |
| 503 +/* chacha_core performs |num_rounds| rounds of ChaCha20 on the input words in |
| 504 + * |input| and writes the 64 output bytes to |output|. */ |
| 505 +static void chacha_core(unsigned char output[64], const uint32_t input[16], |
| 506 + int num_rounds) |
| 507 + { |
| 508 + uint32_t x[16]; |
| 509 + int i; |
| 510 + |
| 511 + memcpy(x, input, sizeof(uint32_t) * 16); |
| 512 + for (i = 20; i > 0; i -= 2) |
| 513 + { |
| 514 + QUARTERROUND( 0, 4, 8,12) |
| 515 + QUARTERROUND( 1, 5, 9,13) |
| 516 + QUARTERROUND( 2, 6,10,14) |
| 517 + QUARTERROUND( 3, 7,11,15) |
| 518 + QUARTERROUND( 0, 5,10,15) |
| 519 + QUARTERROUND( 1, 6,11,12) |
| 520 + QUARTERROUND( 2, 7, 8,13) |
| 521 + QUARTERROUND( 3, 4, 9,14) |
| 522 + } |
| 523 + |
| 524 + for (i = 0; i < 16; ++i) |
| 525 + x[i] = PLUS(x[i], input[i]); |
| 526 + for (i = 0; i < 16; ++i) |
| 527 + U32TO8_LITTLE(output + 4 * i, x[i]); |
| 528 + } |
| 529 + |
| 530 +void CRYPTO_chacha_20(unsigned char *out, |
| 531 + const unsigned char *in, size_t in_len, |
| 532 + const unsigned char key[32], |
| 533 + const unsigned char nonce[8], |
| 534 + size_t counter) |
| 535 + { |
| 536 + uint32_t input[16]; |
| 537 + unsigned char buf[64]; |
| 538 + size_t todo, i; |
| 539 + |
| 540 + input[0] = U8TO32_LITTLE(sigma + 0); |
| 541 + input[1] = U8TO32_LITTLE(sigma + 4); |
| 542 + input[2] = U8TO32_LITTLE(sigma + 8); |
| 543 + input[3] = U8TO32_LITTLE(sigma + 12); |
| 544 + |
| 545 + input[4] = U8TO32_LITTLE(key + 0); |
| 546 + input[5] = U8TO32_LITTLE(key + 4); |
| 547 + input[6] = U8TO32_LITTLE(key + 8); |
| 548 + input[7] = U8TO32_LITTLE(key + 12); |
| 549 + |
| 550 + input[8] = U8TO32_LITTLE(key + 16); |
| 551 + input[9] = U8TO32_LITTLE(key + 20); |
| 552 + input[10] = U8TO32_LITTLE(key + 24); |
| 553 + input[11] = U8TO32_LITTLE(key + 28); |
| 554 + |
| 555 + input[12] = counter; |
| 556 + input[13] = ((uint64_t) counter) >> 32; |
| 557 + input[14] = U8TO32_LITTLE(nonce + 0); |
| 558 + input[15] = U8TO32_LITTLE(nonce + 4); |
| 559 + |
| 560 + while (in_len > 0) |
| 561 + { |
| 562 + todo = sizeof(buf); |
| 563 + if (in_len < todo) |
| 564 + todo = in_len; |
| 565 + |
| 566 + chacha_core(buf, input, 20); |
| 567 + for (i = 0; i < todo; i++) |
| 568 + out[i] = in[i] ^ buf[i]; |
| 569 + |
| 570 + out += todo; |
| 571 + in += todo; |
| 572 + in_len -= todo; |
| 573 + |
| 574 + input[12]++; |
| 575 + if (input[12] == 0) |
| 576 + input[13]++; |
| 577 + } |
| 578 + } |
| 579 + |
| 580 +#endif /* !OPENSSL_NO_CHACHA */ |
| 581 diff --git a/crypto/chacha/chacha_vec.c b/crypto/chacha/chacha_vec.c |
| 582 new file mode 100644 |
| 583 index 0000000..33b2238 |
| 584 --- /dev/null |
| 585 +++ b/crypto/chacha/chacha_vec.c |
| 586 @@ -0,0 +1,345 @@ |
| 587 +/* ==================================================================== |
| 588 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. |
| 589 + * |
| 590 + * Redistribution and use in source and binary forms, with or without |
| 591 + * modification, are permitted provided that the following conditions |
| 592 + * are met: |
| 593 + * |
| 594 + * 1. Redistributions of source code must retain the above copyright |
| 595 + * notice, this list of conditions and the following disclaimer. |
| 596 + * |
| 597 + * 2. Redistributions in binary form must reproduce the above copyright |
| 598 + * notice, this list of conditions and the following disclaimer in |
| 599 + * the documentation and/or other materials provided with the |
| 600 + * distribution. |
| 601 + * |
| 602 + * 3. All advertising materials mentioning features or use of this |
| 603 + * software must display the following acknowledgment: |
| 604 + * "This product includes software developed by the OpenSSL Project |
| 605 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" |
| 606 + * |
| 607 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 608 + * endorse or promote products derived from this software without |
| 609 + * prior written permission. For written permission, please contact |
| 610 + * licensing@OpenSSL.org. |
| 611 + * |
| 612 + * 5. Products derived from this software may not be called "OpenSSL" |
| 613 + * nor may "OpenSSL" appear in their names without prior written |
| 614 + * permission of the OpenSSL Project. |
| 615 + * |
| 616 + * 6. Redistributions of any form whatsoever must retain the following |
| 617 + * acknowledgment: |
| 618 + * "This product includes software developed by the OpenSSL Project |
| 619 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" |
| 620 + * |
| 621 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 622 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 623 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 624 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 625 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 626 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 627 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 628 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 629 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 630 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 631 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 632 + * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 633 + * ==================================================================== |
| 634 + */ |
| 635 + |
| 636 +/* This implementation is by Ted Krovetz and was submitted to SUPERCOP and |
| 637 + * marked as public domain. It was been altered to allow for non-aligned inputs |
| 638 + * and to allow the block counter to be passed in specifically. */ |
| 639 + |
| 640 +#include <string.h> |
| 641 +#include <stdint.h> |
| 642 +#include <openssl/opensslconf.h> |
| 643 + |
| 644 +#if !defined(OPENSSL_NO_CHACHA) |
| 645 + |
| 646 +#include <openssl/chacha.h> |
| 647 + |
| 648 +#ifndef CHACHA_RNDS |
| 649 +#define CHACHA_RNDS 20 /* 8 (high speed), 20 (conservative), 12 (middle) */ |
| 650 +#endif |
| 651 + |
| 652 +/* Architecture-neutral way to specify 16-byte vector of ints */ |
| 653 +typedef unsigned vec __attribute__ ((vector_size (16))); |
| 654 + |
| 655 +/* This implementation is designed for Neon, SSE and AltiVec machines. The |
| 656 + * following specify how to do certain vector operations efficiently on |
| 657 + * each architecture, using intrinsics. |
| 658 + * This implementation supports parallel processing of multiple blocks, |
| 659 + * including potentially using general-purpose registers. |
| 660 + */ |
| 661 +#if __ARM_NEON__ |
| 662 +#include <arm_neon.h> |
| 663 +#define GPR_TOO 1 |
| 664 +#define VBPI 2 |
| 665 +#define ONE (vec)vsetq_lane_u32(1,vdupq_n_u32(0),0) |
| 666 +#define LOAD(m) (vec)(*((vec*)(m))) |
| 667 +#define STORE(m,r) (*((vec*)(m))) = (r) |
| 668 +#define ROTV1(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,1) |
| 669 +#define ROTV2(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,2) |
| 670 +#define ROTV3(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,3) |
| 671 +#define ROTW16(x) (vec)vrev32q_u16((uint16x8_t)x) |
| 672 +#if __clang__ |
| 673 +#define ROTW7(x) (x << ((vec){ 7, 7, 7, 7})) ^ (x >> ((vec){25,25,25,25})) |
| 674 +#define ROTW8(x) (x << ((vec){ 8, 8, 8, 8})) ^ (x >> ((vec){24,24,24,24})) |
| 675 +#define ROTW12(x) (x << ((vec){12,12,12,12})) ^ (x >> ((vec){20,20,20,20})) |
| 676 +#else |
| 677 +#define ROTW7(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,7),(uint32x4_t)x,2
5) |
| 678 +#define ROTW8(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,8),(uint32x4_t)x,2
4) |
| 679 +#define ROTW12(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,12),(uint32x4_t)x,
20) |
| 680 +#endif |
| 681 +#elif __SSE2__ |
| 682 +#include <emmintrin.h> |
| 683 +#define GPR_TOO 0 |
| 684 +#if __clang__ |
| 685 +#define VBPI 4 |
| 686 +#else |
| 687 +#define VBPI 3 |
| 688 +#endif |
| 689 +#define ONE (vec)_mm_set_epi32(0,0,0,1) |
| 690 +#define LOAD(m) (vec)_mm_loadu_si128((__m128i*)(m)) |
| 691 +#define STORE(m,r) _mm_storeu_si128((__m128i*)(m), (__m128i) (r)) |
| 692 +#define ROTV1(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(0,3,2,1)) |
| 693 +#define ROTV2(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(1,0,3,2)) |
| 694 +#define ROTV3(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(2,1,0,3)) |
| 695 +#define ROTW7(x) (vec)(_mm_slli_epi32((__m128i)x, 7) ^ _mm_srli_epi32((__m128i
)x,25)) |
| 696 +#define ROTW12(x) (vec)(_mm_slli_epi32((__m128i)x,12) ^ _mm_srli_epi32((__m128i
)x,20)) |
| 697 +#if __SSSE3__ |
| 698 +#include <tmmintrin.h> |
| 699 +#define ROTW8(x) (vec)_mm_shuffle_epi8((__m128i)x,_mm_set_epi8(14,13,12,15,10,
9,8,11,6,5,4,7,2,1,0,3)) |
| 700 +#define ROTW16(x) (vec)_mm_shuffle_epi8((__m128i)x,_mm_set_epi8(13,12,15,14,9,8
,11,10,5,4,7,6,1,0,3,2)) |
| 701 +#else |
| 702 +#define ROTW8(x) (vec)(_mm_slli_epi32((__m128i)x, 8) ^ _mm_srli_epi32((__m128i
)x,24)) |
| 703 +#define ROTW16(x) (vec)(_mm_slli_epi32((__m128i)x,16) ^ _mm_srli_epi32((__m128i
)x,16)) |
| 704 +#endif |
| 705 +#else |
| 706 +#error -- Implementation supports only machines with neon or SSE2 |
| 707 +#endif |
| 708 + |
| 709 +#ifndef REVV_BE |
| 710 +#define REVV_BE(x) (x) |
| 711 +#endif |
| 712 + |
| 713 +#ifndef REVW_BE |
| 714 +#define REVW_BE(x) (x) |
| 715 +#endif |
| 716 + |
| 717 +#define BPI (VBPI + GPR_TOO) /* Blocks computed per loop iteration */ |
| 718 + |
| 719 +#define DQROUND_VECTORS(a,b,c,d) \ |
| 720 + a += b; d ^= a; d = ROTW16(d); \ |
| 721 + c += d; b ^= c; b = ROTW12(b); \ |
| 722 + a += b; d ^= a; d = ROTW8(d); \ |
| 723 + c += d; b ^= c; b = ROTW7(b); \ |
| 724 + b = ROTV1(b); c = ROTV2(c); d = ROTV3(d); \ |
| 725 + a += b; d ^= a; d = ROTW16(d); \ |
| 726 + c += d; b ^= c; b = ROTW12(b); \ |
| 727 + a += b; d ^= a; d = ROTW8(d); \ |
| 728 + c += d; b ^= c; b = ROTW7(b); \ |
| 729 + b = ROTV3(b); c = ROTV2(c); d = ROTV1(d); |
| 730 + |
| 731 +#define QROUND_WORDS(a,b,c,d) \ |
| 732 + a = a+b; d ^= a; d = d<<16 | d>>16; \ |
| 733 + c = c+d; b ^= c; b = b<<12 | b>>20; \ |
| 734 + a = a+b; d ^= a; d = d<< 8 | d>>24; \ |
| 735 + c = c+d; b ^= c; b = b<< 7 | b>>25; |
| 736 + |
| 737 +#define WRITE_XOR(in, op, d, v0, v1, v2, v3) \ |
| 738 + STORE(op + d + 0, LOAD(in + d + 0) ^ REVV_BE(v0)); \ |
| 739 + STORE(op + d + 4, LOAD(in + d + 4) ^ REVV_BE(v1)); \ |
| 740 + STORE(op + d + 8, LOAD(in + d + 8) ^ REVV_BE(v2)); \ |
| 741 + STORE(op + d +12, LOAD(in + d +12) ^ REVV_BE(v3)); |
| 742 + |
| 743 +void CRYPTO_chacha_20( |
| 744 + unsigned char *out, |
| 745 + const unsigned char *in, |
| 746 + size_t inlen, |
| 747 + const unsigned char key[32], |
| 748 + const unsigned char nonce[8], |
| 749 + size_t counter) |
| 750 + { |
| 751 + unsigned iters, i, *op=(unsigned *)out, *ip=(unsigned *)in, *kp; |
| 752 +#if defined(__ARM_NEON__) |
| 753 + unsigned *np; |
| 754 +#endif |
| 755 + vec s0, s1, s2, s3; |
| 756 +#if !defined(__ARM_NEON__) && !defined(__SSE2__) |
| 757 + __attribute__ ((aligned (16))) unsigned key[8], nonce[4]; |
| 758 +#endif |
| 759 + __attribute__ ((aligned (16))) unsigned chacha_const[] = |
| 760 + {0x61707865,0x3320646E,0x79622D32,0x6B206574}; |
| 761 +#if defined(__ARM_NEON__) || defined(__SSE2__) |
| 762 + kp = (unsigned *)key; |
| 763 +#else |
| 764 + ((vec *)key)[0] = REVV_BE(((vec *)key)[0]); |
| 765 + ((vec *)key)[1] = REVV_BE(((vec *)key)[1]); |
| 766 + nonce[0] = REVW_BE(((unsigned *)nonce)[0]); |
| 767 + nonce[1] = REVW_BE(((unsigned *)nonce)[1]); |
| 768 + nonce[2] = REVW_BE(((unsigned *)nonce)[2]); |
| 769 + nonce[3] = REVW_BE(((unsigned *)nonce)[3]); |
| 770 + kp = (unsigned *)key; |
| 771 + np = (unsigned *)nonce; |
| 772 +#endif |
| 773 +#if defined(__ARM_NEON__) |
| 774 + np = (unsigned*) nonce; |
| 775 +#endif |
| 776 + s0 = LOAD(chacha_const); |
| 777 + s1 = LOAD(&((vec*)kp)[0]); |
| 778 + s2 = LOAD(&((vec*)kp)[1]); |
| 779 + s3 = (vec){ |
| 780 + counter & 0xffffffff, |
| 781 +#if __ARM_NEON__ |
| 782 + 0, /* can't right-shift 32 bits on a 32-bit system. */ |
| 783 +#else |
| 784 + counter >> 32, |
| 785 +#endif |
| 786 + ((uint32_t*)nonce)[0], |
| 787 + ((uint32_t*)nonce)[1] |
| 788 + }; |
| 789 + |
| 790 + for (iters = 0; iters < inlen/(BPI*64); iters++) |
| 791 + { |
| 792 +#if GPR_TOO |
| 793 + register unsigned x0, x1, x2, x3, x4, x5, x6, x7, x8, |
| 794 + x9, x10, x11, x12, x13, x14, x15; |
| 795 +#endif |
| 796 +#if VBPI > 2 |
| 797 + vec v8,v9,v10,v11; |
| 798 +#endif |
| 799 +#if VBPI > 3 |
| 800 + vec v12,v13,v14,v15; |
| 801 +#endif |
| 802 + |
| 803 + vec v0,v1,v2,v3,v4,v5,v6,v7; |
| 804 + v4 = v0 = s0; v5 = v1 = s1; v6 = v2 = s2; v3 = s3; |
| 805 + v7 = v3 + ONE; |
| 806 +#if VBPI > 2 |
| 807 + v8 = v4; v9 = v5; v10 = v6; |
| 808 + v11 = v7 + ONE; |
| 809 +#endif |
| 810 +#if VBPI > 3 |
| 811 + v12 = v8; v13 = v9; v14 = v10; |
| 812 + v15 = v11 + ONE; |
| 813 +#endif |
| 814 +#if GPR_TOO |
| 815 + x0 = chacha_const[0]; x1 = chacha_const[1]; |
| 816 + x2 = chacha_const[2]; x3 = chacha_const[3]; |
| 817 + x4 = kp[0]; x5 = kp[1]; x6 = kp[2]; x7 = kp[3]; |
| 818 + x8 = kp[4]; x9 = kp[5]; x10 = kp[6]; x11 = kp[7]; |
| 819 + x12 = counter+BPI*iters+(BPI-1); x13 = 0; |
| 820 + x14 = np[0]; x15 = np[1]; |
| 821 +#endif |
| 822 + for (i = CHACHA_RNDS/2; i; i--) |
| 823 + { |
| 824 + DQROUND_VECTORS(v0,v1,v2,v3) |
| 825 + DQROUND_VECTORS(v4,v5,v6,v7) |
| 826 +#if VBPI > 2 |
| 827 + DQROUND_VECTORS(v8,v9,v10,v11) |
| 828 +#endif |
| 829 +#if VBPI > 3 |
| 830 + DQROUND_VECTORS(v12,v13,v14,v15) |
| 831 +#endif |
| 832 +#if GPR_TOO |
| 833 + QROUND_WORDS( x0, x4, x8,x12) |
| 834 + QROUND_WORDS( x1, x5, x9,x13) |
| 835 + QROUND_WORDS( x2, x6,x10,x14) |
| 836 + QROUND_WORDS( x3, x7,x11,x15) |
| 837 + QROUND_WORDS( x0, x5,x10,x15) |
| 838 + QROUND_WORDS( x1, x6,x11,x12) |
| 839 + QROUND_WORDS( x2, x7, x8,x13) |
| 840 + QROUND_WORDS( x3, x4, x9,x14) |
| 841 +#endif |
| 842 + } |
| 843 + |
| 844 + WRITE_XOR(ip, op, 0, v0+s0, v1+s1, v2+s2, v3+s3) |
| 845 + s3 += ONE; |
| 846 + WRITE_XOR(ip, op, 16, v4+s0, v5+s1, v6+s2, v7+s3) |
| 847 + s3 += ONE; |
| 848 +#if VBPI > 2 |
| 849 + WRITE_XOR(ip, op, 32, v8+s0, v9+s1, v10+s2, v11+s3) |
| 850 + s3 += ONE; |
| 851 +#endif |
| 852 +#if VBPI > 3 |
| 853 + WRITE_XOR(ip, op, 48, v12+s0, v13+s1, v14+s2, v15+s3) |
| 854 + s3 += ONE; |
| 855 +#endif |
| 856 + ip += VBPI*16; |
| 857 + op += VBPI*16; |
| 858 +#if GPR_TOO |
| 859 + op[0] = REVW_BE(REVW_BE(ip[0]) ^ (x0 + chacha_const[0])); |
| 860 + op[1] = REVW_BE(REVW_BE(ip[1]) ^ (x1 + chacha_const[1])); |
| 861 + op[2] = REVW_BE(REVW_BE(ip[2]) ^ (x2 + chacha_const[2])); |
| 862 + op[3] = REVW_BE(REVW_BE(ip[3]) ^ (x3 + chacha_const[3])); |
| 863 + op[4] = REVW_BE(REVW_BE(ip[4]) ^ (x4 + kp[0])); |
| 864 + op[5] = REVW_BE(REVW_BE(ip[5]) ^ (x5 + kp[1])); |
| 865 + op[6] = REVW_BE(REVW_BE(ip[6]) ^ (x6 + kp[2])); |
| 866 + op[7] = REVW_BE(REVW_BE(ip[7]) ^ (x7 + kp[3])); |
| 867 + op[8] = REVW_BE(REVW_BE(ip[8]) ^ (x8 + kp[4])); |
| 868 + op[9] = REVW_BE(REVW_BE(ip[9]) ^ (x9 + kp[5])); |
| 869 + op[10] = REVW_BE(REVW_BE(ip[10]) ^ (x10 + kp[6])); |
| 870 + op[11] = REVW_BE(REVW_BE(ip[11]) ^ (x11 + kp[7])); |
| 871 + op[12] = REVW_BE(REVW_BE(ip[12]) ^ (x12 + BPI*iters+(BPI-1))); |
| 872 + op[13] = REVW_BE(REVW_BE(ip[13]) ^ (x13)); |
| 873 + op[14] = REVW_BE(REVW_BE(ip[14]) ^ (x14 + np[0])); |
| 874 + op[15] = REVW_BE(REVW_BE(ip[15]) ^ (x15 + np[1])); |
| 875 + s3 += ONE; |
| 876 + ip += 16; |
| 877 + op += 16; |
| 878 +#endif |
| 879 + } |
| 880 + |
| 881 + for (iters = inlen%(BPI*64)/64; iters != 0; iters--) |
| 882 + { |
| 883 + vec v0 = s0, v1 = s1, v2 = s2, v3 = s3; |
| 884 + for (i = CHACHA_RNDS/2; i; i--) |
| 885 + { |
| 886 + DQROUND_VECTORS(v0,v1,v2,v3); |
| 887 + } |
| 888 + WRITE_XOR(ip, op, 0, v0+s0, v1+s1, v2+s2, v3+s3) |
| 889 + s3 += ONE; |
| 890 + ip += 16; |
| 891 + op += 16; |
| 892 + } |
| 893 + |
| 894 + inlen = inlen % 64; |
| 895 + if (inlen) |
| 896 + { |
| 897 + __attribute__ ((aligned (16))) vec buf[4]; |
| 898 + vec v0,v1,v2,v3; |
| 899 + v0 = s0; v1 = s1; v2 = s2; v3 = s3; |
| 900 + for (i = CHACHA_RNDS/2; i; i--) |
| 901 + { |
| 902 + DQROUND_VECTORS(v0,v1,v2,v3); |
| 903 + } |
| 904 + |
| 905 + if (inlen >= 16) |
| 906 + { |
| 907 + STORE(op + 0, LOAD(ip + 0) ^ REVV_BE(v0 + s0)); |
| 908 + if (inlen >= 32) |
| 909 + { |
| 910 + STORE(op + 4, LOAD(ip + 4) ^ REVV_BE(v1 + s1)); |
| 911 + if (inlen >= 48) |
| 912 + { |
| 913 + STORE(op + 8, LOAD(ip + 8) ^ |
| 914 + REVV_BE(v2 + s2)); |
| 915 + buf[3] = REVV_BE(v3 + s3); |
| 916 + } |
| 917 + else |
| 918 + buf[2] = REVV_BE(v2 + s2); |
| 919 + } |
| 920 + else |
| 921 + buf[1] = REVV_BE(v1 + s1); |
| 922 + } |
| 923 + else |
| 924 + buf[0] = REVV_BE(v0 + s0); |
| 925 + |
| 926 + for (i=inlen & ~15; i<inlen; i++) |
| 927 + ((char *)op)[i] = ((char *)ip)[i] ^ ((char *)buf)[i]; |
| 928 + } |
| 929 + } |
| 930 + |
| 931 +#endif /* !OPENSSL_NO_CHACHA */ |
| 932 diff --git a/crypto/chacha/chachatest.c b/crypto/chacha/chachatest.c |
| 933 new file mode 100644 |
| 934 index 0000000..b2a9389 |
| 935 --- /dev/null |
| 936 +++ b/crypto/chacha/chachatest.c |
| 937 @@ -0,0 +1,211 @@ |
| 938 +/* |
| 939 + * Chacha stream algorithm. |
| 940 + * |
| 941 + * Created on: Jun, 2013 |
| 942 + * Author: Elie Bursztein (elieb@google.com) |
| 943 + * |
| 944 + * Adapted from the estream code by D. Bernstein. |
| 945 + */ |
| 946 +/* ==================================================================== |
| 947 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. |
| 948 + * |
| 949 + * Redistribution and use in source and binary forms, with or without |
| 950 + * modification, are permitted provided that the following conditions |
| 951 + * are met: |
| 952 + * |
| 953 + * 1. Redistributions of source code must retain the above copyright |
| 954 + * notice, this list of conditions and the following disclaimer. |
| 955 + * |
| 956 + * 2. Redistributions in binary form must reproduce the above copyright |
| 957 + * notice, this list of conditions and the following disclaimer in |
| 958 + * the documentation and/or other materials provided with the |
| 959 + * distribution. |
| 960 + * |
| 961 + * 3. All advertising materials mentioning features or use of this |
| 962 + * software must display the following acknowledgment: |
| 963 + * "This product includes software developed by the OpenSSL Project |
| 964 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" |
| 965 + * |
| 966 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 967 + * endorse or promote products derived from this software without |
| 968 + * prior written permission. For written permission, please contact |
| 969 + * licensing@OpenSSL.org. |
| 970 + * |
| 971 + * 5. Products derived from this software may not be called "OpenSSL" |
| 972 + * nor may "OpenSSL" appear in their names without prior written |
| 973 + * permission of the OpenSSL Project. |
| 974 + * |
| 975 + * 6. Redistributions of any form whatsoever must retain the following |
| 976 + * acknowledgment: |
| 977 + * "This product includes software developed by the OpenSSL Project |
| 978 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" |
| 979 + * |
| 980 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 981 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 982 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 983 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 984 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 985 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 986 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 987 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 988 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 989 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 990 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 991 + * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 992 + * ==================================================================== |
| 993 + */ |
| 994 + |
| 995 +#include <stdio.h> |
| 996 +#include <stdlib.h> |
| 997 +#include <string.h> |
| 998 +#include <stdint.h> |
| 999 + |
| 1000 +#include <openssl/chacha.h> |
| 1001 + |
| 1002 +struct chacha_test { |
| 1003 + const char *keyhex; |
| 1004 + const char *noncehex; |
| 1005 + const char *outhex; |
| 1006 +}; |
| 1007 + |
| 1008 +static const struct chacha_test chacha_tests[] = { |
| 1009 + { |
| 1010 + "000000000000000000000000000000000000000000000000000000000000000
0", |
| 1011 + "0000000000000000", |
| 1012 + "76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc
7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586", |
| 1013 + }, |
| 1014 + { |
| 1015 + "000000000000000000000000000000000000000000000000000000000000000
1", |
| 1016 + "0000000000000000", |
| 1017 + "4540f05a9f1fb296d7736e7b208e3c96eb4fe1834688d2604f450952ed432d4
1bbe2a0b6ea7566d2a5d1e7e20d42af2c53d792b1c43fea817e9ad275ae546963", |
| 1018 + }, |
| 1019 + { |
| 1020 + "000000000000000000000000000000000000000000000000000000000000000
0", |
| 1021 + "0000000000000001", |
| 1022 + "de9cba7bf3d69ef5e786dc63973f653a0b49e015adbff7134fcb7df13782103
1e85a050278a7084527214f73efc7fa5b5277062eb7a0433e445f41e31afab757", |
| 1023 + }, |
| 1024 + { |
| 1025 + "000000000000000000000000000000000000000000000000000000000000000
0", |
| 1026 + "0100000000000000", |
| 1027 + "ef3fdfd6c61578fbf5cf35bd3dd33b8009631634d21e42ac33960bd138e50d3
2111e4caf237ee53ca8ad6426194a88545ddc497a0b466e7d6bbdb0041b2f586b", |
| 1028 + }, |
| 1029 + { |
| 1030 + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1
f", |
| 1031 + "0001020304050607", |
| 1032 + "f798a189f195e66982105ffb640bb7757f579da31602fc93ec01ac56f85ac3c
134a4547b733b46413042c9440049176905d3be59ea1c53f15916155c2be8241a38008b9a26bc359
41e2444177c8ade6689de95264986d95889fb60e84629c9bd9a5acb1cc118be563eb9b3a4a472f82
e09a7e778492b562ef7130e88dfe031c79db9d4f7c7a899151b9a475032b63fc385245fe054e3dd5
a97a5f576fe064025d3ce042c566ab2c507b138db853e3d6959660996546cc9c4a6eafdc777c040d
70eaf46f76dad3979e5c5360c3317166a1c894c94a371876a94df7628fe4eaaf2ccb27d5aaae0ad7
ad0f9d4b6ad3b54098746d4524d38407a6deb", |
| 1033 + }, |
| 1034 +}; |
| 1035 + |
| 1036 +static unsigned char hex_digit(char h) |
| 1037 + { |
| 1038 + if (h >= '0' && h <= '9') |
| 1039 + return h - '0'; |
| 1040 + else if (h >= 'a' && h <= 'f') |
| 1041 + return h - 'a' + 10; |
| 1042 + else if (h >= 'A' && h <= 'F') |
| 1043 + return h - 'A' + 10; |
| 1044 + else |
| 1045 + abort(); |
| 1046 + } |
| 1047 + |
| 1048 +static void hex_decode(unsigned char *out, const char* hex) |
| 1049 + { |
| 1050 + size_t j = 0; |
| 1051 + |
| 1052 + while (*hex != 0) |
| 1053 + { |
| 1054 + unsigned char v = hex_digit(*hex++); |
| 1055 + v <<= 4; |
| 1056 + v |= hex_digit(*hex++); |
| 1057 + out[j++] = v; |
| 1058 + } |
| 1059 + } |
| 1060 + |
| 1061 +static void hexdump(unsigned char *a, size_t len) |
| 1062 + { |
| 1063 + size_t i; |
| 1064 + |
| 1065 + for (i = 0; i < len; i++) |
| 1066 + printf("%02x", a[i]); |
| 1067 + } |
| 1068 + |
| 1069 +/* misalign returns a pointer that points 0 to 15 bytes into |in| such that the |
| 1070 + * returned pointer has alignment 1 mod 16. */ |
| 1071 +static void* misalign(void* in) |
| 1072 + { |
| 1073 + intptr_t x = (intptr_t) in; |
| 1074 + x += (17 - (x % 16)) % 16; |
| 1075 + return (void*) x; |
| 1076 + } |
| 1077 + |
| 1078 +int main() |
| 1079 + { |
| 1080 + static const unsigned num_tests = |
| 1081 + sizeof(chacha_tests) / sizeof(struct chacha_test); |
| 1082 + unsigned i; |
| 1083 + unsigned char key_bytes[32 + 16]; |
| 1084 + unsigned char nonce_bytes[8 + 16] = {0}; |
| 1085 + |
| 1086 + unsigned char *key = misalign(key_bytes); |
| 1087 + unsigned char *nonce = misalign(nonce_bytes); |
| 1088 + |
| 1089 + for (i = 0; i < num_tests; i++) |
| 1090 + { |
| 1091 + const struct chacha_test *test = &chacha_tests[i]; |
| 1092 + unsigned char *expected, *out_bytes, *zero_bytes, *out, *zeros; |
| 1093 + size_t len = strlen(test->outhex); |
| 1094 + |
| 1095 + if (strlen(test->keyhex) != 32*2 || |
| 1096 + strlen(test->noncehex) != 8*2 || |
| 1097 + (len & 1) == 1) |
| 1098 + return 1; |
| 1099 + |
| 1100 + len /= 2; |
| 1101 + |
| 1102 + hex_decode(key, test->keyhex); |
| 1103 + hex_decode(nonce, test->noncehex); |
| 1104 + |
| 1105 + expected = malloc(len); |
| 1106 + out_bytes = malloc(len+16); |
| 1107 + zero_bytes = malloc(len+16); |
| 1108 + /* Attempt to test unaligned inputs. */ |
| 1109 + out = misalign(out_bytes); |
| 1110 + zeros = misalign(zero_bytes); |
| 1111 + memset(zeros, 0, len); |
| 1112 + |
| 1113 + hex_decode(expected, test->outhex); |
| 1114 + CRYPTO_chacha_20(out, zeros, len, key, nonce, 0); |
| 1115 + |
| 1116 + if (memcmp(out, expected, len) != 0) |
| 1117 + { |
| 1118 + printf("ChaCha20 test #%d failed.\n", i); |
| 1119 + printf("got: "); |
| 1120 + hexdump(out, len); |
| 1121 + printf("\nexpected: "); |
| 1122 + hexdump(expected, len); |
| 1123 + printf("\n"); |
| 1124 + return 1; |
| 1125 + } |
| 1126 + |
| 1127 + /* The last test has a large output. We test whether the |
| 1128 + * counter works as expected by skipping the first 64 bytes of |
| 1129 + * it. */ |
| 1130 + if (i == num_tests - 1) |
| 1131 + { |
| 1132 + CRYPTO_chacha_20(out, zeros, len - 64, key, nonce, 1); |
| 1133 + if (memcmp(out, expected + 64, len - 64) != 0) |
| 1134 + { |
| 1135 + printf("ChaCha20 skip test failed.\n"); |
| 1136 + return 1; |
| 1137 + } |
| 1138 + } |
| 1139 + |
| 1140 + free(expected); |
| 1141 + free(zero_bytes); |
| 1142 + free(out_bytes); |
| 1143 + } |
| 1144 + |
| 1145 + |
| 1146 + printf("PASS\n"); |
| 1147 + return 0; |
| 1148 + } |
| 1149 diff --git a/crypto/evp/Makefile b/crypto/evp/Makefile |
| 1150 index b73038d..86b0504 100644 |
| 1151 --- a/crypto/evp/Makefile |
| 1152 +++ b/crypto/evp/Makefile |
| 1153 @@ -29,7 +29,8 @@ LIBSRC= encode.c digest.c evp_enc.c evp_key.c evp_acnf.c evp_c
nf.c \ |
| 1154 c_all.c c_allc.c c_alld.c evp_lib.c bio_ok.c \ |
| 1155 evp_pkey.c evp_pbe.c p5_crpt.c p5_crpt2.c \ |
| 1156 e_old.c pmeth_lib.c pmeth_fn.c pmeth_gn.c m_sigver.c evp_fips.c \ |
| 1157 - e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c evp_aead.c |
| 1158 + e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c evp_aead.c \ |
| 1159 + e_chacha20poly1305.c |
| 1160 |
| 1161 LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \ |
| 1162 e_des.o e_bf.o e_idea.o e_des3.o e_camellia.o\ |
| 1163 @@ -42,7 +43,7 @@ LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o
evp_cnf.o \ |
| 1164 c_all.o c_allc.o c_alld.o evp_lib.o bio_ok.o \ |
| 1165 evp_pkey.o evp_pbe.o p5_crpt.o p5_crpt2.o \ |
| 1166 e_old.o pmeth_lib.o pmeth_fn.o pmeth_gn.o m_sigver.o evp_fips.o \ |
| 1167 - e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o evp_aead.o |
| 1168 + e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o evp_aead.o e_chacha20poly1305.o |
| 1169 |
| 1170 SRC= $(LIBSRC) |
| 1171 |
| 1172 @@ -239,6 +240,21 @@ e_cast.o: ../../include/openssl/objects.h ../../include/ope
nssl/opensslconf.h |
| 1173 e_cast.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h |
| 1174 e_cast.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
| 1175 e_cast.o: ../../include/openssl/symhacks.h ../cryptlib.h e_cast.c evp_locl.h |
| 1176 +e_chacha20poly1305.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h |
| 1177 +e_chacha20poly1305.o: ../../include/openssl/chacha.h |
| 1178 +e_chacha20poly1305.o: ../../include/openssl/crypto.h |
| 1179 +e_chacha20poly1305.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h |
| 1180 +e_chacha20poly1305.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h |
| 1181 +e_chacha20poly1305.o: ../../include/openssl/obj_mac.h |
| 1182 +e_chacha20poly1305.o: ../../include/openssl/objects.h |
| 1183 +e_chacha20poly1305.o: ../../include/openssl/opensslconf.h |
| 1184 +e_chacha20poly1305.o: ../../include/openssl/opensslv.h |
| 1185 +e_chacha20poly1305.o: ../../include/openssl/ossl_typ.h |
| 1186 +e_chacha20poly1305.o: ../../include/openssl/poly1305.h |
| 1187 +e_chacha20poly1305.o: ../../include/openssl/safestack.h |
| 1188 +e_chacha20poly1305.o: ../../include/openssl/stack.h |
| 1189 +e_chacha20poly1305.o: ../../include/openssl/symhacks.h e_chacha20poly1305.c |
| 1190 +e_chacha20poly1305.o: evp_locl.h |
| 1191 e_des.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h |
| 1192 e_des.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
| 1193 e_des.o: ../../include/openssl/des.h ../../include/openssl/des_old.h |
| 1194 @@ -258,9 +274,10 @@ e_des3.o: ../../include/openssl/evp.h ../../include/openssl
/lhash.h |
| 1195 e_des3.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h |
| 1196 e_des3.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h |
| 1197 e_des3.o: ../../include/openssl/ossl_typ.h ../../include/openssl/rand.h |
| 1198 -e_des3.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
| 1199 -e_des3.o: ../../include/openssl/symhacks.h ../../include/openssl/ui.h |
| 1200 -e_des3.o: ../../include/openssl/ui_compat.h ../cryptlib.h e_des3.c evp_locl.h |
| 1201 +e_des3.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h |
| 1202 +e_des3.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h |
| 1203 +e_des3.o: ../../include/openssl/ui.h ../../include/openssl/ui_compat.h |
| 1204 +e_des3.o: ../cryptlib.h e_des3.c evp_locl.h |
| 1205 e_idea.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h |
| 1206 e_idea.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h |
| 1207 e_idea.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h |
| 1208 @@ -356,6 +373,14 @@ evp_acnf.o: ../../include/openssl/opensslconf.h |
| 1209 evp_acnf.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h |
| 1210 evp_acnf.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
| 1211 evp_acnf.o: ../../include/openssl/symhacks.h ../cryptlib.h evp_acnf.c |
| 1212 +evp_aead.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h |
| 1213 +evp_aead.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h |
| 1214 +evp_aead.o: ../../include/openssl/err.h ../../include/openssl/evp.h |
| 1215 +evp_aead.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h |
| 1216 +evp_aead.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h |
| 1217 +evp_aead.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h |
| 1218 +evp_aead.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h |
| 1219 +evp_aead.o: ../../include/openssl/symhacks.h evp_aead.c |
| 1220 evp_cnf.o: ../../e_os.h ../../include/openssl/asn1.h |
| 1221 evp_cnf.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h |
| 1222 evp_cnf.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h |
| 1223 diff --git a/crypto/evp/e_chacha20poly1305.c b/crypto/evp/e_chacha20poly1305.c |
| 1224 new file mode 100644 |
| 1225 index 0000000..1c0c0fb |
| 1226 --- /dev/null |
| 1227 +++ b/crypto/evp/e_chacha20poly1305.c |
| 1228 @@ -0,0 +1,261 @@ |
| 1229 +/* ==================================================================== |
| 1230 + * Copyright (c) 2013 The OpenSSL Project. All rights reserved. |
| 1231 + * |
| 1232 + * Redistribution and use in source and binary forms, with or without |
| 1233 + * modification, are permitted provided that the following conditions |
| 1234 + * are met: |
| 1235 + * |
| 1236 + * 1. Redistributions of source code must retain the above copyright |
| 1237 + * notice, this list of conditions and the following disclaimer. |
| 1238 + * |
| 1239 + * 2. Redistributions in binary form must reproduce the above copyright |
| 1240 + * notice, this list of conditions and the following disclaimer in |
| 1241 + * the documentation and/or other materials provided with the |
| 1242 + * distribution. |
| 1243 + * |
| 1244 + * 3. All advertising materials mentioning features or use of this |
| 1245 + * software must display the following acknowledgment: |
| 1246 + * "This product includes software developed by the OpenSSL Project |
| 1247 + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" |
| 1248 + * |
| 1249 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 1250 + * endorse or promote products derived from this software without |
| 1251 + * prior written permission. For written permission, please contact |
| 1252 + * openssl-core@openssl.org. |
| 1253 + * |
| 1254 + * 5. Products derived from this software may not be called "OpenSSL" |
| 1255 + * nor may "OpenSSL" appear in their names without prior written |
| 1256 + * permission of the OpenSSL Project. |
| 1257 + * |
| 1258 + * 6. Redistributions of any form whatsoever must retain the following |
| 1259 + * acknowledgment: |
| 1260 + * "This product includes software developed by the OpenSSL Project |
| 1261 + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" |
| 1262 + * |
| 1263 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 1264 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 1265 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 1266 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 1267 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 1268 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 1269 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 1270 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 1271 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 1272 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 1273 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 1274 + * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 1275 + * ==================================================================== |
| 1276 + * |
| 1277 + */ |
| 1278 + |
| 1279 +#include <stdint.h> |
| 1280 +#include <string.h> |
| 1281 +#include <openssl/opensslconf.h> |
| 1282 + |
| 1283 +#if !defined(OPENSSL_NO_CHACHA) && !defined(OPENSSL_NO_POLY1305) |
| 1284 + |
| 1285 +#include <openssl/chacha.h> |
| 1286 +#include <openssl/poly1305.h> |
| 1287 +#include <openssl/evp.h> |
| 1288 +#include <openssl/err.h> |
| 1289 +#include "evp_locl.h" |
| 1290 + |
| 1291 +#define POLY1305_TAG_LEN 16 |
| 1292 +#define CHACHA20_NONCE_LEN 8 |
| 1293 + |
| 1294 +struct aead_chacha20_poly1305_ctx |
| 1295 + { |
| 1296 + unsigned char key[32]; |
| 1297 + unsigned char tag_len; |
| 1298 + }; |
| 1299 + |
| 1300 +static int aead_chacha20_poly1305_init(EVP_AEAD_CTX *ctx, const unsigned char *
key, size_t key_len, size_t tag_len) |
| 1301 + { |
| 1302 + struct aead_chacha20_poly1305_ctx *c20_ctx; |
| 1303 + |
| 1304 + if (tag_len == 0) |
| 1305 + tag_len = POLY1305_TAG_LEN; |
| 1306 + |
| 1307 + if (tag_len > POLY1305_TAG_LEN) |
| 1308 + { |
| 1309 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_INIT, EVP_R_TOO_LARGE); |
| 1310 + return 0; |
| 1311 + } |
| 1312 + |
| 1313 + if (key_len != sizeof(c20_ctx->key)) |
| 1314 + return 0; /* internal error - EVP_AEAD_CTX_init should catch th
is. */ |
| 1315 + |
| 1316 + c20_ctx = OPENSSL_malloc(sizeof(struct aead_chacha20_poly1305_ctx)); |
| 1317 + if (c20_ctx == NULL) |
| 1318 + return 0; |
| 1319 + |
| 1320 + memcpy(&c20_ctx->key[0], key, key_len); |
| 1321 + c20_ctx->tag_len = tag_len; |
| 1322 + ctx->aead_state = c20_ctx; |
| 1323 + |
| 1324 + return 1; |
| 1325 + } |
| 1326 + |
| 1327 +static void aead_chacha20_poly1305_cleanup(EVP_AEAD_CTX *ctx) |
| 1328 + { |
| 1329 + struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state; |
| 1330 + OPENSSL_cleanse(c20_ctx->key, sizeof(c20_ctx->key)); |
| 1331 + OPENSSL_free(c20_ctx); |
| 1332 + } |
| 1333 + |
| 1334 +static void poly1305_update_with_length(poly1305_state *poly1305, |
| 1335 + const unsigned char *data, size_t data_len) |
| 1336 + { |
| 1337 + size_t j = data_len; |
| 1338 + unsigned char length_bytes[8]; |
| 1339 + unsigned i; |
| 1340 + |
| 1341 + for (i = 0; i < sizeof(length_bytes); i++) |
| 1342 + { |
| 1343 + length_bytes[i] = j; |
| 1344 + j >>= 8; |
| 1345 + } |
| 1346 + |
| 1347 + CRYPTO_poly1305_update(poly1305, data, data_len); |
| 1348 + CRYPTO_poly1305_update(poly1305, length_bytes, sizeof(length_bytes)); |
| 1349 +} |
| 1350 + |
| 1351 +static ssize_t aead_chacha20_poly1305_seal(const EVP_AEAD_CTX *ctx, |
| 1352 + unsigned char *out, size_t max_out_len, |
| 1353 + const unsigned char *nonce, size_t nonce_len, |
| 1354 + const unsigned char *in, size_t in_len, |
| 1355 + const unsigned char *ad, size_t ad_len) |
| 1356 + { |
| 1357 + const struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state; |
| 1358 + unsigned char poly1305_key[32]; |
| 1359 + poly1305_state poly1305; |
| 1360 + const uint64_t in_len_64 = in_len; |
| 1361 + |
| 1362 + /* The underlying ChaCha implementation may not overflow the block |
| 1363 + * counter into the second counter word. Therefore we disallow |
| 1364 + * individual operations that work on more than 2TB at a time. |
| 1365 + * |in_len_64| is needed because, on 32-bit platforms, size_t is only |
| 1366 + * 32-bits and this produces a warning because it's always false. |
| 1367 + * Casting to uint64_t inside the conditional is not sufficient to stop |
| 1368 + * the warning. */ |
| 1369 + if (in_len_64 >= (1ull << 32)*64-64) |
| 1370 + { |
| 1371 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_TOO_LARGE); |
| 1372 + return -1; |
| 1373 + } |
| 1374 + |
| 1375 + if (max_out_len < in_len + c20_ctx->tag_len) |
| 1376 + { |
| 1377 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_BUFFER_TOO_SMALL
); |
| 1378 + return -1; |
| 1379 + } |
| 1380 + |
| 1381 + if (nonce_len != CHACHA20_NONCE_LEN) |
| 1382 + { |
| 1383 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_IV_TOO_LARGE); |
| 1384 + return -1; |
| 1385 + } |
| 1386 + |
| 1387 + memset(poly1305_key, 0, sizeof(poly1305_key)); |
| 1388 + CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key), c20_c
tx->key, nonce, 0); |
| 1389 + |
| 1390 + CRYPTO_poly1305_init(&poly1305, poly1305_key); |
| 1391 + poly1305_update_with_length(&poly1305, ad, ad_len); |
| 1392 + CRYPTO_chacha_20(out, in, in_len, c20_ctx->key, nonce, 1); |
| 1393 + poly1305_update_with_length(&poly1305, out, in_len); |
| 1394 + |
| 1395 + if (c20_ctx->tag_len != POLY1305_TAG_LEN) |
| 1396 + { |
| 1397 + unsigned char tag[POLY1305_TAG_LEN]; |
| 1398 + CRYPTO_poly1305_finish(&poly1305, tag); |
| 1399 + memcpy(out + in_len, tag, c20_ctx->tag_len); |
| 1400 + return in_len + c20_ctx->tag_len; |
| 1401 + } |
| 1402 + |
| 1403 + CRYPTO_poly1305_finish(&poly1305, out + in_len); |
| 1404 + return in_len + POLY1305_TAG_LEN; |
| 1405 + } |
| 1406 + |
| 1407 +static ssize_t aead_chacha20_poly1305_open(const EVP_AEAD_CTX *ctx, |
| 1408 + unsigned char *out, size_t max_out_len, |
| 1409 + const unsigned char *nonce, size_t nonce_len, |
| 1410 + const unsigned char *in, size_t in_len, |
| 1411 + const unsigned char *ad, size_t ad_len) |
| 1412 + { |
| 1413 + const struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state; |
| 1414 + unsigned char mac[POLY1305_TAG_LEN]; |
| 1415 + unsigned char poly1305_key[32]; |
| 1416 + size_t out_len; |
| 1417 + poly1305_state poly1305; |
| 1418 + const uint64_t in_len_64 = in_len; |
| 1419 + |
| 1420 + if (in_len < c20_ctx->tag_len) |
| 1421 + { |
| 1422 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BAD_DECRYPT); |
| 1423 + return -1; |
| 1424 + } |
| 1425 + |
| 1426 + /* The underlying ChaCha implementation may not overflow the block |
| 1427 + * counter into the second counter word. Therefore we disallow |
| 1428 + * individual operations that work on more than 2TB at a time. |
| 1429 + * |in_len_64| is needed because, on 32-bit platforms, size_t is only |
| 1430 + * 32-bits and this produces a warning because it's always false. |
| 1431 + * Casting to uint64_t inside the conditional is not sufficient to stop |
| 1432 + * the warning. */ |
| 1433 + if (in_len_64 >= (1ull << 32)*64-64) |
| 1434 + { |
| 1435 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_TOO_LARGE); |
| 1436 + return -1; |
| 1437 + } |
| 1438 + |
| 1439 + if (nonce_len != CHACHA20_NONCE_LEN) |
| 1440 + { |
| 1441 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_IV_TOO_LARGE); |
| 1442 + return -1; |
| 1443 + } |
| 1444 + |
| 1445 + out_len = in_len - c20_ctx->tag_len; |
| 1446 + |
| 1447 + if (max_out_len < out_len) |
| 1448 + { |
| 1449 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BUFFER_TOO_SMALL
); |
| 1450 + return -1; |
| 1451 + } |
| 1452 + |
| 1453 + memset(poly1305_key, 0, sizeof(poly1305_key)); |
| 1454 + CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key), c20_c
tx->key, nonce, 0); |
| 1455 + |
| 1456 + CRYPTO_poly1305_init(&poly1305, poly1305_key); |
| 1457 + poly1305_update_with_length(&poly1305, ad, ad_len); |
| 1458 + poly1305_update_with_length(&poly1305, in, out_len); |
| 1459 + CRYPTO_poly1305_finish(&poly1305, mac); |
| 1460 + |
| 1461 + if (CRYPTO_memcmp(mac, in + out_len, c20_ctx->tag_len) != 0) |
| 1462 + { |
| 1463 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BAD_DECRYPT); |
| 1464 + return -1; |
| 1465 + } |
| 1466 + |
| 1467 + CRYPTO_chacha_20(out, in, out_len, c20_ctx->key, nonce, 1); |
| 1468 + return out_len; |
| 1469 + } |
| 1470 + |
| 1471 +static const EVP_AEAD aead_chacha20_poly1305 = |
| 1472 + { |
| 1473 + 32, /* key len */ |
| 1474 + CHACHA20_NONCE_LEN, /* nonce len */ |
| 1475 + POLY1305_TAG_LEN, /* overhead */ |
| 1476 + POLY1305_TAG_LEN, /* max tag length */ |
| 1477 + |
| 1478 + aead_chacha20_poly1305_init, |
| 1479 + aead_chacha20_poly1305_cleanup, |
| 1480 + aead_chacha20_poly1305_seal, |
| 1481 + aead_chacha20_poly1305_open, |
| 1482 + }; |
| 1483 + |
| 1484 +const EVP_AEAD *EVP_aead_chacha20_poly1305() |
| 1485 + { |
| 1486 + return &aead_chacha20_poly1305; |
| 1487 + } |
| 1488 + |
| 1489 +#endif /* !OPENSSL_NO_CHACHA && !OPENSSL_NO_POLY1305 */ |
| 1490 diff --git a/crypto/evp/evp.h b/crypto/evp/evp.h |
| 1491 index bd10642..7dc1656 100644 |
| 1492 --- a/crypto/evp/evp.h |
| 1493 +++ b/crypto/evp/evp.h |
| 1494 @@ -1258,6 +1258,11 @@ typedef struct evp_aead_st EVP_AEAD; |
| 1495 const EVP_AEAD *EVP_aead_aes_128_gcm(void); |
| 1496 #endif |
| 1497 |
| 1498 +#if !defined(OPENSSL_NO_CHACHA) && !defined(OPENSSL_NO_POLY1305) |
| 1499 +/* EVP_aead_chacha20_poly1305 is ChaCha20 with a Poly1305 authenticator. */ |
| 1500 +const EVP_AEAD *EVP_aead_chacha20_poly1305(void); |
| 1501 +#endif |
| 1502 + |
| 1503 /* EVP_AEAD_key_length returns the length, in bytes, of the keys used by |
| 1504 * |aead|. */ |
| 1505 size_t EVP_AEAD_key_length(const EVP_AEAD *aead); |
| 1506 @@ -1360,6 +1365,9 @@ void ERR_load_EVP_strings(void); |
| 1507 #define EVP_F_AEAD_AES_128_GCM_INIT 183 |
| 1508 #define EVP_F_AEAD_AES_128_GCM_OPEN 181 |
| 1509 #define EVP_F_AEAD_AES_128_GCM_SEAL 182 |
| 1510 +#define EVP_F_AEAD_CHACHA20_POLY1305_INIT 187 |
| 1511 +#define EVP_F_AEAD_CHACHA20_POLY1305_OPEN 184 |
| 1512 +#define EVP_F_AEAD_CHACHA20_POLY1305_SEAL 183 |
| 1513 #define EVP_F_AEAD_CTX_OPEN 185 |
| 1514 #define EVP_F_AEAD_CTX_SEAL 186 |
| 1515 #define EVP_F_AESNI_INIT_KEY 165 |
| 1516 diff --git a/crypto/evp/evp_err.c b/crypto/evp/evp_err.c |
| 1517 index c47969c..fb747e5 100644 |
| 1518 --- a/crypto/evp/evp_err.c |
| 1519 +++ b/crypto/evp/evp_err.c |
| 1520 @@ -73,6 +73,9 @@ static ERR_STRING_DATA EVP_str_functs[]= |
| 1521 {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_INIT), "AEAD_AES_128_GCM_INIT"}, |
| 1522 {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_OPEN), "AEAD_AES_128_GCM_OPEN"}, |
| 1523 {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_SEAL), "AEAD_AES_128_GCM_SEAL"}, |
| 1524 +{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_INIT), "AEAD_CHACHA20_POLY1305_INIT"}, |
| 1525 +{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_OPEN), "AEAD_CHACHA20_POLY1305_OPEN"}, |
| 1526 +{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_SEAL), "AEAD_CHACHA20_POLY1305_SEAL"}, |
| 1527 {ERR_FUNC(EVP_F_AEAD_CTX_OPEN), "AEAD_CTX_OPEN"}, |
| 1528 {ERR_FUNC(EVP_F_AEAD_CTX_SEAL), "AEAD_CTX_SEAL"}, |
| 1529 {ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"}, |
| 1530 diff --git a/crypto/poly1305/Makefile b/crypto/poly1305/Makefile |
| 1531 new file mode 100644 |
| 1532 index 0000000..397d7cd |
| 1533 --- /dev/null |
| 1534 +++ b/crypto/poly1305/Makefile |
| 1535 @@ -0,0 +1,81 @@ |
| 1536 +# |
| 1537 +# OpenSSL/crypto/poly1305/Makefile |
| 1538 +# |
| 1539 + |
| 1540 +DIR= poly1305 |
| 1541 +TOP= ../.. |
| 1542 +CC= cc |
| 1543 +CPP= $(CC) -E |
| 1544 +INCLUDES= |
| 1545 +CFLAG=-g |
| 1546 +AR= ar r |
| 1547 + |
| 1548 +POLY1305=poly1305_vec.o |
| 1549 + |
| 1550 +CFLAGS= $(INCLUDES) $(CFLAG) |
| 1551 +ASFLAGS= $(INCLUDES) $(ASFLAG) |
| 1552 +AFLAGS= $(ASFLAGS) |
| 1553 + |
| 1554 +GENERAL=Makefile |
| 1555 +TEST= |
| 1556 +APPS= |
| 1557 + |
| 1558 +LIB=$(TOP)/libcrypto.a |
| 1559 +LIBSRC=poly1305_vec.c |
| 1560 +LIBOBJ=$(POLY1305) |
| 1561 + |
| 1562 +SRC= $(LIBSRC) |
| 1563 + |
| 1564 +EXHEADER=poly1305.h |
| 1565 +HEADER= $(EXHEADER) |
| 1566 + |
| 1567 +ALL= $(GENERAL) $(SRC) $(HEADER) |
| 1568 + |
| 1569 +top: |
| 1570 + (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all) |
| 1571 + |
| 1572 +all: lib |
| 1573 + |
| 1574 +lib: $(LIBOBJ) |
| 1575 + $(AR) $(LIB) $(LIBOBJ) |
| 1576 + $(RANLIB) $(LIB) || echo Never mind. |
| 1577 + @touch lib |
| 1578 + |
| 1579 +files: |
| 1580 + $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO |
| 1581 + |
| 1582 +links: |
| 1583 + @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER) |
| 1584 + @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST) |
| 1585 + @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS) |
| 1586 + |
| 1587 +install: |
| 1588 + @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile... |
| 1589 + @headerlist="$(EXHEADER)"; for i in $$headerlist ; \ |
| 1590 + do \ |
| 1591 + (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \ |
| 1592 + chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \ |
| 1593 + done; |
| 1594 + |
| 1595 +tags: |
| 1596 + ctags $(SRC) |
| 1597 + |
| 1598 +tests: |
| 1599 + |
| 1600 +lint: |
| 1601 + lint -DLINT $(INCLUDES) $(SRC)>fluff |
| 1602 + |
| 1603 +depend: |
| 1604 + @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... |
| 1605 + $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) |
| 1606 + |
| 1607 +dclean: |
| 1608 + $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKE
FILE) >Makefile.new |
| 1609 + mv -f Makefile.new $(MAKEFILE) |
| 1610 + |
| 1611 +clean: |
| 1612 + rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff |
| 1613 + |
| 1614 +# DO NOT DELETE THIS LINE -- make depend depends on it. |
| 1615 + |
| 1616 +poly1305_vec.o: ../../include/openssl/poly1305.h poly1305_vec.c |
| 1617 diff --git a/crypto/poly1305/poly1305.c b/crypto/poly1305/poly1305.c |
| 1618 new file mode 100644 |
| 1619 index 0000000..2e5621d |
| 1620 --- /dev/null |
| 1621 +++ b/crypto/poly1305/poly1305.c |
| 1622 @@ -0,0 +1,320 @@ |
| 1623 +/* ==================================================================== |
| 1624 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. |
| 1625 + * |
| 1626 + * Redistribution and use in source and binary forms, with or without |
| 1627 + * modification, are permitted provided that the following conditions |
| 1628 + * are met: |
| 1629 + * |
| 1630 + * 1. Redistributions of source code must retain the above copyright |
| 1631 + * notice, this list of conditions and the following disclaimer. |
| 1632 + * |
| 1633 + * 2. Redistributions in binary form must reproduce the above copyright |
| 1634 + * notice, this list of conditions and the following disclaimer in |
| 1635 + * the documentation and/or other materials provided with the |
| 1636 + * distribution. |
| 1637 + * |
| 1638 + * 3. All advertising materials mentioning features or use of this |
| 1639 + * software must display the following acknowledgment: |
| 1640 + * "This product includes software developed by the OpenSSL Project |
| 1641 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" |
| 1642 + * |
| 1643 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 1644 + * endorse or promote products derived from this software without |
| 1645 + * prior written permission. For written permission, please contact |
| 1646 + * licensing@OpenSSL.org. |
| 1647 + * |
| 1648 + * 5. Products derived from this software may not be called "OpenSSL" |
| 1649 + * nor may "OpenSSL" appear in their names without prior written |
| 1650 + * permission of the OpenSSL Project. |
| 1651 + * |
| 1652 + * 6. Redistributions of any form whatsoever must retain the following |
| 1653 + * acknowledgment: |
| 1654 + * "This product includes software developed by the OpenSSL Project |
| 1655 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" |
| 1656 + * |
| 1657 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 1658 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 1659 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 1660 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 1661 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 1662 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 1663 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 1664 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 1665 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 1666 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 1667 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 1668 + * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 1669 + * ==================================================================== |
| 1670 + */ |
| 1671 + |
| 1672 +/* This implementation of poly1305 is by Andrew Moon |
| 1673 + * (https://github.com/floodyberry/poly1305-donna) and released as public |
| 1674 + * domain. */ |
| 1675 + |
| 1676 +#include <string.h> |
| 1677 +#include <stdint.h> |
| 1678 +#include <openssl/opensslconf.h> |
| 1679 + |
| 1680 +#if !defined(OPENSSL_NO_POLY1305) |
| 1681 + |
| 1682 +#include <openssl/poly1305.h> |
| 1683 + |
| 1684 +#if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_
64__) |
| 1685 +/* We can assume little-endian. */ |
| 1686 +static uint32_t U8TO32_LE(const unsigned char *m) |
| 1687 + { |
| 1688 + uint32_t r; |
| 1689 + memcpy(&r, m, sizeof(r)); |
| 1690 + return r; |
| 1691 + } |
| 1692 + |
| 1693 +static void U32TO8_LE(unsigned char *m, uint32_t v) |
| 1694 + { |
| 1695 + memcpy(m, &v, sizeof(v)); |
| 1696 + } |
| 1697 +#else |
| 1698 +static uint32_t U8TO32_LE(const unsigned char *m) |
| 1699 + { |
| 1700 + return (uint32_t)m[0] | |
| 1701 + (uint32_t)m[1] << 8 | |
| 1702 + (uint32_t)m[2] << 16 | |
| 1703 + (uint32_t)m[3] << 24; |
| 1704 + } |
| 1705 + |
| 1706 +static void U32TO8_LE(unsigned char *m, uint32_t v) |
| 1707 + { |
| 1708 + m[0] = v; |
| 1709 + m[1] = v >> 8; |
| 1710 + m[2] = v >> 16; |
| 1711 + m[3] = v >> 24; |
| 1712 + } |
| 1713 +#endif |
| 1714 + |
| 1715 +static uint64_t |
| 1716 +mul32x32_64(uint32_t a, uint32_t b) |
| 1717 + { |
| 1718 + return (uint64_t)a * b; |
| 1719 + } |
| 1720 + |
| 1721 + |
| 1722 +struct poly1305_state_st |
| 1723 + { |
| 1724 + uint32_t r0,r1,r2,r3,r4; |
| 1725 + uint32_t s1,s2,s3,s4; |
| 1726 + uint32_t h0,h1,h2,h3,h4; |
| 1727 + unsigned char buf[16]; |
| 1728 + unsigned int buf_used; |
| 1729 + unsigned char key[16]; |
| 1730 + }; |
| 1731 + |
| 1732 +/* poly1305_blocks updates |state| given some amount of input data. This |
| 1733 + * function may only be called with a |len| that is not a multiple of 16 at the |
| 1734 + * end of the data. Otherwise the input must be buffered into 16 byte blocks. |
| 1735 + * */ |
| 1736 +static void poly1305_update(struct poly1305_state_st *state, |
| 1737 + const unsigned char *in, size_t len) |
| 1738 + { |
| 1739 + uint32_t t0,t1,t2,t3; |
| 1740 + uint64_t t[5]; |
| 1741 + uint32_t b; |
| 1742 + uint64_t c; |
| 1743 + size_t j; |
| 1744 + unsigned char mp[16]; |
| 1745 + |
| 1746 + if (len < 16) |
| 1747 + goto poly1305_donna_atmost15bytes; |
| 1748 + |
| 1749 +poly1305_donna_16bytes: |
| 1750 + t0 = U8TO32_LE(in); |
| 1751 + t1 = U8TO32_LE(in+4); |
| 1752 + t2 = U8TO32_LE(in+8); |
| 1753 + t3 = U8TO32_LE(in+12); |
| 1754 + |
| 1755 + in += 16; |
| 1756 + len -= 16; |
| 1757 + |
| 1758 + state->h0 += t0 & 0x3ffffff; |
| 1759 + state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; |
| 1760 + state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; |
| 1761 + state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; |
| 1762 + state->h4 += (t3 >> 8) | (1 << 24); |
| 1763 + |
| 1764 +poly1305_donna_mul: |
| 1765 + t[0] = mul32x32_64(state->h0,state->r0) + |
| 1766 + mul32x32_64(state->h1,state->s4) + |
| 1767 + mul32x32_64(state->h2,state->s3) + |
| 1768 + mul32x32_64(state->h3,state->s2) + |
| 1769 + mul32x32_64(state->h4,state->s1); |
| 1770 + t[1] = mul32x32_64(state->h0,state->r1) + |
| 1771 + mul32x32_64(state->h1,state->r0) + |
| 1772 + mul32x32_64(state->h2,state->s4) + |
| 1773 + mul32x32_64(state->h3,state->s3) + |
| 1774 + mul32x32_64(state->h4,state->s2); |
| 1775 + t[2] = mul32x32_64(state->h0,state->r2) + |
| 1776 + mul32x32_64(state->h1,state->r1) + |
| 1777 + mul32x32_64(state->h2,state->r0) + |
| 1778 + mul32x32_64(state->h3,state->s4) + |
| 1779 + mul32x32_64(state->h4,state->s3); |
| 1780 + t[3] = mul32x32_64(state->h0,state->r3) + |
| 1781 + mul32x32_64(state->h1,state->r2) + |
| 1782 + mul32x32_64(state->h2,state->r1) + |
| 1783 + mul32x32_64(state->h3,state->r0) + |
| 1784 + mul32x32_64(state->h4,state->s4); |
| 1785 + t[4] = mul32x32_64(state->h0,state->r4) + |
| 1786 + mul32x32_64(state->h1,state->r3) + |
| 1787 + mul32x32_64(state->h2,state->r2) + |
| 1788 + mul32x32_64(state->h3,state->r1) + |
| 1789 + mul32x32_64(state->h4,state->r0); |
| 1790 + |
| 1791 + state->h0 = (uint32_t)t[0] & 0x3ffffff; c = (t[0] >
> 26); |
| 1792 + t[1] += c; state->h1 = (uint32_t)t[1] & 0x3ffffff; b = (uint32_t)(t[1] >
> 26); |
| 1793 + t[2] += b; state->h2 = (uint32_t)t[2] & 0x3ffffff; b = (uint32_t)(t[2] >
> 26); |
| 1794 + t[3] += b; state->h3 = (uint32_t)t[3] & 0x3ffffff; b = (uint32_t)(t[3] >
> 26); |
| 1795 + t[4] += b; state->h4 = (uint32_t)t[4] & 0x3ffffff; b = (uint32_t)(t[4] >
> 26); |
| 1796 + state->h0 += b * 5; |
| 1797 + |
| 1798 + if (len >= 16) |
| 1799 + goto poly1305_donna_16bytes; |
| 1800 + |
| 1801 + /* final bytes */ |
| 1802 +poly1305_donna_atmost15bytes: |
| 1803 + if (!len) |
| 1804 + return; |
| 1805 + |
| 1806 + for (j = 0; j < len; j++) |
| 1807 + mp[j] = in[j]; |
| 1808 + mp[j++] = 1; |
| 1809 + for (; j < 16; j++) |
| 1810 + mp[j] = 0; |
| 1811 + len = 0; |
| 1812 + |
| 1813 + t0 = U8TO32_LE(mp+0); |
| 1814 + t1 = U8TO32_LE(mp+4); |
| 1815 + t2 = U8TO32_LE(mp+8); |
| 1816 + t3 = U8TO32_LE(mp+12); |
| 1817 + |
| 1818 + state->h0 += t0 & 0x3ffffff; |
| 1819 + state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; |
| 1820 + state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; |
| 1821 + state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; |
| 1822 + state->h4 += (t3 >> 8); |
| 1823 + |
| 1824 + goto poly1305_donna_mul; |
| 1825 + } |
| 1826 + |
| 1827 +void CRYPTO_poly1305_init(poly1305_state *statep, const unsigned char key[32]) |
| 1828 + { |
| 1829 + struct poly1305_state_st *state = (struct poly1305_state_st*) statep; |
| 1830 + uint32_t t0,t1,t2,t3; |
| 1831 + |
| 1832 + t0 = U8TO32_LE(key+0); |
| 1833 + t1 = U8TO32_LE(key+4); |
| 1834 + t2 = U8TO32_LE(key+8); |
| 1835 + t3 = U8TO32_LE(key+12); |
| 1836 + |
| 1837 + /* precompute multipliers */ |
| 1838 + state->r0 = t0 & 0x3ffffff; t0 >>= 26; t0 |= t1 << 6; |
| 1839 + state->r1 = t0 & 0x3ffff03; t1 >>= 20; t1 |= t2 << 12; |
| 1840 + state->r2 = t1 & 0x3ffc0ff; t2 >>= 14; t2 |= t3 << 18; |
| 1841 + state->r3 = t2 & 0x3f03fff; t3 >>= 8; |
| 1842 + state->r4 = t3 & 0x00fffff; |
| 1843 + |
| 1844 + state->s1 = state->r1 * 5; |
| 1845 + state->s2 = state->r2 * 5; |
| 1846 + state->s3 = state->r3 * 5; |
| 1847 + state->s4 = state->r4 * 5; |
| 1848 + |
| 1849 + /* init state */ |
| 1850 + state->h0 = 0; |
| 1851 + state->h1 = 0; |
| 1852 + state->h2 = 0; |
| 1853 + state->h3 = 0; |
| 1854 + state->h4 = 0; |
| 1855 + |
| 1856 + state->buf_used = 0; |
| 1857 + memcpy(state->key, key + 16, sizeof(state->key)); |
| 1858 + } |
| 1859 + |
| 1860 +void CRYPTO_poly1305_update(poly1305_state *statep, const unsigned char *in, |
| 1861 + size_t in_len) |
| 1862 + { |
| 1863 + unsigned int i; |
| 1864 + struct poly1305_state_st *state = (struct poly1305_state_st*) statep; |
| 1865 + |
| 1866 + if (state->buf_used) |
| 1867 + { |
| 1868 + unsigned int todo = 16 - state->buf_used; |
| 1869 + if (todo > in_len) |
| 1870 + todo = in_len; |
| 1871 + for (i = 0; i < todo; i++) |
| 1872 + state->buf[state->buf_used + i] = in[i]; |
| 1873 + state->buf_used += todo; |
| 1874 + in_len -= todo; |
| 1875 + in += todo; |
| 1876 + |
| 1877 + if (state->buf_used == 16) |
| 1878 + { |
| 1879 + poly1305_update(state, state->buf, 16); |
| 1880 + state->buf_used = 0; |
| 1881 + } |
| 1882 + } |
| 1883 + |
| 1884 + if (in_len >= 16) |
| 1885 + { |
| 1886 + size_t todo = in_len & ~0xf; |
| 1887 + poly1305_update(state, in, todo); |
| 1888 + in += todo; |
| 1889 + in_len &= 0xf; |
| 1890 + } |
| 1891 + |
| 1892 + if (in_len) |
| 1893 + { |
| 1894 + for (i = 0; i < in_len; i++) |
| 1895 + state->buf[i] = in[i]; |
| 1896 + state->buf_used = in_len; |
| 1897 + } |
| 1898 + } |
| 1899 + |
| 1900 +void CRYPTO_poly1305_finish(poly1305_state *statep, unsigned char mac[16]) |
| 1901 + { |
| 1902 + struct poly1305_state_st *state = (struct poly1305_state_st*) statep; |
| 1903 + uint64_t f0,f1,f2,f3; |
| 1904 + uint32_t g0,g1,g2,g3,g4; |
| 1905 + uint32_t b, nb; |
| 1906 + |
| 1907 + if (state->buf_used) |
| 1908 + poly1305_update(state, state->buf, state->buf_used); |
| 1909 + |
| 1910 + b = state->h0 >> 26; state->h0 = state->h0 & 0x3ffff
ff; |
| 1911 + state->h1 += b; b = state->h1 >> 26; state->h1 = state->h1 & 0x3ffff
ff; |
| 1912 + state->h2 += b; b = state->h2 >> 26; state->h2 = state->h2 & 0x3ffff
ff; |
| 1913 + state->h3 += b; b = state->h3 >> 26; state->h3 = state->h3 & 0x3ffff
ff; |
| 1914 + state->h4 += b; b = state->h4 >> 26; state->h4 = state->h4 & 0x3ffff
ff; |
| 1915 + state->h0 += b * 5; |
| 1916 + |
| 1917 + g0 = state->h0 + 5; b = g0 >> 26; g0 &= 0x3ffffff; |
| 1918 + g1 = state->h1 + b; b = g1 >> 26; g1 &= 0x3ffffff; |
| 1919 + g2 = state->h2 + b; b = g2 >> 26; g2 &= 0x3ffffff; |
| 1920 + g3 = state->h3 + b; b = g3 >> 26; g3 &= 0x3ffffff; |
| 1921 + g4 = state->h4 + b - (1 << 26); |
| 1922 + |
| 1923 + b = (g4 >> 31) - 1; |
| 1924 + nb = ~b; |
| 1925 + state->h0 = (state->h0 & nb) | (g0 & b); |
| 1926 + state->h1 = (state->h1 & nb) | (g1 & b); |
| 1927 + state->h2 = (state->h2 & nb) | (g2 & b); |
| 1928 + state->h3 = (state->h3 & nb) | (g3 & b); |
| 1929 + state->h4 = (state->h4 & nb) | (g4 & b); |
| 1930 + |
| 1931 + f0 = ((state->h0 ) | (state->h1 << 26)) + (uint64_t)U8TO32_LE(&stat
e->key[0]); |
| 1932 + f1 = ((state->h1 >> 6) | (state->h2 << 20)) + (uint64_t)U8TO32_LE(&stat
e->key[4]); |
| 1933 + f2 = ((state->h2 >> 12) | (state->h3 << 14)) + (uint64_t)U8TO32_LE(&stat
e->key[8]); |
| 1934 + f3 = ((state->h3 >> 18) | (state->h4 << 8)) + (uint64_t)U8TO32_LE(&stat
e->key[12]); |
| 1935 + |
| 1936 + U32TO8_LE(&mac[ 0], f0); f1 += (f0 >> 32); |
| 1937 + U32TO8_LE(&mac[ 4], f1); f2 += (f1 >> 32); |
| 1938 + U32TO8_LE(&mac[ 8], f2); f3 += (f2 >> 32); |
| 1939 + U32TO8_LE(&mac[12], f3); |
| 1940 + } |
| 1941 + |
| 1942 +#endif /* !OPENSSL_NO_POLY1305 */ |
| 1943 diff --git a/crypto/poly1305/poly1305.h b/crypto/poly1305/poly1305.h |
| 1944 new file mode 100644 |
| 1945 index 0000000..28f85ed |
| 1946 --- /dev/null |
| 1947 +++ b/crypto/poly1305/poly1305.h |
| 1948 @@ -0,0 +1,88 @@ |
| 1949 +/* |
| 1950 + * Poly1305 |
| 1951 + * |
| 1952 + * Created on: Jun, 2013 |
| 1953 + * Author: Elie Bursztein (elieb@google.com) |
| 1954 + * |
| 1955 + * Adapted from the estream code by D. Bernstein. |
| 1956 + */ |
| 1957 +/* ==================================================================== |
| 1958 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. |
| 1959 + * |
| 1960 + * Redistribution and use in source and binary forms, with or without |
| 1961 + * modification, are permitted provided that the following conditions |
| 1962 + * are met: |
| 1963 + * |
| 1964 + * 1. Redistributions of source code must retain the above copyright |
| 1965 + * notice, this list of conditions and the following disclaimer. |
| 1966 + * |
| 1967 + * 2. Redistributions in binary form must reproduce the above copyright |
| 1968 + * notice, this list of conditions and the following disclaimer in |
| 1969 + * the documentation and/or other materials provided with the |
| 1970 + * distribution. |
| 1971 + * |
| 1972 + * 3. All advertising materials mentioning features or use of this |
| 1973 + * software must display the following acknowledgment: |
| 1974 + * "This product includes software developed by the OpenSSL Project |
| 1975 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" |
| 1976 + * |
| 1977 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 1978 + * endorse or promote products derived from this software without |
| 1979 + * prior written permission. For written permission, please contact |
| 1980 + * licensing@OpenSSL.org. |
| 1981 + * |
| 1982 + * 5. Products derived from this software may not be called "OpenSSL" |
| 1983 + * nor may "OpenSSL" appear in their names without prior written |
| 1984 + * permission of the OpenSSL Project. |
| 1985 + * |
| 1986 + * 6. Redistributions of any form whatsoever must retain the following |
| 1987 + * acknowledgment: |
| 1988 + * "This product includes software developed by the OpenSSL Project |
| 1989 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" |
| 1990 + * |
| 1991 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 1992 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 1993 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 1994 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 1995 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 1996 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 1997 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 1998 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 1999 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 2000 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 2001 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 2002 + * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 2003 + * ==================================================================== |
| 2004 + */ |
| 2005 + |
| 2006 +#ifndef HEADER_POLY1305_H_ |
| 2007 +#define HEADER_POLY1305_H_ |
| 2008 + |
| 2009 +#include <stdint.h> |
| 2010 +#include <openssl/opensslconf.h> |
| 2011 + |
| 2012 +#if defined(OPENSSL_NO_POLY1305) |
| 2013 +#error Poly1305 support is disabled. |
| 2014 +#endif |
| 2015 + |
| 2016 +typedef unsigned char poly1305_state[512]; |
| 2017 + |
| 2018 +/* poly1305_init sets up |state| so that it can be used to calculate an |
| 2019 + * authentication tag with the one-time key |key|. Note that |key| is a |
| 2020 + * one-time key and therefore there is no `reset' method because that would |
| 2021 + * enable several messages to be authenticated with the same key. */ |
| 2022 +extern void CRYPTO_poly1305_init(poly1305_state* state, |
| 2023 + const unsigned char key[32]); |
| 2024 + |
| 2025 +/* poly1305_update processes |in_len| bytes from |in|. It can be called zero or |
| 2026 + * more times after poly1305_init. */ |
| 2027 +extern void CRYPTO_poly1305_update(poly1305_state* state, |
| 2028 + const unsigned char *in, |
| 2029 + size_t in_len); |
| 2030 + |
| 2031 +/* poly1305_finish completes the poly1305 calculation and writes a 16 byte |
| 2032 + * authentication tag to |mac|. */ |
| 2033 +extern void CRYPTO_poly1305_finish(poly1305_state* state, |
| 2034 + unsigned char mac[16]); |
| 2035 + |
| 2036 +#endif /* HEADER_POLY1305_H_ */ |
| 2037 diff --git a/crypto/poly1305/poly1305_arm.c b/crypto/poly1305/poly1305_arm.c |
| 2038 new file mode 100644 |
| 2039 index 0000000..adcef35 |
| 2040 --- /dev/null |
| 2041 +++ b/crypto/poly1305/poly1305_arm.c |
| 2042 @@ -0,0 +1,335 @@ |
| 2043 +/* ==================================================================== |
| 2044 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. |
| 2045 + * |
| 2046 + * Redistribution and use in source and binary forms, with or without |
| 2047 + * modification, are permitted provided that the following conditions |
| 2048 + * are met: |
| 2049 + * |
| 2050 + * 1. Redistributions of source code must retain the above copyright |
| 2051 + * notice, this list of conditions and the following disclaimer. |
| 2052 + * |
| 2053 + * 2. Redistributions in binary form must reproduce the above copyright |
| 2054 + * notice, this list of conditions and the following disclaimer in |
| 2055 + * the documentation and/or other materials provided with the |
| 2056 + * distribution. |
| 2057 + * |
| 2058 + * 3. All advertising materials mentioning features or use of this |
| 2059 + * software must display the following acknowledgment: |
| 2060 + * "This product includes software developed by the OpenSSL Project |
| 2061 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" |
| 2062 + * |
| 2063 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 2064 + * endorse or promote products derived from this software without |
| 2065 + * prior written permission. For written permission, please contact |
| 2066 + * licensing@OpenSSL.org. |
| 2067 + * |
| 2068 + * 5. Products derived from this software may not be called "OpenSSL" |
| 2069 + * nor may "OpenSSL" appear in their names without prior written |
| 2070 + * permission of the OpenSSL Project. |
| 2071 + * |
| 2072 + * 6. Redistributions of any form whatsoever must retain the following |
| 2073 + * acknowledgment: |
| 2074 + * "This product includes software developed by the OpenSSL Project |
| 2075 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" |
| 2076 + * |
| 2077 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 2078 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 2079 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 2080 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 2081 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 2082 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 2083 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 2084 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 2085 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 2086 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 2087 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 2088 + * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 2089 + * ==================================================================== |
| 2090 + */ |
| 2091 + |
| 2092 +/* This implementation was taken from the public domain, neon2 version in |
| 2093 + * SUPERCOP by D. J. Bernstein and Peter Schwabe. */ |
| 2094 + |
| 2095 +#include <stdint.h> |
| 2096 + |
| 2097 +#include <openssl/poly1305.h> |
| 2098 + |
| 2099 +#if !defined(OPENSSL_NO_POLY1305) |
| 2100 + |
| 2101 +typedef struct { |
| 2102 + uint32_t v[12]; /* for alignment; only using 10 */ |
| 2103 +} fe1305x2; |
| 2104 + |
| 2105 +#define addmulmod openssl_poly1305_neon2_addmulmod |
| 2106 +#define blocks openssl_poly1305_neon2_blocks |
| 2107 + |
| 2108 +extern void addmulmod(fe1305x2 *r, const fe1305x2 *x, const fe1305x2 *y, const
fe1305x2 *c); |
| 2109 + |
| 2110 +extern int blocks(fe1305x2 *h, const fe1305x2 *precomp, const unsigned char *in
, unsigned int inlen); |
| 2111 + |
| 2112 +static void freeze(fe1305x2 *r) |
| 2113 + { |
| 2114 + int i; |
| 2115 + |
| 2116 + uint32_t x0 = r->v[0]; |
| 2117 + uint32_t x1 = r->v[2]; |
| 2118 + uint32_t x2 = r->v[4]; |
| 2119 + uint32_t x3 = r->v[6]; |
| 2120 + uint32_t x4 = r->v[8]; |
| 2121 + uint32_t y0; |
| 2122 + uint32_t y1; |
| 2123 + uint32_t y2; |
| 2124 + uint32_t y3; |
| 2125 + uint32_t y4; |
| 2126 + uint32_t swap; |
| 2127 + |
| 2128 + for (i = 0;i < 3;++i) |
| 2129 + { |
| 2130 + x1 += x0 >> 26; x0 &= 0x3ffffff; |
| 2131 + x2 += x1 >> 26; x1 &= 0x3ffffff; |
| 2132 + x3 += x2 >> 26; x2 &= 0x3ffffff; |
| 2133 + x4 += x3 >> 26; x3 &= 0x3ffffff; |
| 2134 + x0 += 5*(x4 >> 26); x4 &= 0x3ffffff; |
| 2135 + } |
| 2136 + |
| 2137 + y0 = x0 + 5; |
| 2138 + y1 = x1 + (y0 >> 26); y0 &= 0x3ffffff; |
| 2139 + y2 = x2 + (y1 >> 26); y1 &= 0x3ffffff; |
| 2140 + y3 = x3 + (y2 >> 26); y2 &= 0x3ffffff; |
| 2141 + y4 = x4 + (y3 >> 26); y3 &= 0x3ffffff; |
| 2142 + swap = -(y4 >> 26); y4 &= 0x3ffffff; |
| 2143 + |
| 2144 + y0 ^= x0; |
| 2145 + y1 ^= x1; |
| 2146 + y2 ^= x2; |
| 2147 + y3 ^= x3; |
| 2148 + y4 ^= x4; |
| 2149 + |
| 2150 + y0 &= swap; |
| 2151 + y1 &= swap; |
| 2152 + y2 &= swap; |
| 2153 + y3 &= swap; |
| 2154 + y4 &= swap; |
| 2155 + |
| 2156 + y0 ^= x0; |
| 2157 + y1 ^= x1; |
| 2158 + y2 ^= x2; |
| 2159 + y3 ^= x3; |
| 2160 + y4 ^= x4; |
| 2161 + |
| 2162 + r->v[0] = y0; |
| 2163 + r->v[2] = y1; |
| 2164 + r->v[4] = y2; |
| 2165 + r->v[6] = y3; |
| 2166 + r->v[8] = y4; |
| 2167 + } |
| 2168 + |
| 2169 +static void fe1305x2_tobytearray(unsigned char *r, fe1305x2 *x) |
| 2170 + { |
| 2171 + uint32_t x0 = x->v[0]; |
| 2172 + uint32_t x1 = x->v[2]; |
| 2173 + uint32_t x2 = x->v[4]; |
| 2174 + uint32_t x3 = x->v[6]; |
| 2175 + uint32_t x4 = x->v[8]; |
| 2176 + |
| 2177 + x1 += x0 >> 26; |
| 2178 + x0 &= 0x3ffffff; |
| 2179 + x2 += x1 >> 26; |
| 2180 + x1 &= 0x3ffffff; |
| 2181 + x3 += x2 >> 26; |
| 2182 + x2 &= 0x3ffffff; |
| 2183 + x4 += x3 >> 26; |
| 2184 + x3 &= 0x3ffffff; |
| 2185 + |
| 2186 + *(uint32_t *) r = x0 + (x1 << 26); |
| 2187 + *(uint32_t *) (r + 4) = (x1 >> 6) + (x2 << 20); |
| 2188 + *(uint32_t *) (r + 8) = (x2 >> 12) + (x3 << 14); |
| 2189 + *(uint32_t *) (r + 12) = (x3 >> 18) + (x4 << 8); |
| 2190 + } |
| 2191 + |
| 2192 +/* load32 exists to avoid breaking strict aliasing rules in |
| 2193 + * fe1305x2_frombytearray. */ |
| 2194 +static uint32_t load32(unsigned char *t) |
| 2195 + { |
| 2196 + uint32_t tmp; |
| 2197 + memcpy(&tmp, t, sizeof(tmp)); |
| 2198 + return tmp; |
| 2199 + } |
| 2200 + |
| 2201 +static void fe1305x2_frombytearray(fe1305x2 *r, const unsigned char *x, unsigne
d long long xlen) |
| 2202 + { |
| 2203 + int i; |
| 2204 + unsigned char t[17]; |
| 2205 + |
| 2206 + for (i = 0; (i < 16) && (i < xlen); i++) |
| 2207 + t[i] = x[i]; |
| 2208 + xlen -= i; |
| 2209 + x += i; |
| 2210 + t[i++] = 1; |
| 2211 + for (; i<17; i++) |
| 2212 + t[i] = 0; |
| 2213 + |
| 2214 + r->v[0] = 0x3ffffff & load32(t); |
| 2215 + r->v[2] = 0x3ffffff & (load32(t + 3) >> 2); |
| 2216 + r->v[4] = 0x3ffffff & (load32(t + 6) >> 4); |
| 2217 + r->v[6] = 0x3ffffff & (load32(t + 9) >> 6); |
| 2218 + r->v[8] = load32(t + 13); |
| 2219 + |
| 2220 + if (xlen) |
| 2221 + { |
| 2222 + for (i = 0; (i < 16) && (i < xlen); i++) |
| 2223 + t[i] = x[i]; |
| 2224 + t[i++] = 1; |
| 2225 + for (; i<17; i++) |
| 2226 + t[i] = 0; |
| 2227 + |
| 2228 + r->v[1] = 0x3ffffff & load32(t); |
| 2229 + r->v[3] = 0x3ffffff & (load32(t + 3) >> 2); |
| 2230 + r->v[5] = 0x3ffffff & (load32(t + 6) >> 4); |
| 2231 + r->v[7] = 0x3ffffff & (load32(t + 9) >> 6); |
| 2232 + r->v[9] = load32(t + 13); |
| 2233 + } |
| 2234 + else |
| 2235 + r->v[1] = r->v[3] = r->v[5] = r->v[7] = r->v[9] = 0; |
| 2236 + } |
| 2237 + |
| 2238 +static const fe1305x2 zero __attribute__ ((aligned (16))); |
| 2239 + |
| 2240 +struct poly1305_state_st { |
| 2241 + unsigned char data[sizeof(fe1305x2[5]) + 128]; |
| 2242 + unsigned char buf[32]; |
| 2243 + unsigned int buf_used; |
| 2244 + unsigned char key[16]; |
| 2245 +}; |
| 2246 + |
| 2247 +void CRYPTO_poly1305_init(poly1305_state *state, const unsigned char key[32]) |
| 2248 + { |
| 2249 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state); |
| 2250 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); |
| 2251 + fe1305x2 *const h = r + 1; |
| 2252 + fe1305x2 *const c = h + 1; |
| 2253 + fe1305x2 *const precomp = c + 1; |
| 2254 + unsigned int j; |
| 2255 + |
| 2256 + r->v[1] = r->v[0] = 0x3ffffff & *(uint32_t *) key; |
| 2257 + r->v[3] = r->v[2] = 0x3ffff03 & ((*(uint32_t *) (key + 3)) >> 2); |
| 2258 + r->v[5] = r->v[4] = 0x3ffc0ff & ((*(uint32_t *) (key + 6)) >> 4); |
| 2259 + r->v[7] = r->v[6] = 0x3f03fff & ((*(uint32_t *) (key + 9)) >> 6); |
| 2260 + r->v[9] = r->v[8] = 0x00fffff & ((*(uint32_t *) (key + 12)) >> 8); |
| 2261 + |
| 2262 + for (j = 0; j < 10; j++) |
| 2263 + h->v[j] = 0; /* XXX: should fast-forward a bit */ |
| 2264 + |
| 2265 + addmulmod(precomp,r,r,&zero); /* precompute r^2 */ |
| 2266 + addmulmod(precomp + 1,precomp,precomp,&zero); /* precompute r^4 */ |
| 2267 + |
| 2268 + memcpy(st->key, key + 16, 16); |
| 2269 + st->buf_used = 0; |
| 2270 + } |
| 2271 + |
| 2272 +void CRYPTO_poly1305_update(poly1305_state *state, const unsigned char *in, siz
e_t in_len) |
| 2273 + { |
| 2274 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state); |
| 2275 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); |
| 2276 + fe1305x2 *const h = r + 1; |
| 2277 + fe1305x2 *const c = h + 1; |
| 2278 + fe1305x2 *const precomp = c + 1; |
| 2279 + unsigned int i; |
| 2280 + unsigned char data[sizeof(fe1305x2) + 16]; |
| 2281 + fe1305x2 *const r2r = (fe1305x2 *) (data + (15 & (-(int) data))); |
| 2282 + |
| 2283 + if (st->buf_used) |
| 2284 + { |
| 2285 + unsigned int todo = 32 - st->buf_used; |
| 2286 + if (todo > in_len) |
| 2287 + todo = in_len; |
| 2288 + for (i = 0; i < todo; i++) |
| 2289 + st->buf[st->buf_used + i] = in[i]; |
| 2290 + st->buf_used += todo; |
| 2291 + in_len -= todo; |
| 2292 + in += todo; |
| 2293 + |
| 2294 + if (st->buf_used == sizeof(st->buf)) |
| 2295 + { |
| 2296 + fe1305x2_frombytearray(c, st->buf, sizeof(st->buf)); |
| 2297 + r2r->v[0] = precomp->v[0]; |
| 2298 + r2r->v[2] = precomp->v[2]; |
| 2299 + r2r->v[4] = precomp->v[4]; |
| 2300 + r2r->v[6] = precomp->v[6]; |
| 2301 + r2r->v[8] = precomp->v[8]; |
| 2302 + r2r->v[1] = r->v[1]; |
| 2303 + r2r->v[3] = r->v[3]; |
| 2304 + r2r->v[5] = r->v[5]; |
| 2305 + r2r->v[7] = r->v[7]; |
| 2306 + r2r->v[9] = r->v[9]; |
| 2307 + addmulmod(h,h,r2r,c); |
| 2308 + st->buf_used = 0; |
| 2309 + } |
| 2310 + } |
| 2311 + |
| 2312 + while (in_len > 32) |
| 2313 + { |
| 2314 + unsigned int tlen = 1048576; |
| 2315 + if (in_len < 1048576) |
| 2316 + tlen = in_len; |
| 2317 + tlen -= blocks(h, precomp, in, tlen); |
| 2318 + in_len -= tlen; |
| 2319 + in += tlen; |
| 2320 + } |
| 2321 + |
| 2322 + if (in_len) |
| 2323 + { |
| 2324 + for (i = 0; i < in_len; i++) |
| 2325 + st->buf[i] = in[i]; |
| 2326 + st->buf_used = in_len; |
| 2327 + } |
| 2328 + } |
| 2329 + |
| 2330 +void CRYPTO_poly1305_finish(poly1305_state* state, unsigned char mac[16]) |
| 2331 + { |
| 2332 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state); |
| 2333 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); |
| 2334 + fe1305x2 *const h = r + 1; |
| 2335 + fe1305x2 *const c = h + 1; |
| 2336 + fe1305x2 *const precomp = c + 1; |
| 2337 + |
| 2338 + if (st->buf_used > 16) |
| 2339 + { |
| 2340 + fe1305x2_frombytearray(c, st->buf, st->buf_used); |
| 2341 + precomp->v[1] = r->v[1]; |
| 2342 + precomp->v[3] = r->v[3]; |
| 2343 + precomp->v[5] = r->v[5]; |
| 2344 + precomp->v[7] = r->v[7]; |
| 2345 + precomp->v[9] = r->v[9]; |
| 2346 + addmulmod(h,h,precomp,c); |
| 2347 + } |
| 2348 + else if (st->buf_used > 0) |
| 2349 + { |
| 2350 + fe1305x2_frombytearray(c, st->buf, st->buf_used); |
| 2351 + r->v[1] = 1; |
| 2352 + r->v[3] = 0; |
| 2353 + r->v[5] = 0; |
| 2354 + r->v[7] = 0; |
| 2355 + r->v[9] = 0; |
| 2356 + addmulmod(h,h,r,c); |
| 2357 + } |
| 2358 + |
| 2359 + h->v[0] += h->v[1]; |
| 2360 + h->v[2] += h->v[3]; |
| 2361 + h->v[4] += h->v[5]; |
| 2362 + h->v[6] += h->v[7]; |
| 2363 + h->v[8] += h->v[9]; |
| 2364 + freeze(h); |
| 2365 + |
| 2366 + fe1305x2_frombytearray(c, st->key, 16); |
| 2367 + c->v[8] ^= (1 << 24); |
| 2368 + |
| 2369 + h->v[0] += c->v[0]; |
| 2370 + h->v[2] += c->v[2]; |
| 2371 + h->v[4] += c->v[4]; |
| 2372 + h->v[6] += c->v[6]; |
| 2373 + h->v[8] += c->v[8]; |
| 2374 + fe1305x2_tobytearray(mac, h); |
| 2375 + } |
| 2376 + |
| 2377 +#endif /* !OPENSSL_NO_POLY1305 */ |
| 2378 diff --git a/crypto/poly1305/poly1305_arm_asm.s b/crypto/poly1305/poly1305_arm_a
sm.s |
| 2379 new file mode 100644 |
| 2380 index 0000000..449d16f |
| 2381 --- /dev/null |
| 2382 +++ b/crypto/poly1305/poly1305_arm_asm.s |
| 2383 @@ -0,0 +1,2009 @@ |
| 2384 +# This implementation was taken from the public domain, neon2 version in |
| 2385 +# SUPERCOP by D. J. Bernstein and Peter Schwabe. |
| 2386 + |
| 2387 +# qhasm: int32 input_0 |
| 2388 + |
| 2389 +# qhasm: int32 input_1 |
| 2390 + |
| 2391 +# qhasm: int32 input_2 |
| 2392 + |
| 2393 +# qhasm: int32 input_3 |
| 2394 + |
| 2395 +# qhasm: stack32 input_4 |
| 2396 + |
| 2397 +# qhasm: stack32 input_5 |
| 2398 + |
| 2399 +# qhasm: stack32 input_6 |
| 2400 + |
| 2401 +# qhasm: stack32 input_7 |
| 2402 + |
| 2403 +# qhasm: int32 caller_r4 |
| 2404 + |
| 2405 +# qhasm: int32 caller_r5 |
| 2406 + |
| 2407 +# qhasm: int32 caller_r6 |
| 2408 + |
| 2409 +# qhasm: int32 caller_r7 |
| 2410 + |
| 2411 +# qhasm: int32 caller_r8 |
| 2412 + |
| 2413 +# qhasm: int32 caller_r9 |
| 2414 + |
| 2415 +# qhasm: int32 caller_r10 |
| 2416 + |
| 2417 +# qhasm: int32 caller_r11 |
| 2418 + |
| 2419 +# qhasm: int32 caller_r12 |
| 2420 + |
| 2421 +# qhasm: int32 caller_r14 |
| 2422 + |
| 2423 +# qhasm: reg128 caller_q4 |
| 2424 + |
| 2425 +# qhasm: reg128 caller_q5 |
| 2426 + |
| 2427 +# qhasm: reg128 caller_q6 |
| 2428 + |
| 2429 +# qhasm: reg128 caller_q7 |
| 2430 + |
| 2431 +# qhasm: startcode |
| 2432 +.fpu neon |
| 2433 +.text |
| 2434 + |
| 2435 +# qhasm: reg128 r0 |
| 2436 + |
| 2437 +# qhasm: reg128 r1 |
| 2438 + |
| 2439 +# qhasm: reg128 r2 |
| 2440 + |
| 2441 +# qhasm: reg128 r3 |
| 2442 + |
| 2443 +# qhasm: reg128 r4 |
| 2444 + |
| 2445 +# qhasm: reg128 x01 |
| 2446 + |
| 2447 +# qhasm: reg128 x23 |
| 2448 + |
| 2449 +# qhasm: reg128 x4 |
| 2450 + |
| 2451 +# qhasm: reg128 y0 |
| 2452 + |
| 2453 +# qhasm: reg128 y12 |
| 2454 + |
| 2455 +# qhasm: reg128 y34 |
| 2456 + |
| 2457 +# qhasm: reg128 5y12 |
| 2458 + |
| 2459 +# qhasm: reg128 5y34 |
| 2460 + |
| 2461 +# qhasm: stack128 y0_stack |
| 2462 + |
| 2463 +# qhasm: stack128 y12_stack |
| 2464 + |
| 2465 +# qhasm: stack128 y34_stack |
| 2466 + |
| 2467 +# qhasm: stack128 5y12_stack |
| 2468 + |
| 2469 +# qhasm: stack128 5y34_stack |
| 2470 + |
| 2471 +# qhasm: reg128 z0 |
| 2472 + |
| 2473 +# qhasm: reg128 z12 |
| 2474 + |
| 2475 +# qhasm: reg128 z34 |
| 2476 + |
| 2477 +# qhasm: reg128 5z12 |
| 2478 + |
| 2479 +# qhasm: reg128 5z34 |
| 2480 + |
| 2481 +# qhasm: stack128 z0_stack |
| 2482 + |
| 2483 +# qhasm: stack128 z12_stack |
| 2484 + |
| 2485 +# qhasm: stack128 z34_stack |
| 2486 + |
| 2487 +# qhasm: stack128 5z12_stack |
| 2488 + |
| 2489 +# qhasm: stack128 5z34_stack |
| 2490 + |
| 2491 +# qhasm: stack128 two24 |
| 2492 + |
| 2493 +# qhasm: int32 ptr |
| 2494 + |
| 2495 +# qhasm: reg128 c01 |
| 2496 + |
| 2497 +# qhasm: reg128 c23 |
| 2498 + |
| 2499 +# qhasm: reg128 d01 |
| 2500 + |
| 2501 +# qhasm: reg128 d23 |
| 2502 + |
| 2503 +# qhasm: reg128 t0 |
| 2504 + |
| 2505 +# qhasm: reg128 t1 |
| 2506 + |
| 2507 +# qhasm: reg128 t2 |
| 2508 + |
| 2509 +# qhasm: reg128 t3 |
| 2510 + |
| 2511 +# qhasm: reg128 t4 |
| 2512 + |
| 2513 +# qhasm: reg128 mask |
| 2514 + |
| 2515 +# qhasm: reg128 u0 |
| 2516 + |
| 2517 +# qhasm: reg128 u1 |
| 2518 + |
| 2519 +# qhasm: reg128 u2 |
| 2520 + |
| 2521 +# qhasm: reg128 u3 |
| 2522 + |
| 2523 +# qhasm: reg128 u4 |
| 2524 + |
| 2525 +# qhasm: reg128 v01 |
| 2526 + |
| 2527 +# qhasm: reg128 mid |
| 2528 + |
| 2529 +# qhasm: reg128 v23 |
| 2530 + |
| 2531 +# qhasm: reg128 v4 |
| 2532 + |
| 2533 +# qhasm: int32 len |
| 2534 + |
| 2535 +# qhasm: qpushenter crypto_onetimeauth_poly1305_neon2_blocks |
| 2536 +.align 4 |
| 2537 +.global openssl_poly1305_neon2_blocks |
| 2538 +.type openssl_poly1305_neon2_blocks STT_FUNC |
| 2539 +openssl_poly1305_neon2_blocks: |
| 2540 +vpush {q4,q5,q6,q7} |
| 2541 +mov r12,sp |
| 2542 +sub sp,sp,#192 |
| 2543 +and sp,sp,#0xffffffe0 |
| 2544 + |
| 2545 +# qhasm: len = input_3 |
| 2546 +# asm 1: mov >len=int32#4,<input_3=int32#4 |
| 2547 +# asm 2: mov >len=r3,<input_3=r3 |
| 2548 +mov r3,r3 |
| 2549 + |
| 2550 +# qhasm: new y0 |
| 2551 + |
| 2552 +# qhasm: y0 = mem64[input_1]y0[1]; input_1 += 8 |
| 2553 +# asm 1: vld1.8 {<y0=reg128#1%bot},[<input_1=int32#2]! |
| 2554 +# asm 2: vld1.8 {<y0=d0},[<input_1=r1]! |
| 2555 +vld1.8 {d0},[r1]! |
| 2556 + |
| 2557 +# qhasm: y12 = mem128[input_1]; input_1 += 16 |
| 2558 +# asm 1: vld1.8 {>y12=reg128#2%bot->y12=reg128#2%top},[<input_1=int32#2]! |
| 2559 +# asm 2: vld1.8 {>y12=d2->y12=d3},[<input_1=r1]! |
| 2560 +vld1.8 {d2-d3},[r1]! |
| 2561 + |
| 2562 +# qhasm: y34 = mem128[input_1]; input_1 += 16 |
| 2563 +# asm 1: vld1.8 {>y34=reg128#3%bot->y34=reg128#3%top},[<input_1=int32#2]! |
| 2564 +# asm 2: vld1.8 {>y34=d4->y34=d5},[<input_1=r1]! |
| 2565 +vld1.8 {d4-d5},[r1]! |
| 2566 + |
| 2567 +# qhasm: input_1 += 8 |
| 2568 +# asm 1: add >input_1=int32#2,<input_1=int32#2,#8 |
| 2569 +# asm 2: add >input_1=r1,<input_1=r1,#8 |
| 2570 +add r1,r1,#8 |
| 2571 + |
| 2572 +# qhasm: new z0 |
| 2573 + |
| 2574 +# qhasm: z0 = mem64[input_1]z0[1]; input_1 += 8 |
| 2575 +# asm 1: vld1.8 {<z0=reg128#4%bot},[<input_1=int32#2]! |
| 2576 +# asm 2: vld1.8 {<z0=d6},[<input_1=r1]! |
| 2577 +vld1.8 {d6},[r1]! |
| 2578 + |
| 2579 +# qhasm: z12 = mem128[input_1]; input_1 += 16 |
| 2580 +# asm 1: vld1.8 {>z12=reg128#5%bot->z12=reg128#5%top},[<input_1=int32#2]! |
| 2581 +# asm 2: vld1.8 {>z12=d8->z12=d9},[<input_1=r1]! |
| 2582 +vld1.8 {d8-d9},[r1]! |
| 2583 + |
| 2584 +# qhasm: z34 = mem128[input_1]; input_1 += 16 |
| 2585 +# asm 1: vld1.8 {>z34=reg128#6%bot->z34=reg128#6%top},[<input_1=int32#2]! |
| 2586 +# asm 2: vld1.8 {>z34=d10->z34=d11},[<input_1=r1]! |
| 2587 +vld1.8 {d10-d11},[r1]! |
| 2588 + |
| 2589 +# qhasm: 2x mask = 0xffffffff |
| 2590 +# asm 1: vmov.i64 >mask=reg128#7,#0xffffffff |
| 2591 +# asm 2: vmov.i64 >mask=q6,#0xffffffff |
| 2592 +vmov.i64 q6,#0xffffffff |
| 2593 + |
| 2594 +# qhasm: 2x u4 = 0xff |
| 2595 +# asm 1: vmov.i64 >u4=reg128#8,#0xff |
| 2596 +# asm 2: vmov.i64 >u4=q7,#0xff |
| 2597 +vmov.i64 q7,#0xff |
| 2598 + |
| 2599 +# qhasm: x01 aligned= mem128[input_0];input_0+=16 |
| 2600 +# asm 1: vld1.8 {>x01=reg128#9%bot->x01=reg128#9%top},[<input_0=int32#1,: 128]! |
| 2601 +# asm 2: vld1.8 {>x01=d16->x01=d17},[<input_0=r0,: 128]! |
| 2602 +vld1.8 {d16-d17},[r0,: 128]! |
| 2603 + |
| 2604 +# qhasm: x23 aligned= mem128[input_0];input_0+=16 |
| 2605 +# asm 1: vld1.8 {>x23=reg128#10%bot->x23=reg128#10%top},[<input_0=int32#1,: 128
]! |
| 2606 +# asm 2: vld1.8 {>x23=d18->x23=d19},[<input_0=r0,: 128]! |
| 2607 +vld1.8 {d18-d19},[r0,: 128]! |
| 2608 + |
| 2609 +# qhasm: x4 aligned= mem64[input_0]x4[1] |
| 2610 +# asm 1: vld1.8 {<x4=reg128#11%bot},[<input_0=int32#1,: 64] |
| 2611 +# asm 2: vld1.8 {<x4=d20},[<input_0=r0,: 64] |
| 2612 +vld1.8 {d20},[r0,: 64] |
| 2613 + |
| 2614 +# qhasm: input_0 -= 32 |
| 2615 +# asm 1: sub >input_0=int32#1,<input_0=int32#1,#32 |
| 2616 +# asm 2: sub >input_0=r0,<input_0=r0,#32 |
| 2617 +sub r0,r0,#32 |
| 2618 + |
| 2619 +# qhasm: 2x mask unsigned>>=6 |
| 2620 +# asm 1: vshr.u64 >mask=reg128#7,<mask=reg128#7,#6 |
| 2621 +# asm 2: vshr.u64 >mask=q6,<mask=q6,#6 |
| 2622 +vshr.u64 q6,q6,#6 |
| 2623 + |
| 2624 +# qhasm: 2x u4 unsigned>>= 7 |
| 2625 +# asm 1: vshr.u64 >u4=reg128#8,<u4=reg128#8,#7 |
| 2626 +# asm 2: vshr.u64 >u4=q7,<u4=q7,#7 |
| 2627 +vshr.u64 q7,q7,#7 |
| 2628 + |
| 2629 +# qhasm: 4x 5y12 = y12 << 2 |
| 2630 +# asm 1: vshl.i32 >5y12=reg128#12,<y12=reg128#2,#2 |
| 2631 +# asm 2: vshl.i32 >5y12=q11,<y12=q1,#2 |
| 2632 +vshl.i32 q11,q1,#2 |
| 2633 + |
| 2634 +# qhasm: 4x 5y34 = y34 << 2 |
| 2635 +# asm 1: vshl.i32 >5y34=reg128#13,<y34=reg128#3,#2 |
| 2636 +# asm 2: vshl.i32 >5y34=q12,<y34=q2,#2 |
| 2637 +vshl.i32 q12,q2,#2 |
| 2638 + |
| 2639 +# qhasm: 4x 5y12 += y12 |
| 2640 +# asm 1: vadd.i32 >5y12=reg128#12,<5y12=reg128#12,<y12=reg128#2 |
| 2641 +# asm 2: vadd.i32 >5y12=q11,<5y12=q11,<y12=q1 |
| 2642 +vadd.i32 q11,q11,q1 |
| 2643 + |
| 2644 +# qhasm: 4x 5y34 += y34 |
| 2645 +# asm 1: vadd.i32 >5y34=reg128#13,<5y34=reg128#13,<y34=reg128#3 |
| 2646 +# asm 2: vadd.i32 >5y34=q12,<5y34=q12,<y34=q2 |
| 2647 +vadd.i32 q12,q12,q2 |
| 2648 + |
| 2649 +# qhasm: 2x u4 <<= 24 |
| 2650 +# asm 1: vshl.i64 >u4=reg128#8,<u4=reg128#8,#24 |
| 2651 +# asm 2: vshl.i64 >u4=q7,<u4=q7,#24 |
| 2652 +vshl.i64 q7,q7,#24 |
| 2653 + |
| 2654 +# qhasm: 4x 5z12 = z12 << 2 |
| 2655 +# asm 1: vshl.i32 >5z12=reg128#14,<z12=reg128#5,#2 |
| 2656 +# asm 2: vshl.i32 >5z12=q13,<z12=q4,#2 |
| 2657 +vshl.i32 q13,q4,#2 |
| 2658 + |
| 2659 +# qhasm: 4x 5z34 = z34 << 2 |
| 2660 +# asm 1: vshl.i32 >5z34=reg128#15,<z34=reg128#6,#2 |
| 2661 +# asm 2: vshl.i32 >5z34=q14,<z34=q5,#2 |
| 2662 +vshl.i32 q14,q5,#2 |
| 2663 + |
| 2664 +# qhasm: 4x 5z12 += z12 |
| 2665 +# asm 1: vadd.i32 >5z12=reg128#14,<5z12=reg128#14,<z12=reg128#5 |
| 2666 +# asm 2: vadd.i32 >5z12=q13,<5z12=q13,<z12=q4 |
| 2667 +vadd.i32 q13,q13,q4 |
| 2668 + |
| 2669 +# qhasm: 4x 5z34 += z34 |
| 2670 +# asm 1: vadd.i32 >5z34=reg128#15,<5z34=reg128#15,<z34=reg128#6 |
| 2671 +# asm 2: vadd.i32 >5z34=q14,<5z34=q14,<z34=q5 |
| 2672 +vadd.i32 q14,q14,q5 |
| 2673 + |
| 2674 +# qhasm: new two24 |
| 2675 + |
| 2676 +# qhasm: new y0_stack |
| 2677 + |
| 2678 +# qhasm: new y12_stack |
| 2679 + |
| 2680 +# qhasm: new y34_stack |
| 2681 + |
| 2682 +# qhasm: new 5y12_stack |
| 2683 + |
| 2684 +# qhasm: new 5y34_stack |
| 2685 + |
| 2686 +# qhasm: new z0_stack |
| 2687 + |
| 2688 +# qhasm: new z12_stack |
| 2689 + |
| 2690 +# qhasm: new z34_stack |
| 2691 + |
| 2692 +# qhasm: new 5z12_stack |
| 2693 + |
| 2694 +# qhasm: new 5z34_stack |
| 2695 + |
| 2696 +# qhasm: ptr = &two24 |
| 2697 +# asm 1: lea >ptr=int32#2,<two24=stack128#1 |
| 2698 +# asm 2: lea >ptr=r1,<two24=[sp,#0] |
| 2699 +add r1,sp,#0 |
| 2700 + |
| 2701 +# qhasm: mem128[ptr] aligned= u4 |
| 2702 +# asm 1: vst1.8 {<u4=reg128#8%bot-<u4=reg128#8%top},[<ptr=int32#2,: 128] |
| 2703 +# asm 2: vst1.8 {<u4=d14-<u4=d15},[<ptr=r1,: 128] |
| 2704 +vst1.8 {d14-d15},[r1,: 128] |
| 2705 + |
| 2706 +# qhasm: r4 = u4 |
| 2707 +# asm 1: vmov >r4=reg128#16,<u4=reg128#8 |
| 2708 +# asm 2: vmov >r4=q15,<u4=q7 |
| 2709 +vmov q15,q7 |
| 2710 + |
| 2711 +# qhasm: r0 = u4 |
| 2712 +# asm 1: vmov >r0=reg128#8,<u4=reg128#8 |
| 2713 +# asm 2: vmov >r0=q7,<u4=q7 |
| 2714 +vmov q7,q7 |
| 2715 + |
| 2716 +# qhasm: ptr = &y0_stack |
| 2717 +# asm 1: lea >ptr=int32#2,<y0_stack=stack128#2 |
| 2718 +# asm 2: lea >ptr=r1,<y0_stack=[sp,#16] |
| 2719 +add r1,sp,#16 |
| 2720 + |
| 2721 +# qhasm: mem128[ptr] aligned= y0 |
| 2722 +# asm 1: vst1.8 {<y0=reg128#1%bot-<y0=reg128#1%top},[<ptr=int32#2,: 128] |
| 2723 +# asm 2: vst1.8 {<y0=d0-<y0=d1},[<ptr=r1,: 128] |
| 2724 +vst1.8 {d0-d1},[r1,: 128] |
| 2725 + |
| 2726 +# qhasm: ptr = &y12_stack |
| 2727 +# asm 1: lea >ptr=int32#2,<y12_stack=stack128#3 |
| 2728 +# asm 2: lea >ptr=r1,<y12_stack=[sp,#32] |
| 2729 +add r1,sp,#32 |
| 2730 + |
| 2731 +# qhasm: mem128[ptr] aligned= y12 |
| 2732 +# asm 1: vst1.8 {<y12=reg128#2%bot-<y12=reg128#2%top},[<ptr=int32#2,: 128] |
| 2733 +# asm 2: vst1.8 {<y12=d2-<y12=d3},[<ptr=r1,: 128] |
| 2734 +vst1.8 {d2-d3},[r1,: 128] |
| 2735 + |
| 2736 +# qhasm: ptr = &y34_stack |
| 2737 +# asm 1: lea >ptr=int32#2,<y34_stack=stack128#4 |
| 2738 +# asm 2: lea >ptr=r1,<y34_stack=[sp,#48] |
| 2739 +add r1,sp,#48 |
| 2740 + |
| 2741 +# qhasm: mem128[ptr] aligned= y34 |
| 2742 +# asm 1: vst1.8 {<y34=reg128#3%bot-<y34=reg128#3%top},[<ptr=int32#2,: 128] |
| 2743 +# asm 2: vst1.8 {<y34=d4-<y34=d5},[<ptr=r1,: 128] |
| 2744 +vst1.8 {d4-d5},[r1,: 128] |
| 2745 + |
| 2746 +# qhasm: ptr = &z0_stack |
| 2747 +# asm 1: lea >ptr=int32#2,<z0_stack=stack128#7 |
| 2748 +# asm 2: lea >ptr=r1,<z0_stack=[sp,#96] |
| 2749 +add r1,sp,#96 |
| 2750 + |
| 2751 +# qhasm: mem128[ptr] aligned= z0 |
| 2752 +# asm 1: vst1.8 {<z0=reg128#4%bot-<z0=reg128#4%top},[<ptr=int32#2,: 128] |
| 2753 +# asm 2: vst1.8 {<z0=d6-<z0=d7},[<ptr=r1,: 128] |
| 2754 +vst1.8 {d6-d7},[r1,: 128] |
| 2755 + |
| 2756 +# qhasm: ptr = &z12_stack |
| 2757 +# asm 1: lea >ptr=int32#2,<z12_stack=stack128#8 |
| 2758 +# asm 2: lea >ptr=r1,<z12_stack=[sp,#112] |
| 2759 +add r1,sp,#112 |
| 2760 + |
| 2761 +# qhasm: mem128[ptr] aligned= z12 |
| 2762 +# asm 1: vst1.8 {<z12=reg128#5%bot-<z12=reg128#5%top},[<ptr=int32#2,: 128] |
| 2763 +# asm 2: vst1.8 {<z12=d8-<z12=d9},[<ptr=r1,: 128] |
| 2764 +vst1.8 {d8-d9},[r1,: 128] |
| 2765 + |
| 2766 +# qhasm: ptr = &z34_stack |
| 2767 +# asm 1: lea >ptr=int32#2,<z34_stack=stack128#9 |
| 2768 +# asm 2: lea >ptr=r1,<z34_stack=[sp,#128] |
| 2769 +add r1,sp,#128 |
| 2770 + |
| 2771 +# qhasm: mem128[ptr] aligned= z34 |
| 2772 +# asm 1: vst1.8 {<z34=reg128#6%bot-<z34=reg128#6%top},[<ptr=int32#2,: 128] |
| 2773 +# asm 2: vst1.8 {<z34=d10-<z34=d11},[<ptr=r1,: 128] |
| 2774 +vst1.8 {d10-d11},[r1,: 128] |
| 2775 + |
| 2776 +# qhasm: ptr = &5y12_stack |
| 2777 +# asm 1: lea >ptr=int32#2,<5y12_stack=stack128#5 |
| 2778 +# asm 2: lea >ptr=r1,<5y12_stack=[sp,#64] |
| 2779 +add r1,sp,#64 |
| 2780 + |
| 2781 +# qhasm: mem128[ptr] aligned= 5y12 |
| 2782 +# asm 1: vst1.8 {<5y12=reg128#12%bot-<5y12=reg128#12%top},[<ptr=int32#2,: 128] |
| 2783 +# asm 2: vst1.8 {<5y12=d22-<5y12=d23},[<ptr=r1,: 128] |
| 2784 +vst1.8 {d22-d23},[r1,: 128] |
| 2785 + |
| 2786 +# qhasm: ptr = &5y34_stack |
| 2787 +# asm 1: lea >ptr=int32#2,<5y34_stack=stack128#6 |
| 2788 +# asm 2: lea >ptr=r1,<5y34_stack=[sp,#80] |
| 2789 +add r1,sp,#80 |
| 2790 + |
| 2791 +# qhasm: mem128[ptr] aligned= 5y34 |
| 2792 +# asm 1: vst1.8 {<5y34=reg128#13%bot-<5y34=reg128#13%top},[<ptr=int32#2,: 128] |
| 2793 +# asm 2: vst1.8 {<5y34=d24-<5y34=d25},[<ptr=r1,: 128] |
| 2794 +vst1.8 {d24-d25},[r1,: 128] |
| 2795 + |
| 2796 +# qhasm: ptr = &5z12_stack |
| 2797 +# asm 1: lea >ptr=int32#2,<5z12_stack=stack128#10 |
| 2798 +# asm 2: lea >ptr=r1,<5z12_stack=[sp,#144] |
| 2799 +add r1,sp,#144 |
| 2800 + |
| 2801 +# qhasm: mem128[ptr] aligned= 5z12 |
| 2802 +# asm 1: vst1.8 {<5z12=reg128#14%bot-<5z12=reg128#14%top},[<ptr=int32#2,: 128] |
| 2803 +# asm 2: vst1.8 {<5z12=d26-<5z12=d27},[<ptr=r1,: 128] |
| 2804 +vst1.8 {d26-d27},[r1,: 128] |
| 2805 + |
| 2806 +# qhasm: ptr = &5z34_stack |
| 2807 +# asm 1: lea >ptr=int32#2,<5z34_stack=stack128#11 |
| 2808 +# asm 2: lea >ptr=r1,<5z34_stack=[sp,#160] |
| 2809 +add r1,sp,#160 |
| 2810 + |
| 2811 +# qhasm: mem128[ptr] aligned= 5z34 |
| 2812 +# asm 1: vst1.8 {<5z34=reg128#15%bot-<5z34=reg128#15%top},[<ptr=int32#2,: 128] |
| 2813 +# asm 2: vst1.8 {<5z34=d28-<5z34=d29},[<ptr=r1,: 128] |
| 2814 +vst1.8 {d28-d29},[r1,: 128] |
| 2815 + |
| 2816 +# qhasm: unsigned>? len - 64 |
| 2817 +# asm 1: cmp <len=int32#4,#64 |
| 2818 +# asm 2: cmp <len=r3,#64 |
| 2819 +cmp r3,#64 |
| 2820 + |
| 2821 +# qhasm: goto below64bytes if !unsigned> |
| 2822 +bls ._below64bytes |
| 2823 + |
| 2824 +# qhasm: input_2 += 32 |
| 2825 +# asm 1: add >input_2=int32#2,<input_2=int32#3,#32 |
| 2826 +# asm 2: add >input_2=r1,<input_2=r2,#32 |
| 2827 +add r1,r2,#32 |
| 2828 + |
| 2829 +# qhasm: mainloop2: |
| 2830 +._mainloop2: |
| 2831 + |
| 2832 +# qhasm: c01 = mem128[input_2];input_2+=16 |
| 2833 +# asm 1: vld1.8 {>c01=reg128#1%bot->c01=reg128#1%top},[<input_2=int32#2]! |
| 2834 +# asm 2: vld1.8 {>c01=d0->c01=d1},[<input_2=r1]! |
| 2835 +vld1.8 {d0-d1},[r1]! |
| 2836 + |
| 2837 +# qhasm: c23 = mem128[input_2];input_2+=16 |
| 2838 +# asm 1: vld1.8 {>c23=reg128#2%bot->c23=reg128#2%top},[<input_2=int32#2]! |
| 2839 +# asm 2: vld1.8 {>c23=d2->c23=d3},[<input_2=r1]! |
| 2840 +vld1.8 {d2-d3},[r1]! |
| 2841 + |
| 2842 +# qhasm: r4[0,1] += x01[0] unsigned* z34[2]; r4[2,3] += x01[1] unsigned* z34
[3] |
| 2843 +# asm 1: vmlal.u32 <r4=reg128#16,<x01=reg128#9%bot,<z34=reg128#6%top |
| 2844 +# asm 2: vmlal.u32 <r4=q15,<x01=d16,<z34=d11 |
| 2845 +vmlal.u32 q15,d16,d11 |
| 2846 + |
| 2847 +# qhasm: ptr = &z12_stack |
| 2848 +# asm 1: lea >ptr=int32#3,<z12_stack=stack128#8 |
| 2849 +# asm 2: lea >ptr=r2,<z12_stack=[sp,#112] |
| 2850 +add r2,sp,#112 |
| 2851 + |
| 2852 +# qhasm: z12 aligned= mem128[ptr] |
| 2853 +# asm 1: vld1.8 {>z12=reg128#3%bot->z12=reg128#3%top},[<ptr=int32#3,: 128] |
| 2854 +# asm 2: vld1.8 {>z12=d4->z12=d5},[<ptr=r2,: 128] |
| 2855 +vld1.8 {d4-d5},[r2,: 128] |
| 2856 + |
| 2857 +# qhasm: r4[0,1] += x01[2] unsigned* z34[0]; r4[2,3] += x01[3] unsigned* z34[1
] |
| 2858 +# asm 1: vmlal.u32 <r4=reg128#16,<x01=reg128#9%top,<z34=reg128#6%bot |
| 2859 +# asm 2: vmlal.u32 <r4=q15,<x01=d17,<z34=d10 |
| 2860 +vmlal.u32 q15,d17,d10 |
| 2861 + |
| 2862 +# qhasm: ptr = &z0_stack |
| 2863 +# asm 1: lea >ptr=int32#3,<z0_stack=stack128#7 |
| 2864 +# asm 2: lea >ptr=r2,<z0_stack=[sp,#96] |
| 2865 +add r2,sp,#96 |
| 2866 + |
| 2867 +# qhasm: z0 aligned= mem128[ptr] |
| 2868 +# asm 1: vld1.8 {>z0=reg128#4%bot->z0=reg128#4%top},[<ptr=int32#3,: 128] |
| 2869 +# asm 2: vld1.8 {>z0=d6->z0=d7},[<ptr=r2,: 128] |
| 2870 +vld1.8 {d6-d7},[r2,: 128] |
| 2871 + |
| 2872 +# qhasm: r4[0,1] += x23[0] unsigned* z12[2]; r4[2,3] += x23[1] unsigned* z12[3
] |
| 2873 +# asm 1: vmlal.u32 <r4=reg128#16,<x23=reg128#10%bot,<z12=reg128#3%top |
| 2874 +# asm 2: vmlal.u32 <r4=q15,<x23=d18,<z12=d5 |
| 2875 +vmlal.u32 q15,d18,d5 |
| 2876 + |
| 2877 +# qhasm: c01 c23 = c01[0]c01[1]c01[2]c23[2]c23[0]c23[1]c01[3]c23[3] |
| 2878 +# asm 1: vtrn.32 <c01=reg128#1%top,<c23=reg128#2%top |
| 2879 +# asm 2: vtrn.32 <c01=d1,<c23=d3 |
| 2880 +vtrn.32 d1,d3 |
| 2881 + |
| 2882 +# qhasm: r4[0,1] += x23[2] unsigned* z12[0]; r4[2,3] += x23[3] unsigned* z12[1
] |
| 2883 +# asm 1: vmlal.u32 <r4=reg128#16,<x23=reg128#10%top,<z12=reg128#3%bot |
| 2884 +# asm 2: vmlal.u32 <r4=q15,<x23=d19,<z12=d4 |
| 2885 +vmlal.u32 q15,d19,d4 |
| 2886 + |
| 2887 +# qhasm: r4[0,1] += x4[0] unsigned* z0[0]; r4[2,3] += x4[1] unsigned* z0[1] |
| 2888 +# asm 1: vmlal.u32 <r4=reg128#16,<x4=reg128#11%bot,<z0=reg128#4%bot |
| 2889 +# asm 2: vmlal.u32 <r4=q15,<x4=d20,<z0=d6 |
| 2890 +vmlal.u32 q15,d20,d6 |
| 2891 + |
| 2892 +# qhasm: r3[0,1] = c23[2]<<18; r3[2,3] = c23[3]<<18 |
| 2893 +# asm 1: vshll.u32 >r3=reg128#5,<c23=reg128#2%top,#18 |
| 2894 +# asm 2: vshll.u32 >r3=q4,<c23=d3,#18 |
| 2895 +vshll.u32 q4,d3,#18 |
| 2896 + |
| 2897 +# qhasm: c01 c23 = c01[0]c23[0]c01[2]c01[3]c01[1]c23[1]c23[2]c23[3] |
| 2898 +# asm 1: vtrn.32 <c01=reg128#1%bot,<c23=reg128#2%bot |
| 2899 +# asm 2: vtrn.32 <c01=d0,<c23=d2 |
| 2900 +vtrn.32 d0,d2 |
| 2901 + |
| 2902 +# qhasm: r3[0,1] += x01[0] unsigned* z34[0]; r3[2,3] += x01[1] unsigned* z34[
1] |
| 2903 +# asm 1: vmlal.u32 <r3=reg128#5,<x01=reg128#9%bot,<z34=reg128#6%bot |
| 2904 +# asm 2: vmlal.u32 <r3=q4,<x01=d16,<z34=d10 |
| 2905 +vmlal.u32 q4,d16,d10 |
| 2906 + |
| 2907 +# qhasm: r3[0,1] += x01[2] unsigned* z12[2]; r3[2,3] += x01[3] unsigned* z12[
3] |
| 2908 +# asm 1: vmlal.u32 <r3=reg128#5,<x01=reg128#9%top,<z12=reg128#3%top |
| 2909 +# asm 2: vmlal.u32 <r3=q4,<x01=d17,<z12=d5 |
| 2910 +vmlal.u32 q4,d17,d5 |
| 2911 + |
| 2912 +# qhasm: r0 = r0[1]c01[0]r0[2,3] |
| 2913 +# asm 1: vext.32 <r0=reg128#8%bot,<r0=reg128#8%bot,<c01=reg128#1%bot,#1 |
| 2914 +# asm 2: vext.32 <r0=d14,<r0=d14,<c01=d0,#1 |
| 2915 +vext.32 d14,d14,d0,#1 |
| 2916 + |
| 2917 +# qhasm: r3[0,1] += x23[0] unsigned* z12[0]; r3[2,3] += x23[1] unsigned* z12[
1] |
| 2918 +# asm 1: vmlal.u32 <r3=reg128#5,<x23=reg128#10%bot,<z12=reg128#3%bot |
| 2919 +# asm 2: vmlal.u32 <r3=q4,<x23=d18,<z12=d4 |
| 2920 +vmlal.u32 q4,d18,d4 |
| 2921 + |
| 2922 +# qhasm: input_2
-= 64 |
| 2923 +# asm 1: sub >input_2=int32#2,<input_2=int32#2,#64 |
| 2924 +# asm 2: sub >input_2=r1,<input_2=r1,#64 |
| 2925 +sub r1,r1,#64 |
| 2926 + |
| 2927 +# qhasm: r3[0,1] += x23[2] unsigned* z0[0]; r3[2,3] += x23[3] unsigned* z0[1] |
| 2928 +# asm 1: vmlal.u32 <r3=reg128#5,<x23=reg128#10%top,<z0=reg128#4%bot |
| 2929 +# asm 2: vmlal.u32 <r3=q4,<x23=d19,<z0=d6 |
| 2930 +vmlal.u32 q4,d19,d6 |
| 2931 + |
| 2932 +# qhasm: ptr = &5z34_stack |
| 2933 +# asm 1: lea >ptr=int32#3,<5z34_stack=stack128#11 |
| 2934 +# asm 2: lea >ptr=r2,<5z34_stack=[sp,#160] |
| 2935 +add r2,sp,#160 |
| 2936 + |
| 2937 +# qhasm: 5z34 aligned= mem128[ptr] |
| 2938 +# asm 1: vld1.8 {>5z34=reg128#6%bot->5z34=reg128#6%top},[<ptr=int32#3,: 128] |
| 2939 +# asm 2: vld1.8 {>5z34=d10->5z34=d11},[<ptr=r2,: 128] |
| 2940 +vld1.8 {d10-d11},[r2,: 128] |
| 2941 + |
| 2942 +# qhasm: r3[0,1] += x4[0] unsigned* 5z34[2]; r3[2,3] += x4[1] unsigned* 5z3
4[3] |
| 2943 +# asm 1: vmlal.u32 <r3=reg128#5,<x4=reg128#11%bot,<5z34=reg128#6%top |
| 2944 +# asm 2: vmlal.u32 <r3=q4,<x4=d20,<5z34=d11 |
| 2945 +vmlal.u32 q4,d20,d11 |
| 2946 + |
| 2947 +# qhasm: r0 = r0[1]r0[0]r0[3]r0[2] |
| 2948 +# asm 1: vrev64.i32 >r0=reg128#8,<r0=reg128#8 |
| 2949 +# asm 2: vrev64.i32 >r0=q7,<r0=q7 |
| 2950 +vrev64.i32 q7,q7 |
| 2951 + |
| 2952 +# qhasm: r2[0,1] = c01[2]<<12; r2[2,3] = c01[3]<<12 |
| 2953 +# asm 1: vshll.u32 >r2=reg128#14,<c01=reg128#1%top,#12 |
| 2954 +# asm 2: vshll.u32 >r2=q13,<c01=d1,#12 |
| 2955 +vshll.u32 q13,d1,#12 |
| 2956 + |
| 2957 +# qhasm: d01 = mem128[input_2];input_2+=16 |
| 2958 +# asm 1: vld1.8 {>d01=reg128#12%bot->d01=reg128#12%top},[<input_2=int32#2]! |
| 2959 +# asm 2: vld1.8 {>d01=d22->d01=d23},[<input_2=r1]! |
| 2960 +vld1.8 {d22-d23},[r1]! |
| 2961 + |
| 2962 +# qhasm: r2[0,1] += x01[0] unsigned* z12[2]; r2[2,3] += x01[1] unsigned* z12[
3] |
| 2963 +# asm 1: vmlal.u32 <r2=reg128#14,<x01=reg128#9%bot,<z12=reg128#3%top |
| 2964 +# asm 2: vmlal.u32 <r2=q13,<x01=d16,<z12=d5 |
| 2965 +vmlal.u32 q13,d16,d5 |
| 2966 + |
| 2967 +# qhasm: r2[0,1] += x01[2] unsigned* z12[0]; r2[2,3] += x01[3] unsigned* z12[
1] |
| 2968 +# asm 1: vmlal.u32 <r2=reg128#14,<x01=reg128#9%top,<z12=reg128#3%bot |
| 2969 +# asm 2: vmlal.u32 <r2=q13,<x01=d17,<z12=d4 |
| 2970 +vmlal.u32 q13,d17,d4 |
| 2971 + |
| 2972 +# qhasm: r2[0,1] += x23[0] unsigned* z0[0]; r2[2,3] += x23[1] unsigned* z0[1] |
| 2973 +# asm 1: vmlal.u32 <r2=reg128#14,<x23=reg128#10%bot,<z0=reg128#4%bot |
| 2974 +# asm 2: vmlal.u32 <r2=q13,<x23=d18,<z0=d6 |
| 2975 +vmlal.u32 q13,d18,d6 |
| 2976 + |
| 2977 +# qhasm: r2[0,1] += x23[2] unsigned* 5z34[2]; r2[2,3] += x23[3] unsigned* 5z3
4[3] |
| 2978 +# asm 1: vmlal.u32 <r2=reg128#14,<x23=reg128#10%top,<5z34=reg128#6%top |
| 2979 +# asm 2: vmlal.u32 <r2=q13,<x23=d19,<5z34=d11 |
| 2980 +vmlal.u32 q13,d19,d11 |
| 2981 + |
| 2982 +# qhasm: r2[0,1] += x4[0] unsigned* 5z34[0]; r2[2,3] += x4[1] unsigned* 5z34[
1] |
| 2983 +# asm 1: vmlal.u32 <r2=reg128#14,<x4=reg128#11%bot,<5z34=reg128#6%bot |
| 2984 +# asm 2: vmlal.u32 <r2=q13,<x4=d20,<5z34=d10 |
| 2985 +vmlal.u32 q13,d20,d10 |
| 2986 + |
| 2987 +# qhasm: r0 = r0[0,1]c01[1]r0[2] |
| 2988 +# asm 1: vext.32 <r0=reg128#8%top,<c01=reg128#1%bot,<r0=reg128#8%top,#1 |
| 2989 +# asm 2: vext.32 <r0=d15,<c01=d0,<r0=d15,#1 |
| 2990 +vext.32 d15,d0,d15,#1 |
| 2991 + |
| 2992 +# qhasm: r1[0,1] = c23[0]<<6; r1[2,3] = c23[1]<<6 |
| 2993 +# asm 1: vshll.u32 >r1=reg128#15,<c23=reg128#2%bot,#6 |
| 2994 +# asm 2: vshll.u32 >r1=q14,<c23=d2,#6 |
| 2995 +vshll.u32 q14,d2,#6 |
| 2996 + |
| 2997 +# qhasm: r1[0,1] += x01[0] unsigned* z12[0]; r1[2,3] += x01[1] unsigned* z12[
1] |
| 2998 +# asm 1: vmlal.u32 <r1=reg128#15,<x01=reg128#9%bot,<z12=reg128#3%bot |
| 2999 +# asm 2: vmlal.u32 <r1=q14,<x01=d16,<z12=d4 |
| 3000 +vmlal.u32 q14,d16,d4 |
| 3001 + |
| 3002 +# qhasm: r1[0,1] += x01[2] unsigned* z0[0]; r1[2,3] += x01[3] unsigned* z0[1] |
| 3003 +# asm 1: vmlal.u32 <r1=reg128#15,<x01=reg128#9%top,<z0=reg128#4%bot |
| 3004 +# asm 2: vmlal.u32 <r1=q14,<x01=d17,<z0=d6 |
| 3005 +vmlal.u32 q14,d17,d6 |
| 3006 + |
| 3007 +# qhasm: r1[0,1] += x23[0] unsigned* 5z34[2]; r1[2,3] += x23[1] unsigned* 5z3
4[3] |
| 3008 +# asm 1: vmlal.u32 <r1=reg128#15,<x23=reg128#10%bot,<5z34=reg128#6%top |
| 3009 +# asm 2: vmlal.u32 <r1=q14,<x23=d18,<5z34=d11 |
| 3010 +vmlal.u32 q14,d18,d11 |
| 3011 + |
| 3012 +# qhasm: r1[0,1] += x23[2] unsigned* 5z34[0]; r1[2,3] += x23[3] unsigned* 5z34[
1] |
| 3013 +# asm 1: vmlal.u32 <r1=reg128#15,<x23=reg128#10%top,<5z34=reg128#6%bot |
| 3014 +# asm 2: vmlal.u32 <r1=q14,<x23=d19,<5z34=d10 |
| 3015 +vmlal.u32 q14,d19,d10 |
| 3016 + |
| 3017 +# qhasm: ptr = &5z12_stack |
| 3018 +# asm 1: lea >ptr=int32#3,<5z12_stack=stack128#10 |
| 3019 +# asm 2: lea >ptr=r2,<5z12_stack=[sp,#144] |
| 3020 +add r2,sp,#144 |
| 3021 + |
| 3022 +# qhasm: 5z12 aligned= mem128[ptr] |
| 3023 +# asm 1: vld1.8 {>5z12=reg128#1%bot->5z12=reg128#1%top},[<ptr=int32#3,: 128] |
| 3024 +# asm 2: vld1.8 {>5z12=d0->5z12=d1},[<ptr=r2,: 128] |
| 3025 +vld1.8 {d0-d1},[r2,: 128] |
| 3026 + |
| 3027 +# qhasm: r1[0,1] += x4[0] unsigned* 5z12[2]; r1[2,3] += x4[1] unsigned* 5z12[
3] |
| 3028 +# asm 1: vmlal.u32 <r1=reg128#15,<x4=reg128#11%bot,<5z12=reg128#1%top |
| 3029 +# asm 2: vmlal.u32 <r1=q14,<x4=d20,<5z12=d1 |
| 3030 +vmlal.u32 q14,d20,d1 |
| 3031 + |
| 3032 +# qhasm: d23 = mem128[input_2];input_2+=16 |
| 3033 +# asm 1: vld1.8 {>d23=reg128#2%bot->d23=reg128#2%top},[<input_2=int32#2]! |
| 3034 +# asm 2: vld1.8 {>d23=d2->d23=d3},[<input_2=r1]! |
| 3035 +vld1.8 {d2-d3},[r1]! |
| 3036 + |
| 3037 +# qhasm: input_2 += 32 |
| 3038 +# asm 1: add >input_2=int32#2,<input_2=int32#2,#32 |
| 3039 +# asm 2: add >input_2=r1,<input_2=r1,#32 |
| 3040 +add r1,r1,#32 |
| 3041 + |
| 3042 +# qhasm: r0[0,1] += x4[0] unsigned* 5z12[0]; r0[2,3] += x4[1] unsigned* 5z12[
1] |
| 3043 +# asm 1: vmlal.u32 <r0=reg128#8,<x4=reg128#11%bot,<5z12=reg128#1%bot |
| 3044 +# asm 2: vmlal.u32 <r0=q7,<x4=d20,<5z12=d0 |
| 3045 +vmlal.u32 q7,d20,d0 |
| 3046 + |
| 3047 +# qhasm: r0[0,1] += x23[0] unsigned* 5z34[0]; r0[2,3] += x23[1] unsigned* 5z34[
1] |
| 3048 +# asm 1: vmlal.u32 <r0=reg128#8,<x23=reg128#10%bot,<5z34=reg128#6%bot |
| 3049 +# asm 2: vmlal.u32 <r0=q7,<x23=d18,<5z34=d10 |
| 3050 +vmlal.u32 q7,d18,d10 |
| 3051 + |
| 3052 +# qhasm: d01 d23 = d01[0] d23[0] d01[1] d23[1] |
| 3053 +# asm 1: vswp <d23=reg128#2%bot,<d01=reg128#12%top |
| 3054 +# asm 2: vswp <d23=d2,<d01=d23 |
| 3055 +vswp d2,d23 |
| 3056 + |
| 3057 +# qhasm: r0[0,1] += x23[2] unsigned* 5z12[2]; r0[2,3] += x23[3] unsigned* 5z12[
3] |
| 3058 +# asm 1: vmlal.u32 <r0=reg128#8,<x23=reg128#10%top,<5z12=reg128#1%top |
| 3059 +# asm 2: vmlal.u32 <r0=q7,<x23=d19,<5z12=d1 |
| 3060 +vmlal.u32 q7,d19,d1 |
| 3061 + |
| 3062 +# qhasm: r0[0,1] += x01[0] unsigned* z0[0]; r0[2,3] += x01[1] unsigned* z0[1] |
| 3063 +# asm 1: vmlal.u32 <r0=reg128#8,<x01=reg128#9%bot,<z0=reg128#4%bot |
| 3064 +# asm 2: vmlal.u32 <r0=q7,<x01=d16,<z0=d6 |
| 3065 +vmlal.u32 q7,d16,d6 |
| 3066 + |
| 3067 +# qhasm: new mid |
| 3068 + |
| 3069 +# qhasm: 2x v4 = d23 unsigned>> 40 |
| 3070 +# asm 1: vshr.u64 >v4=reg128#4,<d23=reg128#2,#40 |
| 3071 +# asm 2: vshr.u64 >v4=q3,<d23=q1,#40 |
| 3072 +vshr.u64 q3,q1,#40 |
| 3073 + |
| 3074 +# qhasm: mid = d01[1]d23[0] mid[2,3] |
| 3075 +# asm 1: vext.32 <mid=reg128#1%bot,<d01=reg128#12%bot,<d23=reg128#2%bot,#1 |
| 3076 +# asm 2: vext.32 <mid=d0,<d01=d22,<d23=d2,#1 |
| 3077 +vext.32 d0,d22,d2,#1 |
| 3078 + |
| 3079 +# qhasm: new v23 |
| 3080 + |
| 3081 +# qhasm: v23[2] = d23[0,1] unsigned>> 14; v23[3] = d23[2,3] unsig
ned>> 14 |
| 3082 +# asm 1: vshrn.u64 <v23=reg128#10%top,<d23=reg128#2,#14 |
| 3083 +# asm 2: vshrn.u64 <v23=d19,<d23=q1,#14 |
| 3084 +vshrn.u64 d19,q1,#14 |
| 3085 + |
| 3086 +# qhasm: mid = mid[0,1] d01[3]d23[2] |
| 3087 +# asm 1: vext.32 <mid=reg128#1%top,<d01=reg128#12%top,<d23=reg128#2%top,#1 |
| 3088 +# asm 2: vext.32 <mid=d1,<d01=d23,<d23=d3,#1 |
| 3089 +vext.32 d1,d23,d3,#1 |
| 3090 + |
| 3091 +# qhasm: new v01 |
| 3092 + |
| 3093 +# qhasm: v01[2] = d01[0,1] unsigned>> 26; v01[3] = d01[2,3] unsig
ned>> 26 |
| 3094 +# asm 1: vshrn.u64 <v01=reg128#11%top,<d01=reg128#12,#26 |
| 3095 +# asm 2: vshrn.u64 <v01=d21,<d01=q11,#26 |
| 3096 +vshrn.u64 d21,q11,#26 |
| 3097 + |
| 3098 +# qhasm: v01 = d01[1]d01[0] v01[2,3] |
| 3099 +# asm 1: vext.32 <v01=reg128#11%bot,<d01=reg128#12%bot,<d01=reg128#12%bot,#1 |
| 3100 +# asm 2: vext.32 <v01=d20,<d01=d22,<d01=d22,#1 |
| 3101 +vext.32 d20,d22,d22,#1 |
| 3102 + |
| 3103 +# qhasm: r0[0,1] += x01[2] unsigned* 5z34[2]; r0[2,3] += x01[3] unsigned* 5z3
4[3] |
| 3104 +# asm 1: vmlal.u32 <r0=reg128#8,<x01=reg128#9%top,<5z34=reg128#6%top |
| 3105 +# asm 2: vmlal.u32 <r0=q7,<x01=d17,<5z34=d11 |
| 3106 +vmlal.u32 q7,d17,d11 |
| 3107 + |
| 3108 +# qhasm: v01 = v01[1]d01[2] v01[2,3] |
| 3109 +# asm 1: vext.32 <v01=reg128#11%bot,<v01=reg128#11%bot,<d01=reg128#12%top,#1 |
| 3110 +# asm 2: vext.32 <v01=d20,<v01=d20,<d01=d23,#1 |
| 3111 +vext.32 d20,d20,d23,#1 |
| 3112 + |
| 3113 +# qhasm: v23[0] = mid[0,1] unsigned>> 20; v23[1] = mid[2,3] unsig
ned>> 20 |
| 3114 +# asm 1: vshrn.u64 <v23=reg128#10%bot,<mid=reg128#1,#20 |
| 3115 +# asm 2: vshrn.u64 <v23=d18,<mid=q0,#20 |
| 3116 +vshrn.u64 d18,q0,#20 |
| 3117 + |
| 3118 +# qhasm: v4 = v4[0]v4[2]v4[1]v4[3] |
| 3119 +# asm 1: vtrn.32 <v4=reg128#4%bot,<v4=reg128#4%top |
| 3120 +# asm 2: vtrn.32 <v4=d6,<v4=d7 |
| 3121 +vtrn.32 d6,d7 |
| 3122 + |
| 3123 +# qhasm: 4x v01 &= 0x03ffffff |
| 3124 +# asm 1: vand.i32 <v01=reg128#11,#0x03ffffff |
| 3125 +# asm 2: vand.i32 <v01=q10,#0x03ffffff |
| 3126 +vand.i32 q10,#0x03ffffff |
| 3127 + |
| 3128 +# qhasm: ptr = &y34_stack |
| 3129 +# asm 1: lea >ptr=int32#3,<y34_stack=stack128#4 |
| 3130 +# asm 2: lea >ptr=r2,<y34_stack=[sp,#48] |
| 3131 +add r2,sp,#48 |
| 3132 + |
| 3133 +# qhasm: y34 aligned= mem128[ptr] |
| 3134 +# asm 1: vld1.8 {>y34=reg128#3%bot->y34=reg128#3%top},[<ptr=int32#3,: 128] |
| 3135 +# asm 2: vld1.8 {>y34=d4->y34=d5},[<ptr=r2,: 128] |
| 3136 +vld1.8 {d4-d5},[r2,: 128] |
| 3137 + |
| 3138 +# qhasm: 4x v23 &= 0x03ffffff |
| 3139 +# asm 1: vand.i32 <v23=reg128#10,#0x03ffffff |
| 3140 +# asm 2: vand.i32 <v23=q9,#0x03ffffff |
| 3141 +vand.i32 q9,#0x03ffffff |
| 3142 + |
| 3143 +# qhasm: ptr = &y12_stack |
| 3144 +# asm 1: lea >ptr=int32#3,<y12_stack=stack128#3 |
| 3145 +# asm 2: lea >ptr=r2,<y12_stack=[sp,#32] |
| 3146 +add r2,sp,#32 |
| 3147 + |
| 3148 +# qhasm: y12 aligned= mem128[ptr] |
| 3149 +# asm 1: vld1.8 {>y12=reg128#2%bot->y12=reg128#2%top},[<ptr=int32#3,: 128] |
| 3150 +# asm 2: vld1.8 {>y12=d2->y12=d3},[<ptr=r2,: 128] |
| 3151 +vld1.8 {d2-d3},[r2,: 128] |
| 3152 + |
| 3153 +# qhasm: 4x v4 |= 0x01000000 |
| 3154 +# asm 1: vorr.i32 <v4=reg128#4,#0x01000000 |
| 3155 +# asm 2: vorr.i32 <v4=q3,#0x01000000 |
| 3156 +vorr.i32 q3,#0x01000000 |
| 3157 + |
| 3158 +# qhasm: ptr = &y0_stack |
| 3159 +# asm 1: lea >ptr=int32#3,<y0_stack=stack128#2 |
| 3160 +# asm 2: lea >ptr=r2,<y0_stack=[sp,#16] |
| 3161 +add r2,sp,#16 |
| 3162 + |
| 3163 +# qhasm: y0 aligned= mem128[ptr] |
| 3164 +# asm 1: vld1.8 {>y0=reg128#1%bot->y0=reg128#1%top},[<ptr=int32#3,: 128] |
| 3165 +# asm 2: vld1.8 {>y0=d0->y0=d1},[<ptr=r2,: 128] |
| 3166 +vld1.8 {d0-d1},[r2,: 128] |
| 3167 + |
| 3168 +# qhasm: r4[0,1] += v01[0] unsigned* y34[2]; r4[2,3] += v01[1] unsigned* y34
[3] |
| 3169 +# asm 1: vmlal.u32 <r4=reg128#16,<v01=reg128#11%bot,<y34=reg128#3%top |
| 3170 +# asm 2: vmlal.u32 <r4=q15,<v01=d20,<y34=d5 |
| 3171 +vmlal.u32 q15,d20,d5 |
| 3172 + |
| 3173 +# qhasm: r4[0,1] += v01[2] unsigned* y34[0]; r4[2,3] += v01[3] unsigned* y34[1
] |
| 3174 +# asm 1: vmlal.u32 <r4=reg128#16,<v01=reg128#11%top,<y34=reg128#3%bot |
| 3175 +# asm 2: vmlal.u32 <r4=q15,<v01=d21,<y34=d4 |
| 3176 +vmlal.u32 q15,d21,d4 |
| 3177 + |
| 3178 +# qhasm: r4[0,1] += v23[0] unsigned* y12[2]; r4[2,3] += v23[1] unsigned* y12[3
] |
| 3179 +# asm 1: vmlal.u32 <r4=reg128#16,<v23=reg128#10%bot,<y12=reg128#2%top |
| 3180 +# asm 2: vmlal.u32 <r4=q15,<v23=d18,<y12=d3 |
| 3181 +vmlal.u32 q15,d18,d3 |
| 3182 + |
| 3183 +# qhasm: r4[0,1] += v23[2] unsigned* y12[0]; r4[2,3] += v23[3] unsigned* y12[1
] |
| 3184 +# asm 1: vmlal.u32 <r4=reg128#16,<v23=reg128#10%top,<y12=reg128#2%bot |
| 3185 +# asm 2: vmlal.u32 <r4=q15,<v23=d19,<y12=d2 |
| 3186 +vmlal.u32 q15,d19,d2 |
| 3187 + |
| 3188 +# qhasm: r4[0,1] += v4[0] unsigned* y0[0]; r4[2,3] += v4[1] unsigned* y0[1] |
| 3189 +# asm 1: vmlal.u32 <r4=reg128#16,<v4=reg128#4%bot,<y0=reg128#1%bot |
| 3190 +# asm 2: vmlal.u32 <r4=q15,<v4=d6,<y0=d0 |
| 3191 +vmlal.u32 q15,d6,d0 |
| 3192 + |
| 3193 +# qhasm: ptr = &5y34_stack |
| 3194 +# asm 1: lea >ptr=int32#3,<5y34_stack=stack128#6 |
| 3195 +# asm 2: lea >ptr=r2,<5y34_stack=[sp,#80] |
| 3196 +add r2,sp,#80 |
| 3197 + |
| 3198 +# qhasm: 5y34 aligned= mem128[ptr] |
| 3199 +# asm 1: vld1.8 {>5y34=reg128#13%bot->5y34=reg128#13%top},[<ptr=int32#3,: 128] |
| 3200 +# asm 2: vld1.8 {>5y34=d24->5y34=d25},[<ptr=r2,: 128] |
| 3201 +vld1.8 {d24-d25},[r2,: 128] |
| 3202 + |
| 3203 +# qhasm: r3[0,1] += v01[0] unsigned* y34[0]; r3[2,3] += v01[1] unsigned* y34[
1] |
| 3204 +# asm 1: vmlal.u32 <r3=reg128#5,<v01=reg128#11%bot,<y34=reg128#3%bot |
| 3205 +# asm 2: vmlal.u32 <r3=q4,<v01=d20,<y34=d4 |
| 3206 +vmlal.u32 q4,d20,d4 |
| 3207 + |
| 3208 +# qhasm: r3[0,1] += v01[2] unsigned* y12[2]; r3[2,3] += v01[3] unsigned* y12[
3] |
| 3209 +# asm 1: vmlal.u32 <r3=reg128#5,<v01=reg128#11%top,<y12=reg128#2%top |
| 3210 +# asm 2: vmlal.u32 <r3=q4,<v01=d21,<y12=d3 |
| 3211 +vmlal.u32 q4,d21,d3 |
| 3212 + |
| 3213 +# qhasm: r3[0,1] += v23[0] unsigned* y12[0]; r3[2,3] += v23[1] unsigned* y12[
1] |
| 3214 +# asm 1: vmlal.u32 <r3=reg128#5,<v23=reg128#10%bot,<y12=reg128#2%bot |
| 3215 +# asm 2: vmlal.u32 <r3=q4,<v23=d18,<y12=d2 |
| 3216 +vmlal.u32 q4,d18,d2 |
| 3217 + |
| 3218 +# qhasm: r3[0,1] += v23[2] unsigned* y0[0]; r3[2,3] += v23[3] unsigned* y0[1] |
| 3219 +# asm 1: vmlal.u32 <r3=reg128#5,<v23=reg128#10%top,<y0=reg128#1%bot |
| 3220 +# asm 2: vmlal.u32 <r3=q4,<v23=d19,<y0=d0 |
| 3221 +vmlal.u32 q4,d19,d0 |
| 3222 + |
| 3223 +# qhasm: r3[0,1] += v4[0] unsigned* 5y34[2]; r3[2,3] += v4[1] unsigned* 5y3
4[3] |
| 3224 +# asm 1: vmlal.u32 <r3=reg128#5,<v4=reg128#4%bot,<5y34=reg128#13%top |
| 3225 +# asm 2: vmlal.u32 <r3=q4,<v4=d6,<5y34=d25 |
| 3226 +vmlal.u32 q4,d6,d25 |
| 3227 + |
| 3228 +# qhasm: ptr = &5y12_stack |
| 3229 +# asm 1: lea >ptr=int32#3,<5y12_stack=stack128#5 |
| 3230 +# asm 2: lea >ptr=r2,<5y12_stack=[sp,#64] |
| 3231 +add r2,sp,#64 |
| 3232 + |
| 3233 +# qhasm: 5y12 aligned= mem128[ptr] |
| 3234 +# asm 1: vld1.8 {>5y12=reg128#12%bot->5y12=reg128#12%top},[<ptr=int32#3,: 128] |
| 3235 +# asm 2: vld1.8 {>5y12=d22->5y12=d23},[<ptr=r2,: 128] |
| 3236 +vld1.8 {d22-d23},[r2,: 128] |
| 3237 + |
| 3238 +# qhasm: r0[0,1] += v4[0] unsigned* 5y12[0]; r0[2,3] += v4[1] unsigned* 5y12[
1] |
| 3239 +# asm 1: vmlal.u32 <r0=reg128#8,<v4=reg128#4%bot,<5y12=reg128#12%bot |
| 3240 +# asm 2: vmlal.u32 <r0=q7,<v4=d6,<5y12=d22 |
| 3241 +vmlal.u32 q7,d6,d22 |
| 3242 + |
| 3243 +# qhasm: r0[0,1] += v23[0] unsigned* 5y34[0]; r0[2,3] += v23[1] unsigned* 5y34[
1] |
| 3244 +# asm 1: vmlal.u32 <r0=reg128#8,<v23=reg128#10%bot,<5y34=reg128#13%bot |
| 3245 +# asm 2: vmlal.u32 <r0=q7,<v23=d18,<5y34=d24 |
| 3246 +vmlal.u32 q7,d18,d24 |
| 3247 + |
| 3248 +# qhasm: r0[0,1] += v23[2] unsigned* 5y12[2]; r0[2,3] += v23[3] unsigned* 5y12[
3] |
| 3249 +# asm 1: vmlal.u32 <r0=reg128#8,<v23=reg128#10%top,<5y12=reg128#12%top |
| 3250 +# asm 2: vmlal.u32 <r0=q7,<v23=d19,<5y12=d23 |
| 3251 +vmlal.u32 q7,d19,d23 |
| 3252 + |
| 3253 +# qhasm: r0[0,1] += v01[0] unsigned* y0[0]; r0[2,3] += v01[1] unsigned* y0[1] |
| 3254 +# asm 1: vmlal.u32 <r0=reg128#8,<v01=reg128#11%bot,<y0=reg128#1%bot |
| 3255 +# asm 2: vmlal.u32 <r0=q7,<v01=d20,<y0=d0 |
| 3256 +vmlal.u32 q7,d20,d0 |
| 3257 + |
| 3258 +# qhasm: r0[0,1] += v01[2] unsigned* 5y34[2]; r0[2,3] += v01[3] unsigned* 5y3
4[3] |
| 3259 +# asm 1: vmlal.u32 <r0=reg128#8,<v01=reg128#11%top,<5y34=reg128#13%top |
| 3260 +# asm 2: vmlal.u32 <r0=q7,<v01=d21,<5y34=d25 |
| 3261 +vmlal.u32 q7,d21,d25 |
| 3262 + |
| 3263 +# qhasm: r1[0,1] += v01[0] unsigned* y12[0]; r1[2,3] += v01[1] unsigned* y12[
1] |
| 3264 +# asm 1: vmlal.u32 <r1=reg128#15,<v01=reg128#11%bot,<y12=reg128#2%bot |
| 3265 +# asm 2: vmlal.u32 <r1=q14,<v01=d20,<y12=d2 |
| 3266 +vmlal.u32 q14,d20,d2 |
| 3267 + |
| 3268 +# qhasm: r1[0,1] += v01[2] unsigned* y0[0]; r1[2,3] += v01[3] unsigned* y0[1] |
| 3269 +# asm 1: vmlal.u32 <r1=reg128#15,<v01=reg128#11%top,<y0=reg128#1%bot |
| 3270 +# asm 2: vmlal.u32 <r1=q14,<v01=d21,<y0=d0 |
| 3271 +vmlal.u32 q14,d21,d0 |
| 3272 + |
| 3273 +# qhasm: r1[0,1] += v23[0] unsigned* 5y34[2]; r1[2,3] += v23[1] unsigned* 5y3
4[3] |
| 3274 +# asm 1: vmlal.u32 <r1=reg128#15,<v23=reg128#10%bot,<5y34=reg128#13%top |
| 3275 +# asm 2: vmlal.u32 <r1=q14,<v23=d18,<5y34=d25 |
| 3276 +vmlal.u32 q14,d18,d25 |
| 3277 + |
| 3278 +# qhasm: r1[0,1] += v23[2] unsigned* 5y34[0]; r1[2,3] += v23[3] unsigned* 5y34[
1] |
| 3279 +# asm 1: vmlal.u32 <r1=reg128#15,<v23=reg128#10%top,<5y34=reg128#13%bot |
| 3280 +# asm 2: vmlal.u32 <r1=q14,<v23=d19,<5y34=d24 |
| 3281 +vmlal.u32 q14,d19,d24 |
| 3282 + |
| 3283 +# qhasm: r1[0,1] += v4[0] unsigned* 5y12[2]; r1[2,3] += v4[1] unsigned* 5y12[
3] |
| 3284 +# asm 1: vmlal.u32 <r1=reg128#15,<v4=reg128#4%bot,<5y12=reg128#12%top |
| 3285 +# asm 2: vmlal.u32 <r1=q14,<v4=d6,<5y12=d23 |
| 3286 +vmlal.u32 q14,d6,d23 |
| 3287 + |
| 3288 +# qhasm: r2[0,1] += v01[0] unsigned* y12[2]; r2[2,3] += v01[1] unsigned* y12[
3] |
| 3289 +# asm 1: vmlal.u32 <r2=reg128#14,<v01=reg128#11%bot,<y12=reg128#2%top |
| 3290 +# asm 2: vmlal.u32 <r2=q13,<v01=d20,<y12=d3 |
| 3291 +vmlal.u32 q13,d20,d3 |
| 3292 + |
| 3293 +# qhasm: r2[0,1] += v01[2] unsigned* y12[0]; r2[2,3] += v01[3] unsigned* y12[
1] |
| 3294 +# asm 1: vmlal.u32 <r2=reg128#14,<v01=reg128#11%top,<y12=reg128#2%bot |
| 3295 +# asm 2: vmlal.u32 <r2=q13,<v01=d21,<y12=d2 |
| 3296 +vmlal.u32 q13,d21,d2 |
| 3297 + |
| 3298 +# qhasm: r2[0,1] += v23[0] unsigned* y0[0]; r2[2,3] += v23[1] unsigned* y0[1] |
| 3299 +# asm 1: vmlal.u32 <r2=reg128#14,<v23=reg128#10%bot,<y0=reg128#1%bot |
| 3300 +# asm 2: vmlal.u32 <r2=q13,<v23=d18,<y0=d0 |
| 3301 +vmlal.u32 q13,d18,d0 |
| 3302 + |
| 3303 +# qhasm: r2[0,1] += v23[2] unsigned* 5y34[2]; r2[2,3] += v23[3] unsigned* 5y3
4[3] |
| 3304 +# asm 1: vmlal.u32 <r2=reg128#14,<v23=reg128#10%top,<5y34=reg128#13%top |
| 3305 +# asm 2: vmlal.u32 <r2=q13,<v23=d19,<5y34=d25 |
| 3306 +vmlal.u32 q13,d19,d25 |
| 3307 + |
| 3308 +# qhasm: r2[0,1] += v4[0] unsigned* 5y34[0]; r2[2,3] += v4[1] unsigned* 5y34[
1] |
| 3309 +# asm 1: vmlal.u32 <r2=reg128#14,<v4=reg128#4%bot,<5y34=reg128#13%bot |
| 3310 +# asm 2: vmlal.u32 <r2=q13,<v4=d6,<5y34=d24 |
| 3311 +vmlal.u32 q13,d6,d24 |
| 3312 + |
| 3313 +# qhasm: ptr = &two24 |
| 3314 +# asm 1: lea >ptr=int32#3,<two24=stack128#1 |
| 3315 +# asm 2: lea >ptr=r2,<two24=[sp,#0] |
| 3316 +add r2,sp,#0 |
| 3317 + |
| 3318 +# qhasm: 2x t1 = r0 unsigned>> 26 |
| 3319 +# asm 1: vshr.u64 >t1=reg128#4,<r0=reg128#8,#26 |
| 3320 +# asm 2: vshr.u64 >t1=q3,<r0=q7,#26 |
| 3321 +vshr.u64 q3,q7,#26 |
| 3322 + |
| 3323 +# qhasm: len -= 64 |
| 3324 +# asm 1: sub >len=int32#4,<len=int32#4,#64 |
| 3325 +# asm 2: sub >len=r3,<len=r3,#64 |
| 3326 +sub r3,r3,#64 |
| 3327 + |
| 3328 +# qhasm: r0 &= mask |
| 3329 +# asm 1: vand >r0=reg128#6,<r0=reg128#8,<mask=reg128#7 |
| 3330 +# asm 2: vand >r0=q5,<r0=q7,<mask=q6 |
| 3331 +vand q5,q7,q6 |
| 3332 + |
| 3333 +# qhasm: 2x r1 += t1 |
| 3334 +# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#15,<t1=reg128#4 |
| 3335 +# asm 2: vadd.i64 >r1=q3,<r1=q14,<t1=q3 |
| 3336 +vadd.i64 q3,q14,q3 |
| 3337 + |
| 3338 +# qhasm: 2x t4 = r3 unsigned>> 26 |
| 3339 +# asm 1: vshr.u64 >t4=reg128#8,<r3=reg128#5,#26 |
| 3340 +# asm 2: vshr.u64 >t4=q7,<r3=q4,#26 |
| 3341 +vshr.u64 q7,q4,#26 |
| 3342 + |
| 3343 +# qhasm: r3 &= mask |
| 3344 +# asm 1: vand >r3=reg128#5,<r3=reg128#5,<mask=reg128#7 |
| 3345 +# asm 2: vand >r3=q4,<r3=q4,<mask=q6 |
| 3346 +vand q4,q4,q6 |
| 3347 + |
| 3348 +# qhasm: 2x x4 = r4 + t4 |
| 3349 +# asm 1: vadd.i64 >x4=reg128#8,<r4=reg128#16,<t4=reg128#8 |
| 3350 +# asm 2: vadd.i64 >x4=q7,<r4=q15,<t4=q7 |
| 3351 +vadd.i64 q7,q15,q7 |
| 3352 + |
| 3353 +# qhasm: r4 aligned= mem128[ptr] |
| 3354 +# asm 1: vld1.8 {>r4=reg128#16%bot->r4=reg128#16%top},[<ptr=int32#3,: 128] |
| 3355 +# asm 2: vld1.8 {>r4=d30->r4=d31},[<ptr=r2,: 128] |
| 3356 +vld1.8 {d30-d31},[r2,: 128] |
| 3357 + |
| 3358 +# qhasm: 2x t2 = r1 unsigned>> 26 |
| 3359 +# asm 1: vshr.u64 >t2=reg128#9,<r1=reg128#4,#26 |
| 3360 +# asm 2: vshr.u64 >t2=q8,<r1=q3,#26 |
| 3361 +vshr.u64 q8,q3,#26 |
| 3362 + |
| 3363 +# qhasm: r1 &= mask |
| 3364 +# asm 1: vand >r1=reg128#4,<r1=reg128#4,<mask=reg128#7 |
| 3365 +# asm 2: vand >r1=q3,<r1=q3,<mask=q6 |
| 3366 +vand q3,q3,q6 |
| 3367 + |
| 3368 +# qhasm: 2x t0 = x4 unsigned>> 26 |
| 3369 +# asm 1: vshr.u64 >t0=reg128#10,<x4=reg128#8,#26 |
| 3370 +# asm 2: vshr.u64 >t0=q9,<x4=q7,#26 |
| 3371 +vshr.u64 q9,q7,#26 |
| 3372 + |
| 3373 +# qhasm: 2x r2 += t2 |
| 3374 +# asm 1: vadd.i64 >r2=reg128#9,<r2=reg128#14,<t2=reg128#9 |
| 3375 +# asm 2: vadd.i64 >r2=q8,<r2=q13,<t2=q8 |
| 3376 +vadd.i64 q8,q13,q8 |
| 3377 + |
| 3378 +# qhasm: x4 &= mask |
| 3379 +# asm 1: vand >x4=reg128#11,<x4=reg128#8,<mask=reg128#7 |
| 3380 +# asm 2: vand >x4=q10,<x4=q7,<mask=q6 |
| 3381 +vand q10,q7,q6 |
| 3382 + |
| 3383 +# qhasm: 2x x01 = r0 + t0 |
| 3384 +# asm 1: vadd.i64 >x01=reg128#6,<r0=reg128#6,<t0=reg128#10 |
| 3385 +# asm 2: vadd.i64 >x01=q5,<r0=q5,<t0=q9 |
| 3386 +vadd.i64 q5,q5,q9 |
| 3387 + |
| 3388 +# qhasm: r0 aligned= mem128[ptr] |
| 3389 +# asm 1: vld1.8 {>r0=reg128#8%bot->r0=reg128#8%top},[<ptr=int32#3,: 128] |
| 3390 +# asm 2: vld1.8 {>r0=d14->r0=d15},[<ptr=r2,: 128] |
| 3391 +vld1.8 {d14-d15},[r2,: 128] |
| 3392 + |
| 3393 +# qhasm: ptr = &z34_stack |
| 3394 +# asm 1: lea >ptr=int32#3,<z34_stack=stack128#9 |
| 3395 +# asm 2: lea >ptr=r2,<z34_stack=[sp,#128] |
| 3396 +add r2,sp,#128 |
| 3397 + |
| 3398 +# qhasm: 2x t0 <<= 2 |
| 3399 +# asm 1: vshl.i64 >t0=reg128#10,<t0=reg128#10,#2 |
| 3400 +# asm 2: vshl.i64 >t0=q9,<t0=q9,#2 |
| 3401 +vshl.i64 q9,q9,#2 |
| 3402 + |
| 3403 +# qhasm: 2x t3 = r2 unsigned>> 26 |
| 3404 +# asm 1: vshr.u64 >t3=reg128#14,<r2=reg128#9,#26 |
| 3405 +# asm 2: vshr.u64 >t3=q13,<r2=q8,#26 |
| 3406 +vshr.u64 q13,q8,#26 |
| 3407 + |
| 3408 +# qhasm: 2x x01 += t0 |
| 3409 +# asm 1: vadd.i64 >x01=reg128#15,<x01=reg128#6,<t0=reg128#10 |
| 3410 +# asm 2: vadd.i64 >x01=q14,<x01=q5,<t0=q9 |
| 3411 +vadd.i64 q14,q5,q9 |
| 3412 + |
| 3413 +# qhasm: z34 aligned= mem128[ptr] |
| 3414 +# asm 1: vld1.8 {>z34=reg128#6%bot->z34=reg128#6%top},[<ptr=int32#3,: 128] |
| 3415 +# asm 2: vld1.8 {>z34=d10->z34=d11},[<ptr=r2,: 128] |
| 3416 +vld1.8 {d10-d11},[r2,: 128] |
| 3417 + |
| 3418 +# qhasm: x23 = r2 & mask |
| 3419 +# asm 1: vand >x23=reg128#10,<r2=reg128#9,<mask=reg128#7 |
| 3420 +# asm 2: vand >x23=q9,<r2=q8,<mask=q6 |
| 3421 +vand q9,q8,q6 |
| 3422 + |
| 3423 +# qhasm: 2x r3 += t3 |
| 3424 +# asm 1: vadd.i64 >r3=reg128#5,<r3=reg128#5,<t3=reg128#14 |
| 3425 +# asm 2: vadd.i64 >r3=q4,<r3=q4,<t3=q13 |
| 3426 +vadd.i64 q4,q4,q13 |
| 3427 + |
| 3428 +# qhasm: input_2
+= 32 |
| 3429 +# asm 1: add >input_2=int32#2,<input_2=int32#2,#32 |
| 3430 +# asm 2: add >input_2=r1,<input_2=r1,#32 |
| 3431 +add r1,r1,#32 |
| 3432 + |
| 3433 +# qhasm: 2x t1 = x01 unsigned>> 26 |
| 3434 +# asm 1: vshr.u64 >t1=reg128#14,<x01=reg128#15,#26 |
| 3435 +# asm 2: vshr.u64 >t1=q13,<x01=q14,#26 |
| 3436 +vshr.u64 q13,q14,#26 |
| 3437 + |
| 3438 +# qhasm: x23 = x23[0,2,1,3] |
| 3439 +# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top |
| 3440 +# asm 2: vtrn.32 <x23=d18,<x23=d19 |
| 3441 +vtrn.32 d18,d19 |
| 3442 + |
| 3443 +# qhasm: x01 = x01 & mask |
| 3444 +# asm 1: vand >x01=reg128#9,<x01=reg128#15,<mask=reg128#7 |
| 3445 +# asm 2: vand >x01=q8,<x01=q14,<mask=q6 |
| 3446 +vand q8,q14,q6 |
| 3447 + |
| 3448 +# qhasm: 2x r1 += t1 |
| 3449 +# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#4,<t1=reg128#14 |
| 3450 +# asm 2: vadd.i64 >r1=q3,<r1=q3,<t1=q13 |
| 3451 +vadd.i64 q3,q3,q13 |
| 3452 + |
| 3453 +# qhasm: 2x t4 = r3 unsigned>> 26 |
| 3454 +# asm 1: vshr.u64 >t4=reg128#14,<r3=reg128#5,#26 |
| 3455 +# asm 2: vshr.u64 >t4=q13,<r3=q4,#26 |
| 3456 +vshr.u64 q13,q4,#26 |
| 3457 + |
| 3458 +# qhasm: x01 = x01[0,2,1,3] |
| 3459 +# asm 1: vtrn.32 <x01=reg128#9%bot,<x01=reg128#9%top |
| 3460 +# asm 2: vtrn.32 <x01=d16,<x01=d17 |
| 3461 +vtrn.32 d16,d17 |
| 3462 + |
| 3463 +# qhasm: r3 &= mask |
| 3464 +# asm 1: vand >r3=reg128#5,<r3=reg128#5,<mask=reg128#7 |
| 3465 +# asm 2: vand >r3=q4,<r3=q4,<mask=q6 |
| 3466 +vand q4,q4,q6 |
| 3467 + |
| 3468 +# qhasm: r1 = r1[0,2,1,3] |
| 3469 +# asm 1: vtrn.32 <r1=reg128#4%bot,<r1=reg128#4%top |
| 3470 +# asm 2: vtrn.32 <r1=d6,<r1=d7 |
| 3471 +vtrn.32 d6,d7 |
| 3472 + |
| 3473 +# qhasm: 2x x4 += t4 |
| 3474 +# asm 1: vadd.i64 >x4=reg128#11,<x4=reg128#11,<t4=reg128#14 |
| 3475 +# asm 2: vadd.i64 >x4=q10,<x4=q10,<t4=q13 |
| 3476 +vadd.i64 q10,q10,q13 |
| 3477 + |
| 3478 +# qhasm: r3 = r3[0,2,1,3] |
| 3479 +# asm 1: vtrn.32 <r3=reg128#5%bot,<r3=reg128#5%top |
| 3480 +# asm 2: vtrn.32 <r3=d8,<r3=d9 |
| 3481 +vtrn.32 d8,d9 |
| 3482 + |
| 3483 +# qhasm: x01 = x01[0,1] r1[0,1] |
| 3484 +# asm 1: vext.32 <x01=reg128#9%top,<r1=reg128#4%bot,<r1=reg128#4%bot,#0 |
| 3485 +# asm 2: vext.32 <x01=d17,<r1=d6,<r1=d6,#0 |
| 3486 +vext.32 d17,d6,d6,#0 |
| 3487 + |
| 3488 +# qhasm: x23 = x23[0,1] r3[0,1] |
| 3489 +# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#5%bot,<r3=reg128#5%bot,#0 |
| 3490 +# asm 2: vext.32 <x23=d19,<r3=d8,<r3=d8,#0 |
| 3491 +vext.32 d19,d8,d8,#0 |
| 3492 + |
| 3493 +# qhasm: x4 = x4[0,2,1,3] |
| 3494 +# asm 1: vtrn.32 <x4=reg128#11%bot,<x4=reg128#11%top |
| 3495 +# asm 2: vtrn.32 <x4=d20,<x4=d21 |
| 3496 +vtrn.32 d20,d21 |
| 3497 + |
| 3498 +# qhasm: unsigned>? len - 64 |
| 3499 +# asm 1: cmp <len=int32#4,#64 |
| 3500 +# asm 2: cmp <len=r3,#64 |
| 3501 +cmp r3,#64 |
| 3502 + |
| 3503 +# qhasm: goto mainloop2 if unsigned> |
| 3504 +bhi ._mainloop2 |
| 3505 + |
| 3506 +# qhasm: input_2 -= 32 |
| 3507 +# asm 1: sub >input_2=int32#3,<input_2=int32#2,#32 |
| 3508 +# asm 2: sub >input_2=r2,<input_2=r1,#32 |
| 3509 +sub r2,r1,#32 |
| 3510 + |
| 3511 +# qhasm: below64bytes: |
| 3512 +._below64bytes: |
| 3513 + |
| 3514 +# qhasm: unsigned>? len - 32 |
| 3515 +# asm 1: cmp <len=int32#4,#32 |
| 3516 +# asm 2: cmp <len=r3,#32 |
| 3517 +cmp r3,#32 |
| 3518 + |
| 3519 +# qhasm: goto end if !unsigned> |
| 3520 +bls ._end |
| 3521 + |
| 3522 +# qhasm: mainloop: |
| 3523 +._mainloop: |
| 3524 + |
| 3525 +# qhasm: new r0 |
| 3526 + |
| 3527 +# qhasm: ptr = &two24 |
| 3528 +# asm 1: lea >ptr=int32#2,<two24=stack128#1 |
| 3529 +# asm 2: lea >ptr=r1,<two24=[sp,#0] |
| 3530 +add r1,sp,#0 |
| 3531 + |
| 3532 +# qhasm: r4 aligned= mem128[ptr] |
| 3533 +# asm 1: vld1.8 {>r4=reg128#5%bot->r4=reg128#5%top},[<ptr=int32#2,: 128] |
| 3534 +# asm 2: vld1.8 {>r4=d8->r4=d9},[<ptr=r1,: 128] |
| 3535 +vld1.8 {d8-d9},[r1,: 128] |
| 3536 + |
| 3537 +# qhasm: u4 aligned= mem128[ptr] |
| 3538 +# asm 1: vld1.8 {>u4=reg128#6%bot->u4=reg128#6%top},[<ptr=int32#2,: 128] |
| 3539 +# asm 2: vld1.8 {>u4=d10->u4=d11},[<ptr=r1,: 128] |
| 3540 +vld1.8 {d10-d11},[r1,: 128] |
| 3541 + |
| 3542 +# qhasm: c01 = mem128[input_2];input_2+=16 |
| 3543 +# asm 1: vld1.8 {>c01=reg128#8%bot->c01=reg128#8%top},[<input_2=int32#3]! |
| 3544 +# asm 2: vld1.8 {>c01=d14->c01=d15},[<input_2=r2]! |
| 3545 +vld1.8 {d14-d15},[r2]! |
| 3546 + |
| 3547 +# qhasm: r4[0,1] += x01[0] unsigned* y34[2]; r4[2,3] += x01[1] unsigned* y34
[3] |
| 3548 +# asm 1: vmlal.u32 <r4=reg128#5,<x01=reg128#9%bot,<y34=reg128#3%top |
| 3549 +# asm 2: vmlal.u32 <r4=q4,<x01=d16,<y34=d5 |
| 3550 +vmlal.u32 q4,d16,d5 |
| 3551 + |
| 3552 +# qhasm: c23 = mem128[input_2];input_2+=16 |
| 3553 +# asm 1: vld1.8 {>c23=reg128#14%bot->c23=reg128#14%top},[<input_2=int32#3]! |
| 3554 +# asm 2: vld1.8 {>c23=d26->c23=d27},[<input_2=r2]! |
| 3555 +vld1.8 {d26-d27},[r2]! |
| 3556 + |
| 3557 +# qhasm: r4[0,1] += x01[2] unsigned* y34[0]; r4[2,3] += x01[3] unsigned* y34[1
] |
| 3558 +# asm 1: vmlal.u32 <r4=reg128#5,<x01=reg128#9%top,<y34=reg128#3%bot |
| 3559 +# asm 2: vmlal.u32 <r4=q4,<x01=d17,<y34=d4 |
| 3560 +vmlal.u32 q4,d17,d4 |
| 3561 + |
| 3562 +# qhasm: r0 = u4[1]c01[0]r0[2,3] |
| 3563 +# asm 1: vext.32 <r0=reg128#4%bot,<u4=reg128#6%bot,<c01=reg128#8%bot,#1 |
| 3564 +# asm 2: vext.32 <r0=d6,<u4=d10,<c01=d14,#1 |
| 3565 +vext.32 d6,d10,d14,#1 |
| 3566 + |
| 3567 +# qhasm: r4[0,1] += x23[0] unsigned* y12[2]; r4[2,3] += x23[1] unsigned* y12[3
] |
| 3568 +# asm 1: vmlal.u32 <r4=reg128#5,<x23=reg128#10%bot,<y12=reg128#2%top |
| 3569 +# asm 2: vmlal.u32 <r4=q4,<x23=d18,<y12=d3 |
| 3570 +vmlal.u32 q4,d18,d3 |
| 3571 + |
| 3572 +# qhasm: r0 = r0[0,1]u4[1]c23[0] |
| 3573 +# asm 1: vext.32 <r0=reg128#4%top,<u4=reg128#6%bot,<c23=reg128#14%bot,#1 |
| 3574 +# asm 2: vext.32 <r0=d7,<u4=d10,<c23=d26,#1 |
| 3575 +vext.32 d7,d10,d26,#1 |
| 3576 + |
| 3577 +# qhasm: r4[0,1] += x23[2] unsigned* y12[0]; r4[2,3] += x23[3] unsigned* y12[1
] |
| 3578 +# asm 1: vmlal.u32 <r4=reg128#5,<x23=reg128#10%top,<y12=reg128#2%bot |
| 3579 +# asm 2: vmlal.u32 <r4=q4,<x23=d19,<y12=d2 |
| 3580 +vmlal.u32 q4,d19,d2 |
| 3581 + |
| 3582 +# qhasm: r0 = r0[1]r0[0]r0[3]r0[2] |
| 3583 +# asm 1: vrev64.i32 >r0=reg128#4,<r0=reg128#4 |
| 3584 +# asm 2: vrev64.i32 >r0=q3,<r0=q3 |
| 3585 +vrev64.i32 q3,q3 |
| 3586 + |
| 3587 +# qhasm: r4[0,1] += x4[0] unsigned* y0[0]; r4[2,3] += x4[1] unsigned* y0[1] |
| 3588 +# asm 1: vmlal.u32 <r4=reg128#5,<x4=reg128#11%bot,<y0=reg128#1%bot |
| 3589 +# asm 2: vmlal.u32 <r4=q4,<x4=d20,<y0=d0 |
| 3590 +vmlal.u32 q4,d20,d0 |
| 3591 + |
| 3592 +# qhasm: r0[0,1] += x4[0] unsigned* 5y12[0]; r0[2,3] += x4[1] unsigned* 5y12[
1] |
| 3593 +# asm 1: vmlal.u32 <r0=reg128#4,<x4=reg128#11%bot,<5y12=reg128#12%bot |
| 3594 +# asm 2: vmlal.u32 <r0=q3,<x4=d20,<5y12=d22 |
| 3595 +vmlal.u32 q3,d20,d22 |
| 3596 + |
| 3597 +# qhasm: r0[0,1] += x23[0] unsigned* 5y34[0]; r0[2,3] += x23[1] unsigned* 5y34[
1] |
| 3598 +# asm 1: vmlal.u32 <r0=reg128#4,<x23=reg128#10%bot,<5y34=reg128#13%bot |
| 3599 +# asm 2: vmlal.u32 <r0=q3,<x23=d18,<5y34=d24 |
| 3600 +vmlal.u32 q3,d18,d24 |
| 3601 + |
| 3602 +# qhasm: r0[0,1] += x23[2] unsigned* 5y12[2]; r0[2,3] += x23[3] unsigned* 5y12[
3] |
| 3603 +# asm 1: vmlal.u32 <r0=reg128#4,<x23=reg128#10%top,<5y12=reg128#12%top |
| 3604 +# asm 2: vmlal.u32 <r0=q3,<x23=d19,<5y12=d23 |
| 3605 +vmlal.u32 q3,d19,d23 |
| 3606 + |
| 3607 +# qhasm: c01 c23 = c01[0]c23[0]c01[2]c23[2]c01[1]c23[1]c01[3]c23[3] |
| 3608 +# asm 1: vtrn.32 <c01=reg128#8,<c23=reg128#14 |
| 3609 +# asm 2: vtrn.32 <c01=q7,<c23=q13 |
| 3610 +vtrn.32 q7,q13 |
| 3611 + |
| 3612 +# qhasm: r0[0,1] += x01[0] unsigned* y0[0]; r0[2,3] += x01[1] unsigned* y0[1] |
| 3613 +# asm 1: vmlal.u32 <r0=reg128#4,<x01=reg128#9%bot,<y0=reg128#1%bot |
| 3614 +# asm 2: vmlal.u32 <r0=q3,<x01=d16,<y0=d0 |
| 3615 +vmlal.u32 q3,d16,d0 |
| 3616 + |
| 3617 +# qhasm: r3[0,1] = c23[2]<<18; r3[2,3] = c23[3]<<18 |
| 3618 +# asm 1: vshll.u32 >r3=reg128#6,<c23=reg128#14%top,#18 |
| 3619 +# asm 2: vshll.u32 >r3=q5,<c23=d27,#18 |
| 3620 +vshll.u32 q5,d27,#18 |
| 3621 + |
| 3622 +# qhasm: r0[0,1] += x01[2] unsigned* 5y34[2]; r0[2,3] += x01[3] unsigned* 5y3
4[3] |
| 3623 +# asm 1: vmlal.u32 <r0=reg128#4,<x01=reg128#9%top,<5y34=reg128#13%top |
| 3624 +# asm 2: vmlal.u32 <r0=q3,<x01=d17,<5y34=d25 |
| 3625 +vmlal.u32 q3,d17,d25 |
| 3626 + |
| 3627 +# qhasm: r3[0,1] += x01[0] unsigned* y34[0]; r3[2,3] += x01[1] unsigned* y34[
1] |
| 3628 +# asm 1: vmlal.u32 <r3=reg128#6,<x01=reg128#9%bot,<y34=reg128#3%bot |
| 3629 +# asm 2: vmlal.u32 <r3=q5,<x01=d16,<y34=d4 |
| 3630 +vmlal.u32 q5,d16,d4 |
| 3631 + |
| 3632 +# qhasm: r3[0,1] += x01[2] unsigned* y12[2]; r3[2,3] += x01[3] unsigned* y12[
3] |
| 3633 +# asm 1: vmlal.u32 <r3=reg128#6,<x01=reg128#9%top,<y12=reg128#2%top |
| 3634 +# asm 2: vmlal.u32 <r3=q5,<x01=d17,<y12=d3 |
| 3635 +vmlal.u32 q5,d17,d3 |
| 3636 + |
| 3637 +# qhasm: r3[0,1] += x23[0] unsigned* y12[0]; r3[2,3] += x23[1] unsigned* y12[
1] |
| 3638 +# asm 1: vmlal.u32 <r3=reg128#6,<x23=reg128#10%bot,<y12=reg128#2%bot |
| 3639 +# asm 2: vmlal.u32 <r3=q5,<x23=d18,<y12=d2 |
| 3640 +vmlal.u32 q5,d18,d2 |
| 3641 + |
| 3642 +# qhasm: r3[0,1] += x23[2] unsigned* y0[0]; r3[2,3] += x23[3] unsigned* y0[1] |
| 3643 +# asm 1: vmlal.u32 <r3=reg128#6,<x23=reg128#10%top,<y0=reg128#1%bot |
| 3644 +# asm 2: vmlal.u32 <r3=q5,<x23=d19,<y0=d0 |
| 3645 +vmlal.u32 q5,d19,d0 |
| 3646 + |
| 3647 +# qhasm: r1[0,1] = c23[0]<<6; r1[2,3] = c23[1]<<6 |
| 3648 +# asm 1: vshll.u32 >r1=reg128#14,<c23=reg128#14%bot,#6 |
| 3649 +# asm 2: vshll.u32 >r1=q13,<c23=d26,#6 |
| 3650 +vshll.u32 q13,d26,#6 |
| 3651 + |
| 3652 +# qhasm: r3[0,1] += x4[0] unsigned* 5y34[2]; r3[2,3] += x4[1] unsigned* 5y3
4[3] |
| 3653 +# asm 1: vmlal.u32 <r3=reg128#6,<x4=reg128#11%bot,<5y34=reg128#13%top |
| 3654 +# asm 2: vmlal.u32 <r3=q5,<x4=d20,<5y34=d25 |
| 3655 +vmlal.u32 q5,d20,d25 |
| 3656 + |
| 3657 +# qhasm: r1[0,1] += x01[0] unsigned* y12[0]; r1[2,3] += x01[1] unsigned* y12[
1] |
| 3658 +# asm 1: vmlal.u32 <r1=reg128#14,<x01=reg128#9%bot,<y12=reg128#2%bot |
| 3659 +# asm 2: vmlal.u32 <r1=q13,<x01=d16,<y12=d2 |
| 3660 +vmlal.u32 q13,d16,d2 |
| 3661 + |
| 3662 +# qhasm: r1[0,1] += x01[2] unsigned* y0[0]; r1[2,3] += x01[3] unsigned* y0[1] |
| 3663 +# asm 1: vmlal.u32 <r1=reg128#14,<x01=reg128#9%top,<y0=reg128#1%bot |
| 3664 +# asm 2: vmlal.u32 <r1=q13,<x01=d17,<y0=d0 |
| 3665 +vmlal.u32 q13,d17,d0 |
| 3666 + |
| 3667 +# qhasm: r1[0,1] += x23[0] unsigned* 5y34[2]; r1[2,3] += x23[1] unsigned* 5y3
4[3] |
| 3668 +# asm 1: vmlal.u32 <r1=reg128#14,<x23=reg128#10%bot,<5y34=reg128#13%top |
| 3669 +# asm 2: vmlal.u32 <r1=q13,<x23=d18,<5y34=d25 |
| 3670 +vmlal.u32 q13,d18,d25 |
| 3671 + |
| 3672 +# qhasm: r1[0,1] += x23[2] unsigned* 5y34[0]; r1[2,3] += x23[3] unsigned* 5y34[
1] |
| 3673 +# asm 1: vmlal.u32 <r1=reg128#14,<x23=reg128#10%top,<5y34=reg128#13%bot |
| 3674 +# asm 2: vmlal.u32 <r1=q13,<x23=d19,<5y34=d24 |
| 3675 +vmlal.u32 q13,d19,d24 |
| 3676 + |
| 3677 +# qhasm: r2[0,1] = c01[2]<<12; r2[2,3] = c01[3]<<12 |
| 3678 +# asm 1: vshll.u32 >r2=reg128#8,<c01=reg128#8%top,#12 |
| 3679 +# asm 2: vshll.u32 >r2=q7,<c01=d15,#12 |
| 3680 +vshll.u32 q7,d15,#12 |
| 3681 + |
| 3682 +# qhasm: r1[0,1] += x4[0] unsigned* 5y12[2]; r1[2,3] += x4[1] unsigned* 5y12[
3] |
| 3683 +# asm 1: vmlal.u32 <r1=reg128#14,<x4=reg128#11%bot,<5y12=reg128#12%top |
| 3684 +# asm 2: vmlal.u32 <r1=q13,<x4=d20,<5y12=d23 |
| 3685 +vmlal.u32 q13,d20,d23 |
| 3686 + |
| 3687 +# qhasm: r2[0,1] += x01[0] unsigned* y12[2]; r2[2,3] += x01[1] unsigned* y12[
3] |
| 3688 +# asm 1: vmlal.u32 <r2=reg128#8,<x01=reg128#9%bot,<y12=reg128#2%top |
| 3689 +# asm 2: vmlal.u32 <r2=q7,<x01=d16,<y12=d3 |
| 3690 +vmlal.u32 q7,d16,d3 |
| 3691 + |
| 3692 +# qhasm: r2[0,1] += x01[2] unsigned* y12[0]; r2[2,3] += x01[3] unsigned* y12[
1] |
| 3693 +# asm 1: vmlal.u32 <r2=reg128#8,<x01=reg128#9%top,<y12=reg128#2%bot |
| 3694 +# asm 2: vmlal.u32 <r2=q7,<x01=d17,<y12=d2 |
| 3695 +vmlal.u32 q7,d17,d2 |
| 3696 + |
| 3697 +# qhasm: r2[0,1] += x23[0] unsigned* y0[0]; r2[2,3] += x23[1] unsigned* y0[1] |
| 3698 +# asm 1: vmlal.u32 <r2=reg128#8,<x23=reg128#10%bot,<y0=reg128#1%bot |
| 3699 +# asm 2: vmlal.u32 <r2=q7,<x23=d18,<y0=d0 |
| 3700 +vmlal.u32 q7,d18,d0 |
| 3701 + |
| 3702 +# qhasm: r2[0,1] += x23[2] unsigned* 5y34[2]; r2[2,3] += x23[3] unsigned* 5y3
4[3] |
| 3703 +# asm 1: vmlal.u32 <r2=reg128#8,<x23=reg128#10%top,<5y34=reg128#13%top |
| 3704 +# asm 2: vmlal.u32 <r2=q7,<x23=d19,<5y34=d25 |
| 3705 +vmlal.u32 q7,d19,d25 |
| 3706 + |
| 3707 +# qhasm: r2[0,1] += x4[0] unsigned* 5y34[0]; r2[2,3] += x4[1] unsigned* 5y34[
1] |
| 3708 +# asm 1: vmlal.u32 <r2=reg128#8,<x4=reg128#11%bot,<5y34=reg128#13%bot |
| 3709 +# asm 2: vmlal.u32 <r2=q7,<x4=d20,<5y34=d24 |
| 3710 +vmlal.u32 q7,d20,d24 |
| 3711 + |
| 3712 +# qhasm: 2x t1 = r0 unsigned>> 26 |
| 3713 +# asm 1: vshr.u64 >t1=reg128#9,<r0=reg128#4,#26 |
| 3714 +# asm 2: vshr.u64 >t1=q8,<r0=q3,#26 |
| 3715 +vshr.u64 q8,q3,#26 |
| 3716 + |
| 3717 +# qhasm: r0 &= mask |
| 3718 +# asm 1: vand >r0=reg128#4,<r0=reg128#4,<mask=reg128#7 |
| 3719 +# asm 2: vand >r0=q3,<r0=q3,<mask=q6 |
| 3720 +vand q3,q3,q6 |
| 3721 + |
| 3722 +# qhasm: 2x r1 += t1 |
| 3723 +# asm 1: vadd.i64 >r1=reg128#9,<r1=reg128#14,<t1=reg128#9 |
| 3724 +# asm 2: vadd.i64 >r1=q8,<r1=q13,<t1=q8 |
| 3725 +vadd.i64 q8,q13,q8 |
| 3726 + |
| 3727 +# qhasm: 2x t4 = r3 unsigned>> 26 |
| 3728 +# asm 1: vshr.u64 >t4=reg128#10,<r3=reg128#6,#26 |
| 3729 +# asm 2: vshr.u64 >t4=q9,<r3=q5,#26 |
| 3730 +vshr.u64 q9,q5,#26 |
| 3731 + |
| 3732 +# qhasm: r3 &= mask |
| 3733 +# asm 1: vand >r3=reg128#6,<r3=reg128#6,<mask=reg128#7 |
| 3734 +# asm 2: vand >r3=q5,<r3=q5,<mask=q6 |
| 3735 +vand q5,q5,q6 |
| 3736 + |
| 3737 +# qhasm: 2x r4 += t4 |
| 3738 +# asm 1: vadd.i64 >r4=reg128#5,<r4=reg128#5,<t4=reg128#10 |
| 3739 +# asm 2: vadd.i64 >r4=q4,<r4=q4,<t4=q9 |
| 3740 +vadd.i64 q4,q4,q9 |
| 3741 + |
| 3742 +# qhasm: 2x t2 = r1 unsigned>> 26 |
| 3743 +# asm 1: vshr.u64 >t2=reg128#10,<r1=reg128#9,#26 |
| 3744 +# asm 2: vshr.u64 >t2=q9,<r1=q8,#26 |
| 3745 +vshr.u64 q9,q8,#26 |
| 3746 + |
| 3747 +# qhasm: r1 &= mask |
| 3748 +# asm 1: vand >r1=reg128#11,<r1=reg128#9,<mask=reg128#7 |
| 3749 +# asm 2: vand >r1=q10,<r1=q8,<mask=q6 |
| 3750 +vand q10,q8,q6 |
| 3751 + |
| 3752 +# qhasm: 2x t0 = r4 unsigned>> 26 |
| 3753 +# asm 1: vshr.u64 >t0=reg128#9,<r4=reg128#5,#26 |
| 3754 +# asm 2: vshr.u64 >t0=q8,<r4=q4,#26 |
| 3755 +vshr.u64 q8,q4,#26 |
| 3756 + |
| 3757 +# qhasm: 2x r2 += t2 |
| 3758 +# asm 1: vadd.i64 >r2=reg128#8,<r2=reg128#8,<t2=reg128#10 |
| 3759 +# asm 2: vadd.i64 >r2=q7,<r2=q7,<t2=q9 |
| 3760 +vadd.i64 q7,q7,q9 |
| 3761 + |
| 3762 +# qhasm: r4 &= mask |
| 3763 +# asm 1: vand >r4=reg128#5,<r4=reg128#5,<mask=reg128#7 |
| 3764 +# asm 2: vand >r4=q4,<r4=q4,<mask=q6 |
| 3765 +vand q4,q4,q6 |
| 3766 + |
| 3767 +# qhasm: 2x r0 += t0 |
| 3768 +# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#9 |
| 3769 +# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q8 |
| 3770 +vadd.i64 q3,q3,q8 |
| 3771 + |
| 3772 +# qhasm: 2x t0 <<= 2 |
| 3773 +# asm 1: vshl.i64 >t0=reg128#9,<t0=reg128#9,#2 |
| 3774 +# asm 2: vshl.i64 >t0=q8,<t0=q8,#2 |
| 3775 +vshl.i64 q8,q8,#2 |
| 3776 + |
| 3777 +# qhasm: 2x t3 = r2 unsigned>> 26 |
| 3778 +# asm 1: vshr.u64 >t3=reg128#14,<r2=reg128#8,#26 |
| 3779 +# asm 2: vshr.u64 >t3=q13,<r2=q7,#26 |
| 3780 +vshr.u64 q13,q7,#26 |
| 3781 + |
| 3782 +# qhasm: 2x r0 += t0 |
| 3783 +# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#9 |
| 3784 +# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q8 |
| 3785 +vadd.i64 q3,q3,q8 |
| 3786 + |
| 3787 +# qhasm: x23 = r2 & mask |
| 3788 +# asm 1: vand >x23=reg128#10,<r2=reg128#8,<mask=reg128#7 |
| 3789 +# asm 2: vand >x23=q9,<r2=q7,<mask=q6 |
| 3790 +vand q9,q7,q6 |
| 3791 + |
| 3792 +# qhasm: 2x r3 += t3 |
| 3793 +# asm 1: vadd.i64 >r3=reg128#6,<r3=reg128#6,<t3=reg128#14 |
| 3794 +# asm 2: vadd.i64 >r3=q5,<r3=q5,<t3=q13 |
| 3795 +vadd.i64 q5,q5,q13 |
| 3796 + |
| 3797 +# qhasm: 2x t1 = r0 unsigned>> 26 |
| 3798 +# asm 1: vshr.u64 >t1=reg128#8,<r0=reg128#4,#26 |
| 3799 +# asm 2: vshr.u64 >t1=q7,<r0=q3,#26 |
| 3800 +vshr.u64 q7,q3,#26 |
| 3801 + |
| 3802 +# qhasm: x01 = r0 & mask |
| 3803 +# asm 1: vand >x01=reg128#9,<r0=reg128#4,<mask=reg128#7 |
| 3804 +# asm 2: vand >x01=q8,<r0=q3,<mask=q6 |
| 3805 +vand q8,q3,q6 |
| 3806 + |
| 3807 +# qhasm: 2x r1 += t1 |
| 3808 +# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#11,<t1=reg128#8 |
| 3809 +# asm 2: vadd.i64 >r1=q3,<r1=q10,<t1=q7 |
| 3810 +vadd.i64 q3,q10,q7 |
| 3811 + |
| 3812 +# qhasm: 2x t4 = r3 unsigned>> 26 |
| 3813 +# asm 1: vshr.u64 >t4=reg128#8,<r3=reg128#6,#26 |
| 3814 +# asm 2: vshr.u64 >t4=q7,<r3=q5,#26 |
| 3815 +vshr.u64 q7,q5,#26 |
| 3816 + |
| 3817 +# qhasm: r3 &= mask |
| 3818 +# asm 1: vand >r3=reg128#6,<r3=reg128#6,<mask=reg128#7 |
| 3819 +# asm 2: vand >r3=q5,<r3=q5,<mask=q6 |
| 3820 +vand q5,q5,q6 |
| 3821 + |
| 3822 +# qhasm: 2x x4 = r4 + t4 |
| 3823 +# asm 1: vadd.i64 >x4=reg128#11,<r4=reg128#5,<t4=reg128#8 |
| 3824 +# asm 2: vadd.i64 >x4=q10,<r4=q4,<t4=q7 |
| 3825 +vadd.i64 q10,q4,q7 |
| 3826 + |
| 3827 +# qhasm: len -= 32 |
| 3828 +# asm 1: sub >len=int32#4,<len=int32#4,#32 |
| 3829 +# asm 2: sub >len=r3,<len=r3,#32 |
| 3830 +sub r3,r3,#32 |
| 3831 + |
| 3832 +# qhasm: x01 = x01[0,2,1,3] |
| 3833 +# asm 1: vtrn.32 <x01=reg128#9%bot,<x01=reg128#9%top |
| 3834 +# asm 2: vtrn.32 <x01=d16,<x01=d17 |
| 3835 +vtrn.32 d16,d17 |
| 3836 + |
| 3837 +# qhasm: x23 = x23[0,2,1,3] |
| 3838 +# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top |
| 3839 +# asm 2: vtrn.32 <x23=d18,<x23=d19 |
| 3840 +vtrn.32 d18,d19 |
| 3841 + |
| 3842 +# qhasm: r1 = r1[0,2,1,3] |
| 3843 +# asm 1: vtrn.32 <r1=reg128#4%bot,<r1=reg128#4%top |
| 3844 +# asm 2: vtrn.32 <r1=d6,<r1=d7 |
| 3845 +vtrn.32 d6,d7 |
| 3846 + |
| 3847 +# qhasm: r3 = r3[0,2,1,3] |
| 3848 +# asm 1: vtrn.32 <r3=reg128#6%bot,<r3=reg128#6%top |
| 3849 +# asm 2: vtrn.32 <r3=d10,<r3=d11 |
| 3850 +vtrn.32 d10,d11 |
| 3851 + |
| 3852 +# qhasm: x4 = x4[0,2,1,3] |
| 3853 +# asm 1: vtrn.32 <x4=reg128#11%bot,<x4=reg128#11%top |
| 3854 +# asm 2: vtrn.32 <x4=d20,<x4=d21 |
| 3855 +vtrn.32 d20,d21 |
| 3856 + |
| 3857 +# qhasm: x01 = x01[0,1] r1[0,1] |
| 3858 +# asm 1: vext.32 <x01=reg128#9%top,<r1=reg128#4%bot,<r1=reg128#4%bot,#0 |
| 3859 +# asm 2: vext.32 <x01=d17,<r1=d6,<r1=d6,#0 |
| 3860 +vext.32 d17,d6,d6,#0 |
| 3861 + |
| 3862 +# qhasm: x23 = x23[0,1] r3[0,1] |
| 3863 +# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#6%bot,<r3=reg128#6%bot,#0 |
| 3864 +# asm 2: vext.32 <x23=d19,<r3=d10,<r3=d10,#0 |
| 3865 +vext.32 d19,d10,d10,#0 |
| 3866 + |
| 3867 +# qhasm: unsigned>? len - 32 |
| 3868 +# asm 1: cmp <len=int32#4,#32 |
| 3869 +# asm 2: cmp <len=r3,#32 |
| 3870 +cmp r3,#32 |
| 3871 + |
| 3872 +# qhasm: goto mainloop if unsigned> |
| 3873 +bhi ._mainloop |
| 3874 + |
| 3875 +# qhasm: end: |
| 3876 +._end: |
| 3877 + |
| 3878 +# qhasm: mem128[input_0] = x01;input_0+=16 |
| 3879 +# asm 1: vst1.8 {<x01=reg128#9%bot-<x01=reg128#9%top},[<input_0=int32#1]! |
| 3880 +# asm 2: vst1.8 {<x01=d16-<x01=d17},[<input_0=r0]! |
| 3881 +vst1.8 {d16-d17},[r0]! |
| 3882 + |
| 3883 +# qhasm: mem128[input_0] = x23;input_0+=16 |
| 3884 +# asm 1: vst1.8 {<x23=reg128#10%bot-<x23=reg128#10%top},[<input_0=int32#1]! |
| 3885 +# asm 2: vst1.8 {<x23=d18-<x23=d19},[<input_0=r0]! |
| 3886 +vst1.8 {d18-d19},[r0]! |
| 3887 + |
| 3888 +# qhasm: mem64[input_0] = x4[0] |
| 3889 +# asm 1: vst1.8 <x4=reg128#11%bot,[<input_0=int32#1] |
| 3890 +# asm 2: vst1.8 <x4=d20,[<input_0=r0] |
| 3891 +vst1.8 d20,[r0] |
| 3892 + |
| 3893 +# qhasm: len = len |
| 3894 +# asm 1: mov >len=int32#1,<len=int32#4 |
| 3895 +# asm 2: mov >len=r0,<len=r3 |
| 3896 +mov r0,r3 |
| 3897 + |
| 3898 +# qhasm: qpopreturn len |
| 3899 +mov sp,r12 |
| 3900 +vpop {q4,q5,q6,q7} |
| 3901 +bx lr |
| 3902 + |
| 3903 +# qhasm: int32 input_0 |
| 3904 + |
| 3905 +# qhasm: int32 input_1 |
| 3906 + |
| 3907 +# qhasm: int32 input_2 |
| 3908 + |
| 3909 +# qhasm: int32 input_3 |
| 3910 + |
| 3911 +# qhasm: stack32 input_4 |
| 3912 + |
| 3913 +# qhasm: stack32 input_5 |
| 3914 + |
| 3915 +# qhasm: stack32 input_6 |
| 3916 + |
| 3917 +# qhasm: stack32 input_7 |
| 3918 + |
| 3919 +# qhasm: int32 caller_r4 |
| 3920 + |
| 3921 +# qhasm: int32 caller_r5 |
| 3922 + |
| 3923 +# qhasm: int32 caller_r6 |
| 3924 + |
| 3925 +# qhasm: int32 caller_r7 |
| 3926 + |
| 3927 +# qhasm: int32 caller_r8 |
| 3928 + |
| 3929 +# qhasm: int32 caller_r9 |
| 3930 + |
| 3931 +# qhasm: int32 caller_r10 |
| 3932 + |
| 3933 +# qhasm: int32 caller_r11 |
| 3934 + |
| 3935 +# qhasm: int32 caller_r12 |
| 3936 + |
| 3937 +# qhasm: int32 caller_r14 |
| 3938 + |
| 3939 +# qhasm: reg128 caller_q4 |
| 3940 + |
| 3941 +# qhasm: reg128 caller_q5 |
| 3942 + |
| 3943 +# qhasm: reg128 caller_q6 |
| 3944 + |
| 3945 +# qhasm: reg128 caller_q7 |
| 3946 + |
| 3947 +# qhasm: reg128 r0 |
| 3948 + |
| 3949 +# qhasm: reg128 r1 |
| 3950 + |
| 3951 +# qhasm: reg128 r2 |
| 3952 + |
| 3953 +# qhasm: reg128 r3 |
| 3954 + |
| 3955 +# qhasm: reg128 r4 |
| 3956 + |
| 3957 +# qhasm: reg128 x01 |
| 3958 + |
| 3959 +# qhasm: reg128 x23 |
| 3960 + |
| 3961 +# qhasm: reg128 x4 |
| 3962 + |
| 3963 +# qhasm: reg128 y01 |
| 3964 + |
| 3965 +# qhasm: reg128 y23 |
| 3966 + |
| 3967 +# qhasm: reg128 y4 |
| 3968 + |
| 3969 +# qhasm: reg128 _5y01 |
| 3970 + |
| 3971 +# qhasm: reg128 _5y23 |
| 3972 + |
| 3973 +# qhasm: reg128 _5y4 |
| 3974 + |
| 3975 +# qhasm: reg128 c01 |
| 3976 + |
| 3977 +# qhasm: reg128 c23 |
| 3978 + |
| 3979 +# qhasm: reg128 c4 |
| 3980 + |
| 3981 +# qhasm: reg128 t0 |
| 3982 + |
| 3983 +# qhasm: reg128 t1 |
| 3984 + |
| 3985 +# qhasm: reg128 t2 |
| 3986 + |
| 3987 +# qhasm: reg128 t3 |
| 3988 + |
| 3989 +# qhasm: reg128 t4 |
| 3990 + |
| 3991 +# qhasm: reg128 mask |
| 3992 + |
| 3993 +# qhasm: enter crypto_onetimeauth_poly1305_neon2_addmulmod |
| 3994 +.align 2 |
| 3995 +.global openssl_poly1305_neon2_addmulmod |
| 3996 +.type openssl_poly1305_neon2_addmulmod STT_FUNC |
| 3997 +openssl_poly1305_neon2_addmulmod: |
| 3998 +sub sp,sp,#0 |
| 3999 + |
| 4000 +# qhasm: 2x mask = 0xffffffff |
| 4001 +# asm 1: vmov.i64 >mask=reg128#1,#0xffffffff |
| 4002 +# asm 2: vmov.i64 >mask=q0,#0xffffffff |
| 4003 +vmov.i64 q0,#0xffffffff |
| 4004 + |
| 4005 +# qhasm: y01 aligned= mem128[input_2];input_2+=16 |
| 4006 +# asm 1: vld1.8 {>y01=reg128#2%bot->y01=reg128#2%top},[<input_2=int32#3,: 128]! |
| 4007 +# asm 2: vld1.8 {>y01=d2->y01=d3},[<input_2=r2,: 128]! |
| 4008 +vld1.8 {d2-d3},[r2,: 128]! |
| 4009 + |
| 4010 +# qhasm: 4x _5y01 = y01 << 2 |
| 4011 +# asm 1: vshl.i32 >_5y01=reg128#3,<y01=reg128#2,#2 |
| 4012 +# asm 2: vshl.i32 >_5y01=q2,<y01=q1,#2 |
| 4013 +vshl.i32 q2,q1,#2 |
| 4014 + |
| 4015 +# qhasm: y23 aligned= mem128[input_2];input_2+=16 |
| 4016 +# asm 1: vld1.8 {>y23=reg128#4%bot->y23=reg128#4%top},[<input_2=int32#3,: 128]! |
| 4017 +# asm 2: vld1.8 {>y23=d6->y23=d7},[<input_2=r2,: 128]! |
| 4018 +vld1.8 {d6-d7},[r2,: 128]! |
| 4019 + |
| 4020 +# qhasm: 4x _5y23 = y23 << 2 |
| 4021 +# asm 1: vshl.i32 >_5y23=reg128#9,<y23=reg128#4,#2 |
| 4022 +# asm 2: vshl.i32 >_5y23=q8,<y23=q3,#2 |
| 4023 +vshl.i32 q8,q3,#2 |
| 4024 + |
| 4025 +# qhasm: y4 aligned= mem64[input_2]y4[1] |
| 4026 +# asm 1: vld1.8 {<y4=reg128#10%bot},[<input_2=int32#3,: 64] |
| 4027 +# asm 2: vld1.8 {<y4=d18},[<input_2=r2,: 64] |
| 4028 +vld1.8 {d18},[r2,: 64] |
| 4029 + |
| 4030 +# qhasm: 4x _5y4 = y4 << 2 |
| 4031 +# asm 1: vshl.i32 >_5y4=reg128#11,<y4=reg128#10,#2 |
| 4032 +# asm 2: vshl.i32 >_5y4=q10,<y4=q9,#2 |
| 4033 +vshl.i32 q10,q9,#2 |
| 4034 + |
| 4035 +# qhasm: x01 aligned= mem128[input_1];input_1+=16 |
| 4036 +# asm 1: vld1.8 {>x01=reg128#12%bot->x01=reg128#12%top},[<input_1=int32#2,: 128
]! |
| 4037 +# asm 2: vld1.8 {>x01=d22->x01=d23},[<input_1=r1,: 128]! |
| 4038 +vld1.8 {d22-d23},[r1,: 128]! |
| 4039 + |
| 4040 +# qhasm: 4x _5y01 += y01 |
| 4041 +# asm 1: vadd.i32 >_5y01=reg128#3,<_5y01=reg128#3,<y01=reg128#2 |
| 4042 +# asm 2: vadd.i32 >_5y01=q2,<_5y01=q2,<y01=q1 |
| 4043 +vadd.i32 q2,q2,q1 |
| 4044 + |
| 4045 +# qhasm: x23 aligned= mem128[input_1];input_1+=16 |
| 4046 +# asm 1: vld1.8 {>x23=reg128#13%bot->x23=reg128#13%top},[<input_1=int32#2,: 128
]! |
| 4047 +# asm 2: vld1.8 {>x23=d24->x23=d25},[<input_1=r1,: 128]! |
| 4048 +vld1.8 {d24-d25},[r1,: 128]! |
| 4049 + |
| 4050 +# qhasm: 4x _5y23 += y23 |
| 4051 +# asm 1: vadd.i32 >_5y23=reg128#9,<_5y23=reg128#9,<y23=reg128#4 |
| 4052 +# asm 2: vadd.i32 >_5y23=q8,<_5y23=q8,<y23=q3 |
| 4053 +vadd.i32 q8,q8,q3 |
| 4054 + |
| 4055 +# qhasm: 4x _5y4 += y4 |
| 4056 +# asm 1: vadd.i32 >_5y4=reg128#11,<_5y4=reg128#11,<y4=reg128#10 |
| 4057 +# asm 2: vadd.i32 >_5y4=q10,<_5y4=q10,<y4=q9 |
| 4058 +vadd.i32 q10,q10,q9 |
| 4059 + |
| 4060 +# qhasm: c01 aligned= mem128[input_3];input_3+=16 |
| 4061 +# asm 1: vld1.8 {>c01=reg128#14%bot->c01=reg128#14%top},[<input_3=int32#4,: 128
]! |
| 4062 +# asm 2: vld1.8 {>c01=d26->c01=d27},[<input_3=r3,: 128]! |
| 4063 +vld1.8 {d26-d27},[r3,: 128]! |
| 4064 + |
| 4065 +# qhasm: 4x x01 += c01 |
| 4066 +# asm 1: vadd.i32 >x01=reg128#12,<x01=reg128#12,<c01=reg128#14 |
| 4067 +# asm 2: vadd.i32 >x01=q11,<x01=q11,<c01=q13 |
| 4068 +vadd.i32 q11,q11,q13 |
| 4069 + |
| 4070 +# qhasm: c23 aligned= mem128[input_3];input_3+=16 |
| 4071 +# asm 1: vld1.8 {>c23=reg128#14%bot->c23=reg128#14%top},[<input_3=int32#4,: 128
]! |
| 4072 +# asm 2: vld1.8 {>c23=d26->c23=d27},[<input_3=r3,: 128]! |
| 4073 +vld1.8 {d26-d27},[r3,: 128]! |
| 4074 + |
| 4075 +# qhasm: 4x x23 += c23 |
| 4076 +# asm 1: vadd.i32 >x23=reg128#13,<x23=reg128#13,<c23=reg128#14 |
| 4077 +# asm 2: vadd.i32 >x23=q12,<x23=q12,<c23=q13 |
| 4078 +vadd.i32 q12,q12,q13 |
| 4079 + |
| 4080 +# qhasm: x4 aligned= mem64[input_1]x4[1] |
| 4081 +# asm 1: vld1.8 {<x4=reg128#14%bot},[<input_1=int32#2,: 64] |
| 4082 +# asm 2: vld1.8 {<x4=d26},[<input_1=r1,: 64] |
| 4083 +vld1.8 {d26},[r1,: 64] |
| 4084 + |
| 4085 +# qhasm: 2x mask unsigned>>=6 |
| 4086 +# asm 1: vshr.u64 >mask=reg128#1,<mask=reg128#1,#6 |
| 4087 +# asm 2: vshr.u64 >mask=q0,<mask=q0,#6 |
| 4088 +vshr.u64 q0,q0,#6 |
| 4089 + |
| 4090 +# qhasm: c4 aligned= mem64[input_3]c4[1] |
| 4091 +# asm 1: vld1.8 {<c4=reg128#15%bot},[<input_3=int32#4,: 64] |
| 4092 +# asm 2: vld1.8 {<c4=d28},[<input_3=r3,: 64] |
| 4093 +vld1.8 {d28},[r3,: 64] |
| 4094 + |
| 4095 +# qhasm: 4x x4 += c4 |
| 4096 +# asm 1: vadd.i32 >x4=reg128#14,<x4=reg128#14,<c4=reg128#15 |
| 4097 +# asm 2: vadd.i32 >x4=q13,<x4=q13,<c4=q14 |
| 4098 +vadd.i32 q13,q13,q14 |
| 4099 + |
| 4100 +# qhasm: r0[0,1] = x01[0] unsigned* y01[0]; r0[2,3] = x01[1] unsigned* y01[
1] |
| 4101 +# asm 1: vmull.u32 >r0=reg128#15,<x01=reg128#12%bot,<y01=reg128#2%bot |
| 4102 +# asm 2: vmull.u32 >r0=q14,<x01=d22,<y01=d2 |
| 4103 +vmull.u32 q14,d22,d2 |
| 4104 + |
| 4105 +# qhasm: r0[0,1] += x01[2] unsigned* _5y4[0]; r0[2,3] += x01[3] unsigned* _5y
4[1] |
| 4106 +# asm 1: vmlal.u32 <r0=reg128#15,<x01=reg128#12%top,<_5y4=reg128#11%bot |
| 4107 +# asm 2: vmlal.u32 <r0=q14,<x01=d23,<_5y4=d20 |
| 4108 +vmlal.u32 q14,d23,d20 |
| 4109 + |
| 4110 +# qhasm: r0[0,1] += x23[0] unsigned* _5y23[2]; r0[2,3] += x23[1] unsigned* _5y2
3[3] |
| 4111 +# asm 1: vmlal.u32 <r0=reg128#15,<x23=reg128#13%bot,<_5y23=reg128#9%top |
| 4112 +# asm 2: vmlal.u32 <r0=q14,<x23=d24,<_5y23=d17 |
| 4113 +vmlal.u32 q14,d24,d17 |
| 4114 + |
| 4115 +# qhasm: r0[0,1] += x23[2] unsigned* _5y23[0]; r0[2,3] += x23[3] unsigned* _5y2
3[1] |
| 4116 +# asm 1: vmlal.u32 <r0=reg128#15,<x23=reg128#13%top,<_5y23=reg128#9%bot |
| 4117 +# asm 2: vmlal.u32 <r0=q14,<x23=d25,<_5y23=d16 |
| 4118 +vmlal.u32 q14,d25,d16 |
| 4119 + |
| 4120 +# qhasm: r0[0,1] += x4[0] unsigned* _5y01[2]; r0[2,3] += x4[1] unsigned* _5y0
1[3] |
| 4121 +# asm 1: vmlal.u32 <r0=reg128#15,<x4=reg128#14%bot,<_5y01=reg128#3%top |
| 4122 +# asm 2: vmlal.u32 <r0=q14,<x4=d26,<_5y01=d5 |
| 4123 +vmlal.u32 q14,d26,d5 |
| 4124 + |
| 4125 +# qhasm: r1[0,1] = x01[0] unsigned* y01[2]; r1[2,3] = x01[1] unsigned* y01[
3] |
| 4126 +# asm 1: vmull.u32 >r1=reg128#3,<x01=reg128#12%bot,<y01=reg128#2%top |
| 4127 +# asm 2: vmull.u32 >r1=q2,<x01=d22,<y01=d3 |
| 4128 +vmull.u32 q2,d22,d3 |
| 4129 + |
| 4130 +# qhasm: r1[0,1] += x01[2] unsigned* y01[0]; r1[2,3] += x01[3] unsigned* y01[
1] |
| 4131 +# asm 1: vmlal.u32 <r1=reg128#3,<x01=reg128#12%top,<y01=reg128#2%bot |
| 4132 +# asm 2: vmlal.u32 <r1=q2,<x01=d23,<y01=d2 |
| 4133 +vmlal.u32 q2,d23,d2 |
| 4134 + |
| 4135 +# qhasm: r1[0,1] += x23[0] unsigned* _5y4[0]; r1[2,3] += x23[1] unsigned* _5y
4[1] |
| 4136 +# asm 1: vmlal.u32 <r1=reg128#3,<x23=reg128#13%bot,<_5y4=reg128#11%bot |
| 4137 +# asm 2: vmlal.u32 <r1=q2,<x23=d24,<_5y4=d20 |
| 4138 +vmlal.u32 q2,d24,d20 |
| 4139 + |
| 4140 +# qhasm: r1[0,1] += x23[2] unsigned* _5y23[2]; r1[2,3] += x23[3] unsigned* _5y2
3[3] |
| 4141 +# asm 1: vmlal.u32 <r1=reg128#3,<x23=reg128#13%top,<_5y23=reg128#9%top |
| 4142 +# asm 2: vmlal.u32 <r1=q2,<x23=d25,<_5y23=d17 |
| 4143 +vmlal.u32 q2,d25,d17 |
| 4144 + |
| 4145 +# qhasm: r1[0,1] += x4[0] unsigned* _5y23[0]; r1[2,3] += x4[1] unsigned* _5y2
3[1] |
| 4146 +# asm 1: vmlal.u32 <r1=reg128#3,<x4=reg128#14%bot,<_5y23=reg128#9%bot |
| 4147 +# asm 2: vmlal.u32 <r1=q2,<x4=d26,<_5y23=d16 |
| 4148 +vmlal.u32 q2,d26,d16 |
| 4149 + |
| 4150 +# qhasm: r2[0,1] = x01[0] unsigned* y23[0]; r2[2,3] = x01[1] unsigned* y23[
1] |
| 4151 +# asm 1: vmull.u32 >r2=reg128#16,<x01=reg128#12%bot,<y23=reg128#4%bot |
| 4152 +# asm 2: vmull.u32 >r2=q15,<x01=d22,<y23=d6 |
| 4153 +vmull.u32 q15,d22,d6 |
| 4154 + |
| 4155 +# qhasm: r2[0,1] += x01[2] unsigned* y01[2]; r2[2,3] += x01[3] unsigned* y01[
3] |
| 4156 +# asm 1: vmlal.u32 <r2=reg128#16,<x01=reg128#12%top,<y01=reg128#2%top |
| 4157 +# asm 2: vmlal.u32 <r2=q15,<x01=d23,<y01=d3 |
| 4158 +vmlal.u32 q15,d23,d3 |
| 4159 + |
| 4160 +# qhasm: r2[0,1] += x23[0] unsigned* y01[0]; r2[2,3] += x23[1] unsigned* y01[
1] |
| 4161 +# asm 1: vmlal.u32 <r2=reg128#16,<x23=reg128#13%bot,<y01=reg128#2%bot |
| 4162 +# asm 2: vmlal.u32 <r2=q15,<x23=d24,<y01=d2 |
| 4163 +vmlal.u32 q15,d24,d2 |
| 4164 + |
| 4165 +# qhasm: r2[0,1] += x23[2] unsigned* _5y4[0]; r2[2,3] += x23[3] unsigned* _5y
4[1] |
| 4166 +# asm 1: vmlal.u32 <r2=reg128#16,<x23=reg128#13%top,<_5y4=reg128#11%bot |
| 4167 +# asm 2: vmlal.u32 <r2=q15,<x23=d25,<_5y4=d20 |
| 4168 +vmlal.u32 q15,d25,d20 |
| 4169 + |
| 4170 +# qhasm: r2[0,1] += x4[0] unsigned* _5y23[2]; r2[2,3] += x4[1] unsigned* _5y2
3[3] |
| 4171 +# asm 1: vmlal.u32 <r2=reg128#16,<x4=reg128#14%bot,<_5y23=reg128#9%top |
| 4172 +# asm 2: vmlal.u32 <r2=q15,<x4=d26,<_5y23=d17 |
| 4173 +vmlal.u32 q15,d26,d17 |
| 4174 + |
| 4175 +# qhasm: r3[0,1] = x01[0] unsigned* y23[2]; r3[2,3] = x01[1] unsigned* y23[
3] |
| 4176 +# asm 1: vmull.u32 >r3=reg128#9,<x01=reg128#12%bot,<y23=reg128#4%top |
| 4177 +# asm 2: vmull.u32 >r3=q8,<x01=d22,<y23=d7 |
| 4178 +vmull.u32 q8,d22,d7 |
| 4179 + |
| 4180 +# qhasm: r3[0,1] += x01[2] unsigned* y23[0]; r3[2,3] += x01[3] unsigned* y23[
1] |
| 4181 +# asm 1: vmlal.u32 <r3=reg128#9,<x01=reg128#12%top,<y23=reg128#4%bot |
| 4182 +# asm 2: vmlal.u32 <r3=q8,<x01=d23,<y23=d6 |
| 4183 +vmlal.u32 q8,d23,d6 |
| 4184 + |
| 4185 +# qhasm: r3[0,1] += x23[0] unsigned* y01[2]; r3[2,3] += x23[1] unsigned* y01[
3] |
| 4186 +# asm 1: vmlal.u32 <r3=reg128#9,<x23=reg128#13%bot,<y01=reg128#2%top |
| 4187 +# asm 2: vmlal.u32 <r3=q8,<x23=d24,<y01=d3 |
| 4188 +vmlal.u32 q8,d24,d3 |
| 4189 + |
| 4190 +# qhasm: r3[0,1] += x23[2] unsigned* y01[0]; r3[2,3] += x23[3] unsigned* y01[
1] |
| 4191 +# asm 1: vmlal.u32 <r3=reg128#9,<x23=reg128#13%top,<y01=reg128#2%bot |
| 4192 +# asm 2: vmlal.u32 <r3=q8,<x23=d25,<y01=d2 |
| 4193 +vmlal.u32 q8,d25,d2 |
| 4194 + |
| 4195 +# qhasm: r3[0,1] += x4[0] unsigned* _5y4[0]; r3[2,3] += x4[1] unsigned* _5y
4[1] |
| 4196 +# asm 1: vmlal.u32 <r3=reg128#9,<x4=reg128#14%bot,<_5y4=reg128#11%bot |
| 4197 +# asm 2: vmlal.u32 <r3=q8,<x4=d26,<_5y4=d20 |
| 4198 +vmlal.u32 q8,d26,d20 |
| 4199 + |
| 4200 +# qhasm: r4[0,1] = x01[0] unsigned* y4[0]; r4[2,3] = x01[1] unsigned* y4[1
] |
| 4201 +# asm 1: vmull.u32 >r4=reg128#10,<x01=reg128#12%bot,<y4=reg128#10%bot |
| 4202 +# asm 2: vmull.u32 >r4=q9,<x01=d22,<y4=d18 |
| 4203 +vmull.u32 q9,d22,d18 |
| 4204 + |
| 4205 +# qhasm: r4[0,1] += x01[2] unsigned* y23[2]; r4[2,3] += x01[3] unsigned* y23[3
] |
| 4206 +# asm 1: vmlal.u32 <r4=reg128#10,<x01=reg128#12%top,<y23=reg128#4%top |
| 4207 +# asm 2: vmlal.u32 <r4=q9,<x01=d23,<y23=d7 |
| 4208 +vmlal.u32 q9,d23,d7 |
| 4209 + |
| 4210 +# qhasm: r4[0,1] += x23[0] unsigned* y23[0]; r4[2,3] += x23[1] unsigned* y23[1
] |
| 4211 +# asm 1: vmlal.u32 <r4=reg128#10,<x23=reg128#13%bot,<y23=reg128#4%bot |
| 4212 +# asm 2: vmlal.u32 <r4=q9,<x23=d24,<y23=d6 |
| 4213 +vmlal.u32 q9,d24,d6 |
| 4214 + |
| 4215 +# qhasm: r4[0,1] += x23[2] unsigned* y01[2]; r4[2,3] += x23[3] unsigned* y01[3
] |
| 4216 +# asm 1: vmlal.u32 <r4=reg128#10,<x23=reg128#13%top,<y01=reg128#2%top |
| 4217 +# asm 2: vmlal.u32 <r4=q9,<x23=d25,<y01=d3 |
| 4218 +vmlal.u32 q9,d25,d3 |
| 4219 + |
| 4220 +# qhasm: r4[0,1] += x4[0] unsigned* y01[0]; r4[2,3] += x4[1] unsigned* y01[1
] |
| 4221 +# asm 1: vmlal.u32 <r4=reg128#10,<x4=reg128#14%bot,<y01=reg128#2%bot |
| 4222 +# asm 2: vmlal.u32 <r4=q9,<x4=d26,<y01=d2 |
| 4223 +vmlal.u32 q9,d26,d2 |
| 4224 + |
| 4225 +# qhasm: 2x t1 = r0 unsigned>> 26 |
| 4226 +# asm 1: vshr.u64 >t1=reg128#2,<r0=reg128#15,#26 |
| 4227 +# asm 2: vshr.u64 >t1=q1,<r0=q14,#26 |
| 4228 +vshr.u64 q1,q14,#26 |
| 4229 + |
| 4230 +# qhasm: r0 &= mask |
| 4231 +# asm 1: vand >r0=reg128#4,<r0=reg128#15,<mask=reg128#1 |
| 4232 +# asm 2: vand >r0=q3,<r0=q14,<mask=q0 |
| 4233 +vand q3,q14,q0 |
| 4234 + |
| 4235 +# qhasm: 2x r1 += t1 |
| 4236 +# asm 1: vadd.i64 >r1=reg128#2,<r1=reg128#3,<t1=reg128#2 |
| 4237 +# asm 2: vadd.i64 >r1=q1,<r1=q2,<t1=q1 |
| 4238 +vadd.i64 q1,q2,q1 |
| 4239 + |
| 4240 +# qhasm: 2x t4 = r3 unsigned>> 26 |
| 4241 +# asm 1: vshr.u64 >t4=reg128#3,<r3=reg128#9,#26 |
| 4242 +# asm 2: vshr.u64 >t4=q2,<r3=q8,#26 |
| 4243 +vshr.u64 q2,q8,#26 |
| 4244 + |
| 4245 +# qhasm: r3 &= mask |
| 4246 +# asm 1: vand >r3=reg128#9,<r3=reg128#9,<mask=reg128#1 |
| 4247 +# asm 2: vand >r3=q8,<r3=q8,<mask=q0 |
| 4248 +vand q8,q8,q0 |
| 4249 + |
| 4250 +# qhasm: 2x r4 += t4 |
| 4251 +# asm 1: vadd.i64 >r4=reg128#3,<r4=reg128#10,<t4=reg128#3 |
| 4252 +# asm 2: vadd.i64 >r4=q2,<r4=q9,<t4=q2 |
| 4253 +vadd.i64 q2,q9,q2 |
| 4254 + |
| 4255 +# qhasm: 2x t2 = r1 unsigned>> 26 |
| 4256 +# asm 1: vshr.u64 >t2=reg128#10,<r1=reg128#2,#26 |
| 4257 +# asm 2: vshr.u64 >t2=q9,<r1=q1,#26 |
| 4258 +vshr.u64 q9,q1,#26 |
| 4259 + |
| 4260 +# qhasm: r1 &= mask |
| 4261 +# asm 1: vand >r1=reg128#2,<r1=reg128#2,<mask=reg128#1 |
| 4262 +# asm 2: vand >r1=q1,<r1=q1,<mask=q0 |
| 4263 +vand q1,q1,q0 |
| 4264 + |
| 4265 +# qhasm: 2x t0 = r4 unsigned>> 26 |
| 4266 +# asm 1: vshr.u64 >t0=reg128#11,<r4=reg128#3,#26 |
| 4267 +# asm 2: vshr.u64 >t0=q10,<r4=q2,#26 |
| 4268 +vshr.u64 q10,q2,#26 |
| 4269 + |
| 4270 +# qhasm: 2x r2 += t2 |
| 4271 +# asm 1: vadd.i64 >r2=reg128#10,<r2=reg128#16,<t2=reg128#10 |
| 4272 +# asm 2: vadd.i64 >r2=q9,<r2=q15,<t2=q9 |
| 4273 +vadd.i64 q9,q15,q9 |
| 4274 + |
| 4275 +# qhasm: r4 &= mask |
| 4276 +# asm 1: vand >r4=reg128#3,<r4=reg128#3,<mask=reg128#1 |
| 4277 +# asm 2: vand >r4=q2,<r4=q2,<mask=q0 |
| 4278 +vand q2,q2,q0 |
| 4279 + |
| 4280 +# qhasm: 2x r0 += t0 |
| 4281 +# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#11 |
| 4282 +# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q10 |
| 4283 +vadd.i64 q3,q3,q10 |
| 4284 + |
| 4285 +# qhasm: 2x t0 <<= 2 |
| 4286 +# asm 1: vshl.i64 >t0=reg128#11,<t0=reg128#11,#2 |
| 4287 +# asm 2: vshl.i64 >t0=q10,<t0=q10,#2 |
| 4288 +vshl.i64 q10,q10,#2 |
| 4289 + |
| 4290 +# qhasm: 2x t3 = r2 unsigned>> 26 |
| 4291 +# asm 1: vshr.u64 >t3=reg128#12,<r2=reg128#10,#26 |
| 4292 +# asm 2: vshr.u64 >t3=q11,<r2=q9,#26 |
| 4293 +vshr.u64 q11,q9,#26 |
| 4294 + |
| 4295 +# qhasm: 2x r0 += t0 |
| 4296 +# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#11 |
| 4297 +# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q10 |
| 4298 +vadd.i64 q3,q3,q10 |
| 4299 + |
| 4300 +# qhasm: x23 = r2 & mask |
| 4301 +# asm 1: vand >x23=reg128#10,<r2=reg128#10,<mask=reg128#1 |
| 4302 +# asm 2: vand >x23=q9,<r2=q9,<mask=q0 |
| 4303 +vand q9,q9,q0 |
| 4304 + |
| 4305 +# qhasm: 2x r3 += t3 |
| 4306 +# asm 1: vadd.i64 >r3=reg128#9,<r3=reg128#9,<t3=reg128#12 |
| 4307 +# asm 2: vadd.i64 >r3=q8,<r3=q8,<t3=q11 |
| 4308 +vadd.i64 q8,q8,q11 |
| 4309 + |
| 4310 +# qhasm: 2x t1 = r0 unsigned>> 26 |
| 4311 +# asm 1: vshr.u64 >t1=reg128#11,<r0=reg128#4,#26 |
| 4312 +# asm 2: vshr.u64 >t1=q10,<r0=q3,#26 |
| 4313 +vshr.u64 q10,q3,#26 |
| 4314 + |
| 4315 +# qhasm: x23 = x23[0,2,1,3] |
| 4316 +# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top |
| 4317 +# asm 2: vtrn.32 <x23=d18,<x23=d19 |
| 4318 +vtrn.32 d18,d19 |
| 4319 + |
| 4320 +# qhasm: x01 = r0 & mask |
| 4321 +# asm 1: vand >x01=reg128#4,<r0=reg128#4,<mask=reg128#1 |
| 4322 +# asm 2: vand >x01=q3,<r0=q3,<mask=q0 |
| 4323 +vand q3,q3,q0 |
| 4324 + |
| 4325 +# qhasm: 2x r1 += t1 |
| 4326 +# asm 1: vadd.i64 >r1=reg128#2,<r1=reg128#2,<t1=reg128#11 |
| 4327 +# asm 2: vadd.i64 >r1=q1,<r1=q1,<t1=q10 |
| 4328 +vadd.i64 q1,q1,q10 |
| 4329 + |
| 4330 +# qhasm: 2x t4 = r3 unsigned>> 26 |
| 4331 +# asm 1: vshr.u64 >t4=reg128#11,<r3=reg128#9,#26 |
| 4332 +# asm 2: vshr.u64 >t4=q10,<r3=q8,#26 |
| 4333 +vshr.u64 q10,q8,#26 |
| 4334 + |
| 4335 +# qhasm: x01 = x01[0,2,1,3] |
| 4336 +# asm 1: vtrn.32 <x01=reg128#4%bot,<x01=reg128#4%top |
| 4337 +# asm 2: vtrn.32 <x01=d6,<x01=d7 |
| 4338 +vtrn.32 d6,d7 |
| 4339 + |
| 4340 +# qhasm: r3 &= mask |
| 4341 +# asm 1: vand >r3=reg128#1,<r3=reg128#9,<mask=reg128#1 |
| 4342 +# asm 2: vand >r3=q0,<r3=q8,<mask=q0 |
| 4343 +vand q0,q8,q0 |
| 4344 + |
| 4345 +# qhasm: r1 = r1[0,2,1,3] |
| 4346 +# asm 1: vtrn.32 <r1=reg128#2%bot,<r1=reg128#2%top |
| 4347 +# asm 2: vtrn.32 <r1=d2,<r1=d3 |
| 4348 +vtrn.32 d2,d3 |
| 4349 + |
| 4350 +# qhasm: 2x x4 = r4 + t4 |
| 4351 +# asm 1: vadd.i64 >x4=reg128#3,<r4=reg128#3,<t4=reg128#11 |
| 4352 +# asm 2: vadd.i64 >x4=q2,<r4=q2,<t4=q10 |
| 4353 +vadd.i64 q2,q2,q10 |
| 4354 + |
| 4355 +# qhasm: r3 = r3[0,2,1,3] |
| 4356 +# asm 1: vtrn.32 <r3=reg128#1%bot,<r3=reg128#1%top |
| 4357 +# asm 2: vtrn.32 <r3=d0,<r3=d1 |
| 4358 +vtrn.32 d0,d1 |
| 4359 + |
| 4360 +# qhasm: x01 = x01[0,1] r1[0,1] |
| 4361 +# asm 1: vext.32 <x01=reg128#4%top,<r1=reg128#2%bot,<r1=reg128#2%bot,#0 |
| 4362 +# asm 2: vext.32 <x01=d7,<r1=d2,<r1=d2,#0 |
| 4363 +vext.32 d7,d2,d2,#0 |
| 4364 + |
| 4365 +# qhasm: x23 = x23[0,1] r3[0,1] |
| 4366 +# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#1%bot,<r3=reg128#1%bot,#0 |
| 4367 +# asm 2: vext.32 <x23=d19,<r3=d0,<r3=d0,#0 |
| 4368 +vext.32 d19,d0,d0,#0 |
| 4369 + |
| 4370 +# qhasm: x4 = x4[0,2,1,3] |
| 4371 +# asm 1: vtrn.32 <x4=reg128#3%bot,<x4=reg128#3%top |
| 4372 +# asm 2: vtrn.32 <x4=d4,<x4=d5 |
| 4373 +vtrn.32 d4,d5 |
| 4374 + |
| 4375 +# qhasm: mem128[input_0] aligned= x01;input_0+=16 |
| 4376 +# asm 1: vst1.8 {<x01=reg128#4%bot-<x01=reg128#4%top},[<input_0=int32#1,: 128]! |
| 4377 +# asm 2: vst1.8 {<x01=d6-<x01=d7},[<input_0=r0,: 128]! |
| 4378 +vst1.8 {d6-d7},[r0,: 128]! |
| 4379 + |
| 4380 +# qhasm: mem128[input_0] aligned= x23;input_0+=16 |
| 4381 +# asm 1: vst1.8 {<x23=reg128#10%bot-<x23=reg128#10%top},[<input_0=int32#1,: 128
]! |
| 4382 +# asm 2: vst1.8 {<x23=d18-<x23=d19},[<input_0=r0,: 128]! |
| 4383 +vst1.8 {d18-d19},[r0,: 128]! |
| 4384 + |
| 4385 +# qhasm: mem64[input_0] aligned= x4[0] |
| 4386 +# asm 1: vst1.8 <x4=reg128#3%bot,[<input_0=int32#1,: 64] |
| 4387 +# asm 2: vst1.8 <x4=d4,[<input_0=r0,: 64] |
| 4388 +vst1.8 d4,[r0,: 64] |
| 4389 + |
| 4390 +# qhasm: return |
| 4391 +add sp,sp,#0 |
| 4392 +bx lr |
| 4393 diff --git a/crypto/poly1305/poly1305_vec.c b/crypto/poly1305/poly1305_vec.c |
| 4394 new file mode 100644 |
| 4395 index 0000000..c546200 |
| 4396 --- /dev/null |
| 4397 +++ b/crypto/poly1305/poly1305_vec.c |
| 4398 @@ -0,0 +1,733 @@ |
| 4399 +/* ==================================================================== |
| 4400 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. |
| 4401 + * |
| 4402 + * Redistribution and use in source and binary forms, with or without |
| 4403 + * modification, are permitted provided that the following conditions |
| 4404 + * are met: |
| 4405 + * |
| 4406 + * 1. Redistributions of source code must retain the above copyright |
| 4407 + * notice, this list of conditions and the following disclaimer. |
| 4408 + * |
| 4409 + * 2. Redistributions in binary form must reproduce the above copyright |
| 4410 + * notice, this list of conditions and the following disclaimer in |
| 4411 + * the documentation and/or other materials provided with the |
| 4412 + * distribution. |
| 4413 + * |
| 4414 + * 3. All advertising materials mentioning features or use of this |
| 4415 + * software must display the following acknowledgment: |
| 4416 + * "This product includes software developed by the OpenSSL Project |
| 4417 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" |
| 4418 + * |
| 4419 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 4420 + * endorse or promote products derived from this software without |
| 4421 + * prior written permission. For written permission, please contact |
| 4422 + * licensing@OpenSSL.org. |
| 4423 + * |
| 4424 + * 5. Products derived from this software may not be called "OpenSSL" |
| 4425 + * nor may "OpenSSL" appear in their names without prior written |
| 4426 + * permission of the OpenSSL Project. |
| 4427 + * |
| 4428 + * 6. Redistributions of any form whatsoever must retain the following |
| 4429 + * acknowledgment: |
| 4430 + * "This product includes software developed by the OpenSSL Project |
| 4431 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" |
| 4432 + * |
| 4433 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 4434 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 4435 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 4436 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 4437 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 4438 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 4439 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 4440 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 4441 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 4442 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 4443 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 4444 + * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 4445 + * ==================================================================== |
| 4446 + */ |
| 4447 + |
| 4448 +/* This implementation of poly1305 is by Andrew Moon |
| 4449 + * (https://github.com/floodyberry/poly1305-donna) and released as public |
| 4450 + * domain. It implements SIMD vectorization based on the algorithm described in |
| 4451 + * http://cr.yp.to/papers.html#neoncrypto. Unrolled to 2 powers, i.e. 64 byte |
| 4452 + * block size |
| 4453 +*/ |
| 4454 + |
| 4455 +#include <emmintrin.h> |
| 4456 +#include <stdint.h> |
| 4457 +#include <openssl/opensslconf.h> |
| 4458 + |
| 4459 +#if !defined(OPENSSL_NO_POLY1305) |
| 4460 + |
| 4461 +#include <openssl/poly1305.h> |
| 4462 + |
| 4463 +#define ALIGN(x) __attribute__((aligned(x))) |
| 4464 +#define INLINE inline |
| 4465 +#define U8TO64_LE(m) (*(uint64_t*)(m)) |
| 4466 +#define U8TO32_LE(m) (*(uint32_t*)(m)) |
| 4467 +#define U64TO8_LE(m,v) (*(uint64_t*)(m)) = v |
| 4468 + |
| 4469 +typedef __m128i xmmi; |
| 4470 +typedef unsigned __int128 uint128_t; |
| 4471 + |
| 4472 +static const uint32_t ALIGN(16) poly1305_x64_sse2_message_mask[4] = |
| 4473 + {(1 << 26) - 1, 0, (1 << 26) - 1, 0}; |
| 4474 +static const uint32_t ALIGN(16) poly1305_x64_sse2_5[4] = {5, 0, 5, 0}; |
| 4475 +static const uint32_t ALIGN(16) poly1305_x64_sse2_1shl128[4] = |
| 4476 + {(1 << 24), 0, (1 << 24), 0}; |
| 4477 + |
| 4478 +static uint128_t INLINE |
| 4479 +add128(uint128_t a, uint128_t b) |
| 4480 + { |
| 4481 + return a + b; |
| 4482 + } |
| 4483 + |
| 4484 +static uint128_t INLINE |
| 4485 +add128_64(uint128_t a, uint64_t b) |
| 4486 + { |
| 4487 + return a + b; |
| 4488 + } |
| 4489 + |
| 4490 +static uint128_t INLINE |
| 4491 +mul64x64_128(uint64_t a, uint64_t b) |
| 4492 + { |
| 4493 + return (uint128_t)a * b; |
| 4494 + } |
| 4495 + |
| 4496 +static uint64_t INLINE |
| 4497 +lo128(uint128_t a) |
| 4498 + { |
| 4499 + return (uint64_t)a; |
| 4500 + } |
| 4501 + |
| 4502 +static uint64_t INLINE |
| 4503 +shr128(uint128_t v, const int shift) |
| 4504 + { |
| 4505 + return (uint64_t)(v >> shift); |
| 4506 + } |
| 4507 + |
| 4508 +static uint64_t INLINE |
| 4509 +shr128_pair(uint64_t hi, uint64_t lo, const int shift) |
| 4510 + { |
| 4511 + return (uint64_t)((((uint128_t)hi << 64) | lo) >> shift); |
| 4512 + } |
| 4513 + |
| 4514 +typedef struct poly1305_power_t |
| 4515 + { |
| 4516 + union |
| 4517 + { |
| 4518 + xmmi v; |
| 4519 + uint64_t u[2]; |
| 4520 + uint32_t d[4]; |
| 4521 + } R20,R21,R22,R23,R24,S21,S22,S23,S24; |
| 4522 + } poly1305_power; |
| 4523 + |
| 4524 +typedef struct poly1305_state_internal_t |
| 4525 + { |
| 4526 + poly1305_power P[2]; /* 288 bytes, top 32 bit halves unused = 144 |
| 4527 + bytes of free storage */ |
| 4528 + union |
| 4529 + { |
| 4530 + xmmi H[5]; /* 80 bytes */ |
| 4531 + uint64_t HH[10]; |
| 4532 + }; |
| 4533 + /* uint64_t r0,r1,r2; [24 bytes] */ |
| 4534 + /* uint64_t pad0,pad1; [16 bytes] */ |
| 4535 + uint64_t started; /* 8 bytes */ |
| 4536 + uint64_t leftover; /* 8 bytes */ |
| 4537 + uint8_t buffer[64]; /* 64 bytes */ |
| 4538 + } poly1305_state_internal; /* 448 bytes total + 63 bytes for |
| 4539 + alignment = 511 bytes raw */ |
| 4540 + |
| 4541 +static poly1305_state_internal INLINE |
| 4542 +*poly1305_aligned_state(poly1305_state *state) |
| 4543 + { |
| 4544 + return (poly1305_state_internal *)(((uint64_t)state + 63) & ~63); |
| 4545 + } |
| 4546 + |
| 4547 +/* copy 0-63 bytes */ |
| 4548 +static void INLINE |
| 4549 +poly1305_block_copy(uint8_t *dst, const uint8_t *src, size_t bytes) |
| 4550 + { |
| 4551 + size_t offset = src - dst; |
| 4552 + if (bytes & 32) |
| 4553 + { |
| 4554 + _mm_storeu_si128((xmmi *)(dst + 0), _mm_loadu_si128((xmmi *)(dst
+ offset + 0))); |
| 4555 + _mm_storeu_si128((xmmi *)(dst + 16), _mm_loadu_si128((xmmi *)(ds
t + offset + 16))); |
| 4556 + dst += 32; |
| 4557 + } |
| 4558 + if (bytes & 16) |
| 4559 + { |
| 4560 + _mm_storeu_si128((xmmi *)dst, |
| 4561 + _mm_loadu_si128((xmmi *)(dst + offset))); |
| 4562 + dst += 16; |
| 4563 + } |
| 4564 + if (bytes & 8) |
| 4565 + { |
| 4566 + *(uint64_t *)dst = *(uint64_t *)(dst + offset); |
| 4567 + dst += 8; |
| 4568 + } |
| 4569 + if (bytes & 4) |
| 4570 + { |
| 4571 + *(uint32_t *)dst = *(uint32_t *)(dst + offset); |
| 4572 + dst += 4; |
| 4573 + } |
| 4574 + if (bytes & 2) |
| 4575 + { |
| 4576 + *(uint16_t *)dst = *(uint16_t *)(dst + offset); |
| 4577 + dst += 2; |
| 4578 + } |
| 4579 + if (bytes & 1) |
| 4580 + { |
| 4581 + *( uint8_t *)dst = *( uint8_t *)(dst + offset); |
| 4582 + } |
| 4583 + } |
| 4584 + |
| 4585 +/* zero 0-15 bytes */ |
| 4586 +static void INLINE |
| 4587 +poly1305_block_zero(uint8_t *dst, size_t bytes) |
| 4588 + { |
| 4589 + if (bytes & 8) { *(uint64_t *)dst = 0; dst += 8; } |
| 4590 + if (bytes & 4) { *(uint32_t *)dst = 0; dst += 4; } |
| 4591 + if (bytes & 2) { *(uint16_t *)dst = 0; dst += 2; } |
| 4592 + if (bytes & 1) { *( uint8_t *)dst = 0; } |
| 4593 + } |
| 4594 + |
| 4595 +static size_t INLINE |
| 4596 +poly1305_min(size_t a, size_t b) |
| 4597 + { |
| 4598 + return (a < b) ? a : b; |
| 4599 + } |
| 4600 + |
| 4601 +void |
| 4602 +CRYPTO_poly1305_init(poly1305_state *state, const unsigned char key[32]) |
| 4603 + { |
| 4604 + poly1305_state_internal *st = poly1305_aligned_state(state); |
| 4605 + poly1305_power *p; |
| 4606 + uint64_t r0,r1,r2; |
| 4607 + uint64_t t0,t1; |
| 4608 + |
| 4609 + /* clamp key */ |
| 4610 + t0 = U8TO64_LE(key + 0); |
| 4611 + t1 = U8TO64_LE(key + 8); |
| 4612 + r0 = t0 & 0xffc0fffffff; t0 >>= 44; t0 |= t1 << 20; |
| 4613 + r1 = t0 & 0xfffffc0ffff; t1 >>= 24; |
| 4614 + r2 = t1 & 0x00ffffffc0f; |
| 4615 + |
| 4616 + /* store r in un-used space of st->P[1] */ |
| 4617 + p = &st->P[1]; |
| 4618 + p->R20.d[1] = (uint32_t)(r0 ); |
| 4619 + p->R20.d[3] = (uint32_t)(r0 >> 32); |
| 4620 + p->R21.d[1] = (uint32_t)(r1 ); |
| 4621 + p->R21.d[3] = (uint32_t)(r1 >> 32); |
| 4622 + p->R22.d[1] = (uint32_t)(r2 ); |
| 4623 + p->R22.d[3] = (uint32_t)(r2 >> 32); |
| 4624 + |
| 4625 + /* store pad */ |
| 4626 + p->R23.d[1] = U8TO32_LE(key + 16); |
| 4627 + p->R23.d[3] = U8TO32_LE(key + 20); |
| 4628 + p->R24.d[1] = U8TO32_LE(key + 24); |
| 4629 + p->R24.d[3] = U8TO32_LE(key + 28); |
| 4630 + |
| 4631 + /* H = 0 */ |
| 4632 + st->H[0] = _mm_setzero_si128(); |
| 4633 + st->H[1] = _mm_setzero_si128(); |
| 4634 + st->H[2] = _mm_setzero_si128(); |
| 4635 + st->H[3] = _mm_setzero_si128(); |
| 4636 + st->H[4] = _mm_setzero_si128(); |
| 4637 + |
| 4638 + st->started = 0; |
| 4639 + st->leftover = 0; |
| 4640 + } |
| 4641 + |
| 4642 +static void |
| 4643 +poly1305_first_block(poly1305_state_internal *st, const uint8_t *m) |
| 4644 + { |
| 4645 + const xmmi MMASK = |
| 4646 + _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); |
| 4647 + const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5); |
| 4648 + const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128); |
| 4649 + xmmi T5,T6; |
| 4650 + poly1305_power *p; |
| 4651 + uint128_t d[3]; |
| 4652 + uint64_t r0,r1,r2; |
| 4653 + uint64_t r20,r21,r22,s22; |
| 4654 + uint64_t pad0,pad1; |
| 4655 + uint64_t c; |
| 4656 + uint64_t i; |
| 4657 + |
| 4658 + /* pull out stored info */ |
| 4659 + p = &st->P[1]; |
| 4660 + |
| 4661 + r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; |
| 4662 + r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; |
| 4663 + r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; |
| 4664 + pad0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1]; |
| 4665 + pad1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1]; |
| 4666 + |
| 4667 + /* compute powers r^2,r^4 */ |
| 4668 + r20 = r0; |
| 4669 + r21 = r1; |
| 4670 + r22 = r2; |
| 4671 + for (i = 0; i < 2; i++) |
| 4672 + { |
| 4673 + s22 = r22 * (5 << 2); |
| 4674 + |
| 4675 + d[0] = add128(mul64x64_128(r20, r20), mul64x64_128(r21 * 2, s22)
); |
| 4676 + d[1] = add128(mul64x64_128(r22, s22), mul64x64_128(r20 * 2, r21)
); |
| 4677 + d[2] = add128(mul64x64_128(r21, r21), mul64x64_128(r22 * 2, r20)
); |
| 4678 + |
| 4679 + r20 = lo128(d[0]) & 0xfffffffffff; c
= shr128(d[0], 44); |
| 4680 + d[1] = add128_64(d[1], c); r21 = lo128(d[1]) & 0xfffffffffff; c
= shr128(d[1], 44); |
| 4681 + d[2] = add128_64(d[2], c); r22 = lo128(d[2]) & 0x3ffffffffff; c
= shr128(d[2], 42); |
| 4682 + r20 += c * 5; c = (r20 >> 44); r20 = r20 & 0xfffffffffff; |
| 4683 + r21 += c; |
| 4684 + |
| 4685 + p->R20.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)( r20
) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); |
| 4686 + p->R21.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r20 >
> 26) | (r21 << 18)) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); |
| 4687 + p->R22.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >
> 8) ) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); |
| 4688 + p->R23.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >
> 34) | (r22 << 10)) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); |
| 4689 + p->R24.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r22 >
> 16) ) ), _MM_SHUFFLE(1,0,1,0)); |
| 4690 + p->S21.v = _mm_mul_epu32(p->R21.v, FIVE); |
| 4691 + p->S22.v = _mm_mul_epu32(p->R22.v, FIVE); |
| 4692 + p->S23.v = _mm_mul_epu32(p->R23.v, FIVE); |
| 4693 + p->S24.v = _mm_mul_epu32(p->R24.v, FIVE); |
| 4694 + p--; |
| 4695 + } |
| 4696 + |
| 4697 + /* put saved info back */ |
| 4698 + p = &st->P[1]; |
| 4699 + p->R20.d[1] = (uint32_t)(r0 ); |
| 4700 + p->R20.d[3] = (uint32_t)(r0 >> 32); |
| 4701 + p->R21.d[1] = (uint32_t)(r1 ); |
| 4702 + p->R21.d[3] = (uint32_t)(r1 >> 32); |
| 4703 + p->R22.d[1] = (uint32_t)(r2 ); |
| 4704 + p->R22.d[3] = (uint32_t)(r2 >> 32); |
| 4705 + p->R23.d[1] = (uint32_t)(pad0 ); |
| 4706 + p->R23.d[3] = (uint32_t)(pad0 >> 32); |
| 4707 + p->R24.d[1] = (uint32_t)(pad1 ); |
| 4708 + p->R24.d[3] = (uint32_t)(pad1 >> 32); |
| 4709 + |
| 4710 + /* H = [Mx,My] */ |
| 4711 + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi6
4((xmmi *)(m + 16))); |
| 4712 + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi6
4((xmmi *)(m + 24))); |
| 4713 + st->H[0] = _mm_and_si128(MMASK, T5); |
| 4714 + st->H[1] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); |
| 4715 + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)); |
| 4716 + st->H[2] = _mm_and_si128(MMASK, T5); |
| 4717 + st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); |
| 4718 + st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); |
| 4719 + } |
| 4720 + |
| 4721 +static void |
| 4722 +poly1305_blocks(poly1305_state_internal *st, const uint8_t *m, size_t bytes) |
| 4723 + { |
| 4724 + const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask
); |
| 4725 + const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5); |
| 4726 + const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128); |
| 4727 + |
| 4728 + poly1305_power *p; |
| 4729 + xmmi H0,H1,H2,H3,H4; |
| 4730 + xmmi T0,T1,T2,T3,T4,T5,T6; |
| 4731 + xmmi M0,M1,M2,M3,M4; |
| 4732 + xmmi C1,C2; |
| 4733 + |
| 4734 + H0 = st->H[0]; |
| 4735 + H1 = st->H[1]; |
| 4736 + H2 = st->H[2]; |
| 4737 + H3 = st->H[3]; |
| 4738 + H4 = st->H[4]; |
| 4739 + |
| 4740 + while (bytes >= 64) |
| 4741 + { |
| 4742 + /* H *= [r^4,r^4] */ |
| 4743 + p = &st->P[0]; |
| 4744 + T0 = _mm_mul_epu32(H0, p->R20.v); |
| 4745 + T1 = _mm_mul_epu32(H0, p->R21.v); |
| 4746 + T2 = _mm_mul_epu32(H0, p->R22.v); |
| 4747 + T3 = _mm_mul_epu32(H0, p->R23.v); |
| 4748 + T4 = _mm_mul_epu32(H0, p->R24.v); |
| 4749 + T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 4750 + T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 4751 + T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 4752 + T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 4753 + T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 4754 + T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 4755 + T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 4756 + T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 4757 + T5 = _mm_mul_epu32(H1, p->R23.v);
T4 = _mm_add_epi64(T4, T5); |
| 4758 + T5 = _mm_mul_epu32(H2, p->R22.v);
T4 = _mm_add_epi64(T4, T5); |
| 4759 + T5 = _mm_mul_epu32(H3, p->R21.v);
T4 = _mm_add_epi64(T4, T5); |
| 4760 + T5 = _mm_mul_epu32(H4, p->R20.v);
T4 = _mm_add_epi64(T4, T5); |
| 4761 + |
| 4762 + /* H += [Mx,My]*[r^2,r^2] */ |
| 4763 + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_lo
adl_epi64((xmmi *)(m + 16))); |
| 4764 + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_lo
adl_epi64((xmmi *)(m + 24))); |
| 4765 + M0 = _mm_and_si128(MMASK, T5); |
| 4766 + M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); |
| 4767 + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)
); |
| 4768 + M2 = _mm_and_si128(MMASK, T5); |
| 4769 + M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); |
| 4770 + M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); |
| 4771 + |
| 4772 + p = &st->P[1]; |
| 4773 + T5 = _mm_mul_epu32(M0, p->R20.v); T6 = _mm_mul_epu32(M0, p->R21.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 4774 + T5 = _mm_mul_epu32(M1, p->S24.v); T6 = _mm_mul_epu32(M1, p->R20.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 4775 + T5 = _mm_mul_epu32(M2, p->S23.v); T6 = _mm_mul_epu32(M2, p->S24.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 4776 + T5 = _mm_mul_epu32(M3, p->S22.v); T6 = _mm_mul_epu32(M3, p->S23.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 4777 + T5 = _mm_mul_epu32(M4, p->S21.v); T6 = _mm_mul_epu32(M4, p->S22.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 4778 + T5 = _mm_mul_epu32(M0, p->R22.v); T6 = _mm_mul_epu32(M0, p->R23.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 4779 + T5 = _mm_mul_epu32(M1, p->R21.v); T6 = _mm_mul_epu32(M1, p->R22.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 4780 + T5 = _mm_mul_epu32(M2, p->R20.v); T6 = _mm_mul_epu32(M2, p->R21.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 4781 + T5 = _mm_mul_epu32(M3, p->S24.v); T6 = _mm_mul_epu32(M3, p->R20.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 4782 + T5 = _mm_mul_epu32(M4, p->S23.v); T6 = _mm_mul_epu32(M4, p->S24.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 4783 + T5 = _mm_mul_epu32(M0, p->R24.v);
T4 = _mm_add_epi64(T4, T5); |
| 4784 + T5 = _mm_mul_epu32(M1, p->R23.v);
T4 = _mm_add_epi64(T4, T5); |
| 4785 + T5 = _mm_mul_epu32(M2, p->R22.v);
T4 = _mm_add_epi64(T4, T5); |
| 4786 + T5 = _mm_mul_epu32(M3, p->R21.v);
T4 = _mm_add_epi64(T4, T5); |
| 4787 + T5 = _mm_mul_epu32(M4, p->R20.v);
T4 = _mm_add_epi64(T4, T5); |
| 4788 + |
| 4789 + /* H += [Mx,My] */ |
| 4790 + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 32)), _mm_l
oadl_epi64((xmmi *)(m + 48))); |
| 4791 + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 40)), _mm_l
oadl_epi64((xmmi *)(m + 56))); |
| 4792 + M0 = _mm_and_si128(MMASK, T5); |
| 4793 + M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); |
| 4794 + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)
); |
| 4795 + M2 = _mm_and_si128(MMASK, T5); |
| 4796 + M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); |
| 4797 + M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); |
| 4798 + |
| 4799 + T0 = _mm_add_epi64(T0, M0); |
| 4800 + T1 = _mm_add_epi64(T1, M1); |
| 4801 + T2 = _mm_add_epi64(T2, M2); |
| 4802 + T3 = _mm_add_epi64(T3, M3); |
| 4803 + T4 = _mm_add_epi64(T4, M4); |
| 4804 + |
| 4805 + /* reduce */ |
| 4806 + C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _
mm_and_si128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C
1); T4 = _mm_add_epi64(T4, C2); |
| 4807 + C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _
mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C
1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); |
| 4808 + C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _
mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C
1); T1 = _mm_add_epi64(T1, C2); |
| 4809 + C1 = _mm_srli_epi64(T3, 26); T3 = _
mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C
1); |
| 4810 + |
| 4811 + /* H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx,My]) */ |
| 4812 + H0 = T0; |
| 4813 + H1 = T1; |
| 4814 + H2 = T2; |
| 4815 + H3 = T3; |
| 4816 + H4 = T4; |
| 4817 + |
| 4818 + m += 64; |
| 4819 + bytes -= 64; |
| 4820 + } |
| 4821 + |
| 4822 + st->H[0] = H0; |
| 4823 + st->H[1] = H1; |
| 4824 + st->H[2] = H2; |
| 4825 + st->H[3] = H3; |
| 4826 + st->H[4] = H4; |
| 4827 + } |
| 4828 + |
| 4829 +static size_t |
| 4830 +poly1305_combine(poly1305_state_internal *st, const uint8_t *m, size_t bytes) |
| 4831 + { |
| 4832 + const xmmi MMASK = |
| 4833 + _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); |
| 4834 + const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128); |
| 4835 + const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5); |
| 4836 + |
| 4837 + poly1305_power *p; |
| 4838 + xmmi H0,H1,H2,H3,H4; |
| 4839 + xmmi M0,M1,M2,M3,M4; |
| 4840 + xmmi T0,T1,T2,T3,T4,T5,T6; |
| 4841 + xmmi C1,C2; |
| 4842 + |
| 4843 + uint64_t r0,r1,r2; |
| 4844 + uint64_t t0,t1,t2,t3,t4; |
| 4845 + uint64_t c; |
| 4846 + size_t consumed = 0; |
| 4847 + |
| 4848 + H0 = st->H[0]; |
| 4849 + H1 = st->H[1]; |
| 4850 + H2 = st->H[2]; |
| 4851 + H3 = st->H[3]; |
| 4852 + H4 = st->H[4]; |
| 4853 + |
| 4854 + /* p = [r^2,r^2] */ |
| 4855 + p = &st->P[1]; |
| 4856 + |
| 4857 + if (bytes >= 32) |
| 4858 + { |
| 4859 + /* H *= [r^2,r^2] */ |
| 4860 + T0 = _mm_mul_epu32(H0, p->R20.v); |
| 4861 + T1 = _mm_mul_epu32(H0, p->R21.v); |
| 4862 + T2 = _mm_mul_epu32(H0, p->R22.v); |
| 4863 + T3 = _mm_mul_epu32(H0, p->R23.v); |
| 4864 + T4 = _mm_mul_epu32(H0, p->R24.v); |
| 4865 + T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 4866 + T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 4867 + T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 4868 + T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 4869 + T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 4870 + T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 4871 + T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 4872 + T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 4873 + T5 = _mm_mul_epu32(H1, p->R23.v);
T4 = _mm_add_epi64(T4, T5); |
| 4874 + T5 = _mm_mul_epu32(H2, p->R22.v);
T4 = _mm_add_epi64(T4, T5); |
| 4875 + T5 = _mm_mul_epu32(H3, p->R21.v);
T4 = _mm_add_epi64(T4, T5); |
| 4876 + T5 = _mm_mul_epu32(H4, p->R20.v);
T4 = _mm_add_epi64(T4, T5); |
| 4877 + |
| 4878 + /* H += [Mx,My] */ |
| 4879 + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_lo
adl_epi64((xmmi *)(m + 16))); |
| 4880 + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_lo
adl_epi64((xmmi *)(m + 24))); |
| 4881 + M0 = _mm_and_si128(MMASK, T5); |
| 4882 + M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); |
| 4883 + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)
); |
| 4884 + M2 = _mm_and_si128(MMASK, T5); |
| 4885 + M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); |
| 4886 + M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); |
| 4887 + |
| 4888 + T0 = _mm_add_epi64(T0, M0); |
| 4889 + T1 = _mm_add_epi64(T1, M1); |
| 4890 + T2 = _mm_add_epi64(T2, M2); |
| 4891 + T3 = _mm_add_epi64(T3, M3); |
| 4892 + T4 = _mm_add_epi64(T4, M4); |
| 4893 + |
| 4894 + /* reduce */ |
| 4895 + C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _
mm_and_si128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C
1); T4 = _mm_add_epi64(T4, C2); |
| 4896 + C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _
mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C
1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); |
| 4897 + C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _
mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C
1); T1 = _mm_add_epi64(T1, C2); |
| 4898 + C1 = _mm_srli_epi64(T3, 26); T3 = _
mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C
1); |
| 4899 + |
| 4900 + /* H = (H*[r^2,r^2] + [Mx,My]) */ |
| 4901 + H0 = T0; |
| 4902 + H1 = T1; |
| 4903 + H2 = T2; |
| 4904 + H3 = T3; |
| 4905 + H4 = T4; |
| 4906 + |
| 4907 + consumed = 32; |
| 4908 + } |
| 4909 + |
| 4910 + /* finalize, H *= [r^2,r] */ |
| 4911 + r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; |
| 4912 + r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; |
| 4913 + r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; |
| 4914 + |
| 4915 + p->R20.d[2] = (uint32_t)( r0 ) & 0x3ffffff; |
| 4916 + p->R21.d[2] = (uint32_t)((r0 >> 26) | (r1 << 18)) & 0x3ffffff; |
| 4917 + p->R22.d[2] = (uint32_t)((r1 >> 8) ) & 0x3ffffff; |
| 4918 + p->R23.d[2] = (uint32_t)((r1 >> 34) | (r2 << 10)) & 0x3ffffff; |
| 4919 + p->R24.d[2] = (uint32_t)((r2 >> 16) ) ; |
| 4920 + p->S21.d[2] = p->R21.d[2] * 5; |
| 4921 + p->S22.d[2] = p->R22.d[2] * 5; |
| 4922 + p->S23.d[2] = p->R23.d[2] * 5; |
| 4923 + p->S24.d[2] = p->R24.d[2] * 5; |
| 4924 + |
| 4925 + /* H *= [r^2,r] */ |
| 4926 + T0 = _mm_mul_epu32(H0, p->R20.v); |
| 4927 + T1 = _mm_mul_epu32(H0, p->R21.v); |
| 4928 + T2 = _mm_mul_epu32(H0, p->R22.v); |
| 4929 + T3 = _mm_mul_epu32(H0, p->R23.v); |
| 4930 + T4 = _mm_mul_epu32(H0, p->R24.v); |
| 4931 + T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20.v); T0 =
_mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 4932 + T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24.v); T0 =
_mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 4933 + T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23.v); T0 =
_mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 4934 + T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22.v); T0 =
_mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); |
| 4935 + T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22.v); T2 =
_mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 4936 + T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21.v); T2 =
_mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 4937 + T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20.v); T2 =
_mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 4938 + T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24.v); T2 =
_mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); |
| 4939 + T5 = _mm_mul_epu32(H1, p->R23.v); T4 =
_mm_add_epi64(T4, T5); |
| 4940 + T5 = _mm_mul_epu32(H2, p->R22.v); T4 =
_mm_add_epi64(T4, T5); |
| 4941 + T5 = _mm_mul_epu32(H3, p->R21.v); T4 =
_mm_add_epi64(T4, T5); |
| 4942 + T5 = _mm_mul_epu32(H4, p->R20.v); T4 =
_mm_add_epi64(T4, T5); |
| 4943 + |
| 4944 + C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _mm_and_s
i128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C1); T4 =
_mm_add_epi64(T4, C2); |
| 4945 + C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _mm_and_s
i128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C1); T0 =
_mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); |
| 4946 + C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _mm_and_s
i128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C1); T1 =
_mm_add_epi64(T1, C2); |
| 4947 + C1 = _mm_srli_epi64(T3, 26); T3 = _mm_and_s
i128(T3, MMASK); T4 = _mm_add_epi64(T4, C1); |
| 4948 + |
| 4949 + /* H = H[0]+H[1] */ |
| 4950 + H0 = _mm_add_epi64(T0, _mm_srli_si128(T0, 8)); |
| 4951 + H1 = _mm_add_epi64(T1, _mm_srli_si128(T1, 8)); |
| 4952 + H2 = _mm_add_epi64(T2, _mm_srli_si128(T2, 8)); |
| 4953 + H3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8)); |
| 4954 + H4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8)); |
| 4955 + |
| 4956 + t0 = _mm_cvtsi128_si32(H0) ; c = (t0 >> 26); t0 &= 0x3ffffff; |
| 4957 + t1 = _mm_cvtsi128_si32(H1) + c; c = (t1 >> 26); t1 &= 0x3ffffff; |
| 4958 + t2 = _mm_cvtsi128_si32(H2) + c; c = (t2 >> 26); t2 &= 0x3ffffff; |
| 4959 + t3 = _mm_cvtsi128_si32(H3) + c; c = (t3 >> 26); t3 &= 0x3ffffff; |
| 4960 + t4 = _mm_cvtsi128_si32(H4) + c; c = (t4 >> 26); t4 &= 0x3ffffff; |
| 4961 + t0 = t0 + (c * 5); c = (t0 >> 26); t0 &= 0x3ffffff; |
| 4962 + t1 = t1 + c; |
| 4963 + |
| 4964 + st->HH[0] = ((t0 ) | (t1 << 26) ) & 0xfffffffffffull; |
| 4965 + st->HH[1] = ((t1 >> 18) | (t2 << 8) | (t3 << 34)) & 0xfffffffffffull; |
| 4966 + st->HH[2] = ((t3 >> 10) | (t4 << 16) ) & 0x3ffffffffffull; |
| 4967 + |
| 4968 + return consumed; |
| 4969 + } |
| 4970 + |
| 4971 +void |
| 4972 +CRYPTO_poly1305_update(poly1305_state *state, const unsigned char *m, |
| 4973 + size_t bytes) |
| 4974 + { |
| 4975 + poly1305_state_internal *st = poly1305_aligned_state(state); |
| 4976 + size_t want; |
| 4977 + |
| 4978 + /* need at least 32 initial bytes to start the accelerated branch */ |
| 4979 + if (!st->started) |
| 4980 + { |
| 4981 + if ((st->leftover == 0) && (bytes > 32)) |
| 4982 + { |
| 4983 + poly1305_first_block(st, m); |
| 4984 + m += 32; |
| 4985 + bytes -= 32; |
| 4986 + } |
| 4987 + else |
| 4988 + { |
| 4989 + want = poly1305_min(32 - st->leftover, bytes); |
| 4990 + poly1305_block_copy(st->buffer + st->leftover, m, want); |
| 4991 + bytes -= want; |
| 4992 + m += want; |
| 4993 + st->leftover += want; |
| 4994 + if ((st->leftover < 32) || (bytes == 0)) |
| 4995 + return; |
| 4996 + poly1305_first_block(st, st->buffer); |
| 4997 + st->leftover = 0; |
| 4998 + } |
| 4999 + st->started = 1; |
| 5000 + } |
| 5001 + |
| 5002 + /* handle leftover */ |
| 5003 + if (st->leftover) |
| 5004 + { |
| 5005 + want = poly1305_min(64 - st->leftover, bytes); |
| 5006 + poly1305_block_copy(st->buffer + st->leftover, m, want); |
| 5007 + bytes -= want; |
| 5008 + m += want; |
| 5009 + st->leftover += want; |
| 5010 + if (st->leftover < 64) |
| 5011 + return; |
| 5012 + poly1305_blocks(st, st->buffer, 64); |
| 5013 + st->leftover = 0; |
| 5014 + } |
| 5015 + |
| 5016 + /* process 64 byte blocks */ |
| 5017 + if (bytes >= 64) |
| 5018 + { |
| 5019 + want = (bytes & ~63); |
| 5020 + poly1305_blocks(st, m, want); |
| 5021 + m += want; |
| 5022 + bytes -= want; |
| 5023 + } |
| 5024 + |
| 5025 + if (bytes) |
| 5026 + { |
| 5027 + poly1305_block_copy(st->buffer + st->leftover, m, bytes); |
| 5028 + st->leftover += bytes; |
| 5029 + } |
| 5030 + } |
| 5031 + |
| 5032 +void |
| 5033 +CRYPTO_poly1305_finish(poly1305_state *state, unsigned char mac[16]) |
| 5034 + { |
| 5035 + poly1305_state_internal *st = poly1305_aligned_state(state); |
| 5036 + size_t leftover = st->leftover; |
| 5037 + uint8_t *m = st->buffer; |
| 5038 + uint128_t d[3]; |
| 5039 + uint64_t h0,h1,h2; |
| 5040 + uint64_t t0,t1; |
| 5041 + uint64_t g0,g1,g2,c,nc; |
| 5042 + uint64_t r0,r1,r2,s1,s2; |
| 5043 + poly1305_power *p; |
| 5044 + |
| 5045 + if (st->started) |
| 5046 + { |
| 5047 + size_t consumed = poly1305_combine(st, m, leftover); |
| 5048 + leftover -= consumed; |
| 5049 + m += consumed; |
| 5050 + } |
| 5051 + |
| 5052 + /* st->HH will either be 0 or have the combined result */ |
| 5053 + h0 = st->HH[0]; |
| 5054 + h1 = st->HH[1]; |
| 5055 + h2 = st->HH[2]; |
| 5056 + |
| 5057 + p = &st->P[1]; |
| 5058 + r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; |
| 5059 + r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; |
| 5060 + r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; |
| 5061 + s1 = r1 * (5 << 2); |
| 5062 + s2 = r2 * (5 << 2); |
| 5063 + |
| 5064 + if (leftover < 16) |
| 5065 + goto poly1305_donna_atmost15bytes; |
| 5066 + |
| 5067 +poly1305_donna_atleast16bytes: |
| 5068 + t0 = U8TO64_LE(m + 0); |
| 5069 + t1 = U8TO64_LE(m + 8); |
| 5070 + h0 += t0 & 0xfffffffffff; |
| 5071 + t0 = shr128_pair(t1, t0, 44); |
| 5072 + h1 += t0 & 0xfffffffffff; |
| 5073 + h2 += (t1 >> 24) | ((uint64_t)1 << 40); |
| 5074 + |
| 5075 +poly1305_donna_mul: |
| 5076 + d[0] = add128(add128(mul64x64_128(h0, r0), mul64x64_128(h1, s2)), mul64x
64_128(h2, s1)); |
| 5077 + d[1] = add128(add128(mul64x64_128(h0, r1), mul64x64_128(h1, r0)), mul64x
64_128(h2, s2)); |
| 5078 + d[2] = add128(add128(mul64x64_128(h0, r2), mul64x64_128(h1, r1)), mul64x
64_128(h2, r0)); |
| 5079 + h0 = lo128(d[0]) & 0xfffffffffff; c = shr128(
d[0], 44); |
| 5080 + d[1] = add128_64(d[1], c); h1 = lo128(d[1]) & 0xfffffffffff; c = shr128(
d[1], 44); |
| 5081 + d[2] = add128_64(d[2], c); h2 = lo128(d[2]) & 0x3ffffffffff; c = shr128(
d[2], 42); |
| 5082 + h0 += c * 5; |
| 5083 + |
| 5084 + m += 16; |
| 5085 + leftover -= 16; |
| 5086 + if (leftover >= 16) goto poly1305_donna_atleast16bytes; |
| 5087 + |
| 5088 + /* final bytes */ |
| 5089 +poly1305_donna_atmost15bytes: |
| 5090 + if (!leftover) goto poly1305_donna_finish; |
| 5091 + |
| 5092 + m[leftover++] = 1; |
| 5093 + poly1305_block_zero(m + leftover, 16 - leftover); |
| 5094 + leftover = 16; |
| 5095 + |
| 5096 + t0 = U8TO64_LE(m+0); |
| 5097 + t1 = U8TO64_LE(m+8); |
| 5098 + h0 += t0 & 0xfffffffffff; t0 = shr128_pair(t1, t0, 44); |
| 5099 + h1 += t0 & 0xfffffffffff; |
| 5100 + h2 += (t1 >> 24); |
| 5101 + |
| 5102 + goto poly1305_donna_mul; |
| 5103 + |
| 5104 +poly1305_donna_finish: |
| 5105 + c = (h0 >> 44); h0 &= 0xfffffffffff; |
| 5106 + h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff; |
| 5107 + h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff; |
| 5108 + h0 += c * 5; |
| 5109 + |
| 5110 + g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff; |
| 5111 + g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff; |
| 5112 + g2 = h2 + c - ((uint64_t)1 << 42); |
| 5113 + |
| 5114 + c = (g2 >> 63) - 1; |
| 5115 + nc = ~c; |
| 5116 + h0 = (h0 & nc) | (g0 & c); |
| 5117 + h1 = (h1 & nc) | (g1 & c); |
| 5118 + h2 = (h2 & nc) | (g2 & c); |
| 5119 + |
| 5120 + /* pad */ |
| 5121 + t0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1]; |
| 5122 + t1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1]; |
| 5123 + h0 += (t0 & 0xfffffffffff) ; c = (h0 >> 44); h0 &= 0xfffffffffff; t0
= shr128_pair(t1, t0, 44); |
| 5124 + h1 += (t0 & 0xfffffffffff) + c; c = (h1 >> 44); h1 &= 0xfffffffffff; t1
= (t1 >> 24); |
| 5125 + h2 += (t1 ) + c; |
| 5126 + |
| 5127 + U64TO8_LE(mac + 0, ((h0 ) | (h1 << 44))); |
| 5128 + U64TO8_LE(mac + 8, ((h1 >> 20) | (h2 << 24))); |
| 5129 + } |
| 5130 + |
| 5131 +#endif /* !OPENSSL_NO_POLY1305 */ |
| 5132 diff --git a/crypto/poly1305/poly1305test.c b/crypto/poly1305/poly1305test.c |
| 5133 new file mode 100644 |
| 5134 index 0000000..8dd26af |
| 5135 --- /dev/null |
| 5136 +++ b/crypto/poly1305/poly1305test.c |
| 5137 @@ -0,0 +1,166 @@ |
| 5138 +/* ==================================================================== |
| 5139 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. |
| 5140 + * |
| 5141 + * Redistribution and use in source and binary forms, with or without |
| 5142 + * modification, are permitted provided that the following conditions |
| 5143 + * are met: |
| 5144 + * |
| 5145 + * 1. Redistributions of source code must retain the above copyright |
| 5146 + * notice, this list of conditions and the following disclaimer. |
| 5147 + * |
| 5148 + * 2. Redistributions in binary form must reproduce the above copyright |
| 5149 + * notice, this list of conditions and the following disclaimer in |
| 5150 + * the documentation and/or other materials provided with the |
| 5151 + * distribution. |
| 5152 + * |
| 5153 + * 3. All advertising materials mentioning features or use of this |
| 5154 + * software must display the following acknowledgment: |
| 5155 + * "This product includes software developed by the OpenSSL Project |
| 5156 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" |
| 5157 + * |
| 5158 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
| 5159 + * endorse or promote products derived from this software without |
| 5160 + * prior written permission. For written permission, please contact |
| 5161 + * licensing@OpenSSL.org. |
| 5162 + * |
| 5163 + * 5. Products derived from this software may not be called "OpenSSL" |
| 5164 + * nor may "OpenSSL" appear in their names without prior written |
| 5165 + * permission of the OpenSSL Project. |
| 5166 + * |
| 5167 + * 6. Redistributions of any form whatsoever must retain the following |
| 5168 + * acknowledgment: |
| 5169 + * "This product includes software developed by the OpenSSL Project |
| 5170 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" |
| 5171 + * |
| 5172 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
| 5173 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 5174 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 5175 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
| 5176 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 5177 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 5178 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 5179 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 5180 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
| 5181 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 5182 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
| 5183 + * OF THE POSSIBILITY OF SUCH DAMAGE. |
| 5184 + * ==================================================================== |
| 5185 + */ |
| 5186 + |
| 5187 +#include <stdio.h> |
| 5188 +#include <stdlib.h> |
| 5189 +#include <string.h> |
| 5190 + |
| 5191 +#include <openssl/poly1305.h> |
| 5192 + |
| 5193 +struct poly1305_test |
| 5194 + { |
| 5195 + const char *inputhex; |
| 5196 + const char *keyhex; |
| 5197 + const char *outhex; |
| 5198 + }; |
| 5199 + |
| 5200 +static const struct poly1305_test poly1305_tests[] = { |
| 5201 + { |
| 5202 + "", |
| 5203 + "c8afaac331ee372cd6082de134943b174710130e9f6fea8d72293850a667d86
c", |
| 5204 + "4710130e9f6fea8d72293850a667d86c", |
| 5205 + }, |
| 5206 + { |
| 5207 + "48656c6c6f20776f726c6421", |
| 5208 + "746869732069732033322d62797465206b657920666f7220506f6c793133303
5", |
| 5209 + "a6f745008f81c916a20dcc74eef2b2f0", |
| 5210 + }, |
| 5211 + { |
| 5212 + "000000000000000000000000000000000000000000000000000000000000000
0", |
| 5213 + "746869732069732033322d62797465206b657920666f7220506f6c793133303
5", |
| 5214 + "49ec78090e481ec6c26b33b91ccc0307", |
| 5215 + }, |
| 5216 + { |
| 5217 + "000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000", |
| 5218 + "746869732069732033322d62797465206b657920666f7220506f6c793133303
5", |
| 5219 + "da84bcab02676c38cdb015604274c2aa", |
| 5220 + }, |
| 5221 +}; |
| 5222 + |
| 5223 +static unsigned char hex_digit(char h) |
| 5224 + { |
| 5225 + if (h >= '0' && h <= '9') |
| 5226 + return h - '0'; |
| 5227 + else if (h >= 'a' && h <= 'f') |
| 5228 + return h - 'a' + 10; |
| 5229 + else if (h >= 'A' && h <= 'F') |
| 5230 + return h - 'A' + 10; |
| 5231 + else |
| 5232 + abort(); |
| 5233 + } |
| 5234 + |
| 5235 +static void hex_decode(unsigned char *out, const char* hex) |
| 5236 + { |
| 5237 + size_t j = 0; |
| 5238 + |
| 5239 + while (*hex != 0) |
| 5240 + { |
| 5241 + unsigned char v = hex_digit(*hex++); |
| 5242 + v <<= 4; |
| 5243 + v |= hex_digit(*hex++); |
| 5244 + out[j++] = v; |
| 5245 + } |
| 5246 + } |
| 5247 + |
| 5248 +static void hexdump(unsigned char *a, size_t len) |
| 5249 + { |
| 5250 + size_t i; |
| 5251 + |
| 5252 + for (i = 0; i < len; i++) |
| 5253 + printf("%02x", a[i]); |
| 5254 + } |
| 5255 + |
| 5256 +int main() |
| 5257 + { |
| 5258 + static const unsigned num_tests = |
| 5259 + sizeof(poly1305_tests) / sizeof(struct poly1305_test); |
| 5260 + unsigned i; |
| 5261 + unsigned char key[32], out[16], expected[16]; |
| 5262 + poly1305_state poly1305; |
| 5263 + |
| 5264 + for (i = 0; i < num_tests; i++) |
| 5265 + { |
| 5266 + const struct poly1305_test *test = &poly1305_tests[i]; |
| 5267 + unsigned char *in; |
| 5268 + size_t inlen = strlen(test->inputhex); |
| 5269 + |
| 5270 + if (strlen(test->keyhex) != sizeof(key)*2 || |
| 5271 + strlen(test->outhex) != sizeof(out)*2 || |
| 5272 + (inlen & 1) == 1) |
| 5273 + return 1; |
| 5274 + |
| 5275 + inlen /= 2; |
| 5276 + |
| 5277 + hex_decode(key, test->keyhex); |
| 5278 + hex_decode(expected, test->outhex); |
| 5279 + |
| 5280 + in = malloc(inlen); |
| 5281 + |
| 5282 + hex_decode(in, test->inputhex); |
| 5283 + CRYPTO_poly1305_init(&poly1305, key); |
| 5284 + CRYPTO_poly1305_update(&poly1305, in, inlen); |
| 5285 + CRYPTO_poly1305_finish(&poly1305, out); |
| 5286 + |
| 5287 + if (memcmp(out, expected, sizeof(expected)) != 0) |
| 5288 + { |
| 5289 + printf("Poly1305 test #%d failed.\n", i); |
| 5290 + printf("got: "); |
| 5291 + hexdump(out, sizeof(out)); |
| 5292 + printf("\nexpected: "); |
| 5293 + hexdump(expected, sizeof(expected)); |
| 5294 + printf("\n"); |
| 5295 + return 1; |
| 5296 + } |
| 5297 + |
| 5298 + free(in); |
| 5299 + } |
| 5300 + |
| 5301 + printf("PASS\n"); |
| 5302 + return 0; |
| 5303 + } |
| 5304 diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c |
| 5305 index 75b6560..a042b8d 100644 |
| 5306 --- a/ssl/s3_lib.c |
| 5307 +++ b/ssl/s3_lib.c |
| 5308 @@ -1841,7 +1841,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 5309 SSL_AEAD, |
| 5310 SSL_TLSV1_2, |
| 5311 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 5312 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 5313 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| |
| 5314 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, |
| 5315 128, |
| 5316 128, |
| 5317 }, |
| 5318 @@ -1873,7 +1874,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 5319 SSL_AEAD, |
| 5320 SSL_TLSV1_2, |
| 5321 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 5322 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 5323 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| |
| 5324 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, |
| 5325 128, |
| 5326 128, |
| 5327 }, |
| 5328 @@ -1905,7 +1907,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 5329 SSL_AEAD, |
| 5330 SSL_TLSV1_2, |
| 5331 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 5332 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 5333 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| |
| 5334 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, |
| 5335 128, |
| 5336 128, |
| 5337 }, |
| 5338 @@ -1937,7 +1940,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 5339 SSL_AEAD, |
| 5340 SSL_TLSV1_2, |
| 5341 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 5342 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 5343 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| |
| 5344 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, |
| 5345 128, |
| 5346 128, |
| 5347 }, |
| 5348 @@ -1969,7 +1973,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 5349 SSL_AEAD, |
| 5350 SSL_TLSV1_2, |
| 5351 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 5352 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 5353 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| |
| 5354 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, |
| 5355 128, |
| 5356 128, |
| 5357 }, |
| 5358 @@ -2001,7 +2006,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 5359 SSL_AEAD, |
| 5360 SSL_TLSV1_2, |
| 5361 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 5362 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 5363 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| |
| 5364 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, |
| 5365 128, |
| 5366 128, |
| 5367 }, |
| 5368 @@ -2714,7 +2720,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 5369 SSL_AEAD, |
| 5370 SSL_TLSV1_2, |
| 5371 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 5372 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 5373 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| |
| 5374 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, |
| 5375 128, |
| 5376 128, |
| 5377 }, |
| 5378 @@ -2746,7 +2753,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 5379 SSL_AEAD, |
| 5380 SSL_TLSV1_2, |
| 5381 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 5382 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 5383 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| |
| 5384 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, |
| 5385 128, |
| 5386 128, |
| 5387 }, |
| 5388 @@ -2778,7 +2786,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 5389 SSL_AEAD, |
| 5390 SSL_TLSV1_2, |
| 5391 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 5392 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 5393 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| |
| 5394 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, |
| 5395 128, |
| 5396 128, |
| 5397 }, |
| 5398 @@ -2810,7 +2819,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 5399 SSL_AEAD, |
| 5400 SSL_TLSV1_2, |
| 5401 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, |
| 5402 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), |
| 5403 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| |
| 5404 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, |
| 5405 128, |
| 5406 128, |
| 5407 }, |
| 5408 @@ -2894,6 +2904,51 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ |
| 5409 }, |
| 5410 #endif |
| 5411 |
| 5412 + { |
| 5413 + 1, |
| 5414 + TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305, |
| 5415 + TLS1_CK_ECDHE_RSA_CHACHA20_POLY1305, |
| 5416 + SSL_kEECDH, |
| 5417 + SSL_aRSA, |
| 5418 + SSL_CHACHA20POLY1305, |
| 5419 + SSL_AEAD, |
| 5420 + SSL_TLSV1_2, |
| 5421 + SSL_NOT_EXP|SSL_HIGH, |
| 5422 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(0), |
| 5423 + 256, |
| 5424 + 0, |
| 5425 + }, |
| 5426 + |
| 5427 + { |
| 5428 + 1, |
| 5429 + TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, |
| 5430 + TLS1_CK_ECDHE_ECDSA_CHACHA20_POLY1305, |
| 5431 + SSL_kEECDH, |
| 5432 + SSL_aECDSA, |
| 5433 + SSL_CHACHA20POLY1305, |
| 5434 + SSL_AEAD, |
| 5435 + SSL_TLSV1_2, |
| 5436 + SSL_NOT_EXP|SSL_HIGH, |
| 5437 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(0), |
| 5438 + 256, |
| 5439 + 0, |
| 5440 + }, |
| 5441 + |
| 5442 + { |
| 5443 + 1, |
| 5444 + TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305, |
| 5445 + TLS1_CK_DHE_RSA_CHACHA20_POLY1305, |
| 5446 + SSL_kEDH, |
| 5447 + SSL_aRSA, |
| 5448 + SSL_CHACHA20POLY1305, |
| 5449 + SSL_AEAD, |
| 5450 + SSL_TLSV1_2, |
| 5451 + SSL_NOT_EXP|SSL_HIGH, |
| 5452 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(0), |
| 5453 + 256, |
| 5454 + 0, |
| 5455 + }, |
| 5456 + |
| 5457 /* end of list */ |
| 5458 }; |
| 5459 |
| 5460 diff --git a/ssl/s3_pkt.c b/ssl/s3_pkt.c |
| 5461 index 5038f6c..04b474d 100644 |
| 5462 --- a/ssl/s3_pkt.c |
| 5463 +++ b/ssl/s3_pkt.c |
| 5464 @@ -790,8 +790,11 @@ static int do_ssl3_write(SSL *s, int type, const unsigned c
har *buf, |
| 5465 else |
| 5466 eivlen = 0; |
| 5467 } |
| 5468 - else if (s->aead_write_ctx != NULL) |
| 5469 + else if (s->aead_write_ctx != NULL && |
| 5470 + s->aead_write_ctx->variable_nonce_included_in_record) |
| 5471 + { |
| 5472 eivlen = s->aead_write_ctx->variable_nonce_len; |
| 5473 + } |
| 5474 else |
| 5475 eivlen = 0; |
| 5476 |
| 5477 diff --git a/ssl/ssl.h b/ssl/ssl.h |
| 5478 index 0644cbf..d782a98 100644 |
| 5479 --- a/ssl/ssl.h |
| 5480 +++ b/ssl/ssl.h |
| 5481 @@ -291,6 +291,7 @@ extern "C" { |
| 5482 #define SSL_TXT_CAMELLIA128 "CAMELLIA128" |
| 5483 #define SSL_TXT_CAMELLIA256 "CAMELLIA256" |
| 5484 #define SSL_TXT_CAMELLIA "CAMELLIA" |
| 5485 +#define SSL_TXT_CHACHA20 "CHACHA20" |
| 5486 |
| 5487 #define SSL_TXT_MD5 "MD5" |
| 5488 #define SSL_TXT_SHA1 "SHA1" |
| 5489 diff --git a/ssl/ssl_ciph.c b/ssl/ssl_ciph.c |
| 5490 index 7e780cd..b6370bd 100644 |
| 5491 --- a/ssl/ssl_ciph.c |
| 5492 +++ b/ssl/ssl_ciph.c |
| 5493 @@ -298,6 +298,7 @@ static const SSL_CIPHER cipher_aliases[]={ |
| 5494 {0,SSL_TXT_CAMELLIA128,0,0,0,SSL_CAMELLIA128,0,0,0,0,0,0}, |
| 5495 {0,SSL_TXT_CAMELLIA256,0,0,0,SSL_CAMELLIA256,0,0,0,0,0,0}, |
| 5496 {0,SSL_TXT_CAMELLIA ,0,0,0,SSL_CAMELLIA128|SSL_CAMELLIA256,0,0,0,0,0,0
}, |
| 5497 + {0,SSL_TXT_CHACHA20 ,0,0,0,SSL_CHACHA20POLY1305,0,0,0,0,0,0}, |
| 5498 |
| 5499 /* MAC aliases */ |
| 5500 {0,SSL_TXT_MD5,0, 0,0,0,SSL_MD5, 0,0,0,0,0}, |
| 5501 @@ -523,9 +524,15 @@ int ssl_cipher_get_evp_aead(const SSL_SESSION *s, const EVP
_AEAD **aead) |
| 5502 return 0; |
| 5503 |
| 5504 #ifndef OPENSSL_NO_AES |
| 5505 - /* There is only one AEAD for now. */ |
| 5506 - *aead = EVP_aead_aes_128_gcm(); |
| 5507 - return 1; |
| 5508 + switch (c->algorithm_enc) |
| 5509 + { |
| 5510 + case SSL_AES128GCM: |
| 5511 + *aead = EVP_aead_aes_128_gcm(); |
| 5512 + return 1; |
| 5513 + case SSL_CHACHA20POLY1305: |
| 5514 + *aead = EVP_aead_chacha20_poly1305(); |
| 5515 + return 1; |
| 5516 + } |
| 5517 #endif |
| 5518 |
| 5519 return 0; |
| 5520 @@ -1715,6 +1722,9 @@ char *SSL_CIPHER_description(const SSL_CIPHER *cipher, cha
r *buf, int len) |
| 5521 case SSL_SEED: |
| 5522 enc="SEED(128)"; |
| 5523 break; |
| 5524 + case SSL_CHACHA20POLY1305: |
| 5525 + enc="ChaCha20-Poly1305"; |
| 5526 + break; |
| 5527 default: |
| 5528 enc="unknown"; |
| 5529 break; |
| 5530 diff --git a/ssl/ssl_locl.h b/ssl/ssl_locl.h |
| 5531 index 63bc28b..b83d8cd 100644 |
| 5532 --- a/ssl/ssl_locl.h |
| 5533 +++ b/ssl/ssl_locl.h |
| 5534 @@ -328,6 +328,7 @@ |
| 5535 #define SSL_SEED 0x00000800L |
| 5536 #define SSL_AES128GCM 0x00001000L |
| 5537 #define SSL_AES256GCM 0x00002000L |
| 5538 +#define SSL_CHACHA20POLY1305 0x00004000L |
| 5539 |
| 5540 #define SSL_AES (SSL_AES128|SSL_AES256|SSL_AES128GCM|SSL
_AES256GCM) |
| 5541 #define SSL_CAMELLIA (SSL_CAMELLIA128|SSL_CAMELLIA256) |
| 5542 @@ -389,6 +390,12 @@ |
| 5543 #define SSL_CIPHER_AEAD_FIXED_NONCE_LEN(ssl_cipher) \ |
| 5544 (((ssl_cipher->algorithm2 >> 24) & 0xf)*2) |
| 5545 |
| 5546 +/* SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD is a flag in |
| 5547 + * SSL_CIPHER.algorithm2 which indicates that the variable part of the nonce is |
| 5548 + * included as a prefix of the record. (AES-GCM, for example, does this with an |
| 5549 + * 8-byte variable nonce.) */ |
| 5550 +#define SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD (1<<22) |
| 5551 + |
| 5552 /* |
| 5553 * Export and cipher strength information. For each cipher we have to decide |
| 5554 * whether it is exportable or not. This information is likely to change |
| 5555 @@ -605,6 +612,9 @@ struct ssl_aead_ctx_st |
| 5556 * records. */ |
| 5557 unsigned char fixed_nonce[8]; |
| 5558 unsigned char fixed_nonce_len, variable_nonce_len, tag_len; |
| 5559 + /* variable_nonce_included_in_record is non-zero if the variable nonce |
| 5560 + * for a record is included as a prefix before the ciphertext. */ |
| 5561 + char variable_nonce_included_in_record; |
| 5562 }; |
| 5563 |
| 5564 #ifndef OPENSSL_NO_COMP |
| 5565 diff --git a/ssl/t1_enc.c b/ssl/t1_enc.c |
| 5566 index 7af1a32..15800af 100644 |
| 5567 --- a/ssl/t1_enc.c |
| 5568 +++ b/ssl/t1_enc.c |
| 5569 @@ -366,6 +366,8 @@ static int tls1_change_cipher_state_aead(SSL *s, char is_rea
d, |
| 5570 memcpy(aead_ctx->fixed_nonce, iv, iv_len); |
| 5571 aead_ctx->fixed_nonce_len = iv_len; |
| 5572 aead_ctx->variable_nonce_len = 8; /* always the case, currently. */ |
| 5573 + aead_ctx->variable_nonce_included_in_record = |
| 5574 + (s->s3->tmp.new_cipher->algorithm2 & SSL_CIPHER_ALGORITHM2_VARIA
BLE_NONCE_INCLUDED_IN_RECORD) != 0; |
| 5575 if (aead_ctx->variable_nonce_len + aead_ctx->fixed_nonce_len != EVP_AEAD
_nonce_length(aead)) |
| 5576 { |
| 5577 SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE_AEAD, ERR_R_INTERNAL_ERROR
); |
| 5578 @@ -863,6 +865,7 @@ int tls1_enc(SSL *s, int send) |
| 5579 if (send) |
| 5580 { |
| 5581 size_t len = rec->length; |
| 5582 + size_t eivlen = 0; |
| 5583 in = rec->input; |
| 5584 out = rec->data; |
| 5585 |
| 5586 @@ -878,18 +881,22 @@ int tls1_enc(SSL *s, int send) |
| 5587 * variable nonce. Thus we can copy the sequence number |
| 5588 * bytes into place without overwriting any of the |
| 5589 * plaintext. */ |
| 5590 - memcpy(out, ad, aead->variable_nonce_len); |
| 5591 - len -= aead->variable_nonce_len; |
| 5592 + if (aead->variable_nonce_included_in_record) |
| 5593 + { |
| 5594 + memcpy(out, ad, aead->variable_nonce_len); |
| 5595 + len -= aead->variable_nonce_len; |
| 5596 + eivlen = aead->variable_nonce_len; |
| 5597 + } |
| 5598 |
| 5599 ad[11] = len >> 8; |
| 5600 ad[12] = len & 0xff; |
| 5601 |
| 5602 n = EVP_AEAD_CTX_seal(&aead->ctx, |
| 5603 - out + aead->variable_nonce_len, le
n + aead->tag_len, |
| 5604 + out + eivlen, len + aead->tag_len, |
| 5605 nonce, nonce_used, |
| 5606 - in + aead->variable_nonce_len, len
, |
| 5607 + in + eivlen, len, |
| 5608 ad, sizeof(ad)); |
| 5609 - if (n >= 0) |
| 5610 + if (n >= 0 && aead->variable_nonce_included_in_record) |
| 5611 n += aead->variable_nonce_len; |
| 5612 } |
| 5613 else |
| 5614 @@ -903,12 +910,17 @@ int tls1_enc(SSL *s, int send) |
| 5615 |
| 5616 if (len < aead->variable_nonce_len) |
| 5617 return 0; |
| 5618 - memcpy(nonce + nonce_used, in, aead->variable_nonce_len)
; |
| 5619 + memcpy(nonce + nonce_used, |
| 5620 + aead->variable_nonce_included_in_record ? in : ad
, |
| 5621 + aead->variable_nonce_len); |
| 5622 nonce_used += aead->variable_nonce_len; |
| 5623 |
| 5624 - in += aead->variable_nonce_len; |
| 5625 - len -= aead->variable_nonce_len; |
| 5626 - out += aead->variable_nonce_len; |
| 5627 + if (aead->variable_nonce_included_in_record) |
| 5628 + { |
| 5629 + in += aead->variable_nonce_len; |
| 5630 + len -= aead->variable_nonce_len; |
| 5631 + out += aead->variable_nonce_len; |
| 5632 + } |
| 5633 |
| 5634 if (len < aead->tag_len) |
| 5635 return 0; |
| 5636 diff --git a/ssl/tls1.h b/ssl/tls1.h |
| 5637 index 8cac7df..3cbcb83 100644 |
| 5638 --- a/ssl/tls1.h |
| 5639 +++ b/ssl/tls1.h |
| 5640 @@ -526,6 +526,10 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB
,(void (*)(void))cb) |
| 5641 #define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256 0x0300C031 |
| 5642 #define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384 0x0300C032 |
| 5643 |
| 5644 +#define TLS1_CK_ECDHE_RSA_CHACHA20_POLY1305 0x0300CC13 |
| 5645 +#define TLS1_CK_ECDHE_ECDSA_CHACHA20_POLY1305 0x0300CC14 |
| 5646 +#define TLS1_CK_DHE_RSA_CHACHA20_POLY1305 0x0300CC15 |
| 5647 + |
| 5648 /* XXX |
| 5649 * Inconsistency alert: |
| 5650 * The OpenSSL names of ciphers with ephemeral DH here include the string |
| 5651 @@ -677,6 +681,10 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB
,(void (*)(void))cb) |
| 5652 #define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256 "ECDH-RSA-AES128-GCM-SH
A256" |
| 5653 #define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384 "ECDH-RSA-AES256-GCM-SH
A384" |
| 5654 |
| 5655 +#define TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305 "ECDHE-RSA-CHACHA20-POLY
1305" |
| 5656 +#define TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 "ECDHE-ECDSA-CHACHA20-PO
LY1305" |
| 5657 +#define TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305 "DHE-RSA-CHACHA2
0-POLY1305" |
| 5658 + |
| 5659 #define TLS_CT_RSA_SIGN 1 |
| 5660 #define TLS_CT_DSS_SIGN 2 |
| 5661 #define TLS_CT_RSA_FIXED_DH 3 |
| 5662 diff --git a/test/Makefile b/test/Makefile |
| 5663 index 4c9eabc..4790aa8 100644 |
| 5664 --- a/test/Makefile |
| 5665 +++ b/test/Makefile |
| 5666 @@ -86,7 +86,9 @@ OBJ= $(BNTEST).o $(ECTEST).o $(ECDSATEST).o $(ECDHTEST).o $(
IDEATEST).o \ |
| 5667 $(MDC2TEST).o $(RMDTEST).o \ |
| 5668 $(RANDTEST).o $(DHTEST).o $(ENGINETEST).o $(CASTTEST).o \ |
| 5669 $(BFTEST).o $(SSLTEST).o $(DSATEST).o $(EXPTEST).o $(RSATEST).o \ |
| 5670 - $(EVPTEST).o $(IGETEST).o $(JPAKETEST).o $(ASN1TEST).o |
| 5671 + $(EVPTEST).o $(IGETEST).o $(JPAKETEST).o $(ASN1TEST).o $(CHACHATEST).o \ |
| 5672 + $(POLY1305TEST).o |
| 5673 + |
| 5674 SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $(IDEATEST).c \ |
| 5675 $(MD2TEST).c $(MD4TEST).c $(MD5TEST).c \ |
| 5676 $(HMACTEST).c $(WPTEST).c \ |
| 5677 @@ -94,7 +96,8 @@ SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $(
IDEATEST).c \ |
| 5678 $(DESTEST).c $(SHATEST).c $(SHA1TEST).c $(MDC2TEST).c $(RMDTEST).c \ |
| 5679 $(RANDTEST).c $(DHTEST).c $(ENGINETEST).c $(CASTTEST).c \ |
| 5680 $(BFTEST).c $(SSLTEST).c $(DSATEST).c $(EXPTEST).c $(RSATEST).c \ |
| 5681 - $(EVPTEST).c $(IGETEST).c $(JPAKETEST).c $(SRPTEST).c $(ASN1TEST).c |
| 5682 + $(EVPTEST).c $(IGETEST).c $(JPAKETEST).c $(SRPTEST).c $(ASN1TEST).c \ |
| 5683 + $(CHACHATEST).c $(POLY1305TEST).c |
| 5684 |
| 5685 EXHEADER= |
| 5686 HEADER= $(EXHEADER) |
| 5687 @@ -137,7 +140,7 @@ alltests: \ |
| 5688 test_enc test_x509 test_rsa test_crl test_sid \ |
| 5689 test_gen test_req test_pkcs7 test_verify test_dh test_dsa \ |
| 5690 test_ss test_ca test_engine test_evp test_ssl test_tsa test_ige \ |
| 5691 - test_jpake test_srp test_cms |
| 5692 + test_jpake test_srp test_cms test_chacha test_poly1305 |
| 5693 |
| 5694 test_evp: |
| 5695 ../util/shlib_wrap.sh ./$(EVPTEST) evptests.txt |
| 5696 @@ -318,6 +321,14 @@ test_srp: $(SRPTEST)$(EXE_EXT) |
| 5697 @echo "Test SRP" |
| 5698 ../util/shlib_wrap.sh ./srptest |
| 5699 |
| 5700 +test_chacha: $(CHACHATEST)$(EXE_EXT) |
| 5701 + @echo "Test ChaCha" |
| 5702 + ../util/shlib_wrap.sh ./$(CHACHATEST) |
| 5703 + |
| 5704 +test_poly1305: $(POLY1305TEST)$(EXE_EXT) |
| 5705 + @echo "Test Poly1305" |
| 5706 + ../util/shlib_wrap.sh ./$(POLY1305TEST) |
| 5707 + |
| 5708 lint: |
| 5709 lint -DLINT $(INCLUDES) $(SRC)>fluff |
| 5710 |
| 5711 @@ -394,6 +405,12 @@ $(SHA256TEST)$(EXE_EXT): $(SHA256TEST).o $(DLIBCRYPTO) |
| 5712 $(SHA512TEST)$(EXE_EXT): $(SHA512TEST).o $(DLIBCRYPTO) |
| 5713 @target=$(SHA512TEST); $(BUILD_CMD) |
| 5714 |
| 5715 +$(CHACHATEST)$(EXE_EXT): $(CHACHATEST).o $(DLIBCRYPTO) |
| 5716 + @target=$(CHACHATEST); $(BUILD_CMD) |
| 5717 + |
| 5718 +$(POLY1305TEST)$(EXE_EXT): $(POLY1305TEST).o $(DLIBCRYPTO) |
| 5719 + @target=$(POLY1305TEST); $(BUILD_CMD) |
| 5720 + |
| 5721 $(RMDTEST)$(EXE_EXT): $(RMDTEST).o $(DLIBCRYPTO) |
| 5722 @target=$(RMDTEST); $(BUILD_CMD) |
| 5723 |
| 5724 -- |
| 5725 1.8.4.1 |
| 5726 |
OLD | NEW |