| OLD | NEW |
| (Empty) |
| 1 From 2688f00904e4ffd647afcff69bb8fe6df8c5902b Mon Sep 17 00:00:00 2001 | |
| 2 From: Adam Langley <agl@chromium.org> | |
| 3 Date: Mon, 9 Sep 2013 12:13:24 -0400 | |
| 4 Subject: [PATCH 43/52] chacha20poly1305 | |
| 5 | |
| 6 Add support for Chacha20 + Poly1305. | |
| 7 --- | |
| 8 .gitignore | 1 + | |
| 9 Configure | 56 +- | |
| 10 Makefile.org | 6 +- | |
| 11 apps/speed.c | 64 +- | |
| 12 crypto/chacha/Makefile | 80 ++ | |
| 13 crypto/chacha/chacha.h | 85 ++ | |
| 14 crypto/chacha/chacha_enc.c | 167 +++ | |
| 15 crypto/chacha/chacha_vec.c | 345 +++++++ | |
| 16 crypto/chacha/chachatest.c | 211 ++++ | |
| 17 crypto/evp/Makefile | 35 +- | |
| 18 crypto/evp/e_chacha20poly1305.c | 261 +++++ | |
| 19 crypto/evp/evp.h | 8 + | |
| 20 crypto/evp/evp_err.c | 3 + | |
| 21 crypto/poly1305/Makefile | 81 ++ | |
| 22 crypto/poly1305/poly1305.c | 320 ++++++ | |
| 23 crypto/poly1305/poly1305.h | 88 ++ | |
| 24 crypto/poly1305/poly1305_arm.c | 335 ++++++ | |
| 25 crypto/poly1305/poly1305_arm_asm.s | 2009 ++++++++++++++++++++++++++++++++++++ | |
| 26 crypto/poly1305/poly1305_vec.c | 733 +++++++++++++ | |
| 27 crypto/poly1305/poly1305test.c | 166 +++ | |
| 28 ssl/s3_lib.c | 75 +- | |
| 29 ssl/s3_pkt.c | 5 +- | |
| 30 ssl/ssl.h | 1 + | |
| 31 ssl/ssl_ciph.c | 16 +- | |
| 32 ssl/ssl_locl.h | 10 + | |
| 33 ssl/t1_enc.c | 30 +- | |
| 34 ssl/tls1.h | 8 + | |
| 35 test/Makefile | 23 +- | |
| 36 28 files changed, 5166 insertions(+), 56 deletions(-) | |
| 37 create mode 100644 crypto/chacha/Makefile | |
| 38 create mode 100644 crypto/chacha/chacha.h | |
| 39 create mode 100644 crypto/chacha/chacha_enc.c | |
| 40 create mode 100644 crypto/chacha/chacha_vec.c | |
| 41 create mode 100644 crypto/chacha/chachatest.c | |
| 42 create mode 100644 crypto/evp/e_chacha20poly1305.c | |
| 43 create mode 100644 crypto/poly1305/Makefile | |
| 44 create mode 100644 crypto/poly1305/poly1305.c | |
| 45 create mode 100644 crypto/poly1305/poly1305.h | |
| 46 create mode 100644 crypto/poly1305/poly1305_arm.c | |
| 47 create mode 100644 crypto/poly1305/poly1305_arm_asm.s | |
| 48 create mode 100644 crypto/poly1305/poly1305_vec.c | |
| 49 create mode 100644 crypto/poly1305/poly1305test.c | |
| 50 | |
| 51 diff --git a/ssl/ssl_ciph.c b/ssl/ssl_ciph.c | |
| 52 index db85b29..cebb18a 100644 | |
| 53 --- a/ssl/ssl_ciph.c | |
| 54 +++ b/ssl/ssl_ciph.c | |
| 55 @@ -1442,7 +1442,9 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_list(const SSL_MET
HOD *ssl_method, | |
| 56 ssl_cipher_apply_rule(0, SSL_kEECDH, 0, 0, 0, 0, 0, CIPHER_ADD, -1, &hea
d, &tail); | |
| 57 ssl_cipher_apply_rule(0, SSL_kEECDH, 0, 0, 0, 0, 0, CIPHER_DEL, -1, &hea
d, &tail); | |
| 58 | |
| 59 - /* AES is our preferred symmetric cipher */ | |
| 60 + /* CHACHA20 is fast and safe on all hardware and is thus our preferred | |
| 61 + * symmetric cipher, with AES second. */ | |
| 62 + ssl_cipher_apply_rule(0, 0, 0, SSL_CHACHA20POLY1305, 0, 0, 0, CIPHER_ADD
, -1, &head, &tail); | |
| 63 ssl_cipher_apply_rule(0, 0, 0, SSL_AES, 0, 0, 0, CIPHER_ADD, -1, &head,
&tail); | |
| 64 | |
| 65 /* Temporarily enable everything else for sorting */ | |
| 66 diff --git a/Configure b/Configure | |
| 67 index 9c803dc..1b95384 100755 | |
| 68 --- a/Configure | |
| 69 +++ b/Configure | |
| 70 @@ -124,24 +124,24 @@ my $tlib="-lnsl -lsocket"; | |
| 71 my $bits1="THIRTY_TWO_BIT "; | |
| 72 my $bits2="SIXTY_FOUR_BIT "; | |
| 73 | |
| 74 -my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o crypt
586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586
.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cml
l-x86.o:ghash-x86.o:"; | |
| 75 +my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o crypt
586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586
.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cml
l-x86.o:ghash-x86.o:::"; | |
| 76 | |
| 77 my $x86_elf_asm="$x86_asm:elf"; | |
| 78 | |
| 79 -my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-
gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_
64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_
64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghas
h-x86_64.o:"; | |
| 80 -my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.
o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o::::
:ghash-ia64.o::void"; | |
| 81 -my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a
-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-spa
rcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; | |
| 82 -my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void"; | |
| 83 -my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-
alpha.o::void"; | |
| 84 -my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o:::
:::::"; | |
| 85 -my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha2
56-mips.o sha512-mips.o::::::::"; | |
| 86 -my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::ae
s-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-
s390x.o:::::ghash-s390x.o:"; | |
| 87 -my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cb
c.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-a
rmv4.o::void"; | |
| 88 -my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-p
arisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-
parisc.o::32"; | |
| 89 -my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o ae
s-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::gha
sh-parisc.o::64"; | |
| 90 -my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o
aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::"; | |
| 91 -my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o
aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::"; | |
| 92 -my $no_asm=":::::::::::::::void"; | |
| 93 +my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-
gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_
64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_
64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghas
h-x86_64.o::chacha_vec.o:poly1305_vec.o"; | |
| 94 +my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.
o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o::::
:ghash-ia64.o::::void"; | |
| 95 +my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a
-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-spa
rcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::::void"; | |
| 96 +my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::::void"; | |
| 97 +my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-
alpha.o::::void"; | |
| 98 +my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o:::
:::::::"; | |
| 99 +my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha2
56-mips.o sha512-mips.o::::::::::"; | |
| 100 +my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::ae
s-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-
s390x.o:::::::ghash-s390x.o:"; | |
| 101 +my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cb
c.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-a
rmv4.o::chacha_vec.o:poly1305_arm.o poly1305_arm_asm.o:void"; | |
| 102 +my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-p
arisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-
parisc.o::::32"; | |
| 103 +my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o ae
s-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::gha
sh-parisc.o::::64"; | |
| 104 +my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o
aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::::"; | |
| 105 +my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o
aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::::"; | |
| 106 +my $no_asm=":::::::::::::::::void"; | |
| 107 | |
| 108 # As for $BSDthreads. Idea is to maintain "collective" set of flags, | |
| 109 # which would cover all BSD flavors. -pthread applies to them all, | |
| 110 @@ -152,7 +152,7 @@ my $no_asm=":::::::::::::::void"; | |
| 111 # seems to be sufficient? | |
| 112 my $BSDthreads="-pthread -D_THREAD_SAFE -D_REENTRANT"; | |
| 113 | |
| 114 -#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lflags : $b
n_ops : $cpuid_obj : $bn_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj : $sha1_
obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_obj : $mod
es_obj : $engines_obj : $dso_scheme : $shared_target : $shared_cflag : $shared_l
dflag : $shared_extension : $ranlib : $arflags : $multilib | |
| 115 +#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lflags : $b
n_ops : $cpuid_obj : $bn_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj : $sha1_
obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_obj : $mod
es_obj : $engines_obj : $chacha_obj : $poly1305_obj : $dso_scheme : $shared_targ
et : $shared_cflag : $shared_ldflag : $shared_extension : $ranlib : $arflags : $
multilib : | |
| 116 | |
| 117 my %table=( | |
| 118 # File 'TABLE' (created by 'make TABLE') contains the data from this list, | |
| 119 @@ -647,6 +647,8 @@ my $idx_wp_obj = $idx++; | |
| 120 my $idx_cmll_obj = $idx++; | |
| 121 my $idx_modes_obj = $idx++; | |
| 122 my $idx_engines_obj = $idx++; | |
| 123 +my $idx_chacha_obj = $idx++; | |
| 124 +my $idx_poly1305_obj = $idx++; | |
| 125 my $idx_perlasm_scheme = $idx++; | |
| 126 my $idx_dso_scheme = $idx++; | |
| 127 my $idx_shared_target = $idx++; | |
| 128 @@ -692,6 +694,8 @@ my $aes_enc="aes_core.o aes_cbc.o"; | |
| 129 my $bf_enc ="bf_enc.o"; | |
| 130 my $cast_enc="c_enc.o"; | |
| 131 my $rc4_enc="rc4_enc.o rc4_skey.o"; | |
| 132 +my $chacha_enc="chacha_enc.o"; | |
| 133 +my $poly1305 ="poly1305.o"; | |
| 134 my $rc5_enc="rc5_enc.o"; | |
| 135 my $md5_obj=""; | |
| 136 my $sha1_obj=""; | |
| 137 @@ -1144,7 +1148,7 @@ $openssldir=$prefix . "/" . $openssldir if $openssldir !~
/(^\/|^[a-zA-Z]:[\\\/] | |
| 138 | |
| 139 print "IsMK1MF=$IsMK1MF\n"; | |
| 140 | |
| 141 -my @fields = split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); | |
| 142 +my @fields = split(/\s*:\s*/,$table{$target} . ":" x 31 , -1); | |
| 143 my $cc = $fields[$idx_cc]; | |
| 144 # Allow environment CC to override compiler... | |
| 145 if($ENV{CC}) { | |
| 146 @@ -1181,6 +1185,8 @@ my $ranlib = $ENV{'RANLIB'} || $fields[$idx_ranlib]; | |
| 147 my $ar = $ENV{'AR'} || "ar"; | |
| 148 my $arflags = $fields[$idx_arflags]; | |
| 149 my $multilib = $fields[$idx_multilib]; | |
| 150 +my $chacha_obj = $fields[$idx_chacha_obj]; | |
| 151 +my $poly1305_obj = $fields[$idx_poly1305_obj]; | |
| 152 | |
| 153 # if $prefix/lib$multilib is not an existing directory, then | |
| 154 # assume that it's not searched by linker automatically, in | |
| 155 @@ -1477,6 +1483,8 @@ $des_obj=$des_enc unless ($des_obj =~ /\.o$/); | |
| 156 $bf_obj=$bf_enc unless ($bf_obj =~ /\.o$/); | |
| 157 $cast_obj=$cast_enc unless ($cast_obj =~ /\.o$/); | |
| 158 $rc4_obj=$rc4_enc unless ($rc4_obj =~ /\.o$/); | |
| 159 +$chacha_obj=$chacha_enc unless ($chacha_obj =~ /\.o$/); | |
| 160 +$poly1305_obj=$poly1305 unless ($poly1305_obj =~ /\.o$/); | |
| 161 $rc5_obj=$rc5_enc unless ($rc5_obj =~ /\.o$/); | |
| 162 if ($sha1_obj =~ /\.o$/) | |
| 163 { | |
| 164 @@ -1637,6 +1645,8 @@ while (<IN>) | |
| 165 s/^BF_ENC=.*$/BF_ENC= $bf_obj/; | |
| 166 s/^CAST_ENC=.*$/CAST_ENC= $cast_obj/; | |
| 167 s/^RC4_ENC=.*$/RC4_ENC= $rc4_obj/; | |
| 168 + s/^CHACHA_ENC=.*$/CHACHA_ENC= $chacha_obj/; | |
| 169 + s/^POLY1305=.*$/POLY1305= $poly1305_obj/; | |
| 170 s/^RC5_ENC=.*$/RC5_ENC= $rc5_obj/; | |
| 171 s/^MD5_ASM_OBJ=.*$/MD5_ASM_OBJ= $md5_obj/; | |
| 172 s/^SHA1_ASM_OBJ=.*$/SHA1_ASM_OBJ= $sha1_obj/; | |
| 173 @@ -1698,6 +1708,8 @@ print "AES_ENC =$aes_obj\n"; | |
| 174 print "BF_ENC =$bf_obj\n"; | |
| 175 print "CAST_ENC =$cast_obj\n"; | |
| 176 print "RC4_ENC =$rc4_obj\n"; | |
| 177 +print "CHACHA_ENC =$chacha_obj\n"; | |
| 178 +print "POLY1305 =$poly1305_obj\n"; | |
| 179 print "RC5_ENC =$rc5_obj\n"; | |
| 180 print "MD5_OBJ_ASM =$md5_obj\n"; | |
| 181 print "SHA1_OBJ_ASM =$sha1_obj\n"; | |
| 182 @@ -2096,11 +2108,11 @@ sub print_table_entry | |
| 183 | |
| 184 (my $cc,my $cflags,my $unistd,my $thread_cflag,my $sys_id,my $lflags, | |
| 185 my $bn_ops,my $cpuid_obj,my $bn_obj,my $des_obj,my $aes_obj, my $bf_obj, | |
| 186 - my $md5_obj,my $sha1_obj,my $cast_obj,my $rc4_obj,my $rmd160_obj, | |
| 187 - my $rc5_obj,my $wp_obj,my $cmll_obj,my $modes_obj, my $engines_obj, | |
| 188 + my $md5_obj,my $sha1_obj,my $cast_obj,my $rc4_obj,my $chacha_obj,my $pol
y1305_obj, | |
| 189 + my $rmd160_obj, my $rc5_obj,my $wp_obj,my $cmll_obj,my $modes_obj, my $e
ngines_obj, | |
| 190 my $perlasm_scheme,my $dso_scheme,my $shared_target,my $shared_cflag, | |
| 191 my $shared_ldflag,my $shared_extension,my $ranlib,my $arflags,my $multil
ib)= | |
| 192 - split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); | |
| 193 + split(/\s*:\s*/,$table{$target} . ":" x 31 , -1); | |
| 194 | |
| 195 print <<EOF | |
| 196 | |
| 197 @@ -2121,6 +2133,8 @@ sub print_table_entry | |
| 198 \$sha1_obj = $sha1_obj | |
| 199 \$cast_obj = $cast_obj | |
| 200 \$rc4_obj = $rc4_obj | |
| 201 +\$chacha_obj = $chacha_obj | |
| 202 +\$poly1305_obj = $poly1305_obj | |
| 203 \$rmd160_obj = $rmd160_obj | |
| 204 \$rc5_obj = $rc5_obj | |
| 205 \$wp_obj = $wp_obj | |
| 206 @@ -2150,7 +2164,7 @@ sub test_sanity | |
| 207 | |
| 208 foreach $target (sort keys %table) | |
| 209 { | |
| 210 - @fields = split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); | |
| 211 + @fields = split(/\s*:\s*/,$table{$target} . ":" x 31 , -1); | |
| 212 | |
| 213 if ($fields[$idx_dso_scheme-1] =~ /^(beos|dl|dlfcn|win32|vms)$/) | |
| 214 { | |
| 215 diff --git a/Makefile.org b/Makefile.org | |
| 216 index 2db31ea..919466d 100644 | |
| 217 --- a/Makefile.org | |
| 218 +++ b/Makefile.org | |
| 219 @@ -94,6 +94,8 @@ BF_ENC= bf_enc.o | |
| 220 CAST_ENC= c_enc.o | |
| 221 RC4_ENC= rc4_enc.o | |
| 222 RC5_ENC= rc5_enc.o | |
| 223 +CHACHA_ENC= chacha_enc.o | |
| 224 +POLY1305= poly1305.o | |
| 225 MD5_ASM_OBJ= | |
| 226 SHA1_ASM_OBJ= | |
| 227 RMD160_ASM_OBJ= | |
| 228 @@ -147,7 +149,7 @@ SDIRS= \ | |
| 229 bn ec rsa dsa ecdsa dh ecdh dso engine \ | |
| 230 buffer bio stack lhash rand err \ | |
| 231 evp asn1 pem x509 x509v3 conf txt_db pkcs7 pkcs12 comp ocsp ui krb5 \ | |
| 232 - cms pqueue ts jpake srp store cmac | |
| 233 + cms pqueue ts jpake srp store cmac poly1305 chacha | |
| 234 # keep in mind that the above list is adjusted by ./Configure | |
| 235 # according to no-xxx arguments... | |
| 236 | |
| 237 @@ -232,6 +234,8 @@ BUILDENV= PLATFORM='$(PLATFORM)' PROCESSOR='$(PROCESSOR)'
\ | |
| 238 WP_ASM_OBJ='$(WP_ASM_OBJ)' \ | |
| 239 MODES_ASM_OBJ='$(MODES_ASM_OBJ)' \ | |
| 240 ENGINES_ASM_OBJ='$(ENGINES_ASM_OBJ)' \ | |
| 241 + CHACHA_ENC='$(CHACHA_ENC)' \ | |
| 242 + POLY1305='$(POLY1305)' \ | |
| 243 PERLASM_SCHEME='$(PERLASM_SCHEME)' \ | |
| 244 FIPSLIBDIR='${FIPSLIBDIR}' \ | |
| 245 FIPSDIR='${FIPSDIR}' \ | |
| 246 diff --git a/crypto/chacha/Makefile b/crypto/chacha/Makefile | |
| 247 new file mode 100644 | |
| 248 index 0000000..289933b | |
| 249 --- /dev/null | |
| 250 +++ b/crypto/chacha/Makefile | |
| 251 @@ -0,0 +1,80 @@ | |
| 252 +# | |
| 253 +# OpenSSL/crypto/chacha/Makefile | |
| 254 +# | |
| 255 + | |
| 256 +DIR= chacha | |
| 257 +TOP= ../.. | |
| 258 +CC= cc | |
| 259 +CPP= $(CC) -E | |
| 260 +INCLUDES= | |
| 261 +CFLAG=-g | |
| 262 +AR= ar r | |
| 263 + | |
| 264 +CFLAGS= $(INCLUDES) $(CFLAG) | |
| 265 +ASFLAGS= $(INCLUDES) $(ASFLAG) | |
| 266 +AFLAGS= $(ASFLAGS) | |
| 267 + | |
| 268 +CHACHA_ENC=chacha_enc.o | |
| 269 + | |
| 270 +GENERAL=Makefile | |
| 271 +TEST=chachatest.o | |
| 272 +APPS= | |
| 273 + | |
| 274 +LIB=$(TOP)/libcrypto.a | |
| 275 +LIBSRC= | |
| 276 +LIBOBJ=$(CHACHA_ENC) | |
| 277 + | |
| 278 +SRC= $(LIBSRC) | |
| 279 + | |
| 280 +EXHEADER=chacha.h | |
| 281 +HEADER= $(EXHEADER) | |
| 282 + | |
| 283 +ALL= $(GENERAL) $(SRC) $(HEADER) | |
| 284 + | |
| 285 +top: | |
| 286 + (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all) | |
| 287 + | |
| 288 +all: lib | |
| 289 + | |
| 290 +lib: $(LIBOBJ) | |
| 291 + $(AR) $(LIB) $(LIBOBJ) | |
| 292 + $(RANLIB) $(LIB) || echo Never mind. | |
| 293 + @touch lib | |
| 294 + | |
| 295 +files: | |
| 296 + $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO | |
| 297 + | |
| 298 +links: | |
| 299 + @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER) | |
| 300 + @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST) | |
| 301 + @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS) | |
| 302 + | |
| 303 +install: | |
| 304 + @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile... | |
| 305 + @headerlist="$(EXHEADER)"; for i in $$headerlist ; \ | |
| 306 + do \ | |
| 307 + (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \ | |
| 308 + chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \ | |
| 309 + done; | |
| 310 + | |
| 311 +tags: | |
| 312 + ctags $(SRC) | |
| 313 + | |
| 314 +tests: | |
| 315 + | |
| 316 +lint: | |
| 317 + lint -DLINT $(INCLUDES) $(SRC)>fluff | |
| 318 + | |
| 319 +depend: | |
| 320 + @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... | |
| 321 + $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) | |
| 322 + | |
| 323 +dclean: | |
| 324 + $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKE
FILE) >Makefile.new | |
| 325 + mv -f Makefile.new $(MAKEFILE) | |
| 326 + | |
| 327 +clean: | |
| 328 + rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff | |
| 329 + | |
| 330 +# DO NOT DELETE THIS LINE -- make depend depends on it. | |
| 331 + | |
| 332 diff --git a/crypto/chacha/chacha.h b/crypto/chacha/chacha.h | |
| 333 new file mode 100644 | |
| 334 index 0000000..d56519d | |
| 335 --- /dev/null | |
| 336 +++ b/crypto/chacha/chacha.h | |
| 337 @@ -0,0 +1,85 @@ | |
| 338 +/* | |
| 339 + * Chacha stream algorithm. | |
| 340 + * | |
| 341 + * Created on: Jun, 2013 | |
| 342 + * Author: Elie Bursztein (elieb@google.com) | |
| 343 + * | |
| 344 + * Adapted from the estream code by D. Bernstein. | |
| 345 + */ | |
| 346 +/* ==================================================================== | |
| 347 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
| 348 + * | |
| 349 + * Redistribution and use in source and binary forms, with or without | |
| 350 + * modification, are permitted provided that the following conditions | |
| 351 + * are met: | |
| 352 + * | |
| 353 + * 1. Redistributions of source code must retain the above copyright | |
| 354 + * notice, this list of conditions and the following disclaimer. | |
| 355 + * | |
| 356 + * 2. Redistributions in binary form must reproduce the above copyright | |
| 357 + * notice, this list of conditions and the following disclaimer in | |
| 358 + * the documentation and/or other materials provided with the | |
| 359 + * distribution. | |
| 360 + * | |
| 361 + * 3. All advertising materials mentioning features or use of this | |
| 362 + * software must display the following acknowledgment: | |
| 363 + * "This product includes software developed by the OpenSSL Project | |
| 364 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
| 365 + * | |
| 366 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
| 367 + * endorse or promote products derived from this software without | |
| 368 + * prior written permission. For written permission, please contact | |
| 369 + * licensing@OpenSSL.org. | |
| 370 + * | |
| 371 + * 5. Products derived from this software may not be called "OpenSSL" | |
| 372 + * nor may "OpenSSL" appear in their names without prior written | |
| 373 + * permission of the OpenSSL Project. | |
| 374 + * | |
| 375 + * 6. Redistributions of any form whatsoever must retain the following | |
| 376 + * acknowledgment: | |
| 377 + * "This product includes software developed by the OpenSSL Project | |
| 378 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
| 379 + * | |
| 380 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
| 381 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 382 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| 383 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
| 384 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 385 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
| 386 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| 387 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 388 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
| 389 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| 390 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
| 391 + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 392 + * ==================================================================== | |
| 393 + */ | |
| 394 +#ifndef HEADER_CHACHA_H | |
| 395 +#define HEADER_CHACHA_H | |
| 396 + | |
| 397 +#include <openssl/opensslconf.h> | |
| 398 + | |
| 399 +#if defined(OPENSSL_NO_CHACHA) | |
| 400 +#error ChaCha support is disabled. | |
| 401 +#endif | |
| 402 + | |
| 403 +#include <stddef.h> | |
| 404 + | |
| 405 +#ifdef __cplusplus | |
| 406 +extern "C" { | |
| 407 +#endif | |
| 408 + | |
| 409 +/* CRYPTO_chacha_20 encrypts |in_len| bytes from |in| with the given key and | |
| 410 + * nonce and writes the result to |out|, which may be equal to |in|. The | |
| 411 + * initial block counter is specified by |counter|. */ | |
| 412 +void CRYPTO_chacha_20(unsigned char *out, | |
| 413 + const unsigned char *in, size_t in_len, | |
| 414 + const unsigned char key[32], | |
| 415 + const unsigned char nonce[8], | |
| 416 + size_t counter); | |
| 417 + | |
| 418 +#ifdef __cplusplus | |
| 419 +} | |
| 420 +#endif | |
| 421 + | |
| 422 +#endif | |
| 423 diff --git a/crypto/chacha/chacha_enc.c b/crypto/chacha/chacha_enc.c | |
| 424 new file mode 100644 | |
| 425 index 0000000..54d1ca3 | |
| 426 --- /dev/null | |
| 427 +++ b/crypto/chacha/chacha_enc.c | |
| 428 @@ -0,0 +1,167 @@ | |
| 429 +/* | |
| 430 + * Chacha stream algorithm. | |
| 431 + * | |
| 432 + * Created on: Jun, 2013 | |
| 433 + * Author: Elie Bursztein (elieb@google.com) | |
| 434 + * | |
| 435 + * Adapted from the estream code by D. Bernstein. | |
| 436 + */ | |
| 437 +/* ==================================================================== | |
| 438 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
| 439 + * | |
| 440 + * Redistribution and use in source and binary forms, with or without | |
| 441 + * modification, are permitted provided that the following conditions | |
| 442 + * are met: | |
| 443 + * | |
| 444 + * 1. Redistributions of source code must retain the above copyright | |
| 445 + * notice, this list of conditions and the following disclaimer. | |
| 446 + * | |
| 447 + * 2. Redistributions in binary form must reproduce the above copyright | |
| 448 + * notice, this list of conditions and the following disclaimer in | |
| 449 + * the documentation and/or other materials provided with the | |
| 450 + * distribution. | |
| 451 + * | |
| 452 + * 3. All advertising materials mentioning features or use of this | |
| 453 + * software must display the following acknowledgment: | |
| 454 + * "This product includes software developed by the OpenSSL Project | |
| 455 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
| 456 + * | |
| 457 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
| 458 + * endorse or promote products derived from this software without | |
| 459 + * prior written permission. For written permission, please contact | |
| 460 + * licensing@OpenSSL.org. | |
| 461 + * | |
| 462 + * 5. Products derived from this software may not be called "OpenSSL" | |
| 463 + * nor may "OpenSSL" appear in their names without prior written | |
| 464 + * permission of the OpenSSL Project. | |
| 465 + * | |
| 466 + * 6. Redistributions of any form whatsoever must retain the following | |
| 467 + * acknowledgment: | |
| 468 + * "This product includes software developed by the OpenSSL Project | |
| 469 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
| 470 + * | |
| 471 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
| 472 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 473 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| 474 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
| 475 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 476 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
| 477 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| 478 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 479 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
| 480 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| 481 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
| 482 + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 483 + * ==================================================================== | |
| 484 + */ | |
| 485 + | |
| 486 +#include <stdint.h> | |
| 487 +#include <string.h> | |
| 488 +#include <openssl/opensslconf.h> | |
| 489 + | |
| 490 +#if !defined(OPENSSL_NO_CHACHA) | |
| 491 + | |
| 492 +#include <openssl/chacha.h> | |
| 493 + | |
| 494 +/* sigma contains the ChaCha constants, which happen to be an ASCII string. */ | |
| 495 +static const char sigma[16] = "expand 32-byte k"; | |
| 496 + | |
| 497 +#define ROTATE(v, n) (((v) << (n)) | ((v) >> (32 - (n)))) | |
| 498 +#define XOR(v, w) ((v) ^ (w)) | |
| 499 +#define PLUS(x, y) ((x) + (y)) | |
| 500 +#define PLUSONE(v) (PLUS((v), 1)) | |
| 501 + | |
| 502 +#define U32TO8_LITTLE(p, v) \ | |
| 503 + { (p)[0] = (v >> 0) & 0xff; (p)[1] = (v >> 8) & 0xff; \ | |
| 504 + (p)[2] = (v >> 16) & 0xff; (p)[3] = (v >> 24) & 0xff; } | |
| 505 +#define U8TO32_LITTLE(p) \ | |
| 506 + (((uint32_t)((p)[0]) ) | ((uint32_t)((p)[1]) << 8) | \ | |
| 507 + ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24) ) | |
| 508 + | |
| 509 +/* QUARTERROUND updates a, b, c, d with a ChaCha "quarter" round. */ | |
| 510 +#define QUARTERROUND(a,b,c,d) \ | |
| 511 + x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \ | |
| 512 + x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \ | |
| 513 + x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \ | |
| 514 + x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7); | |
| 515 + | |
| 516 +typedef unsigned int uint32_t; | |
| 517 + | |
| 518 +/* chacha_core performs |num_rounds| rounds of ChaCha20 on the input words in | |
| 519 + * |input| and writes the 64 output bytes to |output|. */ | |
| 520 +static void chacha_core(unsigned char output[64], const uint32_t input[16], | |
| 521 + int num_rounds) | |
| 522 + { | |
| 523 + uint32_t x[16]; | |
| 524 + int i; | |
| 525 + | |
| 526 + memcpy(x, input, sizeof(uint32_t) * 16); | |
| 527 + for (i = 20; i > 0; i -= 2) | |
| 528 + { | |
| 529 + QUARTERROUND( 0, 4, 8,12) | |
| 530 + QUARTERROUND( 1, 5, 9,13) | |
| 531 + QUARTERROUND( 2, 6,10,14) | |
| 532 + QUARTERROUND( 3, 7,11,15) | |
| 533 + QUARTERROUND( 0, 5,10,15) | |
| 534 + QUARTERROUND( 1, 6,11,12) | |
| 535 + QUARTERROUND( 2, 7, 8,13) | |
| 536 + QUARTERROUND( 3, 4, 9,14) | |
| 537 + } | |
| 538 + | |
| 539 + for (i = 0; i < 16; ++i) | |
| 540 + x[i] = PLUS(x[i], input[i]); | |
| 541 + for (i = 0; i < 16; ++i) | |
| 542 + U32TO8_LITTLE(output + 4 * i, x[i]); | |
| 543 + } | |
| 544 + | |
| 545 +void CRYPTO_chacha_20(unsigned char *out, | |
| 546 + const unsigned char *in, size_t in_len, | |
| 547 + const unsigned char key[32], | |
| 548 + const unsigned char nonce[8], | |
| 549 + size_t counter) | |
| 550 + { | |
| 551 + uint32_t input[16]; | |
| 552 + unsigned char buf[64]; | |
| 553 + size_t todo, i; | |
| 554 + | |
| 555 + input[0] = U8TO32_LITTLE(sigma + 0); | |
| 556 + input[1] = U8TO32_LITTLE(sigma + 4); | |
| 557 + input[2] = U8TO32_LITTLE(sigma + 8); | |
| 558 + input[3] = U8TO32_LITTLE(sigma + 12); | |
| 559 + | |
| 560 + input[4] = U8TO32_LITTLE(key + 0); | |
| 561 + input[5] = U8TO32_LITTLE(key + 4); | |
| 562 + input[6] = U8TO32_LITTLE(key + 8); | |
| 563 + input[7] = U8TO32_LITTLE(key + 12); | |
| 564 + | |
| 565 + input[8] = U8TO32_LITTLE(key + 16); | |
| 566 + input[9] = U8TO32_LITTLE(key + 20); | |
| 567 + input[10] = U8TO32_LITTLE(key + 24); | |
| 568 + input[11] = U8TO32_LITTLE(key + 28); | |
| 569 + | |
| 570 + input[12] = counter; | |
| 571 + input[13] = ((uint64_t) counter) >> 32; | |
| 572 + input[14] = U8TO32_LITTLE(nonce + 0); | |
| 573 + input[15] = U8TO32_LITTLE(nonce + 4); | |
| 574 + | |
| 575 + while (in_len > 0) | |
| 576 + { | |
| 577 + todo = sizeof(buf); | |
| 578 + if (in_len < todo) | |
| 579 + todo = in_len; | |
| 580 + | |
| 581 + chacha_core(buf, input, 20); | |
| 582 + for (i = 0; i < todo; i++) | |
| 583 + out[i] = in[i] ^ buf[i]; | |
| 584 + | |
| 585 + out += todo; | |
| 586 + in += todo; | |
| 587 + in_len -= todo; | |
| 588 + | |
| 589 + input[12]++; | |
| 590 + if (input[12] == 0) | |
| 591 + input[13]++; | |
| 592 + } | |
| 593 + } | |
| 594 + | |
| 595 +#endif /* !OPENSSL_NO_CHACHA */ | |
| 596 diff --git a/crypto/chacha/chacha_vec.c b/crypto/chacha/chacha_vec.c | |
| 597 new file mode 100644 | |
| 598 index 0000000..33b2238 | |
| 599 --- /dev/null | |
| 600 +++ b/crypto/chacha/chacha_vec.c | |
| 601 @@ -0,0 +1,345 @@ | |
| 602 +/* ==================================================================== | |
| 603 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
| 604 + * | |
| 605 + * Redistribution and use in source and binary forms, with or without | |
| 606 + * modification, are permitted provided that the following conditions | |
| 607 + * are met: | |
| 608 + * | |
| 609 + * 1. Redistributions of source code must retain the above copyright | |
| 610 + * notice, this list of conditions and the following disclaimer. | |
| 611 + * | |
| 612 + * 2. Redistributions in binary form must reproduce the above copyright | |
| 613 + * notice, this list of conditions and the following disclaimer in | |
| 614 + * the documentation and/or other materials provided with the | |
| 615 + * distribution. | |
| 616 + * | |
| 617 + * 3. All advertising materials mentioning features or use of this | |
| 618 + * software must display the following acknowledgment: | |
| 619 + * "This product includes software developed by the OpenSSL Project | |
| 620 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
| 621 + * | |
| 622 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
| 623 + * endorse or promote products derived from this software without | |
| 624 + * prior written permission. For written permission, please contact | |
| 625 + * licensing@OpenSSL.org. | |
| 626 + * | |
| 627 + * 5. Products derived from this software may not be called "OpenSSL" | |
| 628 + * nor may "OpenSSL" appear in their names without prior written | |
| 629 + * permission of the OpenSSL Project. | |
| 630 + * | |
| 631 + * 6. Redistributions of any form whatsoever must retain the following | |
| 632 + * acknowledgment: | |
| 633 + * "This product includes software developed by the OpenSSL Project | |
| 634 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
| 635 + * | |
| 636 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
| 637 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 638 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| 639 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
| 640 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 641 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
| 642 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| 643 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 644 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
| 645 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| 646 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
| 647 + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 648 + * ==================================================================== | |
| 649 + */ | |
| 650 + | |
| 651 +/* This implementation is by Ted Krovetz and was submitted to SUPERCOP and | |
| 652 + * marked as public domain. It was been altered to allow for non-aligned inputs | |
| 653 + * and to allow the block counter to be passed in specifically. */ | |
| 654 + | |
| 655 +#include <string.h> | |
| 656 +#include <stdint.h> | |
| 657 +#include <openssl/opensslconf.h> | |
| 658 + | |
| 659 +#if !defined(OPENSSL_NO_CHACHA) | |
| 660 + | |
| 661 +#include <openssl/chacha.h> | |
| 662 + | |
| 663 +#ifndef CHACHA_RNDS | |
| 664 +#define CHACHA_RNDS 20 /* 8 (high speed), 20 (conservative), 12 (middle) */ | |
| 665 +#endif | |
| 666 + | |
| 667 +/* Architecture-neutral way to specify 16-byte vector of ints */ | |
| 668 +typedef unsigned vec __attribute__ ((vector_size (16))); | |
| 669 + | |
| 670 +/* This implementation is designed for Neon, SSE and AltiVec machines. The | |
| 671 + * following specify how to do certain vector operations efficiently on | |
| 672 + * each architecture, using intrinsics. | |
| 673 + * This implementation supports parallel processing of multiple blocks, | |
| 674 + * including potentially using general-purpose registers. | |
| 675 + */ | |
| 676 +#if __ARM_NEON__ | |
| 677 +#include <arm_neon.h> | |
| 678 +#define GPR_TOO 1 | |
| 679 +#define VBPI 2 | |
| 680 +#define ONE (vec)vsetq_lane_u32(1,vdupq_n_u32(0),0) | |
| 681 +#define LOAD(m) (vec)(*((vec*)(m))) | |
| 682 +#define STORE(m,r) (*((vec*)(m))) = (r) | |
| 683 +#define ROTV1(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,1) | |
| 684 +#define ROTV2(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,2) | |
| 685 +#define ROTV3(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,3) | |
| 686 +#define ROTW16(x) (vec)vrev32q_u16((uint16x8_t)x) | |
| 687 +#if __clang__ | |
| 688 +#define ROTW7(x) (x << ((vec){ 7, 7, 7, 7})) ^ (x >> ((vec){25,25,25,25})) | |
| 689 +#define ROTW8(x) (x << ((vec){ 8, 8, 8, 8})) ^ (x >> ((vec){24,24,24,24})) | |
| 690 +#define ROTW12(x) (x << ((vec){12,12,12,12})) ^ (x >> ((vec){20,20,20,20})) | |
| 691 +#else | |
| 692 +#define ROTW7(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,7),(uint32x4_t)x,2
5) | |
| 693 +#define ROTW8(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,8),(uint32x4_t)x,2
4) | |
| 694 +#define ROTW12(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,12),(uint32x4_t)x,
20) | |
| 695 +#endif | |
| 696 +#elif __SSE2__ | |
| 697 +#include <emmintrin.h> | |
| 698 +#define GPR_TOO 0 | |
| 699 +#if __clang__ | |
| 700 +#define VBPI 4 | |
| 701 +#else | |
| 702 +#define VBPI 3 | |
| 703 +#endif | |
| 704 +#define ONE (vec)_mm_set_epi32(0,0,0,1) | |
| 705 +#define LOAD(m) (vec)_mm_loadu_si128((__m128i*)(m)) | |
| 706 +#define STORE(m,r) _mm_storeu_si128((__m128i*)(m), (__m128i) (r)) | |
| 707 +#define ROTV1(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(0,3,2,1)) | |
| 708 +#define ROTV2(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(1,0,3,2)) | |
| 709 +#define ROTV3(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(2,1,0,3)) | |
| 710 +#define ROTW7(x) (vec)(_mm_slli_epi32((__m128i)x, 7) ^ _mm_srli_epi32((__m128i
)x,25)) | |
| 711 +#define ROTW12(x) (vec)(_mm_slli_epi32((__m128i)x,12) ^ _mm_srli_epi32((__m128i
)x,20)) | |
| 712 +#if __SSSE3__ | |
| 713 +#include <tmmintrin.h> | |
| 714 +#define ROTW8(x) (vec)_mm_shuffle_epi8((__m128i)x,_mm_set_epi8(14,13,12,15,10,
9,8,11,6,5,4,7,2,1,0,3)) | |
| 715 +#define ROTW16(x) (vec)_mm_shuffle_epi8((__m128i)x,_mm_set_epi8(13,12,15,14,9,8
,11,10,5,4,7,6,1,0,3,2)) | |
| 716 +#else | |
| 717 +#define ROTW8(x) (vec)(_mm_slli_epi32((__m128i)x, 8) ^ _mm_srli_epi32((__m128i
)x,24)) | |
| 718 +#define ROTW16(x) (vec)(_mm_slli_epi32((__m128i)x,16) ^ _mm_srli_epi32((__m128i
)x,16)) | |
| 719 +#endif | |
| 720 +#else | |
| 721 +#error -- Implementation supports only machines with neon or SSE2 | |
| 722 +#endif | |
| 723 + | |
| 724 +#ifndef REVV_BE | |
| 725 +#define REVV_BE(x) (x) | |
| 726 +#endif | |
| 727 + | |
| 728 +#ifndef REVW_BE | |
| 729 +#define REVW_BE(x) (x) | |
| 730 +#endif | |
| 731 + | |
| 732 +#define BPI (VBPI + GPR_TOO) /* Blocks computed per loop iteration */ | |
| 733 + | |
| 734 +#define DQROUND_VECTORS(a,b,c,d) \ | |
| 735 + a += b; d ^= a; d = ROTW16(d); \ | |
| 736 + c += d; b ^= c; b = ROTW12(b); \ | |
| 737 + a += b; d ^= a; d = ROTW8(d); \ | |
| 738 + c += d; b ^= c; b = ROTW7(b); \ | |
| 739 + b = ROTV1(b); c = ROTV2(c); d = ROTV3(d); \ | |
| 740 + a += b; d ^= a; d = ROTW16(d); \ | |
| 741 + c += d; b ^= c; b = ROTW12(b); \ | |
| 742 + a += b; d ^= a; d = ROTW8(d); \ | |
| 743 + c += d; b ^= c; b = ROTW7(b); \ | |
| 744 + b = ROTV3(b); c = ROTV2(c); d = ROTV1(d); | |
| 745 + | |
| 746 +#define QROUND_WORDS(a,b,c,d) \ | |
| 747 + a = a+b; d ^= a; d = d<<16 | d>>16; \ | |
| 748 + c = c+d; b ^= c; b = b<<12 | b>>20; \ | |
| 749 + a = a+b; d ^= a; d = d<< 8 | d>>24; \ | |
| 750 + c = c+d; b ^= c; b = b<< 7 | b>>25; | |
| 751 + | |
| 752 +#define WRITE_XOR(in, op, d, v0, v1, v2, v3) \ | |
| 753 + STORE(op + d + 0, LOAD(in + d + 0) ^ REVV_BE(v0)); \ | |
| 754 + STORE(op + d + 4, LOAD(in + d + 4) ^ REVV_BE(v1)); \ | |
| 755 + STORE(op + d + 8, LOAD(in + d + 8) ^ REVV_BE(v2)); \ | |
| 756 + STORE(op + d +12, LOAD(in + d +12) ^ REVV_BE(v3)); | |
| 757 + | |
| 758 +void CRYPTO_chacha_20( | |
| 759 + unsigned char *out, | |
| 760 + const unsigned char *in, | |
| 761 + size_t inlen, | |
| 762 + const unsigned char key[32], | |
| 763 + const unsigned char nonce[8], | |
| 764 + size_t counter) | |
| 765 + { | |
| 766 + unsigned iters, i, *op=(unsigned *)out, *ip=(unsigned *)in, *kp; | |
| 767 +#if defined(__ARM_NEON__) | |
| 768 + unsigned *np; | |
| 769 +#endif | |
| 770 + vec s0, s1, s2, s3; | |
| 771 +#if !defined(__ARM_NEON__) && !defined(__SSE2__) | |
| 772 + __attribute__ ((aligned (16))) unsigned key[8], nonce[4]; | |
| 773 +#endif | |
| 774 + __attribute__ ((aligned (16))) unsigned chacha_const[] = | |
| 775 + {0x61707865,0x3320646E,0x79622D32,0x6B206574}; | |
| 776 +#if defined(__ARM_NEON__) || defined(__SSE2__) | |
| 777 + kp = (unsigned *)key; | |
| 778 +#else | |
| 779 + ((vec *)key)[0] = REVV_BE(((vec *)key)[0]); | |
| 780 + ((vec *)key)[1] = REVV_BE(((vec *)key)[1]); | |
| 781 + nonce[0] = REVW_BE(((unsigned *)nonce)[0]); | |
| 782 + nonce[1] = REVW_BE(((unsigned *)nonce)[1]); | |
| 783 + nonce[2] = REVW_BE(((unsigned *)nonce)[2]); | |
| 784 + nonce[3] = REVW_BE(((unsigned *)nonce)[3]); | |
| 785 + kp = (unsigned *)key; | |
| 786 + np = (unsigned *)nonce; | |
| 787 +#endif | |
| 788 +#if defined(__ARM_NEON__) | |
| 789 + np = (unsigned*) nonce; | |
| 790 +#endif | |
| 791 + s0 = LOAD(chacha_const); | |
| 792 + s1 = LOAD(&((vec*)kp)[0]); | |
| 793 + s2 = LOAD(&((vec*)kp)[1]); | |
| 794 + s3 = (vec){ | |
| 795 + counter & 0xffffffff, | |
| 796 +#if __ARM_NEON__ | |
| 797 + 0, /* can't right-shift 32 bits on a 32-bit system. */ | |
| 798 +#else | |
| 799 + counter >> 32, | |
| 800 +#endif | |
| 801 + ((uint32_t*)nonce)[0], | |
| 802 + ((uint32_t*)nonce)[1] | |
| 803 + }; | |
| 804 + | |
| 805 + for (iters = 0; iters < inlen/(BPI*64); iters++) | |
| 806 + { | |
| 807 +#if GPR_TOO | |
| 808 + register unsigned x0, x1, x2, x3, x4, x5, x6, x7, x8, | |
| 809 + x9, x10, x11, x12, x13, x14, x15; | |
| 810 +#endif | |
| 811 +#if VBPI > 2 | |
| 812 + vec v8,v9,v10,v11; | |
| 813 +#endif | |
| 814 +#if VBPI > 3 | |
| 815 + vec v12,v13,v14,v15; | |
| 816 +#endif | |
| 817 + | |
| 818 + vec v0,v1,v2,v3,v4,v5,v6,v7; | |
| 819 + v4 = v0 = s0; v5 = v1 = s1; v6 = v2 = s2; v3 = s3; | |
| 820 + v7 = v3 + ONE; | |
| 821 +#if VBPI > 2 | |
| 822 + v8 = v4; v9 = v5; v10 = v6; | |
| 823 + v11 = v7 + ONE; | |
| 824 +#endif | |
| 825 +#if VBPI > 3 | |
| 826 + v12 = v8; v13 = v9; v14 = v10; | |
| 827 + v15 = v11 + ONE; | |
| 828 +#endif | |
| 829 +#if GPR_TOO | |
| 830 + x0 = chacha_const[0]; x1 = chacha_const[1]; | |
| 831 + x2 = chacha_const[2]; x3 = chacha_const[3]; | |
| 832 + x4 = kp[0]; x5 = kp[1]; x6 = kp[2]; x7 = kp[3]; | |
| 833 + x8 = kp[4]; x9 = kp[5]; x10 = kp[6]; x11 = kp[7]; | |
| 834 + x12 = counter+BPI*iters+(BPI-1); x13 = 0; | |
| 835 + x14 = np[0]; x15 = np[1]; | |
| 836 +#endif | |
| 837 + for (i = CHACHA_RNDS/2; i; i--) | |
| 838 + { | |
| 839 + DQROUND_VECTORS(v0,v1,v2,v3) | |
| 840 + DQROUND_VECTORS(v4,v5,v6,v7) | |
| 841 +#if VBPI > 2 | |
| 842 + DQROUND_VECTORS(v8,v9,v10,v11) | |
| 843 +#endif | |
| 844 +#if VBPI > 3 | |
| 845 + DQROUND_VECTORS(v12,v13,v14,v15) | |
| 846 +#endif | |
| 847 +#if GPR_TOO | |
| 848 + QROUND_WORDS( x0, x4, x8,x12) | |
| 849 + QROUND_WORDS( x1, x5, x9,x13) | |
| 850 + QROUND_WORDS( x2, x6,x10,x14) | |
| 851 + QROUND_WORDS( x3, x7,x11,x15) | |
| 852 + QROUND_WORDS( x0, x5,x10,x15) | |
| 853 + QROUND_WORDS( x1, x6,x11,x12) | |
| 854 + QROUND_WORDS( x2, x7, x8,x13) | |
| 855 + QROUND_WORDS( x3, x4, x9,x14) | |
| 856 +#endif | |
| 857 + } | |
| 858 + | |
| 859 + WRITE_XOR(ip, op, 0, v0+s0, v1+s1, v2+s2, v3+s3) | |
| 860 + s3 += ONE; | |
| 861 + WRITE_XOR(ip, op, 16, v4+s0, v5+s1, v6+s2, v7+s3) | |
| 862 + s3 += ONE; | |
| 863 +#if VBPI > 2 | |
| 864 + WRITE_XOR(ip, op, 32, v8+s0, v9+s1, v10+s2, v11+s3) | |
| 865 + s3 += ONE; | |
| 866 +#endif | |
| 867 +#if VBPI > 3 | |
| 868 + WRITE_XOR(ip, op, 48, v12+s0, v13+s1, v14+s2, v15+s3) | |
| 869 + s3 += ONE; | |
| 870 +#endif | |
| 871 + ip += VBPI*16; | |
| 872 + op += VBPI*16; | |
| 873 +#if GPR_TOO | |
| 874 + op[0] = REVW_BE(REVW_BE(ip[0]) ^ (x0 + chacha_const[0])); | |
| 875 + op[1] = REVW_BE(REVW_BE(ip[1]) ^ (x1 + chacha_const[1])); | |
| 876 + op[2] = REVW_BE(REVW_BE(ip[2]) ^ (x2 + chacha_const[2])); | |
| 877 + op[3] = REVW_BE(REVW_BE(ip[3]) ^ (x3 + chacha_const[3])); | |
| 878 + op[4] = REVW_BE(REVW_BE(ip[4]) ^ (x4 + kp[0])); | |
| 879 + op[5] = REVW_BE(REVW_BE(ip[5]) ^ (x5 + kp[1])); | |
| 880 + op[6] = REVW_BE(REVW_BE(ip[6]) ^ (x6 + kp[2])); | |
| 881 + op[7] = REVW_BE(REVW_BE(ip[7]) ^ (x7 + kp[3])); | |
| 882 + op[8] = REVW_BE(REVW_BE(ip[8]) ^ (x8 + kp[4])); | |
| 883 + op[9] = REVW_BE(REVW_BE(ip[9]) ^ (x9 + kp[5])); | |
| 884 + op[10] = REVW_BE(REVW_BE(ip[10]) ^ (x10 + kp[6])); | |
| 885 + op[11] = REVW_BE(REVW_BE(ip[11]) ^ (x11 + kp[7])); | |
| 886 + op[12] = REVW_BE(REVW_BE(ip[12]) ^ (x12 + counter+BPI*iters+(BPI
-1))); | |
| 887 + op[13] = REVW_BE(REVW_BE(ip[13]) ^ (x13)); | |
| 888 + op[14] = REVW_BE(REVW_BE(ip[14]) ^ (x14 + np[0])); | |
| 889 + op[15] = REVW_BE(REVW_BE(ip[15]) ^ (x15 + np[1])); | |
| 890 + s3 += ONE; | |
| 891 + ip += 16; | |
| 892 + op += 16; | |
| 893 +#endif | |
| 894 + } | |
| 895 + | |
| 896 + for (iters = inlen%(BPI*64)/64; iters != 0; iters--) | |
| 897 + { | |
| 898 + vec v0 = s0, v1 = s1, v2 = s2, v3 = s3; | |
| 899 + for (i = CHACHA_RNDS/2; i; i--) | |
| 900 + { | |
| 901 + DQROUND_VECTORS(v0,v1,v2,v3); | |
| 902 + } | |
| 903 + WRITE_XOR(ip, op, 0, v0+s0, v1+s1, v2+s2, v3+s3) | |
| 904 + s3 += ONE; | |
| 905 + ip += 16; | |
| 906 + op += 16; | |
| 907 + } | |
| 908 + | |
| 909 + inlen = inlen % 64; | |
| 910 + if (inlen) | |
| 911 + { | |
| 912 + __attribute__ ((aligned (16))) vec buf[4]; | |
| 913 + vec v0,v1,v2,v3; | |
| 914 + v0 = s0; v1 = s1; v2 = s2; v3 = s3; | |
| 915 + for (i = CHACHA_RNDS/2; i; i--) | |
| 916 + { | |
| 917 + DQROUND_VECTORS(v0,v1,v2,v3); | |
| 918 + } | |
| 919 + | |
| 920 + if (inlen >= 16) | |
| 921 + { | |
| 922 + STORE(op + 0, LOAD(ip + 0) ^ REVV_BE(v0 + s0)); | |
| 923 + if (inlen >= 32) | |
| 924 + { | |
| 925 + STORE(op + 4, LOAD(ip + 4) ^ REVV_BE(v1 + s1)); | |
| 926 + if (inlen >= 48) | |
| 927 + { | |
| 928 + STORE(op + 8, LOAD(ip + 8) ^ | |
| 929 + REVV_BE(v2 + s2)); | |
| 930 + buf[3] = REVV_BE(v3 + s3); | |
| 931 + } | |
| 932 + else | |
| 933 + buf[2] = REVV_BE(v2 + s2); | |
| 934 + } | |
| 935 + else | |
| 936 + buf[1] = REVV_BE(v1 + s1); | |
| 937 + } | |
| 938 + else | |
| 939 + buf[0] = REVV_BE(v0 + s0); | |
| 940 + | |
| 941 + for (i=inlen & ~15; i<inlen; i++) | |
| 942 + ((char *)op)[i] = ((char *)ip)[i] ^ ((char *)buf)[i]; | |
| 943 + } | |
| 944 + } | |
| 945 + | |
| 946 +#endif /* !OPENSSL_NO_CHACHA */ | |
| 947 diff --git a/crypto/chacha/chachatest.c b/crypto/chacha/chachatest.c | |
| 948 new file mode 100644 | |
| 949 index 0000000..b2a9389 | |
| 950 --- /dev/null | |
| 951 +++ b/crypto/chacha/chachatest.c | |
| 952 @@ -0,0 +1,211 @@ | |
| 953 +/* | |
| 954 + * Chacha stream algorithm. | |
| 955 + * | |
| 956 + * Created on: Jun, 2013 | |
| 957 + * Author: Elie Bursztein (elieb@google.com) | |
| 958 + * | |
| 959 + * Adapted from the estream code by D. Bernstein. | |
| 960 + */ | |
| 961 +/* ==================================================================== | |
| 962 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
| 963 + * | |
| 964 + * Redistribution and use in source and binary forms, with or without | |
| 965 + * modification, are permitted provided that the following conditions | |
| 966 + * are met: | |
| 967 + * | |
| 968 + * 1. Redistributions of source code must retain the above copyright | |
| 969 + * notice, this list of conditions and the following disclaimer. | |
| 970 + * | |
| 971 + * 2. Redistributions in binary form must reproduce the above copyright | |
| 972 + * notice, this list of conditions and the following disclaimer in | |
| 973 + * the documentation and/or other materials provided with the | |
| 974 + * distribution. | |
| 975 + * | |
| 976 + * 3. All advertising materials mentioning features or use of this | |
| 977 + * software must display the following acknowledgment: | |
| 978 + * "This product includes software developed by the OpenSSL Project | |
| 979 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
| 980 + * | |
| 981 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
| 982 + * endorse or promote products derived from this software without | |
| 983 + * prior written permission. For written permission, please contact | |
| 984 + * licensing@OpenSSL.org. | |
| 985 + * | |
| 986 + * 5. Products derived from this software may not be called "OpenSSL" | |
| 987 + * nor may "OpenSSL" appear in their names without prior written | |
| 988 + * permission of the OpenSSL Project. | |
| 989 + * | |
| 990 + * 6. Redistributions of any form whatsoever must retain the following | |
| 991 + * acknowledgment: | |
| 992 + * "This product includes software developed by the OpenSSL Project | |
| 993 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
| 994 + * | |
| 995 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
| 996 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 997 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| 998 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
| 999 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 1000 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
| 1001 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| 1002 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 1003 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
| 1004 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| 1005 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
| 1006 + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 1007 + * ==================================================================== | |
| 1008 + */ | |
| 1009 + | |
| 1010 +#include <stdio.h> | |
| 1011 +#include <stdlib.h> | |
| 1012 +#include <string.h> | |
| 1013 +#include <stdint.h> | |
| 1014 + | |
| 1015 +#include <openssl/chacha.h> | |
| 1016 + | |
| 1017 +struct chacha_test { | |
| 1018 + const char *keyhex; | |
| 1019 + const char *noncehex; | |
| 1020 + const char *outhex; | |
| 1021 +}; | |
| 1022 + | |
| 1023 +static const struct chacha_test chacha_tests[] = { | |
| 1024 + { | |
| 1025 + "000000000000000000000000000000000000000000000000000000000000000
0", | |
| 1026 + "0000000000000000", | |
| 1027 + "76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc
7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586", | |
| 1028 + }, | |
| 1029 + { | |
| 1030 + "000000000000000000000000000000000000000000000000000000000000000
1", | |
| 1031 + "0000000000000000", | |
| 1032 + "4540f05a9f1fb296d7736e7b208e3c96eb4fe1834688d2604f450952ed432d4
1bbe2a0b6ea7566d2a5d1e7e20d42af2c53d792b1c43fea817e9ad275ae546963", | |
| 1033 + }, | |
| 1034 + { | |
| 1035 + "000000000000000000000000000000000000000000000000000000000000000
0", | |
| 1036 + "0000000000000001", | |
| 1037 + "de9cba7bf3d69ef5e786dc63973f653a0b49e015adbff7134fcb7df13782103
1e85a050278a7084527214f73efc7fa5b5277062eb7a0433e445f41e31afab757", | |
| 1038 + }, | |
| 1039 + { | |
| 1040 + "000000000000000000000000000000000000000000000000000000000000000
0", | |
| 1041 + "0100000000000000", | |
| 1042 + "ef3fdfd6c61578fbf5cf35bd3dd33b8009631634d21e42ac33960bd138e50d3
2111e4caf237ee53ca8ad6426194a88545ddc497a0b466e7d6bbdb0041b2f586b", | |
| 1043 + }, | |
| 1044 + { | |
| 1045 + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1
f", | |
| 1046 + "0001020304050607", | |
| 1047 + "f798a189f195e66982105ffb640bb7757f579da31602fc93ec01ac56f85ac3c
134a4547b733b46413042c9440049176905d3be59ea1c53f15916155c2be8241a38008b9a26bc359
41e2444177c8ade6689de95264986d95889fb60e84629c9bd9a5acb1cc118be563eb9b3a4a472f82
e09a7e778492b562ef7130e88dfe031c79db9d4f7c7a899151b9a475032b63fc385245fe054e3dd5
a97a5f576fe064025d3ce042c566ab2c507b138db853e3d6959660996546cc9c4a6eafdc777c040d
70eaf46f76dad3979e5c5360c3317166a1c894c94a371876a94df7628fe4eaaf2ccb27d5aaae0ad7
ad0f9d4b6ad3b54098746d4524d38407a6deb", | |
| 1048 + }, | |
| 1049 +}; | |
| 1050 + | |
| 1051 +static unsigned char hex_digit(char h) | |
| 1052 + { | |
| 1053 + if (h >= '0' && h <= '9') | |
| 1054 + return h - '0'; | |
| 1055 + else if (h >= 'a' && h <= 'f') | |
| 1056 + return h - 'a' + 10; | |
| 1057 + else if (h >= 'A' && h <= 'F') | |
| 1058 + return h - 'A' + 10; | |
| 1059 + else | |
| 1060 + abort(); | |
| 1061 + } | |
| 1062 + | |
| 1063 +static void hex_decode(unsigned char *out, const char* hex) | |
| 1064 + { | |
| 1065 + size_t j = 0; | |
| 1066 + | |
| 1067 + while (*hex != 0) | |
| 1068 + { | |
| 1069 + unsigned char v = hex_digit(*hex++); | |
| 1070 + v <<= 4; | |
| 1071 + v |= hex_digit(*hex++); | |
| 1072 + out[j++] = v; | |
| 1073 + } | |
| 1074 + } | |
| 1075 + | |
| 1076 +static void hexdump(unsigned char *a, size_t len) | |
| 1077 + { | |
| 1078 + size_t i; | |
| 1079 + | |
| 1080 + for (i = 0; i < len; i++) | |
| 1081 + printf("%02x", a[i]); | |
| 1082 + } | |
| 1083 + | |
| 1084 +/* misalign returns a pointer that points 0 to 15 bytes into |in| such that the | |
| 1085 + * returned pointer has alignment 1 mod 16. */ | |
| 1086 +static void* misalign(void* in) | |
| 1087 + { | |
| 1088 + intptr_t x = (intptr_t) in; | |
| 1089 + x += (17 - (x % 16)) % 16; | |
| 1090 + return (void*) x; | |
| 1091 + } | |
| 1092 + | |
| 1093 +int main() | |
| 1094 + { | |
| 1095 + static const unsigned num_tests = | |
| 1096 + sizeof(chacha_tests) / sizeof(struct chacha_test); | |
| 1097 + unsigned i; | |
| 1098 + unsigned char key_bytes[32 + 16]; | |
| 1099 + unsigned char nonce_bytes[8 + 16] = {0}; | |
| 1100 + | |
| 1101 + unsigned char *key = misalign(key_bytes); | |
| 1102 + unsigned char *nonce = misalign(nonce_bytes); | |
| 1103 + | |
| 1104 + for (i = 0; i < num_tests; i++) | |
| 1105 + { | |
| 1106 + const struct chacha_test *test = &chacha_tests[i]; | |
| 1107 + unsigned char *expected, *out_bytes, *zero_bytes, *out, *zeros; | |
| 1108 + size_t len = strlen(test->outhex); | |
| 1109 + | |
| 1110 + if (strlen(test->keyhex) != 32*2 || | |
| 1111 + strlen(test->noncehex) != 8*2 || | |
| 1112 + (len & 1) == 1) | |
| 1113 + return 1; | |
| 1114 + | |
| 1115 + len /= 2; | |
| 1116 + | |
| 1117 + hex_decode(key, test->keyhex); | |
| 1118 + hex_decode(nonce, test->noncehex); | |
| 1119 + | |
| 1120 + expected = malloc(len); | |
| 1121 + out_bytes = malloc(len+16); | |
| 1122 + zero_bytes = malloc(len+16); | |
| 1123 + /* Attempt to test unaligned inputs. */ | |
| 1124 + out = misalign(out_bytes); | |
| 1125 + zeros = misalign(zero_bytes); | |
| 1126 + memset(zeros, 0, len); | |
| 1127 + | |
| 1128 + hex_decode(expected, test->outhex); | |
| 1129 + CRYPTO_chacha_20(out, zeros, len, key, nonce, 0); | |
| 1130 + | |
| 1131 + if (memcmp(out, expected, len) != 0) | |
| 1132 + { | |
| 1133 + printf("ChaCha20 test #%d failed.\n", i); | |
| 1134 + printf("got: "); | |
| 1135 + hexdump(out, len); | |
| 1136 + printf("\nexpected: "); | |
| 1137 + hexdump(expected, len); | |
| 1138 + printf("\n"); | |
| 1139 + return 1; | |
| 1140 + } | |
| 1141 + | |
| 1142 + /* The last test has a large output. We test whether the | |
| 1143 + * counter works as expected by skipping the first 64 bytes of | |
| 1144 + * it. */ | |
| 1145 + if (i == num_tests - 1) | |
| 1146 + { | |
| 1147 + CRYPTO_chacha_20(out, zeros, len - 64, key, nonce, 1); | |
| 1148 + if (memcmp(out, expected + 64, len - 64) != 0) | |
| 1149 + { | |
| 1150 + printf("ChaCha20 skip test failed.\n"); | |
| 1151 + return 1; | |
| 1152 + } | |
| 1153 + } | |
| 1154 + | |
| 1155 + free(expected); | |
| 1156 + free(zero_bytes); | |
| 1157 + free(out_bytes); | |
| 1158 + } | |
| 1159 + | |
| 1160 + | |
| 1161 + printf("PASS\n"); | |
| 1162 + return 0; | |
| 1163 + } | |
| 1164 diff --git a/crypto/evp/Makefile b/crypto/evp/Makefile | |
| 1165 index b73038d..86b0504 100644 | |
| 1166 --- a/crypto/evp/Makefile | |
| 1167 +++ b/crypto/evp/Makefile | |
| 1168 @@ -29,7 +29,8 @@ LIBSRC= encode.c digest.c evp_enc.c evp_key.c evp_acnf.c evp_c
nf.c \ | |
| 1169 c_all.c c_allc.c c_alld.c evp_lib.c bio_ok.c \ | |
| 1170 evp_pkey.c evp_pbe.c p5_crpt.c p5_crpt2.c \ | |
| 1171 e_old.c pmeth_lib.c pmeth_fn.c pmeth_gn.c m_sigver.c evp_fips.c \ | |
| 1172 - e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c evp_aead.c | |
| 1173 + e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c evp_aead.c \ | |
| 1174 + e_chacha20poly1305.c | |
| 1175 | |
| 1176 LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \ | |
| 1177 e_des.o e_bf.o e_idea.o e_des3.o e_camellia.o\ | |
| 1178 @@ -42,7 +43,7 @@ LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o
evp_cnf.o \ | |
| 1179 c_all.o c_allc.o c_alld.o evp_lib.o bio_ok.o \ | |
| 1180 evp_pkey.o evp_pbe.o p5_crpt.o p5_crpt2.o \ | |
| 1181 e_old.o pmeth_lib.o pmeth_fn.o pmeth_gn.o m_sigver.o evp_fips.o \ | |
| 1182 - e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o evp_aead.o | |
| 1183 + e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o evp_aead.o e_chacha20poly1305.o | |
| 1184 | |
| 1185 SRC= $(LIBSRC) | |
| 1186 | |
| 1187 @@ -239,6 +240,21 @@ e_cast.o: ../../include/openssl/objects.h ../../include/ope
nssl/opensslconf.h | |
| 1188 e_cast.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h | |
| 1189 e_cast.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h | |
| 1190 e_cast.o: ../../include/openssl/symhacks.h ../cryptlib.h e_cast.c evp_locl.h | |
| 1191 +e_chacha20poly1305.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h | |
| 1192 +e_chacha20poly1305.o: ../../include/openssl/chacha.h | |
| 1193 +e_chacha20poly1305.o: ../../include/openssl/crypto.h | |
| 1194 +e_chacha20poly1305.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h | |
| 1195 +e_chacha20poly1305.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h | |
| 1196 +e_chacha20poly1305.o: ../../include/openssl/obj_mac.h | |
| 1197 +e_chacha20poly1305.o: ../../include/openssl/objects.h | |
| 1198 +e_chacha20poly1305.o: ../../include/openssl/opensslconf.h | |
| 1199 +e_chacha20poly1305.o: ../../include/openssl/opensslv.h | |
| 1200 +e_chacha20poly1305.o: ../../include/openssl/ossl_typ.h | |
| 1201 +e_chacha20poly1305.o: ../../include/openssl/poly1305.h | |
| 1202 +e_chacha20poly1305.o: ../../include/openssl/safestack.h | |
| 1203 +e_chacha20poly1305.o: ../../include/openssl/stack.h | |
| 1204 +e_chacha20poly1305.o: ../../include/openssl/symhacks.h e_chacha20poly1305.c | |
| 1205 +e_chacha20poly1305.o: evp_locl.h | |
| 1206 e_des.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h | |
| 1207 e_des.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | |
| 1208 e_des.o: ../../include/openssl/des.h ../../include/openssl/des_old.h | |
| 1209 @@ -258,9 +274,10 @@ e_des3.o: ../../include/openssl/evp.h ../../include/openssl
/lhash.h | |
| 1210 e_des3.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h | |
| 1211 e_des3.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h | |
| 1212 e_des3.o: ../../include/openssl/ossl_typ.h ../../include/openssl/rand.h | |
| 1213 -e_des3.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h | |
| 1214 -e_des3.o: ../../include/openssl/symhacks.h ../../include/openssl/ui.h | |
| 1215 -e_des3.o: ../../include/openssl/ui_compat.h ../cryptlib.h e_des3.c evp_locl.h | |
| 1216 +e_des3.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h | |
| 1217 +e_des3.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h | |
| 1218 +e_des3.o: ../../include/openssl/ui.h ../../include/openssl/ui_compat.h | |
| 1219 +e_des3.o: ../cryptlib.h e_des3.c evp_locl.h | |
| 1220 e_idea.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h | |
| 1221 e_idea.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | |
| 1222 e_idea.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h | |
| 1223 @@ -356,6 +373,14 @@ evp_acnf.o: ../../include/openssl/opensslconf.h | |
| 1224 evp_acnf.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h | |
| 1225 evp_acnf.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h | |
| 1226 evp_acnf.o: ../../include/openssl/symhacks.h ../cryptlib.h evp_acnf.c | |
| 1227 +evp_aead.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h | |
| 1228 +evp_aead.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h | |
| 1229 +evp_aead.o: ../../include/openssl/err.h ../../include/openssl/evp.h | |
| 1230 +evp_aead.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h | |
| 1231 +evp_aead.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h | |
| 1232 +evp_aead.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h | |
| 1233 +evp_aead.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h | |
| 1234 +evp_aead.o: ../../include/openssl/symhacks.h evp_aead.c | |
| 1235 evp_cnf.o: ../../e_os.h ../../include/openssl/asn1.h | |
| 1236 evp_cnf.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h | |
| 1237 evp_cnf.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h | |
| 1238 diff --git a/crypto/evp/e_chacha20poly1305.c b/crypto/evp/e_chacha20poly1305.c | |
| 1239 new file mode 100644 | |
| 1240 index 0000000..1c0c0fb | |
| 1241 --- /dev/null | |
| 1242 +++ b/crypto/evp/e_chacha20poly1305.c | |
| 1243 @@ -0,0 +1,267 @@ | |
| 1244 +/* ==================================================================== | |
| 1245 + * Copyright (c) 2013 The OpenSSL Project. All rights reserved. | |
| 1246 + * | |
| 1247 + * Redistribution and use in source and binary forms, with or without | |
| 1248 + * modification, are permitted provided that the following conditions | |
| 1249 + * are met: | |
| 1250 + * | |
| 1251 + * 1. Redistributions of source code must retain the above copyright | |
| 1252 + * notice, this list of conditions and the following disclaimer. | |
| 1253 + * | |
| 1254 + * 2. Redistributions in binary form must reproduce the above copyright | |
| 1255 + * notice, this list of conditions and the following disclaimer in | |
| 1256 + * the documentation and/or other materials provided with the | |
| 1257 + * distribution. | |
| 1258 + * | |
| 1259 + * 3. All advertising materials mentioning features or use of this | |
| 1260 + * software must display the following acknowledgment: | |
| 1261 + * "This product includes software developed by the OpenSSL Project | |
| 1262 + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" | |
| 1263 + * | |
| 1264 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
| 1265 + * endorse or promote products derived from this software without | |
| 1266 + * prior written permission. For written permission, please contact | |
| 1267 + * openssl-core@openssl.org. | |
| 1268 + * | |
| 1269 + * 5. Products derived from this software may not be called "OpenSSL" | |
| 1270 + * nor may "OpenSSL" appear in their names without prior written | |
| 1271 + * permission of the OpenSSL Project. | |
| 1272 + * | |
| 1273 + * 6. Redistributions of any form whatsoever must retain the following | |
| 1274 + * acknowledgment: | |
| 1275 + * "This product includes software developed by the OpenSSL Project | |
| 1276 + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" | |
| 1277 + * | |
| 1278 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
| 1279 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 1280 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| 1281 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
| 1282 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 1283 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
| 1284 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| 1285 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 1286 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
| 1287 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| 1288 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
| 1289 + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 1290 + * ==================================================================== | |
| 1291 + * | |
| 1292 + */ | |
| 1293 + | |
| 1294 +#include <stdint.h> | |
| 1295 +#include <string.h> | |
| 1296 +#include <openssl/opensslconf.h> | |
| 1297 + | |
| 1298 +#if !defined(OPENSSL_NO_CHACHA) && !defined(OPENSSL_NO_POLY1305) | |
| 1299 + | |
| 1300 +#include <openssl/chacha.h> | |
| 1301 +#include <openssl/poly1305.h> | |
| 1302 +#include <openssl/evp.h> | |
| 1303 +#include <openssl/err.h> | |
| 1304 +#include "evp_locl.h" | |
| 1305 + | |
| 1306 +#define POLY1305_TAG_LEN 16 | |
| 1307 +#define CHACHA20_NONCE_LEN 8 | |
| 1308 + | |
| 1309 +struct aead_chacha20_poly1305_ctx | |
| 1310 + { | |
| 1311 + unsigned char key[32]; | |
| 1312 + unsigned char tag_len; | |
| 1313 + }; | |
| 1314 + | |
| 1315 +static int aead_chacha20_poly1305_init(EVP_AEAD_CTX *ctx, const unsigned char *
key, size_t key_len, size_t tag_len) | |
| 1316 + { | |
| 1317 + struct aead_chacha20_poly1305_ctx *c20_ctx; | |
| 1318 + | |
| 1319 + if (tag_len == 0) | |
| 1320 + tag_len = POLY1305_TAG_LEN; | |
| 1321 + | |
| 1322 + if (tag_len > POLY1305_TAG_LEN) | |
| 1323 + { | |
| 1324 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_INIT, EVP_R_TOO_LARGE); | |
| 1325 + return 0; | |
| 1326 + } | |
| 1327 + | |
| 1328 + if (key_len != sizeof(c20_ctx->key)) | |
| 1329 + return 0; /* internal error - EVP_AEAD_CTX_init should catch th
is. */ | |
| 1330 + | |
| 1331 + c20_ctx = OPENSSL_malloc(sizeof(struct aead_chacha20_poly1305_ctx)); | |
| 1332 + if (c20_ctx == NULL) | |
| 1333 + return 0; | |
| 1334 + | |
| 1335 + memcpy(&c20_ctx->key[0], key, key_len); | |
| 1336 + c20_ctx->tag_len = tag_len; | |
| 1337 + ctx->aead_state = c20_ctx; | |
| 1338 + | |
| 1339 + return 1; | |
| 1340 + } | |
| 1341 + | |
| 1342 +static void aead_chacha20_poly1305_cleanup(EVP_AEAD_CTX *ctx) | |
| 1343 + { | |
| 1344 + struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state; | |
| 1345 + OPENSSL_cleanse(c20_ctx->key, sizeof(c20_ctx->key)); | |
| 1346 + OPENSSL_free(c20_ctx); | |
| 1347 + } | |
| 1348 + | |
| 1349 +static void poly1305_update_with_length(poly1305_state *poly1305, | |
| 1350 + const unsigned char *data, size_t data_len) | |
| 1351 + { | |
| 1352 + size_t j = data_len; | |
| 1353 + unsigned char length_bytes[8]; | |
| 1354 + unsigned i; | |
| 1355 + | |
| 1356 + for (i = 0; i < sizeof(length_bytes); i++) | |
| 1357 + { | |
| 1358 + length_bytes[i] = j; | |
| 1359 + j >>= 8; | |
| 1360 + } | |
| 1361 + | |
| 1362 + CRYPTO_poly1305_update(poly1305, data, data_len); | |
| 1363 + CRYPTO_poly1305_update(poly1305, length_bytes, sizeof(length_bytes)); | |
| 1364 +} | |
| 1365 + | |
| 1366 +#if __arm__ | |
| 1367 +#define ALIGNED __attribute__((aligned(16))) | |
| 1368 +#else | |
| 1369 +#define ALIGNED | |
| 1370 +#endif | |
| 1371 + | |
| 1372 +static ssize_t aead_chacha20_poly1305_seal(const EVP_AEAD_CTX *ctx, | |
| 1373 + unsigned char *out, size_t max_out_len, | |
| 1374 + const unsigned char *nonce, size_t nonce_len, | |
| 1375 + const unsigned char *in, size_t in_len, | |
| 1376 + const unsigned char *ad, size_t ad_len) | |
| 1377 + { | |
| 1378 + const struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state; | |
| 1379 + unsigned char poly1305_key[32] ALIGNED; | |
| 1380 + poly1305_state poly1305; | |
| 1381 + const uint64_t in_len_64 = in_len; | |
| 1382 + | |
| 1383 + /* The underlying ChaCha implementation may not overflow the block | |
| 1384 + * counter into the second counter word. Therefore we disallow | |
| 1385 + * individual operations that work on more than 2TB at a time. | |
| 1386 + * |in_len_64| is needed because, on 32-bit platforms, size_t is only | |
| 1387 + * 32-bits and this produces a warning because it's always false. | |
| 1388 + * Casting to uint64_t inside the conditional is not sufficient to stop | |
| 1389 + * the warning. */ | |
| 1390 + if (in_len_64 >= (1ull << 32)*64-64) | |
| 1391 + { | |
| 1392 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_TOO_LARGE); | |
| 1393 + return -1; | |
| 1394 + } | |
| 1395 + | |
| 1396 + if (max_out_len < in_len + c20_ctx->tag_len) | |
| 1397 + { | |
| 1398 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_BUFFER_TOO_SMALL
); | |
| 1399 + return -1; | |
| 1400 + } | |
| 1401 + | |
| 1402 + if (nonce_len != CHACHA20_NONCE_LEN) | |
| 1403 + { | |
| 1404 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_IV_TOO_LARGE); | |
| 1405 + return -1; | |
| 1406 + } | |
| 1407 + | |
| 1408 + memset(poly1305_key, 0, sizeof(poly1305_key)); | |
| 1409 + CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key), c20_c
tx->key, nonce, 0); | |
| 1410 + | |
| 1411 + CRYPTO_poly1305_init(&poly1305, poly1305_key); | |
| 1412 + poly1305_update_with_length(&poly1305, ad, ad_len); | |
| 1413 + CRYPTO_chacha_20(out, in, in_len, c20_ctx->key, nonce, 1); | |
| 1414 + poly1305_update_with_length(&poly1305, out, in_len); | |
| 1415 + | |
| 1416 + if (c20_ctx->tag_len != POLY1305_TAG_LEN) | |
| 1417 + { | |
| 1418 + unsigned char tag[POLY1305_TAG_LEN]; | |
| 1419 + CRYPTO_poly1305_finish(&poly1305, tag); | |
| 1420 + memcpy(out + in_len, tag, c20_ctx->tag_len); | |
| 1421 + return in_len + c20_ctx->tag_len; | |
| 1422 + } | |
| 1423 + | |
| 1424 + CRYPTO_poly1305_finish(&poly1305, out + in_len); | |
| 1425 + return in_len + POLY1305_TAG_LEN; | |
| 1426 + } | |
| 1427 + | |
| 1428 +static ssize_t aead_chacha20_poly1305_open(const EVP_AEAD_CTX *ctx, | |
| 1429 + unsigned char *out, size_t max_out_len, | |
| 1430 + const unsigned char *nonce, size_t nonce_len, | |
| 1431 + const unsigned char *in, size_t in_len, | |
| 1432 + const unsigned char *ad, size_t ad_len) | |
| 1433 + { | |
| 1434 + const struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state; | |
| 1435 + unsigned char mac[POLY1305_TAG_LEN]; | |
| 1436 + unsigned char poly1305_key[32] ALIGNED; | |
| 1437 + size_t out_len; | |
| 1438 + poly1305_state poly1305; | |
| 1439 + const uint64_t in_len_64 = in_len; | |
| 1440 + | |
| 1441 + if (in_len < c20_ctx->tag_len) | |
| 1442 + { | |
| 1443 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BAD_DECRYPT); | |
| 1444 + return -1; | |
| 1445 + } | |
| 1446 + | |
| 1447 + /* The underlying ChaCha implementation may not overflow the block | |
| 1448 + * counter into the second counter word. Therefore we disallow | |
| 1449 + * individual operations that work on more than 2TB at a time. | |
| 1450 + * |in_len_64| is needed because, on 32-bit platforms, size_t is only | |
| 1451 + * 32-bits and this produces a warning because it's always false. | |
| 1452 + * Casting to uint64_t inside the conditional is not sufficient to stop | |
| 1453 + * the warning. */ | |
| 1454 + if (in_len_64 >= (1ull << 32)*64-64) | |
| 1455 + { | |
| 1456 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_TOO_LARGE); | |
| 1457 + return -1; | |
| 1458 + } | |
| 1459 + | |
| 1460 + if (nonce_len != CHACHA20_NONCE_LEN) | |
| 1461 + { | |
| 1462 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_IV_TOO_LARGE); | |
| 1463 + return -1; | |
| 1464 + } | |
| 1465 + | |
| 1466 + out_len = in_len - c20_ctx->tag_len; | |
| 1467 + | |
| 1468 + if (max_out_len < out_len) | |
| 1469 + { | |
| 1470 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BUFFER_TOO_SMALL
); | |
| 1471 + return -1; | |
| 1472 + } | |
| 1473 + | |
| 1474 + memset(poly1305_key, 0, sizeof(poly1305_key)); | |
| 1475 + CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key), c20_c
tx->key, nonce, 0); | |
| 1476 + | |
| 1477 + CRYPTO_poly1305_init(&poly1305, poly1305_key); | |
| 1478 + poly1305_update_with_length(&poly1305, ad, ad_len); | |
| 1479 + poly1305_update_with_length(&poly1305, in, out_len); | |
| 1480 + CRYPTO_poly1305_finish(&poly1305, mac); | |
| 1481 + | |
| 1482 + if (CRYPTO_memcmp(mac, in + out_len, c20_ctx->tag_len) != 0) | |
| 1483 + { | |
| 1484 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BAD_DECRYPT); | |
| 1485 + return -1; | |
| 1486 + } | |
| 1487 + | |
| 1488 + CRYPTO_chacha_20(out, in, out_len, c20_ctx->key, nonce, 1); | |
| 1489 + return out_len; | |
| 1490 + } | |
| 1491 + | |
| 1492 +static const EVP_AEAD aead_chacha20_poly1305 = | |
| 1493 + { | |
| 1494 + 32, /* key len */ | |
| 1495 + CHACHA20_NONCE_LEN, /* nonce len */ | |
| 1496 + POLY1305_TAG_LEN, /* overhead */ | |
| 1497 + POLY1305_TAG_LEN, /* max tag length */ | |
| 1498 + | |
| 1499 + aead_chacha20_poly1305_init, | |
| 1500 + aead_chacha20_poly1305_cleanup, | |
| 1501 + aead_chacha20_poly1305_seal, | |
| 1502 + aead_chacha20_poly1305_open, | |
| 1503 + }; | |
| 1504 + | |
| 1505 +const EVP_AEAD *EVP_aead_chacha20_poly1305() | |
| 1506 + { | |
| 1507 + return &aead_chacha20_poly1305; | |
| 1508 + } | |
| 1509 + | |
| 1510 +#endif /* !OPENSSL_NO_CHACHA && !OPENSSL_NO_POLY1305 */ | |
| 1511 diff --git a/crypto/evp/evp.h b/crypto/evp/evp.h | |
| 1512 index bd10642..7dc1656 100644 | |
| 1513 --- a/crypto/evp/evp.h | |
| 1514 +++ b/crypto/evp/evp.h | |
| 1515 @@ -1258,6 +1258,11 @@ typedef struct evp_aead_st EVP_AEAD; | |
| 1516 const EVP_AEAD *EVP_aead_aes_128_gcm(void); | |
| 1517 #endif | |
| 1518 | |
| 1519 +#if !defined(OPENSSL_NO_CHACHA) && !defined(OPENSSL_NO_POLY1305) | |
| 1520 +/* EVP_aead_chacha20_poly1305 is ChaCha20 with a Poly1305 authenticator. */ | |
| 1521 +const EVP_AEAD *EVP_aead_chacha20_poly1305(void); | |
| 1522 +#endif | |
| 1523 + | |
| 1524 /* EVP_AEAD_key_length returns the length, in bytes, of the keys used by | |
| 1525 * |aead|. */ | |
| 1526 size_t EVP_AEAD_key_length(const EVP_AEAD *aead); | |
| 1527 @@ -1360,6 +1365,9 @@ void ERR_load_EVP_strings(void); | |
| 1528 #define EVP_F_AEAD_AES_128_GCM_INIT 183 | |
| 1529 #define EVP_F_AEAD_AES_128_GCM_OPEN 181 | |
| 1530 #define EVP_F_AEAD_AES_128_GCM_SEAL 182 | |
| 1531 +#define EVP_F_AEAD_CHACHA20_POLY1305_INIT 187 | |
| 1532 +#define EVP_F_AEAD_CHACHA20_POLY1305_OPEN 184 | |
| 1533 +#define EVP_F_AEAD_CHACHA20_POLY1305_SEAL 183 | |
| 1534 #define EVP_F_AEAD_CTX_OPEN 185 | |
| 1535 #define EVP_F_AEAD_CTX_SEAL 186 | |
| 1536 #define EVP_F_AESNI_INIT_KEY 165 | |
| 1537 diff --git a/crypto/evp/evp_err.c b/crypto/evp/evp_err.c | |
| 1538 index c47969c..fb747e5 100644 | |
| 1539 --- a/crypto/evp/evp_err.c | |
| 1540 +++ b/crypto/evp/evp_err.c | |
| 1541 @@ -73,6 +73,9 @@ static ERR_STRING_DATA EVP_str_functs[]= | |
| 1542 {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_INIT), "AEAD_AES_128_GCM_INIT"}, | |
| 1543 {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_OPEN), "AEAD_AES_128_GCM_OPEN"}, | |
| 1544 {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_SEAL), "AEAD_AES_128_GCM_SEAL"}, | |
| 1545 +{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_INIT), "AEAD_CHACHA20_POLY1305_INIT"}, | |
| 1546 +{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_OPEN), "AEAD_CHACHA20_POLY1305_OPEN"}, | |
| 1547 +{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_SEAL), "AEAD_CHACHA20_POLY1305_SEAL"}, | |
| 1548 {ERR_FUNC(EVP_F_AEAD_CTX_OPEN), "AEAD_CTX_OPEN"}, | |
| 1549 {ERR_FUNC(EVP_F_AEAD_CTX_SEAL), "AEAD_CTX_SEAL"}, | |
| 1550 {ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"}, | |
| 1551 diff --git a/crypto/poly1305/Makefile b/crypto/poly1305/Makefile | |
| 1552 new file mode 100644 | |
| 1553 index 0000000..397d7cd | |
| 1554 --- /dev/null | |
| 1555 +++ b/crypto/poly1305/Makefile | |
| 1556 @@ -0,0 +1,81 @@ | |
| 1557 +# | |
| 1558 +# OpenSSL/crypto/poly1305/Makefile | |
| 1559 +# | |
| 1560 + | |
| 1561 +DIR= poly1305 | |
| 1562 +TOP= ../.. | |
| 1563 +CC= cc | |
| 1564 +CPP= $(CC) -E | |
| 1565 +INCLUDES= | |
| 1566 +CFLAG=-g | |
| 1567 +AR= ar r | |
| 1568 + | |
| 1569 +POLY1305=poly1305_vec.o | |
| 1570 + | |
| 1571 +CFLAGS= $(INCLUDES) $(CFLAG) | |
| 1572 +ASFLAGS= $(INCLUDES) $(ASFLAG) | |
| 1573 +AFLAGS= $(ASFLAGS) | |
| 1574 + | |
| 1575 +GENERAL=Makefile | |
| 1576 +TEST= | |
| 1577 +APPS= | |
| 1578 + | |
| 1579 +LIB=$(TOP)/libcrypto.a | |
| 1580 +LIBSRC=poly1305_vec.c | |
| 1581 +LIBOBJ=$(POLY1305) | |
| 1582 + | |
| 1583 +SRC= $(LIBSRC) | |
| 1584 + | |
| 1585 +EXHEADER=poly1305.h | |
| 1586 +HEADER= $(EXHEADER) | |
| 1587 + | |
| 1588 +ALL= $(GENERAL) $(SRC) $(HEADER) | |
| 1589 + | |
| 1590 +top: | |
| 1591 + (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all) | |
| 1592 + | |
| 1593 +all: lib | |
| 1594 + | |
| 1595 +lib: $(LIBOBJ) | |
| 1596 + $(AR) $(LIB) $(LIBOBJ) | |
| 1597 + $(RANLIB) $(LIB) || echo Never mind. | |
| 1598 + @touch lib | |
| 1599 + | |
| 1600 +files: | |
| 1601 + $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO | |
| 1602 + | |
| 1603 +links: | |
| 1604 + @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER) | |
| 1605 + @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST) | |
| 1606 + @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS) | |
| 1607 + | |
| 1608 +install: | |
| 1609 + @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile... | |
| 1610 + @headerlist="$(EXHEADER)"; for i in $$headerlist ; \ | |
| 1611 + do \ | |
| 1612 + (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \ | |
| 1613 + chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \ | |
| 1614 + done; | |
| 1615 + | |
| 1616 +tags: | |
| 1617 + ctags $(SRC) | |
| 1618 + | |
| 1619 +tests: | |
| 1620 + | |
| 1621 +lint: | |
| 1622 + lint -DLINT $(INCLUDES) $(SRC)>fluff | |
| 1623 + | |
| 1624 +depend: | |
| 1625 + @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... | |
| 1626 + $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) | |
| 1627 + | |
| 1628 +dclean: | |
| 1629 + $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKE
FILE) >Makefile.new | |
| 1630 + mv -f Makefile.new $(MAKEFILE) | |
| 1631 + | |
| 1632 +clean: | |
| 1633 + rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff | |
| 1634 + | |
| 1635 +# DO NOT DELETE THIS LINE -- make depend depends on it. | |
| 1636 + | |
| 1637 +poly1305_vec.o: ../../include/openssl/poly1305.h poly1305_vec.c | |
| 1638 diff --git a/crypto/poly1305/poly1305.c b/crypto/poly1305/poly1305.c | |
| 1639 new file mode 100644 | |
| 1640 index 0000000..2e5621d | |
| 1641 --- /dev/null | |
| 1642 +++ b/crypto/poly1305/poly1305.c | |
| 1643 @@ -0,0 +1,321 @@ | |
| 1644 +/* ==================================================================== | |
| 1645 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
| 1646 + * | |
| 1647 + * Redistribution and use in source and binary forms, with or without | |
| 1648 + * modification, are permitted provided that the following conditions | |
| 1649 + * are met: | |
| 1650 + * | |
| 1651 + * 1. Redistributions of source code must retain the above copyright | |
| 1652 + * notice, this list of conditions and the following disclaimer. | |
| 1653 + * | |
| 1654 + * 2. Redistributions in binary form must reproduce the above copyright | |
| 1655 + * notice, this list of conditions and the following disclaimer in | |
| 1656 + * the documentation and/or other materials provided with the | |
| 1657 + * distribution. | |
| 1658 + * | |
| 1659 + * 3. All advertising materials mentioning features or use of this | |
| 1660 + * software must display the following acknowledgment: | |
| 1661 + * "This product includes software developed by the OpenSSL Project | |
| 1662 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
| 1663 + * | |
| 1664 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
| 1665 + * endorse or promote products derived from this software without | |
| 1666 + * prior written permission. For written permission, please contact | |
| 1667 + * licensing@OpenSSL.org. | |
| 1668 + * | |
| 1669 + * 5. Products derived from this software may not be called "OpenSSL" | |
| 1670 + * nor may "OpenSSL" appear in their names without prior written | |
| 1671 + * permission of the OpenSSL Project. | |
| 1672 + * | |
| 1673 + * 6. Redistributions of any form whatsoever must retain the following | |
| 1674 + * acknowledgment: | |
| 1675 + * "This product includes software developed by the OpenSSL Project | |
| 1676 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
| 1677 + * | |
| 1678 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
| 1679 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 1680 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| 1681 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
| 1682 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 1683 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
| 1684 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| 1685 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 1686 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
| 1687 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| 1688 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
| 1689 + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 1690 + * ==================================================================== | |
| 1691 + */ | |
| 1692 + | |
| 1693 +/* This implementation of poly1305 is by Andrew Moon | |
| 1694 + * (https://github.com/floodyberry/poly1305-donna) and released as public | |
| 1695 + * domain. */ | |
| 1696 + | |
| 1697 +#include <string.h> | |
| 1698 +#include <stdint.h> | |
| 1699 +#include <openssl/opensslconf.h> | |
| 1700 + | |
| 1701 +#if !defined(OPENSSL_NO_POLY1305) | |
| 1702 + | |
| 1703 +#include <openssl/poly1305.h> | |
| 1704 +#include <openssl/crypto.h> | |
| 1705 + | |
| 1706 +#if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_
64__) | |
| 1707 +/* We can assume little-endian. */ | |
| 1708 +static uint32_t U8TO32_LE(const unsigned char *m) | |
| 1709 + { | |
| 1710 + uint32_t r; | |
| 1711 + memcpy(&r, m, sizeof(r)); | |
| 1712 + return r; | |
| 1713 + } | |
| 1714 + | |
| 1715 +static void U32TO8_LE(unsigned char *m, uint32_t v) | |
| 1716 + { | |
| 1717 + memcpy(m, &v, sizeof(v)); | |
| 1718 + } | |
| 1719 +#else | |
| 1720 +static uint32_t U8TO32_LE(const unsigned char *m) | |
| 1721 + { | |
| 1722 + return (uint32_t)m[0] | | |
| 1723 + (uint32_t)m[1] << 8 | | |
| 1724 + (uint32_t)m[2] << 16 | | |
| 1725 + (uint32_t)m[3] << 24; | |
| 1726 + } | |
| 1727 + | |
| 1728 +static void U32TO8_LE(unsigned char *m, uint32_t v) | |
| 1729 + { | |
| 1730 + m[0] = v; | |
| 1731 + m[1] = v >> 8; | |
| 1732 + m[2] = v >> 16; | |
| 1733 + m[3] = v >> 24; | |
| 1734 + } | |
| 1735 +#endif | |
| 1736 + | |
| 1737 +static uint64_t | |
| 1738 +mul32x32_64(uint32_t a, uint32_t b) | |
| 1739 + { | |
| 1740 + return (uint64_t)a * b; | |
| 1741 + } | |
| 1742 + | |
| 1743 + | |
| 1744 +struct poly1305_state_st | |
| 1745 + { | |
| 1746 + uint32_t r0,r1,r2,r3,r4; | |
| 1747 + uint32_t s1,s2,s3,s4; | |
| 1748 + uint32_t h0,h1,h2,h3,h4; | |
| 1749 + unsigned char buf[16]; | |
| 1750 + unsigned int buf_used; | |
| 1751 + unsigned char key[16]; | |
| 1752 + }; | |
| 1753 + | |
| 1754 +/* poly1305_blocks updates |state| given some amount of input data. This | |
| 1755 + * function may only be called with a |len| that is not a multiple of 16 at the | |
| 1756 + * end of the data. Otherwise the input must be buffered into 16 byte blocks. | |
| 1757 + * */ | |
| 1758 +static void poly1305_update(struct poly1305_state_st *state, | |
| 1759 + const unsigned char *in, size_t len) | |
| 1760 + { | |
| 1761 + uint32_t t0,t1,t2,t3; | |
| 1762 + uint64_t t[5]; | |
| 1763 + uint32_t b; | |
| 1764 + uint64_t c; | |
| 1765 + size_t j; | |
| 1766 + unsigned char mp[16]; | |
| 1767 + | |
| 1768 + if (len < 16) | |
| 1769 + goto poly1305_donna_atmost15bytes; | |
| 1770 + | |
| 1771 +poly1305_donna_16bytes: | |
| 1772 + t0 = U8TO32_LE(in); | |
| 1773 + t1 = U8TO32_LE(in+4); | |
| 1774 + t2 = U8TO32_LE(in+8); | |
| 1775 + t3 = U8TO32_LE(in+12); | |
| 1776 + | |
| 1777 + in += 16; | |
| 1778 + len -= 16; | |
| 1779 + | |
| 1780 + state->h0 += t0 & 0x3ffffff; | |
| 1781 + state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; | |
| 1782 + state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; | |
| 1783 + state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; | |
| 1784 + state->h4 += (t3 >> 8) | (1 << 24); | |
| 1785 + | |
| 1786 +poly1305_donna_mul: | |
| 1787 + t[0] = mul32x32_64(state->h0,state->r0) + | |
| 1788 + mul32x32_64(state->h1,state->s4) + | |
| 1789 + mul32x32_64(state->h2,state->s3) + | |
| 1790 + mul32x32_64(state->h3,state->s2) + | |
| 1791 + mul32x32_64(state->h4,state->s1); | |
| 1792 + t[1] = mul32x32_64(state->h0,state->r1) + | |
| 1793 + mul32x32_64(state->h1,state->r0) + | |
| 1794 + mul32x32_64(state->h2,state->s4) + | |
| 1795 + mul32x32_64(state->h3,state->s3) + | |
| 1796 + mul32x32_64(state->h4,state->s2); | |
| 1797 + t[2] = mul32x32_64(state->h0,state->r2) + | |
| 1798 + mul32x32_64(state->h1,state->r1) + | |
| 1799 + mul32x32_64(state->h2,state->r0) + | |
| 1800 + mul32x32_64(state->h3,state->s4) + | |
| 1801 + mul32x32_64(state->h4,state->s3); | |
| 1802 + t[3] = mul32x32_64(state->h0,state->r3) + | |
| 1803 + mul32x32_64(state->h1,state->r2) + | |
| 1804 + mul32x32_64(state->h2,state->r1) + | |
| 1805 + mul32x32_64(state->h3,state->r0) + | |
| 1806 + mul32x32_64(state->h4,state->s4); | |
| 1807 + t[4] = mul32x32_64(state->h0,state->r4) + | |
| 1808 + mul32x32_64(state->h1,state->r3) + | |
| 1809 + mul32x32_64(state->h2,state->r2) + | |
| 1810 + mul32x32_64(state->h3,state->r1) + | |
| 1811 + mul32x32_64(state->h4,state->r0); | |
| 1812 + | |
| 1813 + state->h0 = (uint32_t)t[0] & 0x3ffffff; c = (t[0] >
> 26); | |
| 1814 + t[1] += c; state->h1 = (uint32_t)t[1] & 0x3ffffff; b = (uint32_t)(t[1] >
> 26); | |
| 1815 + t[2] += b; state->h2 = (uint32_t)t[2] & 0x3ffffff; b = (uint32_t)(t[2] >
> 26); | |
| 1816 + t[3] += b; state->h3 = (uint32_t)t[3] & 0x3ffffff; b = (uint32_t)(t[3] >
> 26); | |
| 1817 + t[4] += b; state->h4 = (uint32_t)t[4] & 0x3ffffff; b = (uint32_t)(t[4] >
> 26); | |
| 1818 + state->h0 += b * 5; | |
| 1819 + | |
| 1820 + if (len >= 16) | |
| 1821 + goto poly1305_donna_16bytes; | |
| 1822 + | |
| 1823 + /* final bytes */ | |
| 1824 +poly1305_donna_atmost15bytes: | |
| 1825 + if (!len) | |
| 1826 + return; | |
| 1827 + | |
| 1828 + for (j = 0; j < len; j++) | |
| 1829 + mp[j] = in[j]; | |
| 1830 + mp[j++] = 1; | |
| 1831 + for (; j < 16; j++) | |
| 1832 + mp[j] = 0; | |
| 1833 + len = 0; | |
| 1834 + | |
| 1835 + t0 = U8TO32_LE(mp+0); | |
| 1836 + t1 = U8TO32_LE(mp+4); | |
| 1837 + t2 = U8TO32_LE(mp+8); | |
| 1838 + t3 = U8TO32_LE(mp+12); | |
| 1839 + | |
| 1840 + state->h0 += t0 & 0x3ffffff; | |
| 1841 + state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; | |
| 1842 + state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; | |
| 1843 + state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; | |
| 1844 + state->h4 += (t3 >> 8); | |
| 1845 + | |
| 1846 + goto poly1305_donna_mul; | |
| 1847 + } | |
| 1848 + | |
| 1849 +void CRYPTO_poly1305_init(poly1305_state *statep, const unsigned char key[32]) | |
| 1850 + { | |
| 1851 + struct poly1305_state_st *state = (struct poly1305_state_st*) statep; | |
| 1852 + uint32_t t0,t1,t2,t3; | |
| 1853 + | |
| 1854 + t0 = U8TO32_LE(key+0); | |
| 1855 + t1 = U8TO32_LE(key+4); | |
| 1856 + t2 = U8TO32_LE(key+8); | |
| 1857 + t3 = U8TO32_LE(key+12); | |
| 1858 + | |
| 1859 + /* precompute multipliers */ | |
| 1860 + state->r0 = t0 & 0x3ffffff; t0 >>= 26; t0 |= t1 << 6; | |
| 1861 + state->r1 = t0 & 0x3ffff03; t1 >>= 20; t1 |= t2 << 12; | |
| 1862 + state->r2 = t1 & 0x3ffc0ff; t2 >>= 14; t2 |= t3 << 18; | |
| 1863 + state->r3 = t2 & 0x3f03fff; t3 >>= 8; | |
| 1864 + state->r4 = t3 & 0x00fffff; | |
| 1865 + | |
| 1866 + state->s1 = state->r1 * 5; | |
| 1867 + state->s2 = state->r2 * 5; | |
| 1868 + state->s3 = state->r3 * 5; | |
| 1869 + state->s4 = state->r4 * 5; | |
| 1870 + | |
| 1871 + /* init state */ | |
| 1872 + state->h0 = 0; | |
| 1873 + state->h1 = 0; | |
| 1874 + state->h2 = 0; | |
| 1875 + state->h3 = 0; | |
| 1876 + state->h4 = 0; | |
| 1877 + | |
| 1878 + state->buf_used = 0; | |
| 1879 + memcpy(state->key, key + 16, sizeof(state->key)); | |
| 1880 + } | |
| 1881 + | |
| 1882 +void CRYPTO_poly1305_update(poly1305_state *statep, const unsigned char *in, | |
| 1883 + size_t in_len) | |
| 1884 + { | |
| 1885 + unsigned int i; | |
| 1886 + struct poly1305_state_st *state = (struct poly1305_state_st*) statep; | |
| 1887 + | |
| 1888 + if (state->buf_used) | |
| 1889 + { | |
| 1890 + unsigned int todo = 16 - state->buf_used; | |
| 1891 + if (todo > in_len) | |
| 1892 + todo = in_len; | |
| 1893 + for (i = 0; i < todo; i++) | |
| 1894 + state->buf[state->buf_used + i] = in[i]; | |
| 1895 + state->buf_used += todo; | |
| 1896 + in_len -= todo; | |
| 1897 + in += todo; | |
| 1898 + | |
| 1899 + if (state->buf_used == 16) | |
| 1900 + { | |
| 1901 + poly1305_update(state, state->buf, 16); | |
| 1902 + state->buf_used = 0; | |
| 1903 + } | |
| 1904 + } | |
| 1905 + | |
| 1906 + if (in_len >= 16) | |
| 1907 + { | |
| 1908 + size_t todo = in_len & ~0xf; | |
| 1909 + poly1305_update(state, in, todo); | |
| 1910 + in += todo; | |
| 1911 + in_len &= 0xf; | |
| 1912 + } | |
| 1913 + | |
| 1914 + if (in_len) | |
| 1915 + { | |
| 1916 + for (i = 0; i < in_len; i++) | |
| 1917 + state->buf[i] = in[i]; | |
| 1918 + state->buf_used = in_len; | |
| 1919 + } | |
| 1920 + } | |
| 1921 + | |
| 1922 +void CRYPTO_poly1305_finish(poly1305_state *statep, unsigned char mac[16]) | |
| 1923 + { | |
| 1924 + struct poly1305_state_st *state = (struct poly1305_state_st*) statep; | |
| 1925 + uint64_t f0,f1,f2,f3; | |
| 1926 + uint32_t g0,g1,g2,g3,g4; | |
| 1927 + uint32_t b, nb; | |
| 1928 + | |
| 1929 + if (state->buf_used) | |
| 1930 + poly1305_update(state, state->buf, state->buf_used); | |
| 1931 + | |
| 1932 + b = state->h0 >> 26; state->h0 = state->h0 & 0x3ffff
ff; | |
| 1933 + state->h1 += b; b = state->h1 >> 26; state->h1 = state->h1 & 0x3ffff
ff; | |
| 1934 + state->h2 += b; b = state->h2 >> 26; state->h2 = state->h2 & 0x3ffff
ff; | |
| 1935 + state->h3 += b; b = state->h3 >> 26; state->h3 = state->h3 & 0x3ffff
ff; | |
| 1936 + state->h4 += b; b = state->h4 >> 26; state->h4 = state->h4 & 0x3ffff
ff; | |
| 1937 + state->h0 += b * 5; | |
| 1938 + | |
| 1939 + g0 = state->h0 + 5; b = g0 >> 26; g0 &= 0x3ffffff; | |
| 1940 + g1 = state->h1 + b; b = g1 >> 26; g1 &= 0x3ffffff; | |
| 1941 + g2 = state->h2 + b; b = g2 >> 26; g2 &= 0x3ffffff; | |
| 1942 + g3 = state->h3 + b; b = g3 >> 26; g3 &= 0x3ffffff; | |
| 1943 + g4 = state->h4 + b - (1 << 26); | |
| 1944 + | |
| 1945 + b = (g4 >> 31) - 1; | |
| 1946 + nb = ~b; | |
| 1947 + state->h0 = (state->h0 & nb) | (g0 & b); | |
| 1948 + state->h1 = (state->h1 & nb) | (g1 & b); | |
| 1949 + state->h2 = (state->h2 & nb) | (g2 & b); | |
| 1950 + state->h3 = (state->h3 & nb) | (g3 & b); | |
| 1951 + state->h4 = (state->h4 & nb) | (g4 & b); | |
| 1952 + | |
| 1953 + f0 = ((state->h0 ) | (state->h1 << 26)) + (uint64_t)U8TO32_LE(&stat
e->key[0]); | |
| 1954 + f1 = ((state->h1 >> 6) | (state->h2 << 20)) + (uint64_t)U8TO32_LE(&stat
e->key[4]); | |
| 1955 + f2 = ((state->h2 >> 12) | (state->h3 << 14)) + (uint64_t)U8TO32_LE(&stat
e->key[8]); | |
| 1956 + f3 = ((state->h3 >> 18) | (state->h4 << 8)) + (uint64_t)U8TO32_LE(&stat
e->key[12]); | |
| 1957 + | |
| 1958 + U32TO8_LE(&mac[ 0], f0); f1 += (f0 >> 32); | |
| 1959 + U32TO8_LE(&mac[ 4], f1); f2 += (f1 >> 32); | |
| 1960 + U32TO8_LE(&mac[ 8], f2); f3 += (f2 >> 32); | |
| 1961 + U32TO8_LE(&mac[12], f3); | |
| 1962 + } | |
| 1963 + | |
| 1964 +#endif /* !OPENSSL_NO_POLY1305 */ | |
| 1965 diff --git a/crypto/poly1305/poly1305.h b/crypto/poly1305/poly1305.h | |
| 1966 new file mode 100644 | |
| 1967 index 0000000..28f85ed | |
| 1968 --- /dev/null | |
| 1969 +++ b/crypto/poly1305/poly1305.h | |
| 1970 @@ -0,0 +1,88 @@ | |
| 1971 +/* | |
| 1972 + * Poly1305 | |
| 1973 + * | |
| 1974 + * Created on: Jun, 2013 | |
| 1975 + * Author: Elie Bursztein (elieb@google.com) | |
| 1976 + * | |
| 1977 + * Adapted from the estream code by D. Bernstein. | |
| 1978 + */ | |
| 1979 +/* ==================================================================== | |
| 1980 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
| 1981 + * | |
| 1982 + * Redistribution and use in source and binary forms, with or without | |
| 1983 + * modification, are permitted provided that the following conditions | |
| 1984 + * are met: | |
| 1985 + * | |
| 1986 + * 1. Redistributions of source code must retain the above copyright | |
| 1987 + * notice, this list of conditions and the following disclaimer. | |
| 1988 + * | |
| 1989 + * 2. Redistributions in binary form must reproduce the above copyright | |
| 1990 + * notice, this list of conditions and the following disclaimer in | |
| 1991 + * the documentation and/or other materials provided with the | |
| 1992 + * distribution. | |
| 1993 + * | |
| 1994 + * 3. All advertising materials mentioning features or use of this | |
| 1995 + * software must display the following acknowledgment: | |
| 1996 + * "This product includes software developed by the OpenSSL Project | |
| 1997 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
| 1998 + * | |
| 1999 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
| 2000 + * endorse or promote products derived from this software without | |
| 2001 + * prior written permission. For written permission, please contact | |
| 2002 + * licensing@OpenSSL.org. | |
| 2003 + * | |
| 2004 + * 5. Products derived from this software may not be called "OpenSSL" | |
| 2005 + * nor may "OpenSSL" appear in their names without prior written | |
| 2006 + * permission of the OpenSSL Project. | |
| 2007 + * | |
| 2008 + * 6. Redistributions of any form whatsoever must retain the following | |
| 2009 + * acknowledgment: | |
| 2010 + * "This product includes software developed by the OpenSSL Project | |
| 2011 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
| 2012 + * | |
| 2013 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
| 2014 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 2015 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| 2016 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
| 2017 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 2018 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
| 2019 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| 2020 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 2021 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
| 2022 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| 2023 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
| 2024 + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 2025 + * ==================================================================== | |
| 2026 + */ | |
| 2027 + | |
| 2028 +#ifndef HEADER_POLY1305_H_ | |
| 2029 +#define HEADER_POLY1305_H_ | |
| 2030 + | |
| 2031 +#include <stdint.h> | |
| 2032 +#include <openssl/opensslconf.h> | |
| 2033 + | |
| 2034 +#if defined(OPENSSL_NO_POLY1305) | |
| 2035 +#error Poly1305 support is disabled. | |
| 2036 +#endif | |
| 2037 + | |
| 2038 +typedef unsigned char poly1305_state[512]; | |
| 2039 + | |
| 2040 +/* poly1305_init sets up |state| so that it can be used to calculate an | |
| 2041 + * authentication tag with the one-time key |key|. Note that |key| is a | |
| 2042 + * one-time key and therefore there is no `reset' method because that would | |
| 2043 + * enable several messages to be authenticated with the same key. */ | |
| 2044 +extern void CRYPTO_poly1305_init(poly1305_state* state, | |
| 2045 + const unsigned char key[32]); | |
| 2046 + | |
| 2047 +/* poly1305_update processes |in_len| bytes from |in|. It can be called zero or | |
| 2048 + * more times after poly1305_init. */ | |
| 2049 +extern void CRYPTO_poly1305_update(poly1305_state* state, | |
| 2050 + const unsigned char *in, | |
| 2051 + size_t in_len); | |
| 2052 + | |
| 2053 +/* poly1305_finish completes the poly1305 calculation and writes a 16 byte | |
| 2054 + * authentication tag to |mac|. */ | |
| 2055 +extern void CRYPTO_poly1305_finish(poly1305_state* state, | |
| 2056 + unsigned char mac[16]); | |
| 2057 + | |
| 2058 +#endif /* HEADER_POLY1305_H_ */ | |
| 2059 diff --git a/crypto/poly1305/poly1305_arm.c b/crypto/poly1305/poly1305_arm.c | |
| 2060 new file mode 100644 | |
| 2061 index 0000000..adcef35 | |
| 2062 --- /dev/null | |
| 2063 +++ b/crypto/poly1305/poly1305_arm.c | |
| 2064 @@ -0,0 +1,327 @@ | |
| 2065 +/* ==================================================================== | |
| 2066 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
| 2067 + * | |
| 2068 + * Redistribution and use in source and binary forms, with or without | |
| 2069 + * modification, are permitted provided that the following conditions | |
| 2070 + * are met: | |
| 2071 + * | |
| 2072 + * 1. Redistributions of source code must retain the above copyright | |
| 2073 + * notice, this list of conditions and the following disclaimer. | |
| 2074 + * | |
| 2075 + * 2. Redistributions in binary form must reproduce the above copyright | |
| 2076 + * notice, this list of conditions and the following disclaimer in | |
| 2077 + * the documentation and/or other materials provided with the | |
| 2078 + * distribution. | |
| 2079 + * | |
| 2080 + * 3. All advertising materials mentioning features or use of this | |
| 2081 + * software must display the following acknowledgment: | |
| 2082 + * "This product includes software developed by the OpenSSL Project | |
| 2083 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
| 2084 + * | |
| 2085 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
| 2086 + * endorse or promote products derived from this software without | |
| 2087 + * prior written permission. For written permission, please contact | |
| 2088 + * licensing@OpenSSL.org. | |
| 2089 + * | |
| 2090 + * 5. Products derived from this software may not be called "OpenSSL" | |
| 2091 + * nor may "OpenSSL" appear in their names without prior written | |
| 2092 + * permission of the OpenSSL Project. | |
| 2093 + * | |
| 2094 + * 6. Redistributions of any form whatsoever must retain the following | |
| 2095 + * acknowledgment: | |
| 2096 + * "This product includes software developed by the OpenSSL Project | |
| 2097 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
| 2098 + * | |
| 2099 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
| 2100 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 2101 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| 2102 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
| 2103 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 2104 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
| 2105 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| 2106 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 2107 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
| 2108 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| 2109 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
| 2110 + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 2111 + * ==================================================================== | |
| 2112 + */ | |
| 2113 + | |
| 2114 +/* This implementation was taken from the public domain, neon2 version in | |
| 2115 + * SUPERCOP by D. J. Bernstein and Peter Schwabe. */ | |
| 2116 + | |
| 2117 +#include <stdint.h> | |
| 2118 + | |
| 2119 +#include <openssl/poly1305.h> | |
| 2120 + | |
| 2121 +#if !defined(OPENSSL_NO_POLY1305) | |
| 2122 + | |
| 2123 +typedef struct { | |
| 2124 + uint32_t v[12]; /* for alignment; only using 10 */ | |
| 2125 +} fe1305x2; | |
| 2126 + | |
| 2127 +#define addmulmod openssl_poly1305_neon2_addmulmod | |
| 2128 +#define blocks openssl_poly1305_neon2_blocks | |
| 2129 + | |
| 2130 +extern void addmulmod(fe1305x2 *r, const fe1305x2 *x, const fe1305x2 *y, const
fe1305x2 *c); | |
| 2131 + | |
| 2132 +extern int blocks(fe1305x2 *h, const fe1305x2 *precomp, const unsigned char *in
, unsigned int inlen); | |
| 2133 + | |
| 2134 +static void freeze(fe1305x2 *r) | |
| 2135 + { | |
| 2136 + int i; | |
| 2137 + | |
| 2138 + uint32_t x0 = r->v[0]; | |
| 2139 + uint32_t x1 = r->v[2]; | |
| 2140 + uint32_t x2 = r->v[4]; | |
| 2141 + uint32_t x3 = r->v[6]; | |
| 2142 + uint32_t x4 = r->v[8]; | |
| 2143 + uint32_t y0; | |
| 2144 + uint32_t y1; | |
| 2145 + uint32_t y2; | |
| 2146 + uint32_t y3; | |
| 2147 + uint32_t y4; | |
| 2148 + uint32_t swap; | |
| 2149 + | |
| 2150 + for (i = 0;i < 3;++i) | |
| 2151 + { | |
| 2152 + x1 += x0 >> 26; x0 &= 0x3ffffff; | |
| 2153 + x2 += x1 >> 26; x1 &= 0x3ffffff; | |
| 2154 + x3 += x2 >> 26; x2 &= 0x3ffffff; | |
| 2155 + x4 += x3 >> 26; x3 &= 0x3ffffff; | |
| 2156 + x0 += 5*(x4 >> 26); x4 &= 0x3ffffff; | |
| 2157 + } | |
| 2158 + | |
| 2159 + y0 = x0 + 5; | |
| 2160 + y1 = x1 + (y0 >> 26); y0 &= 0x3ffffff; | |
| 2161 + y2 = x2 + (y1 >> 26); y1 &= 0x3ffffff; | |
| 2162 + y3 = x3 + (y2 >> 26); y2 &= 0x3ffffff; | |
| 2163 + y4 = x4 + (y3 >> 26); y3 &= 0x3ffffff; | |
| 2164 + swap = -(y4 >> 26); y4 &= 0x3ffffff; | |
| 2165 + | |
| 2166 + y0 ^= x0; | |
| 2167 + y1 ^= x1; | |
| 2168 + y2 ^= x2; | |
| 2169 + y3 ^= x3; | |
| 2170 + y4 ^= x4; | |
| 2171 + | |
| 2172 + y0 &= swap; | |
| 2173 + y1 &= swap; | |
| 2174 + y2 &= swap; | |
| 2175 + y3 &= swap; | |
| 2176 + y4 &= swap; | |
| 2177 + | |
| 2178 + y0 ^= x0; | |
| 2179 + y1 ^= x1; | |
| 2180 + y2 ^= x2; | |
| 2181 + y3 ^= x3; | |
| 2182 + y4 ^= x4; | |
| 2183 + | |
| 2184 + r->v[0] = y0; | |
| 2185 + r->v[2] = y1; | |
| 2186 + r->v[4] = y2; | |
| 2187 + r->v[6] = y3; | |
| 2188 + r->v[8] = y4; | |
| 2189 + } | |
| 2190 + | |
| 2191 +static void fe1305x2_tobytearray(unsigned char *r, fe1305x2 *x) | |
| 2192 + { | |
| 2193 + uint32_t x0 = x->v[0]; | |
| 2194 + uint32_t x1 = x->v[2]; | |
| 2195 + uint32_t x2 = x->v[4]; | |
| 2196 + uint32_t x3 = x->v[6]; | |
| 2197 + uint32_t x4 = x->v[8]; | |
| 2198 + | |
| 2199 + x1 += x0 >> 26; | |
| 2200 + x0 &= 0x3ffffff; | |
| 2201 + x2 += x1 >> 26; | |
| 2202 + x1 &= 0x3ffffff; | |
| 2203 + x3 += x2 >> 26; | |
| 2204 + x2 &= 0x3ffffff; | |
| 2205 + x4 += x3 >> 26; | |
| 2206 + x3 &= 0x3ffffff; | |
| 2207 + | |
| 2208 + *(uint32_t *) r = x0 + (x1 << 26); | |
| 2209 + *(uint32_t *) (r + 4) = (x1 >> 6) + (x2 << 20); | |
| 2210 + *(uint32_t *) (r + 8) = (x2 >> 12) + (x3 << 14); | |
| 2211 + *(uint32_t *) (r + 12) = (x3 >> 18) + (x4 << 8); | |
| 2212 + } | |
| 2213 + | |
| 2214 +/* load32 exists to avoid breaking strict aliasing rules in | |
| 2215 + * fe1305x2_frombytearray. */ | |
| 2216 +static uint32_t load32(unsigned char *t) | |
| 2217 + { | |
| 2218 + uint32_t tmp; | |
| 2219 + memcpy(&tmp, t, sizeof(tmp)); | |
| 2220 + return tmp; | |
| 2221 + } | |
| 2222 + | |
| 2223 +static void fe1305x2_frombytearray(fe1305x2 *r, const unsigned char *x, unsigne
d long long xlen) | |
| 2224 + { | |
| 2225 + int i; | |
| 2226 + unsigned char t[17]; | |
| 2227 + | |
| 2228 + for (i = 0; (i < 16) && (i < xlen); i++) | |
| 2229 + t[i] = x[i]; | |
| 2230 + xlen -= i; | |
| 2231 + x += i; | |
| 2232 + t[i++] = 1; | |
| 2233 + for (; i<17; i++) | |
| 2234 + t[i] = 0; | |
| 2235 + | |
| 2236 + r->v[0] = 0x3ffffff & load32(t); | |
| 2237 + r->v[2] = 0x3ffffff & (load32(t + 3) >> 2); | |
| 2238 + r->v[4] = 0x3ffffff & (load32(t + 6) >> 4); | |
| 2239 + r->v[6] = 0x3ffffff & (load32(t + 9) >> 6); | |
| 2240 + r->v[8] = load32(t + 13); | |
| 2241 + | |
| 2242 + if (xlen) | |
| 2243 + { | |
| 2244 + for (i = 0; (i < 16) && (i < xlen); i++) | |
| 2245 + t[i] = x[i]; | |
| 2246 + t[i++] = 1; | |
| 2247 + for (; i<17; i++) | |
| 2248 + t[i] = 0; | |
| 2249 + | |
| 2250 + r->v[1] = 0x3ffffff & load32(t); | |
| 2251 + r->v[3] = 0x3ffffff & (load32(t + 3) >> 2); | |
| 2252 + r->v[5] = 0x3ffffff & (load32(t + 6) >> 4); | |
| 2253 + r->v[7] = 0x3ffffff & (load32(t + 9) >> 6); | |
| 2254 + r->v[9] = load32(t + 13); | |
| 2255 + } | |
| 2256 + else | |
| 2257 + r->v[1] = r->v[3] = r->v[5] = r->v[7] = r->v[9] = 0; | |
| 2258 + } | |
| 2259 + | |
| 2260 +static const fe1305x2 zero __attribute__ ((aligned (16))); | |
| 2261 + | |
| 2262 +struct poly1305_state_st { | |
| 2263 + unsigned char data[sizeof(fe1305x2[5]) + 128]; | |
| 2264 + unsigned char buf[32]; | |
| 2265 + unsigned int buf_used; | |
| 2266 + unsigned char key[16]; | |
| 2267 +}; | |
| 2268 + | |
| 2269 +void CRYPTO_poly1305_init(poly1305_state *state, const unsigned char key[32]) | |
| 2270 + { | |
| 2271 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state); | |
| 2272 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); | |
| 2273 + fe1305x2 *const h = r + 1; | |
| 2274 + fe1305x2 *const c = h + 1; | |
| 2275 + fe1305x2 *const precomp = c + 1; | |
| 2276 + unsigned int j; | |
| 2277 + | |
| 2278 + r->v[1] = r->v[0] = 0x3ffffff & *(uint32_t *) key; | |
| 2279 + r->v[3] = r->v[2] = 0x3ffff03 & ((*(uint32_t *) (key + 3)) >> 2); | |
| 2280 + r->v[5] = r->v[4] = 0x3ffc0ff & ((*(uint32_t *) (key + 6)) >> 4); | |
| 2281 + r->v[7] = r->v[6] = 0x3f03fff & ((*(uint32_t *) (key + 9)) >> 6); | |
| 2282 + r->v[9] = r->v[8] = 0x00fffff & ((*(uint32_t *) (key + 12)) >> 8); | |
| 2283 + | |
| 2284 + for (j = 0; j < 10; j++) | |
| 2285 + h->v[j] = 0; /* XXX: should fast-forward a bit */ | |
| 2286 + | |
| 2287 + addmulmod(precomp,r,r,&zero); /* precompute r^2 */ | |
| 2288 + addmulmod(precomp + 1,precomp,precomp,&zero); /* precompute r^4 */ | |
| 2289 + | |
| 2290 + memcpy(st->key, key + 16, 16); | |
| 2291 + st->buf_used = 0; | |
| 2292 + } | |
| 2293 + | |
| 2294 +void CRYPTO_poly1305_update(poly1305_state *state, const unsigned char *in, siz
e_t in_len) | |
| 2295 + { | |
| 2296 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state); | |
| 2297 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); | |
| 2298 + fe1305x2 *const h = r + 1; | |
| 2299 + fe1305x2 *const c = h + 1; | |
| 2300 + fe1305x2 *const precomp = c + 1; | |
| 2301 + unsigned int i; | |
| 2302 + | |
| 2303 + if (st->buf_used) | |
| 2304 + { | |
| 2305 + unsigned int todo = 32 - st->buf_used; | |
| 2306 + if (todo > in_len) | |
| 2307 + todo = in_len; | |
| 2308 + for (i = 0; i < todo; i++) | |
| 2309 + st->buf[st->buf_used + i] = in[i]; | |
| 2310 + st->buf_used += todo; | |
| 2311 + in_len -= todo; | |
| 2312 + in += todo; | |
| 2313 + | |
| 2314 + if (st->buf_used == sizeof(st->buf) && in_len) | |
| 2315 + { | |
| 2316 + addmulmod(h,h,precomp,&zero); | |
| 2317 + fe1305x2_frombytearray(c, st->buf, sizeof(st->buf)); | |
| 2318 + for (i = 0; i < 10; i++) | |
| 2319 + h->v[i] += c->v[i]; | |
| 2320 + st->buf_used = 0; | |
| 2321 + } | |
| 2322 + } | |
| 2323 + | |
| 2324 + while (in_len > 32) | |
| 2325 + { | |
| 2326 + unsigned int tlen = 1048576; | |
| 2327 + if (in_len < tlen) | |
| 2328 + tlen = in_len; | |
| 2329 + tlen -= blocks(h, precomp, in, tlen); | |
| 2330 + in_len -= tlen; | |
| 2331 + in += tlen; | |
| 2332 + } | |
| 2333 + | |
| 2334 + if (in_len) | |
| 2335 + { | |
| 2336 + for (i = 0; i < in_len; i++) | |
| 2337 + st->buf[i] = in[i]; | |
| 2338 + st->buf_used = in_len; | |
| 2339 + } | |
| 2340 + } | |
| 2341 + | |
| 2342 +void CRYPTO_poly1305_finish(poly1305_state* state, unsigned char mac[16]) | |
| 2343 + { | |
| 2344 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state); | |
| 2345 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); | |
| 2346 + fe1305x2 *const h = r + 1; | |
| 2347 + fe1305x2 *const c = h + 1; | |
| 2348 + fe1305x2 *const precomp = c + 1; | |
| 2349 + | |
| 2350 + addmulmod(h,h,precomp,&zero); | |
| 2351 + | |
| 2352 + if (st->buf_used > 16) | |
| 2353 + { | |
| 2354 + fe1305x2_frombytearray(c, st->buf, st->buf_used); | |
| 2355 + precomp->v[1] = r->v[1]; | |
| 2356 + precomp->v[3] = r->v[3]; | |
| 2357 + precomp->v[5] = r->v[5]; | |
| 2358 + precomp->v[7] = r->v[7]; | |
| 2359 + precomp->v[9] = r->v[9]; | |
| 2360 + addmulmod(h,h,precomp,c); | |
| 2361 + } | |
| 2362 + else if (st->buf_used > 0) | |
| 2363 + { | |
| 2364 + fe1305x2_frombytearray(c, st->buf, st->buf_used); | |
| 2365 + r->v[1] = 1; | |
| 2366 + r->v[3] = 0; | |
| 2367 + r->v[5] = 0; | |
| 2368 + r->v[7] = 0; | |
| 2369 + r->v[9] = 0; | |
| 2370 + addmulmod(h,h,r,c); | |
| 2371 + } | |
| 2372 + | |
| 2373 + h->v[0] += h->v[1]; | |
| 2374 + h->v[2] += h->v[3]; | |
| 2375 + h->v[4] += h->v[5]; | |
| 2376 + h->v[6] += h->v[7]; | |
| 2377 + h->v[8] += h->v[9]; | |
| 2378 + freeze(h); | |
| 2379 + | |
| 2380 + fe1305x2_frombytearray(c, st->key, 16); | |
| 2381 + c->v[8] ^= (1 << 24); | |
| 2382 + | |
| 2383 + h->v[0] += c->v[0]; | |
| 2384 + h->v[2] += c->v[2]; | |
| 2385 + h->v[4] += c->v[4]; | |
| 2386 + h->v[6] += c->v[6]; | |
| 2387 + h->v[8] += c->v[8]; | |
| 2388 + fe1305x2_tobytearray(mac, h); | |
| 2389 + } | |
| 2390 + | |
| 2391 +#endif /* !OPENSSL_NO_POLY1305 */ | |
| 2392 diff --git a/crypto/poly1305/poly1305_arm_asm.S b/crypto/poly1305/poly1305_arm_a
sm.S | |
| 2393 new file mode 100644 | |
| 2394 index 0000000..449d16f | |
| 2395 --- /dev/null | |
| 2396 +++ b/crypto/poly1305/poly1305_arm_asm.S | |
| 2397 @@ -0,0 +1,2009 @@ | |
| 2398 +# This implementation was taken from the public domain, neon2 version in | |
| 2399 +# SUPERCOP by D. J. Bernstein and Peter Schwabe. | |
| 2400 + | |
| 2401 +# qhasm: int32 input_0 | |
| 2402 + | |
| 2403 +# qhasm: int32 input_1 | |
| 2404 + | |
| 2405 +# qhasm: int32 input_2 | |
| 2406 + | |
| 2407 +# qhasm: int32 input_3 | |
| 2408 + | |
| 2409 +# qhasm: stack32 input_4 | |
| 2410 + | |
| 2411 +# qhasm: stack32 input_5 | |
| 2412 + | |
| 2413 +# qhasm: stack32 input_6 | |
| 2414 + | |
| 2415 +# qhasm: stack32 input_7 | |
| 2416 + | |
| 2417 +# qhasm: int32 caller_r4 | |
| 2418 + | |
| 2419 +# qhasm: int32 caller_r5 | |
| 2420 + | |
| 2421 +# qhasm: int32 caller_r6 | |
| 2422 + | |
| 2423 +# qhasm: int32 caller_r7 | |
| 2424 + | |
| 2425 +# qhasm: int32 caller_r8 | |
| 2426 + | |
| 2427 +# qhasm: int32 caller_r9 | |
| 2428 + | |
| 2429 +# qhasm: int32 caller_r10 | |
| 2430 + | |
| 2431 +# qhasm: int32 caller_r11 | |
| 2432 + | |
| 2433 +# qhasm: int32 caller_r12 | |
| 2434 + | |
| 2435 +# qhasm: int32 caller_r14 | |
| 2436 + | |
| 2437 +# qhasm: reg128 caller_q4 | |
| 2438 + | |
| 2439 +# qhasm: reg128 caller_q5 | |
| 2440 + | |
| 2441 +# qhasm: reg128 caller_q6 | |
| 2442 + | |
| 2443 +# qhasm: reg128 caller_q7 | |
| 2444 + | |
| 2445 +# qhasm: startcode | |
| 2446 +.fpu neon | |
| 2447 +.text | |
| 2448 + | |
| 2449 +# qhasm: reg128 r0 | |
| 2450 + | |
| 2451 +# qhasm: reg128 r1 | |
| 2452 + | |
| 2453 +# qhasm: reg128 r2 | |
| 2454 + | |
| 2455 +# qhasm: reg128 r3 | |
| 2456 + | |
| 2457 +# qhasm: reg128 r4 | |
| 2458 + | |
| 2459 +# qhasm: reg128 x01 | |
| 2460 + | |
| 2461 +# qhasm: reg128 x23 | |
| 2462 + | |
| 2463 +# qhasm: reg128 x4 | |
| 2464 + | |
| 2465 +# qhasm: reg128 y0 | |
| 2466 + | |
| 2467 +# qhasm: reg128 y12 | |
| 2468 + | |
| 2469 +# qhasm: reg128 y34 | |
| 2470 + | |
| 2471 +# qhasm: reg128 5y12 | |
| 2472 + | |
| 2473 +# qhasm: reg128 5y34 | |
| 2474 + | |
| 2475 +# qhasm: stack128 y0_stack | |
| 2476 + | |
| 2477 +# qhasm: stack128 y12_stack | |
| 2478 + | |
| 2479 +# qhasm: stack128 y34_stack | |
| 2480 + | |
| 2481 +# qhasm: stack128 5y12_stack | |
| 2482 + | |
| 2483 +# qhasm: stack128 5y34_stack | |
| 2484 + | |
| 2485 +# qhasm: reg128 z0 | |
| 2486 + | |
| 2487 +# qhasm: reg128 z12 | |
| 2488 + | |
| 2489 +# qhasm: reg128 z34 | |
| 2490 + | |
| 2491 +# qhasm: reg128 5z12 | |
| 2492 + | |
| 2493 +# qhasm: reg128 5z34 | |
| 2494 + | |
| 2495 +# qhasm: stack128 z0_stack | |
| 2496 + | |
| 2497 +# qhasm: stack128 z12_stack | |
| 2498 + | |
| 2499 +# qhasm: stack128 z34_stack | |
| 2500 + | |
| 2501 +# qhasm: stack128 5z12_stack | |
| 2502 + | |
| 2503 +# qhasm: stack128 5z34_stack | |
| 2504 + | |
| 2505 +# qhasm: stack128 two24 | |
| 2506 + | |
| 2507 +# qhasm: int32 ptr | |
| 2508 + | |
| 2509 +# qhasm: reg128 c01 | |
| 2510 + | |
| 2511 +# qhasm: reg128 c23 | |
| 2512 + | |
| 2513 +# qhasm: reg128 d01 | |
| 2514 + | |
| 2515 +# qhasm: reg128 d23 | |
| 2516 + | |
| 2517 +# qhasm: reg128 t0 | |
| 2518 + | |
| 2519 +# qhasm: reg128 t1 | |
| 2520 + | |
| 2521 +# qhasm: reg128 t2 | |
| 2522 + | |
| 2523 +# qhasm: reg128 t3 | |
| 2524 + | |
| 2525 +# qhasm: reg128 t4 | |
| 2526 + | |
| 2527 +# qhasm: reg128 mask | |
| 2528 + | |
| 2529 +# qhasm: reg128 u0 | |
| 2530 + | |
| 2531 +# qhasm: reg128 u1 | |
| 2532 + | |
| 2533 +# qhasm: reg128 u2 | |
| 2534 + | |
| 2535 +# qhasm: reg128 u3 | |
| 2536 + | |
| 2537 +# qhasm: reg128 u4 | |
| 2538 + | |
| 2539 +# qhasm: reg128 v01 | |
| 2540 + | |
| 2541 +# qhasm: reg128 mid | |
| 2542 + | |
| 2543 +# qhasm: reg128 v23 | |
| 2544 + | |
| 2545 +# qhasm: reg128 v4 | |
| 2546 + | |
| 2547 +# qhasm: int32 len | |
| 2548 + | |
| 2549 +# qhasm: qpushenter crypto_onetimeauth_poly1305_neon2_blocks | |
| 2550 +.align 4 | |
| 2551 +.global openssl_poly1305_neon2_blocks | |
| 2552 +.type openssl_poly1305_neon2_blocks STT_FUNC | |
| 2553 +openssl_poly1305_neon2_blocks: | |
| 2554 +vpush {q4,q5,q6,q7} | |
| 2555 +mov r12,sp | |
| 2556 +sub sp,sp,#192 | |
| 2557 +and sp,sp,#0xffffffe0 | |
| 2558 + | |
| 2559 +# qhasm: len = input_3 | |
| 2560 +# asm 1: mov >len=int32#4,<input_3=int32#4 | |
| 2561 +# asm 2: mov >len=r3,<input_3=r3 | |
| 2562 +mov r3,r3 | |
| 2563 + | |
| 2564 +# qhasm: new y0 | |
| 2565 + | |
| 2566 +# qhasm: y0 = mem64[input_1]y0[1]; input_1 += 8 | |
| 2567 +# asm 1: vld1.8 {<y0=reg128#1%bot},[<input_1=int32#2]! | |
| 2568 +# asm 2: vld1.8 {<y0=d0},[<input_1=r1]! | |
| 2569 +vld1.8 {d0},[r1]! | |
| 2570 + | |
| 2571 +# qhasm: y12 = mem128[input_1]; input_1 += 16 | |
| 2572 +# asm 1: vld1.8 {>y12=reg128#2%bot->y12=reg128#2%top},[<input_1=int32#2]! | |
| 2573 +# asm 2: vld1.8 {>y12=d2->y12=d3},[<input_1=r1]! | |
| 2574 +vld1.8 {d2-d3},[r1]! | |
| 2575 + | |
| 2576 +# qhasm: y34 = mem128[input_1]; input_1 += 16 | |
| 2577 +# asm 1: vld1.8 {>y34=reg128#3%bot->y34=reg128#3%top},[<input_1=int32#2]! | |
| 2578 +# asm 2: vld1.8 {>y34=d4->y34=d5},[<input_1=r1]! | |
| 2579 +vld1.8 {d4-d5},[r1]! | |
| 2580 + | |
| 2581 +# qhasm: input_1 += 8 | |
| 2582 +# asm 1: add >input_1=int32#2,<input_1=int32#2,#8 | |
| 2583 +# asm 2: add >input_1=r1,<input_1=r1,#8 | |
| 2584 +add r1,r1,#8 | |
| 2585 + | |
| 2586 +# qhasm: new z0 | |
| 2587 + | |
| 2588 +# qhasm: z0 = mem64[input_1]z0[1]; input_1 += 8 | |
| 2589 +# asm 1: vld1.8 {<z0=reg128#4%bot},[<input_1=int32#2]! | |
| 2590 +# asm 2: vld1.8 {<z0=d6},[<input_1=r1]! | |
| 2591 +vld1.8 {d6},[r1]! | |
| 2592 + | |
| 2593 +# qhasm: z12 = mem128[input_1]; input_1 += 16 | |
| 2594 +# asm 1: vld1.8 {>z12=reg128#5%bot->z12=reg128#5%top},[<input_1=int32#2]! | |
| 2595 +# asm 2: vld1.8 {>z12=d8->z12=d9},[<input_1=r1]! | |
| 2596 +vld1.8 {d8-d9},[r1]! | |
| 2597 + | |
| 2598 +# qhasm: z34 = mem128[input_1]; input_1 += 16 | |
| 2599 +# asm 1: vld1.8 {>z34=reg128#6%bot->z34=reg128#6%top},[<input_1=int32#2]! | |
| 2600 +# asm 2: vld1.8 {>z34=d10->z34=d11},[<input_1=r1]! | |
| 2601 +vld1.8 {d10-d11},[r1]! | |
| 2602 + | |
| 2603 +# qhasm: 2x mask = 0xffffffff | |
| 2604 +# asm 1: vmov.i64 >mask=reg128#7,#0xffffffff | |
| 2605 +# asm 2: vmov.i64 >mask=q6,#0xffffffff | |
| 2606 +vmov.i64 q6,#0xffffffff | |
| 2607 + | |
| 2608 +# qhasm: 2x u4 = 0xff | |
| 2609 +# asm 1: vmov.i64 >u4=reg128#8,#0xff | |
| 2610 +# asm 2: vmov.i64 >u4=q7,#0xff | |
| 2611 +vmov.i64 q7,#0xff | |
| 2612 + | |
| 2613 +# qhasm: x01 aligned= mem128[input_0];input_0+=16 | |
| 2614 +# asm 1: vld1.8 {>x01=reg128#9%bot->x01=reg128#9%top},[<input_0=int32#1,: 128]! | |
| 2615 +# asm 2: vld1.8 {>x01=d16->x01=d17},[<input_0=r0,: 128]! | |
| 2616 +vld1.8 {d16-d17},[r0,: 128]! | |
| 2617 + | |
| 2618 +# qhasm: x23 aligned= mem128[input_0];input_0+=16 | |
| 2619 +# asm 1: vld1.8 {>x23=reg128#10%bot->x23=reg128#10%top},[<input_0=int32#1,: 128
]! | |
| 2620 +# asm 2: vld1.8 {>x23=d18->x23=d19},[<input_0=r0,: 128]! | |
| 2621 +vld1.8 {d18-d19},[r0,: 128]! | |
| 2622 + | |
| 2623 +# qhasm: x4 aligned= mem64[input_0]x4[1] | |
| 2624 +# asm 1: vld1.8 {<x4=reg128#11%bot},[<input_0=int32#1,: 64] | |
| 2625 +# asm 2: vld1.8 {<x4=d20},[<input_0=r0,: 64] | |
| 2626 +vld1.8 {d20},[r0,: 64] | |
| 2627 + | |
| 2628 +# qhasm: input_0 -= 32 | |
| 2629 +# asm 1: sub >input_0=int32#1,<input_0=int32#1,#32 | |
| 2630 +# asm 2: sub >input_0=r0,<input_0=r0,#32 | |
| 2631 +sub r0,r0,#32 | |
| 2632 + | |
| 2633 +# qhasm: 2x mask unsigned>>=6 | |
| 2634 +# asm 1: vshr.u64 >mask=reg128#7,<mask=reg128#7,#6 | |
| 2635 +# asm 2: vshr.u64 >mask=q6,<mask=q6,#6 | |
| 2636 +vshr.u64 q6,q6,#6 | |
| 2637 + | |
| 2638 +# qhasm: 2x u4 unsigned>>= 7 | |
| 2639 +# asm 1: vshr.u64 >u4=reg128#8,<u4=reg128#8,#7 | |
| 2640 +# asm 2: vshr.u64 >u4=q7,<u4=q7,#7 | |
| 2641 +vshr.u64 q7,q7,#7 | |
| 2642 + | |
| 2643 +# qhasm: 4x 5y12 = y12 << 2 | |
| 2644 +# asm 1: vshl.i32 >5y12=reg128#12,<y12=reg128#2,#2 | |
| 2645 +# asm 2: vshl.i32 >5y12=q11,<y12=q1,#2 | |
| 2646 +vshl.i32 q11,q1,#2 | |
| 2647 + | |
| 2648 +# qhasm: 4x 5y34 = y34 << 2 | |
| 2649 +# asm 1: vshl.i32 >5y34=reg128#13,<y34=reg128#3,#2 | |
| 2650 +# asm 2: vshl.i32 >5y34=q12,<y34=q2,#2 | |
| 2651 +vshl.i32 q12,q2,#2 | |
| 2652 + | |
| 2653 +# qhasm: 4x 5y12 += y12 | |
| 2654 +# asm 1: vadd.i32 >5y12=reg128#12,<5y12=reg128#12,<y12=reg128#2 | |
| 2655 +# asm 2: vadd.i32 >5y12=q11,<5y12=q11,<y12=q1 | |
| 2656 +vadd.i32 q11,q11,q1 | |
| 2657 + | |
| 2658 +# qhasm: 4x 5y34 += y34 | |
| 2659 +# asm 1: vadd.i32 >5y34=reg128#13,<5y34=reg128#13,<y34=reg128#3 | |
| 2660 +# asm 2: vadd.i32 >5y34=q12,<5y34=q12,<y34=q2 | |
| 2661 +vadd.i32 q12,q12,q2 | |
| 2662 + | |
| 2663 +# qhasm: 2x u4 <<= 24 | |
| 2664 +# asm 1: vshl.i64 >u4=reg128#8,<u4=reg128#8,#24 | |
| 2665 +# asm 2: vshl.i64 >u4=q7,<u4=q7,#24 | |
| 2666 +vshl.i64 q7,q7,#24 | |
| 2667 + | |
| 2668 +# qhasm: 4x 5z12 = z12 << 2 | |
| 2669 +# asm 1: vshl.i32 >5z12=reg128#14,<z12=reg128#5,#2 | |
| 2670 +# asm 2: vshl.i32 >5z12=q13,<z12=q4,#2 | |
| 2671 +vshl.i32 q13,q4,#2 | |
| 2672 + | |
| 2673 +# qhasm: 4x 5z34 = z34 << 2 | |
| 2674 +# asm 1: vshl.i32 >5z34=reg128#15,<z34=reg128#6,#2 | |
| 2675 +# asm 2: vshl.i32 >5z34=q14,<z34=q5,#2 | |
| 2676 +vshl.i32 q14,q5,#2 | |
| 2677 + | |
| 2678 +# qhasm: 4x 5z12 += z12 | |
| 2679 +# asm 1: vadd.i32 >5z12=reg128#14,<5z12=reg128#14,<z12=reg128#5 | |
| 2680 +# asm 2: vadd.i32 >5z12=q13,<5z12=q13,<z12=q4 | |
| 2681 +vadd.i32 q13,q13,q4 | |
| 2682 + | |
| 2683 +# qhasm: 4x 5z34 += z34 | |
| 2684 +# asm 1: vadd.i32 >5z34=reg128#15,<5z34=reg128#15,<z34=reg128#6 | |
| 2685 +# asm 2: vadd.i32 >5z34=q14,<5z34=q14,<z34=q5 | |
| 2686 +vadd.i32 q14,q14,q5 | |
| 2687 + | |
| 2688 +# qhasm: new two24 | |
| 2689 + | |
| 2690 +# qhasm: new y0_stack | |
| 2691 + | |
| 2692 +# qhasm: new y12_stack | |
| 2693 + | |
| 2694 +# qhasm: new y34_stack | |
| 2695 + | |
| 2696 +# qhasm: new 5y12_stack | |
| 2697 + | |
| 2698 +# qhasm: new 5y34_stack | |
| 2699 + | |
| 2700 +# qhasm: new z0_stack | |
| 2701 + | |
| 2702 +# qhasm: new z12_stack | |
| 2703 + | |
| 2704 +# qhasm: new z34_stack | |
| 2705 + | |
| 2706 +# qhasm: new 5z12_stack | |
| 2707 + | |
| 2708 +# qhasm: new 5z34_stack | |
| 2709 + | |
| 2710 +# qhasm: ptr = &two24 | |
| 2711 +# asm 1: lea >ptr=int32#2,<two24=stack128#1 | |
| 2712 +# asm 2: lea >ptr=r1,<two24=[sp,#0] | |
| 2713 +add r1,sp,#0 | |
| 2714 + | |
| 2715 +# qhasm: mem128[ptr] aligned= u4 | |
| 2716 +# asm 1: vst1.8 {<u4=reg128#8%bot-<u4=reg128#8%top},[<ptr=int32#2,: 128] | |
| 2717 +# asm 2: vst1.8 {<u4=d14-<u4=d15},[<ptr=r1,: 128] | |
| 2718 +vst1.8 {d14-d15},[r1,: 128] | |
| 2719 + | |
| 2720 +# qhasm: r4 = u4 | |
| 2721 +# asm 1: vmov >r4=reg128#16,<u4=reg128#8 | |
| 2722 +# asm 2: vmov >r4=q15,<u4=q7 | |
| 2723 +vmov q15,q7 | |
| 2724 + | |
| 2725 +# qhasm: r0 = u4 | |
| 2726 +# asm 1: vmov >r0=reg128#8,<u4=reg128#8 | |
| 2727 +# asm 2: vmov >r0=q7,<u4=q7 | |
| 2728 +vmov q7,q7 | |
| 2729 + | |
| 2730 +# qhasm: ptr = &y0_stack | |
| 2731 +# asm 1: lea >ptr=int32#2,<y0_stack=stack128#2 | |
| 2732 +# asm 2: lea >ptr=r1,<y0_stack=[sp,#16] | |
| 2733 +add r1,sp,#16 | |
| 2734 + | |
| 2735 +# qhasm: mem128[ptr] aligned= y0 | |
| 2736 +# asm 1: vst1.8 {<y0=reg128#1%bot-<y0=reg128#1%top},[<ptr=int32#2,: 128] | |
| 2737 +# asm 2: vst1.8 {<y0=d0-<y0=d1},[<ptr=r1,: 128] | |
| 2738 +vst1.8 {d0-d1},[r1,: 128] | |
| 2739 + | |
| 2740 +# qhasm: ptr = &y12_stack | |
| 2741 +# asm 1: lea >ptr=int32#2,<y12_stack=stack128#3 | |
| 2742 +# asm 2: lea >ptr=r1,<y12_stack=[sp,#32] | |
| 2743 +add r1,sp,#32 | |
| 2744 + | |
| 2745 +# qhasm: mem128[ptr] aligned= y12 | |
| 2746 +# asm 1: vst1.8 {<y12=reg128#2%bot-<y12=reg128#2%top},[<ptr=int32#2,: 128] | |
| 2747 +# asm 2: vst1.8 {<y12=d2-<y12=d3},[<ptr=r1,: 128] | |
| 2748 +vst1.8 {d2-d3},[r1,: 128] | |
| 2749 + | |
| 2750 +# qhasm: ptr = &y34_stack | |
| 2751 +# asm 1: lea >ptr=int32#2,<y34_stack=stack128#4 | |
| 2752 +# asm 2: lea >ptr=r1,<y34_stack=[sp,#48] | |
| 2753 +add r1,sp,#48 | |
| 2754 + | |
| 2755 +# qhasm: mem128[ptr] aligned= y34 | |
| 2756 +# asm 1: vst1.8 {<y34=reg128#3%bot-<y34=reg128#3%top},[<ptr=int32#2,: 128] | |
| 2757 +# asm 2: vst1.8 {<y34=d4-<y34=d5},[<ptr=r1,: 128] | |
| 2758 +vst1.8 {d4-d5},[r1,: 128] | |
| 2759 + | |
| 2760 +# qhasm: ptr = &z0_stack | |
| 2761 +# asm 1: lea >ptr=int32#2,<z0_stack=stack128#7 | |
| 2762 +# asm 2: lea >ptr=r1,<z0_stack=[sp,#96] | |
| 2763 +add r1,sp,#96 | |
| 2764 + | |
| 2765 +# qhasm: mem128[ptr] aligned= z0 | |
| 2766 +# asm 1: vst1.8 {<z0=reg128#4%bot-<z0=reg128#4%top},[<ptr=int32#2,: 128] | |
| 2767 +# asm 2: vst1.8 {<z0=d6-<z0=d7},[<ptr=r1,: 128] | |
| 2768 +vst1.8 {d6-d7},[r1,: 128] | |
| 2769 + | |
| 2770 +# qhasm: ptr = &z12_stack | |
| 2771 +# asm 1: lea >ptr=int32#2,<z12_stack=stack128#8 | |
| 2772 +# asm 2: lea >ptr=r1,<z12_stack=[sp,#112] | |
| 2773 +add r1,sp,#112 | |
| 2774 + | |
| 2775 +# qhasm: mem128[ptr] aligned= z12 | |
| 2776 +# asm 1: vst1.8 {<z12=reg128#5%bot-<z12=reg128#5%top},[<ptr=int32#2,: 128] | |
| 2777 +# asm 2: vst1.8 {<z12=d8-<z12=d9},[<ptr=r1,: 128] | |
| 2778 +vst1.8 {d8-d9},[r1,: 128] | |
| 2779 + | |
| 2780 +# qhasm: ptr = &z34_stack | |
| 2781 +# asm 1: lea >ptr=int32#2,<z34_stack=stack128#9 | |
| 2782 +# asm 2: lea >ptr=r1,<z34_stack=[sp,#128] | |
| 2783 +add r1,sp,#128 | |
| 2784 + | |
| 2785 +# qhasm: mem128[ptr] aligned= z34 | |
| 2786 +# asm 1: vst1.8 {<z34=reg128#6%bot-<z34=reg128#6%top},[<ptr=int32#2,: 128] | |
| 2787 +# asm 2: vst1.8 {<z34=d10-<z34=d11},[<ptr=r1,: 128] | |
| 2788 +vst1.8 {d10-d11},[r1,: 128] | |
| 2789 + | |
| 2790 +# qhasm: ptr = &5y12_stack | |
| 2791 +# asm 1: lea >ptr=int32#2,<5y12_stack=stack128#5 | |
| 2792 +# asm 2: lea >ptr=r1,<5y12_stack=[sp,#64] | |
| 2793 +add r1,sp,#64 | |
| 2794 + | |
| 2795 +# qhasm: mem128[ptr] aligned= 5y12 | |
| 2796 +# asm 1: vst1.8 {<5y12=reg128#12%bot-<5y12=reg128#12%top},[<ptr=int32#2,: 128] | |
| 2797 +# asm 2: vst1.8 {<5y12=d22-<5y12=d23},[<ptr=r1,: 128] | |
| 2798 +vst1.8 {d22-d23},[r1,: 128] | |
| 2799 + | |
| 2800 +# qhasm: ptr = &5y34_stack | |
| 2801 +# asm 1: lea >ptr=int32#2,<5y34_stack=stack128#6 | |
| 2802 +# asm 2: lea >ptr=r1,<5y34_stack=[sp,#80] | |
| 2803 +add r1,sp,#80 | |
| 2804 + | |
| 2805 +# qhasm: mem128[ptr] aligned= 5y34 | |
| 2806 +# asm 1: vst1.8 {<5y34=reg128#13%bot-<5y34=reg128#13%top},[<ptr=int32#2,: 128] | |
| 2807 +# asm 2: vst1.8 {<5y34=d24-<5y34=d25},[<ptr=r1,: 128] | |
| 2808 +vst1.8 {d24-d25},[r1,: 128] | |
| 2809 + | |
| 2810 +# qhasm: ptr = &5z12_stack | |
| 2811 +# asm 1: lea >ptr=int32#2,<5z12_stack=stack128#10 | |
| 2812 +# asm 2: lea >ptr=r1,<5z12_stack=[sp,#144] | |
| 2813 +add r1,sp,#144 | |
| 2814 + | |
| 2815 +# qhasm: mem128[ptr] aligned= 5z12 | |
| 2816 +# asm 1: vst1.8 {<5z12=reg128#14%bot-<5z12=reg128#14%top},[<ptr=int32#2,: 128] | |
| 2817 +# asm 2: vst1.8 {<5z12=d26-<5z12=d27},[<ptr=r1,: 128] | |
| 2818 +vst1.8 {d26-d27},[r1,: 128] | |
| 2819 + | |
| 2820 +# qhasm: ptr = &5z34_stack | |
| 2821 +# asm 1: lea >ptr=int32#2,<5z34_stack=stack128#11 | |
| 2822 +# asm 2: lea >ptr=r1,<5z34_stack=[sp,#160] | |
| 2823 +add r1,sp,#160 | |
| 2824 + | |
| 2825 +# qhasm: mem128[ptr] aligned= 5z34 | |
| 2826 +# asm 1: vst1.8 {<5z34=reg128#15%bot-<5z34=reg128#15%top},[<ptr=int32#2,: 128] | |
| 2827 +# asm 2: vst1.8 {<5z34=d28-<5z34=d29},[<ptr=r1,: 128] | |
| 2828 +vst1.8 {d28-d29},[r1,: 128] | |
| 2829 + | |
| 2830 +# qhasm: unsigned>? len - 64 | |
| 2831 +# asm 1: cmp <len=int32#4,#64 | |
| 2832 +# asm 2: cmp <len=r3,#64 | |
| 2833 +cmp r3,#64 | |
| 2834 + | |
| 2835 +# qhasm: goto below64bytes if !unsigned> | |
| 2836 +bls ._below64bytes | |
| 2837 + | |
| 2838 +# qhasm: input_2 += 32 | |
| 2839 +# asm 1: add >input_2=int32#2,<input_2=int32#3,#32 | |
| 2840 +# asm 2: add >input_2=r1,<input_2=r2,#32 | |
| 2841 +add r1,r2,#32 | |
| 2842 + | |
| 2843 +# qhasm: mainloop2: | |
| 2844 +._mainloop2: | |
| 2845 + | |
| 2846 +# qhasm: c01 = mem128[input_2];input_2+=16 | |
| 2847 +# asm 1: vld1.8 {>c01=reg128#1%bot->c01=reg128#1%top},[<input_2=int32#2]! | |
| 2848 +# asm 2: vld1.8 {>c01=d0->c01=d1},[<input_2=r1]! | |
| 2849 +vld1.8 {d0-d1},[r1]! | |
| 2850 + | |
| 2851 +# qhasm: c23 = mem128[input_2];input_2+=16 | |
| 2852 +# asm 1: vld1.8 {>c23=reg128#2%bot->c23=reg128#2%top},[<input_2=int32#2]! | |
| 2853 +# asm 2: vld1.8 {>c23=d2->c23=d3},[<input_2=r1]! | |
| 2854 +vld1.8 {d2-d3},[r1]! | |
| 2855 + | |
| 2856 +# qhasm: r4[0,1] += x01[0] unsigned* z34[2]; r4[2,3] += x01[1] unsigned* z34
[3] | |
| 2857 +# asm 1: vmlal.u32 <r4=reg128#16,<x01=reg128#9%bot,<z34=reg128#6%top | |
| 2858 +# asm 2: vmlal.u32 <r4=q15,<x01=d16,<z34=d11 | |
| 2859 +vmlal.u32 q15,d16,d11 | |
| 2860 + | |
| 2861 +# qhasm: ptr = &z12_stack | |
| 2862 +# asm 1: lea >ptr=int32#3,<z12_stack=stack128#8 | |
| 2863 +# asm 2: lea >ptr=r2,<z12_stack=[sp,#112] | |
| 2864 +add r2,sp,#112 | |
| 2865 + | |
| 2866 +# qhasm: z12 aligned= mem128[ptr] | |
| 2867 +# asm 1: vld1.8 {>z12=reg128#3%bot->z12=reg128#3%top},[<ptr=int32#3,: 128] | |
| 2868 +# asm 2: vld1.8 {>z12=d4->z12=d5},[<ptr=r2,: 128] | |
| 2869 +vld1.8 {d4-d5},[r2,: 128] | |
| 2870 + | |
| 2871 +# qhasm: r4[0,1] += x01[2] unsigned* z34[0]; r4[2,3] += x01[3] unsigned* z34[1
] | |
| 2872 +# asm 1: vmlal.u32 <r4=reg128#16,<x01=reg128#9%top,<z34=reg128#6%bot | |
| 2873 +# asm 2: vmlal.u32 <r4=q15,<x01=d17,<z34=d10 | |
| 2874 +vmlal.u32 q15,d17,d10 | |
| 2875 + | |
| 2876 +# qhasm: ptr = &z0_stack | |
| 2877 +# asm 1: lea >ptr=int32#3,<z0_stack=stack128#7 | |
| 2878 +# asm 2: lea >ptr=r2,<z0_stack=[sp,#96] | |
| 2879 +add r2,sp,#96 | |
| 2880 + | |
| 2881 +# qhasm: z0 aligned= mem128[ptr] | |
| 2882 +# asm 1: vld1.8 {>z0=reg128#4%bot->z0=reg128#4%top},[<ptr=int32#3,: 128] | |
| 2883 +# asm 2: vld1.8 {>z0=d6->z0=d7},[<ptr=r2,: 128] | |
| 2884 +vld1.8 {d6-d7},[r2,: 128] | |
| 2885 + | |
| 2886 +# qhasm: r4[0,1] += x23[0] unsigned* z12[2]; r4[2,3] += x23[1] unsigned* z12[3
] | |
| 2887 +# asm 1: vmlal.u32 <r4=reg128#16,<x23=reg128#10%bot,<z12=reg128#3%top | |
| 2888 +# asm 2: vmlal.u32 <r4=q15,<x23=d18,<z12=d5 | |
| 2889 +vmlal.u32 q15,d18,d5 | |
| 2890 + | |
| 2891 +# qhasm: c01 c23 = c01[0]c01[1]c01[2]c23[2]c23[0]c23[1]c01[3]c23[3] | |
| 2892 +# asm 1: vtrn.32 <c01=reg128#1%top,<c23=reg128#2%top | |
| 2893 +# asm 2: vtrn.32 <c01=d1,<c23=d3 | |
| 2894 +vtrn.32 d1,d3 | |
| 2895 + | |
| 2896 +# qhasm: r4[0,1] += x23[2] unsigned* z12[0]; r4[2,3] += x23[3] unsigned* z12[1
] | |
| 2897 +# asm 1: vmlal.u32 <r4=reg128#16,<x23=reg128#10%top,<z12=reg128#3%bot | |
| 2898 +# asm 2: vmlal.u32 <r4=q15,<x23=d19,<z12=d4 | |
| 2899 +vmlal.u32 q15,d19,d4 | |
| 2900 + | |
| 2901 +# qhasm: r4[0,1] += x4[0] unsigned* z0[0]; r4[2,3] += x4[1] unsigned* z0[1] | |
| 2902 +# asm 1: vmlal.u32 <r4=reg128#16,<x4=reg128#11%bot,<z0=reg128#4%bot | |
| 2903 +# asm 2: vmlal.u32 <r4=q15,<x4=d20,<z0=d6 | |
| 2904 +vmlal.u32 q15,d20,d6 | |
| 2905 + | |
| 2906 +# qhasm: r3[0,1] = c23[2]<<18; r3[2,3] = c23[3]<<18 | |
| 2907 +# asm 1: vshll.u32 >r3=reg128#5,<c23=reg128#2%top,#18 | |
| 2908 +# asm 2: vshll.u32 >r3=q4,<c23=d3,#18 | |
| 2909 +vshll.u32 q4,d3,#18 | |
| 2910 + | |
| 2911 +# qhasm: c01 c23 = c01[0]c23[0]c01[2]c01[3]c01[1]c23[1]c23[2]c23[3] | |
| 2912 +# asm 1: vtrn.32 <c01=reg128#1%bot,<c23=reg128#2%bot | |
| 2913 +# asm 2: vtrn.32 <c01=d0,<c23=d2 | |
| 2914 +vtrn.32 d0,d2 | |
| 2915 + | |
| 2916 +# qhasm: r3[0,1] += x01[0] unsigned* z34[0]; r3[2,3] += x01[1] unsigned* z34[
1] | |
| 2917 +# asm 1: vmlal.u32 <r3=reg128#5,<x01=reg128#9%bot,<z34=reg128#6%bot | |
| 2918 +# asm 2: vmlal.u32 <r3=q4,<x01=d16,<z34=d10 | |
| 2919 +vmlal.u32 q4,d16,d10 | |
| 2920 + | |
| 2921 +# qhasm: r3[0,1] += x01[2] unsigned* z12[2]; r3[2,3] += x01[3] unsigned* z12[
3] | |
| 2922 +# asm 1: vmlal.u32 <r3=reg128#5,<x01=reg128#9%top,<z12=reg128#3%top | |
| 2923 +# asm 2: vmlal.u32 <r3=q4,<x01=d17,<z12=d5 | |
| 2924 +vmlal.u32 q4,d17,d5 | |
| 2925 + | |
| 2926 +# qhasm: r0 = r0[1]c01[0]r0[2,3] | |
| 2927 +# asm 1: vext.32 <r0=reg128#8%bot,<r0=reg128#8%bot,<c01=reg128#1%bot,#1 | |
| 2928 +# asm 2: vext.32 <r0=d14,<r0=d14,<c01=d0,#1 | |
| 2929 +vext.32 d14,d14,d0,#1 | |
| 2930 + | |
| 2931 +# qhasm: r3[0,1] += x23[0] unsigned* z12[0]; r3[2,3] += x23[1] unsigned* z12[
1] | |
| 2932 +# asm 1: vmlal.u32 <r3=reg128#5,<x23=reg128#10%bot,<z12=reg128#3%bot | |
| 2933 +# asm 2: vmlal.u32 <r3=q4,<x23=d18,<z12=d4 | |
| 2934 +vmlal.u32 q4,d18,d4 | |
| 2935 + | |
| 2936 +# qhasm: input_2
-= 64 | |
| 2937 +# asm 1: sub >input_2=int32#2,<input_2=int32#2,#64 | |
| 2938 +# asm 2: sub >input_2=r1,<input_2=r1,#64 | |
| 2939 +sub r1,r1,#64 | |
| 2940 + | |
| 2941 +# qhasm: r3[0,1] += x23[2] unsigned* z0[0]; r3[2,3] += x23[3] unsigned* z0[1] | |
| 2942 +# asm 1: vmlal.u32 <r3=reg128#5,<x23=reg128#10%top,<z0=reg128#4%bot | |
| 2943 +# asm 2: vmlal.u32 <r3=q4,<x23=d19,<z0=d6 | |
| 2944 +vmlal.u32 q4,d19,d6 | |
| 2945 + | |
| 2946 +# qhasm: ptr = &5z34_stack | |
| 2947 +# asm 1: lea >ptr=int32#3,<5z34_stack=stack128#11 | |
| 2948 +# asm 2: lea >ptr=r2,<5z34_stack=[sp,#160] | |
| 2949 +add r2,sp,#160 | |
| 2950 + | |
| 2951 +# qhasm: 5z34 aligned= mem128[ptr] | |
| 2952 +# asm 1: vld1.8 {>5z34=reg128#6%bot->5z34=reg128#6%top},[<ptr=int32#3,: 128] | |
| 2953 +# asm 2: vld1.8 {>5z34=d10->5z34=d11},[<ptr=r2,: 128] | |
| 2954 +vld1.8 {d10-d11},[r2,: 128] | |
| 2955 + | |
| 2956 +# qhasm: r3[0,1] += x4[0] unsigned* 5z34[2]; r3[2,3] += x4[1] unsigned* 5z3
4[3] | |
| 2957 +# asm 1: vmlal.u32 <r3=reg128#5,<x4=reg128#11%bot,<5z34=reg128#6%top | |
| 2958 +# asm 2: vmlal.u32 <r3=q4,<x4=d20,<5z34=d11 | |
| 2959 +vmlal.u32 q4,d20,d11 | |
| 2960 + | |
| 2961 +# qhasm: r0 = r0[1]r0[0]r0[3]r0[2] | |
| 2962 +# asm 1: vrev64.i32 >r0=reg128#8,<r0=reg128#8 | |
| 2963 +# asm 2: vrev64.i32 >r0=q7,<r0=q7 | |
| 2964 +vrev64.i32 q7,q7 | |
| 2965 + | |
| 2966 +# qhasm: r2[0,1] = c01[2]<<12; r2[2,3] = c01[3]<<12 | |
| 2967 +# asm 1: vshll.u32 >r2=reg128#14,<c01=reg128#1%top,#12 | |
| 2968 +# asm 2: vshll.u32 >r2=q13,<c01=d1,#12 | |
| 2969 +vshll.u32 q13,d1,#12 | |
| 2970 + | |
| 2971 +# qhasm: d01 = mem128[input_2];input_2+=16 | |
| 2972 +# asm 1: vld1.8 {>d01=reg128#12%bot->d01=reg128#12%top},[<input_2=int32#2]! | |
| 2973 +# asm 2: vld1.8 {>d01=d22->d01=d23},[<input_2=r1]! | |
| 2974 +vld1.8 {d22-d23},[r1]! | |
| 2975 + | |
| 2976 +# qhasm: r2[0,1] += x01[0] unsigned* z12[2]; r2[2,3] += x01[1] unsigned* z12[
3] | |
| 2977 +# asm 1: vmlal.u32 <r2=reg128#14,<x01=reg128#9%bot,<z12=reg128#3%top | |
| 2978 +# asm 2: vmlal.u32 <r2=q13,<x01=d16,<z12=d5 | |
| 2979 +vmlal.u32 q13,d16,d5 | |
| 2980 + | |
| 2981 +# qhasm: r2[0,1] += x01[2] unsigned* z12[0]; r2[2,3] += x01[3] unsigned* z12[
1] | |
| 2982 +# asm 1: vmlal.u32 <r2=reg128#14,<x01=reg128#9%top,<z12=reg128#3%bot | |
| 2983 +# asm 2: vmlal.u32 <r2=q13,<x01=d17,<z12=d4 | |
| 2984 +vmlal.u32 q13,d17,d4 | |
| 2985 + | |
| 2986 +# qhasm: r2[0,1] += x23[0] unsigned* z0[0]; r2[2,3] += x23[1] unsigned* z0[1] | |
| 2987 +# asm 1: vmlal.u32 <r2=reg128#14,<x23=reg128#10%bot,<z0=reg128#4%bot | |
| 2988 +# asm 2: vmlal.u32 <r2=q13,<x23=d18,<z0=d6 | |
| 2989 +vmlal.u32 q13,d18,d6 | |
| 2990 + | |
| 2991 +# qhasm: r2[0,1] += x23[2] unsigned* 5z34[2]; r2[2,3] += x23[3] unsigned* 5z3
4[3] | |
| 2992 +# asm 1: vmlal.u32 <r2=reg128#14,<x23=reg128#10%top,<5z34=reg128#6%top | |
| 2993 +# asm 2: vmlal.u32 <r2=q13,<x23=d19,<5z34=d11 | |
| 2994 +vmlal.u32 q13,d19,d11 | |
| 2995 + | |
| 2996 +# qhasm: r2[0,1] += x4[0] unsigned* 5z34[0]; r2[2,3] += x4[1] unsigned* 5z34[
1] | |
| 2997 +# asm 1: vmlal.u32 <r2=reg128#14,<x4=reg128#11%bot,<5z34=reg128#6%bot | |
| 2998 +# asm 2: vmlal.u32 <r2=q13,<x4=d20,<5z34=d10 | |
| 2999 +vmlal.u32 q13,d20,d10 | |
| 3000 + | |
| 3001 +# qhasm: r0 = r0[0,1]c01[1]r0[2] | |
| 3002 +# asm 1: vext.32 <r0=reg128#8%top,<c01=reg128#1%bot,<r0=reg128#8%top,#1 | |
| 3003 +# asm 2: vext.32 <r0=d15,<c01=d0,<r0=d15,#1 | |
| 3004 +vext.32 d15,d0,d15,#1 | |
| 3005 + | |
| 3006 +# qhasm: r1[0,1] = c23[0]<<6; r1[2,3] = c23[1]<<6 | |
| 3007 +# asm 1: vshll.u32 >r1=reg128#15,<c23=reg128#2%bot,#6 | |
| 3008 +# asm 2: vshll.u32 >r1=q14,<c23=d2,#6 | |
| 3009 +vshll.u32 q14,d2,#6 | |
| 3010 + | |
| 3011 +# qhasm: r1[0,1] += x01[0] unsigned* z12[0]; r1[2,3] += x01[1] unsigned* z12[
1] | |
| 3012 +# asm 1: vmlal.u32 <r1=reg128#15,<x01=reg128#9%bot,<z12=reg128#3%bot | |
| 3013 +# asm 2: vmlal.u32 <r1=q14,<x01=d16,<z12=d4 | |
| 3014 +vmlal.u32 q14,d16,d4 | |
| 3015 + | |
| 3016 +# qhasm: r1[0,1] += x01[2] unsigned* z0[0]; r1[2,3] += x01[3] unsigned* z0[1] | |
| 3017 +# asm 1: vmlal.u32 <r1=reg128#15,<x01=reg128#9%top,<z0=reg128#4%bot | |
| 3018 +# asm 2: vmlal.u32 <r1=q14,<x01=d17,<z0=d6 | |
| 3019 +vmlal.u32 q14,d17,d6 | |
| 3020 + | |
| 3021 +# qhasm: r1[0,1] += x23[0] unsigned* 5z34[2]; r1[2,3] += x23[1] unsigned* 5z3
4[3] | |
| 3022 +# asm 1: vmlal.u32 <r1=reg128#15,<x23=reg128#10%bot,<5z34=reg128#6%top | |
| 3023 +# asm 2: vmlal.u32 <r1=q14,<x23=d18,<5z34=d11 | |
| 3024 +vmlal.u32 q14,d18,d11 | |
| 3025 + | |
| 3026 +# qhasm: r1[0,1] += x23[2] unsigned* 5z34[0]; r1[2,3] += x23[3] unsigned* 5z34[
1] | |
| 3027 +# asm 1: vmlal.u32 <r1=reg128#15,<x23=reg128#10%top,<5z34=reg128#6%bot | |
| 3028 +# asm 2: vmlal.u32 <r1=q14,<x23=d19,<5z34=d10 | |
| 3029 +vmlal.u32 q14,d19,d10 | |
| 3030 + | |
| 3031 +# qhasm: ptr = &5z12_stack | |
| 3032 +# asm 1: lea >ptr=int32#3,<5z12_stack=stack128#10 | |
| 3033 +# asm 2: lea >ptr=r2,<5z12_stack=[sp,#144] | |
| 3034 +add r2,sp,#144 | |
| 3035 + | |
| 3036 +# qhasm: 5z12 aligned= mem128[ptr] | |
| 3037 +# asm 1: vld1.8 {>5z12=reg128#1%bot->5z12=reg128#1%top},[<ptr=int32#3,: 128] | |
| 3038 +# asm 2: vld1.8 {>5z12=d0->5z12=d1},[<ptr=r2,: 128] | |
| 3039 +vld1.8 {d0-d1},[r2,: 128] | |
| 3040 + | |
| 3041 +# qhasm: r1[0,1] += x4[0] unsigned* 5z12[2]; r1[2,3] += x4[1] unsigned* 5z12[
3] | |
| 3042 +# asm 1: vmlal.u32 <r1=reg128#15,<x4=reg128#11%bot,<5z12=reg128#1%top | |
| 3043 +# asm 2: vmlal.u32 <r1=q14,<x4=d20,<5z12=d1 | |
| 3044 +vmlal.u32 q14,d20,d1 | |
| 3045 + | |
| 3046 +# qhasm: d23 = mem128[input_2];input_2+=16 | |
| 3047 +# asm 1: vld1.8 {>d23=reg128#2%bot->d23=reg128#2%top},[<input_2=int32#2]! | |
| 3048 +# asm 2: vld1.8 {>d23=d2->d23=d3},[<input_2=r1]! | |
| 3049 +vld1.8 {d2-d3},[r1]! | |
| 3050 + | |
| 3051 +# qhasm: input_2 += 32 | |
| 3052 +# asm 1: add >input_2=int32#2,<input_2=int32#2,#32 | |
| 3053 +# asm 2: add >input_2=r1,<input_2=r1,#32 | |
| 3054 +add r1,r1,#32 | |
| 3055 + | |
| 3056 +# qhasm: r0[0,1] += x4[0] unsigned* 5z12[0]; r0[2,3] += x4[1] unsigned* 5z12[
1] | |
| 3057 +# asm 1: vmlal.u32 <r0=reg128#8,<x4=reg128#11%bot,<5z12=reg128#1%bot | |
| 3058 +# asm 2: vmlal.u32 <r0=q7,<x4=d20,<5z12=d0 | |
| 3059 +vmlal.u32 q7,d20,d0 | |
| 3060 + | |
| 3061 +# qhasm: r0[0,1] += x23[0] unsigned* 5z34[0]; r0[2,3] += x23[1] unsigned* 5z34[
1] | |
| 3062 +# asm 1: vmlal.u32 <r0=reg128#8,<x23=reg128#10%bot,<5z34=reg128#6%bot | |
| 3063 +# asm 2: vmlal.u32 <r0=q7,<x23=d18,<5z34=d10 | |
| 3064 +vmlal.u32 q7,d18,d10 | |
| 3065 + | |
| 3066 +# qhasm: d01 d23 = d01[0] d23[0] d01[1] d23[1] | |
| 3067 +# asm 1: vswp <d23=reg128#2%bot,<d01=reg128#12%top | |
| 3068 +# asm 2: vswp <d23=d2,<d01=d23 | |
| 3069 +vswp d2,d23 | |
| 3070 + | |
| 3071 +# qhasm: r0[0,1] += x23[2] unsigned* 5z12[2]; r0[2,3] += x23[3] unsigned* 5z12[
3] | |
| 3072 +# asm 1: vmlal.u32 <r0=reg128#8,<x23=reg128#10%top,<5z12=reg128#1%top | |
| 3073 +# asm 2: vmlal.u32 <r0=q7,<x23=d19,<5z12=d1 | |
| 3074 +vmlal.u32 q7,d19,d1 | |
| 3075 + | |
| 3076 +# qhasm: r0[0,1] += x01[0] unsigned* z0[0]; r0[2,3] += x01[1] unsigned* z0[1] | |
| 3077 +# asm 1: vmlal.u32 <r0=reg128#8,<x01=reg128#9%bot,<z0=reg128#4%bot | |
| 3078 +# asm 2: vmlal.u32 <r0=q7,<x01=d16,<z0=d6 | |
| 3079 +vmlal.u32 q7,d16,d6 | |
| 3080 + | |
| 3081 +# qhasm: new mid | |
| 3082 + | |
| 3083 +# qhasm: 2x v4 = d23 unsigned>> 40 | |
| 3084 +# asm 1: vshr.u64 >v4=reg128#4,<d23=reg128#2,#40 | |
| 3085 +# asm 2: vshr.u64 >v4=q3,<d23=q1,#40 | |
| 3086 +vshr.u64 q3,q1,#40 | |
| 3087 + | |
| 3088 +# qhasm: mid = d01[1]d23[0] mid[2,3] | |
| 3089 +# asm 1: vext.32 <mid=reg128#1%bot,<d01=reg128#12%bot,<d23=reg128#2%bot,#1 | |
| 3090 +# asm 2: vext.32 <mid=d0,<d01=d22,<d23=d2,#1 | |
| 3091 +vext.32 d0,d22,d2,#1 | |
| 3092 + | |
| 3093 +# qhasm: new v23 | |
| 3094 + | |
| 3095 +# qhasm: v23[2] = d23[0,1] unsigned>> 14; v23[3] = d23[2,3] unsig
ned>> 14 | |
| 3096 +# asm 1: vshrn.u64 <v23=reg128#10%top,<d23=reg128#2,#14 | |
| 3097 +# asm 2: vshrn.u64 <v23=d19,<d23=q1,#14 | |
| 3098 +vshrn.u64 d19,q1,#14 | |
| 3099 + | |
| 3100 +# qhasm: mid = mid[0,1] d01[3]d23[2] | |
| 3101 +# asm 1: vext.32 <mid=reg128#1%top,<d01=reg128#12%top,<d23=reg128#2%top,#1 | |
| 3102 +# asm 2: vext.32 <mid=d1,<d01=d23,<d23=d3,#1 | |
| 3103 +vext.32 d1,d23,d3,#1 | |
| 3104 + | |
| 3105 +# qhasm: new v01 | |
| 3106 + | |
| 3107 +# qhasm: v01[2] = d01[0,1] unsigned>> 26; v01[3] = d01[2,3] unsig
ned>> 26 | |
| 3108 +# asm 1: vshrn.u64 <v01=reg128#11%top,<d01=reg128#12,#26 | |
| 3109 +# asm 2: vshrn.u64 <v01=d21,<d01=q11,#26 | |
| 3110 +vshrn.u64 d21,q11,#26 | |
| 3111 + | |
| 3112 +# qhasm: v01 = d01[1]d01[0] v01[2,3] | |
| 3113 +# asm 1: vext.32 <v01=reg128#11%bot,<d01=reg128#12%bot,<d01=reg128#12%bot,#1 | |
| 3114 +# asm 2: vext.32 <v01=d20,<d01=d22,<d01=d22,#1 | |
| 3115 +vext.32 d20,d22,d22,#1 | |
| 3116 + | |
| 3117 +# qhasm: r0[0,1] += x01[2] unsigned* 5z34[2]; r0[2,3] += x01[3] unsigned* 5z3
4[3] | |
| 3118 +# asm 1: vmlal.u32 <r0=reg128#8,<x01=reg128#9%top,<5z34=reg128#6%top | |
| 3119 +# asm 2: vmlal.u32 <r0=q7,<x01=d17,<5z34=d11 | |
| 3120 +vmlal.u32 q7,d17,d11 | |
| 3121 + | |
| 3122 +# qhasm: v01 = v01[1]d01[2] v01[2,3] | |
| 3123 +# asm 1: vext.32 <v01=reg128#11%bot,<v01=reg128#11%bot,<d01=reg128#12%top,#1 | |
| 3124 +# asm 2: vext.32 <v01=d20,<v01=d20,<d01=d23,#1 | |
| 3125 +vext.32 d20,d20,d23,#1 | |
| 3126 + | |
| 3127 +# qhasm: v23[0] = mid[0,1] unsigned>> 20; v23[1] = mid[2,3] unsig
ned>> 20 | |
| 3128 +# asm 1: vshrn.u64 <v23=reg128#10%bot,<mid=reg128#1,#20 | |
| 3129 +# asm 2: vshrn.u64 <v23=d18,<mid=q0,#20 | |
| 3130 +vshrn.u64 d18,q0,#20 | |
| 3131 + | |
| 3132 +# qhasm: v4 = v4[0]v4[2]v4[1]v4[3] | |
| 3133 +# asm 1: vtrn.32 <v4=reg128#4%bot,<v4=reg128#4%top | |
| 3134 +# asm 2: vtrn.32 <v4=d6,<v4=d7 | |
| 3135 +vtrn.32 d6,d7 | |
| 3136 + | |
| 3137 +# qhasm: 4x v01 &= 0x03ffffff | |
| 3138 +# asm 1: vand.i32 <v01=reg128#11,#0x03ffffff | |
| 3139 +# asm 2: vand.i32 <v01=q10,#0x03ffffff | |
| 3140 +vand.i32 q10,#0x03ffffff | |
| 3141 + | |
| 3142 +# qhasm: ptr = &y34_stack | |
| 3143 +# asm 1: lea >ptr=int32#3,<y34_stack=stack128#4 | |
| 3144 +# asm 2: lea >ptr=r2,<y34_stack=[sp,#48] | |
| 3145 +add r2,sp,#48 | |
| 3146 + | |
| 3147 +# qhasm: y34 aligned= mem128[ptr] | |
| 3148 +# asm 1: vld1.8 {>y34=reg128#3%bot->y34=reg128#3%top},[<ptr=int32#3,: 128] | |
| 3149 +# asm 2: vld1.8 {>y34=d4->y34=d5},[<ptr=r2,: 128] | |
| 3150 +vld1.8 {d4-d5},[r2,: 128] | |
| 3151 + | |
| 3152 +# qhasm: 4x v23 &= 0x03ffffff | |
| 3153 +# asm 1: vand.i32 <v23=reg128#10,#0x03ffffff | |
| 3154 +# asm 2: vand.i32 <v23=q9,#0x03ffffff | |
| 3155 +vand.i32 q9,#0x03ffffff | |
| 3156 + | |
| 3157 +# qhasm: ptr = &y12_stack | |
| 3158 +# asm 1: lea >ptr=int32#3,<y12_stack=stack128#3 | |
| 3159 +# asm 2: lea >ptr=r2,<y12_stack=[sp,#32] | |
| 3160 +add r2,sp,#32 | |
| 3161 + | |
| 3162 +# qhasm: y12 aligned= mem128[ptr] | |
| 3163 +# asm 1: vld1.8 {>y12=reg128#2%bot->y12=reg128#2%top},[<ptr=int32#3,: 128] | |
| 3164 +# asm 2: vld1.8 {>y12=d2->y12=d3},[<ptr=r2,: 128] | |
| 3165 +vld1.8 {d2-d3},[r2,: 128] | |
| 3166 + | |
| 3167 +# qhasm: 4x v4 |= 0x01000000 | |
| 3168 +# asm 1: vorr.i32 <v4=reg128#4,#0x01000000 | |
| 3169 +# asm 2: vorr.i32 <v4=q3,#0x01000000 | |
| 3170 +vorr.i32 q3,#0x01000000 | |
| 3171 + | |
| 3172 +# qhasm: ptr = &y0_stack | |
| 3173 +# asm 1: lea >ptr=int32#3,<y0_stack=stack128#2 | |
| 3174 +# asm 2: lea >ptr=r2,<y0_stack=[sp,#16] | |
| 3175 +add r2,sp,#16 | |
| 3176 + | |
| 3177 +# qhasm: y0 aligned= mem128[ptr] | |
| 3178 +# asm 1: vld1.8 {>y0=reg128#1%bot->y0=reg128#1%top},[<ptr=int32#3,: 128] | |
| 3179 +# asm 2: vld1.8 {>y0=d0->y0=d1},[<ptr=r2,: 128] | |
| 3180 +vld1.8 {d0-d1},[r2,: 128] | |
| 3181 + | |
| 3182 +# qhasm: r4[0,1] += v01[0] unsigned* y34[2]; r4[2,3] += v01[1] unsigned* y34
[3] | |
| 3183 +# asm 1: vmlal.u32 <r4=reg128#16,<v01=reg128#11%bot,<y34=reg128#3%top | |
| 3184 +# asm 2: vmlal.u32 <r4=q15,<v01=d20,<y34=d5 | |
| 3185 +vmlal.u32 q15,d20,d5 | |
| 3186 + | |
| 3187 +# qhasm: r4[0,1] += v01[2] unsigned* y34[0]; r4[2,3] += v01[3] unsigned* y34[1
] | |
| 3188 +# asm 1: vmlal.u32 <r4=reg128#16,<v01=reg128#11%top,<y34=reg128#3%bot | |
| 3189 +# asm 2: vmlal.u32 <r4=q15,<v01=d21,<y34=d4 | |
| 3190 +vmlal.u32 q15,d21,d4 | |
| 3191 + | |
| 3192 +# qhasm: r4[0,1] += v23[0] unsigned* y12[2]; r4[2,3] += v23[1] unsigned* y12[3
] | |
| 3193 +# asm 1: vmlal.u32 <r4=reg128#16,<v23=reg128#10%bot,<y12=reg128#2%top | |
| 3194 +# asm 2: vmlal.u32 <r4=q15,<v23=d18,<y12=d3 | |
| 3195 +vmlal.u32 q15,d18,d3 | |
| 3196 + | |
| 3197 +# qhasm: r4[0,1] += v23[2] unsigned* y12[0]; r4[2,3] += v23[3] unsigned* y12[1
] | |
| 3198 +# asm 1: vmlal.u32 <r4=reg128#16,<v23=reg128#10%top,<y12=reg128#2%bot | |
| 3199 +# asm 2: vmlal.u32 <r4=q15,<v23=d19,<y12=d2 | |
| 3200 +vmlal.u32 q15,d19,d2 | |
| 3201 + | |
| 3202 +# qhasm: r4[0,1] += v4[0] unsigned* y0[0]; r4[2,3] += v4[1] unsigned* y0[1] | |
| 3203 +# asm 1: vmlal.u32 <r4=reg128#16,<v4=reg128#4%bot,<y0=reg128#1%bot | |
| 3204 +# asm 2: vmlal.u32 <r4=q15,<v4=d6,<y0=d0 | |
| 3205 +vmlal.u32 q15,d6,d0 | |
| 3206 + | |
| 3207 +# qhasm: ptr = &5y34_stack | |
| 3208 +# asm 1: lea >ptr=int32#3,<5y34_stack=stack128#6 | |
| 3209 +# asm 2: lea >ptr=r2,<5y34_stack=[sp,#80] | |
| 3210 +add r2,sp,#80 | |
| 3211 + | |
| 3212 +# qhasm: 5y34 aligned= mem128[ptr] | |
| 3213 +# asm 1: vld1.8 {>5y34=reg128#13%bot->5y34=reg128#13%top},[<ptr=int32#3,: 128] | |
| 3214 +# asm 2: vld1.8 {>5y34=d24->5y34=d25},[<ptr=r2,: 128] | |
| 3215 +vld1.8 {d24-d25},[r2,: 128] | |
| 3216 + | |
| 3217 +# qhasm: r3[0,1] += v01[0] unsigned* y34[0]; r3[2,3] += v01[1] unsigned* y34[
1] | |
| 3218 +# asm 1: vmlal.u32 <r3=reg128#5,<v01=reg128#11%bot,<y34=reg128#3%bot | |
| 3219 +# asm 2: vmlal.u32 <r3=q4,<v01=d20,<y34=d4 | |
| 3220 +vmlal.u32 q4,d20,d4 | |
| 3221 + | |
| 3222 +# qhasm: r3[0,1] += v01[2] unsigned* y12[2]; r3[2,3] += v01[3] unsigned* y12[
3] | |
| 3223 +# asm 1: vmlal.u32 <r3=reg128#5,<v01=reg128#11%top,<y12=reg128#2%top | |
| 3224 +# asm 2: vmlal.u32 <r3=q4,<v01=d21,<y12=d3 | |
| 3225 +vmlal.u32 q4,d21,d3 | |
| 3226 + | |
| 3227 +# qhasm: r3[0,1] += v23[0] unsigned* y12[0]; r3[2,3] += v23[1] unsigned* y12[
1] | |
| 3228 +# asm 1: vmlal.u32 <r3=reg128#5,<v23=reg128#10%bot,<y12=reg128#2%bot | |
| 3229 +# asm 2: vmlal.u32 <r3=q4,<v23=d18,<y12=d2 | |
| 3230 +vmlal.u32 q4,d18,d2 | |
| 3231 + | |
| 3232 +# qhasm: r3[0,1] += v23[2] unsigned* y0[0]; r3[2,3] += v23[3] unsigned* y0[1] | |
| 3233 +# asm 1: vmlal.u32 <r3=reg128#5,<v23=reg128#10%top,<y0=reg128#1%bot | |
| 3234 +# asm 2: vmlal.u32 <r3=q4,<v23=d19,<y0=d0 | |
| 3235 +vmlal.u32 q4,d19,d0 | |
| 3236 + | |
| 3237 +# qhasm: r3[0,1] += v4[0] unsigned* 5y34[2]; r3[2,3] += v4[1] unsigned* 5y3
4[3] | |
| 3238 +# asm 1: vmlal.u32 <r3=reg128#5,<v4=reg128#4%bot,<5y34=reg128#13%top | |
| 3239 +# asm 2: vmlal.u32 <r3=q4,<v4=d6,<5y34=d25 | |
| 3240 +vmlal.u32 q4,d6,d25 | |
| 3241 + | |
| 3242 +# qhasm: ptr = &5y12_stack | |
| 3243 +# asm 1: lea >ptr=int32#3,<5y12_stack=stack128#5 | |
| 3244 +# asm 2: lea >ptr=r2,<5y12_stack=[sp,#64] | |
| 3245 +add r2,sp,#64 | |
| 3246 + | |
| 3247 +# qhasm: 5y12 aligned= mem128[ptr] | |
| 3248 +# asm 1: vld1.8 {>5y12=reg128#12%bot->5y12=reg128#12%top},[<ptr=int32#3,: 128] | |
| 3249 +# asm 2: vld1.8 {>5y12=d22->5y12=d23},[<ptr=r2,: 128] | |
| 3250 +vld1.8 {d22-d23},[r2,: 128] | |
| 3251 + | |
| 3252 +# qhasm: r0[0,1] += v4[0] unsigned* 5y12[0]; r0[2,3] += v4[1] unsigned* 5y12[
1] | |
| 3253 +# asm 1: vmlal.u32 <r0=reg128#8,<v4=reg128#4%bot,<5y12=reg128#12%bot | |
| 3254 +# asm 2: vmlal.u32 <r0=q7,<v4=d6,<5y12=d22 | |
| 3255 +vmlal.u32 q7,d6,d22 | |
| 3256 + | |
| 3257 +# qhasm: r0[0,1] += v23[0] unsigned* 5y34[0]; r0[2,3] += v23[1] unsigned* 5y34[
1] | |
| 3258 +# asm 1: vmlal.u32 <r0=reg128#8,<v23=reg128#10%bot,<5y34=reg128#13%bot | |
| 3259 +# asm 2: vmlal.u32 <r0=q7,<v23=d18,<5y34=d24 | |
| 3260 +vmlal.u32 q7,d18,d24 | |
| 3261 + | |
| 3262 +# qhasm: r0[0,1] += v23[2] unsigned* 5y12[2]; r0[2,3] += v23[3] unsigned* 5y12[
3] | |
| 3263 +# asm 1: vmlal.u32 <r0=reg128#8,<v23=reg128#10%top,<5y12=reg128#12%top | |
| 3264 +# asm 2: vmlal.u32 <r0=q7,<v23=d19,<5y12=d23 | |
| 3265 +vmlal.u32 q7,d19,d23 | |
| 3266 + | |
| 3267 +# qhasm: r0[0,1] += v01[0] unsigned* y0[0]; r0[2,3] += v01[1] unsigned* y0[1] | |
| 3268 +# asm 1: vmlal.u32 <r0=reg128#8,<v01=reg128#11%bot,<y0=reg128#1%bot | |
| 3269 +# asm 2: vmlal.u32 <r0=q7,<v01=d20,<y0=d0 | |
| 3270 +vmlal.u32 q7,d20,d0 | |
| 3271 + | |
| 3272 +# qhasm: r0[0,1] += v01[2] unsigned* 5y34[2]; r0[2,3] += v01[3] unsigned* 5y3
4[3] | |
| 3273 +# asm 1: vmlal.u32 <r0=reg128#8,<v01=reg128#11%top,<5y34=reg128#13%top | |
| 3274 +# asm 2: vmlal.u32 <r0=q7,<v01=d21,<5y34=d25 | |
| 3275 +vmlal.u32 q7,d21,d25 | |
| 3276 + | |
| 3277 +# qhasm: r1[0,1] += v01[0] unsigned* y12[0]; r1[2,3] += v01[1] unsigned* y12[
1] | |
| 3278 +# asm 1: vmlal.u32 <r1=reg128#15,<v01=reg128#11%bot,<y12=reg128#2%bot | |
| 3279 +# asm 2: vmlal.u32 <r1=q14,<v01=d20,<y12=d2 | |
| 3280 +vmlal.u32 q14,d20,d2 | |
| 3281 + | |
| 3282 +# qhasm: r1[0,1] += v01[2] unsigned* y0[0]; r1[2,3] += v01[3] unsigned* y0[1] | |
| 3283 +# asm 1: vmlal.u32 <r1=reg128#15,<v01=reg128#11%top,<y0=reg128#1%bot | |
| 3284 +# asm 2: vmlal.u32 <r1=q14,<v01=d21,<y0=d0 | |
| 3285 +vmlal.u32 q14,d21,d0 | |
| 3286 + | |
| 3287 +# qhasm: r1[0,1] += v23[0] unsigned* 5y34[2]; r1[2,3] += v23[1] unsigned* 5y3
4[3] | |
| 3288 +# asm 1: vmlal.u32 <r1=reg128#15,<v23=reg128#10%bot,<5y34=reg128#13%top | |
| 3289 +# asm 2: vmlal.u32 <r1=q14,<v23=d18,<5y34=d25 | |
| 3290 +vmlal.u32 q14,d18,d25 | |
| 3291 + | |
| 3292 +# qhasm: r1[0,1] += v23[2] unsigned* 5y34[0]; r1[2,3] += v23[3] unsigned* 5y34[
1] | |
| 3293 +# asm 1: vmlal.u32 <r1=reg128#15,<v23=reg128#10%top,<5y34=reg128#13%bot | |
| 3294 +# asm 2: vmlal.u32 <r1=q14,<v23=d19,<5y34=d24 | |
| 3295 +vmlal.u32 q14,d19,d24 | |
| 3296 + | |
| 3297 +# qhasm: r1[0,1] += v4[0] unsigned* 5y12[2]; r1[2,3] += v4[1] unsigned* 5y12[
3] | |
| 3298 +# asm 1: vmlal.u32 <r1=reg128#15,<v4=reg128#4%bot,<5y12=reg128#12%top | |
| 3299 +# asm 2: vmlal.u32 <r1=q14,<v4=d6,<5y12=d23 | |
| 3300 +vmlal.u32 q14,d6,d23 | |
| 3301 + | |
| 3302 +# qhasm: r2[0,1] += v01[0] unsigned* y12[2]; r2[2,3] += v01[1] unsigned* y12[
3] | |
| 3303 +# asm 1: vmlal.u32 <r2=reg128#14,<v01=reg128#11%bot,<y12=reg128#2%top | |
| 3304 +# asm 2: vmlal.u32 <r2=q13,<v01=d20,<y12=d3 | |
| 3305 +vmlal.u32 q13,d20,d3 | |
| 3306 + | |
| 3307 +# qhasm: r2[0,1] += v01[2] unsigned* y12[0]; r2[2,3] += v01[3] unsigned* y12[
1] | |
| 3308 +# asm 1: vmlal.u32 <r2=reg128#14,<v01=reg128#11%top,<y12=reg128#2%bot | |
| 3309 +# asm 2: vmlal.u32 <r2=q13,<v01=d21,<y12=d2 | |
| 3310 +vmlal.u32 q13,d21,d2 | |
| 3311 + | |
| 3312 +# qhasm: r2[0,1] += v23[0] unsigned* y0[0]; r2[2,3] += v23[1] unsigned* y0[1] | |
| 3313 +# asm 1: vmlal.u32 <r2=reg128#14,<v23=reg128#10%bot,<y0=reg128#1%bot | |
| 3314 +# asm 2: vmlal.u32 <r2=q13,<v23=d18,<y0=d0 | |
| 3315 +vmlal.u32 q13,d18,d0 | |
| 3316 + | |
| 3317 +# qhasm: r2[0,1] += v23[2] unsigned* 5y34[2]; r2[2,3] += v23[3] unsigned* 5y3
4[3] | |
| 3318 +# asm 1: vmlal.u32 <r2=reg128#14,<v23=reg128#10%top,<5y34=reg128#13%top | |
| 3319 +# asm 2: vmlal.u32 <r2=q13,<v23=d19,<5y34=d25 | |
| 3320 +vmlal.u32 q13,d19,d25 | |
| 3321 + | |
| 3322 +# qhasm: r2[0,1] += v4[0] unsigned* 5y34[0]; r2[2,3] += v4[1] unsigned* 5y34[
1] | |
| 3323 +# asm 1: vmlal.u32 <r2=reg128#14,<v4=reg128#4%bot,<5y34=reg128#13%bot | |
| 3324 +# asm 2: vmlal.u32 <r2=q13,<v4=d6,<5y34=d24 | |
| 3325 +vmlal.u32 q13,d6,d24 | |
| 3326 + | |
| 3327 +# qhasm: ptr = &two24 | |
| 3328 +# asm 1: lea >ptr=int32#3,<two24=stack128#1 | |
| 3329 +# asm 2: lea >ptr=r2,<two24=[sp,#0] | |
| 3330 +add r2,sp,#0 | |
| 3331 + | |
| 3332 +# qhasm: 2x t1 = r0 unsigned>> 26 | |
| 3333 +# asm 1: vshr.u64 >t1=reg128#4,<r0=reg128#8,#26 | |
| 3334 +# asm 2: vshr.u64 >t1=q3,<r0=q7,#26 | |
| 3335 +vshr.u64 q3,q7,#26 | |
| 3336 + | |
| 3337 +# qhasm: len -= 64 | |
| 3338 +# asm 1: sub >len=int32#4,<len=int32#4,#64 | |
| 3339 +# asm 2: sub >len=r3,<len=r3,#64 | |
| 3340 +sub r3,r3,#64 | |
| 3341 + | |
| 3342 +# qhasm: r0 &= mask | |
| 3343 +# asm 1: vand >r0=reg128#6,<r0=reg128#8,<mask=reg128#7 | |
| 3344 +# asm 2: vand >r0=q5,<r0=q7,<mask=q6 | |
| 3345 +vand q5,q7,q6 | |
| 3346 + | |
| 3347 +# qhasm: 2x r1 += t1 | |
| 3348 +# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#15,<t1=reg128#4 | |
| 3349 +# asm 2: vadd.i64 >r1=q3,<r1=q14,<t1=q3 | |
| 3350 +vadd.i64 q3,q14,q3 | |
| 3351 + | |
| 3352 +# qhasm: 2x t4 = r3 unsigned>> 26 | |
| 3353 +# asm 1: vshr.u64 >t4=reg128#8,<r3=reg128#5,#26 | |
| 3354 +# asm 2: vshr.u64 >t4=q7,<r3=q4,#26 | |
| 3355 +vshr.u64 q7,q4,#26 | |
| 3356 + | |
| 3357 +# qhasm: r3 &= mask | |
| 3358 +# asm 1: vand >r3=reg128#5,<r3=reg128#5,<mask=reg128#7 | |
| 3359 +# asm 2: vand >r3=q4,<r3=q4,<mask=q6 | |
| 3360 +vand q4,q4,q6 | |
| 3361 + | |
| 3362 +# qhasm: 2x x4 = r4 + t4 | |
| 3363 +# asm 1: vadd.i64 >x4=reg128#8,<r4=reg128#16,<t4=reg128#8 | |
| 3364 +# asm 2: vadd.i64 >x4=q7,<r4=q15,<t4=q7 | |
| 3365 +vadd.i64 q7,q15,q7 | |
| 3366 + | |
| 3367 +# qhasm: r4 aligned= mem128[ptr] | |
| 3368 +# asm 1: vld1.8 {>r4=reg128#16%bot->r4=reg128#16%top},[<ptr=int32#3,: 128] | |
| 3369 +# asm 2: vld1.8 {>r4=d30->r4=d31},[<ptr=r2,: 128] | |
| 3370 +vld1.8 {d30-d31},[r2,: 128] | |
| 3371 + | |
| 3372 +# qhasm: 2x t2 = r1 unsigned>> 26 | |
| 3373 +# asm 1: vshr.u64 >t2=reg128#9,<r1=reg128#4,#26 | |
| 3374 +# asm 2: vshr.u64 >t2=q8,<r1=q3,#26 | |
| 3375 +vshr.u64 q8,q3,#26 | |
| 3376 + | |
| 3377 +# qhasm: r1 &= mask | |
| 3378 +# asm 1: vand >r1=reg128#4,<r1=reg128#4,<mask=reg128#7 | |
| 3379 +# asm 2: vand >r1=q3,<r1=q3,<mask=q6 | |
| 3380 +vand q3,q3,q6 | |
| 3381 + | |
| 3382 +# qhasm: 2x t0 = x4 unsigned>> 26 | |
| 3383 +# asm 1: vshr.u64 >t0=reg128#10,<x4=reg128#8,#26 | |
| 3384 +# asm 2: vshr.u64 >t0=q9,<x4=q7,#26 | |
| 3385 +vshr.u64 q9,q7,#26 | |
| 3386 + | |
| 3387 +# qhasm: 2x r2 += t2 | |
| 3388 +# asm 1: vadd.i64 >r2=reg128#9,<r2=reg128#14,<t2=reg128#9 | |
| 3389 +# asm 2: vadd.i64 >r2=q8,<r2=q13,<t2=q8 | |
| 3390 +vadd.i64 q8,q13,q8 | |
| 3391 + | |
| 3392 +# qhasm: x4 &= mask | |
| 3393 +# asm 1: vand >x4=reg128#11,<x4=reg128#8,<mask=reg128#7 | |
| 3394 +# asm 2: vand >x4=q10,<x4=q7,<mask=q6 | |
| 3395 +vand q10,q7,q6 | |
| 3396 + | |
| 3397 +# qhasm: 2x x01 = r0 + t0 | |
| 3398 +# asm 1: vadd.i64 >x01=reg128#6,<r0=reg128#6,<t0=reg128#10 | |
| 3399 +# asm 2: vadd.i64 >x01=q5,<r0=q5,<t0=q9 | |
| 3400 +vadd.i64 q5,q5,q9 | |
| 3401 + | |
| 3402 +# qhasm: r0 aligned= mem128[ptr] | |
| 3403 +# asm 1: vld1.8 {>r0=reg128#8%bot->r0=reg128#8%top},[<ptr=int32#3,: 128] | |
| 3404 +# asm 2: vld1.8 {>r0=d14->r0=d15},[<ptr=r2,: 128] | |
| 3405 +vld1.8 {d14-d15},[r2,: 128] | |
| 3406 + | |
| 3407 +# qhasm: ptr = &z34_stack | |
| 3408 +# asm 1: lea >ptr=int32#3,<z34_stack=stack128#9 | |
| 3409 +# asm 2: lea >ptr=r2,<z34_stack=[sp,#128] | |
| 3410 +add r2,sp,#128 | |
| 3411 + | |
| 3412 +# qhasm: 2x t0 <<= 2 | |
| 3413 +# asm 1: vshl.i64 >t0=reg128#10,<t0=reg128#10,#2 | |
| 3414 +# asm 2: vshl.i64 >t0=q9,<t0=q9,#2 | |
| 3415 +vshl.i64 q9,q9,#2 | |
| 3416 + | |
| 3417 +# qhasm: 2x t3 = r2 unsigned>> 26 | |
| 3418 +# asm 1: vshr.u64 >t3=reg128#14,<r2=reg128#9,#26 | |
| 3419 +# asm 2: vshr.u64 >t3=q13,<r2=q8,#26 | |
| 3420 +vshr.u64 q13,q8,#26 | |
| 3421 + | |
| 3422 +# qhasm: 2x x01 += t0 | |
| 3423 +# asm 1: vadd.i64 >x01=reg128#15,<x01=reg128#6,<t0=reg128#10 | |
| 3424 +# asm 2: vadd.i64 >x01=q14,<x01=q5,<t0=q9 | |
| 3425 +vadd.i64 q14,q5,q9 | |
| 3426 + | |
| 3427 +# qhasm: z34 aligned= mem128[ptr] | |
| 3428 +# asm 1: vld1.8 {>z34=reg128#6%bot->z34=reg128#6%top},[<ptr=int32#3,: 128] | |
| 3429 +# asm 2: vld1.8 {>z34=d10->z34=d11},[<ptr=r2,: 128] | |
| 3430 +vld1.8 {d10-d11},[r2,: 128] | |
| 3431 + | |
| 3432 +# qhasm: x23 = r2 & mask | |
| 3433 +# asm 1: vand >x23=reg128#10,<r2=reg128#9,<mask=reg128#7 | |
| 3434 +# asm 2: vand >x23=q9,<r2=q8,<mask=q6 | |
| 3435 +vand q9,q8,q6 | |
| 3436 + | |
| 3437 +# qhasm: 2x r3 += t3 | |
| 3438 +# asm 1: vadd.i64 >r3=reg128#5,<r3=reg128#5,<t3=reg128#14 | |
| 3439 +# asm 2: vadd.i64 >r3=q4,<r3=q4,<t3=q13 | |
| 3440 +vadd.i64 q4,q4,q13 | |
| 3441 + | |
| 3442 +# qhasm: input_2
+= 32 | |
| 3443 +# asm 1: add >input_2=int32#2,<input_2=int32#2,#32 | |
| 3444 +# asm 2: add >input_2=r1,<input_2=r1,#32 | |
| 3445 +add r1,r1,#32 | |
| 3446 + | |
| 3447 +# qhasm: 2x t1 = x01 unsigned>> 26 | |
| 3448 +# asm 1: vshr.u64 >t1=reg128#14,<x01=reg128#15,#26 | |
| 3449 +# asm 2: vshr.u64 >t1=q13,<x01=q14,#26 | |
| 3450 +vshr.u64 q13,q14,#26 | |
| 3451 + | |
| 3452 +# qhasm: x23 = x23[0,2,1,3] | |
| 3453 +# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top | |
| 3454 +# asm 2: vtrn.32 <x23=d18,<x23=d19 | |
| 3455 +vtrn.32 d18,d19 | |
| 3456 + | |
| 3457 +# qhasm: x01 = x01 & mask | |
| 3458 +# asm 1: vand >x01=reg128#9,<x01=reg128#15,<mask=reg128#7 | |
| 3459 +# asm 2: vand >x01=q8,<x01=q14,<mask=q6 | |
| 3460 +vand q8,q14,q6 | |
| 3461 + | |
| 3462 +# qhasm: 2x r1 += t1 | |
| 3463 +# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#4,<t1=reg128#14 | |
| 3464 +# asm 2: vadd.i64 >r1=q3,<r1=q3,<t1=q13 | |
| 3465 +vadd.i64 q3,q3,q13 | |
| 3466 + | |
| 3467 +# qhasm: 2x t4 = r3 unsigned>> 26 | |
| 3468 +# asm 1: vshr.u64 >t4=reg128#14,<r3=reg128#5,#26 | |
| 3469 +# asm 2: vshr.u64 >t4=q13,<r3=q4,#26 | |
| 3470 +vshr.u64 q13,q4,#26 | |
| 3471 + | |
| 3472 +# qhasm: x01 = x01[0,2,1,3] | |
| 3473 +# asm 1: vtrn.32 <x01=reg128#9%bot,<x01=reg128#9%top | |
| 3474 +# asm 2: vtrn.32 <x01=d16,<x01=d17 | |
| 3475 +vtrn.32 d16,d17 | |
| 3476 + | |
| 3477 +# qhasm: r3 &= mask | |
| 3478 +# asm 1: vand >r3=reg128#5,<r3=reg128#5,<mask=reg128#7 | |
| 3479 +# asm 2: vand >r3=q4,<r3=q4,<mask=q6 | |
| 3480 +vand q4,q4,q6 | |
| 3481 + | |
| 3482 +# qhasm: r1 = r1[0,2,1,3] | |
| 3483 +# asm 1: vtrn.32 <r1=reg128#4%bot,<r1=reg128#4%top | |
| 3484 +# asm 2: vtrn.32 <r1=d6,<r1=d7 | |
| 3485 +vtrn.32 d6,d7 | |
| 3486 + | |
| 3487 +# qhasm: 2x x4 += t4 | |
| 3488 +# asm 1: vadd.i64 >x4=reg128#11,<x4=reg128#11,<t4=reg128#14 | |
| 3489 +# asm 2: vadd.i64 >x4=q10,<x4=q10,<t4=q13 | |
| 3490 +vadd.i64 q10,q10,q13 | |
| 3491 + | |
| 3492 +# qhasm: r3 = r3[0,2,1,3] | |
| 3493 +# asm 1: vtrn.32 <r3=reg128#5%bot,<r3=reg128#5%top | |
| 3494 +# asm 2: vtrn.32 <r3=d8,<r3=d9 | |
| 3495 +vtrn.32 d8,d9 | |
| 3496 + | |
| 3497 +# qhasm: x01 = x01[0,1] r1[0,1] | |
| 3498 +# asm 1: vext.32 <x01=reg128#9%top,<r1=reg128#4%bot,<r1=reg128#4%bot,#0 | |
| 3499 +# asm 2: vext.32 <x01=d17,<r1=d6,<r1=d6,#0 | |
| 3500 +vext.32 d17,d6,d6,#0 | |
| 3501 + | |
| 3502 +# qhasm: x23 = x23[0,1] r3[0,1] | |
| 3503 +# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#5%bot,<r3=reg128#5%bot,#0 | |
| 3504 +# asm 2: vext.32 <x23=d19,<r3=d8,<r3=d8,#0 | |
| 3505 +vext.32 d19,d8,d8,#0 | |
| 3506 + | |
| 3507 +# qhasm: x4 = x4[0,2,1,3] | |
| 3508 +# asm 1: vtrn.32 <x4=reg128#11%bot,<x4=reg128#11%top | |
| 3509 +# asm 2: vtrn.32 <x4=d20,<x4=d21 | |
| 3510 +vtrn.32 d20,d21 | |
| 3511 + | |
| 3512 +# qhasm: unsigned>? len - 64 | |
| 3513 +# asm 1: cmp <len=int32#4,#64 | |
| 3514 +# asm 2: cmp <len=r3,#64 | |
| 3515 +cmp r3,#64 | |
| 3516 + | |
| 3517 +# qhasm: goto mainloop2 if unsigned> | |
| 3518 +bhi ._mainloop2 | |
| 3519 + | |
| 3520 +# qhasm: input_2 -= 32 | |
| 3521 +# asm 1: sub >input_2=int32#3,<input_2=int32#2,#32 | |
| 3522 +# asm 2: sub >input_2=r2,<input_2=r1,#32 | |
| 3523 +sub r2,r1,#32 | |
| 3524 + | |
| 3525 +# qhasm: below64bytes: | |
| 3526 +._below64bytes: | |
| 3527 + | |
| 3528 +# qhasm: unsigned>? len - 32 | |
| 3529 +# asm 1: cmp <len=int32#4,#32 | |
| 3530 +# asm 2: cmp <len=r3,#32 | |
| 3531 +cmp r3,#32 | |
| 3532 + | |
| 3533 +# qhasm: goto end if !unsigned> | |
| 3534 +bls ._end | |
| 3535 + | |
| 3536 +# qhasm: mainloop: | |
| 3537 +._mainloop: | |
| 3538 + | |
| 3539 +# qhasm: new r0 | |
| 3540 + | |
| 3541 +# qhasm: ptr = &two24 | |
| 3542 +# asm 1: lea >ptr=int32#2,<two24=stack128#1 | |
| 3543 +# asm 2: lea >ptr=r1,<two24=[sp,#0] | |
| 3544 +add r1,sp,#0 | |
| 3545 + | |
| 3546 +# qhasm: r4 aligned= mem128[ptr] | |
| 3547 +# asm 1: vld1.8 {>r4=reg128#5%bot->r4=reg128#5%top},[<ptr=int32#2,: 128] | |
| 3548 +# asm 2: vld1.8 {>r4=d8->r4=d9},[<ptr=r1,: 128] | |
| 3549 +vld1.8 {d8-d9},[r1,: 128] | |
| 3550 + | |
| 3551 +# qhasm: u4 aligned= mem128[ptr] | |
| 3552 +# asm 1: vld1.8 {>u4=reg128#6%bot->u4=reg128#6%top},[<ptr=int32#2,: 128] | |
| 3553 +# asm 2: vld1.8 {>u4=d10->u4=d11},[<ptr=r1,: 128] | |
| 3554 +vld1.8 {d10-d11},[r1,: 128] | |
| 3555 + | |
| 3556 +# qhasm: c01 = mem128[input_2];input_2+=16 | |
| 3557 +# asm 1: vld1.8 {>c01=reg128#8%bot->c01=reg128#8%top},[<input_2=int32#3]! | |
| 3558 +# asm 2: vld1.8 {>c01=d14->c01=d15},[<input_2=r2]! | |
| 3559 +vld1.8 {d14-d15},[r2]! | |
| 3560 + | |
| 3561 +# qhasm: r4[0,1] += x01[0] unsigned* y34[2]; r4[2,3] += x01[1] unsigned* y34
[3] | |
| 3562 +# asm 1: vmlal.u32 <r4=reg128#5,<x01=reg128#9%bot,<y34=reg128#3%top | |
| 3563 +# asm 2: vmlal.u32 <r4=q4,<x01=d16,<y34=d5 | |
| 3564 +vmlal.u32 q4,d16,d5 | |
| 3565 + | |
| 3566 +# qhasm: c23 = mem128[input_2];input_2+=16 | |
| 3567 +# asm 1: vld1.8 {>c23=reg128#14%bot->c23=reg128#14%top},[<input_2=int32#3]! | |
| 3568 +# asm 2: vld1.8 {>c23=d26->c23=d27},[<input_2=r2]! | |
| 3569 +vld1.8 {d26-d27},[r2]! | |
| 3570 + | |
| 3571 +# qhasm: r4[0,1] += x01[2] unsigned* y34[0]; r4[2,3] += x01[3] unsigned* y34[1
] | |
| 3572 +# asm 1: vmlal.u32 <r4=reg128#5,<x01=reg128#9%top,<y34=reg128#3%bot | |
| 3573 +# asm 2: vmlal.u32 <r4=q4,<x01=d17,<y34=d4 | |
| 3574 +vmlal.u32 q4,d17,d4 | |
| 3575 + | |
| 3576 +# qhasm: r0 = u4[1]c01[0]r0[2,3] | |
| 3577 +# asm 1: vext.32 <r0=reg128#4%bot,<u4=reg128#6%bot,<c01=reg128#8%bot,#1 | |
| 3578 +# asm 2: vext.32 <r0=d6,<u4=d10,<c01=d14,#1 | |
| 3579 +vext.32 d6,d10,d14,#1 | |
| 3580 + | |
| 3581 +# qhasm: r4[0,1] += x23[0] unsigned* y12[2]; r4[2,3] += x23[1] unsigned* y12[3
] | |
| 3582 +# asm 1: vmlal.u32 <r4=reg128#5,<x23=reg128#10%bot,<y12=reg128#2%top | |
| 3583 +# asm 2: vmlal.u32 <r4=q4,<x23=d18,<y12=d3 | |
| 3584 +vmlal.u32 q4,d18,d3 | |
| 3585 + | |
| 3586 +# qhasm: r0 = r0[0,1]u4[1]c23[0] | |
| 3587 +# asm 1: vext.32 <r0=reg128#4%top,<u4=reg128#6%bot,<c23=reg128#14%bot,#1 | |
| 3588 +# asm 2: vext.32 <r0=d7,<u4=d10,<c23=d26,#1 | |
| 3589 +vext.32 d7,d10,d26,#1 | |
| 3590 + | |
| 3591 +# qhasm: r4[0,1] += x23[2] unsigned* y12[0]; r4[2,3] += x23[3] unsigned* y12[1
] | |
| 3592 +# asm 1: vmlal.u32 <r4=reg128#5,<x23=reg128#10%top,<y12=reg128#2%bot | |
| 3593 +# asm 2: vmlal.u32 <r4=q4,<x23=d19,<y12=d2 | |
| 3594 +vmlal.u32 q4,d19,d2 | |
| 3595 + | |
| 3596 +# qhasm: r0 = r0[1]r0[0]r0[3]r0[2] | |
| 3597 +# asm 1: vrev64.i32 >r0=reg128#4,<r0=reg128#4 | |
| 3598 +# asm 2: vrev64.i32 >r0=q3,<r0=q3 | |
| 3599 +vrev64.i32 q3,q3 | |
| 3600 + | |
| 3601 +# qhasm: r4[0,1] += x4[0] unsigned* y0[0]; r4[2,3] += x4[1] unsigned* y0[1] | |
| 3602 +# asm 1: vmlal.u32 <r4=reg128#5,<x4=reg128#11%bot,<y0=reg128#1%bot | |
| 3603 +# asm 2: vmlal.u32 <r4=q4,<x4=d20,<y0=d0 | |
| 3604 +vmlal.u32 q4,d20,d0 | |
| 3605 + | |
| 3606 +# qhasm: r0[0,1] += x4[0] unsigned* 5y12[0]; r0[2,3] += x4[1] unsigned* 5y12[
1] | |
| 3607 +# asm 1: vmlal.u32 <r0=reg128#4,<x4=reg128#11%bot,<5y12=reg128#12%bot | |
| 3608 +# asm 2: vmlal.u32 <r0=q3,<x4=d20,<5y12=d22 | |
| 3609 +vmlal.u32 q3,d20,d22 | |
| 3610 + | |
| 3611 +# qhasm: r0[0,1] += x23[0] unsigned* 5y34[0]; r0[2,3] += x23[1] unsigned* 5y34[
1] | |
| 3612 +# asm 1: vmlal.u32 <r0=reg128#4,<x23=reg128#10%bot,<5y34=reg128#13%bot | |
| 3613 +# asm 2: vmlal.u32 <r0=q3,<x23=d18,<5y34=d24 | |
| 3614 +vmlal.u32 q3,d18,d24 | |
| 3615 + | |
| 3616 +# qhasm: r0[0,1] += x23[2] unsigned* 5y12[2]; r0[2,3] += x23[3] unsigned* 5y12[
3] | |
| 3617 +# asm 1: vmlal.u32 <r0=reg128#4,<x23=reg128#10%top,<5y12=reg128#12%top | |
| 3618 +# asm 2: vmlal.u32 <r0=q3,<x23=d19,<5y12=d23 | |
| 3619 +vmlal.u32 q3,d19,d23 | |
| 3620 + | |
| 3621 +# qhasm: c01 c23 = c01[0]c23[0]c01[2]c23[2]c01[1]c23[1]c01[3]c23[3] | |
| 3622 +# asm 1: vtrn.32 <c01=reg128#8,<c23=reg128#14 | |
| 3623 +# asm 2: vtrn.32 <c01=q7,<c23=q13 | |
| 3624 +vtrn.32 q7,q13 | |
| 3625 + | |
| 3626 +# qhasm: r0[0,1] += x01[0] unsigned* y0[0]; r0[2,3] += x01[1] unsigned* y0[1] | |
| 3627 +# asm 1: vmlal.u32 <r0=reg128#4,<x01=reg128#9%bot,<y0=reg128#1%bot | |
| 3628 +# asm 2: vmlal.u32 <r0=q3,<x01=d16,<y0=d0 | |
| 3629 +vmlal.u32 q3,d16,d0 | |
| 3630 + | |
| 3631 +# qhasm: r3[0,1] = c23[2]<<18; r3[2,3] = c23[3]<<18 | |
| 3632 +# asm 1: vshll.u32 >r3=reg128#6,<c23=reg128#14%top,#18 | |
| 3633 +# asm 2: vshll.u32 >r3=q5,<c23=d27,#18 | |
| 3634 +vshll.u32 q5,d27,#18 | |
| 3635 + | |
| 3636 +# qhasm: r0[0,1] += x01[2] unsigned* 5y34[2]; r0[2,3] += x01[3] unsigned* 5y3
4[3] | |
| 3637 +# asm 1: vmlal.u32 <r0=reg128#4,<x01=reg128#9%top,<5y34=reg128#13%top | |
| 3638 +# asm 2: vmlal.u32 <r0=q3,<x01=d17,<5y34=d25 | |
| 3639 +vmlal.u32 q3,d17,d25 | |
| 3640 + | |
| 3641 +# qhasm: r3[0,1] += x01[0] unsigned* y34[0]; r3[2,3] += x01[1] unsigned* y34[
1] | |
| 3642 +# asm 1: vmlal.u32 <r3=reg128#6,<x01=reg128#9%bot,<y34=reg128#3%bot | |
| 3643 +# asm 2: vmlal.u32 <r3=q5,<x01=d16,<y34=d4 | |
| 3644 +vmlal.u32 q5,d16,d4 | |
| 3645 + | |
| 3646 +# qhasm: r3[0,1] += x01[2] unsigned* y12[2]; r3[2,3] += x01[3] unsigned* y12[
3] | |
| 3647 +# asm 1: vmlal.u32 <r3=reg128#6,<x01=reg128#9%top,<y12=reg128#2%top | |
| 3648 +# asm 2: vmlal.u32 <r3=q5,<x01=d17,<y12=d3 | |
| 3649 +vmlal.u32 q5,d17,d3 | |
| 3650 + | |
| 3651 +# qhasm: r3[0,1] += x23[0] unsigned* y12[0]; r3[2,3] += x23[1] unsigned* y12[
1] | |
| 3652 +# asm 1: vmlal.u32 <r3=reg128#6,<x23=reg128#10%bot,<y12=reg128#2%bot | |
| 3653 +# asm 2: vmlal.u32 <r3=q5,<x23=d18,<y12=d2 | |
| 3654 +vmlal.u32 q5,d18,d2 | |
| 3655 + | |
| 3656 +# qhasm: r3[0,1] += x23[2] unsigned* y0[0]; r3[2,3] += x23[3] unsigned* y0[1] | |
| 3657 +# asm 1: vmlal.u32 <r3=reg128#6,<x23=reg128#10%top,<y0=reg128#1%bot | |
| 3658 +# asm 2: vmlal.u32 <r3=q5,<x23=d19,<y0=d0 | |
| 3659 +vmlal.u32 q5,d19,d0 | |
| 3660 + | |
| 3661 +# qhasm: r1[0,1] = c23[0]<<6; r1[2,3] = c23[1]<<6 | |
| 3662 +# asm 1: vshll.u32 >r1=reg128#14,<c23=reg128#14%bot,#6 | |
| 3663 +# asm 2: vshll.u32 >r1=q13,<c23=d26,#6 | |
| 3664 +vshll.u32 q13,d26,#6 | |
| 3665 + | |
| 3666 +# qhasm: r3[0,1] += x4[0] unsigned* 5y34[2]; r3[2,3] += x4[1] unsigned* 5y3
4[3] | |
| 3667 +# asm 1: vmlal.u32 <r3=reg128#6,<x4=reg128#11%bot,<5y34=reg128#13%top | |
| 3668 +# asm 2: vmlal.u32 <r3=q5,<x4=d20,<5y34=d25 | |
| 3669 +vmlal.u32 q5,d20,d25 | |
| 3670 + | |
| 3671 +# qhasm: r1[0,1] += x01[0] unsigned* y12[0]; r1[2,3] += x01[1] unsigned* y12[
1] | |
| 3672 +# asm 1: vmlal.u32 <r1=reg128#14,<x01=reg128#9%bot,<y12=reg128#2%bot | |
| 3673 +# asm 2: vmlal.u32 <r1=q13,<x01=d16,<y12=d2 | |
| 3674 +vmlal.u32 q13,d16,d2 | |
| 3675 + | |
| 3676 +# qhasm: r1[0,1] += x01[2] unsigned* y0[0]; r1[2,3] += x01[3] unsigned* y0[1] | |
| 3677 +# asm 1: vmlal.u32 <r1=reg128#14,<x01=reg128#9%top,<y0=reg128#1%bot | |
| 3678 +# asm 2: vmlal.u32 <r1=q13,<x01=d17,<y0=d0 | |
| 3679 +vmlal.u32 q13,d17,d0 | |
| 3680 + | |
| 3681 +# qhasm: r1[0,1] += x23[0] unsigned* 5y34[2]; r1[2,3] += x23[1] unsigned* 5y3
4[3] | |
| 3682 +# asm 1: vmlal.u32 <r1=reg128#14,<x23=reg128#10%bot,<5y34=reg128#13%top | |
| 3683 +# asm 2: vmlal.u32 <r1=q13,<x23=d18,<5y34=d25 | |
| 3684 +vmlal.u32 q13,d18,d25 | |
| 3685 + | |
| 3686 +# qhasm: r1[0,1] += x23[2] unsigned* 5y34[0]; r1[2,3] += x23[3] unsigned* 5y34[
1] | |
| 3687 +# asm 1: vmlal.u32 <r1=reg128#14,<x23=reg128#10%top,<5y34=reg128#13%bot | |
| 3688 +# asm 2: vmlal.u32 <r1=q13,<x23=d19,<5y34=d24 | |
| 3689 +vmlal.u32 q13,d19,d24 | |
| 3690 + | |
| 3691 +# qhasm: r2[0,1] = c01[2]<<12; r2[2,3] = c01[3]<<12 | |
| 3692 +# asm 1: vshll.u32 >r2=reg128#8,<c01=reg128#8%top,#12 | |
| 3693 +# asm 2: vshll.u32 >r2=q7,<c01=d15,#12 | |
| 3694 +vshll.u32 q7,d15,#12 | |
| 3695 + | |
| 3696 +# qhasm: r1[0,1] += x4[0] unsigned* 5y12[2]; r1[2,3] += x4[1] unsigned* 5y12[
3] | |
| 3697 +# asm 1: vmlal.u32 <r1=reg128#14,<x4=reg128#11%bot,<5y12=reg128#12%top | |
| 3698 +# asm 2: vmlal.u32 <r1=q13,<x4=d20,<5y12=d23 | |
| 3699 +vmlal.u32 q13,d20,d23 | |
| 3700 + | |
| 3701 +# qhasm: r2[0,1] += x01[0] unsigned* y12[2]; r2[2,3] += x01[1] unsigned* y12[
3] | |
| 3702 +# asm 1: vmlal.u32 <r2=reg128#8,<x01=reg128#9%bot,<y12=reg128#2%top | |
| 3703 +# asm 2: vmlal.u32 <r2=q7,<x01=d16,<y12=d3 | |
| 3704 +vmlal.u32 q7,d16,d3 | |
| 3705 + | |
| 3706 +# qhasm: r2[0,1] += x01[2] unsigned* y12[0]; r2[2,3] += x01[3] unsigned* y12[
1] | |
| 3707 +# asm 1: vmlal.u32 <r2=reg128#8,<x01=reg128#9%top,<y12=reg128#2%bot | |
| 3708 +# asm 2: vmlal.u32 <r2=q7,<x01=d17,<y12=d2 | |
| 3709 +vmlal.u32 q7,d17,d2 | |
| 3710 + | |
| 3711 +# qhasm: r2[0,1] += x23[0] unsigned* y0[0]; r2[2,3] += x23[1] unsigned* y0[1] | |
| 3712 +# asm 1: vmlal.u32 <r2=reg128#8,<x23=reg128#10%bot,<y0=reg128#1%bot | |
| 3713 +# asm 2: vmlal.u32 <r2=q7,<x23=d18,<y0=d0 | |
| 3714 +vmlal.u32 q7,d18,d0 | |
| 3715 + | |
| 3716 +# qhasm: r2[0,1] += x23[2] unsigned* 5y34[2]; r2[2,3] += x23[3] unsigned* 5y3
4[3] | |
| 3717 +# asm 1: vmlal.u32 <r2=reg128#8,<x23=reg128#10%top,<5y34=reg128#13%top | |
| 3718 +# asm 2: vmlal.u32 <r2=q7,<x23=d19,<5y34=d25 | |
| 3719 +vmlal.u32 q7,d19,d25 | |
| 3720 + | |
| 3721 +# qhasm: r2[0,1] += x4[0] unsigned* 5y34[0]; r2[2,3] += x4[1] unsigned* 5y34[
1] | |
| 3722 +# asm 1: vmlal.u32 <r2=reg128#8,<x4=reg128#11%bot,<5y34=reg128#13%bot | |
| 3723 +# asm 2: vmlal.u32 <r2=q7,<x4=d20,<5y34=d24 | |
| 3724 +vmlal.u32 q7,d20,d24 | |
| 3725 + | |
| 3726 +# qhasm: 2x t1 = r0 unsigned>> 26 | |
| 3727 +# asm 1: vshr.u64 >t1=reg128#9,<r0=reg128#4,#26 | |
| 3728 +# asm 2: vshr.u64 >t1=q8,<r0=q3,#26 | |
| 3729 +vshr.u64 q8,q3,#26 | |
| 3730 + | |
| 3731 +# qhasm: r0 &= mask | |
| 3732 +# asm 1: vand >r0=reg128#4,<r0=reg128#4,<mask=reg128#7 | |
| 3733 +# asm 2: vand >r0=q3,<r0=q3,<mask=q6 | |
| 3734 +vand q3,q3,q6 | |
| 3735 + | |
| 3736 +# qhasm: 2x r1 += t1 | |
| 3737 +# asm 1: vadd.i64 >r1=reg128#9,<r1=reg128#14,<t1=reg128#9 | |
| 3738 +# asm 2: vadd.i64 >r1=q8,<r1=q13,<t1=q8 | |
| 3739 +vadd.i64 q8,q13,q8 | |
| 3740 + | |
| 3741 +# qhasm: 2x t4 = r3 unsigned>> 26 | |
| 3742 +# asm 1: vshr.u64 >t4=reg128#10,<r3=reg128#6,#26 | |
| 3743 +# asm 2: vshr.u64 >t4=q9,<r3=q5,#26 | |
| 3744 +vshr.u64 q9,q5,#26 | |
| 3745 + | |
| 3746 +# qhasm: r3 &= mask | |
| 3747 +# asm 1: vand >r3=reg128#6,<r3=reg128#6,<mask=reg128#7 | |
| 3748 +# asm 2: vand >r3=q5,<r3=q5,<mask=q6 | |
| 3749 +vand q5,q5,q6 | |
| 3750 + | |
| 3751 +# qhasm: 2x r4 += t4 | |
| 3752 +# asm 1: vadd.i64 >r4=reg128#5,<r4=reg128#5,<t4=reg128#10 | |
| 3753 +# asm 2: vadd.i64 >r4=q4,<r4=q4,<t4=q9 | |
| 3754 +vadd.i64 q4,q4,q9 | |
| 3755 + | |
| 3756 +# qhasm: 2x t2 = r1 unsigned>> 26 | |
| 3757 +# asm 1: vshr.u64 >t2=reg128#10,<r1=reg128#9,#26 | |
| 3758 +# asm 2: vshr.u64 >t2=q9,<r1=q8,#26 | |
| 3759 +vshr.u64 q9,q8,#26 | |
| 3760 + | |
| 3761 +# qhasm: r1 &= mask | |
| 3762 +# asm 1: vand >r1=reg128#11,<r1=reg128#9,<mask=reg128#7 | |
| 3763 +# asm 2: vand >r1=q10,<r1=q8,<mask=q6 | |
| 3764 +vand q10,q8,q6 | |
| 3765 + | |
| 3766 +# qhasm: 2x t0 = r4 unsigned>> 26 | |
| 3767 +# asm 1: vshr.u64 >t0=reg128#9,<r4=reg128#5,#26 | |
| 3768 +# asm 2: vshr.u64 >t0=q8,<r4=q4,#26 | |
| 3769 +vshr.u64 q8,q4,#26 | |
| 3770 + | |
| 3771 +# qhasm: 2x r2 += t2 | |
| 3772 +# asm 1: vadd.i64 >r2=reg128#8,<r2=reg128#8,<t2=reg128#10 | |
| 3773 +# asm 2: vadd.i64 >r2=q7,<r2=q7,<t2=q9 | |
| 3774 +vadd.i64 q7,q7,q9 | |
| 3775 + | |
| 3776 +# qhasm: r4 &= mask | |
| 3777 +# asm 1: vand >r4=reg128#5,<r4=reg128#5,<mask=reg128#7 | |
| 3778 +# asm 2: vand >r4=q4,<r4=q4,<mask=q6 | |
| 3779 +vand q4,q4,q6 | |
| 3780 + | |
| 3781 +# qhasm: 2x r0 += t0 | |
| 3782 +# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#9 | |
| 3783 +# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q8 | |
| 3784 +vadd.i64 q3,q3,q8 | |
| 3785 + | |
| 3786 +# qhasm: 2x t0 <<= 2 | |
| 3787 +# asm 1: vshl.i64 >t0=reg128#9,<t0=reg128#9,#2 | |
| 3788 +# asm 2: vshl.i64 >t0=q8,<t0=q8,#2 | |
| 3789 +vshl.i64 q8,q8,#2 | |
| 3790 + | |
| 3791 +# qhasm: 2x t3 = r2 unsigned>> 26 | |
| 3792 +# asm 1: vshr.u64 >t3=reg128#14,<r2=reg128#8,#26 | |
| 3793 +# asm 2: vshr.u64 >t3=q13,<r2=q7,#26 | |
| 3794 +vshr.u64 q13,q7,#26 | |
| 3795 + | |
| 3796 +# qhasm: 2x r0 += t0 | |
| 3797 +# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#9 | |
| 3798 +# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q8 | |
| 3799 +vadd.i64 q3,q3,q8 | |
| 3800 + | |
| 3801 +# qhasm: x23 = r2 & mask | |
| 3802 +# asm 1: vand >x23=reg128#10,<r2=reg128#8,<mask=reg128#7 | |
| 3803 +# asm 2: vand >x23=q9,<r2=q7,<mask=q6 | |
| 3804 +vand q9,q7,q6 | |
| 3805 + | |
| 3806 +# qhasm: 2x r3 += t3 | |
| 3807 +# asm 1: vadd.i64 >r3=reg128#6,<r3=reg128#6,<t3=reg128#14 | |
| 3808 +# asm 2: vadd.i64 >r3=q5,<r3=q5,<t3=q13 | |
| 3809 +vadd.i64 q5,q5,q13 | |
| 3810 + | |
| 3811 +# qhasm: 2x t1 = r0 unsigned>> 26 | |
| 3812 +# asm 1: vshr.u64 >t1=reg128#8,<r0=reg128#4,#26 | |
| 3813 +# asm 2: vshr.u64 >t1=q7,<r0=q3,#26 | |
| 3814 +vshr.u64 q7,q3,#26 | |
| 3815 + | |
| 3816 +# qhasm: x01 = r0 & mask | |
| 3817 +# asm 1: vand >x01=reg128#9,<r0=reg128#4,<mask=reg128#7 | |
| 3818 +# asm 2: vand >x01=q8,<r0=q3,<mask=q6 | |
| 3819 +vand q8,q3,q6 | |
| 3820 + | |
| 3821 +# qhasm: 2x r1 += t1 | |
| 3822 +# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#11,<t1=reg128#8 | |
| 3823 +# asm 2: vadd.i64 >r1=q3,<r1=q10,<t1=q7 | |
| 3824 +vadd.i64 q3,q10,q7 | |
| 3825 + | |
| 3826 +# qhasm: 2x t4 = r3 unsigned>> 26 | |
| 3827 +# asm 1: vshr.u64 >t4=reg128#8,<r3=reg128#6,#26 | |
| 3828 +# asm 2: vshr.u64 >t4=q7,<r3=q5,#26 | |
| 3829 +vshr.u64 q7,q5,#26 | |
| 3830 + | |
| 3831 +# qhasm: r3 &= mask | |
| 3832 +# asm 1: vand >r3=reg128#6,<r3=reg128#6,<mask=reg128#7 | |
| 3833 +# asm 2: vand >r3=q5,<r3=q5,<mask=q6 | |
| 3834 +vand q5,q5,q6 | |
| 3835 + | |
| 3836 +# qhasm: 2x x4 = r4 + t4 | |
| 3837 +# asm 1: vadd.i64 >x4=reg128#11,<r4=reg128#5,<t4=reg128#8 | |
| 3838 +# asm 2: vadd.i64 >x4=q10,<r4=q4,<t4=q7 | |
| 3839 +vadd.i64 q10,q4,q7 | |
| 3840 + | |
| 3841 +# qhasm: len -= 32 | |
| 3842 +# asm 1: sub >len=int32#4,<len=int32#4,#32 | |
| 3843 +# asm 2: sub >len=r3,<len=r3,#32 | |
| 3844 +sub r3,r3,#32 | |
| 3845 + | |
| 3846 +# qhasm: x01 = x01[0,2,1,3] | |
| 3847 +# asm 1: vtrn.32 <x01=reg128#9%bot,<x01=reg128#9%top | |
| 3848 +# asm 2: vtrn.32 <x01=d16,<x01=d17 | |
| 3849 +vtrn.32 d16,d17 | |
| 3850 + | |
| 3851 +# qhasm: x23 = x23[0,2,1,3] | |
| 3852 +# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top | |
| 3853 +# asm 2: vtrn.32 <x23=d18,<x23=d19 | |
| 3854 +vtrn.32 d18,d19 | |
| 3855 + | |
| 3856 +# qhasm: r1 = r1[0,2,1,3] | |
| 3857 +# asm 1: vtrn.32 <r1=reg128#4%bot,<r1=reg128#4%top | |
| 3858 +# asm 2: vtrn.32 <r1=d6,<r1=d7 | |
| 3859 +vtrn.32 d6,d7 | |
| 3860 + | |
| 3861 +# qhasm: r3 = r3[0,2,1,3] | |
| 3862 +# asm 1: vtrn.32 <r3=reg128#6%bot,<r3=reg128#6%top | |
| 3863 +# asm 2: vtrn.32 <r3=d10,<r3=d11 | |
| 3864 +vtrn.32 d10,d11 | |
| 3865 + | |
| 3866 +# qhasm: x4 = x4[0,2,1,3] | |
| 3867 +# asm 1: vtrn.32 <x4=reg128#11%bot,<x4=reg128#11%top | |
| 3868 +# asm 2: vtrn.32 <x4=d20,<x4=d21 | |
| 3869 +vtrn.32 d20,d21 | |
| 3870 + | |
| 3871 +# qhasm: x01 = x01[0,1] r1[0,1] | |
| 3872 +# asm 1: vext.32 <x01=reg128#9%top,<r1=reg128#4%bot,<r1=reg128#4%bot,#0 | |
| 3873 +# asm 2: vext.32 <x01=d17,<r1=d6,<r1=d6,#0 | |
| 3874 +vext.32 d17,d6,d6,#0 | |
| 3875 + | |
| 3876 +# qhasm: x23 = x23[0,1] r3[0,1] | |
| 3877 +# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#6%bot,<r3=reg128#6%bot,#0 | |
| 3878 +# asm 2: vext.32 <x23=d19,<r3=d10,<r3=d10,#0 | |
| 3879 +vext.32 d19,d10,d10,#0 | |
| 3880 + | |
| 3881 +# qhasm: unsigned>? len - 32 | |
| 3882 +# asm 1: cmp <len=int32#4,#32 | |
| 3883 +# asm 2: cmp <len=r3,#32 | |
| 3884 +cmp r3,#32 | |
| 3885 + | |
| 3886 +# qhasm: goto mainloop if unsigned> | |
| 3887 +bhi ._mainloop | |
| 3888 + | |
| 3889 +# qhasm: end: | |
| 3890 +._end: | |
| 3891 + | |
| 3892 +# qhasm: mem128[input_0] = x01;input_0+=16 | |
| 3893 +# asm 1: vst1.8 {<x01=reg128#9%bot-<x01=reg128#9%top},[<input_0=int32#1]! | |
| 3894 +# asm 2: vst1.8 {<x01=d16-<x01=d17},[<input_0=r0]! | |
| 3895 +vst1.8 {d16-d17},[r0]! | |
| 3896 + | |
| 3897 +# qhasm: mem128[input_0] = x23;input_0+=16 | |
| 3898 +# asm 1: vst1.8 {<x23=reg128#10%bot-<x23=reg128#10%top},[<input_0=int32#1]! | |
| 3899 +# asm 2: vst1.8 {<x23=d18-<x23=d19},[<input_0=r0]! | |
| 3900 +vst1.8 {d18-d19},[r0]! | |
| 3901 + | |
| 3902 +# qhasm: mem64[input_0] = x4[0] | |
| 3903 +# asm 1: vst1.8 <x4=reg128#11%bot,[<input_0=int32#1] | |
| 3904 +# asm 2: vst1.8 <x4=d20,[<input_0=r0] | |
| 3905 +vst1.8 d20,[r0] | |
| 3906 + | |
| 3907 +# qhasm: len = len | |
| 3908 +# asm 1: mov >len=int32#1,<len=int32#4 | |
| 3909 +# asm 2: mov >len=r0,<len=r3 | |
| 3910 +mov r0,r3 | |
| 3911 + | |
| 3912 +# qhasm: qpopreturn len | |
| 3913 +mov sp,r12 | |
| 3914 +vpop {q4,q5,q6,q7} | |
| 3915 +bx lr | |
| 3916 + | |
| 3917 +# qhasm: int32 input_0 | |
| 3918 + | |
| 3919 +# qhasm: int32 input_1 | |
| 3920 + | |
| 3921 +# qhasm: int32 input_2 | |
| 3922 + | |
| 3923 +# qhasm: int32 input_3 | |
| 3924 + | |
| 3925 +# qhasm: stack32 input_4 | |
| 3926 + | |
| 3927 +# qhasm: stack32 input_5 | |
| 3928 + | |
| 3929 +# qhasm: stack32 input_6 | |
| 3930 + | |
| 3931 +# qhasm: stack32 input_7 | |
| 3932 + | |
| 3933 +# qhasm: int32 caller_r4 | |
| 3934 + | |
| 3935 +# qhasm: int32 caller_r5 | |
| 3936 + | |
| 3937 +# qhasm: int32 caller_r6 | |
| 3938 + | |
| 3939 +# qhasm: int32 caller_r7 | |
| 3940 + | |
| 3941 +# qhasm: int32 caller_r8 | |
| 3942 + | |
| 3943 +# qhasm: int32 caller_r9 | |
| 3944 + | |
| 3945 +# qhasm: int32 caller_r10 | |
| 3946 + | |
| 3947 +# qhasm: int32 caller_r11 | |
| 3948 + | |
| 3949 +# qhasm: int32 caller_r12 | |
| 3950 + | |
| 3951 +# qhasm: int32 caller_r14 | |
| 3952 + | |
| 3953 +# qhasm: reg128 caller_q4 | |
| 3954 + | |
| 3955 +# qhasm: reg128 caller_q5 | |
| 3956 + | |
| 3957 +# qhasm: reg128 caller_q6 | |
| 3958 + | |
| 3959 +# qhasm: reg128 caller_q7 | |
| 3960 + | |
| 3961 +# qhasm: reg128 r0 | |
| 3962 + | |
| 3963 +# qhasm: reg128 r1 | |
| 3964 + | |
| 3965 +# qhasm: reg128 r2 | |
| 3966 + | |
| 3967 +# qhasm: reg128 r3 | |
| 3968 + | |
| 3969 +# qhasm: reg128 r4 | |
| 3970 + | |
| 3971 +# qhasm: reg128 x01 | |
| 3972 + | |
| 3973 +# qhasm: reg128 x23 | |
| 3974 + | |
| 3975 +# qhasm: reg128 x4 | |
| 3976 + | |
| 3977 +# qhasm: reg128 y01 | |
| 3978 + | |
| 3979 +# qhasm: reg128 y23 | |
| 3980 + | |
| 3981 +# qhasm: reg128 y4 | |
| 3982 + | |
| 3983 +# qhasm: reg128 _5y01 | |
| 3984 + | |
| 3985 +# qhasm: reg128 _5y23 | |
| 3986 + | |
| 3987 +# qhasm: reg128 _5y4 | |
| 3988 + | |
| 3989 +# qhasm: reg128 c01 | |
| 3990 + | |
| 3991 +# qhasm: reg128 c23 | |
| 3992 + | |
| 3993 +# qhasm: reg128 c4 | |
| 3994 + | |
| 3995 +# qhasm: reg128 t0 | |
| 3996 + | |
| 3997 +# qhasm: reg128 t1 | |
| 3998 + | |
| 3999 +# qhasm: reg128 t2 | |
| 4000 + | |
| 4001 +# qhasm: reg128 t3 | |
| 4002 + | |
| 4003 +# qhasm: reg128 t4 | |
| 4004 + | |
| 4005 +# qhasm: reg128 mask | |
| 4006 + | |
| 4007 +# qhasm: enter crypto_onetimeauth_poly1305_neon2_addmulmod | |
| 4008 +.align 2 | |
| 4009 +.global openssl_poly1305_neon2_addmulmod | |
| 4010 +.type openssl_poly1305_neon2_addmulmod STT_FUNC | |
| 4011 +openssl_poly1305_neon2_addmulmod: | |
| 4012 +sub sp,sp,#0 | |
| 4013 + | |
| 4014 +# qhasm: 2x mask = 0xffffffff | |
| 4015 +# asm 1: vmov.i64 >mask=reg128#1,#0xffffffff | |
| 4016 +# asm 2: vmov.i64 >mask=q0,#0xffffffff | |
| 4017 +vmov.i64 q0,#0xffffffff | |
| 4018 + | |
| 4019 +# qhasm: y01 aligned= mem128[input_2];input_2+=16 | |
| 4020 +# asm 1: vld1.8 {>y01=reg128#2%bot->y01=reg128#2%top},[<input_2=int32#3,: 128]! | |
| 4021 +# asm 2: vld1.8 {>y01=d2->y01=d3},[<input_2=r2,: 128]! | |
| 4022 +vld1.8 {d2-d3},[r2,: 128]! | |
| 4023 + | |
| 4024 +# qhasm: 4x _5y01 = y01 << 2 | |
| 4025 +# asm 1: vshl.i32 >_5y01=reg128#3,<y01=reg128#2,#2 | |
| 4026 +# asm 2: vshl.i32 >_5y01=q2,<y01=q1,#2 | |
| 4027 +vshl.i32 q2,q1,#2 | |
| 4028 + | |
| 4029 +# qhasm: y23 aligned= mem128[input_2];input_2+=16 | |
| 4030 +# asm 1: vld1.8 {>y23=reg128#4%bot->y23=reg128#4%top},[<input_2=int32#3,: 128]! | |
| 4031 +# asm 2: vld1.8 {>y23=d6->y23=d7},[<input_2=r2,: 128]! | |
| 4032 +vld1.8 {d6-d7},[r2,: 128]! | |
| 4033 + | |
| 4034 +# qhasm: 4x _5y23 = y23 << 2 | |
| 4035 +# asm 1: vshl.i32 >_5y23=reg128#9,<y23=reg128#4,#2 | |
| 4036 +# asm 2: vshl.i32 >_5y23=q8,<y23=q3,#2 | |
| 4037 +vshl.i32 q8,q3,#2 | |
| 4038 + | |
| 4039 +# qhasm: y4 aligned= mem64[input_2]y4[1] | |
| 4040 +# asm 1: vld1.8 {<y4=reg128#10%bot},[<input_2=int32#3,: 64] | |
| 4041 +# asm 2: vld1.8 {<y4=d18},[<input_2=r2,: 64] | |
| 4042 +vld1.8 {d18},[r2,: 64] | |
| 4043 + | |
| 4044 +# qhasm: 4x _5y4 = y4 << 2 | |
| 4045 +# asm 1: vshl.i32 >_5y4=reg128#11,<y4=reg128#10,#2 | |
| 4046 +# asm 2: vshl.i32 >_5y4=q10,<y4=q9,#2 | |
| 4047 +vshl.i32 q10,q9,#2 | |
| 4048 + | |
| 4049 +# qhasm: x01 aligned= mem128[input_1];input_1+=16 | |
| 4050 +# asm 1: vld1.8 {>x01=reg128#12%bot->x01=reg128#12%top},[<input_1=int32#2,: 128
]! | |
| 4051 +# asm 2: vld1.8 {>x01=d22->x01=d23},[<input_1=r1,: 128]! | |
| 4052 +vld1.8 {d22-d23},[r1,: 128]! | |
| 4053 + | |
| 4054 +# qhasm: 4x _5y01 += y01 | |
| 4055 +# asm 1: vadd.i32 >_5y01=reg128#3,<_5y01=reg128#3,<y01=reg128#2 | |
| 4056 +# asm 2: vadd.i32 >_5y01=q2,<_5y01=q2,<y01=q1 | |
| 4057 +vadd.i32 q2,q2,q1 | |
| 4058 + | |
| 4059 +# qhasm: x23 aligned= mem128[input_1];input_1+=16 | |
| 4060 +# asm 1: vld1.8 {>x23=reg128#13%bot->x23=reg128#13%top},[<input_1=int32#2,: 128
]! | |
| 4061 +# asm 2: vld1.8 {>x23=d24->x23=d25},[<input_1=r1,: 128]! | |
| 4062 +vld1.8 {d24-d25},[r1,: 128]! | |
| 4063 + | |
| 4064 +# qhasm: 4x _5y23 += y23 | |
| 4065 +# asm 1: vadd.i32 >_5y23=reg128#9,<_5y23=reg128#9,<y23=reg128#4 | |
| 4066 +# asm 2: vadd.i32 >_5y23=q8,<_5y23=q8,<y23=q3 | |
| 4067 +vadd.i32 q8,q8,q3 | |
| 4068 + | |
| 4069 +# qhasm: 4x _5y4 += y4 | |
| 4070 +# asm 1: vadd.i32 >_5y4=reg128#11,<_5y4=reg128#11,<y4=reg128#10 | |
| 4071 +# asm 2: vadd.i32 >_5y4=q10,<_5y4=q10,<y4=q9 | |
| 4072 +vadd.i32 q10,q10,q9 | |
| 4073 + | |
| 4074 +# qhasm: c01 aligned= mem128[input_3];input_3+=16 | |
| 4075 +# asm 1: vld1.8 {>c01=reg128#14%bot->c01=reg128#14%top},[<input_3=int32#4,: 128
]! | |
| 4076 +# asm 2: vld1.8 {>c01=d26->c01=d27},[<input_3=r3,: 128]! | |
| 4077 +vld1.8 {d26-d27},[r3,: 128]! | |
| 4078 + | |
| 4079 +# qhasm: 4x x01 += c01 | |
| 4080 +# asm 1: vadd.i32 >x01=reg128#12,<x01=reg128#12,<c01=reg128#14 | |
| 4081 +# asm 2: vadd.i32 >x01=q11,<x01=q11,<c01=q13 | |
| 4082 +vadd.i32 q11,q11,q13 | |
| 4083 + | |
| 4084 +# qhasm: c23 aligned= mem128[input_3];input_3+=16 | |
| 4085 +# asm 1: vld1.8 {>c23=reg128#14%bot->c23=reg128#14%top},[<input_3=int32#4,: 128
]! | |
| 4086 +# asm 2: vld1.8 {>c23=d26->c23=d27},[<input_3=r3,: 128]! | |
| 4087 +vld1.8 {d26-d27},[r3,: 128]! | |
| 4088 + | |
| 4089 +# qhasm: 4x x23 += c23 | |
| 4090 +# asm 1: vadd.i32 >x23=reg128#13,<x23=reg128#13,<c23=reg128#14 | |
| 4091 +# asm 2: vadd.i32 >x23=q12,<x23=q12,<c23=q13 | |
| 4092 +vadd.i32 q12,q12,q13 | |
| 4093 + | |
| 4094 +# qhasm: x4 aligned= mem64[input_1]x4[1] | |
| 4095 +# asm 1: vld1.8 {<x4=reg128#14%bot},[<input_1=int32#2,: 64] | |
| 4096 +# asm 2: vld1.8 {<x4=d26},[<input_1=r1,: 64] | |
| 4097 +vld1.8 {d26},[r1,: 64] | |
| 4098 + | |
| 4099 +# qhasm: 2x mask unsigned>>=6 | |
| 4100 +# asm 1: vshr.u64 >mask=reg128#1,<mask=reg128#1,#6 | |
| 4101 +# asm 2: vshr.u64 >mask=q0,<mask=q0,#6 | |
| 4102 +vshr.u64 q0,q0,#6 | |
| 4103 + | |
| 4104 +# qhasm: c4 aligned= mem64[input_3]c4[1] | |
| 4105 +# asm 1: vld1.8 {<c4=reg128#15%bot},[<input_3=int32#4,: 64] | |
| 4106 +# asm 2: vld1.8 {<c4=d28},[<input_3=r3,: 64] | |
| 4107 +vld1.8 {d28},[r3,: 64] | |
| 4108 + | |
| 4109 +# qhasm: 4x x4 += c4 | |
| 4110 +# asm 1: vadd.i32 >x4=reg128#14,<x4=reg128#14,<c4=reg128#15 | |
| 4111 +# asm 2: vadd.i32 >x4=q13,<x4=q13,<c4=q14 | |
| 4112 +vadd.i32 q13,q13,q14 | |
| 4113 + | |
| 4114 +# qhasm: r0[0,1] = x01[0] unsigned* y01[0]; r0[2,3] = x01[1] unsigned* y01[
1] | |
| 4115 +# asm 1: vmull.u32 >r0=reg128#15,<x01=reg128#12%bot,<y01=reg128#2%bot | |
| 4116 +# asm 2: vmull.u32 >r0=q14,<x01=d22,<y01=d2 | |
| 4117 +vmull.u32 q14,d22,d2 | |
| 4118 + | |
| 4119 +# qhasm: r0[0,1] += x01[2] unsigned* _5y4[0]; r0[2,3] += x01[3] unsigned* _5y
4[1] | |
| 4120 +# asm 1: vmlal.u32 <r0=reg128#15,<x01=reg128#12%top,<_5y4=reg128#11%bot | |
| 4121 +# asm 2: vmlal.u32 <r0=q14,<x01=d23,<_5y4=d20 | |
| 4122 +vmlal.u32 q14,d23,d20 | |
| 4123 + | |
| 4124 +# qhasm: r0[0,1] += x23[0] unsigned* _5y23[2]; r0[2,3] += x23[1] unsigned* _5y2
3[3] | |
| 4125 +# asm 1: vmlal.u32 <r0=reg128#15,<x23=reg128#13%bot,<_5y23=reg128#9%top | |
| 4126 +# asm 2: vmlal.u32 <r0=q14,<x23=d24,<_5y23=d17 | |
| 4127 +vmlal.u32 q14,d24,d17 | |
| 4128 + | |
| 4129 +# qhasm: r0[0,1] += x23[2] unsigned* _5y23[0]; r0[2,3] += x23[3] unsigned* _5y2
3[1] | |
| 4130 +# asm 1: vmlal.u32 <r0=reg128#15,<x23=reg128#13%top,<_5y23=reg128#9%bot | |
| 4131 +# asm 2: vmlal.u32 <r0=q14,<x23=d25,<_5y23=d16 | |
| 4132 +vmlal.u32 q14,d25,d16 | |
| 4133 + | |
| 4134 +# qhasm: r0[0,1] += x4[0] unsigned* _5y01[2]; r0[2,3] += x4[1] unsigned* _5y0
1[3] | |
| 4135 +# asm 1: vmlal.u32 <r0=reg128#15,<x4=reg128#14%bot,<_5y01=reg128#3%top | |
| 4136 +# asm 2: vmlal.u32 <r0=q14,<x4=d26,<_5y01=d5 | |
| 4137 +vmlal.u32 q14,d26,d5 | |
| 4138 + | |
| 4139 +# qhasm: r1[0,1] = x01[0] unsigned* y01[2]; r1[2,3] = x01[1] unsigned* y01[
3] | |
| 4140 +# asm 1: vmull.u32 >r1=reg128#3,<x01=reg128#12%bot,<y01=reg128#2%top | |
| 4141 +# asm 2: vmull.u32 >r1=q2,<x01=d22,<y01=d3 | |
| 4142 +vmull.u32 q2,d22,d3 | |
| 4143 + | |
| 4144 +# qhasm: r1[0,1] += x01[2] unsigned* y01[0]; r1[2,3] += x01[3] unsigned* y01[
1] | |
| 4145 +# asm 1: vmlal.u32 <r1=reg128#3,<x01=reg128#12%top,<y01=reg128#2%bot | |
| 4146 +# asm 2: vmlal.u32 <r1=q2,<x01=d23,<y01=d2 | |
| 4147 +vmlal.u32 q2,d23,d2 | |
| 4148 + | |
| 4149 +# qhasm: r1[0,1] += x23[0] unsigned* _5y4[0]; r1[2,3] += x23[1] unsigned* _5y
4[1] | |
| 4150 +# asm 1: vmlal.u32 <r1=reg128#3,<x23=reg128#13%bot,<_5y4=reg128#11%bot | |
| 4151 +# asm 2: vmlal.u32 <r1=q2,<x23=d24,<_5y4=d20 | |
| 4152 +vmlal.u32 q2,d24,d20 | |
| 4153 + | |
| 4154 +# qhasm: r1[0,1] += x23[2] unsigned* _5y23[2]; r1[2,3] += x23[3] unsigned* _5y2
3[3] | |
| 4155 +# asm 1: vmlal.u32 <r1=reg128#3,<x23=reg128#13%top,<_5y23=reg128#9%top | |
| 4156 +# asm 2: vmlal.u32 <r1=q2,<x23=d25,<_5y23=d17 | |
| 4157 +vmlal.u32 q2,d25,d17 | |
| 4158 + | |
| 4159 +# qhasm: r1[0,1] += x4[0] unsigned* _5y23[0]; r1[2,3] += x4[1] unsigned* _5y2
3[1] | |
| 4160 +# asm 1: vmlal.u32 <r1=reg128#3,<x4=reg128#14%bot,<_5y23=reg128#9%bot | |
| 4161 +# asm 2: vmlal.u32 <r1=q2,<x4=d26,<_5y23=d16 | |
| 4162 +vmlal.u32 q2,d26,d16 | |
| 4163 + | |
| 4164 +# qhasm: r2[0,1] = x01[0] unsigned* y23[0]; r2[2,3] = x01[1] unsigned* y23[
1] | |
| 4165 +# asm 1: vmull.u32 >r2=reg128#16,<x01=reg128#12%bot,<y23=reg128#4%bot | |
| 4166 +# asm 2: vmull.u32 >r2=q15,<x01=d22,<y23=d6 | |
| 4167 +vmull.u32 q15,d22,d6 | |
| 4168 + | |
| 4169 +# qhasm: r2[0,1] += x01[2] unsigned* y01[2]; r2[2,3] += x01[3] unsigned* y01[
3] | |
| 4170 +# asm 1: vmlal.u32 <r2=reg128#16,<x01=reg128#12%top,<y01=reg128#2%top | |
| 4171 +# asm 2: vmlal.u32 <r2=q15,<x01=d23,<y01=d3 | |
| 4172 +vmlal.u32 q15,d23,d3 | |
| 4173 + | |
| 4174 +# qhasm: r2[0,1] += x23[0] unsigned* y01[0]; r2[2,3] += x23[1] unsigned* y01[
1] | |
| 4175 +# asm 1: vmlal.u32 <r2=reg128#16,<x23=reg128#13%bot,<y01=reg128#2%bot | |
| 4176 +# asm 2: vmlal.u32 <r2=q15,<x23=d24,<y01=d2 | |
| 4177 +vmlal.u32 q15,d24,d2 | |
| 4178 + | |
| 4179 +# qhasm: r2[0,1] += x23[2] unsigned* _5y4[0]; r2[2,3] += x23[3] unsigned* _5y
4[1] | |
| 4180 +# asm 1: vmlal.u32 <r2=reg128#16,<x23=reg128#13%top,<_5y4=reg128#11%bot | |
| 4181 +# asm 2: vmlal.u32 <r2=q15,<x23=d25,<_5y4=d20 | |
| 4182 +vmlal.u32 q15,d25,d20 | |
| 4183 + | |
| 4184 +# qhasm: r2[0,1] += x4[0] unsigned* _5y23[2]; r2[2,3] += x4[1] unsigned* _5y2
3[3] | |
| 4185 +# asm 1: vmlal.u32 <r2=reg128#16,<x4=reg128#14%bot,<_5y23=reg128#9%top | |
| 4186 +# asm 2: vmlal.u32 <r2=q15,<x4=d26,<_5y23=d17 | |
| 4187 +vmlal.u32 q15,d26,d17 | |
| 4188 + | |
| 4189 +# qhasm: r3[0,1] = x01[0] unsigned* y23[2]; r3[2,3] = x01[1] unsigned* y23[
3] | |
| 4190 +# asm 1: vmull.u32 >r3=reg128#9,<x01=reg128#12%bot,<y23=reg128#4%top | |
| 4191 +# asm 2: vmull.u32 >r3=q8,<x01=d22,<y23=d7 | |
| 4192 +vmull.u32 q8,d22,d7 | |
| 4193 + | |
| 4194 +# qhasm: r3[0,1] += x01[2] unsigned* y23[0]; r3[2,3] += x01[3] unsigned* y23[
1] | |
| 4195 +# asm 1: vmlal.u32 <r3=reg128#9,<x01=reg128#12%top,<y23=reg128#4%bot | |
| 4196 +# asm 2: vmlal.u32 <r3=q8,<x01=d23,<y23=d6 | |
| 4197 +vmlal.u32 q8,d23,d6 | |
| 4198 + | |
| 4199 +# qhasm: r3[0,1] += x23[0] unsigned* y01[2]; r3[2,3] += x23[1] unsigned* y01[
3] | |
| 4200 +# asm 1: vmlal.u32 <r3=reg128#9,<x23=reg128#13%bot,<y01=reg128#2%top | |
| 4201 +# asm 2: vmlal.u32 <r3=q8,<x23=d24,<y01=d3 | |
| 4202 +vmlal.u32 q8,d24,d3 | |
| 4203 + | |
| 4204 +# qhasm: r3[0,1] += x23[2] unsigned* y01[0]; r3[2,3] += x23[3] unsigned* y01[
1] | |
| 4205 +# asm 1: vmlal.u32 <r3=reg128#9,<x23=reg128#13%top,<y01=reg128#2%bot | |
| 4206 +# asm 2: vmlal.u32 <r3=q8,<x23=d25,<y01=d2 | |
| 4207 +vmlal.u32 q8,d25,d2 | |
| 4208 + | |
| 4209 +# qhasm: r3[0,1] += x4[0] unsigned* _5y4[0]; r3[2,3] += x4[1] unsigned* _5y
4[1] | |
| 4210 +# asm 1: vmlal.u32 <r3=reg128#9,<x4=reg128#14%bot,<_5y4=reg128#11%bot | |
| 4211 +# asm 2: vmlal.u32 <r3=q8,<x4=d26,<_5y4=d20 | |
| 4212 +vmlal.u32 q8,d26,d20 | |
| 4213 + | |
| 4214 +# qhasm: r4[0,1] = x01[0] unsigned* y4[0]; r4[2,3] = x01[1] unsigned* y4[1
] | |
| 4215 +# asm 1: vmull.u32 >r4=reg128#10,<x01=reg128#12%bot,<y4=reg128#10%bot | |
| 4216 +# asm 2: vmull.u32 >r4=q9,<x01=d22,<y4=d18 | |
| 4217 +vmull.u32 q9,d22,d18 | |
| 4218 + | |
| 4219 +# qhasm: r4[0,1] += x01[2] unsigned* y23[2]; r4[2,3] += x01[3] unsigned* y23[3
] | |
| 4220 +# asm 1: vmlal.u32 <r4=reg128#10,<x01=reg128#12%top,<y23=reg128#4%top | |
| 4221 +# asm 2: vmlal.u32 <r4=q9,<x01=d23,<y23=d7 | |
| 4222 +vmlal.u32 q9,d23,d7 | |
| 4223 + | |
| 4224 +# qhasm: r4[0,1] += x23[0] unsigned* y23[0]; r4[2,3] += x23[1] unsigned* y23[1
] | |
| 4225 +# asm 1: vmlal.u32 <r4=reg128#10,<x23=reg128#13%bot,<y23=reg128#4%bot | |
| 4226 +# asm 2: vmlal.u32 <r4=q9,<x23=d24,<y23=d6 | |
| 4227 +vmlal.u32 q9,d24,d6 | |
| 4228 + | |
| 4229 +# qhasm: r4[0,1] += x23[2] unsigned* y01[2]; r4[2,3] += x23[3] unsigned* y01[3
] | |
| 4230 +# asm 1: vmlal.u32 <r4=reg128#10,<x23=reg128#13%top,<y01=reg128#2%top | |
| 4231 +# asm 2: vmlal.u32 <r4=q9,<x23=d25,<y01=d3 | |
| 4232 +vmlal.u32 q9,d25,d3 | |
| 4233 + | |
| 4234 +# qhasm: r4[0,1] += x4[0] unsigned* y01[0]; r4[2,3] += x4[1] unsigned* y01[1
] | |
| 4235 +# asm 1: vmlal.u32 <r4=reg128#10,<x4=reg128#14%bot,<y01=reg128#2%bot | |
| 4236 +# asm 2: vmlal.u32 <r4=q9,<x4=d26,<y01=d2 | |
| 4237 +vmlal.u32 q9,d26,d2 | |
| 4238 + | |
| 4239 +# qhasm: 2x t1 = r0 unsigned>> 26 | |
| 4240 +# asm 1: vshr.u64 >t1=reg128#2,<r0=reg128#15,#26 | |
| 4241 +# asm 2: vshr.u64 >t1=q1,<r0=q14,#26 | |
| 4242 +vshr.u64 q1,q14,#26 | |
| 4243 + | |
| 4244 +# qhasm: r0 &= mask | |
| 4245 +# asm 1: vand >r0=reg128#4,<r0=reg128#15,<mask=reg128#1 | |
| 4246 +# asm 2: vand >r0=q3,<r0=q14,<mask=q0 | |
| 4247 +vand q3,q14,q0 | |
| 4248 + | |
| 4249 +# qhasm: 2x r1 += t1 | |
| 4250 +# asm 1: vadd.i64 >r1=reg128#2,<r1=reg128#3,<t1=reg128#2 | |
| 4251 +# asm 2: vadd.i64 >r1=q1,<r1=q2,<t1=q1 | |
| 4252 +vadd.i64 q1,q2,q1 | |
| 4253 + | |
| 4254 +# qhasm: 2x t4 = r3 unsigned>> 26 | |
| 4255 +# asm 1: vshr.u64 >t4=reg128#3,<r3=reg128#9,#26 | |
| 4256 +# asm 2: vshr.u64 >t4=q2,<r3=q8,#26 | |
| 4257 +vshr.u64 q2,q8,#26 | |
| 4258 + | |
| 4259 +# qhasm: r3 &= mask | |
| 4260 +# asm 1: vand >r3=reg128#9,<r3=reg128#9,<mask=reg128#1 | |
| 4261 +# asm 2: vand >r3=q8,<r3=q8,<mask=q0 | |
| 4262 +vand q8,q8,q0 | |
| 4263 + | |
| 4264 +# qhasm: 2x r4 += t4 | |
| 4265 +# asm 1: vadd.i64 >r4=reg128#3,<r4=reg128#10,<t4=reg128#3 | |
| 4266 +# asm 2: vadd.i64 >r4=q2,<r4=q9,<t4=q2 | |
| 4267 +vadd.i64 q2,q9,q2 | |
| 4268 + | |
| 4269 +# qhasm: 2x t2 = r1 unsigned>> 26 | |
| 4270 +# asm 1: vshr.u64 >t2=reg128#10,<r1=reg128#2,#26 | |
| 4271 +# asm 2: vshr.u64 >t2=q9,<r1=q1,#26 | |
| 4272 +vshr.u64 q9,q1,#26 | |
| 4273 + | |
| 4274 +# qhasm: r1 &= mask | |
| 4275 +# asm 1: vand >r1=reg128#2,<r1=reg128#2,<mask=reg128#1 | |
| 4276 +# asm 2: vand >r1=q1,<r1=q1,<mask=q0 | |
| 4277 +vand q1,q1,q0 | |
| 4278 + | |
| 4279 +# qhasm: 2x t0 = r4 unsigned>> 26 | |
| 4280 +# asm 1: vshr.u64 >t0=reg128#11,<r4=reg128#3,#26 | |
| 4281 +# asm 2: vshr.u64 >t0=q10,<r4=q2,#26 | |
| 4282 +vshr.u64 q10,q2,#26 | |
| 4283 + | |
| 4284 +# qhasm: 2x r2 += t2 | |
| 4285 +# asm 1: vadd.i64 >r2=reg128#10,<r2=reg128#16,<t2=reg128#10 | |
| 4286 +# asm 2: vadd.i64 >r2=q9,<r2=q15,<t2=q9 | |
| 4287 +vadd.i64 q9,q15,q9 | |
| 4288 + | |
| 4289 +# qhasm: r4 &= mask | |
| 4290 +# asm 1: vand >r4=reg128#3,<r4=reg128#3,<mask=reg128#1 | |
| 4291 +# asm 2: vand >r4=q2,<r4=q2,<mask=q0 | |
| 4292 +vand q2,q2,q0 | |
| 4293 + | |
| 4294 +# qhasm: 2x r0 += t0 | |
| 4295 +# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#11 | |
| 4296 +# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q10 | |
| 4297 +vadd.i64 q3,q3,q10 | |
| 4298 + | |
| 4299 +# qhasm: 2x t0 <<= 2 | |
| 4300 +# asm 1: vshl.i64 >t0=reg128#11,<t0=reg128#11,#2 | |
| 4301 +# asm 2: vshl.i64 >t0=q10,<t0=q10,#2 | |
| 4302 +vshl.i64 q10,q10,#2 | |
| 4303 + | |
| 4304 +# qhasm: 2x t3 = r2 unsigned>> 26 | |
| 4305 +# asm 1: vshr.u64 >t3=reg128#12,<r2=reg128#10,#26 | |
| 4306 +# asm 2: vshr.u64 >t3=q11,<r2=q9,#26 | |
| 4307 +vshr.u64 q11,q9,#26 | |
| 4308 + | |
| 4309 +# qhasm: 2x r0 += t0 | |
| 4310 +# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#11 | |
| 4311 +# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q10 | |
| 4312 +vadd.i64 q3,q3,q10 | |
| 4313 + | |
| 4314 +# qhasm: x23 = r2 & mask | |
| 4315 +# asm 1: vand >x23=reg128#10,<r2=reg128#10,<mask=reg128#1 | |
| 4316 +# asm 2: vand >x23=q9,<r2=q9,<mask=q0 | |
| 4317 +vand q9,q9,q0 | |
| 4318 + | |
| 4319 +# qhasm: 2x r3 += t3 | |
| 4320 +# asm 1: vadd.i64 >r3=reg128#9,<r3=reg128#9,<t3=reg128#12 | |
| 4321 +# asm 2: vadd.i64 >r3=q8,<r3=q8,<t3=q11 | |
| 4322 +vadd.i64 q8,q8,q11 | |
| 4323 + | |
| 4324 +# qhasm: 2x t1 = r0 unsigned>> 26 | |
| 4325 +# asm 1: vshr.u64 >t1=reg128#11,<r0=reg128#4,#26 | |
| 4326 +# asm 2: vshr.u64 >t1=q10,<r0=q3,#26 | |
| 4327 +vshr.u64 q10,q3,#26 | |
| 4328 + | |
| 4329 +# qhasm: x23 = x23[0,2,1,3] | |
| 4330 +# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top | |
| 4331 +# asm 2: vtrn.32 <x23=d18,<x23=d19 | |
| 4332 +vtrn.32 d18,d19 | |
| 4333 + | |
| 4334 +# qhasm: x01 = r0 & mask | |
| 4335 +# asm 1: vand >x01=reg128#4,<r0=reg128#4,<mask=reg128#1 | |
| 4336 +# asm 2: vand >x01=q3,<r0=q3,<mask=q0 | |
| 4337 +vand q3,q3,q0 | |
| 4338 + | |
| 4339 +# qhasm: 2x r1 += t1 | |
| 4340 +# asm 1: vadd.i64 >r1=reg128#2,<r1=reg128#2,<t1=reg128#11 | |
| 4341 +# asm 2: vadd.i64 >r1=q1,<r1=q1,<t1=q10 | |
| 4342 +vadd.i64 q1,q1,q10 | |
| 4343 + | |
| 4344 +# qhasm: 2x t4 = r3 unsigned>> 26 | |
| 4345 +# asm 1: vshr.u64 >t4=reg128#11,<r3=reg128#9,#26 | |
| 4346 +# asm 2: vshr.u64 >t4=q10,<r3=q8,#26 | |
| 4347 +vshr.u64 q10,q8,#26 | |
| 4348 + | |
| 4349 +# qhasm: x01 = x01[0,2,1,3] | |
| 4350 +# asm 1: vtrn.32 <x01=reg128#4%bot,<x01=reg128#4%top | |
| 4351 +# asm 2: vtrn.32 <x01=d6,<x01=d7 | |
| 4352 +vtrn.32 d6,d7 | |
| 4353 + | |
| 4354 +# qhasm: r3 &= mask | |
| 4355 +# asm 1: vand >r3=reg128#1,<r3=reg128#9,<mask=reg128#1 | |
| 4356 +# asm 2: vand >r3=q0,<r3=q8,<mask=q0 | |
| 4357 +vand q0,q8,q0 | |
| 4358 + | |
| 4359 +# qhasm: r1 = r1[0,2,1,3] | |
| 4360 +# asm 1: vtrn.32 <r1=reg128#2%bot,<r1=reg128#2%top | |
| 4361 +# asm 2: vtrn.32 <r1=d2,<r1=d3 | |
| 4362 +vtrn.32 d2,d3 | |
| 4363 + | |
| 4364 +# qhasm: 2x x4 = r4 + t4 | |
| 4365 +# asm 1: vadd.i64 >x4=reg128#3,<r4=reg128#3,<t4=reg128#11 | |
| 4366 +# asm 2: vadd.i64 >x4=q2,<r4=q2,<t4=q10 | |
| 4367 +vadd.i64 q2,q2,q10 | |
| 4368 + | |
| 4369 +# qhasm: r3 = r3[0,2,1,3] | |
| 4370 +# asm 1: vtrn.32 <r3=reg128#1%bot,<r3=reg128#1%top | |
| 4371 +# asm 2: vtrn.32 <r3=d0,<r3=d1 | |
| 4372 +vtrn.32 d0,d1 | |
| 4373 + | |
| 4374 +# qhasm: x01 = x01[0,1] r1[0,1] | |
| 4375 +# asm 1: vext.32 <x01=reg128#4%top,<r1=reg128#2%bot,<r1=reg128#2%bot,#0 | |
| 4376 +# asm 2: vext.32 <x01=d7,<r1=d2,<r1=d2,#0 | |
| 4377 +vext.32 d7,d2,d2,#0 | |
| 4378 + | |
| 4379 +# qhasm: x23 = x23[0,1] r3[0,1] | |
| 4380 +# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#1%bot,<r3=reg128#1%bot,#0 | |
| 4381 +# asm 2: vext.32 <x23=d19,<r3=d0,<r3=d0,#0 | |
| 4382 +vext.32 d19,d0,d0,#0 | |
| 4383 + | |
| 4384 +# qhasm: x4 = x4[0,2,1,3] | |
| 4385 +# asm 1: vtrn.32 <x4=reg128#3%bot,<x4=reg128#3%top | |
| 4386 +# asm 2: vtrn.32 <x4=d4,<x4=d5 | |
| 4387 +vtrn.32 d4,d5 | |
| 4388 + | |
| 4389 +# qhasm: mem128[input_0] aligned= x01;input_0+=16 | |
| 4390 +# asm 1: vst1.8 {<x01=reg128#4%bot-<x01=reg128#4%top},[<input_0=int32#1,: 128]! | |
| 4391 +# asm 2: vst1.8 {<x01=d6-<x01=d7},[<input_0=r0,: 128]! | |
| 4392 +vst1.8 {d6-d7},[r0,: 128]! | |
| 4393 + | |
| 4394 +# qhasm: mem128[input_0] aligned= x23;input_0+=16 | |
| 4395 +# asm 1: vst1.8 {<x23=reg128#10%bot-<x23=reg128#10%top},[<input_0=int32#1,: 128
]! | |
| 4396 +# asm 2: vst1.8 {<x23=d18-<x23=d19},[<input_0=r0,: 128]! | |
| 4397 +vst1.8 {d18-d19},[r0,: 128]! | |
| 4398 + | |
| 4399 +# qhasm: mem64[input_0] aligned= x4[0] | |
| 4400 +# asm 1: vst1.8 <x4=reg128#3%bot,[<input_0=int32#1,: 64] | |
| 4401 +# asm 2: vst1.8 <x4=d4,[<input_0=r0,: 64] | |
| 4402 +vst1.8 d4,[r0,: 64] | |
| 4403 + | |
| 4404 +# qhasm: return | |
| 4405 +add sp,sp,#0 | |
| 4406 +bx lr | |
| 4407 diff --git a/crypto/poly1305/poly1305_vec.c b/crypto/poly1305/poly1305_vec.c | |
| 4408 new file mode 100644 | |
| 4409 index 0000000..c546200 | |
| 4410 --- /dev/null | |
| 4411 +++ b/crypto/poly1305/poly1305_vec.c | |
| 4412 @@ -0,0 +1,733 @@ | |
| 4413 +/* ==================================================================== | |
| 4414 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
| 4415 + * | |
| 4416 + * Redistribution and use in source and binary forms, with or without | |
| 4417 + * modification, are permitted provided that the following conditions | |
| 4418 + * are met: | |
| 4419 + * | |
| 4420 + * 1. Redistributions of source code must retain the above copyright | |
| 4421 + * notice, this list of conditions and the following disclaimer. | |
| 4422 + * | |
| 4423 + * 2. Redistributions in binary form must reproduce the above copyright | |
| 4424 + * notice, this list of conditions and the following disclaimer in | |
| 4425 + * the documentation and/or other materials provided with the | |
| 4426 + * distribution. | |
| 4427 + * | |
| 4428 + * 3. All advertising materials mentioning features or use of this | |
| 4429 + * software must display the following acknowledgment: | |
| 4430 + * "This product includes software developed by the OpenSSL Project | |
| 4431 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
| 4432 + * | |
| 4433 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
| 4434 + * endorse or promote products derived from this software without | |
| 4435 + * prior written permission. For written permission, please contact | |
| 4436 + * licensing@OpenSSL.org. | |
| 4437 + * | |
| 4438 + * 5. Products derived from this software may not be called "OpenSSL" | |
| 4439 + * nor may "OpenSSL" appear in their names without prior written | |
| 4440 + * permission of the OpenSSL Project. | |
| 4441 + * | |
| 4442 + * 6. Redistributions of any form whatsoever must retain the following | |
| 4443 + * acknowledgment: | |
| 4444 + * "This product includes software developed by the OpenSSL Project | |
| 4445 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
| 4446 + * | |
| 4447 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
| 4448 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 4449 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| 4450 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
| 4451 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 4452 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
| 4453 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| 4454 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 4455 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
| 4456 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| 4457 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
| 4458 + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 4459 + * ==================================================================== | |
| 4460 + */ | |
| 4461 + | |
| 4462 +/* This implementation of poly1305 is by Andrew Moon | |
| 4463 + * (https://github.com/floodyberry/poly1305-donna) and released as public | |
| 4464 + * domain. It implements SIMD vectorization based on the algorithm described in | |
| 4465 + * http://cr.yp.to/papers.html#neoncrypto. Unrolled to 2 powers, i.e. 64 byte | |
| 4466 + * block size | |
| 4467 +*/ | |
| 4468 + | |
| 4469 +#include <emmintrin.h> | |
| 4470 +#include <stdint.h> | |
| 4471 +#include <openssl/opensslconf.h> | |
| 4472 + | |
| 4473 +#if !defined(OPENSSL_NO_POLY1305) | |
| 4474 + | |
| 4475 +#include <openssl/poly1305.h> | |
| 4476 + | |
| 4477 +#define ALIGN(x) __attribute__((aligned(x))) | |
| 4478 +#define INLINE inline | |
| 4479 +#define U8TO64_LE(m) (*(uint64_t*)(m)) | |
| 4480 +#define U8TO32_LE(m) (*(uint32_t*)(m)) | |
| 4481 +#define U64TO8_LE(m,v) (*(uint64_t*)(m)) = v | |
| 4482 + | |
| 4483 +typedef __m128i xmmi; | |
| 4484 +typedef unsigned __int128 uint128_t; | |
| 4485 + | |
| 4486 +static const uint32_t ALIGN(16) poly1305_x64_sse2_message_mask[4] = | |
| 4487 + {(1 << 26) - 1, 0, (1 << 26) - 1, 0}; | |
| 4488 +static const uint32_t ALIGN(16) poly1305_x64_sse2_5[4] = {5, 0, 5, 0}; | |
| 4489 +static const uint32_t ALIGN(16) poly1305_x64_sse2_1shl128[4] = | |
| 4490 + {(1 << 24), 0, (1 << 24), 0}; | |
| 4491 + | |
| 4492 +static uint128_t INLINE | |
| 4493 +add128(uint128_t a, uint128_t b) | |
| 4494 + { | |
| 4495 + return a + b; | |
| 4496 + } | |
| 4497 + | |
| 4498 +static uint128_t INLINE | |
| 4499 +add128_64(uint128_t a, uint64_t b) | |
| 4500 + { | |
| 4501 + return a + b; | |
| 4502 + } | |
| 4503 + | |
| 4504 +static uint128_t INLINE | |
| 4505 +mul64x64_128(uint64_t a, uint64_t b) | |
| 4506 + { | |
| 4507 + return (uint128_t)a * b; | |
| 4508 + } | |
| 4509 + | |
| 4510 +static uint64_t INLINE | |
| 4511 +lo128(uint128_t a) | |
| 4512 + { | |
| 4513 + return (uint64_t)a; | |
| 4514 + } | |
| 4515 + | |
| 4516 +static uint64_t INLINE | |
| 4517 +shr128(uint128_t v, const int shift) | |
| 4518 + { | |
| 4519 + return (uint64_t)(v >> shift); | |
| 4520 + } | |
| 4521 + | |
| 4522 +static uint64_t INLINE | |
| 4523 +shr128_pair(uint64_t hi, uint64_t lo, const int shift) | |
| 4524 + { | |
| 4525 + return (uint64_t)((((uint128_t)hi << 64) | lo) >> shift); | |
| 4526 + } | |
| 4527 + | |
| 4528 +typedef struct poly1305_power_t | |
| 4529 + { | |
| 4530 + union | |
| 4531 + { | |
| 4532 + xmmi v; | |
| 4533 + uint64_t u[2]; | |
| 4534 + uint32_t d[4]; | |
| 4535 + } R20,R21,R22,R23,R24,S21,S22,S23,S24; | |
| 4536 + } poly1305_power; | |
| 4537 + | |
| 4538 +typedef struct poly1305_state_internal_t | |
| 4539 + { | |
| 4540 + poly1305_power P[2]; /* 288 bytes, top 32 bit halves unused = 144 | |
| 4541 + bytes of free storage */ | |
| 4542 + union | |
| 4543 + { | |
| 4544 + xmmi H[5]; /* 80 bytes */ | |
| 4545 + uint64_t HH[10]; | |
| 4546 + }; | |
| 4547 + /* uint64_t r0,r1,r2; [24 bytes] */ | |
| 4548 + /* uint64_t pad0,pad1; [16 bytes] */ | |
| 4549 + uint64_t started; /* 8 bytes */ | |
| 4550 + uint64_t leftover; /* 8 bytes */ | |
| 4551 + uint8_t buffer[64]; /* 64 bytes */ | |
| 4552 + } poly1305_state_internal; /* 448 bytes total + 63 bytes for | |
| 4553 + alignment = 511 bytes raw */ | |
| 4554 + | |
| 4555 +static poly1305_state_internal INLINE | |
| 4556 +*poly1305_aligned_state(poly1305_state *state) | |
| 4557 + { | |
| 4558 + return (poly1305_state_internal *)(((uint64_t)state + 63) & ~63); | |
| 4559 + } | |
| 4560 + | |
| 4561 +/* copy 0-63 bytes */ | |
| 4562 +static void INLINE | |
| 4563 +poly1305_block_copy(uint8_t *dst, const uint8_t *src, size_t bytes) | |
| 4564 + { | |
| 4565 + size_t offset = src - dst; | |
| 4566 + if (bytes & 32) | |
| 4567 + { | |
| 4568 + _mm_storeu_si128((xmmi *)(dst + 0), _mm_loadu_si128((xmmi *)(dst
+ offset + 0))); | |
| 4569 + _mm_storeu_si128((xmmi *)(dst + 16), _mm_loadu_si128((xmmi *)(ds
t + offset + 16))); | |
| 4570 + dst += 32; | |
| 4571 + } | |
| 4572 + if (bytes & 16) | |
| 4573 + { | |
| 4574 + _mm_storeu_si128((xmmi *)dst, | |
| 4575 + _mm_loadu_si128((xmmi *)(dst + offset))); | |
| 4576 + dst += 16; | |
| 4577 + } | |
| 4578 + if (bytes & 8) | |
| 4579 + { | |
| 4580 + *(uint64_t *)dst = *(uint64_t *)(dst + offset); | |
| 4581 + dst += 8; | |
| 4582 + } | |
| 4583 + if (bytes & 4) | |
| 4584 + { | |
| 4585 + *(uint32_t *)dst = *(uint32_t *)(dst + offset); | |
| 4586 + dst += 4; | |
| 4587 + } | |
| 4588 + if (bytes & 2) | |
| 4589 + { | |
| 4590 + *(uint16_t *)dst = *(uint16_t *)(dst + offset); | |
| 4591 + dst += 2; | |
| 4592 + } | |
| 4593 + if (bytes & 1) | |
| 4594 + { | |
| 4595 + *( uint8_t *)dst = *( uint8_t *)(dst + offset); | |
| 4596 + } | |
| 4597 + } | |
| 4598 + | |
| 4599 +/* zero 0-15 bytes */ | |
| 4600 +static void INLINE | |
| 4601 +poly1305_block_zero(uint8_t *dst, size_t bytes) | |
| 4602 + { | |
| 4603 + if (bytes & 8) { *(uint64_t *)dst = 0; dst += 8; } | |
| 4604 + if (bytes & 4) { *(uint32_t *)dst = 0; dst += 4; } | |
| 4605 + if (bytes & 2) { *(uint16_t *)dst = 0; dst += 2; } | |
| 4606 + if (bytes & 1) { *( uint8_t *)dst = 0; } | |
| 4607 + } | |
| 4608 + | |
| 4609 +static size_t INLINE | |
| 4610 +poly1305_min(size_t a, size_t b) | |
| 4611 + { | |
| 4612 + return (a < b) ? a : b; | |
| 4613 + } | |
| 4614 + | |
| 4615 +void | |
| 4616 +CRYPTO_poly1305_init(poly1305_state *state, const unsigned char key[32]) | |
| 4617 + { | |
| 4618 + poly1305_state_internal *st = poly1305_aligned_state(state); | |
| 4619 + poly1305_power *p; | |
| 4620 + uint64_t r0,r1,r2; | |
| 4621 + uint64_t t0,t1; | |
| 4622 + | |
| 4623 + /* clamp key */ | |
| 4624 + t0 = U8TO64_LE(key + 0); | |
| 4625 + t1 = U8TO64_LE(key + 8); | |
| 4626 + r0 = t0 & 0xffc0fffffff; t0 >>= 44; t0 |= t1 << 20; | |
| 4627 + r1 = t0 & 0xfffffc0ffff; t1 >>= 24; | |
| 4628 + r2 = t1 & 0x00ffffffc0f; | |
| 4629 + | |
| 4630 + /* store r in un-used space of st->P[1] */ | |
| 4631 + p = &st->P[1]; | |
| 4632 + p->R20.d[1] = (uint32_t)(r0 ); | |
| 4633 + p->R20.d[3] = (uint32_t)(r0 >> 32); | |
| 4634 + p->R21.d[1] = (uint32_t)(r1 ); | |
| 4635 + p->R21.d[3] = (uint32_t)(r1 >> 32); | |
| 4636 + p->R22.d[1] = (uint32_t)(r2 ); | |
| 4637 + p->R22.d[3] = (uint32_t)(r2 >> 32); | |
| 4638 + | |
| 4639 + /* store pad */ | |
| 4640 + p->R23.d[1] = U8TO32_LE(key + 16); | |
| 4641 + p->R23.d[3] = U8TO32_LE(key + 20); | |
| 4642 + p->R24.d[1] = U8TO32_LE(key + 24); | |
| 4643 + p->R24.d[3] = U8TO32_LE(key + 28); | |
| 4644 + | |
| 4645 + /* H = 0 */ | |
| 4646 + st->H[0] = _mm_setzero_si128(); | |
| 4647 + st->H[1] = _mm_setzero_si128(); | |
| 4648 + st->H[2] = _mm_setzero_si128(); | |
| 4649 + st->H[3] = _mm_setzero_si128(); | |
| 4650 + st->H[4] = _mm_setzero_si128(); | |
| 4651 + | |
| 4652 + st->started = 0; | |
| 4653 + st->leftover = 0; | |
| 4654 + } | |
| 4655 + | |
| 4656 +static void | |
| 4657 +poly1305_first_block(poly1305_state_internal *st, const uint8_t *m) | |
| 4658 + { | |
| 4659 + const xmmi MMASK = | |
| 4660 + _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); | |
| 4661 + const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5); | |
| 4662 + const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128); | |
| 4663 + xmmi T5,T6; | |
| 4664 + poly1305_power *p; | |
| 4665 + uint128_t d[3]; | |
| 4666 + uint64_t r0,r1,r2; | |
| 4667 + uint64_t r20,r21,r22,s22; | |
| 4668 + uint64_t pad0,pad1; | |
| 4669 + uint64_t c; | |
| 4670 + uint64_t i; | |
| 4671 + | |
| 4672 + /* pull out stored info */ | |
| 4673 + p = &st->P[1]; | |
| 4674 + | |
| 4675 + r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; | |
| 4676 + r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; | |
| 4677 + r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; | |
| 4678 + pad0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1]; | |
| 4679 + pad1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1]; | |
| 4680 + | |
| 4681 + /* compute powers r^2,r^4 */ | |
| 4682 + r20 = r0; | |
| 4683 + r21 = r1; | |
| 4684 + r22 = r2; | |
| 4685 + for (i = 0; i < 2; i++) | |
| 4686 + { | |
| 4687 + s22 = r22 * (5 << 2); | |
| 4688 + | |
| 4689 + d[0] = add128(mul64x64_128(r20, r20), mul64x64_128(r21 * 2, s22)
); | |
| 4690 + d[1] = add128(mul64x64_128(r22, s22), mul64x64_128(r20 * 2, r21)
); | |
| 4691 + d[2] = add128(mul64x64_128(r21, r21), mul64x64_128(r22 * 2, r20)
); | |
| 4692 + | |
| 4693 + r20 = lo128(d[0]) & 0xfffffffffff; c
= shr128(d[0], 44); | |
| 4694 + d[1] = add128_64(d[1], c); r21 = lo128(d[1]) & 0xfffffffffff; c
= shr128(d[1], 44); | |
| 4695 + d[2] = add128_64(d[2], c); r22 = lo128(d[2]) & 0x3ffffffffff; c
= shr128(d[2], 42); | |
| 4696 + r20 += c * 5; c = (r20 >> 44); r20 = r20 & 0xfffffffffff; | |
| 4697 + r21 += c; | |
| 4698 + | |
| 4699 + p->R20.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)( r20
) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); | |
| 4700 + p->R21.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r20 >
> 26) | (r21 << 18)) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); | |
| 4701 + p->R22.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >
> 8) ) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); | |
| 4702 + p->R23.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >
> 34) | (r22 << 10)) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); | |
| 4703 + p->R24.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r22 >
> 16) ) ), _MM_SHUFFLE(1,0,1,0)); | |
| 4704 + p->S21.v = _mm_mul_epu32(p->R21.v, FIVE); | |
| 4705 + p->S22.v = _mm_mul_epu32(p->R22.v, FIVE); | |
| 4706 + p->S23.v = _mm_mul_epu32(p->R23.v, FIVE); | |
| 4707 + p->S24.v = _mm_mul_epu32(p->R24.v, FIVE); | |
| 4708 + p--; | |
| 4709 + } | |
| 4710 + | |
| 4711 + /* put saved info back */ | |
| 4712 + p = &st->P[1]; | |
| 4713 + p->R20.d[1] = (uint32_t)(r0 ); | |
| 4714 + p->R20.d[3] = (uint32_t)(r0 >> 32); | |
| 4715 + p->R21.d[1] = (uint32_t)(r1 ); | |
| 4716 + p->R21.d[3] = (uint32_t)(r1 >> 32); | |
| 4717 + p->R22.d[1] = (uint32_t)(r2 ); | |
| 4718 + p->R22.d[3] = (uint32_t)(r2 >> 32); | |
| 4719 + p->R23.d[1] = (uint32_t)(pad0 ); | |
| 4720 + p->R23.d[3] = (uint32_t)(pad0 >> 32); | |
| 4721 + p->R24.d[1] = (uint32_t)(pad1 ); | |
| 4722 + p->R24.d[3] = (uint32_t)(pad1 >> 32); | |
| 4723 + | |
| 4724 + /* H = [Mx,My] */ | |
| 4725 + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi6
4((xmmi *)(m + 16))); | |
| 4726 + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi6
4((xmmi *)(m + 24))); | |
| 4727 + st->H[0] = _mm_and_si128(MMASK, T5); | |
| 4728 + st->H[1] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
| 4729 + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)); | |
| 4730 + st->H[2] = _mm_and_si128(MMASK, T5); | |
| 4731 + st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
| 4732 + st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); | |
| 4733 + } | |
| 4734 + | |
| 4735 +static void | |
| 4736 +poly1305_blocks(poly1305_state_internal *st, const uint8_t *m, size_t bytes) | |
| 4737 + { | |
| 4738 + const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask
); | |
| 4739 + const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5); | |
| 4740 + const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128); | |
| 4741 + | |
| 4742 + poly1305_power *p; | |
| 4743 + xmmi H0,H1,H2,H3,H4; | |
| 4744 + xmmi T0,T1,T2,T3,T4,T5,T6; | |
| 4745 + xmmi M0,M1,M2,M3,M4; | |
| 4746 + xmmi C1,C2; | |
| 4747 + | |
| 4748 + H0 = st->H[0]; | |
| 4749 + H1 = st->H[1]; | |
| 4750 + H2 = st->H[2]; | |
| 4751 + H3 = st->H[3]; | |
| 4752 + H4 = st->H[4]; | |
| 4753 + | |
| 4754 + while (bytes >= 64) | |
| 4755 + { | |
| 4756 + /* H *= [r^4,r^4] */ | |
| 4757 + p = &st->P[0]; | |
| 4758 + T0 = _mm_mul_epu32(H0, p->R20.v); | |
| 4759 + T1 = _mm_mul_epu32(H0, p->R21.v); | |
| 4760 + T2 = _mm_mul_epu32(H0, p->R22.v); | |
| 4761 + T3 = _mm_mul_epu32(H0, p->R23.v); | |
| 4762 + T4 = _mm_mul_epu32(H0, p->R24.v); | |
| 4763 + T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
| 4764 + T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
| 4765 + T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
| 4766 + T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
| 4767 + T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
| 4768 + T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
| 4769 + T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
| 4770 + T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
| 4771 + T5 = _mm_mul_epu32(H1, p->R23.v);
T4 = _mm_add_epi64(T4, T5); | |
| 4772 + T5 = _mm_mul_epu32(H2, p->R22.v);
T4 = _mm_add_epi64(T4, T5); | |
| 4773 + T5 = _mm_mul_epu32(H3, p->R21.v);
T4 = _mm_add_epi64(T4, T5); | |
| 4774 + T5 = _mm_mul_epu32(H4, p->R20.v);
T4 = _mm_add_epi64(T4, T5); | |
| 4775 + | |
| 4776 + /* H += [Mx,My]*[r^2,r^2] */ | |
| 4777 + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_lo
adl_epi64((xmmi *)(m + 16))); | |
| 4778 + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_lo
adl_epi64((xmmi *)(m + 24))); | |
| 4779 + M0 = _mm_and_si128(MMASK, T5); | |
| 4780 + M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
| 4781 + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)
); | |
| 4782 + M2 = _mm_and_si128(MMASK, T5); | |
| 4783 + M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
| 4784 + M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); | |
| 4785 + | |
| 4786 + p = &st->P[1]; | |
| 4787 + T5 = _mm_mul_epu32(M0, p->R20.v); T6 = _mm_mul_epu32(M0, p->R21.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
| 4788 + T5 = _mm_mul_epu32(M1, p->S24.v); T6 = _mm_mul_epu32(M1, p->R20.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
| 4789 + T5 = _mm_mul_epu32(M2, p->S23.v); T6 = _mm_mul_epu32(M2, p->S24.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
| 4790 + T5 = _mm_mul_epu32(M3, p->S22.v); T6 = _mm_mul_epu32(M3, p->S23.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
| 4791 + T5 = _mm_mul_epu32(M4, p->S21.v); T6 = _mm_mul_epu32(M4, p->S22.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
| 4792 + T5 = _mm_mul_epu32(M0, p->R22.v); T6 = _mm_mul_epu32(M0, p->R23.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
| 4793 + T5 = _mm_mul_epu32(M1, p->R21.v); T6 = _mm_mul_epu32(M1, p->R22.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
| 4794 + T5 = _mm_mul_epu32(M2, p->R20.v); T6 = _mm_mul_epu32(M2, p->R21.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
| 4795 + T5 = _mm_mul_epu32(M3, p->S24.v); T6 = _mm_mul_epu32(M3, p->R20.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
| 4796 + T5 = _mm_mul_epu32(M4, p->S23.v); T6 = _mm_mul_epu32(M4, p->S24.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
| 4797 + T5 = _mm_mul_epu32(M0, p->R24.v);
T4 = _mm_add_epi64(T4, T5); | |
| 4798 + T5 = _mm_mul_epu32(M1, p->R23.v);
T4 = _mm_add_epi64(T4, T5); | |
| 4799 + T5 = _mm_mul_epu32(M2, p->R22.v);
T4 = _mm_add_epi64(T4, T5); | |
| 4800 + T5 = _mm_mul_epu32(M3, p->R21.v);
T4 = _mm_add_epi64(T4, T5); | |
| 4801 + T5 = _mm_mul_epu32(M4, p->R20.v);
T4 = _mm_add_epi64(T4, T5); | |
| 4802 + | |
| 4803 + /* H += [Mx,My] */ | |
| 4804 + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 32)), _mm_l
oadl_epi64((xmmi *)(m + 48))); | |
| 4805 + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 40)), _mm_l
oadl_epi64((xmmi *)(m + 56))); | |
| 4806 + M0 = _mm_and_si128(MMASK, T5); | |
| 4807 + M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
| 4808 + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)
); | |
| 4809 + M2 = _mm_and_si128(MMASK, T5); | |
| 4810 + M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
| 4811 + M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); | |
| 4812 + | |
| 4813 + T0 = _mm_add_epi64(T0, M0); | |
| 4814 + T1 = _mm_add_epi64(T1, M1); | |
| 4815 + T2 = _mm_add_epi64(T2, M2); | |
| 4816 + T3 = _mm_add_epi64(T3, M3); | |
| 4817 + T4 = _mm_add_epi64(T4, M4); | |
| 4818 + | |
| 4819 + /* reduce */ | |
| 4820 + C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _
mm_and_si128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C
1); T4 = _mm_add_epi64(T4, C2); | |
| 4821 + C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _
mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C
1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); | |
| 4822 + C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _
mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C
1); T1 = _mm_add_epi64(T1, C2); | |
| 4823 + C1 = _mm_srli_epi64(T3, 26); T3 = _
mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C
1); | |
| 4824 + | |
| 4825 + /* H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx,My]) */ | |
| 4826 + H0 = T0; | |
| 4827 + H1 = T1; | |
| 4828 + H2 = T2; | |
| 4829 + H3 = T3; | |
| 4830 + H4 = T4; | |
| 4831 + | |
| 4832 + m += 64; | |
| 4833 + bytes -= 64; | |
| 4834 + } | |
| 4835 + | |
| 4836 + st->H[0] = H0; | |
| 4837 + st->H[1] = H1; | |
| 4838 + st->H[2] = H2; | |
| 4839 + st->H[3] = H3; | |
| 4840 + st->H[4] = H4; | |
| 4841 + } | |
| 4842 + | |
| 4843 +static size_t | |
| 4844 +poly1305_combine(poly1305_state_internal *st, const uint8_t *m, size_t bytes) | |
| 4845 + { | |
| 4846 + const xmmi MMASK = | |
| 4847 + _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); | |
| 4848 + const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128); | |
| 4849 + const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5); | |
| 4850 + | |
| 4851 + poly1305_power *p; | |
| 4852 + xmmi H0,H1,H2,H3,H4; | |
| 4853 + xmmi M0,M1,M2,M3,M4; | |
| 4854 + xmmi T0,T1,T2,T3,T4,T5,T6; | |
| 4855 + xmmi C1,C2; | |
| 4856 + | |
| 4857 + uint64_t r0,r1,r2; | |
| 4858 + uint64_t t0,t1,t2,t3,t4; | |
| 4859 + uint64_t c; | |
| 4860 + size_t consumed = 0; | |
| 4861 + | |
| 4862 + H0 = st->H[0]; | |
| 4863 + H1 = st->H[1]; | |
| 4864 + H2 = st->H[2]; | |
| 4865 + H3 = st->H[3]; | |
| 4866 + H4 = st->H[4]; | |
| 4867 + | |
| 4868 + /* p = [r^2,r^2] */ | |
| 4869 + p = &st->P[1]; | |
| 4870 + | |
| 4871 + if (bytes >= 32) | |
| 4872 + { | |
| 4873 + /* H *= [r^2,r^2] */ | |
| 4874 + T0 = _mm_mul_epu32(H0, p->R20.v); | |
| 4875 + T1 = _mm_mul_epu32(H0, p->R21.v); | |
| 4876 + T2 = _mm_mul_epu32(H0, p->R22.v); | |
| 4877 + T3 = _mm_mul_epu32(H0, p->R23.v); | |
| 4878 + T4 = _mm_mul_epu32(H0, p->R24.v); | |
| 4879 + T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
| 4880 + T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
| 4881 + T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
| 4882 + T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
| 4883 + T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
| 4884 + T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
| 4885 + T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
| 4886 + T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
| 4887 + T5 = _mm_mul_epu32(H1, p->R23.v);
T4 = _mm_add_epi64(T4, T5); | |
| 4888 + T5 = _mm_mul_epu32(H2, p->R22.v);
T4 = _mm_add_epi64(T4, T5); | |
| 4889 + T5 = _mm_mul_epu32(H3, p->R21.v);
T4 = _mm_add_epi64(T4, T5); | |
| 4890 + T5 = _mm_mul_epu32(H4, p->R20.v);
T4 = _mm_add_epi64(T4, T5); | |
| 4891 + | |
| 4892 + /* H += [Mx,My] */ | |
| 4893 + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_lo
adl_epi64((xmmi *)(m + 16))); | |
| 4894 + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_lo
adl_epi64((xmmi *)(m + 24))); | |
| 4895 + M0 = _mm_and_si128(MMASK, T5); | |
| 4896 + M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
| 4897 + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)
); | |
| 4898 + M2 = _mm_and_si128(MMASK, T5); | |
| 4899 + M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
| 4900 + M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); | |
| 4901 + | |
| 4902 + T0 = _mm_add_epi64(T0, M0); | |
| 4903 + T1 = _mm_add_epi64(T1, M1); | |
| 4904 + T2 = _mm_add_epi64(T2, M2); | |
| 4905 + T3 = _mm_add_epi64(T3, M3); | |
| 4906 + T4 = _mm_add_epi64(T4, M4); | |
| 4907 + | |
| 4908 + /* reduce */ | |
| 4909 + C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _
mm_and_si128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C
1); T4 = _mm_add_epi64(T4, C2); | |
| 4910 + C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _
mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C
1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); | |
| 4911 + C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _
mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C
1); T1 = _mm_add_epi64(T1, C2); | |
| 4912 + C1 = _mm_srli_epi64(T3, 26); T3 = _
mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C
1); | |
| 4913 + | |
| 4914 + /* H = (H*[r^2,r^2] + [Mx,My]) */ | |
| 4915 + H0 = T0; | |
| 4916 + H1 = T1; | |
| 4917 + H2 = T2; | |
| 4918 + H3 = T3; | |
| 4919 + H4 = T4; | |
| 4920 + | |
| 4921 + consumed = 32; | |
| 4922 + } | |
| 4923 + | |
| 4924 + /* finalize, H *= [r^2,r] */ | |
| 4925 + r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; | |
| 4926 + r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; | |
| 4927 + r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; | |
| 4928 + | |
| 4929 + p->R20.d[2] = (uint32_t)( r0 ) & 0x3ffffff; | |
| 4930 + p->R21.d[2] = (uint32_t)((r0 >> 26) | (r1 << 18)) & 0x3ffffff; | |
| 4931 + p->R22.d[2] = (uint32_t)((r1 >> 8) ) & 0x3ffffff; | |
| 4932 + p->R23.d[2] = (uint32_t)((r1 >> 34) | (r2 << 10)) & 0x3ffffff; | |
| 4933 + p->R24.d[2] = (uint32_t)((r2 >> 16) ) ; | |
| 4934 + p->S21.d[2] = p->R21.d[2] * 5; | |
| 4935 + p->S22.d[2] = p->R22.d[2] * 5; | |
| 4936 + p->S23.d[2] = p->R23.d[2] * 5; | |
| 4937 + p->S24.d[2] = p->R24.d[2] * 5; | |
| 4938 + | |
| 4939 + /* H *= [r^2,r] */ | |
| 4940 + T0 = _mm_mul_epu32(H0, p->R20.v); | |
| 4941 + T1 = _mm_mul_epu32(H0, p->R21.v); | |
| 4942 + T2 = _mm_mul_epu32(H0, p->R22.v); | |
| 4943 + T3 = _mm_mul_epu32(H0, p->R23.v); | |
| 4944 + T4 = _mm_mul_epu32(H0, p->R24.v); | |
| 4945 + T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20.v); T0 =
_mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
| 4946 + T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24.v); T0 =
_mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
| 4947 + T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23.v); T0 =
_mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
| 4948 + T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22.v); T0 =
_mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
| 4949 + T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22.v); T2 =
_mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
| 4950 + T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21.v); T2 =
_mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
| 4951 + T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20.v); T2 =
_mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
| 4952 + T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24.v); T2 =
_mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
| 4953 + T5 = _mm_mul_epu32(H1, p->R23.v); T4 =
_mm_add_epi64(T4, T5); | |
| 4954 + T5 = _mm_mul_epu32(H2, p->R22.v); T4 =
_mm_add_epi64(T4, T5); | |
| 4955 + T5 = _mm_mul_epu32(H3, p->R21.v); T4 =
_mm_add_epi64(T4, T5); | |
| 4956 + T5 = _mm_mul_epu32(H4, p->R20.v); T4 =
_mm_add_epi64(T4, T5); | |
| 4957 + | |
| 4958 + C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _mm_and_s
i128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C1); T4 =
_mm_add_epi64(T4, C2); | |
| 4959 + C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _mm_and_s
i128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C1); T0 =
_mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); | |
| 4960 + C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _mm_and_s
i128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C1); T1 =
_mm_add_epi64(T1, C2); | |
| 4961 + C1 = _mm_srli_epi64(T3, 26); T3 = _mm_and_s
i128(T3, MMASK); T4 = _mm_add_epi64(T4, C1); | |
| 4962 + | |
| 4963 + /* H = H[0]+H[1] */ | |
| 4964 + H0 = _mm_add_epi64(T0, _mm_srli_si128(T0, 8)); | |
| 4965 + H1 = _mm_add_epi64(T1, _mm_srli_si128(T1, 8)); | |
| 4966 + H2 = _mm_add_epi64(T2, _mm_srli_si128(T2, 8)); | |
| 4967 + H3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8)); | |
| 4968 + H4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8)); | |
| 4969 + | |
| 4970 + t0 = _mm_cvtsi128_si32(H0) ; c = (t0 >> 26); t0 &= 0x3ffffff; | |
| 4971 + t1 = _mm_cvtsi128_si32(H1) + c; c = (t1 >> 26); t1 &= 0x3ffffff; | |
| 4972 + t2 = _mm_cvtsi128_si32(H2) + c; c = (t2 >> 26); t2 &= 0x3ffffff; | |
| 4973 + t3 = _mm_cvtsi128_si32(H3) + c; c = (t3 >> 26); t3 &= 0x3ffffff; | |
| 4974 + t4 = _mm_cvtsi128_si32(H4) + c; c = (t4 >> 26); t4 &= 0x3ffffff; | |
| 4975 + t0 = t0 + (c * 5); c = (t0 >> 26); t0 &= 0x3ffffff; | |
| 4976 + t1 = t1 + c; | |
| 4977 + | |
| 4978 + st->HH[0] = ((t0 ) | (t1 << 26) ) & 0xfffffffffffull; | |
| 4979 + st->HH[1] = ((t1 >> 18) | (t2 << 8) | (t3 << 34)) & 0xfffffffffffull; | |
| 4980 + st->HH[2] = ((t3 >> 10) | (t4 << 16) ) & 0x3ffffffffffull; | |
| 4981 + | |
| 4982 + return consumed; | |
| 4983 + } | |
| 4984 + | |
| 4985 +void | |
| 4986 +CRYPTO_poly1305_update(poly1305_state *state, const unsigned char *m, | |
| 4987 + size_t bytes) | |
| 4988 + { | |
| 4989 + poly1305_state_internal *st = poly1305_aligned_state(state); | |
| 4990 + size_t want; | |
| 4991 + | |
| 4992 + /* need at least 32 initial bytes to start the accelerated branch */ | |
| 4993 + if (!st->started) | |
| 4994 + { | |
| 4995 + if ((st->leftover == 0) && (bytes > 32)) | |
| 4996 + { | |
| 4997 + poly1305_first_block(st, m); | |
| 4998 + m += 32; | |
| 4999 + bytes -= 32; | |
| 5000 + } | |
| 5001 + else | |
| 5002 + { | |
| 5003 + want = poly1305_min(32 - st->leftover, bytes); | |
| 5004 + poly1305_block_copy(st->buffer + st->leftover, m, want); | |
| 5005 + bytes -= want; | |
| 5006 + m += want; | |
| 5007 + st->leftover += want; | |
| 5008 + if ((st->leftover < 32) || (bytes == 0)) | |
| 5009 + return; | |
| 5010 + poly1305_first_block(st, st->buffer); | |
| 5011 + st->leftover = 0; | |
| 5012 + } | |
| 5013 + st->started = 1; | |
| 5014 + } | |
| 5015 + | |
| 5016 + /* handle leftover */ | |
| 5017 + if (st->leftover) | |
| 5018 + { | |
| 5019 + want = poly1305_min(64 - st->leftover, bytes); | |
| 5020 + poly1305_block_copy(st->buffer + st->leftover, m, want); | |
| 5021 + bytes -= want; | |
| 5022 + m += want; | |
| 5023 + st->leftover += want; | |
| 5024 + if (st->leftover < 64) | |
| 5025 + return; | |
| 5026 + poly1305_blocks(st, st->buffer, 64); | |
| 5027 + st->leftover = 0; | |
| 5028 + } | |
| 5029 + | |
| 5030 + /* process 64 byte blocks */ | |
| 5031 + if (bytes >= 64) | |
| 5032 + { | |
| 5033 + want = (bytes & ~63); | |
| 5034 + poly1305_blocks(st, m, want); | |
| 5035 + m += want; | |
| 5036 + bytes -= want; | |
| 5037 + } | |
| 5038 + | |
| 5039 + if (bytes) | |
| 5040 + { | |
| 5041 + poly1305_block_copy(st->buffer + st->leftover, m, bytes); | |
| 5042 + st->leftover += bytes; | |
| 5043 + } | |
| 5044 + } | |
| 5045 + | |
| 5046 +void | |
| 5047 +CRYPTO_poly1305_finish(poly1305_state *state, unsigned char mac[16]) | |
| 5048 + { | |
| 5049 + poly1305_state_internal *st = poly1305_aligned_state(state); | |
| 5050 + size_t leftover = st->leftover; | |
| 5051 + uint8_t *m = st->buffer; | |
| 5052 + uint128_t d[3]; | |
| 5053 + uint64_t h0,h1,h2; | |
| 5054 + uint64_t t0,t1; | |
| 5055 + uint64_t g0,g1,g2,c,nc; | |
| 5056 + uint64_t r0,r1,r2,s1,s2; | |
| 5057 + poly1305_power *p; | |
| 5058 + | |
| 5059 + if (st->started) | |
| 5060 + { | |
| 5061 + size_t consumed = poly1305_combine(st, m, leftover); | |
| 5062 + leftover -= consumed; | |
| 5063 + m += consumed; | |
| 5064 + } | |
| 5065 + | |
| 5066 + /* st->HH will either be 0 or have the combined result */ | |
| 5067 + h0 = st->HH[0]; | |
| 5068 + h1 = st->HH[1]; | |
| 5069 + h2 = st->HH[2]; | |
| 5070 + | |
| 5071 + p = &st->P[1]; | |
| 5072 + r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; | |
| 5073 + r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; | |
| 5074 + r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; | |
| 5075 + s1 = r1 * (5 << 2); | |
| 5076 + s2 = r2 * (5 << 2); | |
| 5077 + | |
| 5078 + if (leftover < 16) | |
| 5079 + goto poly1305_donna_atmost15bytes; | |
| 5080 + | |
| 5081 +poly1305_donna_atleast16bytes: | |
| 5082 + t0 = U8TO64_LE(m + 0); | |
| 5083 + t1 = U8TO64_LE(m + 8); | |
| 5084 + h0 += t0 & 0xfffffffffff; | |
| 5085 + t0 = shr128_pair(t1, t0, 44); | |
| 5086 + h1 += t0 & 0xfffffffffff; | |
| 5087 + h2 += (t1 >> 24) | ((uint64_t)1 << 40); | |
| 5088 + | |
| 5089 +poly1305_donna_mul: | |
| 5090 + d[0] = add128(add128(mul64x64_128(h0, r0), mul64x64_128(h1, s2)), mul64x
64_128(h2, s1)); | |
| 5091 + d[1] = add128(add128(mul64x64_128(h0, r1), mul64x64_128(h1, r0)), mul64x
64_128(h2, s2)); | |
| 5092 + d[2] = add128(add128(mul64x64_128(h0, r2), mul64x64_128(h1, r1)), mul64x
64_128(h2, r0)); | |
| 5093 + h0 = lo128(d[0]) & 0xfffffffffff; c = shr128(
d[0], 44); | |
| 5094 + d[1] = add128_64(d[1], c); h1 = lo128(d[1]) & 0xfffffffffff; c = shr128(
d[1], 44); | |
| 5095 + d[2] = add128_64(d[2], c); h2 = lo128(d[2]) & 0x3ffffffffff; c = shr128(
d[2], 42); | |
| 5096 + h0 += c * 5; | |
| 5097 + | |
| 5098 + m += 16; | |
| 5099 + leftover -= 16; | |
| 5100 + if (leftover >= 16) goto poly1305_donna_atleast16bytes; | |
| 5101 + | |
| 5102 + /* final bytes */ | |
| 5103 +poly1305_donna_atmost15bytes: | |
| 5104 + if (!leftover) goto poly1305_donna_finish; | |
| 5105 + | |
| 5106 + m[leftover++] = 1; | |
| 5107 + poly1305_block_zero(m + leftover, 16 - leftover); | |
| 5108 + leftover = 16; | |
| 5109 + | |
| 5110 + t0 = U8TO64_LE(m+0); | |
| 5111 + t1 = U8TO64_LE(m+8); | |
| 5112 + h0 += t0 & 0xfffffffffff; t0 = shr128_pair(t1, t0, 44); | |
| 5113 + h1 += t0 & 0xfffffffffff; | |
| 5114 + h2 += (t1 >> 24); | |
| 5115 + | |
| 5116 + goto poly1305_donna_mul; | |
| 5117 + | |
| 5118 +poly1305_donna_finish: | |
| 5119 + c = (h0 >> 44); h0 &= 0xfffffffffff; | |
| 5120 + h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff; | |
| 5121 + h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff; | |
| 5122 + h0 += c * 5; | |
| 5123 + | |
| 5124 + g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff; | |
| 5125 + g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff; | |
| 5126 + g2 = h2 + c - ((uint64_t)1 << 42); | |
| 5127 + | |
| 5128 + c = (g2 >> 63) - 1; | |
| 5129 + nc = ~c; | |
| 5130 + h0 = (h0 & nc) | (g0 & c); | |
| 5131 + h1 = (h1 & nc) | (g1 & c); | |
| 5132 + h2 = (h2 & nc) | (g2 & c); | |
| 5133 + | |
| 5134 + /* pad */ | |
| 5135 + t0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1]; | |
| 5136 + t1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1]; | |
| 5137 + h0 += (t0 & 0xfffffffffff) ; c = (h0 >> 44); h0 &= 0xfffffffffff; t0
= shr128_pair(t1, t0, 44); | |
| 5138 + h1 += (t0 & 0xfffffffffff) + c; c = (h1 >> 44); h1 &= 0xfffffffffff; t1
= (t1 >> 24); | |
| 5139 + h2 += (t1 ) + c; | |
| 5140 + | |
| 5141 + U64TO8_LE(mac + 0, ((h0 ) | (h1 << 44))); | |
| 5142 + U64TO8_LE(mac + 8, ((h1 >> 20) | (h2 << 24))); | |
| 5143 + } | |
| 5144 + | |
| 5145 +#endif /* !OPENSSL_NO_POLY1305 */ | |
| 5146 diff --git a/crypto/poly1305/poly1305test.c b/crypto/poly1305/poly1305test.c | |
| 5147 new file mode 100644 | |
| 5148 index 0000000..8dd26af | |
| 5149 --- /dev/null | |
| 5150 +++ b/crypto/poly1305/poly1305test.c | |
| 5151 @@ -0,0 +1,166 @@ | |
| 5152 +/* ==================================================================== | |
| 5153 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
| 5154 + * | |
| 5155 + * Redistribution and use in source and binary forms, with or without | |
| 5156 + * modification, are permitted provided that the following conditions | |
| 5157 + * are met: | |
| 5158 + * | |
| 5159 + * 1. Redistributions of source code must retain the above copyright | |
| 5160 + * notice, this list of conditions and the following disclaimer. | |
| 5161 + * | |
| 5162 + * 2. Redistributions in binary form must reproduce the above copyright | |
| 5163 + * notice, this list of conditions and the following disclaimer in | |
| 5164 + * the documentation and/or other materials provided with the | |
| 5165 + * distribution. | |
| 5166 + * | |
| 5167 + * 3. All advertising materials mentioning features or use of this | |
| 5168 + * software must display the following acknowledgment: | |
| 5169 + * "This product includes software developed by the OpenSSL Project | |
| 5170 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
| 5171 + * | |
| 5172 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
| 5173 + * endorse or promote products derived from this software without | |
| 5174 + * prior written permission. For written permission, please contact | |
| 5175 + * licensing@OpenSSL.org. | |
| 5176 + * | |
| 5177 + * 5. Products derived from this software may not be called "OpenSSL" | |
| 5178 + * nor may "OpenSSL" appear in their names without prior written | |
| 5179 + * permission of the OpenSSL Project. | |
| 5180 + * | |
| 5181 + * 6. Redistributions of any form whatsoever must retain the following | |
| 5182 + * acknowledgment: | |
| 5183 + * "This product includes software developed by the OpenSSL Project | |
| 5184 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
| 5185 + * | |
| 5186 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
| 5187 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 5188 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| 5189 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
| 5190 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 5191 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
| 5192 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| 5193 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
| 5194 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
| 5195 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| 5196 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
| 5197 + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 5198 + * ==================================================================== | |
| 5199 + */ | |
| 5200 + | |
| 5201 +#include <stdio.h> | |
| 5202 +#include <stdlib.h> | |
| 5203 +#include <string.h> | |
| 5204 + | |
| 5205 +#include <openssl/poly1305.h> | |
| 5206 + | |
| 5207 +struct poly1305_test | |
| 5208 + { | |
| 5209 + const char *inputhex; | |
| 5210 + const char *keyhex; | |
| 5211 + const char *outhex; | |
| 5212 + }; | |
| 5213 + | |
| 5214 +static const struct poly1305_test poly1305_tests[] = { | |
| 5215 + { | |
| 5216 + "", | |
| 5217 + "c8afaac331ee372cd6082de134943b174710130e9f6fea8d72293850a667d86
c", | |
| 5218 + "4710130e9f6fea8d72293850a667d86c", | |
| 5219 + }, | |
| 5220 + { | |
| 5221 + "48656c6c6f20776f726c6421", | |
| 5222 + "746869732069732033322d62797465206b657920666f7220506f6c793133303
5", | |
| 5223 + "a6f745008f81c916a20dcc74eef2b2f0", | |
| 5224 + }, | |
| 5225 + { | |
| 5226 + "000000000000000000000000000000000000000000000000000000000000000
0", | |
| 5227 + "746869732069732033322d62797465206b657920666f7220506f6c793133303
5", | |
| 5228 + "49ec78090e481ec6c26b33b91ccc0307", | |
| 5229 + }, | |
| 5230 + { | |
| 5231 + "000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000", | |
| 5232 + "746869732069732033322d62797465206b657920666f7220506f6c793133303
5", | |
| 5233 + "da84bcab02676c38cdb015604274c2aa", | |
| 5234 + }, | |
| 5235 +}; | |
| 5236 + | |
| 5237 +static unsigned char hex_digit(char h) | |
| 5238 + { | |
| 5239 + if (h >= '0' && h <= '9') | |
| 5240 + return h - '0'; | |
| 5241 + else if (h >= 'a' && h <= 'f') | |
| 5242 + return h - 'a' + 10; | |
| 5243 + else if (h >= 'A' && h <= 'F') | |
| 5244 + return h - 'A' + 10; | |
| 5245 + else | |
| 5246 + abort(); | |
| 5247 + } | |
| 5248 + | |
| 5249 +static void hex_decode(unsigned char *out, const char* hex) | |
| 5250 + { | |
| 5251 + size_t j = 0; | |
| 5252 + | |
| 5253 + while (*hex != 0) | |
| 5254 + { | |
| 5255 + unsigned char v = hex_digit(*hex++); | |
| 5256 + v <<= 4; | |
| 5257 + v |= hex_digit(*hex++); | |
| 5258 + out[j++] = v; | |
| 5259 + } | |
| 5260 + } | |
| 5261 + | |
| 5262 +static void hexdump(unsigned char *a, size_t len) | |
| 5263 + { | |
| 5264 + size_t i; | |
| 5265 + | |
| 5266 + for (i = 0; i < len; i++) | |
| 5267 + printf("%02x", a[i]); | |
| 5268 + } | |
| 5269 + | |
| 5270 +int main() | |
| 5271 + { | |
| 5272 + static const unsigned num_tests = | |
| 5273 + sizeof(poly1305_tests) / sizeof(struct poly1305_test); | |
| 5274 + unsigned i; | |
| 5275 + unsigned char key[32], out[16], expected[16]; | |
| 5276 + poly1305_state poly1305; | |
| 5277 + | |
| 5278 + for (i = 0; i < num_tests; i++) | |
| 5279 + { | |
| 5280 + const struct poly1305_test *test = &poly1305_tests[i]; | |
| 5281 + unsigned char *in; | |
| 5282 + size_t inlen = strlen(test->inputhex); | |
| 5283 + | |
| 5284 + if (strlen(test->keyhex) != sizeof(key)*2 || | |
| 5285 + strlen(test->outhex) != sizeof(out)*2 || | |
| 5286 + (inlen & 1) == 1) | |
| 5287 + return 1; | |
| 5288 + | |
| 5289 + inlen /= 2; | |
| 5290 + | |
| 5291 + hex_decode(key, test->keyhex); | |
| 5292 + hex_decode(expected, test->outhex); | |
| 5293 + | |
| 5294 + in = malloc(inlen); | |
| 5295 + | |
| 5296 + hex_decode(in, test->inputhex); | |
| 5297 + CRYPTO_poly1305_init(&poly1305, key); | |
| 5298 + CRYPTO_poly1305_update(&poly1305, in, inlen); | |
| 5299 + CRYPTO_poly1305_finish(&poly1305, out); | |
| 5300 + | |
| 5301 + if (memcmp(out, expected, sizeof(expected)) != 0) | |
| 5302 + { | |
| 5303 + printf("Poly1305 test #%d failed.\n", i); | |
| 5304 + printf("got: "); | |
| 5305 + hexdump(out, sizeof(out)); | |
| 5306 + printf("\nexpected: "); | |
| 5307 + hexdump(expected, sizeof(expected)); | |
| 5308 + printf("\n"); | |
| 5309 + return 1; | |
| 5310 + } | |
| 5311 + | |
| 5312 + free(in); | |
| 5313 + } | |
| 5314 + | |
| 5315 + printf("PASS\n"); | |
| 5316 + return 0; | |
| 5317 + } | |
| 5318 diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c | |
| 5319 index 75b6560..a042b8d 100644 | |
| 5320 --- a/ssl/s3_lib.c | |
| 5321 +++ b/ssl/s3_lib.c | |
| 5322 @@ -1841,7 +1841,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
| 5323 SSL_AEAD, | |
| 5324 SSL_TLSV1_2, | |
| 5325 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
| 5326 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
| 5327 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
| 5328 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
| 5329 128, | |
| 5330 128, | |
| 5331 }, | |
| 5332 @@ -1873,7 +1874,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
| 5333 SSL_AEAD, | |
| 5334 SSL_TLSV1_2, | |
| 5335 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
| 5336 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
| 5337 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
| 5338 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
| 5339 128, | |
| 5340 128, | |
| 5341 }, | |
| 5342 @@ -1905,7 +1907,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
| 5343 SSL_AEAD, | |
| 5344 SSL_TLSV1_2, | |
| 5345 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
| 5346 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
| 5347 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
| 5348 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
| 5349 128, | |
| 5350 128, | |
| 5351 }, | |
| 5352 @@ -1937,7 +1940,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
| 5353 SSL_AEAD, | |
| 5354 SSL_TLSV1_2, | |
| 5355 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
| 5356 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
| 5357 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
| 5358 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
| 5359 128, | |
| 5360 128, | |
| 5361 }, | |
| 5362 @@ -1969,7 +1973,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
| 5363 SSL_AEAD, | |
| 5364 SSL_TLSV1_2, | |
| 5365 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
| 5366 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
| 5367 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
| 5368 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
| 5369 128, | |
| 5370 128, | |
| 5371 }, | |
| 5372 @@ -2001,7 +2006,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
| 5373 SSL_AEAD, | |
| 5374 SSL_TLSV1_2, | |
| 5375 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
| 5376 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
| 5377 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
| 5378 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
| 5379 128, | |
| 5380 128, | |
| 5381 }, | |
| 5382 @@ -2714,7 +2720,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
| 5383 SSL_AEAD, | |
| 5384 SSL_TLSV1_2, | |
| 5385 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
| 5386 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
| 5387 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
| 5388 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
| 5389 128, | |
| 5390 128, | |
| 5391 }, | |
| 5392 @@ -2746,7 +2753,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
| 5393 SSL_AEAD, | |
| 5394 SSL_TLSV1_2, | |
| 5395 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
| 5396 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
| 5397 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
| 5398 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
| 5399 128, | |
| 5400 128, | |
| 5401 }, | |
| 5402 @@ -2778,7 +2786,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
| 5403 SSL_AEAD, | |
| 5404 SSL_TLSV1_2, | |
| 5405 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
| 5406 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
| 5407 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
| 5408 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
| 5409 128, | |
| 5410 128, | |
| 5411 }, | |
| 5412 @@ -2810,7 +2819,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
| 5413 SSL_AEAD, | |
| 5414 SSL_TLSV1_2, | |
| 5415 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
| 5416 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
| 5417 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
| 5418 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
| 5419 128, | |
| 5420 128, | |
| 5421 }, | |
| 5422 @@ -2894,6 +2904,51 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
| 5423 }, | |
| 5424 #endif | |
| 5425 | |
| 5426 + { | |
| 5427 + 1, | |
| 5428 + TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305, | |
| 5429 + TLS1_CK_ECDHE_RSA_CHACHA20_POLY1305, | |
| 5430 + SSL_kEECDH, | |
| 5431 + SSL_aRSA, | |
| 5432 + SSL_CHACHA20POLY1305, | |
| 5433 + SSL_AEAD, | |
| 5434 + SSL_TLSV1_2, | |
| 5435 + SSL_NOT_EXP|SSL_HIGH, | |
| 5436 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(0), | |
| 5437 + 256, | |
| 5438 + 0, | |
| 5439 + }, | |
| 5440 + | |
| 5441 + { | |
| 5442 + 1, | |
| 5443 + TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, | |
| 5444 + TLS1_CK_ECDHE_ECDSA_CHACHA20_POLY1305, | |
| 5445 + SSL_kEECDH, | |
| 5446 + SSL_aECDSA, | |
| 5447 + SSL_CHACHA20POLY1305, | |
| 5448 + SSL_AEAD, | |
| 5449 + SSL_TLSV1_2, | |
| 5450 + SSL_NOT_EXP|SSL_HIGH, | |
| 5451 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(0), | |
| 5452 + 256, | |
| 5453 + 0, | |
| 5454 + }, | |
| 5455 + | |
| 5456 + { | |
| 5457 + 1, | |
| 5458 + TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305, | |
| 5459 + TLS1_CK_DHE_RSA_CHACHA20_POLY1305, | |
| 5460 + SSL_kEDH, | |
| 5461 + SSL_aRSA, | |
| 5462 + SSL_CHACHA20POLY1305, | |
| 5463 + SSL_AEAD, | |
| 5464 + SSL_TLSV1_2, | |
| 5465 + SSL_NOT_EXP|SSL_HIGH, | |
| 5466 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(0), | |
| 5467 + 256, | |
| 5468 + 0, | |
| 5469 + }, | |
| 5470 + | |
| 5471 /* end of list */ | |
| 5472 }; | |
| 5473 | |
| 5474 diff --git a/ssl/s3_pkt.c b/ssl/s3_pkt.c | |
| 5475 index 5038f6c..04b474d 100644 | |
| 5476 --- a/ssl/s3_pkt.c | |
| 5477 +++ b/ssl/s3_pkt.c | |
| 5478 @@ -790,8 +790,11 @@ static int do_ssl3_write(SSL *s, int type, const unsigned c
har *buf, | |
| 5479 else | |
| 5480 eivlen = 0; | |
| 5481 } | |
| 5482 - else if (s->aead_write_ctx != NULL) | |
| 5483 + else if (s->aead_write_ctx != NULL && | |
| 5484 + s->aead_write_ctx->variable_nonce_included_in_record) | |
| 5485 + { | |
| 5486 eivlen = s->aead_write_ctx->variable_nonce_len; | |
| 5487 + } | |
| 5488 else | |
| 5489 eivlen = 0; | |
| 5490 | |
| 5491 diff --git a/ssl/ssl.h b/ssl/ssl.h | |
| 5492 index 0644cbf..d782a98 100644 | |
| 5493 --- a/ssl/ssl.h | |
| 5494 +++ b/ssl/ssl.h | |
| 5495 @@ -291,6 +291,7 @@ extern "C" { | |
| 5496 #define SSL_TXT_CAMELLIA128 "CAMELLIA128" | |
| 5497 #define SSL_TXT_CAMELLIA256 "CAMELLIA256" | |
| 5498 #define SSL_TXT_CAMELLIA "CAMELLIA" | |
| 5499 +#define SSL_TXT_CHACHA20 "CHACHA20" | |
| 5500 | |
| 5501 #define SSL_TXT_MD5 "MD5" | |
| 5502 #define SSL_TXT_SHA1 "SHA1" | |
| 5503 diff --git a/ssl/ssl_ciph.c b/ssl/ssl_ciph.c | |
| 5504 index 7e780cd..b6370bd 100644 | |
| 5505 --- a/ssl/ssl_ciph.c | |
| 5506 +++ b/ssl/ssl_ciph.c | |
| 5507 @@ -298,6 +298,7 @@ static const SSL_CIPHER cipher_aliases[]={ | |
| 5508 {0,SSL_TXT_CAMELLIA128,0,0,0,SSL_CAMELLIA128,0,0,0,0,0,0}, | |
| 5509 {0,SSL_TXT_CAMELLIA256,0,0,0,SSL_CAMELLIA256,0,0,0,0,0,0}, | |
| 5510 {0,SSL_TXT_CAMELLIA ,0,0,0,SSL_CAMELLIA128|SSL_CAMELLIA256,0,0,0,0,0,0
}, | |
| 5511 + {0,SSL_TXT_CHACHA20 ,0,0,0,SSL_CHACHA20POLY1305,0,0,0,0,0,0}, | |
| 5512 | |
| 5513 /* MAC aliases */ | |
| 5514 {0,SSL_TXT_MD5,0, 0,0,0,SSL_MD5, 0,0,0,0,0}, | |
| 5515 @@ -523,9 +524,15 @@ int ssl_cipher_get_evp_aead(const SSL_SESSION *s, const EVP
_AEAD **aead) | |
| 5516 return 0; | |
| 5517 | |
| 5518 #ifndef OPENSSL_NO_AES | |
| 5519 - /* There is only one AEAD for now. */ | |
| 5520 - *aead = EVP_aead_aes_128_gcm(); | |
| 5521 - return 1; | |
| 5522 + switch (c->algorithm_enc) | |
| 5523 + { | |
| 5524 + case SSL_AES128GCM: | |
| 5525 + *aead = EVP_aead_aes_128_gcm(); | |
| 5526 + return 1; | |
| 5527 + case SSL_CHACHA20POLY1305: | |
| 5528 + *aead = EVP_aead_chacha20_poly1305(); | |
| 5529 + return 1; | |
| 5530 + } | |
| 5531 #endif | |
| 5532 | |
| 5533 return 0; | |
| 5534 @@ -1715,6 +1722,9 @@ char *SSL_CIPHER_description(const SSL_CIPHER *cipher, cha
r *buf, int len) | |
| 5535 case SSL_SEED: | |
| 5536 enc="SEED(128)"; | |
| 5537 break; | |
| 5538 + case SSL_CHACHA20POLY1305: | |
| 5539 + enc="ChaCha20-Poly1305"; | |
| 5540 + break; | |
| 5541 default: | |
| 5542 enc="unknown"; | |
| 5543 break; | |
| 5544 diff --git a/ssl/ssl_locl.h b/ssl/ssl_locl.h | |
| 5545 index 63bc28b..b83d8cd 100644 | |
| 5546 --- a/ssl/ssl_locl.h | |
| 5547 +++ b/ssl/ssl_locl.h | |
| 5548 @@ -328,6 +328,7 @@ | |
| 5549 #define SSL_SEED 0x00000800L | |
| 5550 #define SSL_AES128GCM 0x00001000L | |
| 5551 #define SSL_AES256GCM 0x00002000L | |
| 5552 +#define SSL_CHACHA20POLY1305 0x00004000L | |
| 5553 | |
| 5554 #define SSL_AES (SSL_AES128|SSL_AES256|SSL_AES128GCM|SSL
_AES256GCM) | |
| 5555 #define SSL_CAMELLIA (SSL_CAMELLIA128|SSL_CAMELLIA256) | |
| 5556 @@ -389,6 +390,12 @@ | |
| 5557 #define SSL_CIPHER_AEAD_FIXED_NONCE_LEN(ssl_cipher) \ | |
| 5558 (((ssl_cipher->algorithm2 >> 24) & 0xf)*2) | |
| 5559 | |
| 5560 +/* SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD is a flag in | |
| 5561 + * SSL_CIPHER.algorithm2 which indicates that the variable part of the nonce is | |
| 5562 + * included as a prefix of the record. (AES-GCM, for example, does with with an | |
| 5563 + * 8-byte variable nonce.) */ | |
| 5564 +#define SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD (1<<22) | |
| 5565 + | |
| 5566 /* | |
| 5567 * Export and cipher strength information. For each cipher we have to decide | |
| 5568 * whether it is exportable or not. This information is likely to change | |
| 5569 @@ -605,6 +612,9 @@ struct ssl_aead_ctx_st | |
| 5570 * records. */ | |
| 5571 unsigned char fixed_nonce[8]; | |
| 5572 unsigned char fixed_nonce_len, variable_nonce_len, tag_len; | |
| 5573 + /* variable_nonce_included_in_record is non-zero if the variable nonce | |
| 5574 + * for a record is included as a prefix before the ciphertext. */ | |
| 5575 + char variable_nonce_included_in_record; | |
| 5576 }; | |
| 5577 | |
| 5578 #ifndef OPENSSL_NO_COMP | |
| 5579 diff --git a/ssl/t1_enc.c b/ssl/t1_enc.c | |
| 5580 index 7af1a32..15800af 100644 | |
| 5581 --- a/ssl/t1_enc.c | |
| 5582 +++ b/ssl/t1_enc.c | |
| 5583 @@ -366,6 +366,8 @@ static int tls1_change_cipher_state_aead(SSL *s, char is_rea
d, | |
| 5584 memcpy(aead_ctx->fixed_nonce, iv, iv_len); | |
| 5585 aead_ctx->fixed_nonce_len = iv_len; | |
| 5586 aead_ctx->variable_nonce_len = 8; /* always the case, currently. */ | |
| 5587 + aead_ctx->variable_nonce_included_in_record = | |
| 5588 + (s->s3->tmp.new_cipher->algorithm2 & SSL_CIPHER_ALGORITHM2_VARIA
BLE_NONCE_INCLUDED_IN_RECORD) != 0; | |
| 5589 if (aead_ctx->variable_nonce_len + aead_ctx->fixed_nonce_len != EVP_AEAD
_nonce_length(aead)) | |
| 5590 { | |
| 5591 SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE_AEAD, ERR_R_INTERNAL_ERROR
); | |
| 5592 @@ -863,6 +865,7 @@ int tls1_enc(SSL *s, int send) | |
| 5593 if (send) | |
| 5594 { | |
| 5595 size_t len = rec->length; | |
| 5596 + size_t eivlen = 0; | |
| 5597 in = rec->input; | |
| 5598 out = rec->data; | |
| 5599 | |
| 5600 @@ -878,18 +881,22 @@ int tls1_enc(SSL *s, int send) | |
| 5601 * variable nonce. Thus we can copy the sequence number | |
| 5602 * bytes into place without overwriting any of the | |
| 5603 * plaintext. */ | |
| 5604 - memcpy(out, ad, aead->variable_nonce_len); | |
| 5605 - len -= aead->variable_nonce_len; | |
| 5606 + if (aead->variable_nonce_included_in_record) | |
| 5607 + { | |
| 5608 + memcpy(out, ad, aead->variable_nonce_len); | |
| 5609 + len -= aead->variable_nonce_len; | |
| 5610 + eivlen = aead->variable_nonce_len; | |
| 5611 + } | |
| 5612 | |
| 5613 ad[11] = len >> 8; | |
| 5614 ad[12] = len & 0xff; | |
| 5615 | |
| 5616 n = EVP_AEAD_CTX_seal(&aead->ctx, | |
| 5617 - out + aead->variable_nonce_len, le
n + aead->tag_len, | |
| 5618 + out + eivlen, len + aead->tag_len, | |
| 5619 nonce, nonce_used, | |
| 5620 - in + aead->variable_nonce_len, len
, | |
| 5621 + in + eivlen, len, | |
| 5622 ad, sizeof(ad)); | |
| 5623 - if (n >= 0) | |
| 5624 + if (n >= 0 && aead->variable_nonce_included_in_record) | |
| 5625 n += aead->variable_nonce_len; | |
| 5626 } | |
| 5627 else | |
| 5628 @@ -903,12 +910,17 @@ int tls1_enc(SSL *s, int send) | |
| 5629 | |
| 5630 if (len < aead->variable_nonce_len) | |
| 5631 return 0; | |
| 5632 - memcpy(nonce + nonce_used, in, aead->variable_nonce_len)
; | |
| 5633 + memcpy(nonce + nonce_used, | |
| 5634 + aead->variable_nonce_included_in_record ? in : ad
, | |
| 5635 + aead->variable_nonce_len); | |
| 5636 nonce_used += aead->variable_nonce_len; | |
| 5637 | |
| 5638 - in += aead->variable_nonce_len; | |
| 5639 - len -= aead->variable_nonce_len; | |
| 5640 - out += aead->variable_nonce_len; | |
| 5641 + if (aead->variable_nonce_included_in_record) | |
| 5642 + { | |
| 5643 + in += aead->variable_nonce_len; | |
| 5644 + len -= aead->variable_nonce_len; | |
| 5645 + out += aead->variable_nonce_len; | |
| 5646 + } | |
| 5647 | |
| 5648 if (len < aead->tag_len) | |
| 5649 return 0; | |
| 5650 diff --git a/ssl/tls1.h b/ssl/tls1.h | |
| 5651 index 8cac7df..3cbcb83 100644 | |
| 5652 --- a/ssl/tls1.h | |
| 5653 +++ b/ssl/tls1.h | |
| 5654 @@ -526,6 +526,10 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB
,(void (*)(void))cb) | |
| 5655 #define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256 0x0300C031 | |
| 5656 #define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384 0x0300C032 | |
| 5657 | |
| 5658 +#define TLS1_CK_ECDHE_RSA_CHACHA20_POLY1305 0x0300CC13 | |
| 5659 +#define TLS1_CK_ECDHE_ECDSA_CHACHA20_POLY1305 0x0300CC14 | |
| 5660 +#define TLS1_CK_DHE_RSA_CHACHA20_POLY1305 0x0300CC15 | |
| 5661 + | |
| 5662 /* XXX | |
| 5663 * Inconsistency alert: | |
| 5664 * The OpenSSL names of ciphers with ephemeral DH here include the string | |
| 5665 @@ -677,6 +681,10 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB
,(void (*)(void))cb) | |
| 5666 #define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256 "ECDH-RSA-AES128-GCM-SH
A256" | |
| 5667 #define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384 "ECDH-RSA-AES256-GCM-SH
A384" | |
| 5668 | |
| 5669 +#define TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305 "ECDHE-RSA-CHACHA20-POLY
1305" | |
| 5670 +#define TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 "ECDHE-ECDSA-CHACHA20-PO
LY1305" | |
| 5671 +#define TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305 "DHE-RSA-CHACHA2
0-POLY1305" | |
| 5672 + | |
| 5673 #define TLS_CT_RSA_SIGN 1 | |
| 5674 #define TLS_CT_DSS_SIGN 2 | |
| 5675 #define TLS_CT_RSA_FIXED_DH 3 | |
| 5676 diff --git a/test/Makefile b/test/Makefile | |
| 5677 index 4c9eabc..4790aa8 100644 | |
| 5678 --- a/test/Makefile | |
| 5679 +++ b/test/Makefile | |
| 5680 @@ -86,7 +86,9 @@ OBJ= $(BNTEST).o $(ECTEST).o $(ECDSATEST).o $(ECDHTEST).o $(
IDEATEST).o \ | |
| 5681 $(MDC2TEST).o $(RMDTEST).o \ | |
| 5682 $(RANDTEST).o $(DHTEST).o $(ENGINETEST).o $(CASTTEST).o \ | |
| 5683 $(BFTEST).o $(SSLTEST).o $(DSATEST).o $(EXPTEST).o $(RSATEST).o \ | |
| 5684 - $(EVPTEST).o $(IGETEST).o $(JPAKETEST).o $(ASN1TEST).o | |
| 5685 + $(EVPTEST).o $(IGETEST).o $(JPAKETEST).o $(ASN1TEST).o $(CHACHATEST).o \ | |
| 5686 + $(POLY1305TEST).o | |
| 5687 + | |
| 5688 SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $(IDEATEST).c \ | |
| 5689 $(MD2TEST).c $(MD4TEST).c $(MD5TEST).c \ | |
| 5690 $(HMACTEST).c $(WPTEST).c \ | |
| 5691 @@ -94,7 +96,8 @@ SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $(
IDEATEST).c \ | |
| 5692 $(DESTEST).c $(SHATEST).c $(SHA1TEST).c $(MDC2TEST).c $(RMDTEST).c \ | |
| 5693 $(RANDTEST).c $(DHTEST).c $(ENGINETEST).c $(CASTTEST).c \ | |
| 5694 $(BFTEST).c $(SSLTEST).c $(DSATEST).c $(EXPTEST).c $(RSATEST).c \ | |
| 5695 - $(EVPTEST).c $(IGETEST).c $(JPAKETEST).c $(SRPTEST).c $(ASN1TEST).c | |
| 5696 + $(EVPTEST).c $(IGETEST).c $(JPAKETEST).c $(SRPTEST).c $(ASN1TEST).c \ | |
| 5697 + $(CHACHATEST).c $(POLY1305TEST).c | |
| 5698 | |
| 5699 EXHEADER= | |
| 5700 HEADER= $(EXHEADER) | |
| 5701 @@ -137,7 +140,7 @@ alltests: \ | |
| 5702 test_enc test_x509 test_rsa test_crl test_sid \ | |
| 5703 test_gen test_req test_pkcs7 test_verify test_dh test_dsa \ | |
| 5704 test_ss test_ca test_engine test_evp test_ssl test_tsa test_ige \ | |
| 5705 - test_jpake test_srp test_cms | |
| 5706 + test_jpake test_srp test_cms test_chacha test_poly1305 | |
| 5707 | |
| 5708 test_evp: | |
| 5709 ../util/shlib_wrap.sh ./$(EVPTEST) evptests.txt | |
| 5710 @@ -318,6 +321,14 @@ test_srp: $(SRPTEST)$(EXE_EXT) | |
| 5711 @echo "Test SRP" | |
| 5712 ../util/shlib_wrap.sh ./srptest | |
| 5713 | |
| 5714 +test_chacha: $(CHACHATEST)$(EXE_EXT) | |
| 5715 + @echo "Test ChaCha" | |
| 5716 + ../util/shlib_wrap.sh ./$(CHACHATEST) | |
| 5717 + | |
| 5718 +test_poly1305: $(POLY1305TEST)$(EXE_EXT) | |
| 5719 + @echo "Test Poly1305" | |
| 5720 + ../util/shlib_wrap.sh ./$(POLY1305TEST) | |
| 5721 + | |
| 5722 lint: | |
| 5723 lint -DLINT $(INCLUDES) $(SRC)>fluff | |
| 5724 | |
| 5725 @@ -394,6 +405,12 @@ $(SHA256TEST)$(EXE_EXT): $(SHA256TEST).o $(DLIBCRYPTO) | |
| 5726 $(SHA512TEST)$(EXE_EXT): $(SHA512TEST).o $(DLIBCRYPTO) | |
| 5727 @target=$(SHA512TEST); $(BUILD_CMD) | |
| 5728 | |
| 5729 +$(CHACHATEST)$(EXE_EXT): $(CHACHATEST).o $(DLIBCRYPTO) | |
| 5730 + @target=$(CHACHATEST); $(BUILD_CMD) | |
| 5731 + | |
| 5732 +$(POLY1305TEST)$(EXE_EXT): $(POLY1305TEST).o $(DLIBCRYPTO) | |
| 5733 + @target=$(POLY1305TEST); $(BUILD_CMD) | |
| 5734 + | |
| 5735 $(RMDTEST)$(EXE_EXT): $(RMDTEST).o $(DLIBCRYPTO) | |
| 5736 @target=$(RMDTEST); $(BUILD_CMD) | |
| 5737 | |
| 5738 -- | |
| 5739 1.8.4.1 | |
| 5740 | |
| OLD | NEW |