Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(562)

Side by Side Diff: openssl/patches/chacha20poly1305.patch

Issue 2072073002: Delete bundled copy of OpenSSL and replace with README. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/openssl@master
Patch Set: Delete bundled copy of OpenSSL and replace with README. Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « openssl/patches/aead_support.patch ('k') | openssl/patches/channelid.patch » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 From 2688f00904e4ffd647afcff69bb8fe6df8c5902b Mon Sep 17 00:00:00 2001
2 From: Adam Langley <agl@chromium.org>
3 Date: Mon, 9 Sep 2013 12:13:24 -0400
4 Subject: [PATCH 43/52] chacha20poly1305
5
6 Add support for Chacha20 + Poly1305.
7 ---
8 .gitignore | 1 +
9 Configure | 56 +-
10 Makefile.org | 6 +-
11 apps/speed.c | 64 +-
12 crypto/chacha/Makefile | 80 ++
13 crypto/chacha/chacha.h | 85 ++
14 crypto/chacha/chacha_enc.c | 167 +++
15 crypto/chacha/chacha_vec.c | 345 +++++++
16 crypto/chacha/chachatest.c | 211 ++++
17 crypto/evp/Makefile | 35 +-
18 crypto/evp/e_chacha20poly1305.c | 261 +++++
19 crypto/evp/evp.h | 8 +
20 crypto/evp/evp_err.c | 3 +
21 crypto/poly1305/Makefile | 81 ++
22 crypto/poly1305/poly1305.c | 320 ++++++
23 crypto/poly1305/poly1305.h | 88 ++
24 crypto/poly1305/poly1305_arm.c | 335 ++++++
25 crypto/poly1305/poly1305_arm_asm.s | 2009 ++++++++++++++++++++++++++++++++++++
26 crypto/poly1305/poly1305_vec.c | 733 +++++++++++++
27 crypto/poly1305/poly1305test.c | 166 +++
28 ssl/s3_lib.c | 75 +-
29 ssl/s3_pkt.c | 5 +-
30 ssl/ssl.h | 1 +
31 ssl/ssl_ciph.c | 16 +-
32 ssl/ssl_locl.h | 10 +
33 ssl/t1_enc.c | 30 +-
34 ssl/tls1.h | 8 +
35 test/Makefile | 23 +-
36 28 files changed, 5166 insertions(+), 56 deletions(-)
37 create mode 100644 crypto/chacha/Makefile
38 create mode 100644 crypto/chacha/chacha.h
39 create mode 100644 crypto/chacha/chacha_enc.c
40 create mode 100644 crypto/chacha/chacha_vec.c
41 create mode 100644 crypto/chacha/chachatest.c
42 create mode 100644 crypto/evp/e_chacha20poly1305.c
43 create mode 100644 crypto/poly1305/Makefile
44 create mode 100644 crypto/poly1305/poly1305.c
45 create mode 100644 crypto/poly1305/poly1305.h
46 create mode 100644 crypto/poly1305/poly1305_arm.c
47 create mode 100644 crypto/poly1305/poly1305_arm_asm.s
48 create mode 100644 crypto/poly1305/poly1305_vec.c
49 create mode 100644 crypto/poly1305/poly1305test.c
50
51 diff --git a/openssl/ssl/ssl_ciph.c b/openssl/ssl/ssl_ciph.c
52 index db85b29..cebb18a 100644
53 --- a/ssl/ssl_ciph.c
54 +++ b/ssl/ssl_ciph.c
55 @@ -1442,7 +1442,9 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_list(const SSL_MET HOD *ssl_method,
56 ssl_cipher_apply_rule(0, SSL_kEECDH, 0, 0, 0, 0, 0, CIPHER_ADD, -1, &hea d, &tail);
57 ssl_cipher_apply_rule(0, SSL_kEECDH, 0, 0, 0, 0, 0, CIPHER_DEL, -1, &hea d, &tail);
58
59 - /* AES is our preferred symmetric cipher */
60 + /* CHACHA20 is fast and safe on all hardware and is thus our preferred
61 + * symmetric cipher, with AES second. */
62 + ssl_cipher_apply_rule(0, 0, 0, SSL_CHACHA20POLY1305, 0, 0, 0, CIPHER_ADD , -1, &head, &tail);
63 ssl_cipher_apply_rule(0, 0, 0, SSL_AES, 0, 0, 0, CIPHER_ADD, -1, &head, &tail);
64
65 /* Temporarily enable everything else for sorting */
66 diff --git a/Configure b/Configure
67 index 9c803dc..1b95384 100755
68 --- a/Configure
69 +++ b/Configure
70 @@ -124,24 +124,24 @@ my $tlib="-lnsl -lsocket";
71 my $bits1="THIRTY_TWO_BIT ";
72 my $bits2="SIXTY_FOUR_BIT ";
73
74 -my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o crypt 586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586 .o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cml l-x86.o:ghash-x86.o:";
75 +my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o crypt 586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586 .o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cml l-x86.o:ghash-x86.o:::";
76
77 my $x86_elf_asm="$x86_asm:elf";
78
79 -my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64- gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_ 64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_ 64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghas h-x86_64.o:";
80 -my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64. o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::: :ghash-ia64.o::void";
81 -my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a -mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-spa rcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void";
82 -my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void";
83 -my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash- alpha.o::void";
84 -my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::: :::::";
85 -my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha2 56-mips.o sha512-mips.o::::::::";
86 -my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::ae s-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4- s390x.o:::::ghash-s390x.o:";
87 -my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cb c.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-a rmv4.o::void";
88 -my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-p arisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash- parisc.o::32";
89 -my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o ae s-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::gha sh-parisc.o::64";
90 -my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::";
91 -my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::";
92 -my $no_asm=":::::::::::::::void";
93 +my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64- gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_ 64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_ 64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghas h-x86_64.o::chacha_vec.o:poly1305_vec.o";
94 +my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64. o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::: :ghash-ia64.o::::void";
95 +my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a -mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-spa rcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::::void";
96 +my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::::void";
97 +my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash- alpha.o::::void";
98 +my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::: :::::::";
99 +my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha2 56-mips.o sha512-mips.o::::::::::";
100 +my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::ae s-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4- s390x.o:::::::ghash-s390x.o:";
101 +my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cb c.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-a rmv4.o::chacha_vec.o:poly1305_arm.o poly1305_arm_asm.o:void";
102 +my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-p arisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash- parisc.o::::32";
103 +my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o ae s-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::gha sh-parisc.o::::64";
104 +my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::::";
105 +my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::::";
106 +my $no_asm=":::::::::::::::::void";
107
108 # As for $BSDthreads. Idea is to maintain "collective" set of flags,
109 # which would cover all BSD flavors. -pthread applies to them all,
110 @@ -152,7 +152,7 @@ my $no_asm=":::::::::::::::void";
111 # seems to be sufficient?
112 my $BSDthreads="-pthread -D_THREAD_SAFE -D_REENTRANT";
113
114 -#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lflags : $b n_ops : $cpuid_obj : $bn_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj : $sha1_ obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_obj : $mod es_obj : $engines_obj : $dso_scheme : $shared_target : $shared_cflag : $shared_l dflag : $shared_extension : $ranlib : $arflags : $multilib
115 +#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lflags : $b n_ops : $cpuid_obj : $bn_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj : $sha1_ obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_obj : $mod es_obj : $engines_obj : $chacha_obj : $poly1305_obj : $dso_scheme : $shared_targ et : $shared_cflag : $shared_ldflag : $shared_extension : $ranlib : $arflags : $ multilib :
116
117 my %table=(
118 # File 'TABLE' (created by 'make TABLE') contains the data from this list,
119 @@ -647,6 +647,8 @@ my $idx_wp_obj = $idx++;
120 my $idx_cmll_obj = $idx++;
121 my $idx_modes_obj = $idx++;
122 my $idx_engines_obj = $idx++;
123 +my $idx_chacha_obj = $idx++;
124 +my $idx_poly1305_obj = $idx++;
125 my $idx_perlasm_scheme = $idx++;
126 my $idx_dso_scheme = $idx++;
127 my $idx_shared_target = $idx++;
128 @@ -692,6 +694,8 @@ my $aes_enc="aes_core.o aes_cbc.o";
129 my $bf_enc ="bf_enc.o";
130 my $cast_enc="c_enc.o";
131 my $rc4_enc="rc4_enc.o rc4_skey.o";
132 +my $chacha_enc="chacha_enc.o";
133 +my $poly1305 ="poly1305.o";
134 my $rc5_enc="rc5_enc.o";
135 my $md5_obj="";
136 my $sha1_obj="";
137 @@ -1144,7 +1148,7 @@ $openssldir=$prefix . "/" . $openssldir if $openssldir !~ /(^\/|^[a-zA-Z]:[\\\/]
138
139 print "IsMK1MF=$IsMK1MF\n";
140
141 -my @fields = split(/\s*:\s*/,$table{$target} . ":" x 30 , -1);
142 +my @fields = split(/\s*:\s*/,$table{$target} . ":" x 31 , -1);
143 my $cc = $fields[$idx_cc];
144 # Allow environment CC to override compiler...
145 if($ENV{CC}) {
146 @@ -1181,6 +1185,8 @@ my $ranlib = $ENV{'RANLIB'} || $fields[$idx_ranlib];
147 my $ar = $ENV{'AR'} || "ar";
148 my $arflags = $fields[$idx_arflags];
149 my $multilib = $fields[$idx_multilib];
150 +my $chacha_obj = $fields[$idx_chacha_obj];
151 +my $poly1305_obj = $fields[$idx_poly1305_obj];
152
153 # if $prefix/lib$multilib is not an existing directory, then
154 # assume that it's not searched by linker automatically, in
155 @@ -1477,6 +1483,8 @@ $des_obj=$des_enc unless ($des_obj =~ /\.o$/);
156 $bf_obj=$bf_enc unless ($bf_obj =~ /\.o$/);
157 $cast_obj=$cast_enc unless ($cast_obj =~ /\.o$/);
158 $rc4_obj=$rc4_enc unless ($rc4_obj =~ /\.o$/);
159 +$chacha_obj=$chacha_enc unless ($chacha_obj =~ /\.o$/);
160 +$poly1305_obj=$poly1305 unless ($poly1305_obj =~ /\.o$/);
161 $rc5_obj=$rc5_enc unless ($rc5_obj =~ /\.o$/);
162 if ($sha1_obj =~ /\.o$/)
163 {
164 @@ -1637,6 +1645,8 @@ while (<IN>)
165 s/^BF_ENC=.*$/BF_ENC= $bf_obj/;
166 s/^CAST_ENC=.*$/CAST_ENC= $cast_obj/;
167 s/^RC4_ENC=.*$/RC4_ENC= $rc4_obj/;
168 + s/^CHACHA_ENC=.*$/CHACHA_ENC= $chacha_obj/;
169 + s/^POLY1305=.*$/POLY1305= $poly1305_obj/;
170 s/^RC5_ENC=.*$/RC5_ENC= $rc5_obj/;
171 s/^MD5_ASM_OBJ=.*$/MD5_ASM_OBJ= $md5_obj/;
172 s/^SHA1_ASM_OBJ=.*$/SHA1_ASM_OBJ= $sha1_obj/;
173 @@ -1698,6 +1708,8 @@ print "AES_ENC =$aes_obj\n";
174 print "BF_ENC =$bf_obj\n";
175 print "CAST_ENC =$cast_obj\n";
176 print "RC4_ENC =$rc4_obj\n";
177 +print "CHACHA_ENC =$chacha_obj\n";
178 +print "POLY1305 =$poly1305_obj\n";
179 print "RC5_ENC =$rc5_obj\n";
180 print "MD5_OBJ_ASM =$md5_obj\n";
181 print "SHA1_OBJ_ASM =$sha1_obj\n";
182 @@ -2096,11 +2108,11 @@ sub print_table_entry
183
184 (my $cc,my $cflags,my $unistd,my $thread_cflag,my $sys_id,my $lflags,
185 my $bn_ops,my $cpuid_obj,my $bn_obj,my $des_obj,my $aes_obj, my $bf_obj,
186 - my $md5_obj,my $sha1_obj,my $cast_obj,my $rc4_obj,my $rmd160_obj,
187 - my $rc5_obj,my $wp_obj,my $cmll_obj,my $modes_obj, my $engines_obj,
188 + my $md5_obj,my $sha1_obj,my $cast_obj,my $rc4_obj,my $chacha_obj,my $pol y1305_obj,
189 + my $rmd160_obj, my $rc5_obj,my $wp_obj,my $cmll_obj,my $modes_obj, my $e ngines_obj,
190 my $perlasm_scheme,my $dso_scheme,my $shared_target,my $shared_cflag,
191 my $shared_ldflag,my $shared_extension,my $ranlib,my $arflags,my $multil ib)=
192 - split(/\s*:\s*/,$table{$target} . ":" x 30 , -1);
193 + split(/\s*:\s*/,$table{$target} . ":" x 31 , -1);
194
195 print <<EOF
196
197 @@ -2121,6 +2133,8 @@ sub print_table_entry
198 \$sha1_obj = $sha1_obj
199 \$cast_obj = $cast_obj
200 \$rc4_obj = $rc4_obj
201 +\$chacha_obj = $chacha_obj
202 +\$poly1305_obj = $poly1305_obj
203 \$rmd160_obj = $rmd160_obj
204 \$rc5_obj = $rc5_obj
205 \$wp_obj = $wp_obj
206 @@ -2150,7 +2164,7 @@ sub test_sanity
207
208 foreach $target (sort keys %table)
209 {
210 - @fields = split(/\s*:\s*/,$table{$target} . ":" x 30 , -1);
211 + @fields = split(/\s*:\s*/,$table{$target} . ":" x 31 , -1);
212
213 if ($fields[$idx_dso_scheme-1] =~ /^(beos|dl|dlfcn|win32|vms)$/)
214 {
215 diff --git a/Makefile.org b/Makefile.org
216 index 2db31ea..919466d 100644
217 --- a/Makefile.org
218 +++ b/Makefile.org
219 @@ -94,6 +94,8 @@ BF_ENC= bf_enc.o
220 CAST_ENC= c_enc.o
221 RC4_ENC= rc4_enc.o
222 RC5_ENC= rc5_enc.o
223 +CHACHA_ENC= chacha_enc.o
224 +POLY1305= poly1305.o
225 MD5_ASM_OBJ=
226 SHA1_ASM_OBJ=
227 RMD160_ASM_OBJ=
228 @@ -147,7 +149,7 @@ SDIRS= \
229 bn ec rsa dsa ecdsa dh ecdh dso engine \
230 buffer bio stack lhash rand err \
231 evp asn1 pem x509 x509v3 conf txt_db pkcs7 pkcs12 comp ocsp ui krb5 \
232 - cms pqueue ts jpake srp store cmac
233 + cms pqueue ts jpake srp store cmac poly1305 chacha
234 # keep in mind that the above list is adjusted by ./Configure
235 # according to no-xxx arguments...
236
237 @@ -232,6 +234,8 @@ BUILDENV= PLATFORM='$(PLATFORM)' PROCESSOR='$(PROCESSOR)' \
238 WP_ASM_OBJ='$(WP_ASM_OBJ)' \
239 MODES_ASM_OBJ='$(MODES_ASM_OBJ)' \
240 ENGINES_ASM_OBJ='$(ENGINES_ASM_OBJ)' \
241 + CHACHA_ENC='$(CHACHA_ENC)' \
242 + POLY1305='$(POLY1305)' \
243 PERLASM_SCHEME='$(PERLASM_SCHEME)' \
244 FIPSLIBDIR='${FIPSLIBDIR}' \
245 FIPSDIR='${FIPSDIR}' \
246 diff --git a/crypto/chacha/Makefile b/crypto/chacha/Makefile
247 new file mode 100644
248 index 0000000..289933b
249 --- /dev/null
250 +++ b/crypto/chacha/Makefile
251 @@ -0,0 +1,80 @@
252 +#
253 +# OpenSSL/crypto/chacha/Makefile
254 +#
255 +
256 +DIR= chacha
257 +TOP= ../..
258 +CC= cc
259 +CPP= $(CC) -E
260 +INCLUDES=
261 +CFLAG=-g
262 +AR= ar r
263 +
264 +CFLAGS= $(INCLUDES) $(CFLAG)
265 +ASFLAGS= $(INCLUDES) $(ASFLAG)
266 +AFLAGS= $(ASFLAGS)
267 +
268 +CHACHA_ENC=chacha_enc.o
269 +
270 +GENERAL=Makefile
271 +TEST=chachatest.o
272 +APPS=
273 +
274 +LIB=$(TOP)/libcrypto.a
275 +LIBSRC=
276 +LIBOBJ=$(CHACHA_ENC)
277 +
278 +SRC= $(LIBSRC)
279 +
280 +EXHEADER=chacha.h
281 +HEADER= $(EXHEADER)
282 +
283 +ALL= $(GENERAL) $(SRC) $(HEADER)
284 +
285 +top:
286 + (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all)
287 +
288 +all: lib
289 +
290 +lib: $(LIBOBJ)
291 + $(AR) $(LIB) $(LIBOBJ)
292 + $(RANLIB) $(LIB) || echo Never mind.
293 + @touch lib
294 +
295 +files:
296 + $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
297 +
298 +links:
299 + @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER)
300 + @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST)
301 + @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS)
302 +
303 +install:
304 + @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile...
305 + @headerlist="$(EXHEADER)"; for i in $$headerlist ; \
306 + do \
307 + (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \
308 + chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \
309 + done;
310 +
311 +tags:
312 + ctags $(SRC)
313 +
314 +tests:
315 +
316 +lint:
317 + lint -DLINT $(INCLUDES) $(SRC)>fluff
318 +
319 +depend:
320 + @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile...
321 + $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC)
322 +
323 +dclean:
324 + $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKE FILE) >Makefile.new
325 + mv -f Makefile.new $(MAKEFILE)
326 +
327 +clean:
328 + rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff
329 +
330 +# DO NOT DELETE THIS LINE -- make depend depends on it.
331 +
332 diff --git a/crypto/chacha/chacha.h b/crypto/chacha/chacha.h
333 new file mode 100644
334 index 0000000..d56519d
335 --- /dev/null
336 +++ b/crypto/chacha/chacha.h
337 @@ -0,0 +1,85 @@
338 +/*
339 + * Chacha stream algorithm.
340 + *
341 + * Created on: Jun, 2013
342 + * Author: Elie Bursztein (elieb@google.com)
343 + *
344 + * Adapted from the estream code by D. Bernstein.
345 + */
346 +/* ====================================================================
347 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved.
348 + *
349 + * Redistribution and use in source and binary forms, with or without
350 + * modification, are permitted provided that the following conditions
351 + * are met:
352 + *
353 + * 1. Redistributions of source code must retain the above copyright
354 + * notice, this list of conditions and the following disclaimer.
355 + *
356 + * 2. Redistributions in binary form must reproduce the above copyright
357 + * notice, this list of conditions and the following disclaimer in
358 + * the documentation and/or other materials provided with the
359 + * distribution.
360 + *
361 + * 3. All advertising materials mentioning features or use of this
362 + * software must display the following acknowledgment:
363 + * "This product includes software developed by the OpenSSL Project
364 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
365 + *
366 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
367 + * endorse or promote products derived from this software without
368 + * prior written permission. For written permission, please contact
369 + * licensing@OpenSSL.org.
370 + *
371 + * 5. Products derived from this software may not be called "OpenSSL"
372 + * nor may "OpenSSL" appear in their names without prior written
373 + * permission of the OpenSSL Project.
374 + *
375 + * 6. Redistributions of any form whatsoever must retain the following
376 + * acknowledgment:
377 + * "This product includes software developed by the OpenSSL Project
378 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
379 + *
380 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
381 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
382 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
383 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
384 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
385 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
386 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
387 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
388 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
389 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
390 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
391 + * OF THE POSSIBILITY OF SUCH DAMAGE.
392 + * ====================================================================
393 + */
394 +#ifndef HEADER_CHACHA_H
395 +#define HEADER_CHACHA_H
396 +
397 +#include <openssl/opensslconf.h>
398 +
399 +#if defined(OPENSSL_NO_CHACHA)
400 +#error ChaCha support is disabled.
401 +#endif
402 +
403 +#include <stddef.h>
404 +
405 +#ifdef __cplusplus
406 +extern "C" {
407 +#endif
408 +
409 +/* CRYPTO_chacha_20 encrypts |in_len| bytes from |in| with the given key and
410 + * nonce and writes the result to |out|, which may be equal to |in|. The
411 + * initial block counter is specified by |counter|. */
412 +void CRYPTO_chacha_20(unsigned char *out,
413 + const unsigned char *in, size_t in_len,
414 + const unsigned char key[32],
415 + const unsigned char nonce[8],
416 + size_t counter);
417 +
418 +#ifdef __cplusplus
419 +}
420 +#endif
421 +
422 +#endif
423 diff --git a/crypto/chacha/chacha_enc.c b/crypto/chacha/chacha_enc.c
424 new file mode 100644
425 index 0000000..54d1ca3
426 --- /dev/null
427 +++ b/crypto/chacha/chacha_enc.c
428 @@ -0,0 +1,167 @@
429 +/*
430 + * Chacha stream algorithm.
431 + *
432 + * Created on: Jun, 2013
433 + * Author: Elie Bursztein (elieb@google.com)
434 + *
435 + * Adapted from the estream code by D. Bernstein.
436 + */
437 +/* ====================================================================
438 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved.
439 + *
440 + * Redistribution and use in source and binary forms, with or without
441 + * modification, are permitted provided that the following conditions
442 + * are met:
443 + *
444 + * 1. Redistributions of source code must retain the above copyright
445 + * notice, this list of conditions and the following disclaimer.
446 + *
447 + * 2. Redistributions in binary form must reproduce the above copyright
448 + * notice, this list of conditions and the following disclaimer in
449 + * the documentation and/or other materials provided with the
450 + * distribution.
451 + *
452 + * 3. All advertising materials mentioning features or use of this
453 + * software must display the following acknowledgment:
454 + * "This product includes software developed by the OpenSSL Project
455 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
456 + *
457 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
458 + * endorse or promote products derived from this software without
459 + * prior written permission. For written permission, please contact
460 + * licensing@OpenSSL.org.
461 + *
462 + * 5. Products derived from this software may not be called "OpenSSL"
463 + * nor may "OpenSSL" appear in their names without prior written
464 + * permission of the OpenSSL Project.
465 + *
466 + * 6. Redistributions of any form whatsoever must retain the following
467 + * acknowledgment:
468 + * "This product includes software developed by the OpenSSL Project
469 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
470 + *
471 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
472 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
473 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
474 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
475 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
476 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
477 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
478 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
479 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
480 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
481 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
482 + * OF THE POSSIBILITY OF SUCH DAMAGE.
483 + * ====================================================================
484 + */
485 +
486 +#include <stdint.h>
487 +#include <string.h>
488 +#include <openssl/opensslconf.h>
489 +
490 +#if !defined(OPENSSL_NO_CHACHA)
491 +
492 +#include <openssl/chacha.h>
493 +
494 +/* sigma contains the ChaCha constants, which happen to be an ASCII string. */
495 +static const char sigma[16] = "expand 32-byte k";
496 +
497 +#define ROTATE(v, n) (((v) << (n)) | ((v) >> (32 - (n))))
498 +#define XOR(v, w) ((v) ^ (w))
499 +#define PLUS(x, y) ((x) + (y))
500 +#define PLUSONE(v) (PLUS((v), 1))
501 +
502 +#define U32TO8_LITTLE(p, v) \
503 + { (p)[0] = (v >> 0) & 0xff; (p)[1] = (v >> 8) & 0xff; \
504 + (p)[2] = (v >> 16) & 0xff; (p)[3] = (v >> 24) & 0xff; }
505 +#define U8TO32_LITTLE(p) \
506 + (((uint32_t)((p)[0]) ) | ((uint32_t)((p)[1]) << 8) | \
507 + ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24) )
508 +
509 +/* QUARTERROUND updates a, b, c, d with a ChaCha "quarter" round. */
510 +#define QUARTERROUND(a,b,c,d) \
511 + x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \
512 + x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \
513 + x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \
514 + x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7);
515 +
516 +typedef unsigned int uint32_t;
517 +
518 +/* chacha_core performs |num_rounds| rounds of ChaCha20 on the input words in
519 + * |input| and writes the 64 output bytes to |output|. */
520 +static void chacha_core(unsigned char output[64], const uint32_t input[16],
521 + int num_rounds)
522 + {
523 + uint32_t x[16];
524 + int i;
525 +
526 + memcpy(x, input, sizeof(uint32_t) * 16);
527 + for (i = 20; i > 0; i -= 2)
528 + {
529 + QUARTERROUND( 0, 4, 8,12)
530 + QUARTERROUND( 1, 5, 9,13)
531 + QUARTERROUND( 2, 6,10,14)
532 + QUARTERROUND( 3, 7,11,15)
533 + QUARTERROUND( 0, 5,10,15)
534 + QUARTERROUND( 1, 6,11,12)
535 + QUARTERROUND( 2, 7, 8,13)
536 + QUARTERROUND( 3, 4, 9,14)
537 + }
538 +
539 + for (i = 0; i < 16; ++i)
540 + x[i] = PLUS(x[i], input[i]);
541 + for (i = 0; i < 16; ++i)
542 + U32TO8_LITTLE(output + 4 * i, x[i]);
543 + }
544 +
545 +void CRYPTO_chacha_20(unsigned char *out,
546 + const unsigned char *in, size_t in_len,
547 + const unsigned char key[32],
548 + const unsigned char nonce[8],
549 + size_t counter)
550 + {
551 + uint32_t input[16];
552 + unsigned char buf[64];
553 + size_t todo, i;
554 +
555 + input[0] = U8TO32_LITTLE(sigma + 0);
556 + input[1] = U8TO32_LITTLE(sigma + 4);
557 + input[2] = U8TO32_LITTLE(sigma + 8);
558 + input[3] = U8TO32_LITTLE(sigma + 12);
559 +
560 + input[4] = U8TO32_LITTLE(key + 0);
561 + input[5] = U8TO32_LITTLE(key + 4);
562 + input[6] = U8TO32_LITTLE(key + 8);
563 + input[7] = U8TO32_LITTLE(key + 12);
564 +
565 + input[8] = U8TO32_LITTLE(key + 16);
566 + input[9] = U8TO32_LITTLE(key + 20);
567 + input[10] = U8TO32_LITTLE(key + 24);
568 + input[11] = U8TO32_LITTLE(key + 28);
569 +
570 + input[12] = counter;
571 + input[13] = ((uint64_t) counter) >> 32;
572 + input[14] = U8TO32_LITTLE(nonce + 0);
573 + input[15] = U8TO32_LITTLE(nonce + 4);
574 +
575 + while (in_len > 0)
576 + {
577 + todo = sizeof(buf);
578 + if (in_len < todo)
579 + todo = in_len;
580 +
581 + chacha_core(buf, input, 20);
582 + for (i = 0; i < todo; i++)
583 + out[i] = in[i] ^ buf[i];
584 +
585 + out += todo;
586 + in += todo;
587 + in_len -= todo;
588 +
589 + input[12]++;
590 + if (input[12] == 0)
591 + input[13]++;
592 + }
593 + }
594 +
595 +#endif /* !OPENSSL_NO_CHACHA */
596 diff --git a/crypto/chacha/chacha_vec.c b/crypto/chacha/chacha_vec.c
597 new file mode 100644
598 index 0000000..33b2238
599 --- /dev/null
600 +++ b/crypto/chacha/chacha_vec.c
601 @@ -0,0 +1,345 @@
602 +/* ====================================================================
603 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved.
604 + *
605 + * Redistribution and use in source and binary forms, with or without
606 + * modification, are permitted provided that the following conditions
607 + * are met:
608 + *
609 + * 1. Redistributions of source code must retain the above copyright
610 + * notice, this list of conditions and the following disclaimer.
611 + *
612 + * 2. Redistributions in binary form must reproduce the above copyright
613 + * notice, this list of conditions and the following disclaimer in
614 + * the documentation and/or other materials provided with the
615 + * distribution.
616 + *
617 + * 3. All advertising materials mentioning features or use of this
618 + * software must display the following acknowledgment:
619 + * "This product includes software developed by the OpenSSL Project
620 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
621 + *
622 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
623 + * endorse or promote products derived from this software without
624 + * prior written permission. For written permission, please contact
625 + * licensing@OpenSSL.org.
626 + *
627 + * 5. Products derived from this software may not be called "OpenSSL"
628 + * nor may "OpenSSL" appear in their names without prior written
629 + * permission of the OpenSSL Project.
630 + *
631 + * 6. Redistributions of any form whatsoever must retain the following
632 + * acknowledgment:
633 + * "This product includes software developed by the OpenSSL Project
634 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
635 + *
636 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
637 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
638 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
639 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
640 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
641 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
642 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
643 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
644 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
645 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
646 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
647 + * OF THE POSSIBILITY OF SUCH DAMAGE.
648 + * ====================================================================
649 + */
650 +
651 +/* This implementation is by Ted Krovetz and was submitted to SUPERCOP and
652 + * marked as public domain. It was been altered to allow for non-aligned inputs
653 + * and to allow the block counter to be passed in specifically. */
654 +
655 +#include <string.h>
656 +#include <stdint.h>
657 +#include <openssl/opensslconf.h>
658 +
659 +#if !defined(OPENSSL_NO_CHACHA)
660 +
661 +#include <openssl/chacha.h>
662 +
663 +#ifndef CHACHA_RNDS
664 +#define CHACHA_RNDS 20 /* 8 (high speed), 20 (conservative), 12 (middle) */
665 +#endif
666 +
667 +/* Architecture-neutral way to specify 16-byte vector of ints */
668 +typedef unsigned vec __attribute__ ((vector_size (16)));
669 +
670 +/* This implementation is designed for Neon, SSE and AltiVec machines. The
671 + * following specify how to do certain vector operations efficiently on
672 + * each architecture, using intrinsics.
673 + * This implementation supports parallel processing of multiple blocks,
674 + * including potentially using general-purpose registers.
675 + */
676 +#if __ARM_NEON__
677 +#include <arm_neon.h>
678 +#define GPR_TOO 1
679 +#define VBPI 2
680 +#define ONE (vec)vsetq_lane_u32(1,vdupq_n_u32(0),0)
681 +#define LOAD(m) (vec)(*((vec*)(m)))
682 +#define STORE(m,r) (*((vec*)(m))) = (r)
683 +#define ROTV1(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,1)
684 +#define ROTV2(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,2)
685 +#define ROTV3(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,3)
686 +#define ROTW16(x) (vec)vrev32q_u16((uint16x8_t)x)
687 +#if __clang__
688 +#define ROTW7(x) (x << ((vec){ 7, 7, 7, 7})) ^ (x >> ((vec){25,25,25,25}))
689 +#define ROTW8(x) (x << ((vec){ 8, 8, 8, 8})) ^ (x >> ((vec){24,24,24,24}))
690 +#define ROTW12(x) (x << ((vec){12,12,12,12})) ^ (x >> ((vec){20,20,20,20}))
691 +#else
692 +#define ROTW7(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,7),(uint32x4_t)x,2 5)
693 +#define ROTW8(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,8),(uint32x4_t)x,2 4)
694 +#define ROTW12(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,12),(uint32x4_t)x, 20)
695 +#endif
696 +#elif __SSE2__
697 +#include <emmintrin.h>
698 +#define GPR_TOO 0
699 +#if __clang__
700 +#define VBPI 4
701 +#else
702 +#define VBPI 3
703 +#endif
704 +#define ONE (vec)_mm_set_epi32(0,0,0,1)
705 +#define LOAD(m) (vec)_mm_loadu_si128((__m128i*)(m))
706 +#define STORE(m,r) _mm_storeu_si128((__m128i*)(m), (__m128i) (r))
707 +#define ROTV1(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(0,3,2,1))
708 +#define ROTV2(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(1,0,3,2))
709 +#define ROTV3(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(2,1,0,3))
710 +#define ROTW7(x) (vec)(_mm_slli_epi32((__m128i)x, 7) ^ _mm_srli_epi32((__m128i )x,25))
711 +#define ROTW12(x) (vec)(_mm_slli_epi32((__m128i)x,12) ^ _mm_srli_epi32((__m128i )x,20))
712 +#if __SSSE3__
713 +#include <tmmintrin.h>
714 +#define ROTW8(x) (vec)_mm_shuffle_epi8((__m128i)x,_mm_set_epi8(14,13,12,15,10, 9,8,11,6,5,4,7,2,1,0,3))
715 +#define ROTW16(x) (vec)_mm_shuffle_epi8((__m128i)x,_mm_set_epi8(13,12,15,14,9,8 ,11,10,5,4,7,6,1,0,3,2))
716 +#else
717 +#define ROTW8(x) (vec)(_mm_slli_epi32((__m128i)x, 8) ^ _mm_srli_epi32((__m128i )x,24))
718 +#define ROTW16(x) (vec)(_mm_slli_epi32((__m128i)x,16) ^ _mm_srli_epi32((__m128i )x,16))
719 +#endif
720 +#else
721 +#error -- Implementation supports only machines with neon or SSE2
722 +#endif
723 +
724 +#ifndef REVV_BE
725 +#define REVV_BE(x) (x)
726 +#endif
727 +
728 +#ifndef REVW_BE
729 +#define REVW_BE(x) (x)
730 +#endif
731 +
732 +#define BPI (VBPI + GPR_TOO) /* Blocks computed per loop iteration */
733 +
734 +#define DQROUND_VECTORS(a,b,c,d) \
735 + a += b; d ^= a; d = ROTW16(d); \
736 + c += d; b ^= c; b = ROTW12(b); \
737 + a += b; d ^= a; d = ROTW8(d); \
738 + c += d; b ^= c; b = ROTW7(b); \
739 + b = ROTV1(b); c = ROTV2(c); d = ROTV3(d); \
740 + a += b; d ^= a; d = ROTW16(d); \
741 + c += d; b ^= c; b = ROTW12(b); \
742 + a += b; d ^= a; d = ROTW8(d); \
743 + c += d; b ^= c; b = ROTW7(b); \
744 + b = ROTV3(b); c = ROTV2(c); d = ROTV1(d);
745 +
746 +#define QROUND_WORDS(a,b,c,d) \
747 + a = a+b; d ^= a; d = d<<16 | d>>16; \
748 + c = c+d; b ^= c; b = b<<12 | b>>20; \
749 + a = a+b; d ^= a; d = d<< 8 | d>>24; \
750 + c = c+d; b ^= c; b = b<< 7 | b>>25;
751 +
752 +#define WRITE_XOR(in, op, d, v0, v1, v2, v3) \
753 + STORE(op + d + 0, LOAD(in + d + 0) ^ REVV_BE(v0)); \
754 + STORE(op + d + 4, LOAD(in + d + 4) ^ REVV_BE(v1)); \
755 + STORE(op + d + 8, LOAD(in + d + 8) ^ REVV_BE(v2)); \
756 + STORE(op + d +12, LOAD(in + d +12) ^ REVV_BE(v3));
757 +
758 +void CRYPTO_chacha_20(
759 + unsigned char *out,
760 + const unsigned char *in,
761 + size_t inlen,
762 + const unsigned char key[32],
763 + const unsigned char nonce[8],
764 + size_t counter)
765 + {
766 + unsigned iters, i, *op=(unsigned *)out, *ip=(unsigned *)in, *kp;
767 +#if defined(__ARM_NEON__)
768 + unsigned *np;
769 +#endif
770 + vec s0, s1, s2, s3;
771 +#if !defined(__ARM_NEON__) && !defined(__SSE2__)
772 + __attribute__ ((aligned (16))) unsigned key[8], nonce[4];
773 +#endif
774 + __attribute__ ((aligned (16))) unsigned chacha_const[] =
775 + {0x61707865,0x3320646E,0x79622D32,0x6B206574};
776 +#if defined(__ARM_NEON__) || defined(__SSE2__)
777 + kp = (unsigned *)key;
778 +#else
779 + ((vec *)key)[0] = REVV_BE(((vec *)key)[0]);
780 + ((vec *)key)[1] = REVV_BE(((vec *)key)[1]);
781 + nonce[0] = REVW_BE(((unsigned *)nonce)[0]);
782 + nonce[1] = REVW_BE(((unsigned *)nonce)[1]);
783 + nonce[2] = REVW_BE(((unsigned *)nonce)[2]);
784 + nonce[3] = REVW_BE(((unsigned *)nonce)[3]);
785 + kp = (unsigned *)key;
786 + np = (unsigned *)nonce;
787 +#endif
788 +#if defined(__ARM_NEON__)
789 + np = (unsigned*) nonce;
790 +#endif
791 + s0 = LOAD(chacha_const);
792 + s1 = LOAD(&((vec*)kp)[0]);
793 + s2 = LOAD(&((vec*)kp)[1]);
794 + s3 = (vec){
795 + counter & 0xffffffff,
796 +#if __ARM_NEON__
797 + 0, /* can't right-shift 32 bits on a 32-bit system. */
798 +#else
799 + counter >> 32,
800 +#endif
801 + ((uint32_t*)nonce)[0],
802 + ((uint32_t*)nonce)[1]
803 + };
804 +
805 + for (iters = 0; iters < inlen/(BPI*64); iters++)
806 + {
807 +#if GPR_TOO
808 + register unsigned x0, x1, x2, x3, x4, x5, x6, x7, x8,
809 + x9, x10, x11, x12, x13, x14, x15;
810 +#endif
811 +#if VBPI > 2
812 + vec v8,v9,v10,v11;
813 +#endif
814 +#if VBPI > 3
815 + vec v12,v13,v14,v15;
816 +#endif
817 +
818 + vec v0,v1,v2,v3,v4,v5,v6,v7;
819 + v4 = v0 = s0; v5 = v1 = s1; v6 = v2 = s2; v3 = s3;
820 + v7 = v3 + ONE;
821 +#if VBPI > 2
822 + v8 = v4; v9 = v5; v10 = v6;
823 + v11 = v7 + ONE;
824 +#endif
825 +#if VBPI > 3
826 + v12 = v8; v13 = v9; v14 = v10;
827 + v15 = v11 + ONE;
828 +#endif
829 +#if GPR_TOO
830 + x0 = chacha_const[0]; x1 = chacha_const[1];
831 + x2 = chacha_const[2]; x3 = chacha_const[3];
832 + x4 = kp[0]; x5 = kp[1]; x6 = kp[2]; x7 = kp[3];
833 + x8 = kp[4]; x9 = kp[5]; x10 = kp[6]; x11 = kp[7];
834 + x12 = counter+BPI*iters+(BPI-1); x13 = 0;
835 + x14 = np[0]; x15 = np[1];
836 +#endif
837 + for (i = CHACHA_RNDS/2; i; i--)
838 + {
839 + DQROUND_VECTORS(v0,v1,v2,v3)
840 + DQROUND_VECTORS(v4,v5,v6,v7)
841 +#if VBPI > 2
842 + DQROUND_VECTORS(v8,v9,v10,v11)
843 +#endif
844 +#if VBPI > 3
845 + DQROUND_VECTORS(v12,v13,v14,v15)
846 +#endif
847 +#if GPR_TOO
848 + QROUND_WORDS( x0, x4, x8,x12)
849 + QROUND_WORDS( x1, x5, x9,x13)
850 + QROUND_WORDS( x2, x6,x10,x14)
851 + QROUND_WORDS( x3, x7,x11,x15)
852 + QROUND_WORDS( x0, x5,x10,x15)
853 + QROUND_WORDS( x1, x6,x11,x12)
854 + QROUND_WORDS( x2, x7, x8,x13)
855 + QROUND_WORDS( x3, x4, x9,x14)
856 +#endif
857 + }
858 +
859 + WRITE_XOR(ip, op, 0, v0+s0, v1+s1, v2+s2, v3+s3)
860 + s3 += ONE;
861 + WRITE_XOR(ip, op, 16, v4+s0, v5+s1, v6+s2, v7+s3)
862 + s3 += ONE;
863 +#if VBPI > 2
864 + WRITE_XOR(ip, op, 32, v8+s0, v9+s1, v10+s2, v11+s3)
865 + s3 += ONE;
866 +#endif
867 +#if VBPI > 3
868 + WRITE_XOR(ip, op, 48, v12+s0, v13+s1, v14+s2, v15+s3)
869 + s3 += ONE;
870 +#endif
871 + ip += VBPI*16;
872 + op += VBPI*16;
873 +#if GPR_TOO
874 + op[0] = REVW_BE(REVW_BE(ip[0]) ^ (x0 + chacha_const[0]));
875 + op[1] = REVW_BE(REVW_BE(ip[1]) ^ (x1 + chacha_const[1]));
876 + op[2] = REVW_BE(REVW_BE(ip[2]) ^ (x2 + chacha_const[2]));
877 + op[3] = REVW_BE(REVW_BE(ip[3]) ^ (x3 + chacha_const[3]));
878 + op[4] = REVW_BE(REVW_BE(ip[4]) ^ (x4 + kp[0]));
879 + op[5] = REVW_BE(REVW_BE(ip[5]) ^ (x5 + kp[1]));
880 + op[6] = REVW_BE(REVW_BE(ip[6]) ^ (x6 + kp[2]));
881 + op[7] = REVW_BE(REVW_BE(ip[7]) ^ (x7 + kp[3]));
882 + op[8] = REVW_BE(REVW_BE(ip[8]) ^ (x8 + kp[4]));
883 + op[9] = REVW_BE(REVW_BE(ip[9]) ^ (x9 + kp[5]));
884 + op[10] = REVW_BE(REVW_BE(ip[10]) ^ (x10 + kp[6]));
885 + op[11] = REVW_BE(REVW_BE(ip[11]) ^ (x11 + kp[7]));
886 + op[12] = REVW_BE(REVW_BE(ip[12]) ^ (x12 + counter+BPI*iters+(BPI -1)));
887 + op[13] = REVW_BE(REVW_BE(ip[13]) ^ (x13));
888 + op[14] = REVW_BE(REVW_BE(ip[14]) ^ (x14 + np[0]));
889 + op[15] = REVW_BE(REVW_BE(ip[15]) ^ (x15 + np[1]));
890 + s3 += ONE;
891 + ip += 16;
892 + op += 16;
893 +#endif
894 + }
895 +
896 + for (iters = inlen%(BPI*64)/64; iters != 0; iters--)
897 + {
898 + vec v0 = s0, v1 = s1, v2 = s2, v3 = s3;
899 + for (i = CHACHA_RNDS/2; i; i--)
900 + {
901 + DQROUND_VECTORS(v0,v1,v2,v3);
902 + }
903 + WRITE_XOR(ip, op, 0, v0+s0, v1+s1, v2+s2, v3+s3)
904 + s3 += ONE;
905 + ip += 16;
906 + op += 16;
907 + }
908 +
909 + inlen = inlen % 64;
910 + if (inlen)
911 + {
912 + __attribute__ ((aligned (16))) vec buf[4];
913 + vec v0,v1,v2,v3;
914 + v0 = s0; v1 = s1; v2 = s2; v3 = s3;
915 + for (i = CHACHA_RNDS/2; i; i--)
916 + {
917 + DQROUND_VECTORS(v0,v1,v2,v3);
918 + }
919 +
920 + if (inlen >= 16)
921 + {
922 + STORE(op + 0, LOAD(ip + 0) ^ REVV_BE(v0 + s0));
923 + if (inlen >= 32)
924 + {
925 + STORE(op + 4, LOAD(ip + 4) ^ REVV_BE(v1 + s1));
926 + if (inlen >= 48)
927 + {
928 + STORE(op + 8, LOAD(ip + 8) ^
929 + REVV_BE(v2 + s2));
930 + buf[3] = REVV_BE(v3 + s3);
931 + }
932 + else
933 + buf[2] = REVV_BE(v2 + s2);
934 + }
935 + else
936 + buf[1] = REVV_BE(v1 + s1);
937 + }
938 + else
939 + buf[0] = REVV_BE(v0 + s0);
940 +
941 + for (i=inlen & ~15; i<inlen; i++)
942 + ((char *)op)[i] = ((char *)ip)[i] ^ ((char *)buf)[i];
943 + }
944 + }
945 +
946 +#endif /* !OPENSSL_NO_CHACHA */
947 diff --git a/crypto/chacha/chachatest.c b/crypto/chacha/chachatest.c
948 new file mode 100644
949 index 0000000..b2a9389
950 --- /dev/null
951 +++ b/crypto/chacha/chachatest.c
952 @@ -0,0 +1,211 @@
953 +/*
954 + * Chacha stream algorithm.
955 + *
956 + * Created on: Jun, 2013
957 + * Author: Elie Bursztein (elieb@google.com)
958 + *
959 + * Adapted from the estream code by D. Bernstein.
960 + */
961 +/* ====================================================================
962 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved.
963 + *
964 + * Redistribution and use in source and binary forms, with or without
965 + * modification, are permitted provided that the following conditions
966 + * are met:
967 + *
968 + * 1. Redistributions of source code must retain the above copyright
969 + * notice, this list of conditions and the following disclaimer.
970 + *
971 + * 2. Redistributions in binary form must reproduce the above copyright
972 + * notice, this list of conditions and the following disclaimer in
973 + * the documentation and/or other materials provided with the
974 + * distribution.
975 + *
976 + * 3. All advertising materials mentioning features or use of this
977 + * software must display the following acknowledgment:
978 + * "This product includes software developed by the OpenSSL Project
979 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
980 + *
981 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
982 + * endorse or promote products derived from this software without
983 + * prior written permission. For written permission, please contact
984 + * licensing@OpenSSL.org.
985 + *
986 + * 5. Products derived from this software may not be called "OpenSSL"
987 + * nor may "OpenSSL" appear in their names without prior written
988 + * permission of the OpenSSL Project.
989 + *
990 + * 6. Redistributions of any form whatsoever must retain the following
991 + * acknowledgment:
992 + * "This product includes software developed by the OpenSSL Project
993 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
994 + *
995 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
996 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
997 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
998 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
999 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1000 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
1001 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
1002 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1003 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
1004 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
1005 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
1006 + * OF THE POSSIBILITY OF SUCH DAMAGE.
1007 + * ====================================================================
1008 + */
1009 +
1010 +#include <stdio.h>
1011 +#include <stdlib.h>
1012 +#include <string.h>
1013 +#include <stdint.h>
1014 +
1015 +#include <openssl/chacha.h>
1016 +
1017 +struct chacha_test {
1018 + const char *keyhex;
1019 + const char *noncehex;
1020 + const char *outhex;
1021 +};
1022 +
1023 +static const struct chacha_test chacha_tests[] = {
1024 + {
1025 + "000000000000000000000000000000000000000000000000000000000000000 0",
1026 + "0000000000000000",
1027 + "76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc 7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586",
1028 + },
1029 + {
1030 + "000000000000000000000000000000000000000000000000000000000000000 1",
1031 + "0000000000000000",
1032 + "4540f05a9f1fb296d7736e7b208e3c96eb4fe1834688d2604f450952ed432d4 1bbe2a0b6ea7566d2a5d1e7e20d42af2c53d792b1c43fea817e9ad275ae546963",
1033 + },
1034 + {
1035 + "000000000000000000000000000000000000000000000000000000000000000 0",
1036 + "0000000000000001",
1037 + "de9cba7bf3d69ef5e786dc63973f653a0b49e015adbff7134fcb7df13782103 1e85a050278a7084527214f73efc7fa5b5277062eb7a0433e445f41e31afab757",
1038 + },
1039 + {
1040 + "000000000000000000000000000000000000000000000000000000000000000 0",
1041 + "0100000000000000",
1042 + "ef3fdfd6c61578fbf5cf35bd3dd33b8009631634d21e42ac33960bd138e50d3 2111e4caf237ee53ca8ad6426194a88545ddc497a0b466e7d6bbdb0041b2f586b",
1043 + },
1044 + {
1045 + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1 f",
1046 + "0001020304050607",
1047 + "f798a189f195e66982105ffb640bb7757f579da31602fc93ec01ac56f85ac3c 134a4547b733b46413042c9440049176905d3be59ea1c53f15916155c2be8241a38008b9a26bc359 41e2444177c8ade6689de95264986d95889fb60e84629c9bd9a5acb1cc118be563eb9b3a4a472f82 e09a7e778492b562ef7130e88dfe031c79db9d4f7c7a899151b9a475032b63fc385245fe054e3dd5 a97a5f576fe064025d3ce042c566ab2c507b138db853e3d6959660996546cc9c4a6eafdc777c040d 70eaf46f76dad3979e5c5360c3317166a1c894c94a371876a94df7628fe4eaaf2ccb27d5aaae0ad7 ad0f9d4b6ad3b54098746d4524d38407a6deb",
1048 + },
1049 +};
1050 +
1051 +static unsigned char hex_digit(char h)
1052 + {
1053 + if (h >= '0' && h <= '9')
1054 + return h - '0';
1055 + else if (h >= 'a' && h <= 'f')
1056 + return h - 'a' + 10;
1057 + else if (h >= 'A' && h <= 'F')
1058 + return h - 'A' + 10;
1059 + else
1060 + abort();
1061 + }
1062 +
1063 +static void hex_decode(unsigned char *out, const char* hex)
1064 + {
1065 + size_t j = 0;
1066 +
1067 + while (*hex != 0)
1068 + {
1069 + unsigned char v = hex_digit(*hex++);
1070 + v <<= 4;
1071 + v |= hex_digit(*hex++);
1072 + out[j++] = v;
1073 + }
1074 + }
1075 +
1076 +static void hexdump(unsigned char *a, size_t len)
1077 + {
1078 + size_t i;
1079 +
1080 + for (i = 0; i < len; i++)
1081 + printf("%02x", a[i]);
1082 + }
1083 +
1084 +/* misalign returns a pointer that points 0 to 15 bytes into |in| such that the
1085 + * returned pointer has alignment 1 mod 16. */
1086 +static void* misalign(void* in)
1087 + {
1088 + intptr_t x = (intptr_t) in;
1089 + x += (17 - (x % 16)) % 16;
1090 + return (void*) x;
1091 + }
1092 +
1093 +int main()
1094 + {
1095 + static const unsigned num_tests =
1096 + sizeof(chacha_tests) / sizeof(struct chacha_test);
1097 + unsigned i;
1098 + unsigned char key_bytes[32 + 16];
1099 + unsigned char nonce_bytes[8 + 16] = {0};
1100 +
1101 + unsigned char *key = misalign(key_bytes);
1102 + unsigned char *nonce = misalign(nonce_bytes);
1103 +
1104 + for (i = 0; i < num_tests; i++)
1105 + {
1106 + const struct chacha_test *test = &chacha_tests[i];
1107 + unsigned char *expected, *out_bytes, *zero_bytes, *out, *zeros;
1108 + size_t len = strlen(test->outhex);
1109 +
1110 + if (strlen(test->keyhex) != 32*2 ||
1111 + strlen(test->noncehex) != 8*2 ||
1112 + (len & 1) == 1)
1113 + return 1;
1114 +
1115 + len /= 2;
1116 +
1117 + hex_decode(key, test->keyhex);
1118 + hex_decode(nonce, test->noncehex);
1119 +
1120 + expected = malloc(len);
1121 + out_bytes = malloc(len+16);
1122 + zero_bytes = malloc(len+16);
1123 + /* Attempt to test unaligned inputs. */
1124 + out = misalign(out_bytes);
1125 + zeros = misalign(zero_bytes);
1126 + memset(zeros, 0, len);
1127 +
1128 + hex_decode(expected, test->outhex);
1129 + CRYPTO_chacha_20(out, zeros, len, key, nonce, 0);
1130 +
1131 + if (memcmp(out, expected, len) != 0)
1132 + {
1133 + printf("ChaCha20 test #%d failed.\n", i);
1134 + printf("got: ");
1135 + hexdump(out, len);
1136 + printf("\nexpected: ");
1137 + hexdump(expected, len);
1138 + printf("\n");
1139 + return 1;
1140 + }
1141 +
1142 + /* The last test has a large output. We test whether the
1143 + * counter works as expected by skipping the first 64 bytes of
1144 + * it. */
1145 + if (i == num_tests - 1)
1146 + {
1147 + CRYPTO_chacha_20(out, zeros, len - 64, key, nonce, 1);
1148 + if (memcmp(out, expected + 64, len - 64) != 0)
1149 + {
1150 + printf("ChaCha20 skip test failed.\n");
1151 + return 1;
1152 + }
1153 + }
1154 +
1155 + free(expected);
1156 + free(zero_bytes);
1157 + free(out_bytes);
1158 + }
1159 +
1160 +
1161 + printf("PASS\n");
1162 + return 0;
1163 + }
1164 diff --git a/crypto/evp/Makefile b/crypto/evp/Makefile
1165 index b73038d..86b0504 100644
1166 --- a/crypto/evp/Makefile
1167 +++ b/crypto/evp/Makefile
1168 @@ -29,7 +29,8 @@ LIBSRC= encode.c digest.c evp_enc.c evp_key.c evp_acnf.c evp_c nf.c \
1169 c_all.c c_allc.c c_alld.c evp_lib.c bio_ok.c \
1170 evp_pkey.c evp_pbe.c p5_crpt.c p5_crpt2.c \
1171 e_old.c pmeth_lib.c pmeth_fn.c pmeth_gn.c m_sigver.c evp_fips.c \
1172 - e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c evp_aead.c
1173 + e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c evp_aead.c \
1174 + e_chacha20poly1305.c
1175
1176 LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \
1177 e_des.o e_bf.o e_idea.o e_des3.o e_camellia.o\
1178 @@ -42,7 +43,7 @@ LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \
1179 c_all.o c_allc.o c_alld.o evp_lib.o bio_ok.o \
1180 evp_pkey.o evp_pbe.o p5_crpt.o p5_crpt2.o \
1181 e_old.o pmeth_lib.o pmeth_fn.o pmeth_gn.o m_sigver.o evp_fips.o \
1182 - e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o evp_aead.o
1183 + e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o evp_aead.o e_chacha20poly1305.o
1184
1185 SRC= $(LIBSRC)
1186
1187 @@ -239,6 +240,21 @@ e_cast.o: ../../include/openssl/objects.h ../../include/ope nssl/opensslconf.h
1188 e_cast.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
1189 e_cast.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
1190 e_cast.o: ../../include/openssl/symhacks.h ../cryptlib.h e_cast.c evp_locl.h
1191 +e_chacha20poly1305.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h
1192 +e_chacha20poly1305.o: ../../include/openssl/chacha.h
1193 +e_chacha20poly1305.o: ../../include/openssl/crypto.h
1194 +e_chacha20poly1305.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
1195 +e_chacha20poly1305.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
1196 +e_chacha20poly1305.o: ../../include/openssl/obj_mac.h
1197 +e_chacha20poly1305.o: ../../include/openssl/objects.h
1198 +e_chacha20poly1305.o: ../../include/openssl/opensslconf.h
1199 +e_chacha20poly1305.o: ../../include/openssl/opensslv.h
1200 +e_chacha20poly1305.o: ../../include/openssl/ossl_typ.h
1201 +e_chacha20poly1305.o: ../../include/openssl/poly1305.h
1202 +e_chacha20poly1305.o: ../../include/openssl/safestack.h
1203 +e_chacha20poly1305.o: ../../include/openssl/stack.h
1204 +e_chacha20poly1305.o: ../../include/openssl/symhacks.h e_chacha20poly1305.c
1205 +e_chacha20poly1305.o: evp_locl.h
1206 e_des.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h
1207 e_des.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
1208 e_des.o: ../../include/openssl/des.h ../../include/openssl/des_old.h
1209 @@ -258,9 +274,10 @@ e_des3.o: ../../include/openssl/evp.h ../../include/openssl /lhash.h
1210 e_des3.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
1211 e_des3.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
1212 e_des3.o: ../../include/openssl/ossl_typ.h ../../include/openssl/rand.h
1213 -e_des3.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
1214 -e_des3.o: ../../include/openssl/symhacks.h ../../include/openssl/ui.h
1215 -e_des3.o: ../../include/openssl/ui_compat.h ../cryptlib.h e_des3.c evp_locl.h
1216 +e_des3.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
1217 +e_des3.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
1218 +e_des3.o: ../../include/openssl/ui.h ../../include/openssl/ui_compat.h
1219 +e_des3.o: ../cryptlib.h e_des3.c evp_locl.h
1220 e_idea.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h
1221 e_idea.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
1222 e_idea.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
1223 @@ -356,6 +373,14 @@ evp_acnf.o: ../../include/openssl/opensslconf.h
1224 evp_acnf.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
1225 evp_acnf.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
1226 evp_acnf.o: ../../include/openssl/symhacks.h ../cryptlib.h evp_acnf.c
1227 +evp_aead.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h
1228 +evp_aead.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
1229 +evp_aead.o: ../../include/openssl/err.h ../../include/openssl/evp.h
1230 +evp_aead.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
1231 +evp_aead.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
1232 +evp_aead.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
1233 +evp_aead.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
1234 +evp_aead.o: ../../include/openssl/symhacks.h evp_aead.c
1235 evp_cnf.o: ../../e_os.h ../../include/openssl/asn1.h
1236 evp_cnf.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
1237 evp_cnf.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h
1238 diff --git a/crypto/evp/e_chacha20poly1305.c b/crypto/evp/e_chacha20poly1305.c
1239 new file mode 100644
1240 index 0000000..1c0c0fb
1241 --- /dev/null
1242 +++ b/crypto/evp/e_chacha20poly1305.c
1243 @@ -0,0 +1,267 @@
1244 +/* ====================================================================
1245 + * Copyright (c) 2013 The OpenSSL Project. All rights reserved.
1246 + *
1247 + * Redistribution and use in source and binary forms, with or without
1248 + * modification, are permitted provided that the following conditions
1249 + * are met:
1250 + *
1251 + * 1. Redistributions of source code must retain the above copyright
1252 + * notice, this list of conditions and the following disclaimer.
1253 + *
1254 + * 2. Redistributions in binary form must reproduce the above copyright
1255 + * notice, this list of conditions and the following disclaimer in
1256 + * the documentation and/or other materials provided with the
1257 + * distribution.
1258 + *
1259 + * 3. All advertising materials mentioning features or use of this
1260 + * software must display the following acknowledgment:
1261 + * "This product includes software developed by the OpenSSL Project
1262 + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
1263 + *
1264 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
1265 + * endorse or promote products derived from this software without
1266 + * prior written permission. For written permission, please contact
1267 + * openssl-core@openssl.org.
1268 + *
1269 + * 5. Products derived from this software may not be called "OpenSSL"
1270 + * nor may "OpenSSL" appear in their names without prior written
1271 + * permission of the OpenSSL Project.
1272 + *
1273 + * 6. Redistributions of any form whatsoever must retain the following
1274 + * acknowledgment:
1275 + * "This product includes software developed by the OpenSSL Project
1276 + * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
1277 + *
1278 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
1279 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1280 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
1281 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
1282 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1283 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
1284 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
1285 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1286 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
1287 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
1288 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
1289 + * OF THE POSSIBILITY OF SUCH DAMAGE.
1290 + * ====================================================================
1291 + *
1292 + */
1293 +
1294 +#include <stdint.h>
1295 +#include <string.h>
1296 +#include <openssl/opensslconf.h>
1297 +
1298 +#if !defined(OPENSSL_NO_CHACHA) && !defined(OPENSSL_NO_POLY1305)
1299 +
1300 +#include <openssl/chacha.h>
1301 +#include <openssl/poly1305.h>
1302 +#include <openssl/evp.h>
1303 +#include <openssl/err.h>
1304 +#include "evp_locl.h"
1305 +
1306 +#define POLY1305_TAG_LEN 16
1307 +#define CHACHA20_NONCE_LEN 8
1308 +
1309 +struct aead_chacha20_poly1305_ctx
1310 + {
1311 + unsigned char key[32];
1312 + unsigned char tag_len;
1313 + };
1314 +
1315 +static int aead_chacha20_poly1305_init(EVP_AEAD_CTX *ctx, const unsigned char * key, size_t key_len, size_t tag_len)
1316 + {
1317 + struct aead_chacha20_poly1305_ctx *c20_ctx;
1318 +
1319 + if (tag_len == 0)
1320 + tag_len = POLY1305_TAG_LEN;
1321 +
1322 + if (tag_len > POLY1305_TAG_LEN)
1323 + {
1324 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_INIT, EVP_R_TOO_LARGE);
1325 + return 0;
1326 + }
1327 +
1328 + if (key_len != sizeof(c20_ctx->key))
1329 + return 0; /* internal error - EVP_AEAD_CTX_init should catch th is. */
1330 +
1331 + c20_ctx = OPENSSL_malloc(sizeof(struct aead_chacha20_poly1305_ctx));
1332 + if (c20_ctx == NULL)
1333 + return 0;
1334 +
1335 + memcpy(&c20_ctx->key[0], key, key_len);
1336 + c20_ctx->tag_len = tag_len;
1337 + ctx->aead_state = c20_ctx;
1338 +
1339 + return 1;
1340 + }
1341 +
1342 +static void aead_chacha20_poly1305_cleanup(EVP_AEAD_CTX *ctx)
1343 + {
1344 + struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state;
1345 + OPENSSL_cleanse(c20_ctx->key, sizeof(c20_ctx->key));
1346 + OPENSSL_free(c20_ctx);
1347 + }
1348 +
1349 +static void poly1305_update_with_length(poly1305_state *poly1305,
1350 + const unsigned char *data, size_t data_len)
1351 + {
1352 + size_t j = data_len;
1353 + unsigned char length_bytes[8];
1354 + unsigned i;
1355 +
1356 + for (i = 0; i < sizeof(length_bytes); i++)
1357 + {
1358 + length_bytes[i] = j;
1359 + j >>= 8;
1360 + }
1361 +
1362 + CRYPTO_poly1305_update(poly1305, data, data_len);
1363 + CRYPTO_poly1305_update(poly1305, length_bytes, sizeof(length_bytes));
1364 +}
1365 +
1366 +#if __arm__
1367 +#define ALIGNED __attribute__((aligned(16)))
1368 +#else
1369 +#define ALIGNED
1370 +#endif
1371 +
1372 +static ssize_t aead_chacha20_poly1305_seal(const EVP_AEAD_CTX *ctx,
1373 + unsigned char *out, size_t max_out_len,
1374 + const unsigned char *nonce, size_t nonce_len,
1375 + const unsigned char *in, size_t in_len,
1376 + const unsigned char *ad, size_t ad_len)
1377 + {
1378 + const struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state;
1379 + unsigned char poly1305_key[32] ALIGNED;
1380 + poly1305_state poly1305;
1381 + const uint64_t in_len_64 = in_len;
1382 +
1383 + /* The underlying ChaCha implementation may not overflow the block
1384 + * counter into the second counter word. Therefore we disallow
1385 + * individual operations that work on more than 2TB at a time.
1386 + * |in_len_64| is needed because, on 32-bit platforms, size_t is only
1387 + * 32-bits and this produces a warning because it's always false.
1388 + * Casting to uint64_t inside the conditional is not sufficient to stop
1389 + * the warning. */
1390 + if (in_len_64 >= (1ull << 32)*64-64)
1391 + {
1392 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_TOO_LARGE);
1393 + return -1;
1394 + }
1395 +
1396 + if (max_out_len < in_len + c20_ctx->tag_len)
1397 + {
1398 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_BUFFER_TOO_SMALL );
1399 + return -1;
1400 + }
1401 +
1402 + if (nonce_len != CHACHA20_NONCE_LEN)
1403 + {
1404 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_IV_TOO_LARGE);
1405 + return -1;
1406 + }
1407 +
1408 + memset(poly1305_key, 0, sizeof(poly1305_key));
1409 + CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key), c20_c tx->key, nonce, 0);
1410 +
1411 + CRYPTO_poly1305_init(&poly1305, poly1305_key);
1412 + poly1305_update_with_length(&poly1305, ad, ad_len);
1413 + CRYPTO_chacha_20(out, in, in_len, c20_ctx->key, nonce, 1);
1414 + poly1305_update_with_length(&poly1305, out, in_len);
1415 +
1416 + if (c20_ctx->tag_len != POLY1305_TAG_LEN)
1417 + {
1418 + unsigned char tag[POLY1305_TAG_LEN];
1419 + CRYPTO_poly1305_finish(&poly1305, tag);
1420 + memcpy(out + in_len, tag, c20_ctx->tag_len);
1421 + return in_len + c20_ctx->tag_len;
1422 + }
1423 +
1424 + CRYPTO_poly1305_finish(&poly1305, out + in_len);
1425 + return in_len + POLY1305_TAG_LEN;
1426 + }
1427 +
1428 +static ssize_t aead_chacha20_poly1305_open(const EVP_AEAD_CTX *ctx,
1429 + unsigned char *out, size_t max_out_len,
1430 + const unsigned char *nonce, size_t nonce_len,
1431 + const unsigned char *in, size_t in_len,
1432 + const unsigned char *ad, size_t ad_len)
1433 + {
1434 + const struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state;
1435 + unsigned char mac[POLY1305_TAG_LEN];
1436 + unsigned char poly1305_key[32] ALIGNED;
1437 + size_t out_len;
1438 + poly1305_state poly1305;
1439 + const uint64_t in_len_64 = in_len;
1440 +
1441 + if (in_len < c20_ctx->tag_len)
1442 + {
1443 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BAD_DECRYPT);
1444 + return -1;
1445 + }
1446 +
1447 + /* The underlying ChaCha implementation may not overflow the block
1448 + * counter into the second counter word. Therefore we disallow
1449 + * individual operations that work on more than 2TB at a time.
1450 + * |in_len_64| is needed because, on 32-bit platforms, size_t is only
1451 + * 32-bits and this produces a warning because it's always false.
1452 + * Casting to uint64_t inside the conditional is not sufficient to stop
1453 + * the warning. */
1454 + if (in_len_64 >= (1ull << 32)*64-64)
1455 + {
1456 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_TOO_LARGE);
1457 + return -1;
1458 + }
1459 +
1460 + if (nonce_len != CHACHA20_NONCE_LEN)
1461 + {
1462 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_IV_TOO_LARGE);
1463 + return -1;
1464 + }
1465 +
1466 + out_len = in_len - c20_ctx->tag_len;
1467 +
1468 + if (max_out_len < out_len)
1469 + {
1470 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BUFFER_TOO_SMALL );
1471 + return -1;
1472 + }
1473 +
1474 + memset(poly1305_key, 0, sizeof(poly1305_key));
1475 + CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key), c20_c tx->key, nonce, 0);
1476 +
1477 + CRYPTO_poly1305_init(&poly1305, poly1305_key);
1478 + poly1305_update_with_length(&poly1305, ad, ad_len);
1479 + poly1305_update_with_length(&poly1305, in, out_len);
1480 + CRYPTO_poly1305_finish(&poly1305, mac);
1481 +
1482 + if (CRYPTO_memcmp(mac, in + out_len, c20_ctx->tag_len) != 0)
1483 + {
1484 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BAD_DECRYPT);
1485 + return -1;
1486 + }
1487 +
1488 + CRYPTO_chacha_20(out, in, out_len, c20_ctx->key, nonce, 1);
1489 + return out_len;
1490 + }
1491 +
1492 +static const EVP_AEAD aead_chacha20_poly1305 =
1493 + {
1494 + 32, /* key len */
1495 + CHACHA20_NONCE_LEN, /* nonce len */
1496 + POLY1305_TAG_LEN, /* overhead */
1497 + POLY1305_TAG_LEN, /* max tag length */
1498 +
1499 + aead_chacha20_poly1305_init,
1500 + aead_chacha20_poly1305_cleanup,
1501 + aead_chacha20_poly1305_seal,
1502 + aead_chacha20_poly1305_open,
1503 + };
1504 +
1505 +const EVP_AEAD *EVP_aead_chacha20_poly1305()
1506 + {
1507 + return &aead_chacha20_poly1305;
1508 + }
1509 +
1510 +#endif /* !OPENSSL_NO_CHACHA && !OPENSSL_NO_POLY1305 */
1511 diff --git a/crypto/evp/evp.h b/crypto/evp/evp.h
1512 index bd10642..7dc1656 100644
1513 --- a/crypto/evp/evp.h
1514 +++ b/crypto/evp/evp.h
1515 @@ -1258,6 +1258,11 @@ typedef struct evp_aead_st EVP_AEAD;
1516 const EVP_AEAD *EVP_aead_aes_128_gcm(void);
1517 #endif
1518
1519 +#if !defined(OPENSSL_NO_CHACHA) && !defined(OPENSSL_NO_POLY1305)
1520 +/* EVP_aead_chacha20_poly1305 is ChaCha20 with a Poly1305 authenticator. */
1521 +const EVP_AEAD *EVP_aead_chacha20_poly1305(void);
1522 +#endif
1523 +
1524 /* EVP_AEAD_key_length returns the length, in bytes, of the keys used by
1525 * |aead|. */
1526 size_t EVP_AEAD_key_length(const EVP_AEAD *aead);
1527 @@ -1360,6 +1365,9 @@ void ERR_load_EVP_strings(void);
1528 #define EVP_F_AEAD_AES_128_GCM_INIT 183
1529 #define EVP_F_AEAD_AES_128_GCM_OPEN 181
1530 #define EVP_F_AEAD_AES_128_GCM_SEAL 182
1531 +#define EVP_F_AEAD_CHACHA20_POLY1305_INIT 187
1532 +#define EVP_F_AEAD_CHACHA20_POLY1305_OPEN 184
1533 +#define EVP_F_AEAD_CHACHA20_POLY1305_SEAL 183
1534 #define EVP_F_AEAD_CTX_OPEN 185
1535 #define EVP_F_AEAD_CTX_SEAL 186
1536 #define EVP_F_AESNI_INIT_KEY 165
1537 diff --git a/crypto/evp/evp_err.c b/crypto/evp/evp_err.c
1538 index c47969c..fb747e5 100644
1539 --- a/crypto/evp/evp_err.c
1540 +++ b/crypto/evp/evp_err.c
1541 @@ -73,6 +73,9 @@ static ERR_STRING_DATA EVP_str_functs[]=
1542 {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_INIT), "AEAD_AES_128_GCM_INIT"},
1543 {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_OPEN), "AEAD_AES_128_GCM_OPEN"},
1544 {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_SEAL), "AEAD_AES_128_GCM_SEAL"},
1545 +{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_INIT), "AEAD_CHACHA20_POLY1305_INIT"},
1546 +{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_OPEN), "AEAD_CHACHA20_POLY1305_OPEN"},
1547 +{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_SEAL), "AEAD_CHACHA20_POLY1305_SEAL"},
1548 {ERR_FUNC(EVP_F_AEAD_CTX_OPEN), "AEAD_CTX_OPEN"},
1549 {ERR_FUNC(EVP_F_AEAD_CTX_SEAL), "AEAD_CTX_SEAL"},
1550 {ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"},
1551 diff --git a/crypto/poly1305/Makefile b/crypto/poly1305/Makefile
1552 new file mode 100644
1553 index 0000000..397d7cd
1554 --- /dev/null
1555 +++ b/crypto/poly1305/Makefile
1556 @@ -0,0 +1,81 @@
1557 +#
1558 +# OpenSSL/crypto/poly1305/Makefile
1559 +#
1560 +
1561 +DIR= poly1305
1562 +TOP= ../..
1563 +CC= cc
1564 +CPP= $(CC) -E
1565 +INCLUDES=
1566 +CFLAG=-g
1567 +AR= ar r
1568 +
1569 +POLY1305=poly1305_vec.o
1570 +
1571 +CFLAGS= $(INCLUDES) $(CFLAG)
1572 +ASFLAGS= $(INCLUDES) $(ASFLAG)
1573 +AFLAGS= $(ASFLAGS)
1574 +
1575 +GENERAL=Makefile
1576 +TEST=
1577 +APPS=
1578 +
1579 +LIB=$(TOP)/libcrypto.a
1580 +LIBSRC=poly1305_vec.c
1581 +LIBOBJ=$(POLY1305)
1582 +
1583 +SRC= $(LIBSRC)
1584 +
1585 +EXHEADER=poly1305.h
1586 +HEADER= $(EXHEADER)
1587 +
1588 +ALL= $(GENERAL) $(SRC) $(HEADER)
1589 +
1590 +top:
1591 + (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all)
1592 +
1593 +all: lib
1594 +
1595 +lib: $(LIBOBJ)
1596 + $(AR) $(LIB) $(LIBOBJ)
1597 + $(RANLIB) $(LIB) || echo Never mind.
1598 + @touch lib
1599 +
1600 +files:
1601 + $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
1602 +
1603 +links:
1604 + @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER)
1605 + @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST)
1606 + @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS)
1607 +
1608 +install:
1609 + @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile...
1610 + @headerlist="$(EXHEADER)"; for i in $$headerlist ; \
1611 + do \
1612 + (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \
1613 + chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \
1614 + done;
1615 +
1616 +tags:
1617 + ctags $(SRC)
1618 +
1619 +tests:
1620 +
1621 +lint:
1622 + lint -DLINT $(INCLUDES) $(SRC)>fluff
1623 +
1624 +depend:
1625 + @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile...
1626 + $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC)
1627 +
1628 +dclean:
1629 + $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKE FILE) >Makefile.new
1630 + mv -f Makefile.new $(MAKEFILE)
1631 +
1632 +clean:
1633 + rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff
1634 +
1635 +# DO NOT DELETE THIS LINE -- make depend depends on it.
1636 +
1637 +poly1305_vec.o: ../../include/openssl/poly1305.h poly1305_vec.c
1638 diff --git a/crypto/poly1305/poly1305.c b/crypto/poly1305/poly1305.c
1639 new file mode 100644
1640 index 0000000..2e5621d
1641 --- /dev/null
1642 +++ b/crypto/poly1305/poly1305.c
1643 @@ -0,0 +1,321 @@
1644 +/* ====================================================================
1645 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved.
1646 + *
1647 + * Redistribution and use in source and binary forms, with or without
1648 + * modification, are permitted provided that the following conditions
1649 + * are met:
1650 + *
1651 + * 1. Redistributions of source code must retain the above copyright
1652 + * notice, this list of conditions and the following disclaimer.
1653 + *
1654 + * 2. Redistributions in binary form must reproduce the above copyright
1655 + * notice, this list of conditions and the following disclaimer in
1656 + * the documentation and/or other materials provided with the
1657 + * distribution.
1658 + *
1659 + * 3. All advertising materials mentioning features or use of this
1660 + * software must display the following acknowledgment:
1661 + * "This product includes software developed by the OpenSSL Project
1662 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
1663 + *
1664 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
1665 + * endorse or promote products derived from this software without
1666 + * prior written permission. For written permission, please contact
1667 + * licensing@OpenSSL.org.
1668 + *
1669 + * 5. Products derived from this software may not be called "OpenSSL"
1670 + * nor may "OpenSSL" appear in their names without prior written
1671 + * permission of the OpenSSL Project.
1672 + *
1673 + * 6. Redistributions of any form whatsoever must retain the following
1674 + * acknowledgment:
1675 + * "This product includes software developed by the OpenSSL Project
1676 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
1677 + *
1678 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
1679 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1680 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
1681 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
1682 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1683 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
1684 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
1685 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1686 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
1687 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
1688 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
1689 + * OF THE POSSIBILITY OF SUCH DAMAGE.
1690 + * ====================================================================
1691 + */
1692 +
1693 +/* This implementation of poly1305 is by Andrew Moon
1694 + * (https://github.com/floodyberry/poly1305-donna) and released as public
1695 + * domain. */
1696 +
1697 +#include <string.h>
1698 +#include <stdint.h>
1699 +#include <openssl/opensslconf.h>
1700 +
1701 +#if !defined(OPENSSL_NO_POLY1305)
1702 +
1703 +#include <openssl/poly1305.h>
1704 +#include <openssl/crypto.h>
1705 +
 1706 +#if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
1707 +/* We can assume little-endian. */
1708 +static uint32_t U8TO32_LE(const unsigned char *m)
1709 + {
1710 + uint32_t r;
1711 + memcpy(&r, m, sizeof(r));
1712 + return r;
1713 + }
1714 +
1715 +static void U32TO8_LE(unsigned char *m, uint32_t v)
1716 + {
1717 + memcpy(m, &v, sizeof(v));
1718 + }
1719 +#else
1720 +static uint32_t U8TO32_LE(const unsigned char *m)
1721 + {
1722 + return (uint32_t)m[0] |
1723 + (uint32_t)m[1] << 8 |
1724 + (uint32_t)m[2] << 16 |
1725 + (uint32_t)m[3] << 24;
1726 + }
1727 +
1728 +static void U32TO8_LE(unsigned char *m, uint32_t v)
1729 + {
1730 + m[0] = v;
1731 + m[1] = v >> 8;
1732 + m[2] = v >> 16;
1733 + m[3] = v >> 24;
1734 + }
1735 +#endif
1736 +
1737 +static uint64_t
1738 +mul32x32_64(uint32_t a, uint32_t b)
1739 + {
1740 + return (uint64_t)a * b;
1741 + }
1742 +
1743 +
1744 +struct poly1305_state_st
1745 + {
1746 + uint32_t r0,r1,r2,r3,r4;
1747 + uint32_t s1,s2,s3,s4;
1748 + uint32_t h0,h1,h2,h3,h4;
1749 + unsigned char buf[16];
1750 + unsigned int buf_used;
1751 + unsigned char key[16];
1752 + };
1753 +
1754 +/* poly1305_blocks updates |state| given some amount of input data. This
1755 + * function may only be called with a |len| that is not a multiple of 16 at the
1756 + * end of the data. Otherwise the input must be buffered into 16 byte blocks.
1757 + * */
1758 +static void poly1305_update(struct poly1305_state_st *state,
1759 + const unsigned char *in, size_t len)
1760 + {
1761 + uint32_t t0,t1,t2,t3;
1762 + uint64_t t[5];
1763 + uint32_t b;
1764 + uint64_t c;
1765 + size_t j;
1766 + unsigned char mp[16];
1767 +
1768 + if (len < 16)
1769 + goto poly1305_donna_atmost15bytes;
1770 +
1771 +poly1305_donna_16bytes:
1772 + t0 = U8TO32_LE(in);
1773 + t1 = U8TO32_LE(in+4);
1774 + t2 = U8TO32_LE(in+8);
1775 + t3 = U8TO32_LE(in+12);
1776 +
1777 + in += 16;
1778 + len -= 16;
1779 +
1780 + state->h0 += t0 & 0x3ffffff;
1781 + state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff;
1782 + state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff;
1783 + state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff;
1784 + state->h4 += (t3 >> 8) | (1 << 24);
1785 +
1786 +poly1305_donna_mul:
1787 + t[0] = mul32x32_64(state->h0,state->r0) +
1788 + mul32x32_64(state->h1,state->s4) +
1789 + mul32x32_64(state->h2,state->s3) +
1790 + mul32x32_64(state->h3,state->s2) +
1791 + mul32x32_64(state->h4,state->s1);
1792 + t[1] = mul32x32_64(state->h0,state->r1) +
1793 + mul32x32_64(state->h1,state->r0) +
1794 + mul32x32_64(state->h2,state->s4) +
1795 + mul32x32_64(state->h3,state->s3) +
1796 + mul32x32_64(state->h4,state->s2);
1797 + t[2] = mul32x32_64(state->h0,state->r2) +
1798 + mul32x32_64(state->h1,state->r1) +
1799 + mul32x32_64(state->h2,state->r0) +
1800 + mul32x32_64(state->h3,state->s4) +
1801 + mul32x32_64(state->h4,state->s3);
1802 + t[3] = mul32x32_64(state->h0,state->r3) +
1803 + mul32x32_64(state->h1,state->r2) +
1804 + mul32x32_64(state->h2,state->r1) +
1805 + mul32x32_64(state->h3,state->r0) +
1806 + mul32x32_64(state->h4,state->s4);
1807 + t[4] = mul32x32_64(state->h0,state->r4) +
1808 + mul32x32_64(state->h1,state->r3) +
1809 + mul32x32_64(state->h2,state->r2) +
1810 + mul32x32_64(state->h3,state->r1) +
1811 + mul32x32_64(state->h4,state->r0);
1812 +
 1813 +         state->h0 = (uint32_t)t[0] & 0x3ffffff; c = (t[0] >> 26);
 1814 +         t[1] += c; state->h1 = (uint32_t)t[1] & 0x3ffffff; b = (uint32_t)(t[1] >> 26);
 1815 +         t[2] += b; state->h2 = (uint32_t)t[2] & 0x3ffffff; b = (uint32_t)(t[2] >> 26);
 1816 +         t[3] += b; state->h3 = (uint32_t)t[3] & 0x3ffffff; b = (uint32_t)(t[3] >> 26);
 1817 +         t[4] += b; state->h4 = (uint32_t)t[4] & 0x3ffffff; b = (uint32_t)(t[4] >> 26);
1818 + state->h0 += b * 5;
1819 +
1820 + if (len >= 16)
1821 + goto poly1305_donna_16bytes;
1822 +
1823 + /* final bytes */
1824 +poly1305_donna_atmost15bytes:
1825 + if (!len)
1826 + return;
1827 +
1828 + for (j = 0; j < len; j++)
1829 + mp[j] = in[j];
1830 + mp[j++] = 1;
1831 + for (; j < 16; j++)
1832 + mp[j] = 0;
1833 + len = 0;
1834 +
1835 + t0 = U8TO32_LE(mp+0);
1836 + t1 = U8TO32_LE(mp+4);
1837 + t2 = U8TO32_LE(mp+8);
1838 + t3 = U8TO32_LE(mp+12);
1839 +
1840 + state->h0 += t0 & 0x3ffffff;
1841 + state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff;
1842 + state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff;
1843 + state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff;
1844 + state->h4 += (t3 >> 8);
1845 +
1846 + goto poly1305_donna_mul;
1847 + }
1848 +
1849 +void CRYPTO_poly1305_init(poly1305_state *statep, const unsigned char key[32])
1850 + {
1851 + struct poly1305_state_st *state = (struct poly1305_state_st*) statep;
1852 + uint32_t t0,t1,t2,t3;
1853 +
1854 + t0 = U8TO32_LE(key+0);
1855 + t1 = U8TO32_LE(key+4);
1856 + t2 = U8TO32_LE(key+8);
1857 + t3 = U8TO32_LE(key+12);
1858 +
1859 + /* precompute multipliers */
1860 + state->r0 = t0 & 0x3ffffff; t0 >>= 26; t0 |= t1 << 6;
1861 + state->r1 = t0 & 0x3ffff03; t1 >>= 20; t1 |= t2 << 12;
1862 + state->r2 = t1 & 0x3ffc0ff; t2 >>= 14; t2 |= t3 << 18;
1863 + state->r3 = t2 & 0x3f03fff; t3 >>= 8;
1864 + state->r4 = t3 & 0x00fffff;
1865 +
1866 + state->s1 = state->r1 * 5;
1867 + state->s2 = state->r2 * 5;
1868 + state->s3 = state->r3 * 5;
1869 + state->s4 = state->r4 * 5;
1870 +
1871 + /* init state */
1872 + state->h0 = 0;
1873 + state->h1 = 0;
1874 + state->h2 = 0;
1875 + state->h3 = 0;
1876 + state->h4 = 0;
1877 +
1878 + state->buf_used = 0;
1879 + memcpy(state->key, key + 16, sizeof(state->key));
1880 + }
1881 +
1882 +void CRYPTO_poly1305_update(poly1305_state *statep, const unsigned char *in,
1883 + size_t in_len)
1884 + {
1885 + unsigned int i;
1886 + struct poly1305_state_st *state = (struct poly1305_state_st*) statep;
1887 +
1888 + if (state->buf_used)
1889 + {
1890 + unsigned int todo = 16 - state->buf_used;
1891 + if (todo > in_len)
1892 + todo = in_len;
1893 + for (i = 0; i < todo; i++)
1894 + state->buf[state->buf_used + i] = in[i];
1895 + state->buf_used += todo;
1896 + in_len -= todo;
1897 + in += todo;
1898 +
1899 + if (state->buf_used == 16)
1900 + {
1901 + poly1305_update(state, state->buf, 16);
1902 + state->buf_used = 0;
1903 + }
1904 + }
1905 +
1906 + if (in_len >= 16)
1907 + {
1908 + size_t todo = in_len & ~0xf;
1909 + poly1305_update(state, in, todo);
1910 + in += todo;
1911 + in_len &= 0xf;
1912 + }
1913 +
1914 + if (in_len)
1915 + {
1916 + for (i = 0; i < in_len; i++)
1917 + state->buf[i] = in[i];
1918 + state->buf_used = in_len;
1919 + }
1920 + }
1921 +
1922 +void CRYPTO_poly1305_finish(poly1305_state *statep, unsigned char mac[16])
1923 + {
1924 + struct poly1305_state_st *state = (struct poly1305_state_st*) statep;
1925 + uint64_t f0,f1,f2,f3;
1926 + uint32_t g0,g1,g2,g3,g4;
1927 + uint32_t b, nb;
1928 +
1929 + if (state->buf_used)
1930 + poly1305_update(state, state->buf, state->buf_used);
1931 +
 1932 +                         b = state->h0 >> 26; state->h0 = state->h0 & 0x3ffffff;
 1933 +         state->h1 += b; b = state->h1 >> 26; state->h1 = state->h1 & 0x3ffffff;
 1934 +         state->h2 += b; b = state->h2 >> 26; state->h2 = state->h2 & 0x3ffffff;
 1935 +         state->h3 += b; b = state->h3 >> 26; state->h3 = state->h3 & 0x3ffffff;
 1936 +         state->h4 += b; b = state->h4 >> 26; state->h4 = state->h4 & 0x3ffffff;
1937 + state->h0 += b * 5;
1938 +
1939 + g0 = state->h0 + 5; b = g0 >> 26; g0 &= 0x3ffffff;
1940 + g1 = state->h1 + b; b = g1 >> 26; g1 &= 0x3ffffff;
1941 + g2 = state->h2 + b; b = g2 >> 26; g2 &= 0x3ffffff;
1942 + g3 = state->h3 + b; b = g3 >> 26; g3 &= 0x3ffffff;
1943 + g4 = state->h4 + b - (1 << 26);
1944 +
1945 + b = (g4 >> 31) - 1;
1946 + nb = ~b;
1947 + state->h0 = (state->h0 & nb) | (g0 & b);
1948 + state->h1 = (state->h1 & nb) | (g1 & b);
1949 + state->h2 = (state->h2 & nb) | (g2 & b);
1950 + state->h3 = (state->h3 & nb) | (g3 & b);
1951 + state->h4 = (state->h4 & nb) | (g4 & b);
1952 +
 1953 +         f0 = ((state->h0      ) | (state->h1 << 26)) + (uint64_t)U8TO32_LE(&state->key[0]);
 1954 +         f1 = ((state->h1 >>  6) | (state->h2 << 20)) + (uint64_t)U8TO32_LE(&state->key[4]);
 1955 +         f2 = ((state->h2 >> 12) | (state->h3 << 14)) + (uint64_t)U8TO32_LE(&state->key[8]);
 1956 +         f3 = ((state->h3 >> 18) | (state->h4 <<  8)) + (uint64_t)U8TO32_LE(&state->key[12]);
1957 +
1958 + U32TO8_LE(&mac[ 0], f0); f1 += (f0 >> 32);
1959 + U32TO8_LE(&mac[ 4], f1); f2 += (f1 >> 32);
1960 + U32TO8_LE(&mac[ 8], f2); f3 += (f2 >> 32);
1961 + U32TO8_LE(&mac[12], f3);
1962 + }
1963 +
1964 +#endif /* !OPENSSL_NO_POLY1305 */
1965 diff --git a/crypto/poly1305/poly1305.h b/crypto/poly1305/poly1305.h
1966 new file mode 100644
1967 index 0000000..28f85ed
1968 --- /dev/null
1969 +++ b/crypto/poly1305/poly1305.h
1970 @@ -0,0 +1,88 @@
1971 +/*
1972 + * Poly1305
1973 + *
1974 + * Created on: Jun, 2013
1975 + * Author: Elie Bursztein (elieb@google.com)
1976 + *
1977 + * Adapted from the estream code by D. Bernstein.
1978 + */
1979 +/* ====================================================================
1980 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved.
1981 + *
1982 + * Redistribution and use in source and binary forms, with or without
1983 + * modification, are permitted provided that the following conditions
1984 + * are met:
1985 + *
1986 + * 1. Redistributions of source code must retain the above copyright
1987 + * notice, this list of conditions and the following disclaimer.
1988 + *
1989 + * 2. Redistributions in binary form must reproduce the above copyright
1990 + * notice, this list of conditions and the following disclaimer in
1991 + * the documentation and/or other materials provided with the
1992 + * distribution.
1993 + *
1994 + * 3. All advertising materials mentioning features or use of this
1995 + * software must display the following acknowledgment:
1996 + * "This product includes software developed by the OpenSSL Project
1997 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
1998 + *
1999 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
2000 + * endorse or promote products derived from this software without
2001 + * prior written permission. For written permission, please contact
2002 + * licensing@OpenSSL.org.
2003 + *
2004 + * 5. Products derived from this software may not be called "OpenSSL"
2005 + * nor may "OpenSSL" appear in their names without prior written
2006 + * permission of the OpenSSL Project.
2007 + *
2008 + * 6. Redistributions of any form whatsoever must retain the following
2009 + * acknowledgment:
2010 + * "This product includes software developed by the OpenSSL Project
2011 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
2012 + *
2013 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
2014 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2015 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
2016 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
2017 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
2018 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2019 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
2020 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2021 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
2022 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
2023 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
2024 + * OF THE POSSIBILITY OF SUCH DAMAGE.
2025 + * ====================================================================
2026 + */
2027 +
2028 +#ifndef HEADER_POLY1305_H_
2029 +#define HEADER_POLY1305_H_
2030 +
2031 +#include <stdint.h>
2032 +#include <openssl/opensslconf.h>
2033 +
2034 +#if defined(OPENSSL_NO_POLY1305)
2035 +#error Poly1305 support is disabled.
2036 +#endif
2037 +
2038 +typedef unsigned char poly1305_state[512];
2039 +
2040 +/* poly1305_init sets up |state| so that it can be used to calculate an
2041 + * authentication tag with the one-time key |key|. Note that |key| is a
2042 + * one-time key and therefore there is no `reset' method because that would
2043 + * enable several messages to be authenticated with the same key. */
2044 +extern void CRYPTO_poly1305_init(poly1305_state* state,
2045 + const unsigned char key[32]);
2046 +
2047 +/* poly1305_update processes |in_len| bytes from |in|. It can be called zero or
2048 + * more times after poly1305_init. */
2049 +extern void CRYPTO_poly1305_update(poly1305_state* state,
2050 + const unsigned char *in,
2051 + size_t in_len);
2052 +
2053 +/* poly1305_finish completes the poly1305 calculation and writes a 16 byte
2054 + * authentication tag to |mac|. */
2055 +extern void CRYPTO_poly1305_finish(poly1305_state* state,
2056 + unsigned char mac[16]);
2057 +
2058 +#endif /* HEADER_POLY1305_H_ */
2059 diff --git a/crypto/poly1305/poly1305_arm.c b/crypto/poly1305/poly1305_arm.c
2060 new file mode 100644
2061 index 0000000..adcef35
2062 --- /dev/null
2063 +++ b/crypto/poly1305/poly1305_arm.c
2064 @@ -0,0 +1,327 @@
2065 +/* ====================================================================
2066 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved.
2067 + *
2068 + * Redistribution and use in source and binary forms, with or without
2069 + * modification, are permitted provided that the following conditions
2070 + * are met:
2071 + *
2072 + * 1. Redistributions of source code must retain the above copyright
2073 + * notice, this list of conditions and the following disclaimer.
2074 + *
2075 + * 2. Redistributions in binary form must reproduce the above copyright
2076 + * notice, this list of conditions and the following disclaimer in
2077 + * the documentation and/or other materials provided with the
2078 + * distribution.
2079 + *
2080 + * 3. All advertising materials mentioning features or use of this
2081 + * software must display the following acknowledgment:
2082 + * "This product includes software developed by the OpenSSL Project
2083 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
2084 + *
2085 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
2086 + * endorse or promote products derived from this software without
2087 + * prior written permission. For written permission, please contact
2088 + * licensing@OpenSSL.org.
2089 + *
2090 + * 5. Products derived from this software may not be called "OpenSSL"
2091 + * nor may "OpenSSL" appear in their names without prior written
2092 + * permission of the OpenSSL Project.
2093 + *
2094 + * 6. Redistributions of any form whatsoever must retain the following
2095 + * acknowledgment:
2096 + * "This product includes software developed by the OpenSSL Project
2097 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
2098 + *
2099 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
2100 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2101 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
2102 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
2103 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
2104 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2105 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
2106 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2107 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
2108 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
2109 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
2110 + * OF THE POSSIBILITY OF SUCH DAMAGE.
2111 + * ====================================================================
2112 + */
2113 +
2114 +/* This implementation was taken from the public domain, neon2 version in
2115 + * SUPERCOP by D. J. Bernstein and Peter Schwabe. */
2116 +
2117 +#include <stdint.h>
2118 +
2119 +#include <openssl/poly1305.h>
2120 +
2121 +#if !defined(OPENSSL_NO_POLY1305)
2122 +
2123 +typedef struct {
2124 + uint32_t v[12]; /* for alignment; only using 10 */
2125 +} fe1305x2;
2126 +
2127 +#define addmulmod openssl_poly1305_neon2_addmulmod
2128 +#define blocks openssl_poly1305_neon2_blocks
2129 +
2130 +extern void addmulmod(fe1305x2 *r, const fe1305x2 *x, const fe1305x2 *y, const fe1305x2 *c);
2131 +
 2132 +extern int blocks(fe1305x2 *h, const fe1305x2 *precomp, const unsigned char *in, unsigned int inlen);
2133 +
2134 +static void freeze(fe1305x2 *r)
2135 + {
2136 + int i;
2137 +
2138 + uint32_t x0 = r->v[0];
2139 + uint32_t x1 = r->v[2];
2140 + uint32_t x2 = r->v[4];
2141 + uint32_t x3 = r->v[6];
2142 + uint32_t x4 = r->v[8];
2143 + uint32_t y0;
2144 + uint32_t y1;
2145 + uint32_t y2;
2146 + uint32_t y3;
2147 + uint32_t y4;
2148 + uint32_t swap;
2149 +
2150 + for (i = 0;i < 3;++i)
2151 + {
2152 + x1 += x0 >> 26; x0 &= 0x3ffffff;
2153 + x2 += x1 >> 26; x1 &= 0x3ffffff;
2154 + x3 += x2 >> 26; x2 &= 0x3ffffff;
2155 + x4 += x3 >> 26; x3 &= 0x3ffffff;
2156 + x0 += 5*(x4 >> 26); x4 &= 0x3ffffff;
2157 + }
2158 +
2159 + y0 = x0 + 5;
2160 + y1 = x1 + (y0 >> 26); y0 &= 0x3ffffff;
2161 + y2 = x2 + (y1 >> 26); y1 &= 0x3ffffff;
2162 + y3 = x3 + (y2 >> 26); y2 &= 0x3ffffff;
2163 + y4 = x4 + (y3 >> 26); y3 &= 0x3ffffff;
2164 + swap = -(y4 >> 26); y4 &= 0x3ffffff;
2165 +
2166 + y0 ^= x0;
2167 + y1 ^= x1;
2168 + y2 ^= x2;
2169 + y3 ^= x3;
2170 + y4 ^= x4;
2171 +
2172 + y0 &= swap;
2173 + y1 &= swap;
2174 + y2 &= swap;
2175 + y3 &= swap;
2176 + y4 &= swap;
2177 +
2178 + y0 ^= x0;
2179 + y1 ^= x1;
2180 + y2 ^= x2;
2181 + y3 ^= x3;
2182 + y4 ^= x4;
2183 +
2184 + r->v[0] = y0;
2185 + r->v[2] = y1;
2186 + r->v[4] = y2;
2187 + r->v[6] = y3;
2188 + r->v[8] = y4;
2189 + }
2190 +
2191 +static void fe1305x2_tobytearray(unsigned char *r, fe1305x2 *x)
2192 + {
2193 + uint32_t x0 = x->v[0];
2194 + uint32_t x1 = x->v[2];
2195 + uint32_t x2 = x->v[4];
2196 + uint32_t x3 = x->v[6];
2197 + uint32_t x4 = x->v[8];
2198 +
2199 + x1 += x0 >> 26;
2200 + x0 &= 0x3ffffff;
2201 + x2 += x1 >> 26;
2202 + x1 &= 0x3ffffff;
2203 + x3 += x2 >> 26;
2204 + x2 &= 0x3ffffff;
2205 + x4 += x3 >> 26;
2206 + x3 &= 0x3ffffff;
2207 +
2208 + *(uint32_t *) r = x0 + (x1 << 26);
2209 + *(uint32_t *) (r + 4) = (x1 >> 6) + (x2 << 20);
2210 + *(uint32_t *) (r + 8) = (x2 >> 12) + (x3 << 14);
2211 + *(uint32_t *) (r + 12) = (x3 >> 18) + (x4 << 8);
2212 + }
2213 +
2214 +/* load32 exists to avoid breaking strict aliasing rules in
2215 + * fe1305x2_frombytearray. */
2216 +static uint32_t load32(unsigned char *t)
2217 + {
2218 + uint32_t tmp;
2219 + memcpy(&tmp, t, sizeof(tmp));
2220 + return tmp;
2221 + }
2222 +
 2223 +static void fe1305x2_frombytearray(fe1305x2 *r, const unsigned char *x, unsigned long long xlen)
2224 + {
2225 + int i;
2226 + unsigned char t[17];
2227 +
2228 + for (i = 0; (i < 16) && (i < xlen); i++)
2229 + t[i] = x[i];
2230 + xlen -= i;
2231 + x += i;
2232 + t[i++] = 1;
2233 + for (; i<17; i++)
2234 + t[i] = 0;
2235 +
2236 + r->v[0] = 0x3ffffff & load32(t);
2237 + r->v[2] = 0x3ffffff & (load32(t + 3) >> 2);
2238 + r->v[4] = 0x3ffffff & (load32(t + 6) >> 4);
2239 + r->v[6] = 0x3ffffff & (load32(t + 9) >> 6);
2240 + r->v[8] = load32(t + 13);
2241 +
2242 + if (xlen)
2243 + {
2244 + for (i = 0; (i < 16) && (i < xlen); i++)
2245 + t[i] = x[i];
2246 + t[i++] = 1;
2247 + for (; i<17; i++)
2248 + t[i] = 0;
2249 +
2250 + r->v[1] = 0x3ffffff & load32(t);
2251 + r->v[3] = 0x3ffffff & (load32(t + 3) >> 2);
2252 + r->v[5] = 0x3ffffff & (load32(t + 6) >> 4);
2253 + r->v[7] = 0x3ffffff & (load32(t + 9) >> 6);
2254 + r->v[9] = load32(t + 13);
2255 + }
2256 + else
2257 + r->v[1] = r->v[3] = r->v[5] = r->v[7] = r->v[9] = 0;
2258 + }
2259 +
2260 +static const fe1305x2 zero __attribute__ ((aligned (16)));
2261 +
2262 +struct poly1305_state_st {
2263 + unsigned char data[sizeof(fe1305x2[5]) + 128];
2264 + unsigned char buf[32];
2265 + unsigned int buf_used;
2266 + unsigned char key[16];
2267 +};
2268 +
2269 +void CRYPTO_poly1305_init(poly1305_state *state, const unsigned char key[32])
2270 + {
2271 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state);
2272 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data)));
2273 + fe1305x2 *const h = r + 1;
2274 + fe1305x2 *const c = h + 1;
2275 + fe1305x2 *const precomp = c + 1;
2276 + unsigned int j;
2277 +
2278 + r->v[1] = r->v[0] = 0x3ffffff & *(uint32_t *) key;
2279 + r->v[3] = r->v[2] = 0x3ffff03 & ((*(uint32_t *) (key + 3)) >> 2);
2280 + r->v[5] = r->v[4] = 0x3ffc0ff & ((*(uint32_t *) (key + 6)) >> 4);
2281 + r->v[7] = r->v[6] = 0x3f03fff & ((*(uint32_t *) (key + 9)) >> 6);
2282 + r->v[9] = r->v[8] = 0x00fffff & ((*(uint32_t *) (key + 12)) >> 8);
2283 +
2284 + for (j = 0; j < 10; j++)
2285 + h->v[j] = 0; /* XXX: should fast-forward a bit */
2286 +
2287 + addmulmod(precomp,r,r,&zero); /* precompute r^2 */
2288 + addmulmod(precomp + 1,precomp,precomp,&zero); /* precompute r^4 */
2289 +
2290 + memcpy(st->key, key + 16, 16);
2291 + st->buf_used = 0;
2292 + }
2293 +
 2294 +void CRYPTO_poly1305_update(poly1305_state *state, const unsigned char *in, size_t in_len)
2295 + {
2296 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state);
2297 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data)));
2298 + fe1305x2 *const h = r + 1;
2299 + fe1305x2 *const c = h + 1;
2300 + fe1305x2 *const precomp = c + 1;
2301 + unsigned int i;
2302 +
2303 + if (st->buf_used)
2304 + {
2305 + unsigned int todo = 32 - st->buf_used;
2306 + if (todo > in_len)
2307 + todo = in_len;
2308 + for (i = 0; i < todo; i++)
2309 + st->buf[st->buf_used + i] = in[i];
2310 + st->buf_used += todo;
2311 + in_len -= todo;
2312 + in += todo;
2313 +
2314 + if (st->buf_used == sizeof(st->buf) && in_len)
2315 + {
2316 + addmulmod(h,h,precomp,&zero);
2317 + fe1305x2_frombytearray(c, st->buf, sizeof(st->buf));
2318 + for (i = 0; i < 10; i++)
2319 + h->v[i] += c->v[i];
2320 + st->buf_used = 0;
2321 + }
2322 + }
2323 +
2324 + while (in_len > 32)
2325 + {
2326 + unsigned int tlen = 1048576;
2327 + if (in_len < tlen)
2328 + tlen = in_len;
2329 + tlen -= blocks(h, precomp, in, tlen);
2330 + in_len -= tlen;
2331 + in += tlen;
2332 + }
2333 +
2334 + if (in_len)
2335 + {
2336 + for (i = 0; i < in_len; i++)
2337 + st->buf[i] = in[i];
2338 + st->buf_used = in_len;
2339 + }
2340 + }
2341 +
2342 +void CRYPTO_poly1305_finish(poly1305_state* state, unsigned char mac[16])
2343 + {
2344 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state);
2345 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data)));
2346 + fe1305x2 *const h = r + 1;
2347 + fe1305x2 *const c = h + 1;
2348 + fe1305x2 *const precomp = c + 1;
2349 +
2350 + addmulmod(h,h,precomp,&zero);
2351 +
2352 + if (st->buf_used > 16)
2353 + {
2354 + fe1305x2_frombytearray(c, st->buf, st->buf_used);
2355 + precomp->v[1] = r->v[1];
2356 + precomp->v[3] = r->v[3];
2357 + precomp->v[5] = r->v[5];
2358 + precomp->v[7] = r->v[7];
2359 + precomp->v[9] = r->v[9];
2360 + addmulmod(h,h,precomp,c);
2361 + }
2362 + else if (st->buf_used > 0)
2363 + {
2364 + fe1305x2_frombytearray(c, st->buf, st->buf_used);
2365 + r->v[1] = 1;
2366 + r->v[3] = 0;
2367 + r->v[5] = 0;
2368 + r->v[7] = 0;
2369 + r->v[9] = 0;
2370 + addmulmod(h,h,r,c);
2371 + }
2372 +
2373 + h->v[0] += h->v[1];
2374 + h->v[2] += h->v[3];
2375 + h->v[4] += h->v[5];
2376 + h->v[6] += h->v[7];
2377 + h->v[8] += h->v[9];
2378 + freeze(h);
2379 +
2380 + fe1305x2_frombytearray(c, st->key, 16);
2381 + c->v[8] ^= (1 << 24);
2382 +
2383 + h->v[0] += c->v[0];
2384 + h->v[2] += c->v[2];
2385 + h->v[4] += c->v[4];
2386 + h->v[6] += c->v[6];
2387 + h->v[8] += c->v[8];
2388 + fe1305x2_tobytearray(mac, h);
2389 + }
2390 +
2391 +#endif /* !OPENSSL_NO_POLY1305 */
 2392 diff --git a/crypto/poly1305/poly1305_arm_asm.S b/crypto/poly1305/poly1305_arm_asm.S
2393 new file mode 100644
2394 index 0000000..449d16f
2395 --- /dev/null
2396 +++ b/crypto/poly1305/poly1305_arm_asm.S
2397 @@ -0,0 +1,2009 @@
2398 +# This implementation was taken from the public domain, neon2 version in
2399 +# SUPERCOP by D. J. Bernstein and Peter Schwabe.
2400 +
2401 +# qhasm: int32 input_0
2402 +
2403 +# qhasm: int32 input_1
2404 +
2405 +# qhasm: int32 input_2
2406 +
2407 +# qhasm: int32 input_3
2408 +
2409 +# qhasm: stack32 input_4
2410 +
2411 +# qhasm: stack32 input_5
2412 +
2413 +# qhasm: stack32 input_6
2414 +
2415 +# qhasm: stack32 input_7
2416 +
2417 +# qhasm: int32 caller_r4
2418 +
2419 +# qhasm: int32 caller_r5
2420 +
2421 +# qhasm: int32 caller_r6
2422 +
2423 +# qhasm: int32 caller_r7
2424 +
2425 +# qhasm: int32 caller_r8
2426 +
2427 +# qhasm: int32 caller_r9
2428 +
2429 +# qhasm: int32 caller_r10
2430 +
2431 +# qhasm: int32 caller_r11
2432 +
2433 +# qhasm: int32 caller_r12
2434 +
2435 +# qhasm: int32 caller_r14
2436 +
2437 +# qhasm: reg128 caller_q4
2438 +
2439 +# qhasm: reg128 caller_q5
2440 +
2441 +# qhasm: reg128 caller_q6
2442 +
2443 +# qhasm: reg128 caller_q7
2444 +
2445 +# qhasm: startcode
2446 +.fpu neon
2447 +.text
2448 +
2449 +# qhasm: reg128 r0
2450 +
2451 +# qhasm: reg128 r1
2452 +
2453 +# qhasm: reg128 r2
2454 +
2455 +# qhasm: reg128 r3
2456 +
2457 +# qhasm: reg128 r4
2458 +
2459 +# qhasm: reg128 x01
2460 +
2461 +# qhasm: reg128 x23
2462 +
2463 +# qhasm: reg128 x4
2464 +
2465 +# qhasm: reg128 y0
2466 +
2467 +# qhasm: reg128 y12
2468 +
2469 +# qhasm: reg128 y34
2470 +
2471 +# qhasm: reg128 5y12
2472 +
2473 +# qhasm: reg128 5y34
2474 +
2475 +# qhasm: stack128 y0_stack
2476 +
2477 +# qhasm: stack128 y12_stack
2478 +
2479 +# qhasm: stack128 y34_stack
2480 +
2481 +# qhasm: stack128 5y12_stack
2482 +
2483 +# qhasm: stack128 5y34_stack
2484 +
2485 +# qhasm: reg128 z0
2486 +
2487 +# qhasm: reg128 z12
2488 +
2489 +# qhasm: reg128 z34
2490 +
2491 +# qhasm: reg128 5z12
2492 +
2493 +# qhasm: reg128 5z34
2494 +
2495 +# qhasm: stack128 z0_stack
2496 +
2497 +# qhasm: stack128 z12_stack
2498 +
2499 +# qhasm: stack128 z34_stack
2500 +
2501 +# qhasm: stack128 5z12_stack
2502 +
2503 +# qhasm: stack128 5z34_stack
2504 +
2505 +# qhasm: stack128 two24
2506 +
2507 +# qhasm: int32 ptr
2508 +
2509 +# qhasm: reg128 c01
2510 +
2511 +# qhasm: reg128 c23
2512 +
2513 +# qhasm: reg128 d01
2514 +
2515 +# qhasm: reg128 d23
2516 +
2517 +# qhasm: reg128 t0
2518 +
2519 +# qhasm: reg128 t1
2520 +
2521 +# qhasm: reg128 t2
2522 +
2523 +# qhasm: reg128 t3
2524 +
2525 +# qhasm: reg128 t4
2526 +
2527 +# qhasm: reg128 mask
2528 +
2529 +# qhasm: reg128 u0
2530 +
2531 +# qhasm: reg128 u1
2532 +
2533 +# qhasm: reg128 u2
2534 +
2535 +# qhasm: reg128 u3
2536 +
2537 +# qhasm: reg128 u4
2538 +
2539 +# qhasm: reg128 v01
2540 +
2541 +# qhasm: reg128 mid
2542 +
2543 +# qhasm: reg128 v23
2544 +
2545 +# qhasm: reg128 v4
2546 +
2547 +# qhasm: int32 len
2548 +
2549 +# qhasm: qpushenter crypto_onetimeauth_poly1305_neon2_blocks
2550 +.align 4
2551 +.global openssl_poly1305_neon2_blocks
2552 +.type openssl_poly1305_neon2_blocks STT_FUNC
2553 +openssl_poly1305_neon2_blocks:
2554 +vpush {q4,q5,q6,q7}
2555 +mov r12,sp
2556 +sub sp,sp,#192
2557 +and sp,sp,#0xffffffe0
2558 +
2559 +# qhasm: len = input_3
2560 +# asm 1: mov >len=int32#4,<input_3=int32#4
2561 +# asm 2: mov >len=r3,<input_3=r3
2562 +mov r3,r3
2563 +
2564 +# qhasm: new y0
2565 +
2566 +# qhasm: y0 = mem64[input_1]y0[1]; input_1 += 8
2567 +# asm 1: vld1.8 {<y0=reg128#1%bot},[<input_1=int32#2]!
2568 +# asm 2: vld1.8 {<y0=d0},[<input_1=r1]!
2569 +vld1.8 {d0},[r1]!
2570 +
2571 +# qhasm: y12 = mem128[input_1]; input_1 += 16
2572 +# asm 1: vld1.8 {>y12=reg128#2%bot->y12=reg128#2%top},[<input_1=int32#2]!
2573 +# asm 2: vld1.8 {>y12=d2->y12=d3},[<input_1=r1]!
2574 +vld1.8 {d2-d3},[r1]!
2575 +
2576 +# qhasm: y34 = mem128[input_1]; input_1 += 16
2577 +# asm 1: vld1.8 {>y34=reg128#3%bot->y34=reg128#3%top},[<input_1=int32#2]!
2578 +# asm 2: vld1.8 {>y34=d4->y34=d5},[<input_1=r1]!
2579 +vld1.8 {d4-d5},[r1]!
2580 +
2581 +# qhasm: input_1 += 8
2582 +# asm 1: add >input_1=int32#2,<input_1=int32#2,#8
2583 +# asm 2: add >input_1=r1,<input_1=r1,#8
2584 +add r1,r1,#8
2585 +
2586 +# qhasm: new z0
2587 +
2588 +# qhasm: z0 = mem64[input_1]z0[1]; input_1 += 8
2589 +# asm 1: vld1.8 {<z0=reg128#4%bot},[<input_1=int32#2]!
2590 +# asm 2: vld1.8 {<z0=d6},[<input_1=r1]!
2591 +vld1.8 {d6},[r1]!
2592 +
2593 +# qhasm: z12 = mem128[input_1]; input_1 += 16
2594 +# asm 1: vld1.8 {>z12=reg128#5%bot->z12=reg128#5%top},[<input_1=int32#2]!
2595 +# asm 2: vld1.8 {>z12=d8->z12=d9},[<input_1=r1]!
2596 +vld1.8 {d8-d9},[r1]!
2597 +
2598 +# qhasm: z34 = mem128[input_1]; input_1 += 16
2599 +# asm 1: vld1.8 {>z34=reg128#6%bot->z34=reg128#6%top},[<input_1=int32#2]!
2600 +# asm 2: vld1.8 {>z34=d10->z34=d11},[<input_1=r1]!
2601 +vld1.8 {d10-d11},[r1]!
2602 +
2603 +# qhasm: 2x mask = 0xffffffff
2604 +# asm 1: vmov.i64 >mask=reg128#7,#0xffffffff
2605 +# asm 2: vmov.i64 >mask=q6,#0xffffffff
2606 +vmov.i64 q6,#0xffffffff
2607 +
2608 +# qhasm: 2x u4 = 0xff
2609 +# asm 1: vmov.i64 >u4=reg128#8,#0xff
2610 +# asm 2: vmov.i64 >u4=q7,#0xff
2611 +vmov.i64 q7,#0xff
2612 +
2613 +# qhasm: x01 aligned= mem128[input_0];input_0+=16
2614 +# asm 1: vld1.8 {>x01=reg128#9%bot->x01=reg128#9%top},[<input_0=int32#1,: 128]!
2615 +# asm 2: vld1.8 {>x01=d16->x01=d17},[<input_0=r0,: 128]!
2616 +vld1.8 {d16-d17},[r0,: 128]!
2617 +
2618 +# qhasm: x23 aligned= mem128[input_0];input_0+=16
2619 +# asm 1: vld1.8 {>x23=reg128#10%bot->x23=reg128#10%top},[<input_0=int32#1,: 128 ]!
2620 +# asm 2: vld1.8 {>x23=d18->x23=d19},[<input_0=r0,: 128]!
2621 +vld1.8 {d18-d19},[r0,: 128]!
2622 +
2623 +# qhasm: x4 aligned= mem64[input_0]x4[1]
2624 +# asm 1: vld1.8 {<x4=reg128#11%bot},[<input_0=int32#1,: 64]
2625 +# asm 2: vld1.8 {<x4=d20},[<input_0=r0,: 64]
2626 +vld1.8 {d20},[r0,: 64]
2627 +
2628 +# qhasm: input_0 -= 32
2629 +# asm 1: sub >input_0=int32#1,<input_0=int32#1,#32
2630 +# asm 2: sub >input_0=r0,<input_0=r0,#32
2631 +sub r0,r0,#32
2632 +
2633 +# qhasm: 2x mask unsigned>>=6
2634 +# asm 1: vshr.u64 >mask=reg128#7,<mask=reg128#7,#6
2635 +# asm 2: vshr.u64 >mask=q6,<mask=q6,#6
2636 +vshr.u64 q6,q6,#6
2637 +
2638 +# qhasm: 2x u4 unsigned>>= 7
2639 +# asm 1: vshr.u64 >u4=reg128#8,<u4=reg128#8,#7
2640 +# asm 2: vshr.u64 >u4=q7,<u4=q7,#7
2641 +vshr.u64 q7,q7,#7
2642 +
2643 +# qhasm: 4x 5y12 = y12 << 2
2644 +# asm 1: vshl.i32 >5y12=reg128#12,<y12=reg128#2,#2
2645 +# asm 2: vshl.i32 >5y12=q11,<y12=q1,#2
2646 +vshl.i32 q11,q1,#2
2647 +
2648 +# qhasm: 4x 5y34 = y34 << 2
2649 +# asm 1: vshl.i32 >5y34=reg128#13,<y34=reg128#3,#2
2650 +# asm 2: vshl.i32 >5y34=q12,<y34=q2,#2
2651 +vshl.i32 q12,q2,#2
2652 +
2653 +# qhasm: 4x 5y12 += y12
2654 +# asm 1: vadd.i32 >5y12=reg128#12,<5y12=reg128#12,<y12=reg128#2
2655 +# asm 2: vadd.i32 >5y12=q11,<5y12=q11,<y12=q1
2656 +vadd.i32 q11,q11,q1
2657 +
2658 +# qhasm: 4x 5y34 += y34
2659 +# asm 1: vadd.i32 >5y34=reg128#13,<5y34=reg128#13,<y34=reg128#3
2660 +# asm 2: vadd.i32 >5y34=q12,<5y34=q12,<y34=q2
2661 +vadd.i32 q12,q12,q2
2662 +
2663 +# qhasm: 2x u4 <<= 24
2664 +# asm 1: vshl.i64 >u4=reg128#8,<u4=reg128#8,#24
2665 +# asm 2: vshl.i64 >u4=q7,<u4=q7,#24
2666 +vshl.i64 q7,q7,#24
2667 +
2668 +# qhasm: 4x 5z12 = z12 << 2
2669 +# asm 1: vshl.i32 >5z12=reg128#14,<z12=reg128#5,#2
2670 +# asm 2: vshl.i32 >5z12=q13,<z12=q4,#2
2671 +vshl.i32 q13,q4,#2
2672 +
2673 +# qhasm: 4x 5z34 = z34 << 2
2674 +# asm 1: vshl.i32 >5z34=reg128#15,<z34=reg128#6,#2
2675 +# asm 2: vshl.i32 >5z34=q14,<z34=q5,#2
2676 +vshl.i32 q14,q5,#2
2677 +
2678 +# qhasm: 4x 5z12 += z12
2679 +# asm 1: vadd.i32 >5z12=reg128#14,<5z12=reg128#14,<z12=reg128#5
2680 +# asm 2: vadd.i32 >5z12=q13,<5z12=q13,<z12=q4
2681 +vadd.i32 q13,q13,q4
2682 +
2683 +# qhasm: 4x 5z34 += z34
2684 +# asm 1: vadd.i32 >5z34=reg128#15,<5z34=reg128#15,<z34=reg128#6
2685 +# asm 2: vadd.i32 >5z34=q14,<5z34=q14,<z34=q5
2686 +vadd.i32 q14,q14,q5
2687 +
2688 +# qhasm: new two24
2689 +
2690 +# qhasm: new y0_stack
2691 +
2692 +# qhasm: new y12_stack
2693 +
2694 +# qhasm: new y34_stack
2695 +
2696 +# qhasm: new 5y12_stack
2697 +
2698 +# qhasm: new 5y34_stack
2699 +
2700 +# qhasm: new z0_stack
2701 +
2702 +# qhasm: new z12_stack
2703 +
2704 +# qhasm: new z34_stack
2705 +
2706 +# qhasm: new 5z12_stack
2707 +
2708 +# qhasm: new 5z34_stack
2709 +
2710 +# qhasm: ptr = &two24
2711 +# asm 1: lea >ptr=int32#2,<two24=stack128#1
2712 +# asm 2: lea >ptr=r1,<two24=[sp,#0]
2713 +add r1,sp,#0
2714 +
2715 +# qhasm: mem128[ptr] aligned= u4
2716 +# asm 1: vst1.8 {<u4=reg128#8%bot-<u4=reg128#8%top},[<ptr=int32#2,: 128]
2717 +# asm 2: vst1.8 {<u4=d14-<u4=d15},[<ptr=r1,: 128]
2718 +vst1.8 {d14-d15},[r1,: 128]
2719 +
2720 +# qhasm: r4 = u4
2721 +# asm 1: vmov >r4=reg128#16,<u4=reg128#8
2722 +# asm 2: vmov >r4=q15,<u4=q7
2723 +vmov q15,q7
2724 +
2725 +# qhasm: r0 = u4
2726 +# asm 1: vmov >r0=reg128#8,<u4=reg128#8
2727 +# asm 2: vmov >r0=q7,<u4=q7
2728 +vmov q7,q7
2729 +
2730 +# qhasm: ptr = &y0_stack
2731 +# asm 1: lea >ptr=int32#2,<y0_stack=stack128#2
2732 +# asm 2: lea >ptr=r1,<y0_stack=[sp,#16]
2733 +add r1,sp,#16
2734 +
2735 +# qhasm: mem128[ptr] aligned= y0
2736 +# asm 1: vst1.8 {<y0=reg128#1%bot-<y0=reg128#1%top},[<ptr=int32#2,: 128]
2737 +# asm 2: vst1.8 {<y0=d0-<y0=d1},[<ptr=r1,: 128]
2738 +vst1.8 {d0-d1},[r1,: 128]
2739 +
2740 +# qhasm: ptr = &y12_stack
2741 +# asm 1: lea >ptr=int32#2,<y12_stack=stack128#3
2742 +# asm 2: lea >ptr=r1,<y12_stack=[sp,#32]
2743 +add r1,sp,#32
2744 +
2745 +# qhasm: mem128[ptr] aligned= y12
2746 +# asm 1: vst1.8 {<y12=reg128#2%bot-<y12=reg128#2%top},[<ptr=int32#2,: 128]
2747 +# asm 2: vst1.8 {<y12=d2-<y12=d3},[<ptr=r1,: 128]
2748 +vst1.8 {d2-d3},[r1,: 128]
2749 +
2750 +# qhasm: ptr = &y34_stack
2751 +# asm 1: lea >ptr=int32#2,<y34_stack=stack128#4
2752 +# asm 2: lea >ptr=r1,<y34_stack=[sp,#48]
2753 +add r1,sp,#48
2754 +
2755 +# qhasm: mem128[ptr] aligned= y34
2756 +# asm 1: vst1.8 {<y34=reg128#3%bot-<y34=reg128#3%top},[<ptr=int32#2,: 128]
2757 +# asm 2: vst1.8 {<y34=d4-<y34=d5},[<ptr=r1,: 128]
2758 +vst1.8 {d4-d5},[r1,: 128]
2759 +
2760 +# qhasm: ptr = &z0_stack
2761 +# asm 1: lea >ptr=int32#2,<z0_stack=stack128#7
2762 +# asm 2: lea >ptr=r1,<z0_stack=[sp,#96]
2763 +add r1,sp,#96
2764 +
2765 +# qhasm: mem128[ptr] aligned= z0
2766 +# asm 1: vst1.8 {<z0=reg128#4%bot-<z0=reg128#4%top},[<ptr=int32#2,: 128]
2767 +# asm 2: vst1.8 {<z0=d6-<z0=d7},[<ptr=r1,: 128]
2768 +vst1.8 {d6-d7},[r1,: 128]
2769 +
2770 +# qhasm: ptr = &z12_stack
2771 +# asm 1: lea >ptr=int32#2,<z12_stack=stack128#8
2772 +# asm 2: lea >ptr=r1,<z12_stack=[sp,#112]
2773 +add r1,sp,#112
2774 +
2775 +# qhasm: mem128[ptr] aligned= z12
2776 +# asm 1: vst1.8 {<z12=reg128#5%bot-<z12=reg128#5%top},[<ptr=int32#2,: 128]
2777 +# asm 2: vst1.8 {<z12=d8-<z12=d9},[<ptr=r1,: 128]
2778 +vst1.8 {d8-d9},[r1,: 128]
2779 +
2780 +# qhasm: ptr = &z34_stack
2781 +# asm 1: lea >ptr=int32#2,<z34_stack=stack128#9
2782 +# asm 2: lea >ptr=r1,<z34_stack=[sp,#128]
2783 +add r1,sp,#128
2784 +
2785 +# qhasm: mem128[ptr] aligned= z34
2786 +# asm 1: vst1.8 {<z34=reg128#6%bot-<z34=reg128#6%top},[<ptr=int32#2,: 128]
2787 +# asm 2: vst1.8 {<z34=d10-<z34=d11},[<ptr=r1,: 128]
2788 +vst1.8 {d10-d11},[r1,: 128]
2789 +
2790 +# qhasm: ptr = &5y12_stack
2791 +# asm 1: lea >ptr=int32#2,<5y12_stack=stack128#5
2792 +# asm 2: lea >ptr=r1,<5y12_stack=[sp,#64]
2793 +add r1,sp,#64
2794 +
2795 +# qhasm: mem128[ptr] aligned= 5y12
2796 +# asm 1: vst1.8 {<5y12=reg128#12%bot-<5y12=reg128#12%top},[<ptr=int32#2,: 128]
2797 +# asm 2: vst1.8 {<5y12=d22-<5y12=d23},[<ptr=r1,: 128]
2798 +vst1.8 {d22-d23},[r1,: 128]
2799 +
2800 +# qhasm: ptr = &5y34_stack
2801 +# asm 1: lea >ptr=int32#2,<5y34_stack=stack128#6
2802 +# asm 2: lea >ptr=r1,<5y34_stack=[sp,#80]
2803 +add r1,sp,#80
2804 +
2805 +# qhasm: mem128[ptr] aligned= 5y34
2806 +# asm 1: vst1.8 {<5y34=reg128#13%bot-<5y34=reg128#13%top},[<ptr=int32#2,: 128]
2807 +# asm 2: vst1.8 {<5y34=d24-<5y34=d25},[<ptr=r1,: 128]
2808 +vst1.8 {d24-d25},[r1,: 128]
2809 +
2810 +# qhasm: ptr = &5z12_stack
2811 +# asm 1: lea >ptr=int32#2,<5z12_stack=stack128#10
2812 +# asm 2: lea >ptr=r1,<5z12_stack=[sp,#144]
2813 +add r1,sp,#144
2814 +
2815 +# qhasm: mem128[ptr] aligned= 5z12
2816 +# asm 1: vst1.8 {<5z12=reg128#14%bot-<5z12=reg128#14%top},[<ptr=int32#2,: 128]
2817 +# asm 2: vst1.8 {<5z12=d26-<5z12=d27},[<ptr=r1,: 128]
2818 +vst1.8 {d26-d27},[r1,: 128]
2819 +
2820 +# qhasm: ptr = &5z34_stack
2821 +# asm 1: lea >ptr=int32#2,<5z34_stack=stack128#11
2822 +# asm 2: lea >ptr=r1,<5z34_stack=[sp,#160]
2823 +add r1,sp,#160
2824 +
2825 +# qhasm: mem128[ptr] aligned= 5z34
2826 +# asm 1: vst1.8 {<5z34=reg128#15%bot-<5z34=reg128#15%top},[<ptr=int32#2,: 128]
2827 +# asm 2: vst1.8 {<5z34=d28-<5z34=d29},[<ptr=r1,: 128]
2828 +vst1.8 {d28-d29},[r1,: 128]
2829 +
2830 +# qhasm: unsigned>? len - 64
2831 +# asm 1: cmp <len=int32#4,#64
2832 +# asm 2: cmp <len=r3,#64
2833 +cmp r3,#64
2834 +
2835 +# qhasm: goto below64bytes if !unsigned>
2836 +bls ._below64bytes
2837 +
2838 +# qhasm: input_2 += 32
2839 +# asm 1: add >input_2=int32#2,<input_2=int32#3,#32
2840 +# asm 2: add >input_2=r1,<input_2=r2,#32
2841 +add r1,r2,#32
2842 +
2843 +# qhasm: mainloop2:
2844 +._mainloop2:
2845 +
2846 +# qhasm: c01 = mem128[input_2];input_2+=16
2847 +# asm 1: vld1.8 {>c01=reg128#1%bot->c01=reg128#1%top},[<input_2=int32#2]!
2848 +# asm 2: vld1.8 {>c01=d0->c01=d1},[<input_2=r1]!
2849 +vld1.8 {d0-d1},[r1]!
2850 +
2851 +# qhasm: c23 = mem128[input_2];input_2+=16
2852 +# asm 1: vld1.8 {>c23=reg128#2%bot->c23=reg128#2%top},[<input_2=int32#2]!
2853 +# asm 2: vld1.8 {>c23=d2->c23=d3},[<input_2=r1]!
2854 +vld1.8 {d2-d3},[r1]!
2855 +
2856 +# qhasm: r4[0,1] += x01[0] unsigned* z34[2]; r4[2,3] += x01[1] unsigned* z34 [3]
2857 +# asm 1: vmlal.u32 <r4=reg128#16,<x01=reg128#9%bot,<z34=reg128#6%top
2858 +# asm 2: vmlal.u32 <r4=q15,<x01=d16,<z34=d11
2859 +vmlal.u32 q15,d16,d11
2860 +
2861 +# qhasm: ptr = &z12_stack
2862 +# asm 1: lea >ptr=int32#3,<z12_stack=stack128#8
2863 +# asm 2: lea >ptr=r2,<z12_stack=[sp,#112]
2864 +add r2,sp,#112
2865 +
2866 +# qhasm: z12 aligned= mem128[ptr]
2867 +# asm 1: vld1.8 {>z12=reg128#3%bot->z12=reg128#3%top},[<ptr=int32#3,: 128]
2868 +# asm 2: vld1.8 {>z12=d4->z12=d5},[<ptr=r2,: 128]
2869 +vld1.8 {d4-d5},[r2,: 128]
2870 +
2871 +# qhasm: r4[0,1] += x01[2] unsigned* z34[0]; r4[2,3] += x01[3] unsigned* z34[1 ]
2872 +# asm 1: vmlal.u32 <r4=reg128#16,<x01=reg128#9%top,<z34=reg128#6%bot
2873 +# asm 2: vmlal.u32 <r4=q15,<x01=d17,<z34=d10
2874 +vmlal.u32 q15,d17,d10
2875 +
2876 +# qhasm: ptr = &z0_stack
2877 +# asm 1: lea >ptr=int32#3,<z0_stack=stack128#7
2878 +# asm 2: lea >ptr=r2,<z0_stack=[sp,#96]
2879 +add r2,sp,#96
2880 +
2881 +# qhasm: z0 aligned= mem128[ptr]
2882 +# asm 1: vld1.8 {>z0=reg128#4%bot->z0=reg128#4%top},[<ptr=int32#3,: 128]
2883 +# asm 2: vld1.8 {>z0=d6->z0=d7},[<ptr=r2,: 128]
2884 +vld1.8 {d6-d7},[r2,: 128]
2885 +
2886 +# qhasm: r4[0,1] += x23[0] unsigned* z12[2]; r4[2,3] += x23[1] unsigned* z12[3 ]
2887 +# asm 1: vmlal.u32 <r4=reg128#16,<x23=reg128#10%bot,<z12=reg128#3%top
2888 +# asm 2: vmlal.u32 <r4=q15,<x23=d18,<z12=d5
2889 +vmlal.u32 q15,d18,d5
2890 +
2891 +# qhasm: c01 c23 = c01[0]c01[1]c01[2]c23[2]c23[0]c23[1]c01[3]c23[3]
2892 +# asm 1: vtrn.32 <c01=reg128#1%top,<c23=reg128#2%top
2893 +# asm 2: vtrn.32 <c01=d1,<c23=d3
2894 +vtrn.32 d1,d3
2895 +
2896 +# qhasm: r4[0,1] += x23[2] unsigned* z12[0]; r4[2,3] += x23[3] unsigned* z12[1 ]
2897 +# asm 1: vmlal.u32 <r4=reg128#16,<x23=reg128#10%top,<z12=reg128#3%bot
2898 +# asm 2: vmlal.u32 <r4=q15,<x23=d19,<z12=d4
2899 +vmlal.u32 q15,d19,d4
2900 +
2901 +# qhasm: r4[0,1] += x4[0] unsigned* z0[0]; r4[2,3] += x4[1] unsigned* z0[1]
2902 +# asm 1: vmlal.u32 <r4=reg128#16,<x4=reg128#11%bot,<z0=reg128#4%bot
2903 +# asm 2: vmlal.u32 <r4=q15,<x4=d20,<z0=d6
2904 +vmlal.u32 q15,d20,d6
2905 +
2906 +# qhasm: r3[0,1] = c23[2]<<18; r3[2,3] = c23[3]<<18
2907 +# asm 1: vshll.u32 >r3=reg128#5,<c23=reg128#2%top,#18
2908 +# asm 2: vshll.u32 >r3=q4,<c23=d3,#18
2909 +vshll.u32 q4,d3,#18
2910 +
2911 +# qhasm: c01 c23 = c01[0]c23[0]c01[2]c01[3]c01[1]c23[1]c23[2]c23[3]
2912 +# asm 1: vtrn.32 <c01=reg128#1%bot,<c23=reg128#2%bot
2913 +# asm 2: vtrn.32 <c01=d0,<c23=d2
2914 +vtrn.32 d0,d2
2915 +
2916 +# qhasm: r3[0,1] += x01[0] unsigned* z34[0]; r3[2,3] += x01[1] unsigned* z34[ 1]
2917 +# asm 1: vmlal.u32 <r3=reg128#5,<x01=reg128#9%bot,<z34=reg128#6%bot
2918 +# asm 2: vmlal.u32 <r3=q4,<x01=d16,<z34=d10
2919 +vmlal.u32 q4,d16,d10
2920 +
2921 +# qhasm: r3[0,1] += x01[2] unsigned* z12[2]; r3[2,3] += x01[3] unsigned* z12[ 3]
2922 +# asm 1: vmlal.u32 <r3=reg128#5,<x01=reg128#9%top,<z12=reg128#3%top
2923 +# asm 2: vmlal.u32 <r3=q4,<x01=d17,<z12=d5
2924 +vmlal.u32 q4,d17,d5
2925 +
2926 +# qhasm: r0 = r0[1]c01[0]r0[2,3]
2927 +# asm 1: vext.32 <r0=reg128#8%bot,<r0=reg128#8%bot,<c01=reg128#1%bot,#1
2928 +# asm 2: vext.32 <r0=d14,<r0=d14,<c01=d0,#1
2929 +vext.32 d14,d14,d0,#1
2930 +
2931 +# qhasm: r3[0,1] += x23[0] unsigned* z12[0]; r3[2,3] += x23[1] unsigned* z12[ 1]
2932 +# asm 1: vmlal.u32 <r3=reg128#5,<x23=reg128#10%bot,<z12=reg128#3%bot
2933 +# asm 2: vmlal.u32 <r3=q4,<x23=d18,<z12=d4
2934 +vmlal.u32 q4,d18,d4
2935 +
2936 +# qhasm: input_2 -= 64
2937 +# asm 1: sub >input_2=int32#2,<input_2=int32#2,#64
2938 +# asm 2: sub >input_2=r1,<input_2=r1,#64
2939 +sub r1,r1,#64
2940 +
2941 +# qhasm: r3[0,1] += x23[2] unsigned* z0[0]; r3[2,3] += x23[3] unsigned* z0[1]
2942 +# asm 1: vmlal.u32 <r3=reg128#5,<x23=reg128#10%top,<z0=reg128#4%bot
2943 +# asm 2: vmlal.u32 <r3=q4,<x23=d19,<z0=d6
2944 +vmlal.u32 q4,d19,d6
2945 +
2946 +# qhasm: ptr = &5z34_stack
2947 +# asm 1: lea >ptr=int32#3,<5z34_stack=stack128#11
2948 +# asm 2: lea >ptr=r2,<5z34_stack=[sp,#160]
2949 +add r2,sp,#160
2950 +
2951 +# qhasm: 5z34 aligned= mem128[ptr]
2952 +# asm 1: vld1.8 {>5z34=reg128#6%bot->5z34=reg128#6%top},[<ptr=int32#3,: 128]
2953 +# asm 2: vld1.8 {>5z34=d10->5z34=d11},[<ptr=r2,: 128]
2954 +vld1.8 {d10-d11},[r2,: 128]
2955 +
2956 +# qhasm: r3[0,1] += x4[0] unsigned* 5z34[2]; r3[2,3] += x4[1] unsigned* 5z3 4[3]
2957 +# asm 1: vmlal.u32 <r3=reg128#5,<x4=reg128#11%bot,<5z34=reg128#6%top
2958 +# asm 2: vmlal.u32 <r3=q4,<x4=d20,<5z34=d11
2959 +vmlal.u32 q4,d20,d11
2960 +
2961 +# qhasm: r0 = r0[1]r0[0]r0[3]r0[2]
2962 +# asm 1: vrev64.i32 >r0=reg128#8,<r0=reg128#8
2963 +# asm 2: vrev64.i32 >r0=q7,<r0=q7
2964 +vrev64.i32 q7,q7
2965 +
2966 +# qhasm: r2[0,1] = c01[2]<<12; r2[2,3] = c01[3]<<12
2967 +# asm 1: vshll.u32 >r2=reg128#14,<c01=reg128#1%top,#12
2968 +# asm 2: vshll.u32 >r2=q13,<c01=d1,#12
2969 +vshll.u32 q13,d1,#12
2970 +
2971 +# qhasm: d01 = mem128[input_2];input_2+=16
2972 +# asm 1: vld1.8 {>d01=reg128#12%bot->d01=reg128#12%top},[<input_2=int32#2]!
2973 +# asm 2: vld1.8 {>d01=d22->d01=d23},[<input_2=r1]!
2974 +vld1.8 {d22-d23},[r1]!
2975 +
2976 +# qhasm: r2[0,1] += x01[0] unsigned* z12[2]; r2[2,3] += x01[1] unsigned* z12[ 3]
2977 +# asm 1: vmlal.u32 <r2=reg128#14,<x01=reg128#9%bot,<z12=reg128#3%top
2978 +# asm 2: vmlal.u32 <r2=q13,<x01=d16,<z12=d5
2979 +vmlal.u32 q13,d16,d5
2980 +
2981 +# qhasm: r2[0,1] += x01[2] unsigned* z12[0]; r2[2,3] += x01[3] unsigned* z12[ 1]
2982 +# asm 1: vmlal.u32 <r2=reg128#14,<x01=reg128#9%top,<z12=reg128#3%bot
2983 +# asm 2: vmlal.u32 <r2=q13,<x01=d17,<z12=d4
2984 +vmlal.u32 q13,d17,d4
2985 +
2986 +# qhasm: r2[0,1] += x23[0] unsigned* z0[0]; r2[2,3] += x23[1] unsigned* z0[1]
2987 +# asm 1: vmlal.u32 <r2=reg128#14,<x23=reg128#10%bot,<z0=reg128#4%bot
2988 +# asm 2: vmlal.u32 <r2=q13,<x23=d18,<z0=d6
2989 +vmlal.u32 q13,d18,d6
2990 +
2991 +# qhasm: r2[0,1] += x23[2] unsigned* 5z34[2]; r2[2,3] += x23[3] unsigned* 5z3 4[3]
2992 +# asm 1: vmlal.u32 <r2=reg128#14,<x23=reg128#10%top,<5z34=reg128#6%top
2993 +# asm 2: vmlal.u32 <r2=q13,<x23=d19,<5z34=d11
2994 +vmlal.u32 q13,d19,d11
2995 +
2996 +# qhasm: r2[0,1] += x4[0] unsigned* 5z34[0]; r2[2,3] += x4[1] unsigned* 5z34[ 1]
2997 +# asm 1: vmlal.u32 <r2=reg128#14,<x4=reg128#11%bot,<5z34=reg128#6%bot
2998 +# asm 2: vmlal.u32 <r2=q13,<x4=d20,<5z34=d10
2999 +vmlal.u32 q13,d20,d10
3000 +
3001 +# qhasm: r0 = r0[0,1]c01[1]r0[2]
3002 +# asm 1: vext.32 <r0=reg128#8%top,<c01=reg128#1%bot,<r0=reg128#8%top,#1
3003 +# asm 2: vext.32 <r0=d15,<c01=d0,<r0=d15,#1
3004 +vext.32 d15,d0,d15,#1
3005 +
3006 +# qhasm: r1[0,1] = c23[0]<<6; r1[2,3] = c23[1]<<6
3007 +# asm 1: vshll.u32 >r1=reg128#15,<c23=reg128#2%bot,#6
3008 +# asm 2: vshll.u32 >r1=q14,<c23=d2,#6
3009 +vshll.u32 q14,d2,#6
3010 +
3011 +# qhasm: r1[0,1] += x01[0] unsigned* z12[0]; r1[2,3] += x01[1] unsigned* z12[ 1]
3012 +# asm 1: vmlal.u32 <r1=reg128#15,<x01=reg128#9%bot,<z12=reg128#3%bot
3013 +# asm 2: vmlal.u32 <r1=q14,<x01=d16,<z12=d4
3014 +vmlal.u32 q14,d16,d4
3015 +
3016 +# qhasm: r1[0,1] += x01[2] unsigned* z0[0]; r1[2,3] += x01[3] unsigned* z0[1]
3017 +# asm 1: vmlal.u32 <r1=reg128#15,<x01=reg128#9%top,<z0=reg128#4%bot
3018 +# asm 2: vmlal.u32 <r1=q14,<x01=d17,<z0=d6
3019 +vmlal.u32 q14,d17,d6
3020 +
3021 +# qhasm: r1[0,1] += x23[0] unsigned* 5z34[2]; r1[2,3] += x23[1] unsigned* 5z3 4[3]
3022 +# asm 1: vmlal.u32 <r1=reg128#15,<x23=reg128#10%bot,<5z34=reg128#6%top
3023 +# asm 2: vmlal.u32 <r1=q14,<x23=d18,<5z34=d11
3024 +vmlal.u32 q14,d18,d11
3025 +
3026 +# qhasm: r1[0,1] += x23[2] unsigned* 5z34[0]; r1[2,3] += x23[3] unsigned* 5z34[ 1]
3027 +# asm 1: vmlal.u32 <r1=reg128#15,<x23=reg128#10%top,<5z34=reg128#6%bot
3028 +# asm 2: vmlal.u32 <r1=q14,<x23=d19,<5z34=d10
3029 +vmlal.u32 q14,d19,d10
3030 +
3031 +# qhasm: ptr = &5z12_stack
3032 +# asm 1: lea >ptr=int32#3,<5z12_stack=stack128#10
3033 +# asm 2: lea >ptr=r2,<5z12_stack=[sp,#144]
3034 +add r2,sp,#144
3035 +
3036 +# qhasm: 5z12 aligned= mem128[ptr]
3037 +# asm 1: vld1.8 {>5z12=reg128#1%bot->5z12=reg128#1%top},[<ptr=int32#3,: 128]
3038 +# asm 2: vld1.8 {>5z12=d0->5z12=d1},[<ptr=r2,: 128]
3039 +vld1.8 {d0-d1},[r2,: 128]
3040 +
3041 +# qhasm: r1[0,1] += x4[0] unsigned* 5z12[2]; r1[2,3] += x4[1] unsigned* 5z12[ 3]
3042 +# asm 1: vmlal.u32 <r1=reg128#15,<x4=reg128#11%bot,<5z12=reg128#1%top
3043 +# asm 2: vmlal.u32 <r1=q14,<x4=d20,<5z12=d1
3044 +vmlal.u32 q14,d20,d1
3045 +
3046 +# qhasm: d23 = mem128[input_2];input_2+=16
3047 +# asm 1: vld1.8 {>d23=reg128#2%bot->d23=reg128#2%top},[<input_2=int32#2]!
3048 +# asm 2: vld1.8 {>d23=d2->d23=d3},[<input_2=r1]!
3049 +vld1.8 {d2-d3},[r1]!
3050 +
3051 +# qhasm: input_2 += 32
3052 +# asm 1: add >input_2=int32#2,<input_2=int32#2,#32
3053 +# asm 2: add >input_2=r1,<input_2=r1,#32
3054 +add r1,r1,#32
3055 +
3056 +# qhasm: r0[0,1] += x4[0] unsigned* 5z12[0]; r0[2,3] += x4[1] unsigned* 5z12[ 1]
3057 +# asm 1: vmlal.u32 <r0=reg128#8,<x4=reg128#11%bot,<5z12=reg128#1%bot
3058 +# asm 2: vmlal.u32 <r0=q7,<x4=d20,<5z12=d0
3059 +vmlal.u32 q7,d20,d0
3060 +
3061 +# qhasm: r0[0,1] += x23[0] unsigned* 5z34[0]; r0[2,3] += x23[1] unsigned* 5z34[ 1]
3062 +# asm 1: vmlal.u32 <r0=reg128#8,<x23=reg128#10%bot,<5z34=reg128#6%bot
3063 +# asm 2: vmlal.u32 <r0=q7,<x23=d18,<5z34=d10
3064 +vmlal.u32 q7,d18,d10
3065 +
3066 +# qhasm: d01 d23 = d01[0] d23[0] d01[1] d23[1]
3067 +# asm 1: vswp <d23=reg128#2%bot,<d01=reg128#12%top
3068 +# asm 2: vswp <d23=d2,<d01=d23
3069 +vswp d2,d23
3070 +
3071 +# qhasm: r0[0,1] += x23[2] unsigned* 5z12[2]; r0[2,3] += x23[3] unsigned* 5z12[ 3]
3072 +# asm 1: vmlal.u32 <r0=reg128#8,<x23=reg128#10%top,<5z12=reg128#1%top
3073 +# asm 2: vmlal.u32 <r0=q7,<x23=d19,<5z12=d1
3074 +vmlal.u32 q7,d19,d1
3075 +
3076 +# qhasm: r0[0,1] += x01[0] unsigned* z0[0]; r0[2,3] += x01[1] unsigned* z0[1]
3077 +# asm 1: vmlal.u32 <r0=reg128#8,<x01=reg128#9%bot,<z0=reg128#4%bot
3078 +# asm 2: vmlal.u32 <r0=q7,<x01=d16,<z0=d6
3079 +vmlal.u32 q7,d16,d6
3080 +
3081 +# qhasm: new mid
3082 +
3083 +# qhasm: 2x v4 = d23 unsigned>> 40
3084 +# asm 1: vshr.u64 >v4=reg128#4,<d23=reg128#2,#40
3085 +# asm 2: vshr.u64 >v4=q3,<d23=q1,#40
3086 +vshr.u64 q3,q1,#40
3087 +
3088 +# qhasm: mid = d01[1]d23[0] mid[2,3]
3089 +# asm 1: vext.32 <mid=reg128#1%bot,<d01=reg128#12%bot,<d23=reg128#2%bot,#1
3090 +# asm 2: vext.32 <mid=d0,<d01=d22,<d23=d2,#1
3091 +vext.32 d0,d22,d2,#1
3092 +
3093 +# qhasm: new v23
3094 +
3095 +# qhasm: v23[2] = d23[0,1] unsigned>> 14; v23[3] = d23[2,3] unsig ned>> 14
3096 +# asm 1: vshrn.u64 <v23=reg128#10%top,<d23=reg128#2,#14
3097 +# asm 2: vshrn.u64 <v23=d19,<d23=q1,#14
3098 +vshrn.u64 d19,q1,#14
3099 +
3100 +# qhasm: mid = mid[0,1] d01[3]d23[2]
3101 +# asm 1: vext.32 <mid=reg128#1%top,<d01=reg128#12%top,<d23=reg128#2%top,#1
3102 +# asm 2: vext.32 <mid=d1,<d01=d23,<d23=d3,#1
3103 +vext.32 d1,d23,d3,#1
3104 +
3105 +# qhasm: new v01
3106 +
3107 +# qhasm: v01[2] = d01[0,1] unsigned>> 26; v01[3] = d01[2,3] unsig ned>> 26
3108 +# asm 1: vshrn.u64 <v01=reg128#11%top,<d01=reg128#12,#26
3109 +# asm 2: vshrn.u64 <v01=d21,<d01=q11,#26
3110 +vshrn.u64 d21,q11,#26
3111 +
3112 +# qhasm: v01 = d01[1]d01[0] v01[2,3]
3113 +# asm 1: vext.32 <v01=reg128#11%bot,<d01=reg128#12%bot,<d01=reg128#12%bot,#1
3114 +# asm 2: vext.32 <v01=d20,<d01=d22,<d01=d22,#1
3115 +vext.32 d20,d22,d22,#1
3116 +
3117 +# qhasm: r0[0,1] += x01[2] unsigned* 5z34[2]; r0[2,3] += x01[3] unsigned* 5z3 4[3]
3118 +# asm 1: vmlal.u32 <r0=reg128#8,<x01=reg128#9%top,<5z34=reg128#6%top
3119 +# asm 2: vmlal.u32 <r0=q7,<x01=d17,<5z34=d11
3120 +vmlal.u32 q7,d17,d11
3121 +
3122 +# qhasm: v01 = v01[1]d01[2] v01[2,3]
3123 +# asm 1: vext.32 <v01=reg128#11%bot,<v01=reg128#11%bot,<d01=reg128#12%top,#1
3124 +# asm 2: vext.32 <v01=d20,<v01=d20,<d01=d23,#1
3125 +vext.32 d20,d20,d23,#1
3126 +
3127 +# qhasm: v23[0] = mid[0,1] unsigned>> 20; v23[1] = mid[2,3] unsig ned>> 20
3128 +# asm 1: vshrn.u64 <v23=reg128#10%bot,<mid=reg128#1,#20
3129 +# asm 2: vshrn.u64 <v23=d18,<mid=q0,#20
3130 +vshrn.u64 d18,q0,#20
3131 +
3132 +# qhasm: v4 = v4[0]v4[2]v4[1]v4[3]
3133 +# asm 1: vtrn.32 <v4=reg128#4%bot,<v4=reg128#4%top
3134 +# asm 2: vtrn.32 <v4=d6,<v4=d7
3135 +vtrn.32 d6,d7
3136 +
3137 +# qhasm: 4x v01 &= 0x03ffffff
3138 +# asm 1: vand.i32 <v01=reg128#11,#0x03ffffff
3139 +# asm 2: vand.i32 <v01=q10,#0x03ffffff
3140 +vand.i32 q10,#0x03ffffff
3141 +
3142 +# qhasm: ptr = &y34_stack
3143 +# asm 1: lea >ptr=int32#3,<y34_stack=stack128#4
3144 +# asm 2: lea >ptr=r2,<y34_stack=[sp,#48]
3145 +add r2,sp,#48
3146 +
3147 +# qhasm: y34 aligned= mem128[ptr]
3148 +# asm 1: vld1.8 {>y34=reg128#3%bot->y34=reg128#3%top},[<ptr=int32#3,: 128]
3149 +# asm 2: vld1.8 {>y34=d4->y34=d5},[<ptr=r2,: 128]
3150 +vld1.8 {d4-d5},[r2,: 128]
3151 +
3152 +# qhasm: 4x v23 &= 0x03ffffff
3153 +# asm 1: vand.i32 <v23=reg128#10,#0x03ffffff
3154 +# asm 2: vand.i32 <v23=q9,#0x03ffffff
3155 +vand.i32 q9,#0x03ffffff
3156 +
3157 +# qhasm: ptr = &y12_stack
3158 +# asm 1: lea >ptr=int32#3,<y12_stack=stack128#3
3159 +# asm 2: lea >ptr=r2,<y12_stack=[sp,#32]
3160 +add r2,sp,#32
3161 +
3162 +# qhasm: y12 aligned= mem128[ptr]
3163 +# asm 1: vld1.8 {>y12=reg128#2%bot->y12=reg128#2%top},[<ptr=int32#3,: 128]
3164 +# asm 2: vld1.8 {>y12=d2->y12=d3},[<ptr=r2,: 128]
3165 +vld1.8 {d2-d3},[r2,: 128]
3166 +
3167 +# qhasm: 4x v4 |= 0x01000000
3168 +# asm 1: vorr.i32 <v4=reg128#4,#0x01000000
3169 +# asm 2: vorr.i32 <v4=q3,#0x01000000
3170 +vorr.i32 q3,#0x01000000
3171 +
3172 +# qhasm: ptr = &y0_stack
3173 +# asm 1: lea >ptr=int32#3,<y0_stack=stack128#2
3174 +# asm 2: lea >ptr=r2,<y0_stack=[sp,#16]
3175 +add r2,sp,#16
3176 +
3177 +# qhasm: y0 aligned= mem128[ptr]
3178 +# asm 1: vld1.8 {>y0=reg128#1%bot->y0=reg128#1%top},[<ptr=int32#3,: 128]
3179 +# asm 2: vld1.8 {>y0=d0->y0=d1},[<ptr=r2,: 128]
3180 +vld1.8 {d0-d1},[r2,: 128]
3181 +
3182 +# qhasm: r4[0,1] += v01[0] unsigned* y34[2]; r4[2,3] += v01[1] unsigned* y34 [3]
3183 +# asm 1: vmlal.u32 <r4=reg128#16,<v01=reg128#11%bot,<y34=reg128#3%top
3184 +# asm 2: vmlal.u32 <r4=q15,<v01=d20,<y34=d5
3185 +vmlal.u32 q15,d20,d5
3186 +
3187 +# qhasm: r4[0,1] += v01[2] unsigned* y34[0]; r4[2,3] += v01[3] unsigned* y34[1 ]
3188 +# asm 1: vmlal.u32 <r4=reg128#16,<v01=reg128#11%top,<y34=reg128#3%bot
3189 +# asm 2: vmlal.u32 <r4=q15,<v01=d21,<y34=d4
3190 +vmlal.u32 q15,d21,d4
3191 +
3192 +# qhasm: r4[0,1] += v23[0] unsigned* y12[2]; r4[2,3] += v23[1] unsigned* y12[3 ]
3193 +# asm 1: vmlal.u32 <r4=reg128#16,<v23=reg128#10%bot,<y12=reg128#2%top
3194 +# asm 2: vmlal.u32 <r4=q15,<v23=d18,<y12=d3
3195 +vmlal.u32 q15,d18,d3
3196 +
3197 +# qhasm: r4[0,1] += v23[2] unsigned* y12[0]; r4[2,3] += v23[3] unsigned* y12[1 ]
3198 +# asm 1: vmlal.u32 <r4=reg128#16,<v23=reg128#10%top,<y12=reg128#2%bot
3199 +# asm 2: vmlal.u32 <r4=q15,<v23=d19,<y12=d2
3200 +vmlal.u32 q15,d19,d2
3201 +
3202 +# qhasm: r4[0,1] += v4[0] unsigned* y0[0]; r4[2,3] += v4[1] unsigned* y0[1]
3203 +# asm 1: vmlal.u32 <r4=reg128#16,<v4=reg128#4%bot,<y0=reg128#1%bot
3204 +# asm 2: vmlal.u32 <r4=q15,<v4=d6,<y0=d0
3205 +vmlal.u32 q15,d6,d0
3206 +
3207 +# qhasm: ptr = &5y34_stack
3208 +# asm 1: lea >ptr=int32#3,<5y34_stack=stack128#6
3209 +# asm 2: lea >ptr=r2,<5y34_stack=[sp,#80]
3210 +add r2,sp,#80
3211 +
3212 +# qhasm: 5y34 aligned= mem128[ptr]
3213 +# asm 1: vld1.8 {>5y34=reg128#13%bot->5y34=reg128#13%top},[<ptr=int32#3,: 128]
3214 +# asm 2: vld1.8 {>5y34=d24->5y34=d25},[<ptr=r2,: 128]
3215 +vld1.8 {d24-d25},[r2,: 128]
3216 +
3217 +# qhasm: r3[0,1] += v01[0] unsigned* y34[0]; r3[2,3] += v01[1] unsigned* y34[ 1]
3218 +# asm 1: vmlal.u32 <r3=reg128#5,<v01=reg128#11%bot,<y34=reg128#3%bot
3219 +# asm 2: vmlal.u32 <r3=q4,<v01=d20,<y34=d4
3220 +vmlal.u32 q4,d20,d4
3221 +
3222 +# qhasm: r3[0,1] += v01[2] unsigned* y12[2]; r3[2,3] += v01[3] unsigned* y12[ 3]
3223 +# asm 1: vmlal.u32 <r3=reg128#5,<v01=reg128#11%top,<y12=reg128#2%top
3224 +# asm 2: vmlal.u32 <r3=q4,<v01=d21,<y12=d3
3225 +vmlal.u32 q4,d21,d3
3226 +
3227 +# qhasm: r3[0,1] += v23[0] unsigned* y12[0]; r3[2,3] += v23[1] unsigned* y12[ 1]
3228 +# asm 1: vmlal.u32 <r3=reg128#5,<v23=reg128#10%bot,<y12=reg128#2%bot
3229 +# asm 2: vmlal.u32 <r3=q4,<v23=d18,<y12=d2
3230 +vmlal.u32 q4,d18,d2
3231 +
3232 +# qhasm: r3[0,1] += v23[2] unsigned* y0[0]; r3[2,3] += v23[3] unsigned* y0[1]
3233 +# asm 1: vmlal.u32 <r3=reg128#5,<v23=reg128#10%top,<y0=reg128#1%bot
3234 +# asm 2: vmlal.u32 <r3=q4,<v23=d19,<y0=d0
3235 +vmlal.u32 q4,d19,d0
3236 +
3237 +# qhasm: r3[0,1] += v4[0] unsigned* 5y34[2]; r3[2,3] += v4[1] unsigned* 5y3 4[3]
3238 +# asm 1: vmlal.u32 <r3=reg128#5,<v4=reg128#4%bot,<5y34=reg128#13%top
3239 +# asm 2: vmlal.u32 <r3=q4,<v4=d6,<5y34=d25
3240 +vmlal.u32 q4,d6,d25
3241 +
3242 +# qhasm: ptr = &5y12_stack
3243 +# asm 1: lea >ptr=int32#3,<5y12_stack=stack128#5
3244 +# asm 2: lea >ptr=r2,<5y12_stack=[sp,#64]
3245 +add r2,sp,#64
3246 +
3247 +# qhasm: 5y12 aligned= mem128[ptr]
3248 +# asm 1: vld1.8 {>5y12=reg128#12%bot->5y12=reg128#12%top},[<ptr=int32#3,: 128]
3249 +# asm 2: vld1.8 {>5y12=d22->5y12=d23},[<ptr=r2,: 128]
3250 +vld1.8 {d22-d23},[r2,: 128]
3251 +
3252 +# qhasm: r0[0,1] += v4[0] unsigned* 5y12[0]; r0[2,3] += v4[1] unsigned* 5y12[ 1]
3253 +# asm 1: vmlal.u32 <r0=reg128#8,<v4=reg128#4%bot,<5y12=reg128#12%bot
3254 +# asm 2: vmlal.u32 <r0=q7,<v4=d6,<5y12=d22
3255 +vmlal.u32 q7,d6,d22
3256 +
3257 +# qhasm: r0[0,1] += v23[0] unsigned* 5y34[0]; r0[2,3] += v23[1] unsigned* 5y34[ 1]
3258 +# asm 1: vmlal.u32 <r0=reg128#8,<v23=reg128#10%bot,<5y34=reg128#13%bot
3259 +# asm 2: vmlal.u32 <r0=q7,<v23=d18,<5y34=d24
3260 +vmlal.u32 q7,d18,d24
3261 +
3262 +# qhasm: r0[0,1] += v23[2] unsigned* 5y12[2]; r0[2,3] += v23[3] unsigned* 5y12[ 3]
3263 +# asm 1: vmlal.u32 <r0=reg128#8,<v23=reg128#10%top,<5y12=reg128#12%top
3264 +# asm 2: vmlal.u32 <r0=q7,<v23=d19,<5y12=d23
3265 +vmlal.u32 q7,d19,d23
3266 +
3267 +# qhasm: r0[0,1] += v01[0] unsigned* y0[0]; r0[2,3] += v01[1] unsigned* y0[1]
3268 +# asm 1: vmlal.u32 <r0=reg128#8,<v01=reg128#11%bot,<y0=reg128#1%bot
3269 +# asm 2: vmlal.u32 <r0=q7,<v01=d20,<y0=d0
3270 +vmlal.u32 q7,d20,d0
3271 +
3272 +# qhasm: r0[0,1] += v01[2] unsigned* 5y34[2]; r0[2,3] += v01[3] unsigned* 5y3 4[3]
3273 +# asm 1: vmlal.u32 <r0=reg128#8,<v01=reg128#11%top,<5y34=reg128#13%top
3274 +# asm 2: vmlal.u32 <r0=q7,<v01=d21,<5y34=d25
3275 +vmlal.u32 q7,d21,d25
3276 +
3277 +# qhasm: r1[0,1] += v01[0] unsigned* y12[0]; r1[2,3] += v01[1] unsigned* y12[ 1]
3278 +# asm 1: vmlal.u32 <r1=reg128#15,<v01=reg128#11%bot,<y12=reg128#2%bot
3279 +# asm 2: vmlal.u32 <r1=q14,<v01=d20,<y12=d2
3280 +vmlal.u32 q14,d20,d2
3281 +
3282 +# qhasm: r1[0,1] += v01[2] unsigned* y0[0]; r1[2,3] += v01[3] unsigned* y0[1]
3283 +# asm 1: vmlal.u32 <r1=reg128#15,<v01=reg128#11%top,<y0=reg128#1%bot
3284 +# asm 2: vmlal.u32 <r1=q14,<v01=d21,<y0=d0
3285 +vmlal.u32 q14,d21,d0
3286 +
3287 +# qhasm: r1[0,1] += v23[0] unsigned* 5y34[2]; r1[2,3] += v23[1] unsigned* 5y3 4[3]
3288 +# asm 1: vmlal.u32 <r1=reg128#15,<v23=reg128#10%bot,<5y34=reg128#13%top
3289 +# asm 2: vmlal.u32 <r1=q14,<v23=d18,<5y34=d25
3290 +vmlal.u32 q14,d18,d25
3291 +
3292 +# qhasm: r1[0,1] += v23[2] unsigned* 5y34[0]; r1[2,3] += v23[3] unsigned* 5y34[ 1]
3293 +# asm 1: vmlal.u32 <r1=reg128#15,<v23=reg128#10%top,<5y34=reg128#13%bot
3294 +# asm 2: vmlal.u32 <r1=q14,<v23=d19,<5y34=d24
3295 +vmlal.u32 q14,d19,d24
3296 +
3297 +# qhasm: r1[0,1] += v4[0] unsigned* 5y12[2]; r1[2,3] += v4[1] unsigned* 5y12[ 3]
3298 +# asm 1: vmlal.u32 <r1=reg128#15,<v4=reg128#4%bot,<5y12=reg128#12%top
3299 +# asm 2: vmlal.u32 <r1=q14,<v4=d6,<5y12=d23
3300 +vmlal.u32 q14,d6,d23
3301 +
3302 +# qhasm: r2[0,1] += v01[0] unsigned* y12[2]; r2[2,3] += v01[1] unsigned* y12[ 3]
3303 +# asm 1: vmlal.u32 <r2=reg128#14,<v01=reg128#11%bot,<y12=reg128#2%top
3304 +# asm 2: vmlal.u32 <r2=q13,<v01=d20,<y12=d3
3305 +vmlal.u32 q13,d20,d3
3306 +
3307 +# qhasm: r2[0,1] += v01[2] unsigned* y12[0]; r2[2,3] += v01[3] unsigned* y12[ 1]
3308 +# asm 1: vmlal.u32 <r2=reg128#14,<v01=reg128#11%top,<y12=reg128#2%bot
3309 +# asm 2: vmlal.u32 <r2=q13,<v01=d21,<y12=d2
3310 +vmlal.u32 q13,d21,d2
3311 +
3312 +# qhasm: r2[0,1] += v23[0] unsigned* y0[0]; r2[2,3] += v23[1] unsigned* y0[1]
3313 +# asm 1: vmlal.u32 <r2=reg128#14,<v23=reg128#10%bot,<y0=reg128#1%bot
3314 +# asm 2: vmlal.u32 <r2=q13,<v23=d18,<y0=d0
3315 +vmlal.u32 q13,d18,d0
3316 +
3317 +# qhasm: r2[0,1] += v23[2] unsigned* 5y34[2]; r2[2,3] += v23[3] unsigned* 5y3 4[3]
3318 +# asm 1: vmlal.u32 <r2=reg128#14,<v23=reg128#10%top,<5y34=reg128#13%top
3319 +# asm 2: vmlal.u32 <r2=q13,<v23=d19,<5y34=d25
3320 +vmlal.u32 q13,d19,d25
3321 +
3322 +# qhasm: r2[0,1] += v4[0] unsigned* 5y34[0]; r2[2,3] += v4[1] unsigned* 5y34[ 1]
3323 +# asm 1: vmlal.u32 <r2=reg128#14,<v4=reg128#4%bot,<5y34=reg128#13%bot
3324 +# asm 2: vmlal.u32 <r2=q13,<v4=d6,<5y34=d24
3325 +vmlal.u32 q13,d6,d24
3326 +
3327 +# qhasm: ptr = &two24
3328 +# asm 1: lea >ptr=int32#3,<two24=stack128#1
3329 +# asm 2: lea >ptr=r2,<two24=[sp,#0]
3330 +add r2,sp,#0
3331 +
3332 +# qhasm: 2x t1 = r0 unsigned>> 26
3333 +# asm 1: vshr.u64 >t1=reg128#4,<r0=reg128#8,#26
3334 +# asm 2: vshr.u64 >t1=q3,<r0=q7,#26
3335 +vshr.u64 q3,q7,#26
3336 +
3337 +# qhasm: len -= 64
3338 +# asm 1: sub >len=int32#4,<len=int32#4,#64
3339 +# asm 2: sub >len=r3,<len=r3,#64
3340 +sub r3,r3,#64
3341 +
3342 +# qhasm: r0 &= mask
3343 +# asm 1: vand >r0=reg128#6,<r0=reg128#8,<mask=reg128#7
3344 +# asm 2: vand >r0=q5,<r0=q7,<mask=q6
3345 +vand q5,q7,q6
3346 +
3347 +# qhasm: 2x r1 += t1
3348 +# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#15,<t1=reg128#4
3349 +# asm 2: vadd.i64 >r1=q3,<r1=q14,<t1=q3
3350 +vadd.i64 q3,q14,q3
3351 +
3352 +# qhasm: 2x t4 = r3 unsigned>> 26
3353 +# asm 1: vshr.u64 >t4=reg128#8,<r3=reg128#5,#26
3354 +# asm 2: vshr.u64 >t4=q7,<r3=q4,#26
3355 +vshr.u64 q7,q4,#26
3356 +
3357 +# qhasm: r3 &= mask
3358 +# asm 1: vand >r3=reg128#5,<r3=reg128#5,<mask=reg128#7
3359 +# asm 2: vand >r3=q4,<r3=q4,<mask=q6
3360 +vand q4,q4,q6
3361 +
3362 +# qhasm: 2x x4 = r4 + t4
3363 +# asm 1: vadd.i64 >x4=reg128#8,<r4=reg128#16,<t4=reg128#8
3364 +# asm 2: vadd.i64 >x4=q7,<r4=q15,<t4=q7
3365 +vadd.i64 q7,q15,q7
3366 +
3367 +# qhasm: r4 aligned= mem128[ptr]
3368 +# asm 1: vld1.8 {>r4=reg128#16%bot->r4=reg128#16%top},[<ptr=int32#3,: 128]
3369 +# asm 2: vld1.8 {>r4=d30->r4=d31},[<ptr=r2,: 128]
3370 +vld1.8 {d30-d31},[r2,: 128]
3371 +
3372 +# qhasm: 2x t2 = r1 unsigned>> 26
3373 +# asm 1: vshr.u64 >t2=reg128#9,<r1=reg128#4,#26
3374 +# asm 2: vshr.u64 >t2=q8,<r1=q3,#26
3375 +vshr.u64 q8,q3,#26
3376 +
3377 +# qhasm: r1 &= mask
3378 +# asm 1: vand >r1=reg128#4,<r1=reg128#4,<mask=reg128#7
3379 +# asm 2: vand >r1=q3,<r1=q3,<mask=q6
3380 +vand q3,q3,q6
3381 +
3382 +# qhasm: 2x t0 = x4 unsigned>> 26
3383 +# asm 1: vshr.u64 >t0=reg128#10,<x4=reg128#8,#26
3384 +# asm 2: vshr.u64 >t0=q9,<x4=q7,#26
3385 +vshr.u64 q9,q7,#26
3386 +
3387 +# qhasm: 2x r2 += t2
3388 +# asm 1: vadd.i64 >r2=reg128#9,<r2=reg128#14,<t2=reg128#9
3389 +# asm 2: vadd.i64 >r2=q8,<r2=q13,<t2=q8
3390 +vadd.i64 q8,q13,q8
3391 +
3392 +# qhasm: x4 &= mask
3393 +# asm 1: vand >x4=reg128#11,<x4=reg128#8,<mask=reg128#7
3394 +# asm 2: vand >x4=q10,<x4=q7,<mask=q6
3395 +vand q10,q7,q6
3396 +
3397 +# qhasm: 2x x01 = r0 + t0
3398 +# asm 1: vadd.i64 >x01=reg128#6,<r0=reg128#6,<t0=reg128#10
3399 +# asm 2: vadd.i64 >x01=q5,<r0=q5,<t0=q9
3400 +vadd.i64 q5,q5,q9
3401 +
3402 +# qhasm: r0 aligned= mem128[ptr]
3403 +# asm 1: vld1.8 {>r0=reg128#8%bot->r0=reg128#8%top},[<ptr=int32#3,: 128]
3404 +# asm 2: vld1.8 {>r0=d14->r0=d15},[<ptr=r2,: 128]
3405 +vld1.8 {d14-d15},[r2,: 128]
3406 +
3407 +# qhasm: ptr = &z34_stack
3408 +# asm 1: lea >ptr=int32#3,<z34_stack=stack128#9
3409 +# asm 2: lea >ptr=r2,<z34_stack=[sp,#128]
3410 +add r2,sp,#128
3411 +
3412 +# qhasm: 2x t0 <<= 2
3413 +# asm 1: vshl.i64 >t0=reg128#10,<t0=reg128#10,#2
3414 +# asm 2: vshl.i64 >t0=q9,<t0=q9,#2
3415 +vshl.i64 q9,q9,#2
3416 +
3417 +# qhasm: 2x t3 = r2 unsigned>> 26
3418 +# asm 1: vshr.u64 >t3=reg128#14,<r2=reg128#9,#26
3419 +# asm 2: vshr.u64 >t3=q13,<r2=q8,#26
3420 +vshr.u64 q13,q8,#26
3421 +
3422 +# qhasm: 2x x01 += t0
3423 +# asm 1: vadd.i64 >x01=reg128#15,<x01=reg128#6,<t0=reg128#10
3424 +# asm 2: vadd.i64 >x01=q14,<x01=q5,<t0=q9
3425 +vadd.i64 q14,q5,q9
3426 +
3427 +# qhasm: z34 aligned= mem128[ptr]
3428 +# asm 1: vld1.8 {>z34=reg128#6%bot->z34=reg128#6%top},[<ptr=int32#3,: 128]
3429 +# asm 2: vld1.8 {>z34=d10->z34=d11},[<ptr=r2,: 128]
3430 +vld1.8 {d10-d11},[r2,: 128]
3431 +
3432 +# qhasm: x23 = r2 & mask
3433 +# asm 1: vand >x23=reg128#10,<r2=reg128#9,<mask=reg128#7
3434 +# asm 2: vand >x23=q9,<r2=q8,<mask=q6
3435 +vand q9,q8,q6
3436 +
3437 +# qhasm: 2x r3 += t3
3438 +# asm 1: vadd.i64 >r3=reg128#5,<r3=reg128#5,<t3=reg128#14
3439 +# asm 2: vadd.i64 >r3=q4,<r3=q4,<t3=q13
3440 +vadd.i64 q4,q4,q13
3441 +
3442 +# qhasm: input_2 += 32
3443 +# asm 1: add >input_2=int32#2,<input_2=int32#2,#32
3444 +# asm 2: add >input_2=r1,<input_2=r1,#32
3445 +add r1,r1,#32
3446 +
3447 +# qhasm: 2x t1 = x01 unsigned>> 26
3448 +# asm 1: vshr.u64 >t1=reg128#14,<x01=reg128#15,#26
3449 +# asm 2: vshr.u64 >t1=q13,<x01=q14,#26
3450 +vshr.u64 q13,q14,#26
3451 +
3452 +# qhasm: x23 = x23[0,2,1,3]
3453 +# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top
3454 +# asm 2: vtrn.32 <x23=d18,<x23=d19
3455 +vtrn.32 d18,d19
3456 +
3457 +# qhasm: x01 = x01 & mask
3458 +# asm 1: vand >x01=reg128#9,<x01=reg128#15,<mask=reg128#7
3459 +# asm 2: vand >x01=q8,<x01=q14,<mask=q6
3460 +vand q8,q14,q6
3461 +
3462 +# qhasm: 2x r1 += t1
3463 +# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#4,<t1=reg128#14
3464 +# asm 2: vadd.i64 >r1=q3,<r1=q3,<t1=q13
3465 +vadd.i64 q3,q3,q13
3466 +
3467 +# qhasm: 2x t4 = r3 unsigned>> 26
3468 +# asm 1: vshr.u64 >t4=reg128#14,<r3=reg128#5,#26
3469 +# asm 2: vshr.u64 >t4=q13,<r3=q4,#26
3470 +vshr.u64 q13,q4,#26
3471 +
3472 +# qhasm: x01 = x01[0,2,1,3]
3473 +# asm 1: vtrn.32 <x01=reg128#9%bot,<x01=reg128#9%top
3474 +# asm 2: vtrn.32 <x01=d16,<x01=d17
3475 +vtrn.32 d16,d17
3476 +
3477 +# qhasm: r3 &= mask
3478 +# asm 1: vand >r3=reg128#5,<r3=reg128#5,<mask=reg128#7
3479 +# asm 2: vand >r3=q4,<r3=q4,<mask=q6
3480 +vand q4,q4,q6
3481 +
3482 +# qhasm: r1 = r1[0,2,1,3]
3483 +# asm 1: vtrn.32 <r1=reg128#4%bot,<r1=reg128#4%top
3484 +# asm 2: vtrn.32 <r1=d6,<r1=d7
3485 +vtrn.32 d6,d7
3486 +
3487 +# qhasm: 2x x4 += t4
3488 +# asm 1: vadd.i64 >x4=reg128#11,<x4=reg128#11,<t4=reg128#14
3489 +# asm 2: vadd.i64 >x4=q10,<x4=q10,<t4=q13
3490 +vadd.i64 q10,q10,q13
3491 +
3492 +# qhasm: r3 = r3[0,2,1,3]
3493 +# asm 1: vtrn.32 <r3=reg128#5%bot,<r3=reg128#5%top
3494 +# asm 2: vtrn.32 <r3=d8,<r3=d9
3495 +vtrn.32 d8,d9
3496 +
3497 +# qhasm: x01 = x01[0,1] r1[0,1]
3498 +# asm 1: vext.32 <x01=reg128#9%top,<r1=reg128#4%bot,<r1=reg128#4%bot,#0
3499 +# asm 2: vext.32 <x01=d17,<r1=d6,<r1=d6,#0
3500 +vext.32 d17,d6,d6,#0
3501 +
3502 +# qhasm: x23 = x23[0,1] r3[0,1]
3503 +# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#5%bot,<r3=reg128#5%bot,#0
3504 +# asm 2: vext.32 <x23=d19,<r3=d8,<r3=d8,#0
3505 +vext.32 d19,d8,d8,#0
3506 +
3507 +# qhasm: x4 = x4[0,2,1,3]
3508 +# asm 1: vtrn.32 <x4=reg128#11%bot,<x4=reg128#11%top
3509 +# asm 2: vtrn.32 <x4=d20,<x4=d21
3510 +vtrn.32 d20,d21
3511 +
3512 +# qhasm: unsigned>? len - 64
3513 +# asm 1: cmp <len=int32#4,#64
3514 +# asm 2: cmp <len=r3,#64
3515 +cmp r3,#64
3516 +
3517 +# qhasm: goto mainloop2 if unsigned>
3518 +bhi ._mainloop2
3519 +
3520 +# qhasm: input_2 -= 32
3521 +# asm 1: sub >input_2=int32#3,<input_2=int32#2,#32
3522 +# asm 2: sub >input_2=r2,<input_2=r1,#32
3523 +sub r2,r1,#32
3524 +
3525 +# qhasm: below64bytes:
3526 +._below64bytes:
3527 +
3528 +# qhasm: unsigned>? len - 32
3529 +# asm 1: cmp <len=int32#4,#32
3530 +# asm 2: cmp <len=r3,#32
3531 +cmp r3,#32
3532 +
3533 +# qhasm: goto end if !unsigned>
3534 +bls ._end
3535 +
3536 +# qhasm: mainloop:
3537 +._mainloop:
3538 +
3539 +# qhasm: new r0
3540 +
3541 +# qhasm: ptr = &two24
3542 +# asm 1: lea >ptr=int32#2,<two24=stack128#1
3543 +# asm 2: lea >ptr=r1,<two24=[sp,#0]
3544 +add r1,sp,#0
3545 +
3546 +# qhasm: r4 aligned= mem128[ptr]
3547 +# asm 1: vld1.8 {>r4=reg128#5%bot->r4=reg128#5%top},[<ptr=int32#2,: 128]
3548 +# asm 2: vld1.8 {>r4=d8->r4=d9},[<ptr=r1,: 128]
3549 +vld1.8 {d8-d9},[r1,: 128]
3550 +
3551 +# qhasm: u4 aligned= mem128[ptr]
3552 +# asm 1: vld1.8 {>u4=reg128#6%bot->u4=reg128#6%top},[<ptr=int32#2,: 128]
3553 +# asm 2: vld1.8 {>u4=d10->u4=d11},[<ptr=r1,: 128]
3554 +vld1.8 {d10-d11},[r1,: 128]
3555 +
3556 +# qhasm: c01 = mem128[input_2];input_2+=16
3557 +# asm 1: vld1.8 {>c01=reg128#8%bot->c01=reg128#8%top},[<input_2=int32#3]!
3558 +# asm 2: vld1.8 {>c01=d14->c01=d15},[<input_2=r2]!
3559 +vld1.8 {d14-d15},[r2]!
3560 +
3561 +# qhasm: r4[0,1] += x01[0] unsigned* y34[2]; r4[2,3] += x01[1] unsigned* y34 [3]
3562 +# asm 1: vmlal.u32 <r4=reg128#5,<x01=reg128#9%bot,<y34=reg128#3%top
3563 +# asm 2: vmlal.u32 <r4=q4,<x01=d16,<y34=d5
3564 +vmlal.u32 q4,d16,d5
3565 +
3566 +# qhasm: c23 = mem128[input_2];input_2+=16
3567 +# asm 1: vld1.8 {>c23=reg128#14%bot->c23=reg128#14%top},[<input_2=int32#3]!
3568 +# asm 2: vld1.8 {>c23=d26->c23=d27},[<input_2=r2]!
3569 +vld1.8 {d26-d27},[r2]!
3570 +
3571 +# qhasm: r4[0,1] += x01[2] unsigned* y34[0]; r4[2,3] += x01[3] unsigned* y34[1 ]
3572 +# asm 1: vmlal.u32 <r4=reg128#5,<x01=reg128#9%top,<y34=reg128#3%bot
3573 +# asm 2: vmlal.u32 <r4=q4,<x01=d17,<y34=d4
3574 +vmlal.u32 q4,d17,d4
3575 +
3576 +# qhasm: r0 = u4[1]c01[0]r0[2,3]
3577 +# asm 1: vext.32 <r0=reg128#4%bot,<u4=reg128#6%bot,<c01=reg128#8%bot,#1
3578 +# asm 2: vext.32 <r0=d6,<u4=d10,<c01=d14,#1
3579 +vext.32 d6,d10,d14,#1
3580 +
3581 +# qhasm: r4[0,1] += x23[0] unsigned* y12[2]; r4[2,3] += x23[1] unsigned* y12[3 ]
3582 +# asm 1: vmlal.u32 <r4=reg128#5,<x23=reg128#10%bot,<y12=reg128#2%top
3583 +# asm 2: vmlal.u32 <r4=q4,<x23=d18,<y12=d3
3584 +vmlal.u32 q4,d18,d3
3585 +
3586 +# qhasm: r0 = r0[0,1]u4[1]c23[0]
3587 +# asm 1: vext.32 <r0=reg128#4%top,<u4=reg128#6%bot,<c23=reg128#14%bot,#1
3588 +# asm 2: vext.32 <r0=d7,<u4=d10,<c23=d26,#1
3589 +vext.32 d7,d10,d26,#1
3590 +
3591 +# qhasm: r4[0,1] += x23[2] unsigned* y12[0]; r4[2,3] += x23[3] unsigned* y12[1 ]
3592 +# asm 1: vmlal.u32 <r4=reg128#5,<x23=reg128#10%top,<y12=reg128#2%bot
3593 +# asm 2: vmlal.u32 <r4=q4,<x23=d19,<y12=d2
3594 +vmlal.u32 q4,d19,d2
3595 +
3596 +# qhasm: r0 = r0[1]r0[0]r0[3]r0[2]
3597 +# asm 1: vrev64.i32 >r0=reg128#4,<r0=reg128#4
3598 +# asm 2: vrev64.i32 >r0=q3,<r0=q3
3599 +vrev64.i32 q3,q3
3600 +
3601 +# qhasm: r4[0,1] += x4[0] unsigned* y0[0]; r4[2,3] += x4[1] unsigned* y0[1]
3602 +# asm 1: vmlal.u32 <r4=reg128#5,<x4=reg128#11%bot,<y0=reg128#1%bot
3603 +# asm 2: vmlal.u32 <r4=q4,<x4=d20,<y0=d0
3604 +vmlal.u32 q4,d20,d0
3605 +
3606 +# qhasm: r0[0,1] += x4[0] unsigned* 5y12[0]; r0[2,3] += x4[1] unsigned* 5y12[ 1]
3607 +# asm 1: vmlal.u32 <r0=reg128#4,<x4=reg128#11%bot,<5y12=reg128#12%bot
3608 +# asm 2: vmlal.u32 <r0=q3,<x4=d20,<5y12=d22
3609 +vmlal.u32 q3,d20,d22
3610 +
3611 +# qhasm: r0[0,1] += x23[0] unsigned* 5y34[0]; r0[2,3] += x23[1] unsigned* 5y34[ 1]
3612 +# asm 1: vmlal.u32 <r0=reg128#4,<x23=reg128#10%bot,<5y34=reg128#13%bot
3613 +# asm 2: vmlal.u32 <r0=q3,<x23=d18,<5y34=d24
3614 +vmlal.u32 q3,d18,d24
3615 +
3616 +# qhasm: r0[0,1] += x23[2] unsigned* 5y12[2]; r0[2,3] += x23[3] unsigned* 5y12[ 3]
3617 +# asm 1: vmlal.u32 <r0=reg128#4,<x23=reg128#10%top,<5y12=reg128#12%top
3618 +# asm 2: vmlal.u32 <r0=q3,<x23=d19,<5y12=d23
3619 +vmlal.u32 q3,d19,d23
3620 +
3621 +# qhasm: c01 c23 = c01[0]c23[0]c01[2]c23[2]c01[1]c23[1]c01[3]c23[3]
3622 +# asm 1: vtrn.32 <c01=reg128#8,<c23=reg128#14
3623 +# asm 2: vtrn.32 <c01=q7,<c23=q13
3624 +vtrn.32 q7,q13
3625 +
3626 +# qhasm: r0[0,1] += x01[0] unsigned* y0[0]; r0[2,3] += x01[1] unsigned* y0[1]
3627 +# asm 1: vmlal.u32 <r0=reg128#4,<x01=reg128#9%bot,<y0=reg128#1%bot
3628 +# asm 2: vmlal.u32 <r0=q3,<x01=d16,<y0=d0
3629 +vmlal.u32 q3,d16,d0
3630 +
3631 +# qhasm: r3[0,1] = c23[2]<<18; r3[2,3] = c23[3]<<18
3632 +# asm 1: vshll.u32 >r3=reg128#6,<c23=reg128#14%top,#18
3633 +# asm 2: vshll.u32 >r3=q5,<c23=d27,#18
3634 +vshll.u32 q5,d27,#18
3635 +
3636 +# qhasm: r0[0,1] += x01[2] unsigned* 5y34[2]; r0[2,3] += x01[3] unsigned* 5y3 4[3]
3637 +# asm 1: vmlal.u32 <r0=reg128#4,<x01=reg128#9%top,<5y34=reg128#13%top
3638 +# asm 2: vmlal.u32 <r0=q3,<x01=d17,<5y34=d25
3639 +vmlal.u32 q3,d17,d25
3640 +
3641 +# qhasm: r3[0,1] += x01[0] unsigned* y34[0]; r3[2,3] += x01[1] unsigned* y34[ 1]
3642 +# asm 1: vmlal.u32 <r3=reg128#6,<x01=reg128#9%bot,<y34=reg128#3%bot
3643 +# asm 2: vmlal.u32 <r3=q5,<x01=d16,<y34=d4
3644 +vmlal.u32 q5,d16,d4
3645 +
3646 +# qhasm: r3[0,1] += x01[2] unsigned* y12[2]; r3[2,3] += x01[3] unsigned* y12[ 3]
3647 +# asm 1: vmlal.u32 <r3=reg128#6,<x01=reg128#9%top,<y12=reg128#2%top
3648 +# asm 2: vmlal.u32 <r3=q5,<x01=d17,<y12=d3
3649 +vmlal.u32 q5,d17,d3
3650 +
3651 +# qhasm: r3[0,1] += x23[0] unsigned* y12[0]; r3[2,3] += x23[1] unsigned* y12[ 1]
3652 +# asm 1: vmlal.u32 <r3=reg128#6,<x23=reg128#10%bot,<y12=reg128#2%bot
3653 +# asm 2: vmlal.u32 <r3=q5,<x23=d18,<y12=d2
3654 +vmlal.u32 q5,d18,d2
3655 +
3656 +# qhasm: r3[0,1] += x23[2] unsigned* y0[0]; r3[2,3] += x23[3] unsigned* y0[1]
3657 +# asm 1: vmlal.u32 <r3=reg128#6,<x23=reg128#10%top,<y0=reg128#1%bot
3658 +# asm 2: vmlal.u32 <r3=q5,<x23=d19,<y0=d0
3659 +vmlal.u32 q5,d19,d0
3660 +
3661 +# qhasm: r1[0,1] = c23[0]<<6; r1[2,3] = c23[1]<<6
3662 +# asm 1: vshll.u32 >r1=reg128#14,<c23=reg128#14%bot,#6
3663 +# asm 2: vshll.u32 >r1=q13,<c23=d26,#6
3664 +vshll.u32 q13,d26,#6
3665 +
3666 +# qhasm: r3[0,1] += x4[0] unsigned* 5y34[2]; r3[2,3] += x4[1] unsigned* 5y3 4[3]
3667 +# asm 1: vmlal.u32 <r3=reg128#6,<x4=reg128#11%bot,<5y34=reg128#13%top
3668 +# asm 2: vmlal.u32 <r3=q5,<x4=d20,<5y34=d25
3669 +vmlal.u32 q5,d20,d25
3670 +
3671 +# qhasm: r1[0,1] += x01[0] unsigned* y12[0]; r1[2,3] += x01[1] unsigned* y12[ 1]
3672 +# asm 1: vmlal.u32 <r1=reg128#14,<x01=reg128#9%bot,<y12=reg128#2%bot
3673 +# asm 2: vmlal.u32 <r1=q13,<x01=d16,<y12=d2
3674 +vmlal.u32 q13,d16,d2
3675 +
3676 +# qhasm: r1[0,1] += x01[2] unsigned* y0[0]; r1[2,3] += x01[3] unsigned* y0[1]
3677 +# asm 1: vmlal.u32 <r1=reg128#14,<x01=reg128#9%top,<y0=reg128#1%bot
3678 +# asm 2: vmlal.u32 <r1=q13,<x01=d17,<y0=d0
3679 +vmlal.u32 q13,d17,d0
3680 +
3681 +# qhasm: r1[0,1] += x23[0] unsigned* 5y34[2]; r1[2,3] += x23[1] unsigned* 5y3 4[3]
3682 +# asm 1: vmlal.u32 <r1=reg128#14,<x23=reg128#10%bot,<5y34=reg128#13%top
3683 +# asm 2: vmlal.u32 <r1=q13,<x23=d18,<5y34=d25
3684 +vmlal.u32 q13,d18,d25
3685 +
3686 +# qhasm: r1[0,1] += x23[2] unsigned* 5y34[0]; r1[2,3] += x23[3] unsigned* 5y34[ 1]
3687 +# asm 1: vmlal.u32 <r1=reg128#14,<x23=reg128#10%top,<5y34=reg128#13%bot
3688 +# asm 2: vmlal.u32 <r1=q13,<x23=d19,<5y34=d24
3689 +vmlal.u32 q13,d19,d24
3690 +
3691 +# qhasm: r2[0,1] = c01[2]<<12; r2[2,3] = c01[3]<<12
3692 +# asm 1: vshll.u32 >r2=reg128#8,<c01=reg128#8%top,#12
3693 +# asm 2: vshll.u32 >r2=q7,<c01=d15,#12
3694 +vshll.u32 q7,d15,#12
3695 +
3696 +# qhasm: r1[0,1] += x4[0] unsigned* 5y12[2]; r1[2,3] += x4[1] unsigned* 5y12[ 3]
3697 +# asm 1: vmlal.u32 <r1=reg128#14,<x4=reg128#11%bot,<5y12=reg128#12%top
3698 +# asm 2: vmlal.u32 <r1=q13,<x4=d20,<5y12=d23
3699 +vmlal.u32 q13,d20,d23
3700 +
3701 +# qhasm: r2[0,1] += x01[0] unsigned* y12[2]; r2[2,3] += x01[1] unsigned* y12[ 3]
3702 +# asm 1: vmlal.u32 <r2=reg128#8,<x01=reg128#9%bot,<y12=reg128#2%top
3703 +# asm 2: vmlal.u32 <r2=q7,<x01=d16,<y12=d3
3704 +vmlal.u32 q7,d16,d3
3705 +
3706 +# qhasm: r2[0,1] += x01[2] unsigned* y12[0]; r2[2,3] += x01[3] unsigned* y12[ 1]
3707 +# asm 1: vmlal.u32 <r2=reg128#8,<x01=reg128#9%top,<y12=reg128#2%bot
3708 +# asm 2: vmlal.u32 <r2=q7,<x01=d17,<y12=d2
3709 +vmlal.u32 q7,d17,d2
3710 +
3711 +# qhasm: r2[0,1] += x23[0] unsigned* y0[0]; r2[2,3] += x23[1] unsigned* y0[1]
3712 +# asm 1: vmlal.u32 <r2=reg128#8,<x23=reg128#10%bot,<y0=reg128#1%bot
3713 +# asm 2: vmlal.u32 <r2=q7,<x23=d18,<y0=d0
3714 +vmlal.u32 q7,d18,d0
3715 +
3716 +# qhasm: r2[0,1] += x23[2] unsigned* 5y34[2]; r2[2,3] += x23[3] unsigned* 5y3 4[3]
3717 +# asm 1: vmlal.u32 <r2=reg128#8,<x23=reg128#10%top,<5y34=reg128#13%top
3718 +# asm 2: vmlal.u32 <r2=q7,<x23=d19,<5y34=d25
3719 +vmlal.u32 q7,d19,d25
3720 +
3721 +# qhasm: r2[0,1] += x4[0] unsigned* 5y34[0]; r2[2,3] += x4[1] unsigned* 5y34[ 1]
3722 +# asm 1: vmlal.u32 <r2=reg128#8,<x4=reg128#11%bot,<5y34=reg128#13%bot
3723 +# asm 2: vmlal.u32 <r2=q7,<x4=d20,<5y34=d24
3724 +vmlal.u32 q7,d20,d24
3725 +
3726 +# qhasm: 2x t1 = r0 unsigned>> 26
3727 +# asm 1: vshr.u64 >t1=reg128#9,<r0=reg128#4,#26
3728 +# asm 2: vshr.u64 >t1=q8,<r0=q3,#26
3729 +vshr.u64 q8,q3,#26
3730 +
3731 +# qhasm: r0 &= mask
3732 +# asm 1: vand >r0=reg128#4,<r0=reg128#4,<mask=reg128#7
3733 +# asm 2: vand >r0=q3,<r0=q3,<mask=q6
3734 +vand q3,q3,q6
3735 +
3736 +# qhasm: 2x r1 += t1
3737 +# asm 1: vadd.i64 >r1=reg128#9,<r1=reg128#14,<t1=reg128#9
3738 +# asm 2: vadd.i64 >r1=q8,<r1=q13,<t1=q8
3739 +vadd.i64 q8,q13,q8
3740 +
3741 +# qhasm: 2x t4 = r3 unsigned>> 26
3742 +# asm 1: vshr.u64 >t4=reg128#10,<r3=reg128#6,#26
3743 +# asm 2: vshr.u64 >t4=q9,<r3=q5,#26
3744 +vshr.u64 q9,q5,#26
3745 +
3746 +# qhasm: r3 &= mask
3747 +# asm 1: vand >r3=reg128#6,<r3=reg128#6,<mask=reg128#7
3748 +# asm 2: vand >r3=q5,<r3=q5,<mask=q6
3749 +vand q5,q5,q6
3750 +
3751 +# qhasm: 2x r4 += t4
3752 +# asm 1: vadd.i64 >r4=reg128#5,<r4=reg128#5,<t4=reg128#10
3753 +# asm 2: vadd.i64 >r4=q4,<r4=q4,<t4=q9
3754 +vadd.i64 q4,q4,q9
3755 +
3756 +# qhasm: 2x t2 = r1 unsigned>> 26
3757 +# asm 1: vshr.u64 >t2=reg128#10,<r1=reg128#9,#26
3758 +# asm 2: vshr.u64 >t2=q9,<r1=q8,#26
3759 +vshr.u64 q9,q8,#26
3760 +
3761 +# qhasm: r1 &= mask
3762 +# asm 1: vand >r1=reg128#11,<r1=reg128#9,<mask=reg128#7
3763 +# asm 2: vand >r1=q10,<r1=q8,<mask=q6
3764 +vand q10,q8,q6
3765 +
3766 +# qhasm: 2x t0 = r4 unsigned>> 26
3767 +# asm 1: vshr.u64 >t0=reg128#9,<r4=reg128#5,#26
3768 +# asm 2: vshr.u64 >t0=q8,<r4=q4,#26
3769 +vshr.u64 q8,q4,#26
3770 +
3771 +# qhasm: 2x r2 += t2
3772 +# asm 1: vadd.i64 >r2=reg128#8,<r2=reg128#8,<t2=reg128#10
3773 +# asm 2: vadd.i64 >r2=q7,<r2=q7,<t2=q9
3774 +vadd.i64 q7,q7,q9
3775 +
3776 +# qhasm: r4 &= mask
3777 +# asm 1: vand >r4=reg128#5,<r4=reg128#5,<mask=reg128#7
3778 +# asm 2: vand >r4=q4,<r4=q4,<mask=q6
3779 +vand q4,q4,q6
3780 +
3781 +# qhasm: 2x r0 += t0
3782 +# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#9
3783 +# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q8
3784 +vadd.i64 q3,q3,q8
3785 +
3786 +# qhasm: 2x t0 <<= 2
3787 +# asm 1: vshl.i64 >t0=reg128#9,<t0=reg128#9,#2
3788 +# asm 2: vshl.i64 >t0=q8,<t0=q8,#2
3789 +vshl.i64 q8,q8,#2
3790 +
3791 +# qhasm: 2x t3 = r2 unsigned>> 26
3792 +# asm 1: vshr.u64 >t3=reg128#14,<r2=reg128#8,#26
3793 +# asm 2: vshr.u64 >t3=q13,<r2=q7,#26
3794 +vshr.u64 q13,q7,#26
3795 +
3796 +# qhasm: 2x r0 += t0
3797 +# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#9
3798 +# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q8
3799 +vadd.i64 q3,q3,q8
3800 +
3801 +# qhasm: x23 = r2 & mask
3802 +# asm 1: vand >x23=reg128#10,<r2=reg128#8,<mask=reg128#7
3803 +# asm 2: vand >x23=q9,<r2=q7,<mask=q6
3804 +vand q9,q7,q6
3805 +
3806 +# qhasm: 2x r3 += t3
3807 +# asm 1: vadd.i64 >r3=reg128#6,<r3=reg128#6,<t3=reg128#14
3808 +# asm 2: vadd.i64 >r3=q5,<r3=q5,<t3=q13
3809 +vadd.i64 q5,q5,q13
3810 +
3811 +# qhasm: 2x t1 = r0 unsigned>> 26
3812 +# asm 1: vshr.u64 >t1=reg128#8,<r0=reg128#4,#26
3813 +# asm 2: vshr.u64 >t1=q7,<r0=q3,#26
3814 +vshr.u64 q7,q3,#26
3815 +
3816 +# qhasm: x01 = r0 & mask
3817 +# asm 1: vand >x01=reg128#9,<r0=reg128#4,<mask=reg128#7
3818 +# asm 2: vand >x01=q8,<r0=q3,<mask=q6
3819 +vand q8,q3,q6
3820 +
3821 +# qhasm: 2x r1 += t1
3822 +# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#11,<t1=reg128#8
3823 +# asm 2: vadd.i64 >r1=q3,<r1=q10,<t1=q7
3824 +vadd.i64 q3,q10,q7
3825 +
3826 +# qhasm: 2x t4 = r3 unsigned>> 26
3827 +# asm 1: vshr.u64 >t4=reg128#8,<r3=reg128#6,#26
3828 +# asm 2: vshr.u64 >t4=q7,<r3=q5,#26
3829 +vshr.u64 q7,q5,#26
3830 +
3831 +# qhasm: r3 &= mask
3832 +# asm 1: vand >r3=reg128#6,<r3=reg128#6,<mask=reg128#7
3833 +# asm 2: vand >r3=q5,<r3=q5,<mask=q6
3834 +vand q5,q5,q6
3835 +
3836 +# qhasm: 2x x4 = r4 + t4
3837 +# asm 1: vadd.i64 >x4=reg128#11,<r4=reg128#5,<t4=reg128#8
3838 +# asm 2: vadd.i64 >x4=q10,<r4=q4,<t4=q7
3839 +vadd.i64 q10,q4,q7
3840 +
3841 +# qhasm: len -= 32
3842 +# asm 1: sub >len=int32#4,<len=int32#4,#32
3843 +# asm 2: sub >len=r3,<len=r3,#32
3844 +sub r3,r3,#32
3845 +
3846 +# qhasm: x01 = x01[0,2,1,3]
3847 +# asm 1: vtrn.32 <x01=reg128#9%bot,<x01=reg128#9%top
3848 +# asm 2: vtrn.32 <x01=d16,<x01=d17
3849 +vtrn.32 d16,d17
3850 +
3851 +# qhasm: x23 = x23[0,2,1,3]
3852 +# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top
3853 +# asm 2: vtrn.32 <x23=d18,<x23=d19
3854 +vtrn.32 d18,d19
3855 +
3856 +# qhasm: r1 = r1[0,2,1,3]
3857 +# asm 1: vtrn.32 <r1=reg128#4%bot,<r1=reg128#4%top
3858 +# asm 2: vtrn.32 <r1=d6,<r1=d7
3859 +vtrn.32 d6,d7
3860 +
3861 +# qhasm: r3 = r3[0,2,1,3]
3862 +# asm 1: vtrn.32 <r3=reg128#6%bot,<r3=reg128#6%top
3863 +# asm 2: vtrn.32 <r3=d10,<r3=d11
3864 +vtrn.32 d10,d11
3865 +
3866 +# qhasm: x4 = x4[0,2,1,3]
3867 +# asm 1: vtrn.32 <x4=reg128#11%bot,<x4=reg128#11%top
3868 +# asm 2: vtrn.32 <x4=d20,<x4=d21
3869 +vtrn.32 d20,d21
3870 +
3871 +# qhasm: x01 = x01[0,1] r1[0,1]
3872 +# asm 1: vext.32 <x01=reg128#9%top,<r1=reg128#4%bot,<r1=reg128#4%bot,#0
3873 +# asm 2: vext.32 <x01=d17,<r1=d6,<r1=d6,#0
3874 +vext.32 d17,d6,d6,#0
3875 +
3876 +# qhasm: x23 = x23[0,1] r3[0,1]
3877 +# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#6%bot,<r3=reg128#6%bot,#0
3878 +# asm 2: vext.32 <x23=d19,<r3=d10,<r3=d10,#0
3879 +vext.32 d19,d10,d10,#0
3880 +
3881 +# qhasm: unsigned>? len - 32
3882 +# asm 1: cmp <len=int32#4,#32
3883 +# asm 2: cmp <len=r3,#32
3884 +cmp r3,#32
3885 +
3886 +# qhasm: goto mainloop if unsigned>
3887 +bhi ._mainloop
3888 +
3889 +# qhasm: end:
3890 +._end:
3891 +
3892 +# qhasm: mem128[input_0] = x01;input_0+=16
3893 +# asm 1: vst1.8 {<x01=reg128#9%bot-<x01=reg128#9%top},[<input_0=int32#1]!
3894 +# asm 2: vst1.8 {<x01=d16-<x01=d17},[<input_0=r0]!
3895 +vst1.8 {d16-d17},[r0]!
3896 +
3897 +# qhasm: mem128[input_0] = x23;input_0+=16
3898 +# asm 1: vst1.8 {<x23=reg128#10%bot-<x23=reg128#10%top},[<input_0=int32#1]!
3899 +# asm 2: vst1.8 {<x23=d18-<x23=d19},[<input_0=r0]!
3900 +vst1.8 {d18-d19},[r0]!
3901 +
3902 +# qhasm: mem64[input_0] = x4[0]
3903 +# asm 1: vst1.8 <x4=reg128#11%bot,[<input_0=int32#1]
3904 +# asm 2: vst1.8 <x4=d20,[<input_0=r0]
3905 +vst1.8 d20,[r0]
3906 +
3907 +# qhasm: len = len
3908 +# asm 1: mov >len=int32#1,<len=int32#4
3909 +# asm 2: mov >len=r0,<len=r3
3910 +mov r0,r3
3911 +
3912 +# qhasm: qpopreturn len
3913 +mov sp,r12
3914 +vpop {q4,q5,q6,q7}
3915 +bx lr
3916 +
3917 +# qhasm: int32 input_0
3918 +
3919 +# qhasm: int32 input_1
3920 +
3921 +# qhasm: int32 input_2
3922 +
3923 +# qhasm: int32 input_3
3924 +
3925 +# qhasm: stack32 input_4
3926 +
3927 +# qhasm: stack32 input_5
3928 +
3929 +# qhasm: stack32 input_6
3930 +
3931 +# qhasm: stack32 input_7
3932 +
3933 +# qhasm: int32 caller_r4
3934 +
3935 +# qhasm: int32 caller_r5
3936 +
3937 +# qhasm: int32 caller_r6
3938 +
3939 +# qhasm: int32 caller_r7
3940 +
3941 +# qhasm: int32 caller_r8
3942 +
3943 +# qhasm: int32 caller_r9
3944 +
3945 +# qhasm: int32 caller_r10
3946 +
3947 +# qhasm: int32 caller_r11
3948 +
3949 +# qhasm: int32 caller_r12
3950 +
3951 +# qhasm: int32 caller_r14
3952 +
3953 +# qhasm: reg128 caller_q4
3954 +
3955 +# qhasm: reg128 caller_q5
3956 +
3957 +# qhasm: reg128 caller_q6
3958 +
3959 +# qhasm: reg128 caller_q7
3960 +
3961 +# qhasm: reg128 r0
3962 +
3963 +# qhasm: reg128 r1
3964 +
3965 +# qhasm: reg128 r2
3966 +
3967 +# qhasm: reg128 r3
3968 +
3969 +# qhasm: reg128 r4
3970 +
3971 +# qhasm: reg128 x01
3972 +
3973 +# qhasm: reg128 x23
3974 +
3975 +# qhasm: reg128 x4
3976 +
3977 +# qhasm: reg128 y01
3978 +
3979 +# qhasm: reg128 y23
3980 +
3981 +# qhasm: reg128 y4
3982 +
3983 +# qhasm: reg128 _5y01
3984 +
3985 +# qhasm: reg128 _5y23
3986 +
3987 +# qhasm: reg128 _5y4
3988 +
3989 +# qhasm: reg128 c01
3990 +
3991 +# qhasm: reg128 c23
3992 +
3993 +# qhasm: reg128 c4
3994 +
3995 +# qhasm: reg128 t0
3996 +
3997 +# qhasm: reg128 t1
3998 +
3999 +# qhasm: reg128 t2
4000 +
4001 +# qhasm: reg128 t3
4002 +
4003 +# qhasm: reg128 t4
4004 +
4005 +# qhasm: reg128 mask
4006 +
4007 +# qhasm: enter crypto_onetimeauth_poly1305_neon2_addmulmod
4008 +.align 2
4009 +.global openssl_poly1305_neon2_addmulmod
4010 +.type openssl_poly1305_neon2_addmulmod STT_FUNC
4011 +openssl_poly1305_neon2_addmulmod:
4012 +sub sp,sp,#0
4013 +
4014 +# qhasm: 2x mask = 0xffffffff
4015 +# asm 1: vmov.i64 >mask=reg128#1,#0xffffffff
4016 +# asm 2: vmov.i64 >mask=q0,#0xffffffff
4017 +vmov.i64 q0,#0xffffffff
4018 +
4019 +# qhasm: y01 aligned= mem128[input_2];input_2+=16
4020 +# asm 1: vld1.8 {>y01=reg128#2%bot->y01=reg128#2%top},[<input_2=int32#3,: 128]!
4021 +# asm 2: vld1.8 {>y01=d2->y01=d3},[<input_2=r2,: 128]!
4022 +vld1.8 {d2-d3},[r2,: 128]!
4023 +
4024 +# qhasm: 4x _5y01 = y01 << 2
4025 +# asm 1: vshl.i32 >_5y01=reg128#3,<y01=reg128#2,#2
4026 +# asm 2: vshl.i32 >_5y01=q2,<y01=q1,#2
4027 +vshl.i32 q2,q1,#2
4028 +
4029 +# qhasm: y23 aligned= mem128[input_2];input_2+=16
4030 +# asm 1: vld1.8 {>y23=reg128#4%bot->y23=reg128#4%top},[<input_2=int32#3,: 128]!
4031 +# asm 2: vld1.8 {>y23=d6->y23=d7},[<input_2=r2,: 128]!
4032 +vld1.8 {d6-d7},[r2,: 128]!
4033 +
4034 +# qhasm: 4x _5y23 = y23 << 2
4035 +# asm 1: vshl.i32 >_5y23=reg128#9,<y23=reg128#4,#2
4036 +# asm 2: vshl.i32 >_5y23=q8,<y23=q3,#2
4037 +vshl.i32 q8,q3,#2
4038 +
4039 +# qhasm: y4 aligned= mem64[input_2]y4[1]
4040 +# asm 1: vld1.8 {<y4=reg128#10%bot},[<input_2=int32#3,: 64]
4041 +# asm 2: vld1.8 {<y4=d18},[<input_2=r2,: 64]
4042 +vld1.8 {d18},[r2,: 64]
4043 +
4044 +# qhasm: 4x _5y4 = y4 << 2
4045 +# asm 1: vshl.i32 >_5y4=reg128#11,<y4=reg128#10,#2
4046 +# asm 2: vshl.i32 >_5y4=q10,<y4=q9,#2
4047 +vshl.i32 q10,q9,#2
4048 +
4049 +# qhasm: x01 aligned= mem128[input_1];input_1+=16
4050 +# asm 1: vld1.8 {>x01=reg128#12%bot->x01=reg128#12%top},[<input_1=int32#2,: 128 ]!
4051 +# asm 2: vld1.8 {>x01=d22->x01=d23},[<input_1=r1,: 128]!
4052 +vld1.8 {d22-d23},[r1,: 128]!
4053 +
4054 +# qhasm: 4x _5y01 += y01
4055 +# asm 1: vadd.i32 >_5y01=reg128#3,<_5y01=reg128#3,<y01=reg128#2
4056 +# asm 2: vadd.i32 >_5y01=q2,<_5y01=q2,<y01=q1
4057 +vadd.i32 q2,q2,q1
4058 +
4059 +# qhasm: x23 aligned= mem128[input_1];input_1+=16
4060 +# asm 1: vld1.8 {>x23=reg128#13%bot->x23=reg128#13%top},[<input_1=int32#2,: 128 ]!
4061 +# asm 2: vld1.8 {>x23=d24->x23=d25},[<input_1=r1,: 128]!
4062 +vld1.8 {d24-d25},[r1,: 128]!
4063 +
4064 +# qhasm: 4x _5y23 += y23
4065 +# asm 1: vadd.i32 >_5y23=reg128#9,<_5y23=reg128#9,<y23=reg128#4
4066 +# asm 2: vadd.i32 >_5y23=q8,<_5y23=q8,<y23=q3
4067 +vadd.i32 q8,q8,q3
4068 +
4069 +# qhasm: 4x _5y4 += y4
4070 +# asm 1: vadd.i32 >_5y4=reg128#11,<_5y4=reg128#11,<y4=reg128#10
4071 +# asm 2: vadd.i32 >_5y4=q10,<_5y4=q10,<y4=q9
4072 +vadd.i32 q10,q10,q9
4073 +
4074 +# qhasm: c01 aligned= mem128[input_3];input_3+=16
4075 +# asm 1: vld1.8 {>c01=reg128#14%bot->c01=reg128#14%top},[<input_3=int32#4,: 128 ]!
4076 +# asm 2: vld1.8 {>c01=d26->c01=d27},[<input_3=r3,: 128]!
4077 +vld1.8 {d26-d27},[r3,: 128]!
4078 +
4079 +# qhasm: 4x x01 += c01
4080 +# asm 1: vadd.i32 >x01=reg128#12,<x01=reg128#12,<c01=reg128#14
4081 +# asm 2: vadd.i32 >x01=q11,<x01=q11,<c01=q13
4082 +vadd.i32 q11,q11,q13
4083 +
4084 +# qhasm: c23 aligned= mem128[input_3];input_3+=16
4085 +# asm 1: vld1.8 {>c23=reg128#14%bot->c23=reg128#14%top},[<input_3=int32#4,: 128 ]!
4086 +# asm 2: vld1.8 {>c23=d26->c23=d27},[<input_3=r3,: 128]!
4087 +vld1.8 {d26-d27},[r3,: 128]!
4088 +
4089 +# qhasm: 4x x23 += c23
4090 +# asm 1: vadd.i32 >x23=reg128#13,<x23=reg128#13,<c23=reg128#14
4091 +# asm 2: vadd.i32 >x23=q12,<x23=q12,<c23=q13
4092 +vadd.i32 q12,q12,q13
4093 +
4094 +# qhasm: x4 aligned= mem64[input_1]x4[1]
4095 +# asm 1: vld1.8 {<x4=reg128#14%bot},[<input_1=int32#2,: 64]
4096 +# asm 2: vld1.8 {<x4=d26},[<input_1=r1,: 64]
4097 +vld1.8 {d26},[r1,: 64]
4098 +
4099 +# qhasm: 2x mask unsigned>>=6
4100 +# asm 1: vshr.u64 >mask=reg128#1,<mask=reg128#1,#6
4101 +# asm 2: vshr.u64 >mask=q0,<mask=q0,#6
4102 +vshr.u64 q0,q0,#6
4103 +
4104 +# qhasm: c4 aligned= mem64[input_3]c4[1]
4105 +# asm 1: vld1.8 {<c4=reg128#15%bot},[<input_3=int32#4,: 64]
4106 +# asm 2: vld1.8 {<c4=d28},[<input_3=r3,: 64]
4107 +vld1.8 {d28},[r3,: 64]
4108 +
4109 +# qhasm: 4x x4 += c4
4110 +# asm 1: vadd.i32 >x4=reg128#14,<x4=reg128#14,<c4=reg128#15
4111 +# asm 2: vadd.i32 >x4=q13,<x4=q13,<c4=q14
4112 +vadd.i32 q13,q13,q14
4113 +
4114 +# qhasm: r0[0,1] = x01[0] unsigned* y01[0]; r0[2,3] = x01[1] unsigned* y01[ 1]
4115 +# asm 1: vmull.u32 >r0=reg128#15,<x01=reg128#12%bot,<y01=reg128#2%bot
4116 +# asm 2: vmull.u32 >r0=q14,<x01=d22,<y01=d2
4117 +vmull.u32 q14,d22,d2
4118 +
4119 +# qhasm: r0[0,1] += x01[2] unsigned* _5y4[0]; r0[2,3] += x01[3] unsigned* _5y 4[1]
4120 +# asm 1: vmlal.u32 <r0=reg128#15,<x01=reg128#12%top,<_5y4=reg128#11%bot
4121 +# asm 2: vmlal.u32 <r0=q14,<x01=d23,<_5y4=d20
4122 +vmlal.u32 q14,d23,d20
4123 +
4124 +# qhasm: r0[0,1] += x23[0] unsigned* _5y23[2]; r0[2,3] += x23[1] unsigned* _5y2 3[3]
4125 +# asm 1: vmlal.u32 <r0=reg128#15,<x23=reg128#13%bot,<_5y23=reg128#9%top
4126 +# asm 2: vmlal.u32 <r0=q14,<x23=d24,<_5y23=d17
4127 +vmlal.u32 q14,d24,d17
4128 +
4129 +# qhasm: r0[0,1] += x23[2] unsigned* _5y23[0]; r0[2,3] += x23[3] unsigned* _5y2 3[1]
4130 +# asm 1: vmlal.u32 <r0=reg128#15,<x23=reg128#13%top,<_5y23=reg128#9%bot
4131 +# asm 2: vmlal.u32 <r0=q14,<x23=d25,<_5y23=d16
4132 +vmlal.u32 q14,d25,d16
4133 +
4134 +# qhasm: r0[0,1] += x4[0] unsigned* _5y01[2]; r0[2,3] += x4[1] unsigned* _5y0 1[3]
4135 +# asm 1: vmlal.u32 <r0=reg128#15,<x4=reg128#14%bot,<_5y01=reg128#3%top
4136 +# asm 2: vmlal.u32 <r0=q14,<x4=d26,<_5y01=d5
4137 +vmlal.u32 q14,d26,d5
4138 +
4139 +# qhasm: r1[0,1] = x01[0] unsigned* y01[2]; r1[2,3] = x01[1] unsigned* y01[ 3]
4140 +# asm 1: vmull.u32 >r1=reg128#3,<x01=reg128#12%bot,<y01=reg128#2%top
4141 +# asm 2: vmull.u32 >r1=q2,<x01=d22,<y01=d3
4142 +vmull.u32 q2,d22,d3
4143 +
4144 +# qhasm: r1[0,1] += x01[2] unsigned* y01[0]; r1[2,3] += x01[3] unsigned* y01[ 1]
4145 +# asm 1: vmlal.u32 <r1=reg128#3,<x01=reg128#12%top,<y01=reg128#2%bot
4146 +# asm 2: vmlal.u32 <r1=q2,<x01=d23,<y01=d2
4147 +vmlal.u32 q2,d23,d2
4148 +
4149 +# qhasm: r1[0,1] += x23[0] unsigned* _5y4[0]; r1[2,3] += x23[1] unsigned* _5y 4[1]
4150 +# asm 1: vmlal.u32 <r1=reg128#3,<x23=reg128#13%bot,<_5y4=reg128#11%bot
4151 +# asm 2: vmlal.u32 <r1=q2,<x23=d24,<_5y4=d20
4152 +vmlal.u32 q2,d24,d20
4153 +
4154 +# qhasm: r1[0,1] += x23[2] unsigned* _5y23[2]; r1[2,3] += x23[3] unsigned* _5y2 3[3]
4155 +# asm 1: vmlal.u32 <r1=reg128#3,<x23=reg128#13%top,<_5y23=reg128#9%top
4156 +# asm 2: vmlal.u32 <r1=q2,<x23=d25,<_5y23=d17
4157 +vmlal.u32 q2,d25,d17
4158 +
4159 +# qhasm: r1[0,1] += x4[0] unsigned* _5y23[0]; r1[2,3] += x4[1] unsigned* _5y2 3[1]
4160 +# asm 1: vmlal.u32 <r1=reg128#3,<x4=reg128#14%bot,<_5y23=reg128#9%bot
4161 +# asm 2: vmlal.u32 <r1=q2,<x4=d26,<_5y23=d16
4162 +vmlal.u32 q2,d26,d16
4163 +
4164 +# qhasm: r2[0,1] = x01[0] unsigned* y23[0]; r2[2,3] = x01[1] unsigned* y23[ 1]
4165 +# asm 1: vmull.u32 >r2=reg128#16,<x01=reg128#12%bot,<y23=reg128#4%bot
4166 +# asm 2: vmull.u32 >r2=q15,<x01=d22,<y23=d6
4167 +vmull.u32 q15,d22,d6
4168 +
4169 +# qhasm: r2[0,1] += x01[2] unsigned* y01[2]; r2[2,3] += x01[3] unsigned* y01[ 3]
4170 +# asm 1: vmlal.u32 <r2=reg128#16,<x01=reg128#12%top,<y01=reg128#2%top
4171 +# asm 2: vmlal.u32 <r2=q15,<x01=d23,<y01=d3
4172 +vmlal.u32 q15,d23,d3
4173 +
4174 +# qhasm: r2[0,1] += x23[0] unsigned* y01[0]; r2[2,3] += x23[1] unsigned* y01[ 1]
4175 +# asm 1: vmlal.u32 <r2=reg128#16,<x23=reg128#13%bot,<y01=reg128#2%bot
4176 +# asm 2: vmlal.u32 <r2=q15,<x23=d24,<y01=d2
4177 +vmlal.u32 q15,d24,d2
4178 +
4179 +# qhasm: r2[0,1] += x23[2] unsigned* _5y4[0]; r2[2,3] += x23[3] unsigned* _5y 4[1]
4180 +# asm 1: vmlal.u32 <r2=reg128#16,<x23=reg128#13%top,<_5y4=reg128#11%bot
4181 +# asm 2: vmlal.u32 <r2=q15,<x23=d25,<_5y4=d20
4182 +vmlal.u32 q15,d25,d20
4183 +
4184 +# qhasm: r2[0,1] += x4[0] unsigned* _5y23[2]; r2[2,3] += x4[1] unsigned* _5y2 3[3]
4185 +# asm 1: vmlal.u32 <r2=reg128#16,<x4=reg128#14%bot,<_5y23=reg128#9%top
4186 +# asm 2: vmlal.u32 <r2=q15,<x4=d26,<_5y23=d17
4187 +vmlal.u32 q15,d26,d17
4188 +
4189 +# qhasm: r3[0,1] = x01[0] unsigned* y23[2]; r3[2,3] = x01[1] unsigned* y23[ 3]
4190 +# asm 1: vmull.u32 >r3=reg128#9,<x01=reg128#12%bot,<y23=reg128#4%top
4191 +# asm 2: vmull.u32 >r3=q8,<x01=d22,<y23=d7
4192 +vmull.u32 q8,d22,d7
4193 +
4194 +# qhasm: r3[0,1] += x01[2] unsigned* y23[0]; r3[2,3] += x01[3] unsigned* y23[ 1]
4195 +# asm 1: vmlal.u32 <r3=reg128#9,<x01=reg128#12%top,<y23=reg128#4%bot
4196 +# asm 2: vmlal.u32 <r3=q8,<x01=d23,<y23=d6
4197 +vmlal.u32 q8,d23,d6
4198 +
4199 +# qhasm: r3[0,1] += x23[0] unsigned* y01[2]; r3[2,3] += x23[1] unsigned* y01[ 3]
4200 +# asm 1: vmlal.u32 <r3=reg128#9,<x23=reg128#13%bot,<y01=reg128#2%top
4201 +# asm 2: vmlal.u32 <r3=q8,<x23=d24,<y01=d3
4202 +vmlal.u32 q8,d24,d3
4203 +
4204 +# qhasm: r3[0,1] += x23[2] unsigned* y01[0]; r3[2,3] += x23[3] unsigned* y01[ 1]
4205 +# asm 1: vmlal.u32 <r3=reg128#9,<x23=reg128#13%top,<y01=reg128#2%bot
4206 +# asm 2: vmlal.u32 <r3=q8,<x23=d25,<y01=d2
4207 +vmlal.u32 q8,d25,d2
4208 +
4209 +# qhasm: r3[0,1] += x4[0] unsigned* _5y4[0]; r3[2,3] += x4[1] unsigned* _5y 4[1]
4210 +# asm 1: vmlal.u32 <r3=reg128#9,<x4=reg128#14%bot,<_5y4=reg128#11%bot
4211 +# asm 2: vmlal.u32 <r3=q8,<x4=d26,<_5y4=d20
4212 +vmlal.u32 q8,d26,d20
4213 +
4214 +# qhasm: r4[0,1] = x01[0] unsigned* y4[0]; r4[2,3] = x01[1] unsigned* y4[1 ]
4215 +# asm 1: vmull.u32 >r4=reg128#10,<x01=reg128#12%bot,<y4=reg128#10%bot
4216 +# asm 2: vmull.u32 >r4=q9,<x01=d22,<y4=d18
4217 +vmull.u32 q9,d22,d18
4218 +
4219 +# qhasm: r4[0,1] += x01[2] unsigned* y23[2]; r4[2,3] += x01[3] unsigned* y23[3 ]
4220 +# asm 1: vmlal.u32 <r4=reg128#10,<x01=reg128#12%top,<y23=reg128#4%top
4221 +# asm 2: vmlal.u32 <r4=q9,<x01=d23,<y23=d7
4222 +vmlal.u32 q9,d23,d7
4223 +
4224 +# qhasm: r4[0,1] += x23[0] unsigned* y23[0]; r4[2,3] += x23[1] unsigned* y23[1 ]
4225 +# asm 1: vmlal.u32 <r4=reg128#10,<x23=reg128#13%bot,<y23=reg128#4%bot
4226 +# asm 2: vmlal.u32 <r4=q9,<x23=d24,<y23=d6
4227 +vmlal.u32 q9,d24,d6
4228 +
4229 +# qhasm: r4[0,1] += x23[2] unsigned* y01[2]; r4[2,3] += x23[3] unsigned* y01[3 ]
4230 +# asm 1: vmlal.u32 <r4=reg128#10,<x23=reg128#13%top,<y01=reg128#2%top
4231 +# asm 2: vmlal.u32 <r4=q9,<x23=d25,<y01=d3
4232 +vmlal.u32 q9,d25,d3
4233 +
4234 +# qhasm: r4[0,1] += x4[0] unsigned* y01[0]; r4[2,3] += x4[1] unsigned* y01[1 ]
4235 +# asm 1: vmlal.u32 <r4=reg128#10,<x4=reg128#14%bot,<y01=reg128#2%bot
4236 +# asm 2: vmlal.u32 <r4=q9,<x4=d26,<y01=d2
4237 +vmlal.u32 q9,d26,d2
4238 +
4239 +# qhasm: 2x t1 = r0 unsigned>> 26
4240 +# asm 1: vshr.u64 >t1=reg128#2,<r0=reg128#15,#26
4241 +# asm 2: vshr.u64 >t1=q1,<r0=q14,#26
4242 +vshr.u64 q1,q14,#26
4243 +
4244 +# qhasm: r0 &= mask
4245 +# asm 1: vand >r0=reg128#4,<r0=reg128#15,<mask=reg128#1
4246 +# asm 2: vand >r0=q3,<r0=q14,<mask=q0
4247 +vand q3,q14,q0
4248 +
4249 +# qhasm: 2x r1 += t1
4250 +# asm 1: vadd.i64 >r1=reg128#2,<r1=reg128#3,<t1=reg128#2
4251 +# asm 2: vadd.i64 >r1=q1,<r1=q2,<t1=q1
4252 +vadd.i64 q1,q2,q1
4253 +
4254 +# qhasm: 2x t4 = r3 unsigned>> 26
4255 +# asm 1: vshr.u64 >t4=reg128#3,<r3=reg128#9,#26
4256 +# asm 2: vshr.u64 >t4=q2,<r3=q8,#26
4257 +vshr.u64 q2,q8,#26
4258 +
4259 +# qhasm: r3 &= mask
4260 +# asm 1: vand >r3=reg128#9,<r3=reg128#9,<mask=reg128#1
4261 +# asm 2: vand >r3=q8,<r3=q8,<mask=q0
4262 +vand q8,q8,q0
4263 +
4264 +# qhasm: 2x r4 += t4
4265 +# asm 1: vadd.i64 >r4=reg128#3,<r4=reg128#10,<t4=reg128#3
4266 +# asm 2: vadd.i64 >r4=q2,<r4=q9,<t4=q2
4267 +vadd.i64 q2,q9,q2
4268 +
4269 +# qhasm: 2x t2 = r1 unsigned>> 26
4270 +# asm 1: vshr.u64 >t2=reg128#10,<r1=reg128#2,#26
4271 +# asm 2: vshr.u64 >t2=q9,<r1=q1,#26
4272 +vshr.u64 q9,q1,#26
4273 +
4274 +# qhasm: r1 &= mask
4275 +# asm 1: vand >r1=reg128#2,<r1=reg128#2,<mask=reg128#1
4276 +# asm 2: vand >r1=q1,<r1=q1,<mask=q0
4277 +vand q1,q1,q0
4278 +
4279 +# qhasm: 2x t0 = r4 unsigned>> 26
4280 +# asm 1: vshr.u64 >t0=reg128#11,<r4=reg128#3,#26
4281 +# asm 2: vshr.u64 >t0=q10,<r4=q2,#26
4282 +vshr.u64 q10,q2,#26
4283 +
4284 +# qhasm: 2x r2 += t2
4285 +# asm 1: vadd.i64 >r2=reg128#10,<r2=reg128#16,<t2=reg128#10
4286 +# asm 2: vadd.i64 >r2=q9,<r2=q15,<t2=q9
4287 +vadd.i64 q9,q15,q9
4288 +
4289 +# qhasm: r4 &= mask
4290 +# asm 1: vand >r4=reg128#3,<r4=reg128#3,<mask=reg128#1
4291 +# asm 2: vand >r4=q2,<r4=q2,<mask=q0
4292 +vand q2,q2,q0
4293 +
4294 +# qhasm: 2x r0 += t0
4295 +# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#11
4296 +# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q10
4297 +vadd.i64 q3,q3,q10
4298 +
4299 +# qhasm: 2x t0 <<= 2
4300 +# asm 1: vshl.i64 >t0=reg128#11,<t0=reg128#11,#2
4301 +# asm 2: vshl.i64 >t0=q10,<t0=q10,#2
4302 +vshl.i64 q10,q10,#2
4303 +
4304 +# qhasm: 2x t3 = r2 unsigned>> 26
4305 +# asm 1: vshr.u64 >t3=reg128#12,<r2=reg128#10,#26
4306 +# asm 2: vshr.u64 >t3=q11,<r2=q9,#26
4307 +vshr.u64 q11,q9,#26
4308 +
4309 +# qhasm: 2x r0 += t0
4310 +# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#11
4311 +# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q10
4312 +vadd.i64 q3,q3,q10
4313 +
4314 +# qhasm: x23 = r2 & mask
4315 +# asm 1: vand >x23=reg128#10,<r2=reg128#10,<mask=reg128#1
4316 +# asm 2: vand >x23=q9,<r2=q9,<mask=q0
4317 +vand q9,q9,q0
4318 +
4319 +# qhasm: 2x r3 += t3
4320 +# asm 1: vadd.i64 >r3=reg128#9,<r3=reg128#9,<t3=reg128#12
4321 +# asm 2: vadd.i64 >r3=q8,<r3=q8,<t3=q11
4322 +vadd.i64 q8,q8,q11
4323 +
4324 +# qhasm: 2x t1 = r0 unsigned>> 26
4325 +# asm 1: vshr.u64 >t1=reg128#11,<r0=reg128#4,#26
4326 +# asm 2: vshr.u64 >t1=q10,<r0=q3,#26
4327 +vshr.u64 q10,q3,#26
4328 +
4329 +# qhasm: x23 = x23[0,2,1,3]
4330 +# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top
4331 +# asm 2: vtrn.32 <x23=d18,<x23=d19
4332 +vtrn.32 d18,d19
4333 +
4334 +# qhasm: x01 = r0 & mask
4335 +# asm 1: vand >x01=reg128#4,<r0=reg128#4,<mask=reg128#1
4336 +# asm 2: vand >x01=q3,<r0=q3,<mask=q0
4337 +vand q3,q3,q0
4338 +
4339 +# qhasm: 2x r1 += t1
4340 +# asm 1: vadd.i64 >r1=reg128#2,<r1=reg128#2,<t1=reg128#11
4341 +# asm 2: vadd.i64 >r1=q1,<r1=q1,<t1=q10
4342 +vadd.i64 q1,q1,q10
4343 +
4344 +# qhasm: 2x t4 = r3 unsigned>> 26
4345 +# asm 1: vshr.u64 >t4=reg128#11,<r3=reg128#9,#26
4346 +# asm 2: vshr.u64 >t4=q10,<r3=q8,#26
4347 +vshr.u64 q10,q8,#26
4348 +
4349 +# qhasm: x01 = x01[0,2,1,3]
4350 +# asm 1: vtrn.32 <x01=reg128#4%bot,<x01=reg128#4%top
4351 +# asm 2: vtrn.32 <x01=d6,<x01=d7
4352 +vtrn.32 d6,d7
4353 +
4354 +# qhasm: r3 &= mask
4355 +# asm 1: vand >r3=reg128#1,<r3=reg128#9,<mask=reg128#1
4356 +# asm 2: vand >r3=q0,<r3=q8,<mask=q0
4357 +vand q0,q8,q0
4358 +
4359 +# qhasm: r1 = r1[0,2,1,3]
4360 +# asm 1: vtrn.32 <r1=reg128#2%bot,<r1=reg128#2%top
4361 +# asm 2: vtrn.32 <r1=d2,<r1=d3
4362 +vtrn.32 d2,d3
4363 +
4364 +# qhasm: 2x x4 = r4 + t4
4365 +# asm 1: vadd.i64 >x4=reg128#3,<r4=reg128#3,<t4=reg128#11
4366 +# asm 2: vadd.i64 >x4=q2,<r4=q2,<t4=q10
4367 +vadd.i64 q2,q2,q10
4368 +
4369 +# qhasm: r3 = r3[0,2,1,3]
4370 +# asm 1: vtrn.32 <r3=reg128#1%bot,<r3=reg128#1%top
4371 +# asm 2: vtrn.32 <r3=d0,<r3=d1
4372 +vtrn.32 d0,d1
4373 +
4374 +# qhasm: x01 = x01[0,1] r1[0,1]
4375 +# asm 1: vext.32 <x01=reg128#4%top,<r1=reg128#2%bot,<r1=reg128#2%bot,#0
4376 +# asm 2: vext.32 <x01=d7,<r1=d2,<r1=d2,#0
4377 +vext.32 d7,d2,d2,#0
4378 +
4379 +# qhasm: x23 = x23[0,1] r3[0,1]
4380 +# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#1%bot,<r3=reg128#1%bot,#0
4381 +# asm 2: vext.32 <x23=d19,<r3=d0,<r3=d0,#0
4382 +vext.32 d19,d0,d0,#0
4383 +
4384 +# qhasm: x4 = x4[0,2,1,3]
4385 +# asm 1: vtrn.32 <x4=reg128#3%bot,<x4=reg128#3%top
4386 +# asm 2: vtrn.32 <x4=d4,<x4=d5
4387 +vtrn.32 d4,d5
4388 +
4389 +# qhasm: mem128[input_0] aligned= x01;input_0+=16
4390 +# asm 1: vst1.8 {<x01=reg128#4%bot-<x01=reg128#4%top},[<input_0=int32#1,: 128]!
4391 +# asm 2: vst1.8 {<x01=d6-<x01=d7},[<input_0=r0,: 128]!
4392 +vst1.8 {d6-d7},[r0,: 128]!
4393 +
4394 +# qhasm: mem128[input_0] aligned= x23;input_0+=16
4395 +# asm 1: vst1.8 {<x23=reg128#10%bot-<x23=reg128#10%top},[<input_0=int32#1,: 128 ]!
4396 +# asm 2: vst1.8 {<x23=d18-<x23=d19},[<input_0=r0,: 128]!
4397 +vst1.8 {d18-d19},[r0,: 128]!
4398 +
4399 +# qhasm: mem64[input_0] aligned= x4[0]
4400 +# asm 1: vst1.8 <x4=reg128#3%bot,[<input_0=int32#1,: 64]
4401 +# asm 2: vst1.8 <x4=d4,[<input_0=r0,: 64]
4402 +vst1.8 d4,[r0,: 64]
4403 +
4404 +# qhasm: return
4405 +add sp,sp,#0
4406 +bx lr
4407 diff --git a/crypto/poly1305/poly1305_vec.c b/crypto/poly1305/poly1305_vec.c
4408 new file mode 100644
4409 index 0000000..c546200
4410 --- /dev/null
4411 +++ b/crypto/poly1305/poly1305_vec.c
4412 @@ -0,0 +1,733 @@
4413 +/* ====================================================================
4414 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved.
4415 + *
4416 + * Redistribution and use in source and binary forms, with or without
4417 + * modification, are permitted provided that the following conditions
4418 + * are met:
4419 + *
4420 + * 1. Redistributions of source code must retain the above copyright
4421 + * notice, this list of conditions and the following disclaimer.
4422 + *
4423 + * 2. Redistributions in binary form must reproduce the above copyright
4424 + * notice, this list of conditions and the following disclaimer in
4425 + * the documentation and/or other materials provided with the
4426 + * distribution.
4427 + *
4428 + * 3. All advertising materials mentioning features or use of this
4429 + * software must display the following acknowledgment:
4430 + * "This product includes software developed by the OpenSSL Project
4431 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
4432 + *
4433 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
4434 + * endorse or promote products derived from this software without
4435 + * prior written permission. For written permission, please contact
4436 + * licensing@OpenSSL.org.
4437 + *
4438 + * 5. Products derived from this software may not be called "OpenSSL"
4439 + * nor may "OpenSSL" appear in their names without prior written
4440 + * permission of the OpenSSL Project.
4441 + *
4442 + * 6. Redistributions of any form whatsoever must retain the following
4443 + * acknowledgment:
4444 + * "This product includes software developed by the OpenSSL Project
4445 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
4446 + *
4447 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
4448 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
4449 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
4450 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
4451 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
4452 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
4453 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
4454 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
4455 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
4456 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
4457 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
4458 + * OF THE POSSIBILITY OF SUCH DAMAGE.
4459 + * ====================================================================
4460 + */
4461 +
4462 +/* This implementation of poly1305 is by Andrew Moon
4463 + * (https://github.com/floodyberry/poly1305-donna) and released as public
4464 + * domain. It implements SIMD vectorization based on the algorithm described in
4465 + * http://cr.yp.to/papers.html#neoncrypto. Unrolled to 2 powers, i.e. 64 byte
4466 + * block size
4467 +*/
4468 +
4469 +#include <emmintrin.h>
4470 +#include <stdint.h>
4471 +#include <openssl/opensslconf.h>
4472 +
4473 +#if !defined(OPENSSL_NO_POLY1305)
4474 +
4475 +#include <openssl/poly1305.h>
4476 +
4477 +#define ALIGN(x) __attribute__((aligned(x)))
4478 +#define INLINE inline
4479 +#define U8TO64_LE(m) (*(uint64_t*)(m))
4480 +#define U8TO32_LE(m) (*(uint32_t*)(m))
4481 +#define U64TO8_LE(m,v) (*(uint64_t*)(m)) = v
4482 +
4483 +typedef __m128i xmmi;
4484 +typedef unsigned __int128 uint128_t;
4485 +
4486 +static const uint32_t ALIGN(16) poly1305_x64_sse2_message_mask[4] =
4487 + {(1 << 26) - 1, 0, (1 << 26) - 1, 0};
4488 +static const uint32_t ALIGN(16) poly1305_x64_sse2_5[4] = {5, 0, 5, 0};
4489 +static const uint32_t ALIGN(16) poly1305_x64_sse2_1shl128[4] =
4490 + {(1 << 24), 0, (1 << 24), 0};
4491 +
4492 +static uint128_t INLINE
4493 +add128(uint128_t a, uint128_t b)
4494 + {
4495 + return a + b;
4496 + }
4497 +
4498 +static uint128_t INLINE
4499 +add128_64(uint128_t a, uint64_t b)
4500 + {
4501 + return a + b;
4502 + }
4503 +
4504 +static uint128_t INLINE
4505 +mul64x64_128(uint64_t a, uint64_t b)
4506 + {
4507 + return (uint128_t)a * b;
4508 + }
4509 +
4510 +static uint64_t INLINE
4511 +lo128(uint128_t a)
4512 + {
4513 + return (uint64_t)a;
4514 + }
4515 +
4516 +static uint64_t INLINE
4517 +shr128(uint128_t v, const int shift)
4518 + {
4519 + return (uint64_t)(v >> shift);
4520 + }
4521 +
4522 +static uint64_t INLINE
4523 +shr128_pair(uint64_t hi, uint64_t lo, const int shift)
4524 + {
4525 + return (uint64_t)((((uint128_t)hi << 64) | lo) >> shift);
4526 + }
4527 +
4528 +typedef struct poly1305_power_t
4529 + {
4530 + union
4531 + {
4532 + xmmi v;
4533 + uint64_t u[2];
4534 + uint32_t d[4];
4535 + } R20,R21,R22,R23,R24,S21,S22,S23,S24;
4536 + } poly1305_power;
4537 +
4538 +typedef struct poly1305_state_internal_t
4539 + {
4540 + poly1305_power P[2]; /* 288 bytes, top 32 bit halves unused = 144
4541 + bytes of free storage */
4542 + union
4543 + {
4544 + xmmi H[5]; /* 80 bytes */
4545 + uint64_t HH[10];
4546 + };
4547 + /* uint64_t r0,r1,r2; [24 bytes] */
4548 + /* uint64_t pad0,pad1; [16 bytes] */
4549 + uint64_t started; /* 8 bytes */
4550 + uint64_t leftover; /* 8 bytes */
4551 + uint8_t buffer[64]; /* 64 bytes */
4552 + } poly1305_state_internal; /* 448 bytes total + 63 bytes for
4553 + alignment = 511 bytes raw */
4554 +
4555 +static poly1305_state_internal INLINE
4556 +*poly1305_aligned_state(poly1305_state *state)
4557 + {
4558 + return (poly1305_state_internal *)(((uint64_t)state + 63) & ~63);
4559 + }
4560 +
4561 +/* copy 0-63 bytes */
4562 +static void INLINE
4563 +poly1305_block_copy(uint8_t *dst, const uint8_t *src, size_t bytes)
4564 + {
4565 + size_t offset = src - dst;
4566 + if (bytes & 32)
4567 + {
4568 + _mm_storeu_si128((xmmi *)(dst + 0), _mm_loadu_si128((xmmi *)(dst + offset + 0)));
4569 + _mm_storeu_si128((xmmi *)(dst + 16), _mm_loadu_si128((xmmi *)(ds t + offset + 16)));
4570 + dst += 32;
4571 + }
4572 + if (bytes & 16)
4573 + {
4574 + _mm_storeu_si128((xmmi *)dst,
4575 + _mm_loadu_si128((xmmi *)(dst + offset)));
4576 + dst += 16;
4577 + }
4578 + if (bytes & 8)
4579 + {
4580 + *(uint64_t *)dst = *(uint64_t *)(dst + offset);
4581 + dst += 8;
4582 + }
4583 + if (bytes & 4)
4584 + {
4585 + *(uint32_t *)dst = *(uint32_t *)(dst + offset);
4586 + dst += 4;
4587 + }
4588 + if (bytes & 2)
4589 + {
4590 + *(uint16_t *)dst = *(uint16_t *)(dst + offset);
4591 + dst += 2;
4592 + }
4593 + if (bytes & 1)
4594 + {
4595 + *( uint8_t *)dst = *( uint8_t *)(dst + offset);
4596 + }
4597 + }
4598 +
4599 +/* zero 0-15 bytes */
4600 +static void INLINE
4601 +poly1305_block_zero(uint8_t *dst, size_t bytes)
4602 + {
4603 + if (bytes & 8) { *(uint64_t *)dst = 0; dst += 8; }
4604 + if (bytes & 4) { *(uint32_t *)dst = 0; dst += 4; }
4605 + if (bytes & 2) { *(uint16_t *)dst = 0; dst += 2; }
4606 + if (bytes & 1) { *( uint8_t *)dst = 0; }
4607 + }
4608 +
4609 +static size_t INLINE
4610 +poly1305_min(size_t a, size_t b)
4611 + {
4612 + return (a < b) ? a : b;
4613 + }
4614 +
4615 +void
4616 +CRYPTO_poly1305_init(poly1305_state *state, const unsigned char key[32])
4617 + {
4618 + poly1305_state_internal *st = poly1305_aligned_state(state);
4619 + poly1305_power *p;
4620 + uint64_t r0,r1,r2;
4621 + uint64_t t0,t1;
4622 +
4623 + /* clamp key */
4624 + t0 = U8TO64_LE(key + 0);
4625 + t1 = U8TO64_LE(key + 8);
4626 + r0 = t0 & 0xffc0fffffff; t0 >>= 44; t0 |= t1 << 20;
4627 + r1 = t0 & 0xfffffc0ffff; t1 >>= 24;
4628 + r2 = t1 & 0x00ffffffc0f;
4629 +
4630 + /* store r in un-used space of st->P[1] */
4631 + p = &st->P[1];
4632 + p->R20.d[1] = (uint32_t)(r0 );
4633 + p->R20.d[3] = (uint32_t)(r0 >> 32);
4634 + p->R21.d[1] = (uint32_t)(r1 );
4635 + p->R21.d[3] = (uint32_t)(r1 >> 32);
4636 + p->R22.d[1] = (uint32_t)(r2 );
4637 + p->R22.d[3] = (uint32_t)(r2 >> 32);
4638 +
4639 + /* store pad */
4640 + p->R23.d[1] = U8TO32_LE(key + 16);
4641 + p->R23.d[3] = U8TO32_LE(key + 20);
4642 + p->R24.d[1] = U8TO32_LE(key + 24);
4643 + p->R24.d[3] = U8TO32_LE(key + 28);
4644 +
4645 + /* H = 0 */
4646 + st->H[0] = _mm_setzero_si128();
4647 + st->H[1] = _mm_setzero_si128();
4648 + st->H[2] = _mm_setzero_si128();
4649 + st->H[3] = _mm_setzero_si128();
4650 + st->H[4] = _mm_setzero_si128();
4651 +
4652 + st->started = 0;
4653 + st->leftover = 0;
4654 + }
4655 +
4656 +static void
4657 +poly1305_first_block(poly1305_state_internal *st, const uint8_t *m)
4658 + {
4659 + const xmmi MMASK =
4660 + _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
4661 + const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5);
4662 + const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128);
4663 + xmmi T5,T6;
4664 + poly1305_power *p;
4665 + uint128_t d[3];
4666 + uint64_t r0,r1,r2;
4667 + uint64_t r20,r21,r22,s22;
4668 + uint64_t pad0,pad1;
4669 + uint64_t c;
4670 + uint64_t i;
4671 +
4672 + /* pull out stored info */
4673 + p = &st->P[1];
4674 +
4675 + r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
4676 + r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
4677 + r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
4678 + pad0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1];
4679 + pad1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1];
4680 +
4681 + /* compute powers r^2,r^4 */
4682 + r20 = r0;
4683 + r21 = r1;
4684 + r22 = r2;
4685 + for (i = 0; i < 2; i++)
4686 + {
4687 + s22 = r22 * (5 << 2);
4688 +
4689 + d[0] = add128(mul64x64_128(r20, r20), mul64x64_128(r21 * 2, s22) );
4690 + d[1] = add128(mul64x64_128(r22, s22), mul64x64_128(r20 * 2, r21) );
4691 + d[2] = add128(mul64x64_128(r21, r21), mul64x64_128(r22 * 2, r20) );
4692 +
4693 + r20 = lo128(d[0]) & 0xfffffffffff; c = shr128(d[0], 44);
4694 + d[1] = add128_64(d[1], c); r21 = lo128(d[1]) & 0xfffffffffff; c = shr128(d[1], 44);
4695 + d[2] = add128_64(d[2], c); r22 = lo128(d[2]) & 0x3ffffffffff; c = shr128(d[2], 42);
4696 + r20 += c * 5; c = (r20 >> 44); r20 = r20 & 0xfffffffffff;
4697 + r21 += c;
4698 +
4699 + p->R20.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)( r20 ) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0));
4700 + p->R21.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r20 > > 26) | (r21 << 18)) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0));
4701 + p->R22.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 > > 8) ) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0));
4702 + p->R23.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 > > 34) | (r22 << 10)) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0));
4703 + p->R24.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r22 > > 16) ) ), _MM_SHUFFLE(1,0,1,0));
4704 + p->S21.v = _mm_mul_epu32(p->R21.v, FIVE);
4705 + p->S22.v = _mm_mul_epu32(p->R22.v, FIVE);
4706 + p->S23.v = _mm_mul_epu32(p->R23.v, FIVE);
4707 + p->S24.v = _mm_mul_epu32(p->R24.v, FIVE);
4708 + p--;
4709 + }
4710 +
4711 + /* put saved info back */
4712 + p = &st->P[1];
4713 + p->R20.d[1] = (uint32_t)(r0 );
4714 + p->R20.d[3] = (uint32_t)(r0 >> 32);
4715 + p->R21.d[1] = (uint32_t)(r1 );
4716 + p->R21.d[3] = (uint32_t)(r1 >> 32);
4717 + p->R22.d[1] = (uint32_t)(r2 );
4718 + p->R22.d[3] = (uint32_t)(r2 >> 32);
4719 + p->R23.d[1] = (uint32_t)(pad0 );
4720 + p->R23.d[3] = (uint32_t)(pad0 >> 32);
4721 + p->R24.d[1] = (uint32_t)(pad1 );
4722 + p->R24.d[3] = (uint32_t)(pad1 >> 32);
4723 +
4724 + /* H = [Mx,My] */
4725 + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi6 4((xmmi *)(m + 16)));
4726 + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi6 4((xmmi *)(m + 24)));
4727 + st->H[0] = _mm_and_si128(MMASK, T5);
4728 + st->H[1] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
4729 + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
4730 + st->H[2] = _mm_and_si128(MMASK, T5);
4731 + st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
4732 + st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
4733 + }
4734 +
4735 +static void
4736 +poly1305_blocks(poly1305_state_internal *st, const uint8_t *m, size_t bytes)
4737 + {
4738 + const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask );
4739 + const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5);
4740 + const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128);
4741 +
4742 + poly1305_power *p;
4743 + xmmi H0,H1,H2,H3,H4;
4744 + xmmi T0,T1,T2,T3,T4,T5,T6;
4745 + xmmi M0,M1,M2,M3,M4;
4746 + xmmi C1,C2;
4747 +
4748 + H0 = st->H[0];
4749 + H1 = st->H[1];
4750 + H2 = st->H[2];
4751 + H3 = st->H[3];
4752 + H4 = st->H[4];
4753 +
4754 + while (bytes >= 64)
4755 + {
4756 + /* H *= [r^4,r^4] */
4757 + p = &st->P[0];
4758 + T0 = _mm_mul_epu32(H0, p->R20.v);
4759 + T1 = _mm_mul_epu32(H0, p->R21.v);
4760 + T2 = _mm_mul_epu32(H0, p->R22.v);
4761 + T3 = _mm_mul_epu32(H0, p->R23.v);
4762 + T4 = _mm_mul_epu32(H0, p->R24.v);
4763 + T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4764 + T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4765 + T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4766 + T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4767 + T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4768 + T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4769 + T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4770 + T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4771 + T5 = _mm_mul_epu32(H1, p->R23.v); T4 = _mm_add_epi64(T4, T5);
4772 + T5 = _mm_mul_epu32(H2, p->R22.v); T4 = _mm_add_epi64(T4, T5);
4773 + T5 = _mm_mul_epu32(H3, p->R21.v); T4 = _mm_add_epi64(T4, T5);
4774 + T5 = _mm_mul_epu32(H4, p->R20.v); T4 = _mm_add_epi64(T4, T5);
4775 +
4776 + /* H += [Mx,My]*[r^2,r^2] */
4777 + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_lo adl_epi64((xmmi *)(m + 16)));
4778 + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_lo adl_epi64((xmmi *)(m + 24)));
4779 + M0 = _mm_and_si128(MMASK, T5);
4780 + M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
4781 + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12) );
4782 + M2 = _mm_and_si128(MMASK, T5);
4783 + M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
4784 + M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
4785 +
4786 + p = &st->P[1];
4787 + T5 = _mm_mul_epu32(M0, p->R20.v); T6 = _mm_mul_epu32(M0, p->R21. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4788 + T5 = _mm_mul_epu32(M1, p->S24.v); T6 = _mm_mul_epu32(M1, p->R20. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4789 + T5 = _mm_mul_epu32(M2, p->S23.v); T6 = _mm_mul_epu32(M2, p->S24. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4790 + T5 = _mm_mul_epu32(M3, p->S22.v); T6 = _mm_mul_epu32(M3, p->S23. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4791 + T5 = _mm_mul_epu32(M4, p->S21.v); T6 = _mm_mul_epu32(M4, p->S22. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4792 + T5 = _mm_mul_epu32(M0, p->R22.v); T6 = _mm_mul_epu32(M0, p->R23. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4793 + T5 = _mm_mul_epu32(M1, p->R21.v); T6 = _mm_mul_epu32(M1, p->R22. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4794 + T5 = _mm_mul_epu32(M2, p->R20.v); T6 = _mm_mul_epu32(M2, p->R21. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4795 + T5 = _mm_mul_epu32(M3, p->S24.v); T6 = _mm_mul_epu32(M3, p->R20. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4796 + T5 = _mm_mul_epu32(M4, p->S23.v); T6 = _mm_mul_epu32(M4, p->S24. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4797 + T5 = _mm_mul_epu32(M0, p->R24.v); T4 = _mm_add_epi64(T4, T5);
4798 + T5 = _mm_mul_epu32(M1, p->R23.v); T4 = _mm_add_epi64(T4, T5);
4799 + T5 = _mm_mul_epu32(M2, p->R22.v); T4 = _mm_add_epi64(T4, T5);
4800 + T5 = _mm_mul_epu32(M3, p->R21.v); T4 = _mm_add_epi64(T4, T5);
4801 + T5 = _mm_mul_epu32(M4, p->R20.v); T4 = _mm_add_epi64(T4, T5);
4802 +
4803 + /* H += [Mx,My] */
4804 + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 32)), _mm_l oadl_epi64((xmmi *)(m + 48)));
4805 + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 40)), _mm_l oadl_epi64((xmmi *)(m + 56)));
4806 + M0 = _mm_and_si128(MMASK, T5);
4807 + M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
4808 + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12) );
4809 + M2 = _mm_and_si128(MMASK, T5);
4810 + M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
4811 + M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
4812 +
4813 + T0 = _mm_add_epi64(T0, M0);
4814 + T1 = _mm_add_epi64(T1, M1);
4815 + T2 = _mm_add_epi64(T2, M2);
4816 + T3 = _mm_add_epi64(T3, M3);
4817 + T4 = _mm_add_epi64(T4, M4);
4818 +
4819 + /* reduce */
4820 + C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _ mm_and_si128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C 1); T4 = _mm_add_epi64(T4, C2);
4821 + C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _ mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C 1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
4822 + C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _ mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C 1); T1 = _mm_add_epi64(T1, C2);
4823 + C1 = _mm_srli_epi64(T3, 26); T3 = _ mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C 1);
4824 +
4825 + /* H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx,My]) */
4826 + H0 = T0;
4827 + H1 = T1;
4828 + H2 = T2;
4829 + H3 = T3;
4830 + H4 = T4;
4831 +
4832 + m += 64;
4833 + bytes -= 64;
4834 + }
4835 +
4836 + st->H[0] = H0;
4837 + st->H[1] = H1;
4838 + st->H[2] = H2;
4839 + st->H[3] = H3;
4840 + st->H[4] = H4;
4841 + }
4842 +
4843 +static size_t
4844 +poly1305_combine(poly1305_state_internal *st, const uint8_t *m, size_t bytes)
4845 + {
4846 + const xmmi MMASK =
4847 + _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
4848 + const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128);
4849 + const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5);
4850 +
4851 + poly1305_power *p;
4852 + xmmi H0,H1,H2,H3,H4;
4853 + xmmi M0,M1,M2,M3,M4;
4854 + xmmi T0,T1,T2,T3,T4,T5,T6;
4855 + xmmi C1,C2;
4856 +
4857 + uint64_t r0,r1,r2;
4858 + uint64_t t0,t1,t2,t3,t4;
4859 + uint64_t c;
4860 + size_t consumed = 0;
4861 +
4862 + H0 = st->H[0];
4863 + H1 = st->H[1];
4864 + H2 = st->H[2];
4865 + H3 = st->H[3];
4866 + H4 = st->H[4];
4867 +
4868 + /* p = [r^2,r^2] */
4869 + p = &st->P[1];
4870 +
4871 + if (bytes >= 32)
4872 + {
4873 + /* H *= [r^2,r^2] */
4874 + T0 = _mm_mul_epu32(H0, p->R20.v);
4875 + T1 = _mm_mul_epu32(H0, p->R21.v);
4876 + T2 = _mm_mul_epu32(H0, p->R22.v);
4877 + T3 = _mm_mul_epu32(H0, p->R23.v);
4878 + T4 = _mm_mul_epu32(H0, p->R24.v);
4879 + T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4880 + T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4881 + T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4882 + T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4883 + T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4884 + T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4885 + T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4886 + T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4887 + T5 = _mm_mul_epu32(H1, p->R23.v); T4 = _mm_add_epi64(T4, T5);
4888 + T5 = _mm_mul_epu32(H2, p->R22.v); T4 = _mm_add_epi64(T4, T5);
4889 + T5 = _mm_mul_epu32(H3, p->R21.v); T4 = _mm_add_epi64(T4, T5);
4890 + T5 = _mm_mul_epu32(H4, p->R20.v); T4 = _mm_add_epi64(T4, T5);
4891 +
4892 + /* H += [Mx,My] */
4893 + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_lo adl_epi64((xmmi *)(m + 16)));
4894 + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_lo adl_epi64((xmmi *)(m + 24)));
4895 + M0 = _mm_and_si128(MMASK, T5);
4896 + M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
4897 + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12) );
4898 + M2 = _mm_and_si128(MMASK, T5);
4899 + M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
4900 + M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
4901 +
4902 + T0 = _mm_add_epi64(T0, M0);
4903 + T1 = _mm_add_epi64(T1, M1);
4904 + T2 = _mm_add_epi64(T2, M2);
4905 + T3 = _mm_add_epi64(T3, M3);
4906 + T4 = _mm_add_epi64(T4, M4);
4907 +
4908 + /* reduce */
4909 + C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _ mm_and_si128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C 1); T4 = _mm_add_epi64(T4, C2);
4910 + C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _ mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C 1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
4911 + C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _ mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C 1); T1 = _mm_add_epi64(T1, C2);
4912 + C1 = _mm_srli_epi64(T3, 26); T3 = _ mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C 1);
4913 +
4914 + /* H = (H*[r^2,r^2] + [Mx,My]) */
4915 + H0 = T0;
4916 + H1 = T1;
4917 + H2 = T2;
4918 + H3 = T3;
4919 + H4 = T4;
4920 +
4921 + consumed = 32;
4922 + }
4923 +
4924 + /* finalize, H *= [r^2,r] */
4925 + r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
4926 + r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
4927 + r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
4928 +
4929 + p->R20.d[2] = (uint32_t)( r0 ) & 0x3ffffff;
4930 + p->R21.d[2] = (uint32_t)((r0 >> 26) | (r1 << 18)) & 0x3ffffff;
4931 + p->R22.d[2] = (uint32_t)((r1 >> 8) ) & 0x3ffffff;
4932 + p->R23.d[2] = (uint32_t)((r1 >> 34) | (r2 << 10)) & 0x3ffffff;
4933 + p->R24.d[2] = (uint32_t)((r2 >> 16) ) ;
4934 + p->S21.d[2] = p->R21.d[2] * 5;
4935 + p->S22.d[2] = p->R22.d[2] * 5;
4936 + p->S23.d[2] = p->R23.d[2] * 5;
4937 + p->S24.d[2] = p->R24.d[2] * 5;
4938 +
4939 + /* H *= [r^2,r] */
4940 + T0 = _mm_mul_epu32(H0, p->R20.v);
4941 + T1 = _mm_mul_epu32(H0, p->R21.v);
4942 + T2 = _mm_mul_epu32(H0, p->R22.v);
4943 + T3 = _mm_mul_epu32(H0, p->R23.v);
4944 + T4 = _mm_mul_epu32(H0, p->R24.v);
4945 + T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4946 + T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4947 + T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4948 + T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4949 + T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4950 + T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4951 + T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4952 + T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4953 + T5 = _mm_mul_epu32(H1, p->R23.v); T4 = _mm_add_epi64(T4, T5);
4954 + T5 = _mm_mul_epu32(H2, p->R22.v); T4 = _mm_add_epi64(T4, T5);
4955 + T5 = _mm_mul_epu32(H3, p->R21.v); T4 = _mm_add_epi64(T4, T5);
4956 + T5 = _mm_mul_epu32(H4, p->R20.v); T4 = _mm_add_epi64(T4, T5);
4957 +
4958 + C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _mm_and_s i128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C1); T4 = _mm_add_epi64(T4, C2);
4959 + C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _mm_and_s i128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
4960 + C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _mm_and_s i128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C1); T1 = _mm_add_epi64(T1, C2);
4961 + C1 = _mm_srli_epi64(T3, 26); T3 = _mm_and_s i128(T3, MMASK); T4 = _mm_add_epi64(T4, C1);
4962 +
4963 + /* H = H[0]+H[1] */
4964 + H0 = _mm_add_epi64(T0, _mm_srli_si128(T0, 8));
4965 + H1 = _mm_add_epi64(T1, _mm_srli_si128(T1, 8));
4966 + H2 = _mm_add_epi64(T2, _mm_srli_si128(T2, 8));
4967 + H3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8));
4968 + H4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8));
4969 +
4970 + t0 = _mm_cvtsi128_si32(H0) ; c = (t0 >> 26); t0 &= 0x3ffffff;
4971 + t1 = _mm_cvtsi128_si32(H1) + c; c = (t1 >> 26); t1 &= 0x3ffffff;
4972 + t2 = _mm_cvtsi128_si32(H2) + c; c = (t2 >> 26); t2 &= 0x3ffffff;
4973 + t3 = _mm_cvtsi128_si32(H3) + c; c = (t3 >> 26); t3 &= 0x3ffffff;
4974 + t4 = _mm_cvtsi128_si32(H4) + c; c = (t4 >> 26); t4 &= 0x3ffffff;
4975 + t0 = t0 + (c * 5); c = (t0 >> 26); t0 &= 0x3ffffff;
4976 + t1 = t1 + c;
4977 +
4978 + st->HH[0] = ((t0 ) | (t1 << 26) ) & 0xfffffffffffull;
4979 + st->HH[1] = ((t1 >> 18) | (t2 << 8) | (t3 << 34)) & 0xfffffffffffull;
4980 + st->HH[2] = ((t3 >> 10) | (t4 << 16) ) & 0x3ffffffffffull;
4981 +
4982 + return consumed;
4983 + }
4984 +
4985 +void
4986 +CRYPTO_poly1305_update(poly1305_state *state, const unsigned char *m,
4987 + size_t bytes)
4988 + {
4989 + poly1305_state_internal *st = poly1305_aligned_state(state);
4990 + size_t want;
4991 +
4992 + /* need at least 32 initial bytes to start the accelerated branch */
4993 + if (!st->started)
4994 + {
4995 + if ((st->leftover == 0) && (bytes > 32))
4996 + {
4997 + poly1305_first_block(st, m);
4998 + m += 32;
4999 + bytes -= 32;
5000 + }
5001 + else
5002 + {
5003 + want = poly1305_min(32 - st->leftover, bytes);
5004 + poly1305_block_copy(st->buffer + st->leftover, m, want);
5005 + bytes -= want;
5006 + m += want;
5007 + st->leftover += want;
5008 + if ((st->leftover < 32) || (bytes == 0))
5009 + return;
5010 + poly1305_first_block(st, st->buffer);
5011 + st->leftover = 0;
5012 + }
5013 + st->started = 1;
5014 + }
5015 +
5016 + /* handle leftover */
5017 + if (st->leftover)
5018 + {
5019 + want = poly1305_min(64 - st->leftover, bytes);
5020 + poly1305_block_copy(st->buffer + st->leftover, m, want);
5021 + bytes -= want;
5022 + m += want;
5023 + st->leftover += want;
5024 + if (st->leftover < 64)
5025 + return;
5026 + poly1305_blocks(st, st->buffer, 64);
5027 + st->leftover = 0;
5028 + }
5029 +
5030 + /* process 64 byte blocks */
5031 + if (bytes >= 64)
5032 + {
5033 + want = (bytes & ~63);
5034 + poly1305_blocks(st, m, want);
5035 + m += want;
5036 + bytes -= want;
5037 + }
5038 +
5039 + if (bytes)
5040 + {
5041 + poly1305_block_copy(st->buffer + st->leftover, m, bytes);
5042 + st->leftover += bytes;
5043 + }
5044 + }
5045 +
5046 +void
5047 +CRYPTO_poly1305_finish(poly1305_state *state, unsigned char mac[16])
5048 + {
5049 + poly1305_state_internal *st = poly1305_aligned_state(state);
5050 + size_t leftover = st->leftover;
5051 + uint8_t *m = st->buffer;
5052 + uint128_t d[3];
5053 + uint64_t h0,h1,h2;
5054 + uint64_t t0,t1;
5055 + uint64_t g0,g1,g2,c,nc;
5056 + uint64_t r0,r1,r2,s1,s2;
5057 + poly1305_power *p;
5058 +
5059 + if (st->started)
5060 + {
5061 + size_t consumed = poly1305_combine(st, m, leftover);
5062 + leftover -= consumed;
5063 + m += consumed;
5064 + }
5065 +
5066 + /* st->HH will either be 0 or have the combined result */
5067 + h0 = st->HH[0];
5068 + h1 = st->HH[1];
5069 + h2 = st->HH[2];
5070 +
5071 + p = &st->P[1];
5072 + r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
5073 + r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
5074 + r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
5075 + s1 = r1 * (5 << 2);
5076 + s2 = r2 * (5 << 2);
5077 +
5078 + if (leftover < 16)
5079 + goto poly1305_donna_atmost15bytes;
5080 +
5081 +poly1305_donna_atleast16bytes:
5082 + t0 = U8TO64_LE(m + 0);
5083 + t1 = U8TO64_LE(m + 8);
5084 + h0 += t0 & 0xfffffffffff;
5085 + t0 = shr128_pair(t1, t0, 44);
5086 + h1 += t0 & 0xfffffffffff;
5087 + h2 += (t1 >> 24) | ((uint64_t)1 << 40);
5088 +
5089 +poly1305_donna_mul:
5090 + d[0] = add128(add128(mul64x64_128(h0, r0), mul64x64_128(h1, s2)), mul64x 64_128(h2, s1));
5091 + d[1] = add128(add128(mul64x64_128(h0, r1), mul64x64_128(h1, r0)), mul64x 64_128(h2, s2));
5092 + d[2] = add128(add128(mul64x64_128(h0, r2), mul64x64_128(h1, r1)), mul64x 64_128(h2, r0));
5093 + h0 = lo128(d[0]) & 0xfffffffffff; c = shr128( d[0], 44);
5094 + d[1] = add128_64(d[1], c); h1 = lo128(d[1]) & 0xfffffffffff; c = shr128( d[1], 44);
5095 + d[2] = add128_64(d[2], c); h2 = lo128(d[2]) & 0x3ffffffffff; c = shr128( d[2], 42);
5096 + h0 += c * 5;
5097 +
5098 + m += 16;
5099 + leftover -= 16;
5100 + if (leftover >= 16) goto poly1305_donna_atleast16bytes;
5101 +
5102 + /* final bytes */
5103 +poly1305_donna_atmost15bytes:
5104 + if (!leftover) goto poly1305_donna_finish;
5105 +
5106 + m[leftover++] = 1;
5107 + poly1305_block_zero(m + leftover, 16 - leftover);
5108 + leftover = 16;
5109 +
5110 + t0 = U8TO64_LE(m+0);
5111 + t1 = U8TO64_LE(m+8);
5112 + h0 += t0 & 0xfffffffffff; t0 = shr128_pair(t1, t0, 44);
5113 + h1 += t0 & 0xfffffffffff;
5114 + h2 += (t1 >> 24);
5115 +
5116 + goto poly1305_donna_mul;
5117 +
5118 +poly1305_donna_finish:
5119 + c = (h0 >> 44); h0 &= 0xfffffffffff;
5120 + h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff;
5121 + h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff;
5122 + h0 += c * 5;
5123 +
5124 + g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff;
5125 + g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff;
5126 + g2 = h2 + c - ((uint64_t)1 << 42);
5127 +
5128 + c = (g2 >> 63) - 1;
5129 + nc = ~c;
5130 + h0 = (h0 & nc) | (g0 & c);
5131 + h1 = (h1 & nc) | (g1 & c);
5132 + h2 = (h2 & nc) | (g2 & c);
5133 +
5134 + /* pad */
5135 + t0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1];
5136 + t1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1];
5137 + h0 += (t0 & 0xfffffffffff) ; c = (h0 >> 44); h0 &= 0xfffffffffff; t0 = shr128_pair(t1, t0, 44);
5138 + h1 += (t0 & 0xfffffffffff) + c; c = (h1 >> 44); h1 &= 0xfffffffffff; t1 = (t1 >> 24);
5139 + h2 += (t1 ) + c;
5140 +
5141 + U64TO8_LE(mac + 0, ((h0 ) | (h1 << 44)));
5142 + U64TO8_LE(mac + 8, ((h1 >> 20) | (h2 << 24)));
5143 + }
5144 +
5145 +#endif /* !OPENSSL_NO_POLY1305 */
5146 diff --git a/crypto/poly1305/poly1305test.c b/crypto/poly1305/poly1305test.c
5147 new file mode 100644
5148 index 0000000..8dd26af
5149 --- /dev/null
5150 +++ b/crypto/poly1305/poly1305test.c
5151 @@ -0,0 +1,166 @@
5152 +/* ====================================================================
5153 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved.
5154 + *
5155 + * Redistribution and use in source and binary forms, with or without
5156 + * modification, are permitted provided that the following conditions
5157 + * are met:
5158 + *
5159 + * 1. Redistributions of source code must retain the above copyright
5160 + * notice, this list of conditions and the following disclaimer.
5161 + *
5162 + * 2. Redistributions in binary form must reproduce the above copyright
5163 + * notice, this list of conditions and the following disclaimer in
5164 + * the documentation and/or other materials provided with the
5165 + * distribution.
5166 + *
5167 + * 3. All advertising materials mentioning features or use of this
5168 + * software must display the following acknowledgment:
5169 + * "This product includes software developed by the OpenSSL Project
5170 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
5171 + *
5172 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
5173 + * endorse or promote products derived from this software without
5174 + * prior written permission. For written permission, please contact
5175 + * licensing@OpenSSL.org.
5176 + *
5177 + * 5. Products derived from this software may not be called "OpenSSL"
5178 + * nor may "OpenSSL" appear in their names without prior written
5179 + * permission of the OpenSSL Project.
5180 + *
5181 + * 6. Redistributions of any form whatsoever must retain the following
5182 + * acknowledgment:
5183 + * "This product includes software developed by the OpenSSL Project
5184 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
5185 + *
5186 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
5187 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
5188 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
5189 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
5190 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
5191 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
5192 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
5193 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
5194 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
5195 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
5196 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
5197 + * OF THE POSSIBILITY OF SUCH DAMAGE.
5198 + * ====================================================================
5199 + */
5200 +
5201 +#include <stdio.h>
5202 +#include <stdlib.h>
5203 +#include <string.h>
5204 +
5205 +#include <openssl/poly1305.h>
5206 +
5207 +struct poly1305_test
5208 + {
5209 + const char *inputhex;
5210 + const char *keyhex;
5211 + const char *outhex;
5212 + };
5213 +
5214 +static const struct poly1305_test poly1305_tests[] = {
5215 + {
5216 + "",
5217 + "c8afaac331ee372cd6082de134943b174710130e9f6fea8d72293850a667d86 c",
5218 + "4710130e9f6fea8d72293850a667d86c",
5219 + },
5220 + {
5221 + "48656c6c6f20776f726c6421",
5222 + "746869732069732033322d62797465206b657920666f7220506f6c793133303 5",
5223 + "a6f745008f81c916a20dcc74eef2b2f0",
5224 + },
5225 + {
5226 + "000000000000000000000000000000000000000000000000000000000000000 0",
5227 + "746869732069732033322d62797465206b657920666f7220506f6c793133303 5",
5228 + "49ec78090e481ec6c26b33b91ccc0307",
5229 + },
5230 + {
5231 + "000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 
00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 
00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000",
5232 + "746869732069732033322d62797465206b657920666f7220506f6c793133303 5",
5233 + "da84bcab02676c38cdb015604274c2aa",
5234 + },
5235 +};
5236 +
5237 +static unsigned char hex_digit(char h)
5238 + {
5239 + if (h >= '0' && h <= '9')
5240 + return h - '0';
5241 + else if (h >= 'a' && h <= 'f')
5242 + return h - 'a' + 10;
5243 + else if (h >= 'A' && h <= 'F')
5244 + return h - 'A' + 10;
5245 + else
5246 + abort();
5247 + }
5248 +
5249 +static void hex_decode(unsigned char *out, const char* hex)
5250 + {
5251 + size_t j = 0;
5252 +
5253 + while (*hex != 0)
5254 + {
5255 + unsigned char v = hex_digit(*hex++);
5256 + v <<= 4;
5257 + v |= hex_digit(*hex++);
5258 + out[j++] = v;
5259 + }
5260 + }
5261 +
5262 +static void hexdump(unsigned char *a, size_t len)
5263 + {
5264 + size_t i;
5265 +
5266 + for (i = 0; i < len; i++)
5267 + printf("%02x", a[i]);
5268 + }
5269 +
5270 +int main()
5271 + {
5272 + static const unsigned num_tests =
5273 + sizeof(poly1305_tests) / sizeof(struct poly1305_test);
5274 + unsigned i;
5275 + unsigned char key[32], out[16], expected[16];
5276 + poly1305_state poly1305;
5277 +
5278 + for (i = 0; i < num_tests; i++)
5279 + {
5280 + const struct poly1305_test *test = &poly1305_tests[i];
5281 + unsigned char *in;
5282 + size_t inlen = strlen(test->inputhex);
5283 +
5284 + if (strlen(test->keyhex) != sizeof(key)*2 ||
5285 + strlen(test->outhex) != sizeof(out)*2 ||
5286 + (inlen & 1) == 1)
5287 + return 1;
5288 +
5289 + inlen /= 2;
5290 +
5291 + hex_decode(key, test->keyhex);
5292 + hex_decode(expected, test->outhex);
5293 +
5294 + in = malloc(inlen);
5295 +
5296 + hex_decode(in, test->inputhex);
5297 + CRYPTO_poly1305_init(&poly1305, key);
5298 + CRYPTO_poly1305_update(&poly1305, in, inlen);
5299 + CRYPTO_poly1305_finish(&poly1305, out);
5300 +
5301 + if (memcmp(out, expected, sizeof(expected)) != 0)
5302 + {
5303 + printf("Poly1305 test #%d failed.\n", i);
5304 + printf("got: ");
5305 + hexdump(out, sizeof(out));
5306 + printf("\nexpected: ");
5307 + hexdump(expected, sizeof(expected));
5308 + printf("\n");
5309 + return 1;
5310 + }
5311 +
5312 + free(in);
5313 + }
5314 +
5315 + printf("PASS\n");
5316 + return 0;
5317 + }
5318 diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c
5319 index 75b6560..a042b8d 100644
5320 --- a/ssl/s3_lib.c
5321 +++ b/ssl/s3_lib.c
5322 @@ -1841,7 +1841,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5323 SSL_AEAD,
5324 SSL_TLSV1_2,
5325 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
5326 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4),
5327 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4)|
5328 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD,
5329 128,
5330 128,
5331 },
5332 @@ -1873,7 +1874,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5333 SSL_AEAD,
5334 SSL_TLSV1_2,
5335 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
5336 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4),
5337 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4)|
5338 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD,
5339 128,
5340 128,
5341 },
5342 @@ -1905,7 +1907,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5343 SSL_AEAD,
5344 SSL_TLSV1_2,
5345 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
5346 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4),
5347 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4)|
5348 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD,
5349 128,
5350 128,
5351 },
5352 @@ -1937,7 +1940,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5353 SSL_AEAD,
5354 SSL_TLSV1_2,
5355 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
5356 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4),
5357 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4)|
5358 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD,
5359 128,
5360 128,
5361 },
5362 @@ -1969,7 +1973,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5363 SSL_AEAD,
5364 SSL_TLSV1_2,
5365 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
5366 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4),
5367 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4)|
5368 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD,
5369 128,
5370 128,
5371 },
5372 @@ -2001,7 +2006,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5373 SSL_AEAD,
5374 SSL_TLSV1_2,
5375 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
5376 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4),
5377 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4)|
5378 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD,
5379 128,
5380 128,
5381 },
5382 @@ -2714,7 +2720,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5383 SSL_AEAD,
5384 SSL_TLSV1_2,
5385 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
5386 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4),
5387 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4)|
5388 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD,
5389 128,
5390 128,
5391 },
5392 @@ -2746,7 +2753,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5393 SSL_AEAD,
5394 SSL_TLSV1_2,
5395 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
5396 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4),
5397 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4)|
5398 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD,
5399 128,
5400 128,
5401 },
5402 @@ -2778,7 +2786,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5403 SSL_AEAD,
5404 SSL_TLSV1_2,
5405 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
5406 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4),
5407 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4)|
5408 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD,
5409 128,
5410 128,
5411 },
5412 @@ -2810,7 +2819,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5413 SSL_AEAD,
5414 SSL_TLSV1_2,
5415 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
5416 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4),
5417 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4)|
5418 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD,
5419 128,
5420 128,
5421 },
5422 @@ -2894,6 +2904,51 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5423 },
5424 #endif
5425
5426 + {
5427 + 1,
5428 + TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305,
5429 + TLS1_CK_ECDHE_RSA_CHACHA20_POLY1305,
5430 + SSL_kEECDH,
5431 + SSL_aRSA,
5432 + SSL_CHACHA20POLY1305,
5433 + SSL_AEAD,
5434 + SSL_TLSV1_2,
5435 + SSL_NOT_EXP|SSL_HIGH,
5436 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(0),
5437 + 256,
5438 + 0,
5439 + },
5440 +
5441 + {
5442 + 1,
5443 + TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,
5444 + TLS1_CK_ECDHE_ECDSA_CHACHA20_POLY1305,
5445 + SSL_kEECDH,
5446 + SSL_aECDSA,
5447 + SSL_CHACHA20POLY1305,
5448 + SSL_AEAD,
5449 + SSL_TLSV1_2,
5450 + SSL_NOT_EXP|SSL_HIGH,
5451 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(0),
5452 + 256,
5453 + 0,
5454 + },
5455 +
5456 + {
5457 + 1,
5458 + TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305,
5459 + TLS1_CK_DHE_RSA_CHACHA20_POLY1305,
5460 + SSL_kEDH,
5461 + SSL_aRSA,
5462 + SSL_CHACHA20POLY1305,
5463 + SSL_AEAD,
5464 + SSL_TLSV1_2,
5465 + SSL_NOT_EXP|SSL_HIGH,
5466 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(0),
5467 + 256,
5468 + 0,
5469 + },
5470 +
5471 /* end of list */
5472 };
5473
5474 diff --git a/ssl/s3_pkt.c b/ssl/s3_pkt.c
5475 index 5038f6c..04b474d 100644
5476 --- a/ssl/s3_pkt.c
5477 +++ b/ssl/s3_pkt.c
5478 @@ -790,8 +790,11 @@ static int do_ssl3_write(SSL *s, int type, const unsigned c har *buf,
5479 else
5480 eivlen = 0;
5481 }
5482 - else if (s->aead_write_ctx != NULL)
5483 + else if (s->aead_write_ctx != NULL &&
5484 + s->aead_write_ctx->variable_nonce_included_in_record)
5485 + {
5486 eivlen = s->aead_write_ctx->variable_nonce_len;
5487 + }
5488 else
5489 eivlen = 0;
5490
5491 diff --git a/ssl/ssl.h b/ssl/ssl.h
5492 index 0644cbf..d782a98 100644
5493 --- a/ssl/ssl.h
5494 +++ b/ssl/ssl.h
5495 @@ -291,6 +291,7 @@ extern "C" {
5496 #define SSL_TXT_CAMELLIA128 "CAMELLIA128"
5497 #define SSL_TXT_CAMELLIA256 "CAMELLIA256"
5498 #define SSL_TXT_CAMELLIA "CAMELLIA"
5499 +#define SSL_TXT_CHACHA20 "CHACHA20"
5500
5501 #define SSL_TXT_MD5 "MD5"
5502 #define SSL_TXT_SHA1 "SHA1"
5503 diff --git a/ssl/ssl_ciph.c b/ssl/ssl_ciph.c
5504 index 7e780cd..b6370bd 100644
5505 --- a/ssl/ssl_ciph.c
5506 +++ b/ssl/ssl_ciph.c
5507 @@ -298,6 +298,7 @@ static const SSL_CIPHER cipher_aliases[]={
5508 {0,SSL_TXT_CAMELLIA128,0,0,0,SSL_CAMELLIA128,0,0,0,0,0,0},
5509 {0,SSL_TXT_CAMELLIA256,0,0,0,SSL_CAMELLIA256,0,0,0,0,0,0},
5510 {0,SSL_TXT_CAMELLIA ,0,0,0,SSL_CAMELLIA128|SSL_CAMELLIA256,0,0,0,0,0,0 },
5511 + {0,SSL_TXT_CHACHA20 ,0,0,0,SSL_CHACHA20POLY1305,0,0,0,0,0,0},
5512
5513 /* MAC aliases */
5514 {0,SSL_TXT_MD5,0, 0,0,0,SSL_MD5, 0,0,0,0,0},
5515 @@ -523,9 +524,15 @@ int ssl_cipher_get_evp_aead(const SSL_SESSION *s, const EVP _AEAD **aead)
5516 return 0;
5517
5518 #ifndef OPENSSL_NO_AES
5519 - /* There is only one AEAD for now. */
5520 - *aead = EVP_aead_aes_128_gcm();
5521 - return 1;
5522 + switch (c->algorithm_enc)
5523 + {
5524 + case SSL_AES128GCM:
5525 + *aead = EVP_aead_aes_128_gcm();
5526 + return 1;
5527 + case SSL_CHACHA20POLY1305:
5528 + *aead = EVP_aead_chacha20_poly1305();
5529 + return 1;
5530 + }
5531 #endif
5532
5533 return 0;
5534 @@ -1715,6 +1722,9 @@ char *SSL_CIPHER_description(const SSL_CIPHER *cipher, cha r *buf, int len)
5535 case SSL_SEED:
5536 enc="SEED(128)";
5537 break;
5538 + case SSL_CHACHA20POLY1305:
5539 + enc="ChaCha20-Poly1305";
5540 + break;
5541 default:
5542 enc="unknown";
5543 break;
5544 diff --git a/ssl/ssl_locl.h b/ssl/ssl_locl.h
5545 index 63bc28b..b83d8cd 100644
5546 --- a/ssl/ssl_locl.h
5547 +++ b/ssl/ssl_locl.h
5548 @@ -328,6 +328,7 @@
5549 #define SSL_SEED 0x00000800L
5550 #define SSL_AES128GCM 0x00001000L
5551 #define SSL_AES256GCM 0x00002000L
5552 +#define SSL_CHACHA20POLY1305 0x00004000L
5553
5554 #define SSL_AES (SSL_AES128|SSL_AES256|SSL_AES128GCM|SSL _AES256GCM)
5555 #define SSL_CAMELLIA (SSL_CAMELLIA128|SSL_CAMELLIA256)
5556 @@ -389,6 +390,12 @@
5557 #define SSL_CIPHER_AEAD_FIXED_NONCE_LEN(ssl_cipher) \
5558 (((ssl_cipher->algorithm2 >> 24) & 0xf)*2)
5559
5560 +/* SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD is a flag in
5561 + * SSL_CIPHER.algorithm2 which indicates that the variable part of the nonce is
5562 + * included as a prefix of the record. (AES-GCM, for example, does with with an
5563 + * 8-byte variable nonce.) */
5564 +#define SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD (1<<22)
5565 +
5566 /*
5567 * Export and cipher strength information. For each cipher we have to decide
5568 * whether it is exportable or not. This information is likely to change
5569 @@ -605,6 +612,9 @@ struct ssl_aead_ctx_st
5570 * records. */
5571 unsigned char fixed_nonce[8];
5572 unsigned char fixed_nonce_len, variable_nonce_len, tag_len;
5573 + /* variable_nonce_included_in_record is non-zero if the variable nonce
5574 + * for a record is included as a prefix before the ciphertext. */
5575 + char variable_nonce_included_in_record;
5576 };
5577
5578 #ifndef OPENSSL_NO_COMP
5579 diff --git a/ssl/t1_enc.c b/ssl/t1_enc.c
5580 index 7af1a32..15800af 100644
5581 --- a/ssl/t1_enc.c
5582 +++ b/ssl/t1_enc.c
5583 @@ -366,6 +366,8 @@ static int tls1_change_cipher_state_aead(SSL *s, char is_rea d,
5584 memcpy(aead_ctx->fixed_nonce, iv, iv_len);
5585 aead_ctx->fixed_nonce_len = iv_len;
5586 aead_ctx->variable_nonce_len = 8; /* always the case, currently. */
5587 + aead_ctx->variable_nonce_included_in_record =
5588 + (s->s3->tmp.new_cipher->algorithm2 & SSL_CIPHER_ALGORITHM2_VARIA BLE_NONCE_INCLUDED_IN_RECORD) != 0;
5589 if (aead_ctx->variable_nonce_len + aead_ctx->fixed_nonce_len != EVP_AEAD _nonce_length(aead))
5590 {
5591 SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE_AEAD, ERR_R_INTERNAL_ERROR );
5592 @@ -863,6 +865,7 @@ int tls1_enc(SSL *s, int send)
5593 if (send)
5594 {
5595 size_t len = rec->length;
5596 + size_t eivlen = 0;
5597 in = rec->input;
5598 out = rec->data;
5599
5600 @@ -878,18 +881,22 @@ int tls1_enc(SSL *s, int send)
5601 * variable nonce. Thus we can copy the sequence number
5602 * bytes into place without overwriting any of the
5603 * plaintext. */
5604 - memcpy(out, ad, aead->variable_nonce_len);
5605 - len -= aead->variable_nonce_len;
5606 + if (aead->variable_nonce_included_in_record)
5607 + {
5608 + memcpy(out, ad, aead->variable_nonce_len);
5609 + len -= aead->variable_nonce_len;
5610 + eivlen = aead->variable_nonce_len;
5611 + }
5612
5613 ad[11] = len >> 8;
5614 ad[12] = len & 0xff;
5615
5616 n = EVP_AEAD_CTX_seal(&aead->ctx,
5617 - out + aead->variable_nonce_len, le n + aead->tag_len,
5618 + out + eivlen, len + aead->tag_len,
5619 nonce, nonce_used,
5620 - in + aead->variable_nonce_len, len ,
5621 + in + eivlen, len,
5622 ad, sizeof(ad));
5623 - if (n >= 0)
5624 + if (n >= 0 && aead->variable_nonce_included_in_record)
5625 n += aead->variable_nonce_len;
5626 }
5627 else
5628 @@ -903,12 +910,17 @@ int tls1_enc(SSL *s, int send)
5629
5630 if (len < aead->variable_nonce_len)
5631 return 0;
5632 - memcpy(nonce + nonce_used, in, aead->variable_nonce_len) ;
5633 + memcpy(nonce + nonce_used,
5634 + aead->variable_nonce_included_in_record ? in : ad ,
5635 + aead->variable_nonce_len);
5636 nonce_used += aead->variable_nonce_len;
5637
5638 - in += aead->variable_nonce_len;
5639 - len -= aead->variable_nonce_len;
5640 - out += aead->variable_nonce_len;
5641 + if (aead->variable_nonce_included_in_record)
5642 + {
5643 + in += aead->variable_nonce_len;
5644 + len -= aead->variable_nonce_len;
5645 + out += aead->variable_nonce_len;
5646 + }
5647
5648 if (len < aead->tag_len)
5649 return 0;
5650 diff --git a/ssl/tls1.h b/ssl/tls1.h
5651 index 8cac7df..3cbcb83 100644
5652 --- a/ssl/tls1.h
5653 +++ b/ssl/tls1.h
5654 @@ -526,6 +526,10 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB ,(void (*)(void))cb)
5655 #define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256 0x0300C031
5656 #define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384 0x0300C032
5657
5658 +#define TLS1_CK_ECDHE_RSA_CHACHA20_POLY1305 0x0300CC13
5659 +#define TLS1_CK_ECDHE_ECDSA_CHACHA20_POLY1305 0x0300CC14
5660 +#define TLS1_CK_DHE_RSA_CHACHA20_POLY1305 0x0300CC15
5661 +
5662 /* XXX
5663 * Inconsistency alert:
5664 * The OpenSSL names of ciphers with ephemeral DH here include the string
5665 @@ -677,6 +681,10 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB ,(void (*)(void))cb)
5666 #define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256 "ECDH-RSA-AES128-GCM-SH A256"
5667 #define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384 "ECDH-RSA-AES256-GCM-SH A384"
5668
5669 +#define TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305 "ECDHE-RSA-CHACHA20-POLY 1305"
5670 +#define TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 "ECDHE-ECDSA-CHACHA20-PO LY1305"
5671 +#define TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305 "DHE-RSA-CHACHA2 0-POLY1305"
5672 +
5673 #define TLS_CT_RSA_SIGN 1
5674 #define TLS_CT_DSS_SIGN 2
5675 #define TLS_CT_RSA_FIXED_DH 3
5676 diff --git a/test/Makefile b/test/Makefile
5677 index 4c9eabc..4790aa8 100644
5678 --- a/test/Makefile
5679 +++ b/test/Makefile
5680 @@ -86,7 +86,9 @@ OBJ= $(BNTEST).o $(ECTEST).o $(ECDSATEST).o $(ECDHTEST).o $( IDEATEST).o \
5681 $(MDC2TEST).o $(RMDTEST).o \
5682 $(RANDTEST).o $(DHTEST).o $(ENGINETEST).o $(CASTTEST).o \
5683 $(BFTEST).o $(SSLTEST).o $(DSATEST).o $(EXPTEST).o $(RSATEST).o \
5684 - $(EVPTEST).o $(IGETEST).o $(JPAKETEST).o $(ASN1TEST).o
5685 + $(EVPTEST).o $(IGETEST).o $(JPAKETEST).o $(ASN1TEST).o $(CHACHATEST).o \
5686 + $(POLY1305TEST).o
5687 +
5688 SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $(IDEATEST).c \
5689 $(MD2TEST).c $(MD4TEST).c $(MD5TEST).c \
5690 $(HMACTEST).c $(WPTEST).c \
5691 @@ -94,7 +96,8 @@ SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $( IDEATEST).c \
5692 $(DESTEST).c $(SHATEST).c $(SHA1TEST).c $(MDC2TEST).c $(RMDTEST).c \
5693 $(RANDTEST).c $(DHTEST).c $(ENGINETEST).c $(CASTTEST).c \
5694 $(BFTEST).c $(SSLTEST).c $(DSATEST).c $(EXPTEST).c $(RSATEST).c \
5695 - $(EVPTEST).c $(IGETEST).c $(JPAKETEST).c $(SRPTEST).c $(ASN1TEST).c
5696 + $(EVPTEST).c $(IGETEST).c $(JPAKETEST).c $(SRPTEST).c $(ASN1TEST).c \
5697 + $(CHACHATEST).c $(POLY1305TEST).c
5698
5699 EXHEADER=
5700 HEADER= $(EXHEADER)
5701 @@ -137,7 +140,7 @@ alltests: \
5702 test_enc test_x509 test_rsa test_crl test_sid \
5703 test_gen test_req test_pkcs7 test_verify test_dh test_dsa \
5704 test_ss test_ca test_engine test_evp test_ssl test_tsa test_ige \
5705 - test_jpake test_srp test_cms
5706 + test_jpake test_srp test_cms test_chacha test_poly1305
5707
5708 test_evp:
5709 ../util/shlib_wrap.sh ./$(EVPTEST) evptests.txt
5710 @@ -318,6 +321,14 @@ test_srp: $(SRPTEST)$(EXE_EXT)
5711 @echo "Test SRP"
5712 ../util/shlib_wrap.sh ./srptest
5713
5714 +test_chacha: $(CHACHATEST)$(EXE_EXT)
5715 + @echo "Test ChaCha"
5716 + ../util/shlib_wrap.sh ./$(CHACHATEST)
5717 +
5718 +test_poly1305: $(POLY1305TEST)$(EXE_EXT)
5719 + @echo "Test Poly1305"
5720 + ../util/shlib_wrap.sh ./$(POLY1305TEST)
5721 +
5722 lint:
5723 lint -DLINT $(INCLUDES) $(SRC)>fluff
5724
5725 @@ -394,6 +405,12 @@ $(SHA256TEST)$(EXE_EXT): $(SHA256TEST).o $(DLIBCRYPTO)
5726 $(SHA512TEST)$(EXE_EXT): $(SHA512TEST).o $(DLIBCRYPTO)
5727 @target=$(SHA512TEST); $(BUILD_CMD)
5728
5729 +$(CHACHATEST)$(EXE_EXT): $(CHACHATEST).o $(DLIBCRYPTO)
5730 + @target=$(CHACHATEST); $(BUILD_CMD)
5731 +
5732 +$(POLY1305TEST)$(EXE_EXT): $(POLY1305TEST).o $(DLIBCRYPTO)
5733 + @target=$(POLY1305TEST); $(BUILD_CMD)
5734 +
5735 $(RMDTEST)$(EXE_EXT): $(RMDTEST).o $(DLIBCRYPTO)
5736 @target=$(RMDTEST); $(BUILD_CMD)
5737
5738 --
5739 1.8.4.1
5740
OLDNEW
« no previous file with comments | « openssl/patches/aead_support.patch ('k') | openssl/patches/channelid.patch » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698