Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1652)

Side by Side Diff: openssl/patches/chacha20poly1305.patch

Issue 59083010: third_party/openssl: add ChaCha20+Poly1305 support. Base URL: https://chromium.googlesource.com/chromium/deps/openssl.git@master
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « openssl/patches/aead_support.patch ('k') | openssl/patches/channelidchromium.patch » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 From 2688f00904e4ffd647afcff69bb8fe6df8c5902b Mon Sep 17 00:00:00 2001
2 From: Adam Langley <agl@chromium.org>
3 Date: Mon, 9 Sep 2013 12:13:24 -0400
4 Subject: [PATCH 43/52] chacha20poly1305
5
6 Add support for Chacha20 + Poly1305.
7 ---
8 .gitignore | 1 +
9 Configure | 56 +-
10 Makefile.org | 6 +-
11 apps/speed.c | 64 +-
12 crypto/chacha/Makefile | 80 ++
13 crypto/chacha/chacha.h | 85 ++
14 crypto/chacha/chacha_enc.c | 167 +++
15 crypto/chacha/chacha_vec.c | 345 +++++++
16 crypto/chacha/chachatest.c | 211 ++++
17 crypto/evp/Makefile | 35 +-
18 crypto/evp/e_chacha20poly1305.c | 261 +++++
19 crypto/evp/evp.h | 8 +
20 crypto/evp/evp_err.c | 3 +
21 crypto/poly1305/Makefile | 81 ++
22 crypto/poly1305/poly1305.c | 320 ++++++
23 crypto/poly1305/poly1305.h | 88 ++
24 crypto/poly1305/poly1305_arm.c | 335 ++++++
25 crypto/poly1305/poly1305_arm_asm.s | 2009 ++++++++++++++++++++++++++++++++++++
26 crypto/poly1305/poly1305_vec.c | 733 +++++++++++++
27 crypto/poly1305/poly1305test.c | 166 +++
28 ssl/s3_lib.c | 75 +-
29 ssl/s3_pkt.c | 5 +-
30 ssl/ssl.h | 1 +
31 ssl/ssl_ciph.c | 16 +-
32 ssl/ssl_locl.h | 10 +
33 ssl/t1_enc.c | 30 +-
34 ssl/tls1.h | 8 +
35 test/Makefile | 23 +-
36 28 files changed, 5166 insertions(+), 56 deletions(-)
37 create mode 100644 crypto/chacha/Makefile
38 create mode 100644 crypto/chacha/chacha.h
39 create mode 100644 crypto/chacha/chacha_enc.c
40 create mode 100644 crypto/chacha/chacha_vec.c
41 create mode 100644 crypto/chacha/chachatest.c
42 create mode 100644 crypto/evp/e_chacha20poly1305.c
43 create mode 100644 crypto/poly1305/Makefile
44 create mode 100644 crypto/poly1305/poly1305.c
45 create mode 100644 crypto/poly1305/poly1305.h
46 create mode 100644 crypto/poly1305/poly1305_arm.c
47 create mode 100644 crypto/poly1305/poly1305_arm_asm.s
48 create mode 100644 crypto/poly1305/poly1305_vec.c
49 create mode 100644 crypto/poly1305/poly1305test.c
50
51 diff --git a/Configure b/Configure
52 index 9c803dc..1b95384 100755
53 --- a/Configure
54 +++ b/Configure
55 @@ -124,24 +124,24 @@ my $tlib="-lnsl -lsocket";
56 my $bits1="THIRTY_TWO_BIT ";
57 my $bits2="SIXTY_FOUR_BIT ";
58
59 -my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o crypt 586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586 .o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cml l-x86.o:ghash-x86.o:";
60 +my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o crypt 586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586 .o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cml l-x86.o:ghash-x86.o:::";
61
62 my $x86_elf_asm="$x86_asm:elf";
63
64 -my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64- gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_ 64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_ 64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghas h-x86_64.o:";
65 -my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64. o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::: :ghash-ia64.o::void";
66 -my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a -mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-spa rcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void";
67 -my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void";
68 -my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash- alpha.o::void";
69 -my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::: :::::";
70 -my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha2 56-mips.o sha512-mips.o::::::::";
71 -my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::ae s-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4- s390x.o:::::ghash-s390x.o:";
72 -my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cb c.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-a rmv4.o::void";
73 -my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-p arisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash- parisc.o::32";
74 -my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o ae s-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::gha sh-parisc.o::64";
75 -my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::";
76 -my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::";
77 -my $no_asm=":::::::::::::::void";
78 +my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64- gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_ 64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_ 64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghas h-x86_64.o::chacha_vec.o:poly1305_vec.o";
79 +my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64. o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::: :ghash-ia64.o::::void";
80 +my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a -mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-spa rcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::::void";
81 +my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::::void";
82 +my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash- alpha.o::::void";
83 +my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::: :::::::";
84 +my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha2 56-mips.o sha512-mips.o::::::::::";
85 +my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::ae s-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4- s390x.o:::::::ghash-s390x.o:";
86 +my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cb c.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-a rmv4.o::chacha_vec.o:poly1305_arm.o poly1305_arm_asm.o:void";
87 +my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-p arisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash- parisc.o::::32";
88 +my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o ae s-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::gha sh-parisc.o::::64";
89 +my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::::";
90 +my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::::";
91 +my $no_asm=":::::::::::::::::void";
92
93 # As for $BSDthreads. Idea is to maintain "collective" set of flags,
94 # which would cover all BSD flavors. -pthread applies to them all,
95 @@ -152,7 +152,7 @@ my $no_asm=":::::::::::::::void";
96 # seems to be sufficient?
97 my $BSDthreads="-pthread -D_THREAD_SAFE -D_REENTRANT";
98
99 -#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lflags : $b n_ops : $cpuid_obj : $bn_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj : $sha1_ obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_obj : $mod es_obj : $engines_obj : $dso_scheme : $shared_target : $shared_cflag : $shared_l dflag : $shared_extension : $ranlib : $arflags : $multilib
100 +#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lflags : $b n_ops : $cpuid_obj : $bn_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj : $sha1_ obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_obj : $mod es_obj : $engines_obj : $chacha_obj : $poly1305_obj : $dso_scheme : $shared_targ et : $shared_cflag : $shared_ldflag : $shared_extension : $ranlib : $arflags : $ multilib :
101
102 my %table=(
103 # File 'TABLE' (created by 'make TABLE') contains the data from this list,
104 @@ -647,6 +647,8 @@ my $idx_wp_obj = $idx++;
105 my $idx_cmll_obj = $idx++;
106 my $idx_modes_obj = $idx++;
107 my $idx_engines_obj = $idx++;
108 +my $idx_chacha_obj = $idx++;
109 +my $idx_poly1305_obj = $idx++;
110 my $idx_perlasm_scheme = $idx++;
111 my $idx_dso_scheme = $idx++;
112 my $idx_shared_target = $idx++;
113 @@ -692,6 +694,8 @@ my $aes_enc="aes_core.o aes_cbc.o";
114 my $bf_enc ="bf_enc.o";
115 my $cast_enc="c_enc.o";
116 my $rc4_enc="rc4_enc.o rc4_skey.o";
117 +my $chacha_enc="chacha_enc.o";
118 +my $poly1305 ="poly1305.o";
119 my $rc5_enc="rc5_enc.o";
120 my $md5_obj="";
121 my $sha1_obj="";
122 @@ -1144,7 +1148,7 @@ $openssldir=$prefix . "/" . $openssldir if $openssldir !~ /(^\/|^[a-zA-Z]:[\\\/]
123
124 print "IsMK1MF=$IsMK1MF\n";
125
126 -my @fields = split(/\s*:\s*/,$table{$target} . ":" x 30 , -1);
127 +my @fields = split(/\s*:\s*/,$table{$target} . ":" x 31 , -1);
128 my $cc = $fields[$idx_cc];
129 # Allow environment CC to override compiler...
130 if($ENV{CC}) {
131 @@ -1181,6 +1185,8 @@ my $ranlib = $ENV{'RANLIB'} || $fields[$idx_ranlib];
132 my $ar = $ENV{'AR'} || "ar";
133 my $arflags = $fields[$idx_arflags];
134 my $multilib = $fields[$idx_multilib];
135 +my $chacha_obj = $fields[$idx_chacha_obj];
136 +my $poly1305_obj = $fields[$idx_poly1305_obj];
137
138 # if $prefix/lib$multilib is not an existing directory, then
139 # assume that it's not searched by linker automatically, in
140 @@ -1477,6 +1483,8 @@ $des_obj=$des_enc unless ($des_obj =~ /\.o$/);
141 $bf_obj=$bf_enc unless ($bf_obj =~ /\.o$/);
142 $cast_obj=$cast_enc unless ($cast_obj =~ /\.o$/);
143 $rc4_obj=$rc4_enc unless ($rc4_obj =~ /\.o$/);
144 +$chacha_obj=$chacha_enc unless ($chacha_obj =~ /\.o$/);
145 +$poly1305_obj=$poly1305 unless ($poly1305_obj =~ /\.o$/);
146 $rc5_obj=$rc5_enc unless ($rc5_obj =~ /\.o$/);
147 if ($sha1_obj =~ /\.o$/)
148 {
149 @@ -1637,6 +1645,8 @@ while (<IN>)
150 s/^BF_ENC=.*$/BF_ENC= $bf_obj/;
151 s/^CAST_ENC=.*$/CAST_ENC= $cast_obj/;
152 s/^RC4_ENC=.*$/RC4_ENC= $rc4_obj/;
153 + s/^CHACHA_ENC=.*$/CHACHA_ENC= $chacha_obj/;
154 + s/^POLY1305=.*$/POLY1305= $poly1305_obj/;
155 s/^RC5_ENC=.*$/RC5_ENC= $rc5_obj/;
156 s/^MD5_ASM_OBJ=.*$/MD5_ASM_OBJ= $md5_obj/;
157 s/^SHA1_ASM_OBJ=.*$/SHA1_ASM_OBJ= $sha1_obj/;
158 @@ -1698,6 +1708,8 @@ print "AES_ENC =$aes_obj\n";
159 print "BF_ENC =$bf_obj\n";
160 print "CAST_ENC =$cast_obj\n";
161 print "RC4_ENC =$rc4_obj\n";
162 +print "CHACHA_ENC =$chacha_obj\n";
163 +print "POLY1305 =$poly1305_obj\n";
164 print "RC5_ENC =$rc5_obj\n";
165 print "MD5_OBJ_ASM =$md5_obj\n";
166 print "SHA1_OBJ_ASM =$sha1_obj\n";
167 @@ -2096,11 +2108,11 @@ sub print_table_entry
168
169 (my $cc,my $cflags,my $unistd,my $thread_cflag,my $sys_id,my $lflags,
170 my $bn_ops,my $cpuid_obj,my $bn_obj,my $des_obj,my $aes_obj, my $bf_obj,
171 - my $md5_obj,my $sha1_obj,my $cast_obj,my $rc4_obj,my $rmd160_obj,
172 - my $rc5_obj,my $wp_obj,my $cmll_obj,my $modes_obj, my $engines_obj,
173 + my $md5_obj,my $sha1_obj,my $cast_obj,my $rc4_obj,my $chacha_obj,my $pol y1305_obj,
174 + my $rmd160_obj, my $rc5_obj,my $wp_obj,my $cmll_obj,my $modes_obj, my $e ngines_obj,
175 my $perlasm_scheme,my $dso_scheme,my $shared_target,my $shared_cflag,
176 my $shared_ldflag,my $shared_extension,my $ranlib,my $arflags,my $multil ib)=
177 - split(/\s*:\s*/,$table{$target} . ":" x 30 , -1);
178 + split(/\s*:\s*/,$table{$target} . ":" x 31 , -1);
179
180 print <<EOF
181
182 @@ -2121,6 +2133,8 @@ sub print_table_entry
183 \$sha1_obj = $sha1_obj
184 \$cast_obj = $cast_obj
185 \$rc4_obj = $rc4_obj
186 +\$chacha_obj = $chacha_obj
187 +\$poly1305_obj = $poly1305_obj
188 \$rmd160_obj = $rmd160_obj
189 \$rc5_obj = $rc5_obj
190 \$wp_obj = $wp_obj
191 @@ -2150,7 +2164,7 @@ sub test_sanity
192
193 foreach $target (sort keys %table)
194 {
195 - @fields = split(/\s*:\s*/,$table{$target} . ":" x 30 , -1);
196 + @fields = split(/\s*:\s*/,$table{$target} . ":" x 31 , -1);
197
198 if ($fields[$idx_dso_scheme-1] =~ /^(beos|dl|dlfcn|win32|vms)$/)
199 {
200 diff --git a/Makefile.org b/Makefile.org
201 index 2db31ea..919466d 100644
202 --- a/Makefile.org
203 +++ b/Makefile.org
204 @@ -94,6 +94,8 @@ BF_ENC= bf_enc.o
205 CAST_ENC= c_enc.o
206 RC4_ENC= rc4_enc.o
207 RC5_ENC= rc5_enc.o
208 +CHACHA_ENC= chacha_enc.o
209 +POLY1305= poly1305.o
210 MD5_ASM_OBJ=
211 SHA1_ASM_OBJ=
212 RMD160_ASM_OBJ=
213 @@ -147,7 +149,7 @@ SDIRS= \
214 bn ec rsa dsa ecdsa dh ecdh dso engine \
215 buffer bio stack lhash rand err \
216 evp asn1 pem x509 x509v3 conf txt_db pkcs7 pkcs12 comp ocsp ui krb5 \
217 - cms pqueue ts jpake srp store cmac
218 + cms pqueue ts jpake srp store cmac poly1305 chacha
219 # keep in mind that the above list is adjusted by ./Configure
220 # according to no-xxx arguments...
221
222 @@ -232,6 +234,8 @@ BUILDENV= PLATFORM='$(PLATFORM)' PROCESSOR='$(PROCESSOR)' \
223 WP_ASM_OBJ='$(WP_ASM_OBJ)' \
224 MODES_ASM_OBJ='$(MODES_ASM_OBJ)' \
225 ENGINES_ASM_OBJ='$(ENGINES_ASM_OBJ)' \
226 + CHACHA_ENC='$(CHACHA_ENC)' \
227 + POLY1305='$(POLY1305)' \
228 PERLASM_SCHEME='$(PERLASM_SCHEME)' \
229 FIPSLIBDIR='${FIPSLIBDIR}' \
230 FIPSDIR='${FIPSDIR}' \
231 diff --git a/crypto/chacha/Makefile b/crypto/chacha/Makefile
232 new file mode 100644
233 index 0000000..289933b
234 --- /dev/null
235 +++ b/crypto/chacha/Makefile
236 @@ -0,0 +1,80 @@
237 +#
238 +# OpenSSL/crypto/chacha/Makefile
239 +#
240 +
241 +DIR= chacha
242 +TOP= ../..
243 +CC= cc
244 +CPP= $(CC) -E
245 +INCLUDES=
246 +CFLAG=-g
247 +AR= ar r
248 +
249 +CFLAGS= $(INCLUDES) $(CFLAG)
250 +ASFLAGS= $(INCLUDES) $(ASFLAG)
251 +AFLAGS= $(ASFLAGS)
252 +
253 +CHACHA_ENC=chacha_enc.o
254 +
255 +GENERAL=Makefile
256 +TEST=chachatest.o
257 +APPS=
258 +
259 +LIB=$(TOP)/libcrypto.a
260 +LIBSRC=
261 +LIBOBJ=$(CHACHA_ENC)
262 +
263 +SRC= $(LIBSRC)
264 +
265 +EXHEADER=chacha.h
266 +HEADER= $(EXHEADER)
267 +
268 +ALL= $(GENERAL) $(SRC) $(HEADER)
269 +
270 +top:
271 + (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all)
272 +
273 +all: lib
274 +
275 +lib: $(LIBOBJ)
276 + $(AR) $(LIB) $(LIBOBJ)
277 + $(RANLIB) $(LIB) || echo Never mind.
278 + @touch lib
279 +
280 +files:
281 + $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
282 +
283 +links:
284 + @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER)
285 + @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST)
286 + @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS)
287 +
288 +install:
289 + @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile...
290 + @headerlist="$(EXHEADER)"; for i in $$headerlist ; \
291 + do \
292 + (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \
293 + chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \
294 + done;
295 +
296 +tags:
297 + ctags $(SRC)
298 +
299 +tests:
300 +
301 +lint:
302 + lint -DLINT $(INCLUDES) $(SRC)>fluff
303 +
304 +depend:
305 + @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile...
306 + $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC)
307 +
308 +dclean:
309 + $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKE FILE) >Makefile.new
310 + mv -f Makefile.new $(MAKEFILE)
311 +
312 +clean:
313 + rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff
314 +
315 +# DO NOT DELETE THIS LINE -- make depend depends on it.
316 +
317 diff --git a/crypto/chacha/chacha.h b/crypto/chacha/chacha.h
318 new file mode 100644
319 index 0000000..d56519d
320 --- /dev/null
321 +++ b/crypto/chacha/chacha.h
322 @@ -0,0 +1,85 @@
323 +/*
324 + * Chacha stream algorithm.
325 + *
326 + * Created on: Jun, 2013
327 + * Author: Elie Bursztein (elieb@google.com)
328 + *
329 + * Adapted from the estream code by D. Bernstein.
330 + */
331 +/* ====================================================================
332 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved.
333 + *
334 + * Redistribution and use in source and binary forms, with or without
335 + * modification, are permitted provided that the following conditions
336 + * are met:
337 + *
338 + * 1. Redistributions of source code must retain the above copyright
339 + * notice, this list of conditions and the following disclaimer.
340 + *
341 + * 2. Redistributions in binary form must reproduce the above copyright
342 + * notice, this list of conditions and the following disclaimer in
343 + * the documentation and/or other materials provided with the
344 + * distribution.
345 + *
346 + * 3. All advertising materials mentioning features or use of this
347 + * software must display the following acknowledgment:
348 + * "This product includes software developed by the OpenSSL Project
349 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
350 + *
351 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
352 + * endorse or promote products derived from this software without
353 + * prior written permission. For written permission, please contact
354 + * licensing@OpenSSL.org.
355 + *
356 + * 5. Products derived from this software may not be called "OpenSSL"
357 + * nor may "OpenSSL" appear in their names without prior written
358 + * permission of the OpenSSL Project.
359 + *
360 + * 6. Redistributions of any form whatsoever must retain the following
361 + * acknowledgment:
362 + * "This product includes software developed by the OpenSSL Project
363 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
364 + *
365 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
366 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
367 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
368 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
369 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
370 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
371 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
372 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
373 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
374 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
375 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
376 + * OF THE POSSIBILITY OF SUCH DAMAGE.
377 + * ====================================================================
378 + */
379 +#ifndef HEADER_CHACHA_H
380 +#define HEADER_CHACHA_H
381 +
382 +#include <openssl/opensslconf.h>
383 +
384 +#if defined(OPENSSL_NO_CHACHA)
385 +#error ChaCha support is disabled.
386 +#endif
387 +
388 +#include <stddef.h>
389 +
390 +#ifdef __cplusplus
391 +extern "C" {
392 +#endif
393 +
394 +/* CRYPTO_chacha_20 encrypts |in_len| bytes from |in| with the given key and
395 + * nonce and writes the result to |out|, which may be equal to |in|. The
396 + * initial block counter is specified by |counter|. */
397 +void CRYPTO_chacha_20(unsigned char *out,
398 + const unsigned char *in, size_t in_len,
399 + const unsigned char key[32],
400 + const unsigned char nonce[8],
401 + size_t counter);
402 +
403 +#ifdef __cplusplus
404 +}
405 +#endif
406 +
407 +#endif
408 diff --git a/crypto/chacha/chacha_enc.c b/crypto/chacha/chacha_enc.c
409 new file mode 100644
410 index 0000000..54d1ca3
411 --- /dev/null
412 +++ b/crypto/chacha/chacha_enc.c
413 @@ -0,0 +1,167 @@
414 +/*
415 + * Chacha stream algorithm.
416 + *
417 + * Created on: Jun, 2013
418 + * Author: Elie Bursztein (elieb@google.com)
419 + *
420 + * Adapted from the estream code by D. Bernstein.
421 + */
422 +/* ====================================================================
423 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved.
424 + *
425 + * Redistribution and use in source and binary forms, with or without
426 + * modification, are permitted provided that the following conditions
427 + * are met:
428 + *
429 + * 1. Redistributions of source code must retain the above copyright
430 + * notice, this list of conditions and the following disclaimer.
431 + *
432 + * 2. Redistributions in binary form must reproduce the above copyright
433 + * notice, this list of conditions and the following disclaimer in
434 + * the documentation and/or other materials provided with the
435 + * distribution.
436 + *
437 + * 3. All advertising materials mentioning features or use of this
438 + * software must display the following acknowledgment:
439 + * "This product includes software developed by the OpenSSL Project
440 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
441 + *
442 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
443 + * endorse or promote products derived from this software without
444 + * prior written permission. For written permission, please contact
445 + * licensing@OpenSSL.org.
446 + *
447 + * 5. Products derived from this software may not be called "OpenSSL"
448 + * nor may "OpenSSL" appear in their names without prior written
449 + * permission of the OpenSSL Project.
450 + *
451 + * 6. Redistributions of any form whatsoever must retain the following
452 + * acknowledgment:
453 + * "This product includes software developed by the OpenSSL Project
454 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
455 + *
456 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
457 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
458 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
459 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
460 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
461 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
462 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
463 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
464 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
465 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
466 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
467 + * OF THE POSSIBILITY OF SUCH DAMAGE.
468 + * ====================================================================
469 + */
470 +
471 +#include <stdint.h>
472 +#include <string.h>
473 +#include <openssl/opensslconf.h>
474 +
475 +#if !defined(OPENSSL_NO_CHACHA)
476 +
477 +#include <openssl/chacha.h>
478 +
479 +/* sigma contains the ChaCha constants, which happen to be an ASCII string. */
480 +static const char sigma[16] = "expand 32-byte k";
481 +
482 +#define ROTATE(v, n) (((v) << (n)) | ((v) >> (32 - (n))))
483 +#define XOR(v, w) ((v) ^ (w))
484 +#define PLUS(x, y) ((x) + (y))
485 +#define PLUSONE(v) (PLUS((v), 1))
486 +
487 +#define U32TO8_LITTLE(p, v) \
488 + { (p)[0] = (v >> 0) & 0xff; (p)[1] = (v >> 8) & 0xff; \
489 + (p)[2] = (v >> 16) & 0xff; (p)[3] = (v >> 24) & 0xff; }
490 +#define U8TO32_LITTLE(p) \
491 + (((uint32_t)((p)[0]) ) | ((uint32_t)((p)[1]) << 8) | \
492 + ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24) )
493 +
494 +/* QUARTERROUND updates a, b, c, d with a ChaCha "quarter" round. */
495 +#define QUARTERROUND(a,b,c,d) \
496 + x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \
497 + x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \
498 + x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \
499 + x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7);
500 +
501 +typedef unsigned int uint32_t;
502 +
503 +/* chacha_core performs |num_rounds| rounds of ChaCha20 on the input words in
504 + * |input| and writes the 64 output bytes to |output|. */
505 +static void chacha_core(unsigned char output[64], const uint32_t input[16],
506 + int num_rounds)
507 + {
508 + uint32_t x[16];
509 + int i;
510 +
511 + memcpy(x, input, sizeof(uint32_t) * 16);
512 + for (i = 20; i > 0; i -= 2)
513 + {
514 + QUARTERROUND( 0, 4, 8,12)
515 + QUARTERROUND( 1, 5, 9,13)
516 + QUARTERROUND( 2, 6,10,14)
517 + QUARTERROUND( 3, 7,11,15)
518 + QUARTERROUND( 0, 5,10,15)
519 + QUARTERROUND( 1, 6,11,12)
520 + QUARTERROUND( 2, 7, 8,13)
521 + QUARTERROUND( 3, 4, 9,14)
522 + }
523 +
524 + for (i = 0; i < 16; ++i)
525 + x[i] = PLUS(x[i], input[i]);
526 + for (i = 0; i < 16; ++i)
527 + U32TO8_LITTLE(output + 4 * i, x[i]);
528 + }
529 +
530 +void CRYPTO_chacha_20(unsigned char *out,
531 + const unsigned char *in, size_t in_len,
532 + const unsigned char key[32],
533 + const unsigned char nonce[8],
534 + size_t counter)
535 + {
536 + uint32_t input[16];
537 + unsigned char buf[64];
538 + size_t todo, i;
539 +
540 + input[0] = U8TO32_LITTLE(sigma + 0);
541 + input[1] = U8TO32_LITTLE(sigma + 4);
542 + input[2] = U8TO32_LITTLE(sigma + 8);
543 + input[3] = U8TO32_LITTLE(sigma + 12);
544 +
545 + input[4] = U8TO32_LITTLE(key + 0);
546 + input[5] = U8TO32_LITTLE(key + 4);
547 + input[6] = U8TO32_LITTLE(key + 8);
548 + input[7] = U8TO32_LITTLE(key + 12);
549 +
550 + input[8] = U8TO32_LITTLE(key + 16);
551 + input[9] = U8TO32_LITTLE(key + 20);
552 + input[10] = U8TO32_LITTLE(key + 24);
553 + input[11] = U8TO32_LITTLE(key + 28);
554 +
555 + input[12] = counter;
556 + input[13] = ((uint64_t) counter) >> 32;
557 + input[14] = U8TO32_LITTLE(nonce + 0);
558 + input[15] = U8TO32_LITTLE(nonce + 4);
559 +
560 + while (in_len > 0)
561 + {
562 + todo = sizeof(buf);
563 + if (in_len < todo)
564 + todo = in_len;
565 +
566 + chacha_core(buf, input, 20);
567 + for (i = 0; i < todo; i++)
568 + out[i] = in[i] ^ buf[i];
569 +
570 + out += todo;
571 + in += todo;
572 + in_len -= todo;
573 +
574 + input[12]++;
575 + if (input[12] == 0)
576 + input[13]++;
577 + }
578 + }
579 +
580 +#endif /* !OPENSSL_NO_CHACHA */
581 diff --git a/crypto/chacha/chacha_vec.c b/crypto/chacha/chacha_vec.c
582 new file mode 100644
583 index 0000000..33b2238
584 --- /dev/null
585 +++ b/crypto/chacha/chacha_vec.c
586 @@ -0,0 +1,345 @@
587 +/* ====================================================================
588 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved.
589 + *
590 + * Redistribution and use in source and binary forms, with or without
591 + * modification, are permitted provided that the following conditions
592 + * are met:
593 + *
594 + * 1. Redistributions of source code must retain the above copyright
595 + * notice, this list of conditions and the following disclaimer.
596 + *
597 + * 2. Redistributions in binary form must reproduce the above copyright
598 + * notice, this list of conditions and the following disclaimer in
599 + * the documentation and/or other materials provided with the
600 + * distribution.
601 + *
602 + * 3. All advertising materials mentioning features or use of this
603 + * software must display the following acknowledgment:
604 + * "This product includes software developed by the OpenSSL Project
605 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
606 + *
607 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
608 + * endorse or promote products derived from this software without
609 + * prior written permission. For written permission, please contact
610 + * licensing@OpenSSL.org.
611 + *
612 + * 5. Products derived from this software may not be called "OpenSSL"
613 + * nor may "OpenSSL" appear in their names without prior written
614 + * permission of the OpenSSL Project.
615 + *
616 + * 6. Redistributions of any form whatsoever must retain the following
617 + * acknowledgment:
618 + * "This product includes software developed by the OpenSSL Project
619 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
620 + *
621 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
622 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
623 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
624 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
625 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
626 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
627 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
628 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
629 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
630 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
631 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
632 + * OF THE POSSIBILITY OF SUCH DAMAGE.
633 + * ====================================================================
634 + */
635 +
636 +/* This implementation is by Ted Krovetz and was submitted to SUPERCOP and
637 + * marked as public domain. It was been altered to allow for non-aligned inputs
638 + * and to allow the block counter to be passed in specifically. */
639 +
640 +#include <string.h>
641 +#include <stdint.h>
642 +#include <openssl/opensslconf.h>
643 +
644 +#if !defined(OPENSSL_NO_CHACHA)
645 +
646 +#include <openssl/chacha.h>
647 +
648 +#ifndef CHACHA_RNDS
649 +#define CHACHA_RNDS 20 /* 8 (high speed), 20 (conservative), 12 (middle) */
650 +#endif
651 +
652 +/* Architecture-neutral way to specify 16-byte vector of ints */
653 +typedef unsigned vec __attribute__ ((vector_size (16)));
654 +
655 +/* This implementation is designed for Neon, SSE and AltiVec machines. The
656 + * following specify how to do certain vector operations efficiently on
657 + * each architecture, using intrinsics.
658 + * This implementation supports parallel processing of multiple blocks,
659 + * including potentially using general-purpose registers.
660 + */
661 +#if __ARM_NEON__
662 +#include <arm_neon.h>
663 +#define GPR_TOO 1
664 +#define VBPI 2
665 +#define ONE (vec)vsetq_lane_u32(1,vdupq_n_u32(0),0)
666 +#define LOAD(m) (vec)(*((vec*)(m)))
667 +#define STORE(m,r) (*((vec*)(m))) = (r)
668 +#define ROTV1(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,1)
669 +#define ROTV2(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,2)
670 +#define ROTV3(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,3)
671 +#define ROTW16(x) (vec)vrev32q_u16((uint16x8_t)x)
672 +#if __clang__
673 +#define ROTW7(x) (x << ((vec){ 7, 7, 7, 7})) ^ (x >> ((vec){25,25,25,25}))
674 +#define ROTW8(x) (x << ((vec){ 8, 8, 8, 8})) ^ (x >> ((vec){24,24,24,24}))
675 +#define ROTW12(x) (x << ((vec){12,12,12,12})) ^ (x >> ((vec){20,20,20,20}))
676 +#else
677 +#define ROTW7(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,7),(uint32x4_t)x,2 5)
678 +#define ROTW8(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,8),(uint32x4_t)x,2 4)
679 +#define ROTW12(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,12),(uint32x4_t)x, 20)
680 +#endif
681 +#elif __SSE2__
682 +#include <emmintrin.h>
683 +#define GPR_TOO 0
684 +#if __clang__
685 +#define VBPI 4
686 +#else
687 +#define VBPI 3
688 +#endif
689 +#define ONE (vec)_mm_set_epi32(0,0,0,1)
690 +#define LOAD(m) (vec)_mm_loadu_si128((__m128i*)(m))
691 +#define STORE(m,r) _mm_storeu_si128((__m128i*)(m), (__m128i) (r))
692 +#define ROTV1(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(0,3,2,1))
693 +#define ROTV2(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(1,0,3,2))
694 +#define ROTV3(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(2,1,0,3))
695 +#define ROTW7(x)  (vec)(_mm_slli_epi32((__m128i)x, 7) ^ _mm_srli_epi32((__m128i)x,25))
696 +#define ROTW12(x) (vec)(_mm_slli_epi32((__m128i)x,12) ^ _mm_srli_epi32((__m128i)x,20))
697 +#if __SSSE3__
698 +#include <tmmintrin.h>
699 +#define ROTW8(x)  (vec)_mm_shuffle_epi8((__m128i)x,_mm_set_epi8(14,13,12,15,10,9,8,11,6,5,4,7,2,1,0,3))
700 +#define ROTW16(x) (vec)_mm_shuffle_epi8((__m128i)x,_mm_set_epi8(13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2))
701 +#else
702 +#define ROTW8(x)  (vec)(_mm_slli_epi32((__m128i)x, 8) ^ _mm_srli_epi32((__m128i)x,24))
703 +#define ROTW16(x) (vec)(_mm_slli_epi32((__m128i)x,16) ^ _mm_srli_epi32((__m128i)x,16))
704 +#endif
705 +#else
706 +#error -- Implementation supports only machines with neon or SSE2
707 +#endif
708 +
709 +#ifndef REVV_BE
710 +#define REVV_BE(x) (x)
711 +#endif
712 +
713 +#ifndef REVW_BE
714 +#define REVW_BE(x) (x)
715 +#endif
716 +
717 +#define BPI (VBPI + GPR_TOO) /* Blocks computed per loop iteration */
718 +
719 +#define DQROUND_VECTORS(a,b,c,d) \
720 + a += b; d ^= a; d = ROTW16(d); \
721 + c += d; b ^= c; b = ROTW12(b); \
722 + a += b; d ^= a; d = ROTW8(d); \
723 + c += d; b ^= c; b = ROTW7(b); \
724 + b = ROTV1(b); c = ROTV2(c); d = ROTV3(d); \
725 + a += b; d ^= a; d = ROTW16(d); \
726 + c += d; b ^= c; b = ROTW12(b); \
727 + a += b; d ^= a; d = ROTW8(d); \
728 + c += d; b ^= c; b = ROTW7(b); \
729 + b = ROTV3(b); c = ROTV2(c); d = ROTV1(d);
730 +
731 +#define QROUND_WORDS(a,b,c,d) \
732 + a = a+b; d ^= a; d = d<<16 | d>>16; \
733 + c = c+d; b ^= c; b = b<<12 | b>>20; \
734 + a = a+b; d ^= a; d = d<< 8 | d>>24; \
735 + c = c+d; b ^= c; b = b<< 7 | b>>25;
736 +
737 +#define WRITE_XOR(in, op, d, v0, v1, v2, v3) \
738 + STORE(op + d + 0, LOAD(in + d + 0) ^ REVV_BE(v0)); \
739 + STORE(op + d + 4, LOAD(in + d + 4) ^ REVV_BE(v1)); \
740 + STORE(op + d + 8, LOAD(in + d + 8) ^ REVV_BE(v2)); \
741 + STORE(op + d +12, LOAD(in + d +12) ^ REVV_BE(v3));
742 +
743 +void CRYPTO_chacha_20(
744 + unsigned char *out,
745 + const unsigned char *in,
746 + size_t inlen,
747 + const unsigned char key[32],
748 + const unsigned char nonce[8],
749 + size_t counter)
750 + {
751 + unsigned iters, i, *op=(unsigned *)out, *ip=(unsigned *)in, *kp;
752 +#if defined(__ARM_NEON__)
753 + unsigned *np;
754 +#endif
755 + vec s0, s1, s2, s3;
756 +#if !defined(__ARM_NEON__) && !defined(__SSE2__)
757 + __attribute__ ((aligned (16))) unsigned key[8], nonce[4];
758 +#endif
759 + __attribute__ ((aligned (16))) unsigned chacha_const[] =
760 + {0x61707865,0x3320646E,0x79622D32,0x6B206574};
761 +#if defined(__ARM_NEON__) || defined(__SSE2__)
762 + kp = (unsigned *)key;
763 +#else
764 + ((vec *)key)[0] = REVV_BE(((vec *)key)[0]);
765 + ((vec *)key)[1] = REVV_BE(((vec *)key)[1]);
766 + nonce[0] = REVW_BE(((unsigned *)nonce)[0]);
767 + nonce[1] = REVW_BE(((unsigned *)nonce)[1]);
768 + nonce[2] = REVW_BE(((unsigned *)nonce)[2]);
769 + nonce[3] = REVW_BE(((unsigned *)nonce)[3]);
770 + kp = (unsigned *)key;
771 + np = (unsigned *)nonce;
772 +#endif
773 +#if defined(__ARM_NEON__)
774 + np = (unsigned*) nonce;
775 +#endif
776 + s0 = LOAD(chacha_const);
777 + s1 = LOAD(&((vec*)kp)[0]);
778 + s2 = LOAD(&((vec*)kp)[1]);
779 + s3 = (vec){
780 + counter & 0xffffffff,
781 +#if __ARM_NEON__
782 + 0, /* can't right-shift 32 bits on a 32-bit system. */
783 +#else
784 + counter >> 32,
785 +#endif
786 + ((uint32_t*)nonce)[0],
787 + ((uint32_t*)nonce)[1]
788 + };
789 +
790 + for (iters = 0; iters < inlen/(BPI*64); iters++)
791 + {
792 +#if GPR_TOO
793 + register unsigned x0, x1, x2, x3, x4, x5, x6, x7, x8,
794 + x9, x10, x11, x12, x13, x14, x15;
795 +#endif
796 +#if VBPI > 2
797 + vec v8,v9,v10,v11;
798 +#endif
799 +#if VBPI > 3
800 + vec v12,v13,v14,v15;
801 +#endif
802 +
803 + vec v0,v1,v2,v3,v4,v5,v6,v7;
804 + v4 = v0 = s0; v5 = v1 = s1; v6 = v2 = s2; v3 = s3;
805 + v7 = v3 + ONE;
806 +#if VBPI > 2
807 + v8 = v4; v9 = v5; v10 = v6;
808 + v11 = v7 + ONE;
809 +#endif
810 +#if VBPI > 3
811 + v12 = v8; v13 = v9; v14 = v10;
812 + v15 = v11 + ONE;
813 +#endif
814 +#if GPR_TOO
815 + x0 = chacha_const[0]; x1 = chacha_const[1];
816 + x2 = chacha_const[2]; x3 = chacha_const[3];
817 + x4 = kp[0]; x5 = kp[1]; x6 = kp[2]; x7 = kp[3];
818 + x8 = kp[4]; x9 = kp[5]; x10 = kp[6]; x11 = kp[7];
819 + x12 = counter+BPI*iters+(BPI-1); x13 = 0;
820 + x14 = np[0]; x15 = np[1];
821 +#endif
822 + for (i = CHACHA_RNDS/2; i; i--)
823 + {
824 + DQROUND_VECTORS(v0,v1,v2,v3)
825 + DQROUND_VECTORS(v4,v5,v6,v7)
826 +#if VBPI > 2
827 + DQROUND_VECTORS(v8,v9,v10,v11)
828 +#endif
829 +#if VBPI > 3
830 + DQROUND_VECTORS(v12,v13,v14,v15)
831 +#endif
832 +#if GPR_TOO
833 + QROUND_WORDS( x0, x4, x8,x12)
834 + QROUND_WORDS( x1, x5, x9,x13)
835 + QROUND_WORDS( x2, x6,x10,x14)
836 + QROUND_WORDS( x3, x7,x11,x15)
837 + QROUND_WORDS( x0, x5,x10,x15)
838 + QROUND_WORDS( x1, x6,x11,x12)
839 + QROUND_WORDS( x2, x7, x8,x13)
840 + QROUND_WORDS( x3, x4, x9,x14)
841 +#endif
842 + }
843 +
844 + WRITE_XOR(ip, op, 0, v0+s0, v1+s1, v2+s2, v3+s3)
845 + s3 += ONE;
846 + WRITE_XOR(ip, op, 16, v4+s0, v5+s1, v6+s2, v7+s3)
847 + s3 += ONE;
848 +#if VBPI > 2
849 + WRITE_XOR(ip, op, 32, v8+s0, v9+s1, v10+s2, v11+s3)
850 + s3 += ONE;
851 +#endif
852 +#if VBPI > 3
853 + WRITE_XOR(ip, op, 48, v12+s0, v13+s1, v14+s2, v15+s3)
854 + s3 += ONE;
855 +#endif
856 + ip += VBPI*16;
857 + op += VBPI*16;
858 +#if GPR_TOO
859 + op[0] = REVW_BE(REVW_BE(ip[0]) ^ (x0 + chacha_const[0]));
860 + op[1] = REVW_BE(REVW_BE(ip[1]) ^ (x1 + chacha_const[1]));
861 + op[2] = REVW_BE(REVW_BE(ip[2]) ^ (x2 + chacha_const[2]));
862 + op[3] = REVW_BE(REVW_BE(ip[3]) ^ (x3 + chacha_const[3]));
863 + op[4] = REVW_BE(REVW_BE(ip[4]) ^ (x4 + kp[0]));
864 + op[5] = REVW_BE(REVW_BE(ip[5]) ^ (x5 + kp[1]));
865 + op[6] = REVW_BE(REVW_BE(ip[6]) ^ (x6 + kp[2]));
866 + op[7] = REVW_BE(REVW_BE(ip[7]) ^ (x7 + kp[3]));
867 + op[8] = REVW_BE(REVW_BE(ip[8]) ^ (x8 + kp[4]));
868 + op[9] = REVW_BE(REVW_BE(ip[9]) ^ (x9 + kp[5]));
869 + op[10] = REVW_BE(REVW_BE(ip[10]) ^ (x10 + kp[6]));
870 + op[11] = REVW_BE(REVW_BE(ip[11]) ^ (x11 + kp[7]));
871 + op[12] = REVW_BE(REVW_BE(ip[12]) ^ (x12 + BPI*iters+(BPI-1)));
872 + op[13] = REVW_BE(REVW_BE(ip[13]) ^ (x13));
873 + op[14] = REVW_BE(REVW_BE(ip[14]) ^ (x14 + np[0]));
874 + op[15] = REVW_BE(REVW_BE(ip[15]) ^ (x15 + np[1]));
875 + s3 += ONE;
876 + ip += 16;
877 + op += 16;
878 +#endif
879 + }
880 +
881 + for (iters = inlen%(BPI*64)/64; iters != 0; iters--)
882 + {
883 + vec v0 = s0, v1 = s1, v2 = s2, v3 = s3;
884 + for (i = CHACHA_RNDS/2; i; i--)
885 + {
886 + DQROUND_VECTORS(v0,v1,v2,v3);
887 + }
888 + WRITE_XOR(ip, op, 0, v0+s0, v1+s1, v2+s2, v3+s3)
889 + s3 += ONE;
890 + ip += 16;
891 + op += 16;
892 + }
893 +
894 + inlen = inlen % 64;
895 + if (inlen)
896 + {
897 + __attribute__ ((aligned (16))) vec buf[4];
898 + vec v0,v1,v2,v3;
899 + v0 = s0; v1 = s1; v2 = s2; v3 = s3;
900 + for (i = CHACHA_RNDS/2; i; i--)
901 + {
902 + DQROUND_VECTORS(v0,v1,v2,v3);
903 + }
904 +
905 + if (inlen >= 16)
906 + {
907 + STORE(op + 0, LOAD(ip + 0) ^ REVV_BE(v0 + s0));
908 + if (inlen >= 32)
909 + {
910 + STORE(op + 4, LOAD(ip + 4) ^ REVV_BE(v1 + s1));
911 + if (inlen >= 48)
912 + {
913 + STORE(op + 8, LOAD(ip + 8) ^
914 + REVV_BE(v2 + s2));
915 + buf[3] = REVV_BE(v3 + s3);
916 + }
917 + else
918 + buf[2] = REVV_BE(v2 + s2);
919 + }
920 + else
921 + buf[1] = REVV_BE(v1 + s1);
922 + }
923 + else
924 + buf[0] = REVV_BE(v0 + s0);
925 +
926 + for (i=inlen & ~15; i<inlen; i++)
927 + ((char *)op)[i] = ((char *)ip)[i] ^ ((char *)buf)[i];
928 + }
929 + }
930 +
931 +#endif /* !OPENSSL_NO_CHACHA */
932 diff --git a/crypto/chacha/chachatest.c b/crypto/chacha/chachatest.c
933 new file mode 100644
934 index 0000000..b2a9389
935 --- /dev/null
936 +++ b/crypto/chacha/chachatest.c
937 @@ -0,0 +1,211 @@
938 +/*
939 + * Chacha stream algorithm.
940 + *
941 + * Created on: Jun, 2013
942 + * Author: Elie Bursztein (elieb@google.com)
943 + *
944 + * Adapted from the estream code by D. Bernstein.
945 + */
946 +/* ====================================================================
947 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved.
948 + *
949 + * Redistribution and use in source and binary forms, with or without
950 + * modification, are permitted provided that the following conditions
951 + * are met:
952 + *
953 + * 1. Redistributions of source code must retain the above copyright
954 + * notice, this list of conditions and the following disclaimer.
955 + *
956 + * 2. Redistributions in binary form must reproduce the above copyright
957 + * notice, this list of conditions and the following disclaimer in
958 + * the documentation and/or other materials provided with the
959 + * distribution.
960 + *
961 + * 3. All advertising materials mentioning features or use of this
962 + * software must display the following acknowledgment:
963 + * "This product includes software developed by the OpenSSL Project
964 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
965 + *
966 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
967 + * endorse or promote products derived from this software without
968 + * prior written permission. For written permission, please contact
969 + * licensing@OpenSSL.org.
970 + *
971 + * 5. Products derived from this software may not be called "OpenSSL"
972 + * nor may "OpenSSL" appear in their names without prior written
973 + * permission of the OpenSSL Project.
974 + *
975 + * 6. Redistributions of any form whatsoever must retain the following
976 + * acknowledgment:
977 + * "This product includes software developed by the OpenSSL Project
978 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
979 + *
980 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
981 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
982 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
983 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
984 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
985 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
986 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
987 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
988 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
989 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
990 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
991 + * OF THE POSSIBILITY OF SUCH DAMAGE.
992 + * ====================================================================
993 + */
994 +
995 +#include <stdio.h>
996 +#include <stdlib.h>
997 +#include <string.h>
998 +#include <stdint.h>
999 +
1000 +#include <openssl/chacha.h>
1001 +
1002 +struct chacha_test {
1003 + const char *keyhex;
1004 + const char *noncehex;
1005 + const char *outhex;
1006 +};
1007 +
1008 +static const struct chacha_test chacha_tests[] = {
1009 +	{
1010 +		"0000000000000000000000000000000000000000000000000000000000000000",
1011 +		"0000000000000000",
1012 +		"76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586",
1013 +	},
1014 +	{
1015 +		"0000000000000000000000000000000000000000000000000000000000000001",
1016 +		"0000000000000000",
1017 +		"4540f05a9f1fb296d7736e7b208e3c96eb4fe1834688d2604f450952ed432d41bbe2a0b6ea7566d2a5d1e7e20d42af2c53d792b1c43fea817e9ad275ae546963",
1018 +	},
1019 +	{
1020 +		"0000000000000000000000000000000000000000000000000000000000000000",
1021 +		"0000000000000001",
1022 +		"de9cba7bf3d69ef5e786dc63973f653a0b49e015adbff7134fcb7df1378210311e85a050278a7084527214f73efc7fa5b5277062eb7a0433e445f41e31afab757",
1023 +	},
1024 +	{
1025 +		"0000000000000000000000000000000000000000000000000000000000000000",
1026 +		"0100000000000000",
1027 +		"ef3fdfd6c61578fbf5cf35bd3dd33b8009631634d21e42ac33960bd138e50d32111e4caf237ee53ca8ad6426194a88545ddc497a0b466e7d6bbdb0041b2f586b",
1028 +	},
1029 +	{
1030 +		"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f",
1031 +		"0001020304050607",
1032 +		"f798a189f195e66982105ffb640bb7757f579da31602fc93ec01ac56f85ac3c134a4547b733b46413042c9440049176905d3be59ea1c53f15916155c2be8241a38008b9a26bc35941e2444177c8ade6689de95264986d95889fb60e84629c9bd9a5acb1cc118be563eb9b3a4a472f82e09a7e778492b562ef7130e88dfe031c79db9d4f7c7a899151b9a475032b63fc385245fe054e3dd5a97a5f576fe064025d3ce042c566ab2c507b138db853e3d6959660996546cc9c4a6eafdc777c040d70eaf46f76dad3979e5c5360c3317166a1c894c94a371876a94df7628fe4eaaf2ccb27d5aaae0ad7ad0f9d4b6ad3b54098746d4524d38407a6deb",
1033 +	},
1034 +};
1035 +
1036 +static unsigned char hex_digit(char h)
1037 + {
1038 + if (h >= '0' && h <= '9')
1039 + return h - '0';
1040 + else if (h >= 'a' && h <= 'f')
1041 + return h - 'a' + 10;
1042 + else if (h >= 'A' && h <= 'F')
1043 + return h - 'A' + 10;
1044 + else
1045 + abort();
1046 + }
1047 +
1048 +static void hex_decode(unsigned char *out, const char* hex)
1049 + {
1050 + size_t j = 0;
1051 +
1052 + while (*hex != 0)
1053 + {
1054 + unsigned char v = hex_digit(*hex++);
1055 + v <<= 4;
1056 + v |= hex_digit(*hex++);
1057 + out[j++] = v;
1058 + }
1059 + }
1060 +
1061 +static void hexdump(unsigned char *a, size_t len)
1062 + {
1063 + size_t i;
1064 +
1065 + for (i = 0; i < len; i++)
1066 + printf("%02x", a[i]);
1067 + }
1068 +
1069 +/* misalign returns a pointer that points 0 to 15 bytes into |in| such that the
1070 + * returned pointer has alignment 1 mod 16. */
1071 +static void* misalign(void* in)
1072 + {
1073 + intptr_t x = (intptr_t) in;
1074 + x += (17 - (x % 16)) % 16;
1075 + return (void*) x;
1076 + }
1077 +
1078 +int main()
1079 + {
1080 + static const unsigned num_tests =
1081 + sizeof(chacha_tests) / sizeof(struct chacha_test);
1082 + unsigned i;
1083 + unsigned char key_bytes[32 + 16];
1084 + unsigned char nonce_bytes[8 + 16] = {0};
1085 +
1086 + unsigned char *key = misalign(key_bytes);
1087 + unsigned char *nonce = misalign(nonce_bytes);
1088 +
1089 + for (i = 0; i < num_tests; i++)
1090 + {
1091 + const struct chacha_test *test = &chacha_tests[i];
1092 + unsigned char *expected, *out_bytes, *zero_bytes, *out, *zeros;
1093 + size_t len = strlen(test->outhex);
1094 +
1095 + if (strlen(test->keyhex) != 32*2 ||
1096 + strlen(test->noncehex) != 8*2 ||
1097 + (len & 1) == 1)
1098 + return 1;
1099 +
1100 + len /= 2;
1101 +
1102 + hex_decode(key, test->keyhex);
1103 + hex_decode(nonce, test->noncehex);
1104 +
1105 + expected = malloc(len);
1106 + out_bytes = malloc(len+16);
1107 + zero_bytes = malloc(len+16);
1108 + /* Attempt to test unaligned inputs. */
1109 + out = misalign(out_bytes);
1110 + zeros = misalign(zero_bytes);
1111 + memset(zeros, 0, len);
1112 +
1113 + hex_decode(expected, test->outhex);
1114 + CRYPTO_chacha_20(out, zeros, len, key, nonce, 0);
1115 +
1116 + if (memcmp(out, expected, len) != 0)
1117 + {
1118 +			printf("ChaCha20 test #%u failed.\n", i);
1119 + printf("got: ");
1120 + hexdump(out, len);
1121 + printf("\nexpected: ");
1122 + hexdump(expected, len);
1123 + printf("\n");
1124 + return 1;
1125 + }
1126 +
1127 + /* The last test has a large output. We test whether the
1128 + * counter works as expected by skipping the first 64 bytes of
1129 + * it. */
1130 + if (i == num_tests - 1)
1131 + {
1132 + CRYPTO_chacha_20(out, zeros, len - 64, key, nonce, 1);
1133 + if (memcmp(out, expected + 64, len - 64) != 0)
1134 + {
1135 + printf("ChaCha20 skip test failed.\n");
1136 + return 1;
1137 + }
1138 + }
1139 +
1140 + free(expected);
1141 + free(zero_bytes);
1142 + free(out_bytes);
1143 + }
1144 +
1145 +
1146 + printf("PASS\n");
1147 + return 0;
1148 + }
1149 diff --git a/crypto/evp/Makefile b/crypto/evp/Makefile
1150 index b73038d..86b0504 100644
1151 --- a/crypto/evp/Makefile
1152 +++ b/crypto/evp/Makefile
1153 @@ -29,7 +29,8 @@ LIBSRC= encode.c digest.c evp_enc.c evp_key.c evp_acnf.c evp_c nf.c \
1154 c_all.c c_allc.c c_alld.c evp_lib.c bio_ok.c \
1155 evp_pkey.c evp_pbe.c p5_crpt.c p5_crpt2.c \
1156 e_old.c pmeth_lib.c pmeth_fn.c pmeth_gn.c m_sigver.c evp_fips.c \
1157 - e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c evp_aead.c
1158 + e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c evp_aead.c \
1159 + e_chacha20poly1305.c
1160
1161 LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \
1162 e_des.o e_bf.o e_idea.o e_des3.o e_camellia.o\
1163 @@ -42,7 +43,7 @@ LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \
1164 c_all.o c_allc.o c_alld.o evp_lib.o bio_ok.o \
1165 evp_pkey.o evp_pbe.o p5_crpt.o p5_crpt2.o \
1166 e_old.o pmeth_lib.o pmeth_fn.o pmeth_gn.o m_sigver.o evp_fips.o \
1167 - e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o evp_aead.o
1168 + e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o evp_aead.o e_chacha20poly1305.o
1169
1170 SRC= $(LIBSRC)
1171
1172 @@ -239,6 +240,21 @@ e_cast.o: ../../include/openssl/objects.h ../../include/ope nssl/opensslconf.h
1173 e_cast.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
1174 e_cast.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
1175 e_cast.o: ../../include/openssl/symhacks.h ../cryptlib.h e_cast.c evp_locl.h
1176 +e_chacha20poly1305.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h
1177 +e_chacha20poly1305.o: ../../include/openssl/chacha.h
1178 +e_chacha20poly1305.o: ../../include/openssl/crypto.h
1179 +e_chacha20poly1305.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
1180 +e_chacha20poly1305.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
1181 +e_chacha20poly1305.o: ../../include/openssl/obj_mac.h
1182 +e_chacha20poly1305.o: ../../include/openssl/objects.h
1183 +e_chacha20poly1305.o: ../../include/openssl/opensslconf.h
1184 +e_chacha20poly1305.o: ../../include/openssl/opensslv.h
1185 +e_chacha20poly1305.o: ../../include/openssl/ossl_typ.h
1186 +e_chacha20poly1305.o: ../../include/openssl/poly1305.h
1187 +e_chacha20poly1305.o: ../../include/openssl/safestack.h
1188 +e_chacha20poly1305.o: ../../include/openssl/stack.h
1189 +e_chacha20poly1305.o: ../../include/openssl/symhacks.h e_chacha20poly1305.c
1190 +e_chacha20poly1305.o: evp_locl.h
1191 e_des.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h
1192 e_des.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
1193 e_des.o: ../../include/openssl/des.h ../../include/openssl/des_old.h
1194 @@ -258,9 +274,10 @@ e_des3.o: ../../include/openssl/evp.h ../../include/openssl /lhash.h
1195 e_des3.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
1196 e_des3.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
1197 e_des3.o: ../../include/openssl/ossl_typ.h ../../include/openssl/rand.h
1198 -e_des3.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
1199 -e_des3.o: ../../include/openssl/symhacks.h ../../include/openssl/ui.h
1200 -e_des3.o: ../../include/openssl/ui_compat.h ../cryptlib.h e_des3.c evp_locl.h
1201 +e_des3.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
1202 +e_des3.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
1203 +e_des3.o: ../../include/openssl/ui.h ../../include/openssl/ui_compat.h
1204 +e_des3.o: ../cryptlib.h e_des3.c evp_locl.h
1205 e_idea.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h
1206 e_idea.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
1207 e_idea.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
1208 @@ -356,6 +373,14 @@ evp_acnf.o: ../../include/openssl/opensslconf.h
1209 evp_acnf.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
1210 evp_acnf.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
1211 evp_acnf.o: ../../include/openssl/symhacks.h ../cryptlib.h evp_acnf.c
1212 +evp_aead.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h
1213 +evp_aead.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
1214 +evp_aead.o: ../../include/openssl/err.h ../../include/openssl/evp.h
1215 +evp_aead.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
1216 +evp_aead.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
1217 +evp_aead.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
1218 +evp_aead.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
1219 +evp_aead.o: ../../include/openssl/symhacks.h evp_aead.c
1220 evp_cnf.o: ../../e_os.h ../../include/openssl/asn1.h
1221 evp_cnf.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
1222 evp_cnf.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h
1223 diff --git a/crypto/evp/e_chacha20poly1305.c b/crypto/evp/e_chacha20poly1305.c
1224 new file mode 100644
1225 index 0000000..1c0c0fb
1226 --- /dev/null
1227 +++ b/crypto/evp/e_chacha20poly1305.c
1228 @@ -0,0 +1,261 @@
1229 +/* ====================================================================
1230 + * Copyright (c) 2013 The OpenSSL Project. All rights reserved.
1231 + *
1232 + * Redistribution and use in source and binary forms, with or without
1233 + * modification, are permitted provided that the following conditions
1234 + * are met:
1235 + *
1236 + * 1. Redistributions of source code must retain the above copyright
1237 + * notice, this list of conditions and the following disclaimer.
1238 + *
1239 + * 2. Redistributions in binary form must reproduce the above copyright
1240 + * notice, this list of conditions and the following disclaimer in
1241 + * the documentation and/or other materials provided with the
1242 + * distribution.
1243 + *
1244 + * 3. All advertising materials mentioning features or use of this
1245 + * software must display the following acknowledgment:
1246 + * "This product includes software developed by the OpenSSL Project
1247 + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
1248 + *
1249 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
1250 + * endorse or promote products derived from this software without
1251 + * prior written permission. For written permission, please contact
1252 + * openssl-core@openssl.org.
1253 + *
1254 + * 5. Products derived from this software may not be called "OpenSSL"
1255 + * nor may "OpenSSL" appear in their names without prior written
1256 + * permission of the OpenSSL Project.
1257 + *
1258 + * 6. Redistributions of any form whatsoever must retain the following
1259 + * acknowledgment:
1260 + * "This product includes software developed by the OpenSSL Project
1261 + * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
1262 + *
1263 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
1264 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1265 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
1266 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
1267 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1268 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
1269 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
1270 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1271 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
1272 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
1273 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
1274 + * OF THE POSSIBILITY OF SUCH DAMAGE.
1275 + * ====================================================================
1276 + *
1277 + */
1278 +
1279 +#include <stdint.h>
1280 +#include <string.h>
1281 +#include <openssl/opensslconf.h>
1282 +
1283 +#if !defined(OPENSSL_NO_CHACHA) && !defined(OPENSSL_NO_POLY1305)
1284 +
1285 +#include <openssl/chacha.h>
1286 +#include <openssl/poly1305.h>
1287 +#include <openssl/evp.h>
1288 +#include <openssl/err.h>
1289 +#include "evp_locl.h"
1290 +
1291 +#define POLY1305_TAG_LEN 16
1292 +#define CHACHA20_NONCE_LEN 8
1293 +
1294 +struct aead_chacha20_poly1305_ctx
1295 + {
1296 + unsigned char key[32];
1297 + unsigned char tag_len;
1298 + };
1299 +
1300 +static int aead_chacha20_poly1305_init(EVP_AEAD_CTX *ctx, const unsigned char *key, size_t key_len, size_t tag_len)
1301 + {
1302 + struct aead_chacha20_poly1305_ctx *c20_ctx;
1303 +
1304 + if (tag_len == 0)
1305 + tag_len = POLY1305_TAG_LEN;
1306 +
1307 + if (tag_len > POLY1305_TAG_LEN)
1308 + {
1309 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_INIT, EVP_R_TOO_LARGE);
1310 + return 0;
1311 + }
1312 +
1313 + if (key_len != sizeof(c20_ctx->key))
1314 +		return 0;  /* internal error - EVP_AEAD_CTX_init should catch this. */
1315 +
1316 + c20_ctx = OPENSSL_malloc(sizeof(struct aead_chacha20_poly1305_ctx));
1317 + if (c20_ctx == NULL)
1318 + return 0;
1319 +
1320 + memcpy(&c20_ctx->key[0], key, key_len);
1321 + c20_ctx->tag_len = tag_len;
1322 + ctx->aead_state = c20_ctx;
1323 +
1324 + return 1;
1325 + }
1326 +
1327 +static void aead_chacha20_poly1305_cleanup(EVP_AEAD_CTX *ctx)
1328 + {
1329 + struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state;
1330 + OPENSSL_cleanse(c20_ctx->key, sizeof(c20_ctx->key));
1331 + OPENSSL_free(c20_ctx);
1332 + }
1333 +
1334 +static void poly1305_update_with_length(poly1305_state *poly1305,
1335 + const unsigned char *data, size_t data_len)
1336 + {
1337 + size_t j = data_len;
1338 + unsigned char length_bytes[8];
1339 + unsigned i;
1340 +
1341 + for (i = 0; i < sizeof(length_bytes); i++)
1342 + {
1343 + length_bytes[i] = j;
1344 + j >>= 8;
1345 + }
1346 +
1347 + CRYPTO_poly1305_update(poly1305, data, data_len);
1348 + CRYPTO_poly1305_update(poly1305, length_bytes, sizeof(length_bytes));
1349 +}
1350 +
1351 +static ssize_t aead_chacha20_poly1305_seal(const EVP_AEAD_CTX *ctx,
1352 + unsigned char *out, size_t max_out_len,
1353 + const unsigned char *nonce, size_t nonce_len,
1354 + const unsigned char *in, size_t in_len,
1355 + const unsigned char *ad, size_t ad_len)
1356 + {
1357 + const struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state;
1358 + unsigned char poly1305_key[32];
1359 + poly1305_state poly1305;
1360 + const uint64_t in_len_64 = in_len;
1361 +
1362 + /* The underlying ChaCha implementation may not overflow the block
1363 + * counter into the second counter word. Therefore we disallow
1364 + * individual operations that work on more than 2TB at a time.
1365 + * |in_len_64| is needed because, on 32-bit platforms, size_t is only
1366 + * 32-bits and this produces a warning because it's always false.
1367 + * Casting to uint64_t inside the conditional is not sufficient to stop
1368 + * the warning. */
1369 + if (in_len_64 >= (1ull << 32)*64-64)
1370 + {
1371 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_TOO_LARGE);
1372 + return -1;
1373 + }
1374 +
1375 + if (max_out_len < in_len + c20_ctx->tag_len)
1376 + {
1377 +		EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_BUFFER_TOO_SMALL);
1378 + return -1;
1379 + }
1380 +
1381 + if (nonce_len != CHACHA20_NONCE_LEN)
1382 + {
1383 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_IV_TOO_LARGE);
1384 + return -1;
1385 + }
1386 +
1387 + memset(poly1305_key, 0, sizeof(poly1305_key));
1388 +	CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key), c20_ctx->key, nonce, 0);
1389 +
1390 + CRYPTO_poly1305_init(&poly1305, poly1305_key);
1391 + poly1305_update_with_length(&poly1305, ad, ad_len);
1392 + CRYPTO_chacha_20(out, in, in_len, c20_ctx->key, nonce, 1);
1393 + poly1305_update_with_length(&poly1305, out, in_len);
1394 +
1395 + if (c20_ctx->tag_len != POLY1305_TAG_LEN)
1396 + {
1397 + unsigned char tag[POLY1305_TAG_LEN];
1398 + CRYPTO_poly1305_finish(&poly1305, tag);
1399 + memcpy(out + in_len, tag, c20_ctx->tag_len);
1400 + return in_len + c20_ctx->tag_len;
1401 + }
1402 +
1403 + CRYPTO_poly1305_finish(&poly1305, out + in_len);
1404 + return in_len + POLY1305_TAG_LEN;
1405 + }
1406 +
1407 +static ssize_t aead_chacha20_poly1305_open(const EVP_AEAD_CTX *ctx,
1408 + unsigned char *out, size_t max_out_len,
1409 + const unsigned char *nonce, size_t nonce_len,
1410 + const unsigned char *in, size_t in_len,
1411 + const unsigned char *ad, size_t ad_len)
1412 + {
1413 + const struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state;
1414 + unsigned char mac[POLY1305_TAG_LEN];
1415 + unsigned char poly1305_key[32];
1416 + size_t out_len;
1417 + poly1305_state poly1305;
1418 + const uint64_t in_len_64 = in_len;
1419 +
1420 + if (in_len < c20_ctx->tag_len)
1421 + {
1422 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BAD_DECRYPT);
1423 + return -1;
1424 + }
1425 +
1426 + /* The underlying ChaCha implementation may not overflow the block
1427 + * counter into the second counter word. Therefore we disallow
1428 + * individual operations that work on more than 2TB at a time.
1429 + * |in_len_64| is needed because, on 32-bit platforms, size_t is only
1430 + * 32-bits and this produces a warning because it's always false.
1431 + * Casting to uint64_t inside the conditional is not sufficient to stop
1432 + * the warning. */
1433 + if (in_len_64 >= (1ull << 32)*64-64)
1434 + {
1435 +		EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_TOO_LARGE);
1436 + return -1;
1437 + }
1438 +
1439 + if (nonce_len != CHACHA20_NONCE_LEN)
1440 + {
1441 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_IV_TOO_LARGE);
1442 + return -1;
1443 + }
1444 +
1445 + out_len = in_len - c20_ctx->tag_len;
1446 +
1447 + if (max_out_len < out_len)
1448 + {
1449 +		EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BUFFER_TOO_SMALL);
1450 + return -1;
1451 + }
1452 +
1453 + memset(poly1305_key, 0, sizeof(poly1305_key));
1454 +	CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key), c20_ctx->key, nonce, 0);
1455 +
1456 + CRYPTO_poly1305_init(&poly1305, poly1305_key);
1457 + poly1305_update_with_length(&poly1305, ad, ad_len);
1458 + poly1305_update_with_length(&poly1305, in, out_len);
1459 + CRYPTO_poly1305_finish(&poly1305, mac);
1460 +
1461 + if (CRYPTO_memcmp(mac, in + out_len, c20_ctx->tag_len) != 0)
1462 + {
1463 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BAD_DECRYPT);
1464 + return -1;
1465 + }
1466 +
1467 + CRYPTO_chacha_20(out, in, out_len, c20_ctx->key, nonce, 1);
1468 + return out_len;
1469 + }
1470 +
1471 +static const EVP_AEAD aead_chacha20_poly1305 =
1472 + {
1473 + 32, /* key len */
1474 + CHACHA20_NONCE_LEN, /* nonce len */
1475 + POLY1305_TAG_LEN, /* overhead */
1476 + POLY1305_TAG_LEN, /* max tag length */
1477 +
1478 + aead_chacha20_poly1305_init,
1479 + aead_chacha20_poly1305_cleanup,
1480 + aead_chacha20_poly1305_seal,
1481 + aead_chacha20_poly1305_open,
1482 + };
1483 +
1484 +const EVP_AEAD *EVP_aead_chacha20_poly1305(void)
1485 + {
1486 + return &aead_chacha20_poly1305;
1487 + }
1488 +
1489 +#endif /* !OPENSSL_NO_CHACHA && !OPENSSL_NO_POLY1305 */
1490 diff --git a/crypto/evp/evp.h b/crypto/evp/evp.h
1491 index bd10642..7dc1656 100644
1492 --- a/crypto/evp/evp.h
1493 +++ b/crypto/evp/evp.h
1494 @@ -1258,6 +1258,11 @@ typedef struct evp_aead_st EVP_AEAD;
1495 const EVP_AEAD *EVP_aead_aes_128_gcm(void);
1496 #endif
1497
1498 +#if !defined(OPENSSL_NO_CHACHA) && !defined(OPENSSL_NO_POLY1305)
1499 +/* EVP_aead_chacha20_poly1305 is ChaCha20 with a Poly1305 authenticator. */
1500 +const EVP_AEAD *EVP_aead_chacha20_poly1305(void);
1501 +#endif
1502 +
1503 /* EVP_AEAD_key_length returns the length, in bytes, of the keys used by
1504 * |aead|. */
1505 size_t EVP_AEAD_key_length(const EVP_AEAD *aead);
1506 @@ -1360,6 +1365,9 @@ void ERR_load_EVP_strings(void);
1507 #define EVP_F_AEAD_AES_128_GCM_INIT 183
1508 #define EVP_F_AEAD_AES_128_GCM_OPEN 181
1509 #define EVP_F_AEAD_AES_128_GCM_SEAL 182
1510 +#define EVP_F_AEAD_CHACHA20_POLY1305_INIT 187
1511 +#define EVP_F_AEAD_CHACHA20_POLY1305_OPEN 184
1512 +#define EVP_F_AEAD_CHACHA20_POLY1305_SEAL 183
1513 #define EVP_F_AEAD_CTX_OPEN 185
1514 #define EVP_F_AEAD_CTX_SEAL 186
1515 #define EVP_F_AESNI_INIT_KEY 165
1516 diff --git a/crypto/evp/evp_err.c b/crypto/evp/evp_err.c
1517 index c47969c..fb747e5 100644
1518 --- a/crypto/evp/evp_err.c
1519 +++ b/crypto/evp/evp_err.c
1520 @@ -73,6 +73,9 @@ static ERR_STRING_DATA EVP_str_functs[]=
1521 {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_INIT), "AEAD_AES_128_GCM_INIT"},
1522 {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_OPEN), "AEAD_AES_128_GCM_OPEN"},
1523 {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_SEAL), "AEAD_AES_128_GCM_SEAL"},
1524 +{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_INIT), "AEAD_CHACHA20_POLY1305_INIT"},
1525 +{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_OPEN), "AEAD_CHACHA20_POLY1305_OPEN"},
1526 +{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_SEAL), "AEAD_CHACHA20_POLY1305_SEAL"},
1527 {ERR_FUNC(EVP_F_AEAD_CTX_OPEN), "AEAD_CTX_OPEN"},
1528 {ERR_FUNC(EVP_F_AEAD_CTX_SEAL), "AEAD_CTX_SEAL"},
1529 {ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"},
1530 diff --git a/crypto/poly1305/Makefile b/crypto/poly1305/Makefile
1531 new file mode 100644
1532 index 0000000..397d7cd
1533 --- /dev/null
1534 +++ b/crypto/poly1305/Makefile
1535 @@ -0,0 +1,81 @@
1536 +#
1537 +# OpenSSL/crypto/poly1305/Makefile
1538 +#
1539 +
1540 +DIR= poly1305
1541 +TOP= ../..
1542 +CC= cc
1543 +CPP= $(CC) -E
1544 +INCLUDES=
1545 +CFLAG=-g
1546 +AR= ar r
1547 +
1548 +POLY1305=poly1305_vec.o
1549 +
1550 +CFLAGS= $(INCLUDES) $(CFLAG)
1551 +ASFLAGS= $(INCLUDES) $(ASFLAG)
1552 +AFLAGS= $(ASFLAGS)
1553 +
1554 +GENERAL=Makefile
1555 +TEST=
1556 +APPS=
1557 +
1558 +LIB=$(TOP)/libcrypto.a
1559 +LIBSRC=poly1305_vec.c
1560 +LIBOBJ=$(POLY1305)
1561 +
1562 +SRC= $(LIBSRC)
1563 +
1564 +EXHEADER=poly1305.h
1565 +HEADER= $(EXHEADER)
1566 +
1567 +ALL= $(GENERAL) $(SRC) $(HEADER)
1568 +
1569 +top:
1570 + (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all)
1571 +
1572 +all: lib
1573 +
1574 +lib: $(LIBOBJ)
1575 + $(AR) $(LIB) $(LIBOBJ)
1576 + $(RANLIB) $(LIB) || echo Never mind.
1577 + @touch lib
1578 +
1579 +files:
1580 + $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
1581 +
1582 +links:
1583 + @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER)
1584 + @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST)
1585 + @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS)
1586 +
1587 +install:
1588 + @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile...
1589 + @headerlist="$(EXHEADER)"; for i in $$headerlist ; \
1590 + do \
1591 + (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \
1592 + chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \
1593 + done;
1594 +
1595 +tags:
1596 + ctags $(SRC)
1597 +
1598 +tests:
1599 +
1600 +lint:
1601 + lint -DLINT $(INCLUDES) $(SRC)>fluff
1602 +
1603 +depend:
1604 + @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile...
1605 + $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC)
1606 +
1607 +dclean:
1608 + $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKE FILE) >Makefile.new
1609 + mv -f Makefile.new $(MAKEFILE)
1610 +
1611 +clean:
1612 + rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff
1613 +
1614 +# DO NOT DELETE THIS LINE -- make depend depends on it.
1615 +
1616 +poly1305_vec.o: ../../include/openssl/poly1305.h poly1305_vec.c
1617 diff --git a/crypto/poly1305/poly1305.c b/crypto/poly1305/poly1305.c
1618 new file mode 100644
1619 index 0000000..2e5621d
1620 --- /dev/null
1621 +++ b/crypto/poly1305/poly1305.c
1622 @@ -0,0 +1,320 @@
1623 +/* ====================================================================
1624 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved.
1625 + *
1626 + * Redistribution and use in source and binary forms, with or without
1627 + * modification, are permitted provided that the following conditions
1628 + * are met:
1629 + *
1630 + * 1. Redistributions of source code must retain the above copyright
1631 + * notice, this list of conditions and the following disclaimer.
1632 + *
1633 + * 2. Redistributions in binary form must reproduce the above copyright
1634 + * notice, this list of conditions and the following disclaimer in
1635 + * the documentation and/or other materials provided with the
1636 + * distribution.
1637 + *
1638 + * 3. All advertising materials mentioning features or use of this
1639 + * software must display the following acknowledgment:
1640 + * "This product includes software developed by the OpenSSL Project
1641 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
1642 + *
1643 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
1644 + * endorse or promote products derived from this software without
1645 + * prior written permission. For written permission, please contact
1646 + * licensing@OpenSSL.org.
1647 + *
1648 + * 5. Products derived from this software may not be called "OpenSSL"
1649 + * nor may "OpenSSL" appear in their names without prior written
1650 + * permission of the OpenSSL Project.
1651 + *
1652 + * 6. Redistributions of any form whatsoever must retain the following
1653 + * acknowledgment:
1654 + * "This product includes software developed by the OpenSSL Project
1655 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
1656 + *
1657 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
1658 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1659 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
1660 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
1661 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1662 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
1663 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
1664 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1665 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
1666 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
1667 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
1668 + * OF THE POSSIBILITY OF SUCH DAMAGE.
1669 + * ====================================================================
1670 + */
1671 +
1672 +/* This implementation of poly1305 is by Andrew Moon
1673 + * (https://github.com/floodyberry/poly1305-donna) and released as public
1674 + * domain. */
1675 +
1676 +#include <string.h>
1677 +#include <stdint.h>
1678 +#include <openssl/opensslconf.h>
1679 +
1680 +#if !defined(OPENSSL_NO_POLY1305)
1681 +
1682 +#include <openssl/poly1305.h>
1683 +
1684 +#if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_ 64__)
1685 +/* We can assume little-endian. */
1686 +static uint32_t U8TO32_LE(const unsigned char *m)
1687 + {
1688 + uint32_t r;
1689 + memcpy(&r, m, sizeof(r));
1690 + return r;
1691 + }
1692 +
1693 +static void U32TO8_LE(unsigned char *m, uint32_t v)
1694 + {
1695 + memcpy(m, &v, sizeof(v));
1696 + }
1697 +#else
1698 +static uint32_t U8TO32_LE(const unsigned char *m)
1699 + {
1700 + return (uint32_t)m[0] |
1701 + (uint32_t)m[1] << 8 |
1702 + (uint32_t)m[2] << 16 |
1703 + (uint32_t)m[3] << 24;
1704 + }
1705 +
1706 +static void U32TO8_LE(unsigned char *m, uint32_t v)
1707 + {
1708 + m[0] = v;
1709 + m[1] = v >> 8;
1710 + m[2] = v >> 16;
1711 + m[3] = v >> 24;
1712 + }
1713 +#endif
1714 +
1715 +static uint64_t
1716 +mul32x32_64(uint32_t a, uint32_t b)
1717 + {
1718 + return (uint64_t)a * b;
1719 + }
1720 +
1721 +
1722 +struct poly1305_state_st
1723 + {
1724 + uint32_t r0,r1,r2,r3,r4;
1725 + uint32_t s1,s2,s3,s4;
1726 + uint32_t h0,h1,h2,h3,h4;
1727 + unsigned char buf[16];
1728 + unsigned int buf_used;
1729 + unsigned char key[16];
1730 + };
1731 +
1732 +/* poly1305_blocks updates |state| given some amount of input data. This
1733 + * function may only be called with a |len| that is not a multiple of 16 at the
1734 + * end of the data. Otherwise the input must be buffered into 16 byte blocks.
1735 + * */
1736 +static void poly1305_update(struct poly1305_state_st *state,
1737 + const unsigned char *in, size_t len)
1738 + {
1739 + uint32_t t0,t1,t2,t3;
1740 + uint64_t t[5];
1741 + uint32_t b;
1742 + uint64_t c;
1743 + size_t j;
1744 + unsigned char mp[16];
1745 +
1746 + if (len < 16)
1747 + goto poly1305_donna_atmost15bytes;
1748 +
1749 +poly1305_donna_16bytes:
1750 + t0 = U8TO32_LE(in);
1751 + t1 = U8TO32_LE(in+4);
1752 + t2 = U8TO32_LE(in+8);
1753 + t3 = U8TO32_LE(in+12);
1754 +
1755 + in += 16;
1756 + len -= 16;
1757 +
1758 + state->h0 += t0 & 0x3ffffff;
1759 + state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff;
1760 + state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff;
1761 + state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff;
1762 + state->h4 += (t3 >> 8) | (1 << 24);
1763 +
1764 +poly1305_donna_mul:
1765 + t[0] = mul32x32_64(state->h0,state->r0) +
1766 + mul32x32_64(state->h1,state->s4) +
1767 + mul32x32_64(state->h2,state->s3) +
1768 + mul32x32_64(state->h3,state->s2) +
1769 + mul32x32_64(state->h4,state->s1);
1770 + t[1] = mul32x32_64(state->h0,state->r1) +
1771 + mul32x32_64(state->h1,state->r0) +
1772 + mul32x32_64(state->h2,state->s4) +
1773 + mul32x32_64(state->h3,state->s3) +
1774 + mul32x32_64(state->h4,state->s2);
1775 + t[2] = mul32x32_64(state->h0,state->r2) +
1776 + mul32x32_64(state->h1,state->r1) +
1777 + mul32x32_64(state->h2,state->r0) +
1778 + mul32x32_64(state->h3,state->s4) +
1779 + mul32x32_64(state->h4,state->s3);
1780 + t[3] = mul32x32_64(state->h0,state->r3) +
1781 + mul32x32_64(state->h1,state->r2) +
1782 + mul32x32_64(state->h2,state->r1) +
1783 + mul32x32_64(state->h3,state->r0) +
1784 + mul32x32_64(state->h4,state->s4);
1785 + t[4] = mul32x32_64(state->h0,state->r4) +
1786 + mul32x32_64(state->h1,state->r3) +
1787 + mul32x32_64(state->h2,state->r2) +
1788 + mul32x32_64(state->h3,state->r1) +
1789 + mul32x32_64(state->h4,state->r0);
1790 +
1791 + state->h0 = (uint32_t)t[0] & 0x3ffffff; c = (t[0] > > 26);
1792 + t[1] += c; state->h1 = (uint32_t)t[1] & 0x3ffffff; b = (uint32_t)(t[1] > > 26);
1793 + t[2] += b; state->h2 = (uint32_t)t[2] & 0x3ffffff; b = (uint32_t)(t[2] > > 26);
1794 + t[3] += b; state->h3 = (uint32_t)t[3] & 0x3ffffff; b = (uint32_t)(t[3] > > 26);
1795 + t[4] += b; state->h4 = (uint32_t)t[4] & 0x3ffffff; b = (uint32_t)(t[4] > > 26);
1796 + state->h0 += b * 5;
1797 +
1798 + if (len >= 16)
1799 + goto poly1305_donna_16bytes;
1800 +
1801 + /* final bytes */
1802 +poly1305_donna_atmost15bytes:
1803 + if (!len)
1804 + return;
1805 +
1806 + for (j = 0; j < len; j++)
1807 + mp[j] = in[j];
1808 + mp[j++] = 1;
1809 + for (; j < 16; j++)
1810 + mp[j] = 0;
1811 + len = 0;
1812 +
1813 + t0 = U8TO32_LE(mp+0);
1814 + t1 = U8TO32_LE(mp+4);
1815 + t2 = U8TO32_LE(mp+8);
1816 + t3 = U8TO32_LE(mp+12);
1817 +
1818 + state->h0 += t0 & 0x3ffffff;
1819 + state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff;
1820 + state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff;
1821 + state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff;
1822 + state->h4 += (t3 >> 8);
1823 +
1824 + goto poly1305_donna_mul;
1825 + }
1826 +
1827 +void CRYPTO_poly1305_init(poly1305_state *statep, const unsigned char key[32])
1828 + {
1829 + struct poly1305_state_st *state = (struct poly1305_state_st*) statep;
1830 + uint32_t t0,t1,t2,t3;
1831 +
1832 + t0 = U8TO32_LE(key+0);
1833 + t1 = U8TO32_LE(key+4);
1834 + t2 = U8TO32_LE(key+8);
1835 + t3 = U8TO32_LE(key+12);
1836 +
1837 + /* precompute multipliers */
1838 + state->r0 = t0 & 0x3ffffff; t0 >>= 26; t0 |= t1 << 6;
1839 + state->r1 = t0 & 0x3ffff03; t1 >>= 20; t1 |= t2 << 12;
1840 + state->r2 = t1 & 0x3ffc0ff; t2 >>= 14; t2 |= t3 << 18;
1841 + state->r3 = t2 & 0x3f03fff; t3 >>= 8;
1842 + state->r4 = t3 & 0x00fffff;
1843 +
1844 + state->s1 = state->r1 * 5;
1845 + state->s2 = state->r2 * 5;
1846 + state->s3 = state->r3 * 5;
1847 + state->s4 = state->r4 * 5;
1848 +
1849 + /* init state */
1850 + state->h0 = 0;
1851 + state->h1 = 0;
1852 + state->h2 = 0;
1853 + state->h3 = 0;
1854 + state->h4 = 0;
1855 +
1856 + state->buf_used = 0;
1857 + memcpy(state->key, key + 16, sizeof(state->key));
1858 + }
1859 +
1860 +void CRYPTO_poly1305_update(poly1305_state *statep, const unsigned char *in,
1861 + size_t in_len)
1862 + {
1863 + unsigned int i;
1864 + struct poly1305_state_st *state = (struct poly1305_state_st*) statep;
1865 +
1866 + if (state->buf_used)
1867 + {
1868 + unsigned int todo = 16 - state->buf_used;
1869 + if (todo > in_len)
1870 + todo = in_len;
1871 + for (i = 0; i < todo; i++)
1872 + state->buf[state->buf_used + i] = in[i];
1873 + state->buf_used += todo;
1874 + in_len -= todo;
1875 + in += todo;
1876 +
1877 + if (state->buf_used == 16)
1878 + {
1879 + poly1305_update(state, state->buf, 16);
1880 + state->buf_used = 0;
1881 + }
1882 + }
1883 +
1884 + if (in_len >= 16)
1885 + {
1886 + size_t todo = in_len & ~0xf;
1887 + poly1305_update(state, in, todo);
1888 + in += todo;
1889 + in_len &= 0xf;
1890 + }
1891 +
1892 + if (in_len)
1893 + {
1894 + for (i = 0; i < in_len; i++)
1895 + state->buf[i] = in[i];
1896 + state->buf_used = in_len;
1897 + }
1898 + }
1899 +
1900 +void CRYPTO_poly1305_finish(poly1305_state *statep, unsigned char mac[16])
1901 + {
1902 + struct poly1305_state_st *state = (struct poly1305_state_st*) statep;
1903 + uint64_t f0,f1,f2,f3;
1904 + uint32_t g0,g1,g2,g3,g4;
1905 + uint32_t b, nb;
1906 +
1907 + if (state->buf_used)
1908 + poly1305_update(state, state->buf, state->buf_used);
1909 +
1910 + b = state->h0 >> 26; state->h0 = state->h0 & 0x3ffff ff;
1911 + state->h1 += b; b = state->h1 >> 26; state->h1 = state->h1 & 0x3ffff ff;
1912 + state->h2 += b; b = state->h2 >> 26; state->h2 = state->h2 & 0x3ffff ff;
1913 + state->h3 += b; b = state->h3 >> 26; state->h3 = state->h3 & 0x3ffff ff;
1914 + state->h4 += b; b = state->h4 >> 26; state->h4 = state->h4 & 0x3ffff ff;
1915 + state->h0 += b * 5;
1916 +
1917 + g0 = state->h0 + 5; b = g0 >> 26; g0 &= 0x3ffffff;
1918 + g1 = state->h1 + b; b = g1 >> 26; g1 &= 0x3ffffff;
1919 + g2 = state->h2 + b; b = g2 >> 26; g2 &= 0x3ffffff;
1920 + g3 = state->h3 + b; b = g3 >> 26; g3 &= 0x3ffffff;
1921 + g4 = state->h4 + b - (1 << 26);
1922 +
1923 + b = (g4 >> 31) - 1;
1924 + nb = ~b;
1925 + state->h0 = (state->h0 & nb) | (g0 & b);
1926 + state->h1 = (state->h1 & nb) | (g1 & b);
1927 + state->h2 = (state->h2 & nb) | (g2 & b);
1928 + state->h3 = (state->h3 & nb) | (g3 & b);
1929 + state->h4 = (state->h4 & nb) | (g4 & b);
1930 +
1931 + f0 = ((state->h0 ) | (state->h1 << 26)) + (uint64_t)U8TO32_LE(&stat e->key[0]);
1932 + f1 = ((state->h1 >> 6) | (state->h2 << 20)) + (uint64_t)U8TO32_LE(&stat e->key[4]);
1933 + f2 = ((state->h2 >> 12) | (state->h3 << 14)) + (uint64_t)U8TO32_LE(&stat e->key[8]);
1934 + f3 = ((state->h3 >> 18) | (state->h4 << 8)) + (uint64_t)U8TO32_LE(&stat e->key[12]);
1935 +
1936 + U32TO8_LE(&mac[ 0], f0); f1 += (f0 >> 32);
1937 + U32TO8_LE(&mac[ 4], f1); f2 += (f1 >> 32);
1938 + U32TO8_LE(&mac[ 8], f2); f3 += (f2 >> 32);
1939 + U32TO8_LE(&mac[12], f3);
1940 + }
1941 +
1942 +#endif /* !OPENSSL_NO_POLY1305 */
1943 diff --git a/crypto/poly1305/poly1305.h b/crypto/poly1305/poly1305.h
1944 new file mode 100644
1945 index 0000000..28f85ed
1946 --- /dev/null
1947 +++ b/crypto/poly1305/poly1305.h
1948 @@ -0,0 +1,88 @@
1949 +/*
1950 + * Poly1305
1951 + *
1952 + * Created on: Jun, 2013
1953 + * Author: Elie Bursztein (elieb@google.com)
1954 + *
1955 + * Adapted from the estream code by D. Bernstein.
1956 + */
1957 +/* ====================================================================
1958 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved.
1959 + *
1960 + * Redistribution and use in source and binary forms, with or without
1961 + * modification, are permitted provided that the following conditions
1962 + * are met:
1963 + *
1964 + * 1. Redistributions of source code must retain the above copyright
1965 + * notice, this list of conditions and the following disclaimer.
1966 + *
1967 + * 2. Redistributions in binary form must reproduce the above copyright
1968 + * notice, this list of conditions and the following disclaimer in
1969 + * the documentation and/or other materials provided with the
1970 + * distribution.
1971 + *
1972 + * 3. All advertising materials mentioning features or use of this
1973 + * software must display the following acknowledgment:
1974 + * "This product includes software developed by the OpenSSL Project
1975 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
1976 + *
1977 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
1978 + * endorse or promote products derived from this software without
1979 + * prior written permission. For written permission, please contact
1980 + * licensing@OpenSSL.org.
1981 + *
1982 + * 5. Products derived from this software may not be called "OpenSSL"
1983 + * nor may "OpenSSL" appear in their names without prior written
1984 + * permission of the OpenSSL Project.
1985 + *
1986 + * 6. Redistributions of any form whatsoever must retain the following
1987 + * acknowledgment:
1988 + * "This product includes software developed by the OpenSSL Project
1989 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
1990 + *
1991 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
1992 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1993 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
1994 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
1995 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1996 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
1997 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
1998 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1999 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
2000 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
2001 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
2002 + * OF THE POSSIBILITY OF SUCH DAMAGE.
2003 + * ====================================================================
2004 + */
2005 +
2006 +#ifndef HEADER_POLY1305_H_
2007 +#define HEADER_POLY1305_H_
2008 +
2009 +#include <stdint.h>
2010 +#include <openssl/opensslconf.h>
2011 +
2012 +#if defined(OPENSSL_NO_POLY1305)
2013 +#error Poly1305 support is disabled.
2014 +#endif
2015 +
2016 +typedef unsigned char poly1305_state[512];
2017 +
2018 +/* poly1305_init sets up |state| so that it can be used to calculate an
2019 + * authentication tag with the one-time key |key|. Note that |key| is a
2020 + * one-time key and therefore there is no `reset' method because that would
2021 + * enable several messages to be authenticated with the same key. */
2022 +extern void CRYPTO_poly1305_init(poly1305_state* state,
2023 + const unsigned char key[32]);
2024 +
2025 +/* poly1305_update processes |in_len| bytes from |in|. It can be called zero or
2026 + * more times after poly1305_init. */
2027 +extern void CRYPTO_poly1305_update(poly1305_state* state,
2028 + const unsigned char *in,
2029 + size_t in_len);
2030 +
2031 +/* poly1305_finish completes the poly1305 calculation and writes a 16 byte
2032 + * authentication tag to |mac|. */
2033 +extern void CRYPTO_poly1305_finish(poly1305_state* state,
2034 + unsigned char mac[16]);
2035 +
2036 +#endif /* HEADER_POLY1305_H_ */
2037 diff --git a/crypto/poly1305/poly1305_arm.c b/crypto/poly1305/poly1305_arm.c
2038 new file mode 100644
2039 index 0000000..adcef35
2040 --- /dev/null
2041 +++ b/crypto/poly1305/poly1305_arm.c
2042 @@ -0,0 +1,335 @@
2043 +/* ====================================================================
2044 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved.
2045 + *
2046 + * Redistribution and use in source and binary forms, with or without
2047 + * modification, are permitted provided that the following conditions
2048 + * are met:
2049 + *
2050 + * 1. Redistributions of source code must retain the above copyright
2051 + * notice, this list of conditions and the following disclaimer.
2052 + *
2053 + * 2. Redistributions in binary form must reproduce the above copyright
2054 + * notice, this list of conditions and the following disclaimer in
2055 + * the documentation and/or other materials provided with the
2056 + * distribution.
2057 + *
2058 + * 3. All advertising materials mentioning features or use of this
2059 + * software must display the following acknowledgment:
2060 + * "This product includes software developed by the OpenSSL Project
2061 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
2062 + *
2063 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
2064 + * endorse or promote products derived from this software without
2065 + * prior written permission. For written permission, please contact
2066 + * licensing@OpenSSL.org.
2067 + *
2068 + * 5. Products derived from this software may not be called "OpenSSL"
2069 + * nor may "OpenSSL" appear in their names without prior written
2070 + * permission of the OpenSSL Project.
2071 + *
2072 + * 6. Redistributions of any form whatsoever must retain the following
2073 + * acknowledgment:
2074 + * "This product includes software developed by the OpenSSL Project
2075 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
2076 + *
2077 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
2078 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2079 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
2080 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
2081 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
2082 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
2083 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
2084 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2085 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
2086 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
2087 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
2088 + * OF THE POSSIBILITY OF SUCH DAMAGE.
2089 + * ====================================================================
2090 + */
2091 +
2092 +/* This implementation was taken from the public domain, neon2 version in
2093 + * SUPERCOP by D. J. Bernstein and Peter Schwabe. */
2094 +
2095 +#include <stdint.h>
2096 +
2097 +#include <openssl/poly1305.h>
2098 +
2099 +#if !defined(OPENSSL_NO_POLY1305)
2100 +
2101 +typedef struct {
2102 + uint32_t v[12]; /* for alignment; only using 10 */
2103 +} fe1305x2;
2104 +
2105 +#define addmulmod openssl_poly1305_neon2_addmulmod
2106 +#define blocks openssl_poly1305_neon2_blocks
2107 +
2108 +extern void addmulmod(fe1305x2 *r, const fe1305x2 *x, const fe1305x2 *y, const fe1305x2 *c);
2109 +
2110 +extern int blocks(fe1305x2 *h, const fe1305x2 *precomp, const unsigned char *in , unsigned int inlen);
2111 +
2112 +static void freeze(fe1305x2 *r)
2113 + {
2114 + int i;
2115 +
2116 + uint32_t x0 = r->v[0];
2117 + uint32_t x1 = r->v[2];
2118 + uint32_t x2 = r->v[4];
2119 + uint32_t x3 = r->v[6];
2120 + uint32_t x4 = r->v[8];
2121 + uint32_t y0;
2122 + uint32_t y1;
2123 + uint32_t y2;
2124 + uint32_t y3;
2125 + uint32_t y4;
2126 + uint32_t swap;
2127 +
2128 + for (i = 0;i < 3;++i)
2129 + {
2130 + x1 += x0 >> 26; x0 &= 0x3ffffff;
2131 + x2 += x1 >> 26; x1 &= 0x3ffffff;
2132 + x3 += x2 >> 26; x2 &= 0x3ffffff;
2133 + x4 += x3 >> 26; x3 &= 0x3ffffff;
2134 + x0 += 5*(x4 >> 26); x4 &= 0x3ffffff;
2135 + }
2136 +
2137 + y0 = x0 + 5;
2138 + y1 = x1 + (y0 >> 26); y0 &= 0x3ffffff;
2139 + y2 = x2 + (y1 >> 26); y1 &= 0x3ffffff;
2140 + y3 = x3 + (y2 >> 26); y2 &= 0x3ffffff;
2141 + y4 = x4 + (y3 >> 26); y3 &= 0x3ffffff;
2142 + swap = -(y4 >> 26); y4 &= 0x3ffffff;
2143 +
2144 + y0 ^= x0;
2145 + y1 ^= x1;
2146 + y2 ^= x2;
2147 + y3 ^= x3;
2148 + y4 ^= x4;
2149 +
2150 + y0 &= swap;
2151 + y1 &= swap;
2152 + y2 &= swap;
2153 + y3 &= swap;
2154 + y4 &= swap;
2155 +
2156 + y0 ^= x0;
2157 + y1 ^= x1;
2158 + y2 ^= x2;
2159 + y3 ^= x3;
2160 + y4 ^= x4;
2161 +
2162 + r->v[0] = y0;
2163 + r->v[2] = y1;
2164 + r->v[4] = y2;
2165 + r->v[6] = y3;
2166 + r->v[8] = y4;
2167 + }
2168 +
2169 +static void fe1305x2_tobytearray(unsigned char *r, fe1305x2 *x)
2170 + {
2171 + uint32_t x0 = x->v[0];
2172 + uint32_t x1 = x->v[2];
2173 + uint32_t x2 = x->v[4];
2174 + uint32_t x3 = x->v[6];
2175 + uint32_t x4 = x->v[8];
2176 +
2177 + x1 += x0 >> 26;
2178 + x0 &= 0x3ffffff;
2179 + x2 += x1 >> 26;
2180 + x1 &= 0x3ffffff;
2181 + x3 += x2 >> 26;
2182 + x2 &= 0x3ffffff;
2183 + x4 += x3 >> 26;
2184 + x3 &= 0x3ffffff;
2185 +
2186 + *(uint32_t *) r = x0 + (x1 << 26);
2187 + *(uint32_t *) (r + 4) = (x1 >> 6) + (x2 << 20);
2188 + *(uint32_t *) (r + 8) = (x2 >> 12) + (x3 << 14);
2189 + *(uint32_t *) (r + 12) = (x3 >> 18) + (x4 << 8);
2190 + }
2191 +
2192 +/* load32 exists to avoid breaking strict aliasing rules in
2193 + * fe1305x2_frombytearray. */
2194 +static uint32_t load32(unsigned char *t)
2195 + {
2196 + uint32_t tmp;
2197 + memcpy(&tmp, t, sizeof(tmp));
2198 + return tmp;
2199 + }
2200 +
2201 +static void fe1305x2_frombytearray(fe1305x2 *r, const unsigned char *x, unsigne d long long xlen)
2202 + {
2203 + int i;
2204 + unsigned char t[17];
2205 +
2206 + for (i = 0; (i < 16) && (i < xlen); i++)
2207 + t[i] = x[i];
2208 + xlen -= i;
2209 + x += i;
2210 + t[i++] = 1;
2211 + for (; i<17; i++)
2212 + t[i] = 0;
2213 +
2214 + r->v[0] = 0x3ffffff & load32(t);
2215 + r->v[2] = 0x3ffffff & (load32(t + 3) >> 2);
2216 + r->v[4] = 0x3ffffff & (load32(t + 6) >> 4);
2217 + r->v[6] = 0x3ffffff & (load32(t + 9) >> 6);
2218 + r->v[8] = load32(t + 13);
2219 +
2220 + if (xlen)
2221 + {
2222 + for (i = 0; (i < 16) && (i < xlen); i++)
2223 + t[i] = x[i];
2224 + t[i++] = 1;
2225 + for (; i<17; i++)
2226 + t[i] = 0;
2227 +
2228 + r->v[1] = 0x3ffffff & load32(t);
2229 + r->v[3] = 0x3ffffff & (load32(t + 3) >> 2);
2230 + r->v[5] = 0x3ffffff & (load32(t + 6) >> 4);
2231 + r->v[7] = 0x3ffffff & (load32(t + 9) >> 6);
2232 + r->v[9] = load32(t + 13);
2233 + }
2234 + else
2235 + r->v[1] = r->v[3] = r->v[5] = r->v[7] = r->v[9] = 0;
2236 + }
2237 +
2238 +static const fe1305x2 zero __attribute__ ((aligned (16)));
2239 +
2240 +struct poly1305_state_st {
2241 + unsigned char data[sizeof(fe1305x2[5]) + 128];
2242 + unsigned char buf[32];
2243 + unsigned int buf_used;
2244 + unsigned char key[16];
2245 +};
2246 +
2247 +void CRYPTO_poly1305_init(poly1305_state *state, const unsigned char key[32])
2248 + {
2249 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state);
2250 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data)));
2251 + fe1305x2 *const h = r + 1;
2252 + fe1305x2 *const c = h + 1;
2253 + fe1305x2 *const precomp = c + 1;
2254 + unsigned int j;
2255 +
2256 + r->v[1] = r->v[0] = 0x3ffffff & *(uint32_t *) key;
2257 + r->v[3] = r->v[2] = 0x3ffff03 & ((*(uint32_t *) (key + 3)) >> 2);
2258 + r->v[5] = r->v[4] = 0x3ffc0ff & ((*(uint32_t *) (key + 6)) >> 4);
2259 + r->v[7] = r->v[6] = 0x3f03fff & ((*(uint32_t *) (key + 9)) >> 6);
2260 + r->v[9] = r->v[8] = 0x00fffff & ((*(uint32_t *) (key + 12)) >> 8);
2261 +
2262 + for (j = 0; j < 10; j++)
2263 + h->v[j] = 0; /* XXX: should fast-forward a bit */
2264 +
2265 + addmulmod(precomp,r,r,&zero); /* precompute r^2 */
2266 + addmulmod(precomp + 1,precomp,precomp,&zero); /* precompute r^4 */
2267 +
2268 + memcpy(st->key, key + 16, 16);
2269 + st->buf_used = 0;
2270 + }
2271 +
2272 +void CRYPTO_poly1305_update(poly1305_state *state, const unsigned char *in, siz e_t in_len)
2273 + {
2274 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state);
2275 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data)));
2276 + fe1305x2 *const h = r + 1;
2277 + fe1305x2 *const c = h + 1;
2278 + fe1305x2 *const precomp = c + 1;
2279 + unsigned int i;
2280 + unsigned char data[sizeof(fe1305x2) + 16];
2281 + fe1305x2 *const r2r = (fe1305x2 *) (data + (15 & (-(int) data)));
2282 +
2283 + if (st->buf_used)
2284 + {
2285 + unsigned int todo = 32 - st->buf_used;
2286 + if (todo > in_len)
2287 + todo = in_len;
2288 + for (i = 0; i < todo; i++)
2289 + st->buf[st->buf_used + i] = in[i];
2290 + st->buf_used += todo;
2291 + in_len -= todo;
2292 + in += todo;
2293 +
2294 + if (st->buf_used == sizeof(st->buf))
2295 + {
2296 + fe1305x2_frombytearray(c, st->buf, sizeof(st->buf));
2297 + r2r->v[0] = precomp->v[0];
2298 + r2r->v[2] = precomp->v[2];
2299 + r2r->v[4] = precomp->v[4];
2300 + r2r->v[6] = precomp->v[6];
2301 + r2r->v[8] = precomp->v[8];
2302 + r2r->v[1] = r->v[1];
2303 + r2r->v[3] = r->v[3];
2304 + r2r->v[5] = r->v[5];
2305 + r2r->v[7] = r->v[7];
2306 + r2r->v[9] = r->v[9];
2307 + addmulmod(h,h,r2r,c);
2308 + st->buf_used = 0;
2309 + }
2310 + }
2311 +
2312 + while (in_len > 32)
2313 + {
2314 + unsigned int tlen = 1048576;
2315 + if (in_len < 1048576)
2316 + tlen = in_len;
2317 + tlen -= blocks(h, precomp, in, tlen);
2318 + in_len -= tlen;
2319 + in += tlen;
2320 + }
2321 +
2322 + if (in_len)
2323 + {
2324 + for (i = 0; i < in_len; i++)
2325 + st->buf[i] = in[i];
2326 + st->buf_used = in_len;
2327 + }
2328 + }
2329 +
2330 +void CRYPTO_poly1305_finish(poly1305_state* state, unsigned char mac[16])
2331 + {
2332 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state);
2333 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data)));
2334 + fe1305x2 *const h = r + 1;
2335 + fe1305x2 *const c = h + 1;
2336 + fe1305x2 *const precomp = c + 1;
2337 +
2338 + if (st->buf_used > 16)
2339 + {
2340 + fe1305x2_frombytearray(c, st->buf, st->buf_used);
2341 + precomp->v[1] = r->v[1];
2342 + precomp->v[3] = r->v[3];
2343 + precomp->v[5] = r->v[5];
2344 + precomp->v[7] = r->v[7];
2345 + precomp->v[9] = r->v[9];
2346 + addmulmod(h,h,precomp,c);
2347 + }
2348 + else if (st->buf_used > 0)
2349 + {
2350 + fe1305x2_frombytearray(c, st->buf, st->buf_used);
2351 + r->v[1] = 1;
2352 + r->v[3] = 0;
2353 + r->v[5] = 0;
2354 + r->v[7] = 0;
2355 + r->v[9] = 0;
2356 + addmulmod(h,h,r,c);
2357 + }
2358 +
2359 + h->v[0] += h->v[1];
2360 + h->v[2] += h->v[3];
2361 + h->v[4] += h->v[5];
2362 + h->v[6] += h->v[7];
2363 + h->v[8] += h->v[9];
2364 + freeze(h);
2365 +
2366 + fe1305x2_frombytearray(c, st->key, 16);
2367 + c->v[8] ^= (1 << 24);
2368 +
2369 + h->v[0] += c->v[0];
2370 + h->v[2] += c->v[2];
2371 + h->v[4] += c->v[4];
2372 + h->v[6] += c->v[6];
2373 + h->v[8] += c->v[8];
2374 + fe1305x2_tobytearray(mac, h);
2375 + }
2376 +
2377 +#endif /* !OPENSSL_NO_POLY1305 */
2378 diff --git a/crypto/poly1305/poly1305_arm_asm.s b/crypto/poly1305/poly1305_arm_a sm.s
2379 new file mode 100644
2380 index 0000000..449d16f
2381 --- /dev/null
2382 +++ b/crypto/poly1305/poly1305_arm_asm.s
2383 @@ -0,0 +1,2009 @@
2384 +# This implementation was taken from the public domain, neon2 version in
2385 +# SUPERCOP by D. J. Bernstein and Peter Schwabe.
2386 +
2387 +# qhasm: int32 input_0
2388 +
2389 +# qhasm: int32 input_1
2390 +
2391 +# qhasm: int32 input_2
2392 +
2393 +# qhasm: int32 input_3
2394 +
2395 +# qhasm: stack32 input_4
2396 +
2397 +# qhasm: stack32 input_5
2398 +
2399 +# qhasm: stack32 input_6
2400 +
2401 +# qhasm: stack32 input_7
2402 +
2403 +# qhasm: int32 caller_r4
2404 +
2405 +# qhasm: int32 caller_r5
2406 +
2407 +# qhasm: int32 caller_r6
2408 +
2409 +# qhasm: int32 caller_r7
2410 +
2411 +# qhasm: int32 caller_r8
2412 +
2413 +# qhasm: int32 caller_r9
2414 +
2415 +# qhasm: int32 caller_r10
2416 +
2417 +# qhasm: int32 caller_r11
2418 +
2419 +# qhasm: int32 caller_r12
2420 +
2421 +# qhasm: int32 caller_r14
2422 +
2423 +# qhasm: reg128 caller_q4
2424 +
2425 +# qhasm: reg128 caller_q5
2426 +
2427 +# qhasm: reg128 caller_q6
2428 +
2429 +# qhasm: reg128 caller_q7
2430 +
2431 +# qhasm: startcode
2432 +.fpu neon
2433 +.text
2434 +
2435 +# qhasm: reg128 r0
2436 +
2437 +# qhasm: reg128 r1
2438 +
2439 +# qhasm: reg128 r2
2440 +
2441 +# qhasm: reg128 r3
2442 +
2443 +# qhasm: reg128 r4
2444 +
2445 +# qhasm: reg128 x01
2446 +
2447 +# qhasm: reg128 x23
2448 +
2449 +# qhasm: reg128 x4
2450 +
2451 +# qhasm: reg128 y0
2452 +
2453 +# qhasm: reg128 y12
2454 +
2455 +# qhasm: reg128 y34
2456 +
2457 +# qhasm: reg128 5y12
2458 +
2459 +# qhasm: reg128 5y34
2460 +
2461 +# qhasm: stack128 y0_stack
2462 +
2463 +# qhasm: stack128 y12_stack
2464 +
2465 +# qhasm: stack128 y34_stack
2466 +
2467 +# qhasm: stack128 5y12_stack
2468 +
2469 +# qhasm: stack128 5y34_stack
2470 +
2471 +# qhasm: reg128 z0
2472 +
2473 +# qhasm: reg128 z12
2474 +
2475 +# qhasm: reg128 z34
2476 +
2477 +# qhasm: reg128 5z12
2478 +
2479 +# qhasm: reg128 5z34
2480 +
2481 +# qhasm: stack128 z0_stack
2482 +
2483 +# qhasm: stack128 z12_stack
2484 +
2485 +# qhasm: stack128 z34_stack
2486 +
2487 +# qhasm: stack128 5z12_stack
2488 +
2489 +# qhasm: stack128 5z34_stack
2490 +
2491 +# qhasm: stack128 two24
2492 +
2493 +# qhasm: int32 ptr
2494 +
2495 +# qhasm: reg128 c01
2496 +
2497 +# qhasm: reg128 c23
2498 +
2499 +# qhasm: reg128 d01
2500 +
2501 +# qhasm: reg128 d23
2502 +
2503 +# qhasm: reg128 t0
2504 +
2505 +# qhasm: reg128 t1
2506 +
2507 +# qhasm: reg128 t2
2508 +
2509 +# qhasm: reg128 t3
2510 +
2511 +# qhasm: reg128 t4
2512 +
2513 +# qhasm: reg128 mask
2514 +
2515 +# qhasm: reg128 u0
2516 +
2517 +# qhasm: reg128 u1
2518 +
2519 +# qhasm: reg128 u2
2520 +
2521 +# qhasm: reg128 u3
2522 +
2523 +# qhasm: reg128 u4
2524 +
2525 +# qhasm: reg128 v01
2526 +
2527 +# qhasm: reg128 mid
2528 +
2529 +# qhasm: reg128 v23
2530 +
2531 +# qhasm: reg128 v4
2532 +
2533 +# qhasm: int32 len
2534 +
2535 +# qhasm: qpushenter crypto_onetimeauth_poly1305_neon2_blocks
2536 +.align 4
2537 +.global openssl_poly1305_neon2_blocks
2538 +.type openssl_poly1305_neon2_blocks STT_FUNC
2539 +openssl_poly1305_neon2_blocks:
2540 +vpush {q4,q5,q6,q7}
2541 +mov r12,sp
2542 +sub sp,sp,#192
2543 +and sp,sp,#0xffffffe0
2544 +
2545 +# qhasm: len = input_3
2546 +# asm 1: mov >len=int32#4,<input_3=int32#4
2547 +# asm 2: mov >len=r3,<input_3=r3
2548 +mov r3,r3
2549 +
2550 +# qhasm: new y0
2551 +
2552 +# qhasm: y0 = mem64[input_1]y0[1]; input_1 += 8
2553 +# asm 1: vld1.8 {<y0=reg128#1%bot},[<input_1=int32#2]!
2554 +# asm 2: vld1.8 {<y0=d0},[<input_1=r1]!
2555 +vld1.8 {d0},[r1]!
2556 +
2557 +# qhasm: y12 = mem128[input_1]; input_1 += 16
2558 +# asm 1: vld1.8 {>y12=reg128#2%bot->y12=reg128#2%top},[<input_1=int32#2]!
2559 +# asm 2: vld1.8 {>y12=d2->y12=d3},[<input_1=r1]!
2560 +vld1.8 {d2-d3},[r1]!
2561 +
2562 +# qhasm: y34 = mem128[input_1]; input_1 += 16
2563 +# asm 1: vld1.8 {>y34=reg128#3%bot->y34=reg128#3%top},[<input_1=int32#2]!
2564 +# asm 2: vld1.8 {>y34=d4->y34=d5},[<input_1=r1]!
2565 +vld1.8 {d4-d5},[r1]!
2566 +
2567 +# qhasm: input_1 += 8
2568 +# asm 1: add >input_1=int32#2,<input_1=int32#2,#8
2569 +# asm 2: add >input_1=r1,<input_1=r1,#8
2570 +add r1,r1,#8
2571 +
2572 +# qhasm: new z0
2573 +
2574 +# qhasm: z0 = mem64[input_1]z0[1]; input_1 += 8
2575 +# asm 1: vld1.8 {<z0=reg128#4%bot},[<input_1=int32#2]!
2576 +# asm 2: vld1.8 {<z0=d6},[<input_1=r1]!
2577 +vld1.8 {d6},[r1]!
2578 +
2579 +# qhasm: z12 = mem128[input_1]; input_1 += 16
2580 +# asm 1: vld1.8 {>z12=reg128#5%bot->z12=reg128#5%top},[<input_1=int32#2]!
2581 +# asm 2: vld1.8 {>z12=d8->z12=d9},[<input_1=r1]!
2582 +vld1.8 {d8-d9},[r1]!
2583 +
2584 +# qhasm: z34 = mem128[input_1]; input_1 += 16
2585 +# asm 1: vld1.8 {>z34=reg128#6%bot->z34=reg128#6%top},[<input_1=int32#2]!
2586 +# asm 2: vld1.8 {>z34=d10->z34=d11},[<input_1=r1]!
2587 +vld1.8 {d10-d11},[r1]!
2588 +
2589 +# qhasm: 2x mask = 0xffffffff
2590 +# asm 1: vmov.i64 >mask=reg128#7,#0xffffffff
2591 +# asm 2: vmov.i64 >mask=q6,#0xffffffff
2592 +vmov.i64 q6,#0xffffffff
2593 +
2594 +# qhasm: 2x u4 = 0xff
2595 +# asm 1: vmov.i64 >u4=reg128#8,#0xff
2596 +# asm 2: vmov.i64 >u4=q7,#0xff
2597 +vmov.i64 q7,#0xff
2598 +
2599 +# qhasm: x01 aligned= mem128[input_0];input_0+=16
2600 +# asm 1: vld1.8 {>x01=reg128#9%bot->x01=reg128#9%top},[<input_0=int32#1,: 128]!
2601 +# asm 2: vld1.8 {>x01=d16->x01=d17},[<input_0=r0,: 128]!
2602 +vld1.8 {d16-d17},[r0,: 128]!
2603 +
2604 +# qhasm: x23 aligned= mem128[input_0];input_0+=16
2605 +# asm 1: vld1.8 {>x23=reg128#10%bot->x23=reg128#10%top},[<input_0=int32#1,: 128 ]!
2606 +# asm 2: vld1.8 {>x23=d18->x23=d19},[<input_0=r0,: 128]!
2607 +vld1.8 {d18-d19},[r0,: 128]!
2608 +
2609 +# qhasm: x4 aligned= mem64[input_0]x4[1]
2610 +# asm 1: vld1.8 {<x4=reg128#11%bot},[<input_0=int32#1,: 64]
2611 +# asm 2: vld1.8 {<x4=d20},[<input_0=r0,: 64]
2612 +vld1.8 {d20},[r0,: 64]
2613 +
2614 +# qhasm: input_0 -= 32
2615 +# asm 1: sub >input_0=int32#1,<input_0=int32#1,#32
2616 +# asm 2: sub >input_0=r0,<input_0=r0,#32
2617 +sub r0,r0,#32
2618 +
2619 +# qhasm: 2x mask unsigned>>=6
2620 +# asm 1: vshr.u64 >mask=reg128#7,<mask=reg128#7,#6
2621 +# asm 2: vshr.u64 >mask=q6,<mask=q6,#6
2622 +vshr.u64 q6,q6,#6
2623 +
2624 +# qhasm: 2x u4 unsigned>>= 7
2625 +# asm 1: vshr.u64 >u4=reg128#8,<u4=reg128#8,#7
2626 +# asm 2: vshr.u64 >u4=q7,<u4=q7,#7
2627 +vshr.u64 q7,q7,#7
2628 +
2629 +# qhasm: 4x 5y12 = y12 << 2
2630 +# asm 1: vshl.i32 >5y12=reg128#12,<y12=reg128#2,#2
2631 +# asm 2: vshl.i32 >5y12=q11,<y12=q1,#2
2632 +vshl.i32 q11,q1,#2
2633 +
2634 +# qhasm: 4x 5y34 = y34 << 2
2635 +# asm 1: vshl.i32 >5y34=reg128#13,<y34=reg128#3,#2
2636 +# asm 2: vshl.i32 >5y34=q12,<y34=q2,#2
2637 +vshl.i32 q12,q2,#2
2638 +
2639 +# qhasm: 4x 5y12 += y12
2640 +# asm 1: vadd.i32 >5y12=reg128#12,<5y12=reg128#12,<y12=reg128#2
2641 +# asm 2: vadd.i32 >5y12=q11,<5y12=q11,<y12=q1
2642 +vadd.i32 q11,q11,q1
2643 +
2644 +# qhasm: 4x 5y34 += y34
2645 +# asm 1: vadd.i32 >5y34=reg128#13,<5y34=reg128#13,<y34=reg128#3
2646 +# asm 2: vadd.i32 >5y34=q12,<5y34=q12,<y34=q2
2647 +vadd.i32 q12,q12,q2
2648 +
2649 +# qhasm: 2x u4 <<= 24
2650 +# asm 1: vshl.i64 >u4=reg128#8,<u4=reg128#8,#24
2651 +# asm 2: vshl.i64 >u4=q7,<u4=q7,#24
2652 +vshl.i64 q7,q7,#24
2653 +
2654 +# qhasm: 4x 5z12 = z12 << 2
2655 +# asm 1: vshl.i32 >5z12=reg128#14,<z12=reg128#5,#2
2656 +# asm 2: vshl.i32 >5z12=q13,<z12=q4,#2
2657 +vshl.i32 q13,q4,#2
2658 +
2659 +# qhasm: 4x 5z34 = z34 << 2
2660 +# asm 1: vshl.i32 >5z34=reg128#15,<z34=reg128#6,#2
2661 +# asm 2: vshl.i32 >5z34=q14,<z34=q5,#2
2662 +vshl.i32 q14,q5,#2
2663 +
2664 +# qhasm: 4x 5z12 += z12
2665 +# asm 1: vadd.i32 >5z12=reg128#14,<5z12=reg128#14,<z12=reg128#5
2666 +# asm 2: vadd.i32 >5z12=q13,<5z12=q13,<z12=q4
2667 +vadd.i32 q13,q13,q4
2668 +
2669 +# qhasm: 4x 5z34 += z34
2670 +# asm 1: vadd.i32 >5z34=reg128#15,<5z34=reg128#15,<z34=reg128#6
2671 +# asm 2: vadd.i32 >5z34=q14,<5z34=q14,<z34=q5
2672 +vadd.i32 q14,q14,q5
2673 +
2674 +# qhasm: new two24
2675 +
2676 +# qhasm: new y0_stack
2677 +
2678 +# qhasm: new y12_stack
2679 +
2680 +# qhasm: new y34_stack
2681 +
2682 +# qhasm: new 5y12_stack
2683 +
2684 +# qhasm: new 5y34_stack
2685 +
2686 +# qhasm: new z0_stack
2687 +
2688 +# qhasm: new z12_stack
2689 +
2690 +# qhasm: new z34_stack
2691 +
2692 +# qhasm: new 5z12_stack
2693 +
2694 +# qhasm: new 5z34_stack
2695 +
2696 +# qhasm: ptr = &two24
2697 +# asm 1: lea >ptr=int32#2,<two24=stack128#1
2698 +# asm 2: lea >ptr=r1,<two24=[sp,#0]
2699 +add r1,sp,#0
2700 +
2701 +# qhasm: mem128[ptr] aligned= u4
2702 +# asm 1: vst1.8 {<u4=reg128#8%bot-<u4=reg128#8%top},[<ptr=int32#2,: 128]
2703 +# asm 2: vst1.8 {<u4=d14-<u4=d15},[<ptr=r1,: 128]
2704 +vst1.8 {d14-d15},[r1,: 128]
2705 +
2706 +# qhasm: r4 = u4
2707 +# asm 1: vmov >r4=reg128#16,<u4=reg128#8
2708 +# asm 2: vmov >r4=q15,<u4=q7
2709 +vmov q15,q7
2710 +
2711 +# qhasm: r0 = u4
2712 +# asm 1: vmov >r0=reg128#8,<u4=reg128#8
2713 +# asm 2: vmov >r0=q7,<u4=q7
2714 +vmov q7,q7
2715 +
2716 +# qhasm: ptr = &y0_stack
2717 +# asm 1: lea >ptr=int32#2,<y0_stack=stack128#2
2718 +# asm 2: lea >ptr=r1,<y0_stack=[sp,#16]
2719 +add r1,sp,#16
2720 +
2721 +# qhasm: mem128[ptr] aligned= y0
2722 +# asm 1: vst1.8 {<y0=reg128#1%bot-<y0=reg128#1%top},[<ptr=int32#2,: 128]
2723 +# asm 2: vst1.8 {<y0=d0-<y0=d1},[<ptr=r1,: 128]
2724 +vst1.8 {d0-d1},[r1,: 128]
2725 +
2726 +# qhasm: ptr = &y12_stack
2727 +# asm 1: lea >ptr=int32#2,<y12_stack=stack128#3
2728 +# asm 2: lea >ptr=r1,<y12_stack=[sp,#32]
2729 +add r1,sp,#32
2730 +
2731 +# qhasm: mem128[ptr] aligned= y12
2732 +# asm 1: vst1.8 {<y12=reg128#2%bot-<y12=reg128#2%top},[<ptr=int32#2,: 128]
2733 +# asm 2: vst1.8 {<y12=d2-<y12=d3},[<ptr=r1,: 128]
2734 +vst1.8 {d2-d3},[r1,: 128]
2735 +
2736 +# qhasm: ptr = &y34_stack
2737 +# asm 1: lea >ptr=int32#2,<y34_stack=stack128#4
2738 +# asm 2: lea >ptr=r1,<y34_stack=[sp,#48]
2739 +add r1,sp,#48
2740 +
2741 +# qhasm: mem128[ptr] aligned= y34
2742 +# asm 1: vst1.8 {<y34=reg128#3%bot-<y34=reg128#3%top},[<ptr=int32#2,: 128]
2743 +# asm 2: vst1.8 {<y34=d4-<y34=d5},[<ptr=r1,: 128]
2744 +vst1.8 {d4-d5},[r1,: 128]
2745 +
2746 +# qhasm: ptr = &z0_stack
2747 +# asm 1: lea >ptr=int32#2,<z0_stack=stack128#7
2748 +# asm 2: lea >ptr=r1,<z0_stack=[sp,#96]
2749 +add r1,sp,#96
2750 +
2751 +# qhasm: mem128[ptr] aligned= z0
2752 +# asm 1: vst1.8 {<z0=reg128#4%bot-<z0=reg128#4%top},[<ptr=int32#2,: 128]
2753 +# asm 2: vst1.8 {<z0=d6-<z0=d7},[<ptr=r1,: 128]
2754 +vst1.8 {d6-d7},[r1,: 128]
2755 +
2756 +# qhasm: ptr = &z12_stack
2757 +# asm 1: lea >ptr=int32#2,<z12_stack=stack128#8
2758 +# asm 2: lea >ptr=r1,<z12_stack=[sp,#112]
2759 +add r1,sp,#112
2760 +
2761 +# qhasm: mem128[ptr] aligned= z12
2762 +# asm 1: vst1.8 {<z12=reg128#5%bot-<z12=reg128#5%top},[<ptr=int32#2,: 128]
2763 +# asm 2: vst1.8 {<z12=d8-<z12=d9},[<ptr=r1,: 128]
2764 +vst1.8 {d8-d9},[r1,: 128]
2765 +
2766 +# qhasm: ptr = &z34_stack
2767 +# asm 1: lea >ptr=int32#2,<z34_stack=stack128#9
2768 +# asm 2: lea >ptr=r1,<z34_stack=[sp,#128]
2769 +add r1,sp,#128
2770 +
2771 +# qhasm: mem128[ptr] aligned= z34
2772 +# asm 1: vst1.8 {<z34=reg128#6%bot-<z34=reg128#6%top},[<ptr=int32#2,: 128]
2773 +# asm 2: vst1.8 {<z34=d10-<z34=d11},[<ptr=r1,: 128]
2774 +vst1.8 {d10-d11},[r1,: 128]
2775 +
2776 +# qhasm: ptr = &5y12_stack
2777 +# asm 1: lea >ptr=int32#2,<5y12_stack=stack128#5
2778 +# asm 2: lea >ptr=r1,<5y12_stack=[sp,#64]
2779 +add r1,sp,#64
2780 +
2781 +# qhasm: mem128[ptr] aligned= 5y12
2782 +# asm 1: vst1.8 {<5y12=reg128#12%bot-<5y12=reg128#12%top},[<ptr=int32#2,: 128]
2783 +# asm 2: vst1.8 {<5y12=d22-<5y12=d23},[<ptr=r1,: 128]
2784 +vst1.8 {d22-d23},[r1,: 128]
2785 +
2786 +# qhasm: ptr = &5y34_stack
2787 +# asm 1: lea >ptr=int32#2,<5y34_stack=stack128#6
2788 +# asm 2: lea >ptr=r1,<5y34_stack=[sp,#80]
2789 +add r1,sp,#80
2790 +
2791 +# qhasm: mem128[ptr] aligned= 5y34
2792 +# asm 1: vst1.8 {<5y34=reg128#13%bot-<5y34=reg128#13%top},[<ptr=int32#2,: 128]
2793 +# asm 2: vst1.8 {<5y34=d24-<5y34=d25},[<ptr=r1,: 128]
2794 +vst1.8 {d24-d25},[r1,: 128]
2795 +
2796 +# qhasm: ptr = &5z12_stack
2797 +# asm 1: lea >ptr=int32#2,<5z12_stack=stack128#10
2798 +# asm 2: lea >ptr=r1,<5z12_stack=[sp,#144]
2799 +add r1,sp,#144
2800 +
2801 +# qhasm: mem128[ptr] aligned= 5z12
2802 +# asm 1: vst1.8 {<5z12=reg128#14%bot-<5z12=reg128#14%top},[<ptr=int32#2,: 128]
2803 +# asm 2: vst1.8 {<5z12=d26-<5z12=d27},[<ptr=r1,: 128]
2804 +vst1.8 {d26-d27},[r1,: 128]
2805 +
2806 +# qhasm: ptr = &5z34_stack
2807 +# asm 1: lea >ptr=int32#2,<5z34_stack=stack128#11
2808 +# asm 2: lea >ptr=r1,<5z34_stack=[sp,#160]
2809 +add r1,sp,#160
2810 +
2811 +# qhasm: mem128[ptr] aligned= 5z34
2812 +# asm 1: vst1.8 {<5z34=reg128#15%bot-<5z34=reg128#15%top},[<ptr=int32#2,: 128]
2813 +# asm 2: vst1.8 {<5z34=d28-<5z34=d29},[<ptr=r1,: 128]
2814 +vst1.8 {d28-d29},[r1,: 128]
2815 +
2816 +# qhasm: unsigned>? len - 64
2817 +# asm 1: cmp <len=int32#4,#64
2818 +# asm 2: cmp <len=r3,#64
2819 +cmp r3,#64
2820 +
2821 +# qhasm: goto below64bytes if !unsigned>
2822 +bls ._below64bytes
2823 +
2824 +# qhasm: input_2 += 32
2825 +# asm 1: add >input_2=int32#2,<input_2=int32#3,#32
2826 +# asm 2: add >input_2=r1,<input_2=r2,#32
2827 +add r1,r2,#32
2828 +
2829 +# qhasm: mainloop2:
2830 +._mainloop2:
2831 +
2832 +# qhasm: c01 = mem128[input_2];input_2+=16
2833 +# asm 1: vld1.8 {>c01=reg128#1%bot->c01=reg128#1%top},[<input_2=int32#2]!
2834 +# asm 2: vld1.8 {>c01=d0->c01=d1},[<input_2=r1]!
2835 +vld1.8 {d0-d1},[r1]!
2836 +
2837 +# qhasm: c23 = mem128[input_2];input_2+=16
2838 +# asm 1: vld1.8 {>c23=reg128#2%bot->c23=reg128#2%top},[<input_2=int32#2]!
2839 +# asm 2: vld1.8 {>c23=d2->c23=d3},[<input_2=r1]!
2840 +vld1.8 {d2-d3},[r1]!
2841 +
2842 +# qhasm: r4[0,1] += x01[0] unsigned* z34[2]; r4[2,3] += x01[1] unsigned* z34 [3]
2843 +# asm 1: vmlal.u32 <r4=reg128#16,<x01=reg128#9%bot,<z34=reg128#6%top
2844 +# asm 2: vmlal.u32 <r4=q15,<x01=d16,<z34=d11
2845 +vmlal.u32 q15,d16,d11
2846 +
2847 +# qhasm: ptr = &z12_stack
2848 +# asm 1: lea >ptr=int32#3,<z12_stack=stack128#8
2849 +# asm 2: lea >ptr=r2,<z12_stack=[sp,#112]
2850 +add r2,sp,#112
2851 +
2852 +# qhasm: z12 aligned= mem128[ptr]
2853 +# asm 1: vld1.8 {>z12=reg128#3%bot->z12=reg128#3%top},[<ptr=int32#3,: 128]
2854 +# asm 2: vld1.8 {>z12=d4->z12=d5},[<ptr=r2,: 128]
2855 +vld1.8 {d4-d5},[r2,: 128]
2856 +
2857 +# qhasm: r4[0,1] += x01[2] unsigned* z34[0]; r4[2,3] += x01[3] unsigned* z34[1 ]
2858 +# asm 1: vmlal.u32 <r4=reg128#16,<x01=reg128#9%top,<z34=reg128#6%bot
2859 +# asm 2: vmlal.u32 <r4=q15,<x01=d17,<z34=d10
2860 +vmlal.u32 q15,d17,d10
2861 +
2862 +# qhasm: ptr = &z0_stack
2863 +# asm 1: lea >ptr=int32#3,<z0_stack=stack128#7
2864 +# asm 2: lea >ptr=r2,<z0_stack=[sp,#96]
2865 +add r2,sp,#96
2866 +
2867 +# qhasm: z0 aligned= mem128[ptr]
2868 +# asm 1: vld1.8 {>z0=reg128#4%bot->z0=reg128#4%top},[<ptr=int32#3,: 128]
2869 +# asm 2: vld1.8 {>z0=d6->z0=d7},[<ptr=r2,: 128]
2870 +vld1.8 {d6-d7},[r2,: 128]
2871 +
2872 +# qhasm: r4[0,1] += x23[0] unsigned* z12[2]; r4[2,3] += x23[1] unsigned* z12[3 ]
2873 +# asm 1: vmlal.u32 <r4=reg128#16,<x23=reg128#10%bot,<z12=reg128#3%top
2874 +# asm 2: vmlal.u32 <r4=q15,<x23=d18,<z12=d5
2875 +vmlal.u32 q15,d18,d5
2876 +
2877 +# qhasm: c01 c23 = c01[0]c01[1]c01[2]c23[2]c23[0]c23[1]c01[3]c23[3]
2878 +# asm 1: vtrn.32 <c01=reg128#1%top,<c23=reg128#2%top
2879 +# asm 2: vtrn.32 <c01=d1,<c23=d3
2880 +vtrn.32 d1,d3
2881 +
2882 +# qhasm: r4[0,1] += x23[2] unsigned* z12[0]; r4[2,3] += x23[3] unsigned* z12[1 ]
2883 +# asm 1: vmlal.u32 <r4=reg128#16,<x23=reg128#10%top,<z12=reg128#3%bot
2884 +# asm 2: vmlal.u32 <r4=q15,<x23=d19,<z12=d4
2885 +vmlal.u32 q15,d19,d4
2886 +
2887 +# qhasm: r4[0,1] += x4[0] unsigned* z0[0]; r4[2,3] += x4[1] unsigned* z0[1]
2888 +# asm 1: vmlal.u32 <r4=reg128#16,<x4=reg128#11%bot,<z0=reg128#4%bot
2889 +# asm 2: vmlal.u32 <r4=q15,<x4=d20,<z0=d6
2890 +vmlal.u32 q15,d20,d6
2891 +
2892 +# qhasm: r3[0,1] = c23[2]<<18; r3[2,3] = c23[3]<<18
2893 +# asm 1: vshll.u32 >r3=reg128#5,<c23=reg128#2%top,#18
2894 +# asm 2: vshll.u32 >r3=q4,<c23=d3,#18
2895 +vshll.u32 q4,d3,#18
2896 +
2897 +# qhasm: c01 c23 = c01[0]c23[0]c01[2]c01[3]c01[1]c23[1]c23[2]c23[3]
2898 +# asm 1: vtrn.32 <c01=reg128#1%bot,<c23=reg128#2%bot
2899 +# asm 2: vtrn.32 <c01=d0,<c23=d2
2900 +vtrn.32 d0,d2
2901 +
2902 +# qhasm: r3[0,1] += x01[0] unsigned* z34[0]; r3[2,3] += x01[1] unsigned* z34[ 1]
2903 +# asm 1: vmlal.u32 <r3=reg128#5,<x01=reg128#9%bot,<z34=reg128#6%bot
2904 +# asm 2: vmlal.u32 <r3=q4,<x01=d16,<z34=d10
2905 +vmlal.u32 q4,d16,d10
2906 +
2907 +# qhasm: r3[0,1] += x01[2] unsigned* z12[2]; r3[2,3] += x01[3] unsigned* z12[ 3]
2908 +# asm 1: vmlal.u32 <r3=reg128#5,<x01=reg128#9%top,<z12=reg128#3%top
2909 +# asm 2: vmlal.u32 <r3=q4,<x01=d17,<z12=d5
2910 +vmlal.u32 q4,d17,d5
2911 +
2912 +# qhasm: r0 = r0[1]c01[0]r0[2,3]
2913 +# asm 1: vext.32 <r0=reg128#8%bot,<r0=reg128#8%bot,<c01=reg128#1%bot,#1
2914 +# asm 2: vext.32 <r0=d14,<r0=d14,<c01=d0,#1
2915 +vext.32 d14,d14,d0,#1
2916 +
2917 +# qhasm: r3[0,1] += x23[0] unsigned* z12[0]; r3[2,3] += x23[1] unsigned* z12[ 1]
2918 +# asm 1: vmlal.u32 <r3=reg128#5,<x23=reg128#10%bot,<z12=reg128#3%bot
2919 +# asm 2: vmlal.u32 <r3=q4,<x23=d18,<z12=d4
2920 +vmlal.u32 q4,d18,d4
2921 +
2922 +# qhasm: input_2 -= 64
2923 +# asm 1: sub >input_2=int32#2,<input_2=int32#2,#64
2924 +# asm 2: sub >input_2=r1,<input_2=r1,#64
2925 +sub r1,r1,#64
2926 +
2927 +# qhasm: r3[0,1] += x23[2] unsigned* z0[0]; r3[2,3] += x23[3] unsigned* z0[1]
2928 +# asm 1: vmlal.u32 <r3=reg128#5,<x23=reg128#10%top,<z0=reg128#4%bot
2929 +# asm 2: vmlal.u32 <r3=q4,<x23=d19,<z0=d6
2930 +vmlal.u32 q4,d19,d6
2931 +
2932 +# qhasm: ptr = &5z34_stack
2933 +# asm 1: lea >ptr=int32#3,<5z34_stack=stack128#11
2934 +# asm 2: lea >ptr=r2,<5z34_stack=[sp,#160]
2935 +add r2,sp,#160
2936 +
2937 +# qhasm: 5z34 aligned= mem128[ptr]
2938 +# asm 1: vld1.8 {>5z34=reg128#6%bot->5z34=reg128#6%top},[<ptr=int32#3,: 128]
2939 +# asm 2: vld1.8 {>5z34=d10->5z34=d11},[<ptr=r2,: 128]
2940 +vld1.8 {d10-d11},[r2,: 128]
2941 +
2942 +# qhasm: r3[0,1] += x4[0] unsigned* 5z34[2]; r3[2,3] += x4[1] unsigned* 5z3 4[3]
2943 +# asm 1: vmlal.u32 <r3=reg128#5,<x4=reg128#11%bot,<5z34=reg128#6%top
2944 +# asm 2: vmlal.u32 <r3=q4,<x4=d20,<5z34=d11
2945 +vmlal.u32 q4,d20,d11
2946 +
2947 +# qhasm: r0 = r0[1]r0[0]r0[3]r0[2]
2948 +# asm 1: vrev64.i32 >r0=reg128#8,<r0=reg128#8
2949 +# asm 2: vrev64.i32 >r0=q7,<r0=q7
2950 +vrev64.i32 q7,q7
2951 +
2952 +# qhasm: r2[0,1] = c01[2]<<12; r2[2,3] = c01[3]<<12
2953 +# asm 1: vshll.u32 >r2=reg128#14,<c01=reg128#1%top,#12
2954 +# asm 2: vshll.u32 >r2=q13,<c01=d1,#12
2955 +vshll.u32 q13,d1,#12
2956 +
2957 +# qhasm: d01 = mem128[input_2];input_2+=16
2958 +# asm 1: vld1.8 {>d01=reg128#12%bot->d01=reg128#12%top},[<input_2=int32#2]!
2959 +# asm 2: vld1.8 {>d01=d22->d01=d23},[<input_2=r1]!
2960 +vld1.8 {d22-d23},[r1]!
2961 +
2962 +# qhasm: r2[0,1] += x01[0] unsigned* z12[2]; r2[2,3] += x01[1] unsigned* z12[ 3]
2963 +# asm 1: vmlal.u32 <r2=reg128#14,<x01=reg128#9%bot,<z12=reg128#3%top
2964 +# asm 2: vmlal.u32 <r2=q13,<x01=d16,<z12=d5
2965 +vmlal.u32 q13,d16,d5
2966 +
2967 +# qhasm: r2[0,1] += x01[2] unsigned* z12[0]; r2[2,3] += x01[3] unsigned* z12[ 1]
2968 +# asm 1: vmlal.u32 <r2=reg128#14,<x01=reg128#9%top,<z12=reg128#3%bot
2969 +# asm 2: vmlal.u32 <r2=q13,<x01=d17,<z12=d4
2970 +vmlal.u32 q13,d17,d4
2971 +
2972 +# qhasm: r2[0,1] += x23[0] unsigned* z0[0]; r2[2,3] += x23[1] unsigned* z0[1]
2973 +# asm 1: vmlal.u32 <r2=reg128#14,<x23=reg128#10%bot,<z0=reg128#4%bot
2974 +# asm 2: vmlal.u32 <r2=q13,<x23=d18,<z0=d6
2975 +vmlal.u32 q13,d18,d6
2976 +
2977 +# qhasm: r2[0,1] += x23[2] unsigned* 5z34[2]; r2[2,3] += x23[3] unsigned* 5z3 4[3]
2978 +# asm 1: vmlal.u32 <r2=reg128#14,<x23=reg128#10%top,<5z34=reg128#6%top
2979 +# asm 2: vmlal.u32 <r2=q13,<x23=d19,<5z34=d11
2980 +vmlal.u32 q13,d19,d11
2981 +
2982 +# qhasm: r2[0,1] += x4[0] unsigned* 5z34[0]; r2[2,3] += x4[1] unsigned* 5z34[ 1]
2983 +# asm 1: vmlal.u32 <r2=reg128#14,<x4=reg128#11%bot,<5z34=reg128#6%bot
2984 +# asm 2: vmlal.u32 <r2=q13,<x4=d20,<5z34=d10
2985 +vmlal.u32 q13,d20,d10
2986 +
2987 +# qhasm: r0 = r0[0,1]c01[1]r0[2]
2988 +# asm 1: vext.32 <r0=reg128#8%top,<c01=reg128#1%bot,<r0=reg128#8%top,#1
2989 +# asm 2: vext.32 <r0=d15,<c01=d0,<r0=d15,#1
2990 +vext.32 d15,d0,d15,#1
2991 +
2992 +# qhasm: r1[0,1] = c23[0]<<6; r1[2,3] = c23[1]<<6
2993 +# asm 1: vshll.u32 >r1=reg128#15,<c23=reg128#2%bot,#6
2994 +# asm 2: vshll.u32 >r1=q14,<c23=d2,#6
2995 +vshll.u32 q14,d2,#6
2996 +
2997 +# qhasm: r1[0,1] += x01[0] unsigned* z12[0]; r1[2,3] += x01[1] unsigned* z12[ 1]
2998 +# asm 1: vmlal.u32 <r1=reg128#15,<x01=reg128#9%bot,<z12=reg128#3%bot
2999 +# asm 2: vmlal.u32 <r1=q14,<x01=d16,<z12=d4
3000 +vmlal.u32 q14,d16,d4
3001 +
3002 +# qhasm: r1[0,1] += x01[2] unsigned* z0[0]; r1[2,3] += x01[3] unsigned* z0[1]
3003 +# asm 1: vmlal.u32 <r1=reg128#15,<x01=reg128#9%top,<z0=reg128#4%bot
3004 +# asm 2: vmlal.u32 <r1=q14,<x01=d17,<z0=d6
3005 +vmlal.u32 q14,d17,d6
3006 +
3007 +# qhasm: r1[0,1] += x23[0] unsigned* 5z34[2]; r1[2,3] += x23[1] unsigned* 5z3 4[3]
3008 +# asm 1: vmlal.u32 <r1=reg128#15,<x23=reg128#10%bot,<5z34=reg128#6%top
3009 +# asm 2: vmlal.u32 <r1=q14,<x23=d18,<5z34=d11
3010 +vmlal.u32 q14,d18,d11
3011 +
3012 +# qhasm: r1[0,1] += x23[2] unsigned* 5z34[0]; r1[2,3] += x23[3] unsigned* 5z34[ 1]
3013 +# asm 1: vmlal.u32 <r1=reg128#15,<x23=reg128#10%top,<5z34=reg128#6%bot
3014 +# asm 2: vmlal.u32 <r1=q14,<x23=d19,<5z34=d10
3015 +vmlal.u32 q14,d19,d10
3016 +
3017 +# qhasm: ptr = &5z12_stack
3018 +# asm 1: lea >ptr=int32#3,<5z12_stack=stack128#10
3019 +# asm 2: lea >ptr=r2,<5z12_stack=[sp,#144]
3020 +add r2,sp,#144
3021 +
3022 +# qhasm: 5z12 aligned= mem128[ptr]
3023 +# asm 1: vld1.8 {>5z12=reg128#1%bot->5z12=reg128#1%top},[<ptr=int32#3,: 128]
3024 +# asm 2: vld1.8 {>5z12=d0->5z12=d1},[<ptr=r2,: 128]
3025 +vld1.8 {d0-d1},[r2,: 128]
3026 +
3027 +# qhasm: r1[0,1] += x4[0] unsigned* 5z12[2]; r1[2,3] += x4[1] unsigned* 5z12[ 3]
3028 +# asm 1: vmlal.u32 <r1=reg128#15,<x4=reg128#11%bot,<5z12=reg128#1%top
3029 +# asm 2: vmlal.u32 <r1=q14,<x4=d20,<5z12=d1
3030 +vmlal.u32 q14,d20,d1
3031 +
3032 +# qhasm: d23 = mem128[input_2];input_2+=16
3033 +# asm 1: vld1.8 {>d23=reg128#2%bot->d23=reg128#2%top},[<input_2=int32#2]!
3034 +# asm 2: vld1.8 {>d23=d2->d23=d3},[<input_2=r1]!
3035 +vld1.8 {d2-d3},[r1]!
3036 +
3037 +# qhasm: input_2 += 32
3038 +# asm 1: add >input_2=int32#2,<input_2=int32#2,#32
3039 +# asm 2: add >input_2=r1,<input_2=r1,#32
3040 +add r1,r1,#32
3041 +
3042 +# qhasm: r0[0,1] += x4[0] unsigned* 5z12[0]; r0[2,3] += x4[1] unsigned* 5z12[ 1]
3043 +# asm 1: vmlal.u32 <r0=reg128#8,<x4=reg128#11%bot,<5z12=reg128#1%bot
3044 +# asm 2: vmlal.u32 <r0=q7,<x4=d20,<5z12=d0
3045 +vmlal.u32 q7,d20,d0
3046 +
3047 +# qhasm: r0[0,1] += x23[0] unsigned* 5z34[0]; r0[2,3] += x23[1] unsigned* 5z34[ 1]
3048 +# asm 1: vmlal.u32 <r0=reg128#8,<x23=reg128#10%bot,<5z34=reg128#6%bot
3049 +# asm 2: vmlal.u32 <r0=q7,<x23=d18,<5z34=d10
3050 +vmlal.u32 q7,d18,d10
3051 +
3052 +# qhasm: d01 d23 = d01[0] d23[0] d01[1] d23[1]
3053 +# asm 1: vswp <d23=reg128#2%bot,<d01=reg128#12%top
3054 +# asm 2: vswp <d23=d2,<d01=d23
3055 +vswp d2,d23
3056 +
3057 +# qhasm: r0[0,1] += x23[2] unsigned* 5z12[2]; r0[2,3] += x23[3] unsigned* 5z12[ 3]
3058 +# asm 1: vmlal.u32 <r0=reg128#8,<x23=reg128#10%top,<5z12=reg128#1%top
3059 +# asm 2: vmlal.u32 <r0=q7,<x23=d19,<5z12=d1
3060 +vmlal.u32 q7,d19,d1
3061 +
3062 +# qhasm: r0[0,1] += x01[0] unsigned* z0[0]; r0[2,3] += x01[1] unsigned* z0[1]
3063 +# asm 1: vmlal.u32 <r0=reg128#8,<x01=reg128#9%bot,<z0=reg128#4%bot
3064 +# asm 2: vmlal.u32 <r0=q7,<x01=d16,<z0=d6
3065 +vmlal.u32 q7,d16,d6
3066 +
3067 +# qhasm: new mid
3068 +
3069 +# qhasm: 2x v4 = d23 unsigned>> 40
3070 +# asm 1: vshr.u64 >v4=reg128#4,<d23=reg128#2,#40
3071 +# asm 2: vshr.u64 >v4=q3,<d23=q1,#40
3072 +vshr.u64 q3,q1,#40
3073 +
3074 +# qhasm: mid = d01[1]d23[0] mid[2,3]
3075 +# asm 1: vext.32 <mid=reg128#1%bot,<d01=reg128#12%bot,<d23=reg128#2%bot,#1
3076 +# asm 2: vext.32 <mid=d0,<d01=d22,<d23=d2,#1
3077 +vext.32 d0,d22,d2,#1
3078 +
3079 +# qhasm: new v23
3080 +
3081 +# qhasm: v23[2] = d23[0,1] unsigned>> 14; v23[3] = d23[2,3] unsig ned>> 14
3082 +# asm 1: vshrn.u64 <v23=reg128#10%top,<d23=reg128#2,#14
3083 +# asm 2: vshrn.u64 <v23=d19,<d23=q1,#14
3084 +vshrn.u64 d19,q1,#14
3085 +
3086 +# qhasm: mid = mid[0,1] d01[3]d23[2]
3087 +# asm 1: vext.32 <mid=reg128#1%top,<d01=reg128#12%top,<d23=reg128#2%top,#1
3088 +# asm 2: vext.32 <mid=d1,<d01=d23,<d23=d3,#1
3089 +vext.32 d1,d23,d3,#1
3090 +
3091 +# qhasm: new v01
3092 +
3093 +# qhasm: v01[2] = d01[0,1] unsigned>> 26; v01[3] = d01[2,3] unsig ned>> 26
3094 +# asm 1: vshrn.u64 <v01=reg128#11%top,<d01=reg128#12,#26
3095 +# asm 2: vshrn.u64 <v01=d21,<d01=q11,#26
3096 +vshrn.u64 d21,q11,#26
3097 +
3098 +# qhasm: v01 = d01[1]d01[0] v01[2,3]
3099 +# asm 1: vext.32 <v01=reg128#11%bot,<d01=reg128#12%bot,<d01=reg128#12%bot,#1
3100 +# asm 2: vext.32 <v01=d20,<d01=d22,<d01=d22,#1
3101 +vext.32 d20,d22,d22,#1
3102 +
3103 +# qhasm: r0[0,1] += x01[2] unsigned* 5z34[2]; r0[2,3] += x01[3] unsigned* 5z3 4[3]
3104 +# asm 1: vmlal.u32 <r0=reg128#8,<x01=reg128#9%top,<5z34=reg128#6%top
3105 +# asm 2: vmlal.u32 <r0=q7,<x01=d17,<5z34=d11
3106 +vmlal.u32 q7,d17,d11
3107 +
3108 +# qhasm: v01 = v01[1]d01[2] v01[2,3]
3109 +# asm 1: vext.32 <v01=reg128#11%bot,<v01=reg128#11%bot,<d01=reg128#12%top,#1
3110 +# asm 2: vext.32 <v01=d20,<v01=d20,<d01=d23,#1
3111 +vext.32 d20,d20,d23,#1
3112 +
3113 +# qhasm: v23[0] = mid[0,1] unsigned>> 20; v23[1] = mid[2,3] unsig ned>> 20
3114 +# asm 1: vshrn.u64 <v23=reg128#10%bot,<mid=reg128#1,#20
3115 +# asm 2: vshrn.u64 <v23=d18,<mid=q0,#20
3116 +vshrn.u64 d18,q0,#20
3117 +
3118 +# qhasm: v4 = v4[0]v4[2]v4[1]v4[3]
3119 +# asm 1: vtrn.32 <v4=reg128#4%bot,<v4=reg128#4%top
3120 +# asm 2: vtrn.32 <v4=d6,<v4=d7
3121 +vtrn.32 d6,d7
3122 +
3123 +# qhasm: 4x v01 &= 0x03ffffff
3124 +# asm 1: vand.i32 <v01=reg128#11,#0x03ffffff
3125 +# asm 2: vand.i32 <v01=q10,#0x03ffffff
3126 +vand.i32 q10,#0x03ffffff
3127 +
3128 +# qhasm: ptr = &y34_stack
3129 +# asm 1: lea >ptr=int32#3,<y34_stack=stack128#4
3130 +# asm 2: lea >ptr=r2,<y34_stack=[sp,#48]
3131 +add r2,sp,#48
3132 +
3133 +# qhasm: y34 aligned= mem128[ptr]
3134 +# asm 1: vld1.8 {>y34=reg128#3%bot->y34=reg128#3%top},[<ptr=int32#3,: 128]
3135 +# asm 2: vld1.8 {>y34=d4->y34=d5},[<ptr=r2,: 128]
3136 +vld1.8 {d4-d5},[r2,: 128]
3137 +
3138 +# qhasm: 4x v23 &= 0x03ffffff
3139 +# asm 1: vand.i32 <v23=reg128#10,#0x03ffffff
3140 +# asm 2: vand.i32 <v23=q9,#0x03ffffff
3141 +vand.i32 q9,#0x03ffffff
3142 +
3143 +# qhasm: ptr = &y12_stack
3144 +# asm 1: lea >ptr=int32#3,<y12_stack=stack128#3
3145 +# asm 2: lea >ptr=r2,<y12_stack=[sp,#32]
3146 +add r2,sp,#32
3147 +
3148 +# qhasm: y12 aligned= mem128[ptr]
3149 +# asm 1: vld1.8 {>y12=reg128#2%bot->y12=reg128#2%top},[<ptr=int32#3,: 128]
3150 +# asm 2: vld1.8 {>y12=d2->y12=d3},[<ptr=r2,: 128]
3151 +vld1.8 {d2-d3},[r2,: 128]
3152 +
3153 +# qhasm: 4x v4 |= 0x01000000
3154 +# asm 1: vorr.i32 <v4=reg128#4,#0x01000000
3155 +# asm 2: vorr.i32 <v4=q3,#0x01000000
3156 +vorr.i32 q3,#0x01000000
3157 +
3158 +# qhasm: ptr = &y0_stack
3159 +# asm 1: lea >ptr=int32#3,<y0_stack=stack128#2
3160 +# asm 2: lea >ptr=r2,<y0_stack=[sp,#16]
3161 +add r2,sp,#16
3162 +
3163 +# qhasm: y0 aligned= mem128[ptr]
3164 +# asm 1: vld1.8 {>y0=reg128#1%bot->y0=reg128#1%top},[<ptr=int32#3,: 128]
3165 +# asm 2: vld1.8 {>y0=d0->y0=d1},[<ptr=r2,: 128]
3166 +vld1.8 {d0-d1},[r2,: 128]
3167 +
3168 +# qhasm: r4[0,1] += v01[0] unsigned* y34[2]; r4[2,3] += v01[1] unsigned* y34 [3]
3169 +# asm 1: vmlal.u32 <r4=reg128#16,<v01=reg128#11%bot,<y34=reg128#3%top
3170 +# asm 2: vmlal.u32 <r4=q15,<v01=d20,<y34=d5
3171 +vmlal.u32 q15,d20,d5
3172 +
3173 +# qhasm: r4[0,1] += v01[2] unsigned* y34[0]; r4[2,3] += v01[3] unsigned* y34[1 ]
3174 +# asm 1: vmlal.u32 <r4=reg128#16,<v01=reg128#11%top,<y34=reg128#3%bot
3175 +# asm 2: vmlal.u32 <r4=q15,<v01=d21,<y34=d4
3176 +vmlal.u32 q15,d21,d4
3177 +
3178 +# qhasm: r4[0,1] += v23[0] unsigned* y12[2]; r4[2,3] += v23[1] unsigned* y12[3 ]
3179 +# asm 1: vmlal.u32 <r4=reg128#16,<v23=reg128#10%bot,<y12=reg128#2%top
3180 +# asm 2: vmlal.u32 <r4=q15,<v23=d18,<y12=d3
3181 +vmlal.u32 q15,d18,d3
3182 +
3183 +# qhasm: r4[0,1] += v23[2] unsigned* y12[0]; r4[2,3] += v23[3] unsigned* y12[1 ]
3184 +# asm 1: vmlal.u32 <r4=reg128#16,<v23=reg128#10%top,<y12=reg128#2%bot
3185 +# asm 2: vmlal.u32 <r4=q15,<v23=d19,<y12=d2
3186 +vmlal.u32 q15,d19,d2
3187 +
3188 +# qhasm: r4[0,1] += v4[0] unsigned* y0[0]; r4[2,3] += v4[1] unsigned* y0[1]
3189 +# asm 1: vmlal.u32 <r4=reg128#16,<v4=reg128#4%bot,<y0=reg128#1%bot
3190 +# asm 2: vmlal.u32 <r4=q15,<v4=d6,<y0=d0
3191 +vmlal.u32 q15,d6,d0
3192 +
3193 +# qhasm: ptr = &5y34_stack
3194 +# asm 1: lea >ptr=int32#3,<5y34_stack=stack128#6
3195 +# asm 2: lea >ptr=r2,<5y34_stack=[sp,#80]
3196 +add r2,sp,#80
3197 +
3198 +# qhasm: 5y34 aligned= mem128[ptr]
3199 +# asm 1: vld1.8 {>5y34=reg128#13%bot->5y34=reg128#13%top},[<ptr=int32#3,: 128]
3200 +# asm 2: vld1.8 {>5y34=d24->5y34=d25},[<ptr=r2,: 128]
3201 +vld1.8 {d24-d25},[r2,: 128]
3202 +
3203 +# qhasm: r3[0,1] += v01[0] unsigned* y34[0]; r3[2,3] += v01[1] unsigned* y34[ 1]
3204 +# asm 1: vmlal.u32 <r3=reg128#5,<v01=reg128#11%bot,<y34=reg128#3%bot
3205 +# asm 2: vmlal.u32 <r3=q4,<v01=d20,<y34=d4
3206 +vmlal.u32 q4,d20,d4
3207 +
3208 +# qhasm: r3[0,1] += v01[2] unsigned* y12[2]; r3[2,3] += v01[3] unsigned* y12[ 3]
3209 +# asm 1: vmlal.u32 <r3=reg128#5,<v01=reg128#11%top,<y12=reg128#2%top
3210 +# asm 2: vmlal.u32 <r3=q4,<v01=d21,<y12=d3
3211 +vmlal.u32 q4,d21,d3
3212 +
3213 +# qhasm: r3[0,1] += v23[0] unsigned* y12[0]; r3[2,3] += v23[1] unsigned* y12[ 1]
3214 +# asm 1: vmlal.u32 <r3=reg128#5,<v23=reg128#10%bot,<y12=reg128#2%bot
3215 +# asm 2: vmlal.u32 <r3=q4,<v23=d18,<y12=d2
3216 +vmlal.u32 q4,d18,d2
3217 +
3218 +# qhasm: r3[0,1] += v23[2] unsigned* y0[0]; r3[2,3] += v23[3] unsigned* y0[1]
3219 +# asm 1: vmlal.u32 <r3=reg128#5,<v23=reg128#10%top,<y0=reg128#1%bot
3220 +# asm 2: vmlal.u32 <r3=q4,<v23=d19,<y0=d0
3221 +vmlal.u32 q4,d19,d0
3222 +
3223 +# qhasm: r3[0,1] += v4[0] unsigned* 5y34[2]; r3[2,3] += v4[1] unsigned* 5y3 4[3]
3224 +# asm 1: vmlal.u32 <r3=reg128#5,<v4=reg128#4%bot,<5y34=reg128#13%top
3225 +# asm 2: vmlal.u32 <r3=q4,<v4=d6,<5y34=d25
3226 +vmlal.u32 q4,d6,d25
3227 +
3228 +# qhasm: ptr = &5y12_stack
3229 +# asm 1: lea >ptr=int32#3,<5y12_stack=stack128#5
3230 +# asm 2: lea >ptr=r2,<5y12_stack=[sp,#64]
3231 +add r2,sp,#64
3232 +
3233 +# qhasm: 5y12 aligned= mem128[ptr]
3234 +# asm 1: vld1.8 {>5y12=reg128#12%bot->5y12=reg128#12%top},[<ptr=int32#3,: 128]
3235 +# asm 2: vld1.8 {>5y12=d22->5y12=d23},[<ptr=r2,: 128]
3236 +vld1.8 {d22-d23},[r2,: 128]
3237 +
3238 +# qhasm: r0[0,1] += v4[0] unsigned* 5y12[0]; r0[2,3] += v4[1] unsigned* 5y12[ 1]
3239 +# asm 1: vmlal.u32 <r0=reg128#8,<v4=reg128#4%bot,<5y12=reg128#12%bot
3240 +# asm 2: vmlal.u32 <r0=q7,<v4=d6,<5y12=d22
3241 +vmlal.u32 q7,d6,d22
3242 +
3243 +# qhasm: r0[0,1] += v23[0] unsigned* 5y34[0]; r0[2,3] += v23[1] unsigned* 5y34[ 1]
3244 +# asm 1: vmlal.u32 <r0=reg128#8,<v23=reg128#10%bot,<5y34=reg128#13%bot
3245 +# asm 2: vmlal.u32 <r0=q7,<v23=d18,<5y34=d24
3246 +vmlal.u32 q7,d18,d24
3247 +
3248 +# qhasm: r0[0,1] += v23[2] unsigned* 5y12[2]; r0[2,3] += v23[3] unsigned* 5y12[ 3]
3249 +# asm 1: vmlal.u32 <r0=reg128#8,<v23=reg128#10%top,<5y12=reg128#12%top
3250 +# asm 2: vmlal.u32 <r0=q7,<v23=d19,<5y12=d23
3251 +vmlal.u32 q7,d19,d23
3252 +
3253 +# qhasm: r0[0,1] += v01[0] unsigned* y0[0]; r0[2,3] += v01[1] unsigned* y0[1]
3254 +# asm 1: vmlal.u32 <r0=reg128#8,<v01=reg128#11%bot,<y0=reg128#1%bot
3255 +# asm 2: vmlal.u32 <r0=q7,<v01=d20,<y0=d0
3256 +vmlal.u32 q7,d20,d0
3257 +
3258 +# qhasm: r0[0,1] += v01[2] unsigned* 5y34[2]; r0[2,3] += v01[3] unsigned* 5y3 4[3]
3259 +# asm 1: vmlal.u32 <r0=reg128#8,<v01=reg128#11%top,<5y34=reg128#13%top
3260 +# asm 2: vmlal.u32 <r0=q7,<v01=d21,<5y34=d25
3261 +vmlal.u32 q7,d21,d25
3262 +
3263 +# qhasm: r1[0,1] += v01[0] unsigned* y12[0]; r1[2,3] += v01[1] unsigned* y12[ 1]
3264 +# asm 1: vmlal.u32 <r1=reg128#15,<v01=reg128#11%bot,<y12=reg128#2%bot
3265 +# asm 2: vmlal.u32 <r1=q14,<v01=d20,<y12=d2
3266 +vmlal.u32 q14,d20,d2
3267 +
3268 +# qhasm: r1[0,1] += v01[2] unsigned* y0[0]; r1[2,3] += v01[3] unsigned* y0[1]
3269 +# asm 1: vmlal.u32 <r1=reg128#15,<v01=reg128#11%top,<y0=reg128#1%bot
3270 +# asm 2: vmlal.u32 <r1=q14,<v01=d21,<y0=d0
3271 +vmlal.u32 q14,d21,d0
3272 +
3273 +# qhasm: r1[0,1] += v23[0] unsigned* 5y34[2]; r1[2,3] += v23[1] unsigned* 5y3 4[3]
3274 +# asm 1: vmlal.u32 <r1=reg128#15,<v23=reg128#10%bot,<5y34=reg128#13%top
3275 +# asm 2: vmlal.u32 <r1=q14,<v23=d18,<5y34=d25
3276 +vmlal.u32 q14,d18,d25
3277 +
3278 +# qhasm: r1[0,1] += v23[2] unsigned* 5y34[0]; r1[2,3] += v23[3] unsigned* 5y34[ 1]
3279 +# asm 1: vmlal.u32 <r1=reg128#15,<v23=reg128#10%top,<5y34=reg128#13%bot
3280 +# asm 2: vmlal.u32 <r1=q14,<v23=d19,<5y34=d24
3281 +vmlal.u32 q14,d19,d24
3282 +
3283 +# qhasm: r1[0,1] += v4[0] unsigned* 5y12[2]; r1[2,3] += v4[1] unsigned* 5y12[ 3]
3284 +# asm 1: vmlal.u32 <r1=reg128#15,<v4=reg128#4%bot,<5y12=reg128#12%top
3285 +# asm 2: vmlal.u32 <r1=q14,<v4=d6,<5y12=d23
3286 +vmlal.u32 q14,d6,d23
3287 +
3288 +# qhasm: r2[0,1] += v01[0] unsigned* y12[2]; r2[2,3] += v01[1] unsigned* y12[ 3]
3289 +# asm 1: vmlal.u32 <r2=reg128#14,<v01=reg128#11%bot,<y12=reg128#2%top
3290 +# asm 2: vmlal.u32 <r2=q13,<v01=d20,<y12=d3
3291 +vmlal.u32 q13,d20,d3
3292 +
3293 +# qhasm: r2[0,1] += v01[2] unsigned* y12[0]; r2[2,3] += v01[3] unsigned* y12[ 1]
3294 +# asm 1: vmlal.u32 <r2=reg128#14,<v01=reg128#11%top,<y12=reg128#2%bot
3295 +# asm 2: vmlal.u32 <r2=q13,<v01=d21,<y12=d2
3296 +vmlal.u32 q13,d21,d2
3297 +
3298 +# qhasm: r2[0,1] += v23[0] unsigned* y0[0]; r2[2,3] += v23[1] unsigned* y0[1]
3299 +# asm 1: vmlal.u32 <r2=reg128#14,<v23=reg128#10%bot,<y0=reg128#1%bot
3300 +# asm 2: vmlal.u32 <r2=q13,<v23=d18,<y0=d0
3301 +vmlal.u32 q13,d18,d0
3302 +
3303 +# qhasm: r2[0,1] += v23[2] unsigned* 5y34[2]; r2[2,3] += v23[3] unsigned* 5y3 4[3]
3304 +# asm 1: vmlal.u32 <r2=reg128#14,<v23=reg128#10%top,<5y34=reg128#13%top
3305 +# asm 2: vmlal.u32 <r2=q13,<v23=d19,<5y34=d25
3306 +vmlal.u32 q13,d19,d25
3307 +
3308 +# qhasm: r2[0,1] += v4[0] unsigned* 5y34[0]; r2[2,3] += v4[1] unsigned* 5y34[ 1]
3309 +# asm 1: vmlal.u32 <r2=reg128#14,<v4=reg128#4%bot,<5y34=reg128#13%bot
3310 +# asm 2: vmlal.u32 <r2=q13,<v4=d6,<5y34=d24
3311 +vmlal.u32 q13,d6,d24
3312 +
3313 +# qhasm: ptr = &two24
3314 +# asm 1: lea >ptr=int32#3,<two24=stack128#1
3315 +# asm 2: lea >ptr=r2,<two24=[sp,#0]
3316 +add r2,sp,#0
3317 +
3318 +# qhasm: 2x t1 = r0 unsigned>> 26
3319 +# asm 1: vshr.u64 >t1=reg128#4,<r0=reg128#8,#26
3320 +# asm 2: vshr.u64 >t1=q3,<r0=q7,#26
3321 +vshr.u64 q3,q7,#26
3322 +
3323 +# qhasm: len -= 64
3324 +# asm 1: sub >len=int32#4,<len=int32#4,#64
3325 +# asm 2: sub >len=r3,<len=r3,#64
3326 +sub r3,r3,#64
3327 +
3328 +# qhasm: r0 &= mask
3329 +# asm 1: vand >r0=reg128#6,<r0=reg128#8,<mask=reg128#7
3330 +# asm 2: vand >r0=q5,<r0=q7,<mask=q6
3331 +vand q5,q7,q6
3332 +
3333 +# qhasm: 2x r1 += t1
3334 +# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#15,<t1=reg128#4
3335 +# asm 2: vadd.i64 >r1=q3,<r1=q14,<t1=q3
3336 +vadd.i64 q3,q14,q3
3337 +
3338 +# qhasm: 2x t4 = r3 unsigned>> 26
3339 +# asm 1: vshr.u64 >t4=reg128#8,<r3=reg128#5,#26
3340 +# asm 2: vshr.u64 >t4=q7,<r3=q4,#26
3341 +vshr.u64 q7,q4,#26
3342 +
3343 +# qhasm: r3 &= mask
3344 +# asm 1: vand >r3=reg128#5,<r3=reg128#5,<mask=reg128#7
3345 +# asm 2: vand >r3=q4,<r3=q4,<mask=q6
3346 +vand q4,q4,q6
3347 +
3348 +# qhasm: 2x x4 = r4 + t4
3349 +# asm 1: vadd.i64 >x4=reg128#8,<r4=reg128#16,<t4=reg128#8
3350 +# asm 2: vadd.i64 >x4=q7,<r4=q15,<t4=q7
3351 +vadd.i64 q7,q15,q7
3352 +
3353 +# qhasm: r4 aligned= mem128[ptr]
3354 +# asm 1: vld1.8 {>r4=reg128#16%bot->r4=reg128#16%top},[<ptr=int32#3,: 128]
3355 +# asm 2: vld1.8 {>r4=d30->r4=d31},[<ptr=r2,: 128]
3356 +vld1.8 {d30-d31},[r2,: 128]
3357 +
3358 +# qhasm: 2x t2 = r1 unsigned>> 26
3359 +# asm 1: vshr.u64 >t2=reg128#9,<r1=reg128#4,#26
3360 +# asm 2: vshr.u64 >t2=q8,<r1=q3,#26
3361 +vshr.u64 q8,q3,#26
3362 +
3363 +# qhasm: r1 &= mask
3364 +# asm 1: vand >r1=reg128#4,<r1=reg128#4,<mask=reg128#7
3365 +# asm 2: vand >r1=q3,<r1=q3,<mask=q6
3366 +vand q3,q3,q6
3367 +
3368 +# qhasm: 2x t0 = x4 unsigned>> 26
3369 +# asm 1: vshr.u64 >t0=reg128#10,<x4=reg128#8,#26
3370 +# asm 2: vshr.u64 >t0=q9,<x4=q7,#26
3371 +vshr.u64 q9,q7,#26
3372 +
3373 +# qhasm: 2x r2 += t2
3374 +# asm 1: vadd.i64 >r2=reg128#9,<r2=reg128#14,<t2=reg128#9
3375 +# asm 2: vadd.i64 >r2=q8,<r2=q13,<t2=q8
3376 +vadd.i64 q8,q13,q8
3377 +
3378 +# qhasm: x4 &= mask
3379 +# asm 1: vand >x4=reg128#11,<x4=reg128#8,<mask=reg128#7
3380 +# asm 2: vand >x4=q10,<x4=q7,<mask=q6
3381 +vand q10,q7,q6
3382 +
3383 +# qhasm: 2x x01 = r0 + t0
3384 +# asm 1: vadd.i64 >x01=reg128#6,<r0=reg128#6,<t0=reg128#10
3385 +# asm 2: vadd.i64 >x01=q5,<r0=q5,<t0=q9
3386 +vadd.i64 q5,q5,q9
3387 +
3388 +# qhasm: r0 aligned= mem128[ptr]
3389 +# asm 1: vld1.8 {>r0=reg128#8%bot->r0=reg128#8%top},[<ptr=int32#3,: 128]
3390 +# asm 2: vld1.8 {>r0=d14->r0=d15},[<ptr=r2,: 128]
3391 +vld1.8 {d14-d15},[r2,: 128]
3392 +
3393 +# qhasm: ptr = &z34_stack
3394 +# asm 1: lea >ptr=int32#3,<z34_stack=stack128#9
3395 +# asm 2: lea >ptr=r2,<z34_stack=[sp,#128]
3396 +add r2,sp,#128
3397 +
3398 +# qhasm: 2x t0 <<= 2
3399 +# asm 1: vshl.i64 >t0=reg128#10,<t0=reg128#10,#2
3400 +# asm 2: vshl.i64 >t0=q9,<t0=q9,#2
3401 +vshl.i64 q9,q9,#2
3402 +
3403 +# qhasm: 2x t3 = r2 unsigned>> 26
3404 +# asm 1: vshr.u64 >t3=reg128#14,<r2=reg128#9,#26
3405 +# asm 2: vshr.u64 >t3=q13,<r2=q8,#26
3406 +vshr.u64 q13,q8,#26
3407 +
3408 +# qhasm: 2x x01 += t0
3409 +# asm 1: vadd.i64 >x01=reg128#15,<x01=reg128#6,<t0=reg128#10
3410 +# asm 2: vadd.i64 >x01=q14,<x01=q5,<t0=q9
3411 +vadd.i64 q14,q5,q9
3412 +
3413 +# qhasm: z34 aligned= mem128[ptr]
3414 +# asm 1: vld1.8 {>z34=reg128#6%bot->z34=reg128#6%top},[<ptr=int32#3,: 128]
3415 +# asm 2: vld1.8 {>z34=d10->z34=d11},[<ptr=r2,: 128]
3416 +vld1.8 {d10-d11},[r2,: 128]
3417 +
3418 +# qhasm: x23 = r2 & mask
3419 +# asm 1: vand >x23=reg128#10,<r2=reg128#9,<mask=reg128#7
3420 +# asm 2: vand >x23=q9,<r2=q8,<mask=q6
3421 +vand q9,q8,q6
3422 +
3423 +# qhasm: 2x r3 += t3
3424 +# asm 1: vadd.i64 >r3=reg128#5,<r3=reg128#5,<t3=reg128#14
3425 +# asm 2: vadd.i64 >r3=q4,<r3=q4,<t3=q13
3426 +vadd.i64 q4,q4,q13
3427 +
3428 +# qhasm: input_2 += 32
3429 +# asm 1: add >input_2=int32#2,<input_2=int32#2,#32
3430 +# asm 2: add >input_2=r1,<input_2=r1,#32
3431 +add r1,r1,#32
3432 +
3433 +# qhasm: 2x t1 = x01 unsigned>> 26
3434 +# asm 1: vshr.u64 >t1=reg128#14,<x01=reg128#15,#26
3435 +# asm 2: vshr.u64 >t1=q13,<x01=q14,#26
3436 +vshr.u64 q13,q14,#26
3437 +
3438 +# qhasm: x23 = x23[0,2,1,3]
3439 +# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top
3440 +# asm 2: vtrn.32 <x23=d18,<x23=d19
3441 +vtrn.32 d18,d19
3442 +
3443 +# qhasm: x01 = x01 & mask
3444 +# asm 1: vand >x01=reg128#9,<x01=reg128#15,<mask=reg128#7
3445 +# asm 2: vand >x01=q8,<x01=q14,<mask=q6
3446 +vand q8,q14,q6
3447 +
3448 +# qhasm: 2x r1 += t1
3449 +# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#4,<t1=reg128#14
3450 +# asm 2: vadd.i64 >r1=q3,<r1=q3,<t1=q13
3451 +vadd.i64 q3,q3,q13
3452 +
3453 +# qhasm: 2x t4 = r3 unsigned>> 26
3454 +# asm 1: vshr.u64 >t4=reg128#14,<r3=reg128#5,#26
3455 +# asm 2: vshr.u64 >t4=q13,<r3=q4,#26
3456 +vshr.u64 q13,q4,#26
3457 +
3458 +# qhasm: x01 = x01[0,2,1,3]
3459 +# asm 1: vtrn.32 <x01=reg128#9%bot,<x01=reg128#9%top
3460 +# asm 2: vtrn.32 <x01=d16,<x01=d17
3461 +vtrn.32 d16,d17
3462 +
3463 +# qhasm: r3 &= mask
3464 +# asm 1: vand >r3=reg128#5,<r3=reg128#5,<mask=reg128#7
3465 +# asm 2: vand >r3=q4,<r3=q4,<mask=q6
3466 +vand q4,q4,q6
3467 +
3468 +# qhasm: r1 = r1[0,2,1,3]
3469 +# asm 1: vtrn.32 <r1=reg128#4%bot,<r1=reg128#4%top
3470 +# asm 2: vtrn.32 <r1=d6,<r1=d7
3471 +vtrn.32 d6,d7
3472 +
3473 +# qhasm: 2x x4 += t4
3474 +# asm 1: vadd.i64 >x4=reg128#11,<x4=reg128#11,<t4=reg128#14
3475 +# asm 2: vadd.i64 >x4=q10,<x4=q10,<t4=q13
3476 +vadd.i64 q10,q10,q13
3477 +
3478 +# qhasm: r3 = r3[0,2,1,3]
3479 +# asm 1: vtrn.32 <r3=reg128#5%bot,<r3=reg128#5%top
3480 +# asm 2: vtrn.32 <r3=d8,<r3=d9
3481 +vtrn.32 d8,d9
3482 +
3483 +# qhasm: x01 = x01[0,1] r1[0,1]
3484 +# asm 1: vext.32 <x01=reg128#9%top,<r1=reg128#4%bot,<r1=reg128#4%bot,#0
3485 +# asm 2: vext.32 <x01=d17,<r1=d6,<r1=d6,#0
3486 +vext.32 d17,d6,d6,#0
3487 +
3488 +# qhasm: x23 = x23[0,1] r3[0,1]
3489 +# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#5%bot,<r3=reg128#5%bot,#0
3490 +# asm 2: vext.32 <x23=d19,<r3=d8,<r3=d8,#0
3491 +vext.32 d19,d8,d8,#0
3492 +
3493 +# qhasm: x4 = x4[0,2,1,3]
3494 +# asm 1: vtrn.32 <x4=reg128#11%bot,<x4=reg128#11%top
3495 +# asm 2: vtrn.32 <x4=d20,<x4=d21
3496 +vtrn.32 d20,d21
3497 +
3498 +# qhasm: unsigned>? len - 64
3499 +# asm 1: cmp <len=int32#4,#64
3500 +# asm 2: cmp <len=r3,#64
3501 +cmp r3,#64
3502 +
3503 +# qhasm: goto mainloop2 if unsigned>
3504 +bhi ._mainloop2
3505 +
3506 +# qhasm: input_2 -= 32
3507 +# asm 1: sub >input_2=int32#3,<input_2=int32#2,#32
3508 +# asm 2: sub >input_2=r2,<input_2=r1,#32
3509 +sub r2,r1,#32
3510 +
3511 +# qhasm: below64bytes:
3512 +._below64bytes:
3513 +
3514 +# qhasm: unsigned>? len - 32
3515 +# asm 1: cmp <len=int32#4,#32
3516 +# asm 2: cmp <len=r3,#32
3517 +cmp r3,#32
3518 +
3519 +# qhasm: goto end if !unsigned>
3520 +bls ._end
3521 +
3522 +# qhasm: mainloop:
3523 +._mainloop:
3524 +
3525 +# qhasm: new r0
3526 +
3527 +# qhasm: ptr = &two24
3528 +# asm 1: lea >ptr=int32#2,<two24=stack128#1
3529 +# asm 2: lea >ptr=r1,<two24=[sp,#0]
3530 +add r1,sp,#0
3531 +
3532 +# qhasm: r4 aligned= mem128[ptr]
3533 +# asm 1: vld1.8 {>r4=reg128#5%bot->r4=reg128#5%top},[<ptr=int32#2,: 128]
3534 +# asm 2: vld1.8 {>r4=d8->r4=d9},[<ptr=r1,: 128]
3535 +vld1.8 {d8-d9},[r1,: 128]
3536 +
3537 +# qhasm: u4 aligned= mem128[ptr]
3538 +# asm 1: vld1.8 {>u4=reg128#6%bot->u4=reg128#6%top},[<ptr=int32#2,: 128]
3539 +# asm 2: vld1.8 {>u4=d10->u4=d11},[<ptr=r1,: 128]
3540 +vld1.8 {d10-d11},[r1,: 128]
3541 +
3542 +# qhasm: c01 = mem128[input_2];input_2+=16
3543 +# asm 1: vld1.8 {>c01=reg128#8%bot->c01=reg128#8%top},[<input_2=int32#3]!
3544 +# asm 2: vld1.8 {>c01=d14->c01=d15},[<input_2=r2]!
3545 +vld1.8 {d14-d15},[r2]!
3546 +
3547 +# qhasm: r4[0,1] += x01[0] unsigned* y34[2]; r4[2,3] += x01[1] unsigned* y34 [3]
3548 +# asm 1: vmlal.u32 <r4=reg128#5,<x01=reg128#9%bot,<y34=reg128#3%top
3549 +# asm 2: vmlal.u32 <r4=q4,<x01=d16,<y34=d5
3550 +vmlal.u32 q4,d16,d5
3551 +
3552 +# qhasm: c23 = mem128[input_2];input_2+=16
3553 +# asm 1: vld1.8 {>c23=reg128#14%bot->c23=reg128#14%top},[<input_2=int32#3]!
3554 +# asm 2: vld1.8 {>c23=d26->c23=d27},[<input_2=r2]!
3555 +vld1.8 {d26-d27},[r2]!
3556 +
3557 +# qhasm: r4[0,1] += x01[2] unsigned* y34[0]; r4[2,3] += x01[3] unsigned* y34[1 ]
3558 +# asm 1: vmlal.u32 <r4=reg128#5,<x01=reg128#9%top,<y34=reg128#3%bot
3559 +# asm 2: vmlal.u32 <r4=q4,<x01=d17,<y34=d4
3560 +vmlal.u32 q4,d17,d4
3561 +
3562 +# qhasm: r0 = u4[1]c01[0]r0[2,3]
3563 +# asm 1: vext.32 <r0=reg128#4%bot,<u4=reg128#6%bot,<c01=reg128#8%bot,#1
3564 +# asm 2: vext.32 <r0=d6,<u4=d10,<c01=d14,#1
3565 +vext.32 d6,d10,d14,#1
3566 +
3567 +# qhasm: r4[0,1] += x23[0] unsigned* y12[2]; r4[2,3] += x23[1] unsigned* y12[3 ]
3568 +# asm 1: vmlal.u32 <r4=reg128#5,<x23=reg128#10%bot,<y12=reg128#2%top
3569 +# asm 2: vmlal.u32 <r4=q4,<x23=d18,<y12=d3
3570 +vmlal.u32 q4,d18,d3
3571 +
3572 +# qhasm: r0 = r0[0,1]u4[1]c23[0]
3573 +# asm 1: vext.32 <r0=reg128#4%top,<u4=reg128#6%bot,<c23=reg128#14%bot,#1
3574 +# asm 2: vext.32 <r0=d7,<u4=d10,<c23=d26,#1
3575 +vext.32 d7,d10,d26,#1
3576 +
3577 +# qhasm: r4[0,1] += x23[2] unsigned* y12[0]; r4[2,3] += x23[3] unsigned* y12[1 ]
3578 +# asm 1: vmlal.u32 <r4=reg128#5,<x23=reg128#10%top,<y12=reg128#2%bot
3579 +# asm 2: vmlal.u32 <r4=q4,<x23=d19,<y12=d2
3580 +vmlal.u32 q4,d19,d2
3581 +
3582 +# qhasm: r0 = r0[1]r0[0]r0[3]r0[2]
3583 +# asm 1: vrev64.i32 >r0=reg128#4,<r0=reg128#4
3584 +# asm 2: vrev64.i32 >r0=q3,<r0=q3
3585 +vrev64.i32 q3,q3
3586 +
3587 +# qhasm: r4[0,1] += x4[0] unsigned* y0[0]; r4[2,3] += x4[1] unsigned* y0[1]
3588 +# asm 1: vmlal.u32 <r4=reg128#5,<x4=reg128#11%bot,<y0=reg128#1%bot
3589 +# asm 2: vmlal.u32 <r4=q4,<x4=d20,<y0=d0
3590 +vmlal.u32 q4,d20,d0
3591 +
3592 +# qhasm: r0[0,1] += x4[0] unsigned* 5y12[0]; r0[2,3] += x4[1] unsigned* 5y12[ 1]
3593 +# asm 1: vmlal.u32 <r0=reg128#4,<x4=reg128#11%bot,<5y12=reg128#12%bot
3594 +# asm 2: vmlal.u32 <r0=q3,<x4=d20,<5y12=d22
3595 +vmlal.u32 q3,d20,d22
3596 +
3597 +# qhasm: r0[0,1] += x23[0] unsigned* 5y34[0]; r0[2,3] += x23[1] unsigned* 5y34[ 1]
3598 +# asm 1: vmlal.u32 <r0=reg128#4,<x23=reg128#10%bot,<5y34=reg128#13%bot
3599 +# asm 2: vmlal.u32 <r0=q3,<x23=d18,<5y34=d24
3600 +vmlal.u32 q3,d18,d24
3601 +
3602 +# qhasm: r0[0,1] += x23[2] unsigned* 5y12[2]; r0[2,3] += x23[3] unsigned* 5y12[ 3]
3603 +# asm 1: vmlal.u32 <r0=reg128#4,<x23=reg128#10%top,<5y12=reg128#12%top
3604 +# asm 2: vmlal.u32 <r0=q3,<x23=d19,<5y12=d23
3605 +vmlal.u32 q3,d19,d23
3606 +
3607 +# qhasm: c01 c23 = c01[0]c23[0]c01[2]c23[2]c01[1]c23[1]c01[3]c23[3]
3608 +# asm 1: vtrn.32 <c01=reg128#8,<c23=reg128#14
3609 +# asm 2: vtrn.32 <c01=q7,<c23=q13
3610 +vtrn.32 q7,q13
3611 +
3612 +# qhasm: r0[0,1] += x01[0] unsigned* y0[0]; r0[2,3] += x01[1] unsigned* y0[1]
3613 +# asm 1: vmlal.u32 <r0=reg128#4,<x01=reg128#9%bot,<y0=reg128#1%bot
3614 +# asm 2: vmlal.u32 <r0=q3,<x01=d16,<y0=d0
3615 +vmlal.u32 q3,d16,d0
3616 +
3617 +# qhasm: r3[0,1] = c23[2]<<18; r3[2,3] = c23[3]<<18
3618 +# asm 1: vshll.u32 >r3=reg128#6,<c23=reg128#14%top,#18
3619 +# asm 2: vshll.u32 >r3=q5,<c23=d27,#18
3620 +vshll.u32 q5,d27,#18
3621 +
3622 +# qhasm: r0[0,1] += x01[2] unsigned* 5y34[2]; r0[2,3] += x01[3] unsigned* 5y3 4[3]
3623 +# asm 1: vmlal.u32 <r0=reg128#4,<x01=reg128#9%top,<5y34=reg128#13%top
3624 +# asm 2: vmlal.u32 <r0=q3,<x01=d17,<5y34=d25
3625 +vmlal.u32 q3,d17,d25
3626 +
3627 +# qhasm: r3[0,1] += x01[0] unsigned* y34[0]; r3[2,3] += x01[1] unsigned* y34[ 1]
3628 +# asm 1: vmlal.u32 <r3=reg128#6,<x01=reg128#9%bot,<y34=reg128#3%bot
3629 +# asm 2: vmlal.u32 <r3=q5,<x01=d16,<y34=d4
3630 +vmlal.u32 q5,d16,d4
3631 +
3632 +# qhasm: r3[0,1] += x01[2] unsigned* y12[2]; r3[2,3] += x01[3] unsigned* y12[ 3]
3633 +# asm 1: vmlal.u32 <r3=reg128#6,<x01=reg128#9%top,<y12=reg128#2%top
3634 +# asm 2: vmlal.u32 <r3=q5,<x01=d17,<y12=d3
3635 +vmlal.u32 q5,d17,d3
3636 +
3637 +# qhasm: r3[0,1] += x23[0] unsigned* y12[0]; r3[2,3] += x23[1] unsigned* y12[ 1]
3638 +# asm 1: vmlal.u32 <r3=reg128#6,<x23=reg128#10%bot,<y12=reg128#2%bot
3639 +# asm 2: vmlal.u32 <r3=q5,<x23=d18,<y12=d2
3640 +vmlal.u32 q5,d18,d2
3641 +
3642 +# qhasm: r3[0,1] += x23[2] unsigned* y0[0]; r3[2,3] += x23[3] unsigned* y0[1]
3643 +# asm 1: vmlal.u32 <r3=reg128#6,<x23=reg128#10%top,<y0=reg128#1%bot
3644 +# asm 2: vmlal.u32 <r3=q5,<x23=d19,<y0=d0
3645 +vmlal.u32 q5,d19,d0
3646 +
3647 +# qhasm: r1[0,1] = c23[0]<<6; r1[2,3] = c23[1]<<6
3648 +# asm 1: vshll.u32 >r1=reg128#14,<c23=reg128#14%bot,#6
3649 +# asm 2: vshll.u32 >r1=q13,<c23=d26,#6
3650 +vshll.u32 q13,d26,#6
3651 +
3652 +# qhasm: r3[0,1] += x4[0] unsigned* 5y34[2]; r3[2,3] += x4[1] unsigned* 5y3 4[3]
3653 +# asm 1: vmlal.u32 <r3=reg128#6,<x4=reg128#11%bot,<5y34=reg128#13%top
3654 +# asm 2: vmlal.u32 <r3=q5,<x4=d20,<5y34=d25
3655 +vmlal.u32 q5,d20,d25
3656 +
3657 +# qhasm: r1[0,1] += x01[0] unsigned* y12[0]; r1[2,3] += x01[1] unsigned* y12[ 1]
3658 +# asm 1: vmlal.u32 <r1=reg128#14,<x01=reg128#9%bot,<y12=reg128#2%bot
3659 +# asm 2: vmlal.u32 <r1=q13,<x01=d16,<y12=d2
3660 +vmlal.u32 q13,d16,d2
3661 +
3662 +# qhasm: r1[0,1] += x01[2] unsigned* y0[0]; r1[2,3] += x01[3] unsigned* y0[1]
3663 +# asm 1: vmlal.u32 <r1=reg128#14,<x01=reg128#9%top,<y0=reg128#1%bot
3664 +# asm 2: vmlal.u32 <r1=q13,<x01=d17,<y0=d0
3665 +vmlal.u32 q13,d17,d0
3666 +
3667 +# qhasm: r1[0,1] += x23[0] unsigned* 5y34[2]; r1[2,3] += x23[1] unsigned* 5y3 4[3]
3668 +# asm 1: vmlal.u32 <r1=reg128#14,<x23=reg128#10%bot,<5y34=reg128#13%top
3669 +# asm 2: vmlal.u32 <r1=q13,<x23=d18,<5y34=d25
3670 +vmlal.u32 q13,d18,d25
3671 +
3672 +# qhasm: r1[0,1] += x23[2] unsigned* 5y34[0]; r1[2,3] += x23[3] unsigned* 5y34[ 1]
3673 +# asm 1: vmlal.u32 <r1=reg128#14,<x23=reg128#10%top,<5y34=reg128#13%bot
3674 +# asm 2: vmlal.u32 <r1=q13,<x23=d19,<5y34=d24
3675 +vmlal.u32 q13,d19,d24
3676 +
3677 +# qhasm: r2[0,1] = c01[2]<<12; r2[2,3] = c01[3]<<12
3678 +# asm 1: vshll.u32 >r2=reg128#8,<c01=reg128#8%top,#12
3679 +# asm 2: vshll.u32 >r2=q7,<c01=d15,#12
3680 +vshll.u32 q7,d15,#12
3681 +
3682 +# qhasm: r1[0,1] += x4[0] unsigned* 5y12[2]; r1[2,3] += x4[1] unsigned* 5y12[ 3]
3683 +# asm 1: vmlal.u32 <r1=reg128#14,<x4=reg128#11%bot,<5y12=reg128#12%top
3684 +# asm 2: vmlal.u32 <r1=q13,<x4=d20,<5y12=d23
3685 +vmlal.u32 q13,d20,d23
3686 +
3687 +# qhasm: r2[0,1] += x01[0] unsigned* y12[2]; r2[2,3] += x01[1] unsigned* y12[ 3]
3688 +# asm 1: vmlal.u32 <r2=reg128#8,<x01=reg128#9%bot,<y12=reg128#2%top
3689 +# asm 2: vmlal.u32 <r2=q7,<x01=d16,<y12=d3
3690 +vmlal.u32 q7,d16,d3
3691 +
3692 +# qhasm: r2[0,1] += x01[2] unsigned* y12[0]; r2[2,3] += x01[3] unsigned* y12[ 1]
3693 +# asm 1: vmlal.u32 <r2=reg128#8,<x01=reg128#9%top,<y12=reg128#2%bot
3694 +# asm 2: vmlal.u32 <r2=q7,<x01=d17,<y12=d2
3695 +vmlal.u32 q7,d17,d2
3696 +
3697 +# qhasm: r2[0,1] += x23[0] unsigned* y0[0]; r2[2,3] += x23[1] unsigned* y0[1]
3698 +# asm 1: vmlal.u32 <r2=reg128#8,<x23=reg128#10%bot,<y0=reg128#1%bot
3699 +# asm 2: vmlal.u32 <r2=q7,<x23=d18,<y0=d0
3700 +vmlal.u32 q7,d18,d0
3701 +
3702 +# qhasm: r2[0,1] += x23[2] unsigned* 5y34[2]; r2[2,3] += x23[3] unsigned* 5y3 4[3]
3703 +# asm 1: vmlal.u32 <r2=reg128#8,<x23=reg128#10%top,<5y34=reg128#13%top
3704 +# asm 2: vmlal.u32 <r2=q7,<x23=d19,<5y34=d25
3705 +vmlal.u32 q7,d19,d25
3706 +
3707 +# qhasm: r2[0,1] += x4[0] unsigned* 5y34[0]; r2[2,3] += x4[1] unsigned* 5y34[ 1]
3708 +# asm 1: vmlal.u32 <r2=reg128#8,<x4=reg128#11%bot,<5y34=reg128#13%bot
3709 +# asm 2: vmlal.u32 <r2=q7,<x4=d20,<5y34=d24
3710 +vmlal.u32 q7,d20,d24
3711 +
3712 +# qhasm: 2x t1 = r0 unsigned>> 26
3713 +# asm 1: vshr.u64 >t1=reg128#9,<r0=reg128#4,#26
3714 +# asm 2: vshr.u64 >t1=q8,<r0=q3,#26
3715 +vshr.u64 q8,q3,#26
3716 +
3717 +# qhasm: r0 &= mask
3718 +# asm 1: vand >r0=reg128#4,<r0=reg128#4,<mask=reg128#7
3719 +# asm 2: vand >r0=q3,<r0=q3,<mask=q6
3720 +vand q3,q3,q6
3721 +
3722 +# qhasm: 2x r1 += t1
3723 +# asm 1: vadd.i64 >r1=reg128#9,<r1=reg128#14,<t1=reg128#9
3724 +# asm 2: vadd.i64 >r1=q8,<r1=q13,<t1=q8
3725 +vadd.i64 q8,q13,q8
3726 +
3727 +# qhasm: 2x t4 = r3 unsigned>> 26
3728 +# asm 1: vshr.u64 >t4=reg128#10,<r3=reg128#6,#26
3729 +# asm 2: vshr.u64 >t4=q9,<r3=q5,#26
3730 +vshr.u64 q9,q5,#26
3731 +
3732 +# qhasm: r3 &= mask
3733 +# asm 1: vand >r3=reg128#6,<r3=reg128#6,<mask=reg128#7
3734 +# asm 2: vand >r3=q5,<r3=q5,<mask=q6
3735 +vand q5,q5,q6
3736 +
3737 +# qhasm: 2x r4 += t4
3738 +# asm 1: vadd.i64 >r4=reg128#5,<r4=reg128#5,<t4=reg128#10
3739 +# asm 2: vadd.i64 >r4=q4,<r4=q4,<t4=q9
3740 +vadd.i64 q4,q4,q9
3741 +
3742 +# qhasm: 2x t2 = r1 unsigned>> 26
3743 +# asm 1: vshr.u64 >t2=reg128#10,<r1=reg128#9,#26
3744 +# asm 2: vshr.u64 >t2=q9,<r1=q8,#26
3745 +vshr.u64 q9,q8,#26
3746 +
3747 +# qhasm: r1 &= mask
3748 +# asm 1: vand >r1=reg128#11,<r1=reg128#9,<mask=reg128#7
3749 +# asm 2: vand >r1=q10,<r1=q8,<mask=q6
3750 +vand q10,q8,q6
3751 +
3752 +# qhasm: 2x t0 = r4 unsigned>> 26
3753 +# asm 1: vshr.u64 >t0=reg128#9,<r4=reg128#5,#26
3754 +# asm 2: vshr.u64 >t0=q8,<r4=q4,#26
3755 +vshr.u64 q8,q4,#26
3756 +
3757 +# qhasm: 2x r2 += t2
3758 +# asm 1: vadd.i64 >r2=reg128#8,<r2=reg128#8,<t2=reg128#10
3759 +# asm 2: vadd.i64 >r2=q7,<r2=q7,<t2=q9
3760 +vadd.i64 q7,q7,q9
3761 +
3762 +# qhasm: r4 &= mask
3763 +# asm 1: vand >r4=reg128#5,<r4=reg128#5,<mask=reg128#7
3764 +# asm 2: vand >r4=q4,<r4=q4,<mask=q6
3765 +vand q4,q4,q6
3766 +
3767 +# qhasm: 2x r0 += t0
3768 +# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#9
3769 +# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q8
3770 +vadd.i64 q3,q3,q8
3771 +
3772 +# qhasm: 2x t0 <<= 2
3773 +# asm 1: vshl.i64 >t0=reg128#9,<t0=reg128#9,#2
3774 +# asm 2: vshl.i64 >t0=q8,<t0=q8,#2
3775 +vshl.i64 q8,q8,#2
3776 +
3777 +# qhasm: 2x t3 = r2 unsigned>> 26
3778 +# asm 1: vshr.u64 >t3=reg128#14,<r2=reg128#8,#26
3779 +# asm 2: vshr.u64 >t3=q13,<r2=q7,#26
3780 +vshr.u64 q13,q7,#26
3781 +
3782 +# qhasm: 2x r0 += t0
3783 +# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#9
3784 +# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q8
3785 +vadd.i64 q3,q3,q8
3786 +
3787 +# qhasm: x23 = r2 & mask
3788 +# asm 1: vand >x23=reg128#10,<r2=reg128#8,<mask=reg128#7
3789 +# asm 2: vand >x23=q9,<r2=q7,<mask=q6
3790 +vand q9,q7,q6
3791 +
3792 +# qhasm: 2x r3 += t3
3793 +# asm 1: vadd.i64 >r3=reg128#6,<r3=reg128#6,<t3=reg128#14
3794 +# asm 2: vadd.i64 >r3=q5,<r3=q5,<t3=q13
3795 +vadd.i64 q5,q5,q13
3796 +
3797 +# qhasm: 2x t1 = r0 unsigned>> 26
3798 +# asm 1: vshr.u64 >t1=reg128#8,<r0=reg128#4,#26
3799 +# asm 2: vshr.u64 >t1=q7,<r0=q3,#26
3800 +vshr.u64 q7,q3,#26
3801 +
3802 +# qhasm: x01 = r0 & mask
3803 +# asm 1: vand >x01=reg128#9,<r0=reg128#4,<mask=reg128#7
3804 +# asm 2: vand >x01=q8,<r0=q3,<mask=q6
3805 +vand q8,q3,q6
3806 +
3807 +# qhasm: 2x r1 += t1
3808 +# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#11,<t1=reg128#8
3809 +# asm 2: vadd.i64 >r1=q3,<r1=q10,<t1=q7
3810 +vadd.i64 q3,q10,q7
3811 +
3812 +# qhasm: 2x t4 = r3 unsigned>> 26
3813 +# asm 1: vshr.u64 >t4=reg128#8,<r3=reg128#6,#26
3814 +# asm 2: vshr.u64 >t4=q7,<r3=q5,#26
3815 +vshr.u64 q7,q5,#26
3816 +
3817 +# qhasm: r3 &= mask
3818 +# asm 1: vand >r3=reg128#6,<r3=reg128#6,<mask=reg128#7
3819 +# asm 2: vand >r3=q5,<r3=q5,<mask=q6
3820 +vand q5,q5,q6
3821 +
3822 +# qhasm: 2x x4 = r4 + t4
3823 +# asm 1: vadd.i64 >x4=reg128#11,<r4=reg128#5,<t4=reg128#8
3824 +# asm 2: vadd.i64 >x4=q10,<r4=q4,<t4=q7
3825 +vadd.i64 q10,q4,q7
3826 +
3827 +# qhasm: len -= 32
3828 +# asm 1: sub >len=int32#4,<len=int32#4,#32
3829 +# asm 2: sub >len=r3,<len=r3,#32
3830 +sub r3,r3,#32
3831 +
3832 +# qhasm: x01 = x01[0,2,1,3]
3833 +# asm 1: vtrn.32 <x01=reg128#9%bot,<x01=reg128#9%top
3834 +# asm 2: vtrn.32 <x01=d16,<x01=d17
3835 +vtrn.32 d16,d17
3836 +
3837 +# qhasm: x23 = x23[0,2,1,3]
3838 +# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top
3839 +# asm 2: vtrn.32 <x23=d18,<x23=d19
3840 +vtrn.32 d18,d19
3841 +
3842 +# qhasm: r1 = r1[0,2,1,3]
3843 +# asm 1: vtrn.32 <r1=reg128#4%bot,<r1=reg128#4%top
3844 +# asm 2: vtrn.32 <r1=d6,<r1=d7
3845 +vtrn.32 d6,d7
3846 +
3847 +# qhasm: r3 = r3[0,2,1,3]
3848 +# asm 1: vtrn.32 <r3=reg128#6%bot,<r3=reg128#6%top
3849 +# asm 2: vtrn.32 <r3=d10,<r3=d11
3850 +vtrn.32 d10,d11
3851 +
3852 +# qhasm: x4 = x4[0,2,1,3]
3853 +# asm 1: vtrn.32 <x4=reg128#11%bot,<x4=reg128#11%top
3854 +# asm 2: vtrn.32 <x4=d20,<x4=d21
3855 +vtrn.32 d20,d21
3856 +
3857 +# qhasm: x01 = x01[0,1] r1[0,1]
3858 +# asm 1: vext.32 <x01=reg128#9%top,<r1=reg128#4%bot,<r1=reg128#4%bot,#0
3859 +# asm 2: vext.32 <x01=d17,<r1=d6,<r1=d6,#0
3860 +vext.32 d17,d6,d6,#0
3861 +
3862 +# qhasm: x23 = x23[0,1] r3[0,1]
3863 +# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#6%bot,<r3=reg128#6%bot,#0
3864 +# asm 2: vext.32 <x23=d19,<r3=d10,<r3=d10,#0
3865 +vext.32 d19,d10,d10,#0
3866 +
3867 +# qhasm: unsigned>? len - 32
3868 +# asm 1: cmp <len=int32#4,#32
3869 +# asm 2: cmp <len=r3,#32
3870 +cmp r3,#32
3871 +
3872 +# qhasm: goto mainloop if unsigned>
3873 +bhi ._mainloop
3874 +
3875 +# qhasm: end:
3876 +._end:
3877 +
3878 +# qhasm: mem128[input_0] = x01;input_0+=16
3879 +# asm 1: vst1.8 {<x01=reg128#9%bot-<x01=reg128#9%top},[<input_0=int32#1]!
3880 +# asm 2: vst1.8 {<x01=d16-<x01=d17},[<input_0=r0]!
3881 +vst1.8 {d16-d17},[r0]!
3882 +
3883 +# qhasm: mem128[input_0] = x23;input_0+=16
3884 +# asm 1: vst1.8 {<x23=reg128#10%bot-<x23=reg128#10%top},[<input_0=int32#1]!
3885 +# asm 2: vst1.8 {<x23=d18-<x23=d19},[<input_0=r0]!
3886 +vst1.8 {d18-d19},[r0]!
3887 +
3888 +# qhasm: mem64[input_0] = x4[0]
3889 +# asm 1: vst1.8 <x4=reg128#11%bot,[<input_0=int32#1]
3890 +# asm 2: vst1.8 <x4=d20,[<input_0=r0]
3891 +vst1.8 d20,[r0]
3892 +
3893 +# qhasm: len = len
3894 +# asm 1: mov >len=int32#1,<len=int32#4
3895 +# asm 2: mov >len=r0,<len=r3
3896 +mov r0,r3
3897 +
3898 +# qhasm: qpopreturn len
3899 +mov sp,r12
3900 +vpop {q4,q5,q6,q7}
3901 +bx lr
3902 +
3903 +# qhasm: int32 input_0
3904 +
3905 +# qhasm: int32 input_1
3906 +
3907 +# qhasm: int32 input_2
3908 +
3909 +# qhasm: int32 input_3
3910 +
3911 +# qhasm: stack32 input_4
3912 +
3913 +# qhasm: stack32 input_5
3914 +
3915 +# qhasm: stack32 input_6
3916 +
3917 +# qhasm: stack32 input_7
3918 +
3919 +# qhasm: int32 caller_r4
3920 +
3921 +# qhasm: int32 caller_r5
3922 +
3923 +# qhasm: int32 caller_r6
3924 +
3925 +# qhasm: int32 caller_r7
3926 +
3927 +# qhasm: int32 caller_r8
3928 +
3929 +# qhasm: int32 caller_r9
3930 +
3931 +# qhasm: int32 caller_r10
3932 +
3933 +# qhasm: int32 caller_r11
3934 +
3935 +# qhasm: int32 caller_r12
3936 +
3937 +# qhasm: int32 caller_r14
3938 +
3939 +# qhasm: reg128 caller_q4
3940 +
3941 +# qhasm: reg128 caller_q5
3942 +
3943 +# qhasm: reg128 caller_q6
3944 +
3945 +# qhasm: reg128 caller_q7
3946 +
3947 +# qhasm: reg128 r0
3948 +
3949 +# qhasm: reg128 r1
3950 +
3951 +# qhasm: reg128 r2
3952 +
3953 +# qhasm: reg128 r3
3954 +
3955 +# qhasm: reg128 r4
3956 +
3957 +# qhasm: reg128 x01
3958 +
3959 +# qhasm: reg128 x23
3960 +
3961 +# qhasm: reg128 x4
3962 +
3963 +# qhasm: reg128 y01
3964 +
3965 +# qhasm: reg128 y23
3966 +
3967 +# qhasm: reg128 y4
3968 +
3969 +# qhasm: reg128 _5y01
3970 +
3971 +# qhasm: reg128 _5y23
3972 +
3973 +# qhasm: reg128 _5y4
3974 +
3975 +# qhasm: reg128 c01
3976 +
3977 +# qhasm: reg128 c23
3978 +
3979 +# qhasm: reg128 c4
3980 +
3981 +# qhasm: reg128 t0
3982 +
3983 +# qhasm: reg128 t1
3984 +
3985 +# qhasm: reg128 t2
3986 +
3987 +# qhasm: reg128 t3
3988 +
3989 +# qhasm: reg128 t4
3990 +
3991 +# qhasm: reg128 mask
3992 +
3993 +# qhasm: enter crypto_onetimeauth_poly1305_neon2_addmulmod
3994 +.align 2
3995 +.global openssl_poly1305_neon2_addmulmod
3996 +.type openssl_poly1305_neon2_addmulmod STT_FUNC
3997 +openssl_poly1305_neon2_addmulmod:
3998 +sub sp,sp,#0
3999 +
4000 +# qhasm: 2x mask = 0xffffffff
4001 +# asm 1: vmov.i64 >mask=reg128#1,#0xffffffff
4002 +# asm 2: vmov.i64 >mask=q0,#0xffffffff
4003 +vmov.i64 q0,#0xffffffff
4004 +
4005 +# qhasm: y01 aligned= mem128[input_2];input_2+=16
4006 +# asm 1: vld1.8 {>y01=reg128#2%bot->y01=reg128#2%top},[<input_2=int32#3,: 128]!
4007 +# asm 2: vld1.8 {>y01=d2->y01=d3},[<input_2=r2,: 128]!
4008 +vld1.8 {d2-d3},[r2,: 128]!
4009 +
4010 +# qhasm: 4x _5y01 = y01 << 2
4011 +# asm 1: vshl.i32 >_5y01=reg128#3,<y01=reg128#2,#2
4012 +# asm 2: vshl.i32 >_5y01=q2,<y01=q1,#2
4013 +vshl.i32 q2,q1,#2
4014 +
4015 +# qhasm: y23 aligned= mem128[input_2];input_2+=16
4016 +# asm 1: vld1.8 {>y23=reg128#4%bot->y23=reg128#4%top},[<input_2=int32#3,: 128]!
4017 +# asm 2: vld1.8 {>y23=d6->y23=d7},[<input_2=r2,: 128]!
4018 +vld1.8 {d6-d7},[r2,: 128]!
4019 +
4020 +# qhasm: 4x _5y23 = y23 << 2
4021 +# asm 1: vshl.i32 >_5y23=reg128#9,<y23=reg128#4,#2
4022 +# asm 2: vshl.i32 >_5y23=q8,<y23=q3,#2
4023 +vshl.i32 q8,q3,#2
4024 +
4025 +# qhasm: y4 aligned= mem64[input_2]y4[1]
4026 +# asm 1: vld1.8 {<y4=reg128#10%bot},[<input_2=int32#3,: 64]
4027 +# asm 2: vld1.8 {<y4=d18},[<input_2=r2,: 64]
4028 +vld1.8 {d18},[r2,: 64]
4029 +
4030 +# qhasm: 4x _5y4 = y4 << 2
4031 +# asm 1: vshl.i32 >_5y4=reg128#11,<y4=reg128#10,#2
4032 +# asm 2: vshl.i32 >_5y4=q10,<y4=q9,#2
4033 +vshl.i32 q10,q9,#2
4034 +
4035 +# qhasm: x01 aligned= mem128[input_1];input_1+=16
4036 +# asm 1: vld1.8 {>x01=reg128#12%bot->x01=reg128#12%top},[<input_1=int32#2,: 128 ]!
4037 +# asm 2: vld1.8 {>x01=d22->x01=d23},[<input_1=r1,: 128]!
4038 +vld1.8 {d22-d23},[r1,: 128]!
4039 +
4040 +# qhasm: 4x _5y01 += y01
4041 +# asm 1: vadd.i32 >_5y01=reg128#3,<_5y01=reg128#3,<y01=reg128#2
4042 +# asm 2: vadd.i32 >_5y01=q2,<_5y01=q2,<y01=q1
4043 +vadd.i32 q2,q2,q1
4044 +
4045 +# qhasm: x23 aligned= mem128[input_1];input_1+=16
4046 +# asm 1: vld1.8 {>x23=reg128#13%bot->x23=reg128#13%top},[<input_1=int32#2,: 128 ]!
4047 +# asm 2: vld1.8 {>x23=d24->x23=d25},[<input_1=r1,: 128]!
4048 +vld1.8 {d24-d25},[r1,: 128]!
4049 +
4050 +# qhasm: 4x _5y23 += y23
4051 +# asm 1: vadd.i32 >_5y23=reg128#9,<_5y23=reg128#9,<y23=reg128#4
4052 +# asm 2: vadd.i32 >_5y23=q8,<_5y23=q8,<y23=q3
4053 +vadd.i32 q8,q8,q3
4054 +
4055 +# qhasm: 4x _5y4 += y4
4056 +# asm 1: vadd.i32 >_5y4=reg128#11,<_5y4=reg128#11,<y4=reg128#10
4057 +# asm 2: vadd.i32 >_5y4=q10,<_5y4=q10,<y4=q9
4058 +vadd.i32 q10,q10,q9
4059 +
4060 +# qhasm: c01 aligned= mem128[input_3];input_3+=16
4061 +# asm 1: vld1.8 {>c01=reg128#14%bot->c01=reg128#14%top},[<input_3=int32#4,: 128 ]!
4062 +# asm 2: vld1.8 {>c01=d26->c01=d27},[<input_3=r3,: 128]!
4063 +vld1.8 {d26-d27},[r3,: 128]!
4064 +
4065 +# qhasm: 4x x01 += c01
4066 +# asm 1: vadd.i32 >x01=reg128#12,<x01=reg128#12,<c01=reg128#14
4067 +# asm 2: vadd.i32 >x01=q11,<x01=q11,<c01=q13
4068 +vadd.i32 q11,q11,q13
4069 +
4070 +# qhasm: c23 aligned= mem128[input_3];input_3+=16
4071 +# asm 1: vld1.8 {>c23=reg128#14%bot->c23=reg128#14%top},[<input_3=int32#4,: 128 ]!
4072 +# asm 2: vld1.8 {>c23=d26->c23=d27},[<input_3=r3,: 128]!
4073 +vld1.8 {d26-d27},[r3,: 128]!
4074 +
4075 +# qhasm: 4x x23 += c23
4076 +# asm 1: vadd.i32 >x23=reg128#13,<x23=reg128#13,<c23=reg128#14
4077 +# asm 2: vadd.i32 >x23=q12,<x23=q12,<c23=q13
4078 +vadd.i32 q12,q12,q13
4079 +
4080 +# qhasm: x4 aligned= mem64[input_1]x4[1]
4081 +# asm 1: vld1.8 {<x4=reg128#14%bot},[<input_1=int32#2,: 64]
4082 +# asm 2: vld1.8 {<x4=d26},[<input_1=r1,: 64]
4083 +vld1.8 {d26},[r1,: 64]
4084 +
4085 +# qhasm: 2x mask unsigned>>=6
4086 +# asm 1: vshr.u64 >mask=reg128#1,<mask=reg128#1,#6
4087 +# asm 2: vshr.u64 >mask=q0,<mask=q0,#6
4088 +vshr.u64 q0,q0,#6
4089 +
4090 +# qhasm: c4 aligned= mem64[input_3]c4[1]
4091 +# asm 1: vld1.8 {<c4=reg128#15%bot},[<input_3=int32#4,: 64]
4092 +# asm 2: vld1.8 {<c4=d28},[<input_3=r3,: 64]
4093 +vld1.8 {d28},[r3,: 64]
4094 +
4095 +# qhasm: 4x x4 += c4
4096 +# asm 1: vadd.i32 >x4=reg128#14,<x4=reg128#14,<c4=reg128#15
4097 +# asm 2: vadd.i32 >x4=q13,<x4=q13,<c4=q14
4098 +vadd.i32 q13,q13,q14
4099 +
4100 +# qhasm: r0[0,1] = x01[0] unsigned* y01[0]; r0[2,3] = x01[1] unsigned* y01[ 1]
4101 +# asm 1: vmull.u32 >r0=reg128#15,<x01=reg128#12%bot,<y01=reg128#2%bot
4102 +# asm 2: vmull.u32 >r0=q14,<x01=d22,<y01=d2
4103 +vmull.u32 q14,d22,d2
4104 +
4105 +# qhasm: r0[0,1] += x01[2] unsigned* _5y4[0]; r0[2,3] += x01[3] unsigned* _5y 4[1]
4106 +# asm 1: vmlal.u32 <r0=reg128#15,<x01=reg128#12%top,<_5y4=reg128#11%bot
4107 +# asm 2: vmlal.u32 <r0=q14,<x01=d23,<_5y4=d20
4108 +vmlal.u32 q14,d23,d20
4109 +
4110 +# qhasm: r0[0,1] += x23[0] unsigned* _5y23[2]; r0[2,3] += x23[1] unsigned* _5y2 3[3]
4111 +# asm 1: vmlal.u32 <r0=reg128#15,<x23=reg128#13%bot,<_5y23=reg128#9%top
4112 +# asm 2: vmlal.u32 <r0=q14,<x23=d24,<_5y23=d17
4113 +vmlal.u32 q14,d24,d17
4114 +
4115 +# qhasm: r0[0,1] += x23[2] unsigned* _5y23[0]; r0[2,3] += x23[3] unsigned* _5y2 3[1]
4116 +# asm 1: vmlal.u32 <r0=reg128#15,<x23=reg128#13%top,<_5y23=reg128#9%bot
4117 +# asm 2: vmlal.u32 <r0=q14,<x23=d25,<_5y23=d16
4118 +vmlal.u32 q14,d25,d16
4119 +
4120 +# qhasm: r0[0,1] += x4[0] unsigned* _5y01[2]; r0[2,3] += x4[1] unsigned* _5y0 1[3]
4121 +# asm 1: vmlal.u32 <r0=reg128#15,<x4=reg128#14%bot,<_5y01=reg128#3%top
4122 +# asm 2: vmlal.u32 <r0=q14,<x4=d26,<_5y01=d5
4123 +vmlal.u32 q14,d26,d5
4124 +
4125 +# qhasm: r1[0,1] = x01[0] unsigned* y01[2]; r1[2,3] = x01[1] unsigned* y01[ 3]
4126 +# asm 1: vmull.u32 >r1=reg128#3,<x01=reg128#12%bot,<y01=reg128#2%top
4127 +# asm 2: vmull.u32 >r1=q2,<x01=d22,<y01=d3
4128 +vmull.u32 q2,d22,d3
4129 +
4130 +# qhasm: r1[0,1] += x01[2] unsigned* y01[0]; r1[2,3] += x01[3] unsigned* y01[ 1]
4131 +# asm 1: vmlal.u32 <r1=reg128#3,<x01=reg128#12%top,<y01=reg128#2%bot
4132 +# asm 2: vmlal.u32 <r1=q2,<x01=d23,<y01=d2
4133 +vmlal.u32 q2,d23,d2
4134 +
4135 +# qhasm: r1[0,1] += x23[0] unsigned* _5y4[0]; r1[2,3] += x23[1] unsigned* _5y 4[1]
4136 +# asm 1: vmlal.u32 <r1=reg128#3,<x23=reg128#13%bot,<_5y4=reg128#11%bot
4137 +# asm 2: vmlal.u32 <r1=q2,<x23=d24,<_5y4=d20
4138 +vmlal.u32 q2,d24,d20
4139 +
4140 +# qhasm: r1[0,1] += x23[2] unsigned* _5y23[2]; r1[2,3] += x23[3] unsigned* _5y2 3[3]
4141 +# asm 1: vmlal.u32 <r1=reg128#3,<x23=reg128#13%top,<_5y23=reg128#9%top
4142 +# asm 2: vmlal.u32 <r1=q2,<x23=d25,<_5y23=d17
4143 +vmlal.u32 q2,d25,d17
4144 +
4145 +# qhasm: r1[0,1] += x4[0] unsigned* _5y23[0]; r1[2,3] += x4[1] unsigned* _5y2 3[1]
4146 +# asm 1: vmlal.u32 <r1=reg128#3,<x4=reg128#14%bot,<_5y23=reg128#9%bot
4147 +# asm 2: vmlal.u32 <r1=q2,<x4=d26,<_5y23=d16
4148 +vmlal.u32 q2,d26,d16
4149 +
4150 +# qhasm: r2[0,1] = x01[0] unsigned* y23[0]; r2[2,3] = x01[1] unsigned* y23[ 1]
4151 +# asm 1: vmull.u32 >r2=reg128#16,<x01=reg128#12%bot,<y23=reg128#4%bot
4152 +# asm 2: vmull.u32 >r2=q15,<x01=d22,<y23=d6
4153 +vmull.u32 q15,d22,d6
4154 +
4155 +# qhasm: r2[0,1] += x01[2] unsigned* y01[2]; r2[2,3] += x01[3] unsigned* y01[ 3]
4156 +# asm 1: vmlal.u32 <r2=reg128#16,<x01=reg128#12%top,<y01=reg128#2%top
4157 +# asm 2: vmlal.u32 <r2=q15,<x01=d23,<y01=d3
4158 +vmlal.u32 q15,d23,d3
4159 +
4160 +# qhasm: r2[0,1] += x23[0] unsigned* y01[0]; r2[2,3] += x23[1] unsigned* y01[ 1]
4161 +# asm 1: vmlal.u32 <r2=reg128#16,<x23=reg128#13%bot,<y01=reg128#2%bot
4162 +# asm 2: vmlal.u32 <r2=q15,<x23=d24,<y01=d2
4163 +vmlal.u32 q15,d24,d2
4164 +
4165 +# qhasm: r2[0,1] += x23[2] unsigned* _5y4[0]; r2[2,3] += x23[3] unsigned* _5y 4[1]
4166 +# asm 1: vmlal.u32 <r2=reg128#16,<x23=reg128#13%top,<_5y4=reg128#11%bot
4167 +# asm 2: vmlal.u32 <r2=q15,<x23=d25,<_5y4=d20
4168 +vmlal.u32 q15,d25,d20
4169 +
4170 +# qhasm: r2[0,1] += x4[0] unsigned* _5y23[2]; r2[2,3] += x4[1] unsigned* _5y2 3[3]
4171 +# asm 1: vmlal.u32 <r2=reg128#16,<x4=reg128#14%bot,<_5y23=reg128#9%top
4172 +# asm 2: vmlal.u32 <r2=q15,<x4=d26,<_5y23=d17
4173 +vmlal.u32 q15,d26,d17
4174 +
4175 +# qhasm: r3[0,1] = x01[0] unsigned* y23[2]; r3[2,3] = x01[1] unsigned* y23[ 3]
4176 +# asm 1: vmull.u32 >r3=reg128#9,<x01=reg128#12%bot,<y23=reg128#4%top
4177 +# asm 2: vmull.u32 >r3=q8,<x01=d22,<y23=d7
4178 +vmull.u32 q8,d22,d7
4179 +
4180 +# qhasm: r3[0,1] += x01[2] unsigned* y23[0]; r3[2,3] += x01[3] unsigned* y23[ 1]
4181 +# asm 1: vmlal.u32 <r3=reg128#9,<x01=reg128#12%top,<y23=reg128#4%bot
4182 +# asm 2: vmlal.u32 <r3=q8,<x01=d23,<y23=d6
4183 +vmlal.u32 q8,d23,d6
4184 +
4185 +# qhasm: r3[0,1] += x23[0] unsigned* y01[2]; r3[2,3] += x23[1] unsigned* y01[ 3]
4186 +# asm 1: vmlal.u32 <r3=reg128#9,<x23=reg128#13%bot,<y01=reg128#2%top
4187 +# asm 2: vmlal.u32 <r3=q8,<x23=d24,<y01=d3
4188 +vmlal.u32 q8,d24,d3
4189 +
4190 +# qhasm: r3[0,1] += x23[2] unsigned* y01[0]; r3[2,3] += x23[3] unsigned* y01[ 1]
4191 +# asm 1: vmlal.u32 <r3=reg128#9,<x23=reg128#13%top,<y01=reg128#2%bot
4192 +# asm 2: vmlal.u32 <r3=q8,<x23=d25,<y01=d2
4193 +vmlal.u32 q8,d25,d2
4194 +
4195 +# qhasm: r3[0,1] += x4[0] unsigned* _5y4[0]; r3[2,3] += x4[1] unsigned* _5y 4[1]
4196 +# asm 1: vmlal.u32 <r3=reg128#9,<x4=reg128#14%bot,<_5y4=reg128#11%bot
4197 +# asm 2: vmlal.u32 <r3=q8,<x4=d26,<_5y4=d20
4198 +vmlal.u32 q8,d26,d20
4199 +
4200 +# qhasm: r4[0,1] = x01[0] unsigned* y4[0]; r4[2,3] = x01[1] unsigned* y4[1 ]
4201 +# asm 1: vmull.u32 >r4=reg128#10,<x01=reg128#12%bot,<y4=reg128#10%bot
4202 +# asm 2: vmull.u32 >r4=q9,<x01=d22,<y4=d18
4203 +vmull.u32 q9,d22,d18
4204 +
4205 +# qhasm: r4[0,1] += x01[2] unsigned* y23[2]; r4[2,3] += x01[3] unsigned* y23[3 ]
4206 +# asm 1: vmlal.u32 <r4=reg128#10,<x01=reg128#12%top,<y23=reg128#4%top
4207 +# asm 2: vmlal.u32 <r4=q9,<x01=d23,<y23=d7
4208 +vmlal.u32 q9,d23,d7
4209 +
4210 +# qhasm: r4[0,1] += x23[0] unsigned* y23[0]; r4[2,3] += x23[1] unsigned* y23[1 ]
4211 +# asm 1: vmlal.u32 <r4=reg128#10,<x23=reg128#13%bot,<y23=reg128#4%bot
4212 +# asm 2: vmlal.u32 <r4=q9,<x23=d24,<y23=d6
4213 +vmlal.u32 q9,d24,d6
4214 +
4215 +# qhasm: r4[0,1] += x23[2] unsigned* y01[2]; r4[2,3] += x23[3] unsigned* y01[3 ]
4216 +# asm 1: vmlal.u32 <r4=reg128#10,<x23=reg128#13%top,<y01=reg128#2%top
4217 +# asm 2: vmlal.u32 <r4=q9,<x23=d25,<y01=d3
4218 +vmlal.u32 q9,d25,d3
4219 +
4220 +# qhasm: r4[0,1] += x4[0] unsigned* y01[0]; r4[2,3] += x4[1] unsigned* y01[1 ]
4221 +# asm 1: vmlal.u32 <r4=reg128#10,<x4=reg128#14%bot,<y01=reg128#2%bot
4222 +# asm 2: vmlal.u32 <r4=q9,<x4=d26,<y01=d2
4223 +vmlal.u32 q9,d26,d2
4224 +
4225 +# qhasm: 2x t1 = r0 unsigned>> 26
4226 +# asm 1: vshr.u64 >t1=reg128#2,<r0=reg128#15,#26
4227 +# asm 2: vshr.u64 >t1=q1,<r0=q14,#26
4228 +vshr.u64 q1,q14,#26
4229 +
4230 +# qhasm: r0 &= mask
4231 +# asm 1: vand >r0=reg128#4,<r0=reg128#15,<mask=reg128#1
4232 +# asm 2: vand >r0=q3,<r0=q14,<mask=q0
4233 +vand q3,q14,q0
4234 +
4235 +# qhasm: 2x r1 += t1
4236 +# asm 1: vadd.i64 >r1=reg128#2,<r1=reg128#3,<t1=reg128#2
4237 +# asm 2: vadd.i64 >r1=q1,<r1=q2,<t1=q1
4238 +vadd.i64 q1,q2,q1
4239 +
4240 +# qhasm: 2x t4 = r3 unsigned>> 26
4241 +# asm 1: vshr.u64 >t4=reg128#3,<r3=reg128#9,#26
4242 +# asm 2: vshr.u64 >t4=q2,<r3=q8,#26
4243 +vshr.u64 q2,q8,#26
4244 +
4245 +# qhasm: r3 &= mask
4246 +# asm 1: vand >r3=reg128#9,<r3=reg128#9,<mask=reg128#1
4247 +# asm 2: vand >r3=q8,<r3=q8,<mask=q0
4248 +vand q8,q8,q0
4249 +
4250 +# qhasm: 2x r4 += t4
4251 +# asm 1: vadd.i64 >r4=reg128#3,<r4=reg128#10,<t4=reg128#3
4252 +# asm 2: vadd.i64 >r4=q2,<r4=q9,<t4=q2
4253 +vadd.i64 q2,q9,q2
4254 +
4255 +# qhasm: 2x t2 = r1 unsigned>> 26
4256 +# asm 1: vshr.u64 >t2=reg128#10,<r1=reg128#2,#26
4257 +# asm 2: vshr.u64 >t2=q9,<r1=q1,#26
4258 +vshr.u64 q9,q1,#26
4259 +
4260 +# qhasm: r1 &= mask
4261 +# asm 1: vand >r1=reg128#2,<r1=reg128#2,<mask=reg128#1
4262 +# asm 2: vand >r1=q1,<r1=q1,<mask=q0
4263 +vand q1,q1,q0
4264 +
4265 +# qhasm: 2x t0 = r4 unsigned>> 26
4266 +# asm 1: vshr.u64 >t0=reg128#11,<r4=reg128#3,#26
4267 +# asm 2: vshr.u64 >t0=q10,<r4=q2,#26
4268 +vshr.u64 q10,q2,#26
4269 +
4270 +# qhasm: 2x r2 += t2
4271 +# asm 1: vadd.i64 >r2=reg128#10,<r2=reg128#16,<t2=reg128#10
4272 +# asm 2: vadd.i64 >r2=q9,<r2=q15,<t2=q9
4273 +vadd.i64 q9,q15,q9
4274 +
4275 +# qhasm: r4 &= mask
4276 +# asm 1: vand >r4=reg128#3,<r4=reg128#3,<mask=reg128#1
4277 +# asm 2: vand >r4=q2,<r4=q2,<mask=q0
4278 +vand q2,q2,q0
4279 +
4280 +# qhasm: 2x r0 += t0
4281 +# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#11
4282 +# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q10
4283 +vadd.i64 q3,q3,q10
4284 +
4285 +# qhasm: 2x t0 <<= 2
4286 +# asm 1: vshl.i64 >t0=reg128#11,<t0=reg128#11,#2
4287 +# asm 2: vshl.i64 >t0=q10,<t0=q10,#2
4288 +vshl.i64 q10,q10,#2
4289 +
4290 +# qhasm: 2x t3 = r2 unsigned>> 26
4291 +# asm 1: vshr.u64 >t3=reg128#12,<r2=reg128#10,#26
4292 +# asm 2: vshr.u64 >t3=q11,<r2=q9,#26
4293 +vshr.u64 q11,q9,#26
4294 +
4295 +# qhasm: 2x r0 += t0
4296 +# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#11
4297 +# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q10
4298 +vadd.i64 q3,q3,q10
4299 +
4300 +# qhasm: x23 = r2 & mask
4301 +# asm 1: vand >x23=reg128#10,<r2=reg128#10,<mask=reg128#1
4302 +# asm 2: vand >x23=q9,<r2=q9,<mask=q0
4303 +vand q9,q9,q0
4304 +
4305 +# qhasm: 2x r3 += t3
4306 +# asm 1: vadd.i64 >r3=reg128#9,<r3=reg128#9,<t3=reg128#12
4307 +# asm 2: vadd.i64 >r3=q8,<r3=q8,<t3=q11
4308 +vadd.i64 q8,q8,q11
4309 +
4310 +# qhasm: 2x t1 = r0 unsigned>> 26
4311 +# asm 1: vshr.u64 >t1=reg128#11,<r0=reg128#4,#26
4312 +# asm 2: vshr.u64 >t1=q10,<r0=q3,#26
4313 +vshr.u64 q10,q3,#26
4314 +
4315 +# qhasm: x23 = x23[0,2,1,3]
4316 +# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top
4317 +# asm 2: vtrn.32 <x23=d18,<x23=d19
4318 +vtrn.32 d18,d19
4319 +
4320 +# qhasm: x01 = r0 & mask
4321 +# asm 1: vand >x01=reg128#4,<r0=reg128#4,<mask=reg128#1
4322 +# asm 2: vand >x01=q3,<r0=q3,<mask=q0
4323 +vand q3,q3,q0
4324 +
4325 +# qhasm: 2x r1 += t1
4326 +# asm 1: vadd.i64 >r1=reg128#2,<r1=reg128#2,<t1=reg128#11
4327 +# asm 2: vadd.i64 >r1=q1,<r1=q1,<t1=q10
4328 +vadd.i64 q1,q1,q10
4329 +
4330 +# qhasm: 2x t4 = r3 unsigned>> 26
4331 +# asm 1: vshr.u64 >t4=reg128#11,<r3=reg128#9,#26
4332 +# asm 2: vshr.u64 >t4=q10,<r3=q8,#26
4333 +vshr.u64 q10,q8,#26
4334 +
4335 +# qhasm: x01 = x01[0,2,1,3]
4336 +# asm 1: vtrn.32 <x01=reg128#4%bot,<x01=reg128#4%top
4337 +# asm 2: vtrn.32 <x01=d6,<x01=d7
4338 +vtrn.32 d6,d7
4339 +
4340 +# qhasm: r3 &= mask
4341 +# asm 1: vand >r3=reg128#1,<r3=reg128#9,<mask=reg128#1
4342 +# asm 2: vand >r3=q0,<r3=q8,<mask=q0
4343 +vand q0,q8,q0
4344 +
4345 +# qhasm: r1 = r1[0,2,1,3]
4346 +# asm 1: vtrn.32 <r1=reg128#2%bot,<r1=reg128#2%top
4347 +# asm 2: vtrn.32 <r1=d2,<r1=d3
4348 +vtrn.32 d2,d3
4349 +
4350 +# qhasm: 2x x4 = r4 + t4
4351 +# asm 1: vadd.i64 >x4=reg128#3,<r4=reg128#3,<t4=reg128#11
4352 +# asm 2: vadd.i64 >x4=q2,<r4=q2,<t4=q10
4353 +vadd.i64 q2,q2,q10
4354 +
4355 +# qhasm: r3 = r3[0,2,1,3]
4356 +# asm 1: vtrn.32 <r3=reg128#1%bot,<r3=reg128#1%top
4357 +# asm 2: vtrn.32 <r3=d0,<r3=d1
4358 +vtrn.32 d0,d1
4359 +
4360 +# qhasm: x01 = x01[0,1] r1[0,1]
4361 +# asm 1: vext.32 <x01=reg128#4%top,<r1=reg128#2%bot,<r1=reg128#2%bot,#0
4362 +# asm 2: vext.32 <x01=d7,<r1=d2,<r1=d2,#0
4363 +vext.32 d7,d2,d2,#0
4364 +
4365 +# qhasm: x23 = x23[0,1] r3[0,1]
4366 +# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#1%bot,<r3=reg128#1%bot,#0
4367 +# asm 2: vext.32 <x23=d19,<r3=d0,<r3=d0,#0
4368 +vext.32 d19,d0,d0,#0
4369 +
4370 +# qhasm: x4 = x4[0,2,1,3]
4371 +# asm 1: vtrn.32 <x4=reg128#3%bot,<x4=reg128#3%top
4372 +# asm 2: vtrn.32 <x4=d4,<x4=d5
4373 +vtrn.32 d4,d5
4374 +
4375 +# qhasm: mem128[input_0] aligned= x01;input_0+=16
4376 +# asm 1: vst1.8 {<x01=reg128#4%bot-<x01=reg128#4%top},[<input_0=int32#1,: 128]!
4377 +# asm 2: vst1.8 {<x01=d6-<x01=d7},[<input_0=r0,: 128]!
4378 +vst1.8 {d6-d7},[r0,: 128]!
4379 +
4380 +# qhasm: mem128[input_0] aligned= x23;input_0+=16
4381 +# asm 1: vst1.8 {<x23=reg128#10%bot-<x23=reg128#10%top},[<input_0=int32#1,: 128 ]!
4382 +# asm 2: vst1.8 {<x23=d18-<x23=d19},[<input_0=r0,: 128]!
4383 +vst1.8 {d18-d19},[r0,: 128]!
4384 +
4385 +# qhasm: mem64[input_0] aligned= x4[0]
4386 +# asm 1: vst1.8 <x4=reg128#3%bot,[<input_0=int32#1,: 64]
4387 +# asm 2: vst1.8 <x4=d4,[<input_0=r0,: 64]
4388 +vst1.8 d4,[r0,: 64]
4389 +
4390 +# qhasm: return
4391 +add sp,sp,#0
4392 +bx lr
4393 diff --git a/crypto/poly1305/poly1305_vec.c b/crypto/poly1305/poly1305_vec.c
4394 new file mode 100644
4395 index 0000000..c546200
4396 --- /dev/null
4397 +++ b/crypto/poly1305/poly1305_vec.c
4398 @@ -0,0 +1,733 @@
4399 +/* ====================================================================
4400 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved.
4401 + *
4402 + * Redistribution and use in source and binary forms, with or without
4403 + * modification, are permitted provided that the following conditions
4404 + * are met:
4405 + *
4406 + * 1. Redistributions of source code must retain the above copyright
4407 + * notice, this list of conditions and the following disclaimer.
4408 + *
4409 + * 2. Redistributions in binary form must reproduce the above copyright
4410 + * notice, this list of conditions and the following disclaimer in
4411 + * the documentation and/or other materials provided with the
4412 + * distribution.
4413 + *
4414 + * 3. All advertising materials mentioning features or use of this
4415 + * software must display the following acknowledgment:
4416 + * "This product includes software developed by the OpenSSL Project
4417 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
4418 + *
4419 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
4420 + * endorse or promote products derived from this software without
4421 + * prior written permission. For written permission, please contact
4422 + * licensing@OpenSSL.org.
4423 + *
4424 + * 5. Products derived from this software may not be called "OpenSSL"
4425 + * nor may "OpenSSL" appear in their names without prior written
4426 + * permission of the OpenSSL Project.
4427 + *
4428 + * 6. Redistributions of any form whatsoever must retain the following
4429 + * acknowledgment:
4430 + * "This product includes software developed by the OpenSSL Project
4431 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
4432 + *
4433 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
4434 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
4435 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
4436 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
4437 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
4438 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
4439 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
4440 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
4441 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
4442 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
4443 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
4444 + * OF THE POSSIBILITY OF SUCH DAMAGE.
4445 + * ====================================================================
4446 + */
4447 +
4448 +/* This implementation of poly1305 is by Andrew Moon
4449 + * (https://github.com/floodyberry/poly1305-donna) and released as public
4450 + * domain. It implements SIMD vectorization based on the algorithm described in
4451 + * http://cr.yp.to/papers.html#neoncrypto. Unrolled to 2 powers, i.e. 64 byte
4452 + * block size
4453 +*/
4454 +
4455 +#include <emmintrin.h>
4456 +#include <stdint.h>
4457 +#include <openssl/opensslconf.h>
4458 +
4459 +#if !defined(OPENSSL_NO_POLY1305)
4460 +
4461 +#include <openssl/poly1305.h>
4462 +
4463 +#define ALIGN(x) __attribute__((aligned(x)))
4464 +#define INLINE inline
4465 +#define U8TO64_LE(m) (*(uint64_t*)(m))
4466 +#define U8TO32_LE(m) (*(uint32_t*)(m))
4467 +#define U64TO8_LE(m,v) (*(uint64_t*)(m)) = v
4468 +
4469 +typedef __m128i xmmi;
4470 +typedef unsigned __int128 uint128_t;
4471 +
4472 +static const uint32_t ALIGN(16) poly1305_x64_sse2_message_mask[4] =
4473 + {(1 << 26) - 1, 0, (1 << 26) - 1, 0};
4474 +static const uint32_t ALIGN(16) poly1305_x64_sse2_5[4] = {5, 0, 5, 0};
4475 +static const uint32_t ALIGN(16) poly1305_x64_sse2_1shl128[4] =
4476 + {(1 << 24), 0, (1 << 24), 0};
4477 +
4478 +static uint128_t INLINE
4479 +add128(uint128_t a, uint128_t b)
4480 + {
4481 + return a + b;
4482 + }
4483 +
4484 +static uint128_t INLINE
4485 +add128_64(uint128_t a, uint64_t b)
4486 + {
4487 + return a + b;
4488 + }
4489 +
4490 +static uint128_t INLINE
4491 +mul64x64_128(uint64_t a, uint64_t b)
4492 + {
4493 + return (uint128_t)a * b;
4494 + }
4495 +
4496 +static uint64_t INLINE
4497 +lo128(uint128_t a)
4498 + {
4499 + return (uint64_t)a;
4500 + }
4501 +
4502 +static uint64_t INLINE
4503 +shr128(uint128_t v, const int shift)
4504 + {
4505 + return (uint64_t)(v >> shift);
4506 + }
4507 +
4508 +static uint64_t INLINE
4509 +shr128_pair(uint64_t hi, uint64_t lo, const int shift)
4510 + {
4511 + return (uint64_t)((((uint128_t)hi << 64) | lo) >> shift);
4512 + }
4513 +
4514 +typedef struct poly1305_power_t
4515 + {
4516 + union
4517 + {
4518 + xmmi v;
4519 + uint64_t u[2];
4520 + uint32_t d[4];
4521 + } R20,R21,R22,R23,R24,S21,S22,S23,S24;
4522 + } poly1305_power;
4523 +
4524 +typedef struct poly1305_state_internal_t
4525 + {
4526 + poly1305_power P[2]; /* 288 bytes, top 32 bit halves unused = 144
4527 + bytes of free storage */
4528 + union
4529 + {
4530 + xmmi H[5]; /* 80 bytes */
4531 + uint64_t HH[10];
4532 + };
4533 + /* uint64_t r0,r1,r2; [24 bytes] */
4534 + /* uint64_t pad0,pad1; [16 bytes] */
4535 + uint64_t started; /* 8 bytes */
4536 + uint64_t leftover; /* 8 bytes */
4537 + uint8_t buffer[64]; /* 64 bytes */
4538 + } poly1305_state_internal; /* 448 bytes total + 63 bytes for
4539 + alignment = 511 bytes raw */
4540 +
4541 +static poly1305_state_internal INLINE
4542 +*poly1305_aligned_state(poly1305_state *state)
4543 + {
4544 + return (poly1305_state_internal *)(((uint64_t)state + 63) & ~63);
4545 + }
4546 +
4547 +/* copy 0-63 bytes */
4548 +static void INLINE
4549 +poly1305_block_copy(uint8_t *dst, const uint8_t *src, size_t bytes)
4550 + {
4551 + size_t offset = src - dst;
4552 + if (bytes & 32)
4553 + {
4554 + _mm_storeu_si128((xmmi *)(dst + 0), _mm_loadu_si128((xmmi *)(dst + offset + 0)));
4555 + _mm_storeu_si128((xmmi *)(dst + 16), _mm_loadu_si128((xmmi *)(ds t + offset + 16)));
4556 + dst += 32;
4557 + }
4558 + if (bytes & 16)
4559 + {
4560 + _mm_storeu_si128((xmmi *)dst,
4561 + _mm_loadu_si128((xmmi *)(dst + offset)));
4562 + dst += 16;
4563 + }
4564 + if (bytes & 8)
4565 + {
4566 + *(uint64_t *)dst = *(uint64_t *)(dst + offset);
4567 + dst += 8;
4568 + }
4569 + if (bytes & 4)
4570 + {
4571 + *(uint32_t *)dst = *(uint32_t *)(dst + offset);
4572 + dst += 4;
4573 + }
4574 + if (bytes & 2)
4575 + {
4576 + *(uint16_t *)dst = *(uint16_t *)(dst + offset);
4577 + dst += 2;
4578 + }
4579 + if (bytes & 1)
4580 + {
4581 + *( uint8_t *)dst = *( uint8_t *)(dst + offset);
4582 + }
4583 + }
4584 +
4585 +/* zero 0-15 bytes */
4586 +static void INLINE
4587 +poly1305_block_zero(uint8_t *dst, size_t bytes)
4588 + {
4589 + if (bytes & 8) { *(uint64_t *)dst = 0; dst += 8; }
4590 + if (bytes & 4) { *(uint32_t *)dst = 0; dst += 4; }
4591 + if (bytes & 2) { *(uint16_t *)dst = 0; dst += 2; }
4592 + if (bytes & 1) { *( uint8_t *)dst = 0; }
4593 + }
4594 +
4595 +static size_t INLINE
4596 +poly1305_min(size_t a, size_t b)
4597 + {
4598 + return (a < b) ? a : b;
4599 + }
4600 +
4601 +void
4602 +CRYPTO_poly1305_init(poly1305_state *state, const unsigned char key[32])
4603 + {
4604 + poly1305_state_internal *st = poly1305_aligned_state(state);
4605 + poly1305_power *p;
4606 + uint64_t r0,r1,r2;
4607 + uint64_t t0,t1;
4608 +
4609 + /* clamp key */
4610 + t0 = U8TO64_LE(key + 0);
4611 + t1 = U8TO64_LE(key + 8);
4612 + r0 = t0 & 0xffc0fffffff; t0 >>= 44; t0 |= t1 << 20;
4613 + r1 = t0 & 0xfffffc0ffff; t1 >>= 24;
4614 + r2 = t1 & 0x00ffffffc0f;
4615 +
4616 + /* store r in un-used space of st->P[1] */
4617 + p = &st->P[1];
4618 + p->R20.d[1] = (uint32_t)(r0 );
4619 + p->R20.d[3] = (uint32_t)(r0 >> 32);
4620 + p->R21.d[1] = (uint32_t)(r1 );
4621 + p->R21.d[3] = (uint32_t)(r1 >> 32);
4622 + p->R22.d[1] = (uint32_t)(r2 );
4623 + p->R22.d[3] = (uint32_t)(r2 >> 32);
4624 +
4625 + /* store pad */
4626 + p->R23.d[1] = U8TO32_LE(key + 16);
4627 + p->R23.d[3] = U8TO32_LE(key + 20);
4628 + p->R24.d[1] = U8TO32_LE(key + 24);
4629 + p->R24.d[3] = U8TO32_LE(key + 28);
4630 +
4631 + /* H = 0 */
4632 + st->H[0] = _mm_setzero_si128();
4633 + st->H[1] = _mm_setzero_si128();
4634 + st->H[2] = _mm_setzero_si128();
4635 + st->H[3] = _mm_setzero_si128();
4636 + st->H[4] = _mm_setzero_si128();
4637 +
4638 + st->started = 0;
4639 + st->leftover = 0;
4640 + }
4641 +
4642 +static void
4643 +poly1305_first_block(poly1305_state_internal *st, const uint8_t *m)
4644 + {
4645 + const xmmi MMASK =
4646 + _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
4647 + const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5);
4648 + const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128);
4649 + xmmi T5,T6;
4650 + poly1305_power *p;
4651 + uint128_t d[3];
4652 + uint64_t r0,r1,r2;
4653 + uint64_t r20,r21,r22,s22;
4654 + uint64_t pad0,pad1;
4655 + uint64_t c;
4656 + uint64_t i;
4657 +
4658 + /* pull out stored info */
4659 + p = &st->P[1];
4660 +
4661 + r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
4662 + r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
4663 + r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
4664 + pad0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1];
4665 + pad1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1];
4666 +
4667 + /* compute powers r^2,r^4 */
4668 + r20 = r0;
4669 + r21 = r1;
4670 + r22 = r2;
4671 + for (i = 0; i < 2; i++)
4672 + {
4673 + s22 = r22 * (5 << 2);
4674 +
4675 + d[0] = add128(mul64x64_128(r20, r20), mul64x64_128(r21 * 2, s22) );
4676 + d[1] = add128(mul64x64_128(r22, s22), mul64x64_128(r20 * 2, r21) );
4677 + d[2] = add128(mul64x64_128(r21, r21), mul64x64_128(r22 * 2, r20) );
4678 +
4679 + r20 = lo128(d[0]) & 0xfffffffffff; c = shr128(d[0], 44);
4680 + d[1] = add128_64(d[1], c); r21 = lo128(d[1]) & 0xfffffffffff; c = shr128(d[1], 44);
4681 + d[2] = add128_64(d[2], c); r22 = lo128(d[2]) & 0x3ffffffffff; c = shr128(d[2], 42);
4682 + r20 += c * 5; c = (r20 >> 44); r20 = r20 & 0xfffffffffff;
4683 + r21 += c;
4684 +
4685 + p->R20.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)( r20 ) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0));
4686 + p->R21.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r20 > > 26) | (r21 << 18)) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0));
4687 + p->R22.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 > > 8) ) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0));
4688 + p->R23.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 > > 34) | (r22 << 10)) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0));
4689 + p->R24.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r22 > > 16) ) ), _MM_SHUFFLE(1,0,1,0));
4690 + p->S21.v = _mm_mul_epu32(p->R21.v, FIVE);
4691 + p->S22.v = _mm_mul_epu32(p->R22.v, FIVE);
4692 + p->S23.v = _mm_mul_epu32(p->R23.v, FIVE);
4693 + p->S24.v = _mm_mul_epu32(p->R24.v, FIVE);
4694 + p--;
4695 + }
4696 +
4697 + /* put saved info back */
4698 + p = &st->P[1];
4699 + p->R20.d[1] = (uint32_t)(r0 );
4700 + p->R20.d[3] = (uint32_t)(r0 >> 32);
4701 + p->R21.d[1] = (uint32_t)(r1 );
4702 + p->R21.d[3] = (uint32_t)(r1 >> 32);
4703 + p->R22.d[1] = (uint32_t)(r2 );
4704 + p->R22.d[3] = (uint32_t)(r2 >> 32);
4705 + p->R23.d[1] = (uint32_t)(pad0 );
4706 + p->R23.d[3] = (uint32_t)(pad0 >> 32);
4707 + p->R24.d[1] = (uint32_t)(pad1 );
4708 + p->R24.d[3] = (uint32_t)(pad1 >> 32);
4709 +
4710 + /* H = [Mx,My] */
4711 + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi6 4((xmmi *)(m + 16)));
4712 + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi6 4((xmmi *)(m + 24)));
4713 + st->H[0] = _mm_and_si128(MMASK, T5);
4714 + st->H[1] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
4715 + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
4716 + st->H[2] = _mm_and_si128(MMASK, T5);
4717 + st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
4718 + st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
4719 + }
4720 +
4721 +static void
4722 +poly1305_blocks(poly1305_state_internal *st, const uint8_t *m, size_t bytes)
4723 + {
4724 + const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask );
4725 + const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5);
4726 + const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128);
4727 +
4728 + poly1305_power *p;
4729 + xmmi H0,H1,H2,H3,H4;
4730 + xmmi T0,T1,T2,T3,T4,T5,T6;
4731 + xmmi M0,M1,M2,M3,M4;
4732 + xmmi C1,C2;
4733 +
4734 + H0 = st->H[0];
4735 + H1 = st->H[1];
4736 + H2 = st->H[2];
4737 + H3 = st->H[3];
4738 + H4 = st->H[4];
4739 +
4740 + while (bytes >= 64)
4741 + {
4742 + /* H *= [r^4,r^4] */
4743 + p = &st->P[0];
4744 + T0 = _mm_mul_epu32(H0, p->R20.v);
4745 + T1 = _mm_mul_epu32(H0, p->R21.v);
4746 + T2 = _mm_mul_epu32(H0, p->R22.v);
4747 + T3 = _mm_mul_epu32(H0, p->R23.v);
4748 + T4 = _mm_mul_epu32(H0, p->R24.v);
4749 + T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4750 + T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4751 + T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4752 + T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4753 + T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4754 + T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4755 + T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4756 + T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4757 + T5 = _mm_mul_epu32(H1, p->R23.v); T4 = _mm_add_epi64(T4, T5);
4758 + T5 = _mm_mul_epu32(H2, p->R22.v); T4 = _mm_add_epi64(T4, T5);
4759 + T5 = _mm_mul_epu32(H3, p->R21.v); T4 = _mm_add_epi64(T4, T5);
4760 + T5 = _mm_mul_epu32(H4, p->R20.v); T4 = _mm_add_epi64(T4, T5);
4761 +
4762 + /* H += [Mx,My]*[r^2,r^2] */
4763 + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_lo adl_epi64((xmmi *)(m + 16)));
4764 + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_lo adl_epi64((xmmi *)(m + 24)));
4765 + M0 = _mm_and_si128(MMASK, T5);
4766 + M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
4767 + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12) );
4768 + M2 = _mm_and_si128(MMASK, T5);
4769 + M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
4770 + M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
4771 +
4772 + p = &st->P[1];
4773 + T5 = _mm_mul_epu32(M0, p->R20.v); T6 = _mm_mul_epu32(M0, p->R21. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4774 + T5 = _mm_mul_epu32(M1, p->S24.v); T6 = _mm_mul_epu32(M1, p->R20. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4775 + T5 = _mm_mul_epu32(M2, p->S23.v); T6 = _mm_mul_epu32(M2, p->S24. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4776 + T5 = _mm_mul_epu32(M3, p->S22.v); T6 = _mm_mul_epu32(M3, p->S23. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4777 + T5 = _mm_mul_epu32(M4, p->S21.v); T6 = _mm_mul_epu32(M4, p->S22. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4778 + T5 = _mm_mul_epu32(M0, p->R22.v); T6 = _mm_mul_epu32(M0, p->R23. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4779 + T5 = _mm_mul_epu32(M1, p->R21.v); T6 = _mm_mul_epu32(M1, p->R22. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4780 + T5 = _mm_mul_epu32(M2, p->R20.v); T6 = _mm_mul_epu32(M2, p->R21. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4781 + T5 = _mm_mul_epu32(M3, p->S24.v); T6 = _mm_mul_epu32(M3, p->R20. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4782 + T5 = _mm_mul_epu32(M4, p->S23.v); T6 = _mm_mul_epu32(M4, p->S24. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4783 + T5 = _mm_mul_epu32(M0, p->R24.v); T4 = _mm_add_epi64(T4, T5);
4784 + T5 = _mm_mul_epu32(M1, p->R23.v); T4 = _mm_add_epi64(T4, T5);
4785 + T5 = _mm_mul_epu32(M2, p->R22.v); T4 = _mm_add_epi64(T4, T5);
4786 + T5 = _mm_mul_epu32(M3, p->R21.v); T4 = _mm_add_epi64(T4, T5);
4787 + T5 = _mm_mul_epu32(M4, p->R20.v); T4 = _mm_add_epi64(T4, T5);
4788 +
4789 + /* H += [Mx,My] */
4790 + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 32)), _mm_l oadl_epi64((xmmi *)(m + 48)));
4791 + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 40)), _mm_l oadl_epi64((xmmi *)(m + 56)));
4792 + M0 = _mm_and_si128(MMASK, T5);
4793 + M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
4794 + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12) );
4795 + M2 = _mm_and_si128(MMASK, T5);
4796 + M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
4797 + M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
4798 +
4799 + T0 = _mm_add_epi64(T0, M0);
4800 + T1 = _mm_add_epi64(T1, M1);
4801 + T2 = _mm_add_epi64(T2, M2);
4802 + T3 = _mm_add_epi64(T3, M3);
4803 + T4 = _mm_add_epi64(T4, M4);
4804 +
4805 + /* reduce */
4806 + C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _ mm_and_si128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C 1); T4 = _mm_add_epi64(T4, C2);
4807 + C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _ mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C 1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
4808 + C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _ mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C 1); T1 = _mm_add_epi64(T1, C2);
4809 + C1 = _mm_srli_epi64(T3, 26); T3 = _ mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C 1);
4810 +
4811 + /* H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx,My]) */
4812 + H0 = T0;
4813 + H1 = T1;
4814 + H2 = T2;
4815 + H3 = T3;
4816 + H4 = T4;
4817 +
4818 + m += 64;
4819 + bytes -= 64;
4820 + }
4821 +
4822 + st->H[0] = H0;
4823 + st->H[1] = H1;
4824 + st->H[2] = H2;
4825 + st->H[3] = H3;
4826 + st->H[4] = H4;
4827 + }
4828 +
4829 +static size_t
4830 +poly1305_combine(poly1305_state_internal *st, const uint8_t *m, size_t bytes)
4831 + {
4832 + const xmmi MMASK =
4833 + _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
4834 + const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128);
4835 + const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5);
4836 +
4837 + poly1305_power *p;
4838 + xmmi H0,H1,H2,H3,H4;
4839 + xmmi M0,M1,M2,M3,M4;
4840 + xmmi T0,T1,T2,T3,T4,T5,T6;
4841 + xmmi C1,C2;
4842 +
4843 + uint64_t r0,r1,r2;
4844 + uint64_t t0,t1,t2,t3,t4;
4845 + uint64_t c;
4846 + size_t consumed = 0;
4847 +
4848 + H0 = st->H[0];
4849 + H1 = st->H[1];
4850 + H2 = st->H[2];
4851 + H3 = st->H[3];
4852 + H4 = st->H[4];
4853 +
4854 + /* p = [r^2,r^2] */
4855 + p = &st->P[1];
4856 +
4857 + if (bytes >= 32)
4858 + {
4859 + /* H *= [r^2,r^2] */
4860 + T0 = _mm_mul_epu32(H0, p->R20.v);
4861 + T1 = _mm_mul_epu32(H0, p->R21.v);
4862 + T2 = _mm_mul_epu32(H0, p->R22.v);
4863 + T3 = _mm_mul_epu32(H0, p->R23.v);
4864 + T4 = _mm_mul_epu32(H0, p->R24.v);
4865 + T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4866 + T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4867 + T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4868 + T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22. v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4869 + T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4870 + T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4871 + T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4872 + T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24. v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4873 + T5 = _mm_mul_epu32(H1, p->R23.v); T4 = _mm_add_epi64(T4, T5);
4874 + T5 = _mm_mul_epu32(H2, p->R22.v); T4 = _mm_add_epi64(T4, T5);
4875 + T5 = _mm_mul_epu32(H3, p->R21.v); T4 = _mm_add_epi64(T4, T5);
4876 + T5 = _mm_mul_epu32(H4, p->R20.v); T4 = _mm_add_epi64(T4, T5);
4877 +
4878 + /* H += [Mx,My] */
4879 + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_lo adl_epi64((xmmi *)(m + 16)));
4880 + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_lo adl_epi64((xmmi *)(m + 24)));
4881 + M0 = _mm_and_si128(MMASK, T5);
4882 + M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
4883 + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12) );
4884 + M2 = _mm_and_si128(MMASK, T5);
4885 + M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
4886 + M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
4887 +
4888 + T0 = _mm_add_epi64(T0, M0);
4889 + T1 = _mm_add_epi64(T1, M1);
4890 + T2 = _mm_add_epi64(T2, M2);
4891 + T3 = _mm_add_epi64(T3, M3);
4892 + T4 = _mm_add_epi64(T4, M4);
4893 +
4894 + /* reduce */
4895 + C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _ mm_and_si128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C 1); T4 = _mm_add_epi64(T4, C2);
4896 + C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _ mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C 1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
4897 + C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _ mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C 1); T1 = _mm_add_epi64(T1, C2);
4898 + C1 = _mm_srli_epi64(T3, 26); T3 = _ mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C 1);
4899 +
4900 + /* H = (H*[r^2,r^2] + [Mx,My]) */
4901 + H0 = T0;
4902 + H1 = T1;
4903 + H2 = T2;
4904 + H3 = T3;
4905 + H4 = T4;
4906 +
4907 + consumed = 32;
4908 + }
4909 +
4910 + /* finalize, H *= [r^2,r] */
4911 + r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
4912 + r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
4913 + r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
4914 +
4915 + p->R20.d[2] = (uint32_t)( r0 ) & 0x3ffffff;
4916 + p->R21.d[2] = (uint32_t)((r0 >> 26) | (r1 << 18)) & 0x3ffffff;
4917 + p->R22.d[2] = (uint32_t)((r1 >> 8) ) & 0x3ffffff;
4918 + p->R23.d[2] = (uint32_t)((r1 >> 34) | (r2 << 10)) & 0x3ffffff;
4919 + p->R24.d[2] = (uint32_t)((r2 >> 16) ) ;
4920 + p->S21.d[2] = p->R21.d[2] * 5;
4921 + p->S22.d[2] = p->R22.d[2] * 5;
4922 + p->S23.d[2] = p->R23.d[2] * 5;
4923 + p->S24.d[2] = p->R24.d[2] * 5;
4924 +
4925 + /* H *= [r^2,r] */
4926 + T0 = _mm_mul_epu32(H0, p->R20.v);
4927 + T1 = _mm_mul_epu32(H0, p->R21.v);
4928 + T2 = _mm_mul_epu32(H0, p->R22.v);
4929 + T3 = _mm_mul_epu32(H0, p->R23.v);
4930 + T4 = _mm_mul_epu32(H0, p->R24.v);
4931 + T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4932 + T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4933 + T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4934 + T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22.v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6);
4935 + T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4936 + T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4937 + T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4938 + T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24.v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6);
4939 + T5 = _mm_mul_epu32(H1, p->R23.v); T4 = _mm_add_epi64(T4, T5);
4940 + T5 = _mm_mul_epu32(H2, p->R22.v); T4 = _mm_add_epi64(T4, T5);
4941 + T5 = _mm_mul_epu32(H3, p->R21.v); T4 = _mm_add_epi64(T4, T5);
4942 + T5 = _mm_mul_epu32(H4, p->R20.v); T4 = _mm_add_epi64(T4, T5);
4943 +
4944 + C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _mm_and_s i128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C1); T4 = _mm_add_epi64(T4, C2);
4945 + C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _mm_and_s i128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
4946 + C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _mm_and_s i128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C1); T1 = _mm_add_epi64(T1, C2);
4947 + C1 = _mm_srli_epi64(T3, 26); T3 = _mm_and_s i128(T3, MMASK); T4 = _mm_add_epi64(T4, C1);
4948 +
4949 + /* H = H[0]+H[1] */
4950 + H0 = _mm_add_epi64(T0, _mm_srli_si128(T0, 8));
4951 + H1 = _mm_add_epi64(T1, _mm_srli_si128(T1, 8));
4952 + H2 = _mm_add_epi64(T2, _mm_srli_si128(T2, 8));
4953 + H3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8));
4954 + H4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8));
4955 +
4956 + t0 = _mm_cvtsi128_si32(H0) ; c = (t0 >> 26); t0 &= 0x3ffffff;
4957 + t1 = _mm_cvtsi128_si32(H1) + c; c = (t1 >> 26); t1 &= 0x3ffffff;
4958 + t2 = _mm_cvtsi128_si32(H2) + c; c = (t2 >> 26); t2 &= 0x3ffffff;
4959 + t3 = _mm_cvtsi128_si32(H3) + c; c = (t3 >> 26); t3 &= 0x3ffffff;
4960 + t4 = _mm_cvtsi128_si32(H4) + c; c = (t4 >> 26); t4 &= 0x3ffffff;
4961 + t0 = t0 + (c * 5); c = (t0 >> 26); t0 &= 0x3ffffff;
4962 + t1 = t1 + c;
4963 +
4964 + st->HH[0] = ((t0 ) | (t1 << 26) ) & 0xfffffffffffull;
4965 + st->HH[1] = ((t1 >> 18) | (t2 << 8) | (t3 << 34)) & 0xfffffffffffull;
4966 + st->HH[2] = ((t3 >> 10) | (t4 << 16) ) & 0x3ffffffffffull;
4967 +
4968 + return consumed;
4969 + }
4970 +
4971 +void
4972 +CRYPTO_poly1305_update(poly1305_state *state, const unsigned char *m,
4973 + size_t bytes)
4974 + {
4975 + poly1305_state_internal *st = poly1305_aligned_state(state);
4976 + size_t want;
4977 +
4978 + /* need at least 32 initial bytes to start the accelerated branch */
4979 + if (!st->started)
4980 + {
4981 + if ((st->leftover == 0) && (bytes > 32))
4982 + {
4983 + poly1305_first_block(st, m);
4984 + m += 32;
4985 + bytes -= 32;
4986 + }
4987 + else
4988 + {
4989 + want = poly1305_min(32 - st->leftover, bytes);
4990 + poly1305_block_copy(st->buffer + st->leftover, m, want);
4991 + bytes -= want;
4992 + m += want;
4993 + st->leftover += want;
4994 + if ((st->leftover < 32) || (bytes == 0))
4995 + return;
4996 + poly1305_first_block(st, st->buffer);
4997 + st->leftover = 0;
4998 + }
4999 + st->started = 1;
5000 + }
5001 +
5002 + /* handle leftover */
5003 + if (st->leftover)
5004 + {
5005 + want = poly1305_min(64 - st->leftover, bytes);
5006 + poly1305_block_copy(st->buffer + st->leftover, m, want);
5007 + bytes -= want;
5008 + m += want;
5009 + st->leftover += want;
5010 + if (st->leftover < 64)
5011 + return;
5012 + poly1305_blocks(st, st->buffer, 64);
5013 + st->leftover = 0;
5014 + }
5015 +
5016 + /* process 64 byte blocks */
5017 + if (bytes >= 64)
5018 + {
5019 + want = (bytes & ~63);
5020 + poly1305_blocks(st, m, want);
5021 + m += want;
5022 + bytes -= want;
5023 + }
5024 +
5025 + if (bytes)
5026 + {
5027 + poly1305_block_copy(st->buffer + st->leftover, m, bytes);
5028 + st->leftover += bytes;
5029 + }
5030 + }
5031 +
5032 +void
5033 +CRYPTO_poly1305_finish(poly1305_state *state, unsigned char mac[16])
5034 + {
5035 + poly1305_state_internal *st = poly1305_aligned_state(state);
5036 + size_t leftover = st->leftover;
5037 + uint8_t *m = st->buffer;
5038 + uint128_t d[3];
5039 + uint64_t h0,h1,h2;
5040 + uint64_t t0,t1;
5041 + uint64_t g0,g1,g2,c,nc;
5042 + uint64_t r0,r1,r2,s1,s2;
5043 + poly1305_power *p;
5044 +
5045 + if (st->started)
5046 + {
5047 + size_t consumed = poly1305_combine(st, m, leftover);
5048 + leftover -= consumed;
5049 + m += consumed;
5050 + }
5051 +
5052 + /* st->HH will either be 0 or have the combined result */
5053 + h0 = st->HH[0];
5054 + h1 = st->HH[1];
5055 + h2 = st->HH[2];
5056 +
5057 + p = &st->P[1];
5058 + r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
5059 + r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
5060 + r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
5061 + s1 = r1 * (5 << 2);
5062 + s2 = r2 * (5 << 2);
5063 +
5064 + if (leftover < 16)
5065 + goto poly1305_donna_atmost15bytes;
5066 +
5067 +poly1305_donna_atleast16bytes:
5068 + t0 = U8TO64_LE(m + 0);
5069 + t1 = U8TO64_LE(m + 8);
5070 + h0 += t0 & 0xfffffffffff;
5071 + t0 = shr128_pair(t1, t0, 44);
5072 + h1 += t0 & 0xfffffffffff;
5073 + h2 += (t1 >> 24) | ((uint64_t)1 << 40);
5074 +
5075 +poly1305_donna_mul:
5076 + d[0] = add128(add128(mul64x64_128(h0, r0), mul64x64_128(h1, s2)), mul64x 64_128(h2, s1));
5077 + d[1] = add128(add128(mul64x64_128(h0, r1), mul64x64_128(h1, r0)), mul64x 64_128(h2, s2));
5078 + d[2] = add128(add128(mul64x64_128(h0, r2), mul64x64_128(h1, r1)), mul64x 64_128(h2, r0));
5079 + h0 = lo128(d[0]) & 0xfffffffffff; c = shr128( d[0], 44);
5080 + d[1] = add128_64(d[1], c); h1 = lo128(d[1]) & 0xfffffffffff; c = shr128( d[1], 44);
5081 + d[2] = add128_64(d[2], c); h2 = lo128(d[2]) & 0x3ffffffffff; c = shr128( d[2], 42);
5082 + h0 += c * 5;
5083 +
5084 + m += 16;
5085 + leftover -= 16;
5086 + if (leftover >= 16) goto poly1305_donna_atleast16bytes;
5087 +
5088 + /* final bytes */
5089 +poly1305_donna_atmost15bytes:
5090 + if (!leftover) goto poly1305_donna_finish;
5091 +
5092 + m[leftover++] = 1;
5093 + poly1305_block_zero(m + leftover, 16 - leftover);
5094 + leftover = 16;
5095 +
5096 + t0 = U8TO64_LE(m+0);
5097 + t1 = U8TO64_LE(m+8);
5098 + h0 += t0 & 0xfffffffffff; t0 = shr128_pair(t1, t0, 44);
5099 + h1 += t0 & 0xfffffffffff;
5100 + h2 += (t1 >> 24);
5101 +
5102 + goto poly1305_donna_mul;
5103 +
5104 +poly1305_donna_finish:
5105 + c = (h0 >> 44); h0 &= 0xfffffffffff;
5106 + h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff;
5107 + h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff;
5108 + h0 += c * 5;
5109 +
5110 + g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff;
5111 + g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff;
5112 + g2 = h2 + c - ((uint64_t)1 << 42);
5113 +
5114 + c = (g2 >> 63) - 1;
5115 + nc = ~c;
5116 + h0 = (h0 & nc) | (g0 & c);
5117 + h1 = (h1 & nc) | (g1 & c);
5118 + h2 = (h2 & nc) | (g2 & c);
5119 +
5120 + /* pad */
5121 + t0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1];
5122 + t1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1];
5123 + h0 += (t0 & 0xfffffffffff) ; c = (h0 >> 44); h0 &= 0xfffffffffff; t0 = shr128_pair(t1, t0, 44);
5124 + h1 += (t0 & 0xfffffffffff) + c; c = (h1 >> 44); h1 &= 0xfffffffffff; t1 = (t1 >> 24);
5125 + h2 += (t1 ) + c;
5126 +
5127 + U64TO8_LE(mac + 0, ((h0 ) | (h1 << 44)));
5128 + U64TO8_LE(mac + 8, ((h1 >> 20) | (h2 << 24)));
5129 + }
5130 +
5131 +#endif /* !OPENSSL_NO_POLY1305 */
5132 diff --git a/crypto/poly1305/poly1305test.c b/crypto/poly1305/poly1305test.c
5133 new file mode 100644
5134 index 0000000..8dd26af
5135 --- /dev/null
5136 +++ b/crypto/poly1305/poly1305test.c
5137 @@ -0,0 +1,166 @@
5138 +/* ====================================================================
5139 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved.
5140 + *
5141 + * Redistribution and use in source and binary forms, with or without
5142 + * modification, are permitted provided that the following conditions
5143 + * are met:
5144 + *
5145 + * 1. Redistributions of source code must retain the above copyright
5146 + * notice, this list of conditions and the following disclaimer.
5147 + *
5148 + * 2. Redistributions in binary form must reproduce the above copyright
5149 + * notice, this list of conditions and the following disclaimer in
5150 + * the documentation and/or other materials provided with the
5151 + * distribution.
5152 + *
5153 + * 3. All advertising materials mentioning features or use of this
5154 + * software must display the following acknowledgment:
5155 + * "This product includes software developed by the OpenSSL Project
5156 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
5157 + *
5158 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
5159 + * endorse or promote products derived from this software without
5160 + * prior written permission. For written permission, please contact
5161 + * licensing@OpenSSL.org.
5162 + *
5163 + * 5. Products derived from this software may not be called "OpenSSL"
5164 + * nor may "OpenSSL" appear in their names without prior written
5165 + * permission of the OpenSSL Project.
5166 + *
5167 + * 6. Redistributions of any form whatsoever must retain the following
5168 + * acknowledgment:
5169 + * "This product includes software developed by the OpenSSL Project
5170 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
5171 + *
5172 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
5173 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
5174 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
5175 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
5176 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
5177 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
5178 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
5179 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
5180 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
5181 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
5182 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
5183 + * OF THE POSSIBILITY OF SUCH DAMAGE.
5184 + * ====================================================================
5185 + */
5186 +
5187 +#include <stdio.h>
5188 +#include <stdlib.h>
5189 +#include <string.h>
5190 +
5191 +#include <openssl/poly1305.h>
5192 +
5193 +struct poly1305_test
5194 + {
5195 + const char *inputhex;
5196 + const char *keyhex;
5197 + const char *outhex;
5198 + };
5199 +
5200 +static const struct poly1305_test poly1305_tests[] = {
5201 + {
5202 + "",
5203 + "c8afaac331ee372cd6082de134943b174710130e9f6fea8d72293850a667d86 c",
5204 + "4710130e9f6fea8d72293850a667d86c",
5205 + },
5206 + {
5207 + "48656c6c6f20776f726c6421",
5208 + "746869732069732033322d62797465206b657920666f7220506f6c793133303 5",
5209 + "a6f745008f81c916a20dcc74eef2b2f0",
5210 + },
5211 + {
5212 + "000000000000000000000000000000000000000000000000000000000000000 0",
5213 + "746869732069732033322d62797465206b657920666f7220506f6c793133303 5",
5214 + "49ec78090e481ec6c26b33b91ccc0307",
5215 + },
5216 + {
5217 + "000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 
00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 
00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000",
5218 + "746869732069732033322d62797465206b657920666f7220506f6c793133303 5",
5219 + "da84bcab02676c38cdb015604274c2aa",
5220 + },
5221 +};
5222 +
5223 +static unsigned char hex_digit(char h)
5224 + {
5225 + if (h >= '0' && h <= '9')
5226 + return h - '0';
5227 + else if (h >= 'a' && h <= 'f')
5228 + return h - 'a' + 10;
5229 + else if (h >= 'A' && h <= 'F')
5230 + return h - 'A' + 10;
5231 + else
5232 + abort();
5233 + }
5234 +
5235 +static void hex_decode(unsigned char *out, const char* hex)
5236 + {
5237 + size_t j = 0;
5238 +
5239 + while (*hex != 0)
5240 + {
5241 + unsigned char v = hex_digit(*hex++);
5242 + v <<= 4;
5243 + v |= hex_digit(*hex++);
5244 + out[j++] = v;
5245 + }
5246 + }
5247 +
5248 +static void hexdump(unsigned char *a, size_t len)
5249 + {
5250 + size_t i;
5251 +
5252 + for (i = 0; i < len; i++)
5253 + printf("%02x", a[i]);
5254 + }
5255 +
5256 +int main()
5257 + {
5258 + static const unsigned num_tests =
5259 + sizeof(poly1305_tests) / sizeof(struct poly1305_test);
5260 + unsigned i;
5261 + unsigned char key[32], out[16], expected[16];
5262 + poly1305_state poly1305;
5263 +
5264 + for (i = 0; i < num_tests; i++)
5265 + {
5266 + const struct poly1305_test *test = &poly1305_tests[i];
5267 + unsigned char *in;
5268 + size_t inlen = strlen(test->inputhex);
5269 +
5270 + if (strlen(test->keyhex) != sizeof(key)*2 ||
5271 + strlen(test->outhex) != sizeof(out)*2 ||
5272 + (inlen & 1) == 1)
5273 + return 1;
5274 +
5275 + inlen /= 2;
5276 +
5277 + hex_decode(key, test->keyhex);
5278 + hex_decode(expected, test->outhex);
5279 +
5280 + in = malloc(inlen);
5281 +
5282 + hex_decode(in, test->inputhex);
5283 + CRYPTO_poly1305_init(&poly1305, key);
5284 + CRYPTO_poly1305_update(&poly1305, in, inlen);
5285 + CRYPTO_poly1305_finish(&poly1305, out);
5286 +
5287 + if (memcmp(out, expected, sizeof(expected)) != 0)
5288 + {
5289 + printf("Poly1305 test #%d failed.\n", i);
5290 + printf("got: ");
5291 + hexdump(out, sizeof(out));
5292 + printf("\nexpected: ");
5293 + hexdump(expected, sizeof(expected));
5294 + printf("\n");
5295 + return 1;
5296 + }
5297 +
5298 + free(in);
5299 + }
5300 +
5301 + printf("PASS\n");
5302 + return 0;
5303 + }
5304 diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c
5305 index 75b6560..a042b8d 100644
5306 --- a/ssl/s3_lib.c
5307 +++ b/ssl/s3_lib.c
5308 @@ -1841,7 +1841,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5309 SSL_AEAD,
5310 SSL_TLSV1_2,
5311 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
5312 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4),
5313 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4)|
5314 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD,
5315 128,
5316 128,
5317 },
5318 @@ -1873,7 +1874,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5319 SSL_AEAD,
5320 SSL_TLSV1_2,
5321 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
5322 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4),
5323 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4)|
5324 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD,
5325 128,
5326 128,
5327 },
5328 @@ -1905,7 +1907,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5329 SSL_AEAD,
5330 SSL_TLSV1_2,
5331 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
5332 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4),
5333 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4)|
5334 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD,
5335 128,
5336 128,
5337 },
5338 @@ -1937,7 +1940,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5339 SSL_AEAD,
5340 SSL_TLSV1_2,
5341 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
5342 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4),
5343 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4)|
5344 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD,
5345 128,
5346 128,
5347 },
5348 @@ -1969,7 +1973,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5349 SSL_AEAD,
5350 SSL_TLSV1_2,
5351 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
5352 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4),
5353 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4)|
5354 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD,
5355 128,
5356 128,
5357 },
5358 @@ -2001,7 +2006,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5359 SSL_AEAD,
5360 SSL_TLSV1_2,
5361 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
5362 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4),
5363 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4)|
5364 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD,
5365 128,
5366 128,
5367 },
5368 @@ -2714,7 +2720,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5369 SSL_AEAD,
5370 SSL_TLSV1_2,
5371 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
5372 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4),
5373 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4)|
5374 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD,
5375 128,
5376 128,
5377 },
5378 @@ -2746,7 +2753,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5379 SSL_AEAD,
5380 SSL_TLSV1_2,
5381 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
5382 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4),
5383 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4)|
5384 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD,
5385 128,
5386 128,
5387 },
5388 @@ -2778,7 +2786,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5389 SSL_AEAD,
5390 SSL_TLSV1_2,
5391 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
5392 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4),
5393 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4)|
5394 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD,
5395 128,
5396 128,
5397 },
5398 @@ -2810,7 +2819,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5399 SSL_AEAD,
5400 SSL_TLSV1_2,
5401 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS,
5402 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4),
5403 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(4)|
5404 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD,
5405 128,
5406 128,
5407 },
5408 @@ -2894,6 +2904,51 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={
5409 },
5410 #endif
5411
5412 + {
5413 + 1,
5414 + TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305,
5415 + TLS1_CK_ECDHE_RSA_CHACHA20_POLY1305,
5416 + SSL_kEECDH,
5417 + SSL_aRSA,
5418 + SSL_CHACHA20POLY1305,
5419 + SSL_AEAD,
5420 + SSL_TLSV1_2,
5421 + SSL_NOT_EXP|SSL_HIGH,
5422 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(0),
5423 + 256,
5424 + 0,
5425 + },
5426 +
5427 + {
5428 + 1,
5429 + TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,
5430 + TLS1_CK_ECDHE_ECDSA_CHACHA20_POLY1305,
5431 + SSL_kEECDH,
5432 + SSL_aECDSA,
5433 + SSL_CHACHA20POLY1305,
5434 + SSL_AEAD,
5435 + SSL_TLSV1_2,
5436 + SSL_NOT_EXP|SSL_HIGH,
5437 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(0),
5438 + 256,
5439 + 0,
5440 + },
5441 +
5442 + {
5443 + 1,
5444 + TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305,
5445 + TLS1_CK_DHE_RSA_CHACHA20_POLY1305,
5446 + SSL_kEDH,
5447 + SSL_aRSA,
5448 + SSL_CHACHA20POLY1305,
5449 + SSL_AEAD,
5450 + SSL_TLSV1_2,
5451 + SSL_NOT_EXP|SSL_HIGH,
5452 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE D_NONCE_LEN(0),
5453 + 256,
5454 + 0,
5455 + },
5456 +
5457 /* end of list */
5458 };
5459
5460 diff --git a/ssl/s3_pkt.c b/ssl/s3_pkt.c
5461 index 5038f6c..04b474d 100644
5462 --- a/ssl/s3_pkt.c
5463 +++ b/ssl/s3_pkt.c
5464 @@ -790,8 +790,11 @@ static int do_ssl3_write(SSL *s, int type, const unsigned c har *buf,
5465 else
5466 eivlen = 0;
5467 }
5468 - else if (s->aead_write_ctx != NULL)
5469 + else if (s->aead_write_ctx != NULL &&
5470 + s->aead_write_ctx->variable_nonce_included_in_record)
5471 + {
5472 eivlen = s->aead_write_ctx->variable_nonce_len;
5473 + }
5474 else
5475 eivlen = 0;
5476
5477 diff --git a/ssl/ssl.h b/ssl/ssl.h
5478 index 0644cbf..d782a98 100644
5479 --- a/ssl/ssl.h
5480 +++ b/ssl/ssl.h
5481 @@ -291,6 +291,7 @@ extern "C" {
5482 #define SSL_TXT_CAMELLIA128 "CAMELLIA128"
5483 #define SSL_TXT_CAMELLIA256 "CAMELLIA256"
5484 #define SSL_TXT_CAMELLIA "CAMELLIA"
5485 +#define SSL_TXT_CHACHA20 "CHACHA20"
5486
5487 #define SSL_TXT_MD5 "MD5"
5488 #define SSL_TXT_SHA1 "SHA1"
5489 diff --git a/ssl/ssl_ciph.c b/ssl/ssl_ciph.c
5490 index 7e780cd..b6370bd 100644
5491 --- a/ssl/ssl_ciph.c
5492 +++ b/ssl/ssl_ciph.c
5493 @@ -298,6 +298,7 @@ static const SSL_CIPHER cipher_aliases[]={
5494 {0,SSL_TXT_CAMELLIA128,0,0,0,SSL_CAMELLIA128,0,0,0,0,0,0},
5495 {0,SSL_TXT_CAMELLIA256,0,0,0,SSL_CAMELLIA256,0,0,0,0,0,0},
5496 {0,SSL_TXT_CAMELLIA ,0,0,0,SSL_CAMELLIA128|SSL_CAMELLIA256,0,0,0,0,0,0 },
5497 + {0,SSL_TXT_CHACHA20 ,0,0,0,SSL_CHACHA20POLY1305,0,0,0,0,0,0},
5498
5499 /* MAC aliases */
5500 {0,SSL_TXT_MD5,0, 0,0,0,SSL_MD5, 0,0,0,0,0},
5501 @@ -523,9 +524,15 @@ int ssl_cipher_get_evp_aead(const SSL_SESSION *s, const EVP _AEAD **aead)
5502 return 0;
5503
5504 #ifndef OPENSSL_NO_AES
5505 - /* There is only one AEAD for now. */
5506 - *aead = EVP_aead_aes_128_gcm();
5507 - return 1;
5508 + switch (c->algorithm_enc)
5509 + {
5510 + case SSL_AES128GCM:
5511 + *aead = EVP_aead_aes_128_gcm();
5512 + return 1;
5513 + case SSL_CHACHA20POLY1305:
5514 + *aead = EVP_aead_chacha20_poly1305();
5515 + return 1;
5516 + }
5517 #endif
5518
5519 return 0;
5520 @@ -1715,6 +1722,9 @@ char *SSL_CIPHER_description(const SSL_CIPHER *cipher, cha r *buf, int len)
5521 case SSL_SEED:
5522 enc="SEED(128)";
5523 break;
5524 + case SSL_CHACHA20POLY1305:
5525 + enc="ChaCha20-Poly1305";
5526 + break;
5527 default:
5528 enc="unknown";
5529 break;
5530 diff --git a/ssl/ssl_locl.h b/ssl/ssl_locl.h
5531 index 63bc28b..b83d8cd 100644
5532 --- a/ssl/ssl_locl.h
5533 +++ b/ssl/ssl_locl.h
5534 @@ -328,6 +328,7 @@
5535 #define SSL_SEED 0x00000800L
5536 #define SSL_AES128GCM 0x00001000L
5537 #define SSL_AES256GCM 0x00002000L
5538 +#define SSL_CHACHA20POLY1305 0x00004000L
5539
5540 #define SSL_AES (SSL_AES128|SSL_AES256|SSL_AES128GCM|SSL _AES256GCM)
5541 #define SSL_CAMELLIA (SSL_CAMELLIA128|SSL_CAMELLIA256)
5542 @@ -389,6 +390,12 @@
5543 #define SSL_CIPHER_AEAD_FIXED_NONCE_LEN(ssl_cipher) \
5544 (((ssl_cipher->algorithm2 >> 24) & 0xf)*2)
5545
5546 +/* SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD is a flag in
5547 + * SSL_CIPHER.algorithm2 which indicates that the variable part of the nonce is
5548 + * included as a prefix of the record. (AES-GCM, for example, does with with an
5549 + * 8-byte variable nonce.) */
5550 +#define SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD (1<<22)
5551 +
5552 /*
5553 * Export and cipher strength information. For each cipher we have to decide
5554 * whether it is exportable or not. This information is likely to change
5555 @@ -605,6 +612,9 @@ struct ssl_aead_ctx_st
5556 * records. */
5557 unsigned char fixed_nonce[8];
5558 unsigned char fixed_nonce_len, variable_nonce_len, tag_len;
5559 + /* variable_nonce_included_in_record is non-zero if the variable nonce
5560 + * for a record is included as a prefix before the ciphertext. */
5561 + char variable_nonce_included_in_record;
5562 };
5563
5564 #ifndef OPENSSL_NO_COMP
5565 diff --git a/ssl/t1_enc.c b/ssl/t1_enc.c
5566 index 7af1a32..15800af 100644
5567 --- a/ssl/t1_enc.c
5568 +++ b/ssl/t1_enc.c
5569 @@ -366,6 +366,8 @@ static int tls1_change_cipher_state_aead(SSL *s, char is_rea d,
5570 memcpy(aead_ctx->fixed_nonce, iv, iv_len);
5571 aead_ctx->fixed_nonce_len = iv_len;
5572 aead_ctx->variable_nonce_len = 8; /* always the case, currently. */
5573 + aead_ctx->variable_nonce_included_in_record =
5574 + (s->s3->tmp.new_cipher->algorithm2 & SSL_CIPHER_ALGORITHM2_VARIA BLE_NONCE_INCLUDED_IN_RECORD) != 0;
5575 if (aead_ctx->variable_nonce_len + aead_ctx->fixed_nonce_len != EVP_AEAD _nonce_length(aead))
5576 {
5577 SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE_AEAD, ERR_R_INTERNAL_ERROR );
5578 @@ -863,6 +865,7 @@ int tls1_enc(SSL *s, int send)
5579 if (send)
5580 {
5581 size_t len = rec->length;
5582 + size_t eivlen = 0;
5583 in = rec->input;
5584 out = rec->data;
5585
5586 @@ -878,18 +881,22 @@ int tls1_enc(SSL *s, int send)
5587 * variable nonce. Thus we can copy the sequence number
5588 * bytes into place without overwriting any of the
5589 * plaintext. */
5590 - memcpy(out, ad, aead->variable_nonce_len);
5591 - len -= aead->variable_nonce_len;
5592 + if (aead->variable_nonce_included_in_record)
5593 + {
5594 + memcpy(out, ad, aead->variable_nonce_len);
5595 + len -= aead->variable_nonce_len;
5596 + eivlen = aead->variable_nonce_len;
5597 + }
5598
5599 ad[11] = len >> 8;
5600 ad[12] = len & 0xff;
5601
5602 n = EVP_AEAD_CTX_seal(&aead->ctx,
5603 - out + aead->variable_nonce_len, le n + aead->tag_len,
5604 + out + eivlen, len + aead->tag_len,
5605 nonce, nonce_used,
5606 - in + aead->variable_nonce_len, len ,
5607 + in + eivlen, len,
5608 ad, sizeof(ad));
5609 - if (n >= 0)
5610 + if (n >= 0 && aead->variable_nonce_included_in_record)
5611 n += aead->variable_nonce_len;
5612 }
5613 else
5614 @@ -903,12 +910,17 @@ int tls1_enc(SSL *s, int send)
5615
5616 if (len < aead->variable_nonce_len)
5617 return 0;
5618 - memcpy(nonce + nonce_used, in, aead->variable_nonce_len) ;
5619 + memcpy(nonce + nonce_used,
5620 + aead->variable_nonce_included_in_record ? in : ad ,
5621 + aead->variable_nonce_len);
5622 nonce_used += aead->variable_nonce_len;
5623
5624 - in += aead->variable_nonce_len;
5625 - len -= aead->variable_nonce_len;
5626 - out += aead->variable_nonce_len;
5627 + if (aead->variable_nonce_included_in_record)
5628 + {
5629 + in += aead->variable_nonce_len;
5630 + len -= aead->variable_nonce_len;
5631 + out += aead->variable_nonce_len;
5632 + }
5633
5634 if (len < aead->tag_len)
5635 return 0;
5636 diff --git a/ssl/tls1.h b/ssl/tls1.h
5637 index 8cac7df..3cbcb83 100644
5638 --- a/ssl/tls1.h
5639 +++ b/ssl/tls1.h
5640 @@ -526,6 +526,10 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB ,(void (*)(void))cb)
5641 #define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256 0x0300C031
5642 #define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384 0x0300C032
5643
5644 +#define TLS1_CK_ECDHE_RSA_CHACHA20_POLY1305 0x0300CC13
5645 +#define TLS1_CK_ECDHE_ECDSA_CHACHA20_POLY1305 0x0300CC14
5646 +#define TLS1_CK_DHE_RSA_CHACHA20_POLY1305 0x0300CC15
5647 +
5648 /* XXX
5649 * Inconsistency alert:
5650 * The OpenSSL names of ciphers with ephemeral DH here include the string
5651 @@ -677,6 +681,10 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB ,(void (*)(void))cb)
5652 #define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256 "ECDH-RSA-AES128-GCM-SH A256"
5653 #define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384 "ECDH-RSA-AES256-GCM-SH A384"
5654
5655 +#define TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305 "ECDHE-RSA-CHACHA20-POLY 1305"
5656 +#define TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 "ECDHE-ECDSA-CHACHA20-PO LY1305"
5657 +#define TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305 "DHE-RSA-CHACHA2 0-POLY1305"
5658 +
5659 #define TLS_CT_RSA_SIGN 1
5660 #define TLS_CT_DSS_SIGN 2
5661 #define TLS_CT_RSA_FIXED_DH 3
5662 diff --git a/test/Makefile b/test/Makefile
5663 index 4c9eabc..4790aa8 100644
5664 --- a/test/Makefile
5665 +++ b/test/Makefile
5666 @@ -86,7 +86,9 @@ OBJ= $(BNTEST).o $(ECTEST).o $(ECDSATEST).o $(ECDHTEST).o $( IDEATEST).o \
5667 $(MDC2TEST).o $(RMDTEST).o \
5668 $(RANDTEST).o $(DHTEST).o $(ENGINETEST).o $(CASTTEST).o \
5669 $(BFTEST).o $(SSLTEST).o $(DSATEST).o $(EXPTEST).o $(RSATEST).o \
5670 - $(EVPTEST).o $(IGETEST).o $(JPAKETEST).o $(ASN1TEST).o
5671 + $(EVPTEST).o $(IGETEST).o $(JPAKETEST).o $(ASN1TEST).o $(CHACHATEST).o \
5672 + $(POLY1305TEST).o
5673 +
5674 SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $(IDEATEST).c \
5675 $(MD2TEST).c $(MD4TEST).c $(MD5TEST).c \
5676 $(HMACTEST).c $(WPTEST).c \
5677 @@ -94,7 +96,8 @@ SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $( IDEATEST).c \
5678 $(DESTEST).c $(SHATEST).c $(SHA1TEST).c $(MDC2TEST).c $(RMDTEST).c \
5679 $(RANDTEST).c $(DHTEST).c $(ENGINETEST).c $(CASTTEST).c \
5680 $(BFTEST).c $(SSLTEST).c $(DSATEST).c $(EXPTEST).c $(RSATEST).c \
5681 - $(EVPTEST).c $(IGETEST).c $(JPAKETEST).c $(SRPTEST).c $(ASN1TEST).c
5682 + $(EVPTEST).c $(IGETEST).c $(JPAKETEST).c $(SRPTEST).c $(ASN1TEST).c \
5683 + $(CHACHATEST).c $(POLY1305TEST).c
5684
5685 EXHEADER=
5686 HEADER= $(EXHEADER)
5687 @@ -137,7 +140,7 @@ alltests: \
5688 test_enc test_x509 test_rsa test_crl test_sid \
5689 test_gen test_req test_pkcs7 test_verify test_dh test_dsa \
5690 test_ss test_ca test_engine test_evp test_ssl test_tsa test_ige \
5691 - test_jpake test_srp test_cms
5692 + test_jpake test_srp test_cms test_chacha test_poly1305
5693
5694 test_evp:
5695 ../util/shlib_wrap.sh ./$(EVPTEST) evptests.txt
5696 @@ -318,6 +321,14 @@ test_srp: $(SRPTEST)$(EXE_EXT)
5697 @echo "Test SRP"
5698 ../util/shlib_wrap.sh ./srptest
5699
5700 +test_chacha: $(CHACHATEST)$(EXE_EXT)
5701 + @echo "Test ChaCha"
5702 + ../util/shlib_wrap.sh ./$(CHACHATEST)
5703 +
5704 +test_poly1305: $(POLY1305TEST)$(EXE_EXT)
5705 + @echo "Test Poly1305"
5706 + ../util/shlib_wrap.sh ./$(POLY1305TEST)
5707 +
5708 lint:
5709 lint -DLINT $(INCLUDES) $(SRC)>fluff
5710
5711 @@ -394,6 +405,12 @@ $(SHA256TEST)$(EXE_EXT): $(SHA256TEST).o $(DLIBCRYPTO)
5712 $(SHA512TEST)$(EXE_EXT): $(SHA512TEST).o $(DLIBCRYPTO)
5713 @target=$(SHA512TEST); $(BUILD_CMD)
5714
5715 +$(CHACHATEST)$(EXE_EXT): $(CHACHATEST).o $(DLIBCRYPTO)
5716 + @target=$(CHACHATEST); $(BUILD_CMD)
5717 +
5718 +$(POLY1305TEST)$(EXE_EXT): $(POLY1305TEST).o $(DLIBCRYPTO)
5719 + @target=$(CHACHATEST); $(BUILD_CMD)
5720 +
5721 $(RMDTEST)$(EXE_EXT): $(RMDTEST).o $(DLIBCRYPTO)
5722 @target=$(RMDTEST); $(BUILD_CMD)
5723
5724 --
5725 1.8.4.1
5726
OLDNEW
« no previous file with comments | « openssl/patches/aead_support.patch ('k') | openssl/patches/channelidchromium.patch » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698