OLD | NEW |
| (Empty) |
1 From 2688f00904e4ffd647afcff69bb8fe6df8c5902b Mon Sep 17 00:00:00 2001 | |
2 From: Adam Langley <agl@chromium.org> | |
3 Date: Mon, 9 Sep 2013 12:13:24 -0400 | |
4 Subject: [PATCH 43/52] chacha20poly1305 | |
5 | |
6 Add support for Chacha20 + Poly1305. | |
7 --- | |
8 .gitignore | 1 + | |
9 Configure | 56 +- | |
10 Makefile.org | 6 +- | |
11 apps/speed.c | 64 +- | |
12 crypto/chacha/Makefile | 80 ++ | |
13 crypto/chacha/chacha.h | 85 ++ | |
14 crypto/chacha/chacha_enc.c | 167 +++ | |
15 crypto/chacha/chacha_vec.c | 345 +++++++ | |
16 crypto/chacha/chachatest.c | 211 ++++ | |
17 crypto/evp/Makefile | 35 +- | |
18 crypto/evp/e_chacha20poly1305.c | 261 +++++ | |
19 crypto/evp/evp.h | 8 + | |
20 crypto/evp/evp_err.c | 3 + | |
21 crypto/poly1305/Makefile | 81 ++ | |
22 crypto/poly1305/poly1305.c | 320 ++++++ | |
23 crypto/poly1305/poly1305.h | 88 ++ | |
24 crypto/poly1305/poly1305_arm.c | 335 ++++++ | |
25 crypto/poly1305/poly1305_arm_asm.s | 2009 ++++++++++++++++++++++++++++++++++++ | |
26 crypto/poly1305/poly1305_vec.c | 733 +++++++++++++ | |
27 crypto/poly1305/poly1305test.c | 166 +++ | |
28 ssl/s3_lib.c | 75 +- | |
29 ssl/s3_pkt.c | 5 +- | |
30 ssl/ssl.h | 1 + | |
31 ssl/ssl_ciph.c | 16 +- | |
32 ssl/ssl_locl.h | 10 + | |
33 ssl/t1_enc.c | 30 +- | |
34 ssl/tls1.h | 8 + | |
35 test/Makefile | 23 +- | |
36 28 files changed, 5166 insertions(+), 56 deletions(-) | |
37 create mode 100644 crypto/chacha/Makefile | |
38 create mode 100644 crypto/chacha/chacha.h | |
39 create mode 100644 crypto/chacha/chacha_enc.c | |
40 create mode 100644 crypto/chacha/chacha_vec.c | |
41 create mode 100644 crypto/chacha/chachatest.c | |
42 create mode 100644 crypto/evp/e_chacha20poly1305.c | |
43 create mode 100644 crypto/poly1305/Makefile | |
44 create mode 100644 crypto/poly1305/poly1305.c | |
45 create mode 100644 crypto/poly1305/poly1305.h | |
46 create mode 100644 crypto/poly1305/poly1305_arm.c | |
47 create mode 100644 crypto/poly1305/poly1305_arm_asm.s | |
48 create mode 100644 crypto/poly1305/poly1305_vec.c | |
49 create mode 100644 crypto/poly1305/poly1305test.c | |
50 | |
51 diff --git a/ssl/ssl_ciph.c b/ssl/ssl_ciph.c | |
52 index db85b29..cebb18a 100644 | |
53 --- a/ssl/ssl_ciph.c | |
54 +++ b/ssl/ssl_ciph.c | |
55 @@ -1442,7 +1442,9 @@ STACK_OF(SSL_CIPHER) *ssl_create_cipher_list(const SSL_MET
HOD *ssl_method, | |
56 ssl_cipher_apply_rule(0, SSL_kEECDH, 0, 0, 0, 0, 0, CIPHER_ADD, -1, &hea
d, &tail); | |
57 ssl_cipher_apply_rule(0, SSL_kEECDH, 0, 0, 0, 0, 0, CIPHER_DEL, -1, &hea
d, &tail); | |
58 | |
59 - /* AES is our preferred symmetric cipher */ | |
60 + /* CHACHA20 is fast and safe on all hardware and is thus our preferred | |
61 + * symmetric cipher, with AES second. */ | |
62 + ssl_cipher_apply_rule(0, 0, 0, SSL_CHACHA20POLY1305, 0, 0, 0, CIPHER_ADD
, -1, &head, &tail); | |
63 ssl_cipher_apply_rule(0, 0, 0, SSL_AES, 0, 0, 0, CIPHER_ADD, -1, &head,
&tail); | |
64 | |
65 /* Temporarily enable everything else for sorting */ | |
66 diff --git a/Configure b/Configure | |
67 index 9c803dc..1b95384 100755 | |
68 --- a/Configure | |
69 +++ b/Configure | |
70 @@ -124,24 +124,24 @@ my $tlib="-lnsl -lsocket"; | |
71 my $bits1="THIRTY_TWO_BIT "; | |
72 my $bits2="SIXTY_FOUR_BIT "; | |
73 | |
74 -my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o crypt
586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586
.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cml
l-x86.o:ghash-x86.o:"; | |
75 +my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o:des-586.o crypt
586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586
.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cml
l-x86.o:ghash-x86.o:::"; | |
76 | |
77 my $x86_elf_asm="$x86_asm:elf"; | |
78 | |
79 -my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-
gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_
64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_
64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghas
h-x86_64.o:"; | |
80 -my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.
o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o::::
:ghash-ia64.o::void"; | |
81 -my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a
-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-spa
rcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::void"; | |
82 -my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void"; | |
83 -my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-
alpha.o::void"; | |
84 -my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o:::
:::::"; | |
85 -my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha2
56-mips.o sha512-mips.o::::::::"; | |
86 -my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::ae
s-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-
s390x.o:::::ghash-s390x.o:"; | |
87 -my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cb
c.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-a
rmv4.o::void"; | |
88 -my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-p
arisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-
parisc.o::32"; | |
89 -my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o ae
s-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::gha
sh-parisc.o::64"; | |
90 -my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o
aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::"; | |
91 -my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o
aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::"; | |
92 -my $no_asm=":::::::::::::::void"; | |
93 +my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-
gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_
64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_
64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghas
h-x86_64.o::chacha_vec.o:poly1305_vec.o"; | |
94 +my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.
o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o::::
:ghash-ia64.o::::void"; | |
95 +my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a
-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-spa
rcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::ghash-sparcv9.o::::void"; | |
96 +my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::::void"; | |
97 +my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-
alpha.o::::void"; | |
98 +my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o:::
:::::::"; | |
99 +my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha2
56-mips.o sha512-mips.o::::::::::"; | |
100 +my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::ae
s-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-
s390x.o:::::::ghash-s390x.o:"; | |
101 +my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cb
c.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-a
rmv4.o::chacha_vec.o:poly1305_arm.o poly1305_arm_asm.o:void"; | |
102 +my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-p
arisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-
parisc.o::::32"; | |
103 +my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o ae
s-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::gha
sh-parisc.o::::64"; | |
104 +my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o
aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::::"; | |
105 +my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o
aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::::"; | |
106 +my $no_asm=":::::::::::::::::void"; | |
107 | |
108 # As for $BSDthreads. Idea is to maintain "collective" set of flags, | |
109 # which would cover all BSD flavors. -pthread applies to them all, | |
110 @@ -152,7 +152,7 @@ my $no_asm=":::::::::::::::void"; | |
111 # seems to be sufficient? | |
112 my $BSDthreads="-pthread -D_THREAD_SAFE -D_REENTRANT"; | |
113 | |
114 -#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lflags : $b
n_ops : $cpuid_obj : $bn_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj : $sha1_
obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_obj : $mod
es_obj : $engines_obj : $dso_scheme : $shared_target : $shared_cflag : $shared_l
dflag : $shared_extension : $ranlib : $arflags : $multilib | |
115 +#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lflags : $b
n_ops : $cpuid_obj : $bn_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj : $sha1_
obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_obj : $mod
es_obj : $engines_obj : $chacha_obj : $poly1305_obj : $dso_scheme : $shared_targ
et : $shared_cflag : $shared_ldflag : $shared_extension : $ranlib : $arflags : $
multilib : | |
116 | |
117 my %table=( | |
118 # File 'TABLE' (created by 'make TABLE') contains the data from this list, | |
119 @@ -647,6 +647,8 @@ my $idx_wp_obj = $idx++; | |
120 my $idx_cmll_obj = $idx++; | |
121 my $idx_modes_obj = $idx++; | |
122 my $idx_engines_obj = $idx++; | |
123 +my $idx_chacha_obj = $idx++; | |
124 +my $idx_poly1305_obj = $idx++; | |
125 my $idx_perlasm_scheme = $idx++; | |
126 my $idx_dso_scheme = $idx++; | |
127 my $idx_shared_target = $idx++; | |
128 @@ -692,6 +694,8 @@ my $aes_enc="aes_core.o aes_cbc.o"; | |
129 my $bf_enc ="bf_enc.o"; | |
130 my $cast_enc="c_enc.o"; | |
131 my $rc4_enc="rc4_enc.o rc4_skey.o"; | |
132 +my $chacha_enc="chacha_enc.o"; | |
133 +my $poly1305 ="poly1305.o"; | |
134 my $rc5_enc="rc5_enc.o"; | |
135 my $md5_obj=""; | |
136 my $sha1_obj=""; | |
137 @@ -1144,7 +1148,7 @@ $openssldir=$prefix . "/" . $openssldir if $openssldir !~
/(^\/|^[a-zA-Z]:[\\\/] | |
138 | |
139 print "IsMK1MF=$IsMK1MF\n"; | |
140 | |
141 -my @fields = split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); | |
142 +my @fields = split(/\s*:\s*/,$table{$target} . ":" x 31 , -1); | |
143 my $cc = $fields[$idx_cc]; | |
144 # Allow environment CC to override compiler... | |
145 if($ENV{CC}) { | |
146 @@ -1181,6 +1185,8 @@ my $ranlib = $ENV{'RANLIB'} || $fields[$idx_ranlib]; | |
147 my $ar = $ENV{'AR'} || "ar"; | |
148 my $arflags = $fields[$idx_arflags]; | |
149 my $multilib = $fields[$idx_multilib]; | |
150 +my $chacha_obj = $fields[$idx_chacha_obj]; | |
151 +my $poly1305_obj = $fields[$idx_poly1305_obj]; | |
152 | |
153 # if $prefix/lib$multilib is not an existing directory, then | |
154 # assume that it's not searched by linker automatically, in | |
155 @@ -1477,6 +1483,8 @@ $des_obj=$des_enc unless ($des_obj =~ /\.o$/); | |
156 $bf_obj=$bf_enc unless ($bf_obj =~ /\.o$/); | |
157 $cast_obj=$cast_enc unless ($cast_obj =~ /\.o$/); | |
158 $rc4_obj=$rc4_enc unless ($rc4_obj =~ /\.o$/); | |
159 +$chacha_obj=$chacha_enc unless ($chacha_obj =~ /\.o$/); | |
160 +$poly1305_obj=$poly1305 unless ($poly1305_obj =~ /\.o$/); | |
161 $rc5_obj=$rc5_enc unless ($rc5_obj =~ /\.o$/); | |
162 if ($sha1_obj =~ /\.o$/) | |
163 { | |
164 @@ -1637,6 +1645,8 @@ while (<IN>) | |
165 s/^BF_ENC=.*$/BF_ENC= $bf_obj/; | |
166 s/^CAST_ENC=.*$/CAST_ENC= $cast_obj/; | |
167 s/^RC4_ENC=.*$/RC4_ENC= $rc4_obj/; | |
168 + s/^CHACHA_ENC=.*$/CHACHA_ENC= $chacha_obj/; | |
169 + s/^POLY1305=.*$/POLY1305= $poly1305_obj/; | |
170 s/^RC5_ENC=.*$/RC5_ENC= $rc5_obj/; | |
171 s/^MD5_ASM_OBJ=.*$/MD5_ASM_OBJ= $md5_obj/; | |
172 s/^SHA1_ASM_OBJ=.*$/SHA1_ASM_OBJ= $sha1_obj/; | |
173 @@ -1698,6 +1708,8 @@ print "AES_ENC =$aes_obj\n"; | |
174 print "BF_ENC =$bf_obj\n"; | |
175 print "CAST_ENC =$cast_obj\n"; | |
176 print "RC4_ENC =$rc4_obj\n"; | |
177 +print "CHACHA_ENC =$chacha_obj\n"; | |
178 +print "POLY1305 =$poly1305_obj\n"; | |
179 print "RC5_ENC =$rc5_obj\n"; | |
180 print "MD5_OBJ_ASM =$md5_obj\n"; | |
181 print "SHA1_OBJ_ASM =$sha1_obj\n"; | |
182 @@ -2096,11 +2108,11 @@ sub print_table_entry | |
183 | |
184 (my $cc,my $cflags,my $unistd,my $thread_cflag,my $sys_id,my $lflags, | |
185 my $bn_ops,my $cpuid_obj,my $bn_obj,my $des_obj,my $aes_obj, my $bf_obj, | |
186 - my $md5_obj,my $sha1_obj,my $cast_obj,my $rc4_obj,my $rmd160_obj, | |
187 - my $rc5_obj,my $wp_obj,my $cmll_obj,my $modes_obj, my $engines_obj, | |
188 + my $md5_obj,my $sha1_obj,my $cast_obj,my $rc4_obj,my $chacha_obj,my $pol
y1305_obj, | |
189 + my $rmd160_obj, my $rc5_obj,my $wp_obj,my $cmll_obj,my $modes_obj, my $e
ngines_obj, | |
190 my $perlasm_scheme,my $dso_scheme,my $shared_target,my $shared_cflag, | |
191 my $shared_ldflag,my $shared_extension,my $ranlib,my $arflags,my $multil
ib)= | |
192 - split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); | |
193 + split(/\s*:\s*/,$table{$target} . ":" x 31 , -1); | |
194 | |
195 print <<EOF | |
196 | |
197 @@ -2121,6 +2133,8 @@ sub print_table_entry | |
198 \$sha1_obj = $sha1_obj | |
199 \$cast_obj = $cast_obj | |
200 \$rc4_obj = $rc4_obj | |
201 +\$chacha_obj = $chacha_obj | |
202 +\$poly1305_obj = $poly1305_obj | |
203 \$rmd160_obj = $rmd160_obj | |
204 \$rc5_obj = $rc5_obj | |
205 \$wp_obj = $wp_obj | |
206 @@ -2150,7 +2164,7 @@ sub test_sanity | |
207 | |
208 foreach $target (sort keys %table) | |
209 { | |
210 - @fields = split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); | |
211 + @fields = split(/\s*:\s*/,$table{$target} . ":" x 31 , -1); | |
212 | |
213 if ($fields[$idx_dso_scheme-1] =~ /^(beos|dl|dlfcn|win32|vms)$/) | |
214 { | |
215 diff --git a/Makefile.org b/Makefile.org | |
216 index 2db31ea..919466d 100644 | |
217 --- a/Makefile.org | |
218 +++ b/Makefile.org | |
219 @@ -94,6 +94,8 @@ BF_ENC= bf_enc.o | |
220 CAST_ENC= c_enc.o | |
221 RC4_ENC= rc4_enc.o | |
222 RC5_ENC= rc5_enc.o | |
223 +CHACHA_ENC= chacha_enc.o | |
224 +POLY1305= poly1305.o | |
225 MD5_ASM_OBJ= | |
226 SHA1_ASM_OBJ= | |
227 RMD160_ASM_OBJ= | |
228 @@ -147,7 +149,7 @@ SDIRS= \ | |
229 bn ec rsa dsa ecdsa dh ecdh dso engine \ | |
230 buffer bio stack lhash rand err \ | |
231 evp asn1 pem x509 x509v3 conf txt_db pkcs7 pkcs12 comp ocsp ui krb5 \ | |
232 - cms pqueue ts jpake srp store cmac | |
233 + cms pqueue ts jpake srp store cmac poly1305 chacha | |
234 # keep in mind that the above list is adjusted by ./Configure | |
235 # according to no-xxx arguments... | |
236 | |
237 @@ -232,6 +234,8 @@ BUILDENV= PLATFORM='$(PLATFORM)' PROCESSOR='$(PROCESSOR)'
\ | |
238 WP_ASM_OBJ='$(WP_ASM_OBJ)' \ | |
239 MODES_ASM_OBJ='$(MODES_ASM_OBJ)' \ | |
240 ENGINES_ASM_OBJ='$(ENGINES_ASM_OBJ)' \ | |
241 + CHACHA_ENC='$(CHACHA_ENC)' \ | |
242 + POLY1305='$(POLY1305)' \ | |
243 PERLASM_SCHEME='$(PERLASM_SCHEME)' \ | |
244 FIPSLIBDIR='${FIPSLIBDIR}' \ | |
245 FIPSDIR='${FIPSDIR}' \ | |
246 diff --git a/crypto/chacha/Makefile b/crypto/chacha/Makefile | |
247 new file mode 100644 | |
248 index 0000000..289933b | |
249 --- /dev/null | |
250 +++ b/crypto/chacha/Makefile | |
251 @@ -0,0 +1,80 @@ | |
252 +# | |
253 +# OpenSSL/crypto/chacha/Makefile | |
254 +# | |
255 + | |
256 +DIR= chacha | |
257 +TOP= ../.. | |
258 +CC= cc | |
259 +CPP= $(CC) -E | |
260 +INCLUDES= | |
261 +CFLAG=-g | |
262 +AR= ar r | |
263 + | |
264 +CFLAGS= $(INCLUDES) $(CFLAG) | |
265 +ASFLAGS= $(INCLUDES) $(ASFLAG) | |
266 +AFLAGS= $(ASFLAGS) | |
267 + | |
268 +CHACHA_ENC=chacha_enc.o | |
269 + | |
270 +GENERAL=Makefile | |
271 +TEST=chachatest.o | |
272 +APPS= | |
273 + | |
274 +LIB=$(TOP)/libcrypto.a | |
275 +LIBSRC= | |
276 +LIBOBJ=$(CHACHA_ENC) | |
277 + | |
278 +SRC= $(LIBSRC) | |
279 + | |
280 +EXHEADER=chacha.h | |
281 +HEADER= $(EXHEADER) | |
282 + | |
283 +ALL= $(GENERAL) $(SRC) $(HEADER) | |
284 + | |
285 +top: | |
286 + (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all) | |
287 + | |
288 +all: lib | |
289 + | |
290 +lib: $(LIBOBJ) | |
291 + $(AR) $(LIB) $(LIBOBJ) | |
292 + $(RANLIB) $(LIB) || echo Never mind. | |
293 + @touch lib | |
294 + | |
295 +files: | |
296 + $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO | |
297 + | |
298 +links: | |
299 + @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER) | |
300 + @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST) | |
301 + @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS) | |
302 + | |
303 +install: | |
304 + @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile... | |
305 + @headerlist="$(EXHEADER)"; for i in $$headerlist ; \ | |
306 + do \ | |
307 + (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \ | |
308 + chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \ | |
309 + done; | |
310 + | |
311 +tags: | |
312 + ctags $(SRC) | |
313 + | |
314 +tests: | |
315 + | |
316 +lint: | |
317 + lint -DLINT $(INCLUDES) $(SRC)>fluff | |
318 + | |
319 +depend: | |
320 + @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... | |
321 + $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) | |
322 + | |
323 +dclean: | |
324 + $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKE
FILE) >Makefile.new | |
325 + mv -f Makefile.new $(MAKEFILE) | |
326 + | |
327 +clean: | |
328 + rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff | |
329 + | |
330 +# DO NOT DELETE THIS LINE -- make depend depends on it. | |
331 + | |
332 diff --git a/crypto/chacha/chacha.h b/crypto/chacha/chacha.h | |
333 new file mode 100644 | |
334 index 0000000..d56519d | |
335 --- /dev/null | |
336 +++ b/crypto/chacha/chacha.h | |
337 @@ -0,0 +1,85 @@ | |
338 +/* | |
339 + * Chacha stream algorithm. | |
340 + * | |
341 + * Created on: Jun, 2013 | |
342 + * Author: Elie Bursztein (elieb@google.com) | |
343 + * | |
344 + * Adapted from the estream code by D. Bernstein. | |
345 + */ | |
346 +/* ==================================================================== | |
347 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
348 + * | |
349 + * Redistribution and use in source and binary forms, with or without | |
350 + * modification, are permitted provided that the following conditions | |
351 + * are met: | |
352 + * | |
353 + * 1. Redistributions of source code must retain the above copyright | |
354 + * notice, this list of conditions and the following disclaimer. | |
355 + * | |
356 + * 2. Redistributions in binary form must reproduce the above copyright | |
357 + * notice, this list of conditions and the following disclaimer in | |
358 + * the documentation and/or other materials provided with the | |
359 + * distribution. | |
360 + * | |
361 + * 3. All advertising materials mentioning features or use of this | |
362 + * software must display the following acknowledgment: | |
363 + * "This product includes software developed by the OpenSSL Project | |
364 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
365 + * | |
366 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
367 + * endorse or promote products derived from this software without | |
368 + * prior written permission. For written permission, please contact | |
369 + * licensing@OpenSSL.org. | |
370 + * | |
371 + * 5. Products derived from this software may not be called "OpenSSL" | |
372 + * nor may "OpenSSL" appear in their names without prior written | |
373 + * permission of the OpenSSL Project. | |
374 + * | |
375 + * 6. Redistributions of any form whatsoever must retain the following | |
376 + * acknowledgment: | |
377 + * "This product includes software developed by the OpenSSL Project | |
378 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
379 + * | |
380 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
381 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
382 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
383 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
384 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
385 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
386 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
387 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
388 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
389 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
390 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
391 + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
392 + * ==================================================================== | |
393 + */ | |
394 +#ifndef HEADER_CHACHA_H | |
395 +#define HEADER_CHACHA_H | |
396 + | |
397 +#include <openssl/opensslconf.h> | |
398 + | |
399 +#if defined(OPENSSL_NO_CHACHA) | |
400 +#error ChaCha support is disabled. | |
401 +#endif | |
402 + | |
403 +#include <stddef.h> | |
404 + | |
405 +#ifdef __cplusplus | |
406 +extern "C" { | |
407 +#endif | |
408 + | |
409 +/* CRYPTO_chacha_20 encrypts |in_len| bytes from |in| with the given key and | |
410 + * nonce and writes the result to |out|, which may be equal to |in|. The | |
411 + * initial block counter is specified by |counter|. */ | |
412 +void CRYPTO_chacha_20(unsigned char *out, | |
413 + const unsigned char *in, size_t in_len, | |
414 + const unsigned char key[32], | |
415 + const unsigned char nonce[8], | |
416 + size_t counter); | |
417 + | |
418 +#ifdef __cplusplus | |
419 +} | |
420 +#endif | |
421 + | |
422 +#endif | |
423 diff --git a/crypto/chacha/chacha_enc.c b/crypto/chacha/chacha_enc.c | |
424 new file mode 100644 | |
425 index 0000000..54d1ca3 | |
426 --- /dev/null | |
427 +++ b/crypto/chacha/chacha_enc.c | |
428 @@ -0,0 +1,167 @@ | |
429 +/* | |
430 + * Chacha stream algorithm. | |
431 + * | |
432 + * Created on: Jun, 2013 | |
433 + * Author: Elie Bursztein (elieb@google.com) | |
434 + * | |
435 + * Adapted from the estream code by D. Bernstein. | |
436 + */ | |
437 +/* ==================================================================== | |
438 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
439 + * | |
440 + * Redistribution and use in source and binary forms, with or without | |
441 + * modification, are permitted provided that the following conditions | |
442 + * are met: | |
443 + * | |
444 + * 1. Redistributions of source code must retain the above copyright | |
445 + * notice, this list of conditions and the following disclaimer. | |
446 + * | |
447 + * 2. Redistributions in binary form must reproduce the above copyright | |
448 + * notice, this list of conditions and the following disclaimer in | |
449 + * the documentation and/or other materials provided with the | |
450 + * distribution. | |
451 + * | |
452 + * 3. All advertising materials mentioning features or use of this | |
453 + * software must display the following acknowledgment: | |
454 + * "This product includes software developed by the OpenSSL Project | |
455 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
456 + * | |
457 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
458 + * endorse or promote products derived from this software without | |
459 + * prior written permission. For written permission, please contact | |
460 + * licensing@OpenSSL.org. | |
461 + * | |
462 + * 5. Products derived from this software may not be called "OpenSSL" | |
463 + * nor may "OpenSSL" appear in their names without prior written | |
464 + * permission of the OpenSSL Project. | |
465 + * | |
466 + * 6. Redistributions of any form whatsoever must retain the following | |
467 + * acknowledgment: | |
468 + * "This product includes software developed by the OpenSSL Project | |
469 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
470 + * | |
471 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
472 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
473 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
474 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
475 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
476 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
477 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
478 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
479 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
480 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
481 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
482 + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
483 + * ==================================================================== | |
484 + */ | |
485 + | |
486 +#include <stdint.h> | |
487 +#include <string.h> | |
488 +#include <openssl/opensslconf.h> | |
489 + | |
490 +#if !defined(OPENSSL_NO_CHACHA) | |
491 + | |
492 +#include <openssl/chacha.h> | |
493 + | |
494 +/* sigma contains the ChaCha constants, which happen to be an ASCII string. */ | |
495 +static const char sigma[16] = "expand 32-byte k"; | |
496 + | |
497 +#define ROTATE(v, n) (((v) << (n)) | ((v) >> (32 - (n)))) | |
498 +#define XOR(v, w) ((v) ^ (w)) | |
499 +#define PLUS(x, y) ((x) + (y)) | |
500 +#define PLUSONE(v) (PLUS((v), 1)) | |
501 + | |
502 +#define U32TO8_LITTLE(p, v) \ | |
503 + { (p)[0] = (v >> 0) & 0xff; (p)[1] = (v >> 8) & 0xff; \ | |
504 + (p)[2] = (v >> 16) & 0xff; (p)[3] = (v >> 24) & 0xff; } | |
505 +#define U8TO32_LITTLE(p) \ | |
506 + (((uint32_t)((p)[0]) ) | ((uint32_t)((p)[1]) << 8) | \ | |
507 + ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24) ) | |
508 + | |
509 +/* QUARTERROUND updates a, b, c, d with a ChaCha "quarter" round. */ | |
510 +#define QUARTERROUND(a,b,c,d) \ | |
511 + x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \ | |
512 + x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \ | |
513 + x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \ | |
514 + x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7); | |
515 + | |
516 +typedef unsigned int uint32_t; | |
517 + | |
518 +/* chacha_core performs |num_rounds| rounds of ChaCha20 on the input words in | |
519 + * |input| and writes the 64 output bytes to |output|. */ | |
520 +static void chacha_core(unsigned char output[64], const uint32_t input[16], | |
521 + int num_rounds) | |
522 + { | |
523 + uint32_t x[16]; | |
524 + int i; | |
525 + | |
526 + memcpy(x, input, sizeof(uint32_t) * 16); | |
527 + for (i = 20; i > 0; i -= 2) | |
528 + { | |
529 + QUARTERROUND( 0, 4, 8,12) | |
530 + QUARTERROUND( 1, 5, 9,13) | |
531 + QUARTERROUND( 2, 6,10,14) | |
532 + QUARTERROUND( 3, 7,11,15) | |
533 + QUARTERROUND( 0, 5,10,15) | |
534 + QUARTERROUND( 1, 6,11,12) | |
535 + QUARTERROUND( 2, 7, 8,13) | |
536 + QUARTERROUND( 3, 4, 9,14) | |
537 + } | |
538 + | |
539 + for (i = 0; i < 16; ++i) | |
540 + x[i] = PLUS(x[i], input[i]); | |
541 + for (i = 0; i < 16; ++i) | |
542 + U32TO8_LITTLE(output + 4 * i, x[i]); | |
543 + } | |
544 + | |
545 +void CRYPTO_chacha_20(unsigned char *out, | |
546 + const unsigned char *in, size_t in_len, | |
547 + const unsigned char key[32], | |
548 + const unsigned char nonce[8], | |
549 + size_t counter) | |
550 + { | |
551 + uint32_t input[16]; | |
552 + unsigned char buf[64]; | |
553 + size_t todo, i; | |
554 + | |
555 + input[0] = U8TO32_LITTLE(sigma + 0); | |
556 + input[1] = U8TO32_LITTLE(sigma + 4); | |
557 + input[2] = U8TO32_LITTLE(sigma + 8); | |
558 + input[3] = U8TO32_LITTLE(sigma + 12); | |
559 + | |
560 + input[4] = U8TO32_LITTLE(key + 0); | |
561 + input[5] = U8TO32_LITTLE(key + 4); | |
562 + input[6] = U8TO32_LITTLE(key + 8); | |
563 + input[7] = U8TO32_LITTLE(key + 12); | |
564 + | |
565 + input[8] = U8TO32_LITTLE(key + 16); | |
566 + input[9] = U8TO32_LITTLE(key + 20); | |
567 + input[10] = U8TO32_LITTLE(key + 24); | |
568 + input[11] = U8TO32_LITTLE(key + 28); | |
569 + | |
570 + input[12] = counter; | |
571 + input[13] = ((uint64_t) counter) >> 32; | |
572 + input[14] = U8TO32_LITTLE(nonce + 0); | |
573 + input[15] = U8TO32_LITTLE(nonce + 4); | |
574 + | |
575 + while (in_len > 0) | |
576 + { | |
577 + todo = sizeof(buf); | |
578 + if (in_len < todo) | |
579 + todo = in_len; | |
580 + | |
581 + chacha_core(buf, input, 20); | |
582 + for (i = 0; i < todo; i++) | |
583 + out[i] = in[i] ^ buf[i]; | |
584 + | |
585 + out += todo; | |
586 + in += todo; | |
587 + in_len -= todo; | |
588 + | |
589 + input[12]++; | |
590 + if (input[12] == 0) | |
591 + input[13]++; | |
592 + } | |
593 + } | |
594 + | |
595 +#endif /* !OPENSSL_NO_CHACHA */ | |
596 diff --git a/crypto/chacha/chacha_vec.c b/crypto/chacha/chacha_vec.c | |
597 new file mode 100644 | |
598 index 0000000..33b2238 | |
599 --- /dev/null | |
600 +++ b/crypto/chacha/chacha_vec.c | |
601 @@ -0,0 +1,345 @@ | |
602 +/* ==================================================================== | |
603 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
604 + * | |
605 + * Redistribution and use in source and binary forms, with or without | |
606 + * modification, are permitted provided that the following conditions | |
607 + * are met: | |
608 + * | |
609 + * 1. Redistributions of source code must retain the above copyright | |
610 + * notice, this list of conditions and the following disclaimer. | |
611 + * | |
612 + * 2. Redistributions in binary form must reproduce the above copyright | |
613 + * notice, this list of conditions and the following disclaimer in | |
614 + * the documentation and/or other materials provided with the | |
615 + * distribution. | |
616 + * | |
617 + * 3. All advertising materials mentioning features or use of this | |
618 + * software must display the following acknowledgment: | |
619 + * "This product includes software developed by the OpenSSL Project | |
620 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
621 + * | |
622 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
623 + * endorse or promote products derived from this software without | |
624 + * prior written permission. For written permission, please contact | |
625 + * licensing@OpenSSL.org. | |
626 + * | |
627 + * 5. Products derived from this software may not be called "OpenSSL" | |
628 + * nor may "OpenSSL" appear in their names without prior written | |
629 + * permission of the OpenSSL Project. | |
630 + * | |
631 + * 6. Redistributions of any form whatsoever must retain the following | |
632 + * acknowledgment: | |
633 + * "This product includes software developed by the OpenSSL Project | |
634 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
635 + * | |
636 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
637 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
638 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
639 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
640 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
641 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
642 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
643 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
644 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
645 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
646 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
647 + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
648 + * ==================================================================== | |
649 + */ | |
650 + | |
651 +/* This implementation is by Ted Krovetz and was submitted to SUPERCOP and | |
652 + * marked as public domain. It was been altered to allow for non-aligned inputs | |
653 + * and to allow the block counter to be passed in specifically. */ | |
654 + | |
655 +#include <string.h> | |
656 +#include <stdint.h> | |
657 +#include <openssl/opensslconf.h> | |
658 + | |
659 +#if !defined(OPENSSL_NO_CHACHA) | |
660 + | |
661 +#include <openssl/chacha.h> | |
662 + | |
663 +#ifndef CHACHA_RNDS | |
664 +#define CHACHA_RNDS 20 /* 8 (high speed), 20 (conservative), 12 (middle) */ | |
665 +#endif | |
666 + | |
667 +/* Architecture-neutral way to specify 16-byte vector of ints */ | |
668 +typedef unsigned vec __attribute__ ((vector_size (16))); | |
669 + | |
670 +/* This implementation is designed for Neon, SSE and AltiVec machines. The | |
671 + * following specify how to do certain vector operations efficiently on | |
672 + * each architecture, using intrinsics. | |
673 + * This implementation supports parallel processing of multiple blocks, | |
674 + * including potentially using general-purpose registers. | |
675 + */ | |
676 +#if __ARM_NEON__ | |
677 +#include <arm_neon.h> | |
678 +#define GPR_TOO 1 | |
679 +#define VBPI 2 | |
680 +#define ONE (vec)vsetq_lane_u32(1,vdupq_n_u32(0),0) | |
681 +#define LOAD(m) (vec)(*((vec*)(m))) | |
682 +#define STORE(m,r) (*((vec*)(m))) = (r) | |
683 +#define ROTV1(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,1) | |
684 +#define ROTV2(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,2) | |
685 +#define ROTV3(x) (vec)vextq_u32((uint32x4_t)x,(uint32x4_t)x,3) | |
686 +#define ROTW16(x) (vec)vrev32q_u16((uint16x8_t)x) | |
687 +#if __clang__ | |
688 +#define ROTW7(x) (x << ((vec){ 7, 7, 7, 7})) ^ (x >> ((vec){25,25,25,25})) | |
689 +#define ROTW8(x) (x << ((vec){ 8, 8, 8, 8})) ^ (x >> ((vec){24,24,24,24})) | |
690 +#define ROTW12(x) (x << ((vec){12,12,12,12})) ^ (x >> ((vec){20,20,20,20})) | |
691 +#else | |
692 +#define ROTW7(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,7),(uint32x4_t)x,2
5) | |
693 +#define ROTW8(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,8),(uint32x4_t)x,2
4) | |
694 +#define ROTW12(x) (vec)vsriq_n_u32(vshlq_n_u32((uint32x4_t)x,12),(uint32x4_t)x,
20) | |
695 +#endif | |
696 +#elif __SSE2__ | |
697 +#include <emmintrin.h> | |
698 +#define GPR_TOO 0 | |
699 +#if __clang__ | |
700 +#define VBPI 4 | |
701 +#else | |
702 +#define VBPI 3 | |
703 +#endif | |
704 +#define ONE (vec)_mm_set_epi32(0,0,0,1) | |
705 +#define LOAD(m) (vec)_mm_loadu_si128((__m128i*)(m)) | |
706 +#define STORE(m,r) _mm_storeu_si128((__m128i*)(m), (__m128i) (r)) | |
707 +#define ROTV1(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(0,3,2,1)) | |
708 +#define ROTV2(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(1,0,3,2)) | |
709 +#define ROTV3(x) (vec)_mm_shuffle_epi32((__m128i)x,_MM_SHUFFLE(2,1,0,3)) | |
710 +#define ROTW7(x) (vec)(_mm_slli_epi32((__m128i)x, 7) ^ _mm_srli_epi32((__m128i
)x,25)) | |
711 +#define ROTW12(x) (vec)(_mm_slli_epi32((__m128i)x,12) ^ _mm_srli_epi32((__m128i
)x,20)) | |
712 +#if __SSSE3__ | |
713 +#include <tmmintrin.h> | |
714 +#define ROTW8(x) (vec)_mm_shuffle_epi8((__m128i)x,_mm_set_epi8(14,13,12,15,10,
9,8,11,6,5,4,7,2,1,0,3)) | |
715 +#define ROTW16(x) (vec)_mm_shuffle_epi8((__m128i)x,_mm_set_epi8(13,12,15,14,9,8
,11,10,5,4,7,6,1,0,3,2)) | |
716 +#else | |
717 +#define ROTW8(x) (vec)(_mm_slli_epi32((__m128i)x, 8) ^ _mm_srli_epi32((__m128i
)x,24)) | |
718 +#define ROTW16(x) (vec)(_mm_slli_epi32((__m128i)x,16) ^ _mm_srli_epi32((__m128i
)x,16)) | |
719 +#endif | |
720 +#else | |
721 +#error -- Implementation supports only machines with neon or SSE2 | |
722 +#endif | |
723 + | |
724 +#ifndef REVV_BE | |
725 +#define REVV_BE(x) (x) | |
726 +#endif | |
727 + | |
728 +#ifndef REVW_BE | |
729 +#define REVW_BE(x) (x) | |
730 +#endif | |
731 + | |
732 +#define BPI (VBPI + GPR_TOO) /* Blocks computed per loop iteration */ | |
733 + | |
734 +#define DQROUND_VECTORS(a,b,c,d) \ | |
735 + a += b; d ^= a; d = ROTW16(d); \ | |
736 + c += d; b ^= c; b = ROTW12(b); \ | |
737 + a += b; d ^= a; d = ROTW8(d); \ | |
738 + c += d; b ^= c; b = ROTW7(b); \ | |
739 + b = ROTV1(b); c = ROTV2(c); d = ROTV3(d); \ | |
740 + a += b; d ^= a; d = ROTW16(d); \ | |
741 + c += d; b ^= c; b = ROTW12(b); \ | |
742 + a += b; d ^= a; d = ROTW8(d); \ | |
743 + c += d; b ^= c; b = ROTW7(b); \ | |
744 + b = ROTV3(b); c = ROTV2(c); d = ROTV1(d); | |
745 + | |
746 +#define QROUND_WORDS(a,b,c,d) \ | |
747 + a = a+b; d ^= a; d = d<<16 | d>>16; \ | |
748 + c = c+d; b ^= c; b = b<<12 | b>>20; \ | |
749 + a = a+b; d ^= a; d = d<< 8 | d>>24; \ | |
750 + c = c+d; b ^= c; b = b<< 7 | b>>25; | |
751 + | |
752 +#define WRITE_XOR(in, op, d, v0, v1, v2, v3) \ | |
753 + STORE(op + d + 0, LOAD(in + d + 0) ^ REVV_BE(v0)); \ | |
754 + STORE(op + d + 4, LOAD(in + d + 4) ^ REVV_BE(v1)); \ | |
755 + STORE(op + d + 8, LOAD(in + d + 8) ^ REVV_BE(v2)); \ | |
756 + STORE(op + d +12, LOAD(in + d +12) ^ REVV_BE(v3)); | |
757 + | |
758 +void CRYPTO_chacha_20( | |
759 + unsigned char *out, | |
760 + const unsigned char *in, | |
761 + size_t inlen, | |
762 + const unsigned char key[32], | |
763 + const unsigned char nonce[8], | |
764 + size_t counter) | |
765 + { | |
766 + unsigned iters, i, *op=(unsigned *)out, *ip=(unsigned *)in, *kp; | |
767 +#if defined(__ARM_NEON__) | |
768 + unsigned *np; | |
769 +#endif | |
770 + vec s0, s1, s2, s3; | |
771 +#if !defined(__ARM_NEON__) && !defined(__SSE2__) | |
772 + __attribute__ ((aligned (16))) unsigned key[8], nonce[4]; | |
773 +#endif | |
774 + __attribute__ ((aligned (16))) unsigned chacha_const[] = | |
775 + {0x61707865,0x3320646E,0x79622D32,0x6B206574}; | |
776 +#if defined(__ARM_NEON__) || defined(__SSE2__) | |
777 + kp = (unsigned *)key; | |
778 +#else | |
779 + ((vec *)key)[0] = REVV_BE(((vec *)key)[0]); | |
780 + ((vec *)key)[1] = REVV_BE(((vec *)key)[1]); | |
781 + nonce[0] = REVW_BE(((unsigned *)nonce)[0]); | |
782 + nonce[1] = REVW_BE(((unsigned *)nonce)[1]); | |
783 + nonce[2] = REVW_BE(((unsigned *)nonce)[2]); | |
784 + nonce[3] = REVW_BE(((unsigned *)nonce)[3]); | |
785 + kp = (unsigned *)key; | |
786 + np = (unsigned *)nonce; | |
787 +#endif | |
788 +#if defined(__ARM_NEON__) | |
789 + np = (unsigned*) nonce; | |
790 +#endif | |
791 + s0 = LOAD(chacha_const); | |
792 + s1 = LOAD(&((vec*)kp)[0]); | |
793 + s2 = LOAD(&((vec*)kp)[1]); | |
794 + s3 = (vec){ | |
795 + counter & 0xffffffff, | |
796 +#if __ARM_NEON__ | |
797 + 0, /* can't right-shift 32 bits on a 32-bit system. */ | |
798 +#else | |
799 + counter >> 32, | |
800 +#endif | |
801 + ((uint32_t*)nonce)[0], | |
802 + ((uint32_t*)nonce)[1] | |
803 + }; | |
804 + | |
805 + for (iters = 0; iters < inlen/(BPI*64); iters++) | |
806 + { | |
807 +#if GPR_TOO | |
808 + register unsigned x0, x1, x2, x3, x4, x5, x6, x7, x8, | |
809 + x9, x10, x11, x12, x13, x14, x15; | |
810 +#endif | |
811 +#if VBPI > 2 | |
812 + vec v8,v9,v10,v11; | |
813 +#endif | |
814 +#if VBPI > 3 | |
815 + vec v12,v13,v14,v15; | |
816 +#endif | |
817 + | |
818 + vec v0,v1,v2,v3,v4,v5,v6,v7; | |
819 + v4 = v0 = s0; v5 = v1 = s1; v6 = v2 = s2; v3 = s3; | |
820 + v7 = v3 + ONE; | |
821 +#if VBPI > 2 | |
822 + v8 = v4; v9 = v5; v10 = v6; | |
823 + v11 = v7 + ONE; | |
824 +#endif | |
825 +#if VBPI > 3 | |
826 + v12 = v8; v13 = v9; v14 = v10; | |
827 + v15 = v11 + ONE; | |
828 +#endif | |
829 +#if GPR_TOO | |
830 + x0 = chacha_const[0]; x1 = chacha_const[1]; | |
831 + x2 = chacha_const[2]; x3 = chacha_const[3]; | |
832 + x4 = kp[0]; x5 = kp[1]; x6 = kp[2]; x7 = kp[3]; | |
833 + x8 = kp[4]; x9 = kp[5]; x10 = kp[6]; x11 = kp[7]; | |
834 + x12 = counter+BPI*iters+(BPI-1); x13 = 0; | |
835 + x14 = np[0]; x15 = np[1]; | |
836 +#endif | |
837 + for (i = CHACHA_RNDS/2; i; i--) | |
838 + { | |
839 + DQROUND_VECTORS(v0,v1,v2,v3) | |
840 + DQROUND_VECTORS(v4,v5,v6,v7) | |
841 +#if VBPI > 2 | |
842 + DQROUND_VECTORS(v8,v9,v10,v11) | |
843 +#endif | |
844 +#if VBPI > 3 | |
845 + DQROUND_VECTORS(v12,v13,v14,v15) | |
846 +#endif | |
847 +#if GPR_TOO | |
848 + QROUND_WORDS( x0, x4, x8,x12) | |
849 + QROUND_WORDS( x1, x5, x9,x13) | |
850 + QROUND_WORDS( x2, x6,x10,x14) | |
851 + QROUND_WORDS( x3, x7,x11,x15) | |
852 + QROUND_WORDS( x0, x5,x10,x15) | |
853 + QROUND_WORDS( x1, x6,x11,x12) | |
854 + QROUND_WORDS( x2, x7, x8,x13) | |
855 + QROUND_WORDS( x3, x4, x9,x14) | |
856 +#endif | |
857 + } | |
858 + | |
859 + WRITE_XOR(ip, op, 0, v0+s0, v1+s1, v2+s2, v3+s3) | |
860 + s3 += ONE; | |
861 + WRITE_XOR(ip, op, 16, v4+s0, v5+s1, v6+s2, v7+s3) | |
862 + s3 += ONE; | |
863 +#if VBPI > 2 | |
864 + WRITE_XOR(ip, op, 32, v8+s0, v9+s1, v10+s2, v11+s3) | |
865 + s3 += ONE; | |
866 +#endif | |
867 +#if VBPI > 3 | |
868 + WRITE_XOR(ip, op, 48, v12+s0, v13+s1, v14+s2, v15+s3) | |
869 + s3 += ONE; | |
870 +#endif | |
871 + ip += VBPI*16; | |
872 + op += VBPI*16; | |
873 +#if GPR_TOO | |
874 + op[0] = REVW_BE(REVW_BE(ip[0]) ^ (x0 + chacha_const[0])); | |
875 + op[1] = REVW_BE(REVW_BE(ip[1]) ^ (x1 + chacha_const[1])); | |
876 + op[2] = REVW_BE(REVW_BE(ip[2]) ^ (x2 + chacha_const[2])); | |
877 + op[3] = REVW_BE(REVW_BE(ip[3]) ^ (x3 + chacha_const[3])); | |
878 + op[4] = REVW_BE(REVW_BE(ip[4]) ^ (x4 + kp[0])); | |
879 + op[5] = REVW_BE(REVW_BE(ip[5]) ^ (x5 + kp[1])); | |
880 + op[6] = REVW_BE(REVW_BE(ip[6]) ^ (x6 + kp[2])); | |
881 + op[7] = REVW_BE(REVW_BE(ip[7]) ^ (x7 + kp[3])); | |
882 + op[8] = REVW_BE(REVW_BE(ip[8]) ^ (x8 + kp[4])); | |
883 + op[9] = REVW_BE(REVW_BE(ip[9]) ^ (x9 + kp[5])); | |
884 + op[10] = REVW_BE(REVW_BE(ip[10]) ^ (x10 + kp[6])); | |
885 + op[11] = REVW_BE(REVW_BE(ip[11]) ^ (x11 + kp[7])); | |
886 + op[12] = REVW_BE(REVW_BE(ip[12]) ^ (x12 + counter+BPI*iters+(BPI
-1))); | |
887 + op[13] = REVW_BE(REVW_BE(ip[13]) ^ (x13)); | |
888 + op[14] = REVW_BE(REVW_BE(ip[14]) ^ (x14 + np[0])); | |
889 + op[15] = REVW_BE(REVW_BE(ip[15]) ^ (x15 + np[1])); | |
890 + s3 += ONE; | |
891 + ip += 16; | |
892 + op += 16; | |
893 +#endif | |
894 + } | |
895 + | |
896 + for (iters = inlen%(BPI*64)/64; iters != 0; iters--) | |
897 + { | |
898 + vec v0 = s0, v1 = s1, v2 = s2, v3 = s3; | |
899 + for (i = CHACHA_RNDS/2; i; i--) | |
900 + { | |
901 + DQROUND_VECTORS(v0,v1,v2,v3); | |
902 + } | |
903 + WRITE_XOR(ip, op, 0, v0+s0, v1+s1, v2+s2, v3+s3) | |
904 + s3 += ONE; | |
905 + ip += 16; | |
906 + op += 16; | |
907 + } | |
908 + | |
909 + inlen = inlen % 64; | |
910 + if (inlen) | |
911 + { | |
912 + __attribute__ ((aligned (16))) vec buf[4]; | |
913 + vec v0,v1,v2,v3; | |
914 + v0 = s0; v1 = s1; v2 = s2; v3 = s3; | |
915 + for (i = CHACHA_RNDS/2; i; i--) | |
916 + { | |
917 + DQROUND_VECTORS(v0,v1,v2,v3); | |
918 + } | |
919 + | |
920 + if (inlen >= 16) | |
921 + { | |
922 + STORE(op + 0, LOAD(ip + 0) ^ REVV_BE(v0 + s0)); | |
923 + if (inlen >= 32) | |
924 + { | |
925 + STORE(op + 4, LOAD(ip + 4) ^ REVV_BE(v1 + s1)); | |
926 + if (inlen >= 48) | |
927 + { | |
928 + STORE(op + 8, LOAD(ip + 8) ^ | |
929 + REVV_BE(v2 + s2)); | |
930 + buf[3] = REVV_BE(v3 + s3); | |
931 + } | |
932 + else | |
933 + buf[2] = REVV_BE(v2 + s2); | |
934 + } | |
935 + else | |
936 + buf[1] = REVV_BE(v1 + s1); | |
937 + } | |
938 + else | |
939 + buf[0] = REVV_BE(v0 + s0); | |
940 + | |
941 + for (i=inlen & ~15; i<inlen; i++) | |
942 + ((char *)op)[i] = ((char *)ip)[i] ^ ((char *)buf)[i]; | |
943 + } | |
944 + } | |
945 + | |
946 +#endif /* !OPENSSL_NO_CHACHA */ | |
947 diff --git a/crypto/chacha/chachatest.c b/crypto/chacha/chachatest.c | |
948 new file mode 100644 | |
949 index 0000000..b2a9389 | |
950 --- /dev/null | |
951 +++ b/crypto/chacha/chachatest.c | |
952 @@ -0,0 +1,211 @@ | |
953 +/* | |
954 + * Chacha stream algorithm. | |
955 + * | |
956 + * Created on: Jun, 2013 | |
957 + * Author: Elie Bursztein (elieb@google.com) | |
958 + * | |
959 + * Adapted from the estream code by D. Bernstein. | |
960 + */ | |
961 +/* ==================================================================== | |
962 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
963 + * | |
964 + * Redistribution and use in source and binary forms, with or without | |
965 + * modification, are permitted provided that the following conditions | |
966 + * are met: | |
967 + * | |
968 + * 1. Redistributions of source code must retain the above copyright | |
969 + * notice, this list of conditions and the following disclaimer. | |
970 + * | |
971 + * 2. Redistributions in binary form must reproduce the above copyright | |
972 + * notice, this list of conditions and the following disclaimer in | |
973 + * the documentation and/or other materials provided with the | |
974 + * distribution. | |
975 + * | |
976 + * 3. All advertising materials mentioning features or use of this | |
977 + * software must display the following acknowledgment: | |
978 + * "This product includes software developed by the OpenSSL Project | |
979 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
980 + * | |
981 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
982 + * endorse or promote products derived from this software without | |
983 + * prior written permission. For written permission, please contact | |
984 + * licensing@OpenSSL.org. | |
985 + * | |
986 + * 5. Products derived from this software may not be called "OpenSSL" | |
987 + * nor may "OpenSSL" appear in their names without prior written | |
988 + * permission of the OpenSSL Project. | |
989 + * | |
990 + * 6. Redistributions of any form whatsoever must retain the following | |
991 + * acknowledgment: | |
992 + * "This product includes software developed by the OpenSSL Project | |
993 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
994 + * | |
995 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
996 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
997 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
998 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
999 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
1000 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
1001 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
1002 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
1003 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
1004 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
1005 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
1006 + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
1007 + * ==================================================================== | |
1008 + */ | |
1009 + | |
1010 +#include <stdio.h> | |
1011 +#include <stdlib.h> | |
1012 +#include <string.h> | |
1013 +#include <stdint.h> | |
1014 + | |
1015 +#include <openssl/chacha.h> | |
1016 + | |
1017 +struct chacha_test { | |
1018 + const char *keyhex; | |
1019 + const char *noncehex; | |
1020 + const char *outhex; | |
1021 +}; | |
1022 + | |
1023 +static const struct chacha_test chacha_tests[] = { | |
1024 + { | |
1025 + "000000000000000000000000000000000000000000000000000000000000000
0", | |
1026 + "0000000000000000", | |
1027 + "76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc
7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586", | |
1028 + }, | |
1029 + { | |
1030 + "000000000000000000000000000000000000000000000000000000000000000
1", | |
1031 + "0000000000000000", | |
1032 + "4540f05a9f1fb296d7736e7b208e3c96eb4fe1834688d2604f450952ed432d4
1bbe2a0b6ea7566d2a5d1e7e20d42af2c53d792b1c43fea817e9ad275ae546963", | |
1033 + }, | |
1034 + { | |
1035 + "000000000000000000000000000000000000000000000000000000000000000
0", | |
1036 + "0000000000000001", | |
1037 + "de9cba7bf3d69ef5e786dc63973f653a0b49e015adbff7134fcb7df13782103
1e85a050278a7084527214f73efc7fa5b5277062eb7a0433e445f41e31afab757", | |
1038 + }, | |
1039 + { | |
1040 + "000000000000000000000000000000000000000000000000000000000000000
0", | |
1041 + "0100000000000000", | |
1042 + "ef3fdfd6c61578fbf5cf35bd3dd33b8009631634d21e42ac33960bd138e50d3
2111e4caf237ee53ca8ad6426194a88545ddc497a0b466e7d6bbdb0041b2f586b", | |
1043 + }, | |
1044 + { | |
1045 + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1
f", | |
1046 + "0001020304050607", | |
1047 + "f798a189f195e66982105ffb640bb7757f579da31602fc93ec01ac56f85ac3c
134a4547b733b46413042c9440049176905d3be59ea1c53f15916155c2be8241a38008b9a26bc359
41e2444177c8ade6689de95264986d95889fb60e84629c9bd9a5acb1cc118be563eb9b3a4a472f82
e09a7e778492b562ef7130e88dfe031c79db9d4f7c7a899151b9a475032b63fc385245fe054e3dd5
a97a5f576fe064025d3ce042c566ab2c507b138db853e3d6959660996546cc9c4a6eafdc777c040d
70eaf46f76dad3979e5c5360c3317166a1c894c94a371876a94df7628fe4eaaf2ccb27d5aaae0ad7
ad0f9d4b6ad3b54098746d4524d38407a6deb", | |
1048 + }, | |
1049 +}; | |
1050 + | |
1051 +static unsigned char hex_digit(char h) | |
1052 + { | |
1053 + if (h >= '0' && h <= '9') | |
1054 + return h - '0'; | |
1055 + else if (h >= 'a' && h <= 'f') | |
1056 + return h - 'a' + 10; | |
1057 + else if (h >= 'A' && h <= 'F') | |
1058 + return h - 'A' + 10; | |
1059 + else | |
1060 + abort(); | |
1061 + } | |
1062 + | |
1063 +static void hex_decode(unsigned char *out, const char* hex) | |
1064 + { | |
1065 + size_t j = 0; | |
1066 + | |
1067 + while (*hex != 0) | |
1068 + { | |
1069 + unsigned char v = hex_digit(*hex++); | |
1070 + v <<= 4; | |
1071 + v |= hex_digit(*hex++); | |
1072 + out[j++] = v; | |
1073 + } | |
1074 + } | |
1075 + | |
1076 +static void hexdump(unsigned char *a, size_t len) | |
1077 + { | |
1078 + size_t i; | |
1079 + | |
1080 + for (i = 0; i < len; i++) | |
1081 + printf("%02x", a[i]); | |
1082 + } | |
1083 + | |
1084 +/* misalign returns a pointer that points 0 to 15 bytes into |in| such that the | |
1085 + * returned pointer has alignment 1 mod 16. */ | |
1086 +static void* misalign(void* in) | |
1087 + { | |
1088 + intptr_t x = (intptr_t) in; | |
1089 + x += (17 - (x % 16)) % 16; | |
1090 + return (void*) x; | |
1091 + } | |
1092 + | |
1093 +int main() | |
1094 + { | |
1095 + static const unsigned num_tests = | |
1096 + sizeof(chacha_tests) / sizeof(struct chacha_test); | |
1097 + unsigned i; | |
1098 + unsigned char key_bytes[32 + 16]; | |
1099 + unsigned char nonce_bytes[8 + 16] = {0}; | |
1100 + | |
1101 + unsigned char *key = misalign(key_bytes); | |
1102 + unsigned char *nonce = misalign(nonce_bytes); | |
1103 + | |
1104 + for (i = 0; i < num_tests; i++) | |
1105 + { | |
1106 + const struct chacha_test *test = &chacha_tests[i]; | |
1107 + unsigned char *expected, *out_bytes, *zero_bytes, *out, *zeros; | |
1108 + size_t len = strlen(test->outhex); | |
1109 + | |
1110 + if (strlen(test->keyhex) != 32*2 || | |
1111 + strlen(test->noncehex) != 8*2 || | |
1112 + (len & 1) == 1) | |
1113 + return 1; | |
1114 + | |
1115 + len /= 2; | |
1116 + | |
1117 + hex_decode(key, test->keyhex); | |
1118 + hex_decode(nonce, test->noncehex); | |
1119 + | |
1120 + expected = malloc(len); | |
1121 + out_bytes = malloc(len+16); | |
1122 + zero_bytes = malloc(len+16); | |
1123 + /* Attempt to test unaligned inputs. */ | |
1124 + out = misalign(out_bytes); | |
1125 + zeros = misalign(zero_bytes); | |
1126 + memset(zeros, 0, len); | |
1127 + | |
1128 + hex_decode(expected, test->outhex); | |
1129 + CRYPTO_chacha_20(out, zeros, len, key, nonce, 0); | |
1130 + | |
1131 + if (memcmp(out, expected, len) != 0) | |
1132 + { | |
1133 + printf("ChaCha20 test #%d failed.\n", i); | |
1134 + printf("got: "); | |
1135 + hexdump(out, len); | |
1136 + printf("\nexpected: "); | |
1137 + hexdump(expected, len); | |
1138 + printf("\n"); | |
1139 + return 1; | |
1140 + } | |
1141 + | |
1142 + /* The last test has a large output. We test whether the | |
1143 + * counter works as expected by skipping the first 64 bytes of | |
1144 + * it. */ | |
1145 + if (i == num_tests - 1) | |
1146 + { | |
1147 + CRYPTO_chacha_20(out, zeros, len - 64, key, nonce, 1); | |
1148 + if (memcmp(out, expected + 64, len - 64) != 0) | |
1149 + { | |
1150 + printf("ChaCha20 skip test failed.\n"); | |
1151 + return 1; | |
1152 + } | |
1153 + } | |
1154 + | |
1155 + free(expected); | |
1156 + free(zero_bytes); | |
1157 + free(out_bytes); | |
1158 + } | |
1159 + | |
1160 + | |
1161 + printf("PASS\n"); | |
1162 + return 0; | |
1163 + } | |
1164 diff --git a/crypto/evp/Makefile b/crypto/evp/Makefile | |
1165 index b73038d..86b0504 100644 | |
1166 --- a/crypto/evp/Makefile | |
1167 +++ b/crypto/evp/Makefile | |
1168 @@ -29,7 +29,8 @@ LIBSRC= encode.c digest.c evp_enc.c evp_key.c evp_acnf.c evp_c
nf.c \ | |
1169 c_all.c c_allc.c c_alld.c evp_lib.c bio_ok.c \ | |
1170 evp_pkey.c evp_pbe.c p5_crpt.c p5_crpt2.c \ | |
1171 e_old.c pmeth_lib.c pmeth_fn.c pmeth_gn.c m_sigver.c evp_fips.c \ | |
1172 - e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c evp_aead.c | |
1173 + e_aes_cbc_hmac_sha1.c e_rc4_hmac_md5.c evp_aead.c \ | |
1174 + e_chacha20poly1305.c | |
1175 | |
1176 LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \ | |
1177 e_des.o e_bf.o e_idea.o e_des3.o e_camellia.o\ | |
1178 @@ -42,7 +43,7 @@ LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o
evp_cnf.o \ | |
1179 c_all.o c_allc.o c_alld.o evp_lib.o bio_ok.o \ | |
1180 evp_pkey.o evp_pbe.o p5_crpt.o p5_crpt2.o \ | |
1181 e_old.o pmeth_lib.o pmeth_fn.o pmeth_gn.o m_sigver.o evp_fips.o \ | |
1182 - e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o evp_aead.o | |
1183 + e_aes_cbc_hmac_sha1.o e_rc4_hmac_md5.o evp_aead.o e_chacha20poly1305.o | |
1184 | |
1185 SRC= $(LIBSRC) | |
1186 | |
1187 @@ -239,6 +240,21 @@ e_cast.o: ../../include/openssl/objects.h ../../include/ope
nssl/opensslconf.h | |
1188 e_cast.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h | |
1189 e_cast.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h | |
1190 e_cast.o: ../../include/openssl/symhacks.h ../cryptlib.h e_cast.c evp_locl.h | |
1191 +e_chacha20poly1305.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h | |
1192 +e_chacha20poly1305.o: ../../include/openssl/chacha.h | |
1193 +e_chacha20poly1305.o: ../../include/openssl/crypto.h | |
1194 +e_chacha20poly1305.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h | |
1195 +e_chacha20poly1305.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h | |
1196 +e_chacha20poly1305.o: ../../include/openssl/obj_mac.h | |
1197 +e_chacha20poly1305.o: ../../include/openssl/objects.h | |
1198 +e_chacha20poly1305.o: ../../include/openssl/opensslconf.h | |
1199 +e_chacha20poly1305.o: ../../include/openssl/opensslv.h | |
1200 +e_chacha20poly1305.o: ../../include/openssl/ossl_typ.h | |
1201 +e_chacha20poly1305.o: ../../include/openssl/poly1305.h | |
1202 +e_chacha20poly1305.o: ../../include/openssl/safestack.h | |
1203 +e_chacha20poly1305.o: ../../include/openssl/stack.h | |
1204 +e_chacha20poly1305.o: ../../include/openssl/symhacks.h e_chacha20poly1305.c | |
1205 +e_chacha20poly1305.o: evp_locl.h | |
1206 e_des.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h | |
1207 e_des.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | |
1208 e_des.o: ../../include/openssl/des.h ../../include/openssl/des_old.h | |
1209 @@ -258,9 +274,10 @@ e_des3.o: ../../include/openssl/evp.h ../../include/openssl
/lhash.h | |
1210 e_des3.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h | |
1211 e_des3.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h | |
1212 e_des3.o: ../../include/openssl/ossl_typ.h ../../include/openssl/rand.h | |
1213 -e_des3.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h | |
1214 -e_des3.o: ../../include/openssl/symhacks.h ../../include/openssl/ui.h | |
1215 -e_des3.o: ../../include/openssl/ui_compat.h ../cryptlib.h e_des3.c evp_locl.h | |
1216 +e_des3.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h | |
1217 +e_des3.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h | |
1218 +e_des3.o: ../../include/openssl/ui.h ../../include/openssl/ui_compat.h | |
1219 +e_des3.o: ../cryptlib.h e_des3.c evp_locl.h | |
1220 e_idea.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h | |
1221 e_idea.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h | |
1222 e_idea.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h | |
1223 @@ -356,6 +373,14 @@ evp_acnf.o: ../../include/openssl/opensslconf.h | |
1224 evp_acnf.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h | |
1225 evp_acnf.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h | |
1226 evp_acnf.o: ../../include/openssl/symhacks.h ../cryptlib.h evp_acnf.c | |
1227 +evp_aead.o: ../../include/openssl/asn1.h ../../include/openssl/bio.h | |
1228 +evp_aead.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h | |
1229 +evp_aead.o: ../../include/openssl/err.h ../../include/openssl/evp.h | |
1230 +evp_aead.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h | |
1231 +evp_aead.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h | |
1232 +evp_aead.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h | |
1233 +evp_aead.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h | |
1234 +evp_aead.o: ../../include/openssl/symhacks.h evp_aead.c | |
1235 evp_cnf.o: ../../e_os.h ../../include/openssl/asn1.h | |
1236 evp_cnf.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h | |
1237 evp_cnf.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h | |
1238 diff --git a/crypto/evp/e_chacha20poly1305.c b/crypto/evp/e_chacha20poly1305.c | |
1239 new file mode 100644 | |
1240 index 0000000..1c0c0fb | |
1241 --- /dev/null | |
1242 +++ b/crypto/evp/e_chacha20poly1305.c | |
1243 @@ -0,0 +1,267 @@ | |
1244 +/* ==================================================================== | |
1245 + * Copyright (c) 2013 The OpenSSL Project. All rights reserved. | |
1246 + * | |
1247 + * Redistribution and use in source and binary forms, with or without | |
1248 + * modification, are permitted provided that the following conditions | |
1249 + * are met: | |
1250 + * | |
1251 + * 1. Redistributions of source code must retain the above copyright | |
1252 + * notice, this list of conditions and the following disclaimer. | |
1253 + * | |
1254 + * 2. Redistributions in binary form must reproduce the above copyright | |
1255 + * notice, this list of conditions and the following disclaimer in | |
1256 + * the documentation and/or other materials provided with the | |
1257 + * distribution. | |
1258 + * | |
1259 + * 3. All advertising materials mentioning features or use of this | |
1260 + * software must display the following acknowledgment: | |
1261 + * "This product includes software developed by the OpenSSL Project | |
1262 + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" | |
1263 + * | |
1264 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
1265 + * endorse or promote products derived from this software without | |
1266 + * prior written permission. For written permission, please contact | |
1267 + * openssl-core@openssl.org. | |
1268 + * | |
1269 + * 5. Products derived from this software may not be called "OpenSSL" | |
1270 + * nor may "OpenSSL" appear in their names without prior written | |
1271 + * permission of the OpenSSL Project. | |
1272 + * | |
1273 + * 6. Redistributions of any form whatsoever must retain the following | |
1274 + * acknowledgment: | |
1275 + * "This product includes software developed by the OpenSSL Project | |
1276 + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" | |
1277 + * | |
1278 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
1279 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
1280 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
1281 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
1282 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
1283 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
1284 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
1285 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
1286 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
1287 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
1288 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
1289 + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
1290 + * ==================================================================== | |
1291 + * | |
1292 + */ | |
1293 + | |
1294 +#include <stdint.h> | |
1295 +#include <string.h> | |
1296 +#include <openssl/opensslconf.h> | |
1297 + | |
1298 +#if !defined(OPENSSL_NO_CHACHA) && !defined(OPENSSL_NO_POLY1305) | |
1299 + | |
1300 +#include <openssl/chacha.h> | |
1301 +#include <openssl/poly1305.h> | |
1302 +#include <openssl/evp.h> | |
1303 +#include <openssl/err.h> | |
1304 +#include "evp_locl.h" | |
1305 + | |
1306 +#define POLY1305_TAG_LEN 16 | |
1307 +#define CHACHA20_NONCE_LEN 8 | |
1308 + | |
1309 +struct aead_chacha20_poly1305_ctx | |
1310 + { | |
1311 + unsigned char key[32]; | |
1312 + unsigned char tag_len; | |
1313 + }; | |
1314 + | |
1315 +static int aead_chacha20_poly1305_init(EVP_AEAD_CTX *ctx, const unsigned char *
key, size_t key_len, size_t tag_len) | |
1316 + { | |
1317 + struct aead_chacha20_poly1305_ctx *c20_ctx; | |
1318 + | |
1319 + if (tag_len == 0) | |
1320 + tag_len = POLY1305_TAG_LEN; | |
1321 + | |
1322 + if (tag_len > POLY1305_TAG_LEN) | |
1323 + { | |
1324 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_INIT, EVP_R_TOO_LARGE); | |
1325 + return 0; | |
1326 + } | |
1327 + | |
1328 + if (key_len != sizeof(c20_ctx->key)) | |
1329 + return 0; /* internal error - EVP_AEAD_CTX_init should catch th
is. */ | |
1330 + | |
1331 + c20_ctx = OPENSSL_malloc(sizeof(struct aead_chacha20_poly1305_ctx)); | |
1332 + if (c20_ctx == NULL) | |
1333 + return 0; | |
1334 + | |
1335 + memcpy(&c20_ctx->key[0], key, key_len); | |
1336 + c20_ctx->tag_len = tag_len; | |
1337 + ctx->aead_state = c20_ctx; | |
1338 + | |
1339 + return 1; | |
1340 + } | |
1341 + | |
1342 +static void aead_chacha20_poly1305_cleanup(EVP_AEAD_CTX *ctx) | |
1343 + { | |
1344 + struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state; | |
1345 + OPENSSL_cleanse(c20_ctx->key, sizeof(c20_ctx->key)); | |
1346 + OPENSSL_free(c20_ctx); | |
1347 + } | |
1348 + | |
1349 +static void poly1305_update_with_length(poly1305_state *poly1305, | |
1350 + const unsigned char *data, size_t data_len) | |
1351 + { | |
1352 + size_t j = data_len; | |
1353 + unsigned char length_bytes[8]; | |
1354 + unsigned i; | |
1355 + | |
1356 + for (i = 0; i < sizeof(length_bytes); i++) | |
1357 + { | |
1358 + length_bytes[i] = j; | |
1359 + j >>= 8; | |
1360 + } | |
1361 + | |
1362 + CRYPTO_poly1305_update(poly1305, data, data_len); | |
1363 + CRYPTO_poly1305_update(poly1305, length_bytes, sizeof(length_bytes)); | |
1364 +} | |
1365 + | |
1366 +#if __arm__ | |
1367 +#define ALIGNED __attribute__((aligned(16))) | |
1368 +#else | |
1369 +#define ALIGNED | |
1370 +#endif | |
1371 + | |
1372 +static ssize_t aead_chacha20_poly1305_seal(const EVP_AEAD_CTX *ctx, | |
1373 + unsigned char *out, size_t max_out_len, | |
1374 + const unsigned char *nonce, size_t nonce_len, | |
1375 + const unsigned char *in, size_t in_len, | |
1376 + const unsigned char *ad, size_t ad_len) | |
1377 + { | |
1378 + const struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state; | |
1379 + unsigned char poly1305_key[32] ALIGNED; | |
1380 + poly1305_state poly1305; | |
1381 + const uint64_t in_len_64 = in_len; | |
1382 + | |
1383 + /* The underlying ChaCha implementation may not overflow the block | |
1384 + * counter into the second counter word. Therefore we disallow | |
1385 + * individual operations that work on more than 256GB at a time. | |
1386 + * |in_len_64| is needed because, on 32-bit platforms, size_t is only | |
1387 + * 32-bits and this produces a warning because it's always false. | |
1388 + * Casting to uint64_t inside the conditional is not sufficient to stop | |
1389 + * the warning. */ | |
1390 + if (in_len_64 >= (1ull << 32)*64-64) | |
1391 + { | |
1392 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_TOO_LARGE); | |
1393 + return -1; | |
1394 + } | |
1395 + | |
1396 + if (max_out_len < in_len + c20_ctx->tag_len) | |
1397 + { | |
1398 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_BUFFER_TOO_SMALL
); | |
1399 + return -1; | |
1400 + } | |
1401 + | |
1402 + if (nonce_len != CHACHA20_NONCE_LEN) | |
1403 + { | |
1404 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_SEAL, EVP_R_IV_TOO_LARGE); | |
1405 + return -1; | |
1406 + } | |
1407 + | |
1408 + memset(poly1305_key, 0, sizeof(poly1305_key)); | |
1409 + CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key), c20_c
tx->key, nonce, 0); | |
1410 + | |
1411 + CRYPTO_poly1305_init(&poly1305, poly1305_key); | |
1412 + poly1305_update_with_length(&poly1305, ad, ad_len); | |
1413 + CRYPTO_chacha_20(out, in, in_len, c20_ctx->key, nonce, 1); | |
1414 + poly1305_update_with_length(&poly1305, out, in_len); | |
1415 + | |
1416 + if (c20_ctx->tag_len != POLY1305_TAG_LEN) | |
1417 + { | |
1418 + unsigned char tag[POLY1305_TAG_LEN]; | |
1419 + CRYPTO_poly1305_finish(&poly1305, tag); | |
1420 + memcpy(out + in_len, tag, c20_ctx->tag_len); | |
1421 + return in_len + c20_ctx->tag_len; | |
1422 + } | |
1423 + | |
1424 + CRYPTO_poly1305_finish(&poly1305, out + in_len); | |
1425 + return in_len + POLY1305_TAG_LEN; | |
1426 + } | |
1427 + | |
1428 +static ssize_t aead_chacha20_poly1305_open(const EVP_AEAD_CTX *ctx, | |
1429 + unsigned char *out, size_t max_out_len, | |
1430 + const unsigned char *nonce, size_t nonce_len, | |
1431 + const unsigned char *in, size_t in_len, | |
1432 + const unsigned char *ad, size_t ad_len) | |
1433 + { | |
1434 + const struct aead_chacha20_poly1305_ctx *c20_ctx = ctx->aead_state; | |
1435 + unsigned char mac[POLY1305_TAG_LEN]; | |
1436 + unsigned char poly1305_key[32] ALIGNED; | |
1437 + size_t out_len; | |
1438 + poly1305_state poly1305; | |
1439 + const uint64_t in_len_64 = in_len; | |
1440 + | |
1441 + if (in_len < c20_ctx->tag_len) | |
1442 + { | |
1443 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BAD_DECRYPT); | |
1444 + return -1; | |
1445 + } | |
1446 + | |
1447 + /* The underlying ChaCha implementation may not overflow the block | |
1448 + * counter into the second counter word. Therefore we disallow | |
1449 + * individual operations that work on more than 256GB at a time. | |
1450 + * |in_len_64| is needed because, on 32-bit platforms, size_t is only | |
1451 + * 32-bits and this produces a warning because it's always false. | |
1452 + * Casting to uint64_t inside the conditional is not sufficient to stop | |
1453 + * the warning. */ | |
1454 + if (in_len_64 >= (1ull << 32)*64-64) | |
1455 + { | |
1456 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_TOO_LARGE); | |
1457 + return -1; | |
1458 + } | |
1459 + | |
1460 + if (nonce_len != CHACHA20_NONCE_LEN) | |
1461 + { | |
1462 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_IV_TOO_LARGE); | |
1463 + return -1; | |
1464 + } | |
1465 + | |
1466 + out_len = in_len - c20_ctx->tag_len; | |
1467 + | |
1468 + if (max_out_len < out_len) | |
1469 + { | |
1470 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BUFFER_TOO_SMALL
); | |
1471 + return -1; | |
1472 + } | |
1473 + | |
1474 + memset(poly1305_key, 0, sizeof(poly1305_key)); | |
1475 + CRYPTO_chacha_20(poly1305_key, poly1305_key, sizeof(poly1305_key), c20_c
tx->key, nonce, 0); | |
1476 + | |
1477 + CRYPTO_poly1305_init(&poly1305, poly1305_key); | |
1478 + poly1305_update_with_length(&poly1305, ad, ad_len); | |
1479 + poly1305_update_with_length(&poly1305, in, out_len); | |
1480 + CRYPTO_poly1305_finish(&poly1305, mac); | |
1481 + | |
1482 + if (CRYPTO_memcmp(mac, in + out_len, c20_ctx->tag_len) != 0) | |
1483 + { | |
1484 + EVPerr(EVP_F_AEAD_CHACHA20_POLY1305_OPEN, EVP_R_BAD_DECRYPT); | |
1485 + return -1; | |
1486 + } | |
1487 + | |
1488 + CRYPTO_chacha_20(out, in, out_len, c20_ctx->key, nonce, 1); | |
1489 + return out_len; | |
1490 + } | |
1491 + | |
1492 +static const EVP_AEAD aead_chacha20_poly1305 = | |
1493 + { | |
1494 + 32, /* key len */ | |
1495 + CHACHA20_NONCE_LEN, /* nonce len */ | |
1496 + POLY1305_TAG_LEN, /* overhead */ | |
1497 + POLY1305_TAG_LEN, /* max tag length */ | |
1498 + | |
1499 + aead_chacha20_poly1305_init, | |
1500 + aead_chacha20_poly1305_cleanup, | |
1501 + aead_chacha20_poly1305_seal, | |
1502 + aead_chacha20_poly1305_open, | |
1503 + }; | |
1504 + | |
1505 +const EVP_AEAD *EVP_aead_chacha20_poly1305() | |
1506 + { | |
1507 + return &aead_chacha20_poly1305; | |
1508 + } | |
1509 + | |
1510 +#endif /* !OPENSSL_NO_CHACHA && !OPENSSL_NO_POLY1305 */ | |
1511 diff --git a/crypto/evp/evp.h b/crypto/evp/evp.h | |
1512 index bd10642..7dc1656 100644 | |
1513 --- a/crypto/evp/evp.h | |
1514 +++ b/crypto/evp/evp.h | |
1515 @@ -1258,6 +1258,11 @@ typedef struct evp_aead_st EVP_AEAD; | |
1516 const EVP_AEAD *EVP_aead_aes_128_gcm(void); | |
1517 #endif | |
1518 | |
1519 +#if !defined(OPENSSL_NO_CHACHA) && !defined(OPENSSL_NO_POLY1305) | |
1520 +/* EVP_aead_chacha20_poly1305 is ChaCha20 with a Poly1305 authenticator. */ | |
1521 +const EVP_AEAD *EVP_aead_chacha20_poly1305(void); | |
1522 +#endif | |
1523 + | |
1524 /* EVP_AEAD_key_length returns the length, in bytes, of the keys used by | |
1525 * |aead|. */ | |
1526 size_t EVP_AEAD_key_length(const EVP_AEAD *aead); | |
1527 @@ -1360,6 +1365,9 @@ void ERR_load_EVP_strings(void); | |
1528 #define EVP_F_AEAD_AES_128_GCM_INIT 183 | |
1529 #define EVP_F_AEAD_AES_128_GCM_OPEN 181 | |
1530 #define EVP_F_AEAD_AES_128_GCM_SEAL 182 | |
1531 +#define EVP_F_AEAD_CHACHA20_POLY1305_INIT 187 | |
1532 +#define EVP_F_AEAD_CHACHA20_POLY1305_OPEN 184 | |
1533 +#define EVP_F_AEAD_CHACHA20_POLY1305_SEAL 183 | |
1534 #define EVP_F_AEAD_CTX_OPEN 185 | |
1535 #define EVP_F_AEAD_CTX_SEAL 186 | |
1536 #define EVP_F_AESNI_INIT_KEY 165 | |
1537 diff --git a/crypto/evp/evp_err.c b/crypto/evp/evp_err.c | |
1538 index c47969c..fb747e5 100644 | |
1539 --- a/crypto/evp/evp_err.c | |
1540 +++ b/crypto/evp/evp_err.c | |
1541 @@ -73,6 +73,9 @@ static ERR_STRING_DATA EVP_str_functs[]= | |
1542 {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_INIT), "AEAD_AES_128_GCM_INIT"}, | |
1543 {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_OPEN), "AEAD_AES_128_GCM_OPEN"}, | |
1544 {ERR_FUNC(EVP_F_AEAD_AES_128_GCM_SEAL), "AEAD_AES_128_GCM_SEAL"}, | |
1545 +{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_INIT), "AEAD_CHACHA20_POLY1305_INIT"}, | |
1546 +{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_OPEN), "AEAD_CHACHA20_POLY1305_OPEN"}, | |
1547 +{ERR_FUNC(EVP_F_AEAD_CHACHA20_POLY1305_SEAL), "AEAD_CHACHA20_POLY1305_SEAL"}, | |
1548 {ERR_FUNC(EVP_F_AEAD_CTX_OPEN), "AEAD_CTX_OPEN"}, | |
1549 {ERR_FUNC(EVP_F_AEAD_CTX_SEAL), "AEAD_CTX_SEAL"}, | |
1550 {ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"}, | |
1551 diff --git a/crypto/poly1305/Makefile b/crypto/poly1305/Makefile | |
1552 new file mode 100644 | |
1553 index 0000000..397d7cd | |
1554 --- /dev/null | |
1555 +++ b/crypto/poly1305/Makefile | |
1556 @@ -0,0 +1,81 @@ | |
1557 +# | |
1558 +# OpenSSL/crypto/poly1305/Makefile | |
1559 +# | |
1560 + | |
1561 +DIR= poly1305 | |
1562 +TOP= ../.. | |
1563 +CC= cc | |
1564 +CPP= $(CC) -E | |
1565 +INCLUDES= | |
1566 +CFLAG=-g | |
1567 +AR= ar r | |
1568 + | |
1569 +POLY1305=poly1305_vec.o | |
1570 + | |
1571 +CFLAGS= $(INCLUDES) $(CFLAG) | |
1572 +ASFLAGS= $(INCLUDES) $(ASFLAG) | |
1573 +AFLAGS= $(ASFLAGS) | |
1574 + | |
1575 +GENERAL=Makefile | |
1576 +TEST= | |
1577 +APPS= | |
1578 + | |
1579 +LIB=$(TOP)/libcrypto.a | |
1580 +LIBSRC=poly1305_vec.c | |
1581 +LIBOBJ=$(POLY1305) | |
1582 + | |
1583 +SRC= $(LIBSRC) | |
1584 + | |
1585 +EXHEADER=poly1305.h | |
1586 +HEADER= $(EXHEADER) | |
1587 + | |
1588 +ALL= $(GENERAL) $(SRC) $(HEADER) | |
1589 + | |
1590 +top: | |
1591 + (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all) | |
1592 + | |
1593 +all: lib | |
1594 + | |
1595 +lib: $(LIBOBJ) | |
1596 + $(AR) $(LIB) $(LIBOBJ) | |
1597 + $(RANLIB) $(LIB) || echo Never mind. | |
1598 + @touch lib | |
1599 + | |
1600 +files: | |
1601 + $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO | |
1602 + | |
1603 +links: | |
1604 + @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER) | |
1605 + @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST) | |
1606 + @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS) | |
1607 + | |
1608 +install: | |
1609 + @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile... | |
1610 + @headerlist="$(EXHEADER)"; for i in $$headerlist ; \ | |
1611 + do \ | |
1612 + (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \ | |
1613 + chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \ | |
1614 + done; | |
1615 + | |
1616 +tags: | |
1617 + ctags $(SRC) | |
1618 + | |
1619 +tests: | |
1620 + | |
1621 +lint: | |
1622 + lint -DLINT $(INCLUDES) $(SRC)>fluff | |
1623 + | |
1624 +depend: | |
1625 + @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... | |
1626 + $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) | |
1627 + | |
1628 +dclean: | |
1629 + $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKE
FILE) >Makefile.new | |
1630 + mv -f Makefile.new $(MAKEFILE) | |
1631 + | |
1632 +clean: | |
1633 + rm -f *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff | |
1634 + | |
1635 +# DO NOT DELETE THIS LINE -- make depend depends on it. | |
1636 + | |
1637 +poly1305_vec.o: ../../include/openssl/poly1305.h poly1305_vec.c | |
1638 diff --git a/crypto/poly1305/poly1305.c b/crypto/poly1305/poly1305.c | |
1639 new file mode 100644 | |
1640 index 0000000..2e5621d | |
1641 --- /dev/null | |
1642 +++ b/crypto/poly1305/poly1305.c | |
1643 @@ -0,0 +1,321 @@ | |
1644 +/* ==================================================================== | |
1645 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
1646 + * | |
1647 + * Redistribution and use in source and binary forms, with or without | |
1648 + * modification, are permitted provided that the following conditions | |
1649 + * are met: | |
1650 + * | |
1651 + * 1. Redistributions of source code must retain the above copyright | |
1652 + * notice, this list of conditions and the following disclaimer. | |
1653 + * | |
1654 + * 2. Redistributions in binary form must reproduce the above copyright | |
1655 + * notice, this list of conditions and the following disclaimer in | |
1656 + * the documentation and/or other materials provided with the | |
1657 + * distribution. | |
1658 + * | |
1659 + * 3. All advertising materials mentioning features or use of this | |
1660 + * software must display the following acknowledgment: | |
1661 + * "This product includes software developed by the OpenSSL Project | |
1662 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
1663 + * | |
1664 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
1665 + * endorse or promote products derived from this software without | |
1666 + * prior written permission. For written permission, please contact | |
1667 + * licensing@OpenSSL.org. | |
1668 + * | |
1669 + * 5. Products derived from this software may not be called "OpenSSL" | |
1670 + * nor may "OpenSSL" appear in their names without prior written | |
1671 + * permission of the OpenSSL Project. | |
1672 + * | |
1673 + * 6. Redistributions of any form whatsoever must retain the following | |
1674 + * acknowledgment: | |
1675 + * "This product includes software developed by the OpenSSL Project | |
1676 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
1677 + * | |
1678 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
1679 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
1680 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
1681 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
1682 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
1683 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
1684 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
1685 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
1686 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
1687 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
1688 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
1689 + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
1690 + * ==================================================================== | |
1691 + */ | |
1692 + | |
1693 +/* This implementation of poly1305 is by Andrew Moon | |
1694 + * (https://github.com/floodyberry/poly1305-donna) and released as public | |
1695 + * domain. */ | |
1696 + | |
1697 +#include <string.h> | |
1698 +#include <stdint.h> | |
1699 +#include <openssl/opensslconf.h> | |
1700 + | |
1701 +#if !defined(OPENSSL_NO_POLY1305) | |
1702 + | |
1703 +#include <openssl/poly1305.h> | |
1704 +#include <openssl/crypto.h> | |
1705 + | |
1706 +#if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_
64__) | |
1707 +/* We can assume little-endian. */ | |
1708 +static uint32_t U8TO32_LE(const unsigned char *m) | |
1709 + { | |
1710 + uint32_t r; | |
1711 + memcpy(&r, m, sizeof(r)); | |
1712 + return r; | |
1713 + } | |
1714 + | |
1715 +static void U32TO8_LE(unsigned char *m, uint32_t v) | |
1716 + { | |
1717 + memcpy(m, &v, sizeof(v)); | |
1718 + } | |
1719 +#else | |
1720 +static uint32_t U8TO32_LE(const unsigned char *m) | |
1721 + { | |
1722 + return (uint32_t)m[0] | | |
1723 + (uint32_t)m[1] << 8 | | |
1724 + (uint32_t)m[2] << 16 | | |
1725 + (uint32_t)m[3] << 24; | |
1726 + } | |
1727 + | |
1728 +static void U32TO8_LE(unsigned char *m, uint32_t v) | |
1729 + { | |
1730 + m[0] = v; | |
1731 + m[1] = v >> 8; | |
1732 + m[2] = v >> 16; | |
1733 + m[3] = v >> 24; | |
1734 + } | |
1735 +#endif | |
1736 + | |
1737 +static uint64_t | |
1738 +mul32x32_64(uint32_t a, uint32_t b) | |
1739 + { | |
1740 + return (uint64_t)a * b; | |
1741 + } | |
1742 + | |
1743 + | |
1744 +struct poly1305_state_st | |
1745 + { | |
1746 + uint32_t r0,r1,r2,r3,r4; | |
1747 + uint32_t s1,s2,s3,s4; | |
1748 + uint32_t h0,h1,h2,h3,h4; | |
1749 + unsigned char buf[16]; | |
1750 + unsigned int buf_used; | |
1751 + unsigned char key[16]; | |
1752 + }; | |
1753 + | |
1754 +/* poly1305_blocks updates |state| given some amount of input data. This | |
1755 + * function may only be called with a |len| that is not a multiple of 16 at the | |
1756 + * end of the data. Otherwise the input must be buffered into 16 byte blocks. | |
1757 + * */ | |
1758 +static void poly1305_update(struct poly1305_state_st *state, | |
1759 + const unsigned char *in, size_t len) | |
1760 + { | |
1761 + uint32_t t0,t1,t2,t3; | |
1762 + uint64_t t[5]; | |
1763 + uint32_t b; | |
1764 + uint64_t c; | |
1765 + size_t j; | |
1766 + unsigned char mp[16]; | |
1767 + | |
1768 + if (len < 16) | |
1769 + goto poly1305_donna_atmost15bytes; | |
1770 + | |
1771 +poly1305_donna_16bytes: | |
1772 + t0 = U8TO32_LE(in); | |
1773 + t1 = U8TO32_LE(in+4); | |
1774 + t2 = U8TO32_LE(in+8); | |
1775 + t3 = U8TO32_LE(in+12); | |
1776 + | |
1777 + in += 16; | |
1778 + len -= 16; | |
1779 + | |
1780 + state->h0 += t0 & 0x3ffffff; | |
1781 + state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; | |
1782 + state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; | |
1783 + state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; | |
1784 + state->h4 += (t3 >> 8) | (1 << 24); | |
1785 + | |
1786 +poly1305_donna_mul: | |
1787 + t[0] = mul32x32_64(state->h0,state->r0) + | |
1788 + mul32x32_64(state->h1,state->s4) + | |
1789 + mul32x32_64(state->h2,state->s3) + | |
1790 + mul32x32_64(state->h3,state->s2) + | |
1791 + mul32x32_64(state->h4,state->s1); | |
1792 + t[1] = mul32x32_64(state->h0,state->r1) + | |
1793 + mul32x32_64(state->h1,state->r0) + | |
1794 + mul32x32_64(state->h2,state->s4) + | |
1795 + mul32x32_64(state->h3,state->s3) + | |
1796 + mul32x32_64(state->h4,state->s2); | |
1797 + t[2] = mul32x32_64(state->h0,state->r2) + | |
1798 + mul32x32_64(state->h1,state->r1) + | |
1799 + mul32x32_64(state->h2,state->r0) + | |
1800 + mul32x32_64(state->h3,state->s4) + | |
1801 + mul32x32_64(state->h4,state->s3); | |
1802 + t[3] = mul32x32_64(state->h0,state->r3) + | |
1803 + mul32x32_64(state->h1,state->r2) + | |
1804 + mul32x32_64(state->h2,state->r1) + | |
1805 + mul32x32_64(state->h3,state->r0) + | |
1806 + mul32x32_64(state->h4,state->s4); | |
1807 + t[4] = mul32x32_64(state->h0,state->r4) + | |
1808 + mul32x32_64(state->h1,state->r3) + | |
1809 + mul32x32_64(state->h2,state->r2) + | |
1810 + mul32x32_64(state->h3,state->r1) + | |
1811 + mul32x32_64(state->h4,state->r0); | |
1812 + | |
1813 + state->h0 = (uint32_t)t[0] & 0x3ffffff; c = (t[0] >
> 26); | |
1814 + t[1] += c; state->h1 = (uint32_t)t[1] & 0x3ffffff; b = (uint32_t)(t[1] >
> 26); | |
1815 + t[2] += b; state->h2 = (uint32_t)t[2] & 0x3ffffff; b = (uint32_t)(t[2] >
> 26); | |
1816 + t[3] += b; state->h3 = (uint32_t)t[3] & 0x3ffffff; b = (uint32_t)(t[3] >
> 26); | |
1817 + t[4] += b; state->h4 = (uint32_t)t[4] & 0x3ffffff; b = (uint32_t)(t[4] >
> 26); | |
1818 + state->h0 += b * 5; | |
1819 + | |
1820 + if (len >= 16) | |
1821 + goto poly1305_donna_16bytes; | |
1822 + | |
1823 + /* final bytes */ | |
1824 +poly1305_donna_atmost15bytes: | |
1825 + if (!len) | |
1826 + return; | |
1827 + | |
1828 + for (j = 0; j < len; j++) | |
1829 + mp[j] = in[j]; | |
1830 + mp[j++] = 1; | |
1831 + for (; j < 16; j++) | |
1832 + mp[j] = 0; | |
1833 + len = 0; | |
1834 + | |
1835 + t0 = U8TO32_LE(mp+0); | |
1836 + t1 = U8TO32_LE(mp+4); | |
1837 + t2 = U8TO32_LE(mp+8); | |
1838 + t3 = U8TO32_LE(mp+12); | |
1839 + | |
1840 + state->h0 += t0 & 0x3ffffff; | |
1841 + state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; | |
1842 + state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; | |
1843 + state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; | |
1844 + state->h4 += (t3 >> 8); | |
1845 + | |
1846 + goto poly1305_donna_mul; | |
1847 + } | |
1848 + | |
1849 +void CRYPTO_poly1305_init(poly1305_state *statep, const unsigned char key[32]) | |
1850 + { | |
1851 + struct poly1305_state_st *state = (struct poly1305_state_st*) statep; | |
1852 + uint32_t t0,t1,t2,t3; | |
1853 + | |
1854 + t0 = U8TO32_LE(key+0); | |
1855 + t1 = U8TO32_LE(key+4); | |
1856 + t2 = U8TO32_LE(key+8); | |
1857 + t3 = U8TO32_LE(key+12); | |
1858 + | |
1859 + /* precompute multipliers */ | |
1860 + state->r0 = t0 & 0x3ffffff; t0 >>= 26; t0 |= t1 << 6; | |
1861 + state->r1 = t0 & 0x3ffff03; t1 >>= 20; t1 |= t2 << 12; | |
1862 + state->r2 = t1 & 0x3ffc0ff; t2 >>= 14; t2 |= t3 << 18; | |
1863 + state->r3 = t2 & 0x3f03fff; t3 >>= 8; | |
1864 + state->r4 = t3 & 0x00fffff; | |
1865 + | |
1866 + state->s1 = state->r1 * 5; | |
1867 + state->s2 = state->r2 * 5; | |
1868 + state->s3 = state->r3 * 5; | |
1869 + state->s4 = state->r4 * 5; | |
1870 + | |
1871 + /* init state */ | |
1872 + state->h0 = 0; | |
1873 + state->h1 = 0; | |
1874 + state->h2 = 0; | |
1875 + state->h3 = 0; | |
1876 + state->h4 = 0; | |
1877 + | |
1878 + state->buf_used = 0; | |
1879 + memcpy(state->key, key + 16, sizeof(state->key)); | |
1880 + } | |
1881 + | |
1882 +void CRYPTO_poly1305_update(poly1305_state *statep, const unsigned char *in, | |
1883 + size_t in_len) | |
1884 + { | |
1885 + unsigned int i; | |
1886 + struct poly1305_state_st *state = (struct poly1305_state_st*) statep; | |
1887 + | |
1888 + if (state->buf_used) | |
1889 + { | |
1890 + unsigned int todo = 16 - state->buf_used; | |
1891 + if (todo > in_len) | |
1892 + todo = in_len; | |
1893 + for (i = 0; i < todo; i++) | |
1894 + state->buf[state->buf_used + i] = in[i]; | |
1895 + state->buf_used += todo; | |
1896 + in_len -= todo; | |
1897 + in += todo; | |
1898 + | |
1899 + if (state->buf_used == 16) | |
1900 + { | |
1901 + poly1305_update(state, state->buf, 16); | |
1902 + state->buf_used = 0; | |
1903 + } | |
1904 + } | |
1905 + | |
1906 + if (in_len >= 16) | |
1907 + { | |
1908 + size_t todo = in_len & ~0xf; | |
1909 + poly1305_update(state, in, todo); | |
1910 + in += todo; | |
1911 + in_len &= 0xf; | |
1912 + } | |
1913 + | |
1914 + if (in_len) | |
1915 + { | |
1916 + for (i = 0; i < in_len; i++) | |
1917 + state->buf[i] = in[i]; | |
1918 + state->buf_used = in_len; | |
1919 + } | |
1920 + } | |
1921 + | |
1922 +void CRYPTO_poly1305_finish(poly1305_state *statep, unsigned char mac[16]) | |
1923 + { | |
1924 + struct poly1305_state_st *state = (struct poly1305_state_st*) statep; | |
1925 + uint64_t f0,f1,f2,f3; | |
1926 + uint32_t g0,g1,g2,g3,g4; | |
1927 + uint32_t b, nb; | |
1928 + | |
1929 + if (state->buf_used) | |
1930 + poly1305_update(state, state->buf, state->buf_used); | |
1931 + | |
1932 + b = state->h0 >> 26; state->h0 = state->h0 & 0x3ffff
ff; | |
1933 + state->h1 += b; b = state->h1 >> 26; state->h1 = state->h1 & 0x3ffff
ff; | |
1934 + state->h2 += b; b = state->h2 >> 26; state->h2 = state->h2 & 0x3ffff
ff; | |
1935 + state->h3 += b; b = state->h3 >> 26; state->h3 = state->h3 & 0x3ffff
ff; | |
1936 + state->h4 += b; b = state->h4 >> 26; state->h4 = state->h4 & 0x3ffff
ff; | |
1937 + state->h0 += b * 5; | |
1938 + | |
1939 + g0 = state->h0 + 5; b = g0 >> 26; g0 &= 0x3ffffff; | |
1940 + g1 = state->h1 + b; b = g1 >> 26; g1 &= 0x3ffffff; | |
1941 + g2 = state->h2 + b; b = g2 >> 26; g2 &= 0x3ffffff; | |
1942 + g3 = state->h3 + b; b = g3 >> 26; g3 &= 0x3ffffff; | |
1943 + g4 = state->h4 + b - (1 << 26); | |
1944 + | |
1945 + b = (g4 >> 31) - 1; | |
1946 + nb = ~b; | |
1947 + state->h0 = (state->h0 & nb) | (g0 & b); | |
1948 + state->h1 = (state->h1 & nb) | (g1 & b); | |
1949 + state->h2 = (state->h2 & nb) | (g2 & b); | |
1950 + state->h3 = (state->h3 & nb) | (g3 & b); | |
1951 + state->h4 = (state->h4 & nb) | (g4 & b); | |
1952 + | |
1953 + f0 = ((state->h0 ) | (state->h1 << 26)) + (uint64_t)U8TO32_LE(&stat
e->key[0]); | |
1954 + f1 = ((state->h1 >> 6) | (state->h2 << 20)) + (uint64_t)U8TO32_LE(&stat
e->key[4]); | |
1955 + f2 = ((state->h2 >> 12) | (state->h3 << 14)) + (uint64_t)U8TO32_LE(&stat
e->key[8]); | |
1956 + f3 = ((state->h3 >> 18) | (state->h4 << 8)) + (uint64_t)U8TO32_LE(&stat
e->key[12]); | |
1957 + | |
1958 + U32TO8_LE(&mac[ 0], f0); f1 += (f0 >> 32); | |
1959 + U32TO8_LE(&mac[ 4], f1); f2 += (f1 >> 32); | |
1960 + U32TO8_LE(&mac[ 8], f2); f3 += (f2 >> 32); | |
1961 + U32TO8_LE(&mac[12], f3); | |
1962 + } | |
1963 + | |
1964 +#endif /* !OPENSSL_NO_POLY1305 */ | |
1965 diff --git a/crypto/poly1305/poly1305.h b/crypto/poly1305/poly1305.h | |
1966 new file mode 100644 | |
1967 index 0000000..28f85ed | |
1968 --- /dev/null | |
1969 +++ b/crypto/poly1305/poly1305.h | |
1970 @@ -0,0 +1,88 @@ | |
1971 +/* | |
1972 + * Poly1305 | |
1973 + * | |
1974 + * Created on: Jun, 2013 | |
1975 + * Author: Elie Bursztein (elieb@google.com) | |
1976 + * | |
1977 + * Adapted from the estream code by D. Bernstein. | |
1978 + */ | |
1979 +/* ==================================================================== | |
1980 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
1981 + * | |
1982 + * Redistribution and use in source and binary forms, with or without | |
1983 + * modification, are permitted provided that the following conditions | |
1984 + * are met: | |
1985 + * | |
1986 + * 1. Redistributions of source code must retain the above copyright | |
1987 + * notice, this list of conditions and the following disclaimer. | |
1988 + * | |
1989 + * 2. Redistributions in binary form must reproduce the above copyright | |
1990 + * notice, this list of conditions and the following disclaimer in | |
1991 + * the documentation and/or other materials provided with the | |
1992 + * distribution. | |
1993 + * | |
1994 + * 3. All advertising materials mentioning features or use of this | |
1995 + * software must display the following acknowledgment: | |
1996 + * "This product includes software developed by the OpenSSL Project | |
1997 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
1998 + * | |
1999 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
2000 + * endorse or promote products derived from this software without | |
2001 + * prior written permission. For written permission, please contact | |
2002 + * licensing@OpenSSL.org. | |
2003 + * | |
2004 + * 5. Products derived from this software may not be called "OpenSSL" | |
2005 + * nor may "OpenSSL" appear in their names without prior written | |
2006 + * permission of the OpenSSL Project. | |
2007 + * | |
2008 + * 6. Redistributions of any form whatsoever must retain the following | |
2009 + * acknowledgment: | |
2010 + * "This product includes software developed by the OpenSSL Project | |
2011 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
2012 + * | |
2013 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
2014 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
2015 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
2016 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
2017 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
2018 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
2019 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
2020 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
2021 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
2022 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
2023 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
2024 + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
2025 + * ==================================================================== | |
2026 + */ | |
2027 + | |
2028 +#ifndef HEADER_POLY1305_H_ | |
2029 +#define HEADER_POLY1305_H_ | |
2030 + | |
2031 +#include <stdint.h> | |
2032 +#include <openssl/opensslconf.h> | |
2033 + | |
2034 +#if defined(OPENSSL_NO_POLY1305) | |
2035 +#error Poly1305 support is disabled. | |
2036 +#endif | |
2037 + | |
2038 +typedef unsigned char poly1305_state[512]; | |
2039 + | |
2040 +/* poly1305_init sets up |state| so that it can be used to calculate an | |
2041 + * authentication tag with the one-time key |key|. Note that |key| is a | |
2042 + * one-time key and therefore there is no `reset' method because that would | |
2043 + * enable several messages to be authenticated with the same key. */ | |
2044 +extern void CRYPTO_poly1305_init(poly1305_state* state, | |
2045 + const unsigned char key[32]); | |
2046 + | |
2047 +/* poly1305_update processes |in_len| bytes from |in|. It can be called zero or | |
2048 + * more times after poly1305_init. */ | |
2049 +extern void CRYPTO_poly1305_update(poly1305_state* state, | |
2050 + const unsigned char *in, | |
2051 + size_t in_len); | |
2052 + | |
2053 +/* poly1305_finish completes the poly1305 calculation and writes a 16 byte | |
2054 + * authentication tag to |mac|. */ | |
2055 +extern void CRYPTO_poly1305_finish(poly1305_state* state, | |
2056 + unsigned char mac[16]); | |
2057 + | |
2058 +#endif /* HEADER_POLY1305_H_ */ | |
2059 diff --git a/crypto/poly1305/poly1305_arm.c b/crypto/poly1305/poly1305_arm.c | |
2060 new file mode 100644 | |
2061 index 0000000..adcef35 | |
2062 --- /dev/null | |
2063 +++ b/crypto/poly1305/poly1305_arm.c | |
2064 @@ -0,0 +1,327 @@ | |
2065 +/* ==================================================================== | |
2066 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
2067 + * | |
2068 + * Redistribution and use in source and binary forms, with or without | |
2069 + * modification, are permitted provided that the following conditions | |
2070 + * are met: | |
2071 + * | |
2072 + * 1. Redistributions of source code must retain the above copyright | |
2073 + * notice, this list of conditions and the following disclaimer. | |
2074 + * | |
2075 + * 2. Redistributions in binary form must reproduce the above copyright | |
2076 + * notice, this list of conditions and the following disclaimer in | |
2077 + * the documentation and/or other materials provided with the | |
2078 + * distribution. | |
2079 + * | |
2080 + * 3. All advertising materials mentioning features or use of this | |
2081 + * software must display the following acknowledgment: | |
2082 + * "This product includes software developed by the OpenSSL Project | |
2083 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
2084 + * | |
2085 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
2086 + * endorse or promote products derived from this software without | |
2087 + * prior written permission. For written permission, please contact | |
2088 + * licensing@OpenSSL.org. | |
2089 + * | |
2090 + * 5. Products derived from this software may not be called "OpenSSL" | |
2091 + * nor may "OpenSSL" appear in their names without prior written | |
2092 + * permission of the OpenSSL Project. | |
2093 + * | |
2094 + * 6. Redistributions of any form whatsoever must retain the following | |
2095 + * acknowledgment: | |
2096 + * "This product includes software developed by the OpenSSL Project | |
2097 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
2098 + * | |
2099 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
2100 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
2101 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
2102 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
2103 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
2104 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
2105 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
2106 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
2107 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
2108 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
2109 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
2110 + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
2111 + * ==================================================================== | |
2112 + */ | |
2113 + | |
2114 +/* This implementation was taken from the public domain, neon2 version in | |
2115 + * SUPERCOP by D. J. Bernstein and Peter Schwabe. */ | |
2116 + | |
2117 +#include <stdint.h> | |
2118 + | |
2119 +#include <openssl/poly1305.h> | |
2120 + | |
2121 +#if !defined(OPENSSL_NO_POLY1305) | |
2122 + | |
2123 +typedef struct { | |
2124 + uint32_t v[12]; /* for alignment; only using 10 */ | |
2125 +} fe1305x2; | |
2126 + | |
2127 +#define addmulmod openssl_poly1305_neon2_addmulmod | |
2128 +#define blocks openssl_poly1305_neon2_blocks | |
2129 + | |
2130 +extern void addmulmod(fe1305x2 *r, const fe1305x2 *x, const fe1305x2 *y, const
fe1305x2 *c); | |
2131 + | |
2132 +extern int blocks(fe1305x2 *h, const fe1305x2 *precomp, const unsigned char *in
, unsigned int inlen); | |
2133 + | |
2134 +static void freeze(fe1305x2 *r) | |
2135 + { | |
2136 + int i; | |
2137 + | |
2138 + uint32_t x0 = r->v[0]; | |
2139 + uint32_t x1 = r->v[2]; | |
2140 + uint32_t x2 = r->v[4]; | |
2141 + uint32_t x3 = r->v[6]; | |
2142 + uint32_t x4 = r->v[8]; | |
2143 + uint32_t y0; | |
2144 + uint32_t y1; | |
2145 + uint32_t y2; | |
2146 + uint32_t y3; | |
2147 + uint32_t y4; | |
2148 + uint32_t swap; | |
2149 + | |
2150 + for (i = 0;i < 3;++i) | |
2151 + { | |
2152 + x1 += x0 >> 26; x0 &= 0x3ffffff; | |
2153 + x2 += x1 >> 26; x1 &= 0x3ffffff; | |
2154 + x3 += x2 >> 26; x2 &= 0x3ffffff; | |
2155 + x4 += x3 >> 26; x3 &= 0x3ffffff; | |
2156 + x0 += 5*(x4 >> 26); x4 &= 0x3ffffff; | |
2157 + } | |
2158 + | |
2159 + y0 = x0 + 5; | |
2160 + y1 = x1 + (y0 >> 26); y0 &= 0x3ffffff; | |
2161 + y2 = x2 + (y1 >> 26); y1 &= 0x3ffffff; | |
2162 + y3 = x3 + (y2 >> 26); y2 &= 0x3ffffff; | |
2163 + y4 = x4 + (y3 >> 26); y3 &= 0x3ffffff; | |
2164 + swap = -(y4 >> 26); y4 &= 0x3ffffff; | |
2165 + | |
2166 + y0 ^= x0; | |
2167 + y1 ^= x1; | |
2168 + y2 ^= x2; | |
2169 + y3 ^= x3; | |
2170 + y4 ^= x4; | |
2171 + | |
2172 + y0 &= swap; | |
2173 + y1 &= swap; | |
2174 + y2 &= swap; | |
2175 + y3 &= swap; | |
2176 + y4 &= swap; | |
2177 + | |
2178 + y0 ^= x0; | |
2179 + y1 ^= x1; | |
2180 + y2 ^= x2; | |
2181 + y3 ^= x3; | |
2182 + y4 ^= x4; | |
2183 + | |
2184 + r->v[0] = y0; | |
2185 + r->v[2] = y1; | |
2186 + r->v[4] = y2; | |
2187 + r->v[6] = y3; | |
2188 + r->v[8] = y4; | |
2189 + } | |
2190 + | |
2191 +static void fe1305x2_tobytearray(unsigned char *r, fe1305x2 *x) | |
2192 + { | |
2193 + uint32_t x0 = x->v[0]; | |
2194 + uint32_t x1 = x->v[2]; | |
2195 + uint32_t x2 = x->v[4]; | |
2196 + uint32_t x3 = x->v[6]; | |
2197 + uint32_t x4 = x->v[8]; | |
2198 + | |
2199 + x1 += x0 >> 26; | |
2200 + x0 &= 0x3ffffff; | |
2201 + x2 += x1 >> 26; | |
2202 + x1 &= 0x3ffffff; | |
2203 + x3 += x2 >> 26; | |
2204 + x2 &= 0x3ffffff; | |
2205 + x4 += x3 >> 26; | |
2206 + x3 &= 0x3ffffff; | |
2207 + | |
2208 + *(uint32_t *) r = x0 + (x1 << 26); | |
2209 + *(uint32_t *) (r + 4) = (x1 >> 6) + (x2 << 20); | |
2210 + *(uint32_t *) (r + 8) = (x2 >> 12) + (x3 << 14); | |
2211 + *(uint32_t *) (r + 12) = (x3 >> 18) + (x4 << 8); | |
2212 + } | |
2213 + | |
2214 +/* load32 exists to avoid breaking strict aliasing rules in | |
2215 + * fe1305x2_frombytearray. */ | |
2216 +static uint32_t load32(unsigned char *t) | |
2217 + { | |
2218 + uint32_t tmp; | |
2219 + memcpy(&tmp, t, sizeof(tmp)); | |
2220 + return tmp; | |
2221 + } | |
2222 + | |
2223 +static void fe1305x2_frombytearray(fe1305x2 *r, const unsigned char *x, unsigne
d long long xlen) | |
2224 + { | |
2225 + int i; | |
2226 + unsigned char t[17]; | |
2227 + | |
2228 + for (i = 0; (i < 16) && (i < xlen); i++) | |
2229 + t[i] = x[i]; | |
2230 + xlen -= i; | |
2231 + x += i; | |
2232 + t[i++] = 1; | |
2233 + for (; i<17; i++) | |
2234 + t[i] = 0; | |
2235 + | |
2236 + r->v[0] = 0x3ffffff & load32(t); | |
2237 + r->v[2] = 0x3ffffff & (load32(t + 3) >> 2); | |
2238 + r->v[4] = 0x3ffffff & (load32(t + 6) >> 4); | |
2239 + r->v[6] = 0x3ffffff & (load32(t + 9) >> 6); | |
2240 + r->v[8] = load32(t + 13); | |
2241 + | |
2242 + if (xlen) | |
2243 + { | |
2244 + for (i = 0; (i < 16) && (i < xlen); i++) | |
2245 + t[i] = x[i]; | |
2246 + t[i++] = 1; | |
2247 + for (; i<17; i++) | |
2248 + t[i] = 0; | |
2249 + | |
2250 + r->v[1] = 0x3ffffff & load32(t); | |
2251 + r->v[3] = 0x3ffffff & (load32(t + 3) >> 2); | |
2252 + r->v[5] = 0x3ffffff & (load32(t + 6) >> 4); | |
2253 + r->v[7] = 0x3ffffff & (load32(t + 9) >> 6); | |
2254 + r->v[9] = load32(t + 13); | |
2255 + } | |
2256 + else | |
2257 + r->v[1] = r->v[3] = r->v[5] = r->v[7] = r->v[9] = 0; | |
2258 + } | |
2259 + | |
2260 +static const fe1305x2 zero __attribute__ ((aligned (16))); | |
2261 + | |
2262 +struct poly1305_state_st { | |
2263 + unsigned char data[sizeof(fe1305x2[5]) + 128]; | |
2264 + unsigned char buf[32]; | |
2265 + unsigned int buf_used; | |
2266 + unsigned char key[16]; | |
2267 +}; | |
2268 + | |
2269 +void CRYPTO_poly1305_init(poly1305_state *state, const unsigned char key[32]) | |
2270 + { | |
2271 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state); | |
2272 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); | |
2273 + fe1305x2 *const h = r + 1; | |
2274 + fe1305x2 *const c = h + 1; | |
2275 + fe1305x2 *const precomp = c + 1; | |
2276 + unsigned int j; | |
2277 + | |
2278 + r->v[1] = r->v[0] = 0x3ffffff & *(uint32_t *) key; | |
2279 + r->v[3] = r->v[2] = 0x3ffff03 & ((*(uint32_t *) (key + 3)) >> 2); | |
2280 + r->v[5] = r->v[4] = 0x3ffc0ff & ((*(uint32_t *) (key + 6)) >> 4); | |
2281 + r->v[7] = r->v[6] = 0x3f03fff & ((*(uint32_t *) (key + 9)) >> 6); | |
2282 + r->v[9] = r->v[8] = 0x00fffff & ((*(uint32_t *) (key + 12)) >> 8); | |
2283 + | |
2284 + for (j = 0; j < 10; j++) | |
2285 + h->v[j] = 0; /* XXX: should fast-forward a bit */ | |
2286 + | |
2287 + addmulmod(precomp,r,r,&zero); /* precompute r^2 */ | |
2288 + addmulmod(precomp + 1,precomp,precomp,&zero); /* precompute r^4 */ | |
2289 + | |
2290 + memcpy(st->key, key + 16, 16); | |
2291 + st->buf_used = 0; | |
2292 + } | |
2293 + | |
2294 +void CRYPTO_poly1305_update(poly1305_state *state, const unsigned char *in, siz
e_t in_len) | |
2295 + { | |
2296 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state); | |
2297 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); | |
2298 + fe1305x2 *const h = r + 1; | |
2299 + fe1305x2 *const c = h + 1; | |
2300 + fe1305x2 *const precomp = c + 1; | |
2301 + unsigned int i; | |
2302 + | |
2303 + if (st->buf_used) | |
2304 + { | |
2305 + unsigned int todo = 32 - st->buf_used; | |
2306 + if (todo > in_len) | |
2307 + todo = in_len; | |
2308 + for (i = 0; i < todo; i++) | |
2309 + st->buf[st->buf_used + i] = in[i]; | |
2310 + st->buf_used += todo; | |
2311 + in_len -= todo; | |
2312 + in += todo; | |
2313 + | |
2314 + if (st->buf_used == sizeof(st->buf) && in_len) | |
2315 + { | |
2316 + addmulmod(h,h,precomp,&zero); | |
2317 + fe1305x2_frombytearray(c, st->buf, sizeof(st->buf)); | |
2318 + for (i = 0; i < 10; i++) | |
2319 + h->v[i] += c->v[i]; | |
2320 + st->buf_used = 0; | |
2321 + } | |
2322 + } | |
2323 + | |
2324 + while (in_len > 32) | |
2325 + { | |
2326 + unsigned int tlen = 1048576; | |
2327 + if (in_len < tlen) | |
2328 + tlen = in_len; | |
2329 + tlen -= blocks(h, precomp, in, tlen); | |
2330 + in_len -= tlen; | |
2331 + in += tlen; | |
2332 + } | |
2333 + | |
2334 + if (in_len) | |
2335 + { | |
2336 + for (i = 0; i < in_len; i++) | |
2337 + st->buf[i] = in[i]; | |
2338 + st->buf_used = in_len; | |
2339 + } | |
2340 + } | |
2341 + | |
2342 +void CRYPTO_poly1305_finish(poly1305_state* state, unsigned char mac[16]) | |
2343 + { | |
2344 + struct poly1305_state_st *st = (struct poly1305_state_st*) (state); | |
2345 + fe1305x2 *const r = (fe1305x2 *) (st->data + (15 & (-(int) st->data))); | |
2346 + fe1305x2 *const h = r + 1; | |
2347 + fe1305x2 *const c = h + 1; | |
2348 + fe1305x2 *const precomp = c + 1; | |
2349 + | |
2350 + addmulmod(h,h,precomp,&zero); | |
2351 + | |
2352 + if (st->buf_used > 16) | |
2353 + { | |
2354 + fe1305x2_frombytearray(c, st->buf, st->buf_used); | |
2355 + precomp->v[1] = r->v[1]; | |
2356 + precomp->v[3] = r->v[3]; | |
2357 + precomp->v[5] = r->v[5]; | |
2358 + precomp->v[7] = r->v[7]; | |
2359 + precomp->v[9] = r->v[9]; | |
2360 + addmulmod(h,h,precomp,c); | |
2361 + } | |
2362 + else if (st->buf_used > 0) | |
2363 + { | |
2364 + fe1305x2_frombytearray(c, st->buf, st->buf_used); | |
2365 + r->v[1] = 1; | |
2366 + r->v[3] = 0; | |
2367 + r->v[5] = 0; | |
2368 + r->v[7] = 0; | |
2369 + r->v[9] = 0; | |
2370 + addmulmod(h,h,r,c); | |
2371 + } | |
2372 + | |
2373 + h->v[0] += h->v[1]; | |
2374 + h->v[2] += h->v[3]; | |
2375 + h->v[4] += h->v[5]; | |
2376 + h->v[6] += h->v[7]; | |
2377 + h->v[8] += h->v[9]; | |
2378 + freeze(h); | |
2379 + | |
2380 + fe1305x2_frombytearray(c, st->key, 16); | |
2381 + c->v[8] ^= (1 << 24); | |
2382 + | |
2383 + h->v[0] += c->v[0]; | |
2384 + h->v[2] += c->v[2]; | |
2385 + h->v[4] += c->v[4]; | |
2386 + h->v[6] += c->v[6]; | |
2387 + h->v[8] += c->v[8]; | |
2388 + fe1305x2_tobytearray(mac, h); | |
2389 + } | |
2390 + | |
2391 +#endif /* !OPENSSL_NO_POLY1305 */ | |
2392 diff --git a/crypto/poly1305/poly1305_arm_asm.S b/crypto/poly1305/poly1305_arm_a
sm.S | |
2393 new file mode 100644 | |
2394 index 0000000..449d16f | |
2395 --- /dev/null | |
2396 +++ b/crypto/poly1305/poly1305_arm_asm.S | |
2397 @@ -0,0 +1,2009 @@ | |
2398 +# This implementation was taken from the public domain, neon2 version in | |
2399 +# SUPERCOP by D. J. Bernstein and Peter Schwabe. | |
2400 + | |
2401 +# qhasm: int32 input_0 | |
2402 + | |
2403 +# qhasm: int32 input_1 | |
2404 + | |
2405 +# qhasm: int32 input_2 | |
2406 + | |
2407 +# qhasm: int32 input_3 | |
2408 + | |
2409 +# qhasm: stack32 input_4 | |
2410 + | |
2411 +# qhasm: stack32 input_5 | |
2412 + | |
2413 +# qhasm: stack32 input_6 | |
2414 + | |
2415 +# qhasm: stack32 input_7 | |
2416 + | |
2417 +# qhasm: int32 caller_r4 | |
2418 + | |
2419 +# qhasm: int32 caller_r5 | |
2420 + | |
2421 +# qhasm: int32 caller_r6 | |
2422 + | |
2423 +# qhasm: int32 caller_r7 | |
2424 + | |
2425 +# qhasm: int32 caller_r8 | |
2426 + | |
2427 +# qhasm: int32 caller_r9 | |
2428 + | |
2429 +# qhasm: int32 caller_r10 | |
2430 + | |
2431 +# qhasm: int32 caller_r11 | |
2432 + | |
2433 +# qhasm: int32 caller_r12 | |
2434 + | |
2435 +# qhasm: int32 caller_r14 | |
2436 + | |
2437 +# qhasm: reg128 caller_q4 | |
2438 + | |
2439 +# qhasm: reg128 caller_q5 | |
2440 + | |
2441 +# qhasm: reg128 caller_q6 | |
2442 + | |
2443 +# qhasm: reg128 caller_q7 | |
2444 + | |
2445 +# qhasm: startcode | |
2446 +.fpu neon | |
2447 +.text | |
2448 + | |
2449 +# qhasm: reg128 r0 | |
2450 + | |
2451 +# qhasm: reg128 r1 | |
2452 + | |
2453 +# qhasm: reg128 r2 | |
2454 + | |
2455 +# qhasm: reg128 r3 | |
2456 + | |
2457 +# qhasm: reg128 r4 | |
2458 + | |
2459 +# qhasm: reg128 x01 | |
2460 + | |
2461 +# qhasm: reg128 x23 | |
2462 + | |
2463 +# qhasm: reg128 x4 | |
2464 + | |
2465 +# qhasm: reg128 y0 | |
2466 + | |
2467 +# qhasm: reg128 y12 | |
2468 + | |
2469 +# qhasm: reg128 y34 | |
2470 + | |
2471 +# qhasm: reg128 5y12 | |
2472 + | |
2473 +# qhasm: reg128 5y34 | |
2474 + | |
2475 +# qhasm: stack128 y0_stack | |
2476 + | |
2477 +# qhasm: stack128 y12_stack | |
2478 + | |
2479 +# qhasm: stack128 y34_stack | |
2480 + | |
2481 +# qhasm: stack128 5y12_stack | |
2482 + | |
2483 +# qhasm: stack128 5y34_stack | |
2484 + | |
2485 +# qhasm: reg128 z0 | |
2486 + | |
2487 +# qhasm: reg128 z12 | |
2488 + | |
2489 +# qhasm: reg128 z34 | |
2490 + | |
2491 +# qhasm: reg128 5z12 | |
2492 + | |
2493 +# qhasm: reg128 5z34 | |
2494 + | |
2495 +# qhasm: stack128 z0_stack | |
2496 + | |
2497 +# qhasm: stack128 z12_stack | |
2498 + | |
2499 +# qhasm: stack128 z34_stack | |
2500 + | |
2501 +# qhasm: stack128 5z12_stack | |
2502 + | |
2503 +# qhasm: stack128 5z34_stack | |
2504 + | |
2505 +# qhasm: stack128 two24 | |
2506 + | |
2507 +# qhasm: int32 ptr | |
2508 + | |
2509 +# qhasm: reg128 c01 | |
2510 + | |
2511 +# qhasm: reg128 c23 | |
2512 + | |
2513 +# qhasm: reg128 d01 | |
2514 + | |
2515 +# qhasm: reg128 d23 | |
2516 + | |
2517 +# qhasm: reg128 t0 | |
2518 + | |
2519 +# qhasm: reg128 t1 | |
2520 + | |
2521 +# qhasm: reg128 t2 | |
2522 + | |
2523 +# qhasm: reg128 t3 | |
2524 + | |
2525 +# qhasm: reg128 t4 | |
2526 + | |
2527 +# qhasm: reg128 mask | |
2528 + | |
2529 +# qhasm: reg128 u0 | |
2530 + | |
2531 +# qhasm: reg128 u1 | |
2532 + | |
2533 +# qhasm: reg128 u2 | |
2534 + | |
2535 +# qhasm: reg128 u3 | |
2536 + | |
2537 +# qhasm: reg128 u4 | |
2538 + | |
2539 +# qhasm: reg128 v01 | |
2540 + | |
2541 +# qhasm: reg128 mid | |
2542 + | |
2543 +# qhasm: reg128 v23 | |
2544 + | |
2545 +# qhasm: reg128 v4 | |
2546 + | |
2547 +# qhasm: int32 len | |
2548 + | |
2549 +# qhasm: qpushenter crypto_onetimeauth_poly1305_neon2_blocks | |
2550 +.align 4 | |
2551 +.global openssl_poly1305_neon2_blocks | |
2552 +.type openssl_poly1305_neon2_blocks STT_FUNC | |
2553 +openssl_poly1305_neon2_blocks: | |
2554 +vpush {q4,q5,q6,q7} | |
2555 +mov r12,sp | |
2556 +sub sp,sp,#192 | |
2557 +and sp,sp,#0xffffffe0 | |
2558 + | |
2559 +# qhasm: len = input_3 | |
2560 +# asm 1: mov >len=int32#4,<input_3=int32#4 | |
2561 +# asm 2: mov >len=r3,<input_3=r3 | |
2562 +mov r3,r3 | |
2563 + | |
2564 +# qhasm: new y0 | |
2565 + | |
2566 +# qhasm: y0 = mem64[input_1]y0[1]; input_1 += 8 | |
2567 +# asm 1: vld1.8 {<y0=reg128#1%bot},[<input_1=int32#2]! | |
2568 +# asm 2: vld1.8 {<y0=d0},[<input_1=r1]! | |
2569 +vld1.8 {d0},[r1]! | |
2570 + | |
2571 +# qhasm: y12 = mem128[input_1]; input_1 += 16 | |
2572 +# asm 1: vld1.8 {>y12=reg128#2%bot->y12=reg128#2%top},[<input_1=int32#2]! | |
2573 +# asm 2: vld1.8 {>y12=d2->y12=d3},[<input_1=r1]! | |
2574 +vld1.8 {d2-d3},[r1]! | |
2575 + | |
2576 +# qhasm: y34 = mem128[input_1]; input_1 += 16 | |
2577 +# asm 1: vld1.8 {>y34=reg128#3%bot->y34=reg128#3%top},[<input_1=int32#2]! | |
2578 +# asm 2: vld1.8 {>y34=d4->y34=d5},[<input_1=r1]! | |
2579 +vld1.8 {d4-d5},[r1]! | |
2580 + | |
2581 +# qhasm: input_1 += 8 | |
2582 +# asm 1: add >input_1=int32#2,<input_1=int32#2,#8 | |
2583 +# asm 2: add >input_1=r1,<input_1=r1,#8 | |
2584 +add r1,r1,#8 | |
2585 + | |
2586 +# qhasm: new z0 | |
2587 + | |
2588 +# qhasm: z0 = mem64[input_1]z0[1]; input_1 += 8 | |
2589 +# asm 1: vld1.8 {<z0=reg128#4%bot},[<input_1=int32#2]! | |
2590 +# asm 2: vld1.8 {<z0=d6},[<input_1=r1]! | |
2591 +vld1.8 {d6},[r1]! | |
2592 + | |
2593 +# qhasm: z12 = mem128[input_1]; input_1 += 16 | |
2594 +# asm 1: vld1.8 {>z12=reg128#5%bot->z12=reg128#5%top},[<input_1=int32#2]! | |
2595 +# asm 2: vld1.8 {>z12=d8->z12=d9},[<input_1=r1]! | |
2596 +vld1.8 {d8-d9},[r1]! | |
2597 + | |
2598 +# qhasm: z34 = mem128[input_1]; input_1 += 16 | |
2599 +# asm 1: vld1.8 {>z34=reg128#6%bot->z34=reg128#6%top},[<input_1=int32#2]! | |
2600 +# asm 2: vld1.8 {>z34=d10->z34=d11},[<input_1=r1]! | |
2601 +vld1.8 {d10-d11},[r1]! | |
2602 + | |
2603 +# qhasm: 2x mask = 0xffffffff | |
2604 +# asm 1: vmov.i64 >mask=reg128#7,#0xffffffff | |
2605 +# asm 2: vmov.i64 >mask=q6,#0xffffffff | |
2606 +vmov.i64 q6,#0xffffffff | |
2607 + | |
2608 +# qhasm: 2x u4 = 0xff | |
2609 +# asm 1: vmov.i64 >u4=reg128#8,#0xff | |
2610 +# asm 2: vmov.i64 >u4=q7,#0xff | |
2611 +vmov.i64 q7,#0xff | |
2612 + | |
2613 +# qhasm: x01 aligned= mem128[input_0];input_0+=16 | |
2614 +# asm 1: vld1.8 {>x01=reg128#9%bot->x01=reg128#9%top},[<input_0=int32#1,: 128]! | |
2615 +# asm 2: vld1.8 {>x01=d16->x01=d17},[<input_0=r0,: 128]! | |
2616 +vld1.8 {d16-d17},[r0,: 128]! | |
2617 + | |
2618 +# qhasm: x23 aligned= mem128[input_0];input_0+=16 | |
2619 +# asm 1: vld1.8 {>x23=reg128#10%bot->x23=reg128#10%top},[<input_0=int32#1,: 128
]! | |
2620 +# asm 2: vld1.8 {>x23=d18->x23=d19},[<input_0=r0,: 128]! | |
2621 +vld1.8 {d18-d19},[r0,: 128]! | |
2622 + | |
2623 +# qhasm: x4 aligned= mem64[input_0]x4[1] | |
2624 +# asm 1: vld1.8 {<x4=reg128#11%bot},[<input_0=int32#1,: 64] | |
2625 +# asm 2: vld1.8 {<x4=d20},[<input_0=r0,: 64] | |
2626 +vld1.8 {d20},[r0,: 64] | |
2627 + | |
2628 +# qhasm: input_0 -= 32 | |
2629 +# asm 1: sub >input_0=int32#1,<input_0=int32#1,#32 | |
2630 +# asm 2: sub >input_0=r0,<input_0=r0,#32 | |
2631 +sub r0,r0,#32 | |
2632 + | |
2633 +# qhasm: 2x mask unsigned>>=6 | |
2634 +# asm 1: vshr.u64 >mask=reg128#7,<mask=reg128#7,#6 | |
2635 +# asm 2: vshr.u64 >mask=q6,<mask=q6,#6 | |
2636 +vshr.u64 q6,q6,#6 | |
2637 + | |
2638 +# qhasm: 2x u4 unsigned>>= 7 | |
2639 +# asm 1: vshr.u64 >u4=reg128#8,<u4=reg128#8,#7 | |
2640 +# asm 2: vshr.u64 >u4=q7,<u4=q7,#7 | |
2641 +vshr.u64 q7,q7,#7 | |
2642 + | |
2643 +# qhasm: 4x 5y12 = y12 << 2 | |
2644 +# asm 1: vshl.i32 >5y12=reg128#12,<y12=reg128#2,#2 | |
2645 +# asm 2: vshl.i32 >5y12=q11,<y12=q1,#2 | |
2646 +vshl.i32 q11,q1,#2 | |
2647 + | |
2648 +# qhasm: 4x 5y34 = y34 << 2 | |
2649 +# asm 1: vshl.i32 >5y34=reg128#13,<y34=reg128#3,#2 | |
2650 +# asm 2: vshl.i32 >5y34=q12,<y34=q2,#2 | |
2651 +vshl.i32 q12,q2,#2 | |
2652 + | |
2653 +# qhasm: 4x 5y12 += y12 | |
2654 +# asm 1: vadd.i32 >5y12=reg128#12,<5y12=reg128#12,<y12=reg128#2 | |
2655 +# asm 2: vadd.i32 >5y12=q11,<5y12=q11,<y12=q1 | |
2656 +vadd.i32 q11,q11,q1 | |
2657 + | |
2658 +# qhasm: 4x 5y34 += y34 | |
2659 +# asm 1: vadd.i32 >5y34=reg128#13,<5y34=reg128#13,<y34=reg128#3 | |
2660 +# asm 2: vadd.i32 >5y34=q12,<5y34=q12,<y34=q2 | |
2661 +vadd.i32 q12,q12,q2 | |
2662 + | |
2663 +# qhasm: 2x u4 <<= 24 | |
2664 +# asm 1: vshl.i64 >u4=reg128#8,<u4=reg128#8,#24 | |
2665 +# asm 2: vshl.i64 >u4=q7,<u4=q7,#24 | |
2666 +vshl.i64 q7,q7,#24 | |
2667 + | |
2668 +# qhasm: 4x 5z12 = z12 << 2 | |
2669 +# asm 1: vshl.i32 >5z12=reg128#14,<z12=reg128#5,#2 | |
2670 +# asm 2: vshl.i32 >5z12=q13,<z12=q4,#2 | |
2671 +vshl.i32 q13,q4,#2 | |
2672 + | |
2673 +# qhasm: 4x 5z34 = z34 << 2 | |
2674 +# asm 1: vshl.i32 >5z34=reg128#15,<z34=reg128#6,#2 | |
2675 +# asm 2: vshl.i32 >5z34=q14,<z34=q5,#2 | |
2676 +vshl.i32 q14,q5,#2 | |
2677 + | |
2678 +# qhasm: 4x 5z12 += z12 | |
2679 +# asm 1: vadd.i32 >5z12=reg128#14,<5z12=reg128#14,<z12=reg128#5 | |
2680 +# asm 2: vadd.i32 >5z12=q13,<5z12=q13,<z12=q4 | |
2681 +vadd.i32 q13,q13,q4 | |
2682 + | |
2683 +# qhasm: 4x 5z34 += z34 | |
2684 +# asm 1: vadd.i32 >5z34=reg128#15,<5z34=reg128#15,<z34=reg128#6 | |
2685 +# asm 2: vadd.i32 >5z34=q14,<5z34=q14,<z34=q5 | |
2686 +vadd.i32 q14,q14,q5 | |
2687 + | |
2688 +# qhasm: new two24 | |
2689 + | |
2690 +# qhasm: new y0_stack | |
2691 + | |
2692 +# qhasm: new y12_stack | |
2693 + | |
2694 +# qhasm: new y34_stack | |
2695 + | |
2696 +# qhasm: new 5y12_stack | |
2697 + | |
2698 +# qhasm: new 5y34_stack | |
2699 + | |
2700 +# qhasm: new z0_stack | |
2701 + | |
2702 +# qhasm: new z12_stack | |
2703 + | |
2704 +# qhasm: new z34_stack | |
2705 + | |
2706 +# qhasm: new 5z12_stack | |
2707 + | |
2708 +# qhasm: new 5z34_stack | |
2709 + | |
2710 +# qhasm: ptr = &two24 | |
2711 +# asm 1: lea >ptr=int32#2,<two24=stack128#1 | |
2712 +# asm 2: lea >ptr=r1,<two24=[sp,#0] | |
2713 +add r1,sp,#0 | |
2714 + | |
2715 +# qhasm: mem128[ptr] aligned= u4 | |
2716 +# asm 1: vst1.8 {<u4=reg128#8%bot-<u4=reg128#8%top},[<ptr=int32#2,: 128] | |
2717 +# asm 2: vst1.8 {<u4=d14-<u4=d15},[<ptr=r1,: 128] | |
2718 +vst1.8 {d14-d15},[r1,: 128] | |
2719 + | |
2720 +# qhasm: r4 = u4 | |
2721 +# asm 1: vmov >r4=reg128#16,<u4=reg128#8 | |
2722 +# asm 2: vmov >r4=q15,<u4=q7 | |
2723 +vmov q15,q7 | |
2724 + | |
2725 +# qhasm: r0 = u4 | |
2726 +# asm 1: vmov >r0=reg128#8,<u4=reg128#8 | |
2727 +# asm 2: vmov >r0=q7,<u4=q7 | |
2728 +vmov q7,q7 | |
2729 + | |
2730 +# qhasm: ptr = &y0_stack | |
2731 +# asm 1: lea >ptr=int32#2,<y0_stack=stack128#2 | |
2732 +# asm 2: lea >ptr=r1,<y0_stack=[sp,#16] | |
2733 +add r1,sp,#16 | |
2734 + | |
2735 +# qhasm: mem128[ptr] aligned= y0 | |
2736 +# asm 1: vst1.8 {<y0=reg128#1%bot-<y0=reg128#1%top},[<ptr=int32#2,: 128] | |
2737 +# asm 2: vst1.8 {<y0=d0-<y0=d1},[<ptr=r1,: 128] | |
2738 +vst1.8 {d0-d1},[r1,: 128] | |
2739 + | |
2740 +# qhasm: ptr = &y12_stack | |
2741 +# asm 1: lea >ptr=int32#2,<y12_stack=stack128#3 | |
2742 +# asm 2: lea >ptr=r1,<y12_stack=[sp,#32] | |
2743 +add r1,sp,#32 | |
2744 + | |
2745 +# qhasm: mem128[ptr] aligned= y12 | |
2746 +# asm 1: vst1.8 {<y12=reg128#2%bot-<y12=reg128#2%top},[<ptr=int32#2,: 128] | |
2747 +# asm 2: vst1.8 {<y12=d2-<y12=d3},[<ptr=r1,: 128] | |
2748 +vst1.8 {d2-d3},[r1,: 128] | |
2749 + | |
2750 +# qhasm: ptr = &y34_stack | |
2751 +# asm 1: lea >ptr=int32#2,<y34_stack=stack128#4 | |
2752 +# asm 2: lea >ptr=r1,<y34_stack=[sp,#48] | |
2753 +add r1,sp,#48 | |
2754 + | |
2755 +# qhasm: mem128[ptr] aligned= y34 | |
2756 +# asm 1: vst1.8 {<y34=reg128#3%bot-<y34=reg128#3%top},[<ptr=int32#2,: 128] | |
2757 +# asm 2: vst1.8 {<y34=d4-<y34=d5},[<ptr=r1,: 128] | |
2758 +vst1.8 {d4-d5},[r1,: 128] | |
2759 + | |
2760 +# qhasm: ptr = &z0_stack | |
2761 +# asm 1: lea >ptr=int32#2,<z0_stack=stack128#7 | |
2762 +# asm 2: lea >ptr=r1,<z0_stack=[sp,#96] | |
2763 +add r1,sp,#96 | |
2764 + | |
2765 +# qhasm: mem128[ptr] aligned= z0 | |
2766 +# asm 1: vst1.8 {<z0=reg128#4%bot-<z0=reg128#4%top},[<ptr=int32#2,: 128] | |
2767 +# asm 2: vst1.8 {<z0=d6-<z0=d7},[<ptr=r1,: 128] | |
2768 +vst1.8 {d6-d7},[r1,: 128] | |
2769 + | |
2770 +# qhasm: ptr = &z12_stack | |
2771 +# asm 1: lea >ptr=int32#2,<z12_stack=stack128#8 | |
2772 +# asm 2: lea >ptr=r1,<z12_stack=[sp,#112] | |
2773 +add r1,sp,#112 | |
2774 + | |
2775 +# qhasm: mem128[ptr] aligned= z12 | |
2776 +# asm 1: vst1.8 {<z12=reg128#5%bot-<z12=reg128#5%top},[<ptr=int32#2,: 128] | |
2777 +# asm 2: vst1.8 {<z12=d8-<z12=d9},[<ptr=r1,: 128] | |
2778 +vst1.8 {d8-d9},[r1,: 128] | |
2779 + | |
2780 +# qhasm: ptr = &z34_stack | |
2781 +# asm 1: lea >ptr=int32#2,<z34_stack=stack128#9 | |
2782 +# asm 2: lea >ptr=r1,<z34_stack=[sp,#128] | |
2783 +add r1,sp,#128 | |
2784 + | |
2785 +# qhasm: mem128[ptr] aligned= z34 | |
2786 +# asm 1: vst1.8 {<z34=reg128#6%bot-<z34=reg128#6%top},[<ptr=int32#2,: 128] | |
2787 +# asm 2: vst1.8 {<z34=d10-<z34=d11},[<ptr=r1,: 128] | |
2788 +vst1.8 {d10-d11},[r1,: 128] | |
2789 + | |
2790 +# qhasm: ptr = &5y12_stack | |
2791 +# asm 1: lea >ptr=int32#2,<5y12_stack=stack128#5 | |
2792 +# asm 2: lea >ptr=r1,<5y12_stack=[sp,#64] | |
2793 +add r1,sp,#64 | |
2794 + | |
2795 +# qhasm: mem128[ptr] aligned= 5y12 | |
2796 +# asm 1: vst1.8 {<5y12=reg128#12%bot-<5y12=reg128#12%top},[<ptr=int32#2,: 128] | |
2797 +# asm 2: vst1.8 {<5y12=d22-<5y12=d23},[<ptr=r1,: 128] | |
2798 +vst1.8 {d22-d23},[r1,: 128] | |
2799 + | |
2800 +# qhasm: ptr = &5y34_stack | |
2801 +# asm 1: lea >ptr=int32#2,<5y34_stack=stack128#6 | |
2802 +# asm 2: lea >ptr=r1,<5y34_stack=[sp,#80] | |
2803 +add r1,sp,#80 | |
2804 + | |
2805 +# qhasm: mem128[ptr] aligned= 5y34 | |
2806 +# asm 1: vst1.8 {<5y34=reg128#13%bot-<5y34=reg128#13%top},[<ptr=int32#2,: 128] | |
2807 +# asm 2: vst1.8 {<5y34=d24-<5y34=d25},[<ptr=r1,: 128] | |
2808 +vst1.8 {d24-d25},[r1,: 128] | |
2809 + | |
2810 +# qhasm: ptr = &5z12_stack | |
2811 +# asm 1: lea >ptr=int32#2,<5z12_stack=stack128#10 | |
2812 +# asm 2: lea >ptr=r1,<5z12_stack=[sp,#144] | |
2813 +add r1,sp,#144 | |
2814 + | |
2815 +# qhasm: mem128[ptr] aligned= 5z12 | |
2816 +# asm 1: vst1.8 {<5z12=reg128#14%bot-<5z12=reg128#14%top},[<ptr=int32#2,: 128] | |
2817 +# asm 2: vst1.8 {<5z12=d26-<5z12=d27},[<ptr=r1,: 128] | |
2818 +vst1.8 {d26-d27},[r1,: 128] | |
2819 + | |
2820 +# qhasm: ptr = &5z34_stack | |
2821 +# asm 1: lea >ptr=int32#2,<5z34_stack=stack128#11 | |
2822 +# asm 2: lea >ptr=r1,<5z34_stack=[sp,#160] | |
2823 +add r1,sp,#160 | |
2824 + | |
2825 +# qhasm: mem128[ptr] aligned= 5z34 | |
2826 +# asm 1: vst1.8 {<5z34=reg128#15%bot-<5z34=reg128#15%top},[<ptr=int32#2,: 128] | |
2827 +# asm 2: vst1.8 {<5z34=d28-<5z34=d29},[<ptr=r1,: 128] | |
2828 +vst1.8 {d28-d29},[r1,: 128] | |
2829 + | |
2830 +# qhasm: unsigned>? len - 64 | |
2831 +# asm 1: cmp <len=int32#4,#64 | |
2832 +# asm 2: cmp <len=r3,#64 | |
2833 +cmp r3,#64 | |
2834 + | |
2835 +# qhasm: goto below64bytes if !unsigned> | |
2836 +bls ._below64bytes | |
2837 + | |
2838 +# qhasm: input_2 += 32 | |
2839 +# asm 1: add >input_2=int32#2,<input_2=int32#3,#32 | |
2840 +# asm 2: add >input_2=r1,<input_2=r2,#32 | |
2841 +add r1,r2,#32 | |
2842 + | |
2843 +# qhasm: mainloop2: | |
2844 +._mainloop2: | |
2845 + | |
2846 +# qhasm: c01 = mem128[input_2];input_2+=16 | |
2847 +# asm 1: vld1.8 {>c01=reg128#1%bot->c01=reg128#1%top},[<input_2=int32#2]! | |
2848 +# asm 2: vld1.8 {>c01=d0->c01=d1},[<input_2=r1]! | |
2849 +vld1.8 {d0-d1},[r1]! | |
2850 + | |
2851 +# qhasm: c23 = mem128[input_2];input_2+=16 | |
2852 +# asm 1: vld1.8 {>c23=reg128#2%bot->c23=reg128#2%top},[<input_2=int32#2]! | |
2853 +# asm 2: vld1.8 {>c23=d2->c23=d3},[<input_2=r1]! | |
2854 +vld1.8 {d2-d3},[r1]! | |
2855 + | |
2856 +# qhasm: r4[0,1] += x01[0] unsigned* z34[2]; r4[2,3] += x01[1] unsigned* z34
[3] | |
2857 +# asm 1: vmlal.u32 <r4=reg128#16,<x01=reg128#9%bot,<z34=reg128#6%top | |
2858 +# asm 2: vmlal.u32 <r4=q15,<x01=d16,<z34=d11 | |
2859 +vmlal.u32 q15,d16,d11 | |
2860 + | |
2861 +# qhasm: ptr = &z12_stack | |
2862 +# asm 1: lea >ptr=int32#3,<z12_stack=stack128#8 | |
2863 +# asm 2: lea >ptr=r2,<z12_stack=[sp,#112] | |
2864 +add r2,sp,#112 | |
2865 + | |
2866 +# qhasm: z12 aligned= mem128[ptr] | |
2867 +# asm 1: vld1.8 {>z12=reg128#3%bot->z12=reg128#3%top},[<ptr=int32#3,: 128] | |
2868 +# asm 2: vld1.8 {>z12=d4->z12=d5},[<ptr=r2,: 128] | |
2869 +vld1.8 {d4-d5},[r2,: 128] | |
2870 + | |
2871 +# qhasm: r4[0,1] += x01[2] unsigned* z34[0]; r4[2,3] += x01[3] unsigned* z34[1
] | |
2872 +# asm 1: vmlal.u32 <r4=reg128#16,<x01=reg128#9%top,<z34=reg128#6%bot | |
2873 +# asm 2: vmlal.u32 <r4=q15,<x01=d17,<z34=d10 | |
2874 +vmlal.u32 q15,d17,d10 | |
2875 + | |
2876 +# qhasm: ptr = &z0_stack | |
2877 +# asm 1: lea >ptr=int32#3,<z0_stack=stack128#7 | |
2878 +# asm 2: lea >ptr=r2,<z0_stack=[sp,#96] | |
2879 +add r2,sp,#96 | |
2880 + | |
2881 +# qhasm: z0 aligned= mem128[ptr] | |
2882 +# asm 1: vld1.8 {>z0=reg128#4%bot->z0=reg128#4%top},[<ptr=int32#3,: 128] | |
2883 +# asm 2: vld1.8 {>z0=d6->z0=d7},[<ptr=r2,: 128] | |
2884 +vld1.8 {d6-d7},[r2,: 128] | |
2885 + | |
2886 +# qhasm: r4[0,1] += x23[0] unsigned* z12[2]; r4[2,3] += x23[1] unsigned* z12[3
] | |
2887 +# asm 1: vmlal.u32 <r4=reg128#16,<x23=reg128#10%bot,<z12=reg128#3%top | |
2888 +# asm 2: vmlal.u32 <r4=q15,<x23=d18,<z12=d5 | |
2889 +vmlal.u32 q15,d18,d5 | |
2890 + | |
2891 +# qhasm: c01 c23 = c01[0]c01[1]c01[2]c23[2]c23[0]c23[1]c01[3]c23[3] | |
2892 +# asm 1: vtrn.32 <c01=reg128#1%top,<c23=reg128#2%top | |
2893 +# asm 2: vtrn.32 <c01=d1,<c23=d3 | |
2894 +vtrn.32 d1,d3 | |
2895 + | |
2896 +# qhasm: r4[0,1] += x23[2] unsigned* z12[0]; r4[2,3] += x23[3] unsigned* z12[1
] | |
2897 +# asm 1: vmlal.u32 <r4=reg128#16,<x23=reg128#10%top,<z12=reg128#3%bot | |
2898 +# asm 2: vmlal.u32 <r4=q15,<x23=d19,<z12=d4 | |
2899 +vmlal.u32 q15,d19,d4 | |
2900 + | |
2901 +# qhasm: r4[0,1] += x4[0] unsigned* z0[0]; r4[2,3] += x4[1] unsigned* z0[1] | |
2902 +# asm 1: vmlal.u32 <r4=reg128#16,<x4=reg128#11%bot,<z0=reg128#4%bot | |
2903 +# asm 2: vmlal.u32 <r4=q15,<x4=d20,<z0=d6 | |
2904 +vmlal.u32 q15,d20,d6 | |
2905 + | |
2906 +# qhasm: r3[0,1] = c23[2]<<18; r3[2,3] = c23[3]<<18 | |
2907 +# asm 1: vshll.u32 >r3=reg128#5,<c23=reg128#2%top,#18 | |
2908 +# asm 2: vshll.u32 >r3=q4,<c23=d3,#18 | |
2909 +vshll.u32 q4,d3,#18 | |
2910 + | |
2911 +# qhasm: c01 c23 = c01[0]c23[0]c01[2]c01[3]c01[1]c23[1]c23[2]c23[3] | |
2912 +# asm 1: vtrn.32 <c01=reg128#1%bot,<c23=reg128#2%bot | |
2913 +# asm 2: vtrn.32 <c01=d0,<c23=d2 | |
2914 +vtrn.32 d0,d2 | |
2915 + | |
2916 +# qhasm: r3[0,1] += x01[0] unsigned* z34[0]; r3[2,3] += x01[1] unsigned* z34[
1] | |
2917 +# asm 1: vmlal.u32 <r3=reg128#5,<x01=reg128#9%bot,<z34=reg128#6%bot | |
2918 +# asm 2: vmlal.u32 <r3=q4,<x01=d16,<z34=d10 | |
2919 +vmlal.u32 q4,d16,d10 | |
2920 + | |
2921 +# qhasm: r3[0,1] += x01[2] unsigned* z12[2]; r3[2,3] += x01[3] unsigned* z12[
3] | |
2922 +# asm 1: vmlal.u32 <r3=reg128#5,<x01=reg128#9%top,<z12=reg128#3%top | |
2923 +# asm 2: vmlal.u32 <r3=q4,<x01=d17,<z12=d5 | |
2924 +vmlal.u32 q4,d17,d5 | |
2925 + | |
2926 +# qhasm: r0 = r0[1]c01[0]r0[2,3] | |
2927 +# asm 1: vext.32 <r0=reg128#8%bot,<r0=reg128#8%bot,<c01=reg128#1%bot,#1 | |
2928 +# asm 2: vext.32 <r0=d14,<r0=d14,<c01=d0,#1 | |
2929 +vext.32 d14,d14,d0,#1 | |
2930 + | |
2931 +# qhasm: r3[0,1] += x23[0] unsigned* z12[0]; r3[2,3] += x23[1] unsigned* z12[
1] | |
2932 +# asm 1: vmlal.u32 <r3=reg128#5,<x23=reg128#10%bot,<z12=reg128#3%bot | |
2933 +# asm 2: vmlal.u32 <r3=q4,<x23=d18,<z12=d4 | |
2934 +vmlal.u32 q4,d18,d4 | |
2935 + | |
2936 +# qhasm: input_2
-= 64 | |
2937 +# asm 1: sub >input_2=int32#2,<input_2=int32#2,#64 | |
2938 +# asm 2: sub >input_2=r1,<input_2=r1,#64 | |
2939 +sub r1,r1,#64 | |
2940 + | |
2941 +# qhasm: r3[0,1] += x23[2] unsigned* z0[0]; r3[2,3] += x23[3] unsigned* z0[1] | |
2942 +# asm 1: vmlal.u32 <r3=reg128#5,<x23=reg128#10%top,<z0=reg128#4%bot | |
2943 +# asm 2: vmlal.u32 <r3=q4,<x23=d19,<z0=d6 | |
2944 +vmlal.u32 q4,d19,d6 | |
2945 + | |
2946 +# qhasm: ptr = &5z34_stack | |
2947 +# asm 1: lea >ptr=int32#3,<5z34_stack=stack128#11 | |
2948 +# asm 2: lea >ptr=r2,<5z34_stack=[sp,#160] | |
2949 +add r2,sp,#160 | |
2950 + | |
2951 +# qhasm: 5z34 aligned= mem128[ptr] | |
2952 +# asm 1: vld1.8 {>5z34=reg128#6%bot->5z34=reg128#6%top},[<ptr=int32#3,: 128] | |
2953 +# asm 2: vld1.8 {>5z34=d10->5z34=d11},[<ptr=r2,: 128] | |
2954 +vld1.8 {d10-d11},[r2,: 128] | |
2955 + | |
2956 +# qhasm: r3[0,1] += x4[0] unsigned* 5z34[2]; r3[2,3] += x4[1] unsigned* 5z3
4[3] | |
2957 +# asm 1: vmlal.u32 <r3=reg128#5,<x4=reg128#11%bot,<5z34=reg128#6%top | |
2958 +# asm 2: vmlal.u32 <r3=q4,<x4=d20,<5z34=d11 | |
2959 +vmlal.u32 q4,d20,d11 | |
2960 + | |
2961 +# qhasm: r0 = r0[1]r0[0]r0[3]r0[2] | |
2962 +# asm 1: vrev64.i32 >r0=reg128#8,<r0=reg128#8 | |
2963 +# asm 2: vrev64.i32 >r0=q7,<r0=q7 | |
2964 +vrev64.i32 q7,q7 | |
2965 + | |
2966 +# qhasm: r2[0,1] = c01[2]<<12; r2[2,3] = c01[3]<<12 | |
2967 +# asm 1: vshll.u32 >r2=reg128#14,<c01=reg128#1%top,#12 | |
2968 +# asm 2: vshll.u32 >r2=q13,<c01=d1,#12 | |
2969 +vshll.u32 q13,d1,#12 | |
2970 + | |
2971 +# qhasm: d01 = mem128[input_2];input_2+=16 | |
2972 +# asm 1: vld1.8 {>d01=reg128#12%bot->d01=reg128#12%top},[<input_2=int32#2]! | |
2973 +# asm 2: vld1.8 {>d01=d22->d01=d23},[<input_2=r1]! | |
2974 +vld1.8 {d22-d23},[r1]! | |
2975 + | |
2976 +# qhasm: r2[0,1] += x01[0] unsigned* z12[2]; r2[2,3] += x01[1] unsigned* z12[
3] | |
2977 +# asm 1: vmlal.u32 <r2=reg128#14,<x01=reg128#9%bot,<z12=reg128#3%top | |
2978 +# asm 2: vmlal.u32 <r2=q13,<x01=d16,<z12=d5 | |
2979 +vmlal.u32 q13,d16,d5 | |
2980 + | |
2981 +# qhasm: r2[0,1] += x01[2] unsigned* z12[0]; r2[2,3] += x01[3] unsigned* z12[
1] | |
2982 +# asm 1: vmlal.u32 <r2=reg128#14,<x01=reg128#9%top,<z12=reg128#3%bot | |
2983 +# asm 2: vmlal.u32 <r2=q13,<x01=d17,<z12=d4 | |
2984 +vmlal.u32 q13,d17,d4 | |
2985 + | |
2986 +# qhasm: r2[0,1] += x23[0] unsigned* z0[0]; r2[2,3] += x23[1] unsigned* z0[1] | |
2987 +# asm 1: vmlal.u32 <r2=reg128#14,<x23=reg128#10%bot,<z0=reg128#4%bot | |
2988 +# asm 2: vmlal.u32 <r2=q13,<x23=d18,<z0=d6 | |
2989 +vmlal.u32 q13,d18,d6 | |
2990 + | |
2991 +# qhasm: r2[0,1] += x23[2] unsigned* 5z34[2]; r2[2,3] += x23[3] unsigned* 5z3
4[3] | |
2992 +# asm 1: vmlal.u32 <r2=reg128#14,<x23=reg128#10%top,<5z34=reg128#6%top | |
2993 +# asm 2: vmlal.u32 <r2=q13,<x23=d19,<5z34=d11 | |
2994 +vmlal.u32 q13,d19,d11 | |
2995 + | |
2996 +# qhasm: r2[0,1] += x4[0] unsigned* 5z34[0]; r2[2,3] += x4[1] unsigned* 5z34[
1] | |
2997 +# asm 1: vmlal.u32 <r2=reg128#14,<x4=reg128#11%bot,<5z34=reg128#6%bot | |
2998 +# asm 2: vmlal.u32 <r2=q13,<x4=d20,<5z34=d10 | |
2999 +vmlal.u32 q13,d20,d10 | |
3000 + | |
3001 +# qhasm: r0 = r0[0,1]c01[1]r0[2] | |
3002 +# asm 1: vext.32 <r0=reg128#8%top,<c01=reg128#1%bot,<r0=reg128#8%top,#1 | |
3003 +# asm 2: vext.32 <r0=d15,<c01=d0,<r0=d15,#1 | |
3004 +vext.32 d15,d0,d15,#1 | |
3005 + | |
3006 +# qhasm: r1[0,1] = c23[0]<<6; r1[2,3] = c23[1]<<6 | |
3007 +# asm 1: vshll.u32 >r1=reg128#15,<c23=reg128#2%bot,#6 | |
3008 +# asm 2: vshll.u32 >r1=q14,<c23=d2,#6 | |
3009 +vshll.u32 q14,d2,#6 | |
3010 + | |
3011 +# qhasm: r1[0,1] += x01[0] unsigned* z12[0]; r1[2,3] += x01[1] unsigned* z12[
1] | |
3012 +# asm 1: vmlal.u32 <r1=reg128#15,<x01=reg128#9%bot,<z12=reg128#3%bot | |
3013 +# asm 2: vmlal.u32 <r1=q14,<x01=d16,<z12=d4 | |
3014 +vmlal.u32 q14,d16,d4 | |
3015 + | |
3016 +# qhasm: r1[0,1] += x01[2] unsigned* z0[0]; r1[2,3] += x01[3] unsigned* z0[1] | |
3017 +# asm 1: vmlal.u32 <r1=reg128#15,<x01=reg128#9%top,<z0=reg128#4%bot | |
3018 +# asm 2: vmlal.u32 <r1=q14,<x01=d17,<z0=d6 | |
3019 +vmlal.u32 q14,d17,d6 | |
3020 + | |
3021 +# qhasm: r1[0,1] += x23[0] unsigned* 5z34[2]; r1[2,3] += x23[1] unsigned* 5z3
4[3] | |
3022 +# asm 1: vmlal.u32 <r1=reg128#15,<x23=reg128#10%bot,<5z34=reg128#6%top | |
3023 +# asm 2: vmlal.u32 <r1=q14,<x23=d18,<5z34=d11 | |
3024 +vmlal.u32 q14,d18,d11 | |
3025 + | |
3026 +# qhasm: r1[0,1] += x23[2] unsigned* 5z34[0]; r1[2,3] += x23[3] unsigned* 5z34[
1] | |
3027 +# asm 1: vmlal.u32 <r1=reg128#15,<x23=reg128#10%top,<5z34=reg128#6%bot | |
3028 +# asm 2: vmlal.u32 <r1=q14,<x23=d19,<5z34=d10 | |
3029 +vmlal.u32 q14,d19,d10 | |
3030 + | |
3031 +# qhasm: ptr = &5z12_stack | |
3032 +# asm 1: lea >ptr=int32#3,<5z12_stack=stack128#10 | |
3033 +# asm 2: lea >ptr=r2,<5z12_stack=[sp,#144] | |
3034 +add r2,sp,#144 | |
3035 + | |
3036 +# qhasm: 5z12 aligned= mem128[ptr] | |
3037 +# asm 1: vld1.8 {>5z12=reg128#1%bot->5z12=reg128#1%top},[<ptr=int32#3,: 128] | |
3038 +# asm 2: vld1.8 {>5z12=d0->5z12=d1},[<ptr=r2,: 128] | |
3039 +vld1.8 {d0-d1},[r2,: 128] | |
3040 + | |
3041 +# qhasm: r1[0,1] += x4[0] unsigned* 5z12[2]; r1[2,3] += x4[1] unsigned* 5z12[
3] | |
3042 +# asm 1: vmlal.u32 <r1=reg128#15,<x4=reg128#11%bot,<5z12=reg128#1%top | |
3043 +# asm 2: vmlal.u32 <r1=q14,<x4=d20,<5z12=d1 | |
3044 +vmlal.u32 q14,d20,d1 | |
3045 + | |
3046 +# qhasm: d23 = mem128[input_2];input_2+=16 | |
3047 +# asm 1: vld1.8 {>d23=reg128#2%bot->d23=reg128#2%top},[<input_2=int32#2]! | |
3048 +# asm 2: vld1.8 {>d23=d2->d23=d3},[<input_2=r1]! | |
3049 +vld1.8 {d2-d3},[r1]! | |
3050 + | |
3051 +# qhasm: input_2 += 32 | |
3052 +# asm 1: add >input_2=int32#2,<input_2=int32#2,#32 | |
3053 +# asm 2: add >input_2=r1,<input_2=r1,#32 | |
3054 +add r1,r1,#32 | |
3055 + | |
3056 +# qhasm: r0[0,1] += x4[0] unsigned* 5z12[0]; r0[2,3] += x4[1] unsigned* 5z12[
1] | |
3057 +# asm 1: vmlal.u32 <r0=reg128#8,<x4=reg128#11%bot,<5z12=reg128#1%bot | |
3058 +# asm 2: vmlal.u32 <r0=q7,<x4=d20,<5z12=d0 | |
3059 +vmlal.u32 q7,d20,d0 | |
3060 + | |
3061 +# qhasm: r0[0,1] += x23[0] unsigned* 5z34[0]; r0[2,3] += x23[1] unsigned* 5z34[
1] | |
3062 +# asm 1: vmlal.u32 <r0=reg128#8,<x23=reg128#10%bot,<5z34=reg128#6%bot | |
3063 +# asm 2: vmlal.u32 <r0=q7,<x23=d18,<5z34=d10 | |
3064 +vmlal.u32 q7,d18,d10 | |
3065 + | |
3066 +# qhasm: d01 d23 = d01[0] d23[0] d01[1] d23[1] | |
3067 +# asm 1: vswp <d23=reg128#2%bot,<d01=reg128#12%top | |
3068 +# asm 2: vswp <d23=d2,<d01=d23 | |
3069 +vswp d2,d23 | |
3070 + | |
3071 +# qhasm: r0[0,1] += x23[2] unsigned* 5z12[2]; r0[2,3] += x23[3] unsigned* 5z12[
3] | |
3072 +# asm 1: vmlal.u32 <r0=reg128#8,<x23=reg128#10%top,<5z12=reg128#1%top | |
3073 +# asm 2: vmlal.u32 <r0=q7,<x23=d19,<5z12=d1 | |
3074 +vmlal.u32 q7,d19,d1 | |
3075 + | |
3076 +# qhasm: r0[0,1] += x01[0] unsigned* z0[0]; r0[2,3] += x01[1] unsigned* z0[1] | |
3077 +# asm 1: vmlal.u32 <r0=reg128#8,<x01=reg128#9%bot,<z0=reg128#4%bot | |
3078 +# asm 2: vmlal.u32 <r0=q7,<x01=d16,<z0=d6 | |
3079 +vmlal.u32 q7,d16,d6 | |
3080 + | |
3081 +# qhasm: new mid | |
3082 + | |
3083 +# qhasm: 2x v4 = d23 unsigned>> 40 | |
3084 +# asm 1: vshr.u64 >v4=reg128#4,<d23=reg128#2,#40 | |
3085 +# asm 2: vshr.u64 >v4=q3,<d23=q1,#40 | |
3086 +vshr.u64 q3,q1,#40 | |
3087 + | |
3088 +# qhasm: mid = d01[1]d23[0] mid[2,3] | |
3089 +# asm 1: vext.32 <mid=reg128#1%bot,<d01=reg128#12%bot,<d23=reg128#2%bot,#1 | |
3090 +# asm 2: vext.32 <mid=d0,<d01=d22,<d23=d2,#1 | |
3091 +vext.32 d0,d22,d2,#1 | |
3092 + | |
3093 +# qhasm: new v23 | |
3094 + | |
3095 +# qhasm: v23[2] = d23[0,1] unsigned>> 14; v23[3] = d23[2,3] unsig
ned>> 14 | |
3096 +# asm 1: vshrn.u64 <v23=reg128#10%top,<d23=reg128#2,#14 | |
3097 +# asm 2: vshrn.u64 <v23=d19,<d23=q1,#14 | |
3098 +vshrn.u64 d19,q1,#14 | |
3099 + | |
3100 +# qhasm: mid = mid[0,1] d01[3]d23[2] | |
3101 +# asm 1: vext.32 <mid=reg128#1%top,<d01=reg128#12%top,<d23=reg128#2%top,#1 | |
3102 +# asm 2: vext.32 <mid=d1,<d01=d23,<d23=d3,#1 | |
3103 +vext.32 d1,d23,d3,#1 | |
3104 + | |
3105 +# qhasm: new v01 | |
3106 + | |
3107 +# qhasm: v01[2] = d01[0,1] unsigned>> 26; v01[3] = d01[2,3] unsig
ned>> 26 | |
3108 +# asm 1: vshrn.u64 <v01=reg128#11%top,<d01=reg128#12,#26 | |
3109 +# asm 2: vshrn.u64 <v01=d21,<d01=q11,#26 | |
3110 +vshrn.u64 d21,q11,#26 | |
3111 + | |
3112 +# qhasm: v01 = d01[1]d01[0] v01[2,3] | |
3113 +# asm 1: vext.32 <v01=reg128#11%bot,<d01=reg128#12%bot,<d01=reg128#12%bot,#1 | |
3114 +# asm 2: vext.32 <v01=d20,<d01=d22,<d01=d22,#1 | |
3115 +vext.32 d20,d22,d22,#1 | |
3116 + | |
3117 +# qhasm: r0[0,1] += x01[2] unsigned* 5z34[2]; r0[2,3] += x01[3] unsigned* 5z3
4[3] | |
3118 +# asm 1: vmlal.u32 <r0=reg128#8,<x01=reg128#9%top,<5z34=reg128#6%top | |
3119 +# asm 2: vmlal.u32 <r0=q7,<x01=d17,<5z34=d11 | |
3120 +vmlal.u32 q7,d17,d11 | |
3121 + | |
3122 +# qhasm: v01 = v01[1]d01[2] v01[2,3] | |
3123 +# asm 1: vext.32 <v01=reg128#11%bot,<v01=reg128#11%bot,<d01=reg128#12%top,#1 | |
3124 +# asm 2: vext.32 <v01=d20,<v01=d20,<d01=d23,#1 | |
3125 +vext.32 d20,d20,d23,#1 | |
3126 + | |
3127 +# qhasm: v23[0] = mid[0,1] unsigned>> 20; v23[1] = mid[2,3] unsig
ned>> 20 | |
3128 +# asm 1: vshrn.u64 <v23=reg128#10%bot,<mid=reg128#1,#20 | |
3129 +# asm 2: vshrn.u64 <v23=d18,<mid=q0,#20 | |
3130 +vshrn.u64 d18,q0,#20 | |
3131 + | |
3132 +# qhasm: v4 = v4[0]v4[2]v4[1]v4[3] | |
3133 +# asm 1: vtrn.32 <v4=reg128#4%bot,<v4=reg128#4%top | |
3134 +# asm 2: vtrn.32 <v4=d6,<v4=d7 | |
3135 +vtrn.32 d6,d7 | |
3136 + | |
3137 +# qhasm: 4x v01 &= 0x03ffffff | |
3138 +# asm 1: vand.i32 <v01=reg128#11,#0x03ffffff | |
3139 +# asm 2: vand.i32 <v01=q10,#0x03ffffff | |
3140 +vand.i32 q10,#0x03ffffff | |
3141 + | |
3142 +# qhasm: ptr = &y34_stack | |
3143 +# asm 1: lea >ptr=int32#3,<y34_stack=stack128#4 | |
3144 +# asm 2: lea >ptr=r2,<y34_stack=[sp,#48] | |
3145 +add r2,sp,#48 | |
3146 + | |
3147 +# qhasm: y34 aligned= mem128[ptr] | |
3148 +# asm 1: vld1.8 {>y34=reg128#3%bot->y34=reg128#3%top},[<ptr=int32#3,: 128] | |
3149 +# asm 2: vld1.8 {>y34=d4->y34=d5},[<ptr=r2,: 128] | |
3150 +vld1.8 {d4-d5},[r2,: 128] | |
3151 + | |
3152 +# qhasm: 4x v23 &= 0x03ffffff | |
3153 +# asm 1: vand.i32 <v23=reg128#10,#0x03ffffff | |
3154 +# asm 2: vand.i32 <v23=q9,#0x03ffffff | |
3155 +vand.i32 q9,#0x03ffffff | |
3156 + | |
3157 +# qhasm: ptr = &y12_stack | |
3158 +# asm 1: lea >ptr=int32#3,<y12_stack=stack128#3 | |
3159 +# asm 2: lea >ptr=r2,<y12_stack=[sp,#32] | |
3160 +add r2,sp,#32 | |
3161 + | |
3162 +# qhasm: y12 aligned= mem128[ptr] | |
3163 +# asm 1: vld1.8 {>y12=reg128#2%bot->y12=reg128#2%top},[<ptr=int32#3,: 128] | |
3164 +# asm 2: vld1.8 {>y12=d2->y12=d3},[<ptr=r2,: 128] | |
3165 +vld1.8 {d2-d3},[r2,: 128] | |
3166 + | |
3167 +# qhasm: 4x v4 |= 0x01000000 | |
3168 +# asm 1: vorr.i32 <v4=reg128#4,#0x01000000 | |
3169 +# asm 2: vorr.i32 <v4=q3,#0x01000000 | |
3170 +vorr.i32 q3,#0x01000000 | |
3171 + | |
3172 +# qhasm: ptr = &y0_stack | |
3173 +# asm 1: lea >ptr=int32#3,<y0_stack=stack128#2 | |
3174 +# asm 2: lea >ptr=r2,<y0_stack=[sp,#16] | |
3175 +add r2,sp,#16 | |
3176 + | |
3177 +# qhasm: y0 aligned= mem128[ptr] | |
3178 +# asm 1: vld1.8 {>y0=reg128#1%bot->y0=reg128#1%top},[<ptr=int32#3,: 128] | |
3179 +# asm 2: vld1.8 {>y0=d0->y0=d1},[<ptr=r2,: 128] | |
3180 +vld1.8 {d0-d1},[r2,: 128] | |
3181 + | |
3182 +# qhasm: r4[0,1] += v01[0] unsigned* y34[2]; r4[2,3] += v01[1] unsigned* y34
[3] | |
3183 +# asm 1: vmlal.u32 <r4=reg128#16,<v01=reg128#11%bot,<y34=reg128#3%top | |
3184 +# asm 2: vmlal.u32 <r4=q15,<v01=d20,<y34=d5 | |
3185 +vmlal.u32 q15,d20,d5 | |
3186 + | |
3187 +# qhasm: r4[0,1] += v01[2] unsigned* y34[0]; r4[2,3] += v01[3] unsigned* y34[1
] | |
3188 +# asm 1: vmlal.u32 <r4=reg128#16,<v01=reg128#11%top,<y34=reg128#3%bot | |
3189 +# asm 2: vmlal.u32 <r4=q15,<v01=d21,<y34=d4 | |
3190 +vmlal.u32 q15,d21,d4 | |
3191 + | |
3192 +# qhasm: r4[0,1] += v23[0] unsigned* y12[2]; r4[2,3] += v23[1] unsigned* y12[3
] | |
3193 +# asm 1: vmlal.u32 <r4=reg128#16,<v23=reg128#10%bot,<y12=reg128#2%top | |
3194 +# asm 2: vmlal.u32 <r4=q15,<v23=d18,<y12=d3 | |
3195 +vmlal.u32 q15,d18,d3 | |
3196 + | |
3197 +# qhasm: r4[0,1] += v23[2] unsigned* y12[0]; r4[2,3] += v23[3] unsigned* y12[1
] | |
3198 +# asm 1: vmlal.u32 <r4=reg128#16,<v23=reg128#10%top,<y12=reg128#2%bot | |
3199 +# asm 2: vmlal.u32 <r4=q15,<v23=d19,<y12=d2 | |
3200 +vmlal.u32 q15,d19,d2 | |
3201 + | |
3202 +# qhasm: r4[0,1] += v4[0] unsigned* y0[0]; r4[2,3] += v4[1] unsigned* y0[1] | |
3203 +# asm 1: vmlal.u32 <r4=reg128#16,<v4=reg128#4%bot,<y0=reg128#1%bot | |
3204 +# asm 2: vmlal.u32 <r4=q15,<v4=d6,<y0=d0 | |
3205 +vmlal.u32 q15,d6,d0 | |
3206 + | |
3207 +# qhasm: ptr = &5y34_stack | |
3208 +# asm 1: lea >ptr=int32#3,<5y34_stack=stack128#6 | |
3209 +# asm 2: lea >ptr=r2,<5y34_stack=[sp,#80] | |
3210 +add r2,sp,#80 | |
3211 + | |
3212 +# qhasm: 5y34 aligned= mem128[ptr] | |
3213 +# asm 1: vld1.8 {>5y34=reg128#13%bot->5y34=reg128#13%top},[<ptr=int32#3,: 128] | |
3214 +# asm 2: vld1.8 {>5y34=d24->5y34=d25},[<ptr=r2,: 128] | |
3215 +vld1.8 {d24-d25},[r2,: 128] | |
3216 + | |
3217 +# qhasm: r3[0,1] += v01[0] unsigned* y34[0]; r3[2,3] += v01[1] unsigned* y34[
1] | |
3218 +# asm 1: vmlal.u32 <r3=reg128#5,<v01=reg128#11%bot,<y34=reg128#3%bot | |
3219 +# asm 2: vmlal.u32 <r3=q4,<v01=d20,<y34=d4 | |
3220 +vmlal.u32 q4,d20,d4 | |
3221 + | |
3222 +# qhasm: r3[0,1] += v01[2] unsigned* y12[2]; r3[2,3] += v01[3] unsigned* y12[
3] | |
3223 +# asm 1: vmlal.u32 <r3=reg128#5,<v01=reg128#11%top,<y12=reg128#2%top | |
3224 +# asm 2: vmlal.u32 <r3=q4,<v01=d21,<y12=d3 | |
3225 +vmlal.u32 q4,d21,d3 | |
3226 + | |
3227 +# qhasm: r3[0,1] += v23[0] unsigned* y12[0]; r3[2,3] += v23[1] unsigned* y12[
1] | |
3228 +# asm 1: vmlal.u32 <r3=reg128#5,<v23=reg128#10%bot,<y12=reg128#2%bot | |
3229 +# asm 2: vmlal.u32 <r3=q4,<v23=d18,<y12=d2 | |
3230 +vmlal.u32 q4,d18,d2 | |
3231 + | |
3232 +# qhasm: r3[0,1] += v23[2] unsigned* y0[0]; r3[2,3] += v23[3] unsigned* y0[1] | |
3233 +# asm 1: vmlal.u32 <r3=reg128#5,<v23=reg128#10%top,<y0=reg128#1%bot | |
3234 +# asm 2: vmlal.u32 <r3=q4,<v23=d19,<y0=d0 | |
3235 +vmlal.u32 q4,d19,d0 | |
3236 + | |
3237 +# qhasm: r3[0,1] += v4[0] unsigned* 5y34[2]; r3[2,3] += v4[1] unsigned* 5y3
4[3] | |
3238 +# asm 1: vmlal.u32 <r3=reg128#5,<v4=reg128#4%bot,<5y34=reg128#13%top | |
3239 +# asm 2: vmlal.u32 <r3=q4,<v4=d6,<5y34=d25 | |
3240 +vmlal.u32 q4,d6,d25 | |
3241 + | |
3242 +# qhasm: ptr = &5y12_stack | |
3243 +# asm 1: lea >ptr=int32#3,<5y12_stack=stack128#5 | |
3244 +# asm 2: lea >ptr=r2,<5y12_stack=[sp,#64] | |
3245 +add r2,sp,#64 | |
3246 + | |
3247 +# qhasm: 5y12 aligned= mem128[ptr] | |
3248 +# asm 1: vld1.8 {>5y12=reg128#12%bot->5y12=reg128#12%top},[<ptr=int32#3,: 128] | |
3249 +# asm 2: vld1.8 {>5y12=d22->5y12=d23},[<ptr=r2,: 128] | |
3250 +vld1.8 {d22-d23},[r2,: 128] | |
3251 + | |
3252 +# qhasm: r0[0,1] += v4[0] unsigned* 5y12[0]; r0[2,3] += v4[1] unsigned* 5y12[
1] | |
3253 +# asm 1: vmlal.u32 <r0=reg128#8,<v4=reg128#4%bot,<5y12=reg128#12%bot | |
3254 +# asm 2: vmlal.u32 <r0=q7,<v4=d6,<5y12=d22 | |
3255 +vmlal.u32 q7,d6,d22 | |
3256 + | |
3257 +# qhasm: r0[0,1] += v23[0] unsigned* 5y34[0]; r0[2,3] += v23[1] unsigned* 5y34[
1] | |
3258 +# asm 1: vmlal.u32 <r0=reg128#8,<v23=reg128#10%bot,<5y34=reg128#13%bot | |
3259 +# asm 2: vmlal.u32 <r0=q7,<v23=d18,<5y34=d24 | |
3260 +vmlal.u32 q7,d18,d24 | |
3261 + | |
3262 +# qhasm: r0[0,1] += v23[2] unsigned* 5y12[2]; r0[2,3] += v23[3] unsigned* 5y12[
3] | |
3263 +# asm 1: vmlal.u32 <r0=reg128#8,<v23=reg128#10%top,<5y12=reg128#12%top | |
3264 +# asm 2: vmlal.u32 <r0=q7,<v23=d19,<5y12=d23 | |
3265 +vmlal.u32 q7,d19,d23 | |
3266 + | |
3267 +# qhasm: r0[0,1] += v01[0] unsigned* y0[0]; r0[2,3] += v01[1] unsigned* y0[1] | |
3268 +# asm 1: vmlal.u32 <r0=reg128#8,<v01=reg128#11%bot,<y0=reg128#1%bot | |
3269 +# asm 2: vmlal.u32 <r0=q7,<v01=d20,<y0=d0 | |
3270 +vmlal.u32 q7,d20,d0 | |
3271 + | |
3272 +# qhasm: r0[0,1] += v01[2] unsigned* 5y34[2]; r0[2,3] += v01[3] unsigned* 5y3
4[3] | |
3273 +# asm 1: vmlal.u32 <r0=reg128#8,<v01=reg128#11%top,<5y34=reg128#13%top | |
3274 +# asm 2: vmlal.u32 <r0=q7,<v01=d21,<5y34=d25 | |
3275 +vmlal.u32 q7,d21,d25 | |
3276 + | |
3277 +# qhasm: r1[0,1] += v01[0] unsigned* y12[0]; r1[2,3] += v01[1] unsigned* y12[
1] | |
3278 +# asm 1: vmlal.u32 <r1=reg128#15,<v01=reg128#11%bot,<y12=reg128#2%bot | |
3279 +# asm 2: vmlal.u32 <r1=q14,<v01=d20,<y12=d2 | |
3280 +vmlal.u32 q14,d20,d2 | |
3281 + | |
3282 +# qhasm: r1[0,1] += v01[2] unsigned* y0[0]; r1[2,3] += v01[3] unsigned* y0[1] | |
3283 +# asm 1: vmlal.u32 <r1=reg128#15,<v01=reg128#11%top,<y0=reg128#1%bot | |
3284 +# asm 2: vmlal.u32 <r1=q14,<v01=d21,<y0=d0 | |
3285 +vmlal.u32 q14,d21,d0 | |
3286 + | |
3287 +# qhasm: r1[0,1] += v23[0] unsigned* 5y34[2]; r1[2,3] += v23[1] unsigned* 5y3
4[3] | |
3288 +# asm 1: vmlal.u32 <r1=reg128#15,<v23=reg128#10%bot,<5y34=reg128#13%top | |
3289 +# asm 2: vmlal.u32 <r1=q14,<v23=d18,<5y34=d25 | |
3290 +vmlal.u32 q14,d18,d25 | |
3291 + | |
3292 +# qhasm: r1[0,1] += v23[2] unsigned* 5y34[0]; r1[2,3] += v23[3] unsigned* 5y34[
1] | |
3293 +# asm 1: vmlal.u32 <r1=reg128#15,<v23=reg128#10%top,<5y34=reg128#13%bot | |
3294 +# asm 2: vmlal.u32 <r1=q14,<v23=d19,<5y34=d24 | |
3295 +vmlal.u32 q14,d19,d24 | |
3296 + | |
3297 +# qhasm: r1[0,1] += v4[0] unsigned* 5y12[2]; r1[2,3] += v4[1] unsigned* 5y12[
3] | |
3298 +# asm 1: vmlal.u32 <r1=reg128#15,<v4=reg128#4%bot,<5y12=reg128#12%top | |
3299 +# asm 2: vmlal.u32 <r1=q14,<v4=d6,<5y12=d23 | |
3300 +vmlal.u32 q14,d6,d23 | |
3301 + | |
3302 +# qhasm: r2[0,1] += v01[0] unsigned* y12[2]; r2[2,3] += v01[1] unsigned* y12[
3] | |
3303 +# asm 1: vmlal.u32 <r2=reg128#14,<v01=reg128#11%bot,<y12=reg128#2%top | |
3304 +# asm 2: vmlal.u32 <r2=q13,<v01=d20,<y12=d3 | |
3305 +vmlal.u32 q13,d20,d3 | |
3306 + | |
3307 +# qhasm: r2[0,1] += v01[2] unsigned* y12[0]; r2[2,3] += v01[3] unsigned* y12[
1] | |
3308 +# asm 1: vmlal.u32 <r2=reg128#14,<v01=reg128#11%top,<y12=reg128#2%bot | |
3309 +# asm 2: vmlal.u32 <r2=q13,<v01=d21,<y12=d2 | |
3310 +vmlal.u32 q13,d21,d2 | |
3311 + | |
3312 +# qhasm: r2[0,1] += v23[0] unsigned* y0[0]; r2[2,3] += v23[1] unsigned* y0[1] | |
3313 +# asm 1: vmlal.u32 <r2=reg128#14,<v23=reg128#10%bot,<y0=reg128#1%bot | |
3314 +# asm 2: vmlal.u32 <r2=q13,<v23=d18,<y0=d0 | |
3315 +vmlal.u32 q13,d18,d0 | |
3316 + | |
3317 +# qhasm: r2[0,1] += v23[2] unsigned* 5y34[2]; r2[2,3] += v23[3] unsigned* 5y3
4[3] | |
3318 +# asm 1: vmlal.u32 <r2=reg128#14,<v23=reg128#10%top,<5y34=reg128#13%top | |
3319 +# asm 2: vmlal.u32 <r2=q13,<v23=d19,<5y34=d25 | |
3320 +vmlal.u32 q13,d19,d25 | |
3321 + | |
3322 +# qhasm: r2[0,1] += v4[0] unsigned* 5y34[0]; r2[2,3] += v4[1] unsigned* 5y34[
1] | |
3323 +# asm 1: vmlal.u32 <r2=reg128#14,<v4=reg128#4%bot,<5y34=reg128#13%bot | |
3324 +# asm 2: vmlal.u32 <r2=q13,<v4=d6,<5y34=d24 | |
3325 +vmlal.u32 q13,d6,d24 | |
3326 + | |
3327 +# qhasm: ptr = &two24 | |
3328 +# asm 1: lea >ptr=int32#3,<two24=stack128#1 | |
3329 +# asm 2: lea >ptr=r2,<two24=[sp,#0] | |
3330 +add r2,sp,#0 | |
3331 + | |
3332 +# qhasm: 2x t1 = r0 unsigned>> 26 | |
3333 +# asm 1: vshr.u64 >t1=reg128#4,<r0=reg128#8,#26 | |
3334 +# asm 2: vshr.u64 >t1=q3,<r0=q7,#26 | |
3335 +vshr.u64 q3,q7,#26 | |
3336 + | |
3337 +# qhasm: len -= 64 | |
3338 +# asm 1: sub >len=int32#4,<len=int32#4,#64 | |
3339 +# asm 2: sub >len=r3,<len=r3,#64 | |
3340 +sub r3,r3,#64 | |
3341 + | |
3342 +# qhasm: r0 &= mask | |
3343 +# asm 1: vand >r0=reg128#6,<r0=reg128#8,<mask=reg128#7 | |
3344 +# asm 2: vand >r0=q5,<r0=q7,<mask=q6 | |
3345 +vand q5,q7,q6 | |
3346 + | |
3347 +# qhasm: 2x r1 += t1 | |
3348 +# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#15,<t1=reg128#4 | |
3349 +# asm 2: vadd.i64 >r1=q3,<r1=q14,<t1=q3 | |
3350 +vadd.i64 q3,q14,q3 | |
3351 + | |
3352 +# qhasm: 2x t4 = r3 unsigned>> 26 | |
3353 +# asm 1: vshr.u64 >t4=reg128#8,<r3=reg128#5,#26 | |
3354 +# asm 2: vshr.u64 >t4=q7,<r3=q4,#26 | |
3355 +vshr.u64 q7,q4,#26 | |
3356 + | |
3357 +# qhasm: r3 &= mask | |
3358 +# asm 1: vand >r3=reg128#5,<r3=reg128#5,<mask=reg128#7 | |
3359 +# asm 2: vand >r3=q4,<r3=q4,<mask=q6 | |
3360 +vand q4,q4,q6 | |
3361 + | |
3362 +# qhasm: 2x x4 = r4 + t4 | |
3363 +# asm 1: vadd.i64 >x4=reg128#8,<r4=reg128#16,<t4=reg128#8 | |
3364 +# asm 2: vadd.i64 >x4=q7,<r4=q15,<t4=q7 | |
3365 +vadd.i64 q7,q15,q7 | |
3366 + | |
3367 +# qhasm: r4 aligned= mem128[ptr] | |
3368 +# asm 1: vld1.8 {>r4=reg128#16%bot->r4=reg128#16%top},[<ptr=int32#3,: 128] | |
3369 +# asm 2: vld1.8 {>r4=d30->r4=d31},[<ptr=r2,: 128] | |
3370 +vld1.8 {d30-d31},[r2,: 128] | |
3371 + | |
3372 +# qhasm: 2x t2 = r1 unsigned>> 26 | |
3373 +# asm 1: vshr.u64 >t2=reg128#9,<r1=reg128#4,#26 | |
3374 +# asm 2: vshr.u64 >t2=q8,<r1=q3,#26 | |
3375 +vshr.u64 q8,q3,#26 | |
3376 + | |
3377 +# qhasm: r1 &= mask | |
3378 +# asm 1: vand >r1=reg128#4,<r1=reg128#4,<mask=reg128#7 | |
3379 +# asm 2: vand >r1=q3,<r1=q3,<mask=q6 | |
3380 +vand q3,q3,q6 | |
3381 + | |
3382 +# qhasm: 2x t0 = x4 unsigned>> 26 | |
3383 +# asm 1: vshr.u64 >t0=reg128#10,<x4=reg128#8,#26 | |
3384 +# asm 2: vshr.u64 >t0=q9,<x4=q7,#26 | |
3385 +vshr.u64 q9,q7,#26 | |
3386 + | |
3387 +# qhasm: 2x r2 += t2 | |
3388 +# asm 1: vadd.i64 >r2=reg128#9,<r2=reg128#14,<t2=reg128#9 | |
3389 +# asm 2: vadd.i64 >r2=q8,<r2=q13,<t2=q8 | |
3390 +vadd.i64 q8,q13,q8 | |
3391 + | |
3392 +# qhasm: x4 &= mask | |
3393 +# asm 1: vand >x4=reg128#11,<x4=reg128#8,<mask=reg128#7 | |
3394 +# asm 2: vand >x4=q10,<x4=q7,<mask=q6 | |
3395 +vand q10,q7,q6 | |
3396 + | |
3397 +# qhasm: 2x x01 = r0 + t0 | |
3398 +# asm 1: vadd.i64 >x01=reg128#6,<r0=reg128#6,<t0=reg128#10 | |
3399 +# asm 2: vadd.i64 >x01=q5,<r0=q5,<t0=q9 | |
3400 +vadd.i64 q5,q5,q9 | |
3401 + | |
3402 +# qhasm: r0 aligned= mem128[ptr] | |
3403 +# asm 1: vld1.8 {>r0=reg128#8%bot->r0=reg128#8%top},[<ptr=int32#3,: 128] | |
3404 +# asm 2: vld1.8 {>r0=d14->r0=d15},[<ptr=r2,: 128] | |
3405 +vld1.8 {d14-d15},[r2,: 128] | |
3406 + | |
3407 +# qhasm: ptr = &z34_stack | |
3408 +# asm 1: lea >ptr=int32#3,<z34_stack=stack128#9 | |
3409 +# asm 2: lea >ptr=r2,<z34_stack=[sp,#128] | |
3410 +add r2,sp,#128 | |
3411 + | |
3412 +# qhasm: 2x t0 <<= 2 | |
3413 +# asm 1: vshl.i64 >t0=reg128#10,<t0=reg128#10,#2 | |
3414 +# asm 2: vshl.i64 >t0=q9,<t0=q9,#2 | |
3415 +vshl.i64 q9,q9,#2 | |
3416 + | |
3417 +# qhasm: 2x t3 = r2 unsigned>> 26 | |
3418 +# asm 1: vshr.u64 >t3=reg128#14,<r2=reg128#9,#26 | |
3419 +# asm 2: vshr.u64 >t3=q13,<r2=q8,#26 | |
3420 +vshr.u64 q13,q8,#26 | |
3421 + | |
3422 +# qhasm: 2x x01 += t0 | |
3423 +# asm 1: vadd.i64 >x01=reg128#15,<x01=reg128#6,<t0=reg128#10 | |
3424 +# asm 2: vadd.i64 >x01=q14,<x01=q5,<t0=q9 | |
3425 +vadd.i64 q14,q5,q9 | |
3426 + | |
3427 +# qhasm: z34 aligned= mem128[ptr] | |
3428 +# asm 1: vld1.8 {>z34=reg128#6%bot->z34=reg128#6%top},[<ptr=int32#3,: 128] | |
3429 +# asm 2: vld1.8 {>z34=d10->z34=d11},[<ptr=r2,: 128] | |
3430 +vld1.8 {d10-d11},[r2,: 128] | |
3431 + | |
3432 +# qhasm: x23 = r2 & mask | |
3433 +# asm 1: vand >x23=reg128#10,<r2=reg128#9,<mask=reg128#7 | |
3434 +# asm 2: vand >x23=q9,<r2=q8,<mask=q6 | |
3435 +vand q9,q8,q6 | |
3436 + | |
3437 +# qhasm: 2x r3 += t3 | |
3438 +# asm 1: vadd.i64 >r3=reg128#5,<r3=reg128#5,<t3=reg128#14 | |
3439 +# asm 2: vadd.i64 >r3=q4,<r3=q4,<t3=q13 | |
3440 +vadd.i64 q4,q4,q13 | |
3441 + | |
3442 +# qhasm: input_2
+= 32 | |
3443 +# asm 1: add >input_2=int32#2,<input_2=int32#2,#32 | |
3444 +# asm 2: add >input_2=r1,<input_2=r1,#32 | |
3445 +add r1,r1,#32 | |
3446 + | |
3447 +# qhasm: 2x t1 = x01 unsigned>> 26 | |
3448 +# asm 1: vshr.u64 >t1=reg128#14,<x01=reg128#15,#26 | |
3449 +# asm 2: vshr.u64 >t1=q13,<x01=q14,#26 | |
3450 +vshr.u64 q13,q14,#26 | |
3451 + | |
3452 +# qhasm: x23 = x23[0,2,1,3] | |
3453 +# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top | |
3454 +# asm 2: vtrn.32 <x23=d18,<x23=d19 | |
3455 +vtrn.32 d18,d19 | |
3456 + | |
3457 +# qhasm: x01 = x01 & mask | |
3458 +# asm 1: vand >x01=reg128#9,<x01=reg128#15,<mask=reg128#7 | |
3459 +# asm 2: vand >x01=q8,<x01=q14,<mask=q6 | |
3460 +vand q8,q14,q6 | |
3461 + | |
3462 +# qhasm: 2x r1 += t1 | |
3463 +# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#4,<t1=reg128#14 | |
3464 +# asm 2: vadd.i64 >r1=q3,<r1=q3,<t1=q13 | |
3465 +vadd.i64 q3,q3,q13 | |
3466 + | |
3467 +# qhasm: 2x t4 = r3 unsigned>> 26 | |
3468 +# asm 1: vshr.u64 >t4=reg128#14,<r3=reg128#5,#26 | |
3469 +# asm 2: vshr.u64 >t4=q13,<r3=q4,#26 | |
3470 +vshr.u64 q13,q4,#26 | |
3471 + | |
3472 +# qhasm: x01 = x01[0,2,1,3] | |
3473 +# asm 1: vtrn.32 <x01=reg128#9%bot,<x01=reg128#9%top | |
3474 +# asm 2: vtrn.32 <x01=d16,<x01=d17 | |
3475 +vtrn.32 d16,d17 | |
3476 + | |
3477 +# qhasm: r3 &= mask | |
3478 +# asm 1: vand >r3=reg128#5,<r3=reg128#5,<mask=reg128#7 | |
3479 +# asm 2: vand >r3=q4,<r3=q4,<mask=q6 | |
3480 +vand q4,q4,q6 | |
3481 + | |
3482 +# qhasm: r1 = r1[0,2,1,3] | |
3483 +# asm 1: vtrn.32 <r1=reg128#4%bot,<r1=reg128#4%top | |
3484 +# asm 2: vtrn.32 <r1=d6,<r1=d7 | |
3485 +vtrn.32 d6,d7 | |
3486 + | |
3487 +# qhasm: 2x x4 += t4 | |
3488 +# asm 1: vadd.i64 >x4=reg128#11,<x4=reg128#11,<t4=reg128#14 | |
3489 +# asm 2: vadd.i64 >x4=q10,<x4=q10,<t4=q13 | |
3490 +vadd.i64 q10,q10,q13 | |
3491 + | |
3492 +# qhasm: r3 = r3[0,2,1,3] | |
3493 +# asm 1: vtrn.32 <r3=reg128#5%bot,<r3=reg128#5%top | |
3494 +# asm 2: vtrn.32 <r3=d8,<r3=d9 | |
3495 +vtrn.32 d8,d9 | |
3496 + | |
3497 +# qhasm: x01 = x01[0,1] r1[0,1] | |
3498 +# asm 1: vext.32 <x01=reg128#9%top,<r1=reg128#4%bot,<r1=reg128#4%bot,#0 | |
3499 +# asm 2: vext.32 <x01=d17,<r1=d6,<r1=d6,#0 | |
3500 +vext.32 d17,d6,d6,#0 | |
3501 + | |
3502 +# qhasm: x23 = x23[0,1] r3[0,1] | |
3503 +# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#5%bot,<r3=reg128#5%bot,#0 | |
3504 +# asm 2: vext.32 <x23=d19,<r3=d8,<r3=d8,#0 | |
3505 +vext.32 d19,d8,d8,#0 | |
3506 + | |
3507 +# qhasm: x4 = x4[0,2,1,3] | |
3508 +# asm 1: vtrn.32 <x4=reg128#11%bot,<x4=reg128#11%top | |
3509 +# asm 2: vtrn.32 <x4=d20,<x4=d21 | |
3510 +vtrn.32 d20,d21 | |
3511 + | |
3512 +# qhasm: unsigned>? len - 64 | |
3513 +# asm 1: cmp <len=int32#4,#64 | |
3514 +# asm 2: cmp <len=r3,#64 | |
3515 +cmp r3,#64 | |
3516 + | |
3517 +# qhasm: goto mainloop2 if unsigned> | |
3518 +bhi ._mainloop2 | |
3519 + | |
3520 +# qhasm: input_2 -= 32 | |
3521 +# asm 1: sub >input_2=int32#3,<input_2=int32#2,#32 | |
3522 +# asm 2: sub >input_2=r2,<input_2=r1,#32 | |
3523 +sub r2,r1,#32 | |
3524 + | |
3525 +# qhasm: below64bytes: | |
3526 +._below64bytes: | |
3527 + | |
3528 +# qhasm: unsigned>? len - 32 | |
3529 +# asm 1: cmp <len=int32#4,#32 | |
3530 +# asm 2: cmp <len=r3,#32 | |
3531 +cmp r3,#32 | |
3532 + | |
3533 +# qhasm: goto end if !unsigned> | |
3534 +bls ._end | |
3535 + | |
3536 +# qhasm: mainloop: | |
3537 +._mainloop: | |
3538 + | |
3539 +# qhasm: new r0 | |
3540 + | |
3541 +# qhasm: ptr = &two24 | |
3542 +# asm 1: lea >ptr=int32#2,<two24=stack128#1 | |
3543 +# asm 2: lea >ptr=r1,<two24=[sp,#0] | |
3544 +add r1,sp,#0 | |
3545 + | |
3546 +# qhasm: r4 aligned= mem128[ptr] | |
3547 +# asm 1: vld1.8 {>r4=reg128#5%bot->r4=reg128#5%top},[<ptr=int32#2,: 128] | |
3548 +# asm 2: vld1.8 {>r4=d8->r4=d9},[<ptr=r1,: 128] | |
3549 +vld1.8 {d8-d9},[r1,: 128] | |
3550 + | |
3551 +# qhasm: u4 aligned= mem128[ptr] | |
3552 +# asm 1: vld1.8 {>u4=reg128#6%bot->u4=reg128#6%top},[<ptr=int32#2,: 128] | |
3553 +# asm 2: vld1.8 {>u4=d10->u4=d11},[<ptr=r1,: 128] | |
3554 +vld1.8 {d10-d11},[r1,: 128] | |
3555 + | |
3556 +# qhasm: c01 = mem128[input_2];input_2+=16 | |
3557 +# asm 1: vld1.8 {>c01=reg128#8%bot->c01=reg128#8%top},[<input_2=int32#3]! | |
3558 +# asm 2: vld1.8 {>c01=d14->c01=d15},[<input_2=r2]! | |
3559 +vld1.8 {d14-d15},[r2]! | |
3560 + | |
3561 +# qhasm: r4[0,1] += x01[0] unsigned* y34[2]; r4[2,3] += x01[1] unsigned* y34
[3] | |
3562 +# asm 1: vmlal.u32 <r4=reg128#5,<x01=reg128#9%bot,<y34=reg128#3%top | |
3563 +# asm 2: vmlal.u32 <r4=q4,<x01=d16,<y34=d5 | |
3564 +vmlal.u32 q4,d16,d5 | |
3565 + | |
3566 +# qhasm: c23 = mem128[input_2];input_2+=16 | |
3567 +# asm 1: vld1.8 {>c23=reg128#14%bot->c23=reg128#14%top},[<input_2=int32#3]! | |
3568 +# asm 2: vld1.8 {>c23=d26->c23=d27},[<input_2=r2]! | |
3569 +vld1.8 {d26-d27},[r2]! | |
3570 + | |
3571 +# qhasm: r4[0,1] += x01[2] unsigned* y34[0]; r4[2,3] += x01[3] unsigned* y34[1
] | |
3572 +# asm 1: vmlal.u32 <r4=reg128#5,<x01=reg128#9%top,<y34=reg128#3%bot | |
3573 +# asm 2: vmlal.u32 <r4=q4,<x01=d17,<y34=d4 | |
3574 +vmlal.u32 q4,d17,d4 | |
3575 + | |
3576 +# qhasm: r0 = u4[1]c01[0]r0[2,3] | |
3577 +# asm 1: vext.32 <r0=reg128#4%bot,<u4=reg128#6%bot,<c01=reg128#8%bot,#1 | |
3578 +# asm 2: vext.32 <r0=d6,<u4=d10,<c01=d14,#1 | |
3579 +vext.32 d6,d10,d14,#1 | |
3580 + | |
3581 +# qhasm: r4[0,1] += x23[0] unsigned* y12[2]; r4[2,3] += x23[1] unsigned* y12[3
] | |
3582 +# asm 1: vmlal.u32 <r4=reg128#5,<x23=reg128#10%bot,<y12=reg128#2%top | |
3583 +# asm 2: vmlal.u32 <r4=q4,<x23=d18,<y12=d3 | |
3584 +vmlal.u32 q4,d18,d3 | |
3585 + | |
3586 +# qhasm: r0 = r0[0,1]u4[1]c23[0] | |
3587 +# asm 1: vext.32 <r0=reg128#4%top,<u4=reg128#6%bot,<c23=reg128#14%bot,#1 | |
3588 +# asm 2: vext.32 <r0=d7,<u4=d10,<c23=d26,#1 | |
3589 +vext.32 d7,d10,d26,#1 | |
3590 + | |
3591 +# qhasm: r4[0,1] += x23[2] unsigned* y12[0]; r4[2,3] += x23[3] unsigned* y12[1
] | |
3592 +# asm 1: vmlal.u32 <r4=reg128#5,<x23=reg128#10%top,<y12=reg128#2%bot | |
3593 +# asm 2: vmlal.u32 <r4=q4,<x23=d19,<y12=d2 | |
3594 +vmlal.u32 q4,d19,d2 | |
3595 + | |
3596 +# qhasm: r0 = r0[1]r0[0]r0[3]r0[2] | |
3597 +# asm 1: vrev64.i32 >r0=reg128#4,<r0=reg128#4 | |
3598 +# asm 2: vrev64.i32 >r0=q3,<r0=q3 | |
3599 +vrev64.i32 q3,q3 | |
3600 + | |
3601 +# qhasm: r4[0,1] += x4[0] unsigned* y0[0]; r4[2,3] += x4[1] unsigned* y0[1] | |
3602 +# asm 1: vmlal.u32 <r4=reg128#5,<x4=reg128#11%bot,<y0=reg128#1%bot | |
3603 +# asm 2: vmlal.u32 <r4=q4,<x4=d20,<y0=d0 | |
3604 +vmlal.u32 q4,d20,d0 | |
3605 + | |
3606 +# qhasm: r0[0,1] += x4[0] unsigned* 5y12[0]; r0[2,3] += x4[1] unsigned* 5y12[
1] | |
3607 +# asm 1: vmlal.u32 <r0=reg128#4,<x4=reg128#11%bot,<5y12=reg128#12%bot | |
3608 +# asm 2: vmlal.u32 <r0=q3,<x4=d20,<5y12=d22 | |
3609 +vmlal.u32 q3,d20,d22 | |
3610 + | |
3611 +# qhasm: r0[0,1] += x23[0] unsigned* 5y34[0]; r0[2,3] += x23[1] unsigned* 5y34[
1] | |
3612 +# asm 1: vmlal.u32 <r0=reg128#4,<x23=reg128#10%bot,<5y34=reg128#13%bot | |
3613 +# asm 2: vmlal.u32 <r0=q3,<x23=d18,<5y34=d24 | |
3614 +vmlal.u32 q3,d18,d24 | |
3615 + | |
3616 +# qhasm: r0[0,1] += x23[2] unsigned* 5y12[2]; r0[2,3] += x23[3] unsigned* 5y12[
3] | |
3617 +# asm 1: vmlal.u32 <r0=reg128#4,<x23=reg128#10%top,<5y12=reg128#12%top | |
3618 +# asm 2: vmlal.u32 <r0=q3,<x23=d19,<5y12=d23 | |
3619 +vmlal.u32 q3,d19,d23 | |
3620 + | |
3621 +# qhasm: c01 c23 = c01[0]c23[0]c01[2]c23[2]c01[1]c23[1]c01[3]c23[3] | |
3622 +# asm 1: vtrn.32 <c01=reg128#8,<c23=reg128#14 | |
3623 +# asm 2: vtrn.32 <c01=q7,<c23=q13 | |
3624 +vtrn.32 q7,q13 | |
3625 + | |
3626 +# qhasm: r0[0,1] += x01[0] unsigned* y0[0]; r0[2,3] += x01[1] unsigned* y0[1] | |
3627 +# asm 1: vmlal.u32 <r0=reg128#4,<x01=reg128#9%bot,<y0=reg128#1%bot | |
3628 +# asm 2: vmlal.u32 <r0=q3,<x01=d16,<y0=d0 | |
3629 +vmlal.u32 q3,d16,d0 | |
3630 + | |
3631 +# qhasm: r3[0,1] = c23[2]<<18; r3[2,3] = c23[3]<<18 | |
3632 +# asm 1: vshll.u32 >r3=reg128#6,<c23=reg128#14%top,#18 | |
3633 +# asm 2: vshll.u32 >r3=q5,<c23=d27,#18 | |
3634 +vshll.u32 q5,d27,#18 | |
3635 + | |
3636 +# qhasm: r0[0,1] += x01[2] unsigned* 5y34[2]; r0[2,3] += x01[3] unsigned* 5y3
4[3] | |
3637 +# asm 1: vmlal.u32 <r0=reg128#4,<x01=reg128#9%top,<5y34=reg128#13%top | |
3638 +# asm 2: vmlal.u32 <r0=q3,<x01=d17,<5y34=d25 | |
3639 +vmlal.u32 q3,d17,d25 | |
3640 + | |
3641 +# qhasm: r3[0,1] += x01[0] unsigned* y34[0]; r3[2,3] += x01[1] unsigned* y34[
1] | |
3642 +# asm 1: vmlal.u32 <r3=reg128#6,<x01=reg128#9%bot,<y34=reg128#3%bot | |
3643 +# asm 2: vmlal.u32 <r3=q5,<x01=d16,<y34=d4 | |
3644 +vmlal.u32 q5,d16,d4 | |
3645 + | |
3646 +# qhasm: r3[0,1] += x01[2] unsigned* y12[2]; r3[2,3] += x01[3] unsigned* y12[
3] | |
3647 +# asm 1: vmlal.u32 <r3=reg128#6,<x01=reg128#9%top,<y12=reg128#2%top | |
3648 +# asm 2: vmlal.u32 <r3=q5,<x01=d17,<y12=d3 | |
3649 +vmlal.u32 q5,d17,d3 | |
3650 + | |
3651 +# qhasm: r3[0,1] += x23[0] unsigned* y12[0]; r3[2,3] += x23[1] unsigned* y12[
1] | |
3652 +# asm 1: vmlal.u32 <r3=reg128#6,<x23=reg128#10%bot,<y12=reg128#2%bot | |
3653 +# asm 2: vmlal.u32 <r3=q5,<x23=d18,<y12=d2 | |
3654 +vmlal.u32 q5,d18,d2 | |
3655 + | |
3656 +# qhasm: r3[0,1] += x23[2] unsigned* y0[0]; r3[2,3] += x23[3] unsigned* y0[1] | |
3657 +# asm 1: vmlal.u32 <r3=reg128#6,<x23=reg128#10%top,<y0=reg128#1%bot | |
3658 +# asm 2: vmlal.u32 <r3=q5,<x23=d19,<y0=d0 | |
3659 +vmlal.u32 q5,d19,d0 | |
3660 + | |
3661 +# qhasm: r1[0,1] = c23[0]<<6; r1[2,3] = c23[1]<<6 | |
3662 +# asm 1: vshll.u32 >r1=reg128#14,<c23=reg128#14%bot,#6 | |
3663 +# asm 2: vshll.u32 >r1=q13,<c23=d26,#6 | |
3664 +vshll.u32 q13,d26,#6 | |
3665 + | |
3666 +# qhasm: r3[0,1] += x4[0] unsigned* 5y34[2]; r3[2,3] += x4[1] unsigned* 5y3
4[3] | |
3667 +# asm 1: vmlal.u32 <r3=reg128#6,<x4=reg128#11%bot,<5y34=reg128#13%top | |
3668 +# asm 2: vmlal.u32 <r3=q5,<x4=d20,<5y34=d25 | |
3669 +vmlal.u32 q5,d20,d25 | |
3670 + | |
3671 +# qhasm: r1[0,1] += x01[0] unsigned* y12[0]; r1[2,3] += x01[1] unsigned* y12[
1] | |
3672 +# asm 1: vmlal.u32 <r1=reg128#14,<x01=reg128#9%bot,<y12=reg128#2%bot | |
3673 +# asm 2: vmlal.u32 <r1=q13,<x01=d16,<y12=d2 | |
3674 +vmlal.u32 q13,d16,d2 | |
3675 + | |
3676 +# qhasm: r1[0,1] += x01[2] unsigned* y0[0]; r1[2,3] += x01[3] unsigned* y0[1] | |
3677 +# asm 1: vmlal.u32 <r1=reg128#14,<x01=reg128#9%top,<y0=reg128#1%bot | |
3678 +# asm 2: vmlal.u32 <r1=q13,<x01=d17,<y0=d0 | |
3679 +vmlal.u32 q13,d17,d0 | |
3680 + | |
3681 +# qhasm: r1[0,1] += x23[0] unsigned* 5y34[2]; r1[2,3] += x23[1] unsigned* 5y3
4[3] | |
3682 +# asm 1: vmlal.u32 <r1=reg128#14,<x23=reg128#10%bot,<5y34=reg128#13%top | |
3683 +# asm 2: vmlal.u32 <r1=q13,<x23=d18,<5y34=d25 | |
3684 +vmlal.u32 q13,d18,d25 | |
3685 + | |
3686 +# qhasm: r1[0,1] += x23[2] unsigned* 5y34[0]; r1[2,3] += x23[3] unsigned* 5y34[
1] | |
3687 +# asm 1: vmlal.u32 <r1=reg128#14,<x23=reg128#10%top,<5y34=reg128#13%bot | |
3688 +# asm 2: vmlal.u32 <r1=q13,<x23=d19,<5y34=d24 | |
3689 +vmlal.u32 q13,d19,d24 | |
3690 + | |
3691 +# qhasm: r2[0,1] = c01[2]<<12; r2[2,3] = c01[3]<<12 | |
3692 +# asm 1: vshll.u32 >r2=reg128#8,<c01=reg128#8%top,#12 | |
3693 +# asm 2: vshll.u32 >r2=q7,<c01=d15,#12 | |
3694 +vshll.u32 q7,d15,#12 | |
3695 + | |
3696 +# qhasm: r1[0,1] += x4[0] unsigned* 5y12[2]; r1[2,3] += x4[1] unsigned* 5y12[
3] | |
3697 +# asm 1: vmlal.u32 <r1=reg128#14,<x4=reg128#11%bot,<5y12=reg128#12%top | |
3698 +# asm 2: vmlal.u32 <r1=q13,<x4=d20,<5y12=d23 | |
3699 +vmlal.u32 q13,d20,d23 | |
3700 + | |
3701 +# qhasm: r2[0,1] += x01[0] unsigned* y12[2]; r2[2,3] += x01[1] unsigned* y12[
3] | |
3702 +# asm 1: vmlal.u32 <r2=reg128#8,<x01=reg128#9%bot,<y12=reg128#2%top | |
3703 +# asm 2: vmlal.u32 <r2=q7,<x01=d16,<y12=d3 | |
3704 +vmlal.u32 q7,d16,d3 | |
3705 + | |
3706 +# qhasm: r2[0,1] += x01[2] unsigned* y12[0]; r2[2,3] += x01[3] unsigned* y12[
1] | |
3707 +# asm 1: vmlal.u32 <r2=reg128#8,<x01=reg128#9%top,<y12=reg128#2%bot | |
3708 +# asm 2: vmlal.u32 <r2=q7,<x01=d17,<y12=d2 | |
3709 +vmlal.u32 q7,d17,d2 | |
3710 + | |
3711 +# qhasm: r2[0,1] += x23[0] unsigned* y0[0]; r2[2,3] += x23[1] unsigned* y0[1] | |
3712 +# asm 1: vmlal.u32 <r2=reg128#8,<x23=reg128#10%bot,<y0=reg128#1%bot | |
3713 +# asm 2: vmlal.u32 <r2=q7,<x23=d18,<y0=d0 | |
3714 +vmlal.u32 q7,d18,d0 | |
3715 + | |
3716 +# qhasm: r2[0,1] += x23[2] unsigned* 5y34[2]; r2[2,3] += x23[3] unsigned* 5y3
4[3] | |
3717 +# asm 1: vmlal.u32 <r2=reg128#8,<x23=reg128#10%top,<5y34=reg128#13%top | |
3718 +# asm 2: vmlal.u32 <r2=q7,<x23=d19,<5y34=d25 | |
3719 +vmlal.u32 q7,d19,d25 | |
3720 + | |
3721 +# qhasm: r2[0,1] += x4[0] unsigned* 5y34[0]; r2[2,3] += x4[1] unsigned* 5y34[
1] | |
3722 +# asm 1: vmlal.u32 <r2=reg128#8,<x4=reg128#11%bot,<5y34=reg128#13%bot | |
3723 +# asm 2: vmlal.u32 <r2=q7,<x4=d20,<5y34=d24 | |
3724 +vmlal.u32 q7,d20,d24 | |
3725 + | |
3726 +# qhasm: 2x t1 = r0 unsigned>> 26 | |
3727 +# asm 1: vshr.u64 >t1=reg128#9,<r0=reg128#4,#26 | |
3728 +# asm 2: vshr.u64 >t1=q8,<r0=q3,#26 | |
3729 +vshr.u64 q8,q3,#26 | |
3730 + | |
3731 +# qhasm: r0 &= mask | |
3732 +# asm 1: vand >r0=reg128#4,<r0=reg128#4,<mask=reg128#7 | |
3733 +# asm 2: vand >r0=q3,<r0=q3,<mask=q6 | |
3734 +vand q3,q3,q6 | |
3735 + | |
3736 +# qhasm: 2x r1 += t1 | |
3737 +# asm 1: vadd.i64 >r1=reg128#9,<r1=reg128#14,<t1=reg128#9 | |
3738 +# asm 2: vadd.i64 >r1=q8,<r1=q13,<t1=q8 | |
3739 +vadd.i64 q8,q13,q8 | |
3740 + | |
3741 +# qhasm: 2x t4 = r3 unsigned>> 26 | |
3742 +# asm 1: vshr.u64 >t4=reg128#10,<r3=reg128#6,#26 | |
3743 +# asm 2: vshr.u64 >t4=q9,<r3=q5,#26 | |
3744 +vshr.u64 q9,q5,#26 | |
3745 + | |
3746 +# qhasm: r3 &= mask | |
3747 +# asm 1: vand >r3=reg128#6,<r3=reg128#6,<mask=reg128#7 | |
3748 +# asm 2: vand >r3=q5,<r3=q5,<mask=q6 | |
3749 +vand q5,q5,q6 | |
3750 + | |
3751 +# qhasm: 2x r4 += t4 | |
3752 +# asm 1: vadd.i64 >r4=reg128#5,<r4=reg128#5,<t4=reg128#10 | |
3753 +# asm 2: vadd.i64 >r4=q4,<r4=q4,<t4=q9 | |
3754 +vadd.i64 q4,q4,q9 | |
3755 + | |
3756 +# qhasm: 2x t2 = r1 unsigned>> 26 | |
3757 +# asm 1: vshr.u64 >t2=reg128#10,<r1=reg128#9,#26 | |
3758 +# asm 2: vshr.u64 >t2=q9,<r1=q8,#26 | |
3759 +vshr.u64 q9,q8,#26 | |
3760 + | |
3761 +# qhasm: r1 &= mask | |
3762 +# asm 1: vand >r1=reg128#11,<r1=reg128#9,<mask=reg128#7 | |
3763 +# asm 2: vand >r1=q10,<r1=q8,<mask=q6 | |
3764 +vand q10,q8,q6 | |
3765 + | |
3766 +# qhasm: 2x t0 = r4 unsigned>> 26 | |
3767 +# asm 1: vshr.u64 >t0=reg128#9,<r4=reg128#5,#26 | |
3768 +# asm 2: vshr.u64 >t0=q8,<r4=q4,#26 | |
3769 +vshr.u64 q8,q4,#26 | |
3770 + | |
3771 +# qhasm: 2x r2 += t2 | |
3772 +# asm 1: vadd.i64 >r2=reg128#8,<r2=reg128#8,<t2=reg128#10 | |
3773 +# asm 2: vadd.i64 >r2=q7,<r2=q7,<t2=q9 | |
3774 +vadd.i64 q7,q7,q9 | |
3775 + | |
3776 +# qhasm: r4 &= mask | |
3777 +# asm 1: vand >r4=reg128#5,<r4=reg128#5,<mask=reg128#7 | |
3778 +# asm 2: vand >r4=q4,<r4=q4,<mask=q6 | |
3779 +vand q4,q4,q6 | |
3780 + | |
3781 +# qhasm: 2x r0 += t0 | |
3782 +# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#9 | |
3783 +# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q8 | |
3784 +vadd.i64 q3,q3,q8 | |
3785 + | |
3786 +# qhasm: 2x t0 <<= 2 | |
3787 +# asm 1: vshl.i64 >t0=reg128#9,<t0=reg128#9,#2 | |
3788 +# asm 2: vshl.i64 >t0=q8,<t0=q8,#2 | |
3789 +vshl.i64 q8,q8,#2 | |
3790 + | |
3791 +# qhasm: 2x t3 = r2 unsigned>> 26 | |
3792 +# asm 1: vshr.u64 >t3=reg128#14,<r2=reg128#8,#26 | |
3793 +# asm 2: vshr.u64 >t3=q13,<r2=q7,#26 | |
3794 +vshr.u64 q13,q7,#26 | |
3795 + | |
3796 +# qhasm: 2x r0 += t0 | |
3797 +# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#9 | |
3798 +# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q8 | |
3799 +vadd.i64 q3,q3,q8 | |
3800 + | |
3801 +# qhasm: x23 = r2 & mask | |
3802 +# asm 1: vand >x23=reg128#10,<r2=reg128#8,<mask=reg128#7 | |
3803 +# asm 2: vand >x23=q9,<r2=q7,<mask=q6 | |
3804 +vand q9,q7,q6 | |
3805 + | |
3806 +# qhasm: 2x r3 += t3 | |
3807 +# asm 1: vadd.i64 >r3=reg128#6,<r3=reg128#6,<t3=reg128#14 | |
3808 +# asm 2: vadd.i64 >r3=q5,<r3=q5,<t3=q13 | |
3809 +vadd.i64 q5,q5,q13 | |
3810 + | |
3811 +# qhasm: 2x t1 = r0 unsigned>> 26 | |
3812 +# asm 1: vshr.u64 >t1=reg128#8,<r0=reg128#4,#26 | |
3813 +# asm 2: vshr.u64 >t1=q7,<r0=q3,#26 | |
3814 +vshr.u64 q7,q3,#26 | |
3815 + | |
3816 +# qhasm: x01 = r0 & mask | |
3817 +# asm 1: vand >x01=reg128#9,<r0=reg128#4,<mask=reg128#7 | |
3818 +# asm 2: vand >x01=q8,<r0=q3,<mask=q6 | |
3819 +vand q8,q3,q6 | |
3820 + | |
3821 +# qhasm: 2x r1 += t1 | |
3822 +# asm 1: vadd.i64 >r1=reg128#4,<r1=reg128#11,<t1=reg128#8 | |
3823 +# asm 2: vadd.i64 >r1=q3,<r1=q10,<t1=q7 | |
3824 +vadd.i64 q3,q10,q7 | |
3825 + | |
3826 +# qhasm: 2x t4 = r3 unsigned>> 26 | |
3827 +# asm 1: vshr.u64 >t4=reg128#8,<r3=reg128#6,#26 | |
3828 +# asm 2: vshr.u64 >t4=q7,<r3=q5,#26 | |
3829 +vshr.u64 q7,q5,#26 | |
3830 + | |
3831 +# qhasm: r3 &= mask | |
3832 +# asm 1: vand >r3=reg128#6,<r3=reg128#6,<mask=reg128#7 | |
3833 +# asm 2: vand >r3=q5,<r3=q5,<mask=q6 | |
3834 +vand q5,q5,q6 | |
3835 + | |
3836 +# qhasm: 2x x4 = r4 + t4 | |
3837 +# asm 1: vadd.i64 >x4=reg128#11,<r4=reg128#5,<t4=reg128#8 | |
3838 +# asm 2: vadd.i64 >x4=q10,<r4=q4,<t4=q7 | |
3839 +vadd.i64 q10,q4,q7 | |
3840 + | |
3841 +# qhasm: len -= 32 | |
3842 +# asm 1: sub >len=int32#4,<len=int32#4,#32 | |
3843 +# asm 2: sub >len=r3,<len=r3,#32 | |
3844 +sub r3,r3,#32 | |
3845 + | |
3846 +# qhasm: x01 = x01[0,2,1,3] | |
3847 +# asm 1: vtrn.32 <x01=reg128#9%bot,<x01=reg128#9%top | |
3848 +# asm 2: vtrn.32 <x01=d16,<x01=d17 | |
3849 +vtrn.32 d16,d17 | |
3850 + | |
3851 +# qhasm: x23 = x23[0,2,1,3] | |
3852 +# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top | |
3853 +# asm 2: vtrn.32 <x23=d18,<x23=d19 | |
3854 +vtrn.32 d18,d19 | |
3855 + | |
3856 +# qhasm: r1 = r1[0,2,1,3] | |
3857 +# asm 1: vtrn.32 <r1=reg128#4%bot,<r1=reg128#4%top | |
3858 +# asm 2: vtrn.32 <r1=d6,<r1=d7 | |
3859 +vtrn.32 d6,d7 | |
3860 + | |
3861 +# qhasm: r3 = r3[0,2,1,3] | |
3862 +# asm 1: vtrn.32 <r3=reg128#6%bot,<r3=reg128#6%top | |
3863 +# asm 2: vtrn.32 <r3=d10,<r3=d11 | |
3864 +vtrn.32 d10,d11 | |
3865 + | |
3866 +# qhasm: x4 = x4[0,2,1,3] | |
3867 +# asm 1: vtrn.32 <x4=reg128#11%bot,<x4=reg128#11%top | |
3868 +# asm 2: vtrn.32 <x4=d20,<x4=d21 | |
3869 +vtrn.32 d20,d21 | |
3870 + | |
3871 +# qhasm: x01 = x01[0,1] r1[0,1] | |
3872 +# asm 1: vext.32 <x01=reg128#9%top,<r1=reg128#4%bot,<r1=reg128#4%bot,#0 | |
3873 +# asm 2: vext.32 <x01=d17,<r1=d6,<r1=d6,#0 | |
3874 +vext.32 d17,d6,d6,#0 | |
3875 + | |
3876 +# qhasm: x23 = x23[0,1] r3[0,1] | |
3877 +# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#6%bot,<r3=reg128#6%bot,#0 | |
3878 +# asm 2: vext.32 <x23=d19,<r3=d10,<r3=d10,#0 | |
3879 +vext.32 d19,d10,d10,#0 | |
3880 + | |
3881 +# qhasm: unsigned>? len - 32 | |
3882 +# asm 1: cmp <len=int32#4,#32 | |
3883 +# asm 2: cmp <len=r3,#32 | |
3884 +cmp r3,#32 | |
3885 + | |
3886 +# qhasm: goto mainloop if unsigned> | |
3887 +bhi ._mainloop | |
3888 + | |
3889 +# qhasm: end: | |
3890 +._end: | |
3891 + | |
3892 +# qhasm: mem128[input_0] = x01;input_0+=16 | |
3893 +# asm 1: vst1.8 {<x01=reg128#9%bot-<x01=reg128#9%top},[<input_0=int32#1]! | |
3894 +# asm 2: vst1.8 {<x01=d16-<x01=d17},[<input_0=r0]! | |
3895 +vst1.8 {d16-d17},[r0]! | |
3896 + | |
3897 +# qhasm: mem128[input_0] = x23;input_0+=16 | |
3898 +# asm 1: vst1.8 {<x23=reg128#10%bot-<x23=reg128#10%top},[<input_0=int32#1]! | |
3899 +# asm 2: vst1.8 {<x23=d18-<x23=d19},[<input_0=r0]! | |
3900 +vst1.8 {d18-d19},[r0]! | |
3901 + | |
3902 +# qhasm: mem64[input_0] = x4[0] | |
3903 +# asm 1: vst1.8 <x4=reg128#11%bot,[<input_0=int32#1] | |
3904 +# asm 2: vst1.8 <x4=d20,[<input_0=r0] | |
3905 +vst1.8 d20,[r0] | |
3906 + | |
3907 +# qhasm: len = len | |
3908 +# asm 1: mov >len=int32#1,<len=int32#4 | |
3909 +# asm 2: mov >len=r0,<len=r3 | |
3910 +mov r0,r3 | |
3911 + | |
3912 +# qhasm: qpopreturn len | |
3913 +mov sp,r12 | |
3914 +vpop {q4,q5,q6,q7} | |
3915 +bx lr | |
3916 + | |
3917 +# qhasm: int32 input_0 | |
3918 + | |
3919 +# qhasm: int32 input_1 | |
3920 + | |
3921 +# qhasm: int32 input_2 | |
3922 + | |
3923 +# qhasm: int32 input_3 | |
3924 + | |
3925 +# qhasm: stack32 input_4 | |
3926 + | |
3927 +# qhasm: stack32 input_5 | |
3928 + | |
3929 +# qhasm: stack32 input_6 | |
3930 + | |
3931 +# qhasm: stack32 input_7 | |
3932 + | |
3933 +# qhasm: int32 caller_r4 | |
3934 + | |
3935 +# qhasm: int32 caller_r5 | |
3936 + | |
3937 +# qhasm: int32 caller_r6 | |
3938 + | |
3939 +# qhasm: int32 caller_r7 | |
3940 + | |
3941 +# qhasm: int32 caller_r8 | |
3942 + | |
3943 +# qhasm: int32 caller_r9 | |
3944 + | |
3945 +# qhasm: int32 caller_r10 | |
3946 + | |
3947 +# qhasm: int32 caller_r11 | |
3948 + | |
3949 +# qhasm: int32 caller_r12 | |
3950 + | |
3951 +# qhasm: int32 caller_r14 | |
3952 + | |
3953 +# qhasm: reg128 caller_q4 | |
3954 + | |
3955 +# qhasm: reg128 caller_q5 | |
3956 + | |
3957 +# qhasm: reg128 caller_q6 | |
3958 + | |
3959 +# qhasm: reg128 caller_q7 | |
3960 + | |
3961 +# qhasm: reg128 r0 | |
3962 + | |
3963 +# qhasm: reg128 r1 | |
3964 + | |
3965 +# qhasm: reg128 r2 | |
3966 + | |
3967 +# qhasm: reg128 r3 | |
3968 + | |
3969 +# qhasm: reg128 r4 | |
3970 + | |
3971 +# qhasm: reg128 x01 | |
3972 + | |
3973 +# qhasm: reg128 x23 | |
3974 + | |
3975 +# qhasm: reg128 x4 | |
3976 + | |
3977 +# qhasm: reg128 y01 | |
3978 + | |
3979 +# qhasm: reg128 y23 | |
3980 + | |
3981 +# qhasm: reg128 y4 | |
3982 + | |
3983 +# qhasm: reg128 _5y01 | |
3984 + | |
3985 +# qhasm: reg128 _5y23 | |
3986 + | |
3987 +# qhasm: reg128 _5y4 | |
3988 + | |
3989 +# qhasm: reg128 c01 | |
3990 + | |
3991 +# qhasm: reg128 c23 | |
3992 + | |
3993 +# qhasm: reg128 c4 | |
3994 + | |
3995 +# qhasm: reg128 t0 | |
3996 + | |
3997 +# qhasm: reg128 t1 | |
3998 + | |
3999 +# qhasm: reg128 t2 | |
4000 + | |
4001 +# qhasm: reg128 t3 | |
4002 + | |
4003 +# qhasm: reg128 t4 | |
4004 + | |
4005 +# qhasm: reg128 mask | |
4006 + | |
4007 +# qhasm: enter crypto_onetimeauth_poly1305_neon2_addmulmod | |
4008 +.align 2 | |
4009 +.global openssl_poly1305_neon2_addmulmod | |
4010 +.type openssl_poly1305_neon2_addmulmod STT_FUNC | |
4011 +openssl_poly1305_neon2_addmulmod: | |
4012 +sub sp,sp,#0 | |
4013 + | |
4014 +# qhasm: 2x mask = 0xffffffff | |
4015 +# asm 1: vmov.i64 >mask=reg128#1,#0xffffffff | |
4016 +# asm 2: vmov.i64 >mask=q0,#0xffffffff | |
4017 +vmov.i64 q0,#0xffffffff | |
4018 + | |
4019 +# qhasm: y01 aligned= mem128[input_2];input_2+=16 | |
4020 +# asm 1: vld1.8 {>y01=reg128#2%bot->y01=reg128#2%top},[<input_2=int32#3,: 128]! | |
4021 +# asm 2: vld1.8 {>y01=d2->y01=d3},[<input_2=r2,: 128]! | |
4022 +vld1.8 {d2-d3},[r2,: 128]! | |
4023 + | |
4024 +# qhasm: 4x _5y01 = y01 << 2 | |
4025 +# asm 1: vshl.i32 >_5y01=reg128#3,<y01=reg128#2,#2 | |
4026 +# asm 2: vshl.i32 >_5y01=q2,<y01=q1,#2 | |
4027 +vshl.i32 q2,q1,#2 | |
4028 + | |
4029 +# qhasm: y23 aligned= mem128[input_2];input_2+=16 | |
4030 +# asm 1: vld1.8 {>y23=reg128#4%bot->y23=reg128#4%top},[<input_2=int32#3,: 128]! | |
4031 +# asm 2: vld1.8 {>y23=d6->y23=d7},[<input_2=r2,: 128]! | |
4032 +vld1.8 {d6-d7},[r2,: 128]! | |
4033 + | |
4034 +# qhasm: 4x _5y23 = y23 << 2 | |
4035 +# asm 1: vshl.i32 >_5y23=reg128#9,<y23=reg128#4,#2 | |
4036 +# asm 2: vshl.i32 >_5y23=q8,<y23=q3,#2 | |
4037 +vshl.i32 q8,q3,#2 | |
4038 + | |
4039 +# qhasm: y4 aligned= mem64[input_2]y4[1] | |
4040 +# asm 1: vld1.8 {<y4=reg128#10%bot},[<input_2=int32#3,: 64] | |
4041 +# asm 2: vld1.8 {<y4=d18},[<input_2=r2,: 64] | |
4042 +vld1.8 {d18},[r2,: 64] | |
4043 + | |
4044 +# qhasm: 4x _5y4 = y4 << 2 | |
4045 +# asm 1: vshl.i32 >_5y4=reg128#11,<y4=reg128#10,#2 | |
4046 +# asm 2: vshl.i32 >_5y4=q10,<y4=q9,#2 | |
4047 +vshl.i32 q10,q9,#2 | |
4048 + | |
4049 +# qhasm: x01 aligned= mem128[input_1];input_1+=16 | |
4050 +# asm 1: vld1.8 {>x01=reg128#12%bot->x01=reg128#12%top},[<input_1=int32#2,: 128
]! | |
4051 +# asm 2: vld1.8 {>x01=d22->x01=d23},[<input_1=r1,: 128]! | |
4052 +vld1.8 {d22-d23},[r1,: 128]! | |
4053 + | |
4054 +# qhasm: 4x _5y01 += y01 | |
4055 +# asm 1: vadd.i32 >_5y01=reg128#3,<_5y01=reg128#3,<y01=reg128#2 | |
4056 +# asm 2: vadd.i32 >_5y01=q2,<_5y01=q2,<y01=q1 | |
4057 +vadd.i32 q2,q2,q1 | |
4058 + | |
4059 +# qhasm: x23 aligned= mem128[input_1];input_1+=16 | |
4060 +# asm 1: vld1.8 {>x23=reg128#13%bot->x23=reg128#13%top},[<input_1=int32#2,: 128
]! | |
4061 +# asm 2: vld1.8 {>x23=d24->x23=d25},[<input_1=r1,: 128]! | |
4062 +vld1.8 {d24-d25},[r1,: 128]! | |
4063 + | |
4064 +# qhasm: 4x _5y23 += y23 | |
4065 +# asm 1: vadd.i32 >_5y23=reg128#9,<_5y23=reg128#9,<y23=reg128#4 | |
4066 +# asm 2: vadd.i32 >_5y23=q8,<_5y23=q8,<y23=q3 | |
4067 +vadd.i32 q8,q8,q3 | |
4068 + | |
4069 +# qhasm: 4x _5y4 += y4 | |
4070 +# asm 1: vadd.i32 >_5y4=reg128#11,<_5y4=reg128#11,<y4=reg128#10 | |
4071 +# asm 2: vadd.i32 >_5y4=q10,<_5y4=q10,<y4=q9 | |
4072 +vadd.i32 q10,q10,q9 | |
4073 + | |
4074 +# qhasm: c01 aligned= mem128[input_3];input_3+=16 | |
4075 +# asm 1: vld1.8 {>c01=reg128#14%bot->c01=reg128#14%top},[<input_3=int32#4,: 128
]! | |
4076 +# asm 2: vld1.8 {>c01=d26->c01=d27},[<input_3=r3,: 128]! | |
4077 +vld1.8 {d26-d27},[r3,: 128]! | |
4078 + | |
4079 +# qhasm: 4x x01 += c01 | |
4080 +# asm 1: vadd.i32 >x01=reg128#12,<x01=reg128#12,<c01=reg128#14 | |
4081 +# asm 2: vadd.i32 >x01=q11,<x01=q11,<c01=q13 | |
4082 +vadd.i32 q11,q11,q13 | |
4083 + | |
4084 +# qhasm: c23 aligned= mem128[input_3];input_3+=16 | |
4085 +# asm 1: vld1.8 {>c23=reg128#14%bot->c23=reg128#14%top},[<input_3=int32#4,: 128
]! | |
4086 +# asm 2: vld1.8 {>c23=d26->c23=d27},[<input_3=r3,: 128]! | |
4087 +vld1.8 {d26-d27},[r3,: 128]! | |
4088 + | |
4089 +# qhasm: 4x x23 += c23 | |
4090 +# asm 1: vadd.i32 >x23=reg128#13,<x23=reg128#13,<c23=reg128#14 | |
4091 +# asm 2: vadd.i32 >x23=q12,<x23=q12,<c23=q13 | |
4092 +vadd.i32 q12,q12,q13 | |
4093 + | |
4094 +# qhasm: x4 aligned= mem64[input_1]x4[1] | |
4095 +# asm 1: vld1.8 {<x4=reg128#14%bot},[<input_1=int32#2,: 64] | |
4096 +# asm 2: vld1.8 {<x4=d26},[<input_1=r1,: 64] | |
4097 +vld1.8 {d26},[r1,: 64] | |
4098 + | |
4099 +# qhasm: 2x mask unsigned>>=6 | |
4100 +# asm 1: vshr.u64 >mask=reg128#1,<mask=reg128#1,#6 | |
4101 +# asm 2: vshr.u64 >mask=q0,<mask=q0,#6 | |
4102 +vshr.u64 q0,q0,#6 | |
4103 + | |
4104 +# qhasm: c4 aligned= mem64[input_3]c4[1] | |
4105 +# asm 1: vld1.8 {<c4=reg128#15%bot},[<input_3=int32#4,: 64] | |
4106 +# asm 2: vld1.8 {<c4=d28},[<input_3=r3,: 64] | |
4107 +vld1.8 {d28},[r3,: 64] | |
4108 + | |
4109 +# qhasm: 4x x4 += c4 | |
4110 +# asm 1: vadd.i32 >x4=reg128#14,<x4=reg128#14,<c4=reg128#15 | |
4111 +# asm 2: vadd.i32 >x4=q13,<x4=q13,<c4=q14 | |
4112 +vadd.i32 q13,q13,q14 | |
4113 + | |
4114 +# qhasm: r0[0,1] = x01[0] unsigned* y01[0]; r0[2,3] = x01[1] unsigned* y01[
1] | |
4115 +# asm 1: vmull.u32 >r0=reg128#15,<x01=reg128#12%bot,<y01=reg128#2%bot | |
4116 +# asm 2: vmull.u32 >r0=q14,<x01=d22,<y01=d2 | |
4117 +vmull.u32 q14,d22,d2 | |
4118 + | |
4119 +# qhasm: r0[0,1] += x01[2] unsigned* _5y4[0]; r0[2,3] += x01[3] unsigned* _5y
4[1] | |
4120 +# asm 1: vmlal.u32 <r0=reg128#15,<x01=reg128#12%top,<_5y4=reg128#11%bot | |
4121 +# asm 2: vmlal.u32 <r0=q14,<x01=d23,<_5y4=d20 | |
4122 +vmlal.u32 q14,d23,d20 | |
4123 + | |
4124 +# qhasm: r0[0,1] += x23[0] unsigned* _5y23[2]; r0[2,3] += x23[1] unsigned* _5y2
3[3] | |
4125 +# asm 1: vmlal.u32 <r0=reg128#15,<x23=reg128#13%bot,<_5y23=reg128#9%top | |
4126 +# asm 2: vmlal.u32 <r0=q14,<x23=d24,<_5y23=d17 | |
4127 +vmlal.u32 q14,d24,d17 | |
4128 + | |
4129 +# qhasm: r0[0,1] += x23[2] unsigned* _5y23[0]; r0[2,3] += x23[3] unsigned* _5y2
3[1] | |
4130 +# asm 1: vmlal.u32 <r0=reg128#15,<x23=reg128#13%top,<_5y23=reg128#9%bot | |
4131 +# asm 2: vmlal.u32 <r0=q14,<x23=d25,<_5y23=d16 | |
4132 +vmlal.u32 q14,d25,d16 | |
4133 + | |
4134 +# qhasm: r0[0,1] += x4[0] unsigned* _5y01[2]; r0[2,3] += x4[1] unsigned* _5y0
1[3] | |
4135 +# asm 1: vmlal.u32 <r0=reg128#15,<x4=reg128#14%bot,<_5y01=reg128#3%top | |
4136 +# asm 2: vmlal.u32 <r0=q14,<x4=d26,<_5y01=d5 | |
4137 +vmlal.u32 q14,d26,d5 | |
4138 + | |
4139 +# qhasm: r1[0,1] = x01[0] unsigned* y01[2]; r1[2,3] = x01[1] unsigned* y01[
3] | |
4140 +# asm 1: vmull.u32 >r1=reg128#3,<x01=reg128#12%bot,<y01=reg128#2%top | |
4141 +# asm 2: vmull.u32 >r1=q2,<x01=d22,<y01=d3 | |
4142 +vmull.u32 q2,d22,d3 | |
4143 + | |
4144 +# qhasm: r1[0,1] += x01[2] unsigned* y01[0]; r1[2,3] += x01[3] unsigned* y01[
1] | |
4145 +# asm 1: vmlal.u32 <r1=reg128#3,<x01=reg128#12%top,<y01=reg128#2%bot | |
4146 +# asm 2: vmlal.u32 <r1=q2,<x01=d23,<y01=d2 | |
4147 +vmlal.u32 q2,d23,d2 | |
4148 + | |
4149 +# qhasm: r1[0,1] += x23[0] unsigned* _5y4[0]; r1[2,3] += x23[1] unsigned* _5y
4[1] | |
4150 +# asm 1: vmlal.u32 <r1=reg128#3,<x23=reg128#13%bot,<_5y4=reg128#11%bot | |
4151 +# asm 2: vmlal.u32 <r1=q2,<x23=d24,<_5y4=d20 | |
4152 +vmlal.u32 q2,d24,d20 | |
4153 + | |
4154 +# qhasm: r1[0,1] += x23[2] unsigned* _5y23[2]; r1[2,3] += x23[3] unsigned* _5y2
3[3] | |
4155 +# asm 1: vmlal.u32 <r1=reg128#3,<x23=reg128#13%top,<_5y23=reg128#9%top | |
4156 +# asm 2: vmlal.u32 <r1=q2,<x23=d25,<_5y23=d17 | |
4157 +vmlal.u32 q2,d25,d17 | |
4158 + | |
4159 +# qhasm: r1[0,1] += x4[0] unsigned* _5y23[0]; r1[2,3] += x4[1] unsigned* _5y2
3[1] | |
4160 +# asm 1: vmlal.u32 <r1=reg128#3,<x4=reg128#14%bot,<_5y23=reg128#9%bot | |
4161 +# asm 2: vmlal.u32 <r1=q2,<x4=d26,<_5y23=d16 | |
4162 +vmlal.u32 q2,d26,d16 | |
4163 + | |
4164 +# qhasm: r2[0,1] = x01[0] unsigned* y23[0]; r2[2,3] = x01[1] unsigned* y23[
1] | |
4165 +# asm 1: vmull.u32 >r2=reg128#16,<x01=reg128#12%bot,<y23=reg128#4%bot | |
4166 +# asm 2: vmull.u32 >r2=q15,<x01=d22,<y23=d6 | |
4167 +vmull.u32 q15,d22,d6 | |
4168 + | |
4169 +# qhasm: r2[0,1] += x01[2] unsigned* y01[2]; r2[2,3] += x01[3] unsigned* y01[
3] | |
4170 +# asm 1: vmlal.u32 <r2=reg128#16,<x01=reg128#12%top,<y01=reg128#2%top | |
4171 +# asm 2: vmlal.u32 <r2=q15,<x01=d23,<y01=d3 | |
4172 +vmlal.u32 q15,d23,d3 | |
4173 + | |
4174 +# qhasm: r2[0,1] += x23[0] unsigned* y01[0]; r2[2,3] += x23[1] unsigned* y01[
1] | |
4175 +# asm 1: vmlal.u32 <r2=reg128#16,<x23=reg128#13%bot,<y01=reg128#2%bot | |
4176 +# asm 2: vmlal.u32 <r2=q15,<x23=d24,<y01=d2 | |
4177 +vmlal.u32 q15,d24,d2 | |
4178 + | |
4179 +# qhasm: r2[0,1] += x23[2] unsigned* _5y4[0]; r2[2,3] += x23[3] unsigned* _5y
4[1] | |
4180 +# asm 1: vmlal.u32 <r2=reg128#16,<x23=reg128#13%top,<_5y4=reg128#11%bot | |
4181 +# asm 2: vmlal.u32 <r2=q15,<x23=d25,<_5y4=d20 | |
4182 +vmlal.u32 q15,d25,d20 | |
4183 + | |
4184 +# qhasm: r2[0,1] += x4[0] unsigned* _5y23[2]; r2[2,3] += x4[1] unsigned* _5y2
3[3] | |
4185 +# asm 1: vmlal.u32 <r2=reg128#16,<x4=reg128#14%bot,<_5y23=reg128#9%top | |
4186 +# asm 2: vmlal.u32 <r2=q15,<x4=d26,<_5y23=d17 | |
4187 +vmlal.u32 q15,d26,d17 | |
4188 + | |
4189 +# qhasm: r3[0,1] = x01[0] unsigned* y23[2]; r3[2,3] = x01[1] unsigned* y23[
3] | |
4190 +# asm 1: vmull.u32 >r3=reg128#9,<x01=reg128#12%bot,<y23=reg128#4%top | |
4191 +# asm 2: vmull.u32 >r3=q8,<x01=d22,<y23=d7 | |
4192 +vmull.u32 q8,d22,d7 | |
4193 + | |
4194 +# qhasm: r3[0,1] += x01[2] unsigned* y23[0]; r3[2,3] += x01[3] unsigned* y23[
1] | |
4195 +# asm 1: vmlal.u32 <r3=reg128#9,<x01=reg128#12%top,<y23=reg128#4%bot | |
4196 +# asm 2: vmlal.u32 <r3=q8,<x01=d23,<y23=d6 | |
4197 +vmlal.u32 q8,d23,d6 | |
4198 + | |
4199 +# qhasm: r3[0,1] += x23[0] unsigned* y01[2]; r3[2,3] += x23[1] unsigned* y01[
3] | |
4200 +# asm 1: vmlal.u32 <r3=reg128#9,<x23=reg128#13%bot,<y01=reg128#2%top | |
4201 +# asm 2: vmlal.u32 <r3=q8,<x23=d24,<y01=d3 | |
4202 +vmlal.u32 q8,d24,d3 | |
4203 + | |
4204 +# qhasm: r3[0,1] += x23[2] unsigned* y01[0]; r3[2,3] += x23[3] unsigned* y01[
1] | |
4205 +# asm 1: vmlal.u32 <r3=reg128#9,<x23=reg128#13%top,<y01=reg128#2%bot | |
4206 +# asm 2: vmlal.u32 <r3=q8,<x23=d25,<y01=d2 | |
4207 +vmlal.u32 q8,d25,d2 | |
4208 + | |
4209 +# qhasm: r3[0,1] += x4[0] unsigned* _5y4[0]; r3[2,3] += x4[1] unsigned* _5y
4[1] | |
4210 +# asm 1: vmlal.u32 <r3=reg128#9,<x4=reg128#14%bot,<_5y4=reg128#11%bot | |
4211 +# asm 2: vmlal.u32 <r3=q8,<x4=d26,<_5y4=d20 | |
4212 +vmlal.u32 q8,d26,d20 | |
4213 + | |
4214 +# qhasm: r4[0,1] = x01[0] unsigned* y4[0]; r4[2,3] = x01[1] unsigned* y4[1
] | |
4215 +# asm 1: vmull.u32 >r4=reg128#10,<x01=reg128#12%bot,<y4=reg128#10%bot | |
4216 +# asm 2: vmull.u32 >r4=q9,<x01=d22,<y4=d18 | |
4217 +vmull.u32 q9,d22,d18 | |
4218 + | |
4219 +# qhasm: r4[0,1] += x01[2] unsigned* y23[2]; r4[2,3] += x01[3] unsigned* y23[3
] | |
4220 +# asm 1: vmlal.u32 <r4=reg128#10,<x01=reg128#12%top,<y23=reg128#4%top | |
4221 +# asm 2: vmlal.u32 <r4=q9,<x01=d23,<y23=d7 | |
4222 +vmlal.u32 q9,d23,d7 | |
4223 + | |
4224 +# qhasm: r4[0,1] += x23[0] unsigned* y23[0]; r4[2,3] += x23[1] unsigned* y23[1
] | |
4225 +# asm 1: vmlal.u32 <r4=reg128#10,<x23=reg128#13%bot,<y23=reg128#4%bot | |
4226 +# asm 2: vmlal.u32 <r4=q9,<x23=d24,<y23=d6 | |
4227 +vmlal.u32 q9,d24,d6 | |
4228 + | |
4229 +# qhasm: r4[0,1] += x23[2] unsigned* y01[2]; r4[2,3] += x23[3] unsigned* y01[3
] | |
4230 +# asm 1: vmlal.u32 <r4=reg128#10,<x23=reg128#13%top,<y01=reg128#2%top | |
4231 +# asm 2: vmlal.u32 <r4=q9,<x23=d25,<y01=d3 | |
4232 +vmlal.u32 q9,d25,d3 | |
4233 + | |
4234 +# qhasm: r4[0,1] += x4[0] unsigned* y01[0]; r4[2,3] += x4[1] unsigned* y01[1
] | |
4235 +# asm 1: vmlal.u32 <r4=reg128#10,<x4=reg128#14%bot,<y01=reg128#2%bot | |
4236 +# asm 2: vmlal.u32 <r4=q9,<x4=d26,<y01=d2 | |
4237 +vmlal.u32 q9,d26,d2 | |
4238 + | |
4239 +# qhasm: 2x t1 = r0 unsigned>> 26 | |
4240 +# asm 1: vshr.u64 >t1=reg128#2,<r0=reg128#15,#26 | |
4241 +# asm 2: vshr.u64 >t1=q1,<r0=q14,#26 | |
4242 +vshr.u64 q1,q14,#26 | |
4243 + | |
4244 +# qhasm: r0 &= mask | |
4245 +# asm 1: vand >r0=reg128#4,<r0=reg128#15,<mask=reg128#1 | |
4246 +# asm 2: vand >r0=q3,<r0=q14,<mask=q0 | |
4247 +vand q3,q14,q0 | |
4248 + | |
4249 +# qhasm: 2x r1 += t1 | |
4250 +# asm 1: vadd.i64 >r1=reg128#2,<r1=reg128#3,<t1=reg128#2 | |
4251 +# asm 2: vadd.i64 >r1=q1,<r1=q2,<t1=q1 | |
4252 +vadd.i64 q1,q2,q1 | |
4253 + | |
4254 +# qhasm: 2x t4 = r3 unsigned>> 26 | |
4255 +# asm 1: vshr.u64 >t4=reg128#3,<r3=reg128#9,#26 | |
4256 +# asm 2: vshr.u64 >t4=q2,<r3=q8,#26 | |
4257 +vshr.u64 q2,q8,#26 | |
4258 + | |
4259 +# qhasm: r3 &= mask | |
4260 +# asm 1: vand >r3=reg128#9,<r3=reg128#9,<mask=reg128#1 | |
4261 +# asm 2: vand >r3=q8,<r3=q8,<mask=q0 | |
4262 +vand q8,q8,q0 | |
4263 + | |
4264 +# qhasm: 2x r4 += t4 | |
4265 +# asm 1: vadd.i64 >r4=reg128#3,<r4=reg128#10,<t4=reg128#3 | |
4266 +# asm 2: vadd.i64 >r4=q2,<r4=q9,<t4=q2 | |
4267 +vadd.i64 q2,q9,q2 | |
4268 + | |
4269 +# qhasm: 2x t2 = r1 unsigned>> 26 | |
4270 +# asm 1: vshr.u64 >t2=reg128#10,<r1=reg128#2,#26 | |
4271 +# asm 2: vshr.u64 >t2=q9,<r1=q1,#26 | |
4272 +vshr.u64 q9,q1,#26 | |
4273 + | |
4274 +# qhasm: r1 &= mask | |
4275 +# asm 1: vand >r1=reg128#2,<r1=reg128#2,<mask=reg128#1 | |
4276 +# asm 2: vand >r1=q1,<r1=q1,<mask=q0 | |
4277 +vand q1,q1,q0 | |
4278 + | |
4279 +# qhasm: 2x t0 = r4 unsigned>> 26 | |
4280 +# asm 1: vshr.u64 >t0=reg128#11,<r4=reg128#3,#26 | |
4281 +# asm 2: vshr.u64 >t0=q10,<r4=q2,#26 | |
4282 +vshr.u64 q10,q2,#26 | |
4283 + | |
4284 +# qhasm: 2x r2 += t2 | |
4285 +# asm 1: vadd.i64 >r2=reg128#10,<r2=reg128#16,<t2=reg128#10 | |
4286 +# asm 2: vadd.i64 >r2=q9,<r2=q15,<t2=q9 | |
4287 +vadd.i64 q9,q15,q9 | |
4288 + | |
4289 +# qhasm: r4 &= mask | |
4290 +# asm 1: vand >r4=reg128#3,<r4=reg128#3,<mask=reg128#1 | |
4291 +# asm 2: vand >r4=q2,<r4=q2,<mask=q0 | |
4292 +vand q2,q2,q0 | |
4293 + | |
4294 +# qhasm: 2x r0 += t0 | |
4295 +# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#11 | |
4296 +# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q10 | |
4297 +vadd.i64 q3,q3,q10 | |
4298 + | |
4299 +# qhasm: 2x t0 <<= 2 | |
4300 +# asm 1: vshl.i64 >t0=reg128#11,<t0=reg128#11,#2 | |
4301 +# asm 2: vshl.i64 >t0=q10,<t0=q10,#2 | |
4302 +vshl.i64 q10,q10,#2 | |
4303 + | |
4304 +# qhasm: 2x t3 = r2 unsigned>> 26 | |
4305 +# asm 1: vshr.u64 >t3=reg128#12,<r2=reg128#10,#26 | |
4306 +# asm 2: vshr.u64 >t3=q11,<r2=q9,#26 | |
4307 +vshr.u64 q11,q9,#26 | |
4308 + | |
4309 +# qhasm: 2x r0 += t0 | |
4310 +# asm 1: vadd.i64 >r0=reg128#4,<r0=reg128#4,<t0=reg128#11 | |
4311 +# asm 2: vadd.i64 >r0=q3,<r0=q3,<t0=q10 | |
4312 +vadd.i64 q3,q3,q10 | |
4313 + | |
4314 +# qhasm: x23 = r2 & mask | |
4315 +# asm 1: vand >x23=reg128#10,<r2=reg128#10,<mask=reg128#1 | |
4316 +# asm 2: vand >x23=q9,<r2=q9,<mask=q0 | |
4317 +vand q9,q9,q0 | |
4318 + | |
4319 +# qhasm: 2x r3 += t3 | |
4320 +# asm 1: vadd.i64 >r3=reg128#9,<r3=reg128#9,<t3=reg128#12 | |
4321 +# asm 2: vadd.i64 >r3=q8,<r3=q8,<t3=q11 | |
4322 +vadd.i64 q8,q8,q11 | |
4323 + | |
4324 +# qhasm: 2x t1 = r0 unsigned>> 26 | |
4325 +# asm 1: vshr.u64 >t1=reg128#11,<r0=reg128#4,#26 | |
4326 +# asm 2: vshr.u64 >t1=q10,<r0=q3,#26 | |
4327 +vshr.u64 q10,q3,#26 | |
4328 + | |
4329 +# qhasm: x23 = x23[0,2,1,3] | |
4330 +# asm 1: vtrn.32 <x23=reg128#10%bot,<x23=reg128#10%top | |
4331 +# asm 2: vtrn.32 <x23=d18,<x23=d19 | |
4332 +vtrn.32 d18,d19 | |
4333 + | |
4334 +# qhasm: x01 = r0 & mask | |
4335 +# asm 1: vand >x01=reg128#4,<r0=reg128#4,<mask=reg128#1 | |
4336 +# asm 2: vand >x01=q3,<r0=q3,<mask=q0 | |
4337 +vand q3,q3,q0 | |
4338 + | |
4339 +# qhasm: 2x r1 += t1 | |
4340 +# asm 1: vadd.i64 >r1=reg128#2,<r1=reg128#2,<t1=reg128#11 | |
4341 +# asm 2: vadd.i64 >r1=q1,<r1=q1,<t1=q10 | |
4342 +vadd.i64 q1,q1,q10 | |
4343 + | |
4344 +# qhasm: 2x t4 = r3 unsigned>> 26 | |
4345 +# asm 1: vshr.u64 >t4=reg128#11,<r3=reg128#9,#26 | |
4346 +# asm 2: vshr.u64 >t4=q10,<r3=q8,#26 | |
4347 +vshr.u64 q10,q8,#26 | |
4348 + | |
4349 +# qhasm: x01 = x01[0,2,1,3] | |
4350 +# asm 1: vtrn.32 <x01=reg128#4%bot,<x01=reg128#4%top | |
4351 +# asm 2: vtrn.32 <x01=d6,<x01=d7 | |
4352 +vtrn.32 d6,d7 | |
4353 + | |
4354 +# qhasm: r3 &= mask | |
4355 +# asm 1: vand >r3=reg128#1,<r3=reg128#9,<mask=reg128#1 | |
4356 +# asm 2: vand >r3=q0,<r3=q8,<mask=q0 | |
4357 +vand q0,q8,q0 | |
4358 + | |
4359 +# qhasm: r1 = r1[0,2,1,3] | |
4360 +# asm 1: vtrn.32 <r1=reg128#2%bot,<r1=reg128#2%top | |
4361 +# asm 2: vtrn.32 <r1=d2,<r1=d3 | |
4362 +vtrn.32 d2,d3 | |
4363 + | |
4364 +# qhasm: 2x x4 = r4 + t4 | |
4365 +# asm 1: vadd.i64 >x4=reg128#3,<r4=reg128#3,<t4=reg128#11 | |
4366 +# asm 2: vadd.i64 >x4=q2,<r4=q2,<t4=q10 | |
4367 +vadd.i64 q2,q2,q10 | |
4368 + | |
4369 +# qhasm: r3 = r3[0,2,1,3] | |
4370 +# asm 1: vtrn.32 <r3=reg128#1%bot,<r3=reg128#1%top | |
4371 +# asm 2: vtrn.32 <r3=d0,<r3=d1 | |
4372 +vtrn.32 d0,d1 | |
4373 + | |
4374 +# qhasm: x01 = x01[0,1] r1[0,1] | |
4375 +# asm 1: vext.32 <x01=reg128#4%top,<r1=reg128#2%bot,<r1=reg128#2%bot,#0 | |
4376 +# asm 2: vext.32 <x01=d7,<r1=d2,<r1=d2,#0 | |
4377 +vext.32 d7,d2,d2,#0 | |
4378 + | |
4379 +# qhasm: x23 = x23[0,1] r3[0,1] | |
4380 +# asm 1: vext.32 <x23=reg128#10%top,<r3=reg128#1%bot,<r3=reg128#1%bot,#0 | |
4381 +# asm 2: vext.32 <x23=d19,<r3=d0,<r3=d0,#0 | |
4382 +vext.32 d19,d0,d0,#0 | |
4383 + | |
4384 +# qhasm: x4 = x4[0,2,1,3] | |
4385 +# asm 1: vtrn.32 <x4=reg128#3%bot,<x4=reg128#3%top | |
4386 +# asm 2: vtrn.32 <x4=d4,<x4=d5 | |
4387 +vtrn.32 d4,d5 | |
4388 + | |
4389 +# qhasm: mem128[input_0] aligned= x01;input_0+=16 | |
4390 +# asm 1: vst1.8 {<x01=reg128#4%bot-<x01=reg128#4%top},[<input_0=int32#1,: 128]! | |
4391 +# asm 2: vst1.8 {<x01=d6-<x01=d7},[<input_0=r0,: 128]! | |
4392 +vst1.8 {d6-d7},[r0,: 128]! | |
4393 + | |
4394 +# qhasm: mem128[input_0] aligned= x23;input_0+=16 | |
4395 +# asm 1: vst1.8 {<x23=reg128#10%bot-<x23=reg128#10%top},[<input_0=int32#1,: 128
]! | |
4396 +# asm 2: vst1.8 {<x23=d18-<x23=d19},[<input_0=r0,: 128]! | |
4397 +vst1.8 {d18-d19},[r0,: 128]! | |
4398 + | |
4399 +# qhasm: mem64[input_0] aligned= x4[0] | |
4400 +# asm 1: vst1.8 <x4=reg128#3%bot,[<input_0=int32#1,: 64] | |
4401 +# asm 2: vst1.8 <x4=d4,[<input_0=r0,: 64] | |
4402 +vst1.8 d4,[r0,: 64] | |
4403 + | |
4404 +# qhasm: return | |
4405 +add sp,sp,#0 | |
4406 +bx lr | |
4407 diff --git a/crypto/poly1305/poly1305_vec.c b/crypto/poly1305/poly1305_vec.c | |
4408 new file mode 100644 | |
4409 index 0000000..c546200 | |
4410 --- /dev/null | |
4411 +++ b/crypto/poly1305/poly1305_vec.c | |
4412 @@ -0,0 +1,733 @@ | |
4413 +/* ==================================================================== | |
4414 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
4415 + * | |
4416 + * Redistribution and use in source and binary forms, with or without | |
4417 + * modification, are permitted provided that the following conditions | |
4418 + * are met: | |
4419 + * | |
4420 + * 1. Redistributions of source code must retain the above copyright | |
4421 + * notice, this list of conditions and the following disclaimer. | |
4422 + * | |
4423 + * 2. Redistributions in binary form must reproduce the above copyright | |
4424 + * notice, this list of conditions and the following disclaimer in | |
4425 + * the documentation and/or other materials provided with the | |
4426 + * distribution. | |
4427 + * | |
4428 + * 3. All advertising materials mentioning features or use of this | |
4429 + * software must display the following acknowledgment: | |
4430 + * "This product includes software developed by the OpenSSL Project | |
4431 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
4432 + * | |
4433 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
4434 + * endorse or promote products derived from this software without | |
4435 + * prior written permission. For written permission, please contact | |
4436 + * licensing@OpenSSL.org. | |
4437 + * | |
4438 + * 5. Products derived from this software may not be called "OpenSSL" | |
4439 + * nor may "OpenSSL" appear in their names without prior written | |
4440 + * permission of the OpenSSL Project. | |
4441 + * | |
4442 + * 6. Redistributions of any form whatsoever must retain the following | |
4443 + * acknowledgment: | |
4444 + * "This product includes software developed by the OpenSSL Project | |
4445 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
4446 + * | |
4447 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
4448 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
4449 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
4450 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
4451 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
4452 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
4453 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
4454 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
4455 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
4456 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
4457 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
4458 + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
4459 + * ==================================================================== | |
4460 + */ | |
4461 + | |
4462 +/* This implementation of poly1305 is by Andrew Moon | |
4463 + * (https://github.com/floodyberry/poly1305-donna) and released as public | |
4464 + * domain. It implements SIMD vectorization based on the algorithm described in | |
4465 + * http://cr.yp.to/papers.html#neoncrypto. Unrolled to 2 powers, i.e. 64 byte | |
4466 + * block size | |
4467 +*/ | |
4468 + | |
4469 +#include <emmintrin.h> | |
4470 +#include <stdint.h> | |
4471 +#include <openssl/opensslconf.h> | |
4472 + | |
4473 +#if !defined(OPENSSL_NO_POLY1305) | |
4474 + | |
4475 +#include <openssl/poly1305.h> | |
4476 + | |
4477 +#define ALIGN(x) __attribute__((aligned(x))) | |
4478 +#define INLINE inline | |
4479 +#define U8TO64_LE(m) (*(uint64_t*)(m)) | |
4480 +#define U8TO32_LE(m) (*(uint32_t*)(m)) | |
4481 +#define U64TO8_LE(m,v) (*(uint64_t*)(m)) = v | |
4482 + | |
4483 +typedef __m128i xmmi; | |
4484 +typedef unsigned __int128 uint128_t; | |
4485 + | |
4486 +static const uint32_t ALIGN(16) poly1305_x64_sse2_message_mask[4] = | |
4487 + {(1 << 26) - 1, 0, (1 << 26) - 1, 0}; | |
4488 +static const uint32_t ALIGN(16) poly1305_x64_sse2_5[4] = {5, 0, 5, 0}; | |
4489 +static const uint32_t ALIGN(16) poly1305_x64_sse2_1shl128[4] = | |
4490 + {(1 << 24), 0, (1 << 24), 0}; | |
4491 + | |
4492 +static uint128_t INLINE | |
4493 +add128(uint128_t a, uint128_t b) | |
4494 + { | |
4495 + return a + b; | |
4496 + } | |
4497 + | |
4498 +static uint128_t INLINE | |
4499 +add128_64(uint128_t a, uint64_t b) | |
4500 + { | |
4501 + return a + b; | |
4502 + } | |
4503 + | |
4504 +static uint128_t INLINE | |
4505 +mul64x64_128(uint64_t a, uint64_t b) | |
4506 + { | |
4507 + return (uint128_t)a * b; | |
4508 + } | |
4509 + | |
4510 +static uint64_t INLINE | |
4511 +lo128(uint128_t a) | |
4512 + { | |
4513 + return (uint64_t)a; | |
4514 + } | |
4515 + | |
4516 +static uint64_t INLINE | |
4517 +shr128(uint128_t v, const int shift) | |
4518 + { | |
4519 + return (uint64_t)(v >> shift); | |
4520 + } | |
4521 + | |
4522 +static uint64_t INLINE | |
4523 +shr128_pair(uint64_t hi, uint64_t lo, const int shift) | |
4524 + { | |
4525 + return (uint64_t)((((uint128_t)hi << 64) | lo) >> shift); | |
4526 + } | |
4527 + | |
4528 +typedef struct poly1305_power_t | |
4529 + { | |
4530 + union | |
4531 + { | |
4532 + xmmi v; | |
4533 + uint64_t u[2]; | |
4534 + uint32_t d[4]; | |
4535 + } R20,R21,R22,R23,R24,S21,S22,S23,S24; | |
4536 + } poly1305_power; | |
4537 + | |
4538 +typedef struct poly1305_state_internal_t | |
4539 + { | |
4540 + poly1305_power P[2]; /* 288 bytes, top 32 bit halves unused = 144 | |
4541 + bytes of free storage */ | |
4542 + union | |
4543 + { | |
4544 + xmmi H[5]; /* 80 bytes */ | |
4545 + uint64_t HH[10]; | |
4546 + }; | |
4547 + /* uint64_t r0,r1,r2; [24 bytes] */ | |
4548 + /* uint64_t pad0,pad1; [16 bytes] */ | |
4549 + uint64_t started; /* 8 bytes */ | |
4550 + uint64_t leftover; /* 8 bytes */ | |
4551 + uint8_t buffer[64]; /* 64 bytes */ | |
4552 + } poly1305_state_internal; /* 448 bytes total + 63 bytes for | |
4553 + alignment = 511 bytes raw */ | |
4554 + | |
4555 +static poly1305_state_internal INLINE | |
4556 +*poly1305_aligned_state(poly1305_state *state) | |
4557 + { | |
4558 + return (poly1305_state_internal *)(((uint64_t)state + 63) & ~63); | |
4559 + } | |
4560 + | |
4561 +/* copy 0-63 bytes */ | |
4562 +static void INLINE | |
4563 +poly1305_block_copy(uint8_t *dst, const uint8_t *src, size_t bytes) | |
4564 + { | |
4565 + size_t offset = src - dst; | |
4566 + if (bytes & 32) | |
4567 + { | |
4568 + _mm_storeu_si128((xmmi *)(dst + 0), _mm_loadu_si128((xmmi *)(dst
+ offset + 0))); | |
4569 + _mm_storeu_si128((xmmi *)(dst + 16), _mm_loadu_si128((xmmi *)(ds
t + offset + 16))); | |
4570 + dst += 32; | |
4571 + } | |
4572 + if (bytes & 16) | |
4573 + { | |
4574 + _mm_storeu_si128((xmmi *)dst, | |
4575 + _mm_loadu_si128((xmmi *)(dst + offset))); | |
4576 + dst += 16; | |
4577 + } | |
4578 + if (bytes & 8) | |
4579 + { | |
4580 + *(uint64_t *)dst = *(uint64_t *)(dst + offset); | |
4581 + dst += 8; | |
4582 + } | |
4583 + if (bytes & 4) | |
4584 + { | |
4585 + *(uint32_t *)dst = *(uint32_t *)(dst + offset); | |
4586 + dst += 4; | |
4587 + } | |
4588 + if (bytes & 2) | |
4589 + { | |
4590 + *(uint16_t *)dst = *(uint16_t *)(dst + offset); | |
4591 + dst += 2; | |
4592 + } | |
4593 + if (bytes & 1) | |
4594 + { | |
4595 + *( uint8_t *)dst = *( uint8_t *)(dst + offset); | |
4596 + } | |
4597 + } | |
4598 + | |
4599 +/* zero 0-15 bytes */ | |
4600 +static void INLINE | |
4601 +poly1305_block_zero(uint8_t *dst, size_t bytes) | |
4602 + { | |
4603 + if (bytes & 8) { *(uint64_t *)dst = 0; dst += 8; } | |
4604 + if (bytes & 4) { *(uint32_t *)dst = 0; dst += 4; } | |
4605 + if (bytes & 2) { *(uint16_t *)dst = 0; dst += 2; } | |
4606 + if (bytes & 1) { *( uint8_t *)dst = 0; } | |
4607 + } | |
4608 + | |
4609 +static size_t INLINE | |
4610 +poly1305_min(size_t a, size_t b) | |
4611 + { | |
4612 + return (a < b) ? a : b; | |
4613 + } | |
4614 + | |
4615 +void | |
4616 +CRYPTO_poly1305_init(poly1305_state *state, const unsigned char key[32]) | |
4617 + { | |
4618 + poly1305_state_internal *st = poly1305_aligned_state(state); | |
4619 + poly1305_power *p; | |
4620 + uint64_t r0,r1,r2; | |
4621 + uint64_t t0,t1; | |
4622 + | |
4623 + /* clamp key */ | |
4624 + t0 = U8TO64_LE(key + 0); | |
4625 + t1 = U8TO64_LE(key + 8); | |
4626 + r0 = t0 & 0xffc0fffffff; t0 >>= 44; t0 |= t1 << 20; | |
4627 + r1 = t0 & 0xfffffc0ffff; t1 >>= 24; | |
4628 + r2 = t1 & 0x00ffffffc0f; | |
4629 + | |
4630 + /* store r in un-used space of st->P[1] */ | |
4631 + p = &st->P[1]; | |
4632 + p->R20.d[1] = (uint32_t)(r0 ); | |
4633 + p->R20.d[3] = (uint32_t)(r0 >> 32); | |
4634 + p->R21.d[1] = (uint32_t)(r1 ); | |
4635 + p->R21.d[3] = (uint32_t)(r1 >> 32); | |
4636 + p->R22.d[1] = (uint32_t)(r2 ); | |
4637 + p->R22.d[3] = (uint32_t)(r2 >> 32); | |
4638 + | |
4639 + /* store pad */ | |
4640 + p->R23.d[1] = U8TO32_LE(key + 16); | |
4641 + p->R23.d[3] = U8TO32_LE(key + 20); | |
4642 + p->R24.d[1] = U8TO32_LE(key + 24); | |
4643 + p->R24.d[3] = U8TO32_LE(key + 28); | |
4644 + | |
4645 + /* H = 0 */ | |
4646 + st->H[0] = _mm_setzero_si128(); | |
4647 + st->H[1] = _mm_setzero_si128(); | |
4648 + st->H[2] = _mm_setzero_si128(); | |
4649 + st->H[3] = _mm_setzero_si128(); | |
4650 + st->H[4] = _mm_setzero_si128(); | |
4651 + | |
4652 + st->started = 0; | |
4653 + st->leftover = 0; | |
4654 + } | |
4655 + | |
4656 +static void | |
4657 +poly1305_first_block(poly1305_state_internal *st, const uint8_t *m) | |
4658 + { | |
4659 + const xmmi MMASK = | |
4660 + _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); | |
4661 + const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5); | |
4662 + const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128); | |
4663 + xmmi T5,T6; | |
4664 + poly1305_power *p; | |
4665 + uint128_t d[3]; | |
4666 + uint64_t r0,r1,r2; | |
4667 + uint64_t r20,r21,r22,s22; | |
4668 + uint64_t pad0,pad1; | |
4669 + uint64_t c; | |
4670 + uint64_t i; | |
4671 + | |
4672 + /* pull out stored info */ | |
4673 + p = &st->P[1]; | |
4674 + | |
4675 + r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; | |
4676 + r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; | |
4677 + r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; | |
4678 + pad0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1]; | |
4679 + pad1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1]; | |
4680 + | |
4681 + /* compute powers r^2,r^4 */ | |
4682 + r20 = r0; | |
4683 + r21 = r1; | |
4684 + r22 = r2; | |
4685 + for (i = 0; i < 2; i++) | |
4686 + { | |
4687 + s22 = r22 * (5 << 2); | |
4688 + | |
4689 + d[0] = add128(mul64x64_128(r20, r20), mul64x64_128(r21 * 2, s22)
); | |
4690 + d[1] = add128(mul64x64_128(r22, s22), mul64x64_128(r20 * 2, r21)
); | |
4691 + d[2] = add128(mul64x64_128(r21, r21), mul64x64_128(r22 * 2, r20)
); | |
4692 + | |
4693 + r20 = lo128(d[0]) & 0xfffffffffff; c
= shr128(d[0], 44); | |
4694 + d[1] = add128_64(d[1], c); r21 = lo128(d[1]) & 0xfffffffffff; c
= shr128(d[1], 44); | |
4695 + d[2] = add128_64(d[2], c); r22 = lo128(d[2]) & 0x3ffffffffff; c
= shr128(d[2], 42); | |
4696 + r20 += c * 5; c = (r20 >> 44); r20 = r20 & 0xfffffffffff; | |
4697 + r21 += c; | |
4698 + | |
4699 + p->R20.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)( r20
) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); | |
4700 + p->R21.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r20 >
> 26) | (r21 << 18)) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); | |
4701 + p->R22.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >
> 8) ) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); | |
4702 + p->R23.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >
> 34) | (r22 << 10)) & 0x3ffffff), _MM_SHUFFLE(1,0,1,0)); | |
4703 + p->R24.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r22 >
> 16) ) ), _MM_SHUFFLE(1,0,1,0)); | |
4704 + p->S21.v = _mm_mul_epu32(p->R21.v, FIVE); | |
4705 + p->S22.v = _mm_mul_epu32(p->R22.v, FIVE); | |
4706 + p->S23.v = _mm_mul_epu32(p->R23.v, FIVE); | |
4707 + p->S24.v = _mm_mul_epu32(p->R24.v, FIVE); | |
4708 + p--; | |
4709 + } | |
4710 + | |
4711 + /* put saved info back */ | |
4712 + p = &st->P[1]; | |
4713 + p->R20.d[1] = (uint32_t)(r0 ); | |
4714 + p->R20.d[3] = (uint32_t)(r0 >> 32); | |
4715 + p->R21.d[1] = (uint32_t)(r1 ); | |
4716 + p->R21.d[3] = (uint32_t)(r1 >> 32); | |
4717 + p->R22.d[1] = (uint32_t)(r2 ); | |
4718 + p->R22.d[3] = (uint32_t)(r2 >> 32); | |
4719 + p->R23.d[1] = (uint32_t)(pad0 ); | |
4720 + p->R23.d[3] = (uint32_t)(pad0 >> 32); | |
4721 + p->R24.d[1] = (uint32_t)(pad1 ); | |
4722 + p->R24.d[3] = (uint32_t)(pad1 >> 32); | |
4723 + | |
4724 + /* H = [Mx,My] */ | |
4725 + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi6
4((xmmi *)(m + 16))); | |
4726 + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi6
4((xmmi *)(m + 24))); | |
4727 + st->H[0] = _mm_and_si128(MMASK, T5); | |
4728 + st->H[1] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
4729 + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)); | |
4730 + st->H[2] = _mm_and_si128(MMASK, T5); | |
4731 + st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
4732 + st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); | |
4733 + } | |
4734 + | |
4735 +static void | |
4736 +poly1305_blocks(poly1305_state_internal *st, const uint8_t *m, size_t bytes) | |
4737 + { | |
4738 + const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask
); | |
4739 + const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5); | |
4740 + const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128); | |
4741 + | |
4742 + poly1305_power *p; | |
4743 + xmmi H0,H1,H2,H3,H4; | |
4744 + xmmi T0,T1,T2,T3,T4,T5,T6; | |
4745 + xmmi M0,M1,M2,M3,M4; | |
4746 + xmmi C1,C2; | |
4747 + | |
4748 + H0 = st->H[0]; | |
4749 + H1 = st->H[1]; | |
4750 + H2 = st->H[2]; | |
4751 + H3 = st->H[3]; | |
4752 + H4 = st->H[4]; | |
4753 + | |
4754 + while (bytes >= 64) | |
4755 + { | |
4756 + /* H *= [r^4,r^4] */ | |
4757 + p = &st->P[0]; | |
4758 + T0 = _mm_mul_epu32(H0, p->R20.v); | |
4759 + T1 = _mm_mul_epu32(H0, p->R21.v); | |
4760 + T2 = _mm_mul_epu32(H0, p->R22.v); | |
4761 + T3 = _mm_mul_epu32(H0, p->R23.v); | |
4762 + T4 = _mm_mul_epu32(H0, p->R24.v); | |
4763 + T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
4764 + T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
4765 + T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
4766 + T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
4767 + T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
4768 + T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
4769 + T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
4770 + T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
4771 + T5 = _mm_mul_epu32(H1, p->R23.v);
T4 = _mm_add_epi64(T4, T5); | |
4772 + T5 = _mm_mul_epu32(H2, p->R22.v);
T4 = _mm_add_epi64(T4, T5); | |
4773 + T5 = _mm_mul_epu32(H3, p->R21.v);
T4 = _mm_add_epi64(T4, T5); | |
4774 + T5 = _mm_mul_epu32(H4, p->R20.v);
T4 = _mm_add_epi64(T4, T5); | |
4775 + | |
4776 + /* H += [Mx,My]*[r^2,r^2] */ | |
4777 + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_lo
adl_epi64((xmmi *)(m + 16))); | |
4778 + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_lo
adl_epi64((xmmi *)(m + 24))); | |
4779 + M0 = _mm_and_si128(MMASK, T5); | |
4780 + M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
4781 + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)
); | |
4782 + M2 = _mm_and_si128(MMASK, T5); | |
4783 + M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
4784 + M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); | |
4785 + | |
4786 + p = &st->P[1]; | |
4787 + T5 = _mm_mul_epu32(M0, p->R20.v); T6 = _mm_mul_epu32(M0, p->R21.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
4788 + T5 = _mm_mul_epu32(M1, p->S24.v); T6 = _mm_mul_epu32(M1, p->R20.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
4789 + T5 = _mm_mul_epu32(M2, p->S23.v); T6 = _mm_mul_epu32(M2, p->S24.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
4790 + T5 = _mm_mul_epu32(M3, p->S22.v); T6 = _mm_mul_epu32(M3, p->S23.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
4791 + T5 = _mm_mul_epu32(M4, p->S21.v); T6 = _mm_mul_epu32(M4, p->S22.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
4792 + T5 = _mm_mul_epu32(M0, p->R22.v); T6 = _mm_mul_epu32(M0, p->R23.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
4793 + T5 = _mm_mul_epu32(M1, p->R21.v); T6 = _mm_mul_epu32(M1, p->R22.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
4794 + T5 = _mm_mul_epu32(M2, p->R20.v); T6 = _mm_mul_epu32(M2, p->R21.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
4795 + T5 = _mm_mul_epu32(M3, p->S24.v); T6 = _mm_mul_epu32(M3, p->R20.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
4796 + T5 = _mm_mul_epu32(M4, p->S23.v); T6 = _mm_mul_epu32(M4, p->S24.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
4797 + T5 = _mm_mul_epu32(M0, p->R24.v);
T4 = _mm_add_epi64(T4, T5); | |
4798 + T5 = _mm_mul_epu32(M1, p->R23.v);
T4 = _mm_add_epi64(T4, T5); | |
4799 + T5 = _mm_mul_epu32(M2, p->R22.v);
T4 = _mm_add_epi64(T4, T5); | |
4800 + T5 = _mm_mul_epu32(M3, p->R21.v);
T4 = _mm_add_epi64(T4, T5); | |
4801 + T5 = _mm_mul_epu32(M4, p->R20.v);
T4 = _mm_add_epi64(T4, T5); | |
4802 + | |
4803 + /* H += [Mx,My] */ | |
4804 + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 32)), _mm_l
oadl_epi64((xmmi *)(m + 48))); | |
4805 + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 40)), _mm_l
oadl_epi64((xmmi *)(m + 56))); | |
4806 + M0 = _mm_and_si128(MMASK, T5); | |
4807 + M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
4808 + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)
); | |
4809 + M2 = _mm_and_si128(MMASK, T5); | |
4810 + M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
4811 + M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); | |
4812 + | |
4813 + T0 = _mm_add_epi64(T0, M0); | |
4814 + T1 = _mm_add_epi64(T1, M1); | |
4815 + T2 = _mm_add_epi64(T2, M2); | |
4816 + T3 = _mm_add_epi64(T3, M3); | |
4817 + T4 = _mm_add_epi64(T4, M4); | |
4818 + | |
4819 + /* reduce */ | |
4820 + C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _
mm_and_si128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C
1); T4 = _mm_add_epi64(T4, C2); | |
4821 + C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _
mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C
1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); | |
4822 + C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _
mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C
1); T1 = _mm_add_epi64(T1, C2); | |
4823 + C1 = _mm_srli_epi64(T3, 26); T3 = _
mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C
1); | |
4824 + | |
4825 + /* H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx,My]) */ | |
4826 + H0 = T0; | |
4827 + H1 = T1; | |
4828 + H2 = T2; | |
4829 + H3 = T3; | |
4830 + H4 = T4; | |
4831 + | |
4832 + m += 64; | |
4833 + bytes -= 64; | |
4834 + } | |
4835 + | |
4836 + st->H[0] = H0; | |
4837 + st->H[1] = H1; | |
4838 + st->H[2] = H2; | |
4839 + st->H[3] = H3; | |
4840 + st->H[4] = H4; | |
4841 + } | |
4842 + | |
4843 +static size_t | |
4844 +poly1305_combine(poly1305_state_internal *st, const uint8_t *m, size_t bytes) | |
4845 + { | |
4846 + const xmmi MMASK = | |
4847 + _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); | |
4848 + const xmmi HIBIT = _mm_load_si128((xmmi*)poly1305_x64_sse2_1shl128); | |
4849 + const xmmi FIVE = _mm_load_si128((xmmi*)poly1305_x64_sse2_5); | |
4850 + | |
4851 + poly1305_power *p; | |
4852 + xmmi H0,H1,H2,H3,H4; | |
4853 + xmmi M0,M1,M2,M3,M4; | |
4854 + xmmi T0,T1,T2,T3,T4,T5,T6; | |
4855 + xmmi C1,C2; | |
4856 + | |
4857 + uint64_t r0,r1,r2; | |
4858 + uint64_t t0,t1,t2,t3,t4; | |
4859 + uint64_t c; | |
4860 + size_t consumed = 0; | |
4861 + | |
4862 + H0 = st->H[0]; | |
4863 + H1 = st->H[1]; | |
4864 + H2 = st->H[2]; | |
4865 + H3 = st->H[3]; | |
4866 + H4 = st->H[4]; | |
4867 + | |
4868 + /* p = [r^2,r^2] */ | |
4869 + p = &st->P[1]; | |
4870 + | |
4871 + if (bytes >= 32) | |
4872 + { | |
4873 + /* H *= [r^2,r^2] */ | |
4874 + T0 = _mm_mul_epu32(H0, p->R20.v); | |
4875 + T1 = _mm_mul_epu32(H0, p->R21.v); | |
4876 + T2 = _mm_mul_epu32(H0, p->R22.v); | |
4877 + T3 = _mm_mul_epu32(H0, p->R23.v); | |
4878 + T4 = _mm_mul_epu32(H0, p->R24.v); | |
4879 + T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
4880 + T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
4881 + T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
4882 + T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22.
v); T0 = _mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
4883 + T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
4884 + T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
4885 + T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
4886 + T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24.
v); T2 = _mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
4887 + T5 = _mm_mul_epu32(H1, p->R23.v);
T4 = _mm_add_epi64(T4, T5); | |
4888 + T5 = _mm_mul_epu32(H2, p->R22.v);
T4 = _mm_add_epi64(T4, T5); | |
4889 + T5 = _mm_mul_epu32(H3, p->R21.v);
T4 = _mm_add_epi64(T4, T5); | |
4890 + T5 = _mm_mul_epu32(H4, p->R20.v);
T4 = _mm_add_epi64(T4, T5); | |
4891 + | |
4892 + /* H += [Mx,My] */ | |
4893 + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_lo
adl_epi64((xmmi *)(m + 16))); | |
4894 + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_lo
adl_epi64((xmmi *)(m + 24))); | |
4895 + M0 = _mm_and_si128(MMASK, T5); | |
4896 + M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
4897 + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)
); | |
4898 + M2 = _mm_and_si128(MMASK, T5); | |
4899 + M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); | |
4900 + M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); | |
4901 + | |
4902 + T0 = _mm_add_epi64(T0, M0); | |
4903 + T1 = _mm_add_epi64(T1, M1); | |
4904 + T2 = _mm_add_epi64(T2, M2); | |
4905 + T3 = _mm_add_epi64(T3, M3); | |
4906 + T4 = _mm_add_epi64(T4, M4); | |
4907 + | |
4908 + /* reduce */ | |
4909 + C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _
mm_and_si128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C
1); T4 = _mm_add_epi64(T4, C2); | |
4910 + C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _
mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C
1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); | |
4911 + C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _
mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C
1); T1 = _mm_add_epi64(T1, C2); | |
4912 + C1 = _mm_srli_epi64(T3, 26); T3 = _
mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C
1); | |
4913 + | |
4914 + /* H = (H*[r^2,r^2] + [Mx,My]) */ | |
4915 + H0 = T0; | |
4916 + H1 = T1; | |
4917 + H2 = T2; | |
4918 + H3 = T3; | |
4919 + H4 = T4; | |
4920 + | |
4921 + consumed = 32; | |
4922 + } | |
4923 + | |
4924 + /* finalize, H *= [r^2,r] */ | |
4925 + r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; | |
4926 + r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; | |
4927 + r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; | |
4928 + | |
4929 + p->R20.d[2] = (uint32_t)( r0 ) & 0x3ffffff; | |
4930 + p->R21.d[2] = (uint32_t)((r0 >> 26) | (r1 << 18)) & 0x3ffffff; | |
4931 + p->R22.d[2] = (uint32_t)((r1 >> 8) ) & 0x3ffffff; | |
4932 + p->R23.d[2] = (uint32_t)((r1 >> 34) | (r2 << 10)) & 0x3ffffff; | |
4933 + p->R24.d[2] = (uint32_t)((r2 >> 16) ) ; | |
4934 + p->S21.d[2] = p->R21.d[2] * 5; | |
4935 + p->S22.d[2] = p->R22.d[2] * 5; | |
4936 + p->S23.d[2] = p->R23.d[2] * 5; | |
4937 + p->S24.d[2] = p->R24.d[2] * 5; | |
4938 + | |
4939 + /* H *= [r^2,r] */ | |
4940 + T0 = _mm_mul_epu32(H0, p->R20.v); | |
4941 + T1 = _mm_mul_epu32(H0, p->R21.v); | |
4942 + T2 = _mm_mul_epu32(H0, p->R22.v); | |
4943 + T3 = _mm_mul_epu32(H0, p->R23.v); | |
4944 + T4 = _mm_mul_epu32(H0, p->R24.v); | |
4945 + T5 = _mm_mul_epu32(H1, p->S24.v); T6 = _mm_mul_epu32(H1, p->R20.v); T0 =
_mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
4946 + T5 = _mm_mul_epu32(H2, p->S23.v); T6 = _mm_mul_epu32(H2, p->S24.v); T0 =
_mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
4947 + T5 = _mm_mul_epu32(H3, p->S22.v); T6 = _mm_mul_epu32(H3, p->S23.v); T0 =
_mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
4948 + T5 = _mm_mul_epu32(H4, p->S21.v); T6 = _mm_mul_epu32(H4, p->S22.v); T0 =
_mm_add_epi64(T0, T5); T1 = _mm_add_epi64(T1, T6); | |
4949 + T5 = _mm_mul_epu32(H1, p->R21.v); T6 = _mm_mul_epu32(H1, p->R22.v); T2 =
_mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
4950 + T5 = _mm_mul_epu32(H2, p->R20.v); T6 = _mm_mul_epu32(H2, p->R21.v); T2 =
_mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
4951 + T5 = _mm_mul_epu32(H3, p->S24.v); T6 = _mm_mul_epu32(H3, p->R20.v); T2 =
_mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
4952 + T5 = _mm_mul_epu32(H4, p->S23.v); T6 = _mm_mul_epu32(H4, p->S24.v); T2 =
_mm_add_epi64(T2, T5); T3 = _mm_add_epi64(T3, T6); | |
4953 + T5 = _mm_mul_epu32(H1, p->R23.v); T4 =
_mm_add_epi64(T4, T5); | |
4954 + T5 = _mm_mul_epu32(H2, p->R22.v); T4 =
_mm_add_epi64(T4, T5); | |
4955 + T5 = _mm_mul_epu32(H3, p->R21.v); T4 =
_mm_add_epi64(T4, T5); | |
4956 + T5 = _mm_mul_epu32(H4, p->R20.v); T4 =
_mm_add_epi64(T4, T5); | |
4957 + | |
4958 + C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _mm_and_s
i128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C1); T4 =
_mm_add_epi64(T4, C2); | |
4959 + C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _mm_and_s
i128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C1); T0 =
_mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); | |
4960 + C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _mm_and_s
i128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C1); T1 =
_mm_add_epi64(T1, C2); | |
4961 + C1 = _mm_srli_epi64(T3, 26); T3 = _mm_and_s
i128(T3, MMASK); T4 = _mm_add_epi64(T4, C1); | |
4962 + | |
4963 + /* H = H[0]+H[1] */ | |
4964 + H0 = _mm_add_epi64(T0, _mm_srli_si128(T0, 8)); | |
4965 + H1 = _mm_add_epi64(T1, _mm_srli_si128(T1, 8)); | |
4966 + H2 = _mm_add_epi64(T2, _mm_srli_si128(T2, 8)); | |
4967 + H3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8)); | |
4968 + H4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8)); | |
4969 + | |
4970 + t0 = _mm_cvtsi128_si32(H0) ; c = (t0 >> 26); t0 &= 0x3ffffff; | |
4971 + t1 = _mm_cvtsi128_si32(H1) + c; c = (t1 >> 26); t1 &= 0x3ffffff; | |
4972 + t2 = _mm_cvtsi128_si32(H2) + c; c = (t2 >> 26); t2 &= 0x3ffffff; | |
4973 + t3 = _mm_cvtsi128_si32(H3) + c; c = (t3 >> 26); t3 &= 0x3ffffff; | |
4974 + t4 = _mm_cvtsi128_si32(H4) + c; c = (t4 >> 26); t4 &= 0x3ffffff; | |
4975 + t0 = t0 + (c * 5); c = (t0 >> 26); t0 &= 0x3ffffff; | |
4976 + t1 = t1 + c; | |
4977 + | |
4978 + st->HH[0] = ((t0 ) | (t1 << 26) ) & 0xfffffffffffull; | |
4979 + st->HH[1] = ((t1 >> 18) | (t2 << 8) | (t3 << 34)) & 0xfffffffffffull; | |
4980 + st->HH[2] = ((t3 >> 10) | (t4 << 16) ) & 0x3ffffffffffull; | |
4981 + | |
4982 + return consumed; | |
4983 + } | |
4984 + | |
4985 +void | |
4986 +CRYPTO_poly1305_update(poly1305_state *state, const unsigned char *m, | |
4987 + size_t bytes) | |
4988 + { | |
4989 + poly1305_state_internal *st = poly1305_aligned_state(state); | |
4990 + size_t want; | |
4991 + | |
4992 + /* need at least 32 initial bytes to start the accelerated branch */ | |
4993 + if (!st->started) | |
4994 + { | |
4995 + if ((st->leftover == 0) && (bytes > 32)) | |
4996 + { | |
4997 + poly1305_first_block(st, m); | |
4998 + m += 32; | |
4999 + bytes -= 32; | |
5000 + } | |
5001 + else | |
5002 + { | |
5003 + want = poly1305_min(32 - st->leftover, bytes); | |
5004 + poly1305_block_copy(st->buffer + st->leftover, m, want); | |
5005 + bytes -= want; | |
5006 + m += want; | |
5007 + st->leftover += want; | |
5008 + if ((st->leftover < 32) || (bytes == 0)) | |
5009 + return; | |
5010 + poly1305_first_block(st, st->buffer); | |
5011 + st->leftover = 0; | |
5012 + } | |
5013 + st->started = 1; | |
5014 + } | |
5015 + | |
5016 + /* handle leftover */ | |
5017 + if (st->leftover) | |
5018 + { | |
5019 + want = poly1305_min(64 - st->leftover, bytes); | |
5020 + poly1305_block_copy(st->buffer + st->leftover, m, want); | |
5021 + bytes -= want; | |
5022 + m += want; | |
5023 + st->leftover += want; | |
5024 + if (st->leftover < 64) | |
5025 + return; | |
5026 + poly1305_blocks(st, st->buffer, 64); | |
5027 + st->leftover = 0; | |
5028 + } | |
5029 + | |
5030 + /* process 64 byte blocks */ | |
5031 + if (bytes >= 64) | |
5032 + { | |
5033 + want = (bytes & ~63); | |
5034 + poly1305_blocks(st, m, want); | |
5035 + m += want; | |
5036 + bytes -= want; | |
5037 + } | |
5038 + | |
5039 + if (bytes) | |
5040 + { | |
5041 + poly1305_block_copy(st->buffer + st->leftover, m, bytes); | |
5042 + st->leftover += bytes; | |
5043 + } | |
5044 + } | |
5045 + | |
5046 +void | |
5047 +CRYPTO_poly1305_finish(poly1305_state *state, unsigned char mac[16]) | |
5048 + { | |
5049 + poly1305_state_internal *st = poly1305_aligned_state(state); | |
5050 + size_t leftover = st->leftover; | |
5051 + uint8_t *m = st->buffer; | |
5052 + uint128_t d[3]; | |
5053 + uint64_t h0,h1,h2; | |
5054 + uint64_t t0,t1; | |
5055 + uint64_t g0,g1,g2,c,nc; | |
5056 + uint64_t r0,r1,r2,s1,s2; | |
5057 + poly1305_power *p; | |
5058 + | |
5059 + if (st->started) | |
5060 + { | |
5061 + size_t consumed = poly1305_combine(st, m, leftover); | |
5062 + leftover -= consumed; | |
5063 + m += consumed; | |
5064 + } | |
5065 + | |
5066 + /* st->HH will either be 0 or have the combined result */ | |
5067 + h0 = st->HH[0]; | |
5068 + h1 = st->HH[1]; | |
5069 + h2 = st->HH[2]; | |
5070 + | |
5071 + p = &st->P[1]; | |
5072 + r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; | |
5073 + r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; | |
5074 + r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; | |
5075 + s1 = r1 * (5 << 2); | |
5076 + s2 = r2 * (5 << 2); | |
5077 + | |
5078 + if (leftover < 16) | |
5079 + goto poly1305_donna_atmost15bytes; | |
5080 + | |
5081 +poly1305_donna_atleast16bytes: | |
5082 + t0 = U8TO64_LE(m + 0); | |
5083 + t1 = U8TO64_LE(m + 8); | |
5084 + h0 += t0 & 0xfffffffffff; | |
5085 + t0 = shr128_pair(t1, t0, 44); | |
5086 + h1 += t0 & 0xfffffffffff; | |
5087 + h2 += (t1 >> 24) | ((uint64_t)1 << 40); | |
5088 + | |
5089 +poly1305_donna_mul: | |
5090 + d[0] = add128(add128(mul64x64_128(h0, r0), mul64x64_128(h1, s2)), mul64x
64_128(h2, s1)); | |
5091 + d[1] = add128(add128(mul64x64_128(h0, r1), mul64x64_128(h1, r0)), mul64x
64_128(h2, s2)); | |
5092 + d[2] = add128(add128(mul64x64_128(h0, r2), mul64x64_128(h1, r1)), mul64x
64_128(h2, r0)); | |
5093 + h0 = lo128(d[0]) & 0xfffffffffff; c = shr128(
d[0], 44); | |
5094 + d[1] = add128_64(d[1], c); h1 = lo128(d[1]) & 0xfffffffffff; c = shr128(
d[1], 44); | |
5095 + d[2] = add128_64(d[2], c); h2 = lo128(d[2]) & 0x3ffffffffff; c = shr128(
d[2], 42); | |
5096 + h0 += c * 5; | |
5097 + | |
5098 + m += 16; | |
5099 + leftover -= 16; | |
5100 + if (leftover >= 16) goto poly1305_donna_atleast16bytes; | |
5101 + | |
5102 + /* final bytes */ | |
5103 +poly1305_donna_atmost15bytes: | |
5104 + if (!leftover) goto poly1305_donna_finish; | |
5105 + | |
5106 + m[leftover++] = 1; | |
5107 + poly1305_block_zero(m + leftover, 16 - leftover); | |
5108 + leftover = 16; | |
5109 + | |
5110 + t0 = U8TO64_LE(m+0); | |
5111 + t1 = U8TO64_LE(m+8); | |
5112 + h0 += t0 & 0xfffffffffff; t0 = shr128_pair(t1, t0, 44); | |
5113 + h1 += t0 & 0xfffffffffff; | |
5114 + h2 += (t1 >> 24); | |
5115 + | |
5116 + goto poly1305_donna_mul; | |
5117 + | |
5118 +poly1305_donna_finish: | |
5119 + c = (h0 >> 44); h0 &= 0xfffffffffff; | |
5120 + h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff; | |
5121 + h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff; | |
5122 + h0 += c * 5; | |
5123 + | |
5124 + g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff; | |
5125 + g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff; | |
5126 + g2 = h2 + c - ((uint64_t)1 << 42); | |
5127 + | |
5128 + c = (g2 >> 63) - 1; | |
5129 + nc = ~c; | |
5130 + h0 = (h0 & nc) | (g0 & c); | |
5131 + h1 = (h1 & nc) | (g1 & c); | |
5132 + h2 = (h2 & nc) | (g2 & c); | |
5133 + | |
5134 + /* pad */ | |
5135 + t0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1]; | |
5136 + t1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1]; | |
5137 + h0 += (t0 & 0xfffffffffff) ; c = (h0 >> 44); h0 &= 0xfffffffffff; t0
= shr128_pair(t1, t0, 44); | |
5138 + h1 += (t0 & 0xfffffffffff) + c; c = (h1 >> 44); h1 &= 0xfffffffffff; t1
= (t1 >> 24); | |
5139 + h2 += (t1 ) + c; | |
5140 + | |
5141 + U64TO8_LE(mac + 0, ((h0 ) | (h1 << 44))); | |
5142 + U64TO8_LE(mac + 8, ((h1 >> 20) | (h2 << 24))); | |
5143 + } | |
5144 + | |
5145 +#endif /* !OPENSSL_NO_POLY1305 */ | |
5146 diff --git a/crypto/poly1305/poly1305test.c b/crypto/poly1305/poly1305test.c | |
5147 new file mode 100644 | |
5148 index 0000000..8dd26af | |
5149 --- /dev/null | |
5150 +++ b/crypto/poly1305/poly1305test.c | |
5151 @@ -0,0 +1,166 @@ | |
5152 +/* ==================================================================== | |
5153 + * Copyright (c) 2011-2013 The OpenSSL Project. All rights reserved. | |
5154 + * | |
5155 + * Redistribution and use in source and binary forms, with or without | |
5156 + * modification, are permitted provided that the following conditions | |
5157 + * are met: | |
5158 + * | |
5159 + * 1. Redistributions of source code must retain the above copyright | |
5160 + * notice, this list of conditions and the following disclaimer. | |
5161 + * | |
5162 + * 2. Redistributions in binary form must reproduce the above copyright | |
5163 + * notice, this list of conditions and the following disclaimer in | |
5164 + * the documentation and/or other materials provided with the | |
5165 + * distribution. | |
5166 + * | |
5167 + * 3. All advertising materials mentioning features or use of this | |
5168 + * software must display the following acknowledgment: | |
5169 + * "This product includes software developed by the OpenSSL Project | |
5170 + * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
5171 + * | |
5172 + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
5173 + * endorse or promote products derived from this software without | |
5174 + * prior written permission. For written permission, please contact | |
5175 + * licensing@OpenSSL.org. | |
5176 + * | |
5177 + * 5. Products derived from this software may not be called "OpenSSL" | |
5178 + * nor may "OpenSSL" appear in their names without prior written | |
5179 + * permission of the OpenSSL Project. | |
5180 + * | |
5181 + * 6. Redistributions of any form whatsoever must retain the following | |
5182 + * acknowledgment: | |
5183 + * "This product includes software developed by the OpenSSL Project | |
5184 + * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
5185 + * | |
5186 + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
5187 + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
5188 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
5189 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
5190 + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
5191 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
5192 + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
5193 + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
5194 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
5195 + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
5196 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
5197 + * OF THE POSSIBILITY OF SUCH DAMAGE. | |
5198 + * ==================================================================== | |
5199 + */ | |
5200 + | |
5201 +#include <stdio.h> | |
5202 +#include <stdlib.h> | |
5203 +#include <string.h> | |
5204 + | |
5205 +#include <openssl/poly1305.h> | |
5206 + | |
5207 +struct poly1305_test | |
5208 + { | |
5209 + const char *inputhex; | |
5210 + const char *keyhex; | |
5211 + const char *outhex; | |
5212 + }; | |
5213 + | |
5214 +static const struct poly1305_test poly1305_tests[] = { | |
5215 + { | |
5216 + "", | |
5217 + "c8afaac331ee372cd6082de134943b174710130e9f6fea8d72293850a667d86
c", | |
5218 + "4710130e9f6fea8d72293850a667d86c", | |
5219 + }, | |
5220 + { | |
5221 + "48656c6c6f20776f726c6421", | |
5222 + "746869732069732033322d62797465206b657920666f7220506f6c793133303
5", | |
5223 + "a6f745008f81c916a20dcc74eef2b2f0", | |
5224 + }, | |
5225 + { | |
5226 + "000000000000000000000000000000000000000000000000000000000000000
0", | |
5227 + "746869732069732033322d62797465206b657920666f7220506f6c793133303
5", | |
5228 + "49ec78090e481ec6c26b33b91ccc0307", | |
5229 + }, | |
5230 + { | |
5231 + "000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000", | |
5232 + "746869732069732033322d62797465206b657920666f7220506f6c793133303
5", | |
5233 + "da84bcab02676c38cdb015604274c2aa", | |
5234 + }, | |
5235 +}; | |
5236 + | |
5237 +static unsigned char hex_digit(char h) | |
5238 + { | |
5239 + if (h >= '0' && h <= '9') | |
5240 + return h - '0'; | |
5241 + else if (h >= 'a' && h <= 'f') | |
5242 + return h - 'a' + 10; | |
5243 + else if (h >= 'A' && h <= 'F') | |
5244 + return h - 'A' + 10; | |
5245 + else | |
5246 + abort(); | |
5247 + } | |
5248 + | |
5249 +static void hex_decode(unsigned char *out, const char* hex) | |
5250 + { | |
5251 + size_t j = 0; | |
5252 + | |
5253 + while (*hex != 0) | |
5254 + { | |
5255 + unsigned char v = hex_digit(*hex++); | |
5256 + v <<= 4; | |
5257 + v |= hex_digit(*hex++); | |
5258 + out[j++] = v; | |
5259 + } | |
5260 + } | |
5261 + | |
5262 +static void hexdump(unsigned char *a, size_t len) | |
5263 + { | |
5264 + size_t i; | |
5265 + | |
5266 + for (i = 0; i < len; i++) | |
5267 + printf("%02x", a[i]); | |
5268 + } | |
5269 + | |
5270 +int main() | |
5271 + { | |
5272 + static const unsigned num_tests = | |
5273 + sizeof(poly1305_tests) / sizeof(struct poly1305_test); | |
5274 + unsigned i; | |
5275 + unsigned char key[32], out[16], expected[16]; | |
5276 + poly1305_state poly1305; | |
5277 + | |
5278 + for (i = 0; i < num_tests; i++) | |
5279 + { | |
5280 + const struct poly1305_test *test = &poly1305_tests[i]; | |
5281 + unsigned char *in; | |
5282 + size_t inlen = strlen(test->inputhex); | |
5283 + | |
5284 + if (strlen(test->keyhex) != sizeof(key)*2 || | |
5285 + strlen(test->outhex) != sizeof(out)*2 || | |
5286 + (inlen & 1) == 1) | |
5287 + return 1; | |
5288 + | |
5289 + inlen /= 2; | |
5290 + | |
5291 + hex_decode(key, test->keyhex); | |
5292 + hex_decode(expected, test->outhex); | |
5293 + | |
5294 + in = malloc(inlen); | |
5295 + | |
5296 + hex_decode(in, test->inputhex); | |
5297 + CRYPTO_poly1305_init(&poly1305, key); | |
5298 + CRYPTO_poly1305_update(&poly1305, in, inlen); | |
5299 + CRYPTO_poly1305_finish(&poly1305, out); | |
5300 + | |
5301 + if (memcmp(out, expected, sizeof(expected)) != 0) | |
5302 + { | |
5303 + printf("Poly1305 test #%d failed.\n", i); | |
5304 + printf("got: "); | |
5305 + hexdump(out, sizeof(out)); | |
5306 + printf("\nexpected: "); | |
5307 + hexdump(expected, sizeof(expected)); | |
5308 + printf("\n"); | |
5309 + return 1; | |
5310 + } | |
5311 + | |
5312 + free(in); | |
5313 + } | |
5314 + | |
5315 + printf("PASS\n"); | |
5316 + return 0; | |
5317 + } | |
5318 diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c | |
5319 index 75b6560..a042b8d 100644 | |
5320 --- a/ssl/s3_lib.c | |
5321 +++ b/ssl/s3_lib.c | |
5322 @@ -1841,7 +1841,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
5323 SSL_AEAD, | |
5324 SSL_TLSV1_2, | |
5325 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
5326 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
5327 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
5328 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
5329 128, | |
5330 128, | |
5331 }, | |
5332 @@ -1873,7 +1874,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
5333 SSL_AEAD, | |
5334 SSL_TLSV1_2, | |
5335 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
5336 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
5337 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
5338 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
5339 128, | |
5340 128, | |
5341 }, | |
5342 @@ -1905,7 +1907,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
5343 SSL_AEAD, | |
5344 SSL_TLSV1_2, | |
5345 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
5346 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
5347 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
5348 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
5349 128, | |
5350 128, | |
5351 }, | |
5352 @@ -1937,7 +1940,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
5353 SSL_AEAD, | |
5354 SSL_TLSV1_2, | |
5355 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
5356 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
5357 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
5358 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
5359 128, | |
5360 128, | |
5361 }, | |
5362 @@ -1969,7 +1973,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
5363 SSL_AEAD, | |
5364 SSL_TLSV1_2, | |
5365 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
5366 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
5367 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
5368 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
5369 128, | |
5370 128, | |
5371 }, | |
5372 @@ -2001,7 +2006,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
5373 SSL_AEAD, | |
5374 SSL_TLSV1_2, | |
5375 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
5376 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
5377 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
5378 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
5379 128, | |
5380 128, | |
5381 }, | |
5382 @@ -2714,7 +2720,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
5383 SSL_AEAD, | |
5384 SSL_TLSV1_2, | |
5385 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
5386 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
5387 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
5388 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
5389 128, | |
5390 128, | |
5391 }, | |
5392 @@ -2746,7 +2753,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
5393 SSL_AEAD, | |
5394 SSL_TLSV1_2, | |
5395 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
5396 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
5397 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
5398 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
5399 128, | |
5400 128, | |
5401 }, | |
5402 @@ -2778,7 +2786,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
5403 SSL_AEAD, | |
5404 SSL_TLSV1_2, | |
5405 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
5406 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
5407 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
5408 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
5409 128, | |
5410 128, | |
5411 }, | |
5412 @@ -2810,7 +2819,8 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
5413 SSL_AEAD, | |
5414 SSL_TLSV1_2, | |
5415 SSL_NOT_EXP|SSL_HIGH|SSL_FIPS, | |
5416 - SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4), | |
5417 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(4)| | |
5418 + SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD, | |
5419 128, | |
5420 128, | |
5421 }, | |
5422 @@ -2894,6 +2904,51 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[]={ | |
5423 }, | |
5424 #endif | |
5425 | |
5426 + { | |
5427 + 1, | |
5428 + TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305, | |
5429 + TLS1_CK_ECDHE_RSA_CHACHA20_POLY1305, | |
5430 + SSL_kEECDH, | |
5431 + SSL_aRSA, | |
5432 + SSL_CHACHA20POLY1305, | |
5433 + SSL_AEAD, | |
5434 + SSL_TLSV1_2, | |
5435 + SSL_NOT_EXP|SSL_HIGH, | |
5436 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(0), | |
5437 + 256, | |
5438 + 0, | |
5439 + }, | |
5440 + | |
5441 + { | |
5442 + 1, | |
5443 + TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, | |
5444 + TLS1_CK_ECDHE_ECDSA_CHACHA20_POLY1305, | |
5445 + SSL_kEECDH, | |
5446 + SSL_aECDSA, | |
5447 + SSL_CHACHA20POLY1305, | |
5448 + SSL_AEAD, | |
5449 + SSL_TLSV1_2, | |
5450 + SSL_NOT_EXP|SSL_HIGH, | |
5451 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(0), | |
5452 + 256, | |
5453 + 0, | |
5454 + }, | |
5455 + | |
5456 + { | |
5457 + 1, | |
5458 + TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305, | |
5459 + TLS1_CK_DHE_RSA_CHACHA20_POLY1305, | |
5460 + SSL_kEDH, | |
5461 + SSL_aRSA, | |
5462 + SSL_CHACHA20POLY1305, | |
5463 + SSL_AEAD, | |
5464 + SSL_TLSV1_2, | |
5465 + SSL_NOT_EXP|SSL_HIGH, | |
5466 + SSL_HANDSHAKE_MAC_SHA256|TLS1_PRF_SHA256|SSL_CIPHER_ALGORITHM2_AEAD|FIXE
D_NONCE_LEN(0), | |
5467 + 256, | |
5468 + 0, | |
5469 + }, | |
5470 + | |
5471 /* end of list */ | |
5472 }; | |
5473 | |
5474 diff --git a/ssl/s3_pkt.c b/ssl/s3_pkt.c | |
5475 index 5038f6c..04b474d 100644 | |
5476 --- a/ssl/s3_pkt.c | |
5477 +++ b/ssl/s3_pkt.c | |
5478 @@ -790,8 +790,11 @@ static int do_ssl3_write(SSL *s, int type, const unsigned c
har *buf, | |
5479 else | |
5480 eivlen = 0; | |
5481 } | |
5482 - else if (s->aead_write_ctx != NULL) | |
5483 + else if (s->aead_write_ctx != NULL && | |
5484 + s->aead_write_ctx->variable_nonce_included_in_record) | |
5485 + { | |
5486 eivlen = s->aead_write_ctx->variable_nonce_len; | |
5487 + } | |
5488 else | |
5489 eivlen = 0; | |
5490 | |
5491 diff --git a/ssl/ssl.h b/ssl/ssl.h | |
5492 index 0644cbf..d782a98 100644 | |
5493 --- a/ssl/ssl.h | |
5494 +++ b/ssl/ssl.h | |
5495 @@ -291,6 +291,7 @@ extern "C" { | |
5496 #define SSL_TXT_CAMELLIA128 "CAMELLIA128" | |
5497 #define SSL_TXT_CAMELLIA256 "CAMELLIA256" | |
5498 #define SSL_TXT_CAMELLIA "CAMELLIA" | |
5499 +#define SSL_TXT_CHACHA20 "CHACHA20" | |
5500 | |
5501 #define SSL_TXT_MD5 "MD5" | |
5502 #define SSL_TXT_SHA1 "SHA1" | |
5503 diff --git a/ssl/ssl_ciph.c b/ssl/ssl_ciph.c | |
5504 index 7e780cd..b6370bd 100644 | |
5505 --- a/ssl/ssl_ciph.c | |
5506 +++ b/ssl/ssl_ciph.c | |
5507 @@ -298,6 +298,7 @@ static const SSL_CIPHER cipher_aliases[]={ | |
5508 {0,SSL_TXT_CAMELLIA128,0,0,0,SSL_CAMELLIA128,0,0,0,0,0,0}, | |
5509 {0,SSL_TXT_CAMELLIA256,0,0,0,SSL_CAMELLIA256,0,0,0,0,0,0}, | |
5510 {0,SSL_TXT_CAMELLIA ,0,0,0,SSL_CAMELLIA128|SSL_CAMELLIA256,0,0,0,0,0,0
}, | |
5511 + {0,SSL_TXT_CHACHA20 ,0,0,0,SSL_CHACHA20POLY1305,0,0,0,0,0,0}, | |
5512 | |
5513 /* MAC aliases */ | |
5514 {0,SSL_TXT_MD5,0, 0,0,0,SSL_MD5, 0,0,0,0,0}, | |
5515 @@ -523,9 +524,15 @@ int ssl_cipher_get_evp_aead(const SSL_SESSION *s, const EVP
_AEAD **aead) | |
5516 return 0; | |
5517 | |
5518 #ifndef OPENSSL_NO_AES | |
5519 - /* There is only one AEAD for now. */ | |
5520 - *aead = EVP_aead_aes_128_gcm(); | |
5521 - return 1; | |
5522 + switch (c->algorithm_enc) | |
5523 + { | |
5524 + case SSL_AES128GCM: | |
5525 + *aead = EVP_aead_aes_128_gcm(); | |
5526 + return 1; | |
5527 + case SSL_CHACHA20POLY1305: | |
5528 + *aead = EVP_aead_chacha20_poly1305(); | |
5529 + return 1; | |
5530 + } | |
5531 #endif | |
5532 | |
5533 return 0; | |
5534 @@ -1715,6 +1722,9 @@ char *SSL_CIPHER_description(const SSL_CIPHER *cipher, cha
r *buf, int len) | |
5535 case SSL_SEED: | |
5536 enc="SEED(128)"; | |
5537 break; | |
5538 + case SSL_CHACHA20POLY1305: | |
5539 + enc="ChaCha20-Poly1305"; | |
5540 + break; | |
5541 default: | |
5542 enc="unknown"; | |
5543 break; | |
5544 diff --git a/ssl/ssl_locl.h b/ssl/ssl_locl.h | |
5545 index 63bc28b..b83d8cd 100644 | |
5546 --- a/ssl/ssl_locl.h | |
5547 +++ b/ssl/ssl_locl.h | |
5548 @@ -328,6 +328,7 @@ | |
5549 #define SSL_SEED 0x00000800L | |
5550 #define SSL_AES128GCM 0x00001000L | |
5551 #define SSL_AES256GCM 0x00002000L | |
5552 +#define SSL_CHACHA20POLY1305 0x00004000L | |
5553 | |
5554 #define SSL_AES (SSL_AES128|SSL_AES256|SSL_AES128GCM|SSL
_AES256GCM) | |
5555 #define SSL_CAMELLIA (SSL_CAMELLIA128|SSL_CAMELLIA256) | |
5556 @@ -389,6 +390,12 @@ | |
5557 #define SSL_CIPHER_AEAD_FIXED_NONCE_LEN(ssl_cipher) \ | |
5558 (((ssl_cipher->algorithm2 >> 24) & 0xf)*2) | |
5559 | |
5560 +/* SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD is a flag in | |
5561 + * SSL_CIPHER.algorithm2 which indicates that the variable part of the nonce is | |
5562 + * included as a prefix of the record. (AES-GCM, for example, does with with an | |
5563 + * 8-byte variable nonce.) */ | |
5564 +#define SSL_CIPHER_ALGORITHM2_VARIABLE_NONCE_INCLUDED_IN_RECORD (1<<22) | |
5565 + | |
5566 /* | |
5567 * Export and cipher strength information. For each cipher we have to decide | |
5568 * whether it is exportable or not. This information is likely to change | |
5569 @@ -605,6 +612,9 @@ struct ssl_aead_ctx_st | |
5570 * records. */ | |
5571 unsigned char fixed_nonce[8]; | |
5572 unsigned char fixed_nonce_len, variable_nonce_len, tag_len; | |
5573 + /* variable_nonce_included_in_record is non-zero if the variable nonce | |
5574 + * for a record is included as a prefix before the ciphertext. */ | |
5575 + char variable_nonce_included_in_record; | |
5576 }; | |
5577 | |
5578 #ifndef OPENSSL_NO_COMP | |
5579 diff --git a/ssl/t1_enc.c b/ssl/t1_enc.c | |
5580 index 7af1a32..15800af 100644 | |
5581 --- a/ssl/t1_enc.c | |
5582 +++ b/ssl/t1_enc.c | |
5583 @@ -366,6 +366,8 @@ static int tls1_change_cipher_state_aead(SSL *s, char is_rea
d, | |
5584 memcpy(aead_ctx->fixed_nonce, iv, iv_len); | |
5585 aead_ctx->fixed_nonce_len = iv_len; | |
5586 aead_ctx->variable_nonce_len = 8; /* always the case, currently. */ | |
5587 + aead_ctx->variable_nonce_included_in_record = | |
5588 + (s->s3->tmp.new_cipher->algorithm2 & SSL_CIPHER_ALGORITHM2_VARIA
BLE_NONCE_INCLUDED_IN_RECORD) != 0; | |
5589 if (aead_ctx->variable_nonce_len + aead_ctx->fixed_nonce_len != EVP_AEAD
_nonce_length(aead)) | |
5590 { | |
5591 SSLerr(SSL_F_TLS1_CHANGE_CIPHER_STATE_AEAD, ERR_R_INTERNAL_ERROR
); | |
5592 @@ -863,6 +865,7 @@ int tls1_enc(SSL *s, int send) | |
5593 if (send) | |
5594 { | |
5595 size_t len = rec->length; | |
5596 + size_t eivlen = 0; | |
5597 in = rec->input; | |
5598 out = rec->data; | |
5599 | |
5600 @@ -878,18 +881,22 @@ int tls1_enc(SSL *s, int send) | |
5601 * variable nonce. Thus we can copy the sequence number | |
5602 * bytes into place without overwriting any of the | |
5603 * plaintext. */ | |
5604 - memcpy(out, ad, aead->variable_nonce_len); | |
5605 - len -= aead->variable_nonce_len; | |
5606 + if (aead->variable_nonce_included_in_record) | |
5607 + { | |
5608 + memcpy(out, ad, aead->variable_nonce_len); | |
5609 + len -= aead->variable_nonce_len; | |
5610 + eivlen = aead->variable_nonce_len; | |
5611 + } | |
5612 | |
5613 ad[11] = len >> 8; | |
5614 ad[12] = len & 0xff; | |
5615 | |
5616 n = EVP_AEAD_CTX_seal(&aead->ctx, | |
5617 - out + aead->variable_nonce_len, le
n + aead->tag_len, | |
5618 + out + eivlen, len + aead->tag_len, | |
5619 nonce, nonce_used, | |
5620 - in + aead->variable_nonce_len, len
, | |
5621 + in + eivlen, len, | |
5622 ad, sizeof(ad)); | |
5623 - if (n >= 0) | |
5624 + if (n >= 0 && aead->variable_nonce_included_in_record) | |
5625 n += aead->variable_nonce_len; | |
5626 } | |
5627 else | |
5628 @@ -903,12 +910,17 @@ int tls1_enc(SSL *s, int send) | |
5629 | |
5630 if (len < aead->variable_nonce_len) | |
5631 return 0; | |
5632 - memcpy(nonce + nonce_used, in, aead->variable_nonce_len)
; | |
5633 + memcpy(nonce + nonce_used, | |
5634 + aead->variable_nonce_included_in_record ? in : ad
, | |
5635 + aead->variable_nonce_len); | |
5636 nonce_used += aead->variable_nonce_len; | |
5637 | |
5638 - in += aead->variable_nonce_len; | |
5639 - len -= aead->variable_nonce_len; | |
5640 - out += aead->variable_nonce_len; | |
5641 + if (aead->variable_nonce_included_in_record) | |
5642 + { | |
5643 + in += aead->variable_nonce_len; | |
5644 + len -= aead->variable_nonce_len; | |
5645 + out += aead->variable_nonce_len; | |
5646 + } | |
5647 | |
5648 if (len < aead->tag_len) | |
5649 return 0; | |
5650 diff --git a/ssl/tls1.h b/ssl/tls1.h | |
5651 index 8cac7df..3cbcb83 100644 | |
5652 --- a/ssl/tls1.h | |
5653 +++ b/ssl/tls1.h | |
5654 @@ -526,6 +526,10 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB
,(void (*)(void))cb) | |
5655 #define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256 0x0300C031 | |
5656 #define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384 0x0300C032 | |
5657 | |
5658 +#define TLS1_CK_ECDHE_RSA_CHACHA20_POLY1305 0x0300CC13 | |
5659 +#define TLS1_CK_ECDHE_ECDSA_CHACHA20_POLY1305 0x0300CC14 | |
5660 +#define TLS1_CK_DHE_RSA_CHACHA20_POLY1305 0x0300CC15 | |
5661 + | |
5662 /* XXX | |
5663 * Inconsistency alert: | |
5664 * The OpenSSL names of ciphers with ephemeral DH here include the string | |
5665 @@ -677,6 +681,10 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB
,(void (*)(void))cb) | |
5666 #define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256 "ECDH-RSA-AES128-GCM-SH
A256" | |
5667 #define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384 "ECDH-RSA-AES256-GCM-SH
A384" | |
5668 | |
5669 +#define TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305 "ECDHE-RSA-CHACHA20-POLY
1305" | |
5670 +#define TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 "ECDHE-ECDSA-CHACHA20-PO
LY1305" | |
5671 +#define TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305 "DHE-RSA-CHACHA2
0-POLY1305" | |
5672 + | |
5673 #define TLS_CT_RSA_SIGN 1 | |
5674 #define TLS_CT_DSS_SIGN 2 | |
5675 #define TLS_CT_RSA_FIXED_DH 3 | |
5676 diff --git a/test/Makefile b/test/Makefile | |
5677 index 4c9eabc..4790aa8 100644 | |
5678 --- a/test/Makefile | |
5679 +++ b/test/Makefile | |
5680 @@ -86,7 +86,9 @@ OBJ= $(BNTEST).o $(ECTEST).o $(ECDSATEST).o $(ECDHTEST).o $(
IDEATEST).o \ | |
5681 $(MDC2TEST).o $(RMDTEST).o \ | |
5682 $(RANDTEST).o $(DHTEST).o $(ENGINETEST).o $(CASTTEST).o \ | |
5683 $(BFTEST).o $(SSLTEST).o $(DSATEST).o $(EXPTEST).o $(RSATEST).o \ | |
5684 - $(EVPTEST).o $(IGETEST).o $(JPAKETEST).o $(ASN1TEST).o | |
5685 + $(EVPTEST).o $(IGETEST).o $(JPAKETEST).o $(ASN1TEST).o $(CHACHATEST).o \ | |
5686 + $(POLY1305TEST).o | |
5687 + | |
5688 SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $(IDEATEST).c \ | |
5689 $(MD2TEST).c $(MD4TEST).c $(MD5TEST).c \ | |
5690 $(HMACTEST).c $(WPTEST).c \ | |
5691 @@ -94,7 +96,8 @@ SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $(
IDEATEST).c \ | |
5692 $(DESTEST).c $(SHATEST).c $(SHA1TEST).c $(MDC2TEST).c $(RMDTEST).c \ | |
5693 $(RANDTEST).c $(DHTEST).c $(ENGINETEST).c $(CASTTEST).c \ | |
5694 $(BFTEST).c $(SSLTEST).c $(DSATEST).c $(EXPTEST).c $(RSATEST).c \ | |
5695 - $(EVPTEST).c $(IGETEST).c $(JPAKETEST).c $(SRPTEST).c $(ASN1TEST).c | |
5696 + $(EVPTEST).c $(IGETEST).c $(JPAKETEST).c $(SRPTEST).c $(ASN1TEST).c \ | |
5697 + $(CHACHATEST).c $(POLY1305TEST).c | |
5698 | |
5699 EXHEADER= | |
5700 HEADER= $(EXHEADER) | |
5701 @@ -137,7 +140,7 @@ alltests: \ | |
5702 test_enc test_x509 test_rsa test_crl test_sid \ | |
5703 test_gen test_req test_pkcs7 test_verify test_dh test_dsa \ | |
5704 test_ss test_ca test_engine test_evp test_ssl test_tsa test_ige \ | |
5705 - test_jpake test_srp test_cms | |
5706 + test_jpake test_srp test_cms test_chacha test_poly1305 | |
5707 | |
5708 test_evp: | |
5709 ../util/shlib_wrap.sh ./$(EVPTEST) evptests.txt | |
5710 @@ -318,6 +321,14 @@ test_srp: $(SRPTEST)$(EXE_EXT) | |
5711 @echo "Test SRP" | |
5712 ../util/shlib_wrap.sh ./srptest | |
5713 | |
5714 +test_chacha: $(CHACHATEST)$(EXE_EXT) | |
5715 + @echo "Test ChaCha" | |
5716 + ../util/shlib_wrap.sh ./$(CHACHATEST) | |
5717 + | |
5718 +test_poly1305: $(POLY1305TEST)$(EXE_EXT) | |
5719 + @echo "Test Poly1305" | |
5720 + ../util/shlib_wrap.sh ./$(POLY1305TEST) | |
5721 + | |
5722 lint: | |
5723 lint -DLINT $(INCLUDES) $(SRC)>fluff | |
5724 | |
5725 @@ -394,6 +405,12 @@ $(SHA256TEST)$(EXE_EXT): $(SHA256TEST).o $(DLIBCRYPTO) | |
5726 $(SHA512TEST)$(EXE_EXT): $(SHA512TEST).o $(DLIBCRYPTO) | |
5727 @target=$(SHA512TEST); $(BUILD_CMD) | |
5728 | |
5729 +$(CHACHATEST)$(EXE_EXT): $(CHACHATEST).o $(DLIBCRYPTO) | |
5730 + @target=$(CHACHATEST); $(BUILD_CMD) | |
5731 + | |
5732 +$(POLY1305TEST)$(EXE_EXT): $(POLY1305TEST).o $(DLIBCRYPTO) | |
5733 + @target=$(CHACHATEST); $(BUILD_CMD) | |
5734 + | |
5735 $(RMDTEST)$(EXE_EXT): $(RMDTEST).o $(DLIBCRYPTO) | |
5736 @target=$(RMDTEST); $(BUILD_CMD) | |
5737 | |
5738 -- | |
5739 1.8.4.1 | |
5740 | |
OLD | NEW |