Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(913)

Side by Side Diff: openssl/crypto/aes/asm/aes-s390x.pl

Issue 2072073002: Delete bundled copy of OpenSSL and replace with README. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/openssl@master
Patch Set: Delete bundled copy of OpenSSL and replace with README. Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « openssl/crypto/aes/asm/aes-ppc.pl ('k') | openssl/crypto/aes/asm/aes-sparcv9.pl » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # AES for s390x.
11
12 # April 2007.
13 #
14 # Software performance improvement over gcc-generated code is ~70% and
15 # in absolute terms is ~73 cycles per byte processed with 128-bit key.
16 # You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
17 # *strictly* in-order execution and issued instruction [in this case
18 # load value from memory is critical] has to complete before execution
19 # flow proceeds. S-boxes are compressed to 2KB[+256B].
20 #
21 # As for hardware acceleration support. It's basically a "teaser," as
22 # it can and should be improved in several ways. Most notably support
23 # for CBC is not utilized, nor multiple blocks are ever processed.
24 # Then software key schedule can be postponed till hardware support
25 # detection... Performance improvement over assembler is reportedly
26 # ~2.5x, but can reach >8x [naturally on larger chunks] if proper
27 # support is implemented.
28
29 # May 2007.
30 #
31 # Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
32 # for 128-bit keys, if hardware support is detected.
33
34 # January 2009.
35 #
36 # Add support for hardware AES192/256 and reschedule instructions to
37 # minimize/avoid Address Generation Interlock hazard and to favour
38 # dual-issue z10 pipeline. This gave ~25% improvement on z10 and
39 # almost 50% on z9. The gain is smaller on z10, because being dual-
40 # issue z10 makes it impossible to eliminate the interlock condition:
41 # critical path is not long enough. Yet it spends ~24 cycles per byte
42 # processed with 128-bit key.
43 #
44 # Unlike previous version hardware support detection takes place only
45 # at the moment of key schedule setup, which is denoted in key->rounds.
46 # This is done, because deferred key setup can't be made MT-safe, not
47 # for keys longer than 128 bits.
48 #
49 # Add AES_cbc_encrypt, which gives incredible performance improvement,
50 # it was measured to be ~6.6x. It's less than previously mentioned 8x,
51 # because software implementation was optimized.
52
53 # May 2010.
54 #
55 # Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x
56 # performance improvement over "generic" counter mode routine relying
57 # on single-block, also hardware-assisted, AES_encrypt. "Up to" refers
58 # to the fact that exact throughput value depends on current stack
59 # frame alignment within 4KB page. In worst case you get ~75% of the
60 # maximum, but *on average* it would be as much as ~98%. Meaning that
61 # worst case is unlikely; it is like hitting a ravine on a plateau.
62
63 # November 2010.
64 #
65 # Adapt for -m31 build. If kernel supports what's called "highgprs"
66 # feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
67 # instructions and achieve "64-bit" performance even in 31-bit legacy
68 # application context. The feature is not specific to any particular
69 # processor, as long as it's "z-CPU". Latter implies that the code
70 # remains z/Architecture specific. On z990 it was measured to perform
71 # 2x better than code generated by gcc 4.3.
72
73 # December 2010.
74 #
75 # Add support for z196 "cipher message with counter" instruction.
76 # Note however that it's disengaged, because it was measured to
77 # perform ~12% worse than vanilla km-based code...
78
79 # February 2011.
80 #
81 # Add AES_xts_[en|de]crypt. This includes support for z196 km-xts-aes
82 # instructions, which deliver ~70% improvement at 8KB block size over
83 # vanilla km-based code, 37% - at most like 512-bytes block size.
84
85 $flavour = shift;
86
87 if ($flavour =~ /3[12]/) {
88 $SIZE_T=4;
89 $g="";
90 } else {
91 $SIZE_T=8;
92 $g="g";
93 }
94
95 while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
96 open STDOUT,">$output";
97
98 $softonly=0; # allow hardware support
99
100 $t0="%r0"; $mask="%r0";
101 $t1="%r1";
102 $t2="%r2"; $inp="%r2";
103 $t3="%r3"; $out="%r3"; $bits="%r3";
104 $key="%r4";
105 $i1="%r5";
106 $i2="%r6";
107 $i3="%r7";
108 $s0="%r8";
109 $s1="%r9";
110 $s2="%r10";
111 $s3="%r11";
112 $tbl="%r12";
113 $rounds="%r13";
114 $ra="%r14";
115 $sp="%r15";
116
117 $stdframe=16*$SIZE_T+4*8;
118
# Append one table row per 32-bit constant to $code.  Each word is
# emitted twice (".long w,w") so that byte displacements 0..3 into a
# row yield the four byte-rotations of the word; the encrypt/decrypt
# code below indexes the table with displacements 0..3 to read
# Te0..Te3 (resp. Td0..Td3) out of a single 2KB table.
# Note: the original declared this as "sub _data_word()" -- an empty
# prototype on a sub that consumes arguments.  All call sites use
# &_data_word(...), which bypasses prototypes anyway, so the prototype
# was only misleading and is dropped here.
sub _data_word {
	my $i;
	while (defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
}
123
# AES_Te: encryption tables.  Each 32-bit entry is stored twice (see
# _data_word), so the byte displacements 0..3 used by the round code
# select the rotations Te0..Te3 from one 2KB table; Te4[256] and
# rcon[] follow it.  The 256-byte alignment keeps the whole table
# addressable off a single base register.
124 $code=<<___;
125 .text
126
127 .type AES_Te,\@object
128 .align 256
129 AES_Te:
130 ___
131 &_data_word(
132 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
133 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
134 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
135 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
136 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
137 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
138 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
139 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
140 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
141 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
142 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
143 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
144 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
145 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
146 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
147 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
148 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
149 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
150 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
151 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
152 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
153 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
154 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
155 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
156 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
157 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
158 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
159 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
160 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
161 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
162 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
163 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
164 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
165 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
166 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
167 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
168 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
169 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
170 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
171 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
172 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
173 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
174 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
175 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
176 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
177 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
178 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
179 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
180 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
181 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
182 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
183 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
184 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
185 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
186 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
187 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
188 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
189 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
190 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
191 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
192 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
193 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
194 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
195 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
196 $code.=<<___;
197 # Te4[256]
198 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
199 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
200 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
201 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
202 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
203 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
204 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
205 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
206 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
207 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
208 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
209 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
210 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
211 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
212 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
213 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
214 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
215 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
216 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
217 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
218 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
219 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
220 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
221 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
222 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
223 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
224 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
225 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
226 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
227 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
228 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
229 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
230 # rcon[]
231 .long 0x01000000, 0x02000000, 0x04000000, 0x08000000
232 .long 0x10000000, 0x20000000, 0x40000000, 0x80000000
233 .long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
234 .align 256
235 .size AES_Te,.-AES_Te
236
237 # void AES_encrypt(const unsigned char *inp, unsigned char *out,
238 # const AES_KEY *key) {
239 .globl AES_encrypt
240 .type AES_encrypt,\@function
241 AES_encrypt:
242 ___
243 $code.=<<___ if (!$softonly);
244 l %r0,240($key)
245 lhi %r1,16
246 clr %r0,%r1
247 jl .Lesoft
248
249 la %r1,0($key)
250 #la %r2,0($inp)
251 la %r4,0($out)
252 lghi %r3,16 # single block length
253 .long 0xb92e0042 # km %r4,%r2
254 brc 1,.-4 # can this happen?
255 br %r14
256 .align 64
257 .Lesoft:
258 ___
259 $code.=<<___;
260 stm${g} %r3,$ra,3*$SIZE_T($sp)
261
262 llgf $s0,0($inp)
263 llgf $s1,4($inp)
264 llgf $s2,8($inp)
265 llgf $s3,12($inp)
266
267 larl $tbl,AES_Te
268 bras $ra,_s390x_AES_encrypt
269
270 l${g} $out,3*$SIZE_T($sp)
271 st $s0,0($out)
272 st $s1,4($out)
273 st $s2,8($out)
274 st $s3,12($out)
275
276 lm${g} %r6,$ra,6*$SIZE_T($sp)
277 br $ra
278 .size AES_encrypt,.-AES_encrypt
279
280 .type _s390x_AES_encrypt,\@function
281 .align 16
282 _s390x_AES_encrypt:
283 st${g} $ra,15*$SIZE_T($sp)
284 x $s0,0($key)
285 x $s1,4($key)
286 x $s2,8($key)
287 x $s3,12($key)
288 l $rounds,240($key)
289 llill $mask,`0xff<<3`
290 aghi $rounds,-1
291 j .Lenc_loop
292 .align 16
293 .Lenc_loop:
294 sllg $t1,$s0,`0+3`
295 srlg $t2,$s0,`8-3`
296 srlg $t3,$s0,`16-3`
297 srl $s0,`24-3`
298 nr $s0,$mask
299 ngr $t1,$mask
300 nr $t2,$mask
301 nr $t3,$mask
302
303 srlg $i1,$s1,`16-3` # i0
304 sllg $i2,$s1,`0+3`
305 srlg $i3,$s1,`8-3`
306 srl $s1,`24-3`
307 nr $i1,$mask
308 nr $s1,$mask
309 ngr $i2,$mask
310 nr $i3,$mask
311
312 l $s0,0($s0,$tbl) # Te0[s0>>24]
313 l $t1,1($t1,$tbl) # Te3[s0>>0]
314 l $t2,2($t2,$tbl) # Te2[s0>>8]
315 l $t3,3($t3,$tbl) # Te1[s0>>16]
316
317 x $s0,3($i1,$tbl) # Te1[s1>>16]
318 l $s1,0($s1,$tbl) # Te0[s1>>24]
319 x $t2,1($i2,$tbl) # Te3[s1>>0]
320 x $t3,2($i3,$tbl) # Te2[s1>>8]
321
322 srlg $i1,$s2,`8-3` # i0
323 srlg $i2,$s2,`16-3` # i1
324 nr $i1,$mask
325 nr $i2,$mask
326 sllg $i3,$s2,`0+3`
327 srl $s2,`24-3`
328 nr $s2,$mask
329 ngr $i3,$mask
330
331 xr $s1,$t1
332 srlg $ra,$s3,`8-3` # i1
333 sllg $t1,$s3,`0+3` # i0
334 nr $ra,$mask
335 la $key,16($key)
336 ngr $t1,$mask
337
338 x $s0,2($i1,$tbl) # Te2[s2>>8]
339 x $s1,3($i2,$tbl) # Te1[s2>>16]
340 l $s2,0($s2,$tbl) # Te0[s2>>24]
341 x $t3,1($i3,$tbl) # Te3[s2>>0]
342
343 srlg $i3,$s3,`16-3` # i2
344 xr $s2,$t2
345 srl $s3,`24-3`
346 nr $i3,$mask
347 nr $s3,$mask
348
349 x $s0,0($key)
350 x $s1,4($key)
351 x $s2,8($key)
352 x $t3,12($key)
353
354 x $s0,1($t1,$tbl) # Te3[s3>>0]
355 x $s1,2($ra,$tbl) # Te2[s3>>8]
356 x $s2,3($i3,$tbl) # Te1[s3>>16]
357 l $s3,0($s3,$tbl) # Te0[s3>>24]
358 xr $s3,$t3
359
360 brct $rounds,.Lenc_loop
361 .align 16
362
363 sllg $t1,$s0,`0+3`
364 srlg $t2,$s0,`8-3`
365 ngr $t1,$mask
366 srlg $t3,$s0,`16-3`
367 srl $s0,`24-3`
368 nr $s0,$mask
369 nr $t2,$mask
370 nr $t3,$mask
371
372 srlg $i1,$s1,`16-3` # i0
373 sllg $i2,$s1,`0+3`
374 ngr $i2,$mask
375 srlg $i3,$s1,`8-3`
376 srl $s1,`24-3`
377 nr $i1,$mask
378 nr $s1,$mask
379 nr $i3,$mask
380
381 llgc $s0,2($s0,$tbl) # Te4[s0>>24]
382 llgc $t1,2($t1,$tbl) # Te4[s0>>0]
383 sll $s0,24
384 llgc $t2,2($t2,$tbl) # Te4[s0>>8]
385 llgc $t3,2($t3,$tbl) # Te4[s0>>16]
386 sll $t2,8
387 sll $t3,16
388
389 llgc $i1,2($i1,$tbl) # Te4[s1>>16]
390 llgc $s1,2($s1,$tbl) # Te4[s1>>24]
391 llgc $i2,2($i2,$tbl) # Te4[s1>>0]
392 llgc $i3,2($i3,$tbl) # Te4[s1>>8]
393 sll $i1,16
394 sll $s1,24
395 sll $i3,8
396 or $s0,$i1
397 or $s1,$t1
398 or $t2,$i2
399 or $t3,$i3
400
401 srlg $i1,$s2,`8-3` # i0
402 srlg $i2,$s2,`16-3` # i1
403 nr $i1,$mask
404 nr $i2,$mask
405 sllg $i3,$s2,`0+3`
406 srl $s2,`24-3`
407 ngr $i3,$mask
408 nr $s2,$mask
409
410 sllg $t1,$s3,`0+3` # i0
411 srlg $ra,$s3,`8-3` # i1
412 ngr $t1,$mask
413
414 llgc $i1,2($i1,$tbl) # Te4[s2>>8]
415 llgc $i2,2($i2,$tbl) # Te4[s2>>16]
416 sll $i1,8
417 llgc $s2,2($s2,$tbl) # Te4[s2>>24]
418 llgc $i3,2($i3,$tbl) # Te4[s2>>0]
419 sll $i2,16
420 nr $ra,$mask
421 sll $s2,24
422 or $s0,$i1
423 or $s1,$i2
424 or $s2,$t2
425 or $t3,$i3
426
427 srlg $i3,$s3,`16-3` # i2
428 srl $s3,`24-3`
429 nr $i3,$mask
430 nr $s3,$mask
431
432 l $t0,16($key)
433 l $t2,20($key)
434
435 llgc $i1,2($t1,$tbl) # Te4[s3>>0]
436 llgc $i2,2($ra,$tbl) # Te4[s3>>8]
437 llgc $i3,2($i3,$tbl) # Te4[s3>>16]
438 llgc $s3,2($s3,$tbl) # Te4[s3>>24]
439 sll $i2,8
440 sll $i3,16
441 sll $s3,24
442 or $s0,$i1
443 or $s1,$i2
444 or $s2,$i3
445 or $s3,$t3
446
447 l${g} $ra,15*$SIZE_T($sp)
448 xr $s0,$t0
449 xr $s1,$t2
450 x $s2,24($key)
451 x $s3,28($key)
452
453 br $ra
454 .size _s390x_AES_encrypt,.-_s390x_AES_encrypt
455 ___
456
# AES_Td: decryption tables, same doubled-word layout as AES_Te
# (byte displacements 0..3 select Td0..Td3); Td4[256] follows at
# offset 2048, which is why the final-round code below addresses it
# as 2048($x,$tbl).
457 $code.=<<___;
458 .type AES_Td,\@object
459 .align 256
460 AES_Td:
461 ___
462 &_data_word(
463 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
464 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
465 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
466 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
467 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
468 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
469 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
470 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
471 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
472 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
473 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
474 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
475 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
476 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
477 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
478 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
479 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
480 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
481 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
482 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
483 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
484 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
485 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
486 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
487 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
488 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
489 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
490 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
491 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
492 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
493 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
494 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
495 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
496 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
497 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
498 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
499 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
500 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
501 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
502 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
503 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
504 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
505 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
506 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
507 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
508 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
509 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
510 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
511 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
512 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
513 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
514 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
515 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
516 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
517 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
518 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
519 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
520 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
521 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
522 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
523 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
524 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
525 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
526 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
527 $code.=<<___;
528 # Td4[256]
529 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
530 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
531 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
532 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
533 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
534 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
535 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
536 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
537 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
538 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
539 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
540 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
541 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
542 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
543 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
544 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
545 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
546 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
547 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
548 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
549 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
550 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
551 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
552 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
553 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
554 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
555 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
556 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
557 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
558 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
559 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
560 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
561 .size AES_Td,.-AES_Td
562
563 # void AES_decrypt(const unsigned char *inp, unsigned char *out,
564 # const AES_KEY *key) {
565 .globl AES_decrypt
566 .type AES_decrypt,\@function
567 AES_decrypt:
568 ___
569 $code.=<<___ if (!$softonly);
570 l %r0,240($key)
571 lhi %r1,16
572 clr %r0,%r1
573 jl .Ldsoft
574
575 la %r1,0($key)
576 #la %r2,0($inp)
577 la %r4,0($out)
578 lghi %r3,16 # single block length
579 .long 0xb92e0042 # km %r4,%r2
580 brc 1,.-4 # can this happen?
581 br %r14
582 .align 64
583 .Ldsoft:
584 ___
585 $code.=<<___;
586 stm${g} %r3,$ra,3*$SIZE_T($sp)
587
588 llgf $s0,0($inp)
589 llgf $s1,4($inp)
590 llgf $s2,8($inp)
591 llgf $s3,12($inp)
592
593 larl $tbl,AES_Td
594 bras $ra,_s390x_AES_decrypt
595
596 l${g} $out,3*$SIZE_T($sp)
597 st $s0,0($out)
598 st $s1,4($out)
599 st $s2,8($out)
600 st $s3,12($out)
601
602 lm${g} %r6,$ra,6*$SIZE_T($sp)
603 br $ra
604 .size AES_decrypt,.-AES_decrypt
605
606 .type _s390x_AES_decrypt,\@function
607 .align 16
608 _s390x_AES_decrypt:
609 st${g} $ra,15*$SIZE_T($sp)
610 x $s0,0($key)
611 x $s1,4($key)
612 x $s2,8($key)
613 x $s3,12($key)
614 l $rounds,240($key)
615 llill $mask,`0xff<<3`
616 aghi $rounds,-1
617 j .Ldec_loop
618 .align 16
619 .Ldec_loop:
620 srlg $t1,$s0,`16-3`
621 srlg $t2,$s0,`8-3`
622 sllg $t3,$s0,`0+3`
623 srl $s0,`24-3`
624 nr $s0,$mask
625 nr $t1,$mask
626 nr $t2,$mask
627 ngr $t3,$mask
628
629 sllg $i1,$s1,`0+3` # i0
630 srlg $i2,$s1,`16-3`
631 srlg $i3,$s1,`8-3`
632 srl $s1,`24-3`
633 ngr $i1,$mask
634 nr $s1,$mask
635 nr $i2,$mask
636 nr $i3,$mask
637
638 l $s0,0($s0,$tbl) # Td0[s0>>24]
639 l $t1,3($t1,$tbl) # Td1[s0>>16]
640 l $t2,2($t2,$tbl) # Td2[s0>>8]
641 l $t3,1($t3,$tbl) # Td3[s0>>0]
642
643 x $s0,1($i1,$tbl) # Td3[s1>>0]
644 l $s1,0($s1,$tbl) # Td0[s1>>24]
645 x $t2,3($i2,$tbl) # Td1[s1>>16]
646 x $t3,2($i3,$tbl) # Td2[s1>>8]
647
648 srlg $i1,$s2,`8-3` # i0
649 sllg $i2,$s2,`0+3` # i1
650 srlg $i3,$s2,`16-3`
651 srl $s2,`24-3`
652 nr $i1,$mask
653 ngr $i2,$mask
654 nr $s2,$mask
655 nr $i3,$mask
656
657 xr $s1,$t1
658 srlg $ra,$s3,`8-3` # i1
659 srlg $t1,$s3,`16-3` # i0
660 nr $ra,$mask
661 la $key,16($key)
662 nr $t1,$mask
663
664 x $s0,2($i1,$tbl) # Td2[s2>>8]
665 x $s1,1($i2,$tbl) # Td3[s2>>0]
666 l $s2,0($s2,$tbl) # Td0[s2>>24]
667 x $t3,3($i3,$tbl) # Td1[s2>>16]
668
669 sllg $i3,$s3,`0+3` # i2
670 srl $s3,`24-3`
671 ngr $i3,$mask
672 nr $s3,$mask
673
674 xr $s2,$t2
675 x $s0,0($key)
676 x $s1,4($key)
677 x $s2,8($key)
678 x $t3,12($key)
679
680 x $s0,3($t1,$tbl) # Td1[s3>>16]
681 x $s1,2($ra,$tbl) # Td2[s3>>8]
682 x $s2,1($i3,$tbl) # Td3[s3>>0]
683 l $s3,0($s3,$tbl) # Td0[s3>>24]
684 xr $s3,$t3
685
686 brct $rounds,.Ldec_loop
687 .align 16
688
689 l $t1,`2048+0`($tbl) # prefetch Td4
690 l $t2,`2048+64`($tbl)
691 l $t3,`2048+128`($tbl)
692 l $i1,`2048+192`($tbl)
693 llill $mask,0xff
694
695 srlg $i3,$s0,24 # i0
696 srlg $t1,$s0,16
697 srlg $t2,$s0,8
698 nr $s0,$mask # i3
699 nr $t1,$mask
700
701 srlg $i1,$s1,24
702 nr $t2,$mask
703 srlg $i2,$s1,16
704 srlg $ra,$s1,8
705 nr $s1,$mask # i0
706 nr $i2,$mask
707 nr $ra,$mask
708
709 llgc $i3,2048($i3,$tbl) # Td4[s0>>24]
710 llgc $t1,2048($t1,$tbl) # Td4[s0>>16]
711 llgc $t2,2048($t2,$tbl) # Td4[s0>>8]
712 sll $t1,16
713 llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
714 sllg $s0,$i3,24
715 sll $t2,8
716
717 llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
718 llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
719 llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
720 sll $i1,24
721 llgc $i3,2048($ra,$tbl) # Td4[s1>>8]
722 sll $i2,16
723 sll $i3,8
724 or $s0,$s1
725 or $t1,$i1
726 or $t2,$i2
727 or $t3,$i3
728
729 srlg $i1,$s2,8 # i0
730 srlg $i2,$s2,24
731 srlg $i3,$s2,16
732 nr $s2,$mask # i1
733 nr $i1,$mask
734 nr $i3,$mask
735 llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
736 llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
737 llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
738 llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
739 sll $i1,8
740 sll $i2,24
741 or $s0,$i1
742 sll $i3,16
743 or $t2,$i2
744 or $t3,$i3
745
746 srlg $i1,$s3,16 # i0
747 srlg $i2,$s3,8 # i1
748 srlg $i3,$s3,24
749 nr $s3,$mask # i2
750 nr $i1,$mask
751 nr $i2,$mask
752
753 l${g} $ra,15*$SIZE_T($sp)
754 or $s1,$t1
755 l $t0,16($key)
756 l $t1,20($key)
757
758 llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
759 llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
760 sll $i1,16
761 llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
762 llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
763 sll $i2,8
764 sll $s3,24
765 or $s0,$i1
766 or $s1,$i2
767 or $s2,$t2
768 or $s3,$t3
769
770 xr $s0,$t0
771 xr $s1,$t1
772 x $s2,24($key)
773 x $s3,28($key)
774
775 br $ra
776 .size _s390x_AES_decrypt,.-_s390x_AES_decrypt
777 ___
778
779 $code.=<<___;
780 # void AES_set_encrypt_key(const unsigned char *in, int bits,
781 # AES_KEY *key) {
782 .globl private_AES_set_encrypt_key
783 .type private_AES_set_encrypt_key,\@function
784 .align 16
785 private_AES_set_encrypt_key:
786 _s390x_AES_set_encrypt_key:
787 lghi $t0,0
788 cl${g}r $inp,$t0
789 je .Lminus1
790 cl${g}r $key,$t0
791 je .Lminus1
792
793 lghi $t0,128
794 clr $bits,$t0
795 je .Lproceed
796 lghi $t0,192
797 clr $bits,$t0
798 je .Lproceed
799 lghi $t0,256
800 clr $bits,$t0
801 je .Lproceed
802 lghi %r2,-2
803 br %r14
804
805 .align 16
806 .Lproceed:
807 ___
808 $code.=<<___ if (!$softonly);
809 # convert bits to km code, [128,192,256]->[18,19,20]
810 lhi %r5,-128
811 lhi %r0,18
812 ar %r5,$bits
813 srl %r5,6
814 ar %r5,%r0
815
816 larl %r1,OPENSSL_s390xcap_P
817 lg %r0,0(%r1)
818 tmhl %r0,0x4000 # check for message-security assist
819 jz .Lekey_internal
820
821 lghi %r0,0 # query capability vector
822 la %r1,16($sp)
823 .long 0xb92f0042 # kmc %r4,%r2
824
825 llihh %r1,0x8000
826 srlg %r1,%r1,0(%r5)
827 ng %r1,16($sp)
828 jz .Lekey_internal
829
830 lmg %r0,%r1,0($inp) # just copy 128 bits...
831 stmg %r0,%r1,0($key)
832 lhi %r0,192
833 cr $bits,%r0
834 jl 1f
835 lg %r1,16($inp)
836 stg %r1,16($key)
837 je 1f
838 lg %r1,24($inp)
839 stg %r1,24($key)
840 1: st $bits,236($key) # save bits [for debugging purposes]
841 lgr $t0,%r5
842 st %r5,240($key) # save km code
843 lghi %r2,0
844 br %r14
845 ___
846 $code.=<<___;
847 .align 16
848 .Lekey_internal:
849 stm${g} %r4,%r13,4*$SIZE_T($sp) # all non-volatile regs and $key
850
851 larl $tbl,AES_Te+2048
852
853 llgf $s0,0($inp)
854 llgf $s1,4($inp)
855 llgf $s2,8($inp)
856 llgf $s3,12($inp)
857 st $s0,0($key)
858 st $s1,4($key)
859 st $s2,8($key)
860 st $s3,12($key)
861 lghi $t0,128
862 cr $bits,$t0
863 jne .Lnot128
864
865 llill $mask,0xff
866 lghi $t3,0 # i=0
867 lghi $rounds,10
868 st $rounds,240($key)
869
870 llgfr $t2,$s3 # temp=rk[3]
871 srlg $i1,$s3,8
872 srlg $i2,$s3,16
873 srlg $i3,$s3,24
874 nr $t2,$mask
875 nr $i1,$mask
876 nr $i2,$mask
877
878 .align 16
879 .L128_loop:
880 la $t2,0($t2,$tbl)
881 la $i1,0($i1,$tbl)
882 la $i2,0($i2,$tbl)
883 la $i3,0($i3,$tbl)
884 icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
885 icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
886 icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
887 icm $t2,1,0($i3) # Te4[rk[3]>>24]
888 x $t2,256($t3,$tbl) # rcon[i]
889 xr $s0,$t2 # rk[4]=rk[0]^...
890 xr $s1,$s0 # rk[5]=rk[1]^rk[4]
891 xr $s2,$s1 # rk[6]=rk[2]^rk[5]
892 xr $s3,$s2 # rk[7]=rk[3]^rk[6]
893
894 llgfr $t2,$s3 # temp=rk[3]
895 srlg $i1,$s3,8
896 srlg $i2,$s3,16
897 nr $t2,$mask
898 nr $i1,$mask
899 srlg $i3,$s3,24
900 nr $i2,$mask
901
902 st $s0,16($key)
903 st $s1,20($key)
904 st $s2,24($key)
905 st $s3,28($key)
906 la $key,16($key) # key+=4
907 la $t3,4($t3) # i++
908 brct $rounds,.L128_loop
909 lghi $t0,10
910 lghi %r2,0
911 lm${g} %r4,%r13,4*$SIZE_T($sp)
912 br $ra
913
914 .align 16
915 .Lnot128:
916 llgf $t0,16($inp)
917 llgf $t1,20($inp)
918 st $t0,16($key)
919 st $t1,20($key)
920 lghi $t0,192
921 cr $bits,$t0
922 jne .Lnot192
923
924 llill $mask,0xff
925 lghi $t3,0 # i=0
926 lghi $rounds,12
927 st $rounds,240($key)
928 lghi $rounds,8
929
930 srlg $i1,$t1,8
931 srlg $i2,$t1,16
932 srlg $i3,$t1,24
933 nr $t1,$mask
934 nr $i1,$mask
935 nr $i2,$mask
936
937 .align 16
938 .L192_loop:
939 la $t1,0($t1,$tbl)
940 la $i1,0($i1,$tbl)
941 la $i2,0($i2,$tbl)
942 la $i3,0($i3,$tbl)
943 icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8
944 icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16
945 icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24
946 icm $t1,1,0($i3) # Te4[rk[5]>>24]
947 x $t1,256($t3,$tbl) # rcon[i]
948 xr $s0,$t1 # rk[6]=rk[0]^...
949 xr $s1,$s0 # rk[7]=rk[1]^rk[6]
950 xr $s2,$s1 # rk[8]=rk[2]^rk[7]
951 xr $s3,$s2 # rk[9]=rk[3]^rk[8]
952
953 st $s0,24($key)
954 st $s1,28($key)
955 st $s2,32($key)
956 st $s3,36($key)
957 brct $rounds,.L192_continue
958 lghi $t0,12
959 lghi %r2,0
960 lm${g} %r4,%r13,4*$SIZE_T($sp)
961 br $ra
962
963 .align 16
964 .L192_continue:
965 lgr $t1,$s3
966 x $t1,16($key) # rk[10]=rk[4]^rk[9]
967 st $t1,40($key)
968 x $t1,20($key) # rk[11]=rk[5]^rk[10]
969 st $t1,44($key)
970
971 srlg $i1,$t1,8
972 srlg $i2,$t1,16
973 srlg $i3,$t1,24
974 nr $t1,$mask
975 nr $i1,$mask
976 nr $i2,$mask
977
978 la $key,24($key) # key+=6
979 la $t3,4($t3) # i++
980 j .L192_loop
981
982 .align 16
983 .Lnot192:
984 llgf $t0,24($inp)
985 llgf $t1,28($inp)
986 st $t0,24($key)
987 st $t1,28($key)
988 llill $mask,0xff
989 lghi $t3,0 # i=0
990 lghi $rounds,14
991 st $rounds,240($key)
992 lghi $rounds,7
993
994 srlg $i1,$t1,8
995 srlg $i2,$t1,16
996 srlg $i3,$t1,24
997 nr $t1,$mask
998 nr $i1,$mask
999 nr $i2,$mask
1000
1001 .align 16
1002 .L256_loop:
1003 la $t1,0($t1,$tbl)
1004 la $i1,0($i1,$tbl)
1005 la $i2,0($i2,$tbl)
1006 la $i3,0($i3,$tbl)
1007 icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8
1008 icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16
1009 icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24
1010 icm $t1,1,0($i3) # Te4[rk[7]>>24]
1011 x $t1,256($t3,$tbl) # rcon[i]
1012 xr $s0,$t1 # rk[8]=rk[0]^...
1013 xr $s1,$s0 # rk[9]=rk[1]^rk[8]
1014 xr $s2,$s1 # rk[10]=rk[2]^rk[9]
1015 xr $s3,$s2 # rk[11]=rk[3]^rk[10]
1016 st $s0,32($key)
1017 st $s1,36($key)
1018 st $s2,40($key)
1019 st $s3,44($key)
1020 brct $rounds,.L256_continue
1021 lghi $t0,14
1022 lghi %r2,0
1023 lm${g} %r4,%r13,4*$SIZE_T($sp)
1024 br $ra
1025
1026 .align 16
1027 .L256_continue:
1028 lgr $t1,$s3 # temp=rk[11]
1029 srlg $i1,$s3,8
1030 srlg $i2,$s3,16
1031 srlg $i3,$s3,24
1032 nr $t1,$mask
1033 nr $i1,$mask
1034 nr $i2,$mask
1035 la $t1,0($t1,$tbl)
1036 la $i1,0($i1,$tbl)
1037 la $i2,0($i2,$tbl)
1038 la $i3,0($i3,$tbl)
1039 llgc $t1,0($t1) # Te4[rk[11]>>0]
1040 icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8
1041 icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16
1042 icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24
1043 x $t1,16($key) # rk[12]=rk[4]^...
1044 st $t1,48($key)
1045 x $t1,20($key) # rk[13]=rk[5]^rk[12]
1046 st $t1,52($key)
1047 x $t1,24($key) # rk[14]=rk[6]^rk[13]
1048 st $t1,56($key)
1049 x $t1,28($key) # rk[15]=rk[7]^rk[14]
1050 st $t1,60($key)
1051
1052 srlg $i1,$t1,8
1053 srlg $i2,$t1,16
1054 srlg $i3,$t1,24
1055 nr $t1,$mask
1056 nr $i1,$mask
1057 nr $i2,$mask
1058
1059 la $key,32($key) # key+=8
1060 la $t3,4($t3) # i++
1061 j .L256_loop
1062
1063 .Lminus1:
1064 lghi %r2,-1
1065 br $ra
1066 .size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
1067
1068 # void AES_set_decrypt_key(const unsigned char *in, int bits,
1069 # AES_KEY *key) {
1070 .globl private_AES_set_decrypt_key
1071 .type private_AES_set_decrypt_key,\@function
1072 .align 16
1073 private_AES_set_decrypt_key:
1074 #st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to
1075 st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers and $key!
1076 bras $ra,_s390x_AES_set_encrypt_key
1077 #l${g} $key,4*$SIZE_T($sp)
1078 l${g} $ra,14*$SIZE_T($sp)
1079 ltgr %r2,%r2
1080 bnzr $ra
1081 ___
# Hardware path: if the KM facility covers this key length (function
# code < 16 was stored in 240($key) by the encrypt-key routine), there
# is no software schedule to invert -- just set the "decrypt" bit in
# the stored function code and return.
$code.=<<___ if (!$softonly);
#	l	$t0,240($key)
	lhi	$t1,16
	cr	$t0,$t1
	jl	.Lgo
	oill	$t0,0x80	# set "decrypt" bit
	st	$t0,240($key)
	br	$ra
___
# Software path, step 1: swap round key blocks end-for-end in place
# ($i1 walks forward from the start, $i2 walks backward from the end).
$code.=<<___;
.align	16
.Lgo:	lgr	$rounds,$t0	#llgf	$rounds,240($key)
	la	$i1,0($key)
	sllg	$i2,$rounds,4
	la	$i2,0($i2,$key)
	srl	$rounds,1
	lghi	$t1,-16

.align	16
.Linv:	lmg	$s0,$s1,0($i1)
	lmg	$s2,$s3,0($i2)
	stmg	$s0,$s1,0($i2)
	stmg	$s2,$s3,0($i1)
	la	$i1,16($i1)
	la	$i2,0($t1,$i2)
	brct	$rounds,.Linv
___
# Re-purpose three scratch registers as the bit masks used by the
# branchless GF(2^8) doubling below (0x80 = high bit, 0x1b = AES
# reduction polynomial, 0xfe = shifted-byte mask).
$mask80=$i1;
$mask1b=$i2;
$maskfe=$i3;
# Software path, step 2: apply InvMixColumns to every round key word
# except the first and last round (hence (rounds-1)*4 iterations),
# computing tp2/tp4/tp8 doublings and combining rotated copies.
$code.=<<___;
	llgf	$rounds,240($key)
	aghi	$rounds,-1
	sll	$rounds,2	# (rounds-1)*4
	llilh	$mask80,0x8080
	llilh	$mask1b,0x1b1b
	llilh	$maskfe,0xfefe
	oill	$mask80,0x8080
	oill	$mask1b,0x1b1b
	oill	$maskfe,0xfefe

.align	16
.Lmix:	l	$s0,16($key)	# tp1
	lr	$s1,$s0
	ngr	$s1,$mask80
	srlg	$t1,$s1,7
	slr	$s1,$t1
	nr	$s1,$mask1b
	sllg	$t1,$s0,1
	nr	$t1,$maskfe
	xr	$s1,$t1		# tp2

	lr	$s2,$s1
	ngr	$s2,$mask80
	srlg	$t1,$s2,7
	slr	$s2,$t1
	nr	$s2,$mask1b
	sllg	$t1,$s1,1
	nr	$t1,$maskfe
	xr	$s2,$t1		# tp4

	lr	$s3,$s2
	ngr	$s3,$mask80
	srlg	$t1,$s3,7
	slr	$s3,$t1
	nr	$s3,$mask1b
	sllg	$t1,$s2,1
	nr	$t1,$maskfe
	xr	$s3,$t1		# tp8

	xr	$s1,$s0		# tp2^tp1
	xr	$s2,$s0		# tp4^tp1
	rll	$s0,$s0,24	# = ROTATE(tp1,8)
	xr	$s2,$s3		# ^=tp8
	xr	$s0,$s1		# ^=tp2^tp1
	xr	$s1,$s3		# tp2^tp1^tp8
	xr	$s0,$s2		# ^=tp4^tp1^tp8
	rll	$s1,$s1,8
	rll	$s2,$s2,16
	xr	$s0,$s1		# ^= ROTATE(tp8^tp2^tp1,24)
	rll	$s3,$s3,24
	xr	$s0,$s2		# ^= ROTATE(tp8^tp4^tp1,16)
	xr	$s0,$s3		# ^= ROTATE(tp8,8)

	st	$s0,16($key)
	la	$key,4($key)
	brct	$rounds,.Lmix

	lm${g}	%r6,%r13,6*$SIZE_T($sp)# as was saved by AES_set_encrypt_key!
	lghi	%r2,0
	br	$ra
.size	private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
___
1175
1176 ########################################################################
1177 # void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
1178 # size_t length, const AES_KEY *key,
1179 # unsigned char *ivec, const int enc)
1180 {
1181 my $inp="%r2";
1182 my $out="%r4"; # length and out are swapped
1183 my $len="%r3";
1184 my $key="%r5";
1185 my $ivp="%r6";
1186
1187 $code.=<<___;
1188 .globl AES_cbc_encrypt
1189 .type AES_cbc_encrypt,\@function
1190 .align 16
1191 AES_cbc_encrypt:
1192 xgr %r3,%r4 # flip %r3 and %r4, out and len
1193 xgr %r4,%r3
1194 xgr %r3,%r4
1195 ___
1196 $code.=<<___ if (!$softonly);
1197 lhi %r0,16
1198 cl %r0,240($key)
1199 jh .Lcbc_software
1200
1201 lg %r0,0($ivp) # copy ivec
1202 lg %r1,8($ivp)
1203 stmg %r0,%r1,16($sp)
1204 lmg %r0,%r1,0($key) # copy key, cover 256 bit
1205 stmg %r0,%r1,32($sp)
1206 lmg %r0,%r1,16($key)
1207 stmg %r0,%r1,48($sp)
1208 l %r0,240($key) # load kmc code
1209 lghi $key,15 # res=len%16, len-=res;
1210 ngr $key,$len
1211 sl${g}r $len,$key
1212 la %r1,16($sp) # parameter block - ivec || key
1213 jz .Lkmc_truncated
1214 .long 0xb92f0042 # kmc %r4,%r2
1215 brc 1,.-4 # pay attention to "partial completion"
1216 ltr $key,$key
1217 jnz .Lkmc_truncated
1218 .Lkmc_done:
1219 lmg %r0,%r1,16($sp) # copy ivec to caller
1220 stg %r0,0($ivp)
1221 stg %r1,8($ivp)
1222 br $ra
1223 .align 16
1224 .Lkmc_truncated:
1225 ahi $key,-1 # it's the way it's encoded in mvc
1226 tmll %r0,0x80
1227 jnz .Lkmc_truncated_dec
1228 lghi %r1,0
1229 stg %r1,16*$SIZE_T($sp)
1230 stg %r1,16*$SIZE_T+8($sp)
1231 bras %r1,1f
1232 mvc 16*$SIZE_T(1,$sp),0($inp)
1233 1: ex $key,0(%r1)
1234 la %r1,16($sp) # restore parameter block
1235 la $inp,16*$SIZE_T($sp)
1236 lghi $len,16
1237 .long 0xb92f0042 # kmc %r4,%r2
1238 j .Lkmc_done
1239 .align 16
1240 .Lkmc_truncated_dec:
1241 st${g} $out,4*$SIZE_T($sp)
1242 la $out,16*$SIZE_T($sp)
1243 lghi $len,16
1244 .long 0xb92f0042 # kmc %r4,%r2
1245 l${g} $out,4*$SIZE_T($sp)
1246 bras %r1,2f
1247 mvc 0(1,$out),16*$SIZE_T($sp)
1248 2: ex $key,0(%r1)
1249 j .Lkmc_done
1250 .align 16
1251 .Lcbc_software:
1252 ___
1253 $code.=<<___;
1254 stm${g} $key,$ra,5*$SIZE_T($sp)
1255 lhi %r0,0
1256 cl %r0,`$stdframe+$SIZE_T-4`($sp)
1257 je .Lcbc_decrypt
1258
1259 larl $tbl,AES_Te
1260
1261 llgf $s0,0($ivp)
1262 llgf $s1,4($ivp)
1263 llgf $s2,8($ivp)
1264 llgf $s3,12($ivp)
1265
1266 lghi $t0,16
1267 sl${g}r $len,$t0
1268 brc 4,.Lcbc_enc_tail # if borrow
1269 .Lcbc_enc_loop:
1270 stm${g} $inp,$out,2*$SIZE_T($sp)
1271 x $s0,0($inp)
1272 x $s1,4($inp)
1273 x $s2,8($inp)
1274 x $s3,12($inp)
1275 lgr %r4,$key
1276
1277 bras $ra,_s390x_AES_encrypt
1278
1279 lm${g} $inp,$key,2*$SIZE_T($sp)
1280 st $s0,0($out)
1281 st $s1,4($out)
1282 st $s2,8($out)
1283 st $s3,12($out)
1284
1285 la $inp,16($inp)
1286 la $out,16($out)
1287 lghi $t0,16
1288 lt${g}r $len,$len
1289 jz .Lcbc_enc_done
1290 sl${g}r $len,$t0
1291 brc 4,.Lcbc_enc_tail # if borrow
1292 j .Lcbc_enc_loop
1293 .align 16
1294 .Lcbc_enc_done:
1295 l${g} $ivp,6*$SIZE_T($sp)
1296 st $s0,0($ivp)
1297 st $s1,4($ivp)
1298 st $s2,8($ivp)
1299 st $s3,12($ivp)
1300
1301 lm${g} %r7,$ra,7*$SIZE_T($sp)
1302 br $ra
1303
1304 .align 16
1305 .Lcbc_enc_tail:
1306 aghi $len,15
1307 lghi $t0,0
1308 stg $t0,16*$SIZE_T($sp)
1309 stg $t0,16*$SIZE_T+8($sp)
1310 bras $t1,3f
1311 mvc 16*$SIZE_T(1,$sp),0($inp)
1312 3: ex $len,0($t1)
1313 lghi $len,0
1314 la $inp,16*$SIZE_T($sp)
1315 j .Lcbc_enc_loop
1316
1317 .align 16
1318 .Lcbc_decrypt:
1319 larl $tbl,AES_Td
1320
1321 lg $t0,0($ivp)
1322 lg $t1,8($ivp)
1323 stmg $t0,$t1,16*$SIZE_T($sp)
1324
1325 .Lcbc_dec_loop:
1326 stm${g} $inp,$out,2*$SIZE_T($sp)
1327 llgf $s0,0($inp)
1328 llgf $s1,4($inp)
1329 llgf $s2,8($inp)
1330 llgf $s3,12($inp)
1331 lgr %r4,$key
1332
1333 bras $ra,_s390x_AES_decrypt
1334
1335 lm${g} $inp,$key,2*$SIZE_T($sp)
1336 sllg $s0,$s0,32
1337 sllg $s2,$s2,32
1338 lr $s0,$s1
1339 lr $s2,$s3
1340
1341 lg $t0,0($inp)
1342 lg $t1,8($inp)
1343 xg $s0,16*$SIZE_T($sp)
1344 xg $s2,16*$SIZE_T+8($sp)
1345 lghi $s1,16
1346 sl${g}r $len,$s1
1347 brc 4,.Lcbc_dec_tail # if borrow
1348 brc 2,.Lcbc_dec_done # if zero
1349 stg $s0,0($out)
1350 stg $s2,8($out)
1351 stmg $t0,$t1,16*$SIZE_T($sp)
1352
1353 la $inp,16($inp)
1354 la $out,16($out)
1355 j .Lcbc_dec_loop
1356
1357 .Lcbc_dec_done:
1358 stg $s0,0($out)
1359 stg $s2,8($out)
1360 .Lcbc_dec_exit:
1361 lm${g} %r6,$ra,6*$SIZE_T($sp)
1362 stmg $t0,$t1,0($ivp)
1363
1364 br $ra
1365
1366 .align 16
1367 .Lcbc_dec_tail:
1368 aghi $len,15
1369 stg $s0,16*$SIZE_T($sp)
1370 stg $s2,16*$SIZE_T+8($sp)
1371 bras $s1,4f
1372 mvc 0(1,$out),16*$SIZE_T($sp)
1373 4: ex $len,0($s1)
1374 j .Lcbc_dec_exit
1375 .size AES_cbc_encrypt,.-AES_cbc_encrypt
1376 ___
1377 }
1378 ########################################################################
1379 # void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
1380 # size_t blocks, const AES_KEY *key,
1381 # const unsigned char *ivec)
1382 {
1383 my $inp="%r2";
1384 my $out="%r4"; # blocks and out are swapped
1385 my $len="%r3";
1386 my $key="%r5"; my $iv0="%r5";
1387 my $ivp="%r6";
1388 my $fp ="%r7";
1389
1390 $code.=<<___;
1391 .globl AES_ctr32_encrypt
1392 .type AES_ctr32_encrypt,\@function
1393 .align 16
1394 AES_ctr32_encrypt:
1395 xgr %r3,%r4 # flip %r3 and %r4, $out and $len
1396 xgr %r4,%r3
1397 xgr %r3,%r4
1398 llgfr $len,$len # safe in ctr32 subroutine even in 64-bit case
1399 ___
1400 $code.=<<___ if (!$softonly);
1401 l %r0,240($key)
1402 lhi %r1,16
1403 clr %r0,%r1
1404 jl .Lctr32_software
1405
1406 stm${g} %r6,$s3,6*$SIZE_T($sp)
1407
1408 slgr $out,$inp
1409 la %r1,0($key) # %r1 is permanent copy of $key
1410 lg $iv0,0($ivp) # load ivec
1411 lg $ivp,8($ivp)
1412
1413 # prepare and allocate stack frame at the top of 4K page
1414 # with 1K reserved for eventual signal handling
1415 lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer
1416 lghi $s1,-4096
1417 algr $s0,$sp
1418 lgr $fp,$sp
1419 ngr $s0,$s1 # align at page boundary
1420 slgr $fp,$s0 # total buffer size
1421 lgr $s2,$sp
1422 lghi $s1,1024+16 # sl[g]fi is extended-immediate facility
1423 slgr $fp,$s1 # deduct reservation to get usable buffer size
1424 # buffer size is at lest 256 and at most 3072+256-16
1425
1426 la $sp,1024($s0) # alloca
1427 srlg $fp,$fp,4 # convert bytes to blocks, minimum 16
1428 st${g} $s2,0($sp) # back-chain
1429 st${g} $fp,$SIZE_T($sp)
1430
1431 slgr $len,$fp
1432 brc 1,.Lctr32_hw_switch # not zero, no borrow
1433 algr $fp,$len # input is shorter than allocated buffer
1434 lghi $len,0
1435 st${g} $fp,$SIZE_T($sp)
1436
1437 .Lctr32_hw_switch:
1438 ___
1439 $code.=<<___ if (0); ######### kmctr code was measured to be ~12% slower
1440 larl $s0,OPENSSL_s390xcap_P
1441 lg $s0,8($s0)
1442 tmhh $s0,0x0004 # check for message_security-assist-4
1443 jz .Lctr32_km_loop
1444
1445 llgfr $s0,%r0
1446 lgr $s1,%r1
1447 lghi %r0,0
1448 la %r1,16($sp)
1449 .long 0xb92d2042 # kmctr %r4,%r2,%r2
1450
1451 llihh %r0,0x8000 # check if kmctr supports the function code
1452 srlg %r0,%r0,0($s0)
1453 ng %r0,16($sp)
1454 lgr %r0,$s0
1455 lgr %r1,$s1
1456 jz .Lctr32_km_loop
1457
1458 ####### kmctr code
1459 algr $out,$inp # restore $out
1460 lgr $s1,$len # $s1 undertakes $len
1461 j .Lctr32_kmctr_loop
1462 .align 16
1463 .Lctr32_kmctr_loop:
1464 la $s2,16($sp)
1465 lgr $s3,$fp
1466 .Lctr32_kmctr_prepare:
1467 stg $iv0,0($s2)
1468 stg $ivp,8($s2)
1469 la $s2,16($s2)
1470 ahi $ivp,1 # 32-bit increment, preserves upper half
1471 brct $s3,.Lctr32_kmctr_prepare
1472
1473 #la $inp,0($inp) # inp
1474 sllg $len,$fp,4 # len
1475 #la $out,0($out) # out
1476 la $s2,16($sp) # iv
1477 .long 0xb92da042 # kmctr $out,$s2,$inp
1478 brc 1,.-4 # pay attention to "partial completion"
1479
1480 slgr $s1,$fp
1481 brc 1,.Lctr32_kmctr_loop # not zero, no borrow
1482 algr $fp,$s1
1483 lghi $s1,0
1484 brc 4+1,.Lctr32_kmctr_loop # not zero
1485
1486 l${g} $sp,0($sp)
1487 lm${g} %r6,$s3,6*$SIZE_T($sp)
1488 br $ra
1489 .align 16
1490 ___
1491 $code.=<<___;
1492 .Lctr32_km_loop:
1493 la $s2,16($sp)
1494 lgr $s3,$fp
1495 .Lctr32_km_prepare:
1496 stg $iv0,0($s2)
1497 stg $ivp,8($s2)
1498 la $s2,16($s2)
1499 ahi $ivp,1 # 32-bit increment, preserves upper half
1500 brct $s3,.Lctr32_km_prepare
1501
1502 la $s0,16($sp) # inp
1503 sllg $s1,$fp,4 # len
1504 la $s2,16($sp) # out
1505 .long 0xb92e00a8 # km %r10,%r8
1506 brc 1,.-4 # pay attention to "partial completion"
1507
1508 la $s2,16($sp)
1509 lgr $s3,$fp
1510 slgr $s2,$inp
1511 .Lctr32_km_xor:
1512 lg $s0,0($inp)
1513 lg $s1,8($inp)
1514 xg $s0,0($s2,$inp)
1515 xg $s1,8($s2,$inp)
1516 stg $s0,0($out,$inp)
1517 stg $s1,8($out,$inp)
1518 la $inp,16($inp)
1519 brct $s3,.Lctr32_km_xor
1520
1521 slgr $len,$fp
1522 brc 1,.Lctr32_km_loop # not zero, no borrow
1523 algr $fp,$len
1524 lghi $len,0
1525 brc 4+1,.Lctr32_km_loop # not zero
1526
1527 l${g} $s0,0($sp)
1528 l${g} $s1,$SIZE_T($sp)
1529 la $s2,16($sp)
1530 .Lctr32_km_zap:
1531 stg $s0,0($s2)
1532 stg $s0,8($s2)
1533 la $s2,16($s2)
1534 brct $s1,.Lctr32_km_zap
1535
1536 la $sp,0($s0)
1537 lm${g} %r6,$s3,6*$SIZE_T($sp)
1538 br $ra
1539 .align 16
1540 .Lctr32_software:
1541 ___
1542 $code.=<<___;
1543 stm${g} $key,$ra,5*$SIZE_T($sp)
1544 sl${g}r $inp,$out
1545 larl $tbl,AES_Te
1546 llgf $t1,12($ivp)
1547
1548 .Lctr32_loop:
1549 stm${g} $inp,$out,2*$SIZE_T($sp)
1550 llgf $s0,0($ivp)
1551 llgf $s1,4($ivp)
1552 llgf $s2,8($ivp)
1553 lgr $s3,$t1
1554 st $t1,16*$SIZE_T($sp)
1555 lgr %r4,$key
1556
1557 bras $ra,_s390x_AES_encrypt
1558
1559 lm${g} $inp,$ivp,2*$SIZE_T($sp)
1560 llgf $t1,16*$SIZE_T($sp)
1561 x $s0,0($inp,$out)
1562 x $s1,4($inp,$out)
1563 x $s2,8($inp,$out)
1564 x $s3,12($inp,$out)
1565 stm $s0,$s3,0($out)
1566
1567 la $out,16($out)
1568 ahi $t1,1 # 32-bit increment
1569 brct $len,.Lctr32_loop
1570
1571 lm${g} %r6,$ra,6*$SIZE_T($sp)
1572 br $ra
1573 .size AES_ctr32_encrypt,.-AES_ctr32_encrypt
1574 ___
1575 }
1576
1577 ########################################################################
1578 # void AES_xts_encrypt(const char *inp,char *out,size_t len,
1579 # const AES_KEY *key1, const AES_KEY *key2,
1580 # const unsigned char iv[16]);
1581 #
1582 {
1583 my $inp="%r2";
1584 my $out="%r4"; # len and out are swapped
1585 my $len="%r3";
1586 my $key1="%r5"; # $i1
1587 my $key2="%r6"; # $i2
1588 my $fp="%r7"; # $i3
1589 my $tweak=16*$SIZE_T+16; # or $stdframe-16, bottom of the frame...
1590
1591 $code.=<<___;
1592 .type _s390x_xts_km,\@function
1593 .align 16
1594 _s390x_xts_km:
1595 ___
1596 $code.=<<___ if(1);
1597 llgfr $s0,%r0 # put aside the function code
1598 lghi $s1,0x7f
1599 nr $s1,%r0
1600 lghi %r0,0 # query capability vector
1601 la %r1,$tweak-16($sp)
1602 .long 0xb92e0042 # km %r4,%r2
1603 llihh %r1,0x8000
1604 srlg %r1,%r1,32($s1) # check for 32+function code
1605 ng %r1,$tweak-16($sp)
1606 lgr %r0,$s0 # restore the function code
1607 la %r1,0($key1) # restore $key1
1608 jz .Lxts_km_vanilla
1609
1610 lmg $i2,$i3,$tweak($sp) # put aside the tweak value
1611 algr $out,$inp
1612
1613 oill %r0,32 # switch to xts function code
1614 aghi $s1,-18 #
1615 sllg $s1,$s1,3 # (function code - 18)*8, 0 or 16
1616 la %r1,$tweak-16($sp)
1617 slgr %r1,$s1 # parameter block position
1618 lmg $s0,$s3,0($key1) # load 256 bits of key material,
1619 stmg $s0,$s3,0(%r1) # and copy it to parameter block.
1620 # yes, it contains junk and overlaps
1621 # with the tweak in 128-bit case.
1622 # it's done to avoid conditional
1623 # branch.
1624 stmg $i2,$i3,$tweak($sp) # "re-seat" the tweak value
1625
1626 .long 0xb92e0042 # km %r4,%r2
1627 brc 1,.-4 # pay attention to "partial completion"
1628
1629 lrvg $s0,$tweak+0($sp) # load the last tweak
1630 lrvg $s1,$tweak+8($sp)
1631 stmg %r0,%r3,$tweak-32($sp) # wipe copy of the key
1632
1633 nill %r0,0xffdf # switch back to original function code
1634 la %r1,0($key1) # restore pointer to $key1
1635 slgr $out,$inp
1636
1637 llgc $len,2*$SIZE_T-1($sp)
1638 nill $len,0x0f # $len%=16
1639 br $ra
1640
1641 .align 16
1642 .Lxts_km_vanilla:
1643 ___
1644 $code.=<<___;
1645 # prepare and allocate stack frame at the top of 4K page
1646 # with 1K reserved for eventual signal handling
1647 lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer
1648 lghi $s1,-4096
1649 algr $s0,$sp
1650 lgr $fp,$sp
1651 ngr $s0,$s1 # align at page boundary
1652 slgr $fp,$s0 # total buffer size
1653 lgr $s2,$sp
1654 lghi $s1,1024+16 # sl[g]fi is extended-immediate facility
1655 slgr $fp,$s1 # deduct reservation to get usable buffer size
1656 # buffer size is at lest 256 and at most 3072+256-16
1657
1658 la $sp,1024($s0) # alloca
1659 nill $fp,0xfff0 # round to 16*n
1660 st${g} $s2,0($sp) # back-chain
1661 nill $len,0xfff0 # redundant
1662 st${g} $fp,$SIZE_T($sp)
1663
1664 slgr $len,$fp
1665 brc 1,.Lxts_km_go # not zero, no borrow
1666 algr $fp,$len # input is shorter than allocated buffer
1667 lghi $len,0
1668 st${g} $fp,$SIZE_T($sp)
1669
1670 .Lxts_km_go:
1671 lrvg $s0,$tweak+0($s2) # load the tweak value in little-endian
1672 lrvg $s1,$tweak+8($s2)
1673
1674 la $s2,16($sp) # vector of ascending tweak values
1675 slgr $s2,$inp
1676 srlg $s3,$fp,4
1677 j .Lxts_km_start
1678
1679 .Lxts_km_loop:
1680 la $s2,16($sp)
1681 slgr $s2,$inp
1682 srlg $s3,$fp,4
1683 .Lxts_km_prepare:
1684 lghi $i1,0x87
1685 srag $i2,$s1,63 # broadcast upper bit
1686 ngr $i1,$i2 # rem
1687 algr $s0,$s0
1688 alcgr $s1,$s1
1689 xgr $s0,$i1
1690 .Lxts_km_start:
1691 lrvgr $i1,$s0 # flip byte order
1692 lrvgr $i2,$s1
1693 stg $i1,0($s2,$inp)
1694 stg $i2,8($s2,$inp)
1695 xg $i1,0($inp)
1696 xg $i2,8($inp)
1697 stg $i1,0($out,$inp)
1698 stg $i2,8($out,$inp)
1699 la $inp,16($inp)
1700 brct $s3,.Lxts_km_prepare
1701
1702 slgr $inp,$fp # rewind $inp
1703 la $s2,0($out,$inp)
1704 lgr $s3,$fp
1705 .long 0xb92e00aa # km $s2,$s2
1706 brc 1,.-4 # pay attention to "partial completion"
1707
1708 la $s2,16($sp)
1709 slgr $s2,$inp
1710 srlg $s3,$fp,4
1711 .Lxts_km_xor:
1712 lg $i1,0($out,$inp)
1713 lg $i2,8($out,$inp)
1714 xg $i1,0($s2,$inp)
1715 xg $i2,8($s2,$inp)
1716 stg $i1,0($out,$inp)
1717 stg $i2,8($out,$inp)
1718 la $inp,16($inp)
1719 brct $s3,.Lxts_km_xor
1720
1721 slgr $len,$fp
1722 brc 1,.Lxts_km_loop # not zero, no borrow
1723 algr $fp,$len
1724 lghi $len,0
1725 brc 4+1,.Lxts_km_loop # not zero
1726
1727 l${g} $i1,0($sp) # back-chain
1728 llgf $fp,`2*$SIZE_T-4`($sp) # bytes used
1729 la $i2,16($sp)
1730 srlg $fp,$fp,4
1731 .Lxts_km_zap:
1732 stg $i1,0($i2)
1733 stg $i1,8($i2)
1734 la $i2,16($i2)
1735 brct $fp,.Lxts_km_zap
1736
1737 la $sp,0($i1)
1738 llgc $len,2*$SIZE_T-1($i1)
1739 nill $len,0x0f # $len%=16
1740 bzr $ra
1741
1742 # generate one more tweak...
1743 lghi $i1,0x87
1744 srag $i2,$s1,63 # broadcast upper bit
1745 ngr $i1,$i2 # rem
1746 algr $s0,$s0
1747 alcgr $s1,$s1
1748 xgr $s0,$i1
1749
1750 ltr $len,$len # clear zero flag
1751 br $ra
1752 .size _s390x_xts_km,.-_s390x_xts_km
1753
1754 .globl AES_xts_encrypt
1755 .type AES_xts_encrypt,\@function
1756 .align 16
1757 AES_xts_encrypt:
1758 xgr %r3,%r4 # flip %r3 and %r4, $out and $len
1759 xgr %r4,%r3
1760 xgr %r3,%r4
1761 ___
1762 $code.=<<___ if ($SIZE_T==4);
1763 llgfr $len,$len
1764 ___
1765 $code.=<<___;
1766 st${g} $len,1*$SIZE_T($sp) # save copy of $len
1767 srag $len,$len,4 # formally wrong, because it expands
1768 # sign byte, but who can afford asking
1769 # to process more than 2^63-1 bytes?
1770 # I use it, because it sets condition
1771 # code...
1772 bcr 8,$ra # abort if zero (i.e. less than 16)
1773 ___
1774 $code.=<<___ if (!$softonly);
1775 llgf %r0,240($key2)
1776 lhi %r1,16
1777 clr %r0,%r1
1778 jl .Lxts_enc_software
1779
1780 st${g} $ra,5*$SIZE_T($sp)
1781 stm${g} %r6,$s3,6*$SIZE_T($sp)
1782
1783 sllg $len,$len,4 # $len&=~15
1784 slgr $out,$inp
1785
1786 # generate the tweak value
1787 l${g} $s3,$stdframe($sp) # pointer to iv
1788 la $s2,$tweak($sp)
1789 lmg $s0,$s1,0($s3)
1790 lghi $s3,16
1791 stmg $s0,$s1,0($s2)
1792 la %r1,0($key2) # $key2 is not needed anymore
1793 .long 0xb92e00aa # km $s2,$s2, generate the tweak
1794 brc 1,.-4 # can this happen?
1795
1796 l %r0,240($key1)
1797 la %r1,0($key1) # $key1 is not needed anymore
1798 bras $ra,_s390x_xts_km
1799 jz .Lxts_enc_km_done
1800
1801 aghi $inp,-16 # take one step back
1802 la $i3,0($out,$inp) # put aside real $out
1803 .Lxts_enc_km_steal:
1804 llgc $i1,16($inp)
1805 llgc $i2,0($out,$inp)
1806 stc $i1,0($out,$inp)
1807 stc $i2,16($out,$inp)
1808 la $inp,1($inp)
1809 brct $len,.Lxts_enc_km_steal
1810
1811 la $s2,0($i3)
1812 lghi $s3,16
1813 lrvgr $i1,$s0 # flip byte order
1814 lrvgr $i2,$s1
1815 xg $i1,0($s2)
1816 xg $i2,8($s2)
1817 stg $i1,0($s2)
1818 stg $i2,8($s2)
1819 .long 0xb92e00aa # km $s2,$s2
1820 brc 1,.-4 # can this happen?
1821 lrvgr $i1,$s0 # flip byte order
1822 lrvgr $i2,$s1
1823 xg $i1,0($i3)
1824 xg $i2,8($i3)
1825 stg $i1,0($i3)
1826 stg $i2,8($i3)
1827
1828 .Lxts_enc_km_done:
1829 stg $sp,$tweak+0($sp) # wipe tweak
1830 stg $sp,$tweak+8($sp)
1831 l${g} $ra,5*$SIZE_T($sp)
1832 lm${g} %r6,$s3,6*$SIZE_T($sp)
1833 br $ra
1834 .align 16
1835 .Lxts_enc_software:
1836 ___
# Software XTS-encrypt path: derive the tweak by encrypting the iv
# with $key2 via table-driven _s390x_AES_encrypt, then per block
# XOR-encrypt-XOR with the tweak, doubling the tweak in GF(2^128)
# (reduction polynomial 0x87) between blocks; a trailing partial block
# is handled with ciphertext stealing.
#
# Fix: the tweak-wiping code at .Lxts_enc_done referenced "$twesk", a
# typo for "$tweak". The undefined variable interpolated as an empty
# string, so the generated instruction became "stg $sp,+8($sp)" --
# clobbering the word at offset 8 of the frame instead of wiping the
# second half of the tweak save area.
$code.=<<___;
	stm${g}	%r6,$ra,6*$SIZE_T($sp)

	slgr	$out,$inp

	l${g}	$s3,$stdframe($sp)	# ivp
	llgf	$s0,0($s3)		# load iv
	llgf	$s1,4($s3)
	llgf	$s2,8($s3)
	llgf	$s3,12($s3)
	stm${g}	%r2,%r5,2*$SIZE_T($sp)
	la	$key,0($key2)
	larl	$tbl,AES_Te
	bras	$ra,_s390x_AES_encrypt	# generate the tweak
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	stm	$s0,$s3,$tweak($sp)	# save the tweak
	j	.Lxts_enc_enter

.align	16
.Lxts_enc_loop:
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$s1,$s1			# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
	stg	$s1,$tweak+0($sp)	# save the tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak+8($sp)
	llgfr	$s3,$s3
	la	$inp,16($inp)		# $inp+=16
.Lxts_enc_enter:
	x	$s0,0($inp)		# ^=*($inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	stm${g}	%r2,%r3,2*$SIZE_T($sp)	# only two registers are changing
	la	$key,0($key1)
	bras	$ra,_s390x_AES_encrypt
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	x	$s0,$tweak+0($sp)	# ^=tweak
	x	$s1,$tweak+4($sp)
	x	$s2,$tweak+8($sp)
	x	$s3,$tweak+12($sp)
	st	$s0,0($out,$inp)
	st	$s1,4($out,$inp)
	st	$s2,8($out,$inp)
	st	$s3,12($out,$inp)
	brct${g}	$len,.Lxts_enc_loop

	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%16
	jz	.Lxts_enc_done

	la	$i3,0($inp,$out)	# put aside real $out
.Lxts_enc_steal:
	llgc	%r0,16($inp)
	llgc	%r1,0($out,$inp)
	stc	%r0,0($out,$inp)
	stc	%r1,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_enc_steal
	la	$out,0($i3)		# restore real $out

	# generate last tweak...
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$s1,$s1			# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
	stg	$s1,$tweak+0($sp)	# save the tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak+8($sp)
	llgfr	$s3,$s3

	x	$s0,0($out)		# ^=*(inp)|stolen cipther-text
	x	$s1,4($out)
	x	$s2,8($out)
	x	$s3,12($out)
	st${g}	$out,4*$SIZE_T($sp)
	la	$key,0($key1)
	bras	$ra,_s390x_AES_encrypt
	l${g}	$out,4*$SIZE_T($sp)
	x	$s0,`$tweak+0`($sp)	# ^=tweak
	x	$s1,`$tweak+4`($sp)
	x	$s2,`$tweak+8`($sp)
	x	$s3,`$tweak+12`($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)

.Lxts_enc_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_xts_encrypt,.-AES_xts_encrypt
___
# void AES_xts_decrypt(const char *inp,char *out,size_t len,
#	const AES_KEY *key1, const AES_KEY *key2,
#	const unsigned char iv[16]);
#
$code.=<<___;
.globl	AES_xts_decrypt
.type	AES_xts_decrypt,\@function
.align	16
AES_xts_decrypt:
	xgr	%r3,%r4			# flip %r3 and %r4, $out and $len
	xgr	%r4,%r3
	xgr	%r3,%r4
___
$code.=<<___ if ($SIZE_T==4);
	llgfr	$len,$len
___
# Reserve the final (possibly stolen-from) block: if $len is not a
# multiple of 16, the last full block is held back for the
# ciphertext-stealing epilogue.
$code.=<<___;
	st${g}	$len,1*$SIZE_T($sp)	# save copy of $len
	aghi	$len,-16
	bcr	4,$ra			# abort if less than zero. formally
					# wrong, because $len is unsigned,
					# but who can afford asking to
					# process more than 2^63-1 bytes?
	tmll	$len,0x0f
	jnz	.Lxts_dec_proceed
	aghi	$len,16
.Lxts_dec_proceed:
___
# Hardware XTS-decrypt path: tweak generation with KM under $key2,
# bulk via _s390x_xts_km, then the 2nd-tweak block and ciphertext
# stealing for the tail.
$code.=<<___ if (!$softonly);
	llgf	%r0,240($key2)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lxts_dec_software

	st${g}	$ra,5*$SIZE_T($sp)
	stm${g}	%r6,$s3,6*$SIZE_T($sp)

	nill	$len,0xfff0		# $len&=~15
	slgr	$out,$inp

	# generate the tweak value
	l${g}	$s3,$stdframe($sp)	# pointer to iv
	la	$s2,$tweak($sp)
	lmg	$s0,$s1,0($s3)
	lghi	$s3,16
	stmg	$s0,$s1,0($s2)
	la	%r1,0($key2)		# $key2 is not needed past this point
	.long	0xb92e00aa		# km $s2,$s2, generate the tweak
	brc	1,.-4			# can this happen?

	l	%r0,240($key1)
	la	%r1,0($key1)		# $key1 is not needed anymore

	ltgr	$len,$len
	jz	.Lxts_dec_km_short
	bras	$ra,_s390x_xts_km
	jz	.Lxts_dec_km_done

	lrvgr	$s2,$s0			# make copy in reverse byte order
	lrvgr	$s3,$s1
	j	.Lxts_dec_km_2ndtweak

.Lxts_dec_km_short:
	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%=16
	lrvg	$s0,$tweak+0($sp)	# load the tweak
	lrvg	$s1,$tweak+8($sp)
	lrvgr	$s2,$s0			# make copy in reverse byte order
	lrvgr	$s3,$s1

.Lxts_dec_km_2ndtweak:
	lghi	$i1,0x87
	srag	$i2,$s1,63		# broadcast upper bit
	ngr	$i1,$i2			# rem
	algr	$s0,$s0
	alcgr	$s1,$s1
	xgr	$s0,$i1
	lrvgr	$i1,$s0			# flip byte order
	lrvgr	$i2,$s1

	xg	$i1,0($inp)
	xg	$i2,8($inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)
	la	$i2,0($out,$inp)
	lghi	$i3,16
	.long	0xb92e0066		# km $i2,$i2
	brc	1,.-4			# can this happen?
	lrvgr	$i1,$s0
	lrvgr	$i2,$s1
	xg	$i1,0($out,$inp)
	xg	$i2,8($out,$inp)
	stg	$i1,0($out,$inp)
	stg	$i2,8($out,$inp)

	la	$i3,0($out,$inp)	# put aside real $out
.Lxts_dec_km_steal:
	llgc	$i1,16($inp)
	llgc	$i2,0($out,$inp)
	stc	$i1,0($out,$inp)
	stc	$i2,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_dec_km_steal

	lgr	$s0,$s2
	lgr	$s1,$s3
	xg	$s0,0($i3)
	xg	$s1,8($i3)
	stg	$s0,0($i3)
	stg	$s1,8($i3)
	la	$s0,0($i3)
	lghi	$s1,16
	.long	0xb92e0088		# km $s0,$s0
	brc	1,.-4			# can this happen?
	xg	$s2,0($i3)
	xg	$s3,8($i3)
	stg	$s2,0($i3)
	stg	$s3,8($i3)
.Lxts_dec_km_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	l${g}	$ra,5*$SIZE_T($sp)
	lm${g}	%r6,$s3,6*$SIZE_T($sp)
	br	$ra
.align	16
.Lxts_dec_software:
___
# Software XTS-decrypt path: the first tweak is kept while the bulk is
# decrypted against successively doubled tweaks; a ragged tail uses a
# 2nd tweak (stored at $tweak-16) for the short block and the 1st
# tweak for the stolen-ciphertext block.
#
# Fix: the tweak-wiping code at .Lxts_dec_done referenced "$twesk", a
# typo for "$tweak". The undefined variable interpolated as an empty
# string, so the generated instruction became "stg $sp,+8($sp)" --
# clobbering the word at offset 8 of the frame instead of wiping the
# second half of the tweak save area.
$code.=<<___;
	stm${g}	%r6,$ra,6*$SIZE_T($sp)

	srlg	$len,$len,4
	slgr	$out,$inp

	l${g}	$s3,$stdframe($sp)	# ivp
	llgf	$s0,0($s3)		# load iv
	llgf	$s1,4($s3)
	llgf	$s2,8($s3)
	llgf	$s3,12($s3)
	stm${g}	%r2,%r5,2*$SIZE_T($sp)
	la	$key,0($key2)
	larl	$tbl,AES_Te
	bras	$ra,_s390x_AES_encrypt	# generate the tweak
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	larl	$tbl,AES_Td
	lt${g}r	$len,$len
	stm	$s0,$s3,$tweak($sp)	# save the tweak
	jz	.Lxts_dec_short
	j	.Lxts_dec_enter

.align	16
.Lxts_dec_loop:
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$s1,$s1			# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
	stg	$s1,$tweak+0($sp)	# save the tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak+8($sp)
	llgfr	$s3,$s3
.Lxts_dec_enter:
	x	$s0,0($inp)		# tweak^=*(inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	stm${g}	%r2,%r3,2*$SIZE_T($sp)	# only two registers are changing
	la	$key,0($key1)
	bras	$ra,_s390x_AES_decrypt
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	x	$s0,$tweak+0($sp)	# ^=tweak
	x	$s1,$tweak+4($sp)
	x	$s2,$tweak+8($sp)
	x	$s3,$tweak+12($sp)
	st	$s0,0($out,$inp)
	st	$s1,4($out,$inp)
	st	$s2,8($out,$inp)
	st	$s3,12($out,$inp)
	la	$inp,16($inp)
	brct${g}	$len,.Lxts_dec_loop

	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%16
	jz	.Lxts_dec_done

	# generate pair of tweaks...
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$i2,$s1			# flip byte order
	lrvgr	$i3,$s3
	stmg	$i2,$i3,$tweak($sp)	# save the 1st tweak
	j	.Lxts_dec_2ndtweak

.align	16
.Lxts_dec_short:
	llgc	$len,`2*$SIZE_T-1`($sp)
	nill	$len,0x0f		# $len%16
	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
	lrvg	$s3,$tweak+8($sp)
.Lxts_dec_2ndtweak:
	lghi	%r1,0x87
	srag	%r0,$s3,63		# broadcast upper bit
	ngr	%r1,%r0			# rem
	algr	$s1,$s1
	alcgr	$s3,$s3
	xgr	$s1,%r1
	lrvgr	$s1,$s1			# flip byte order
	lrvgr	$s3,$s3
	srlg	$s0,$s1,32		# smash the tweak to 4x32-bits
	stg	$s1,$tweak-16+0($sp)	# save the 2nd tweak
	llgfr	$s1,$s1
	srlg	$s2,$s3,32
	stg	$s3,$tweak-16+8($sp)
	llgfr	$s3,$s3

	x	$s0,0($inp)		# tweak_the_2nd^=*(inp)
	x	$s1,4($inp)
	x	$s2,8($inp)
	x	$s3,12($inp)
	stm${g}	%r2,%r3,2*$SIZE_T($sp)
	la	$key,0($key1)
	bras	$ra,_s390x_AES_decrypt
	lm${g}	%r2,%r5,2*$SIZE_T($sp)
	x	$s0,$tweak-16+0($sp)	# ^=tweak_the_2nd
	x	$s1,$tweak-16+4($sp)
	x	$s2,$tweak-16+8($sp)
	x	$s3,$tweak-16+12($sp)
	st	$s0,0($out,$inp)
	st	$s1,4($out,$inp)
	st	$s2,8($out,$inp)
	st	$s3,12($out,$inp)

	la	$i3,0($out,$inp)	# put aside real $out
.Lxts_dec_steal:
	llgc	%r0,16($inp)
	llgc	%r1,0($out,$inp)
	stc	%r0,0($out,$inp)
	stc	%r1,16($out,$inp)
	la	$inp,1($inp)
	brct	$len,.Lxts_dec_steal
	la	$out,0($i3)		# restore real $out

	lm	$s0,$s3,$tweak($sp)	# load the 1st tweak
	x	$s0,0($out)		# tweak^=*(inp)|stolen cipher-text
	x	$s1,4($out)
	x	$s2,8($out)
	x	$s3,12($out)
	st${g}	$out,4*$SIZE_T($sp)
	la	$key,0($key1)
	bras	$ra,_s390x_AES_decrypt
	l${g}	$out,4*$SIZE_T($sp)
	x	$s0,$tweak+0($sp)	# ^=tweak
	x	$s1,$tweak+4($sp)
	x	$s2,$tweak+8($sp)
	x	$s3,$tweak+12($sp)
	st	$s0,0($out)
	st	$s1,4($out)
	st	$s2,8($out)
	st	$s3,12($out)
	stg	$sp,$tweak-16+0($sp)	# wipe 2nd tweak
	stg	$sp,$tweak-16+8($sp)
.Lxts_dec_done:
	stg	$sp,$tweak+0($sp)	# wipe tweak
	stg	$sp,$tweak+8($sp)
	lm${g}	%r6,$ra,6*$SIZE_T($sp)
	br	$ra
.size	AES_xts_decrypt,.-AES_xts_decrypt
___
}
$code.=<<___;
.string	"AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
.comm	OPENSSL_s390xcap_P,16,8
___

# Expand `...` spans by evaluating them as Perl (compile-time offset
# arithmetic such as `2*$SIZE_T-1`), then emit the finished assembly.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT;	# force flush
OLDNEW
« no previous file with comments | « openssl/crypto/aes/asm/aes-ppc.pl ('k') | openssl/crypto/aes/asm/aes-sparcv9.pl » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698