Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(378)

Side by Side Diff: openssl/crypto/aes/asm/aes-ppc.pl

Issue 9254031: Upgrade chrome's OpenSSL to same version Android ships with. (Closed) Base URL: http://src.chromium.org/svn/trunk/deps/third_party/openssl/
Patch Set: '' Created 8 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « openssl/crypto/aes/asm/aes-armv4.pl ('k') | openssl/crypto/aes/asm/aes-s390x.pl » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # Needs more work: key setup, page boundaries, CBC routine...
11 #
12 # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13 # 128-bit key, which is ~40% better than 64-bit code generated by gcc
14 # 4.0. But these are not the ones currently used! Their "compact"
15 # counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16 # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17 # at 1/3 of ppc_AES_decrypt.
18
19 # February 2010
20 #
21 # Rescheduling instructions to favour Power6 pipeline gives 10%
22 # performance improvement on the platform in question (and marginal
23 # improvement even on others). It should be noted that Power6 fails
24 # to process byte in 18 cycles, only in 23, because it fails to issue
25 # 4 load instructions in two cycles, only in 3. As a result non-compact
26 # block subroutines are 25% slower than one would expect. Compact
27 # functions scale better, because they have pure computational part,
28 # which scales perfectly with clock frequency. To be specific
29 # ppc_AES_encrypt_compact operates at 42 cycles per byte, while
30 # ppc_AES_decrypt_compact - at 55 (in 64-bit build).
31
32 $flavour = shift;
33
34 # Derive the ABI parameters from the flavour name given on the command
35 # line: pointer size in bytes, then the store-with-update, load and store
36 # mnemonics used for stack frame handling. A flavour matching /64/ is
37 # tested before one matching /32/; any other flavour is fatal.
38 ($SIZE_T, $STU, $POP, $PUSH) =
39 	  $flavour =~ /64/ ? (8, "stdu", "ld",  "std")
40 	: $flavour =~ /32/ ? (4, "stwu", "lwz", "stw")
41 	: die "nonsense $flavour";
42 #
43 # $SIZE_T also sizes the stack frame ($FRAME) and selects the 64-bit-only
44 # code paths guarded by "$SIZE_T==8" further down.
45
46 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;	# directory part of this script's own path
47 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
48 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
49 die "can't locate ppc-xlate.pl";
50 # Pipe STDOUT through the translator; the next command-line argument, if any, is passed on to it.
51 open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
52 # "or" rather than "||": die must guard open() itself, not the always-true command string.
53 $FRAME=32*$SIZE_T;	# stack frame size in bytes: 32 pointer-sized slots
54
55 sub _data_word()	# append each argument to $code as a ".long" line holding the value twice
56 { for my $word (@_)	# callers use the &-form, which bypasses the empty prototype
57   { last if !defined($word); $code.=sprintf("\t.long\t0x%08x,0x%08x\n",$word,$word); }
58 }
59
60 $sp="r1"; # stack pointer; the frame is allocated and released through it
61 $toc="r2"; # saved to and restored from the frame by the entry points
62 $inp="r3"; # pointer the four input words are loaded from
63 $out="r4"; # pointer the four result words are stored to
64 $key="r5"; # key schedule pointer; the word at offset 240 is used as the loop count
65 # Table base pointers. Note the aliasing: $Tbl0 shares r3 with $inp and $Tbl3 shares r2 with $toc.
66 $Tbl0="r3";
67 $Tbl1="r6";
68 $Tbl2="r7";
69 $Tbl3="r2";
70 # The four 32-bit words being transformed (loaded from $inp, stored to $out).
71 $s0="r8";
72 $s1="r9";
73 $s2="r10";
74 $s3="r11";
75 # Temporaries; the round routines load the current round-key words into them.
76 $t0="r12";
77 $t1="r13";
78 $t2="r14";
79 $t3="r15";
80 # Sixteen scratch registers used for table indices and looked-up values.
81 $acc00="r16";
82 $acc01="r17";
83 $acc02="r18";
84 $acc03="r19";
85
86 $acc04="r20";
87 $acc05="r21";
88 $acc06="r22";
89 $acc07="r23";
90
91 $acc08="r24";
92 $acc09="r25";
93 $acc10="r26";
94 $acc11="r27";
95
96 $acc12="r28";
97 $acc13="r29";
98 $acc14="r30";
99 $acc15="r31";
100 # NOTE(review): presumably r13 is the TLS pointer in 64-bit builds and r2 in 32-bit ones - confirm against the ABI.
101 # stay away from TLS pointer
102 if ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; } # 64-bit: move $t1 off r13 onto r0
103 else { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; } # otherwise: $Tbl3 takes $t0's register and $t0 moves to r0
104 $mask80=$Tbl2; # the compact routines reuse this register for the 0x80808080 mask
105 $mask1b=$Tbl3; # the compact routines reuse this register for the 0x1b1b1b1b mask
106
107 $code.=<<___;
108 .machine "any"
109 .text
110
111 .align 7
112 LAES_Te:
113 mflr r0
114 bcl 20,31,\$+4
115 mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry
116 addi $Tbl0,$Tbl0,`128-8`
117 mtlr r0
118 blr
119 .space `32-24`
120 LAES_Td:
121 mflr r0
122 bcl 20,31,\$+4
123 mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry
124 addi $Tbl0,$Tbl0,`128-8-32+2048+256`
125 mtlr r0
126 blr
127 .space `128-32-24`
128 ___
129 &_data_word(
130 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
131 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
132 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
133 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
134 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
135 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
136 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
137 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
138 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
139 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
140 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
141 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
142 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
143 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
144 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
145 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
146 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
147 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
148 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
149 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
150 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
151 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
152 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
153 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
154 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
155 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
156 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
157 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
158 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
159 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
160 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
161 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
162 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
163 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
164 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
165 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
166 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
167 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
168 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
169 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
170 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
171 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
172 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
173 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
174 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
175 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
176 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
177 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
178 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
179 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
180 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
181 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
182 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
183 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
184 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
185 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
186 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
187 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
188 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
189 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
190 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
191 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
192 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
193 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
194 $code.=<<___;
195 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
196 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
197 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
198 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
199 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
200 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
201 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
202 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
203 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
204 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
205 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
206 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
207 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
208 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
209 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
210 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
211 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
212 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
213 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
214 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
215 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
216 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
217 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
218 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
219 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
220 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
221 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
222 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
223 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
224 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
225 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
226 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
227 ___
228 &_data_word(
229 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
230 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
231 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
232 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
233 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
234 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
235 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
236 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
237 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
238 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
239 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
240 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
241 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
242 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
243 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
244 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
245 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
246 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
247 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
248 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
249 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
250 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
251 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
252 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
253 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
254 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
255 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
256 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
257 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
258 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
259 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
260 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
261 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
262 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
263 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
264 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
265 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
266 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
267 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
268 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
269 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
270 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
271 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
272 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
273 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
274 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
275 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
276 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
277 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
278 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
279 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
280 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
281 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
282 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
283 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
284 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
285 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
286 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
287 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
288 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
289 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
290 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
291 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
292 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
293 $code.=<<___;
294 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
295 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
296 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
297 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
298 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
299 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
300 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
301 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
302 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
303 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
304 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
305 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
306 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
307 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
308 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
309 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
310 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
311 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
312 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
313 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
314 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
315 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
316 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
317 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
318 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
319 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
320 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
321 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
322 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
323 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
324 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
325 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
326
327
328 .globl .AES_encrypt
329 .align 7
330 .AES_encrypt:
331 mflr r0
332 $STU $sp,-$FRAME($sp)
333
334 $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
335 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
336 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
337 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
338 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
339 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
340 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
341 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
342 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
343 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
344 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
345 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
346 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
347 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
348 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
349 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
350 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
351 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
352 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
353 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
354 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
355
356 lwz $s0,0($inp)
357 lwz $s1,4($inp)
358 lwz $s2,8($inp)
359 lwz $s3,12($inp)
360 bl LAES_Te
361 bl Lppc_AES_encrypt_compact
362 stw $s0,0($out)
363 stw $s1,4($out)
364 stw $s2,8($out)
365 stw $s3,12($out)
366
367 $POP r0,`$FRAME-$SIZE_T*21`($sp)
368 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
369 $POP r13,`$FRAME-$SIZE_T*19`($sp)
370 $POP r14,`$FRAME-$SIZE_T*18`($sp)
371 $POP r15,`$FRAME-$SIZE_T*17`($sp)
372 $POP r16,`$FRAME-$SIZE_T*16`($sp)
373 $POP r17,`$FRAME-$SIZE_T*15`($sp)
374 $POP r18,`$FRAME-$SIZE_T*14`($sp)
375 $POP r19,`$FRAME-$SIZE_T*13`($sp)
376 $POP r20,`$FRAME-$SIZE_T*12`($sp)
377 $POP r21,`$FRAME-$SIZE_T*11`($sp)
378 $POP r22,`$FRAME-$SIZE_T*10`($sp)
379 $POP r23,`$FRAME-$SIZE_T*9`($sp)
380 $POP r24,`$FRAME-$SIZE_T*8`($sp)
381 $POP r25,`$FRAME-$SIZE_T*7`($sp)
382 $POP r26,`$FRAME-$SIZE_T*6`($sp)
383 $POP r27,`$FRAME-$SIZE_T*5`($sp)
384 $POP r28,`$FRAME-$SIZE_T*4`($sp)
385 $POP r29,`$FRAME-$SIZE_T*3`($sp)
386 $POP r30,`$FRAME-$SIZE_T*2`($sp)
387 $POP r31,`$FRAME-$SIZE_T*1`($sp)
388 mtlr r0
389 addi $sp,$sp,$FRAME
390 blr
391
392 .align 5
393 Lppc_AES_encrypt:
394 lwz $acc00,240($key)
395 lwz $t0,0($key)
396 lwz $t1,4($key)
397 lwz $t2,8($key)
398 lwz $t3,12($key)
399 addi $Tbl1,$Tbl0,3
400 addi $Tbl2,$Tbl0,2
401 addi $Tbl3,$Tbl0,1
402 addi $acc00,$acc00,-1
403 addi $key,$key,16
404 xor $s0,$s0,$t0
405 xor $s1,$s1,$t1
406 xor $s2,$s2,$t2
407 xor $s3,$s3,$t3
408 mtctr $acc00
409 .align 4
410 Lenc_loop:
411 rlwinm $acc00,$s0,`32-24+3`,21,28
412 rlwinm $acc01,$s1,`32-24+3`,21,28
413 rlwinm $acc02,$s2,`32-24+3`,21,28
414 rlwinm $acc03,$s3,`32-24+3`,21,28
415 lwz $t0,0($key)
416 lwz $t1,4($key)
417 rlwinm $acc04,$s1,`32-16+3`,21,28
418 rlwinm $acc05,$s2,`32-16+3`,21,28
419 lwz $t2,8($key)
420 lwz $t3,12($key)
421 rlwinm $acc06,$s3,`32-16+3`,21,28
422 rlwinm $acc07,$s0,`32-16+3`,21,28
423 lwzx $acc00,$Tbl0,$acc00
424 lwzx $acc01,$Tbl0,$acc01
425 rlwinm $acc08,$s2,`32-8+3`,21,28
426 rlwinm $acc09,$s3,`32-8+3`,21,28
427 lwzx $acc02,$Tbl0,$acc02
428 lwzx $acc03,$Tbl0,$acc03
429 rlwinm $acc10,$s0,`32-8+3`,21,28
430 rlwinm $acc11,$s1,`32-8+3`,21,28
431 lwzx $acc04,$Tbl1,$acc04
432 lwzx $acc05,$Tbl1,$acc05
433 rlwinm $acc12,$s3,`0+3`,21,28
434 rlwinm $acc13,$s0,`0+3`,21,28
435 lwzx $acc06,$Tbl1,$acc06
436 lwzx $acc07,$Tbl1,$acc07
437 rlwinm $acc14,$s1,`0+3`,21,28
438 rlwinm $acc15,$s2,`0+3`,21,28
439 lwzx $acc08,$Tbl2,$acc08
440 lwzx $acc09,$Tbl2,$acc09
441 xor $t0,$t0,$acc00
442 xor $t1,$t1,$acc01
443 lwzx $acc10,$Tbl2,$acc10
444 lwzx $acc11,$Tbl2,$acc11
445 xor $t2,$t2,$acc02
446 xor $t3,$t3,$acc03
447 lwzx $acc12,$Tbl3,$acc12
448 lwzx $acc13,$Tbl3,$acc13
449 xor $t0,$t0,$acc04
450 xor $t1,$t1,$acc05
451 lwzx $acc14,$Tbl3,$acc14
452 lwzx $acc15,$Tbl3,$acc15
453 xor $t2,$t2,$acc06
454 xor $t3,$t3,$acc07
455 xor $t0,$t0,$acc08
456 xor $t1,$t1,$acc09
457 xor $t2,$t2,$acc10
458 xor $t3,$t3,$acc11
459 xor $s0,$t0,$acc12
460 xor $s1,$t1,$acc13
461 xor $s2,$t2,$acc14
462 xor $s3,$t3,$acc15
463 addi $key,$key,16
464 bdnz- Lenc_loop
465
466 addi $Tbl2,$Tbl0,2048
467 nop
468 lwz $t0,0($key)
469 lwz $t1,4($key)
470 rlwinm $acc00,$s0,`32-24`,24,31
471 rlwinm $acc01,$s1,`32-24`,24,31
472 lwz $t2,8($key)
473 lwz $t3,12($key)
474 rlwinm $acc02,$s2,`32-24`,24,31
475 rlwinm $acc03,$s3,`32-24`,24,31
476 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
477 lwz $acc09,`2048+32`($Tbl0)
478 rlwinm $acc04,$s1,`32-16`,24,31
479 rlwinm $acc05,$s2,`32-16`,24,31
480 lwz $acc10,`2048+64`($Tbl0)
481 lwz $acc11,`2048+96`($Tbl0)
482 rlwinm $acc06,$s3,`32-16`,24,31
483 rlwinm $acc07,$s0,`32-16`,24,31
484 lwz $acc12,`2048+128`($Tbl0)
485 lwz $acc13,`2048+160`($Tbl0)
486 rlwinm $acc08,$s2,`32-8`,24,31
487 rlwinm $acc09,$s3,`32-8`,24,31
488 lwz $acc14,`2048+192`($Tbl0)
489 lwz $acc15,`2048+224`($Tbl0)
490 rlwinm $acc10,$s0,`32-8`,24,31
491 rlwinm $acc11,$s1,`32-8`,24,31
492 lbzx $acc00,$Tbl2,$acc00
493 lbzx $acc01,$Tbl2,$acc01
494 rlwinm $acc12,$s3,`0`,24,31
495 rlwinm $acc13,$s0,`0`,24,31
496 lbzx $acc02,$Tbl2,$acc02
497 lbzx $acc03,$Tbl2,$acc03
498 rlwinm $acc14,$s1,`0`,24,31
499 rlwinm $acc15,$s2,`0`,24,31
500 lbzx $acc04,$Tbl2,$acc04
501 lbzx $acc05,$Tbl2,$acc05
502 rlwinm $s0,$acc00,24,0,7
503 rlwinm $s1,$acc01,24,0,7
504 lbzx $acc06,$Tbl2,$acc06
505 lbzx $acc07,$Tbl2,$acc07
506 rlwinm $s2,$acc02,24,0,7
507 rlwinm $s3,$acc03,24,0,7
508 lbzx $acc08,$Tbl2,$acc08
509 lbzx $acc09,$Tbl2,$acc09
510 rlwimi $s0,$acc04,16,8,15
511 rlwimi $s1,$acc05,16,8,15
512 lbzx $acc10,$Tbl2,$acc10
513 lbzx $acc11,$Tbl2,$acc11
514 rlwimi $s2,$acc06,16,8,15
515 rlwimi $s3,$acc07,16,8,15
516 lbzx $acc12,$Tbl2,$acc12
517 lbzx $acc13,$Tbl2,$acc13
518 rlwimi $s0,$acc08,8,16,23
519 rlwimi $s1,$acc09,8,16,23
520 lbzx $acc14,$Tbl2,$acc14
521 lbzx $acc15,$Tbl2,$acc15
522 rlwimi $s2,$acc10,8,16,23
523 rlwimi $s3,$acc11,8,16,23
524 or $s0,$s0,$acc12
525 or $s1,$s1,$acc13
526 or $s2,$s2,$acc14
527 or $s3,$s3,$acc15
528 xor $s0,$s0,$t0
529 xor $s1,$s1,$t1
530 xor $s2,$s2,$t2
531 xor $s3,$s3,$t3
532 blr
533
534 .align 4
535 Lppc_AES_encrypt_compact:
536 lwz $acc00,240($key)
537 lwz $t0,0($key)
538 lwz $t1,4($key)
539 lwz $t2,8($key)
540 lwz $t3,12($key)
541 addi $Tbl1,$Tbl0,2048
542 lis $mask80,0x8080
543 lis $mask1b,0x1b1b
544 addi $key,$key,16
545 ori $mask80,$mask80,0x8080
546 ori $mask1b,$mask1b,0x1b1b
547 mtctr $acc00
548 .align 4
549 Lenc_compact_loop:
550 xor $s0,$s0,$t0
551 xor $s1,$s1,$t1
552 xor $s2,$s2,$t2
553 xor $s3,$s3,$t3
554 rlwinm $acc00,$s0,`32-24`,24,31
555 rlwinm $acc01,$s1,`32-24`,24,31
556 rlwinm $acc02,$s2,`32-24`,24,31
557 rlwinm $acc03,$s3,`32-24`,24,31
558 rlwinm $acc04,$s1,`32-16`,24,31
559 rlwinm $acc05,$s2,`32-16`,24,31
560 rlwinm $acc06,$s3,`32-16`,24,31
561 rlwinm $acc07,$s0,`32-16`,24,31
562 lbzx $acc00,$Tbl1,$acc00
563 lbzx $acc01,$Tbl1,$acc01
564 rlwinm $acc08,$s2,`32-8`,24,31
565 rlwinm $acc09,$s3,`32-8`,24,31
566 lbzx $acc02,$Tbl1,$acc02
567 lbzx $acc03,$Tbl1,$acc03
568 rlwinm $acc10,$s0,`32-8`,24,31
569 rlwinm $acc11,$s1,`32-8`,24,31
570 lbzx $acc04,$Tbl1,$acc04
571 lbzx $acc05,$Tbl1,$acc05
572 rlwinm $acc12,$s3,`0`,24,31
573 rlwinm $acc13,$s0,`0`,24,31
574 lbzx $acc06,$Tbl1,$acc06
575 lbzx $acc07,$Tbl1,$acc07
576 rlwinm $acc14,$s1,`0`,24,31
577 rlwinm $acc15,$s2,`0`,24,31
578 lbzx $acc08,$Tbl1,$acc08
579 lbzx $acc09,$Tbl1,$acc09
580 rlwinm $s0,$acc00,24,0,7
581 rlwinm $s1,$acc01,24,0,7
582 lbzx $acc10,$Tbl1,$acc10
583 lbzx $acc11,$Tbl1,$acc11
584 rlwinm $s2,$acc02,24,0,7
585 rlwinm $s3,$acc03,24,0,7
586 lbzx $acc12,$Tbl1,$acc12
587 lbzx $acc13,$Tbl1,$acc13
588 rlwimi $s0,$acc04,16,8,15
589 rlwimi $s1,$acc05,16,8,15
590 lbzx $acc14,$Tbl1,$acc14
591 lbzx $acc15,$Tbl1,$acc15
592 rlwimi $s2,$acc06,16,8,15
593 rlwimi $s3,$acc07,16,8,15
594 rlwimi $s0,$acc08,8,16,23
595 rlwimi $s1,$acc09,8,16,23
596 rlwimi $s2,$acc10,8,16,23
597 rlwimi $s3,$acc11,8,16,23
598 lwz $t0,0($key)
599 lwz $t1,4($key)
600 or $s0,$s0,$acc12
601 or $s1,$s1,$acc13
602 lwz $t2,8($key)
603 lwz $t3,12($key)
604 or $s2,$s2,$acc14
605 or $s3,$s3,$acc15
606
607 addi $key,$key,16
608 bdz Lenc_compact_done
609
610 and $acc00,$s0,$mask80 # r1=r0&0x80808080
611 and $acc01,$s1,$mask80
612 and $acc02,$s2,$mask80
613 and $acc03,$s3,$mask80
614 srwi $acc04,$acc00,7 # r1>>7
615 srwi $acc05,$acc01,7
616 srwi $acc06,$acc02,7
617 srwi $acc07,$acc03,7
618 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
619 andc $acc09,$s1,$mask80
620 andc $acc10,$s2,$mask80
621 andc $acc11,$s3,$mask80
622 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
623 sub $acc01,$acc01,$acc05
624 sub $acc02,$acc02,$acc06
625 sub $acc03,$acc03,$acc07
626 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
627 add $acc09,$acc09,$acc09
628 add $acc10,$acc10,$acc10
629 add $acc11,$acc11,$acc11
630 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
631 and $acc01,$acc01,$mask1b
632 and $acc02,$acc02,$mask1b
633 and $acc03,$acc03,$mask1b
634 xor $acc00,$acc00,$acc08 # r2
635 xor $acc01,$acc01,$acc09
636 xor $acc02,$acc02,$acc10
637 xor $acc03,$acc03,$acc11
638
639 rotlwi $acc12,$s0,16 # ROTATE(r0,16)
640 rotlwi $acc13,$s1,16
641 rotlwi $acc14,$s2,16
642 rotlwi $acc15,$s3,16
643 xor $s0,$s0,$acc00 # r0^r2
644 xor $s1,$s1,$acc01
645 xor $s2,$s2,$acc02
646 xor $s3,$s3,$acc03
647 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24)
648 rotrwi $s1,$s1,24
649 rotrwi $s2,$s2,24
650 rotrwi $s3,$s3,24
651 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2
652 xor $s1,$s1,$acc01
653 xor $s2,$s2,$acc02
654 xor $s3,$s3,$acc03
655 rotlwi $acc08,$acc12,8 # ROTATE(r0,24)
656 rotlwi $acc09,$acc13,8
657 rotlwi $acc10,$acc14,8
658 rotlwi $acc11,$acc15,8
659 xor $s0,$s0,$acc12 #
660 xor $s1,$s1,$acc13
661 xor $s2,$s2,$acc14
662 xor $s3,$s3,$acc15
663 xor $s0,$s0,$acc08 #
664 xor $s1,$s1,$acc09
665 xor $s2,$s2,$acc10
666 xor $s3,$s3,$acc11
667
668 b Lenc_compact_loop
669 .align 4
670 Lenc_compact_done:
671 xor $s0,$s0,$t0
672 xor $s1,$s1,$t1
673 xor $s2,$s2,$t2
674 xor $s3,$s3,$t3
675 blr
676
677 .globl .AES_decrypt
678 .align 7
679 .AES_decrypt:
680 mflr r0
681 $STU $sp,-$FRAME($sp)
682
683 $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
684 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
685 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
686 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
687 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
688 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
689 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
690 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
691 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
692 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
693 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
694 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
695 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
696 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
697 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
698 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
699 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
700 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
701 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
702 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
703 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
704
705 lwz $s0,0($inp)
706 lwz $s1,4($inp)
707 lwz $s2,8($inp)
708 lwz $s3,12($inp)
709 bl LAES_Td
710 bl Lppc_AES_decrypt_compact
711 stw $s0,0($out)
712 stw $s1,4($out)
713 stw $s2,8($out)
714 stw $s3,12($out)
715
716 $POP r0,`$FRAME-$SIZE_T*21`($sp)
717 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
718 $POP r13,`$FRAME-$SIZE_T*19`($sp)
719 $POP r14,`$FRAME-$SIZE_T*18`($sp)
720 $POP r15,`$FRAME-$SIZE_T*17`($sp)
721 $POP r16,`$FRAME-$SIZE_T*16`($sp)
722 $POP r17,`$FRAME-$SIZE_T*15`($sp)
723 $POP r18,`$FRAME-$SIZE_T*14`($sp)
724 $POP r19,`$FRAME-$SIZE_T*13`($sp)
725 $POP r20,`$FRAME-$SIZE_T*12`($sp)
726 $POP r21,`$FRAME-$SIZE_T*11`($sp)
727 $POP r22,`$FRAME-$SIZE_T*10`($sp)
728 $POP r23,`$FRAME-$SIZE_T*9`($sp)
729 $POP r24,`$FRAME-$SIZE_T*8`($sp)
730 $POP r25,`$FRAME-$SIZE_T*7`($sp)
731 $POP r26,`$FRAME-$SIZE_T*6`($sp)
732 $POP r27,`$FRAME-$SIZE_T*5`($sp)
733 $POP r28,`$FRAME-$SIZE_T*4`($sp)
734 $POP r29,`$FRAME-$SIZE_T*3`($sp)
735 $POP r30,`$FRAME-$SIZE_T*2`($sp)
736 $POP r31,`$FRAME-$SIZE_T*1`($sp)
737 mtlr r0
738 addi $sp,$sp,$FRAME
739 blr
740
741 .align 5
742 Lppc_AES_decrypt:
743 lwz $acc00,240($key)
744 lwz $t0,0($key)
745 lwz $t1,4($key)
746 lwz $t2,8($key)
747 lwz $t3,12($key)
748 addi $Tbl1,$Tbl0,3
749 addi $Tbl2,$Tbl0,2
750 addi $Tbl3,$Tbl0,1
751 addi $acc00,$acc00,-1
752 addi $key,$key,16
753 xor $s0,$s0,$t0
754 xor $s1,$s1,$t1
755 xor $s2,$s2,$t2
756 xor $s3,$s3,$t3
757 mtctr $acc00
758 .align 4
759 Ldec_loop:
760 rlwinm $acc00,$s0,`32-24+3`,21,28
761 rlwinm $acc01,$s1,`32-24+3`,21,28
762 rlwinm $acc02,$s2,`32-24+3`,21,28
763 rlwinm $acc03,$s3,`32-24+3`,21,28
764 lwz $t0,0($key)
765 lwz $t1,4($key)
766 rlwinm $acc04,$s3,`32-16+3`,21,28
767 rlwinm $acc05,$s0,`32-16+3`,21,28
768 lwz $t2,8($key)
769 lwz $t3,12($key)
770 rlwinm $acc06,$s1,`32-16+3`,21,28
771 rlwinm $acc07,$s2,`32-16+3`,21,28
772 lwzx $acc00,$Tbl0,$acc00
773 lwzx $acc01,$Tbl0,$acc01
774 rlwinm $acc08,$s2,`32-8+3`,21,28
775 rlwinm $acc09,$s3,`32-8+3`,21,28
776 lwzx $acc02,$Tbl0,$acc02
777 lwzx $acc03,$Tbl0,$acc03
778 rlwinm $acc10,$s0,`32-8+3`,21,28
779 rlwinm $acc11,$s1,`32-8+3`,21,28
780 lwzx $acc04,$Tbl1,$acc04
781 lwzx $acc05,$Tbl1,$acc05
782 rlwinm $acc12,$s1,`0+3`,21,28
783 rlwinm $acc13,$s2,`0+3`,21,28
784 lwzx $acc06,$Tbl1,$acc06
785 lwzx $acc07,$Tbl1,$acc07
786 rlwinm $acc14,$s3,`0+3`,21,28
787 rlwinm $acc15,$s0,`0+3`,21,28
788 lwzx $acc08,$Tbl2,$acc08
789 lwzx $acc09,$Tbl2,$acc09
790 xor $t0,$t0,$acc00
791 xor $t1,$t1,$acc01
792 lwzx $acc10,$Tbl2,$acc10
793 lwzx $acc11,$Tbl2,$acc11
794 xor $t2,$t2,$acc02
795 xor $t3,$t3,$acc03
796 lwzx $acc12,$Tbl3,$acc12
797 lwzx $acc13,$Tbl3,$acc13
798 xor $t0,$t0,$acc04
799 xor $t1,$t1,$acc05
800 lwzx $acc14,$Tbl3,$acc14
801 lwzx $acc15,$Tbl3,$acc15
802 xor $t2,$t2,$acc06
803 xor $t3,$t3,$acc07
804 xor $t0,$t0,$acc08
805 xor $t1,$t1,$acc09
806 xor $t2,$t2,$acc10
807 xor $t3,$t3,$acc11
808 xor $s0,$t0,$acc12
809 xor $s1,$t1,$acc13
810 xor $s2,$t2,$acc14
811 xor $s3,$t3,$acc15
812 addi $key,$key,16
813 bdnz- Ldec_loop
814
815 addi $Tbl2,$Tbl0,2048
816 nop
817 lwz $t0,0($key)
818 lwz $t1,4($key)
819 rlwinm $acc00,$s0,`32-24`,24,31
820 rlwinm $acc01,$s1,`32-24`,24,31
821 lwz $t2,8($key)
822 lwz $t3,12($key)
823 rlwinm $acc02,$s2,`32-24`,24,31
824 rlwinm $acc03,$s3,`32-24`,24,31
825 lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
826 lwz $acc09,`2048+32`($Tbl0)
827 rlwinm $acc04,$s3,`32-16`,24,31
828 rlwinm $acc05,$s0,`32-16`,24,31
829 lwz $acc10,`2048+64`($Tbl0)
830 lwz $acc11,`2048+96`($Tbl0)
831 lbzx $acc00,$Tbl2,$acc00
832 lbzx $acc01,$Tbl2,$acc01
833 lwz $acc12,`2048+128`($Tbl0)
834 lwz $acc13,`2048+160`($Tbl0)
835 rlwinm $acc06,$s1,`32-16`,24,31
836 rlwinm $acc07,$s2,`32-16`,24,31
837 lwz $acc14,`2048+192`($Tbl0)
838 lwz $acc15,`2048+224`($Tbl0)
839 rlwinm $acc08,$s2,`32-8`,24,31
840 rlwinm $acc09,$s3,`32-8`,24,31
841 lbzx $acc02,$Tbl2,$acc02
842 lbzx $acc03,$Tbl2,$acc03
843 rlwinm $acc10,$s0,`32-8`,24,31
844 rlwinm $acc11,$s1,`32-8`,24,31
845 lbzx $acc04,$Tbl2,$acc04
846 lbzx $acc05,$Tbl2,$acc05
847 rlwinm $acc12,$s1,`0`,24,31
848 rlwinm $acc13,$s2,`0`,24,31
849 lbzx $acc06,$Tbl2,$acc06
850 lbzx $acc07,$Tbl2,$acc07
851 rlwinm $acc14,$s3,`0`,24,31
852 rlwinm $acc15,$s0,`0`,24,31
853 lbzx $acc08,$Tbl2,$acc08
854 lbzx $acc09,$Tbl2,$acc09
855 rlwinm $s0,$acc00,24,0,7
856 rlwinm $s1,$acc01,24,0,7
857 lbzx $acc10,$Tbl2,$acc10
858 lbzx $acc11,$Tbl2,$acc11
859 rlwinm $s2,$acc02,24,0,7
860 rlwinm $s3,$acc03,24,0,7
861 lbzx $acc12,$Tbl2,$acc12
862 lbzx $acc13,$Tbl2,$acc13
863 rlwimi $s0,$acc04,16,8,15
864 rlwimi $s1,$acc05,16,8,15
865 lbzx $acc14,$Tbl2,$acc14
866 lbzx $acc15,$Tbl2,$acc15
867 rlwimi $s2,$acc06,16,8,15
868 rlwimi $s3,$acc07,16,8,15
869 rlwimi $s0,$acc08,8,16,23
870 rlwimi $s1,$acc09,8,16,23
871 rlwimi $s2,$acc10,8,16,23
872 rlwimi $s3,$acc11,8,16,23
873 or $s0,$s0,$acc12
874 or $s1,$s1,$acc13
875 or $s2,$s2,$acc14
876 or $s3,$s3,$acc15
877 xor $s0,$s0,$t0
878 xor $s1,$s1,$t1
879 xor $s2,$s2,$t2
880 xor $s3,$s3,$t3
881 blr
882
883 .align 4
884 Lppc_AES_decrypt_compact: # compact decryption rounds: transforms state words s0..s3 in place, round keys read through key
885 lwz $acc00,240($key) # loop count from offset 240 of the key structure (presumably the round count)
886 lwz $t0,0($key) # t0..t3 = first four round-key words
887 lwz $t1,4($key)
888 lwz $t2,8($key)
889 lwz $t3,12($key)
890 addi $Tbl1,$Tbl0,2048 # byte lookup table sits 2048 bytes past Tbl0 (presumably the inverse S-box, TODO confirm)
891 lis $mask80,0x8080 # upper halfword of the 0x80808080 mask
892 lis $mask1b,0x1b1b # upper halfword of the 0x1b1b1b1b mask
893 addi $key,$key,16 # step the key pointer past the round key just loaded
894 ori $mask80,$mask80,0x8080 # mask80 = 0x80808080
895 ori $mask1b,$mask1b,0x1b1b # mask1b = 0x1b1b1b1b
896 ___
897 $code.=<<___ if ($SIZE_T==8); # 64-bit build only
898 insrdi $mask80,$mask80,32,0 # copy the low word into the upper word so the mask covers all 8 bytes
899 insrdi $mask1b,$mask1b,32,0
900 ___
901 $code.=<<___;
902 mtctr $acc00 # CTR = loop count
903 .align 4
904 Ldec_compact_loop: # one round per iteration
905 xor $s0,$s0,$t0 # add the current round key to the state
906 xor $s1,$s1,$t1
907 xor $s2,$s2,$t2
908 xor $s3,$s3,$t3
909 rlwinm $acc00,$s0,`32-24`,24,31 # acc00..03 = most significant byte of s0,s1,s2,s3
910 rlwinm $acc01,$s1,`32-24`,24,31
911 rlwinm $acc02,$s2,`32-24`,24,31
912 rlwinm $acc03,$s3,`32-24`,24,31
913 rlwinm $acc04,$s3,`32-16`,24,31 # acc04..07 = second byte of s3,s0,s1,s2 (source words rotated by one)
914 rlwinm $acc05,$s0,`32-16`,24,31
915 rlwinm $acc06,$s1,`32-16`,24,31
916 rlwinm $acc07,$s2,`32-16`,24,31
917 lbzx $acc00,$Tbl1,$acc00 # each extracted byte is replaced by the table byte it indexes
918 lbzx $acc01,$Tbl1,$acc01
919 rlwinm $acc08,$s2,`32-8`,24,31 # acc08..11 = third byte of s2,s3,s0,s1
920 rlwinm $acc09,$s3,`32-8`,24,31
921 lbzx $acc02,$Tbl1,$acc02
922 lbzx $acc03,$Tbl1,$acc03
923 rlwinm $acc10,$s0,`32-8`,24,31
924 rlwinm $acc11,$s1,`32-8`,24,31
925 lbzx $acc04,$Tbl1,$acc04
926 lbzx $acc05,$Tbl1,$acc05
927 rlwinm $acc12,$s1,`0`,24,31 # acc12..15 = least significant byte of s1,s2,s3,s0 (no rotation, mask only)
928 rlwinm $acc13,$s2,`0`,24,31
929 lbzx $acc06,$Tbl1,$acc06
930 lbzx $acc07,$Tbl1,$acc07
931 rlwinm $acc14,$s3,`0`,24,31
932 rlwinm $acc15,$s0,`0`,24,31
933 lbzx $acc08,$Tbl1,$acc08
934 lbzx $acc09,$Tbl1,$acc09
935 rlwinm $s0,$acc00,24,0,7 # rebuild the state: looked-up byte goes to the top byte, other bits cleared
936 rlwinm $s1,$acc01,24,0,7
937 lbzx $acc10,$Tbl1,$acc10
938 lbzx $acc11,$Tbl1,$acc11
939 rlwinm $s2,$acc02,24,0,7
940 rlwinm $s3,$acc03,24,0,7
941 lbzx $acc12,$Tbl1,$acc12
942 lbzx $acc13,$Tbl1,$acc13
943 rlwimi $s0,$acc04,16,8,15 # insert the second byte
944 rlwimi $s1,$acc05,16,8,15
945 lbzx $acc14,$Tbl1,$acc14
946 lbzx $acc15,$Tbl1,$acc15
947 rlwimi $s2,$acc06,16,8,15
948 rlwimi $s3,$acc07,16,8,15
949 rlwimi $s0,$acc08,8,16,23 # insert the third byte
950 rlwimi $s1,$acc09,8,16,23
951 rlwimi $s2,$acc10,8,16,23
952 rlwimi $s3,$acc11,8,16,23
953 lwz $t0,0($key) # fetch the next round key while the state words are being assembled
954 lwz $t1,4($key)
955 or $s0,$s0,$acc12 # the low byte completes the word
956 or $s1,$s1,$acc13
957 lwz $t2,8($key)
958 lwz $t3,12($key)
959 or $s2,$s2,$acc14
960 or $s3,$s3,$acc15
961
962 addi $key,$key,16 # advance to the following round key
963 bdz Ldec_compact_done # CTR decremented to zero: final round, skip the column mixing below
964 ___
965 $code.=<<___ if ($SIZE_T==8); # 64-bit build: two state words are processed per register
966 # vectorized permutation improves decrypt performance by 10%
967 insrdi $s0,$s1,32,0 # upper word of s0 = s1
968 insrdi $s2,$s3,32,0 # upper word of s2 = s3
969
970 and $acc00,$s0,$mask80 # r1=r0&0x80808080
971 and $acc02,$s2,$mask80 # r2 below = every byte of r0 doubled, with 0x1b xored in where the top bit was set
972 srdi $acc04,$acc00,7 # r1>>7
973 srdi $acc06,$acc02,7
974 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
975 andc $acc10,$s2,$mask80
976 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
977 sub $acc02,$acc02,$acc06
978 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
979 add $acc10,$acc10,$acc10
980 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
981 and $acc02,$acc02,$mask1b
982 xor $acc00,$acc00,$acc08 # r2
983 xor $acc02,$acc02,$acc10
984
985 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
986 and $acc06,$acc02,$mask80
987 srdi $acc08,$acc04,7 # r1>>7
988 srdi $acc10,$acc06,7
989 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
990 andc $acc14,$acc02,$mask80
991 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
992 sub $acc06,$acc06,$acc10
993 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
994 add $acc14,$acc14,$acc14
995 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
996 and $acc06,$acc06,$mask1b
997 xor $acc04,$acc04,$acc12 # r4
998 xor $acc06,$acc06,$acc14
999
1000 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1001 and $acc10,$acc06,$mask80
1002 srdi $acc12,$acc08,7 # r1>>7
1003 srdi $acc14,$acc10,7
1004 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1005 sub $acc10,$acc10,$acc14
1006 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1007 andc $acc14,$acc06,$mask80
1008 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1009 add $acc14,$acc14,$acc14
1010 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1011 and $acc10,$acc10,$mask1b
1012 xor $acc08,$acc08,$acc12 # r8
1013 xor $acc10,$acc10,$acc14
1014
1015 xor $acc00,$acc00,$s0 # r2^r0
1016 xor $acc02,$acc02,$s2
1017 xor $acc04,$acc04,$s0 # r4^r0
1018 xor $acc06,$acc06,$s2
1019
1020 extrdi $acc01,$acc00,32,0 # unpack: upper words go to the odd-numbered accumulators used by the common tail
1021 extrdi $acc03,$acc02,32,0
1022 extrdi $acc05,$acc04,32,0
1023 extrdi $acc07,$acc06,32,0
1024 extrdi $acc09,$acc08,32,0
1025 extrdi $acc11,$acc10,32,0
1026 ___
1027 $code.=<<___ if ($SIZE_T==4); # 32-bit build: one state word per register
1028 and $acc00,$s0,$mask80 # r1=r0&0x80808080
1029 and $acc01,$s1,$mask80 # r2 below = every byte of r0 doubled, with 0x1b xored in where the top bit was set
1030 and $acc02,$s2,$mask80
1031 and $acc03,$s3,$mask80
1032 srwi $acc04,$acc00,7 # r1>>7
1033 srwi $acc05,$acc01,7
1034 srwi $acc06,$acc02,7
1035 srwi $acc07,$acc03,7
1036 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
1037 andc $acc09,$s1,$mask80
1038 andc $acc10,$s2,$mask80
1039 andc $acc11,$s3,$mask80
1040 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
1041 sub $acc01,$acc01,$acc05
1042 sub $acc02,$acc02,$acc06
1043 sub $acc03,$acc03,$acc07
1044 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
1045 add $acc09,$acc09,$acc09
1046 add $acc10,$acc10,$acc10
1047 add $acc11,$acc11,$acc11
1048 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1049 and $acc01,$acc01,$mask1b
1050 and $acc02,$acc02,$mask1b
1051 and $acc03,$acc03,$mask1b
1052 xor $acc00,$acc00,$acc08 # r2
1053 xor $acc01,$acc01,$acc09
1054 xor $acc02,$acc02,$acc10
1055 xor $acc03,$acc03,$acc11
1056
1057 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
1058 and $acc05,$acc01,$mask80
1059 and $acc06,$acc02,$mask80
1060 and $acc07,$acc03,$mask80
1061 srwi $acc08,$acc04,7 # r1>>7
1062 srwi $acc09,$acc05,7
1063 srwi $acc10,$acc06,7
1064 srwi $acc11,$acc07,7
1065 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
1066 andc $acc13,$acc01,$mask80
1067 andc $acc14,$acc02,$mask80
1068 andc $acc15,$acc03,$mask80
1069 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
1070 sub $acc05,$acc05,$acc09
1071 sub $acc06,$acc06,$acc10
1072 sub $acc07,$acc07,$acc11
1073 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
1074 add $acc13,$acc13,$acc13
1075 add $acc14,$acc14,$acc14
1076 add $acc15,$acc15,$acc15
1077 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1078 and $acc05,$acc05,$mask1b
1079 and $acc06,$acc06,$mask1b
1080 and $acc07,$acc07,$mask1b
1081 xor $acc04,$acc04,$acc12 # r4
1082 xor $acc05,$acc05,$acc13
1083 xor $acc06,$acc06,$acc14
1084 xor $acc07,$acc07,$acc15
1085
1086 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1087 and $acc09,$acc05,$mask80
1088 and $acc10,$acc06,$mask80
1089 and $acc11,$acc07,$mask80
1090 srwi $acc12,$acc08,7 # r1>>7
1091 srwi $acc13,$acc09,7
1092 srwi $acc14,$acc10,7
1093 srwi $acc15,$acc11,7
1094 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1095 sub $acc09,$acc09,$acc13
1096 sub $acc10,$acc10,$acc14
1097 sub $acc11,$acc11,$acc15
1098 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1099 andc $acc13,$acc05,$mask80
1100 andc $acc14,$acc06,$mask80
1101 andc $acc15,$acc07,$mask80
1102 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1103 add $acc13,$acc13,$acc13
1104 add $acc14,$acc14,$acc14
1105 add $acc15,$acc15,$acc15
1106 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1107 and $acc09,$acc09,$mask1b
1108 and $acc10,$acc10,$mask1b
1109 and $acc11,$acc11,$mask1b
1110 xor $acc08,$acc08,$acc12 # r8
1111 xor $acc09,$acc09,$acc13
1112 xor $acc10,$acc10,$acc14
1113 xor $acc11,$acc11,$acc15
1114
1115 xor $acc00,$acc00,$s0 # r2^r0
1116 xor $acc01,$acc01,$s1
1117 xor $acc02,$acc02,$s2
1118 xor $acc03,$acc03,$s3
1119 xor $acc04,$acc04,$s0 # r4^r0
1120 xor $acc05,$acc05,$s1
1121 xor $acc06,$acc06,$s2
1122 xor $acc07,$acc07,$s3
1123 ___
1124 $code.=<<___; # common tail for both builds: acc00..03 = r2^r0, acc04..07 = r4^r0, acc08..11 = r8
1125 rotrwi $s0,$s0,8 # = ROTATE(r0,8)
1126 rotrwi $s1,$s1,8
1127 rotrwi $s2,$s2,8
1128 rotrwi $s3,$s3,8
1129 xor $s0,$s0,$acc00 # ^= r2^r0
1130 xor $s1,$s1,$acc01
1131 xor $s2,$s2,$acc02
1132 xor $s3,$s3,$acc03
1133 xor $acc00,$acc00,$acc08 # acc00..03 = r8^r2^r0
1134 xor $acc01,$acc01,$acc09
1135 xor $acc02,$acc02,$acc10
1136 xor $acc03,$acc03,$acc11
1137 xor $s0,$s0,$acc04 # ^= r4^r0
1138 xor $s1,$s1,$acc05
1139 xor $s2,$s2,$acc06
1140 xor $s3,$s3,$acc07
1141 rotrwi $acc00,$acc00,24 # acc00..03 = ROTATE(r8^r2^r0,24)
1142 rotrwi $acc01,$acc01,24
1143 rotrwi $acc02,$acc02,24
1144 rotrwi $acc03,$acc03,24
1145 xor $acc04,$acc04,$acc08 # acc04..07 = r8^r4^r0
1146 xor $acc05,$acc05,$acc09
1147 xor $acc06,$acc06,$acc10
1148 xor $acc07,$acc07,$acc11
1149 xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1150 xor $s1,$s1,$acc09
1151 xor $s2,$s2,$acc10
1152 xor $s3,$s3,$acc11
1153 rotrwi $acc04,$acc04,16 # acc04..07 = ROTATE(r8^r4^r0,16)
1154 rotrwi $acc05,$acc05,16
1155 rotrwi $acc06,$acc06,16
1156 rotrwi $acc07,$acc07,16
1157 xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24)
1158 xor $s1,$s1,$acc01
1159 xor $s2,$s2,$acc02
1160 xor $s3,$s3,$acc03
1161 rotrwi $acc08,$acc08,8 # acc08..11 = ROTATE(r8,8)
1162 rotrwi $acc09,$acc09,8
1163 rotrwi $acc10,$acc10,8
1164 rotrwi $acc11,$acc11,8
1165 xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16)
1166 xor $s1,$s1,$acc05
1167 xor $s2,$s2,$acc06
1168 xor $s3,$s3,$acc07
1169 xor $s0,$s0,$acc08 # ^= ROTATE(r8,8)
1170 xor $s1,$s1,$acc09
1171 xor $s2,$s2,$acc10
1172 xor $s3,$s3,$acc11
1173
1174 b Ldec_compact_loop # next round
1175 .align 4
1176 Ldec_compact_done: # reached from the final iteration, before any column mixing
1177 xor $s0,$s0,$t0 # add the last round key loaded in the loop
1178 xor $s1,$s1,$t1
1179 xor $s2,$s2,$t2
1180 xor $s3,$s3,$t3
1181 blr # return to the caller with the result in s0..s3
1182 .long 0 # NOTE(review): zero word emitted after the routine, purpose not visible in this section
1183 .asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1184 .align 7
1185 ___
1186
1187 $code =~ s/\`([^\`]*)\`/eval $1/gem; # replace every backtick-quoted expression in the generated text with its evaluated value
1188 print $code; # write the finished assembly to STDOUT
1189 close STDOUT or die "error closing STDOUT: $!"; # buffered write errors only surface at close, so fail loudly instead of leaving truncated output
OLDNEW
« no previous file with comments | « openssl/crypto/aes/asm/aes-armv4.pl ('k') | openssl/crypto/aes/asm/aes-s390x.pl » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698