Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(927)

Side by Side Diff: openssl/crypto/rc4/asm/rc4-x86_64.S

Issue 2072073002: Delete bundled copy of OpenSSL and replace with README. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/openssl@master
Patch Set: Delete bundled copy of OpenSSL and replace with README. Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « openssl/crypto/rc4/asm/rc4-s390x.pl ('k') | openssl/crypto/rc4/asm/rc4-x86_64.pl » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 .text
2
3
# RC4: XORs a buffer with the RC4 keystream and advances the cipher state.
# Syntax is GNU assembler, AT&T operand order (source first, destination last).
#
# Register roles as read from the code below:
#   %rdi  state block; the bytes at offsets 0 and 4 hold the two stream
#         indices and the permutation table starts at offset 8
#   %rsi  byte count; a zero count returns immediately
#   %rdx  source pointer (copied to %r12)
#   %rcx  destination pointer (copied to %r13)
# Inside the body: %r10 = index i, %cl = index j, %r11 = bytes remaining.
# Saves and restores %rbx, %r12 and %r13; the 16x path also uses %xmm0-%xmm2.
#
# Table layout: 32-bit entries by default. When the dword at table offset 256
# equals -1 the table holds 8-bit entries and the .LRC4_CHAR path is taken.
# NOTE(review): the argument order looks like RC4(key, len, in, out) under the
# System V AMD64 convention -- confirm against the C prototype.
4 .globl RC4
5 .type RC4,@function
6 .align 16
7 RC4: orq %rsi,%rsi # sets ZF when the byte count is zero
8 jne .Lentry
9 .byte 0xf3,0xc3 # machine code for "rep ret": zero count, nothing to do
10 .Lentry:
11 pushq %rbx
12 pushq %r12
13 pushq %r13
14 .Lprologue:
15 movq %rsi,%r11 # r11 = bytes remaining
16 movq %rdx,%r12 # r12 = source pointer
17 movq %rcx,%r13 # r13 = destination pointer
18 xorq %r10,%r10
19 xorq %rcx,%rcx
20
21 leaq 8(%rdi),%rdi # rdi = start of the permutation table
22 movb -8(%rdi),%r10b # i = saved index at state offset 0
23 movb -4(%rdi),%cl # j = saved index at state offset 4
24 cmpl $-1,256(%rdi) # -1 marker means the table uses byte entries
25 je .LRC4_CHAR
26 movl OPENSSL_ia32cap_P(%rip),%r8d # capability word, tested at bit 30 below
27 xorq %rbx,%rbx
28 incb %r10b # i = i + 1 (mod 256)
29 subq %r10,%rbx # rbx = -i, masked below to get the warm-up count
30 subq %r12,%r13 # r13 = dst - src, so (%r13,%r12,1) addresses dst
31 movl (%rdi,%r10,4),%eax # eax = S[i]
32 testq $-16,%r11
33 jz .Lloop1 # fewer than 16 bytes: byte-at-a-time loop
34 btl $30,%r8d
35 jc .Lintel # bit 30 set: 16-bytes-per-iteration SSE path (presumably an Intel-CPU flag -- TODO confirm)
36 andq $7,%rbx # rbx = steps until i is a multiple of 8
37 leaq 1(%r10),%rsi # rsi = i + 1; lea leaves the flags from andq intact
38 jz .Loop8 # already aligned, skip the warm-up
39 subq %rbx,%r11 # account for the warm-up bytes
40 .Loop8_warmup:
41 addb %al,%cl # j += S[i]
42 movl (%rdi,%rcx,4),%edx # edx = S[j]
43 movl %eax,(%rdi,%rcx,4) # S[j] = old S[i]
44 movl %edx,(%rdi,%r10,4) # S[i] = old S[j]
45 addb %dl,%al # al = S[i] + S[j]
46 incb %r10b # i++
47 movl (%rdi,%rax,4),%edx # keystream byte = S[S[i] + S[j]]
48 movl (%rdi,%r10,4),%eax # preload S[i] for the next step
49 xorb (%r12),%dl
50 movb %dl,(%r13,%r12,1)
51 leaq 1(%r12),%r12
52 decq %rbx
53 jnz .Loop8_warmup
54
55 leaq 1(%r10),%rsi # rsi = i + 1 again after the warm-up moved i
56 jmp .Loop8
57 .align 16
58 .Loop8: # eight steps per iteration; keystream bytes are collected in %r8
59 addb %al,%cl
60 movl (%rdi,%rcx,4),%edx
61 movl %eax,(%rdi,%rcx,4)
62 movl 0(%rdi,%rsi,4),%ebx # S[i+1], read after the store to S[j] so an alias sees the new value
63 rorq $8,%r8 # make room for the next keystream byte
64 movl %edx,0(%rdi,%r10,4)
65 addb %al,%dl
66 movb (%rdi,%rdx,4),%r8b # keystream byte 0
67 addb %bl,%cl
68 movl (%rdi,%rcx,4),%edx
69 movl %ebx,(%rdi,%rcx,4)
70 movl 4(%rdi,%rsi,4),%eax
71 rorq $8,%r8
72 movl %edx,4(%rdi,%r10,4)
73 addb %bl,%dl
74 movb (%rdi,%rdx,4),%r8b # keystream byte 1
75 addb %al,%cl
76 movl (%rdi,%rcx,4),%edx
77 movl %eax,(%rdi,%rcx,4)
78 movl 8(%rdi,%rsi,4),%ebx
79 rorq $8,%r8
80 movl %edx,8(%rdi,%r10,4)
81 addb %al,%dl
82 movb (%rdi,%rdx,4),%r8b # keystream byte 2
83 addb %bl,%cl
84 movl (%rdi,%rcx,4),%edx
85 movl %ebx,(%rdi,%rcx,4)
86 movl 12(%rdi,%rsi,4),%eax
87 rorq $8,%r8
88 movl %edx,12(%rdi,%r10,4)
89 addb %bl,%dl
90 movb (%rdi,%rdx,4),%r8b # keystream byte 3
91 addb %al,%cl
92 movl (%rdi,%rcx,4),%edx
93 movl %eax,(%rdi,%rcx,4)
94 movl 16(%rdi,%rsi,4),%ebx
95 rorq $8,%r8
96 movl %edx,16(%rdi,%r10,4)
97 addb %al,%dl
98 movb (%rdi,%rdx,4),%r8b # keystream byte 4
99 addb %bl,%cl
100 movl (%rdi,%rcx,4),%edx
101 movl %ebx,(%rdi,%rcx,4)
102 movl 20(%rdi,%rsi,4),%eax
103 rorq $8,%r8
104 movl %edx,20(%rdi,%r10,4)
105 addb %bl,%dl
106 movb (%rdi,%rdx,4),%r8b # keystream byte 5
107 addb %al,%cl
108 movl (%rdi,%rcx,4),%edx
109 movl %eax,(%rdi,%rcx,4)
110 movl 24(%rdi,%rsi,4),%ebx
111 rorq $8,%r8
112 movl %edx,24(%rdi,%r10,4)
113 addb %al,%dl
114 movb (%rdi,%rdx,4),%r8b # keystream byte 6
115 addb $8,%sil # advance the look-ahead index with 8-bit wrap-around
116 addb %bl,%cl
117 movl (%rdi,%rcx,4),%edx
118 movl %ebx,(%rdi,%rcx,4)
119 movl -4(%rdi,%rsi,4),%eax # S[i+8 mod 256]: first entry for the next iteration
120 rorq $8,%r8
121 movl %edx,28(%rdi,%r10,4)
122 addb %bl,%dl
123 movb (%rdi,%rdx,4),%r8b # keystream byte 7
124 addb $8,%r10b # i += 8 (mod 256)
125 rorq $8,%r8 # final rotate puts keystream byte 0 in the low byte
126 subq $8,%r11
127
128 xorq (%r12),%r8 # combine 8 source bytes with the keystream
129 movq %r8,(%r13,%r12,1)
130 leaq 8(%r12),%r12
131
132 testq $-8,%r11
133 jnz .Loop8 # at least 8 bytes left
134 cmpq $0,%r11
135 jne .Lloop1 # 1..7 bytes left: finish one byte at a time
136 jmp .Lexit
137
138 .align 16
139 .Lintel: # 16 bytes per iteration, keystream gathered in %xmm0 and %xmm1
140 testq $-32,%r11
141 jz .Lloop1 # fewer than 32 bytes: byte-at-a-time loop
142 andq $15,%rbx # rbx = steps until i is a multiple of 16
143 jz .Loop16_is_hot
144 subq %rbx,%r11
145 .Loop16_warmup: # same single-byte step as .Loop8_warmup
146 addb %al,%cl
147 movl (%rdi,%rcx,4),%edx
148 movl %eax,(%rdi,%rcx,4)
149 movl %edx,(%rdi,%r10,4)
150 addb %dl,%al
151 incb %r10b
152 movl (%rdi,%rax,4),%edx
153 movl (%rdi,%r10,4),%eax
154 xorb (%r12),%dl
155 movb %dl,(%r13,%r12,1)
156 leaq 1(%r12),%r12
157 decq %rbx
158 jnz .Loop16_warmup
159
160 movq %rcx,%rbx # these three lines rewrite %rcx as zero-extended %cl
161 xorq %rcx,%rcx
162 movb %bl,%cl
163
164 .Loop16_is_hot: # first step of the first block, then join the loop mid-way
165 leaq (%rdi,%r10,4),%rsi # rsi = address of S[i]
166 addb %al,%cl
167 movl (%rdi,%rcx,4),%edx
168 pxor %xmm0,%xmm0
169 movl %eax,(%rdi,%rcx,4)
170 addb %dl,%al
171 movl 4(%rsi),%ebx
172 movzbl %al,%eax
173 movl %edx,0(%rsi)
174 addb %bl,%cl
175 pinsrw $0,(%rdi,%rax,4),%xmm0 # keystream byte 0 into word 0 of xmm0
176 jmp .Loop16_enter
177 .align 16
178 .Loop16: # top of loop also finishes and stores the previous 16-byte block
179 addb %al,%cl
180 movl (%rdi,%rcx,4),%edx
181 pxor %xmm0,%xmm2 # xmm2 (previous source bytes) ^= even keystream bytes
182 psllq $8,%xmm1 # move the odd keystream bytes into the odd byte lanes
183 pxor %xmm0,%xmm0
184 movl %eax,(%rdi,%rcx,4)
185 addb %dl,%al
186 movl 4(%rsi),%ebx
187 movzbl %al,%eax
188 movl %edx,0(%rsi)
189 pxor %xmm1,%xmm2
190 addb %bl,%cl
191 pinsrw $0,(%rdi,%rax,4),%xmm0
192 movdqu %xmm2,(%r13,%r12,1) # store the previous block
193 leaq 16(%r12),%r12
194 .Loop16_enter: # steps 1..15: even bytes go to xmm0, odd bytes to xmm1
195 movl (%rdi,%rcx,4),%edx
196 pxor %xmm1,%xmm1
197 movl %ebx,(%rdi,%rcx,4)
198 addb %dl,%bl
199 movl 8(%rsi),%eax
200 movzbl %bl,%ebx
201 movl %edx,4(%rsi)
202 addb %al,%cl
203 pinsrw $0,(%rdi,%rbx,4),%xmm1
204 movl (%rdi,%rcx,4),%edx
205 movl %eax,(%rdi,%rcx,4)
206 addb %dl,%al
207 movl 12(%rsi),%ebx
208 movzbl %al,%eax
209 movl %edx,8(%rsi)
210 addb %bl,%cl
211 pinsrw $1,(%rdi,%rax,4),%xmm0
212 movl (%rdi,%rcx,4),%edx
213 movl %ebx,(%rdi,%rcx,4)
214 addb %dl,%bl
215 movl 16(%rsi),%eax
216 movzbl %bl,%ebx
217 movl %edx,12(%rsi)
218 addb %al,%cl
219 pinsrw $1,(%rdi,%rbx,4),%xmm1
220 movl (%rdi,%rcx,4),%edx
221 movl %eax,(%rdi,%rcx,4)
222 addb %dl,%al
223 movl 20(%rsi),%ebx
224 movzbl %al,%eax
225 movl %edx,16(%rsi)
226 addb %bl,%cl
227 pinsrw $2,(%rdi,%rax,4),%xmm0
228 movl (%rdi,%rcx,4),%edx
229 movl %ebx,(%rdi,%rcx,4)
230 addb %dl,%bl
231 movl 24(%rsi),%eax
232 movzbl %bl,%ebx
233 movl %edx,20(%rsi)
234 addb %al,%cl
235 pinsrw $2,(%rdi,%rbx,4),%xmm1
236 movl (%rdi,%rcx,4),%edx
237 movl %eax,(%rdi,%rcx,4)
238 addb %dl,%al
239 movl 28(%rsi),%ebx
240 movzbl %al,%eax
241 movl %edx,24(%rsi)
242 addb %bl,%cl
243 pinsrw $3,(%rdi,%rax,4),%xmm0
244 movl (%rdi,%rcx,4),%edx
245 movl %ebx,(%rdi,%rcx,4)
246 addb %dl,%bl
247 movl 32(%rsi),%eax
248 movzbl %bl,%ebx
249 movl %edx,28(%rsi)
250 addb %al,%cl
251 pinsrw $3,(%rdi,%rbx,4),%xmm1
252 movl (%rdi,%rcx,4),%edx
253 movl %eax,(%rdi,%rcx,4)
254 addb %dl,%al
255 movl 36(%rsi),%ebx
256 movzbl %al,%eax
257 movl %edx,32(%rsi)
258 addb %bl,%cl
259 pinsrw $4,(%rdi,%rax,4),%xmm0
260 movl (%rdi,%rcx,4),%edx
261 movl %ebx,(%rdi,%rcx,4)
262 addb %dl,%bl
263 movl 40(%rsi),%eax
264 movzbl %bl,%ebx
265 movl %edx,36(%rsi)
266 addb %al,%cl
267 pinsrw $4,(%rdi,%rbx,4),%xmm1
268 movl (%rdi,%rcx,4),%edx
269 movl %eax,(%rdi,%rcx,4)
270 addb %dl,%al
271 movl 44(%rsi),%ebx
272 movzbl %al,%eax
273 movl %edx,40(%rsi)
274 addb %bl,%cl
275 pinsrw $5,(%rdi,%rax,4),%xmm0
276 movl (%rdi,%rcx,4),%edx
277 movl %ebx,(%rdi,%rcx,4)
278 addb %dl,%bl
279 movl 48(%rsi),%eax
280 movzbl %bl,%ebx
281 movl %edx,44(%rsi)
282 addb %al,%cl
283 pinsrw $5,(%rdi,%rbx,4),%xmm1
284 movl (%rdi,%rcx,4),%edx
285 movl %eax,(%rdi,%rcx,4)
286 addb %dl,%al
287 movl 52(%rsi),%ebx
288 movzbl %al,%eax
289 movl %edx,48(%rsi)
290 addb %bl,%cl
291 pinsrw $6,(%rdi,%rax,4),%xmm0
292 movl (%rdi,%rcx,4),%edx
293 movl %ebx,(%rdi,%rcx,4)
294 addb %dl,%bl
295 movl 56(%rsi),%eax
296 movzbl %bl,%ebx
297 movl %edx,52(%rsi)
298 addb %al,%cl
299 pinsrw $6,(%rdi,%rbx,4),%xmm1
300 movl (%rdi,%rcx,4),%edx
301 movl %eax,(%rdi,%rcx,4)
302 addb %dl,%al
303 movl 60(%rsi),%ebx
304 movzbl %al,%eax
305 movl %edx,56(%rsi)
306 addb %bl,%cl
307 pinsrw $7,(%rdi,%rax,4),%xmm0
308 addb $16,%r10b # i += 16 (mod 256)
309 movdqu (%r12),%xmm2 # load the 16 source bytes for this block
310 movl (%rdi,%rcx,4),%edx
311 movl %ebx,(%rdi,%rcx,4)
312 addb %dl,%bl
313 movzbl %bl,%ebx
314 movl %edx,60(%rsi)
315 leaq (%rdi,%r10,4),%rsi # rsi = address of the new S[i]
316 pinsrw $7,(%rdi,%rbx,4),%xmm1
317 movl (%rsi),%eax # preload S[i] for the next block
318 movq %rcx,%rbx # these three lines rewrite %rcx as zero-extended %cl
319 xorq %rcx,%rcx
320 subq $16,%r11
321 movb %bl,%cl
322 testq $-16,%r11
323 jnz .Loop16 # at least 16 bytes left
324
325 psllq $8,%xmm1 # finish and store the last block left pending by the loop
326 pxor %xmm0,%xmm2
327 pxor %xmm1,%xmm2
328 movdqu %xmm2,(%r13,%r12,1)
329 leaq 16(%r12),%r12
330
331 cmpq $0,%r11
332 jne .Lloop1
333 jmp .Lexit
334
335 .align 16
336 .Lloop1: # one byte per iteration, 32-bit table entries
337 addb %al,%cl # j += S[i]
338 movl (%rdi,%rcx,4),%edx
339 movl %eax,(%rdi,%rcx,4) # swap S[i] and S[j]
340 movl %edx,(%rdi,%r10,4)
341 addb %dl,%al
342 incb %r10b
343 movl (%rdi,%rax,4),%edx # keystream byte
344 movl (%rdi,%r10,4),%eax # preload the next S[i]
345 xorb (%r12),%dl
346 movb %dl,(%r13,%r12,1)
347 leaq 1(%r12),%r12
348 decq %r11
349 jnz .Lloop1
350 jmp .Lexit
351
352 .align 16
353 .LRC4_CHAR: # byte-entry table; here %r13 is still the plain destination pointer
354 addb $1,%r10b # i = i + 1 (mod 256)
355 movzbl (%rdi,%r10,1),%eax # eax = S[i]
356 testq $-8,%r11
357 jz .Lcloop1 # fewer than 8 bytes
358 jmp .Lcloop8
359 .align 16
360 .Lcloop8: # eight steps per iteration; %rsi and %r10 alternate as the index
361 movl (%r12),%r8d # source bytes 0..3
362 movl 4(%r12),%r9d # source bytes 4..7
363 addb %al,%cl
364 leaq 1(%r10),%rsi
365 movzbl (%rdi,%rcx,1),%edx
366 movzbl %sil,%esi # next index, wrapped to 8 bits
367 movzbl (%rdi,%rsi,1),%ebx # look-ahead S[i+1], read before the store below
368 movb %al,(%rdi,%rcx,1)
369 cmpq %rsi,%rcx
370 movb %dl,(%rdi,%r10,1)
371 jne .Lcmov0
372 movq %rax,%rbx # j == i+1: the look-ahead is stale, use the value just stored
373 .Lcmov0:
374 addb %al,%dl
375 xorb (%rdi,%rdx,1),%r8b # XOR the keystream byte into the low byte
376 rorl $8,%r8d # rotate the next source byte into position
377 addb %bl,%cl
378 leaq 1(%rsi),%r10
379 movzbl (%rdi,%rcx,1),%edx
380 movzbl %r10b,%r10d
381 movzbl (%rdi,%r10,1),%eax
382 movb %bl,(%rdi,%rcx,1)
383 cmpq %r10,%rcx
384 movb %dl,(%rdi,%rsi,1)
385 jne .Lcmov1
386 movq %rbx,%rax
387 .Lcmov1:
388 addb %bl,%dl
389 xorb (%rdi,%rdx,1),%r8b
390 rorl $8,%r8d
391 addb %al,%cl
392 leaq 1(%r10),%rsi
393 movzbl (%rdi,%rcx,1),%edx
394 movzbl %sil,%esi
395 movzbl (%rdi,%rsi,1),%ebx
396 movb %al,(%rdi,%rcx,1)
397 cmpq %rsi,%rcx
398 movb %dl,(%rdi,%r10,1)
399 jne .Lcmov2
400 movq %rax,%rbx
401 .Lcmov2:
402 addb %al,%dl
403 xorb (%rdi,%rdx,1),%r8b
404 rorl $8,%r8d
405 addb %bl,%cl
406 leaq 1(%rsi),%r10
407 movzbl (%rdi,%rcx,1),%edx
408 movzbl %r10b,%r10d
409 movzbl (%rdi,%r10,1),%eax
410 movb %bl,(%rdi,%rcx,1)
411 cmpq %r10,%rcx
412 movb %dl,(%rdi,%rsi,1)
413 jne .Lcmov3
414 movq %rbx,%rax
415 .Lcmov3:
416 addb %bl,%dl
417 xorb (%rdi,%rdx,1),%r8b
418 rorl $8,%r8d # fourth rotate: r8d bytes are back in their original order
419 addb %al,%cl
420 leaq 1(%r10),%rsi
421 movzbl (%rdi,%rcx,1),%edx
422 movzbl %sil,%esi
423 movzbl (%rdi,%rsi,1),%ebx
424 movb %al,(%rdi,%rcx,1)
425 cmpq %rsi,%rcx
426 movb %dl,(%rdi,%r10,1)
427 jne .Lcmov4
428 movq %rax,%rbx
429 .Lcmov4:
430 addb %al,%dl
431 xorb (%rdi,%rdx,1),%r9b # second half of the block accumulates in r9d
432 rorl $8,%r9d
433 addb %bl,%cl
434 leaq 1(%rsi),%r10
435 movzbl (%rdi,%rcx,1),%edx
436 movzbl %r10b,%r10d
437 movzbl (%rdi,%r10,1),%eax
438 movb %bl,(%rdi,%rcx,1)
439 cmpq %r10,%rcx
440 movb %dl,(%rdi,%rsi,1)
441 jne .Lcmov5
442 movq %rbx,%rax
443 .Lcmov5:
444 addb %bl,%dl
445 xorb (%rdi,%rdx,1),%r9b
446 rorl $8,%r9d
447 addb %al,%cl
448 leaq 1(%r10),%rsi
449 movzbl (%rdi,%rcx,1),%edx
450 movzbl %sil,%esi
451 movzbl (%rdi,%rsi,1),%ebx
452 movb %al,(%rdi,%rcx,1)
453 cmpq %rsi,%rcx
454 movb %dl,(%rdi,%r10,1)
455 jne .Lcmov6
456 movq %rax,%rbx
457 .Lcmov6:
458 addb %al,%dl
459 xorb (%rdi,%rdx,1),%r9b
460 rorl $8,%r9d
461 addb %bl,%cl
462 leaq 1(%rsi),%r10
463 movzbl (%rdi,%rcx,1),%edx
464 movzbl %r10b,%r10d
465 movzbl (%rdi,%r10,1),%eax
466 movb %bl,(%rdi,%rcx,1)
467 cmpq %r10,%rcx
468 movb %dl,(%rdi,%rsi,1)
469 jne .Lcmov7
470 movq %rbx,%rax
471 .Lcmov7:
472 addb %bl,%dl
473 xorb (%rdi,%rdx,1),%r9b
474 rorl $8,%r9d
475 leaq -8(%r11),%r11 # 8 bytes consumed
476 movl %r8d,(%r13) # store result bytes 0..3
477 leaq 8(%r12),%r12
478 movl %r9d,4(%r13) # store result bytes 4..7
479 leaq 8(%r13),%r13
480
481 testq $-8,%r11
482 jnz .Lcloop8
483 cmpq $0,%r11
484 jne .Lcloop1
485 jmp .Lexit
486 .align 16
487 .Lcloop1: # one byte per iteration, 8-bit table entries
488 addb %al,%cl
489 movzbl %cl,%ecx
490 movzbl (%rdi,%rcx,1),%edx
491 movb %al,(%rdi,%rcx,1) # swap S[i] and S[j]
492 movb %dl,(%rdi,%r10,1)
493 addb %al,%dl
494 addb $1,%r10b
495 movzbl %dl,%edx
496 movzbl %r10b,%r10d
497 movzbl (%rdi,%rdx,1),%edx # keystream byte
498 movzbl (%rdi,%r10,1),%eax # preload the next S[i]
499 xorb (%r12),%dl
500 leaq 1(%r12),%r12
501 movb %dl,(%r13)
502 leaq 1(%r13),%r13
503 subq $1,%r11
504 jnz .Lcloop1
505 jmp .Lexit
506
507 .align 16
508 .Lexit: # common exit: write the indices back and restore saved registers
509 subb $1,%r10b # undo the look-ahead increment of i
510 movl %r10d,-8(%rdi) # save i at state offset 0
511 movl %ecx,-4(%rdi) # save j at state offset 4
512
513 movq (%rsp),%r13 # reload in the reverse of the push order
514 movq 8(%rsp),%r12
515 movq 16(%rsp),%rbx
516 addq $24,%rsp
517 .Lepilogue:
518 .byte 0xf3,0xc3 # machine code for "rep ret"
519 .size RC4,.-RC4
# private_RC4_set_key: builds the 256-entry RC4 permutation table from a key.
# Syntax is GNU assembler, AT&T operand order.
#
# Register roles as read from the code below:
#   %rdi  state block; the table is written starting at offset 8 and the two
#         index dwords at offsets 0 and 4 are cleared on exit
#   %rsi  key length in bytes
#   %rdx  pointer to the key bytes
# Uses only %rax, %rcx, %r8-%r11 besides the arguments; %eax is zero on return.
#
# Bit 20 of OPENSSL_ia32cap_P selects the table layout: set means 8-bit
# entries followed by a -1 marker dword at table offset 256, clear means
# 32-bit entries.
# NOTE(review): no check for a zero key length is visible here; with a zero
# length the key offset never wraps -- confirm callers never pass zero.
520 .globl private_RC4_set_key
521 .type private_RC4_set_key,@function
522 .align 16
523 private_RC4_set_key:
524 leaq 8(%rdi),%rdi # rdi = start of the permutation table
525 leaq (%rdx,%rsi,1),%rdx # rdx = one past the last key byte
526 negq %rsi # rsi = -len, counts up towards zero
527 movq %rsi,%rcx # rcx keeps -len to restart the key
528 xorl %eax,%eax
529 xorq %r9,%r9
530 xorq %r10,%r10
531 xorq %r11,%r11
532
533 movl OPENSSL_ia32cap_P(%rip),%r8d
534 btl $20,%r8d
535 jc .Lc1stloop # bit 20 set: byte-entry table
536 jmp .Lw1stloop
537
538 .align 16
539 .Lw1stloop: # S[k] = k for k = 0..255, 32-bit entries
540 movl %eax,(%rdi,%rax,4)
541 addb $1,%al
542 jnc .Lw1stloop # carry out of %al means all 256 entries are written
543
544 xorq %r9,%r9 # r9 = i
545 xorq %r8,%r8 # r8 = j
546 .align 16
547 .Lw2ndloop: # key mixing: j += key byte + S[i], then swap S[i] and S[j]
548 movl (%rdi,%r9,4),%r10d # r10d = S[i]
549 addb (%rdx,%rsi,1),%r8b # j += current key byte
550 addb %r10b,%r8b # j += S[i]
551 addq $1,%rsi # next key byte; ZF set when the key is exhausted
552 movl (%rdi,%r8,4),%r11d # r11d = S[j]; mov leaves ZF untouched
553 cmovzq %rcx,%rsi # restart the key from its first byte
554 movl %r10d,(%rdi,%r8,4)
555 movl %r11d,(%rdi,%r9,4)
556 addb $1,%r9b
557 jnc .Lw2ndloop # loop until i wraps past 255
558 jmp .Lexit_key
559
560 .align 16
561 .Lc1stloop: # S[k] = k for k = 0..255, 8-bit entries
562 movb %al,(%rdi,%rax,1)
563 addb $1,%al
564 jnc .Lc1stloop
565
566 xorq %r9,%r9 # r9 = i
567 xorq %r8,%r8 # r8 = j
568 .align 16
569 .Lc2ndloop: # same key mixing as .Lw2ndloop on the byte table
570 movb (%rdi,%r9,1),%r10b
571 addb (%rdx,%rsi,1),%r8b
572 addb %r10b,%r8b
573 addq $1,%rsi # ZF set when the key is exhausted
574 movb (%rdi,%r8,1),%r11b # mov leaves ZF untouched for the jnz below
575 jnz .Lcnowrap
576 movq %rcx,%rsi # restart the key from its first byte
577 .Lcnowrap:
578 movb %r10b,(%rdi,%r8,1)
579 movb %r11b,(%rdi,%r9,1)
580 addb $1,%r9b
581 jnc .Lc2ndloop
582 movl $-1,256(%rdi) # marker dword: tells RC4 the table has byte entries
583
584 .align 16
585 .Lexit_key:
586 xorl %eax,%eax
587 movl %eax,-8(%rdi) # clear the index dword at state offset 0
588 movl %eax,-4(%rdi) # clear the index dword at state offset 4
589 .byte 0xf3,0xc3 # machine code for "rep ret"
590 .size private_RC4_set_key,.-private_RC4_set_key
591
# RC4_options: returns in %rax the address of a NUL-terminated string that
# names the code path chosen from OPENSSL_ia32cap_P.
#   bit 20 set                 -> .Lopts + 12  "rc4(8x,char)"
#   bit 20 clear, bit 30 set   -> .Lopts + 25  "rc4(16x,int)"
#   both clear                 -> .Lopts + 0   "rc4(8x,int)"
# Takes no arguments and uses only %rax, %rdx and the flags.
# The offsets 12 and 25 are the byte lengths of the strings preceding each
# entry, so they must change together with the table below.
592 .globl RC4_options
593 .type RC4_options,@function
594 .align 16
595 RC4_options:
596 leaq .Lopts(%rip),%rax # default answer: "rc4(8x,int)"
597 movl OPENSSL_ia32cap_P(%rip),%edx
598 btl $20,%edx
599 jc .L8xchar
600 btl $30,%edx
601 jnc .Ldone
602 addq $25,%rax # skip 12 + 13 bytes to "rc4(16x,int)"
603 .byte 0xf3,0xc3 # machine code for "rep ret"
604 .L8xchar:
605 addq $12,%rax # skip 12 bytes to "rc4(8x,char)"
606 .Ldone:
607 .byte 0xf3,0xc3 # machine code for "rep ret"
608 .align 64
609 .Lopts:
610 .byte 114,99,52,40,56,120,44,105,110,116,41,0 # "rc4(8x,int)"
611 .byte 114,99,52,40,56,120,44,99,104,97,114,41,0 # "rc4(8x,char)"
612 .byte 114,99,52,40,49,54,120,44,105,110,116,41,0 # "rc4(16x,int)"
613 .byte 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 # "RC4 for x86_64, CRYPTOGAMS by <appro@openssl.org>"; not addressed by the code above
614 .align 64
615 .size RC4_options,.-RC4_options
OLDNEW
« no previous file with comments | « openssl/crypto/rc4/asm/rc4-s390x.pl ('k') | openssl/crypto/rc4/asm/rc4-x86_64.pl » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698