Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(191)

Side by Side Diff: third_party/boringssl/linux-x86_64/crypto/rc4/rc4-x86_64.S

Issue 377783004: Add BoringSSL GYP files. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Final Python fix. Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 #if defined(__x86_64__)
2 .text
3
4
/*
 * RC4: XORs a buffer with the RC4 keystream and writes the updated
 * indices back into the key state.
 *
 * Register use on entry, as read from the code below:
 *   %rdi  key state: index words at offsets 0 and 4, table at offset 8
 *   %rsi  byte count (a zero count returns immediately)
 *   %rdx  input pointer
 *   %rcx  output pointer
 * Presumably this matches a C prototype of the form
 * RC4(key, len, in, out) -- confirm against the declaring header.
 *
 * %rbx, %r12 and %r13 are pushed on entry and reloaded at .Lexit.
 *
 * Three code paths:
 *   .Loop8      table of 32-bit entries, 8 bytes per iteration
 *   .Lintel     table of 32-bit entries, 16 bytes per iteration using
 *               %xmm0-%xmm2 (taken when bit 30 of the first
 *               OPENSSL_ia32cap_P word is set)
 *   .LRC4_CHAR  table of bytes (taken when the dword at table+256 is -1)
 * .Lloop1 / .Lcloop1 process one byte at a time for short inputs and tails.
 */
5 .globl RC4
6 .type RC4,@function
7 .align 16
/* Zero byte count: nothing to do. */
8 RC4: orq %rsi,%rsi
9 jne .Lentry
/* 0xf3,0xc3 is the two-byte encoding of "rep ret". */
10 .byte 0xf3,0xc3
11 .Lentry:
12 pushq %rbx
13 pushq %r12
14 pushq %r13
15 .Lprologue:
/* r11 = bytes remaining, r12 = input pointer, r13 = output pointer. */
16 movq %rsi,%r11
17 movq %rdx,%r12
18 movq %rcx,%r13
19 xorq %r10,%r10
20 xorq %rcx,%rcx
21
/* Point %rdi at the table; r10b and cl take the low bytes of the two index words (called x and y below). */
22 leaq 8(%rdi),%rdi
23 movb -8(%rdi),%r10b
24 movb -4(%rdi),%cl
/* A -1 dword at table+256 selects the byte-table code path. */
25 cmpl $-1,256(%rdi)
26 je .LRC4_CHAR
/* r8d = first word of OPENSSL_ia32cap_P, address taken from the GOT. */
27 movq OPENSSL_ia32cap_P@GOTPCREL(%rip),%r8
28 movl (%r8),%r8d
/* Pre-increment x; rbx = -(x+1), masked later to get the warm-up count. */
29 xorq %rbx,%rbx
30 incb %r10b
31 subq %r10,%rbx
/* r13 becomes (output - input), so (%r12,%r13,1) addresses the output byte. */
32 subq %r12,%r13
/* eax = table[x] */
33 movl (%rdi,%r10,4),%eax
/* Fewer than 16 bytes: use the one-byte loop only. */
34 testq $-16,%r11
35 jz .Lloop1
/* Bit 30 of the capability word selects the 16-byte path. */
36 btl $30,%r8d
37 jc .Lintel
/*
 * rbx = number of single bytes to process until x is a multiple of 8.
 * With x aligned, the fixed displacements in .Loop8 stay inside the
 * table (indices are 8-bit, so the table has 256 entries).
 * The jz tests the flags from andq; leaq does not modify flags.
 */
38 andq $7,%rbx
39 leaq 1(%r10),%rsi
40 jz .Loop8
41 subq %rbx,%r11
/* One byte per iteration: y += S[x]; swap S[x],S[y]; out = in ^ S[S[x]+S[y]]. */
42 .Loop8_warmup:
43 addb %al,%cl
44 movl (%rdi,%rcx,4),%edx
45 movl %eax,(%rdi,%rcx,4)
46 movl %edx,(%rdi,%r10,4)
47 addb %dl,%al
48 incb %r10b
49 movl (%rdi,%rax,4),%edx
50 movl (%rdi,%r10,4),%eax
51 xorb (%r12),%dl
52 movb %dl,(%r12,%r13,1)
53 leaq 1(%r12),%r12
54 decq %rbx
55 jnz .Loop8_warmup
56
/* rsi = x + 1, used to preload the next table entry in .Loop8. */
57 leaq 1(%r10),%rsi
58 jmp .Loop8
59 .align 16
/*
 * Eight unrolled steps, alternating %eax and %ebx as the current S[x].
 * Each step rotates %r8 right by 8 and places the new keystream byte in
 * %r8b; after the final rotate the eight bytes sit in memory order and
 * are XORed with 8 input bytes at once.
 */
60 .Loop8:
61 addb %al,%cl
62 movl (%rdi,%rcx,4),%edx
63 movl %eax,(%rdi,%rcx,4)
64 movl 0(%rdi,%rsi,4),%ebx
65 rorq $8,%r8
66 movl %edx,0(%rdi,%r10,4)
67 addb %al,%dl
68 movb (%rdi,%rdx,4),%r8b
69 addb %bl,%cl
70 movl (%rdi,%rcx,4),%edx
71 movl %ebx,(%rdi,%rcx,4)
72 movl 4(%rdi,%rsi,4),%eax
73 rorq $8,%r8
74 movl %edx,4(%rdi,%r10,4)
75 addb %bl,%dl
76 movb (%rdi,%rdx,4),%r8b
77 addb %al,%cl
78 movl (%rdi,%rcx,4),%edx
79 movl %eax,(%rdi,%rcx,4)
80 movl 8(%rdi,%rsi,4),%ebx
81 rorq $8,%r8
82 movl %edx,8(%rdi,%r10,4)
83 addb %al,%dl
84 movb (%rdi,%rdx,4),%r8b
85 addb %bl,%cl
86 movl (%rdi,%rcx,4),%edx
87 movl %ebx,(%rdi,%rcx,4)
88 movl 12(%rdi,%rsi,4),%eax
89 rorq $8,%r8
90 movl %edx,12(%rdi,%r10,4)
91 addb %bl,%dl
92 movb (%rdi,%rdx,4),%r8b
93 addb %al,%cl
94 movl (%rdi,%rcx,4),%edx
95 movl %eax,(%rdi,%rcx,4)
96 movl 16(%rdi,%rsi,4),%ebx
97 rorq $8,%r8
98 movl %edx,16(%rdi,%r10,4)
99 addb %al,%dl
100 movb (%rdi,%rdx,4),%r8b
101 addb %bl,%cl
102 movl (%rdi,%rcx,4),%edx
103 movl %ebx,(%rdi,%rcx,4)
104 movl 20(%rdi,%rsi,4),%eax
105 rorq $8,%r8
106 movl %edx,20(%rdi,%r10,4)
107 addb %bl,%dl
108 movb (%rdi,%rdx,4),%r8b
109 addb %al,%cl
110 movl (%rdi,%rcx,4),%edx
111 movl %eax,(%rdi,%rcx,4)
112 movl 24(%rdi,%rsi,4),%ebx
113 rorq $8,%r8
114 movl %edx,24(%rdi,%r10,4)
115 addb %al,%dl
116 movb (%rdi,%rdx,4),%r8b
/* Advance the preload index with 8-bit wrap; the -4 displacement below then reads the entry for the next iteration's x. */
117 addb $8,%sil
118 addb %bl,%cl
119 movl (%rdi,%rcx,4),%edx
120 movl %ebx,(%rdi,%rcx,4)
121 movl -4(%rdi,%rsi,4),%eax
122 rorq $8,%r8
123 movl %edx,28(%rdi,%r10,4)
124 addb %bl,%dl
125 movb (%rdi,%rdx,4),%r8b
126 addb $8,%r10b
127 rorq $8,%r8
128 subq $8,%r11
129
/* XOR 8 keystream bytes with 8 input bytes and store to the output. */
130 xorq (%r12),%r8
131 movq %r8,(%r12,%r13,1)
132 leaq 8(%r12),%r12
133
/* Loop while at least 8 bytes remain; finish any tail one byte at a time. */
134 testq $-8,%r11
135 jnz .Loop8
136 cmpq $0,%r11
137 jne .Lloop1
138 jmp .Lexit
139
140 .align 16
/* 16-byte path; needs at least 32 bytes, otherwise fall back to .Lloop1. */
141 .Lintel:
142 testq $-32,%r11
143 jz .Lloop1
/* rbx = number of single bytes to process until x is a multiple of 16. */
144 andq $15,%rbx
145 jz .Loop16_is_hot
146 subq %rbx,%r11
147 .Loop16_warmup:
148 addb %al,%cl
149 movl (%rdi,%rcx,4),%edx
150 movl %eax,(%rdi,%rcx,4)
151 movl %edx,(%rdi,%r10,4)
152 addb %dl,%al
153 incb %r10b
154 movl (%rdi,%rax,4),%edx
155 movl (%rdi,%r10,4),%eax
156 xorb (%r12),%dl
157 movb %dl,(%r12,%r13,1)
158 leaq 1(%r12),%r12
159 decq %rbx
160 jnz .Loop16_warmup
161
/* Clear every bit of %rcx above the low byte (y), going through %rbx. */
162 movq %rcx,%rbx
163 xorq %rcx,%rcx
164 movb %bl,%cl
165
/*
 * First step of the first 16-byte group, then jump into the middle of
 * .Loop16. rsi = address of S[x]. Even-numbered keystream bytes are
 * gathered into %xmm0 and odd-numbered ones into %xmm1 with pinsrw.
 */
166 .Loop16_is_hot:
167 leaq (%rdi,%r10,4),%rsi
168 addb %al,%cl
169 movl (%rdi,%rcx,4),%edx
170 pxor %xmm0,%xmm0
171 movl %eax,(%rdi,%rcx,4)
172 addb %dl,%al
173 movl 4(%rsi),%ebx
174 movzbl %al,%eax
175 movl %edx,0(%rsi)
176 addb %bl,%cl
177 pinsrw $0,(%rdi,%rax,4),%xmm0
178 jmp .Loop16_enter
179 .align 16
/*
 * Top of the loop: the first step of the new group is interleaved with
 * finishing the previous one -- %xmm2 (previous input) is XORed with
 * %xmm0 and with %xmm1 shifted left by 8 bits, then stored.
 */
180 .Loop16:
181 addb %al,%cl
182 movl (%rdi,%rcx,4),%edx
183 pxor %xmm0,%xmm2
184 psllq $8,%xmm1
185 pxor %xmm0,%xmm0
186 movl %eax,(%rdi,%rcx,4)
187 addb %dl,%al
188 movl 4(%rsi),%ebx
189 movzbl %al,%eax
190 movl %edx,0(%rsi)
191 pxor %xmm1,%xmm2
192 addb %bl,%cl
193 pinsrw $0,(%rdi,%rax,4),%xmm0
194 movdqu %xmm2,(%r12,%r13,1)
195 leaq 16(%r12),%r12
/* Steps 2..16 of the group; %eax and %ebx alternate as the current S[x]. */
196 .Loop16_enter:
197 movl (%rdi,%rcx,4),%edx
198 pxor %xmm1,%xmm1
199 movl %ebx,(%rdi,%rcx,4)
200 addb %dl,%bl
201 movl 8(%rsi),%eax
202 movzbl %bl,%ebx
203 movl %edx,4(%rsi)
204 addb %al,%cl
205 pinsrw $0,(%rdi,%rbx,4),%xmm1
206 movl (%rdi,%rcx,4),%edx
207 movl %eax,(%rdi,%rcx,4)
208 addb %dl,%al
209 movl 12(%rsi),%ebx
210 movzbl %al,%eax
211 movl %edx,8(%rsi)
212 addb %bl,%cl
213 pinsrw $1,(%rdi,%rax,4),%xmm0
214 movl (%rdi,%rcx,4),%edx
215 movl %ebx,(%rdi,%rcx,4)
216 addb %dl,%bl
217 movl 16(%rsi),%eax
218 movzbl %bl,%ebx
219 movl %edx,12(%rsi)
220 addb %al,%cl
221 pinsrw $1,(%rdi,%rbx,4),%xmm1
222 movl (%rdi,%rcx,4),%edx
223 movl %eax,(%rdi,%rcx,4)
224 addb %dl,%al
225 movl 20(%rsi),%ebx
226 movzbl %al,%eax
227 movl %edx,16(%rsi)
228 addb %bl,%cl
229 pinsrw $2,(%rdi,%rax,4),%xmm0
230 movl (%rdi,%rcx,4),%edx
231 movl %ebx,(%rdi,%rcx,4)
232 addb %dl,%bl
233 movl 24(%rsi),%eax
234 movzbl %bl,%ebx
235 movl %edx,20(%rsi)
236 addb %al,%cl
237 pinsrw $2,(%rdi,%rbx,4),%xmm1
238 movl (%rdi,%rcx,4),%edx
239 movl %eax,(%rdi,%rcx,4)
240 addb %dl,%al
241 movl 28(%rsi),%ebx
242 movzbl %al,%eax
243 movl %edx,24(%rsi)
244 addb %bl,%cl
245 pinsrw $3,(%rdi,%rax,4),%xmm0
246 movl (%rdi,%rcx,4),%edx
247 movl %ebx,(%rdi,%rcx,4)
248 addb %dl,%bl
249 movl 32(%rsi),%eax
250 movzbl %bl,%ebx
251 movl %edx,28(%rsi)
252 addb %al,%cl
253 pinsrw $3,(%rdi,%rbx,4),%xmm1
254 movl (%rdi,%rcx,4),%edx
255 movl %eax,(%rdi,%rcx,4)
256 addb %dl,%al
257 movl 36(%rsi),%ebx
258 movzbl %al,%eax
259 movl %edx,32(%rsi)
260 addb %bl,%cl
261 pinsrw $4,(%rdi,%rax,4),%xmm0
262 movl (%rdi,%rcx,4),%edx
263 movl %ebx,(%rdi,%rcx,4)
264 addb %dl,%bl
265 movl 40(%rsi),%eax
266 movzbl %bl,%ebx
267 movl %edx,36(%rsi)
268 addb %al,%cl
269 pinsrw $4,(%rdi,%rbx,4),%xmm1
270 movl (%rdi,%rcx,4),%edx
271 movl %eax,(%rdi,%rcx,4)
272 addb %dl,%al
273 movl 44(%rsi),%ebx
274 movzbl %al,%eax
275 movl %edx,40(%rsi)
276 addb %bl,%cl
277 pinsrw $5,(%rdi,%rax,4),%xmm0
278 movl (%rdi,%rcx,4),%edx
279 movl %ebx,(%rdi,%rcx,4)
280 addb %dl,%bl
281 movl 48(%rsi),%eax
282 movzbl %bl,%ebx
283 movl %edx,44(%rsi)
284 addb %al,%cl
285 pinsrw $5,(%rdi,%rbx,4),%xmm1
286 movl (%rdi,%rcx,4),%edx
287 movl %eax,(%rdi,%rcx,4)
288 addb %dl,%al
289 movl 52(%rsi),%ebx
290 movzbl %al,%eax
291 movl %edx,48(%rsi)
292 addb %bl,%cl
293 pinsrw $6,(%rdi,%rax,4),%xmm0
294 movl (%rdi,%rcx,4),%edx
295 movl %ebx,(%rdi,%rcx,4)
296 addb %dl,%bl
297 movl 56(%rsi),%eax
298 movzbl %bl,%ebx
299 movl %edx,52(%rsi)
300 addb %al,%cl
301 pinsrw $6,(%rdi,%rbx,4),%xmm1
302 movl (%rdi,%rcx,4),%edx
303 movl %eax,(%rdi,%rcx,4)
304 addb %dl,%al
305 movl 60(%rsi),%ebx
306 movzbl %al,%eax
307 movl %edx,56(%rsi)
308 addb %bl,%cl
309 pinsrw $7,(%rdi,%rax,4),%xmm0
/* x += 16 (8-bit wrap); xmm2 = the 16 input bytes for this group. */
310 addb $16,%r10b
311 movdqu (%r12),%xmm2
312 movl (%rdi,%rcx,4),%edx
313 movl %ebx,(%rdi,%rcx,4)
314 addb %dl,%bl
315 movzbl %bl,%ebx
316 movl %edx,60(%rsi)
/* rsi = address of S[x] for the next group; eax = S[x]. */
317 leaq (%rdi,%r10,4),%rsi
318 pinsrw $7,(%rdi,%rbx,4),%xmm1
319 movl (%rsi),%eax
/* Clear every bit of %rcx above the low byte (y), as after the warm-up. */
320 movq %rcx,%rbx
321 xorq %rcx,%rcx
322 subq $16,%r11
323 movb %bl,%cl
324 testq $-16,%r11
325 jnz .Loop16
326
/* Flush the last group: out = in ^ xmm0 ^ (xmm1 << 8). */
327 psllq $8,%xmm1
328 pxor %xmm0,%xmm2
329 pxor %xmm1,%xmm2
330 movdqu %xmm2,(%r12,%r13,1)
331 leaq 16(%r12),%r12
332
333 cmpq $0,%r11
334 jne .Lloop1
335 jmp .Lexit
336
337 .align 16
/* One byte per iteration on the 32-bit table; used for short inputs and tails. */
338 .Lloop1:
339 addb %al,%cl
340 movl (%rdi,%rcx,4),%edx
341 movl %eax,(%rdi,%rcx,4)
342 movl %edx,(%rdi,%r10,4)
343 addb %dl,%al
344 incb %r10b
345 movl (%rdi,%rax,4),%edx
346 movl (%rdi,%r10,4),%eax
347 xorb (%r12),%dl
348 movb %dl,(%r12,%r13,1)
349 leaq 1(%r12),%r12
350 decq %r11
351 jnz .Lloop1
352 jmp .Lexit
353
354 .align 16
/*
 * Byte-table path. Here %r13 is still the plain output pointer: the
 * subq that turns it into an offset is after the branch to this label.
 */
355 .LRC4_CHAR:
356 addb $1,%r10b
357 movzbl (%rdi,%r10,1),%eax
358 testq $-8,%r11
359 jz .Lcloop1
360 jmp .Lcloop8
361 .align 16
/*
 * Eight bytes per iteration. r8d/r9d hold input bytes 0-3 / 4-7; each
 * step XORs a keystream byte into the low byte and rotates right by 8,
 * so after four steps the register is back in memory order.
 * The next S[x+1] is loaded before S[y] is stored; when y equals x+1
 * that preloaded value is stale, so the cmpq/jne pair replaces it with
 * the value just stored.
 */
362 .Lcloop8:
363 movl (%r12),%r8d
364 movl 4(%r12),%r9d
365 addb %al,%cl
366 leaq 1(%r10),%rsi
367 movzbl (%rdi,%rcx,1),%edx
368 movzbl %sil,%esi
369 movzbl (%rdi,%rsi,1),%ebx
370 movb %al,(%rdi,%rcx,1)
371 cmpq %rsi,%rcx
372 movb %dl,(%rdi,%r10,1)
373 jne .Lcmov0
374 movq %rax,%rbx
375 .Lcmov0:
376 addb %al,%dl
377 xorb (%rdi,%rdx,1),%r8b
378 rorl $8,%r8d
379 addb %bl,%cl
380 leaq 1(%rsi),%r10
381 movzbl (%rdi,%rcx,1),%edx
382 movzbl %r10b,%r10d
383 movzbl (%rdi,%r10,1),%eax
384 movb %bl,(%rdi,%rcx,1)
385 cmpq %r10,%rcx
386 movb %dl,(%rdi,%rsi,1)
387 jne .Lcmov1
388 movq %rbx,%rax
389 .Lcmov1:
390 addb %bl,%dl
391 xorb (%rdi,%rdx,1),%r8b
392 rorl $8,%r8d
393 addb %al,%cl
394 leaq 1(%r10),%rsi
395 movzbl (%rdi,%rcx,1),%edx
396 movzbl %sil,%esi
397 movzbl (%rdi,%rsi,1),%ebx
398 movb %al,(%rdi,%rcx,1)
399 cmpq %rsi,%rcx
400 movb %dl,(%rdi,%r10,1)
401 jne .Lcmov2
402 movq %rax,%rbx
403 .Lcmov2:
404 addb %al,%dl
405 xorb (%rdi,%rdx,1),%r8b
406 rorl $8,%r8d
407 addb %bl,%cl
408 leaq 1(%rsi),%r10
409 movzbl (%rdi,%rcx,1),%edx
410 movzbl %r10b,%r10d
411 movzbl (%rdi,%r10,1),%eax
412 movb %bl,(%rdi,%rcx,1)
413 cmpq %r10,%rcx
414 movb %dl,(%rdi,%rsi,1)
415 jne .Lcmov3
416 movq %rbx,%rax
417 .Lcmov3:
418 addb %bl,%dl
419 xorb (%rdi,%rdx,1),%r8b
420 rorl $8,%r8d
421 addb %al,%cl
422 leaq 1(%r10),%rsi
423 movzbl (%rdi,%rcx,1),%edx
424 movzbl %sil,%esi
425 movzbl (%rdi,%rsi,1),%ebx
426 movb %al,(%rdi,%rcx,1)
427 cmpq %rsi,%rcx
428 movb %dl,(%rdi,%r10,1)
429 jne .Lcmov4
430 movq %rax,%rbx
431 .Lcmov4:
432 addb %al,%dl
433 xorb (%rdi,%rdx,1),%r9b
434 rorl $8,%r9d
435 addb %bl,%cl
436 leaq 1(%rsi),%r10
437 movzbl (%rdi,%rcx,1),%edx
438 movzbl %r10b,%r10d
439 movzbl (%rdi,%r10,1),%eax
440 movb %bl,(%rdi,%rcx,1)
441 cmpq %r10,%rcx
442 movb %dl,(%rdi,%rsi,1)
443 jne .Lcmov5
444 movq %rbx,%rax
445 .Lcmov5:
446 addb %bl,%dl
447 xorb (%rdi,%rdx,1),%r9b
448 rorl $8,%r9d
449 addb %al,%cl
450 leaq 1(%r10),%rsi
451 movzbl (%rdi,%rcx,1),%edx
452 movzbl %sil,%esi
453 movzbl (%rdi,%rsi,1),%ebx
454 movb %al,(%rdi,%rcx,1)
455 cmpq %rsi,%rcx
456 movb %dl,(%rdi,%r10,1)
457 jne .Lcmov6
458 movq %rax,%rbx
459 .Lcmov6:
460 addb %al,%dl
461 xorb (%rdi,%rdx,1),%r9b
462 rorl $8,%r9d
463 addb %bl,%cl
464 leaq 1(%rsi),%r10
465 movzbl (%rdi,%rcx,1),%edx
466 movzbl %r10b,%r10d
467 movzbl (%rdi,%r10,1),%eax
468 movb %bl,(%rdi,%rcx,1)
469 cmpq %r10,%rcx
470 movb %dl,(%rdi,%rsi,1)
471 jne .Lcmov7
472 movq %rbx,%rax
473 .Lcmov7:
474 addb %bl,%dl
475 xorb (%rdi,%rdx,1),%r9b
476 rorl $8,%r9d
/* Store the 8 result bytes and advance count, input and output. */
477 leaq -8(%r11),%r11
478 movl %r8d,(%r13)
479 leaq 8(%r12),%r12
480 movl %r9d,4(%r13)
481 leaq 8(%r13),%r13
482
483 testq $-8,%r11
484 jnz .Lcloop8
485 cmpq $0,%r11
486 jne .Lcloop1
487 jmp .Lexit
488 .align 16
/* One byte per iteration on the byte table; used for short inputs and tails. */
489 .Lcloop1:
490 addb %al,%cl
491 movzbl %cl,%ecx
492 movzbl (%rdi,%rcx,1),%edx
493 movb %al,(%rdi,%rcx,1)
494 movb %dl,(%rdi,%r10,1)
495 addb %al,%dl
496 addb $1,%r10b
497 movzbl %dl,%edx
498 movzbl %r10b,%r10d
499 movzbl (%rdi,%rdx,1),%edx
500 movzbl (%rdi,%r10,1),%eax
501 xorb (%r12),%dl
502 leaq 1(%r12),%r12
503 movb %dl,(%r13)
504 leaq 1(%r13),%r13
505 subq $1,%r11
506 jnz .Lcloop1
507 jmp .Lexit
508
509 .align 16
/*
 * Common exit: undo the pre-increment of x, store both indices back into
 * the two words in front of the table, and restore the saved registers.
 */
510 .Lexit:
511 subb $1,%r10b
512 movl %r10d,-8(%rdi)
513 movl %ecx,-4(%rdi)
514
/* Reload in reverse push order (r13 was pushed last), then drop the 24 bytes. */
515 movq (%rsp),%r13
516 movq 8(%rsp),%r12
517 movq 16(%rsp),%rbx
518 addq $24,%rsp
519 .Lepilogue:
/* "rep ret" */
520 .byte 0xf3,0xc3
521 .size RC4,.-RC4
/*
 * RC4_set_key: initialises the key state from a key of a given length.
 *
 * Register use on entry, as read from the code below:
 *   %rdi  key state: two index words at offsets 0 and 4, table at offset 8
 *   %rsi  key length in bytes
 *   %rdx  pointer to the key bytes
 * Presumably this matches a C prototype of the form
 * RC4_set_key(key, len, data) -- confirm against the declaring header.
 *
 * Bit 20 of the first OPENSSL_ia32cap_P word selects a table of bytes
 * (.Lc1stloop/.Lc2ndloop); otherwise the table holds 32-bit entries
 * (.Lw1stloop/.Lw2ndloop). The byte variant also writes -1 to the dword
 * at table+256, the marker that RC4 tests to pick its byte-table path.
 * Both index words are zeroed on exit and %eax is 0 on return.
 */
522 .globl RC4_set_key
523 .type RC4_set_key,@function
524 .align 16
525 RC4_set_key:
/* rdi = table; rdx = one past the last key byte; rsi = rcx = -length. */
526 leaq 8(%rdi),%rdi
527 leaq (%rdx,%rsi,1),%rdx
528 negq %rsi
529 movq %rsi,%rcx
530 xorl %eax,%eax
531 xorq %r9,%r9
532 xorq %r10,%r10
533 xorq %r11,%r11
534
/* r8d = first word of OPENSSL_ia32cap_P, address taken from the GOT. */
535 movq OPENSSL_ia32cap_P@GOTPCREL(%rip),%r8
536 movl (%r8),%r8d
537 btl $20,%r8d
538 jc .Lc1stloop
539 jmp .Lw1stloop
540
541 .align 16
/* table[i] = i for i = 0..255; the loop ends when the 8-bit add carries. */
542 .Lw1stloop:
543 movl %eax,(%rdi,%rax,4)
544 addb $1,%al
545 jnc .Lw1stloop
546
547 xorq %r9,%r9
548 xorq %r8,%r8
549 .align 16
/*
 * Key mixing: r9 = i, r8b = j. j += key byte + table[i]; swap table[i]
 * and table[j]. (%rdx,%rsi,1) walks the key as rsi counts up from
 * -length; when addq brings rsi to zero, cmovzq reloads -length to
 * restart the key. The movl between them does not modify flags.
 */
550 .Lw2ndloop:
551 movl (%rdi,%r9,4),%r10d
552 addb (%rdx,%rsi,1),%r8b
553 addb %r10b,%r8b
554 addq $1,%rsi
555 movl (%rdi,%r8,4),%r11d
556 cmovzq %rcx,%rsi
557 movl %r10d,(%rdi,%r8,4)
558 movl %r11d,(%rdi,%r9,4)
559 addb $1,%r9b
560 jnc .Lw2ndloop
561 jmp .Lexit_key
562
563 .align 16
/* Byte-table variant of the identity fill. */
564 .Lc1stloop:
565 movb %al,(%rdi,%rax,1)
566 addb $1,%al
567 jnc .Lc1stloop
568
569 xorq %r9,%r9
570 xorq %r8,%r8
571 .align 16
/*
 * Byte-table variant of the key mixing; the key wrap-around uses a
 * branch on the flags from addq (movb does not modify them) instead
 * of cmovzq.
 */
572 .Lc2ndloop:
573 movb (%rdi,%r9,1),%r10b
574 addb (%rdx,%rsi,1),%r8b
575 addb %r10b,%r8b
576 addq $1,%rsi
577 movb (%rdi,%r8,1),%r11b
578 jnz .Lcnowrap
579 movq %rcx,%rsi
580 .Lcnowrap:
581 movb %r10b,(%rdi,%r8,1)
582 movb %r11b,(%rdi,%r9,1)
583 addb $1,%r9b
584 jnc .Lc2ndloop
/* Mark the state as byte-table format. */
585 movl $-1,256(%rdi)
586
587 .align 16
/* Zero both index words in front of the table and return ("rep ret"). */
588 .Lexit_key:
589 xorl %eax,%eax
590 movl %eax,-8(%rdi)
591 movl %eax,-4(%rdi)
592 .byte 0xf3,0xc3
593 .size RC4_set_key,.-RC4_set_key
594
/*
 * RC4_options: returns in %rax a pointer to a NUL-terminated ASCII string
 * naming the implementation variant chosen by the first OPENSSL_ia32cap_P
 * word:
 *   bit 20 set           -> .Lopts+12  "rc4(8x,char)"
 *   else bit 30 set      -> .Lopts+25  "rc4(16x,int)"
 *   otherwise            -> .Lopts+0   "rc4(8x,int)"
 * These are the same bits RC4_set_key (bit 20) and RC4 (bit 30) test.
 * Takes no arguments; clobbers %rdx and flags.
 */
595 .globl RC4_options
596 .type RC4_options,@function
597 .align 16
598 RC4_options:
599 leaq .Lopts(%rip),%rax
/*
 * Load the ADDRESS of OPENSSL_ia32cap_P from the GOT, as RC4 and
 * RC4_set_key do, so that the following load reads the first capability
 * word. Without @GOTPCREL the movq fetched the capability data itself
 * and the next instruction dereferenced it as a pointer.
 */
600 movq OPENSSL_ia32cap_P@GOTPCREL(%rip),%rdx
601 movl (%rdx),%edx
602 btl $20,%edx
603 jc .L8xchar
604 btl $30,%edx
605 jnc .Ldone
/* 25 = 12 + 13: skip "rc4(8x,int)\0" and "rc4(8x,char)\0". */
606 addq $25,%rax
/* "rep ret" */
607 .byte 0xf3,0xc3
608 .L8xchar:
/* 12 = length of "rc4(8x,int)" including its NUL. */
609 addq $12,%rax
610 .Ldone:
611 .byte 0xf3,0xc3
612 .align 64
613 .Lopts:
/* "rc4(8x,int)" */
614 .byte 114,99,52,40,56,120,44,105,110,116,41,0
/* "rc4(8x,char)" */
615 .byte 114,99,52,40,56,120,44,99,104,97,114,41,0
/* "rc4(16x,int)" */
616 .byte 114,99,52,40,49,54,120,44,105,110,116,41,0
/* "RC4 for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
617 .byte 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
618 .align 64
619 .size RC4_options,.-RC4_options
620 #endif
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698