OLD | NEW |
(Empty) | |
| 1 #if defined(__x86_64__) |
| 2 .text |
| 3 |
| 4 |
# _gcm_gmult_4bit
# Updates the 16-byte block at (%rdi) in place, 4 bits per step, using table
# lookups (by its name, the GHASH multiplication -- confirm with the caller).
#   In:  %rdi = 16-byte block; read, then overwritten with the result
#        %rsi = table of 16-byte entries, addressed as nibble * 16
#   Bytes of the block are consumed from offset 15 down to offset 0.
#   %r9:%r8 is the 128-bit accumulator (%r9 = more significant half). Every
#   step shifts it right by 4, xors in one table entry and xors the
#   L$rem_4bit entry selected by the 4 bits shifted out into %r9.
#   Writes %rax, %rcx, %rdx, %r8-%r11 and flags. %rbx is saved and reloaded.
#   %rbp and %r12 are pushed but never modified, so their slots are simply
#   dropped on exit.
| 5 .globl _gcm_gmult_4bit |
| 6 .private_extern _gcm_gmult_4bit |
| 7 |
| 8 .p2align 4 |
| 9 _gcm_gmult_4bit: |
| 10 pushq %rbx |
| 11 pushq %rbp |
| 12 pushq %r12 |
| 13 L$gmult_prologue: |
| 14 |
| 15 movzbq 15(%rdi),%r8 | # r8 = last byte of the block
| 16 leaq L$rem_4bit(%rip),%r11 | # r11 = base of L$rem_4bit
| 17 xorq %rax,%rax |
| 18 xorq %rbx,%rbx |
| 19 movb %r8b,%al |
| 20 movb %r8b,%bl |
| 21 shlb $4,%al | # al = (byte & 15) * 16
| 22 movq $14,%rcx | # rcx = offset of the next byte to fetch
| 23 movq 8(%rsi,%rax,1),%r8 | # accumulator = table entry for the low nibble
| 24 movq (%rsi,%rax,1),%r9 |
| 25 andb $240,%bl | # bl = (byte >> 4) * 16
| 26 movq %r8,%rdx | # rdx keeps the bits the next shift drops
| 27 jmp L$oop1 |
| 28 |
| 29 .p2align 4 |
| 30 L$oop1: |
# First half: step for the high nibble of the previous byte (offset in %rbx),
# overlapped with fetching the next byte.
| 31 shrq $4,%r8 |
| 32 andq $15,%rdx | # rdx = the 4 bits shifted out of r8
| 33 movq %r9,%r10 |
| 34 movb (%rdi,%rcx,1),%al | # fetch the next byte of the block
| 35 shrq $4,%r9 |
| 36 xorq 8(%rsi,%rbx,1),%r8 |
| 37 shlq $60,%r10 | # low 4 bits of r9 move to the top of r8
| 38 xorq (%rsi,%rbx,1),%r9 |
| 39 movb %al,%bl |
| 40 xorq (%r11,%rdx,8),%r9 | # fold in the L$rem_4bit entry
| 41 movq %r8,%rdx |
| 42 shlb $4,%al | # al = (new byte & 15) * 16
| 43 xorq %r10,%r8 |
| 44 decq %rcx |
| 45 js L$break1 | # taken once the byte at offset 0 has been fetched
| 46 |
# Second half: step for the low nibble of the byte just fetched (offset in %rax).
| 47 shrq $4,%r8 |
| 48 andq $15,%rdx |
| 49 movq %r9,%r10 |
| 50 shrq $4,%r9 |
| 51 xorq 8(%rsi,%rax,1),%r8 |
| 52 shlq $60,%r10 |
| 53 xorq (%rsi,%rax,1),%r9 |
| 54 andb $240,%bl | # bl = (new byte >> 4) * 16 for the next pass
| 55 xorq (%r11,%rdx,8),%r9 |
| 56 movq %r8,%rdx |
| 57 xorq %r10,%r8 |
| 58 jmp L$oop1 |
| 59 |
| 60 .p2align 4 |
| 61 L$break1: |
# Byte at offset 0: low-nibble step, then high-nibble step, with no further fetch.
| 62 shrq $4,%r8 |
| 63 andq $15,%rdx |
| 64 movq %r9,%r10 |
| 65 shrq $4,%r9 |
| 66 xorq 8(%rsi,%rax,1),%r8 |
| 67 shlq $60,%r10 |
| 68 xorq (%rsi,%rax,1),%r9 |
| 69 andb $240,%bl |
| 70 xorq (%r11,%rdx,8),%r9 |
| 71 movq %r8,%rdx |
| 72 xorq %r10,%r8 |
| 73 |
| 74 shrq $4,%r8 |
| 75 andq $15,%rdx |
| 76 movq %r9,%r10 |
| 77 shrq $4,%r9 |
| 78 xorq 8(%rsi,%rbx,1),%r8 |
| 79 shlq $60,%r10 |
| 80 xorq (%rsi,%rbx,1),%r9 |
| 81 xorq %r10,%r8 |
| 82 xorq (%r11,%rdx,8),%r9 |
| 83 |
| 84 bswapq %r8 | # byte-reverse both halves before storing
| 85 bswapq %r9 |
| 86 movq %r8,8(%rdi) |
| 87 movq %r9,(%rdi) |
| 88 |
| 89 movq 16(%rsp),%rbx | # reload the saved %rbx (first push)
| 90 leaq 24(%rsp),%rsp | # drop all three pushed slots
| 91 L$gmult_epilogue: |
| 92 .byte 0xf3,0xc3 | # rep ret
| 93 |
# _gcm_ghash_4bit
# Folds a run of 16-byte input blocks into the 16-byte block at (%rdi),
# one input byte per step, using table lookups.
#   In:  %rdi = 16-byte block (read at entry, written back on exit, and used
#               inside the loop as scratch for the xored input)
#        %rsi = table of 16 entries of 16 bytes each
#        %rdx = input pointer, copied to %r14
#        %rcx = byte count, copied to %r15 and turned into an end pointer
#        (presumably Xi, Htable, inp, len -- confirm against the C prototype)
#   The block loop is bottom-tested, so one 16-byte block is always consumed;
#   it advances 16 bytes per pass until %r14 is no longer below the end.
#   Saves and restores %rbx, %rbp, %r12-%r15.
#   Stack frame of 280 bytes (offsets from %rsp, with %rbp = %rsp + 144):
#        0..15   one byte per table entry: (low byte of the qword at
#                entry+8) << 4, i.e. the 4 bits dropped by the shift below
#       16..143  one qword per entry: less significant half of the 128-bit
#                entry shifted right by 4, addressed as -128(%rbp,index,8)
#      144..271  one qword per entry: more significant half of the same
#                shifted entry, addressed as (%rbp,index,8)
| 94 .globl _gcm_ghash_4bit |
| 95 .private_extern _gcm_ghash_4bit |
| 96 |
| 97 .p2align 4 |
| 98 _gcm_ghash_4bit: |
| 99 pushq %rbx |
| 100 pushq %rbp |
| 101 pushq %r12 |
| 102 pushq %r13 |
| 103 pushq %r14 |
| 104 pushq %r15 |
| 105 subq $280,%rsp |
| 106 L$ghash_prologue: |
| 107 movq %rdx,%r14 | # r14 = input pointer
| 108 movq %rcx,%r15 | # r15 = byte count (becomes end pointer below)
| 109 subq $-128,%rsi | # rsi += 128; the loads below use a -128 bias
| 110 leaq 16+128(%rsp),%rbp | # rbp = rsp + 144
| 111 xorl %edx,%edx |
# Build the shifted copy of the 16-entry table on the stack. The 16 entries
# are handled by one unrolled sequence that alternates between the register
# pairs %r8/%rax and %r9/%rbx, so loads for the next entry overlap the stores
# of the previous one.
| 112 movq 0+0-128(%rsi),%r8 |
| 113 movq 0+8-128(%rsi),%rax |
| 114 movb %al,%dl |
| 115 shrq $4,%rax |
| 116 movq %r8,%r10 |
| 117 shrq $4,%r8 |
| 118 movq 16+0-128(%rsi),%r9 |
| 119 shlb $4,%dl |
| 120 movq 16+8-128(%rsi),%rbx |
| 121 shlq $60,%r10 |
| 122 movb %dl,0(%rsp) |
| 123 orq %r10,%rax |
| 124 movb %bl,%dl |
| 125 shrq $4,%rbx |
| 126 movq %r9,%r10 |
| 127 shrq $4,%r9 |
| 128 movq %r8,0(%rbp) |
| 129 movq 32+0-128(%rsi),%r8 |
| 130 shlb $4,%dl |
| 131 movq %rax,0-128(%rbp) |
| 132 movq 32+8-128(%rsi),%rax |
| 133 shlq $60,%r10 |
| 134 movb %dl,1(%rsp) |
| 135 orq %r10,%rbx |
| 136 movb %al,%dl |
| 137 shrq $4,%rax |
| 138 movq %r8,%r10 |
| 139 shrq $4,%r8 |
| 140 movq %r9,8(%rbp) |
| 141 movq 48+0-128(%rsi),%r9 |
| 142 shlb $4,%dl |
| 143 movq %rbx,8-128(%rbp) |
| 144 movq 48+8-128(%rsi),%rbx |
| 145 shlq $60,%r10 |
| 146 movb %dl,2(%rsp) |
| 147 orq %r10,%rax |
| 148 movb %bl,%dl |
| 149 shrq $4,%rbx |
| 150 movq %r9,%r10 |
| 151 shrq $4,%r9 |
| 152 movq %r8,16(%rbp) |
| 153 movq 64+0-128(%rsi),%r8 |
| 154 shlb $4,%dl |
| 155 movq %rax,16-128(%rbp) |
| 156 movq 64+8-128(%rsi),%rax |
| 157 shlq $60,%r10 |
| 158 movb %dl,3(%rsp) |
| 159 orq %r10,%rbx |
| 160 movb %al,%dl |
| 161 shrq $4,%rax |
| 162 movq %r8,%r10 |
| 163 shrq $4,%r8 |
| 164 movq %r9,24(%rbp) |
| 165 movq 80+0-128(%rsi),%r9 |
| 166 shlb $4,%dl |
| 167 movq %rbx,24-128(%rbp) |
| 168 movq 80+8-128(%rsi),%rbx |
| 169 shlq $60,%r10 |
| 170 movb %dl,4(%rsp) |
| 171 orq %r10,%rax |
| 172 movb %bl,%dl |
| 173 shrq $4,%rbx |
| 174 movq %r9,%r10 |
| 175 shrq $4,%r9 |
| 176 movq %r8,32(%rbp) |
| 177 movq 96+0-128(%rsi),%r8 |
| 178 shlb $4,%dl |
| 179 movq %rax,32-128(%rbp) |
| 180 movq 96+8-128(%rsi),%rax |
| 181 shlq $60,%r10 |
| 182 movb %dl,5(%rsp) |
| 183 orq %r10,%rbx |
| 184 movb %al,%dl |
| 185 shrq $4,%rax |
| 186 movq %r8,%r10 |
| 187 shrq $4,%r8 |
| 188 movq %r9,40(%rbp) |
| 189 movq 112+0-128(%rsi),%r9 |
| 190 shlb $4,%dl |
| 191 movq %rbx,40-128(%rbp) |
| 192 movq 112+8-128(%rsi),%rbx |
| 193 shlq $60,%r10 |
| 194 movb %dl,6(%rsp) |
| 195 orq %r10,%rax |
| 196 movb %bl,%dl |
| 197 shrq $4,%rbx |
| 198 movq %r9,%r10 |
| 199 shrq $4,%r9 |
| 200 movq %r8,48(%rbp) |
| 201 movq 128+0-128(%rsi),%r8 |
| 202 shlb $4,%dl |
| 203 movq %rax,48-128(%rbp) |
| 204 movq 128+8-128(%rsi),%rax |
| 205 shlq $60,%r10 |
| 206 movb %dl,7(%rsp) |
| 207 orq %r10,%rbx |
| 208 movb %al,%dl |
| 209 shrq $4,%rax |
| 210 movq %r8,%r10 |
| 211 shrq $4,%r8 |
| 212 movq %r9,56(%rbp) |
| 213 movq 144+0-128(%rsi),%r9 |
| 214 shlb $4,%dl |
| 215 movq %rbx,56-128(%rbp) |
| 216 movq 144+8-128(%rsi),%rbx |
| 217 shlq $60,%r10 |
| 218 movb %dl,8(%rsp) |
| 219 orq %r10,%rax |
| 220 movb %bl,%dl |
| 221 shrq $4,%rbx |
| 222 movq %r9,%r10 |
| 223 shrq $4,%r9 |
| 224 movq %r8,64(%rbp) |
| 225 movq 160+0-128(%rsi),%r8 |
| 226 shlb $4,%dl |
| 227 movq %rax,64-128(%rbp) |
| 228 movq 160+8-128(%rsi),%rax |
| 229 shlq $60,%r10 |
| 230 movb %dl,9(%rsp) |
| 231 orq %r10,%rbx |
| 232 movb %al,%dl |
| 233 shrq $4,%rax |
| 234 movq %r8,%r10 |
| 235 shrq $4,%r8 |
| 236 movq %r9,72(%rbp) |
| 237 movq 176+0-128(%rsi),%r9 |
| 238 shlb $4,%dl |
| 239 movq %rbx,72-128(%rbp) |
| 240 movq 176+8-128(%rsi),%rbx |
| 241 shlq $60,%r10 |
| 242 movb %dl,10(%rsp) |
| 243 orq %r10,%rax |
| 244 movb %bl,%dl |
| 245 shrq $4,%rbx |
| 246 movq %r9,%r10 |
| 247 shrq $4,%r9 |
| 248 movq %r8,80(%rbp) |
| 249 movq 192+0-128(%rsi),%r8 |
| 250 shlb $4,%dl |
| 251 movq %rax,80-128(%rbp) |
| 252 movq 192+8-128(%rsi),%rax |
| 253 shlq $60,%r10 |
| 254 movb %dl,11(%rsp) |
| 255 orq %r10,%rbx |
| 256 movb %al,%dl |
| 257 shrq $4,%rax |
| 258 movq %r8,%r10 |
| 259 shrq $4,%r8 |
| 260 movq %r9,88(%rbp) |
| 261 movq 208+0-128(%rsi),%r9 |
| 262 shlb $4,%dl |
| 263 movq %rbx,88-128(%rbp) |
| 264 movq 208+8-128(%rsi),%rbx |
| 265 shlq $60,%r10 |
| 266 movb %dl,12(%rsp) |
| 267 orq %r10,%rax |
| 268 movb %bl,%dl |
| 269 shrq $4,%rbx |
| 270 movq %r9,%r10 |
| 271 shrq $4,%r9 |
| 272 movq %r8,96(%rbp) |
| 273 movq 224+0-128(%rsi),%r8 |
| 274 shlb $4,%dl |
| 275 movq %rax,96-128(%rbp) |
| 276 movq 224+8-128(%rsi),%rax |
| 277 shlq $60,%r10 |
| 278 movb %dl,13(%rsp) |
| 279 orq %r10,%rbx |
| 280 movb %al,%dl |
| 281 shrq $4,%rax |
| 282 movq %r8,%r10 |
| 283 shrq $4,%r8 |
| 284 movq %r9,104(%rbp) |
| 285 movq 240+0-128(%rsi),%r9 |
| 286 shlb $4,%dl |
| 287 movq %rbx,104-128(%rbp) |
| 288 movq 240+8-128(%rsi),%rbx |
| 289 shlq $60,%r10 |
| 290 movb %dl,14(%rsp) |
| 291 orq %r10,%rax |
| 292 movb %bl,%dl |
| 293 shrq $4,%rbx |
| 294 movq %r9,%r10 |
| 295 shrq $4,%r9 |
| 296 movq %r8,112(%rbp) |
| 297 shlb $4,%dl |
| 298 movq %rax,112-128(%rbp) |
| 299 shlq $60,%r10 |
| 300 movb %dl,15(%rsp) |
| 301 orq %r10,%rbx |
| 302 movq %r9,120(%rbp) |
| 303 movq %rbx,120-128(%rbp) |
| 304 addq $-128,%rsi | # undo the +128 bias on the table pointer
| 305 movq 8(%rdi),%r8 | # r9:r8 = current block, in memory byte order
| 306 movq 0(%rdi),%r9 |
| 307 addq %r14,%r15 | # r15 = input pointer + byte count = end pointer
| 308 leaq L$rem_8bit(%rip),%r11 | # r11 = base of L$rem_8bit
| 309 jmp L$outer_loop |
| 310 .p2align 4 |
| 311 L$outer_loop: |
# One pass per 16-byte input block. The block is xored with the current value
# and the result is stored to (%rdi) so it can be re-read 4 bytes at a time.
# Bytes are taken from offset 15 down to 0: roll $8 brings the next byte into
# %dl, %al = (byte & 15) * 16 indexes the caller table at (%rsi), and
# %ebx / %ecx = byte >> 4 index the shifted copies on the stack.
# The accumulator %r9:%r8 moves right by 8 bits per byte; the low byte of %r8
# xored with the saved byte from (%rsp,index) selects a 16-bit L$rem_8bit
# entry that is shifted left by 48 and xored into %r9.
| 312 xorq (%r14),%r9 |
| 313 movq 8(%r14),%rdx |
| 314 leaq 16(%r14),%r14 | # advance the input pointer by one block
| 315 xorq %r8,%rdx |
| 316 movq %r9,(%rdi) |
| 317 movq %rdx,8(%rdi) |
| 318 shrq $32,%rdx | # edx = bytes 12..15 of the xored block
| 319 xorq %rax,%rax |
| 320 roll $8,%edx | # dl = byte 15
| 321 movb %dl,%al |
| 322 movzbl %dl,%ebx |
| 323 shlb $4,%al |
| 324 shrl $4,%ebx |
| 325 roll $8,%edx |
| 326 movq 8(%rsi,%rax,1),%r8 |
| 327 movq (%rsi,%rax,1),%r9 |
| 328 movb %dl,%al |
| 329 movzbl %dl,%ecx |
| 330 shlb $4,%al |
| 331 movzbq (%rsp,%rbx,1),%r12 |
| 332 shrl $4,%ecx |
| 333 xorq %r8,%r12 |
| 334 movq %r9,%r10 |
| 335 shrq $8,%r8 |
| 336 movzbq %r12b,%r12 |
| 337 shrq $8,%r9 |
| 338 xorq -128(%rbp,%rbx,8),%r8 |
| 339 shlq $56,%r10 |
| 340 xorq (%rbp,%rbx,8),%r9 |
| 341 roll $8,%edx |
| 342 xorq 8(%rsi,%rax,1),%r8 |
| 343 xorq (%rsi,%rax,1),%r9 |
| 344 movb %dl,%al |
| 345 xorq %r10,%r8 |
| 346 movzwq (%r11,%r12,2),%r12 |
| 347 movzbl %dl,%ebx |
| 348 shlb $4,%al |
| 349 movzbq (%rsp,%rcx,1),%r13 |
| 350 shrl $4,%ebx |
| 351 shlq $48,%r12 |
| 352 xorq %r8,%r13 |
| 353 movq %r9,%r10 |
| 354 xorq %r12,%r9 |
| 355 shrq $8,%r8 |
| 356 movzbq %r13b,%r13 |
| 357 shrq $8,%r9 |
| 358 xorq -128(%rbp,%rcx,8),%r8 |
| 359 shlq $56,%r10 |
| 360 xorq (%rbp,%rcx,8),%r9 |
| 361 roll $8,%edx |
| 362 xorq 8(%rsi,%rax,1),%r8 |
| 363 xorq (%rsi,%rax,1),%r9 |
| 364 movb %dl,%al |
| 365 xorq %r10,%r8 |
| 366 movzwq (%r11,%r13,2),%r13 |
| 367 movzbl %dl,%ecx |
| 368 shlb $4,%al |
| 369 movzbq (%rsp,%rbx,1),%r12 |
| 370 shrl $4,%ecx |
| 371 shlq $48,%r13 |
| 372 xorq %r8,%r12 |
| 373 movq %r9,%r10 |
| 374 xorq %r13,%r9 |
| 375 shrq $8,%r8 |
| 376 movzbq %r12b,%r12 |
| 377 movl 8(%rdi),%edx | # edx = bytes 8..11 of the xored block
| 378 shrq $8,%r9 |
| 379 xorq -128(%rbp,%rbx,8),%r8 |
| 380 shlq $56,%r10 |
| 381 xorq (%rbp,%rbx,8),%r9 |
| 382 roll $8,%edx |
| 383 xorq 8(%rsi,%rax,1),%r8 |
| 384 xorq (%rsi,%rax,1),%r9 |
| 385 movb %dl,%al |
| 386 xorq %r10,%r8 |
| 387 movzwq (%r11,%r12,2),%r12 |
| 388 movzbl %dl,%ebx |
| 389 shlb $4,%al |
| 390 movzbq (%rsp,%rcx,1),%r13 |
| 391 shrl $4,%ebx |
| 392 shlq $48,%r12 |
| 393 xorq %r8,%r13 |
| 394 movq %r9,%r10 |
| 395 xorq %r12,%r9 |
| 396 shrq $8,%r8 |
| 397 movzbq %r13b,%r13 |
| 398 shrq $8,%r9 |
| 399 xorq -128(%rbp,%rcx,8),%r8 |
| 400 shlq $56,%r10 |
| 401 xorq (%rbp,%rcx,8),%r9 |
| 402 roll $8,%edx |
| 403 xorq 8(%rsi,%rax,1),%r8 |
| 404 xorq (%rsi,%rax,1),%r9 |
| 405 movb %dl,%al |
| 406 xorq %r10,%r8 |
| 407 movzwq (%r11,%r13,2),%r13 |
| 408 movzbl %dl,%ecx |
| 409 shlb $4,%al |
| 410 movzbq (%rsp,%rbx,1),%r12 |
| 411 shrl $4,%ecx |
| 412 shlq $48,%r13 |
| 413 xorq %r8,%r12 |
| 414 movq %r9,%r10 |
| 415 xorq %r13,%r9 |
| 416 shrq $8,%r8 |
| 417 movzbq %r12b,%r12 |
| 418 shrq $8,%r9 |
| 419 xorq -128(%rbp,%rbx,8),%r8 |
| 420 shlq $56,%r10 |
| 421 xorq (%rbp,%rbx,8),%r9 |
| 422 roll $8,%edx |
| 423 xorq 8(%rsi,%rax,1),%r8 |
| 424 xorq (%rsi,%rax,1),%r9 |
| 425 movb %dl,%al |
| 426 xorq %r10,%r8 |
| 427 movzwq (%r11,%r12,2),%r12 |
| 428 movzbl %dl,%ebx |
| 429 shlb $4,%al |
| 430 movzbq (%rsp,%rcx,1),%r13 |
| 431 shrl $4,%ebx |
| 432 shlq $48,%r12 |
| 433 xorq %r8,%r13 |
| 434 movq %r9,%r10 |
| 435 xorq %r12,%r9 |
| 436 shrq $8,%r8 |
| 437 movzbq %r13b,%r13 |
| 438 shrq $8,%r9 |
| 439 xorq -128(%rbp,%rcx,8),%r8 |
| 440 shlq $56,%r10 |
| 441 xorq (%rbp,%rcx,8),%r9 |
| 442 roll $8,%edx |
| 443 xorq 8(%rsi,%rax,1),%r8 |
| 444 xorq (%rsi,%rax,1),%r9 |
| 445 movb %dl,%al |
| 446 xorq %r10,%r8 |
| 447 movzwq (%r11,%r13,2),%r13 |
| 448 movzbl %dl,%ecx |
| 449 shlb $4,%al |
| 450 movzbq (%rsp,%rbx,1),%r12 |
| 451 shrl $4,%ecx |
| 452 shlq $48,%r13 |
| 453 xorq %r8,%r12 |
| 454 movq %r9,%r10 |
| 455 xorq %r13,%r9 |
| 456 shrq $8,%r8 |
| 457 movzbq %r12b,%r12 |
| 458 movl 4(%rdi),%edx | # edx = bytes 4..7 of the xored block
| 459 shrq $8,%r9 |
| 460 xorq -128(%rbp,%rbx,8),%r8 |
| 461 shlq $56,%r10 |
| 462 xorq (%rbp,%rbx,8),%r9 |
| 463 roll $8,%edx |
| 464 xorq 8(%rsi,%rax,1),%r8 |
| 465 xorq (%rsi,%rax,1),%r9 |
| 466 movb %dl,%al |
| 467 xorq %r10,%r8 |
| 468 movzwq (%r11,%r12,2),%r12 |
| 469 movzbl %dl,%ebx |
| 470 shlb $4,%al |
| 471 movzbq (%rsp,%rcx,1),%r13 |
| 472 shrl $4,%ebx |
| 473 shlq $48,%r12 |
| 474 xorq %r8,%r13 |
| 475 movq %r9,%r10 |
| 476 xorq %r12,%r9 |
| 477 shrq $8,%r8 |
| 478 movzbq %r13b,%r13 |
| 479 shrq $8,%r9 |
| 480 xorq -128(%rbp,%rcx,8),%r8 |
| 481 shlq $56,%r10 |
| 482 xorq (%rbp,%rcx,8),%r9 |
| 483 roll $8,%edx |
| 484 xorq 8(%rsi,%rax,1),%r8 |
| 485 xorq (%rsi,%rax,1),%r9 |
| 486 movb %dl,%al |
| 487 xorq %r10,%r8 |
| 488 movzwq (%r11,%r13,2),%r13 |
| 489 movzbl %dl,%ecx |
| 490 shlb $4,%al |
| 491 movzbq (%rsp,%rbx,1),%r12 |
| 492 shrl $4,%ecx |
| 493 shlq $48,%r13 |
| 494 xorq %r8,%r12 |
| 495 movq %r9,%r10 |
| 496 xorq %r13,%r9 |
| 497 shrq $8,%r8 |
| 498 movzbq %r12b,%r12 |
| 499 shrq $8,%r9 |
| 500 xorq -128(%rbp,%rbx,8),%r8 |
| 501 shlq $56,%r10 |
| 502 xorq (%rbp,%rbx,8),%r9 |
| 503 roll $8,%edx |
| 504 xorq 8(%rsi,%rax,1),%r8 |
| 505 xorq (%rsi,%rax,1),%r9 |
| 506 movb %dl,%al |
| 507 xorq %r10,%r8 |
| 508 movzwq (%r11,%r12,2),%r12 |
| 509 movzbl %dl,%ebx |
| 510 shlb $4,%al |
| 511 movzbq (%rsp,%rcx,1),%r13 |
| 512 shrl $4,%ebx |
| 513 shlq $48,%r12 |
| 514 xorq %r8,%r13 |
| 515 movq %r9,%r10 |
| 516 xorq %r12,%r9 |
| 517 shrq $8,%r8 |
| 518 movzbq %r13b,%r13 |
| 519 shrq $8,%r9 |
| 520 xorq -128(%rbp,%rcx,8),%r8 |
| 521 shlq $56,%r10 |
| 522 xorq (%rbp,%rcx,8),%r9 |
| 523 roll $8,%edx |
| 524 xorq 8(%rsi,%rax,1),%r8 |
| 525 xorq (%rsi,%rax,1),%r9 |
| 526 movb %dl,%al |
| 527 xorq %r10,%r8 |
| 528 movzwq (%r11,%r13,2),%r13 |
| 529 movzbl %dl,%ecx |
| 530 shlb $4,%al |
| 531 movzbq (%rsp,%rbx,1),%r12 |
| 532 shrl $4,%ecx |
| 533 shlq $48,%r13 |
| 534 xorq %r8,%r12 |
| 535 movq %r9,%r10 |
| 536 xorq %r13,%r9 |
| 537 shrq $8,%r8 |
| 538 movzbq %r12b,%r12 |
| 539 movl 0(%rdi),%edx | # edx = bytes 0..3 of the xored block
| 540 shrq $8,%r9 |
| 541 xorq -128(%rbp,%rbx,8),%r8 |
| 542 shlq $56,%r10 |
| 543 xorq (%rbp,%rbx,8),%r9 |
| 544 roll $8,%edx |
| 545 xorq 8(%rsi,%rax,1),%r8 |
| 546 xorq (%rsi,%rax,1),%r9 |
| 547 movb %dl,%al |
| 548 xorq %r10,%r8 |
| 549 movzwq (%r11,%r12,2),%r12 |
| 550 movzbl %dl,%ebx |
| 551 shlb $4,%al |
| 552 movzbq (%rsp,%rcx,1),%r13 |
| 553 shrl $4,%ebx |
| 554 shlq $48,%r12 |
| 555 xorq %r8,%r13 |
| 556 movq %r9,%r10 |
| 557 xorq %r12,%r9 |
| 558 shrq $8,%r8 |
| 559 movzbq %r13b,%r13 |
| 560 shrq $8,%r9 |
| 561 xorq -128(%rbp,%rcx,8),%r8 |
| 562 shlq $56,%r10 |
| 563 xorq (%rbp,%rcx,8),%r9 |
| 564 roll $8,%edx |
| 565 xorq 8(%rsi,%rax,1),%r8 |
| 566 xorq (%rsi,%rax,1),%r9 |
| 567 movb %dl,%al |
| 568 xorq %r10,%r8 |
| 569 movzwq (%r11,%r13,2),%r13 |
| 570 movzbl %dl,%ecx |
| 571 shlb $4,%al |
| 572 movzbq (%rsp,%rbx,1),%r12 |
| 573 shrl $4,%ecx |
| 574 shlq $48,%r13 |
| 575 xorq %r8,%r12 |
| 576 movq %r9,%r10 |
| 577 xorq %r13,%r9 |
| 578 shrq $8,%r8 |
| 579 movzbq %r12b,%r12 |
| 580 shrq $8,%r9 |
| 581 xorq -128(%rbp,%rbx,8),%r8 |
| 582 shlq $56,%r10 |
| 583 xorq (%rbp,%rbx,8),%r9 |
| 584 roll $8,%edx |
| 585 xorq 8(%rsi,%rax,1),%r8 |
| 586 xorq (%rsi,%rax,1),%r9 |
| 587 movb %dl,%al |
| 588 xorq %r10,%r8 |
| 589 movzwq (%r11,%r12,2),%r12 |
| 590 movzbl %dl,%ebx |
| 591 shlb $4,%al |
| 592 movzbq (%rsp,%rcx,1),%r13 |
| 593 shrl $4,%ebx |
| 594 shlq $48,%r12 |
| 595 xorq %r8,%r13 |
| 596 movq %r9,%r10 |
| 597 xorq %r12,%r9 |
| 598 shrq $8,%r8 |
| 599 movzbq %r13b,%r13 |
| 600 shrq $8,%r9 |
| 601 xorq -128(%rbp,%rcx,8),%r8 |
| 602 shlq $56,%r10 |
| 603 xorq (%rbp,%rcx,8),%r9 |
| 604 roll $8,%edx |
| 605 xorq 8(%rsi,%rax,1),%r8 |
| 606 xorq (%rsi,%rax,1),%r9 |
| 607 movb %dl,%al |
| 608 xorq %r10,%r8 |
| 609 movzwq (%r11,%r13,2),%r13 |
| 610 movzbl %dl,%ecx |
| 611 shlb $4,%al |
| 612 movzbq (%rsp,%rbx,1),%r12 |
| 613 andl $240,%ecx | # last byte: keep (byte >> 4) * 16 to index (%rsi)
| 614 shlq $48,%r13 |
| 615 xorq %r8,%r12 |
| 616 movq %r9,%r10 |
| 617 xorq %r13,%r9 |
| 618 shrq $8,%r8 |
| 619 movzbq %r12b,%r12 |
# NOTE(review): the next load reads the 4 bytes located just below (%rdi).
# %rdx is reloaded at the top of the loop and is not read after the loop, so
# the loaded value looks unused -- confirm that the bytes preceding the block
# are always readable for every caller.
| 620 movl -4(%rdi),%edx |
| 621 shrq $8,%r9 |
| 622 xorq -128(%rbp,%rbx,8),%r8 |
| 623 shlq $56,%r10 |
| 624 xorq (%rbp,%rbx,8),%r9 |
| 625 movzwq (%r11,%r12,2),%r12 |
| 626 xorq 8(%rsi,%rax,1),%r8 |
| 627 xorq (%rsi,%rax,1),%r9 |
| 628 shlq $48,%r12 |
| 629 xorq %r10,%r8 |
| 630 xorq %r12,%r9 |
# Final high nibble: a 4-bit shift against the caller table at (%rsi,%rcx),
# with L$rem_8bit indexed by (low byte of %r8) << 4.
| 631 movzbq %r8b,%r13 |
| 632 shrq $4,%r8 |
| 633 movq %r9,%r10 |
| 634 shlb $4,%r13b |
| 635 shrq $4,%r9 |
| 636 xorq 8(%rsi,%rcx,1),%r8 |
| 637 movzwq (%r11,%r13,2),%r13 |
| 638 shlq $60,%r10 |
| 639 xorq (%rsi,%rcx,1),%r9 |
| 640 xorq %r10,%r8 |
| 641 shlq $48,%r13 |
| 642 bswapq %r8 | # back to memory byte order for the next xor/store
| 643 xorq %r13,%r9 |
| 644 bswapq %r9 |
| 645 cmpq %r15,%r14 |
| 646 jb L$outer_loop | # more input while r14 is below the end pointer
| 647 movq %r8,8(%rdi) | # store the final block
| 648 movq %r9,(%rdi) |
| 649 |
| 650 leaq 280(%rsp),%rsi | # rsi = address of the six saved registers
| 651 movq 0(%rsi),%r15 |
| 652 movq 8(%rsi),%r14 |
| 653 movq 16(%rsi),%r13 |
| 654 movq 24(%rsi),%r12 |
| 655 movq 32(%rsi),%rbp |
| 656 movq 40(%rsi),%rbx |
| 657 leaq 48(%rsi),%rsp | # release the frame and the saved slots
| 658 L$ghash_epilogue: |
| 659 .byte 0xf3,0xc3 | # rep ret
| 660 |
# _gcm_init_clmul
# Builds a 96-byte table at (%rdi) from the 16-byte value at (%rsi).
#   In:  %rdi = output, six 16-byte slots at offsets 0, 16, 32, 48, 64, 80
#        %rsi = 16-byte input
#   Steps: swap the two qwords of the input, shift the 128-bit value left by
#   one bit, and xor in L$0x1c2_polynomial when the bit shifted out was set.
#   That value (%xmm2) goes to slot 0. It is multiplied by itself, and the
#   running product is multiplied by %xmm2 twice more; the three products go
#   to slots 16, 48 and 64 (presumably H^2, H^3, H^4 for H in slot 0 --
#   confirm against the table consumers).
#   Slots 32 and 80 each pack two (high qword ^ low qword) values, combined
#   with palignr, which serve as the third multiplicand of the three-multiply
#   scheme used below.
#   Each multiply is three pclmulqdq (low*low, high*high, folded halves),
#   followed by a left-shift fold (5, 1, 57) and a right-shift fold (1, 5, 1)
#   that bring the 256-bit product in %xmm1:%xmm0 back to 128 bits in %xmm0.
#   Uses %xmm0-%xmm6; no general-purpose register is written.
#   L$_init_clmul is a second entry label, targeted by _gcm_init_avx.
| 661 .globl _gcm_init_clmul |
| 662 .private_extern _gcm_init_clmul |
| 663 |
| 664 .p2align 4 |
| 665 _gcm_init_clmul: |
| 666 L$_init_clmul: |
| 667 movdqu (%rsi),%xmm2 |
| 668 pshufd $78,%xmm2,%xmm2 | # swap the two qwords
| 669 |
| 670 |
| 671 pshufd $255,%xmm2,%xmm4 | # broadcast the top dword
| 672 movdqa %xmm2,%xmm3 |
| 673 psllq $1,%xmm2 |
| 674 pxor %xmm5,%xmm5 |
| 675 psrlq $63,%xmm3 | # bit carried from the low qword to the high one
| 676 pcmpgtd %xmm4,%xmm5 | # all ones when the top bit was set
| 677 pslldq $8,%xmm3 |
| 678 por %xmm3,%xmm2 | # xmm2 = value << 1 across 128 bits
| 679 |
| 680 |
| 681 pand L$0x1c2_polynomial(%rip),%xmm5 |
| 682 pxor %xmm5,%xmm2 | # conditional xor of the constant
| 683 |
| 684 |
| 685 pshufd $78,%xmm2,%xmm6 |
| 686 movdqa %xmm2,%xmm0 |
| 687 pxor %xmm2,%xmm6 | # xmm6 = high ^ low halves of xmm2
| 688 movdqa %xmm0,%xmm1 |
| 689 pshufd $78,%xmm0,%xmm3 |
| 690 pxor %xmm0,%xmm3 |
| 691 .byte 102,15,58,68,194,0 | # pclmulqdq $0x00,%xmm2,%xmm0
| 692 .byte 102,15,58,68,202,17 | # pclmulqdq $0x11,%xmm2,%xmm1
| 693 .byte 102,15,58,68,222,0 | # pclmulqdq $0x00,%xmm6,%xmm3
| 694 pxor %xmm0,%xmm3 |
| 695 pxor %xmm1,%xmm3 | # xmm3 = middle term
| 696 |
| 697 movdqa %xmm3,%xmm4 |
| 698 psrldq $8,%xmm3 |
| 699 pslldq $8,%xmm4 |
| 700 pxor %xmm3,%xmm1 |
| 701 pxor %xmm4,%xmm0 | # xmm1:xmm0 = 256-bit product
| 702 |
| 703 movdqa %xmm0,%xmm4 |
| 704 movdqa %xmm0,%xmm3 |
| 705 psllq $5,%xmm0 |
| 706 pxor %xmm0,%xmm3 |
| 707 psllq $1,%xmm0 |
| 708 pxor %xmm3,%xmm0 |
| 709 psllq $57,%xmm0 |
| 710 movdqa %xmm0,%xmm3 |
| 711 pslldq $8,%xmm0 |
| 712 psrldq $8,%xmm3 |
| 713 pxor %xmm4,%xmm0 |
| 714 pxor %xmm3,%xmm1 |
| 715 |
| 716 |
| 717 movdqa %xmm0,%xmm4 |
| 718 psrlq $1,%xmm0 |
| 719 pxor %xmm4,%xmm1 |
| 720 pxor %xmm0,%xmm4 |
| 721 psrlq $5,%xmm0 |
| 722 pxor %xmm4,%xmm0 |
| 723 psrlq $1,%xmm0 |
| 724 pxor %xmm1,%xmm0 | # xmm0 = first product, folded to 128 bits
| 725 pshufd $78,%xmm2,%xmm3 |
| 726 pshufd $78,%xmm0,%xmm4 |
| 727 pxor %xmm2,%xmm3 |
| 728 movdqu %xmm2,0(%rdi) | # slot 0
| 729 pxor %xmm0,%xmm4 |
| 730 movdqu %xmm0,16(%rdi) | # slot 16
| 731 .byte 102,15,58,15,227,8 | # palignr $8,%xmm3,%xmm4
| 732 movdqu %xmm4,32(%rdi) | # slot 32: packed folded halves
| 733 movdqa %xmm0,%xmm1 |
| 734 pshufd $78,%xmm0,%xmm3 |
| 735 pxor %xmm0,%xmm3 |
| 736 .byte 102,15,58,68,194,0 | # pclmulqdq $0x00,%xmm2,%xmm0
| 737 .byte 102,15,58,68,202,17 | # pclmulqdq $0x11,%xmm2,%xmm1
| 738 .byte 102,15,58,68,222,0 | # pclmulqdq $0x00,%xmm6,%xmm3
| 739 pxor %xmm0,%xmm3 |
| 740 pxor %xmm1,%xmm3 |
| 741 |
| 742 movdqa %xmm3,%xmm4 |
| 743 psrldq $8,%xmm3 |
| 744 pslldq $8,%xmm4 |
| 745 pxor %xmm3,%xmm1 |
| 746 pxor %xmm4,%xmm0 |
| 747 |
| 748 movdqa %xmm0,%xmm4 |
| 749 movdqa %xmm0,%xmm3 |
| 750 psllq $5,%xmm0 |
| 751 pxor %xmm0,%xmm3 |
| 752 psllq $1,%xmm0 |
| 753 pxor %xmm3,%xmm0 |
| 754 psllq $57,%xmm0 |
| 755 movdqa %xmm0,%xmm3 |
| 756 pslldq $8,%xmm0 |
| 757 psrldq $8,%xmm3 |
| 758 pxor %xmm4,%xmm0 |
| 759 pxor %xmm3,%xmm1 |
| 760 |
| 761 |
| 762 movdqa %xmm0,%xmm4 |
| 763 psrlq $1,%xmm0 |
| 764 pxor %xmm4,%xmm1 |
| 765 pxor %xmm0,%xmm4 |
| 766 psrlq $5,%xmm0 |
| 767 pxor %xmm4,%xmm0 |
| 768 psrlq $1,%xmm0 |
| 769 pxor %xmm1,%xmm0 |
| 770 movdqa %xmm0,%xmm5 | # keep the second product for slot 48
| 771 movdqa %xmm0,%xmm1 |
| 772 pshufd $78,%xmm0,%xmm3 |
| 773 pxor %xmm0,%xmm3 |
| 774 .byte 102,15,58,68,194,0 | # pclmulqdq $0x00,%xmm2,%xmm0
| 775 .byte 102,15,58,68,202,17 | # pclmulqdq $0x11,%xmm2,%xmm1
| 776 .byte 102,15,58,68,222,0 | # pclmulqdq $0x00,%xmm6,%xmm3
| 777 pxor %xmm0,%xmm3 |
| 778 pxor %xmm1,%xmm3 |
| 779 |
| 780 movdqa %xmm3,%xmm4 |
| 781 psrldq $8,%xmm3 |
| 782 pslldq $8,%xmm4 |
| 783 pxor %xmm3,%xmm1 |
| 784 pxor %xmm4,%xmm0 |
| 785 |
| 786 movdqa %xmm0,%xmm4 |
| 787 movdqa %xmm0,%xmm3 |
| 788 psllq $5,%xmm0 |
| 789 pxor %xmm0,%xmm3 |
| 790 psllq $1,%xmm0 |
| 791 pxor %xmm3,%xmm0 |
| 792 psllq $57,%xmm0 |
| 793 movdqa %xmm0,%xmm3 |
| 794 pslldq $8,%xmm0 |
| 795 psrldq $8,%xmm3 |
| 796 pxor %xmm4,%xmm0 |
| 797 pxor %xmm3,%xmm1 |
| 798 |
| 799 |
| 800 movdqa %xmm0,%xmm4 |
| 801 psrlq $1,%xmm0 |
| 802 pxor %xmm4,%xmm1 |
| 803 pxor %xmm0,%xmm4 |
| 804 psrlq $5,%xmm0 |
| 805 pxor %xmm4,%xmm0 |
| 806 psrlq $1,%xmm0 |
| 807 pxor %xmm1,%xmm0 |
| 808 pshufd $78,%xmm5,%xmm3 |
| 809 pshufd $78,%xmm0,%xmm4 |
| 810 pxor %xmm5,%xmm3 |
| 811 movdqu %xmm5,48(%rdi) | # slot 48
| 812 pxor %xmm0,%xmm4 |
| 813 movdqu %xmm0,64(%rdi) | # slot 64
| 814 .byte 102,15,58,15,227,8 | # palignr $8,%xmm3,%xmm4
| 815 movdqu %xmm4,80(%rdi) | # slot 80: packed folded halves
| 816 .byte 0xf3,0xc3 | # rep ret
| 817 |
# _gcm_gmult_clmul
# Multiplies the 16-byte block at (%rdi) by the value at (%rsi) using
# pclmulqdq and writes the folded 128-bit result back to (%rdi).
#   In:  %rdi = 16-byte block, read and overwritten
#        %rsi = table; the 16 bytes at offset 0 and the 16 bytes at offset 32
#               are read (presumably the layout written by _gcm_init_clmul --
#               confirm with the caller)
#   The block is byte-reversed with L$bswap_mask on load and again on store.
#   Three carry-less multiplies are used: low*low, high*high, and the folded
#   (high ^ low) of the block times the low qword loaded from 32(%rsi).
#   Uses %xmm0-%xmm5; no general-purpose register is written.
#   L$_gmult_clmul is a second entry label, targeted by _gcm_gmult_avx.
| 818 .globl _gcm_gmult_clmul |
| 819 .private_extern _gcm_gmult_clmul |
| 820 |
| 821 .p2align 4 |
| 822 _gcm_gmult_clmul: |
| 823 L$_gmult_clmul: |
| 824 movdqu (%rdi),%xmm0 |
| 825 movdqa L$bswap_mask(%rip),%xmm5 |
| 826 movdqu (%rsi),%xmm2 |
| 827 movdqu 32(%rsi),%xmm4 |
| 828 .byte 102,15,56,0,197 | # pshufb %xmm5,%xmm0
| 829 movdqa %xmm0,%xmm1 |
| 830 pshufd $78,%xmm0,%xmm3 |
| 831 pxor %xmm0,%xmm3 | # xmm3 = high ^ low halves of the block
| 832 .byte 102,15,58,68,194,0 | # pclmulqdq $0x00,%xmm2,%xmm0
| 833 .byte 102,15,58,68,202,17 | # pclmulqdq $0x11,%xmm2,%xmm1
| 834 .byte 102,15,58,68,220,0 | # pclmulqdq $0x00,%xmm4,%xmm3
| 835 pxor %xmm0,%xmm3 |
| 836 pxor %xmm1,%xmm3 | # xmm3 = middle term
| 837 |
| 838 movdqa %xmm3,%xmm4 |
| 839 psrldq $8,%xmm3 |
| 840 pslldq $8,%xmm4 |
| 841 pxor %xmm3,%xmm1 |
| 842 pxor %xmm4,%xmm0 | # xmm1:xmm0 = 256-bit product
| 843 |
# Left-shift fold (5, 1, 57).
| 844 movdqa %xmm0,%xmm4 |
| 845 movdqa %xmm0,%xmm3 |
| 846 psllq $5,%xmm0 |
| 847 pxor %xmm0,%xmm3 |
| 848 psllq $1,%xmm0 |
| 849 pxor %xmm3,%xmm0 |
| 850 psllq $57,%xmm0 |
| 851 movdqa %xmm0,%xmm3 |
| 852 pslldq $8,%xmm0 |
| 853 psrldq $8,%xmm3 |
| 854 pxor %xmm4,%xmm0 |
| 855 pxor %xmm3,%xmm1 |
| 856 |
| 857 |
# Right-shift fold (1, 5, 1); the result ends up in %xmm0.
| 858 movdqa %xmm0,%xmm4 |
| 859 psrlq $1,%xmm0 |
| 860 pxor %xmm4,%xmm1 |
| 861 pxor %xmm0,%xmm4 |
| 862 psrlq $5,%xmm0 |
| 863 pxor %xmm4,%xmm0 |
| 864 psrlq $1,%xmm0 |
| 865 pxor %xmm1,%xmm0 |
| 866 .byte 102,15,56,0,197 | # pshufb %xmm5,%xmm0
| 867 movdqu %xmm0,(%rdi) |
| 868 .byte 0xf3,0xc3 | # rep ret
| 869 |
# _gcm_ghash_clmul
# Folds a run of 16-byte input blocks into the 16-byte block at (%rdi) using
# pclmulqdq.
#   In:  %rdi = 16-byte block, read at entry and written at L$done
#        %rsi = table; 16-byte slots at offsets 0, 16, 32, 48, 64 and 80 are
#               read (presumably the layout written by _gcm_init_clmul)
#        %rdx = input pointer, advanced as blocks are consumed
#        %rcx = byte count (the code steps it by 16/32/64, so it presumably
#               has to be a non-zero multiple of 16 -- confirm with the caller)
#   Paths:
#     - exactly one block: straight to L$odd_tail
#     - 4x path (L$mod4_loop / L$tail4x): 64 bytes per pass; entered when at
#       least 64 bytes are available, unless the word at
#       _OPENSSL_ia32cap_P+4 has bit 22 set and bit 26 clear (presumably
#       MOVBE without XSAVE -- confirm against the capability vector layout)
#     - 2x path (L$skip4x / L$mod_loop / L$even_tail): 32 bytes per pass
#     - L$odd_tail: one remaining block
#   %xmm10 holds L$bswap_mask throughout; every input block and the running
#   value are byte-reversed with pshufb.
#   Uses %xmm0-%xmm15, %rax, %rcx, %rdx and flags; no stack is used.
#   L$_ghash_clmul is a second entry label, targeted by _gcm_ghash_avx.
| 870 .globl _gcm_ghash_clmul |
| 871 .private_extern _gcm_ghash_clmul |
| 872 |
| 873 .p2align 5 |
| 874 _gcm_ghash_clmul: |
| 875 L$_ghash_clmul: |
| 876 movdqa L$bswap_mask(%rip),%xmm10 |
| 877 |
| 878 movdqu (%rdi),%xmm0 |
| 879 movdqu (%rsi),%xmm2 |
| 880 movdqu 32(%rsi),%xmm7 |
| 881 .byte 102,65,15,56,0,194 | # pshufb %xmm10,%xmm0
| 882 |
| 883 subq $16,%rcx |
| 884 jz L$odd_tail | # exactly one block
| 885 |
| 886 movdqu 16(%rsi),%xmm6 |
| 887 movl _OPENSSL_ia32cap_P+4(%rip),%eax |
| 888 cmpq $48,%rcx |
| 889 jb L$skip4x | # fewer than 64 bytes in total
| 890 |
| 891 andl $71303168,%eax | # keep bits 22 and 26 (0x04400000)
| 892 cmpl $4194304,%eax | # bit 22 set, bit 26 clear (0x00400000)
| 893 je L$skip4x |
| 894 |
| 895 subq $48,%rcx |
| 896 movq $11547335547999543296,%rax | # 0xA040608020C0E000, pshufb lookup used in the loop
| 897 movdqu 48(%rsi),%xmm14 |
| 898 movdqu 64(%rsi),%xmm15 |
| 899 |
| 900 |
| 901 |
| 902 |
# Start on the first group of four blocks: the blocks at offsets 48 and 32
# are multiplied first, then the blocks at 16 and 0 are set up.
| 903 movdqu 48(%rdx),%xmm3 |
| 904 movdqu 32(%rdx),%xmm11 |
| 905 .byte 102,65,15,56,0,218 | # pshufb %xmm10,%xmm3
| 906 .byte 102,69,15,56,0,218 | # pshufb %xmm10,%xmm11
| 907 movdqa %xmm3,%xmm5 |
| 908 pshufd $78,%xmm3,%xmm4 |
| 909 pxor %xmm3,%xmm4 |
| 910 .byte 102,15,58,68,218,0 | # pclmulqdq $0x00,%xmm2,%xmm3
| 911 .byte 102,15,58,68,234,17 | # pclmulqdq $0x11,%xmm2,%xmm5
| 912 .byte 102,15,58,68,231,0 | # pclmulqdq $0x00,%xmm7,%xmm4
| 913 |
| 914 movdqa %xmm11,%xmm13 |
| 915 pshufd $78,%xmm11,%xmm12 |
| 916 pxor %xmm11,%xmm12 |
| 917 .byte 102,68,15,58,68,222,0 | # pclmulqdq $0x00,%xmm6,%xmm11
| 918 .byte 102,68,15,58,68,238,17 | # pclmulqdq $0x11,%xmm6,%xmm13
| 919 .byte 102,68,15,58,68,231,16 | # pclmulqdq $0x10,%xmm7,%xmm12
| 920 xorps %xmm11,%xmm3 |
| 921 xorps %xmm13,%xmm5 |
| 922 movups 80(%rsi),%xmm7 |
| 923 xorps %xmm12,%xmm4 |
| 924 |
| 925 movdqu 16(%rdx),%xmm11 |
| 926 movdqu 0(%rdx),%xmm8 |
| 927 .byte 102,69,15,56,0,218 | # pshufb %xmm10,%xmm11
| 928 .byte 102,69,15,56,0,194 | # pshufb %xmm10,%xmm8
| 929 movdqa %xmm11,%xmm13 |
| 930 pshufd $78,%xmm11,%xmm12 |
| 931 pxor %xmm8,%xmm0 | # running value ^= first block of the group
| 932 pxor %xmm11,%xmm12 |
| 933 .byte 102,69,15,58,68,222,0 | # pclmulqdq $0x00,%xmm14,%xmm11
| 934 movdqa %xmm0,%xmm1 |
| 935 pshufd $78,%xmm0,%xmm8 |
| 936 pxor %xmm0,%xmm8 |
| 937 .byte 102,69,15,58,68,238,17 | # pclmulqdq $0x11,%xmm14,%xmm13
| 938 .byte 102,68,15,58,68,231,0 | # pclmulqdq $0x00,%xmm7,%xmm12
| 939 xorps %xmm11,%xmm3 |
| 940 xorps %xmm13,%xmm5 |
| 941 |
| 942 leaq 64(%rdx),%rdx |
| 943 subq $64,%rcx |
| 944 jc L$tail4x | # no further full group of four blocks
| 945 |
| 946 jmp L$mod4_loop |
| 947 .p2align 5 |
| 948 L$mod4_loop: |
# Each pass finishes the multiply and fold for the previous group of four
# blocks while loading and multiplying the next group.
| 949 .byte 102,65,15,58,68,199,0 | # pclmulqdq $0x00,%xmm15,%xmm0
| 950 xorps %xmm12,%xmm4 |
| 951 movdqu 48(%rdx),%xmm11 |
| 952 .byte 102,69,15,56,0,218 | # pshufb %xmm10,%xmm11
| 953 .byte 102,65,15,58,68,207,17 | # pclmulqdq $0x11,%xmm15,%xmm1
| 954 xorps %xmm3,%xmm0 |
| 955 movdqu 32(%rdx),%xmm3 |
| 956 movdqa %xmm11,%xmm13 |
| 957 .byte 102,68,15,58,68,199,16 | # pclmulqdq $0x10,%xmm7,%xmm8
| 958 pshufd $78,%xmm11,%xmm12 |
| 959 xorps %xmm5,%xmm1 |
| 960 pxor %xmm11,%xmm12 |
| 961 .byte 102,65,15,56,0,218 | # pshufb %xmm10,%xmm3
| 962 movups 32(%rsi),%xmm7 |
| 963 xorps %xmm4,%xmm8 |
| 964 .byte 102,68,15,58,68,218,0 | # pclmulqdq $0x00,%xmm2,%xmm11
| 965 pshufd $78,%xmm3,%xmm4 |
| 966 |
| 967 pxor %xmm0,%xmm8 |
| 968 movdqa %xmm3,%xmm5 |
| 969 pxor %xmm1,%xmm8 |
| 970 pxor %xmm3,%xmm4 |
| 971 movdqa %xmm8,%xmm9 |
| 972 .byte 102,68,15,58,68,234,17 | # pclmulqdq $0x11,%xmm2,%xmm13
| 973 pslldq $8,%xmm8 |
| 974 psrldq $8,%xmm9 |
| 975 pxor %xmm8,%xmm0 |
| 976 movdqa L$7_mask(%rip),%xmm8 |
| 977 pxor %xmm9,%xmm1 |
| 978 .byte 102,76,15,110,200 | # movq %rax,%xmm9
| 979 |
| 980 pand %xmm0,%xmm8 | # low 3 bits of each qword select a lookup byte
| 981 .byte 102,69,15,56,0,200 | # pshufb %xmm8,%xmm9
| 982 pxor %xmm0,%xmm9 |
| 983 .byte 102,68,15,58,68,231,0 | # pclmulqdq $0x00,%xmm7,%xmm12
| 984 psllq $57,%xmm9 |
| 985 movdqa %xmm9,%xmm8 |
| 986 pslldq $8,%xmm9 |
| 987 .byte 102,15,58,68,222,0 | # pclmulqdq $0x00,%xmm6,%xmm3
| 988 psrldq $8,%xmm8 |
| 989 pxor %xmm9,%xmm0 |
| 990 pxor %xmm8,%xmm1 |
| 991 movdqu 0(%rdx),%xmm8 |
| 992 |
| 993 movdqa %xmm0,%xmm9 |
| 994 psrlq $1,%xmm0 |
| 995 .byte 102,15,58,68,238,17 | # pclmulqdq $0x11,%xmm6,%xmm5
| 996 xorps %xmm11,%xmm3 |
| 997 movdqu 16(%rdx),%xmm11 |
| 998 .byte 102,69,15,56,0,218 | # pshufb %xmm10,%xmm11
| 999 .byte 102,15,58,68,231,16 | # pclmulqdq $0x10,%xmm7,%xmm4
| 1000 xorps %xmm13,%xmm5 |
| 1001 movups 80(%rsi),%xmm7 |
| 1002 .byte 102,69,15,56,0,194 | # pshufb %xmm10,%xmm8
| 1003 pxor %xmm9,%xmm1 |
| 1004 pxor %xmm0,%xmm9 |
| 1005 psrlq $5,%xmm0 |
| 1006 |
| 1007 movdqa %xmm11,%xmm13 |
| 1008 pxor %xmm12,%xmm4 |
| 1009 pshufd $78,%xmm11,%xmm12 |
| 1010 pxor %xmm9,%xmm0 |
| 1011 pxor %xmm8,%xmm1 |
| 1012 pxor %xmm11,%xmm12 |
| 1013 .byte 102,69,15,58,68,222,0 | # pclmulqdq $0x00,%xmm14,%xmm11
| 1014 psrlq $1,%xmm0 |
| 1015 pxor %xmm1,%xmm0 |
| 1016 movdqa %xmm0,%xmm1 |
| 1017 .byte 102,69,15,58,68,238,17 | # pclmulqdq $0x11,%xmm14,%xmm13
| 1018 xorps %xmm11,%xmm3 |
| 1019 pshufd $78,%xmm0,%xmm8 |
| 1020 pxor %xmm0,%xmm8 |
| 1021 |
| 1022 .byte 102,68,15,58,68,231,0 | # pclmulqdq $0x00,%xmm7,%xmm12
| 1023 xorps %xmm13,%xmm5 |
| 1024 |
| 1025 leaq 64(%rdx),%rdx |
| 1026 subq $64,%rcx |
| 1027 jnc L$mod4_loop |
| 1028 |
| 1029 L$tail4x: |
# Finish the last group of four blocks: final multiply, combine, then the
# left-shift fold (5, 1, 57) and right-shift fold (1, 5, 1).
| 1030 .byte 102,65,15,58,68,199,0 | # pclmulqdq $0x00,%xmm15,%xmm0
| 1031 .byte 102,65,15,58,68,207,17 | # pclmulqdq $0x11,%xmm15,%xmm1
| 1032 .byte 102,68,15,58,68,199,16 | # pclmulqdq $0x10,%xmm7,%xmm8
| 1033 xorps %xmm12,%xmm4 |
| 1034 xorps %xmm3,%xmm0 |
| 1035 xorps %xmm5,%xmm1 |
| 1036 pxor %xmm0,%xmm1 |
| 1037 pxor %xmm4,%xmm8 |
| 1038 |
| 1039 pxor %xmm1,%xmm8 |
| 1040 pxor %xmm0,%xmm1 |
| 1041 |
| 1042 movdqa %xmm8,%xmm9 |
| 1043 psrldq $8,%xmm8 |
| 1044 pslldq $8,%xmm9 |
| 1045 pxor %xmm8,%xmm1 |
| 1046 pxor %xmm9,%xmm0 |
| 1047 |
| 1048 movdqa %xmm0,%xmm4 |
| 1049 movdqa %xmm0,%xmm3 |
| 1050 psllq $5,%xmm0 |
| 1051 pxor %xmm0,%xmm3 |
| 1052 psllq $1,%xmm0 |
| 1053 pxor %xmm3,%xmm0 |
| 1054 psllq $57,%xmm0 |
| 1055 movdqa %xmm0,%xmm3 |
| 1056 pslldq $8,%xmm0 |
| 1057 psrldq $8,%xmm3 |
| 1058 pxor %xmm4,%xmm0 |
| 1059 pxor %xmm3,%xmm1 |
| 1060 |
| 1061 |
| 1062 movdqa %xmm0,%xmm4 |
| 1063 psrlq $1,%xmm0 |
| 1064 pxor %xmm4,%xmm1 |
| 1065 pxor %xmm0,%xmm4 |
| 1066 psrlq $5,%xmm0 |
| 1067 pxor %xmm4,%xmm0 |
| 1068 psrlq $1,%xmm0 |
| 1069 pxor %xmm1,%xmm0 |
| 1070 addq $64,%rcx | # rcx = bytes still to process
| 1071 jz L$done |
| 1072 movdqu 32(%rsi),%xmm7 | # reload slot 32 (xmm7 held slot 80 above)
| 1073 subq $16,%rcx |
| 1074 jz L$odd_tail | # exactly one block left
| 1075 L$skip4x: |
| 1076 |
| 1077 |
| 1078 |
| 1079 |
| 1080 |
# 2x path: two blocks per pass. On entry %rcx = remaining bytes - 16.
| 1081 movdqu (%rdx),%xmm8 |
| 1082 movdqu 16(%rdx),%xmm3 |
| 1083 .byte 102,69,15,56,0,194 | # pshufb %xmm10,%xmm8
| 1084 .byte 102,65,15,56,0,218 | # pshufb %xmm10,%xmm3
| 1085 pxor %xmm8,%xmm0 |
| 1086 |
| 1087 movdqa %xmm3,%xmm5 |
| 1088 pshufd $78,%xmm3,%xmm4 |
| 1089 pxor %xmm3,%xmm4 |
| 1090 .byte 102,15,58,68,218,0 | # pclmulqdq $0x00,%xmm2,%xmm3
| 1091 .byte 102,15,58,68,234,17 | # pclmulqdq $0x11,%xmm2,%xmm5
| 1092 .byte 102,15,58,68,231,0 | # pclmulqdq $0x00,%xmm7,%xmm4
| 1093 |
| 1094 leaq 32(%rdx),%rdx |
| 1095 nop |
| 1096 subq $32,%rcx |
| 1097 jbe L$even_tail | # at most one block remains after this pair
| 1098 nop |
| 1099 jmp L$mod_loop |
| 1100 |
| 1101 .p2align 5 |
| 1102 L$mod_loop: |
| 1103 movdqa %xmm0,%xmm1 |
| 1104 movdqa %xmm4,%xmm8 |
| 1105 pshufd $78,%xmm0,%xmm4 |
| 1106 pxor %xmm0,%xmm4 |
| 1107 |
| 1108 .byte 102,15,58,68,198,0 | # pclmulqdq $0x00,%xmm6,%xmm0
| 1109 .byte 102,15,58,68,206,17 | # pclmulqdq $0x11,%xmm6,%xmm1
| 1110 .byte 102,15,58,68,231,16 | # pclmulqdq $0x10,%xmm7,%xmm4
| 1111 |
| 1112 pxor %xmm3,%xmm0 |
| 1113 pxor %xmm5,%xmm1 |
| 1114 movdqu (%rdx),%xmm9 |
| 1115 pxor %xmm0,%xmm8 |
| 1116 .byte 102,69,15,56,0,202 | # pshufb %xmm10,%xmm9
| 1117 movdqu 16(%rdx),%xmm3 |
| 1118 |
| 1119 pxor %xmm1,%xmm8 |
| 1120 pxor %xmm9,%xmm1 |
| 1121 pxor %xmm8,%xmm4 |
| 1122 .byte 102,65,15,56,0,218 | # pshufb %xmm10,%xmm3
| 1123 movdqa %xmm4,%xmm8 |
| 1124 psrldq $8,%xmm8 |
| 1125 pslldq $8,%xmm4 |
| 1126 pxor %xmm8,%xmm1 |
| 1127 pxor %xmm4,%xmm0 |
| 1128 |
| 1129 movdqa %xmm3,%xmm5 |
| 1130 |
| 1131 movdqa %xmm0,%xmm9 |
| 1132 movdqa %xmm0,%xmm8 |
| 1133 psllq $5,%xmm0 |
| 1134 pxor %xmm0,%xmm8 |
| 1135 .byte 102,15,58,68,218,0 | # pclmulqdq $0x00,%xmm2,%xmm3
| 1136 psllq $1,%xmm0 |
| 1137 pxor %xmm8,%xmm0 |
| 1138 psllq $57,%xmm0 |
| 1139 movdqa %xmm0,%xmm8 |
| 1140 pslldq $8,%xmm0 |
| 1141 psrldq $8,%xmm8 |
| 1142 pxor %xmm9,%xmm0 |
| 1143 pshufd $78,%xmm5,%xmm4 |
| 1144 pxor %xmm8,%xmm1 |
| 1145 pxor %xmm5,%xmm4 |
| 1146 |
| 1147 movdqa %xmm0,%xmm9 |
| 1148 psrlq $1,%xmm0 |
| 1149 .byte 102,15,58,68,234,17 | # pclmulqdq $0x11,%xmm2,%xmm5
| 1150 pxor %xmm9,%xmm1 |
| 1151 pxor %xmm0,%xmm9 |
| 1152 psrlq $5,%xmm0 |
| 1153 pxor %xmm9,%xmm0 |
| 1154 leaq 32(%rdx),%rdx |
| 1155 psrlq $1,%xmm0 |
| 1156 .byte 102,15,58,68,231,0 | # pclmulqdq $0x00,%xmm7,%xmm4
| 1157 pxor %xmm1,%xmm0 |
| 1158 |
| 1159 subq $32,%rcx |
| 1160 ja L$mod_loop |
| 1161 |
| 1162 L$even_tail: |
# Finish the pending pair of blocks.
| 1163 movdqa %xmm0,%xmm1 |
| 1164 movdqa %xmm4,%xmm8 |
| 1165 pshufd $78,%xmm0,%xmm4 |
| 1166 pxor %xmm0,%xmm4 |
| 1167 |
| 1168 .byte 102,15,58,68,198,0 | # pclmulqdq $0x00,%xmm6,%xmm0
| 1169 .byte 102,15,58,68,206,17 | # pclmulqdq $0x11,%xmm6,%xmm1
| 1170 .byte 102,15,58,68,231,16 | # pclmulqdq $0x10,%xmm7,%xmm4
| 1171 |
| 1172 pxor %xmm3,%xmm0 |
| 1173 pxor %xmm5,%xmm1 |
| 1174 pxor %xmm0,%xmm8 |
| 1175 pxor %xmm1,%xmm8 |
| 1176 pxor %xmm8,%xmm4 |
| 1177 movdqa %xmm4,%xmm8 |
| 1178 psrldq $8,%xmm8 |
| 1179 pslldq $8,%xmm4 |
| 1180 pxor %xmm8,%xmm1 |
| 1181 pxor %xmm4,%xmm0 |
| 1182 |
| 1183 movdqa %xmm0,%xmm4 |
| 1184 movdqa %xmm0,%xmm3 |
| 1185 psllq $5,%xmm0 |
| 1186 pxor %xmm0,%xmm3 |
| 1187 psllq $1,%xmm0 |
| 1188 pxor %xmm3,%xmm0 |
| 1189 psllq $57,%xmm0 |
| 1190 movdqa %xmm0,%xmm3 |
| 1191 pslldq $8,%xmm0 |
| 1192 psrldq $8,%xmm3 |
| 1193 pxor %xmm4,%xmm0 |
| 1194 pxor %xmm3,%xmm1 |
| 1195 |
| 1196 |
| 1197 movdqa %xmm0,%xmm4 |
| 1198 psrlq $1,%xmm0 |
| 1199 pxor %xmm4,%xmm1 |
| 1200 pxor %xmm0,%xmm4 |
| 1201 psrlq $5,%xmm0 |
| 1202 pxor %xmm4,%xmm0 |
| 1203 psrlq $1,%xmm0 |
| 1204 pxor %xmm1,%xmm0 |
| 1205 testq %rcx,%rcx |
| 1206 jnz L$done | # rcx == 0 here means one more block is left
| 1207 |
| 1208 L$odd_tail: |
# Single remaining block: xor it in, multiply by the slot-0 value, fold.
| 1209 movdqu (%rdx),%xmm8 |
| 1210 .byte 102,69,15,56,0,194 | # pshufb %xmm10,%xmm8
| 1211 pxor %xmm8,%xmm0 |
| 1212 movdqa %xmm0,%xmm1 |
| 1213 pshufd $78,%xmm0,%xmm3 |
| 1214 pxor %xmm0,%xmm3 |
| 1215 .byte 102,15,58,68,194,0 | # pclmulqdq $0x00,%xmm2,%xmm0
| 1216 .byte 102,15,58,68,202,17 | # pclmulqdq $0x11,%xmm2,%xmm1
| 1217 .byte 102,15,58,68,223,0 | # pclmulqdq $0x00,%xmm7,%xmm3
| 1218 pxor %xmm0,%xmm3 |
| 1219 pxor %xmm1,%xmm3 |
| 1220 |
| 1221 movdqa %xmm3,%xmm4 |
| 1222 psrldq $8,%xmm3 |
| 1223 pslldq $8,%xmm4 |
| 1224 pxor %xmm3,%xmm1 |
| 1225 pxor %xmm4,%xmm0 |
| 1226 |
| 1227 movdqa %xmm0,%xmm4 |
| 1228 movdqa %xmm0,%xmm3 |
| 1229 psllq $5,%xmm0 |
| 1230 pxor %xmm0,%xmm3 |
| 1231 psllq $1,%xmm0 |
| 1232 pxor %xmm3,%xmm0 |
| 1233 psllq $57,%xmm0 |
| 1234 movdqa %xmm0,%xmm3 |
| 1235 pslldq $8,%xmm0 |
| 1236 psrldq $8,%xmm3 |
| 1237 pxor %xmm4,%xmm0 |
| 1238 pxor %xmm3,%xmm1 |
| 1239 |
| 1240 |
| 1241 movdqa %xmm0,%xmm4 |
| 1242 psrlq $1,%xmm0 |
| 1243 pxor %xmm4,%xmm1 |
| 1244 pxor %xmm0,%xmm4 |
| 1245 psrlq $5,%xmm0 |
| 1246 pxor %xmm4,%xmm0 |
| 1247 psrlq $1,%xmm0 |
| 1248 pxor %xmm1,%xmm0 |
| 1249 L$done: |
| 1250 .byte 102,65,15,56,0,194 | # pshufb %xmm10,%xmm0
| 1251 movdqu %xmm0,(%rdi) | # store the result in memory byte order
| 1252 .byte 0xf3,0xc3 | # rep ret
| 1253 |
# _gcm_init_avx
# Exported entry point that tail-jumps straight into the body of
# _gcm_init_clmul (label L$_init_clmul); registers are passed through
# untouched and no AVX instruction is executed here.
| 1254 .globl _gcm_init_avx |
| 1255 .private_extern _gcm_init_avx |
| 1256 |
| 1257 .p2align 5 |
| 1258 _gcm_init_avx: |
| 1259 jmp L$_init_clmul |
| 1260 |
# _gcm_gmult_avx
# Exported entry point that tail-jumps straight into the body of
# _gcm_gmult_clmul (label L$_gmult_clmul); registers are passed through
# untouched and no AVX instruction is executed here.
| 1261 .globl _gcm_gmult_avx |
| 1262 .private_extern _gcm_gmult_avx |
| 1263 |
| 1264 .p2align 5 |
| 1265 _gcm_gmult_avx: |
| 1266 jmp L$_gmult_clmul |
| 1267 |
# _gcm_ghash_avx
# Exported entry point that tail-jumps straight into the body of
# _gcm_ghash_clmul (label L$_ghash_clmul); registers are passed through
# untouched and no AVX instruction is executed here.
| 1268 .globl _gcm_ghash_avx |
| 1269 .private_extern _gcm_ghash_avx |
| 1270 |
| 1271 .p2align 5 |
| 1272 _gcm_ghash_avx: |
| 1273 jmp L$_ghash_clmul |
| 1274 |
# 16-byte constants for the pclmulqdq routines. The block starts on a 64-byte
# boundary and every entry is 16 bytes long, so each label is 16-byte aligned;
# the movdqa and pand memory operands that read them rely on that.
| 1275 .p2align 6 |
| 1276 L$bswap_mask: |
# pshufb control that reverses the order of the 16 bytes of a register.
| 1277 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 |
| 1278 L$0x1c2_polynomial: |
# 0x01 in byte 0 and 0xc2 in byte 15, zero in between; xored conditionally
# into the shifted input by _gcm_init_clmul.
| 1279 .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 |
| 1280 L$7_mask: |
# Two qwords, each equal to 7; masks the low 3 bits of each qword in
# L$mod4_loop.
| 1281 .long 7,0,7,0 |
| 1282 L$7_mask_poly: |
# Qwords 7 and 450 (0x1c2). Not referenced by any instruction in the code
# shown in this file.
| 1283 .long 7,0,450,0 |
# L$rem_4bit: 16 qword entries, each written as a (low dword, high dword)
# pair whose low dword is zero. _gcm_gmult_4bit indexes it with the 4 bits
# shifted out of the accumulator (scale 8) and xors the entry into the more
# significant half. The table starts on a 64-byte boundary.
| 1284 .p2align 6 |
| 1285 |
| 1286 L$rem_4bit: |
| 1287 .long 0,0,0,471859200,0,943718400,0,610271232 |
| 1288 .long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 |
| 1289 .long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 |
| 1290 .long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 |
| 1291 |
# L$rem_8bit: 256 entries of 16 bits, 8 per row. _gcm_ghash_4bit indexes it
# with a byte value (scale 2), shifts the entry left by 48 and xors it into
# the more significant half of its accumulator.
| 1292 L$rem_8bit: |
| 1293 .value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E |
| 1294 .value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E |
| 1295 .value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E |
| 1296 .value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E |
| 1297 .value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E |
| 1298 .value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E |
| 1299 .value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E |
| 1300 .value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E |
| 1301 .value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE |
| 1302 .value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE |
| 1303 .value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE |
| 1304 .value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE |
| 1305 .value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E |
| 1306 .value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E |
| 1307 .value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE |
| 1308 .value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE |
| 1309 .value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E |
| 1310 .value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E |
| 1311 .value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E |
| 1312 .value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E |
| 1313 .value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E |
| 1314 .value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E |
| 1315 .value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E |
| 1316 .value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E |
| 1317 .value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE |
| 1318 .value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE |
| 1319 .value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE |
| 1320 .value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE |
| 1321 .value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E |
| 1322 .value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E |
| 1323 .value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE |
| 1324 .value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE |
| 1325 |
# NUL-terminated ASCII identification string:
# "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>".
# It is a single .byte directive; this rendering wraps it over three lines.
| 1326 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84
,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,10
8,46,111,114,103,62,0 |
| 1327 .p2align 6 |
| 1328 #endif |
OLD | NEW |