OLD | NEW |
(Empty) | |
| 1 #if defined(__x86_64__) |
| 2 .text |
| 3 |
| 4 |
# _gcm_gmult_4bit
# Updates the 16-byte block at rdi in place using 4-bit table lookups.
# Register roles visible in the code below:
#   rdi    16-byte block; read one byte at a time from offset 15 down to 0,
#          overwritten with the byte-swapped result at the end
#   rsi    table of 16-byte entries, addressed with a nibble*16 byte offset
#   r11    address of L$rem_4bit (8-byte entries, indexed by a 4-bit value)
#   r9:r8  128-bit accumulator (r9 = upper 64 bits, r8 = lower 64 bits)
#   rax    low nibble of the current byte, already multiplied by 16
#   rbx    high nibble of the current byte, masked with 0xF0 (nibble*16)
#   rcx    index of the next byte to fetch, counts 14 down to 0
#   rdx    the 4 bits about to be shifted out of r8 (index into L$rem_4bit)
# NOTE(review): rdi/rsi are presumably the first two System V AMD64 integer
# arguments (state block and precomputed table) - confirm against the C
# prototype, which is not visible here.
| 5 .globl _gcm_gmult_4bit |
| 6 |
| 7 .p2align 4 |
| 8 _gcm_gmult_4bit: |
# Three registers are pushed, but only rbx is written by the body.
| 9 pushq %rbx |
| 10 pushq %rbp |
| 11 pushq %r12 |
| 12 L$gmult_prologue: |
| 13 |
# Start with the last byte (offset 15): the low nibble selects the initial
# table entry loaded into r9:r8, the high nibble is kept in bl for the first
# loop step.
| 14 movzbq 15(%rdi),%r8 |
| 15 leaq L$rem_4bit(%rip),%r11 |
| 16 xorq %rax,%rax |
| 17 xorq %rbx,%rbx |
| 18 movb %r8b,%al |
| 19 movb %r8b,%bl |
| 20 shlb $4,%al |
| 21 movq $14,%rcx |
| 22 movq 8(%rsi,%rax,1),%r8 |
| 23 movq (%rsi,%rax,1),%r9 |
| 24 andb $240,%bl |
| 25 movq %r8,%rdx |
| 26 jmp L$oop1 |
| 27 |
| 28 .p2align 4 |
# Each loop pass handles two nibbles. One nibble step is:
#   shift r9:r8 right by 4 (r10 carries the low 4 bits of r9 into the top of r8),
#   xor in the 16-byte table entry selected by the nibble,
#   xor the L$rem_4bit entry for the 4 bits shifted out of r8 into r9.
| 29 L$oop1: |
# First half: high nibble of the previous byte (rbx); also fetch the next byte.
| 30 shrq $4,%r8 |
| 31 andq $15,%rdx |
| 32 movq %r9,%r10 |
| 33 movb (%rdi,%rcx,1),%al |
| 34 shrq $4,%r9 |
| 35 xorq 8(%rsi,%rbx,1),%r8 |
| 36 shlq $60,%r10 |
| 37 xorq (%rsi,%rbx,1),%r9 |
| 38 movb %al,%bl |
| 39 xorq (%r11,%rdx,8),%r9 |
| 40 movq %r8,%rdx |
| 41 shlb $4,%al |
| 42 xorq %r10,%r8 |
# Leave the loop once the byte index goes negative (byte 0 has been fetched).
| 43 decq %rcx |
| 44 js L$break1 |
| 45 |
# Second half: low nibble of the byte just fetched (rax).
| 46 shrq $4,%r8 |
| 47 andq $15,%rdx |
| 48 movq %r9,%r10 |
| 49 shrq $4,%r9 |
| 50 xorq 8(%rsi,%rax,1),%r8 |
| 51 shlq $60,%r10 |
| 52 xorq (%rsi,%rax,1),%r9 |
| 53 andb $240,%bl |
| 54 xorq (%r11,%rdx,8),%r9 |
| 55 movq %r8,%rdx |
| 56 xorq %r10,%r8 |
| 57 jmp L$oop1 |
| 58 |
| 59 .p2align 4 |
# Tail: the two nibbles of byte 0 (low nibble via rax, then high nibble via rbx).
| 60 L$break1: |
| 61 shrq $4,%r8 |
| 62 andq $15,%rdx |
| 63 movq %r9,%r10 |
| 64 shrq $4,%r9 |
| 65 xorq 8(%rsi,%rax,1),%r8 |
| 66 shlq $60,%r10 |
| 67 xorq (%rsi,%rax,1),%r9 |
| 68 andb $240,%bl |
| 69 xorq (%r11,%rdx,8),%r9 |
| 70 movq %r8,%rdx |
| 71 xorq %r10,%r8 |
| 72 |
| 73 shrq $4,%r8 |
| 74 andq $15,%rdx |
| 75 movq %r9,%r10 |
| 76 shrq $4,%r9 |
| 77 xorq 8(%rsi,%rbx,1),%r8 |
| 78 shlq $60,%r10 |
| 79 xorq (%rsi,%rbx,1),%r9 |
| 80 xorq %r10,%r8 |
| 81 xorq (%r11,%rdx,8),%r9 |
| 82 |
# Byte-reverse both halves and store the result back over the input block.
| 83 bswapq %r8 |
| 84 bswapq %r9 |
| 85 movq %r8,8(%rdi) |
| 86 movq %r9,(%rdi) |
| 87 |
# Reload rbx from its push slot and drop all three slots (24 bytes); rbp and
# r12 are not reloaded because the body never modified them.
| 88 movq 16(%rsp),%rbx |
| 89 leaq 24(%rsp),%rsp |
| 90 L$gmult_epilogue: |
# Bytes F3 C3 encode "rep ret".
| 91 .byte 0xf3,0xc3 |
| 92 |
# _gcm_ghash_4bit - entry, register setup and construction of the on-stack
# lookup tables used by L$outer_loop.
# Register roles established here:
#   rdi  16-byte state block (loaded into r9:r8 at the end of this part)
#   rsi  table of sixteen 16-byte entries (read at offsets 0..240)
#   r14  copy of rdx on entry; r15 = rdx + rcx on entry
#   r11  address of L$rem_8bit
#   rbp  rsp + 144
# NOTE(review): rdx/rcx are presumably the input pointer and its length in
# bytes (System V AMD64 third and fourth arguments) - confirm against the C
# prototype, which is not visible here.
#
# Stack frame (280 bytes below the six pushed registers); for i = 0..15, with
# hi = qword at 16*i(rsi) and lo = qword at 16*i+8(rsi):
#   i(%rsp)         one byte: (low byte of lo) << 4
#   8*i-128(%rbp)   qword:    (lo >> 4) | (hi << 60)
#   8*i(%rbp)       qword:    hi >> 4
# That is, each 128-bit entry shifted right by 4 bits, plus the 4 bits that
# fell off the bottom kept separately in the byte table.
| 93 .globl _gcm_ghash_4bit |
| 94 |
| 95 .p2align 4 |
| 96 _gcm_ghash_4bit: |
| 97 pushq %rbx |
| 98 pushq %rbp |
| 99 pushq %r12 |
| 100 pushq %r13 |
| 101 pushq %r14 |
| 102 pushq %r15 |
| 103 subq $280,%rsp |
| 104 L$ghash_prologue: |
| 105 movq %rdx,%r14 |
| 106 movq %rcx,%r15 |
# Bias rsi by +128 while the table is read so that all 16 entries are reached
# with displacements in -128..+127; the bias is undone after the last entry.
| 107 subq $-128,%rsi |
| 108 leaq 16+128(%rsp),%rbp |
| 109 xorl %edx,%edx |
# The sixteen entries are processed by one software-pipelined sequence:
# r8/rax hold the even entries, r9/rbx the odd ones, r10 carries the 4 bits
# moving from the upper qword into the lower one, dl builds the byte-table value.
| 110 movq 0+0-128(%rsi),%r8 |
| 111 movq 0+8-128(%rsi),%rax |
| 112 movb %al,%dl |
| 113 shrq $4,%rax |
| 114 movq %r8,%r10 |
| 115 shrq $4,%r8 |
| 116 movq 16+0-128(%rsi),%r9 |
| 117 shlb $4,%dl |
| 118 movq 16+8-128(%rsi),%rbx |
| 119 shlq $60,%r10 |
| 120 movb %dl,0(%rsp) |
| 121 orq %r10,%rax |
| 122 movb %bl,%dl |
| 123 shrq $4,%rbx |
| 124 movq %r9,%r10 |
| 125 shrq $4,%r9 |
| 126 movq %r8,0(%rbp) |
| 127 movq 32+0-128(%rsi),%r8 |
| 128 shlb $4,%dl |
| 129 movq %rax,0-128(%rbp) |
| 130 movq 32+8-128(%rsi),%rax |
| 131 shlq $60,%r10 |
| 132 movb %dl,1(%rsp) |
| 133 orq %r10,%rbx |
| 134 movb %al,%dl |
| 135 shrq $4,%rax |
| 136 movq %r8,%r10 |
| 137 shrq $4,%r8 |
| 138 movq %r9,8(%rbp) |
| 139 movq 48+0-128(%rsi),%r9 |
| 140 shlb $4,%dl |
| 141 movq %rbx,8-128(%rbp) |
| 142 movq 48+8-128(%rsi),%rbx |
| 143 shlq $60,%r10 |
| 144 movb %dl,2(%rsp) |
| 145 orq %r10,%rax |
| 146 movb %bl,%dl |
| 147 shrq $4,%rbx |
| 148 movq %r9,%r10 |
| 149 shrq $4,%r9 |
| 150 movq %r8,16(%rbp) |
| 151 movq 64+0-128(%rsi),%r8 |
| 152 shlb $4,%dl |
| 153 movq %rax,16-128(%rbp) |
| 154 movq 64+8-128(%rsi),%rax |
| 155 shlq $60,%r10 |
| 156 movb %dl,3(%rsp) |
| 157 orq %r10,%rbx |
| 158 movb %al,%dl |
| 159 shrq $4,%rax |
| 160 movq %r8,%r10 |
| 161 shrq $4,%r8 |
| 162 movq %r9,24(%rbp) |
| 163 movq 80+0-128(%rsi),%r9 |
| 164 shlb $4,%dl |
| 165 movq %rbx,24-128(%rbp) |
| 166 movq 80+8-128(%rsi),%rbx |
| 167 shlq $60,%r10 |
| 168 movb %dl,4(%rsp) |
| 169 orq %r10,%rax |
| 170 movb %bl,%dl |
| 171 shrq $4,%rbx |
| 172 movq %r9,%r10 |
| 173 shrq $4,%r9 |
| 174 movq %r8,32(%rbp) |
| 175 movq 96+0-128(%rsi),%r8 |
| 176 shlb $4,%dl |
| 177 movq %rax,32-128(%rbp) |
| 178 movq 96+8-128(%rsi),%rax |
| 179 shlq $60,%r10 |
| 180 movb %dl,5(%rsp) |
| 181 orq %r10,%rbx |
| 182 movb %al,%dl |
| 183 shrq $4,%rax |
| 184 movq %r8,%r10 |
| 185 shrq $4,%r8 |
| 186 movq %r9,40(%rbp) |
| 187 movq 112+0-128(%rsi),%r9 |
| 188 shlb $4,%dl |
| 189 movq %rbx,40-128(%rbp) |
| 190 movq 112+8-128(%rsi),%rbx |
| 191 shlq $60,%r10 |
| 192 movb %dl,6(%rsp) |
| 193 orq %r10,%rax |
| 194 movb %bl,%dl |
| 195 shrq $4,%rbx |
| 196 movq %r9,%r10 |
| 197 shrq $4,%r9 |
| 198 movq %r8,48(%rbp) |
| 199 movq 128+0-128(%rsi),%r8 |
| 200 shlb $4,%dl |
| 201 movq %rax,48-128(%rbp) |
| 202 movq 128+8-128(%rsi),%rax |
| 203 shlq $60,%r10 |
| 204 movb %dl,7(%rsp) |
| 205 orq %r10,%rbx |
| 206 movb %al,%dl |
| 207 shrq $4,%rax |
| 208 movq %r8,%r10 |
| 209 shrq $4,%r8 |
| 210 movq %r9,56(%rbp) |
| 211 movq 144+0-128(%rsi),%r9 |
| 212 shlb $4,%dl |
| 213 movq %rbx,56-128(%rbp) |
| 214 movq 144+8-128(%rsi),%rbx |
| 215 shlq $60,%r10 |
| 216 movb %dl,8(%rsp) |
| 217 orq %r10,%rax |
| 218 movb %bl,%dl |
| 219 shrq $4,%rbx |
| 220 movq %r9,%r10 |
| 221 shrq $4,%r9 |
| 222 movq %r8,64(%rbp) |
| 223 movq 160+0-128(%rsi),%r8 |
| 224 shlb $4,%dl |
| 225 movq %rax,64-128(%rbp) |
| 226 movq 160+8-128(%rsi),%rax |
| 227 shlq $60,%r10 |
| 228 movb %dl,9(%rsp) |
| 229 orq %r10,%rbx |
| 230 movb %al,%dl |
| 231 shrq $4,%rax |
| 232 movq %r8,%r10 |
| 233 shrq $4,%r8 |
| 234 movq %r9,72(%rbp) |
| 235 movq 176+0-128(%rsi),%r9 |
| 236 shlb $4,%dl |
| 237 movq %rbx,72-128(%rbp) |
| 238 movq 176+8-128(%rsi),%rbx |
| 239 shlq $60,%r10 |
| 240 movb %dl,10(%rsp) |
| 241 orq %r10,%rax |
| 242 movb %bl,%dl |
| 243 shrq $4,%rbx |
| 244 movq %r9,%r10 |
| 245 shrq $4,%r9 |
| 246 movq %r8,80(%rbp) |
| 247 movq 192+0-128(%rsi),%r8 |
| 248 shlb $4,%dl |
| 249 movq %rax,80-128(%rbp) |
| 250 movq 192+8-128(%rsi),%rax |
| 251 shlq $60,%r10 |
| 252 movb %dl,11(%rsp) |
| 253 orq %r10,%rbx |
| 254 movb %al,%dl |
| 255 shrq $4,%rax |
| 256 movq %r8,%r10 |
| 257 shrq $4,%r8 |
| 258 movq %r9,88(%rbp) |
| 259 movq 208+0-128(%rsi),%r9 |
| 260 shlb $4,%dl |
| 261 movq %rbx,88-128(%rbp) |
| 262 movq 208+8-128(%rsi),%rbx |
| 263 shlq $60,%r10 |
| 264 movb %dl,12(%rsp) |
| 265 orq %r10,%rax |
| 266 movb %bl,%dl |
| 267 shrq $4,%rbx |
| 268 movq %r9,%r10 |
| 269 shrq $4,%r9 |
| 270 movq %r8,96(%rbp) |
| 271 movq 224+0-128(%rsi),%r8 |
| 272 shlb $4,%dl |
| 273 movq %rax,96-128(%rbp) |
| 274 movq 224+8-128(%rsi),%rax |
| 275 shlq $60,%r10 |
| 276 movb %dl,13(%rsp) |
| 277 orq %r10,%rbx |
| 278 movb %al,%dl |
| 279 shrq $4,%rax |
| 280 movq %r8,%r10 |
| 281 shrq $4,%r8 |
| 282 movq %r9,104(%rbp) |
| 283 movq 240+0-128(%rsi),%r9 |
| 284 shlb $4,%dl |
| 285 movq %rbx,104-128(%rbp) |
| 286 movq 240+8-128(%rsi),%rbx |
| 287 shlq $60,%r10 |
| 288 movb %dl,14(%rsp) |
| 289 orq %r10,%rax |
| 290 movb %bl,%dl |
| 291 shrq $4,%rbx |
| 292 movq %r9,%r10 |
| 293 shrq $4,%r9 |
| 294 movq %r8,112(%rbp) |
| 295 shlb $4,%dl |
| 296 movq %rax,112-128(%rbp) |
| 297 shlq $60,%r10 |
| 298 movb %dl,15(%rsp) |
| 299 orq %r10,%rbx |
| 300 movq %r9,120(%rbp) |
| 301 movq %rbx,120-128(%rbp) |
# Undo the +128 bias on rsi, load the current state into r9:r8, turn r15 into
# r14 + r15, and point r11 at the 8-bit remainder table.
| 302 addq $-128,%rsi |
| 303 movq 8(%rdi),%r8 |
| 304 movq 0(%rdi),%r9 |
| 305 addq %r14,%r15 |
| 306 leaq L$rem_8bit(%rip),%r11 |
| 307 jmp L$outer_loop |
# L$outer_loop - one pass per 16 bytes read through r14, until r14 reaches r15.
# Registers on entry to each pass:
#   r9:r8  current state (r9 = qword at 0(rdi), r8 = qword at 8(rdi))
#   rsi    table of 16-byte entries, addressed with (nibble*16) byte offsets
#   rsp    16-entry byte table; rbp and rbp-128 address two 16-entry qword tables
#   r11    L$rem_8bit (16-bit entries)
# Per pass: the 16 bytes at (r14) are XORed into the state, the result is
# written to (rdi), and then consumed one byte at a time through edx
# (roll $8 brings the next byte into dl).  For every byte:
#   al      = low nibble << 4, byte offset into the rsi table
#   ebx/ecx = high nibble, index into the three stack tables
#   r12/r13 = low 8 bits of (stack byte table entry XOR r8), looked up in
#             L$rem_8bit, shifted left 48 and XORed into r9
#   r9:r8 is shifted right by 8 bits, r10 carrying the byte from r9 into r8
# The instruction order is software-pipelined; do not reorder.
| 308 .p2align 4 |
| 309 L$outer_loop: |
| 310 xorq (%r14),%r9 |
| 311 movq 8(%r14),%rdx |
| 312 leaq 16(%r14),%r14 |
| 313 xorq %r8,%rdx |
| 314 movq %r9,(%rdi) |
| 315 movq %rdx,8(%rdi) |
# Begin with the upper 32 bits of the qword at 8(rdi).
| 316 shrq $32,%rdx |
| 317 xorq %rax,%rax |
| 318 roll $8,%edx |
| 319 movb %dl,%al |
| 320 movzbl %dl,%ebx |
| 321 shlb $4,%al |
| 322 shrl $4,%ebx |
| 323 roll $8,%edx |
| 324 movq 8(%rsi,%rax,1),%r8 |
| 325 movq (%rsi,%rax,1),%r9 |
| 326 movb %dl,%al |
| 327 movzbl %dl,%ecx |
| 328 shlb $4,%al |
| 329 movzbq (%rsp,%rbx,1),%r12 |
| 330 shrl $4,%ecx |
| 331 xorq %r8,%r12 |
| 332 movq %r9,%r10 |
| 333 shrq $8,%r8 |
| 334 movzbq %r12b,%r12 |
| 335 shrq $8,%r9 |
| 336 xorq -128(%rbp,%rbx,8),%r8 |
| 337 shlq $56,%r10 |
| 338 xorq (%rbp,%rbx,8),%r9 |
| 339 roll $8,%edx |
| 340 xorq 8(%rsi,%rax,1),%r8 |
| 341 xorq (%rsi,%rax,1),%r9 |
| 342 movb %dl,%al |
| 343 xorq %r10,%r8 |
| 344 movzwq (%r11,%r12,2),%r12 |
| 345 movzbl %dl,%ebx |
| 346 shlb $4,%al |
| 347 movzbq (%rsp,%rcx,1),%r13 |
| 348 shrl $4,%ebx |
| 349 shlq $48,%r12 |
| 350 xorq %r8,%r13 |
| 351 movq %r9,%r10 |
| 352 xorq %r12,%r9 |
| 353 shrq $8,%r8 |
| 354 movzbq %r13b,%r13 |
| 355 shrq $8,%r9 |
| 356 xorq -128(%rbp,%rcx,8),%r8 |
| 357 shlq $56,%r10 |
| 358 xorq (%rbp,%rcx,8),%r9 |
| 359 roll $8,%edx |
| 360 xorq 8(%rsi,%rax,1),%r8 |
| 361 xorq (%rsi,%rax,1),%r9 |
| 362 movb %dl,%al |
| 363 xorq %r10,%r8 |
| 364 movzwq (%r11,%r13,2),%r13 |
| 365 movzbl %dl,%ecx |
| 366 shlb $4,%al |
| 367 movzbq (%rsp,%rbx,1),%r12 |
| 368 shrl $4,%ecx |
| 369 shlq $48,%r13 |
| 370 xorq %r8,%r12 |
| 371 movq %r9,%r10 |
| 372 xorq %r13,%r9 |
| 373 shrq $8,%r8 |
| 374 movzbq %r12b,%r12 |
# Next 32 bits of the XORed state: dword at 8(rdi).
| 375 movl 8(%rdi),%edx |
| 376 shrq $8,%r9 |
| 377 xorq -128(%rbp,%rbx,8),%r8 |
| 378 shlq $56,%r10 |
| 379 xorq (%rbp,%rbx,8),%r9 |
| 380 roll $8,%edx |
| 381 xorq 8(%rsi,%rax,1),%r8 |
| 382 xorq (%rsi,%rax,1),%r9 |
| 383 movb %dl,%al |
| 384 xorq %r10,%r8 |
| 385 movzwq (%r11,%r12,2),%r12 |
| 386 movzbl %dl,%ebx |
| 387 shlb $4,%al |
| 388 movzbq (%rsp,%rcx,1),%r13 |
| 389 shrl $4,%ebx |
| 390 shlq $48,%r12 |
| 391 xorq %r8,%r13 |
| 392 movq %r9,%r10 |
| 393 xorq %r12,%r9 |
| 394 shrq $8,%r8 |
| 395 movzbq %r13b,%r13 |
| 396 shrq $8,%r9 |
| 397 xorq -128(%rbp,%rcx,8),%r8 |
| 398 shlq $56,%r10 |
| 399 xorq (%rbp,%rcx,8),%r9 |
| 400 roll $8,%edx |
| 401 xorq 8(%rsi,%rax,1),%r8 |
| 402 xorq (%rsi,%rax,1),%r9 |
| 403 movb %dl,%al |
| 404 xorq %r10,%r8 |
| 405 movzwq (%r11,%r13,2),%r13 |
| 406 movzbl %dl,%ecx |
| 407 shlb $4,%al |
| 408 movzbq (%rsp,%rbx,1),%r12 |
| 409 shrl $4,%ecx |
| 410 shlq $48,%r13 |
| 411 xorq %r8,%r12 |
| 412 movq %r9,%r10 |
| 413 xorq %r13,%r9 |
| 414 shrq $8,%r8 |
| 415 movzbq %r12b,%r12 |
| 416 shrq $8,%r9 |
| 417 xorq -128(%rbp,%rbx,8),%r8 |
| 418 shlq $56,%r10 |
| 419 xorq (%rbp,%rbx,8),%r9 |
| 420 roll $8,%edx |
| 421 xorq 8(%rsi,%rax,1),%r8 |
| 422 xorq (%rsi,%rax,1),%r9 |
| 423 movb %dl,%al |
| 424 xorq %r10,%r8 |
| 425 movzwq (%r11,%r12,2),%r12 |
| 426 movzbl %dl,%ebx |
| 427 shlb $4,%al |
| 428 movzbq (%rsp,%rcx,1),%r13 |
| 429 shrl $4,%ebx |
| 430 shlq $48,%r12 |
| 431 xorq %r8,%r13 |
| 432 movq %r9,%r10 |
| 433 xorq %r12,%r9 |
| 434 shrq $8,%r8 |
| 435 movzbq %r13b,%r13 |
| 436 shrq $8,%r9 |
| 437 xorq -128(%rbp,%rcx,8),%r8 |
| 438 shlq $56,%r10 |
| 439 xorq (%rbp,%rcx,8),%r9 |
| 440 roll $8,%edx |
| 441 xorq 8(%rsi,%rax,1),%r8 |
| 442 xorq (%rsi,%rax,1),%r9 |
| 443 movb %dl,%al |
| 444 xorq %r10,%r8 |
| 445 movzwq (%r11,%r13,2),%r13 |
| 446 movzbl %dl,%ecx |
| 447 shlb $4,%al |
| 448 movzbq (%rsp,%rbx,1),%r12 |
| 449 shrl $4,%ecx |
| 450 shlq $48,%r13 |
| 451 xorq %r8,%r12 |
| 452 movq %r9,%r10 |
| 453 xorq %r13,%r9 |
| 454 shrq $8,%r8 |
| 455 movzbq %r12b,%r12 |
# Next 32 bits of the XORed state: dword at 4(rdi).
| 456 movl 4(%rdi),%edx |
| 457 shrq $8,%r9 |
| 458 xorq -128(%rbp,%rbx,8),%r8 |
| 459 shlq $56,%r10 |
| 460 xorq (%rbp,%rbx,8),%r9 |
| 461 roll $8,%edx |
| 462 xorq 8(%rsi,%rax,1),%r8 |
| 463 xorq (%rsi,%rax,1),%r9 |
| 464 movb %dl,%al |
| 465 xorq %r10,%r8 |
| 466 movzwq (%r11,%r12,2),%r12 |
| 467 movzbl %dl,%ebx |
| 468 shlb $4,%al |
| 469 movzbq (%rsp,%rcx,1),%r13 |
| 470 shrl $4,%ebx |
| 471 shlq $48,%r12 |
| 472 xorq %r8,%r13 |
| 473 movq %r9,%r10 |
| 474 xorq %r12,%r9 |
| 475 shrq $8,%r8 |
| 476 movzbq %r13b,%r13 |
| 477 shrq $8,%r9 |
| 478 xorq -128(%rbp,%rcx,8),%r8 |
| 479 shlq $56,%r10 |
| 480 xorq (%rbp,%rcx,8),%r9 |
| 481 roll $8,%edx |
| 482 xorq 8(%rsi,%rax,1),%r8 |
| 483 xorq (%rsi,%rax,1),%r9 |
| 484 movb %dl,%al |
| 485 xorq %r10,%r8 |
| 486 movzwq (%r11,%r13,2),%r13 |
| 487 movzbl %dl,%ecx |
| 488 shlb $4,%al |
| 489 movzbq (%rsp,%rbx,1),%r12 |
| 490 shrl $4,%ecx |
| 491 shlq $48,%r13 |
| 492 xorq %r8,%r12 |
| 493 movq %r9,%r10 |
| 494 xorq %r13,%r9 |
| 495 shrq $8,%r8 |
| 496 movzbq %r12b,%r12 |
| 497 shrq $8,%r9 |
| 498 xorq -128(%rbp,%rbx,8),%r8 |
| 499 shlq $56,%r10 |
| 500 xorq (%rbp,%rbx,8),%r9 |
| 501 roll $8,%edx |
| 502 xorq 8(%rsi,%rax,1),%r8 |
| 503 xorq (%rsi,%rax,1),%r9 |
| 504 movb %dl,%al |
| 505 xorq %r10,%r8 |
| 506 movzwq (%r11,%r12,2),%r12 |
| 507 movzbl %dl,%ebx |
| 508 shlb $4,%al |
| 509 movzbq (%rsp,%rcx,1),%r13 |
| 510 shrl $4,%ebx |
| 511 shlq $48,%r12 |
| 512 xorq %r8,%r13 |
| 513 movq %r9,%r10 |
| 514 xorq %r12,%r9 |
| 515 shrq $8,%r8 |
| 516 movzbq %r13b,%r13 |
| 517 shrq $8,%r9 |
| 518 xorq -128(%rbp,%rcx,8),%r8 |
| 519 shlq $56,%r10 |
| 520 xorq (%rbp,%rcx,8),%r9 |
| 521 roll $8,%edx |
| 522 xorq 8(%rsi,%rax,1),%r8 |
| 523 xorq (%rsi,%rax,1),%r9 |
| 524 movb %dl,%al |
| 525 xorq %r10,%r8 |
| 526 movzwq (%r11,%r13,2),%r13 |
| 527 movzbl %dl,%ecx |
| 528 shlb $4,%al |
| 529 movzbq (%rsp,%rbx,1),%r12 |
| 530 shrl $4,%ecx |
| 531 shlq $48,%r13 |
| 532 xorq %r8,%r12 |
| 533 movq %r9,%r10 |
| 534 xorq %r13,%r9 |
| 535 shrq $8,%r8 |
| 536 movzbq %r12b,%r12 |
# Last 32 bits of the XORed state: dword at 0(rdi).
| 537 movl 0(%rdi),%edx |
| 538 shrq $8,%r9 |
| 539 xorq -128(%rbp,%rbx,8),%r8 |
| 540 shlq $56,%r10 |
| 541 xorq (%rbp,%rbx,8),%r9 |
| 542 roll $8,%edx |
| 543 xorq 8(%rsi,%rax,1),%r8 |
| 544 xorq (%rsi,%rax,1),%r9 |
| 545 movb %dl,%al |
| 546 xorq %r10,%r8 |
| 547 movzwq (%r11,%r12,2),%r12 |
| 548 movzbl %dl,%ebx |
| 549 shlb $4,%al |
| 550 movzbq (%rsp,%rcx,1),%r13 |
| 551 shrl $4,%ebx |
| 552 shlq $48,%r12 |
| 553 xorq %r8,%r13 |
| 554 movq %r9,%r10 |
| 555 xorq %r12,%r9 |
| 556 shrq $8,%r8 |
| 557 movzbq %r13b,%r13 |
| 558 shrq $8,%r9 |
| 559 xorq -128(%rbp,%rcx,8),%r8 |
| 560 shlq $56,%r10 |
| 561 xorq (%rbp,%rcx,8),%r9 |
| 562 roll $8,%edx |
| 563 xorq 8(%rsi,%rax,1),%r8 |
| 564 xorq (%rsi,%rax,1),%r9 |
| 565 movb %dl,%al |
| 566 xorq %r10,%r8 |
| 567 movzwq (%r11,%r13,2),%r13 |
| 568 movzbl %dl,%ecx |
| 569 shlb $4,%al |
| 570 movzbq (%rsp,%rbx,1),%r12 |
| 571 shrl $4,%ecx |
| 572 shlq $48,%r13 |
| 573 xorq %r8,%r12 |
| 574 movq %r9,%r10 |
| 575 xorq %r13,%r9 |
| 576 shrq $8,%r8 |
| 577 movzbq %r12b,%r12 |
| 578 shrq $8,%r9 |
| 579 xorq -128(%rbp,%rbx,8),%r8 |
| 580 shlq $56,%r10 |
| 581 xorq (%rbp,%rbx,8),%r9 |
| 582 roll $8,%edx |
| 583 xorq 8(%rsi,%rax,1),%r8 |
| 584 xorq (%rsi,%rax,1),%r9 |
| 585 movb %dl,%al |
| 586 xorq %r10,%r8 |
| 587 movzwq (%r11,%r12,2),%r12 |
| 588 movzbl %dl,%ebx |
| 589 shlb $4,%al |
| 590 movzbq (%rsp,%rcx,1),%r13 |
| 591 shrl $4,%ebx |
| 592 shlq $48,%r12 |
| 593 xorq %r8,%r13 |
| 594 movq %r9,%r10 |
| 595 xorq %r12,%r9 |
| 596 shrq $8,%r8 |
| 597 movzbq %r13b,%r13 |
| 598 shrq $8,%r9 |
| 599 xorq -128(%rbp,%rcx,8),%r8 |
| 600 shlq $56,%r10 |
| 601 xorq (%rbp,%rcx,8),%r9 |
| 602 roll $8,%edx |
| 603 xorq 8(%rsi,%rax,1),%r8 |
| 604 xorq (%rsi,%rax,1),%r9 |
| 605 movb %dl,%al |
| 606 xorq %r10,%r8 |
| 607 movzwq (%r11,%r13,2),%r13 |
| 608 movzbl %dl,%ecx |
| 609 shlb $4,%al |
| 610 movzbq (%rsp,%rbx,1),%r12 |
# Final byte: keep its high nibble in ecx as a byte offset (nibble*16) into the
# rsi table; it is applied with a 4-bit shift further down.
| 611 andl $240,%ecx |
| 612 shlq $48,%r13 |
| 613 xorq %r8,%r12 |
| 614 movq %r9,%r10 |
| 615 xorq %r13,%r9 |
| 616 shrq $8,%r8 |
| 617 movzbq %r12b,%r12 |
# The generated code had a fourth reload here, "movl -4(%rdi),%edx".  Its
# result was never read (rdx is not used again before being overwritten at the
# top of the loop or before the function returns) and it fetched 4 bytes from
# below the state block, so the dead out-of-bounds load has been dropped.
| 619 shrq $8,%r9 |
| 620 xorq -128(%rbp,%rbx,8),%r8 |
| 621 shlq $56,%r10 |
| 622 xorq (%rbp,%rbx,8),%r9 |
| 623 movzwq (%r11,%r12,2),%r12 |
| 624 xorq 8(%rsi,%rax,1),%r8 |
| 625 xorq (%rsi,%rax,1),%r9 |
| 626 shlq $48,%r12 |
| 627 xorq %r10,%r8 |
| 628 xorq %r12,%r9 |
# Last nibble: shift r9:r8 right by 4 only, XOR in the rsi entry selected by
# rcx, and XOR in the L$rem_8bit entry indexed by (low nibble of r8) << 4.
| 629 movzbq %r8b,%r13 |
| 630 shrq $4,%r8 |
| 631 movq %r9,%r10 |
| 632 shlb $4,%r13b |
| 633 shrq $4,%r9 |
| 634 xorq 8(%rsi,%rcx,1),%r8 |
| 635 movzwq (%r11,%r13,2),%r13 |
| 636 shlq $60,%r10 |
| 637 xorq (%rsi,%rcx,1),%r9 |
| 638 xorq %r10,%r8 |
| 639 shlq $48,%r13 |
| 640 bswapq %r8 |
| 641 xorq %r13,%r9 |
| 642 bswapq %r9 |
# Unsigned compare: keep going while the read pointer r14 is below r15.
| 643 cmpq %r15,%r14 |
| 644 jb L$outer_loop |
# Store the final state.
| 645 movq %r8,8(%rdi) |
| 646 movq %r9,(%rdi) |
| 647 |
# Epilogue: the six pushed registers sit just above the 280-byte frame, in
# reverse push order; reload them and release frame plus save area.
| 648 leaq 280(%rsp),%rsi |
| 649 movq 0(%rsi),%r15 |
| 650 movq 8(%rsi),%r14 |
| 651 movq 16(%rsi),%r13 |
| 652 movq 24(%rsi),%r12 |
| 653 movq 32(%rsi),%rbp |
| 654 movq 40(%rsi),%rbx |
| 655 leaq 48(%rsi),%rsp |
| 656 L$ghash_epilogue: |
# Bytes F3 C3 encode "rep ret".
| 657 .byte 0xf3,0xc3 |
| 658 |
# _gcm_init_clmul
# Reads 16 bytes from (rsi) and writes a 96-byte table to (rdi):
#    0(rdi)  V   = input with its 64-bit halves swapped, shifted left by one
#                  bit and conditionally XORed with L$0x1c2_polynomial
#   16(rdi)  V2  = reduced carry-less product V  * V
#   32(rdi)  low qword = hi^lo of V,  high qword = hi^lo of V2
#   48(rdi)  V3  = reduced carry-less product V2 * V
#   64(rdi)  V4  = reduced carry-less product V3 * V
#   80(rdi)  low qword = hi^lo of V3, high qword = hi^lo of V4
# ("hi^lo" = XOR of the two 64-bit halves, the precomputed middle operand of
# the Karatsuba-style three-multiply product used throughout this file.)
# NOTE(review): presumably rdi = Htable and rsi = the hash key H - confirm
# against the C prototype, which is not visible here.
# The .byte sequences are hand-encoded SSE instructions:
#   102,15,58,68,194,0   pclmulqdq $0x00,%xmm2,%xmm0
#   102,15,58,68,202,17  pclmulqdq $0x11,%xmm2,%xmm1
#   102,15,58,68,222,0   pclmulqdq $0x00,%xmm6,%xmm3
#   102,15,58,15,227,8   palignr   $8,%xmm3,%xmm4
| 659 .globl _gcm_init_clmul |
| 660 |
| 661 .p2align 4 |
| 662 _gcm_init_clmul: |
| 663 L$_init_clmul: |
| 664 movdqu (%rsi),%xmm2 |
| 665 pshufd $78,%xmm2,%xmm2 |
| 666 |
| 667 |
# 128-bit shift left by one: psllq shifts each qword, the bit leaving the low
# qword is moved into the high qword through xmm3.  xmm5 becomes an all-ones
# mask when the top dword of the unshifted value is negative (top bit set).
| 668 pshufd $255,%xmm2,%xmm4 |
| 669 movdqa %xmm2,%xmm3 |
| 670 psllq $1,%xmm2 |
| 671 pxor %xmm5,%xmm5 |
| 672 psrlq $63,%xmm3 |
| 673 pcmpgtd %xmm4,%xmm5 |
| 674 pslldq $8,%xmm3 |
| 675 por %xmm3,%xmm2 |
| 676 |
| 677 |
# Conditionally XOR in the polynomial constant (mask selects all or nothing).
| 678 pand L$0x1c2_polynomial(%rip),%xmm5 |
| 679 pxor %xmm5,%xmm2 |
| 680 |
| 681 |
# xmm6 = hi^lo of V, kept for all three multiplications below.
# First product: xmm1:xmm0 = V * V.
| 682 pshufd $78,%xmm2,%xmm6 |
| 683 movdqa %xmm2,%xmm0 |
| 684 pxor %xmm2,%xmm6 |
| 685 movdqa %xmm0,%xmm1 |
| 686 pshufd $78,%xmm0,%xmm3 |
| 687 pxor %xmm0,%xmm3 |
| 688 .byte 102,15,58,68,194,0 |
| 689 .byte 102,15,58,68,202,17 |
| 690 .byte 102,15,58,68,222,0 |
| 691 pxor %xmm0,%xmm3 |
| 692 pxor %xmm1,%xmm3 |
| 693 |
# Split the middle term across the low (xmm0) and high (xmm1) halves.
| 694 movdqa %xmm3,%xmm4 |
| 695 psrldq $8,%xmm3 |
| 696 pslldq $8,%xmm4 |
| 697 pxor %xmm3,%xmm1 |
| 698 pxor %xmm4,%xmm0 |
| 699 |
# Fold the 256-bit value xmm1:xmm0 down to 128 bits in xmm0
# (first phase: left shifts by 5, 1, 57; second phase: right shifts by 1, 5, 1).
| 700 movdqa %xmm0,%xmm4 |
| 701 movdqa %xmm0,%xmm3 |
| 702 psllq $5,%xmm0 |
| 703 pxor %xmm0,%xmm3 |
| 704 psllq $1,%xmm0 |
| 705 pxor %xmm3,%xmm0 |
| 706 psllq $57,%xmm0 |
| 707 movdqa %xmm0,%xmm3 |
| 708 pslldq $8,%xmm0 |
| 709 psrldq $8,%xmm3 |
| 710 pxor %xmm4,%xmm0 |
| 711 pxor %xmm3,%xmm1 |
| 712 |
| 713 |
| 714 movdqa %xmm0,%xmm4 |
| 715 psrlq $1,%xmm0 |
| 716 pxor %xmm4,%xmm1 |
| 717 pxor %xmm0,%xmm4 |
| 718 psrlq $5,%xmm0 |
| 719 pxor %xmm4,%xmm0 |
| 720 psrlq $1,%xmm0 |
| 721 pxor %xmm1,%xmm0 |
# Store V, V2 and the packed hi^lo pair (palignr joins xmm3 and xmm4).
| 722 pshufd $78,%xmm2,%xmm3 |
| 723 pshufd $78,%xmm0,%xmm4 |
| 724 pxor %xmm2,%xmm3 |
| 725 movdqu %xmm2,0(%rdi) |
| 726 pxor %xmm0,%xmm4 |
| 727 movdqu %xmm0,16(%rdi) |
| 728 .byte 102,15,58,15,227,8 |
| 729 movdqu %xmm4,32(%rdi) |
# Second product: xmm1:xmm0 = V2 * V, then the same fold as above.
| 730 movdqa %xmm0,%xmm1 |
| 731 pshufd $78,%xmm0,%xmm3 |
| 732 pxor %xmm0,%xmm3 |
| 733 .byte 102,15,58,68,194,0 |
| 734 .byte 102,15,58,68,202,17 |
| 735 .byte 102,15,58,68,222,0 |
| 736 pxor %xmm0,%xmm3 |
| 737 pxor %xmm1,%xmm3 |
| 738 |
| 739 movdqa %xmm3,%xmm4 |
| 740 psrldq $8,%xmm3 |
| 741 pslldq $8,%xmm4 |
| 742 pxor %xmm3,%xmm1 |
| 743 pxor %xmm4,%xmm0 |
| 744 |
| 745 movdqa %xmm0,%xmm4 |
| 746 movdqa %xmm0,%xmm3 |
| 747 psllq $5,%xmm0 |
| 748 pxor %xmm0,%xmm3 |
| 749 psllq $1,%xmm0 |
| 750 pxor %xmm3,%xmm0 |
| 751 psllq $57,%xmm0 |
| 752 movdqa %xmm0,%xmm3 |
| 753 pslldq $8,%xmm0 |
| 754 psrldq $8,%xmm3 |
| 755 pxor %xmm4,%xmm0 |
| 756 pxor %xmm3,%xmm1 |
| 757 |
| 758 |
| 759 movdqa %xmm0,%xmm4 |
| 760 psrlq $1,%xmm0 |
| 761 pxor %xmm4,%xmm1 |
| 762 pxor %xmm0,%xmm4 |
| 763 psrlq $5,%xmm0 |
| 764 pxor %xmm4,%xmm0 |
| 765 psrlq $1,%xmm0 |
| 766 pxor %xmm1,%xmm0 |
# Keep V3 in xmm5, then third product: xmm1:xmm0 = V3 * V, folded as above.
| 767 movdqa %xmm0,%xmm5 |
| 768 movdqa %xmm0,%xmm1 |
| 769 pshufd $78,%xmm0,%xmm3 |
| 770 pxor %xmm0,%xmm3 |
| 771 .byte 102,15,58,68,194,0 |
| 772 .byte 102,15,58,68,202,17 |
| 773 .byte 102,15,58,68,222,0 |
| 774 pxor %xmm0,%xmm3 |
| 775 pxor %xmm1,%xmm3 |
| 776 |
| 777 movdqa %xmm3,%xmm4 |
| 778 psrldq $8,%xmm3 |
| 779 pslldq $8,%xmm4 |
| 780 pxor %xmm3,%xmm1 |
| 781 pxor %xmm4,%xmm0 |
| 782 |
| 783 movdqa %xmm0,%xmm4 |
| 784 movdqa %xmm0,%xmm3 |
| 785 psllq $5,%xmm0 |
| 786 pxor %xmm0,%xmm3 |
| 787 psllq $1,%xmm0 |
| 788 pxor %xmm3,%xmm0 |
| 789 psllq $57,%xmm0 |
| 790 movdqa %xmm0,%xmm3 |
| 791 pslldq $8,%xmm0 |
| 792 psrldq $8,%xmm3 |
| 793 pxor %xmm4,%xmm0 |
| 794 pxor %xmm3,%xmm1 |
| 795 |
| 796 |
| 797 movdqa %xmm0,%xmm4 |
| 798 psrlq $1,%xmm0 |
| 799 pxor %xmm4,%xmm1 |
| 800 pxor %xmm0,%xmm4 |
| 801 psrlq $5,%xmm0 |
| 802 pxor %xmm4,%xmm0 |
| 803 psrlq $1,%xmm0 |
| 804 pxor %xmm1,%xmm0 |
# Store V3, V4 and their packed hi^lo pair.
| 805 pshufd $78,%xmm5,%xmm3 |
| 806 pshufd $78,%xmm0,%xmm4 |
| 807 pxor %xmm5,%xmm3 |
| 808 movdqu %xmm5,48(%rdi) |
| 809 pxor %xmm0,%xmm4 |
| 810 movdqu %xmm0,64(%rdi) |
| 811 .byte 102,15,58,15,227,8 |
| 812 movdqu %xmm4,80(%rdi) |
# Bytes F3 C3 encode "rep ret".
| 813 .byte 0xf3,0xc3 |
| 814 |
# _gcm_gmult_clmul
# Multiplies the 16-byte block at (rdi) by the value at (rsi) using carry-less
# multiplication and writes the reduced 128-bit result back to (rdi).
#   xmm0  block from (rdi), byte-reversed with L$bswap_mask on load and store
#   xmm2  16 bytes at 0(rsi)
#   xmm4  16 bytes at 32(rsi); its low qword is the multiplier of the middle
#         (hi^lo) product
# NOTE(review): presumably rdi = Xi and rsi = the table written by
# _gcm_init_clmul - confirm against the C prototype.
# Hand-encoded instructions:
#   102,15,56,0,197      pshufb    %xmm5,%xmm0
#   102,15,58,68,194,0   pclmulqdq $0x00,%xmm2,%xmm0
#   102,15,58,68,202,17  pclmulqdq $0x11,%xmm2,%xmm1
#   102,15,58,68,220,0   pclmulqdq $0x00,%xmm4,%xmm3
| 815 .globl _gcm_gmult_clmul |
| 816 |
| 817 .p2align 4 |
| 818 _gcm_gmult_clmul: |
| 819 L$_gmult_clmul: |
| 820 movdqu (%rdi),%xmm0 |
| 821 movdqa L$bswap_mask(%rip),%xmm5 |
| 822 movdqu (%rsi),%xmm2 |
| 823 movdqu 32(%rsi),%xmm4 |
| 824 .byte 102,15,56,0,197 |
# Three-multiply product: low*low in xmm0, high*high in xmm1, middle in xmm3.
| 825 movdqa %xmm0,%xmm1 |
| 826 pshufd $78,%xmm0,%xmm3 |
| 827 pxor %xmm0,%xmm3 |
| 828 .byte 102,15,58,68,194,0 |
| 829 .byte 102,15,58,68,202,17 |
| 830 .byte 102,15,58,68,220,0 |
| 831 pxor %xmm0,%xmm3 |
| 832 pxor %xmm1,%xmm3 |
| 833 |
| 834 movdqa %xmm3,%xmm4 |
| 835 psrldq $8,%xmm3 |
| 836 pslldq $8,%xmm4 |
| 837 pxor %xmm3,%xmm1 |
| 838 pxor %xmm4,%xmm0 |
| 839 |
# Fold the 256-bit value xmm1:xmm0 down to 128 bits in xmm0.
| 840 movdqa %xmm0,%xmm4 |
| 841 movdqa %xmm0,%xmm3 |
| 842 psllq $5,%xmm0 |
| 843 pxor %xmm0,%xmm3 |
| 844 psllq $1,%xmm0 |
| 845 pxor %xmm3,%xmm0 |
| 846 psllq $57,%xmm0 |
| 847 movdqa %xmm0,%xmm3 |
| 848 pslldq $8,%xmm0 |
| 849 psrldq $8,%xmm3 |
| 850 pxor %xmm4,%xmm0 |
| 851 pxor %xmm3,%xmm1 |
| 852 |
| 853 |
| 854 movdqa %xmm0,%xmm4 |
| 855 psrlq $1,%xmm0 |
| 856 pxor %xmm4,%xmm1 |
| 857 pxor %xmm0,%xmm4 |
| 858 psrlq $5,%xmm0 |
| 859 pxor %xmm4,%xmm0 |
| 860 psrlq $1,%xmm0 |
| 861 pxor %xmm1,%xmm0 |
# Byte-reverse again and store the result.
| 862 .byte 102,15,56,0,197 |
| 863 movdqu %xmm0,(%rdi) |
# Bytes F3 C3 encode "rep ret".
| 864 .byte 0xf3,0xc3 |
| 865 |
# _gcm_ghash_clmul - entry, path selection and setup of the 4-blocks-at-a-time
# path.
# Register roles established here:
#   rdi    16-byte state, loaded into xmm0 and byte-reversed
#   rsi    precomputed table; xmm2 = 0(rsi), xmm6 = 16(rsi), xmm7 = 32(rsi),
#          xmm14 = 48(rsi), xmm15 = 64(rsi), and 80(rsi) is loaded into xmm7 later
#   rdx    input pointer, read in 16-byte blocks
#   rcx    remaining byte count minus 16 (the subtraction is done up front)
#   xmm10  L$bswap_mask
# NOTE(review): presumably rsi is the table written by _gcm_init_clmul and
# rdx/rcx are the input buffer and its length - confirm against the C prototype.
# Hand-encoded instructions in this part:
#   102,65,15,56,0,194       pshufb    %xmm10,%xmm0
#   102,65,15,56,0,218       pshufb    %xmm10,%xmm3
#   102,69,15,56,0,218       pshufb    %xmm10,%xmm11
#   102,69,15,56,0,194       pshufb    %xmm10,%xmm8
#   102,15,58,68,218,0       pclmulqdq $0x00,%xmm2,%xmm3
#   102,15,58,68,234,17      pclmulqdq $0x11,%xmm2,%xmm5
#   102,15,58,68,231,0       pclmulqdq $0x00,%xmm7,%xmm4
#   102,68,15,58,68,222,0    pclmulqdq $0x00,%xmm6,%xmm11
#   102,68,15,58,68,238,17   pclmulqdq $0x11,%xmm6,%xmm13
#   102,68,15,58,68,231,16   pclmulqdq $0x10,%xmm7,%xmm12
#   102,69,15,58,68,222,0    pclmulqdq $0x00,%xmm14,%xmm11
#   102,69,15,58,68,238,17   pclmulqdq $0x11,%xmm14,%xmm13
#   102,68,15,58,68,231,0    pclmulqdq $0x00,%xmm7,%xmm12
| 866 .globl _gcm_ghash_clmul |
| 867 |
| 868 .p2align 5 |
| 869 _gcm_ghash_clmul: |
| 870 L$_ghash_clmul: |
| 871 movdqa L$bswap_mask(%rip),%xmm10 |
| 872 |
| 873 movdqu (%rdi),%xmm0 |
| 874 movdqu (%rsi),%xmm2 |
| 875 movdqu 32(%rsi),%xmm7 |
| 876 .byte 102,65,15,56,0,194 |
| 877 |
# Exactly one 16-byte block: go straight to the single-block tail.
| 878 subq $16,%rcx |
| 879 jz L$odd_tail |
| 880 |
# Fewer than 48 bytes beyond the first block: use the 2-block path.
| 881 movdqu 16(%rsi),%xmm6 |
| 882 movl _OPENSSL_ia32cap_P+4(%rip),%eax |
| 883 cmpq $48,%rcx |
| 884 jb L$skip4x |
| 885 |
# 71303168 = 0x04400000 (bits 26 and 22), 4194304 = 0x00400000 (bit 22): the
# 2-block path is also taken when bit 22 of this capability word is set and
# bit 26 is clear.
# NOTE(review): presumably these are the CPUID.1:ECX MOVBE and XSAVE bits and
# the test singles out a CPU family on which the 4x path is slower - confirm
# against the definition of OPENSSL_ia32cap_P.
| 886 andl $71303168,%eax |
| 887 cmpl $4194304,%eax |
| 888 je L$skip4x |
| 889 |
# 4x path.  rax = 0xA040608000000000; within the visible source it is only
# read by the movq to xmm9 inside L$mod4_loop.
| 890 subq $48,%rcx |
| 891 movq $11547335547999543296,%rax |
| 892 movdqu 48(%rsi),%xmm14 |
| 893 movdqu 64(%rsi),%xmm15 |
| 894 |
| 895 |
| 896 |
| 897 |
# Blocks 3 and 2 of the first group (offsets 48 and 32): block 3 is multiplied
# by xmm2, block 2 by xmm6; partial products accumulate in xmm3 (low),
# xmm5 (high) and xmm4 (middle).
| 898 movdqu 48(%rdx),%xmm3 |
| 899 movdqu 32(%rdx),%xmm11 |
| 900 .byte 102,65,15,56,0,218 |
| 901 .byte 102,69,15,56,0,218 |
| 902 movdqa %xmm3,%xmm5 |
| 903 pshufd $78,%xmm3,%xmm4 |
| 904 pxor %xmm3,%xmm4 |
| 905 .byte 102,15,58,68,218,0 |
| 906 .byte 102,15,58,68,234,17 |
| 907 .byte 102,15,58,68,231,0 |
| 908 |
| 909 movdqa %xmm11,%xmm13 |
| 910 pshufd $78,%xmm11,%xmm12 |
| 911 pxor %xmm11,%xmm12 |
| 912 .byte 102,68,15,58,68,222,0 |
| 913 .byte 102,68,15,58,68,238,17 |
| 914 .byte 102,68,15,58,68,231,16 |
| 915 xorps %xmm11,%xmm3 |
| 916 xorps %xmm13,%xmm5 |
| 917 movups 80(%rsi),%xmm7 |
| 918 xorps %xmm12,%xmm4 |
| 919 |
# Blocks 1 and 0 (offsets 16 and 0): block 1 is multiplied by xmm14; block 0
# is XORed into the running state xmm0, whose multiplication is left to the
# loop or to L$tail4x (xmm1 = copy of xmm0, xmm8 = hi^lo of xmm0).
| 920 movdqu 16(%rdx),%xmm11 |
| 921 movdqu 0(%rdx),%xmm8 |
| 922 .byte 102,69,15,56,0,218 |
| 923 .byte 102,69,15,56,0,194 |
| 924 movdqa %xmm11,%xmm13 |
| 925 pshufd $78,%xmm11,%xmm12 |
| 926 pxor %xmm8,%xmm0 |
| 927 pxor %xmm11,%xmm12 |
| 928 .byte 102,69,15,58,68,222,0 |
| 929 movdqa %xmm0,%xmm1 |
| 930 pshufd $78,%xmm0,%xmm8 |
| 931 pxor %xmm0,%xmm8 |
| 932 .byte 102,69,15,58,68,238,17 |
| 933 .byte 102,68,15,58,68,231,0 |
| 934 xorps %xmm11,%xmm3 |
| 935 xorps %xmm13,%xmm5 |
| 936 |
# Advance past the group; a borrow means no further full 64-byte group remains.
| 937 leaq 64(%rdx),%rdx |
| 938 subq $64,%rcx |
| 939 jc L$tail4x |
| 940 |
| 941 jmp L$mod4_loop |
# L$mod4_loop - main loop of the 4x path, 64 input bytes (at rdx) per pass.
# The instruction stream interleaves three activities:
#   1. the products for the running state of the previous group
#      (xmm0 and xmm1 by xmm15, xmm8 by the high qword of xmm7),
#   2. folding the 256-bit sum xmm1:xmm0 back to 128 bits,
#   3. loading, byte-reversing and multiplying the next four blocks, with
#      partial sums again collected in xmm3 (low), xmm5 (high), xmm4 (middle).
# xmm7 alternates between 32(rsi) and 80(rsi).  The order of the instructions
# is scheduling-sensitive; do not reorder.
# Hand-encoded instructions in this part:
#   102,65,15,58,68,199,0    pclmulqdq $0x00,%xmm15,%xmm0
#   102,65,15,58,68,207,17   pclmulqdq $0x11,%xmm15,%xmm1
#   102,68,15,58,68,199,16   pclmulqdq $0x10,%xmm7,%xmm8
#   102,68,15,58,68,218,0    pclmulqdq $0x00,%xmm2,%xmm11
#   102,68,15,58,68,234,17   pclmulqdq $0x11,%xmm2,%xmm13
#   102,68,15,58,68,231,0    pclmulqdq $0x00,%xmm7,%xmm12
#   102,15,58,68,222,0       pclmulqdq $0x00,%xmm6,%xmm3
#   102,15,58,68,238,17      pclmulqdq $0x11,%xmm6,%xmm5
#   102,15,58,68,231,16      pclmulqdq $0x10,%xmm7,%xmm4
#   102,69,15,58,68,222,0    pclmulqdq $0x00,%xmm14,%xmm11
#   102,69,15,58,68,238,17   pclmulqdq $0x11,%xmm14,%xmm13
#   102,69,15,56,0,218       pshufb    %xmm10,%xmm11
#   102,65,15,56,0,218       pshufb    %xmm10,%xmm3
#   102,69,15,56,0,194       pshufb    %xmm10,%xmm8
#   102,76,15,110,200        movq      %rax,%xmm9
#   102,69,15,56,0,200       pshufb    %xmm8,%xmm9
| 942 .p2align 5 |
| 943 L$mod4_loop: |
| 944 .byte 102,65,15,58,68,199,0 |
| 945 xorps %xmm12,%xmm4 |
| 946 movdqu 48(%rdx),%xmm11 |
| 947 .byte 102,69,15,56,0,218 |
| 948 .byte 102,65,15,58,68,207,17 |
| 949 xorps %xmm3,%xmm0 |
| 950 movdqu 32(%rdx),%xmm3 |
| 951 movdqa %xmm11,%xmm13 |
| 952 .byte 102,68,15,58,68,199,16 |
| 953 pshufd $78,%xmm11,%xmm12 |
| 954 xorps %xmm5,%xmm1 |
| 955 pxor %xmm11,%xmm12 |
| 956 .byte 102,65,15,56,0,218 |
| 957 movups 32(%rsi),%xmm7 |
| 958 xorps %xmm4,%xmm8 |
| 959 .byte 102,68,15,58,68,218,0 |
| 960 pshufd $78,%xmm3,%xmm4 |
| 961 |
| 962 pxor %xmm0,%xmm8 |
| 963 movdqa %xmm3,%xmm5 |
| 964 pxor %xmm1,%xmm8 |
| 965 pxor %xmm3,%xmm4 |
| 966 movdqa %xmm8,%xmm9 |
| 967 .byte 102,68,15,58,68,234,17 |
| 968 pslldq $8,%xmm8 |
| 969 psrldq $8,%xmm9 |
| 970 pxor %xmm8,%xmm0 |
# First fold phase: xmm8 = xmm0 masked with L$7_mask is used as the pshufb
# selector into the constant held in rax (moved into xmm9).
| 971 movdqa L$7_mask(%rip),%xmm8 |
| 972 pxor %xmm9,%xmm1 |
| 973 .byte 102,76,15,110,200 |
| 974 |
| 975 pand %xmm0,%xmm8 |
| 976 .byte 102,69,15,56,0,200 |
| 977 pxor %xmm0,%xmm9 |
| 978 .byte 102,68,15,58,68,231,0 |
| 979 psllq $57,%xmm9 |
| 980 movdqa %xmm9,%xmm8 |
| 981 pslldq $8,%xmm9 |
| 982 .byte 102,15,58,68,222,0 |
| 983 psrldq $8,%xmm8 |
| 984 pxor %xmm9,%xmm0 |
| 985 pxor %xmm8,%xmm1 |
| 986 movdqu 0(%rdx),%xmm8 |
| 987 |
# Second fold phase (right shifts by 1, 5, 1), interleaved with blocks 1 and 0.
| 988 movdqa %xmm0,%xmm9 |
| 989 psrlq $1,%xmm0 |
| 990 .byte 102,15,58,68,238,17 |
| 991 xorps %xmm11,%xmm3 |
| 992 movdqu 16(%rdx),%xmm11 |
| 993 .byte 102,69,15,56,0,218 |
| 994 .byte 102,15,58,68,231,16 |
| 995 xorps %xmm13,%xmm5 |
| 996 movups 80(%rsi),%xmm7 |
| 997 .byte 102,69,15,56,0,194 |
| 998 pxor %xmm9,%xmm1 |
| 999 pxor %xmm0,%xmm9 |
| 1000 psrlq $5,%xmm0 |
| 1001 |
| 1002 movdqa %xmm11,%xmm13 |
| 1003 pxor %xmm12,%xmm4 |
| 1004 pshufd $78,%xmm11,%xmm12 |
| 1005 pxor %xmm9,%xmm0 |
| 1006 pxor %xmm8,%xmm1 |
| 1007 pxor %xmm11,%xmm12 |
| 1008 .byte 102,69,15,58,68,222,0 |
| 1009 psrlq $1,%xmm0 |
| 1010 pxor %xmm1,%xmm0 |
| 1011 movdqa %xmm0,%xmm1 |
| 1012 .byte 102,69,15,58,68,238,17 |
| 1013 xorps %xmm11,%xmm3 |
| 1014 pshufd $78,%xmm0,%xmm8 |
| 1015 pxor %xmm0,%xmm8 |
| 1016 |
| 1017 .byte 102,68,15,58,68,231,0 |
| 1018 xorps %xmm13,%xmm5 |
| 1019 |
# Next group; leave the loop when the subtraction borrows.
| 1020 leaq 64(%rdx),%rdx |
| 1021 subq $64,%rcx |
| 1022 jnc L$mod4_loop |
| 1023 |
# L$tail4x - finishes the last 4-block group: multiplies the running state
# (xmm0, xmm1, xmm8) by xmm15 / xmm7, adds the pending partial sums
# (xmm3 low, xmm5 high, xmm4 and xmm12 middle), folds the 256-bit result
# xmm1:xmm0 to 128 bits, then dispatches on the bytes that are left.
# Hand-encoded instructions:
#   102,65,15,58,68,199,0    pclmulqdq $0x00,%xmm15,%xmm0
#   102,65,15,58,68,207,17   pclmulqdq $0x11,%xmm15,%xmm1
#   102,68,15,58,68,199,16   pclmulqdq $0x10,%xmm7,%xmm8
| 1024 L$tail4x: |
| 1025 .byte 102,65,15,58,68,199,0 |
| 1026 .byte 102,65,15,58,68,207,17 |
| 1027 .byte 102,68,15,58,68,199,16 |
| 1028 xorps %xmm12,%xmm4 |
| 1029 xorps %xmm3,%xmm0 |
| 1030 xorps %xmm5,%xmm1 |
| 1031 pxor %xmm0,%xmm1 |
| 1032 pxor %xmm4,%xmm8 |
| 1033 |
| 1034 pxor %xmm1,%xmm8 |
| 1035 pxor %xmm0,%xmm1 |
| 1036 |
# Split the middle term across the low (xmm0) and high (xmm1) halves.
| 1037 movdqa %xmm8,%xmm9 |
| 1038 psrldq $8,%xmm8 |
| 1039 pslldq $8,%xmm9 |
| 1040 pxor %xmm8,%xmm1 |
| 1041 pxor %xmm9,%xmm0 |
| 1042 |
# Fold xmm1:xmm0 down to 128 bits in xmm0.
| 1043 movdqa %xmm0,%xmm4 |
| 1044 movdqa %xmm0,%xmm3 |
| 1045 psllq $5,%xmm0 |
| 1046 pxor %xmm0,%xmm3 |
| 1047 psllq $1,%xmm0 |
| 1048 pxor %xmm3,%xmm0 |
| 1049 psllq $57,%xmm0 |
| 1050 movdqa %xmm0,%xmm3 |
| 1051 pslldq $8,%xmm0 |
| 1052 psrldq $8,%xmm3 |
| 1053 pxor %xmm4,%xmm0 |
| 1054 pxor %xmm3,%xmm1 |
| 1055 |
| 1056 |
| 1057 movdqa %xmm0,%xmm4 |
| 1058 psrlq $1,%xmm0 |
| 1059 pxor %xmm4,%xmm1 |
| 1060 pxor %xmm0,%xmm4 |
| 1061 psrlq $5,%xmm0 |
| 1062 pxor %xmm4,%xmm0 |
| 1063 psrlq $1,%xmm0 |
| 1064 pxor %xmm1,%xmm0 |
# Undo the last subtraction.  rcx == 0: nothing left.  Otherwise reload xmm7
# from 32(rsi) for the 1- and 2-block paths; exactly 16 bytes left goes to
# L$odd_tail, anything more falls through into L$skip4x.
| 1065 addq $64,%rcx |
| 1066 jz L$done |
| 1067 movdqu 32(%rsi),%xmm7 |
| 1068 subq $16,%rcx |
| 1069 jz L$odd_tail |
# L$skip4x - start of the 2-blocks-at-a-time path.
# Loads two blocks, byte-reverses them, XORs the first into the running state
# xmm0 and starts the products of the second block with xmm2 (low in xmm3,
# high in xmm5) and with the low qword of xmm7 (middle, xmm4).
# Hand-encoded instructions:
#   102,69,15,56,0,194    pshufb    %xmm10,%xmm8
#   102,65,15,56,0,218    pshufb    %xmm10,%xmm3
#   102,15,58,68,218,0    pclmulqdq $0x00,%xmm2,%xmm3
#   102,15,58,68,234,17   pclmulqdq $0x11,%xmm2,%xmm5
#   102,15,58,68,231,0    pclmulqdq $0x00,%xmm7,%xmm4
| 1070 L$skip4x: |
| 1071 |
| 1072 |
| 1073 |
| 1074 |
| 1075 |
| 1076 movdqu (%rdx),%xmm8 |
| 1077 movdqu 16(%rdx),%xmm3 |
| 1078 .byte 102,69,15,56,0,194 |
| 1079 .byte 102,65,15,56,0,218 |
| 1080 pxor %xmm8,%xmm0 |
| 1081 |
| 1082 movdqa %xmm3,%xmm5 |
| 1083 pshufd $78,%xmm3,%xmm4 |
| 1084 pxor %xmm3,%xmm4 |
| 1085 .byte 102,15,58,68,218,0 |
| 1086 .byte 102,15,58,68,234,17 |
| 1087 .byte 102,15,58,68,231,0 |
| 1088 |
# Enter the loop only while more than 32 bytes remain after this pair
# (jbe is an unsigned test: taken on borrow or zero).
| 1089 leaq 32(%rdx),%rdx |
| 1090 nop |
| 1091 subq $32,%rcx |
| 1092 jbe L$even_tail |
| 1093 nop |
| 1094 jmp L$mod_loop |
| 1095 |
# L$mod_loop - main loop of the 2x path, 32 input bytes (at rdx) per pass.
# On entry xmm0 is the running state (already XORed with the first block of
# the pending pair) and xmm3/xmm5/xmm4 hold the low/high/middle partial
# products of the second block.  Each pass multiplies xmm0 by xmm6 (middle
# term through the high qword of xmm7), adds the pending partial products,
# folds to 128 bits, and at the same time loads the next pair and starts the
# products for its second block.  The order is scheduling-sensitive.
# Hand-encoded instructions:
#   102,15,58,68,198,0    pclmulqdq $0x00,%xmm6,%xmm0
#   102,15,58,68,206,17   pclmulqdq $0x11,%xmm6,%xmm1
#   102,15,58,68,231,16   pclmulqdq $0x10,%xmm7,%xmm4
#   102,69,15,56,0,202    pshufb    %xmm10,%xmm9
#   102,65,15,56,0,218    pshufb    %xmm10,%xmm3
#   102,15,58,68,218,0    pclmulqdq $0x00,%xmm2,%xmm3
#   102,15,58,68,234,17   pclmulqdq $0x11,%xmm2,%xmm5
#   102,15,58,68,231,0    pclmulqdq $0x00,%xmm7,%xmm4
| 1096 .p2align 5 |
| 1097 L$mod_loop: |
| 1098 movdqa %xmm0,%xmm1 |
| 1099 movdqa %xmm4,%xmm8 |
| 1100 pshufd $78,%xmm0,%xmm4 |
| 1101 pxor %xmm0,%xmm4 |
| 1102 |
| 1103 .byte 102,15,58,68,198,0 |
| 1104 .byte 102,15,58,68,206,17 |
| 1105 .byte 102,15,58,68,231,16 |
| 1106 |
| 1107 pxor %xmm3,%xmm0 |
| 1108 pxor %xmm5,%xmm1 |
| 1109 movdqu (%rdx),%xmm9 |
| 1110 pxor %xmm0,%xmm8 |
| 1111 .byte 102,69,15,56,0,202 |
| 1112 movdqu 16(%rdx),%xmm3 |
| 1113 |
# The first block of the next pair (xmm9) is XORed into the high half xmm1
# before the fold, so it ends up added to the reduced state.
| 1114 pxor %xmm1,%xmm8 |
| 1115 pxor %xmm9,%xmm1 |
| 1116 pxor %xmm8,%xmm4 |
| 1117 .byte 102,65,15,56,0,218 |
| 1118 movdqa %xmm4,%xmm8 |
| 1119 psrldq $8,%xmm8 |
| 1120 pslldq $8,%xmm4 |
| 1121 pxor %xmm8,%xmm1 |
| 1122 pxor %xmm4,%xmm0 |
| 1123 |
| 1124 movdqa %xmm3,%xmm5 |
| 1125 |
| 1126 movdqa %xmm0,%xmm9 |
| 1127 movdqa %xmm0,%xmm8 |
| 1128 psllq $5,%xmm0 |
| 1129 pxor %xmm0,%xmm8 |
| 1130 .byte 102,15,58,68,218,0 |
| 1131 psllq $1,%xmm0 |
| 1132 pxor %xmm8,%xmm0 |
| 1133 psllq $57,%xmm0 |
| 1134 movdqa %xmm0,%xmm8 |
| 1135 pslldq $8,%xmm0 |
| 1136 psrldq $8,%xmm8 |
| 1137 pxor %xmm9,%xmm0 |
| 1138 pshufd $78,%xmm5,%xmm4 |
| 1139 pxor %xmm8,%xmm1 |
| 1140 pxor %xmm5,%xmm4 |
| 1141 |
| 1142 movdqa %xmm0,%xmm9 |
| 1143 psrlq $1,%xmm0 |
| 1144 .byte 102,15,58,68,234,17 |
| 1145 pxor %xmm9,%xmm1 |
| 1146 pxor %xmm0,%xmm9 |
| 1147 psrlq $5,%xmm0 |
| 1148 pxor %xmm9,%xmm0 |
| 1149 leaq 32(%rdx),%rdx |
| 1150 psrlq $1,%xmm0 |
| 1151 .byte 102,15,58,68,231,0 |
| 1152 pxor %xmm1,%xmm0 |
| 1153 |
# Unsigned test: repeat while the subtraction neither borrows nor reaches zero.
| 1154 subq $32,%rcx |
| 1155 ja L$mod_loop |
| 1156 |
# L$even_tail - completes the last pending pair of the 2x path without loading
# new input: multiplies the running state xmm0 by xmm6 (middle term through
# the high qword of xmm7), adds the pending partial products xmm3/xmm5/xmm4
# and folds the 256-bit result xmm1:xmm0 to 128 bits in xmm0.
# Hand-encoded instructions:
#   102,15,58,68,198,0    pclmulqdq $0x00,%xmm6,%xmm0
#   102,15,58,68,206,17   pclmulqdq $0x11,%xmm6,%xmm1
#   102,15,58,68,231,16   pclmulqdq $0x10,%xmm7,%xmm4
| 1157 L$even_tail: |
| 1158 movdqa %xmm0,%xmm1 |
| 1159 movdqa %xmm4,%xmm8 |
| 1160 pshufd $78,%xmm0,%xmm4 |
| 1161 pxor %xmm0,%xmm4 |
| 1162 |
| 1163 .byte 102,15,58,68,198,0 |
| 1164 .byte 102,15,58,68,206,17 |
| 1165 .byte 102,15,58,68,231,16 |
| 1166 |
| 1167 pxor %xmm3,%xmm0 |
| 1168 pxor %xmm5,%xmm1 |
| 1169 pxor %xmm0,%xmm8 |
| 1170 pxor %xmm1,%xmm8 |
| 1171 pxor %xmm8,%xmm4 |
| 1172 movdqa %xmm4,%xmm8 |
| 1173 psrldq $8,%xmm8 |
| 1174 pslldq $8,%xmm4 |
| 1175 pxor %xmm8,%xmm1 |
| 1176 pxor %xmm4,%xmm0 |
| 1177 |
| 1178 movdqa %xmm0,%xmm4 |
| 1179 movdqa %xmm0,%xmm3 |
| 1180 psllq $5,%xmm0 |
| 1181 pxor %xmm0,%xmm3 |
| 1182 psllq $1,%xmm0 |
| 1183 pxor %xmm3,%xmm0 |
| 1184 psllq $57,%xmm0 |
| 1185 movdqa %xmm0,%xmm3 |
| 1186 pslldq $8,%xmm0 |
| 1187 psrldq $8,%xmm3 |
| 1188 pxor %xmm4,%xmm0 |
| 1189 pxor %xmm3,%xmm1 |
| 1190 |
| 1191 |
| 1192 movdqa %xmm0,%xmm4 |
| 1193 psrlq $1,%xmm0 |
| 1194 pxor %xmm4,%xmm1 |
| 1195 pxor %xmm0,%xmm4 |
| 1196 psrlq $5,%xmm0 |
| 1197 pxor %xmm4,%xmm0 |
| 1198 psrlq $1,%xmm0 |
| 1199 pxor %xmm1,%xmm0 |
# rcx == 0 here falls through to L$odd_tail (one more block is read there);
# any other value (the earlier subtraction borrowed) means the input is used up.
| 1200 testq %rcx,%rcx |
| 1201 jnz L$done |
| 1202 |
# L$odd_tail - single-block step: loads one 16-byte block from (rdx),
# byte-reverses it, XORs it into the state xmm0, multiplies by xmm2 (middle
# term through the low qword of xmm7) and folds xmm1:xmm0 to 128 bits.
# L$done - byte-reverses xmm0 back and stores it to (rdi).
# Hand-encoded instructions:
#   102,69,15,56,0,194    pshufb    %xmm10,%xmm8
#   102,15,58,68,194,0    pclmulqdq $0x00,%xmm2,%xmm0
#   102,15,58,68,202,17   pclmulqdq $0x11,%xmm2,%xmm1
#   102,15,58,68,223,0    pclmulqdq $0x00,%xmm7,%xmm3
#   102,65,15,56,0,194    pshufb    %xmm10,%xmm0
| 1203 L$odd_tail: |
| 1204 movdqu (%rdx),%xmm8 |
| 1205 .byte 102,69,15,56,0,194 |
| 1206 pxor %xmm8,%xmm0 |
| 1207 movdqa %xmm0,%xmm1 |
| 1208 pshufd $78,%xmm0,%xmm3 |
| 1209 pxor %xmm0,%xmm3 |
| 1210 .byte 102,15,58,68,194,0 |
| 1211 .byte 102,15,58,68,202,17 |
| 1212 .byte 102,15,58,68,223,0 |
| 1213 pxor %xmm0,%xmm3 |
| 1214 pxor %xmm1,%xmm3 |
| 1215 |
| 1216 movdqa %xmm3,%xmm4 |
| 1217 psrldq $8,%xmm3 |
| 1218 pslldq $8,%xmm4 |
| 1219 pxor %xmm3,%xmm1 |
| 1220 pxor %xmm4,%xmm0 |
| 1221 |
| 1222 movdqa %xmm0,%xmm4 |
| 1223 movdqa %xmm0,%xmm3 |
| 1224 psllq $5,%xmm0 |
| 1225 pxor %xmm0,%xmm3 |
| 1226 psllq $1,%xmm0 |
| 1227 pxor %xmm3,%xmm0 |
| 1228 psllq $57,%xmm0 |
| 1229 movdqa %xmm0,%xmm3 |
| 1230 pslldq $8,%xmm0 |
| 1231 psrldq $8,%xmm3 |
| 1232 pxor %xmm4,%xmm0 |
| 1233 pxor %xmm3,%xmm1 |
| 1234 |
| 1235 |
| 1236 movdqa %xmm0,%xmm4 |
| 1237 psrlq $1,%xmm0 |
| 1238 pxor %xmm4,%xmm1 |
| 1239 pxor %xmm0,%xmm4 |
| 1240 psrlq $5,%xmm0 |
| 1241 pxor %xmm4,%xmm0 |
| 1242 psrlq $1,%xmm0 |
| 1243 pxor %xmm1,%xmm0 |
| 1244 L$done: |
| 1245 .byte 102,65,15,56,0,194 |
| 1246 movdqu %xmm0,(%rdi) |
# Bytes F3 C3 encode "rep ret".
| 1247 .byte 0xf3,0xc3 |
| 1248 |
# _gcm_init_avx - exported entry point that contains no code of its own; it
# jumps straight to the body of _gcm_init_clmul with all registers untouched.
| 1249 .globl _gcm_init_avx |
| 1250 |
| 1251 .p2align 5 |
| 1252 _gcm_init_avx: |
| 1253 jmp L$_init_clmul |
| 1254 |
# _gcm_gmult_avx - exported entry point that contains no code of its own; it
# jumps straight to the body of _gcm_gmult_clmul with all registers untouched.
| 1255 .globl _gcm_gmult_avx |
| 1256 |
| 1257 .p2align 5 |
| 1258 _gcm_gmult_avx: |
| 1259 jmp L$_gmult_clmul |
| 1260 |
# _gcm_ghash_avx - exported entry point that contains no code of its own; it
# jumps straight to the body of _gcm_ghash_clmul with all registers untouched.
| 1261 .globl _gcm_ghash_avx |
| 1262 |
| 1263 .p2align 5 |
| 1264 _gcm_ghash_avx: |
| 1265 jmp L$_ghash_clmul |
| 1266 |
# Vector constants, 64-byte aligned.  They are emitted into whatever section
# is current at this point.
#   L$bswap_mask        pshufb selector 15..0: reverses the byte order of a
#                       16-byte vector (loaded with movdqa, so the alignment
#                       above is required)
#   L$0x1c2_polynomial  byte 0 = 0x01, byte 15 = 0xC2, rest zero; ANDed with an
#                       all-ones/all-zeros mask in _gcm_init_clmul
#   L$7_mask            dwords 7,0,7,0; ANDed with the state in L$mod4_loop
#   L$7_mask_poly       dwords 7,0,450,0 (450 = 0x1C2); not referenced anywhere
#                       in the visible source
| 1267 .p2align 6 |
| 1268 L$bswap_mask: |
| 1269 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 |
| 1270 L$0x1c2_polynomial: |
| 1271 .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 |
| 1272 L$7_mask: |
| 1273 .long 7,0,7,0 |
| 1274 L$7_mask_poly: |
| 1275 .long 7,0,450,0 |
| 1276 .p2align 6 |
| 1277 |
# L$rem_4bit - 16 entries of 8 bytes, indexed in _gcm_gmult_4bit by the 4 bits
# shifted out of the accumulator ((%r11,%rdx,8)) and XORed into its upper half.
# Each entry is written as two .long values: low dword 0, high dword = a 16-bit
# constant shifted left by 16 (for example 471859200 = 0x1C20 << 16), so the
# constant occupies bits 48..63 of the 64-bit entry.
| 1278 L$rem_4bit: |
| 1279 .long 0,0,0,471859200,0,943718400,0,610271232 |
| 1280 .long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 |
| 1281 .long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 |
| 1282 .long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 |
| 1283 |
# L$rem_8bit - 256 entries of 16 bits (32 rows of 8), indexed in
# _gcm_ghash_4bit by an 8-bit value (movzwq (%r11,%rN,2)); the loaded word is
# shifted left by 48 and XORed into the upper half of the accumulator.
| 1284 L$rem_8bit: |
| 1285 .value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E |
| 1286 .value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E |
| 1287 .value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E |
| 1288 .value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E |
| 1289 .value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E |
| 1290 .value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E |
| 1291 .value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E |
| 1292 .value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E |
| 1293 .value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE |
| 1294 .value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE |
| 1295 .value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE |
| 1296 .value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE |
| 1297 .value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E |
| 1298 .value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E |
| 1299 .value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE |
| 1300 .value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE |
| 1301 .value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E |
| 1302 .value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E |
| 1303 .value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E |
| 1304 .value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E |
| 1305 .value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E |
| 1306 .value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E |
| 1307 .value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E |
| 1308 .value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E |
| 1309 .value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE |
| 1310 .value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE |
| 1311 .value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE |
| 1312 .value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE |
| 1313 .value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E |
| 1314 .value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E |
| 1315 .value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE |
| 1316 .value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE |
| 1317 |
# NUL-terminated ASCII identification string embedded in the object file:
#   GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>
# Kept as ONE .byte directive on one line: a hard wrap in the middle of the
# list splits the value 108 into 10 and 8 and leaves a continuation line that
# starts with a comma, which does not assemble.
| 1318 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 |
| 1319 .p2align 6 |
| 1320 #endif |
OLD | NEW |