| OLD | NEW |
| (Empty) |
| 1 #if defined(__x86_64__) | |
| 2 .text | |
| 3 .extern OPENSSL_ia32cap_P | |
| 4 .hidden OPENSSL_ia32cap_P | |
| 5 | |
# ---------------------------------------------------------------------
# gcm_gmult_4bit
# In:    %rdi -> 16-byte block. Its bytes are read one at a time, from
#                offset 15 down to offset 0, and the block is overwritten
#                with the result just before returning.
#        %rsi -> table of 16-byte entries, addressed as nibble*16
#                (low nibble via shlb $4, high nibble via andb $0xf0).
# Uses:  .Lrem_4bit, 8-byte entries indexed by the 4 bits that are about
#        to be shifted out of %r8.
# State: %r8 = qword taken from table offset +8, %r9 = qword from offset +0;
#        the pair is shifted right by 4 bits per nibble processed.
# Regs:  %rax, %rcx, %rdx, %r8-%r11 are overwritten; %rbx is saved/restored.
# NOTE(review): presumably the 4-bit table GHASH multiply (Xi *= H) --
#        confirm against the generator script / C prototype.
# ---------------------------------------------------------------------
| 6 .globl gcm_gmult_4bit | |
| 7 .hidden gcm_gmult_4bit | |
| 8 .type gcm_gmult_4bit,@function | |
| 9 .align 16 | |
| 10 gcm_gmult_4bit: | |
| 11 pushq %rbx | |
| 12 pushq %rbp | |
| 13 pushq %r12 | |
| 14 .Lgmult_prologue: | |
| 15 | |
# Seed the accumulator from the table entry selected by the low nibble of
# byte 15; %bl keeps the high nibble (already scaled by 16) for the next step.
| 16 movzbq 15(%rdi),%r8 | |
| 17 leaq .Lrem_4bit(%rip),%r11 | |
| 18 xorq %rax,%rax | |
| 19 xorq %rbx,%rbx | |
| 20 movb %r8b,%al | |
| 21 movb %r8b,%bl | |
| 22 shlb $4,%al | |
| 23 movq $14,%rcx | |
| 24 movq 8(%rsi,%rax,1),%r8 | |
| 25 movq (%rsi,%rax,1),%r9 | |
| 26 andb $0xf0,%bl | |
| 27 movq %r8,%rdx | |
| 28 jmp .Loop1 | |
| 29 | |
| 30 .align 16 | |
# Each pass handles two nibbles: first the pending high nibble (%rbx) of the
# previous byte, then the low nibble (%rax) of the byte just loaded from
# (%rdi,%rcx). %rcx counts 14 down to 0; js leaves once it goes negative.
| 31 .Loop1: | |
| 32 shrq $4,%r8 | |
| 33 andq $0xf,%rdx | |
| 34 movq %r9,%r10 | |
| 35 movb (%rdi,%rcx,1),%al | |
| 36 shrq $4,%r9 | |
| 37 xorq 8(%rsi,%rbx,1),%r8 | |
| 38 shlq $60,%r10 | |
| 39 xorq (%rsi,%rbx,1),%r9 | |
| 40 movb %al,%bl | |
| 41 xorq (%r11,%rdx,8),%r9 | |
| 42 movq %r8,%rdx | |
| 43 shlb $4,%al | |
| 44 xorq %r10,%r8 | |
| 45 decq %rcx | |
| 46 js .Lbreak1 | |
| 47 | |
| 48 shrq $4,%r8 | |
| 49 andq $0xf,%rdx | |
| 50 movq %r9,%r10 | |
| 51 shrq $4,%r9 | |
| 52 xorq 8(%rsi,%rax,1),%r8 | |
| 53 shlq $60,%r10 | |
| 54 xorq (%rsi,%rax,1),%r9 | |
| 55 andb $0xf0,%bl | |
| 56 xorq (%r11,%rdx,8),%r9 | |
| 57 movq %r8,%rdx | |
| 58 xorq %r10,%r8 | |
| 59 jmp .Loop1 | |
| 60 | |
| 61 .align 16 | |
# Tail: the last two nibbles (low then high nibble of byte 0).
| 62 .Lbreak1: | |
| 63 shrq $4,%r8 | |
| 64 andq $0xf,%rdx | |
| 65 movq %r9,%r10 | |
| 66 shrq $4,%r9 | |
| 67 xorq 8(%rsi,%rax,1),%r8 | |
| 68 shlq $60,%r10 | |
| 69 xorq (%rsi,%rax,1),%r9 | |
| 70 andb $0xf0,%bl | |
| 71 xorq (%r11,%rdx,8),%r9 | |
| 72 movq %r8,%rdx | |
| 73 xorq %r10,%r8 | |
| 74 | |
| 75 shrq $4,%r8 | |
| 76 andq $0xf,%rdx | |
| 77 movq %r9,%r10 | |
| 78 shrq $4,%r9 | |
| 79 xorq 8(%rsi,%rbx,1),%r8 | |
| 80 shlq $60,%r10 | |
| 81 xorq (%rsi,%rbx,1),%r9 | |
| 82 xorq %r10,%r8 | |
| 83 xorq (%r11,%rdx,8),%r9 | |
| 84 | |
# Byte-swap both halves and write the result back over the input block.
| 85 bswapq %r8 | |
| 86 bswapq %r9 | |
| 87 movq %r8,8(%rdi) | |
| 88 movq %r9,(%rdi) | |
| 89 | |
# %rbp and %r12 were pushed but are never modified in this function, so only
# %rbx is reloaded before the three stack slots (24 bytes) are released.
| 90 movq 16(%rsp),%rbx | |
| 91 leaq 24(%rsp),%rsp | |
| 92 .Lgmult_epilogue: | |
# 0xf3,0xc3 is the encoding of "rep ret".
| 93 .byte 0xf3,0xc3 | |
| 94 .size gcm_gmult_4bit,.-gcm_gmult_4bit | |
# ---------------------------------------------------------------------
# gcm_ghash_4bit
# In:    %rdi -> 16-byte running value; loaded on entry, stored back in
#                XORed form at the top of every block and finally at exit.
#        %rsi -> table of sixteen 16-byte entries (offsets 0..240 are read).
#        %rdx -> input data, consumed 16 bytes per outer-loop pass.
#        %rcx  = byte count; %rdx+%rcx becomes the end pointer in %r15.
# Uses:  .Lrem_8bit, 16-bit entries (movzwq ...,2) shifted into bits 48..63.
# Stack: six callee-saved pushes plus 280 bytes of locals:
#          0..15(%rsp)        per-entry byte: low 4 bits of the entry, moved
#                             to the high nibble (shlb $4)
#          -128..-1(%rbp)     low  qword of each entry shifted right by 4
#          0..127(%rbp)       high qword of each entry shifted right by 4
#        with %rbp = %rsp+16+128.
# NOTE(review): presumably the streaming GHASH update (Xi, Htable, inp, len)
#        -- confirm argument names against the C prototype.
# ---------------------------------------------------------------------
| 95 .globl gcm_ghash_4bit | |
| 96 .hidden gcm_ghash_4bit | |
| 97 .type gcm_ghash_4bit,@function | |
| 98 .align 16 | |
| 99 gcm_ghash_4bit: | |
| 100 pushq %rbx | |
| 101 pushq %rbp | |
| 102 pushq %r12 | |
| 103 pushq %r13 | |
| 104 pushq %r14 | |
| 105 pushq %r15 | |
| 106 subq $280,%rsp | |
| 107 .Lghash_prologue: | |
# %r14 = input pointer, %r15 = byte count (turned into an end pointer later).
| 108 movq %rdx,%r14 | |
| 109 movq %rcx,%r15 | |
# Bias %rsi by +128 (written as subtracting -128) so all 16 entries are
# reachable with displacements in -128..+120, then build the stack copies:
# every entry is shifted right by 4 bits as a 128-bit quantity and the 4 bits
# that fall off are kept separately in the byte array at (%rsp).
# The work for consecutive entries is interleaved, alternating register sets
# (%r8/%rax and %r9/%rbx).
| 110 subq $-128,%rsi | |
| 111 leaq 16+128(%rsp),%rbp | |
| 112 xorl %edx,%edx | |
| 113 movq 0+0-128(%rsi),%r8 | |
| 114 movq 0+8-128(%rsi),%rax | |
| 115 movb %al,%dl | |
| 116 shrq $4,%rax | |
| 117 movq %r8,%r10 | |
| 118 shrq $4,%r8 | |
| 119 movq 16+0-128(%rsi),%r9 | |
| 120 shlb $4,%dl | |
| 121 movq 16+8-128(%rsi),%rbx | |
| 122 shlq $60,%r10 | |
| 123 movb %dl,0(%rsp) | |
| 124 orq %r10,%rax | |
| 125 movb %bl,%dl | |
| 126 shrq $4,%rbx | |
| 127 movq %r9,%r10 | |
| 128 shrq $4,%r9 | |
| 129 movq %r8,0(%rbp) | |
| 130 movq 32+0-128(%rsi),%r8 | |
| 131 shlb $4,%dl | |
| 132 movq %rax,0-128(%rbp) | |
| 133 movq 32+8-128(%rsi),%rax | |
| 134 shlq $60,%r10 | |
| 135 movb %dl,1(%rsp) | |
| 136 orq %r10,%rbx | |
| 137 movb %al,%dl | |
| 138 shrq $4,%rax | |
| 139 movq %r8,%r10 | |
| 140 shrq $4,%r8 | |
| 141 movq %r9,8(%rbp) | |
| 142 movq 48+0-128(%rsi),%r9 | |
| 143 shlb $4,%dl | |
| 144 movq %rbx,8-128(%rbp) | |
| 145 movq 48+8-128(%rsi),%rbx | |
| 146 shlq $60,%r10 | |
| 147 movb %dl,2(%rsp) | |
| 148 orq %r10,%rax | |
| 149 movb %bl,%dl | |
| 150 shrq $4,%rbx | |
| 151 movq %r9,%r10 | |
| 152 shrq $4,%r9 | |
| 153 movq %r8,16(%rbp) | |
| 154 movq 64+0-128(%rsi),%r8 | |
| 155 shlb $4,%dl | |
| 156 movq %rax,16-128(%rbp) | |
| 157 movq 64+8-128(%rsi),%rax | |
| 158 shlq $60,%r10 | |
| 159 movb %dl,3(%rsp) | |
| 160 orq %r10,%rbx | |
| 161 movb %al,%dl | |
| 162 shrq $4,%rax | |
| 163 movq %r8,%r10 | |
| 164 shrq $4,%r8 | |
| 165 movq %r9,24(%rbp) | |
| 166 movq 80+0-128(%rsi),%r9 | |
| 167 shlb $4,%dl | |
| 168 movq %rbx,24-128(%rbp) | |
| 169 movq 80+8-128(%rsi),%rbx | |
| 170 shlq $60,%r10 | |
| 171 movb %dl,4(%rsp) | |
| 172 orq %r10,%rax | |
| 173 movb %bl,%dl | |
| 174 shrq $4,%rbx | |
| 175 movq %r9,%r10 | |
| 176 shrq $4,%r9 | |
| 177 movq %r8,32(%rbp) | |
| 178 movq 96+0-128(%rsi),%r8 | |
| 179 shlb $4,%dl | |
| 180 movq %rax,32-128(%rbp) | |
| 181 movq 96+8-128(%rsi),%rax | |
| 182 shlq $60,%r10 | |
| 183 movb %dl,5(%rsp) | |
| 184 orq %r10,%rbx | |
| 185 movb %al,%dl | |
| 186 shrq $4,%rax | |
| 187 movq %r8,%r10 | |
| 188 shrq $4,%r8 | |
| 189 movq %r9,40(%rbp) | |
| 190 movq 112+0-128(%rsi),%r9 | |
| 191 shlb $4,%dl | |
| 192 movq %rbx,40-128(%rbp) | |
| 193 movq 112+8-128(%rsi),%rbx | |
| 194 shlq $60,%r10 | |
| 195 movb %dl,6(%rsp) | |
| 196 orq %r10,%rax | |
| 197 movb %bl,%dl | |
| 198 shrq $4,%rbx | |
| 199 movq %r9,%r10 | |
| 200 shrq $4,%r9 | |
| 201 movq %r8,48(%rbp) | |
| 202 movq 128+0-128(%rsi),%r8 | |
| 203 shlb $4,%dl | |
| 204 movq %rax,48-128(%rbp) | |
| 205 movq 128+8-128(%rsi),%rax | |
| 206 shlq $60,%r10 | |
| 207 movb %dl,7(%rsp) | |
| 208 orq %r10,%rbx | |
| 209 movb %al,%dl | |
| 210 shrq $4,%rax | |
| 211 movq %r8,%r10 | |
| 212 shrq $4,%r8 | |
| 213 movq %r9,56(%rbp) | |
| 214 movq 144+0-128(%rsi),%r9 | |
| 215 shlb $4,%dl | |
| 216 movq %rbx,56-128(%rbp) | |
| 217 movq 144+8-128(%rsi),%rbx | |
| 218 shlq $60,%r10 | |
| 219 movb %dl,8(%rsp) | |
| 220 orq %r10,%rax | |
| 221 movb %bl,%dl | |
| 222 shrq $4,%rbx | |
| 223 movq %r9,%r10 | |
| 224 shrq $4,%r9 | |
| 225 movq %r8,64(%rbp) | |
| 226 movq 160+0-128(%rsi),%r8 | |
| 227 shlb $4,%dl | |
| 228 movq %rax,64-128(%rbp) | |
| 229 movq 160+8-128(%rsi),%rax | |
| 230 shlq $60,%r10 | |
| 231 movb %dl,9(%rsp) | |
| 232 orq %r10,%rbx | |
| 233 movb %al,%dl | |
| 234 shrq $4,%rax | |
| 235 movq %r8,%r10 | |
| 236 shrq $4,%r8 | |
| 237 movq %r9,72(%rbp) | |
| 238 movq 176+0-128(%rsi),%r9 | |
| 239 shlb $4,%dl | |
| 240 movq %rbx,72-128(%rbp) | |
| 241 movq 176+8-128(%rsi),%rbx | |
| 242 shlq $60,%r10 | |
| 243 movb %dl,10(%rsp) | |
| 244 orq %r10,%rax | |
| 245 movb %bl,%dl | |
| 246 shrq $4,%rbx | |
| 247 movq %r9,%r10 | |
| 248 shrq $4,%r9 | |
| 249 movq %r8,80(%rbp) | |
| 250 movq 192+0-128(%rsi),%r8 | |
| 251 shlb $4,%dl | |
| 252 movq %rax,80-128(%rbp) | |
| 253 movq 192+8-128(%rsi),%rax | |
| 254 shlq $60,%r10 | |
| 255 movb %dl,11(%rsp) | |
| 256 orq %r10,%rbx | |
| 257 movb %al,%dl | |
| 258 shrq $4,%rax | |
| 259 movq %r8,%r10 | |
| 260 shrq $4,%r8 | |
| 261 movq %r9,88(%rbp) | |
| 262 movq 208+0-128(%rsi),%r9 | |
| 263 shlb $4,%dl | |
| 264 movq %rbx,88-128(%rbp) | |
| 265 movq 208+8-128(%rsi),%rbx | |
| 266 shlq $60,%r10 | |
| 267 movb %dl,12(%rsp) | |
| 268 orq %r10,%rax | |
| 269 movb %bl,%dl | |
| 270 shrq $4,%rbx | |
| 271 movq %r9,%r10 | |
| 272 shrq $4,%r9 | |
| 273 movq %r8,96(%rbp) | |
| 274 movq 224+0-128(%rsi),%r8 | |
| 275 shlb $4,%dl | |
| 276 movq %rax,96-128(%rbp) | |
| 277 movq 224+8-128(%rsi),%rax | |
| 278 shlq $60,%r10 | |
| 279 movb %dl,13(%rsp) | |
| 280 orq %r10,%rbx | |
| 281 movb %al,%dl | |
| 282 shrq $4,%rax | |
| 283 movq %r8,%r10 | |
| 284 shrq $4,%r8 | |
| 285 movq %r9,104(%rbp) | |
| 286 movq 240+0-128(%rsi),%r9 | |
| 287 shlb $4,%dl | |
| 288 movq %rbx,104-128(%rbp) | |
| 289 movq 240+8-128(%rsi),%rbx | |
| 290 shlq $60,%r10 | |
| 291 movb %dl,14(%rsp) | |
| 292 orq %r10,%rax | |
| 293 movb %bl,%dl | |
| 294 shrq $4,%rbx | |
| 295 movq %r9,%r10 | |
| 296 shrq $4,%r9 | |
| 297 movq %r8,112(%rbp) | |
| 298 shlb $4,%dl | |
| 299 movq %rax,112-128(%rbp) | |
| 300 shlq $60,%r10 | |
| 301 movb %dl,15(%rsp) | |
| 302 orq %r10,%rbx | |
| 303 movq %r9,120(%rbp) | |
| 304 movq %rbx,120-128(%rbp) | |
# Remove the +128 bias from %rsi, load the running value (%r8 = bytes 8..15,
# %r9 = bytes 0..7) and turn the byte count into an end pointer.
| 305 addq $-128,%rsi | |
| 306 movq 8(%rdi),%r8 | |
| 307 movq 0(%rdi),%r9 | |
| 308 addq %r14,%r15 | |
| 309 leaq .Lrem_8bit(%rip),%r11 | |
| 310 jmp .Louter_loop | |
| 311 .align 16 | |
# One pass per 16-byte input block: XOR the block into the running value,
# park the result at (%rdi), then walk it a byte at a time. roll $8,%edx
# brings the next byte into %dl; its low nibble (shlb $4 -> %rax) selects an
# entry in the %rsi table and its high nibble (shrl $4 -> %rbx/%rcx) selects
# the pre-shifted stack copies. The accumulator moves right by 8 bits per
# step and the low byte about to be shifted out (XORed with the saved nibble
# byte) indexes .Lrem_8bit.
# %r12/%rbx and %r13/%rcx alternate so consecutive steps can overlap.
# NOTE(review): the bound check (cmpq/jb) is at the bottom, so the body runs
# once even when the byte count passed in %rcx is 0 -- confirm callers never
# pass 0.
| 312 .Louter_loop: | |
| 313 xorq (%r14),%r9 | |
| 314 movq 8(%r14),%rdx | |
| 315 leaq 16(%r14),%r14 | |
| 316 xorq %r8,%rdx | |
| 317 movq %r9,(%rdi) | |
| 318 movq %rdx,8(%rdi) | |
| 319 shrq $32,%rdx | |
| 320 xorq %rax,%rax | |
| 321 roll $8,%edx | |
| 322 movb %dl,%al | |
| 323 movzbl %dl,%ebx | |
| 324 shlb $4,%al | |
| 325 shrl $4,%ebx | |
| 326 roll $8,%edx | |
| 327 movq 8(%rsi,%rax,1),%r8 | |
| 328 movq (%rsi,%rax,1),%r9 | |
| 329 movb %dl,%al | |
| 330 movzbl %dl,%ecx | |
| 331 shlb $4,%al | |
| 332 movzbq (%rsp,%rbx,1),%r12 | |
| 333 shrl $4,%ecx | |
| 334 xorq %r8,%r12 | |
| 335 movq %r9,%r10 | |
| 336 shrq $8,%r8 | |
| 337 movzbq %r12b,%r12 | |
| 338 shrq $8,%r9 | |
| 339 xorq -128(%rbp,%rbx,8),%r8 | |
| 340 shlq $56,%r10 | |
| 341 xorq (%rbp,%rbx,8),%r9 | |
| 342 roll $8,%edx | |
| 343 xorq 8(%rsi,%rax,1),%r8 | |
| 344 xorq (%rsi,%rax,1),%r9 | |
| 345 movb %dl,%al | |
| 346 xorq %r10,%r8 | |
| 347 movzwq (%r11,%r12,2),%r12 | |
| 348 movzbl %dl,%ebx | |
| 349 shlb $4,%al | |
| 350 movzbq (%rsp,%rcx,1),%r13 | |
| 351 shrl $4,%ebx | |
| 352 shlq $48,%r12 | |
| 353 xorq %r8,%r13 | |
| 354 movq %r9,%r10 | |
| 355 xorq %r12,%r9 | |
| 356 shrq $8,%r8 | |
| 357 movzbq %r13b,%r13 | |
| 358 shrq $8,%r9 | |
| 359 xorq -128(%rbp,%rcx,8),%r8 | |
| 360 shlq $56,%r10 | |
| 361 xorq (%rbp,%rcx,8),%r9 | |
| 362 roll $8,%edx | |
| 363 xorq 8(%rsi,%rax,1),%r8 | |
| 364 xorq (%rsi,%rax,1),%r9 | |
| 365 movb %dl,%al | |
| 366 xorq %r10,%r8 | |
| 367 movzwq (%r11,%r13,2),%r13 | |
| 368 movzbl %dl,%ecx | |
| 369 shlb $4,%al | |
| 370 movzbq (%rsp,%rbx,1),%r12 | |
| 371 shrl $4,%ecx | |
| 372 shlq $48,%r13 | |
| 373 xorq %r8,%r12 | |
| 374 movq %r9,%r10 | |
| 375 xorq %r13,%r9 | |
| 376 shrq $8,%r8 | |
| 377 movzbq %r12b,%r12 | |
| 378 movl 8(%rdi),%edx | |
| 379 shrq $8,%r9 | |
| 380 xorq -128(%rbp,%rbx,8),%r8 | |
| 381 shlq $56,%r10 | |
| 382 xorq (%rbp,%rbx,8),%r9 | |
| 383 roll $8,%edx | |
| 384 xorq 8(%rsi,%rax,1),%r8 | |
| 385 xorq (%rsi,%rax,1),%r9 | |
| 386 movb %dl,%al | |
| 387 xorq %r10,%r8 | |
| 388 movzwq (%r11,%r12,2),%r12 | |
| 389 movzbl %dl,%ebx | |
| 390 shlb $4,%al | |
| 391 movzbq (%rsp,%rcx,1),%r13 | |
| 392 shrl $4,%ebx | |
| 393 shlq $48,%r12 | |
| 394 xorq %r8,%r13 | |
| 395 movq %r9,%r10 | |
| 396 xorq %r12,%r9 | |
| 397 shrq $8,%r8 | |
| 398 movzbq %r13b,%r13 | |
| 399 shrq $8,%r9 | |
| 400 xorq -128(%rbp,%rcx,8),%r8 | |
| 401 shlq $56,%r10 | |
| 402 xorq (%rbp,%rcx,8),%r9 | |
| 403 roll $8,%edx | |
| 404 xorq 8(%rsi,%rax,1),%r8 | |
| 405 xorq (%rsi,%rax,1),%r9 | |
| 406 movb %dl,%al | |
| 407 xorq %r10,%r8 | |
| 408 movzwq (%r11,%r13,2),%r13 | |
| 409 movzbl %dl,%ecx | |
| 410 shlb $4,%al | |
| 411 movzbq (%rsp,%rbx,1),%r12 | |
| 412 shrl $4,%ecx | |
| 413 shlq $48,%r13 | |
| 414 xorq %r8,%r12 | |
| 415 movq %r9,%r10 | |
| 416 xorq %r13,%r9 | |
| 417 shrq $8,%r8 | |
| 418 movzbq %r12b,%r12 | |
| 419 shrq $8,%r9 | |
| 420 xorq -128(%rbp,%rbx,8),%r8 | |
| 421 shlq $56,%r10 | |
| 422 xorq (%rbp,%rbx,8),%r9 | |
| 423 roll $8,%edx | |
| 424 xorq 8(%rsi,%rax,1),%r8 | |
| 425 xorq (%rsi,%rax,1),%r9 | |
| 426 movb %dl,%al | |
| 427 xorq %r10,%r8 | |
| 428 movzwq (%r11,%r12,2),%r12 | |
| 429 movzbl %dl,%ebx | |
| 430 shlb $4,%al | |
| 431 movzbq (%rsp,%rcx,1),%r13 | |
| 432 shrl $4,%ebx | |
| 433 shlq $48,%r12 | |
| 434 xorq %r8,%r13 | |
| 435 movq %r9,%r10 | |
| 436 xorq %r12,%r9 | |
| 437 shrq $8,%r8 | |
| 438 movzbq %r13b,%r13 | |
| 439 shrq $8,%r9 | |
| 440 xorq -128(%rbp,%rcx,8),%r8 | |
| 441 shlq $56,%r10 | |
| 442 xorq (%rbp,%rcx,8),%r9 | |
| 443 roll $8,%edx | |
| 444 xorq 8(%rsi,%rax,1),%r8 | |
| 445 xorq (%rsi,%rax,1),%r9 | |
| 446 movb %dl,%al | |
| 447 xorq %r10,%r8 | |
| 448 movzwq (%r11,%r13,2),%r13 | |
| 449 movzbl %dl,%ecx | |
| 450 shlb $4,%al | |
| 451 movzbq (%rsp,%rbx,1),%r12 | |
| 452 shrl $4,%ecx | |
| 453 shlq $48,%r13 | |
| 454 xorq %r8,%r12 | |
| 455 movq %r9,%r10 | |
| 456 xorq %r13,%r9 | |
| 457 shrq $8,%r8 | |
| 458 movzbq %r12b,%r12 | |
| 459 movl 4(%rdi),%edx | |
| 460 shrq $8,%r9 | |
| 461 xorq -128(%rbp,%rbx,8),%r8 | |
| 462 shlq $56,%r10 | |
| 463 xorq (%rbp,%rbx,8),%r9 | |
| 464 roll $8,%edx | |
| 465 xorq 8(%rsi,%rax,1),%r8 | |
| 466 xorq (%rsi,%rax,1),%r9 | |
| 467 movb %dl,%al | |
| 468 xorq %r10,%r8 | |
| 469 movzwq (%r11,%r12,2),%r12 | |
| 470 movzbl %dl,%ebx | |
| 471 shlb $4,%al | |
| 472 movzbq (%rsp,%rcx,1),%r13 | |
| 473 shrl $4,%ebx | |
| 474 shlq $48,%r12 | |
| 475 xorq %r8,%r13 | |
| 476 movq %r9,%r10 | |
| 477 xorq %r12,%r9 | |
| 478 shrq $8,%r8 | |
| 479 movzbq %r13b,%r13 | |
| 480 shrq $8,%r9 | |
| 481 xorq -128(%rbp,%rcx,8),%r8 | |
| 482 shlq $56,%r10 | |
| 483 xorq (%rbp,%rcx,8),%r9 | |
| 484 roll $8,%edx | |
| 485 xorq 8(%rsi,%rax,1),%r8 | |
| 486 xorq (%rsi,%rax,1),%r9 | |
| 487 movb %dl,%al | |
| 488 xorq %r10,%r8 | |
| 489 movzwq (%r11,%r13,2),%r13 | |
| 490 movzbl %dl,%ecx | |
| 491 shlb $4,%al | |
| 492 movzbq (%rsp,%rbx,1),%r12 | |
| 493 shrl $4,%ecx | |
| 494 shlq $48,%r13 | |
| 495 xorq %r8,%r12 | |
| 496 movq %r9,%r10 | |
| 497 xorq %r13,%r9 | |
| 498 shrq $8,%r8 | |
| 499 movzbq %r12b,%r12 | |
| 500 shrq $8,%r9 | |
| 501 xorq -128(%rbp,%rbx,8),%r8 | |
| 502 shlq $56,%r10 | |
| 503 xorq (%rbp,%rbx,8),%r9 | |
| 504 roll $8,%edx | |
| 505 xorq 8(%rsi,%rax,1),%r8 | |
| 506 xorq (%rsi,%rax,1),%r9 | |
| 507 movb %dl,%al | |
| 508 xorq %r10,%r8 | |
| 509 movzwq (%r11,%r12,2),%r12 | |
| 510 movzbl %dl,%ebx | |
| 511 shlb $4,%al | |
| 512 movzbq (%rsp,%rcx,1),%r13 | |
| 513 shrl $4,%ebx | |
| 514 shlq $48,%r12 | |
| 515 xorq %r8,%r13 | |
| 516 movq %r9,%r10 | |
| 517 xorq %r12,%r9 | |
| 518 shrq $8,%r8 | |
| 519 movzbq %r13b,%r13 | |
| 520 shrq $8,%r9 | |
| 521 xorq -128(%rbp,%rcx,8),%r8 | |
| 522 shlq $56,%r10 | |
| 523 xorq (%rbp,%rcx,8),%r9 | |
| 524 roll $8,%edx | |
| 525 xorq 8(%rsi,%rax,1),%r8 | |
| 526 xorq (%rsi,%rax,1),%r9 | |
| 527 movb %dl,%al | |
| 528 xorq %r10,%r8 | |
| 529 movzwq (%r11,%r13,2),%r13 | |
| 530 movzbl %dl,%ecx | |
| 531 shlb $4,%al | |
| 532 movzbq (%rsp,%rbx,1),%r12 | |
| 533 shrl $4,%ecx | |
| 534 shlq $48,%r13 | |
| 535 xorq %r8,%r12 | |
| 536 movq %r9,%r10 | |
| 537 xorq %r13,%r9 | |
| 538 shrq $8,%r8 | |
| 539 movzbq %r12b,%r12 | |
| 540 movl 0(%rdi),%edx | |
| 541 shrq $8,%r9 | |
| 542 xorq -128(%rbp,%rbx,8),%r8 | |
| 543 shlq $56,%r10 | |
| 544 xorq (%rbp,%rbx,8),%r9 | |
| 545 roll $8,%edx | |
| 546 xorq 8(%rsi,%rax,1),%r8 | |
| 547 xorq (%rsi,%rax,1),%r9 | |
| 548 movb %dl,%al | |
| 549 xorq %r10,%r8 | |
| 550 movzwq (%r11,%r12,2),%r12 | |
| 551 movzbl %dl,%ebx | |
| 552 shlb $4,%al | |
| 553 movzbq (%rsp,%rcx,1),%r13 | |
| 554 shrl $4,%ebx | |
| 555 shlq $48,%r12 | |
| 556 xorq %r8,%r13 | |
| 557 movq %r9,%r10 | |
| 558 xorq %r12,%r9 | |
| 559 shrq $8,%r8 | |
| 560 movzbq %r13b,%r13 | |
| 561 shrq $8,%r9 | |
| 562 xorq -128(%rbp,%rcx,8),%r8 | |
| 563 shlq $56,%r10 | |
| 564 xorq (%rbp,%rcx,8),%r9 | |
| 565 roll $8,%edx | |
| 566 xorq 8(%rsi,%rax,1),%r8 | |
| 567 xorq (%rsi,%rax,1),%r9 | |
| 568 movb %dl,%al | |
| 569 xorq %r10,%r8 | |
| 570 movzwq (%r11,%r13,2),%r13 | |
| 571 movzbl %dl,%ecx | |
| 572 shlb $4,%al | |
| 573 movzbq (%rsp,%rbx,1),%r12 | |
| 574 shrl $4,%ecx | |
| 575 shlq $48,%r13 | |
| 576 xorq %r8,%r12 | |
| 577 movq %r9,%r10 | |
| 578 xorq %r13,%r9 | |
| 579 shrq $8,%r8 | |
| 580 movzbq %r12b,%r12 | |
| 581 shrq $8,%r9 | |
| 582 xorq -128(%rbp,%rbx,8),%r8 | |
| 583 shlq $56,%r10 | |
| 584 xorq (%rbp,%rbx,8),%r9 | |
| 585 roll $8,%edx | |
| 586 xorq 8(%rsi,%rax,1),%r8 | |
| 587 xorq (%rsi,%rax,1),%r9 | |
| 588 movb %dl,%al | |
| 589 xorq %r10,%r8 | |
| 590 movzwq (%r11,%r12,2),%r12 | |
| 591 movzbl %dl,%ebx | |
| 592 shlb $4,%al | |
| 593 movzbq (%rsp,%rcx,1),%r13 | |
| 594 shrl $4,%ebx | |
| 595 shlq $48,%r12 | |
| 596 xorq %r8,%r13 | |
| 597 movq %r9,%r10 | |
| 598 xorq %r12,%r9 | |
| 599 shrq $8,%r8 | |
| 600 movzbq %r13b,%r13 | |
| 601 shrq $8,%r9 | |
| 602 xorq -128(%rbp,%rcx,8),%r8 | |
| 603 shlq $56,%r10 | |
| 604 xorq (%rbp,%rcx,8),%r9 | |
| 605 roll $8,%edx | |
| 606 xorq 8(%rsi,%rax,1),%r8 | |
| 607 xorq (%rsi,%rax,1),%r9 | |
| 608 movb %dl,%al | |
| 609 xorq %r10,%r8 | |
| 610 movzwq (%r11,%r13,2),%r13 | |
| 611 movzbl %dl,%ecx | |
| 612 shlb $4,%al | |
| 613 movzbq (%rsp,%rbx,1),%r12 | |
# Last byte: the high nibble is kept scaled by 16 (andl $240) because it is
# applied further down as a 4-bit step through the %rsi table, not through
# the pre-shifted stack copies.
| 614 andl $240,%ecx | |
| 615 shlq $48,%r13 | |
| 616 xorq %r8,%r12 | |
| 617 movq %r9,%r10 | |
| 618 xorq %r13,%r9 | |
| 619 shrq $8,%r8 | |
| 620 movzbq %r12b,%r12 | |
# NOTE(review): the value loaded into %edx on the next row is never read;
# %rdx is reloaded from 8(%r14) at the top of .Louter_loop and is unused on
# the exit path. The load still touches the 4 bytes immediately below
# (%rdi) -- confirm that memory is always readable for every caller.
| 621 movl -4(%rdi),%edx | |
| 622 shrq $8,%r9 | |
| 623 xorq -128(%rbp,%rbx,8),%r8 | |
| 624 shlq $56,%r10 | |
| 625 xorq (%rbp,%rbx,8),%r9 | |
| 626 movzwq (%r11,%r12,2),%r12 | |
| 627 xorq 8(%rsi,%rax,1),%r8 | |
| 628 xorq (%rsi,%rax,1),%r9 | |
| 629 shlq $48,%r12 | |
| 630 xorq %r10,%r8 | |
| 631 xorq %r12,%r9 | |
| 632 movzbq %r8b,%r13 | |
| 633 shrq $4,%r8 | |
| 634 movq %r9,%r10 | |
| 635 shlb $4,%r13b | |
| 636 shrq $4,%r9 | |
| 637 xorq 8(%rsi,%rcx,1),%r8 | |
| 638 movzwq (%r11,%r13,2),%r13 | |
| 639 shlq $60,%r10 | |
| 640 xorq (%rsi,%rcx,1),%r9 | |
| 641 xorq %r10,%r8 | |
| 642 shlq $48,%r13 | |
| 643 bswapq %r8 | |
| 644 xorq %r13,%r9 | |
| 645 bswapq %r9 | |
# Unsigned compare of the input cursor against the end pointer.
| 646 cmpq %r15,%r14 | |
| 647 jb .Louter_loop | |
| 648 movq %r8,8(%rdi) | |
| 649 movq %r9,(%rdi) | |
| 650 | |
# Epilogue: the six saved registers sit just above the 280-byte local area;
# reload them in reverse push order and release the whole frame.
| 651 leaq 280(%rsp),%rsi | |
| 652 movq 0(%rsi),%r15 | |
| 653 movq 8(%rsi),%r14 | |
| 654 movq 16(%rsi),%r13 | |
| 655 movq 24(%rsi),%r12 | |
| 656 movq 32(%rsi),%rbp | |
| 657 movq 40(%rsi),%rbx | |
| 658 leaq 48(%rsi),%rsp | |
| 659 .Lghash_epilogue: | |
# 0xf3,0xc3 is the encoding of "rep ret".
| 660 .byte 0xf3,0xc3 | |
| 661 .size gcm_ghash_4bit,.-gcm_ghash_4bit | |
# ---------------------------------------------------------------------
# gcm_init_clmul
# In:    %rsi -> 16-byte input value (unaligned load).
# Out:   %rdi -> 96 bytes written with unaligned stores at offsets
#                0, 16, 32, 48, 64 and 80.
# Regs:  %xmm0-%xmm6 are overwritten; no stack or integer registers are used.
# Raw opcodes used below:
#   .byte 102,15,58,68,<modrm>,<imm>   pclmulqdq (66 0F 3A 44 /r ib)
#   .byte 102,15,58,15,227,8           palignr $8,%xmm3,%xmm4
#   .byte 0xf3,0xc3                    rep ret
# .L_init_clmul is a second entry label; gcm_init_avx jumps to it.
# NOTE(review): presumably offsets 0/16/48/64 hold H, H^2, H^3, H^4 and
# offsets 32/80 hold the XOR-folded halves of each pair (Karatsuba
# operands) -- confirm against the generator script.
# ---------------------------------------------------------------------
| 662 .globl gcm_init_clmul | |
| 663 .hidden gcm_init_clmul | |
| 664 .type gcm_init_clmul,@function | |
| 665 .align 16 | |
| 666 gcm_init_clmul: | |
| 667 .L_init_clmul: | |
# Load the input and swap its two qwords (pshufd $78).
| 668 movdqu (%rsi),%xmm2 | |
| 669 pshufd $78,%xmm2,%xmm2 | |
| 670 | |
| 671 | |
# Shift the 128-bit value left by one bit (carry moved across the qword
# boundary through %xmm3). %xmm5 becomes an all-ones mask when the top bit
# of the original value was set (0 > dword 3 as a signed compare), and the
# mask selects whether .L0x1c2_polynomial is XORed in.
| 672 pshufd $255,%xmm2,%xmm4 | |
| 673 movdqa %xmm2,%xmm3 | |
| 674 psllq $1,%xmm2 | |
| 675 pxor %xmm5,%xmm5 | |
| 676 psrlq $63,%xmm3 | |
| 677 pcmpgtd %xmm4,%xmm5 | |
| 678 pslldq $8,%xmm3 | |
| 679 por %xmm3,%xmm2 | |
| 680 | |
| 681 | |
| 682 pand .L0x1c2_polynomial(%rip),%xmm5 | |
| 683 pxor %xmm5,%xmm2 | |
| 684 | |
| 685 | |
# First product: %xmm0 (a copy of %xmm2) times %xmm2. Three carry-less
# multiplies: low halves, high halves, and the XOR-folded halves
# (%xmm3 by %xmm6).
| 686 pshufd $78,%xmm2,%xmm6 | |
| 687 movdqa %xmm2,%xmm0 | |
| 688 pxor %xmm2,%xmm6 | |
| 689 movdqa %xmm0,%xmm1 | |
| 690 pshufd $78,%xmm0,%xmm3 | |
| 691 pxor %xmm0,%xmm3 | |
| 692 .byte 102,15,58,68,194,0 | |
| 693 .byte 102,15,58,68,202,17 | |
| 694 .byte 102,15,58,68,222,0 | |
| 695 pxor %xmm0,%xmm3 | |
| 696 pxor %xmm1,%xmm3 | |
| 697 | |
| 698 movdqa %xmm3,%xmm4 | |
| 699 psrldq $8,%xmm3 | |
| 700 pslldq $8,%xmm4 | |
| 701 pxor %xmm3,%xmm1 | |
| 702 pxor %xmm4,%xmm0 | |
| 703 | |
# Shift/XOR folding of the 256-bit product in %xmm1:%xmm0 down to 128 bits
# in %xmm0 (left shifts 5,1,57; then right shifts 1,5,1).
# NOTE(review): presumably reduction modulo the GHASH polynomial -- confirm.
| 704 movdqa %xmm0,%xmm4 | |
| 705 movdqa %xmm0,%xmm3 | |
| 706 psllq $5,%xmm0 | |
| 707 pxor %xmm0,%xmm3 | |
| 708 psllq $1,%xmm0 | |
| 709 pxor %xmm3,%xmm0 | |
| 710 psllq $57,%xmm0 | |
| 711 movdqa %xmm0,%xmm3 | |
| 712 pslldq $8,%xmm0 | |
| 713 psrldq $8,%xmm3 | |
| 714 pxor %xmm4,%xmm0 | |
| 715 pxor %xmm3,%xmm1 | |
| 716 | |
| 717 | |
| 718 movdqa %xmm0,%xmm4 | |
| 719 psrlq $1,%xmm0 | |
| 720 pxor %xmm4,%xmm1 | |
| 721 pxor %xmm0,%xmm4 | |
| 722 psrlq $5,%xmm0 | |
| 723 pxor %xmm4,%xmm0 | |
| 724 psrlq $1,%xmm0 | |
| 725 pxor %xmm1,%xmm0 | |
# Store the adjusted input (offset 0), the first product (offset 16) and the
# palignr-packed XOR-folded halves of both (offset 32).
| 726 pshufd $78,%xmm2,%xmm3 | |
| 727 pshufd $78,%xmm0,%xmm4 | |
| 728 pxor %xmm2,%xmm3 | |
| 729 movdqu %xmm2,0(%rdi) | |
| 730 pxor %xmm0,%xmm4 | |
| 731 movdqu %xmm0,16(%rdi) | |
| 732 .byte 102,15,58,15,227,8 | |
| 733 movdqu %xmm4,32(%rdi) | |
# Second product: previous result times %xmm2 again; kept in %xmm5.
| 734 movdqa %xmm0,%xmm1 | |
| 735 pshufd $78,%xmm0,%xmm3 | |
| 736 pxor %xmm0,%xmm3 | |
| 737 .byte 102,15,58,68,194,0 | |
| 738 .byte 102,15,58,68,202,17 | |
| 739 .byte 102,15,58,68,222,0 | |
| 740 pxor %xmm0,%xmm3 | |
| 741 pxor %xmm1,%xmm3 | |
| 742 | |
| 743 movdqa %xmm3,%xmm4 | |
| 744 psrldq $8,%xmm3 | |
| 745 pslldq $8,%xmm4 | |
| 746 pxor %xmm3,%xmm1 | |
| 747 pxor %xmm4,%xmm0 | |
| 748 | |
| 749 movdqa %xmm0,%xmm4 | |
| 750 movdqa %xmm0,%xmm3 | |
| 751 psllq $5,%xmm0 | |
| 752 pxor %xmm0,%xmm3 | |
| 753 psllq $1,%xmm0 | |
| 754 pxor %xmm3,%xmm0 | |
| 755 psllq $57,%xmm0 | |
| 756 movdqa %xmm0,%xmm3 | |
| 757 pslldq $8,%xmm0 | |
| 758 psrldq $8,%xmm3 | |
| 759 pxor %xmm4,%xmm0 | |
| 760 pxor %xmm3,%xmm1 | |
| 761 | |
| 762 | |
| 763 movdqa %xmm0,%xmm4 | |
| 764 psrlq $1,%xmm0 | |
| 765 pxor %xmm4,%xmm1 | |
| 766 pxor %xmm0,%xmm4 | |
| 767 psrlq $5,%xmm0 | |
| 768 pxor %xmm4,%xmm0 | |
| 769 psrlq $1,%xmm0 | |
| 770 pxor %xmm1,%xmm0 | |
# Third product: previous result (saved in %xmm5) times %xmm2 once more.
| 771 movdqa %xmm0,%xmm5 | |
| 772 movdqa %xmm0,%xmm1 | |
| 773 pshufd $78,%xmm0,%xmm3 | |
| 774 pxor %xmm0,%xmm3 | |
| 775 .byte 102,15,58,68,194,0 | |
| 776 .byte 102,15,58,68,202,17 | |
| 777 .byte 102,15,58,68,222,0 | |
| 778 pxor %xmm0,%xmm3 | |
| 779 pxor %xmm1,%xmm3 | |
| 780 | |
| 781 movdqa %xmm3,%xmm4 | |
| 782 psrldq $8,%xmm3 | |
| 783 pslldq $8,%xmm4 | |
| 784 pxor %xmm3,%xmm1 | |
| 785 pxor %xmm4,%xmm0 | |
| 786 | |
| 787 movdqa %xmm0,%xmm4 | |
| 788 movdqa %xmm0,%xmm3 | |
| 789 psllq $5,%xmm0 | |
| 790 pxor %xmm0,%xmm3 | |
| 791 psllq $1,%xmm0 | |
| 792 pxor %xmm3,%xmm0 | |
| 793 psllq $57,%xmm0 | |
| 794 movdqa %xmm0,%xmm3 | |
| 795 pslldq $8,%xmm0 | |
| 796 psrldq $8,%xmm3 | |
| 797 pxor %xmm4,%xmm0 | |
| 798 pxor %xmm3,%xmm1 | |
| 799 | |
| 800 | |
| 801 movdqa %xmm0,%xmm4 | |
| 802 psrlq $1,%xmm0 | |
| 803 pxor %xmm4,%xmm1 | |
| 804 pxor %xmm0,%xmm4 | |
| 805 psrlq $5,%xmm0 | |
| 806 pxor %xmm4,%xmm0 | |
| 807 psrlq $1,%xmm0 | |
| 808 pxor %xmm1,%xmm0 | |
# Store the second product (offset 48), the third product (offset 64) and
# their packed XOR-folded halves (offset 80).
| 809 pshufd $78,%xmm5,%xmm3 | |
| 810 pshufd $78,%xmm0,%xmm4 | |
| 811 pxor %xmm5,%xmm3 | |
| 812 movdqu %xmm5,48(%rdi) | |
| 813 pxor %xmm0,%xmm4 | |
| 814 movdqu %xmm0,64(%rdi) | |
| 815 .byte 102,15,58,15,227,8 | |
| 816 movdqu %xmm4,80(%rdi) | |
| 817 .byte 0xf3,0xc3 | |
| 818 .size gcm_init_clmul,.-gcm_init_clmul | |
# ---------------------------------------------------------------------
# gcm_gmult_clmul
# In:    %rdi -> 16-byte block, read on entry and overwritten with the
#                result (unaligned load/store).
#        %rsi -> table; the 16-byte entries at offsets 0 and 32 are read.
# Uses:  .Lbswap_mask (loaded with movdqa, so it must be 16-byte aligned).
# Regs:  %xmm0-%xmm5 are overwritten; no stack or integer registers are used.
# Raw opcodes used below:
#   .byte 102,15,56,0,197              pshufb %xmm5,%xmm0
#   .byte 102,15,58,68,<modrm>,<imm>   pclmulqdq (66 0F 3A 44 /r ib)
#   .byte 0xf3,0xc3                    rep ret
# .L_gmult_clmul is a second entry label for this routine.
# NOTE(review): presumably the single-block GHASH multiply (Xi *= H) using
# the table written by gcm_init_clmul -- confirm.
# ---------------------------------------------------------------------
| 819 .globl gcm_gmult_clmul | |
| 820 .hidden gcm_gmult_clmul | |
| 821 .type gcm_gmult_clmul,@function | |
| 822 .align 16 | |
| 823 gcm_gmult_clmul: | |
| 824 .L_gmult_clmul: | |
| 825 movdqu (%rdi),%xmm0 | |
| 826 movdqa .Lbswap_mask(%rip),%xmm5 | |
| 827 movdqu (%rsi),%xmm2 | |
| 828 movdqu 32(%rsi),%xmm4 | |
# Reorder the bytes of the block through the mask before multiplying.
| 829 .byte 102,15,56,0,197 | |
# Three carry-less multiplies: low halves and high halves against %xmm2,
# and the XOR-folded halves (%xmm3) against the entry from offset 32.
| 830 movdqa %xmm0,%xmm1 | |
| 831 pshufd $78,%xmm0,%xmm3 | |
| 832 pxor %xmm0,%xmm3 | |
| 833 .byte 102,15,58,68,194,0 | |
| 834 .byte 102,15,58,68,202,17 | |
| 835 .byte 102,15,58,68,220,0 | |
| 836 pxor %xmm0,%xmm3 | |
| 837 pxor %xmm1,%xmm3 | |
| 838 | |
| 839 movdqa %xmm3,%xmm4 | |
| 840 psrldq $8,%xmm3 | |
| 841 pslldq $8,%xmm4 | |
| 842 pxor %xmm3,%xmm1 | |
| 843 pxor %xmm4,%xmm0 | |
| 844 | |
# Shift/XOR folding of %xmm1:%xmm0 down to 128 bits in %xmm0
# (left shifts 5,1,57; then right shifts 1,5,1).
| 845 movdqa %xmm0,%xmm4 | |
| 846 movdqa %xmm0,%xmm3 | |
| 847 psllq $5,%xmm0 | |
| 848 pxor %xmm0,%xmm3 | |
| 849 psllq $1,%xmm0 | |
| 850 pxor %xmm3,%xmm0 | |
| 851 psllq $57,%xmm0 | |
| 852 movdqa %xmm0,%xmm3 | |
| 853 pslldq $8,%xmm0 | |
| 854 psrldq $8,%xmm3 | |
| 855 pxor %xmm4,%xmm0 | |
| 856 pxor %xmm3,%xmm1 | |
| 857 | |
| 858 | |
| 859 movdqa %xmm0,%xmm4 | |
| 860 psrlq $1,%xmm0 | |
| 861 pxor %xmm4,%xmm1 | |
| 862 pxor %xmm0,%xmm4 | |
| 863 psrlq $5,%xmm0 | |
| 864 pxor %xmm4,%xmm0 | |
| 865 psrlq $1,%xmm0 | |
| 866 pxor %xmm1,%xmm0 | |
# Apply the same byte reordering again and write the block back.
| 867 .byte 102,15,56,0,197 | |
| 868 movdqu %xmm0,(%rdi) | |
| 869 .byte 0xf3,0xc3 | |
| 870 .size gcm_gmult_clmul,.-gcm_gmult_clmul | |
# ---------------------------------------------------------------------
# gcm_ghash_clmul
# In:    %rdi -> 16-byte running value, read on entry and written at .Ldone.
#        %rsi -> table; 16-byte entries at offsets 0, 16, 32, 48, 64 and 80
#                are read (the layout written by gcm_init_clmul).
#        %rdx -> input data (unaligned loads), advanced as blocks are consumed.
#        %rcx  = byte count; biased by -0x10 on entry and decremented per
#                group of blocks.
# Uses:  .Lbswap_mask, .L7_mask, OPENSSL_ia32cap_P+4.
# Regs:  %rax, %rcx, %rdx and %xmm0-%xmm15 are overwritten; no stack is used.
# Paths: .Lmod4_loop  64 bytes per pass (four blocks)
#        .Lmod_loop   32 bytes per pass (two blocks)
#        .Lodd_tail   one final 16-byte block
# Raw opcodes used below (an optional REX byte 65/68/69/76 follows the 102):
#   102,[rex,]15,56,0,<modrm>          pshufb
#   102,[rex,]15,58,68,<modrm>,<imm>   pclmulqdq
#   102,76,15,110,200                  movq %rax,%xmm9
#   0xf3,0xc3                          rep ret
# .L_ghash_clmul is a second entry label for this routine.
# NOTE(review): %xmm6-%xmm15 are modified without being saved; that is fine
# only under an ABI where they are caller-saved -- confirm the target ABI.
# ---------------------------------------------------------------------
| 871 .globl gcm_ghash_clmul | |
| 872 .hidden gcm_ghash_clmul | |
| 873 .type gcm_ghash_clmul,@function | |
| 874 .align 32 | |
| 875 gcm_ghash_clmul: | |
| 876 .L_ghash_clmul: | |
| 877 movdqa .Lbswap_mask(%rip),%xmm10 | |
| 878 | |
| 879 movdqu (%rdi),%xmm0 | |
| 880 movdqu (%rsi),%xmm2 | |
| 881 movdqu 32(%rsi),%xmm7 | |
| 882 .byte 102,65,15,56,0,194 | |
| 883 | |
# A byte count of exactly 0x10 goes straight to the single-block tail.
| 884 subq $0x10,%rcx | |
| 885 jz .Lodd_tail | |
| 886 | |
# The 4-block path needs at least 0x30 more bytes beyond the first block.
# It is also skipped when, within mask 71303168 (0x04400000), only bit 22
# (4194304 = 0x00400000) of the capability word is set.
# NOTE(review): presumably that word mirrors CPUID.1:ECX, making this a
# "MOVBE without XSAVE" check -- confirm.
| 887 movdqu 16(%rsi),%xmm6 | |
| 888 movl OPENSSL_ia32cap_P+4(%rip),%eax | |
| 889 cmpq $0x30,%rcx | |
| 890 jb .Lskip4x | |
| 891 | |
| 892 andl $71303168,%eax | |
| 893 cmpl $4194304,%eax | |
| 894 je .Lskip4x | |
| 895 | |
| 896 subq $0x30,%rcx | |
| 897 movq $0xA040608020C0E000,%rax | |
| 898 movdqu 48(%rsi),%xmm14 | |
| 899 movdqu 64(%rsi),%xmm15 | |
| 900 | |
| 901 | |
| 902 | |
| 903 | |
# Prime the 4-block pipeline with the first 64 input bytes; the running
# value in %xmm0 is XORed with the block at offset 0.
| 904 movdqu 48(%rdx),%xmm3 | |
| 905 movdqu 32(%rdx),%xmm11 | |
| 906 .byte 102,65,15,56,0,218 | |
| 907 .byte 102,69,15,56,0,218 | |
| 908 movdqa %xmm3,%xmm5 | |
| 909 pshufd $78,%xmm3,%xmm4 | |
| 910 pxor %xmm3,%xmm4 | |
| 911 .byte 102,15,58,68,218,0 | |
| 912 .byte 102,15,58,68,234,17 | |
| 913 .byte 102,15,58,68,231,0 | |
| 914 | |
| 915 movdqa %xmm11,%xmm13 | |
| 916 pshufd $78,%xmm11,%xmm12 | |
| 917 pxor %xmm11,%xmm12 | |
| 918 .byte 102,68,15,58,68,222,0 | |
| 919 .byte 102,68,15,58,68,238,17 | |
| 920 .byte 102,68,15,58,68,231,16 | |
| 921 xorps %xmm11,%xmm3 | |
| 922 xorps %xmm13,%xmm5 | |
| 923 movups 80(%rsi),%xmm7 | |
| 924 xorps %xmm12,%xmm4 | |
| 925 | |
| 926 movdqu 16(%rdx),%xmm11 | |
| 927 movdqu 0(%rdx),%xmm8 | |
| 928 .byte 102,69,15,56,0,218 | |
| 929 .byte 102,69,15,56,0,194 | |
| 930 movdqa %xmm11,%xmm13 | |
| 931 pshufd $78,%xmm11,%xmm12 | |
| 932 pxor %xmm8,%xmm0 | |
| 933 pxor %xmm11,%xmm12 | |
| 934 .byte 102,69,15,58,68,222,0 | |
| 935 movdqa %xmm0,%xmm1 | |
| 936 pshufd $78,%xmm0,%xmm8 | |
| 937 pxor %xmm0,%xmm8 | |
| 938 .byte 102,69,15,58,68,238,17 | |
| 939 .byte 102,68,15,58,68,231,0 | |
| 940 xorps %xmm11,%xmm3 | |
| 941 xorps %xmm13,%xmm5 | |
| 942 | |
| 943 leaq 64(%rdx),%rdx | |
| 944 subq $0x40,%rcx | |
| 945 jc .Ltail4x | |
| 946 | |
| 947 jmp .Lmod4_loop | |
| 948 .align 32 | |
# Steady state: finish the previous group's multiply/fold while loading and
# multiplying the next 64 bytes. Runs until subq $0x40 borrows.
| 949 .Lmod4_loop: | |
| 950 .byte 102,65,15,58,68,199,0 | |
| 951 xorps %xmm12,%xmm4 | |
| 952 movdqu 48(%rdx),%xmm11 | |
| 953 .byte 102,69,15,56,0,218 | |
| 954 .byte 102,65,15,58,68,207,17 | |
| 955 xorps %xmm3,%xmm0 | |
| 956 movdqu 32(%rdx),%xmm3 | |
| 957 movdqa %xmm11,%xmm13 | |
| 958 .byte 102,68,15,58,68,199,16 | |
| 959 pshufd $78,%xmm11,%xmm12 | |
| 960 xorps %xmm5,%xmm1 | |
| 961 pxor %xmm11,%xmm12 | |
| 962 .byte 102,65,15,56,0,218 | |
| 963 movups 32(%rsi),%xmm7 | |
| 964 xorps %xmm4,%xmm8 | |
| 965 .byte 102,68,15,58,68,218,0 | |
| 966 pshufd $78,%xmm3,%xmm4 | |
| 967 | |
| 968 pxor %xmm0,%xmm8 | |
| 969 movdqa %xmm3,%xmm5 | |
| 970 pxor %xmm1,%xmm8 | |
| 971 pxor %xmm3,%xmm4 | |
| 972 movdqa %xmm8,%xmm9 | |
| 973 .byte 102,68,15,58,68,234,17 | |
| 974 pslldq $8,%xmm8 | |
| 975 psrldq $8,%xmm9 | |
| 976 pxor %xmm8,%xmm0 | |
| 977 movdqa .L7_mask(%rip),%xmm8 | |
| 978 pxor %xmm9,%xmm1 | |
| 979 .byte 102,76,15,110,200 | |
| 980 | |
| 981 pand %xmm0,%xmm8 | |
| 982 .byte 102,69,15,56,0,200 | |
| 983 pxor %xmm0,%xmm9 | |
| 984 .byte 102,68,15,58,68,231,0 | |
| 985 psllq $57,%xmm9 | |
| 986 movdqa %xmm9,%xmm8 | |
| 987 pslldq $8,%xmm9 | |
| 988 .byte 102,15,58,68,222,0 | |
| 989 psrldq $8,%xmm8 | |
| 990 pxor %xmm9,%xmm0 | |
| 991 pxor %xmm8,%xmm1 | |
| 992 movdqu 0(%rdx),%xmm8 | |
| 993 | |
| 994 movdqa %xmm0,%xmm9 | |
| 995 psrlq $1,%xmm0 | |
| 996 .byte 102,15,58,68,238,17 | |
| 997 xorps %xmm11,%xmm3 | |
| 998 movdqu 16(%rdx),%xmm11 | |
| 999 .byte 102,69,15,56,0,218 | |
| 1000 .byte 102,15,58,68,231,16 | |
| 1001 xorps %xmm13,%xmm5 | |
| 1002 movups 80(%rsi),%xmm7 | |
| 1003 .byte 102,69,15,56,0,194 | |
| 1004 pxor %xmm9,%xmm1 | |
| 1005 pxor %xmm0,%xmm9 | |
| 1006 psrlq $5,%xmm0 | |
| 1007 | |
| 1008 movdqa %xmm11,%xmm13 | |
| 1009 pxor %xmm12,%xmm4 | |
| 1010 pshufd $78,%xmm11,%xmm12 | |
| 1011 pxor %xmm9,%xmm0 | |
| 1012 pxor %xmm8,%xmm1 | |
| 1013 pxor %xmm11,%xmm12 | |
| 1014 .byte 102,69,15,58,68,222,0 | |
| 1015 psrlq $1,%xmm0 | |
| 1016 pxor %xmm1,%xmm0 | |
| 1017 movdqa %xmm0,%xmm1 | |
| 1018 .byte 102,69,15,58,68,238,17 | |
| 1019 xorps %xmm11,%xmm3 | |
| 1020 pshufd $78,%xmm0,%xmm8 | |
| 1021 pxor %xmm0,%xmm8 | |
| 1022 | |
| 1023 .byte 102,68,15,58,68,231,0 | |
| 1024 xorps %xmm13,%xmm5 | |
| 1025 | |
| 1026 leaq 64(%rdx),%rdx | |
| 1027 subq $0x40,%rcx | |
| 1028 jnc .Lmod4_loop | |
| 1029 | |
# Drain the 4-block pipeline: complete the outstanding multiplies, combine,
# and fold down to 128 bits in %xmm0.
| 1030 .Ltail4x: | |
| 1031 .byte 102,65,15,58,68,199,0 | |
| 1032 .byte 102,65,15,58,68,207,17 | |
| 1033 .byte 102,68,15,58,68,199,16 | |
| 1034 xorps %xmm12,%xmm4 | |
| 1035 xorps %xmm3,%xmm0 | |
| 1036 xorps %xmm5,%xmm1 | |
| 1037 pxor %xmm0,%xmm1 | |
| 1038 pxor %xmm4,%xmm8 | |
| 1039 | |
| 1040 pxor %xmm1,%xmm8 | |
| 1041 pxor %xmm0,%xmm1 | |
| 1042 | |
| 1043 movdqa %xmm8,%xmm9 | |
| 1044 psrldq $8,%xmm8 | |
| 1045 pslldq $8,%xmm9 | |
| 1046 pxor %xmm8,%xmm1 | |
| 1047 pxor %xmm9,%xmm0 | |
| 1048 | |
| 1049 movdqa %xmm0,%xmm4 | |
| 1050 movdqa %xmm0,%xmm3 | |
| 1051 psllq $5,%xmm0 | |
| 1052 pxor %xmm0,%xmm3 | |
| 1053 psllq $1,%xmm0 | |
| 1054 pxor %xmm3,%xmm0 | |
| 1055 psllq $57,%xmm0 | |
| 1056 movdqa %xmm0,%xmm3 | |
| 1057 pslldq $8,%xmm0 | |
| 1058 psrldq $8,%xmm3 | |
| 1059 pxor %xmm4,%xmm0 | |
| 1060 pxor %xmm3,%xmm1 | |
| 1061 | |
| 1062 | |
| 1063 movdqa %xmm0,%xmm4 | |
| 1064 psrlq $1,%xmm0 | |
| 1065 pxor %xmm4,%xmm1 | |
| 1066 pxor %xmm0,%xmm4 | |
| 1067 psrlq $5,%xmm0 | |
| 1068 pxor %xmm4,%xmm0 | |
| 1069 psrlq $1,%xmm0 | |
| 1070 pxor %xmm1,%xmm0 | |
# Undo the last 0x40 borrow: zero left means done, exactly 0x10 left means a
# single trailing block, anything else continues in the 2-block path.
# %xmm7 is reloaded from 32(%rsi) because the 4x path replaced it with the
# entry at 80(%rsi).
| 1071 addq $0x40,%rcx | |
| 1072 jz .Ldone | |
| 1073 movdqu 32(%rsi),%xmm7 | |
| 1074 subq $0x10,%rcx | |
| 1075 jz .Lodd_tail | |
| 1076 .Lskip4x: | |
| 1077 | |
| 1078 | |
| 1079 | |
| 1080 | |
| 1081 | |
# 2-block path: XOR the first block into the running value and start the
# multiplies for the second block (by the entry in %xmm2).
| 1082 movdqu (%rdx),%xmm8 | |
| 1083 movdqu 16(%rdx),%xmm3 | |
| 1084 .byte 102,69,15,56,0,194 | |
| 1085 .byte 102,65,15,56,0,218 | |
| 1086 pxor %xmm8,%xmm0 | |
| 1087 | |
| 1088 movdqa %xmm3,%xmm5 | |
| 1089 pshufd $78,%xmm3,%xmm4 | |
| 1090 pxor %xmm3,%xmm4 | |
| 1091 .byte 102,15,58,68,218,0 | |
| 1092 .byte 102,15,58,68,234,17 | |
| 1093 .byte 102,15,58,68,231,0 | |
| 1094 | |
| 1095 leaq 32(%rdx),%rdx | |
| 1096 nop | |
| 1097 subq $0x20,%rcx | |
| 1098 jbe .Leven_tail | |
| 1099 nop | |
| 1100 jmp .Lmod_loop | |
| 1101 | |
| 1102 .align 32 | |
# Steady state for two blocks per pass: multiply the running value by the
# entry in %xmm6, fold, and overlap with loading/multiplying the next pair.
| 1103 .Lmod_loop: | |
| 1104 movdqa %xmm0,%xmm1 | |
| 1105 movdqa %xmm4,%xmm8 | |
| 1106 pshufd $78,%xmm0,%xmm4 | |
| 1107 pxor %xmm0,%xmm4 | |
| 1108 | |
| 1109 .byte 102,15,58,68,198,0 | |
| 1110 .byte 102,15,58,68,206,17 | |
| 1111 .byte 102,15,58,68,231,16 | |
| 1112 | |
| 1113 pxor %xmm3,%xmm0 | |
| 1114 pxor %xmm5,%xmm1 | |
| 1115 movdqu (%rdx),%xmm9 | |
| 1116 pxor %xmm0,%xmm8 | |
| 1117 .byte 102,69,15,56,0,202 | |
| 1118 movdqu 16(%rdx),%xmm3 | |
| 1119 | |
| 1120 pxor %xmm1,%xmm8 | |
| 1121 pxor %xmm9,%xmm1 | |
| 1122 pxor %xmm8,%xmm4 | |
| 1123 .byte 102,65,15,56,0,218 | |
| 1124 movdqa %xmm4,%xmm8 | |
| 1125 psrldq $8,%xmm8 | |
| 1126 pslldq $8,%xmm4 | |
| 1127 pxor %xmm8,%xmm1 | |
| 1128 pxor %xmm4,%xmm0 | |
| 1129 | |
| 1130 movdqa %xmm3,%xmm5 | |
| 1131 | |
| 1132 movdqa %xmm0,%xmm9 | |
| 1133 movdqa %xmm0,%xmm8 | |
| 1134 psllq $5,%xmm0 | |
| 1135 pxor %xmm0,%xmm8 | |
| 1136 .byte 102,15,58,68,218,0 | |
| 1137 psllq $1,%xmm0 | |
| 1138 pxor %xmm8,%xmm0 | |
| 1139 psllq $57,%xmm0 | |
| 1140 movdqa %xmm0,%xmm8 | |
| 1141 pslldq $8,%xmm0 | |
| 1142 psrldq $8,%xmm8 | |
| 1143 pxor %xmm9,%xmm0 | |
| 1144 pshufd $78,%xmm5,%xmm4 | |
| 1145 pxor %xmm8,%xmm1 | |
| 1146 pxor %xmm5,%xmm4 | |
| 1147 | |
| 1148 movdqa %xmm0,%xmm9 | |
| 1149 psrlq $1,%xmm0 | |
| 1150 .byte 102,15,58,68,234,17 | |
| 1151 pxor %xmm9,%xmm1 | |
| 1152 pxor %xmm0,%xmm9 | |
| 1153 psrlq $5,%xmm0 | |
| 1154 pxor %xmm9,%xmm0 | |
| 1155 leaq 32(%rdx),%rdx | |
| 1156 psrlq $1,%xmm0 | |
| 1157 .byte 102,15,58,68,231,0 | |
| 1158 pxor %xmm1,%xmm0 | |
| 1159 | |
| 1160 subq $0x20,%rcx | |
| 1161 ja .Lmod_loop | |
| 1162 | |
# Finish the last pair of blocks. Because of the -0x10 bias, %rcx == 0 here
# means one more 16-byte block remains (fall through to .Lodd_tail); any
# non-zero value means all input has been consumed.
| 1163 .Leven_tail: | |
| 1164 movdqa %xmm0,%xmm1 | |
| 1165 movdqa %xmm4,%xmm8 | |
| 1166 pshufd $78,%xmm0,%xmm4 | |
| 1167 pxor %xmm0,%xmm4 | |
| 1168 | |
| 1169 .byte 102,15,58,68,198,0 | |
| 1170 .byte 102,15,58,68,206,17 | |
| 1171 .byte 102,15,58,68,231,16 | |
| 1172 | |
| 1173 pxor %xmm3,%xmm0 | |
| 1174 pxor %xmm5,%xmm1 | |
| 1175 pxor %xmm0,%xmm8 | |
| 1176 pxor %xmm1,%xmm8 | |
| 1177 pxor %xmm8,%xmm4 | |
| 1178 movdqa %xmm4,%xmm8 | |
| 1179 psrldq $8,%xmm8 | |
| 1180 pslldq $8,%xmm4 | |
| 1181 pxor %xmm8,%xmm1 | |
| 1182 pxor %xmm4,%xmm0 | |
| 1183 | |
| 1184 movdqa %xmm0,%xmm4 | |
| 1185 movdqa %xmm0,%xmm3 | |
| 1186 psllq $5,%xmm0 | |
| 1187 pxor %xmm0,%xmm3 | |
| 1188 psllq $1,%xmm0 | |
| 1189 pxor %xmm3,%xmm0 | |
| 1190 psllq $57,%xmm0 | |
| 1191 movdqa %xmm0,%xmm3 | |
| 1192 pslldq $8,%xmm0 | |
| 1193 psrldq $8,%xmm3 | |
| 1194 pxor %xmm4,%xmm0 | |
| 1195 pxor %xmm3,%xmm1 | |
| 1196 | |
| 1197 | |
| 1198 movdqa %xmm0,%xmm4 | |
| 1199 psrlq $1,%xmm0 | |
| 1200 pxor %xmm4,%xmm1 | |
| 1201 pxor %xmm0,%xmm4 | |
| 1202 psrlq $5,%xmm0 | |
| 1203 pxor %xmm4,%xmm0 | |
| 1204 psrlq $1,%xmm0 | |
| 1205 pxor %xmm1,%xmm0 | |
| 1206 testq %rcx,%rcx | |
| 1207 jnz .Ldone | |
| 1208 | |
# Single trailing block: XOR it into the running value, multiply by the
# entry in %xmm2 (XOR-folded halves against %xmm7) and fold to 128 bits.
| 1209 .Lodd_tail: | |
| 1210 movdqu (%rdx),%xmm8 | |
| 1211 .byte 102,69,15,56,0,194 | |
| 1212 pxor %xmm8,%xmm0 | |
| 1213 movdqa %xmm0,%xmm1 | |
| 1214 pshufd $78,%xmm0,%xmm3 | |
| 1215 pxor %xmm0,%xmm3 | |
| 1216 .byte 102,15,58,68,194,0 | |
| 1217 .byte 102,15,58,68,202,17 | |
| 1218 .byte 102,15,58,68,223,0 | |
| 1219 pxor %xmm0,%xmm3 | |
| 1220 pxor %xmm1,%xmm3 | |
| 1221 | |
| 1222 movdqa %xmm3,%xmm4 | |
| 1223 psrldq $8,%xmm3 | |
| 1224 pslldq $8,%xmm4 | |
| 1225 pxor %xmm3,%xmm1 | |
| 1226 pxor %xmm4,%xmm0 | |
| 1227 | |
| 1228 movdqa %xmm0,%xmm4 | |
| 1229 movdqa %xmm0,%xmm3 | |
| 1230 psllq $5,%xmm0 | |
| 1231 pxor %xmm0,%xmm3 | |
| 1232 psllq $1,%xmm0 | |
| 1233 pxor %xmm3,%xmm0 | |
| 1234 psllq $57,%xmm0 | |
| 1235 movdqa %xmm0,%xmm3 | |
| 1236 pslldq $8,%xmm0 | |
| 1237 psrldq $8,%xmm3 | |
| 1238 pxor %xmm4,%xmm0 | |
| 1239 pxor %xmm3,%xmm1 | |
| 1240 | |
| 1241 | |
| 1242 movdqa %xmm0,%xmm4 | |
| 1243 psrlq $1,%xmm0 | |
| 1244 pxor %xmm4,%xmm1 | |
| 1245 pxor %xmm0,%xmm4 | |
| 1246 psrlq $5,%xmm0 | |
| 1247 pxor %xmm4,%xmm0 | |
| 1248 psrlq $1,%xmm0 | |
| 1249 pxor %xmm1,%xmm0 | |
# Reorder the bytes of the running value through the mask in %xmm10 again
# and store it back.
| 1250 .Ldone: | |
| 1251 .byte 102,65,15,56,0,194 | |
| 1252 movdqu %xmm0,(%rdi) | |
| 1253 .byte 0xf3,0xc3 | |
| 1254 .size gcm_ghash_clmul,.-gcm_ghash_clmul | |
| 1255 .globl gcm_init_avx /* gcm_init_avx: global entry point whose whole body is the single jump below; despite the _avx name, no AVX instruction is emitted in this symbol */ | |
| 1256 .hidden gcm_init_avx /* ELF hidden visibility */ | |
| 1257 .type gcm_init_avx,@function | |
| 1258 .align 32 /* place the entry point on a 32-byte boundary */ | |
| 1259 gcm_init_avx: | |
| 1260 jmp .L_init_clmul /* unconditional forward; nothing is pushed or modified first, so the target sees the caller's registers and return address unchanged. NOTE(review): .L_init_clmul is defined outside this chunk - presumably inside gcm_init_clmul; confirm */ | |
| 1261 .size gcm_init_avx,.-gcm_init_avx | |
| 1262 .globl gcm_gmult_avx /* gcm_gmult_avx: global entry point whose whole body is the single jump below; despite the _avx name, no AVX instruction is emitted in this symbol */ | |
| 1263 .hidden gcm_gmult_avx /* ELF hidden visibility */ | |
| 1264 .type gcm_gmult_avx,@function | |
| 1265 .align 32 /* place the entry point on a 32-byte boundary */ | |
| 1266 gcm_gmult_avx: | |
| 1267 jmp .L_gmult_clmul /* unconditional forward; nothing is pushed or modified first, so the target sees the caller's registers and return address unchanged. NOTE(review): .L_gmult_clmul is defined outside this chunk - presumably inside gcm_gmult_clmul; confirm */ | |
| 1268 .size gcm_gmult_avx,.-gcm_gmult_avx | |
| 1269 .globl gcm_ghash_avx /* gcm_ghash_avx: global entry point whose whole body is the single jump below; despite the _avx name, no AVX instruction is emitted in this symbol */ | |
| 1270 .hidden gcm_ghash_avx /* ELF hidden visibility */ | |
| 1271 .type gcm_ghash_avx,@function | |
| 1272 .align 32 /* place the entry point on a 32-byte boundary */ | |
| 1273 gcm_ghash_avx: | |
| 1274 jmp .L_ghash_clmul /* unconditional forward; nothing is pushed or modified first, so the target sees the caller's registers and return address unchanged. NOTE(review): .L_ghash_clmul is defined outside this chunk - presumably inside gcm_ghash_clmul; confirm */ | |
| 1275 .size gcm_ghash_avx,.-gcm_ghash_avx | |
| 1276 .align 64 /* start of the 16-byte constants; the first one sits on a 64-byte boundary. The loads that reference these labels are outside this chunk */ | |
| 1277 .Lbswap_mask: /* byte indices 15 down to 0; used as a PSHUFB control this pattern reverses the 16 bytes of an XMM register */ | |
| 1278 .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 | |
| 1279 .L0x1c2_polynomial: /* 16 bytes: lowest byte 0x01, highest byte 0xc2, everything between is zero. NOTE(review): the label name suggests the 0x1c2 reduction constant - confirm against the code that loads it */ | |
| 1280 .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 | |
| 1281 .L7_mask: /* two 64-bit lanes, each holding the value 7 */ | |
| 1282 .long 7,0,7,0 | |
| 1283 .L7_mask_poly: /* low 64-bit lane = 7, high 64-bit lane = 450 (0x1c2) */ | |
| 1284 .long 7,0,450,0 | |
| 1285 .align 64 /* table starts on a 64-byte boundary */ | |
| 1286 .type .Lrem_4bit,@object | |
| 1287 .Lrem_4bit: /* 16 entries of 8 bytes, each written as two .long values: low dword 0, high dword = payload (entry 1 = 471859200 = 0x1C200000, i.e. 0x1C20 << 48 as a qword). gcm_gmult_4bit loads this address into %r11 and XORs (%r11,%rdx,8) into its accumulator after masking %rdx to 4 bits */ | |
| 1288 .long 0,0,0,471859200,0,943718400,0,610271232 | |
| 1289 .long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 | |
| 1290 .long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 | |
| 1291 .long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 | |
| 1292 .type .Lrem_8bit,@object | |
| 1293 .Lrem_8bit: /* 256 entries of 16 bits (32 rows of 8 .value items); entry 1 is 0x01C2 and entry 16 is 0x1C20. NOTE(review): the code that indexes this table is outside this chunk - presumably the byte-at-a-time counterpart of .Lrem_4bit; confirm */ | |
| 1294 .value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E | |
| 1295 .value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E | |
| 1296 .value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E | |
| 1297 .value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E | |
| 1298 .value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E | |
| 1299 .value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E | |
| 1300 .value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E | |
| 1301 .value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E | |
| 1302 .value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE | |
| 1303 .value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE | |
| 1304 .value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE | |
| 1305 .value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE | |
| 1306 .value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E | |
| 1307 .value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E | |
| 1308 .value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE | |
| 1309 .value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE | |
| 1310 .value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E | |
| 1311 .value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E | |
| 1312 .value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E | |
| 1313 .value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E | |
| 1314 .value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E | |
| 1315 .value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E | |
| 1316 .value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E | |
| 1317 .value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E | |
| 1318 .value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE | |
| 1319 .value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE | |
| 1320 .value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE | |
| 1321 .value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE | |
| 1322 .value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E | |
| 1323 .value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E | |
| 1324 .value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE | |
| 1325 .value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE | |
| 1326 | |
| 1327 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84
,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,10
8,46,111,114,103,62,0 /* NUL-terminated ASCII: "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"; this one .byte directive is wrapped across three physical lines in this capture, splitting the value 108 */ | |
| 1328 .align 64 /* pad the end of the data to a 64-byte boundary */ | |
| 1329 #endif /* closes the #if defined(__x86_64__) opened on the first line of the file */ | |
| OLD | NEW |