OLD | NEW |
(Empty) | |
| 1 default rel |
| 2 %define XMMWORD |
| 3 %define YMMWORD |
| 4 %define ZMMWORD |
| 5 section .text code align=64 |
| 6 |
| 7 EXTERN OPENSSL_ia32cap_P |
| 8 |
;-----------------------------------------------------------------------
; gcm_gmult_4bit
; Table-driven multiply of a 16-byte value, performed in place, consuming
; the value one 4-bit nibble at a time from byte 15 down to byte 0.
; ABI:   Win64 entry (arguments arrive in rcx, rdx); they are copied into
;        rdi/rsi and the caller's rdi/rsi are parked at [8+rsp]/[16+rsp].
; In:    rcx = pointer to the 16-byte value (read, then overwritten)
;        rdx = pointer to a table read as 16-byte entries at nibble*16
;              NOTE(review): presumably the 16-entry key table built by the
;              matching init routine - confirm against the caller.
; Out:   result stored byte-swapped (bswap) at [rdi] and [8+rdi].
; Uses:  $L$rem_4bit (via r11) as the reduction table, 8-byte entries.
;-----------------------------------------------------------------------
| 9 global gcm_gmult_4bit |
| 10 |
| 11 ALIGN 16 |
| 12 gcm_gmult_4bit: |
| 13 mov QWORD[8+rsp],rdi ;WIN64 prologue |
| 14 mov QWORD[16+rsp],rsi |
| 15 mov rax,rsp |
| 16 $L$SEH_begin_gcm_gmult_4bit: |
| 17 mov rdi,rcx |
| 18 mov rsi,rdx |
| 19 |
| 20 |
| 21 push rbx |
| 22 push rbp |
| 23 push r12 |
| 24 $L$gmult_prologue: |
| 25 |
; Seed the accumulator (r9:r8) from the table entry selected by the low
; nibble of byte 15; bl keeps the high nibble (already scaled by 16).
; rcx = index of the next byte to consume (14 down to 0).
| 26 movzx r8,BYTE[15+rdi] |
| 27 lea r11,[$L$rem_4bit] |
| 28 xor rax,rax |
| 29 xor rbx,rbx |
| 30 mov al,r8b |
| 31 mov bl,r8b |
| 32 shl al,4 |
| 33 mov rcx,14 |
| 34 mov r8,QWORD[8+rax*1+rsi] |
| 35 mov r9,QWORD[rax*1+rsi] |
| 36 and bl,0xf0 |
| 37 mov rdx,r8 |
| 38 jmp NEAR $L$oop1 |
| 39 |
| 40 ALIGN 16 |
; Each pass handles two nibbles. A half-step shifts r9:r8 right by 4
; (r10 carries the 4 bits crossing from r9 into r8), xors in the table
; entry for the current nibble, and xors r9 with rem_4bit[bits shifted out].
| 41 $L$oop1: |
; Half-step 1: high nibble of the previous byte (rbx); also fetches the
; next byte into al and prepares its low nibble.
| 42 shr r8,4 |
| 43 and rdx,0xf |
| 44 mov r10,r9 |
| 45 mov al,BYTE[rcx*1+rdi] |
| 46 shr r9,4 |
| 47 xor r8,QWORD[8+rbx*1+rsi] |
| 48 shl r10,60 |
| 49 xor r9,QWORD[rbx*1+rsi] |
| 50 mov bl,al |
| 51 xor r9,QWORD[rdx*8+r11] |
| 52 mov rdx,r8 |
| 53 shl al,4 |
| 54 xor r8,r10 |
; Leave the loop once the byte index goes negative (byte 0 was just fetched).
| 55 dec rcx |
| 56 js NEAR $L$break1 |
| 57 |
; Half-step 2: low nibble of the byte just fetched (rax).
| 58 shr r8,4 |
| 59 and rdx,0xf |
| 60 mov r10,r9 |
| 61 shr r9,4 |
| 62 xor r8,QWORD[8+rax*1+rsi] |
| 63 shl r10,60 |
| 64 xor r9,QWORD[rax*1+rsi] |
| 65 and bl,0xf0 |
| 66 xor r9,QWORD[rdx*8+r11] |
| 67 mov rdx,r8 |
| 68 xor r8,r10 |
| 69 jmp NEAR $L$oop1 |
| 70 |
| 71 ALIGN 16 |
; Tail: the two remaining half-steps for byte 0 (low nibble via rax, then
; high nibble via rbx), with no further byte fetch.
| 72 $L$break1: |
| 73 shr r8,4 |
| 74 and rdx,0xf |
| 75 mov r10,r9 |
| 76 shr r9,4 |
| 77 xor r8,QWORD[8+rax*1+rsi] |
| 78 shl r10,60 |
| 79 xor r9,QWORD[rax*1+rsi] |
| 80 and bl,0xf0 |
| 81 xor r9,QWORD[rdx*8+r11] |
| 82 mov rdx,r8 |
| 83 xor r8,r10 |
| 84 |
| 85 shr r8,4 |
| 86 and rdx,0xf |
| 87 mov r10,r9 |
| 88 shr r9,4 |
| 89 xor r8,QWORD[8+rbx*1+rsi] |
| 90 shl r10,60 |
| 91 xor r9,QWORD[rbx*1+rsi] |
| 92 xor r8,r10 |
| 93 xor r9,QWORD[rdx*8+r11] |
| 94 |
; Byte-swap both halves and write the result back over the input value.
| 95 bswap r8 |
| 96 bswap r9 |
| 97 mov QWORD[8+rdi],r8 |
| 98 mov QWORD[rdi],r9 |
| 99 |
; Only rbx is reloaded: rbp and r12 were pushed but are never written in
; this routine, so dropping their slots with the lea leaves them intact.
| 100 mov rbx,QWORD[16+rsp] |
| 101 lea rsp,[24+rsp] |
| 102 $L$gmult_epilogue: |
| 103 mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
| 104 mov rsi,QWORD[16+rsp] |
| 105 DB 0F3h,0C3h ;repret |
| 106 $L$SEH_end_gcm_gmult_4bit: |
;-----------------------------------------------------------------------
; gcm_ghash_4bit
; Folds a buffer of 16-byte blocks into a 16-byte running value using
; table lookups; each block is xored into the value and the value is then
; multiplied via the tables, one byte of the value per unrolled step.
; ABI:   Win64 entry (rcx, rdx, r8, r9), remapped to rdi, rsi, rdx, rcx;
;        caller's rdi/rsi are parked at [8+rsp]/[16+rsp].
; In:    rcx = pointer to the 16-byte running value (updated in place)
;        rdx = pointer to a table of 16 entries, 16 bytes each
;        r8  = pointer to the input data (kept in r14)
;        r9  = input length in bytes (kept in r15, turned into an end pointer)
;              NOTE(review): the loop test is at the bottom, so one 16-byte
;              block is always consumed; presumably callers pass a nonzero
;              multiple of 16 - confirm.
; Stack: 280 bytes of locals:
;          [rsp+0   .. rsp+15 ]  16 bytes, one per table entry
;          [rsp+16  .. rsp+143]  16 qwords addressed as [k*8-128+rbp]
;          [rsp+144 .. rsp+271]  16 qwords addressed as [k*8+rbp]
; Uses:  $L$rem_8bit (via r11) as the reduction table, 2-byte entries.
;-----------------------------------------------------------------------
| 107 global gcm_ghash_4bit |
| 108 |
| 109 ALIGN 16 |
| 110 gcm_ghash_4bit: |
| 111 mov QWORD[8+rsp],rdi ;WIN64 prologue |
| 112 mov QWORD[16+rsp],rsi |
| 113 mov rax,rsp |
| 114 $L$SEH_begin_gcm_ghash_4bit: |
| 115 mov rdi,rcx |
| 116 mov rsi,rdx |
| 117 mov rdx,r8 |
| 118 mov rcx,r9 |
| 119 |
| 120 |
| 121 push rbx |
| 122 push rbp |
| 123 push r12 |
| 124 push r13 |
| 125 push r14 |
| 126 push r15 |
| 127 sub rsp,280 |
| 128 $L$ghash_prologue: |
| 129 mov r14,rdx |
| 130 mov r15,rcx |
; rsi is biased by +128 for the table build below (undone by the matching
; "add rsi,-128"); rbp points at the middle of the on-stack qword area.
| 131 sub rsi,-128 |
| 132 lea rbp,[((16+128))+rsp] |
| 133 xor edx,edx |
; Build the on-stack tables. For each of the 16 source entries k
; (first qword in r8/r9, second qword in rax/rbx, alternating):
;   BYTE[k+rsp]         = low byte of the second qword, shifted left by 4
;   QWORD[k*8+rbp]      = first qword >> 4
;   QWORD[k*8-128+rbp]  = (second qword >> 4) | (first qword << 60)
; i.e. the entry shifted right by 4 bits as one 128-bit quantity, plus the
; 4 bits that were shifted out. Loads for entry k+1 are interleaved with
; the stores for entry k.
| 134 mov r8,QWORD[((0+0-128))+rsi] |
| 135 mov rax,QWORD[((0+8-128))+rsi] |
| 136 mov dl,al |
| 137 shr rax,4 |
| 138 mov r10,r8 |
| 139 shr r8,4 |
| 140 mov r9,QWORD[((16+0-128))+rsi] |
| 141 shl dl,4 |
| 142 mov rbx,QWORD[((16+8-128))+rsi] |
| 143 shl r10,60 |
| 144 mov BYTE[rsp],dl |
| 145 or rax,r10 |
| 146 mov dl,bl |
| 147 shr rbx,4 |
| 148 mov r10,r9 |
| 149 shr r9,4 |
| 150 mov QWORD[rbp],r8 |
| 151 mov r8,QWORD[((32+0-128))+rsi] |
| 152 shl dl,4 |
| 153 mov QWORD[((0-128))+rbp],rax |
| 154 mov rax,QWORD[((32+8-128))+rsi] |
| 155 shl r10,60 |
| 156 mov BYTE[1+rsp],dl |
| 157 or rbx,r10 |
| 158 mov dl,al |
| 159 shr rax,4 |
| 160 mov r10,r8 |
| 161 shr r8,4 |
| 162 mov QWORD[8+rbp],r9 |
| 163 mov r9,QWORD[((48+0-128))+rsi] |
| 164 shl dl,4 |
| 165 mov QWORD[((8-128))+rbp],rbx |
| 166 mov rbx,QWORD[((48+8-128))+rsi] |
| 167 shl r10,60 |
| 168 mov BYTE[2+rsp],dl |
| 169 or rax,r10 |
| 170 mov dl,bl |
| 171 shr rbx,4 |
| 172 mov r10,r9 |
| 173 shr r9,4 |
| 174 mov QWORD[16+rbp],r8 |
| 175 mov r8,QWORD[((64+0-128))+rsi] |
| 176 shl dl,4 |
| 177 mov QWORD[((16-128))+rbp],rax |
| 178 mov rax,QWORD[((64+8-128))+rsi] |
| 179 shl r10,60 |
| 180 mov BYTE[3+rsp],dl |
| 181 or rbx,r10 |
| 182 mov dl,al |
| 183 shr rax,4 |
| 184 mov r10,r8 |
| 185 shr r8,4 |
| 186 mov QWORD[24+rbp],r9 |
| 187 mov r9,QWORD[((80+0-128))+rsi] |
| 188 shl dl,4 |
| 189 mov QWORD[((24-128))+rbp],rbx |
| 190 mov rbx,QWORD[((80+8-128))+rsi] |
| 191 shl r10,60 |
| 192 mov BYTE[4+rsp],dl |
| 193 or rax,r10 |
| 194 mov dl,bl |
| 195 shr rbx,4 |
| 196 mov r10,r9 |
| 197 shr r9,4 |
| 198 mov QWORD[32+rbp],r8 |
| 199 mov r8,QWORD[((96+0-128))+rsi] |
| 200 shl dl,4 |
| 201 mov QWORD[((32-128))+rbp],rax |
| 202 mov rax,QWORD[((96+8-128))+rsi] |
| 203 shl r10,60 |
| 204 mov BYTE[5+rsp],dl |
| 205 or rbx,r10 |
| 206 mov dl,al |
| 207 shr rax,4 |
| 208 mov r10,r8 |
| 209 shr r8,4 |
| 210 mov QWORD[40+rbp],r9 |
| 211 mov r9,QWORD[((112+0-128))+rsi] |
| 212 shl dl,4 |
| 213 mov QWORD[((40-128))+rbp],rbx |
| 214 mov rbx,QWORD[((112+8-128))+rsi] |
| 215 shl r10,60 |
| 216 mov BYTE[6+rsp],dl |
| 217 or rax,r10 |
| 218 mov dl,bl |
| 219 shr rbx,4 |
| 220 mov r10,r9 |
| 221 shr r9,4 |
| 222 mov QWORD[48+rbp],r8 |
| 223 mov r8,QWORD[((128+0-128))+rsi] |
| 224 shl dl,4 |
| 225 mov QWORD[((48-128))+rbp],rax |
| 226 mov rax,QWORD[((128+8-128))+rsi] |
| 227 shl r10,60 |
| 228 mov BYTE[7+rsp],dl |
| 229 or rbx,r10 |
| 230 mov dl,al |
| 231 shr rax,4 |
| 232 mov r10,r8 |
| 233 shr r8,4 |
| 234 mov QWORD[56+rbp],r9 |
| 235 mov r9,QWORD[((144+0-128))+rsi] |
| 236 shl dl,4 |
| 237 mov QWORD[((56-128))+rbp],rbx |
| 238 mov rbx,QWORD[((144+8-128))+rsi] |
| 239 shl r10,60 |
| 240 mov BYTE[8+rsp],dl |
| 241 or rax,r10 |
| 242 mov dl,bl |
| 243 shr rbx,4 |
| 244 mov r10,r9 |
| 245 shr r9,4 |
| 246 mov QWORD[64+rbp],r8 |
| 247 mov r8,QWORD[((160+0-128))+rsi] |
| 248 shl dl,4 |
| 249 mov QWORD[((64-128))+rbp],rax |
| 250 mov rax,QWORD[((160+8-128))+rsi] |
| 251 shl r10,60 |
| 252 mov BYTE[9+rsp],dl |
| 253 or rbx,r10 |
| 254 mov dl,al |
| 255 shr rax,4 |
| 256 mov r10,r8 |
| 257 shr r8,4 |
| 258 mov QWORD[72+rbp],r9 |
| 259 mov r9,QWORD[((176+0-128))+rsi] |
| 260 shl dl,4 |
| 261 mov QWORD[((72-128))+rbp],rbx |
| 262 mov rbx,QWORD[((176+8-128))+rsi] |
| 263 shl r10,60 |
| 264 mov BYTE[10+rsp],dl |
| 265 or rax,r10 |
| 266 mov dl,bl |
| 267 shr rbx,4 |
| 268 mov r10,r9 |
| 269 shr r9,4 |
| 270 mov QWORD[80+rbp],r8 |
| 271 mov r8,QWORD[((192+0-128))+rsi] |
| 272 shl dl,4 |
| 273 mov QWORD[((80-128))+rbp],rax |
| 274 mov rax,QWORD[((192+8-128))+rsi] |
| 275 shl r10,60 |
| 276 mov BYTE[11+rsp],dl |
| 277 or rbx,r10 |
| 278 mov dl,al |
| 279 shr rax,4 |
| 280 mov r10,r8 |
| 281 shr r8,4 |
| 282 mov QWORD[88+rbp],r9 |
| 283 mov r9,QWORD[((208+0-128))+rsi] |
| 284 shl dl,4 |
| 285 mov QWORD[((88-128))+rbp],rbx |
| 286 mov rbx,QWORD[((208+8-128))+rsi] |
| 287 shl r10,60 |
| 288 mov BYTE[12+rsp],dl |
| 289 or rax,r10 |
| 290 mov dl,bl |
| 291 shr rbx,4 |
| 292 mov r10,r9 |
| 293 shr r9,4 |
| 294 mov QWORD[96+rbp],r8 |
| 295 mov r8,QWORD[((224+0-128))+rsi] |
| 296 shl dl,4 |
| 297 mov QWORD[((96-128))+rbp],rax |
| 298 mov rax,QWORD[((224+8-128))+rsi] |
| 299 shl r10,60 |
| 300 mov BYTE[13+rsp],dl |
| 301 or rbx,r10 |
| 302 mov dl,al |
| 303 shr rax,4 |
| 304 mov r10,r8 |
| 305 shr r8,4 |
| 306 mov QWORD[104+rbp],r9 |
| 307 mov r9,QWORD[((240+0-128))+rsi] |
| 308 shl dl,4 |
| 309 mov QWORD[((104-128))+rbp],rbx |
| 310 mov rbx,QWORD[((240+8-128))+rsi] |
| 311 shl r10,60 |
| 312 mov BYTE[14+rsp],dl |
| 313 or rax,r10 |
| 314 mov dl,bl |
| 315 shr rbx,4 |
| 316 mov r10,r9 |
| 317 shr r9,4 |
| 318 mov QWORD[112+rbp],r8 |
| 319 shl dl,4 |
| 320 mov QWORD[((112-128))+rbp],rax |
| 321 shl r10,60 |
| 322 mov BYTE[15+rsp],dl |
| 323 or rbx,r10 |
| 324 mov QWORD[120+rbp],r9 |
| 325 mov QWORD[((120-128))+rbp],rbx |
; Table build done: undo the rsi bias, load the running value into r9:r8,
; and turn r15 from a length into an end-of-input pointer.
| 326 add rsi,-128 |
| 327 mov r8,QWORD[8+rdi] |
| 328 mov r9,QWORD[rdi] |
| 329 add r15,r14 |
| 330 lea r11,[$L$rem_8bit] |
| 331 jmp NEAR $L$outer_loop |
| 332 ALIGN 16 |
; One pass per 16-byte input block. The block is xored into the running
; value, which is written back to [rdi] so that later steps can reload it
; 4 bytes at a time into edx ("rol edx,8" exposes one byte per step in dl).
; Per unrolled step:
;   rax       = (low nibble of the byte) * 16, index into the table at rsi
;   rbx / rcx = high nibble of the byte, index into the on-stack tables
;   r12 / r13 = 8-bit index into rem_8bit; the 16-bit entry is shifted left
;               by 48 and xored into r9
; r9:r8 is shifted right by 8 each step, with r10 carrying the crossing byte.
| 333 $L$outer_loop: |
| 334 xor r9,QWORD[r14] |
| 335 mov rdx,QWORD[8+r14] |
| 336 lea r14,[16+r14] |
| 337 xor rdx,r8 |
| 338 mov QWORD[rdi],r9 |
| 339 mov QWORD[8+rdi],rdx |
| 340 shr rdx,32 |
| 341 xor rax,rax |
| 342 rol edx,8 |
| 343 mov al,dl |
| 344 movzx ebx,dl |
| 345 shl al,4 |
| 346 shr ebx,4 |
| 347 rol edx,8 |
| 348 mov r8,QWORD[8+rax*1+rsi] |
| 349 mov r9,QWORD[rax*1+rsi] |
| 350 mov al,dl |
| 351 movzx ecx,dl |
| 352 shl al,4 |
| 353 movzx r12,BYTE[rbx*1+rsp] |
| 354 shr ecx,4 |
| 355 xor r12,r8 |
| 356 mov r10,r9 |
| 357 shr r8,8 |
| 358 movzx r12,r12b |
| 359 shr r9,8 |
| 360 xor r8,QWORD[((-128))+rbx*8+rbp] |
| 361 shl r10,56 |
| 362 xor r9,QWORD[rbx*8+rbp] |
| 363 rol edx,8 |
| 364 xor r8,QWORD[8+rax*1+rsi] |
| 365 xor r9,QWORD[rax*1+rsi] |
| 366 mov al,dl |
| 367 xor r8,r10 |
| 368 movzx r12,WORD[r12*2+r11] |
| 369 movzx ebx,dl |
| 370 shl al,4 |
| 371 movzx r13,BYTE[rcx*1+rsp] |
| 372 shr ebx,4 |
| 373 shl r12,48 |
| 374 xor r13,r8 |
| 375 mov r10,r9 |
| 376 xor r9,r12 |
| 377 shr r8,8 |
| 378 movzx r13,r13b |
| 379 shr r9,8 |
| 380 xor r8,QWORD[((-128))+rcx*8+rbp] |
| 381 shl r10,56 |
| 382 xor r9,QWORD[rcx*8+rbp] |
| 383 rol edx,8 |
| 384 xor r8,QWORD[8+rax*1+rsi] |
| 385 xor r9,QWORD[rax*1+rsi] |
| 386 mov al,dl |
| 387 xor r8,r10 |
| 388 movzx r13,WORD[r13*2+r11] |
| 389 movzx ecx,dl |
| 390 shl al,4 |
| 391 movzx r12,BYTE[rbx*1+rsp] |
| 392 shr ecx,4 |
| 393 shl r13,48 |
| 394 xor r12,r8 |
| 395 mov r10,r9 |
| 396 xor r9,r13 |
| 397 shr r8,8 |
| 398 movzx r12,r12b |
; Next 4 bytes of the stored value (offset 8).
| 399 mov edx,DWORD[8+rdi] |
| 400 shr r9,8 |
| 401 xor r8,QWORD[((-128))+rbx*8+rbp] |
| 402 shl r10,56 |
| 403 xor r9,QWORD[rbx*8+rbp] |
| 404 rol edx,8 |
| 405 xor r8,QWORD[8+rax*1+rsi] |
| 406 xor r9,QWORD[rax*1+rsi] |
| 407 mov al,dl |
| 408 xor r8,r10 |
| 409 movzx r12,WORD[r12*2+r11] |
| 410 movzx ebx,dl |
| 411 shl al,4 |
| 412 movzx r13,BYTE[rcx*1+rsp] |
| 413 shr ebx,4 |
| 414 shl r12,48 |
| 415 xor r13,r8 |
| 416 mov r10,r9 |
| 417 xor r9,r12 |
| 418 shr r8,8 |
| 419 movzx r13,r13b |
| 420 shr r9,8 |
| 421 xor r8,QWORD[((-128))+rcx*8+rbp] |
| 422 shl r10,56 |
| 423 xor r9,QWORD[rcx*8+rbp] |
| 424 rol edx,8 |
| 425 xor r8,QWORD[8+rax*1+rsi] |
| 426 xor r9,QWORD[rax*1+rsi] |
| 427 mov al,dl |
| 428 xor r8,r10 |
| 429 movzx r13,WORD[r13*2+r11] |
| 430 movzx ecx,dl |
| 431 shl al,4 |
| 432 movzx r12,BYTE[rbx*1+rsp] |
| 433 shr ecx,4 |
| 434 shl r13,48 |
| 435 xor r12,r8 |
| 436 mov r10,r9 |
| 437 xor r9,r13 |
| 438 shr r8,8 |
| 439 movzx r12,r12b |
| 440 shr r9,8 |
| 441 xor r8,QWORD[((-128))+rbx*8+rbp] |
| 442 shl r10,56 |
| 443 xor r9,QWORD[rbx*8+rbp] |
| 444 rol edx,8 |
| 445 xor r8,QWORD[8+rax*1+rsi] |
| 446 xor r9,QWORD[rax*1+rsi] |
| 447 mov al,dl |
| 448 xor r8,r10 |
| 449 movzx r12,WORD[r12*2+r11] |
| 450 movzx ebx,dl |
| 451 shl al,4 |
| 452 movzx r13,BYTE[rcx*1+rsp] |
| 453 shr ebx,4 |
| 454 shl r12,48 |
| 455 xor r13,r8 |
| 456 mov r10,r9 |
| 457 xor r9,r12 |
| 458 shr r8,8 |
| 459 movzx r13,r13b |
| 460 shr r9,8 |
| 461 xor r8,QWORD[((-128))+rcx*8+rbp] |
| 462 shl r10,56 |
| 463 xor r9,QWORD[rcx*8+rbp] |
| 464 rol edx,8 |
| 465 xor r8,QWORD[8+rax*1+rsi] |
| 466 xor r9,QWORD[rax*1+rsi] |
| 467 mov al,dl |
| 468 xor r8,r10 |
| 469 movzx r13,WORD[r13*2+r11] |
| 470 movzx ecx,dl |
| 471 shl al,4 |
| 472 movzx r12,BYTE[rbx*1+rsp] |
| 473 shr ecx,4 |
| 474 shl r13,48 |
| 475 xor r12,r8 |
| 476 mov r10,r9 |
| 477 xor r9,r13 |
| 478 shr r8,8 |
| 479 movzx r12,r12b |
; Next 4 bytes of the stored value (offset 4).
| 480 mov edx,DWORD[4+rdi] |
| 481 shr r9,8 |
| 482 xor r8,QWORD[((-128))+rbx*8+rbp] |
| 483 shl r10,56 |
| 484 xor r9,QWORD[rbx*8+rbp] |
| 485 rol edx,8 |
| 486 xor r8,QWORD[8+rax*1+rsi] |
| 487 xor r9,QWORD[rax*1+rsi] |
| 488 mov al,dl |
| 489 xor r8,r10 |
| 490 movzx r12,WORD[r12*2+r11] |
| 491 movzx ebx,dl |
| 492 shl al,4 |
| 493 movzx r13,BYTE[rcx*1+rsp] |
| 494 shr ebx,4 |
| 495 shl r12,48 |
| 496 xor r13,r8 |
| 497 mov r10,r9 |
| 498 xor r9,r12 |
| 499 shr r8,8 |
| 500 movzx r13,r13b |
| 501 shr r9,8 |
| 502 xor r8,QWORD[((-128))+rcx*8+rbp] |
| 503 shl r10,56 |
| 504 xor r9,QWORD[rcx*8+rbp] |
| 505 rol edx,8 |
| 506 xor r8,QWORD[8+rax*1+rsi] |
| 507 xor r9,QWORD[rax*1+rsi] |
| 508 mov al,dl |
| 509 xor r8,r10 |
| 510 movzx r13,WORD[r13*2+r11] |
| 511 movzx ecx,dl |
| 512 shl al,4 |
| 513 movzx r12,BYTE[rbx*1+rsp] |
| 514 shr ecx,4 |
| 515 shl r13,48 |
| 516 xor r12,r8 |
| 517 mov r10,r9 |
| 518 xor r9,r13 |
| 519 shr r8,8 |
| 520 movzx r12,r12b |
| 521 shr r9,8 |
| 522 xor r8,QWORD[((-128))+rbx*8+rbp] |
| 523 shl r10,56 |
| 524 xor r9,QWORD[rbx*8+rbp] |
| 525 rol edx,8 |
| 526 xor r8,QWORD[8+rax*1+rsi] |
| 527 xor r9,QWORD[rax*1+rsi] |
| 528 mov al,dl |
| 529 xor r8,r10 |
| 530 movzx r12,WORD[r12*2+r11] |
| 531 movzx ebx,dl |
| 532 shl al,4 |
| 533 movzx r13,BYTE[rcx*1+rsp] |
| 534 shr ebx,4 |
| 535 shl r12,48 |
| 536 xor r13,r8 |
| 537 mov r10,r9 |
| 538 xor r9,r12 |
| 539 shr r8,8 |
| 540 movzx r13,r13b |
| 541 shr r9,8 |
| 542 xor r8,QWORD[((-128))+rcx*8+rbp] |
| 543 shl r10,56 |
| 544 xor r9,QWORD[rcx*8+rbp] |
| 545 rol edx,8 |
| 546 xor r8,QWORD[8+rax*1+rsi] |
| 547 xor r9,QWORD[rax*1+rsi] |
| 548 mov al,dl |
| 549 xor r8,r10 |
| 550 movzx r13,WORD[r13*2+r11] |
| 551 movzx ecx,dl |
| 552 shl al,4 |
| 553 movzx r12,BYTE[rbx*1+rsp] |
| 554 shr ecx,4 |
| 555 shl r13,48 |
| 556 xor r12,r8 |
| 557 mov r10,r9 |
| 558 xor r9,r13 |
| 559 shr r8,8 |
| 560 movzx r12,r12b |
; Last 4 bytes of the stored value (offset 0).
| 561 mov edx,DWORD[rdi] |
| 562 shr r9,8 |
| 563 xor r8,QWORD[((-128))+rbx*8+rbp] |
| 564 shl r10,56 |
| 565 xor r9,QWORD[rbx*8+rbp] |
| 566 rol edx,8 |
| 567 xor r8,QWORD[8+rax*1+rsi] |
| 568 xor r9,QWORD[rax*1+rsi] |
| 569 mov al,dl |
| 570 xor r8,r10 |
| 571 movzx r12,WORD[r12*2+r11] |
| 572 movzx ebx,dl |
| 573 shl al,4 |
| 574 movzx r13,BYTE[rcx*1+rsp] |
| 575 shr ebx,4 |
| 576 shl r12,48 |
| 577 xor r13,r8 |
| 578 mov r10,r9 |
| 579 xor r9,r12 |
| 580 shr r8,8 |
| 581 movzx r13,r13b |
| 582 shr r9,8 |
| 583 xor r8,QWORD[((-128))+rcx*8+rbp] |
| 584 shl r10,56 |
| 585 xor r9,QWORD[rcx*8+rbp] |
| 586 rol edx,8 |
| 587 xor r8,QWORD[8+rax*1+rsi] |
| 588 xor r9,QWORD[rax*1+rsi] |
| 589 mov al,dl |
| 590 xor r8,r10 |
| 591 movzx r13,WORD[r13*2+r11] |
| 592 movzx ecx,dl |
| 593 shl al,4 |
| 594 movzx r12,BYTE[rbx*1+rsp] |
| 595 shr ecx,4 |
| 596 shl r13,48 |
| 597 xor r12,r8 |
| 598 mov r10,r9 |
| 599 xor r9,r13 |
| 600 shr r8,8 |
| 601 movzx r12,r12b |
| 602 shr r9,8 |
| 603 xor r8,QWORD[((-128))+rbx*8+rbp] |
| 604 shl r10,56 |
| 605 xor r9,QWORD[rbx*8+rbp] |
| 606 rol edx,8 |
| 607 xor r8,QWORD[8+rax*1+rsi] |
| 608 xor r9,QWORD[rax*1+rsi] |
| 609 mov al,dl |
| 610 xor r8,r10 |
| 611 movzx r12,WORD[r12*2+r11] |
| 612 movzx ebx,dl |
| 613 shl al,4 |
| 614 movzx r13,BYTE[rcx*1+rsp] |
| 615 shr ebx,4 |
| 616 shl r12,48 |
| 617 xor r13,r8 |
| 618 mov r10,r9 |
| 619 xor r9,r12 |
| 620 shr r8,8 |
| 621 movzx r13,r13b |
| 622 shr r9,8 |
| 623 xor r8,QWORD[((-128))+rcx*8+rbp] |
| 624 shl r10,56 |
| 625 xor r9,QWORD[rcx*8+rbp] |
| 626 rol edx,8 |
| 627 xor r8,QWORD[8+rax*1+rsi] |
| 628 xor r9,QWORD[rax*1+rsi] |
| 629 mov al,dl |
| 630 xor r8,r10 |
| 631 movzx r13,WORD[r13*2+r11] |
| 632 movzx ecx,dl |
| 633 shl al,4 |
| 634 movzx r12,BYTE[rbx*1+rsp] |
; Final byte: its high nibble is kept in place (and 240) rather than
; shifted down, because it indexes the 16-byte table at rsi in a closing
; 4-bit step instead of the on-stack tables.
| 635 and ecx,240 |
| 636 shl r13,48 |
| 637 xor r12,r8 |
| 638 mov r10,r9 |
| 639 xor r9,r13 |
| 640 shr r8,8 |
| 641 movzx r12,r12b |
; NOTE(review): this reads the dword at rdi-4, i.e. just before the value
; buffer. edx is not read again before "mov rdx,QWORD[8+r14]" overwrites it
; at the loop top (or before the function exits), so the loaded value looks
; unused - confirm that the 4 bytes ahead of the buffer are always readable.
| 642 mov edx,DWORD[((-4))+rdi] |
| 643 shr r9,8 |
| 644 xor r8,QWORD[((-128))+rbx*8+rbp] |
| 645 shl r10,56 |
| 646 xor r9,QWORD[rbx*8+rbp] |
| 647 movzx r12,WORD[r12*2+r11] |
| 648 xor r8,QWORD[8+rax*1+rsi] |
| 649 xor r9,QWORD[rax*1+rsi] |
| 650 shl r12,48 |
| 651 xor r8,r10 |
| 652 xor r9,r12 |
; Closing 4-bit step for the last high nibble (rcx), using the rem_8bit
; entry selected by the low 4 bits of r8 moved into the high nibble.
| 653 movzx r13,r8b |
| 654 shr r8,4 |
| 655 mov r10,r9 |
| 656 shl r13b,4 |
| 657 shr r9,4 |
| 658 xor r8,QWORD[8+rcx*1+rsi] |
| 659 movzx r13,WORD[r13*2+r11] |
| 660 shl r10,60 |
| 661 xor r9,QWORD[rcx*1+rsi] |
| 662 xor r8,r10 |
| 663 shl r13,48 |
| 664 bswap r8 |
| 665 xor r9,r13 |
| 666 bswap r9 |
; Unsigned compare of the input cursor against the end pointer.
| 667 cmp r14,r15 |
| 668 jb NEAR $L$outer_loop |
| 669 mov QWORD[8+rdi],r8 |
| 670 mov QWORD[rdi],r9 |
| 671 |
; Reload the six pushed registers from above the 280-byte local area and
; release the whole frame (280 + 6*8 bytes).
| 672 lea rsi,[280+rsp] |
| 673 mov r15,QWORD[rsi] |
| 674 mov r14,QWORD[8+rsi] |
| 675 mov r13,QWORD[16+rsi] |
| 676 mov r12,QWORD[24+rsi] |
| 677 mov rbp,QWORD[32+rsi] |
| 678 mov rbx,QWORD[40+rsi] |
| 679 lea rsp,[48+rsi] |
| 680 $L$ghash_epilogue: |
| 681 mov rdi,QWORD[8+rsp] ;WIN64 epilogue |
| 682 mov rsi,QWORD[16+rsp] |
| 683 DB 0F3h,0C3h ;repret |
| 684 $L$SEH_end_gcm_ghash_4bit: |
;-----------------------------------------------------------------------
; gcm_init_clmul
; Builds a 96-byte table at [rcx] from the 16-byte value at [rdx], using
; carry-less multiplies.
; ABI:   Win64 (args used directly in rcx/rdx); xmm6 is saved and restored.
; In:    rdx = pointer to the 16-byte source value
;              NOTE(review): presumably the GHASH key H - confirm with caller.
;        rcx = pointer to the 96-byte output table
; Out:   [rcx+0]  = adjusted value V (source with qwords swapped, shifted
;                   left by 1 bit, conditionally xored with $L$0x1c2_polynomial)
;        [rcx+16] = V*V (reduced)
;        [rcx+32] = xor-folded halves of the two entries above (via palignr)
;        [rcx+48] = (V*V)*V, [rcx+64] = ((V*V)*V)*V, [rcx+80] = their folded halves
; Raw-byte instructions used below:
;   DB 0x48,0x83,0xec,0x18      = sub rsp,24
;   DB 0x0f,0x29,0x34,0x24      = movaps [rsp],xmm6
;   DB 102,15,58,68,<modrm>,<i> = pclmulqdq xmmA,xmmB,imm8
;   DB 102,15,58,15,227,8       = palignr xmm4,xmm3,8
;-----------------------------------------------------------------------
| 685 global gcm_init_clmul |
| 686 |
| 687 ALIGN 16 |
| 688 gcm_init_clmul: |
| 689 $L$_init_clmul: |
| 690 $L$SEH_begin_gcm_init_clmul: |
| 691 |
| 692 DB 0x48,0x83,0xec,0x18 |
| 693 DB 0x0f,0x29,0x34,0x24 |
; Load the source and swap its two 64-bit halves (pshufd imm 78).
| 694 movdqu xmm2,XMMWORD[rdx] |
| 695 pshufd xmm2,xmm2,78 |
| 696 |
| 697 |
; 128-bit shift left by one: psllq shifts each qword, the bit leaving the
; low qword is moved into the high qword via psrlq 63 / pslldq 8 / por.
; xmm5 becomes an all-ones mask when the top dword of the original value is
; negative (top bit set), selecting whether the polynomial is xored in.
| 698 pshufd xmm4,xmm2,255 |
| 699 movdqa xmm3,xmm2 |
| 700 psllq xmm2,1 |
| 701 pxor xmm5,xmm5 |
| 702 psrlq xmm3,63 |
| 703 pcmpgtd xmm5,xmm4 |
| 704 pslldq xmm3,8 |
| 705 por xmm2,xmm3 |
| 706 |
| 707 |
| 708 pand xmm5,XMMWORD[$L$0x1c2_polynomial] |
| 709 pxor xmm2,xmm5 |
| 710 |
| 711 |
; xmm6 = low half xor high half of V, reused for the middle product of
; every multiply below. First multiply: xmm0 = V * V.
| 712 pshufd xmm6,xmm2,78 |
| 713 movdqa xmm0,xmm2 |
| 714 pxor xmm6,xmm2 |
| 715 movdqa xmm1,xmm0 |
| 716 pshufd xmm3,xmm0,78 |
| 717 pxor xmm3,xmm0 |
| 718 DB 102,15,58,68,194,0 |
| 719 DB 102,15,58,68,202,17 |
| 720 DB 102,15,58,68,222,0 |
| 721 pxor xmm3,xmm0 |
| 722 pxor xmm3,xmm1 |
| 723 |
| 724 movdqa xmm4,xmm3 |
| 725 psrldq xmm3,8 |
| 726 pslldq xmm4,8 |
| 727 pxor xmm1,xmm3 |
| 728 pxor xmm0,xmm4 |
| 729 |
; Reduction of the 256-bit product xmm1:xmm0 back to 128 bits in xmm0
; (two phases of shift-and-xor).
| 730 movdqa xmm4,xmm0 |
| 731 movdqa xmm3,xmm0 |
| 732 psllq xmm0,5 |
| 733 pxor xmm3,xmm0 |
| 734 psllq xmm0,1 |
| 735 pxor xmm0,xmm3 |
| 736 psllq xmm0,57 |
| 737 movdqa xmm3,xmm0 |
| 738 pslldq xmm0,8 |
| 739 psrldq xmm3,8 |
| 740 pxor xmm0,xmm4 |
| 741 pxor xmm1,xmm3 |
| 742 |
| 743 |
| 744 movdqa xmm4,xmm0 |
| 745 psrlq xmm0,1 |
| 746 pxor xmm1,xmm4 |
| 747 pxor xmm4,xmm0 |
| 748 psrlq xmm0,5 |
| 749 pxor xmm0,xmm4 |
| 750 psrlq xmm0,1 |
| 751 pxor xmm0,xmm1 |
; Store table entries 0..2.
| 752 pshufd xmm3,xmm2,78 |
| 753 pshufd xmm4,xmm0,78 |
| 754 pxor xmm3,xmm2 |
| 755 movdqu XMMWORD[rcx],xmm2 |
| 756 pxor xmm4,xmm0 |
| 757 movdqu XMMWORD[16+rcx],xmm0 |
| 758 DB 102,15,58,15,227,8 |
| 759 movdqu XMMWORD[32+rcx],xmm4 |
; Second multiply: xmm0 = xmm0 * V, then reduce.
| 760 movdqa xmm1,xmm0 |
| 761 pshufd xmm3,xmm0,78 |
| 762 pxor xmm3,xmm0 |
| 763 DB 102,15,58,68,194,0 |
| 764 DB 102,15,58,68,202,17 |
| 765 DB 102,15,58,68,222,0 |
| 766 pxor xmm3,xmm0 |
| 767 pxor xmm3,xmm1 |
| 768 |
| 769 movdqa xmm4,xmm3 |
| 770 psrldq xmm3,8 |
| 771 pslldq xmm4,8 |
| 772 pxor xmm1,xmm3 |
| 773 pxor xmm0,xmm4 |
| 774 |
| 775 movdqa xmm4,xmm0 |
| 776 movdqa xmm3,xmm0 |
| 777 psllq xmm0,5 |
| 778 pxor xmm3,xmm0 |
| 779 psllq xmm0,1 |
| 780 pxor xmm0,xmm3 |
| 781 psllq xmm0,57 |
| 782 movdqa xmm3,xmm0 |
| 783 pslldq xmm0,8 |
| 784 psrldq xmm3,8 |
| 785 pxor xmm0,xmm4 |
| 786 pxor xmm1,xmm3 |
| 787 |
| 788 |
| 789 movdqa xmm4,xmm0 |
| 790 psrlq xmm0,1 |
| 791 pxor xmm1,xmm4 |
| 792 pxor xmm4,xmm0 |
| 793 psrlq xmm0,5 |
| 794 pxor xmm0,xmm4 |
| 795 psrlq xmm0,1 |
| 796 pxor xmm0,xmm1 |
; Keep this product in xmm5 (table entry 3), then multiply by V once more.
| 797 movdqa xmm5,xmm0 |
| 798 movdqa xmm1,xmm0 |
| 799 pshufd xmm3,xmm0,78 |
| 800 pxor xmm3,xmm0 |
| 801 DB 102,15,58,68,194,0 |
| 802 DB 102,15,58,68,202,17 |
| 803 DB 102,15,58,68,222,0 |
| 804 pxor xmm3,xmm0 |
| 805 pxor xmm3,xmm1 |
| 806 |
| 807 movdqa xmm4,xmm3 |
| 808 psrldq xmm3,8 |
| 809 pslldq xmm4,8 |
| 810 pxor xmm1,xmm3 |
| 811 pxor xmm0,xmm4 |
| 812 |
| 813 movdqa xmm4,xmm0 |
| 814 movdqa xmm3,xmm0 |
| 815 psllq xmm0,5 |
| 816 pxor xmm3,xmm0 |
| 817 psllq xmm0,1 |
| 818 pxor xmm0,xmm3 |
| 819 psllq xmm0,57 |
| 820 movdqa xmm3,xmm0 |
| 821 pslldq xmm0,8 |
| 822 psrldq xmm3,8 |
| 823 pxor xmm0,xmm4 |
| 824 pxor xmm1,xmm3 |
| 825 |
| 826 |
| 827 movdqa xmm4,xmm0 |
| 828 psrlq xmm0,1 |
| 829 pxor xmm1,xmm4 |
| 830 pxor xmm4,xmm0 |
| 831 psrlq xmm0,5 |
| 832 pxor xmm0,xmm4 |
| 833 psrlq xmm0,1 |
| 834 pxor xmm0,xmm1 |
; Store table entries 3..5.
| 835 pshufd xmm3,xmm5,78 |
| 836 pshufd xmm4,xmm0,78 |
| 837 pxor xmm3,xmm5 |
| 838 movdqu XMMWORD[48+rcx],xmm5 |
| 839 pxor xmm4,xmm0 |
| 840 movdqu XMMWORD[64+rcx],xmm0 |
| 841 DB 102,15,58,15,227,8 |
| 842 movdqu XMMWORD[80+rcx],xmm4 |
; Restore xmm6 and release the 24-byte save area.
| 843 movaps xmm6,XMMWORD[rsp] |
| 844 lea rsp,[24+rsp] |
| 845 $L$SEH_end_gcm_init_clmul: |
| 846 DB 0F3h,0C3h ;repret |
| 847 |
;-----------------------------------------------------------------------
; gcm_gmult_clmul
; Multiplies the 16-byte value at [rcx] by the first table entry at [rdx]
; using carry-less multiplies, reduces the product, and writes it back.
; ABI:   Win64 (args used directly in rcx/rdx). Only xmm0-xmm5 are touched,
;        so there is no prologue or register save.
; In:    rcx = pointer to the 16-byte value (updated in place)
;        rdx = pointer to the table; entries at +0 and +32 are read
;              NOTE(review): presumably the table written by gcm_init_clmul
;              - confirm with caller.
; Raw-byte instructions used below:
;   DB 102,15,56,0,197          = pshufb xmm0,xmm5   (byte-reverse via mask)
;   DB 102,15,58,68,<modrm>,<i> = pclmulqdq xmmA,xmmB,imm8
;-----------------------------------------------------------------------
| 848 global gcm_gmult_clmul |
| 849 |
| 850 ALIGN 16 |
| 851 gcm_gmult_clmul: |
| 852 $L$_gmult_clmul: |
| 853 movdqu xmm0,XMMWORD[rcx] |
| 854 movdqa xmm5,XMMWORD[$L$bswap_mask] |
| 855 movdqu xmm2,XMMWORD[rdx] |
| 856 movdqu xmm4,XMMWORD[32+rdx] |
| 857 DB 102,15,56,0,197 |
; Three partial products: low*low (xmm0), high*high (xmm1), and the
; product of the xor-folded halves (xmm3, against the folded entry in xmm4).
| 858 movdqa xmm1,xmm0 |
| 859 pshufd xmm3,xmm0,78 |
| 860 pxor xmm3,xmm0 |
| 861 DB 102,15,58,68,194,0 |
| 862 DB 102,15,58,68,202,17 |
| 863 DB 102,15,58,68,220,0 |
| 864 pxor xmm3,xmm0 |
| 865 pxor xmm3,xmm1 |
| 866 |
| 867 movdqa xmm4,xmm3 |
| 868 psrldq xmm3,8 |
| 869 pslldq xmm4,8 |
| 870 pxor xmm1,xmm3 |
| 871 pxor xmm0,xmm4 |
| 872 |
; Reduction of xmm1:xmm0 to 128 bits in xmm0 (two shift-and-xor phases).
| 873 movdqa xmm4,xmm0 |
| 874 movdqa xmm3,xmm0 |
| 875 psllq xmm0,5 |
| 876 pxor xmm3,xmm0 |
| 877 psllq xmm0,1 |
| 878 pxor xmm0,xmm3 |
| 879 psllq xmm0,57 |
| 880 movdqa xmm3,xmm0 |
| 881 pslldq xmm0,8 |
| 882 psrldq xmm3,8 |
| 883 pxor xmm0,xmm4 |
| 884 pxor xmm1,xmm3 |
| 885 |
| 886 |
| 887 movdqa xmm4,xmm0 |
| 888 psrlq xmm0,1 |
| 889 pxor xmm1,xmm4 |
| 890 pxor xmm4,xmm0 |
| 891 psrlq xmm0,5 |
| 892 pxor xmm0,xmm4 |
| 893 psrlq xmm0,1 |
| 894 pxor xmm0,xmm1 |
; Byte-reverse back to the stored order and write the result.
| 895 DB 102,15,56,0,197 |
| 896 movdqu XMMWORD[rcx],xmm0 |
| 897 DB 0F3h,0C3h ;repret |
| 898 |
;-----------------------------------------------------------------------
; gcm_ghash_clmul
; Folds a buffer of 16-byte blocks into the 16-byte running value at [rcx]
; using carry-less multiplies, with 4-block, 2-block and 1-block paths.
; ABI:   Win64 (args used directly). xmm6-xmm15 are saved in a 168-byte
;        frame and restored before returning.
; In:    rcx = pointer to the 16-byte running value (updated in place)
;        rdx = pointer to the table; entries at +0,+16,+32,+48,+64,+80 are read
;        r8  = pointer to the input data
;        r9  = input length in bytes
;              NOTE(review): 16 is subtracted before any test for zero, so
;              this appears to assume a nonzero multiple of 16 - confirm.
; Raw-byte instructions used below:
;   DB 0x48,0x8d,0x60,0xe0            = lea rsp,[rax-0x20]
;   DB 0x0f,0x29,0x70,0xe0 / 0x78,0xf0 = movaps [rax-0x20],xmm6 / [rax-0x10],xmm7
;   DB 0x44,0x0f,0x29,...             = movaps [rax+disp],xmm8..xmm15
;   DB 102,[65|68|69,]15,56,0,<modrm>     = pshufb (optional REX prefix)
;   DB 102,[65|68|69,]15,58,68,<modrm>,<i> = pclmulqdq (optional REX prefix)
;   DB 102,76,15,110,200              = movq xmm9,rax
;-----------------------------------------------------------------------
| 899 global gcm_ghash_clmul |
| 900 |
| 901 ALIGN 32 |
| 902 gcm_ghash_clmul: |
| 903 $L$_ghash_clmul: |
; rax = rsp-136; the DB sequence sets rsp = rax-32 (old rsp-168) and
; stores xmm6..xmm15 at [rsp+0 .. rsp+159], matching the reloads at $L$done.
| 904 lea rax,[((-136))+rsp] |
| 905 $L$SEH_begin_gcm_ghash_clmul: |
| 906 |
| 907 DB 0x48,0x8d,0x60,0xe0 |
| 908 DB 0x0f,0x29,0x70,0xe0 |
| 909 DB 0x0f,0x29,0x78,0xf0 |
| 910 DB 0x44,0x0f,0x29,0x00 |
| 911 DB 0x44,0x0f,0x29,0x48,0x10 |
| 912 DB 0x44,0x0f,0x29,0x50,0x20 |
| 913 DB 0x44,0x0f,0x29,0x58,0x30 |
| 914 DB 0x44,0x0f,0x29,0x60,0x40 |
| 915 DB 0x44,0x0f,0x29,0x68,0x50 |
| 916 DB 0x44,0x0f,0x29,0x70,0x60 |
| 917 DB 0x44,0x0f,0x29,0x78,0x70 |
; xmm10 = byte-reversal mask for the whole routine; xmm0 = running value
; (byte-reversed on load); xmm2 / xmm7 = table entries at +0 / +32.
| 918 movdqa xmm10,XMMWORD[$L$bswap_mask] |
| 919 |
| 920 movdqu xmm0,XMMWORD[rcx] |
| 921 movdqu xmm2,XMMWORD[rdx] |
| 922 movdqu xmm7,XMMWORD[32+rdx] |
| 923 DB 102,65,15,56,0,194 |
| 924 |
; r9 = length - 16; zero means exactly one block, handled by $L$odd_tail.
| 925 sub r9,0x10 |
| 926 jz NEAR $L$odd_tail |
| 927 |
; The 4-block path needs at least 64 bytes in total (r9 >= 0x30 here).
| 928 movdqu xmm6,XMMWORD[16+rdx] |
| 929 mov eax,DWORD[((OPENSSL_ia32cap_P+4))] |
| 930 cmp r9,0x30 |
| 931 jb NEAR $L$skip4x |
| 932 |
; Skip the 4-block path when, of capability bits 22 and 26 (mask
; 71303168 = 0x04400000), only bit 22 is set.
; NOTE(review): presumably MOVBE present without XSAVE (Atom-class CPUs)
; - confirm against the OPENSSL_ia32cap_P bit layout.
| 933 and eax,71303168 |
| 934 cmp eax,4194304 |
| 935 je NEAR $L$skip4x |
| 936 |
; rax holds an 8-byte lookup constant that is moved into xmm9 and permuted
; with pshufb inside the reduction of $L$mod4_loop.
| 937 sub r9,0x30 |
| 938 mov rax,0xA040608020C0E000 |
| 939 movdqu xmm14,XMMWORD[48+rdx] |
| 940 movdqu xmm15,XMMWORD[64+rdx] |
| 941 |
| 942 |
| 943 |
| 944 |
; Prime the pipeline with the first four input blocks (offsets 48, 32, 16, 0);
; the running value is xored into the block at offset 0.
| 945 movdqu xmm3,XMMWORD[48+r8] |
| 946 movdqu xmm11,XMMWORD[32+r8] |
| 947 DB 102,65,15,56,0,218 |
| 948 DB 102,69,15,56,0,218 |
| 949 movdqa xmm5,xmm3 |
| 950 pshufd xmm4,xmm3,78 |
| 951 pxor xmm4,xmm3 |
| 952 DB 102,15,58,68,218,0 |
| 953 DB 102,15,58,68,234,17 |
| 954 DB 102,15,58,68,231,0 |
| 955 |
| 956 movdqa xmm13,xmm11 |
| 957 pshufd xmm12,xmm11,78 |
| 958 pxor xmm12,xmm11 |
| 959 DB 102,68,15,58,68,222,0 |
| 960 DB 102,68,15,58,68,238,17 |
| 961 DB 102,68,15,58,68,231,16 |
| 962 xorps xmm3,xmm11 |
| 963 xorps xmm5,xmm13 |
| 964 movups xmm7,XMMWORD[80+rdx] |
| 965 xorps xmm4,xmm12 |
| 966 |
| 967 movdqu xmm11,XMMWORD[16+r8] |
| 968 movdqu xmm8,XMMWORD[r8] |
| 969 DB 102,69,15,56,0,218 |
| 970 DB 102,69,15,56,0,194 |
| 971 movdqa xmm13,xmm11 |
| 972 pshufd xmm12,xmm11,78 |
| 973 pxor xmm0,xmm8 |
| 974 pxor xmm12,xmm11 |
| 975 DB 102,69,15,58,68,222,0 |
| 976 movdqa xmm1,xmm0 |
| 977 pshufd xmm8,xmm0,78 |
| 978 pxor xmm8,xmm0 |
| 979 DB 102,69,15,58,68,238,17 |
| 980 DB 102,68,15,58,68,231,0 |
| 981 xorps xmm3,xmm11 |
| 982 xorps xmm5,xmm13 |
| 983 |
| 984 lea r8,[64+r8] |
| 985 sub r9,0x40 |
| 986 jc NEAR $L$tail4x |
| 987 |
| 988 jmp NEAR $L$mod4_loop |
| 989 ALIGN 32 |
; Main 4-block loop: 64 bytes of input per pass. The multiplies for the
; next four blocks are interleaved with the reduction of the current sum.
| 990 $L$mod4_loop: |
| 991 DB 102,65,15,58,68,199,0 |
| 992 xorps xmm4,xmm12 |
| 993 movdqu xmm11,XMMWORD[48+r8] |
| 994 DB 102,69,15,56,0,218 |
| 995 DB 102,65,15,58,68,207,17 |
| 996 xorps xmm0,xmm3 |
| 997 movdqu xmm3,XMMWORD[32+r8] |
| 998 movdqa xmm13,xmm11 |
| 999 DB 102,68,15,58,68,199,16 |
| 1000 pshufd xmm12,xmm11,78 |
| 1001 xorps xmm1,xmm5 |
| 1002 pxor xmm12,xmm11 |
| 1003 DB 102,65,15,56,0,218 |
| 1004 movups xmm7,XMMWORD[32+rdx] |
| 1005 xorps xmm8,xmm4 |
| 1006 DB 102,68,15,58,68,218,0 |
| 1007 pshufd xmm4,xmm3,78 |
| 1008 |
| 1009 pxor xmm8,xmm0 |
| 1010 movdqa xmm5,xmm3 |
| 1011 pxor xmm8,xmm1 |
| 1012 pxor xmm4,xmm3 |
| 1013 movdqa xmm9,xmm8 |
| 1014 DB 102,68,15,58,68,234,17 |
| 1015 pslldq xmm8,8 |
| 1016 psrldq xmm9,8 |
| 1017 pxor xmm0,xmm8 |
| 1018 movdqa xmm8,XMMWORD[$L$7_mask] |
| 1019 pxor xmm1,xmm9 |
| 1020 DB 102,76,15,110,200 |
| 1021 |
| 1022 pand xmm8,xmm0 |
| 1023 DB 102,69,15,56,0,200 |
| 1024 pxor xmm9,xmm0 |
| 1025 DB 102,68,15,58,68,231,0 |
| 1026 psllq xmm9,57 |
| 1027 movdqa xmm8,xmm9 |
| 1028 pslldq xmm9,8 |
| 1029 DB 102,15,58,68,222,0 |
| 1030 psrldq xmm8,8 |
| 1031 pxor xmm0,xmm9 |
| 1032 pxor xmm1,xmm8 |
| 1033 movdqu xmm8,XMMWORD[r8] |
| 1034 |
| 1035 movdqa xmm9,xmm0 |
| 1036 psrlq xmm0,1 |
| 1037 DB 102,15,58,68,238,17 |
| 1038 xorps xmm3,xmm11 |
| 1039 movdqu xmm11,XMMWORD[16+r8] |
| 1040 DB 102,69,15,56,0,218 |
| 1041 DB 102,15,58,68,231,16 |
| 1042 xorps xmm5,xmm13 |
| 1043 movups xmm7,XMMWORD[80+rdx] |
| 1044 DB 102,69,15,56,0,194 |
| 1045 pxor xmm1,xmm9 |
| 1046 pxor xmm9,xmm0 |
| 1047 psrlq xmm0,5 |
| 1048 |
| 1049 movdqa xmm13,xmm11 |
| 1050 pxor xmm4,xmm12 |
| 1051 pshufd xmm12,xmm11,78 |
| 1052 pxor xmm0,xmm9 |
| 1053 pxor xmm1,xmm8 |
| 1054 pxor xmm12,xmm11 |
| 1055 DB 102,69,15,58,68,222,0 |
| 1056 psrlq xmm0,1 |
| 1057 pxor xmm0,xmm1 |
| 1058 movdqa xmm1,xmm0 |
| 1059 DB 102,69,15,58,68,238,17 |
| 1060 xorps xmm3,xmm11 |
| 1061 pshufd xmm8,xmm0,78 |
| 1062 pxor xmm8,xmm0 |
| 1063 |
| 1064 DB 102,68,15,58,68,231,0 |
| 1065 xorps xmm5,xmm13 |
| 1066 |
| 1067 lea r8,[64+r8] |
| 1068 sub r9,0x40 |
| 1069 jnc NEAR $L$mod4_loop |
| 1070 |
; Drain the 4-block pipeline: finish the pending multiply, combine the
; partial sums and reduce to 128 bits in xmm0.
| 1071 $L$tail4x: |
| 1072 DB 102,65,15,58,68,199,0 |
| 1073 DB 102,65,15,58,68,207,17 |
| 1074 DB 102,68,15,58,68,199,16 |
| 1075 xorps xmm4,xmm12 |
| 1076 xorps xmm0,xmm3 |
| 1077 xorps xmm1,xmm5 |
| 1078 pxor xmm1,xmm0 |
| 1079 pxor xmm8,xmm4 |
| 1080 |
| 1081 pxor xmm8,xmm1 |
| 1082 pxor xmm1,xmm0 |
| 1083 |
| 1084 movdqa xmm9,xmm8 |
| 1085 psrldq xmm8,8 |
| 1086 pslldq xmm9,8 |
| 1087 pxor xmm1,xmm8 |
| 1088 pxor xmm0,xmm9 |
| 1089 |
| 1090 movdqa xmm4,xmm0 |
| 1091 movdqa xmm3,xmm0 |
| 1092 psllq xmm0,5 |
| 1093 pxor xmm3,xmm0 |
| 1094 psllq xmm0,1 |
| 1095 pxor xmm0,xmm3 |
| 1096 psllq xmm0,57 |
| 1097 movdqa xmm3,xmm0 |
| 1098 pslldq xmm0,8 |
| 1099 psrldq xmm3,8 |
| 1100 pxor xmm0,xmm4 |
| 1101 pxor xmm1,xmm3 |
| 1102 |
| 1103 |
| 1104 movdqa xmm4,xmm0 |
| 1105 psrlq xmm0,1 |
| 1106 pxor xmm1,xmm4 |
| 1107 pxor xmm4,xmm0 |
| 1108 psrlq xmm0,5 |
| 1109 pxor xmm0,xmm4 |
| 1110 psrlq xmm0,1 |
| 1111 pxor xmm0,xmm1 |
; Undo the last over-subtraction; r9 == 0 means all input was consumed.
; Otherwise reload the +32 table entry (xmm7 was replaced by the +80 entry)
; and continue with the 1- or 2-block paths.
| 1112 add r9,0x40 |
| 1113 jz NEAR $L$done |
| 1114 movdqu xmm7,XMMWORD[32+rdx] |
| 1115 sub r9,0x10 |
| 1116 jz NEAR $L$odd_tail |
| 1117 $L$skip4x: |
| 1118 |
| 1119 |
| 1120 |
| 1121 |
| 1122 |
; 2-block path: load two blocks, xor the running value into the first,
; and start the multiplies for the second.
| 1123 movdqu xmm8,XMMWORD[r8] |
| 1124 movdqu xmm3,XMMWORD[16+r8] |
| 1125 DB 102,69,15,56,0,194 |
| 1126 DB 102,65,15,56,0,218 |
| 1127 pxor xmm0,xmm8 |
| 1128 |
| 1129 movdqa xmm5,xmm3 |
| 1130 pshufd xmm4,xmm3,78 |
| 1131 pxor xmm4,xmm3 |
| 1132 DB 102,15,58,68,218,0 |
| 1133 DB 102,15,58,68,234,17 |
| 1134 DB 102,15,58,68,231,0 |
| 1135 |
| 1136 lea r8,[32+r8] |
| 1137 nop |
| 1138 sub r9,0x20 |
| 1139 jbe NEAR $L$even_tail |
| 1140 nop |
| 1141 jmp NEAR $L$mod_loop |
| 1142 |
| 1143 ALIGN 32 |
; 2-block loop: 32 bytes of input per pass, multiply of the next pair
; interleaved with the reduction of the current one.
| 1144 $L$mod_loop: |
| 1145 movdqa xmm1,xmm0 |
| 1146 movdqa xmm8,xmm4 |
| 1147 pshufd xmm4,xmm0,78 |
| 1148 pxor xmm4,xmm0 |
| 1149 |
| 1150 DB 102,15,58,68,198,0 |
| 1151 DB 102,15,58,68,206,17 |
| 1152 DB 102,15,58,68,231,16 |
| 1153 |
| 1154 pxor xmm0,xmm3 |
| 1155 pxor xmm1,xmm5 |
| 1156 movdqu xmm9,XMMWORD[r8] |
| 1157 pxor xmm8,xmm0 |
| 1158 DB 102,69,15,56,0,202 |
| 1159 movdqu xmm3,XMMWORD[16+r8] |
| 1160 |
| 1161 pxor xmm8,xmm1 |
| 1162 pxor xmm1,xmm9 |
| 1163 pxor xmm4,xmm8 |
| 1164 DB 102,65,15,56,0,218 |
| 1165 movdqa xmm8,xmm4 |
| 1166 psrldq xmm8,8 |
| 1167 pslldq xmm4,8 |
| 1168 pxor xmm1,xmm8 |
| 1169 pxor xmm0,xmm4 |
| 1170 |
| 1171 movdqa xmm5,xmm3 |
| 1172 |
| 1173 movdqa xmm9,xmm0 |
| 1174 movdqa xmm8,xmm0 |
| 1175 psllq xmm0,5 |
| 1176 pxor xmm8,xmm0 |
| 1177 DB 102,15,58,68,218,0 |
| 1178 psllq xmm0,1 |
| 1179 pxor xmm0,xmm8 |
| 1180 psllq xmm0,57 |
| 1181 movdqa xmm8,xmm0 |
| 1182 pslldq xmm0,8 |
| 1183 psrldq xmm8,8 |
| 1184 pxor xmm0,xmm9 |
| 1185 pshufd xmm4,xmm5,78 |
| 1186 pxor xmm1,xmm8 |
| 1187 pxor xmm4,xmm5 |
| 1188 |
| 1189 movdqa xmm9,xmm0 |
| 1190 psrlq xmm0,1 |
| 1191 DB 102,15,58,68,234,17 |
| 1192 pxor xmm1,xmm9 |
| 1193 pxor xmm9,xmm0 |
| 1194 psrlq xmm0,5 |
| 1195 pxor xmm0,xmm9 |
| 1196 lea r8,[32+r8] |
| 1197 psrlq xmm0,1 |
| 1198 DB 102,15,58,68,231,0 |
| 1199 pxor xmm0,xmm1 |
| 1200 |
| 1201 sub r9,0x20 |
| 1202 ja NEAR $L$mod_loop |
| 1203 |
; Finish the last pending pair and reduce to 128 bits in xmm0.
| 1204 $L$even_tail: |
| 1205 movdqa xmm1,xmm0 |
| 1206 movdqa xmm8,xmm4 |
| 1207 pshufd xmm4,xmm0,78 |
| 1208 pxor xmm4,xmm0 |
| 1209 |
| 1210 DB 102,15,58,68,198,0 |
| 1211 DB 102,15,58,68,206,17 |
| 1212 DB 102,15,58,68,231,16 |
| 1213 |
| 1214 pxor xmm0,xmm3 |
| 1215 pxor xmm1,xmm5 |
| 1216 pxor xmm8,xmm0 |
| 1217 pxor xmm8,xmm1 |
| 1218 pxor xmm4,xmm8 |
| 1219 movdqa xmm8,xmm4 |
| 1220 psrldq xmm8,8 |
| 1221 pslldq xmm4,8 |
| 1222 pxor xmm1,xmm8 |
| 1223 pxor xmm0,xmm4 |
| 1224 |
| 1225 movdqa xmm4,xmm0 |
| 1226 movdqa xmm3,xmm0 |
| 1227 psllq xmm0,5 |
| 1228 pxor xmm3,xmm0 |
| 1229 psllq xmm0,1 |
| 1230 pxor xmm0,xmm3 |
| 1231 psllq xmm0,57 |
| 1232 movdqa xmm3,xmm0 |
| 1233 pslldq xmm0,8 |
| 1234 psrldq xmm3,8 |
| 1235 pxor xmm0,xmm4 |
| 1236 pxor xmm1,xmm3 |
| 1237 |
| 1238 |
| 1239 movdqa xmm4,xmm0 |
| 1240 psrlq xmm0,1 |
| 1241 pxor xmm1,xmm4 |
| 1242 pxor xmm4,xmm0 |
| 1243 psrlq xmm0,5 |
| 1244 pxor xmm0,xmm4 |
| 1245 psrlq xmm0,1 |
| 1246 pxor xmm0,xmm1 |
; r9 == 0 here means one more 16-byte block remains (fall into
; $L$odd_tail); any other value means the input is exhausted.
| 1247 test r9,r9 |
| 1248 jnz NEAR $L$done |
| 1249 |
; Single-block path: xor the block into the running value, multiply by the
; table entry at +0 (xmm2 / folded entry in xmm7) and reduce.
| 1250 $L$odd_tail: |
| 1251 movdqu xmm8,XMMWORD[r8] |
| 1252 DB 102,69,15,56,0,194 |
| 1253 pxor xmm0,xmm8 |
| 1254 movdqa xmm1,xmm0 |
| 1255 pshufd xmm3,xmm0,78 |
| 1256 pxor xmm3,xmm0 |
| 1257 DB 102,15,58,68,194,0 |
| 1258 DB 102,15,58,68,202,17 |
| 1259 DB 102,15,58,68,223,0 |
| 1260 pxor xmm3,xmm0 |
| 1261 pxor xmm3,xmm1 |
| 1262 |
| 1263 movdqa xmm4,xmm3 |
| 1264 psrldq xmm3,8 |
| 1265 pslldq xmm4,8 |
| 1266 pxor xmm1,xmm3 |
| 1267 pxor xmm0,xmm4 |
| 1268 |
| 1269 movdqa xmm4,xmm0 |
| 1270 movdqa xmm3,xmm0 |
| 1271 psllq xmm0,5 |
| 1272 pxor xmm3,xmm0 |
| 1273 psllq xmm0,1 |
| 1274 pxor xmm0,xmm3 |
| 1275 psllq xmm0,57 |
| 1276 movdqa xmm3,xmm0 |
| 1277 pslldq xmm0,8 |
| 1278 psrldq xmm3,8 |
| 1279 pxor xmm0,xmm4 |
| 1280 pxor xmm1,xmm3 |
| 1281 |
| 1282 |
| 1283 movdqa xmm4,xmm0 |
| 1284 psrlq xmm0,1 |
| 1285 pxor xmm1,xmm4 |
| 1286 pxor xmm4,xmm0 |
| 1287 psrlq xmm0,5 |
| 1288 pxor xmm0,xmm4 |
| 1289 psrlq xmm0,1 |
| 1290 pxor xmm0,xmm1 |
; Byte-reverse the result, store it, restore xmm6-xmm15 and drop the frame.
| 1291 $L$done: |
| 1292 DB 102,65,15,56,0,194 |
| 1293 movdqu XMMWORD[rcx],xmm0 |
| 1294 movaps xmm6,XMMWORD[rsp] |
| 1295 movaps xmm7,XMMWORD[16+rsp] |
| 1296 movaps xmm8,XMMWORD[32+rsp] |
| 1297 movaps xmm9,XMMWORD[48+rsp] |
| 1298 movaps xmm10,XMMWORD[64+rsp] |
| 1299 movaps xmm11,XMMWORD[80+rsp] |
| 1300 movaps xmm12,XMMWORD[96+rsp] |
| 1301 movaps xmm13,XMMWORD[112+rsp] |
| 1302 movaps xmm14,XMMWORD[128+rsp] |
| 1303 movaps xmm15,XMMWORD[144+rsp] |
| 1304 lea rsp,[168+rsp] |
| 1305 $L$SEH_end_gcm_ghash_clmul: |
| 1306 DB 0F3h,0C3h ;repret |
| 1307 |
; gcm_init_avx: exported alias that tail-jumps straight into gcm_init_clmul
; ($L$_init_clmul); no AVX-specific code is present in this build. Arguments
; and return behaviour are therefore exactly those of gcm_init_clmul.
| 1308 global gcm_init_avx |
| 1309 |
| 1310 ALIGN 32 |
| 1311 gcm_init_avx: |
| 1312 jmp NEAR $L$_init_clmul |
| 1313 |
; gcm_gmult_avx: exported alias that tail-jumps straight into
; gcm_gmult_clmul ($L$_gmult_clmul); arguments are passed through untouched.
| 1314 global gcm_gmult_avx |
| 1315 |
| 1316 ALIGN 32 |
| 1317 gcm_gmult_avx: |
| 1318 jmp NEAR $L$_gmult_clmul |
| 1319 |
; gcm_ghash_avx: exported alias that tail-jumps straight into
; gcm_ghash_clmul ($L$_ghash_clmul); arguments are passed through untouched.
| 1320 global gcm_ghash_avx |
| 1321 |
| 1322 ALIGN 32 |
| 1323 gcm_ghash_avx: |
| 1324 jmp NEAR $L$_ghash_clmul |
| 1325 |
; Constant pool shared by the routines above (placed in the code section).
| 1326 ALIGN 64 |
; pshufb control that reverses the 16 bytes of an xmm register
; (loaded into xmm5 / xmm10 by the clmul routines).
| 1327 $L$bswap_mask: |
| 1328 DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 |
; 128-bit constant with byte 0 = 1 and byte 15 = 0xc2; masked and xored
; into the shifted value in gcm_init_clmul.
| 1329 $L$0x1c2_polynomial: |
| 1330 DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 |
; Keeps the low 3 bits of each 64-bit lane; used in $L$mod4_loop.
| 1331 $L$7_mask: |
| 1332 DD 7,0,7,0 |
; NOTE(review): not referenced by any code visible in this chunk
; (450 = 0x1C2).
| 1333 $L$7_mask_poly: |
| 1334 DD 7,0,450,0 |
| 1335 ALIGN 64 |
| 1336 |
; 16 entries of 8 bytes each (low dword 0, high dword = value), indexed as
; [nibble*8 + r11] by gcm_gmult_4bit.
| 1337 $L$rem_4bit: |
| 1338 DD 0,0,0,471859200,0,943718400,0,610271232 |
| 1339 DD 0,1887436800,0,1822425088,0,1220542464,0,1423966208 |
| 1340 DD 0,3774873600,0,4246732800,0,3644850176,0,3311403008 |
| 1341 DD 0,2441084928,0,2376073216,0,2847932416,0,3051356160 |
| 1342 |
; $L$rem_8bit -- table of 256 16-bit words (32 rows of 8), 512 bytes total.
; No instruction in this excerpt references the label.
; NOTE(review): presumably indexed by an 8-bit value as the byte-wide
; counterpart of $L$rem_4bit -- confirm against the routine that loads it.
; Entry 1 is 0x01C2; entry 16 (first word of the third row) is 0x1C20, the
; same bit pattern that appears in the top 16 bits of $L$rem_4bit entry 1.
| 1343 $L$rem_8bit: |
| 1344 DW 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E |
| 1345 DW 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E |
| 1346 DW 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E |
| 1347 DW 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E |
| 1348 DW 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E |
| 1349 DW 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E |
| 1350 DW 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E |
| 1351 DW 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E |
| 1352 DW 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE |
| 1353 DW 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE |
| 1354 DW 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE |
| 1355 DW 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE |
| 1356 DW 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E |
| 1357 DW 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E |
| 1358 DW 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE |
| 1359 DW 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE |
| 1360 DW 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E |
| 1361 DW 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E |
| 1362 DW 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E |
| 1363 DW 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E |
| 1364 DW 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E |
| 1365 DW 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E |
| 1366 DW 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E |
| 1367 DW 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E |
| 1368 DW 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE |
| 1369 DW 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE |
| 1370 DW 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE |
| 1371 DW 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE |
| 1372 DW 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E |
| 1373 DW 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E |
| 1374 DW 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE |
| 1375 DW 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE |
| 1376 |
; Identification string embedded in the code section as raw bytes.
; Decoded as ASCII the bytes read
;   "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"
; followed by a terminating 0 byte.  No label is attached to it here.
| 1377 DB 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52 |
| 1378 DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 |
| 1379 DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 |
| 1380 DB 114,103,62,0 |
| 1381 ALIGN 64 |
| 1382 EXTERN __imp_RtlVirtualUnwind |
| 1383 |
; se_handler -- exception handler referenced by the unwind info of the two
; 4-bit routines ($L$SEH_info_gcm_gmult_4bit / $L$SEH_info_gcm_ghash_4bit).
;
; Registers read on entry: r8 = context record, r9 = dispatcher context
; (rcx and rdx are not read).  Field names used in the comments below come
; from the Win64 CONTEXT / DISPATCHER_CONTEXT layouts in winnt.h; the
; numeric offsets are what the code actually uses -- confirm the names
; against that header.
;
; What it does:
;   1. Works out the stack pointer the faulting function had on entry.
;   2. Writes the recovered rbx/rbp/r12 (body case only), rsp, rsi and rdi
;      into the context record.
;   3. Copies the context record to the dispatcher's ContextRecord and
;      calls RtlVirtualUnwind.
;   4. Returns eax = 1 (NOTE(review): ExceptionContinueSearch in the Win64
;      handler protocol -- confirm).
;
; Stack: 8 pushes + pushfq (72 bytes) plus 64 bytes of scratch.  The scratch
; area holds the 32-byte register home space and the four stack arguments
; stored at [32..56+rsp] for the call.
| 1384 ALIGN 16 |
| 1385 se_handler: |
| 1386 push rsi |
| 1387 push rdi |
| 1388 push rbx |
| 1389 push rbp |
| 1390 push r12 |
| 1391 push r13 |
| 1392 push r14 |
| 1393 push r15 |
| 1394 pushfq |
| 1395 sub rsp,64 |
| 1396 |
; rax = context->Rax, rbx = context->Rip (the faulting address).
| 1397 mov rax,QWORD[120+r8] |
| 1398 mov rbx,QWORD[248+r8] |
| 1399 |
; rsi = disp->ImageBase, r11 = disp->HandlerData (the two RVAs that follow
; the handler RVA in the unwind info).
| 1400 mov rsi,QWORD[8+r9] |
| 1401 mov r11,QWORD[56+r9] |
| 1402 |
; HandlerData[0] + ImageBase = address of the end-of-prologue label.
; If Rip is below it the fault is inside the prologue; keep rax =
; context->Rax, which the gcm_gmult_4bit prologue sets to the entry rsp
; (mov rax,rsp at the top of the file).
| 1403 mov r10d,DWORD[r11] |
| 1404 lea r10,[r10*1+rsi] |
| 1405 cmp rbx,r10 |
| 1406 jb NEAR $L$in_prologue |
| 1407 |
; Past the prologue: start from context->Rsp instead.
| 1408 mov rax,QWORD[152+r8] |
| 1409 |
; HandlerData[1] + ImageBase = address of the epilogue label.  At or past
; it, rax = context->Rsp is used as-is.
| 1410 mov r10d,DWORD[4+r11] |
| 1411 lea r10,[r10*1+rsi] |
| 1412 cmp rbx,r10 |
| 1413 jae NEAR $L$in_prologue |
| 1414 |
; Fault in the function body: step over the three 8-byte saved registers so
; rax becomes the entry rsp.
; NOTE(review): the fixed +24 assumes rsp points directly at the three
; pushed registers at fault time.  That matches gcm_gmult_4bit as visible
; at the top of the file (push rbx/rbp/r12, no further allocation seen);
; verify it also holds for gcm_ghash_4bit, whose body is not visible here.
| 1415 lea rax,[24+rax] |
| 1416 |
; Reload the saved rbx/rbp/r12 from the frame and store them into
; context->Rbx / Rbp / R12.
| 1417 mov rbx,QWORD[((-8))+rax] |
| 1418 mov rbp,QWORD[((-16))+rax] |
| 1419 mov r12,QWORD[((-24))+rax] |
| 1420 mov QWORD[144+r8],rbx |
| 1421 mov QWORD[160+r8],rbp |
| 1422 mov QWORD[216+r8],r12 |
| 1423 |
; Common tail.  rax = entry rsp of the interrupted function.  The WIN64
; prologue stored rdi at [8+rsp] and rsi at [16+rsp]; fetch them from there
; and store rsp/rsi/rdi into context->Rsp / Rsi / Rdi.
| 1424 $L$in_prologue: |
| 1425 mov rdi,QWORD[8+rax] |
| 1426 mov rsi,QWORD[16+rax] |
| 1427 mov QWORD[152+r8],rax |
| 1428 mov QWORD[168+r8],rsi |
| 1429 mov QWORD[176+r8],rdi |
| 1430 |
; Copy the context record to disp->ContextRecord: rdi = destination,
; rsi = source, ecx = 154 quadwords (1232 bytes).  The DD below is the
; byte sequence FC F3 48 A5, i.e. "cld" followed by "rep movsq".
| 1431 mov rdi,QWORD[40+r9] |
| 1432 mov rsi,r8 |
| 1433 mov ecx,154 |
| 1434 DD 0xa548f3fc |
| 1435 |
; Set up the RtlVirtualUnwind call from the dispatcher context (rsi):
;   rcx      = 0              (first argument)
;   rdx      = [8+disp]       ImageBase
;   r8       = [disp]         ControlPc
;   r9       = [16+disp]      FunctionEntry
;   [32+rsp] = [40+disp]      ContextRecord
;   [40+rsp] = &[56+disp]     address of HandlerData
;   [48+rsp] = &[24+disp]     address of EstablisherFrame
;   [56+rsp] = 0              (last argument, NULL)
; r12 is used as a temporary here; its caller value was pushed above and is
; restored before returning.
| 1436 mov rsi,r9 |
| 1437 xor rcx,rcx |
| 1438 mov rdx,QWORD[8+rsi] |
| 1439 mov r8,QWORD[rsi] |
| 1440 mov r9,QWORD[16+rsi] |
| 1441 mov r10,QWORD[40+rsi] |
| 1442 lea r11,[56+rsi] |
| 1443 lea r12,[24+rsi] |
| 1444 mov QWORD[32+rsp],r10 |
| 1445 mov QWORD[40+rsp],r11 |
| 1446 mov QWORD[48+rsp],r12 |
| 1447 mov QWORD[56+rsp],rcx |
| 1448 call QWORD[__imp_RtlVirtualUnwind] |
| 1449 |
; Return value 1, then undo the frame in reverse order of the pushes.
| 1450 mov eax,1 |
| 1451 add rsp,64 |
| 1452 popfq |
| 1453 pop r15 |
| 1454 pop r14 |
| 1455 pop r13 |
| 1456 pop r12 |
| 1457 pop rbp |
| 1458 pop rbx |
| 1459 pop rdi |
| 1460 pop rsi |
| 1461 DB 0F3h,0C3h ;repret |
| 1462 |
| 1463 |
; .pdata -- function table.  Each entry is three image-relative addresses
; (RVAs, via "wrt ..imagebase"): start of the function's range, end of the
; range, and the matching unwind-info record in .xdata.
; Entries, in order: gcm_gmult_4bit, gcm_ghash_4bit, gcm_init_clmul,
; gcm_ghash_clmul.
| 1464 section .pdata rdata align=4 |
| 1465 ALIGN 4 |
| 1466 DD $L$SEH_begin_gcm_gmult_4bit wrt ..imagebase |
| 1467 DD $L$SEH_end_gcm_gmult_4bit wrt ..imagebase |
| 1468 DD $L$SEH_info_gcm_gmult_4bit wrt ..imagebase |
| 1469 |
| 1470 DD $L$SEH_begin_gcm_ghash_4bit wrt ..imagebase |
| 1471 DD $L$SEH_end_gcm_ghash_4bit wrt ..imagebase |
| 1472 DD $L$SEH_info_gcm_ghash_4bit wrt ..imagebase |
| 1473 |
| 1474 DD $L$SEH_begin_gcm_init_clmul wrt ..imagebase |
| 1475 DD $L$SEH_end_gcm_init_clmul wrt ..imagebase |
| 1476 DD $L$SEH_info_gcm_init_clmul wrt ..imagebase |
| 1477 |
| 1478 DD $L$SEH_begin_gcm_ghash_clmul wrt ..imagebase |
| 1479 DD $L$SEH_end_gcm_ghash_clmul wrt ..imagebase |
| 1480 DD $L$SEH_info_gcm_ghash_clmul wrt ..imagebase |
; .xdata -- unwind-info records referenced from .pdata, 8-byte aligned.
;
; $L$SEH_info_gcm_gmult_4bit
;   DB 9,0,0,0   header: first byte 9 = version 1 in the low 3 bits with
;                flag value 1 in the upper 5 bits (handler present, per the
;                x64 UNWIND_INFO layout); prologue size 0, no unwind codes,
;                no frame register.
;   DD           RVA of se_handler.
;   DD, DD       handler data: RVAs of the end-of-prologue and epilogue
;                labels.  se_handler reads them as DWORD[r11] and
;                DWORD[4+r11] and compares the faulting Rip against each.
; The handler-data row is kept on a single line so that the trailing
; "..imagebase" token is not split.
| 1481 section .xdata rdata align=8 |
| 1482 ALIGN 8 |
| 1483 $L$SEH_info_gcm_gmult_4bit: |
| 1484 DB 9,0,0,0 |
| 1485 DD se_handler wrt ..imagebase |
| 1486 DD $L$gmult_prologue wrt ..imagebase,$L$gmult_epilogue wrt ..imagebase |
; $L$SEH_info_gcm_ghash_4bit -- unwind info for gcm_ghash_4bit.
;   DB 9,0,0,0   header: version 1 with the handler-present flag, prologue
;                size 0, no unwind codes, no frame register (per the x64
;                UNWIND_INFO layout).
;   DD           RVA of se_handler.
;   DD, DD       handler data: RVAs of $L$ghash_prologue and
;                $L$ghash_epilogue, which se_handler compares against the
;                faulting Rip.
; The handler-data row is kept on a single line so that the trailing
; "..imagebase" token is not split.
| 1487 $L$SEH_info_gcm_ghash_4bit: |
| 1488 DB 9,0,0,0 |
| 1489 DD se_handler wrt ..imagebase |
| 1490 DD $L$ghash_prologue wrt ..imagebase,$L$ghash_epilogue wrt ..imagebase |
; $L$SEH_info_gcm_init_clmul -- table-driven unwind info (no handler).
; Bytes decoded per the x64 UNWIND_INFO / UNWIND_CODE layout; the function
; prologue itself is outside this excerpt, so cross-check against it.
;   0x01,0x08,0x03,0x00  version 1, flags 0; prologue size 8 bytes;
;                        3 unwind-code slots; no frame register.
;   0x08,0x68,0x00,0x00  at prologue offset 8: save xmm6 at [rsp + 0*16].
;   0x04,0x22,0x00,0x00  at prologue offset 4: small stack allocation of
;                        2*8+8 = 24 bytes; the trailing two zero bytes pad
;                        the slot count to an even number.
| 1491 $L$SEH_info_gcm_init_clmul: |
| 1492 DB 0x01,0x08,0x03,0x00 |
| 1493 DB 0x08,0x68,0x00,0x00 |
| 1494 DB 0x04,0x22,0x00,0x00 |
; $L$SEH_info_gcm_ghash_clmul -- table-driven unwind info (no handler).
; Bytes decoded per the x64 UNWIND_INFO / UNWIND_CODE layout.
;   Header 0x01,0x33,0x16,0x00: version 1, flags 0; prologue size 0x33
;   bytes; 0x16 = 22 unwind-code slots; no frame register.
;   Each following row except the last is "offset, 0xN8, index, 0x00":
;   at that prologue offset, xmmN is saved at [rsp + index*16].
;   Last row 0x04,0x01,0x15,0x00: large stack allocation of 0x15*8 = 168
;   bytes at prologue offset 4.
; These agree with the function's epilogue, which reloads xmm6..xmm15 from
; [rsp] .. [144+rsp] and then releases 168 bytes with lea rsp,[168+rsp].
| 1495 $L$SEH_info_gcm_ghash_clmul: |
| 1496 DB 0x01,0x33,0x16,0x00 |
; xmm15 at [rsp+144]
| 1497 DB 0x33,0xf8,0x09,0x00 |
; xmm14 at [rsp+128]
| 1498 DB 0x2e,0xe8,0x08,0x00 |
; xmm13 at [rsp+112]
| 1499 DB 0x29,0xd8,0x07,0x00 |
; xmm12 at [rsp+96]
| 1500 DB 0x24,0xc8,0x06,0x00 |
; xmm11 at [rsp+80]
| 1501 DB 0x1f,0xb8,0x05,0x00 |
; xmm10 at [rsp+64]
| 1502 DB 0x1a,0xa8,0x04,0x00 |
; xmm9 at [rsp+48]
| 1503 DB 0x15,0x98,0x03,0x00 |
; xmm8 at [rsp+32]
| 1504 DB 0x10,0x88,0x02,0x00 |
; xmm7 at [rsp+16]
| 1505 DB 0x0c,0x78,0x01,0x00 |
; xmm6 at [rsp+0]
| 1506 DB 0x08,0x68,0x00,0x00 |
; stack allocation of 168 bytes
| 1507 DB 0x04,0x01,0x15,0x00 |
OLD | NEW |