OLD | NEW |
(Empty) | |
; Assembler preamble for the GHASH routines below (MASM syntax, x86-64).
; OPTION DOTNAME permits identifiers that begin with a dot, which the
; ".text$" segment name on the next line relies on.
| 1 OPTION DOTNAME |
; Open the code segment with 64-byte alignment. No other SEGMENT directive
; appears in the visible part of the file, so the constant tables near the
; end are assembled into this segment as well.
| 2 .text$ SEGMENT ALIGN(64) 'CODE' |
; External symbol defined elsewhere; gcm_ghash_clmul reads the DWORD at
; OPENSSL_ia32cap_P+4 to decide whether to take its 4-blocks-per-iteration path.
| 3 EXTERN OPENSSL_ia32cap_P:NEAR |
| 4 |
;-----------------------------------------------------------------------
; gcm_gmult_4bit - table-driven update of a 16-byte value, one nibble at a time.
; ABI:   Microsoft x64 on entry (arg1 = rcx, arg2 = rdx). The body works on
;        rdi/rsi, so the prologue parks the caller's rdi/rsi in the home
;        slots at [rsp+8]/[rsp+16] and copies rcx->rdi, rdx->rsi.
; In:    rdi = 16-byte buffer, read byte-by-byte and overwritten at the end
;              (presumably the GHASH accumulator Xi - confirm against caller)
;        rsi = table of 16-byte entries addressed as [rsi + nibble*16]
;              (presumably the 16-entry multiplication table - confirm)
; Out:   [rdi] and [rdi+8] rewritten with the byte-swapped result.
; Regs:  r9:r8 = running 128-bit value (r9 = upper qword, r8 = lower qword)
;        rax   = (low nibble << 4)  -> byte offset of a table entry
;        rbx   = (high nibble << 4) -> byte offset of a table entry
;        rcx   = index of the next input byte, counting down from 14
;        rdx   = the 4 bits shifted out of r8, index into $L$rem_4bit
;        r10   = bits carried from r9 into the top of r8
;        r11   = address of $L$rem_4bit
; Clobb: rax, rcx, rdx, r8-r11, flags. rbx is saved and restored. rbp and r12
;        are pushed but never written by the body, so the epilogue does not
;        reload them.
; Notes: the $L$SEH_begin / $L$gmult_prologue / $L$gmult_epilogue / $L$SEH_end
;        labels are presumably referenced by unwind data outside this view;
;        keep them where they are.
;-----------------------------------------------------------------------
| 5 PUBLIC gcm_gmult_4bit |
| 6 |
| 7 ALIGN 16 |
| 8 gcm_gmult_4bit PROC PUBLIC |
| 9 mov QWORD PTR[8+rsp],rdi ;WIN64 prologue |
| 10 mov QWORD PTR[16+rsp],rsi |
| 11 mov rax,rsp |
| 12 $L$SEH_begin_gcm_gmult_4bit:: |
| 13 mov rdi,rcx |
| 14 mov rsi,rdx |
| 15 |
| 16 |
; Three pushes = 24 bytes; after them rbx sits at [rsp+16], rbp at [rsp+8],
; r12 at [rsp].
| 17 push rbx |
| 18 push rbp |
| 19 push r12 |
| 20 $L$gmult_prologue:: |
| 21 |
; Start with the last byte (index 15): split it into two table offsets and
; seed r9:r8 with the entry selected by its low nibble.
| 22 movzx r8,BYTE PTR[15+rdi] |
| 23 lea r11,QWORD PTR[$L$rem_4bit] |
| 24 xor rax,rax |
| 25 xor rbx,rbx |
| 26 mov al,r8b |
| 27 mov bl,r8b |
| 28 shl al,4 |
| 29 mov rcx,14 |
| 30 mov r8,QWORD PTR[8+rax*1+rsi] |
| 31 mov r9,QWORD PTR[rax*1+rsi] |
| 32 and bl,0f0h |
| 33 mov rdx,r8 |
; Jump over the alignment padding emitted by the ALIGN below.
| 34 jmp $L$oop1 |
| 35 |
| 36 ALIGN 16 |
| 37 $L$oop1:: |
; Step A (high nibble of the current byte, offset in rbx):
;   r9:r8 >>= 4, with the 4 bits leaving r9 carried into the top of r8 via r10;
;   xor in the table entry at [rsi+rbx];
;   xor the $L$rem_4bit entry selected by the 4 bits that left r8 into r9.
; The next input byte ([rdi+rcx]) is fetched in the middle of the step and
; split into rax (low nibble << 4) and rbx (whole byte, masked in step B).
; rdx is refreshed from r8 before r10 is folded in; r10 only occupies the
; top 4 bits, so the low nibble captured in rdx is unaffected.
| 38 shr r8,4 |
| 39 and rdx,0fh |
| 40 mov r10,r9 |
| 41 mov al,BYTE PTR[rcx*1+rdi] |
| 42 shr r9,4 |
| 43 xor r8,QWORD PTR[8+rbx*1+rsi] |
| 44 shl r10,60 |
| 45 xor r9,QWORD PTR[rbx*1+rsi] |
| 46 mov bl,al |
| 47 xor r9,QWORD PTR[rdx*8+r11] |
| 48 mov rdx,r8 |
| 49 shl al,4 |
| 50 xor r8,r10 |
; rcx goes negative right after byte 0 has been fetched; the two nibbles of
; that final byte are then handled at $L$break1.
| 51 dec rcx |
| 52 js $L$break1 |
| 53 |
; Step B (low nibble of the byte just fetched, offset in rax): same
; shift / table xor / remainder xor as step A; also masks bl to the high
; nibble for the next pass through step A.
| 54 shr r8,4 |
| 55 and rdx,0fh |
| 56 mov r10,r9 |
| 57 shr r9,4 |
| 58 xor r8,QWORD PTR[8+rax*1+rsi] |
| 59 shl r10,60 |
| 60 xor r9,QWORD PTR[rax*1+rsi] |
| 61 and bl,0f0h |
| 62 xor r9,QWORD PTR[rdx*8+r11] |
| 63 mov rdx,r8 |
| 64 xor r8,r10 |
| 65 jmp $L$oop1 |
| 66 |
| 67 ALIGN 16 |
| 68 $L$break1:: |
; Final byte (index 0): low-nibble step using rax ...
| 69 shr r8,4 |
| 70 and rdx,0fh |
| 71 mov r10,r9 |
| 72 shr r9,4 |
| 73 xor r8,QWORD PTR[8+rax*1+rsi] |
| 74 shl r10,60 |
| 75 xor r9,QWORD PTR[rax*1+rsi] |
| 76 and bl,0f0h |
| 77 xor r9,QWORD PTR[rdx*8+r11] |
| 78 mov rdx,r8 |
| 79 xor r8,r10 |
| 80 |
; ... then the high-nibble step using rbx. No further byte is fetched.
| 81 shr r8,4 |
| 82 and rdx,0fh |
| 83 mov r10,r9 |
| 84 shr r9,4 |
| 85 xor r8,QWORD PTR[8+rbx*1+rsi] |
| 86 shl r10,60 |
| 87 xor r9,QWORD PTR[rbx*1+rsi] |
| 88 xor r8,r10 |
| 89 xor r9,QWORD PTR[rdx*8+r11] |
| 90 |
; Byte-reverse each half and write the result back over the input buffer.
| 91 bswap r8 |
| 92 bswap r9 |
| 93 mov QWORD PTR[8+rdi],r8 |
| 94 mov QWORD PTR[rdi],r9 |
| 95 |
; Reload rbx from its push slot and drop the 24 bytes of pushes in one step.
| 96 mov rbx,QWORD PTR[16+rsp] |
| 97 lea rsp,QWORD PTR[24+rsp] |
| 98 $L$gmult_epilogue:: |
| 99 mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue |
| 100 mov rsi,QWORD PTR[16+rsp] |
| 101 DB 0F3h,0C3h ;repret |
| 102 $L$SEH_end_gcm_gmult_4bit:: |
| 103 gcm_gmult_4bit ENDP |
;-----------------------------------------------------------------------
; gcm_ghash_4bit - table-driven update of a 16-byte value over a run of
; 16-byte input blocks, consuming one input byte per unrolled step.
; ABI:   Microsoft x64 on entry (rcx, rdx, r8, r9). The prologue saves the
;        caller's rdi/rsi in the home slots and remaps the arguments to
;        rdi, rsi, rdx, rcx.
; In:    rdi = 16-byte buffer, read at entry and rewritten (presumably Xi - confirm)
;        rsi = table of 16 entries x 16 bytes (presumably Htable - confirm)
;        r14 = input pointer (arg3), advanced by 16 per outer iteration
;        r15 = arg4 + arg3 = end-of-input pointer
; Out:   [rdi], [rdi+8] hold the final value.
; Stack: six pushes (48 bytes) + 280 bytes of locals:
;          [rsp+0   .. rsp+15 ]  16 bytes: (low nibble of each entry's lower qword) << 4
;          [rsp+16  .. rsp+143]  16 qwords at [rbp-128+8*i]: lower qword of (entry >> 4)
;          [rsp+144 .. rsp+271]  16 qwords at [rbp+8*i]:     upper qword of (entry >> 4)
;        with rbp = rsp+144.
; Clobb: rax, rcx, rdx, r8-r11, flags; rbx, rbp, r12-r15 are saved and restored.
; Notes: the length test sits at the bottom of $L$outer_loop, so one block
;        is always processed; callers presumably pass a non-zero multiple
;        of 16 - confirm. The $L$SEH_* / $L$ghash_prologue / $L$ghash_epilogue
;        labels are presumably tied to unwind data outside this view.
;-----------------------------------------------------------------------
| 104 PUBLIC gcm_ghash_4bit |
| 105 |
| 106 ALIGN 16 |
| 107 gcm_ghash_4bit PROC PUBLIC |
| 108 mov QWORD PTR[8+rsp],rdi ;WIN64 prologue |
| 109 mov QWORD PTR[16+rsp],rsi |
| 110 mov rax,rsp |
| 111 $L$SEH_begin_gcm_ghash_4bit:: |
| 112 mov rdi,rcx |
| 113 mov rsi,rdx |
| 114 mov rdx,r8 |
| 115 mov rcx,r9 |
| 116 |
| 117 |
| 118 push rbx |
| 119 push rbp |
| 120 push r12 |
| 121 push r13 |
| 122 push r14 |
| 123 push r15 |
| 124 sub rsp,280 |
| 125 $L$ghash_prologue:: |
| 126 mov r14,rdx |
| 127 mov r15,rcx |
; "sub rsi,-128" adds 128 to rsi; every table access in the setup below is
; written as (offset-128)+rsi. Presumably this keeps all 256 bytes of the
; table within signed 8-bit displacements. The bias is removed again by
; "add rsi,-128" once the setup is done.
| 128 sub rsi,-128 |
| 129 lea rbp,QWORD PTR[((16+128))+rsp] |
; Table setup, software-pipelined over the 16 entries. For entry i, with
; hi = qword at offset 16*i and lo = qword at offset 16*i+8:
;   BYTE  [rsp+i]        = (lo & 0fh) << 4      (via dl)
;   QWORD [rbp+8*i]      = hi >> 4
;   QWORD [rbp-128+8*i]  = (lo >> 4) | (hi << 60)
; r8/rax and r9/rbx alternate as the hi/lo pair so loads for the next entry
; overlap the stores for the current one.
| 130 xor edx,edx |
| 131 mov r8,QWORD PTR[((0+0-128))+rsi] |
| 132 mov rax,QWORD PTR[((0+8-128))+rsi] |
| 133 mov dl,al |
| 134 shr rax,4 |
| 135 mov r10,r8 |
| 136 shr r8,4 |
| 137 mov r9,QWORD PTR[((16+0-128))+rsi] |
| 138 shl dl,4 |
| 139 mov rbx,QWORD PTR[((16+8-128))+rsi] |
| 140 shl r10,60 |
| 141 mov BYTE PTR[rsp],dl |
| 142 or rax,r10 |
| 143 mov dl,bl |
| 144 shr rbx,4 |
| 145 mov r10,r9 |
| 146 shr r9,4 |
| 147 mov QWORD PTR[rbp],r8 |
| 148 mov r8,QWORD PTR[((32+0-128))+rsi] |
| 149 shl dl,4 |
| 150 mov QWORD PTR[((0-128))+rbp],rax |
| 151 mov rax,QWORD PTR[((32+8-128))+rsi] |
| 152 shl r10,60 |
| 153 mov BYTE PTR[1+rsp],dl |
| 154 or rbx,r10 |
| 155 mov dl,al |
| 156 shr rax,4 |
| 157 mov r10,r8 |
| 158 shr r8,4 |
| 159 mov QWORD PTR[8+rbp],r9 |
| 160 mov r9,QWORD PTR[((48+0-128))+rsi] |
| 161 shl dl,4 |
| 162 mov QWORD PTR[((8-128))+rbp],rbx |
| 163 mov rbx,QWORD PTR[((48+8-128))+rsi] |
| 164 shl r10,60 |
| 165 mov BYTE PTR[2+rsp],dl |
| 166 or rax,r10 |
| 167 mov dl,bl |
| 168 shr rbx,4 |
| 169 mov r10,r9 |
| 170 shr r9,4 |
| 171 mov QWORD PTR[16+rbp],r8 |
| 172 mov r8,QWORD PTR[((64+0-128))+rsi] |
| 173 shl dl,4 |
| 174 mov QWORD PTR[((16-128))+rbp],rax |
| 175 mov rax,QWORD PTR[((64+8-128))+rsi] |
| 176 shl r10,60 |
| 177 mov BYTE PTR[3+rsp],dl |
| 178 or rbx,r10 |
| 179 mov dl,al |
| 180 shr rax,4 |
| 181 mov r10,r8 |
| 182 shr r8,4 |
| 183 mov QWORD PTR[24+rbp],r9 |
| 184 mov r9,QWORD PTR[((80+0-128))+rsi] |
| 185 shl dl,4 |
| 186 mov QWORD PTR[((24-128))+rbp],rbx |
| 187 mov rbx,QWORD PTR[((80+8-128))+rsi] |
| 188 shl r10,60 |
| 189 mov BYTE PTR[4+rsp],dl |
| 190 or rax,r10 |
| 191 mov dl,bl |
| 192 shr rbx,4 |
| 193 mov r10,r9 |
| 194 shr r9,4 |
| 195 mov QWORD PTR[32+rbp],r8 |
| 196 mov r8,QWORD PTR[((96+0-128))+rsi] |
| 197 shl dl,4 |
| 198 mov QWORD PTR[((32-128))+rbp],rax |
| 199 mov rax,QWORD PTR[((96+8-128))+rsi] |
| 200 shl r10,60 |
| 201 mov BYTE PTR[5+rsp],dl |
| 202 or rbx,r10 |
| 203 mov dl,al |
| 204 shr rax,4 |
| 205 mov r10,r8 |
| 206 shr r8,4 |
| 207 mov QWORD PTR[40+rbp],r9 |
| 208 mov r9,QWORD PTR[((112+0-128))+rsi] |
| 209 shl dl,4 |
| 210 mov QWORD PTR[((40-128))+rbp],rbx |
| 211 mov rbx,QWORD PTR[((112+8-128))+rsi] |
| 212 shl r10,60 |
| 213 mov BYTE PTR[6+rsp],dl |
| 214 or rax,r10 |
| 215 mov dl,bl |
| 216 shr rbx,4 |
| 217 mov r10,r9 |
| 218 shr r9,4 |
| 219 mov QWORD PTR[48+rbp],r8 |
| 220 mov r8,QWORD PTR[((128+0-128))+rsi] |
| 221 shl dl,4 |
| 222 mov QWORD PTR[((48-128))+rbp],rax |
| 223 mov rax,QWORD PTR[((128+8-128))+rsi] |
| 224 shl r10,60 |
| 225 mov BYTE PTR[7+rsp],dl |
| 226 or rbx,r10 |
| 227 mov dl,al |
| 228 shr rax,4 |
| 229 mov r10,r8 |
| 230 shr r8,4 |
| 231 mov QWORD PTR[56+rbp],r9 |
| 232 mov r9,QWORD PTR[((144+0-128))+rsi] |
| 233 shl dl,4 |
| 234 mov QWORD PTR[((56-128))+rbp],rbx |
| 235 mov rbx,QWORD PTR[((144+8-128))+rsi] |
| 236 shl r10,60 |
| 237 mov BYTE PTR[8+rsp],dl |
| 238 or rax,r10 |
| 239 mov dl,bl |
| 240 shr rbx,4 |
| 241 mov r10,r9 |
| 242 shr r9,4 |
| 243 mov QWORD PTR[64+rbp],r8 |
| 244 mov r8,QWORD PTR[((160+0-128))+rsi] |
| 245 shl dl,4 |
| 246 mov QWORD PTR[((64-128))+rbp],rax |
| 247 mov rax,QWORD PTR[((160+8-128))+rsi] |
| 248 shl r10,60 |
| 249 mov BYTE PTR[9+rsp],dl |
| 250 or rbx,r10 |
| 251 mov dl,al |
| 252 shr rax,4 |
| 253 mov r10,r8 |
| 254 shr r8,4 |
| 255 mov QWORD PTR[72+rbp],r9 |
| 256 mov r9,QWORD PTR[((176+0-128))+rsi] |
| 257 shl dl,4 |
| 258 mov QWORD PTR[((72-128))+rbp],rbx |
| 259 mov rbx,QWORD PTR[((176+8-128))+rsi] |
| 260 shl r10,60 |
| 261 mov BYTE PTR[10+rsp],dl |
| 262 or rax,r10 |
| 263 mov dl,bl |
| 264 shr rbx,4 |
| 265 mov r10,r9 |
| 266 shr r9,4 |
| 267 mov QWORD PTR[80+rbp],r8 |
| 268 mov r8,QWORD PTR[((192+0-128))+rsi] |
| 269 shl dl,4 |
| 270 mov QWORD PTR[((80-128))+rbp],rax |
| 271 mov rax,QWORD PTR[((192+8-128))+rsi] |
| 272 shl r10,60 |
| 273 mov BYTE PTR[11+rsp],dl |
| 274 or rbx,r10 |
| 275 mov dl,al |
| 276 shr rax,4 |
| 277 mov r10,r8 |
| 278 shr r8,4 |
| 279 mov QWORD PTR[88+rbp],r9 |
| 280 mov r9,QWORD PTR[((208+0-128))+rsi] |
| 281 shl dl,4 |
| 282 mov QWORD PTR[((88-128))+rbp],rbx |
| 283 mov rbx,QWORD PTR[((208+8-128))+rsi] |
| 284 shl r10,60 |
| 285 mov BYTE PTR[12+rsp],dl |
| 286 or rax,r10 |
| 287 mov dl,bl |
| 288 shr rbx,4 |
| 289 mov r10,r9 |
| 290 shr r9,4 |
| 291 mov QWORD PTR[96+rbp],r8 |
| 292 mov r8,QWORD PTR[((224+0-128))+rsi] |
| 293 shl dl,4 |
| 294 mov QWORD PTR[((96-128))+rbp],rax |
| 295 mov rax,QWORD PTR[((224+8-128))+rsi] |
| 296 shl r10,60 |
| 297 mov BYTE PTR[13+rsp],dl |
| 298 or rbx,r10 |
| 299 mov dl,al |
| 300 shr rax,4 |
| 301 mov r10,r8 |
| 302 shr r8,4 |
| 303 mov QWORD PTR[104+rbp],r9 |
| 304 mov r9,QWORD PTR[((240+0-128))+rsi] |
| 305 shl dl,4 |
| 306 mov QWORD PTR[((104-128))+rbp],rbx |
| 307 mov rbx,QWORD PTR[((240+8-128))+rsi] |
| 308 shl r10,60 |
| 309 mov BYTE PTR[14+rsp],dl |
| 310 or rax,r10 |
| 311 mov dl,bl |
| 312 shr rbx,4 |
| 313 mov r10,r9 |
| 314 shr r9,4 |
| 315 mov QWORD PTR[112+rbp],r8 |
| 316 shl dl,4 |
| 317 mov QWORD PTR[((112-128))+rbp],rax |
| 318 shl r10,60 |
| 319 mov BYTE PTR[15+rsp],dl |
| 320 or rbx,r10 |
| 321 mov QWORD PTR[120+rbp],r9 |
| 322 mov QWORD PTR[((120-128))+rbp],rbx |
; Setup done: remove the +128 bias, load the current 16-byte value
; (r9 = [rdi], r8 = [rdi+8]), turn the length into an end pointer and
; point r11 at the 16-bit remainder table.
| 323 add rsi,-128 |
| 324 mov r8,QWORD PTR[8+rdi] |
| 325 mov r9,QWORD PTR[rdi] |
| 326 add r15,r14 |
| 327 lea r11,QWORD PTR[$L$rem_8bit] |
| 328 jmp $L$outer_loop |
| 329 ALIGN 16 |
| 330 $L$outer_loop:: |
; One iteration per 16-byte input block.
; 1. XOR the block at [r14] into the running value and store the result to
;    [rdi]/[rdi+8]; the stored copy is what the byte-wise steps re-read
;    4 bytes at a time through edx.
; 2. Unrolled byte steps. "rol edx,8" rotates the next byte into dl; al becomes
;    (low nibble << 4), a byte offset into the table at rsi, and rbx or rcx
;    (alternating) becomes the high nibble, an index into the pre-shifted
;    tables at rbp / rbp-128 and the byte table at rsp. Each step:
;      - takes the low byte of r8 XOR [rsp+idx] as an index into $L$rem_8bit
;        (r12 / r13 alternate),
;      - shifts r9:r8 right by 8 bits (r10 carries r9's low byte into r8),
;      - xors in the pre-shifted entry for the high nibble and the plain
;        entry for the low nibble of the following byte,
;      - xors the 16-bit $L$rem_8bit entry, shifted left by 48, into r9.
| 331 xor r9,QWORD PTR[r14] |
| 332 mov rdx,QWORD PTR[8+r14] |
| 333 lea r14,QWORD PTR[16+r14] |
| 334 xor rdx,r8 |
| 335 mov QWORD PTR[rdi],r9 |
| 336 mov QWORD PTR[8+rdi],rdx |
; edx = bytes 12..15 of the stored value; the first rol brings byte 15 into dl.
| 337 shr rdx,32 |
| 338 xor rax,rax |
| 339 rol edx,8 |
| 340 mov al,dl |
| 341 movzx ebx,dl |
| 342 shl al,4 |
| 343 shr ebx,4 |
| 344 rol edx,8 |
| 345 mov r8,QWORD PTR[8+rax*1+rsi] |
| 346 mov r9,QWORD PTR[rax*1+rsi] |
| 347 mov al,dl |
| 348 movzx ecx,dl |
| 349 shl al,4 |
| 350 movzx r12,BYTE PTR[rbx*1+rsp] |
| 351 shr ecx,4 |
| 352 xor r12,r8 |
| 353 mov r10,r9 |
| 354 shr r8,8 |
| 355 movzx r12,r12b |
| 356 shr r9,8 |
| 357 xor r8,QWORD PTR[((-128))+rbx*8+rbp] |
| 358 shl r10,56 |
| 359 xor r9,QWORD PTR[rbx*8+rbp] |
| 360 rol edx,8 |
| 361 xor r8,QWORD PTR[8+rax*1+rsi] |
| 362 xor r9,QWORD PTR[rax*1+rsi] |
| 363 mov al,dl |
| 364 xor r8,r10 |
| 365 movzx r12,WORD PTR[r12*2+r11] |
| 366 movzx ebx,dl |
| 367 shl al,4 |
| 368 movzx r13,BYTE PTR[rcx*1+rsp] |
| 369 shr ebx,4 |
| 370 shl r12,48 |
| 371 xor r13,r8 |
| 372 mov r10,r9 |
| 373 xor r9,r12 |
| 374 shr r8,8 |
| 375 movzx r13,r13b |
| 376 shr r9,8 |
| 377 xor r8,QWORD PTR[((-128))+rcx*8+rbp] |
| 378 shl r10,56 |
| 379 xor r9,QWORD PTR[rcx*8+rbp] |
| 380 rol edx,8 |
| 381 xor r8,QWORD PTR[8+rax*1+rsi] |
| 382 xor r9,QWORD PTR[rax*1+rsi] |
| 383 mov al,dl |
| 384 xor r8,r10 |
| 385 movzx r13,WORD PTR[r13*2+r11] |
| 386 movzx ecx,dl |
| 387 shl al,4 |
| 388 movzx r12,BYTE PTR[rbx*1+rsp] |
| 389 shr ecx,4 |
| 390 shl r13,48 |
| 391 xor r12,r8 |
| 392 mov r10,r9 |
| 393 xor r9,r13 |
| 394 shr r8,8 |
| 395 movzx r12,r12b |
; Refill edx with bytes 8..11 of the stored value.
| 396 mov edx,DWORD PTR[8+rdi] |
| 397 shr r9,8 |
| 398 xor r8,QWORD PTR[((-128))+rbx*8+rbp] |
| 399 shl r10,56 |
| 400 xor r9,QWORD PTR[rbx*8+rbp] |
| 401 rol edx,8 |
| 402 xor r8,QWORD PTR[8+rax*1+rsi] |
| 403 xor r9,QWORD PTR[rax*1+rsi] |
| 404 mov al,dl |
| 405 xor r8,r10 |
| 406 movzx r12,WORD PTR[r12*2+r11] |
| 407 movzx ebx,dl |
| 408 shl al,4 |
| 409 movzx r13,BYTE PTR[rcx*1+rsp] |
| 410 shr ebx,4 |
| 411 shl r12,48 |
| 412 xor r13,r8 |
| 413 mov r10,r9 |
| 414 xor r9,r12 |
| 415 shr r8,8 |
| 416 movzx r13,r13b |
| 417 shr r9,8 |
| 418 xor r8,QWORD PTR[((-128))+rcx*8+rbp] |
| 419 shl r10,56 |
| 420 xor r9,QWORD PTR[rcx*8+rbp] |
| 421 rol edx,8 |
| 422 xor r8,QWORD PTR[8+rax*1+rsi] |
| 423 xor r9,QWORD PTR[rax*1+rsi] |
| 424 mov al,dl |
| 425 xor r8,r10 |
| 426 movzx r13,WORD PTR[r13*2+r11] |
| 427 movzx ecx,dl |
| 428 shl al,4 |
| 429 movzx r12,BYTE PTR[rbx*1+rsp] |
| 430 shr ecx,4 |
| 431 shl r13,48 |
| 432 xor r12,r8 |
| 433 mov r10,r9 |
| 434 xor r9,r13 |
| 435 shr r8,8 |
| 436 movzx r12,r12b |
| 437 shr r9,8 |
| 438 xor r8,QWORD PTR[((-128))+rbx*8+rbp] |
| 439 shl r10,56 |
| 440 xor r9,QWORD PTR[rbx*8+rbp] |
| 441 rol edx,8 |
| 442 xor r8,QWORD PTR[8+rax*1+rsi] |
| 443 xor r9,QWORD PTR[rax*1+rsi] |
| 444 mov al,dl |
| 445 xor r8,r10 |
| 446 movzx r12,WORD PTR[r12*2+r11] |
| 447 movzx ebx,dl |
| 448 shl al,4 |
| 449 movzx r13,BYTE PTR[rcx*1+rsp] |
| 450 shr ebx,4 |
| 451 shl r12,48 |
| 452 xor r13,r8 |
| 453 mov r10,r9 |
| 454 xor r9,r12 |
| 455 shr r8,8 |
| 456 movzx r13,r13b |
| 457 shr r9,8 |
| 458 xor r8,QWORD PTR[((-128))+rcx*8+rbp] |
| 459 shl r10,56 |
| 460 xor r9,QWORD PTR[rcx*8+rbp] |
| 461 rol edx,8 |
| 462 xor r8,QWORD PTR[8+rax*1+rsi] |
| 463 xor r9,QWORD PTR[rax*1+rsi] |
| 464 mov al,dl |
| 465 xor r8,r10 |
| 466 movzx r13,WORD PTR[r13*2+r11] |
| 467 movzx ecx,dl |
| 468 shl al,4 |
| 469 movzx r12,BYTE PTR[rbx*1+rsp] |
| 470 shr ecx,4 |
| 471 shl r13,48 |
| 472 xor r12,r8 |
| 473 mov r10,r9 |
| 474 xor r9,r13 |
| 475 shr r8,8 |
| 476 movzx r12,r12b |
; Refill edx with bytes 4..7 of the stored value.
| 477 mov edx,DWORD PTR[4+rdi] |
| 478 shr r9,8 |
| 479 xor r8,QWORD PTR[((-128))+rbx*8+rbp] |
| 480 shl r10,56 |
| 481 xor r9,QWORD PTR[rbx*8+rbp] |
| 482 rol edx,8 |
| 483 xor r8,QWORD PTR[8+rax*1+rsi] |
| 484 xor r9,QWORD PTR[rax*1+rsi] |
| 485 mov al,dl |
| 486 xor r8,r10 |
| 487 movzx r12,WORD PTR[r12*2+r11] |
| 488 movzx ebx,dl |
| 489 shl al,4 |
| 490 movzx r13,BYTE PTR[rcx*1+rsp] |
| 491 shr ebx,4 |
| 492 shl r12,48 |
| 493 xor r13,r8 |
| 494 mov r10,r9 |
| 495 xor r9,r12 |
| 496 shr r8,8 |
| 497 movzx r13,r13b |
| 498 shr r9,8 |
| 499 xor r8,QWORD PTR[((-128))+rcx*8+rbp] |
| 500 shl r10,56 |
| 501 xor r9,QWORD PTR[rcx*8+rbp] |
| 502 rol edx,8 |
| 503 xor r8,QWORD PTR[8+rax*1+rsi] |
| 504 xor r9,QWORD PTR[rax*1+rsi] |
| 505 mov al,dl |
| 506 xor r8,r10 |
| 507 movzx r13,WORD PTR[r13*2+r11] |
| 508 movzx ecx,dl |
| 509 shl al,4 |
| 510 movzx r12,BYTE PTR[rbx*1+rsp] |
| 511 shr ecx,4 |
| 512 shl r13,48 |
| 513 xor r12,r8 |
| 514 mov r10,r9 |
| 515 xor r9,r13 |
| 516 shr r8,8 |
| 517 movzx r12,r12b |
| 518 shr r9,8 |
| 519 xor r8,QWORD PTR[((-128))+rbx*8+rbp] |
| 520 shl r10,56 |
| 521 xor r9,QWORD PTR[rbx*8+rbp] |
| 522 rol edx,8 |
| 523 xor r8,QWORD PTR[8+rax*1+rsi] |
| 524 xor r9,QWORD PTR[rax*1+rsi] |
| 525 mov al,dl |
| 526 xor r8,r10 |
| 527 movzx r12,WORD PTR[r12*2+r11] |
| 528 movzx ebx,dl |
| 529 shl al,4 |
| 530 movzx r13,BYTE PTR[rcx*1+rsp] |
| 531 shr ebx,4 |
| 532 shl r12,48 |
| 533 xor r13,r8 |
| 534 mov r10,r9 |
| 535 xor r9,r12 |
| 536 shr r8,8 |
| 537 movzx r13,r13b |
| 538 shr r9,8 |
| 539 xor r8,QWORD PTR[((-128))+rcx*8+rbp] |
| 540 shl r10,56 |
| 541 xor r9,QWORD PTR[rcx*8+rbp] |
| 542 rol edx,8 |
| 543 xor r8,QWORD PTR[8+rax*1+rsi] |
| 544 xor r9,QWORD PTR[rax*1+rsi] |
| 545 mov al,dl |
| 546 xor r8,r10 |
| 547 movzx r13,WORD PTR[r13*2+r11] |
| 548 movzx ecx,dl |
| 549 shl al,4 |
| 550 movzx r12,BYTE PTR[rbx*1+rsp] |
| 551 shr ecx,4 |
| 552 shl r13,48 |
| 553 xor r12,r8 |
| 554 mov r10,r9 |
| 555 xor r9,r13 |
| 556 shr r8,8 |
| 557 movzx r12,r12b |
; Refill edx with bytes 0..3 of the stored value.
| 558 mov edx,DWORD PTR[rdi] |
| 559 shr r9,8 |
| 560 xor r8,QWORD PTR[((-128))+rbx*8+rbp] |
| 561 shl r10,56 |
| 562 xor r9,QWORD PTR[rbx*8+rbp] |
| 563 rol edx,8 |
| 564 xor r8,QWORD PTR[8+rax*1+rsi] |
| 565 xor r9,QWORD PTR[rax*1+rsi] |
| 566 mov al,dl |
| 567 xor r8,r10 |
| 568 movzx r12,WORD PTR[r12*2+r11] |
| 569 movzx ebx,dl |
| 570 shl al,4 |
| 571 movzx r13,BYTE PTR[rcx*1+rsp] |
| 572 shr ebx,4 |
| 573 shl r12,48 |
| 574 xor r13,r8 |
| 575 mov r10,r9 |
| 576 xor r9,r12 |
| 577 shr r8,8 |
| 578 movzx r13,r13b |
| 579 shr r9,8 |
| 580 xor r8,QWORD PTR[((-128))+rcx*8+rbp] |
| 581 shl r10,56 |
| 582 xor r9,QWORD PTR[rcx*8+rbp] |
| 583 rol edx,8 |
| 584 xor r8,QWORD PTR[8+rax*1+rsi] |
| 585 xor r9,QWORD PTR[rax*1+rsi] |
| 586 mov al,dl |
| 587 xor r8,r10 |
| 588 movzx r13,WORD PTR[r13*2+r11] |
| 589 movzx ecx,dl |
| 590 shl al,4 |
| 591 movzx r12,BYTE PTR[rbx*1+rsp] |
| 592 shr ecx,4 |
| 593 shl r13,48 |
| 594 xor r12,r8 |
| 595 mov r10,r9 |
| 596 xor r9,r13 |
| 597 shr r8,8 |
| 598 movzx r12,r12b |
| 599 shr r9,8 |
| 600 xor r8,QWORD PTR[((-128))+rbx*8+rbp] |
| 601 shl r10,56 |
| 602 xor r9,QWORD PTR[rbx*8+rbp] |
| 603 rol edx,8 |
| 604 xor r8,QWORD PTR[8+rax*1+rsi] |
| 605 xor r9,QWORD PTR[rax*1+rsi] |
| 606 mov al,dl |
| 607 xor r8,r10 |
| 608 movzx r12,WORD PTR[r12*2+r11] |
| 609 movzx ebx,dl |
| 610 shl al,4 |
| 611 movzx r13,BYTE PTR[rcx*1+rsp] |
| 612 shr ebx,4 |
| 613 shl r12,48 |
| 614 xor r13,r8 |
| 615 mov r10,r9 |
| 616 xor r9,r12 |
| 617 shr r8,8 |
| 618 movzx r13,r13b |
| 619 shr r9,8 |
| 620 xor r8,QWORD PTR[((-128))+rcx*8+rbp] |
| 621 shl r10,56 |
| 622 xor r9,QWORD PTR[rcx*8+rbp] |
| 623 rol edx,8 |
| 624 xor r8,QWORD PTR[8+rax*1+rsi] |
| 625 xor r9,QWORD PTR[rax*1+rsi] |
| 626 mov al,dl |
| 627 xor r8,r10 |
| 628 movzx r13,WORD PTR[r13*2+r11] |
| 629 movzx ecx,dl |
| 630 shl al,4 |
| 631 movzx r12,BYTE PTR[rbx*1+rsp] |
; Last byte of the block: its high nibble is kept as a byte offset
; (and 240 = 0f0h) instead of an index, because the closing step below
; shifts by 4 bits and uses the unshifted table at rsi.
| 632 and ecx,240 |
| 633 shl r13,48 |
| 634 xor r12,r8 |
| 635 mov r10,r9 |
| 636 xor r9,r13 |
| 637 shr r8,8 |
| 638 movzx r12,r12b |
; NOTE(review): this load of [rdi-4] looks dead - edx is not read again
; before rdx is overwritten at the top of $L$outer_loop or the function
; returns - and it reads 4 bytes below the buffer at rdi. It follows the
; same refill pattern as the three loads above. Confirm before touching it.
| 639 mov edx,DWORD PTR[((-4))+rdi] |
| 640 shr r9,8 |
| 641 xor r8,QWORD PTR[((-128))+rbx*8+rbp] |
| 642 shl r10,56 |
| 643 xor r9,QWORD PTR[rbx*8+rbp] |
| 644 movzx r12,WORD PTR[r12*2+r11] |
| 645 xor r8,QWORD PTR[8+rax*1+rsi] |
| 646 xor r9,QWORD PTR[rax*1+rsi] |
| 647 shl r12,48 |
| 648 xor r8,r10 |
| 649 xor r9,r12 |
; Closing 4-bit step: shift r9:r8 right by 4, xor in the entry at [rsi+rcx],
; and fold the remainder looked up from $L$rem_8bit with index
; (low nibble of r8) << 4.
| 650 movzx r13,r8b |
| 651 shr r8,4 |
| 652 mov r10,r9 |
| 653 shl r13b,4 |
| 654 shr r9,4 |
| 655 xor r8,QWORD PTR[8+rcx*1+rsi] |
| 656 movzx r13,WORD PTR[r13*2+r11] |
| 657 shl r10,60 |
| 658 xor r9,QWORD PTR[rcx*1+rsi] |
| 659 xor r8,r10 |
| 660 shl r13,48 |
| 661 bswap r8 |
| 662 xor r9,r13 |
| 663 bswap r9 |
; Unsigned compare of the input cursor against the end pointer.
| 664 cmp r14,r15 |
| 665 jb $L$outer_loop |
| 666 mov QWORD PTR[8+rdi],r8 |
| 667 mov QWORD PTR[rdi],r9 |
| 668 |
; Epilogue: rsi = address of the first pushed register above the 280-byte
; local area; reload in reverse push order and release the whole frame.
| 669 lea rsi,QWORD PTR[280+rsp] |
| 670 mov r15,QWORD PTR[rsi] |
| 671 mov r14,QWORD PTR[8+rsi] |
| 672 mov r13,QWORD PTR[16+rsi] |
| 673 mov r12,QWORD PTR[24+rsi] |
| 674 mov rbp,QWORD PTR[32+rsi] |
| 675 mov rbx,QWORD PTR[40+rsi] |
| 676 lea rsp,QWORD PTR[48+rsi] |
| 677 $L$ghash_epilogue:: |
| 678 mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue |
| 679 mov rsi,QWORD PTR[16+rsp] |
| 680 DB 0F3h,0C3h ;repret |
| 681 $L$SEH_end_gcm_ghash_4bit:: |
| 682 gcm_ghash_4bit ENDP |
;-----------------------------------------------------------------------
; gcm_init_clmul - build a 96-byte table of powers from a 16-byte input,
; using carry-less multiplication.
; ABI:   Microsoft x64, arguments used directly (no register remapping).
; In:    rdx = pointer to the 16-byte input (read once, unaligned load)
;        rcx = pointer to the 96-byte output table (unaligned stores)
; Out:   with H = the adjusted input (see below):
;          [rcx+0]  = H          [rcx+16] = H*H
;          [rcx+32] = (hi^lo of H) in the low qword, (hi^lo of H*H) in the high qword
;          [rcx+48] = H*H*H      [rcx+64] = H*H*H*H
;          [rcx+80] = same packing for the third and fourth powers
;        where "*" is the multiply-and-reduce sequence used below
;        (presumably multiplication in the GCM field - confirm).
; Clobb: xmm0-xmm5, flags. xmm6 is saved to a 24-byte stack frame and restored.
; Raw-encoded instructions (DB):
;   048h,083h,0ech,018h      sub rsp,18h
;   00fh,029h,034h,024h      movaps [rsp],xmm6
;   102,15,58,68,modrm,imm   pclmulqdq (194=xmm0,xmm2  202=xmm1,xmm2  222=xmm3,xmm6)
;                            imm 0 = low qwords, 17 = high qwords
;   102,15,58,15,227,8       palignr xmm4,xmm3,8
; Notes: $L$_init_clmul is the entry label that gcm_init_avx jumps to.
;-----------------------------------------------------------------------
| 683 PUBLIC gcm_init_clmul |
| 684 |
| 685 ALIGN 16 |
| 686 gcm_init_clmul PROC PUBLIC |
| 687 $L$_init_clmul:: |
| 688 $L$SEH_begin_gcm_init_clmul:: |
| 689 |
| 690 DB 048h,083h,0ech,018h |
| 691 DB 00fh,029h,034h,024h |
; Load the input and swap its two 64-bit halves (pshufd 78 = dword order 2,3,0,1).
| 692 movdqu xmm2,XMMWORD PTR[rdx] |
| 693 pshufd xmm2,xmm2,78 |
| 694 |
| 695 |
; Shift the 128-bit value left by one bit (psllq + carry via psrlq 63 / pslldq 8).
; xmm5 becomes all-ones when the top dword was negative, i.e. when the bit
; shifted out was set.
| 696 pshufd xmm4,xmm2,255 |
| 697 movdqa xmm3,xmm2 |
| 698 psllq xmm2,1 |
| 699 pxor xmm5,xmm5 |
| 700 psrlq xmm3,63 |
| 701 pcmpgtd xmm5,xmm4 |
| 702 pslldq xmm3,8 |
| 703 por xmm2,xmm3 |
| 704 |
| 705 |
; Conditionally xor in the $L$0x1c2_polynomial constant. xmm2 is now H.
| 706 pand xmm5,XMMWORD PTR[$L$0x1c2_polynomial] |
| 707 pxor xmm2,xmm5 |
| 708 |
| 709 |
; xmm6 = hi^lo of H, reused as the middle operand of every multiply below.
; First multiply: xmm0 = H times H. Three pclmulqdq give low*low (xmm0),
; high*high (xmm1) and (hi^lo)*(hi^lo) (xmm3); the two pxor turn xmm3 into
; the middle term.
| 710 pshufd xmm6,xmm2,78 |
| 711 movdqa xmm0,xmm2 |
| 712 pxor xmm6,xmm2 |
| 713 movdqa xmm1,xmm0 |
| 714 pshufd xmm3,xmm0,78 |
| 715 pxor xmm3,xmm0 |
| 716 DB 102,15,58,68,194,0 |
| 717 DB 102,15,58,68,202,17 |
| 718 DB 102,15,58,68,222,0 |
| 719 pxor xmm3,xmm0 |
| 720 pxor xmm3,xmm1 |
| 721 |
; Fold the middle term into the 256-bit product xmm1:xmm0.
| 722 movdqa xmm4,xmm3 |
| 723 psrldq xmm3,8 |
| 724 pslldq xmm4,8 |
| 725 pxor xmm1,xmm3 |
| 726 pxor xmm0,xmm4 |
| 727 |
; Reduction, first phase (left shifts by 5, 1 and 57 combined with xors).
| 728 movdqa xmm4,xmm0 |
| 729 movdqa xmm3,xmm0 |
| 730 psllq xmm0,5 |
| 731 pxor xmm3,xmm0 |
| 732 psllq xmm0,1 |
| 733 pxor xmm0,xmm3 |
| 734 psllq xmm0,57 |
| 735 movdqa xmm3,xmm0 |
| 736 pslldq xmm0,8 |
| 737 psrldq xmm3,8 |
| 738 pxor xmm0,xmm4 |
| 739 pxor xmm1,xmm3 |
| 740 |
| 741 |
; Reduction, second phase (right shifts by 1, 5 and 1 combined with xors);
; the reduced 128-bit result ends up in xmm0.
| 742 movdqa xmm4,xmm0 |
| 743 psrlq xmm0,1 |
| 744 pxor xmm1,xmm4 |
| 745 pxor xmm4,xmm0 |
| 746 psrlq xmm0,5 |
| 747 pxor xmm0,xmm4 |
| 748 psrlq xmm0,1 |
| 749 pxor xmm0,xmm1 |
; Store H and H*H, then pack their hi^lo qwords into one vector with palignr.
| 750 pshufd xmm3,xmm2,78 |
| 751 pshufd xmm4,xmm0,78 |
| 752 pxor xmm3,xmm2 |
| 753 movdqu XMMWORD PTR[rcx],xmm2 |
| 754 pxor xmm4,xmm0 |
| 755 movdqu XMMWORD PTR[16+rcx],xmm0 |
| 756 DB 102,15,58,15,227,8 |
| 757 movdqu XMMWORD PTR[32+rcx],xmm4 |
; Second multiply: xmm0 (second power) times H -> third power.
| 758 movdqa xmm1,xmm0 |
| 759 pshufd xmm3,xmm0,78 |
| 760 pxor xmm3,xmm0 |
| 761 DB 102,15,58,68,194,0 |
| 762 DB 102,15,58,68,202,17 |
| 763 DB 102,15,58,68,222,0 |
| 764 pxor xmm3,xmm0 |
| 765 pxor xmm3,xmm1 |
| 766 |
| 767 movdqa xmm4,xmm3 |
| 768 psrldq xmm3,8 |
| 769 pslldq xmm4,8 |
| 770 pxor xmm1,xmm3 |
| 771 pxor xmm0,xmm4 |
| 772 |
| 773 movdqa xmm4,xmm0 |
| 774 movdqa xmm3,xmm0 |
| 775 psllq xmm0,5 |
| 776 pxor xmm3,xmm0 |
| 777 psllq xmm0,1 |
| 778 pxor xmm0,xmm3 |
| 779 psllq xmm0,57 |
| 780 movdqa xmm3,xmm0 |
| 781 pslldq xmm0,8 |
| 782 psrldq xmm3,8 |
| 783 pxor xmm0,xmm4 |
| 784 pxor xmm1,xmm3 |
| 785 |
| 786 |
| 787 movdqa xmm4,xmm0 |
| 788 psrlq xmm0,1 |
| 789 pxor xmm1,xmm4 |
| 790 pxor xmm4,xmm0 |
| 791 psrlq xmm0,5 |
| 792 pxor xmm0,xmm4 |
| 793 psrlq xmm0,1 |
| 794 pxor xmm0,xmm1 |
; Keep the third power in xmm5, then multiply by H once more -> fourth power.
| 795 movdqa xmm5,xmm0 |
| 796 movdqa xmm1,xmm0 |
| 797 pshufd xmm3,xmm0,78 |
| 798 pxor xmm3,xmm0 |
| 799 DB 102,15,58,68,194,0 |
| 800 DB 102,15,58,68,202,17 |
| 801 DB 102,15,58,68,222,0 |
| 802 pxor xmm3,xmm0 |
| 803 pxor xmm3,xmm1 |
| 804 |
| 805 movdqa xmm4,xmm3 |
| 806 psrldq xmm3,8 |
| 807 pslldq xmm4,8 |
| 808 pxor xmm1,xmm3 |
| 809 pxor xmm0,xmm4 |
| 810 |
| 811 movdqa xmm4,xmm0 |
| 812 movdqa xmm3,xmm0 |
| 813 psllq xmm0,5 |
| 814 pxor xmm3,xmm0 |
| 815 psllq xmm0,1 |
| 816 pxor xmm0,xmm3 |
| 817 psllq xmm0,57 |
| 818 movdqa xmm3,xmm0 |
| 819 pslldq xmm0,8 |
| 820 psrldq xmm3,8 |
| 821 pxor xmm0,xmm4 |
| 822 pxor xmm1,xmm3 |
| 823 |
| 824 |
| 825 movdqa xmm4,xmm0 |
| 826 psrlq xmm0,1 |
| 827 pxor xmm1,xmm4 |
| 828 pxor xmm4,xmm0 |
| 829 psrlq xmm0,5 |
| 830 pxor xmm0,xmm4 |
| 831 psrlq xmm0,1 |
| 832 pxor xmm0,xmm1 |
; Store the third and fourth powers and their packed hi^lo qwords.
| 833 pshufd xmm3,xmm5,78 |
| 834 pshufd xmm4,xmm0,78 |
| 835 pxor xmm3,xmm5 |
| 836 movdqu XMMWORD PTR[48+rcx],xmm5 |
| 837 pxor xmm4,xmm0 |
| 838 movdqu XMMWORD PTR[64+rcx],xmm0 |
| 839 DB 102,15,58,15,227,8 |
| 840 movdqu XMMWORD PTR[80+rcx],xmm4 |
; Restore xmm6 and release the 24-byte frame created by the DB-encoded prologue.
| 841 movaps xmm6,XMMWORD PTR[rsp] |
| 842 lea rsp,QWORD PTR[24+rsp] |
| 843 $L$SEH_end_gcm_init_clmul:: |
| 844 DB 0F3h,0C3h ;repret |
| 845 gcm_init_clmul ENDP |
;-----------------------------------------------------------------------
; gcm_gmult_clmul - multiply a 16-byte value in place by the first table
; entry, using carry-less multiplication.
; ABI:   Microsoft x64, arguments used directly; leaf function, no stack frame.
; In:    rcx = 16-byte buffer, read and rewritten (presumably Xi - confirm)
;        rdx = table pointer; reads 16 bytes at [rdx] and at [rdx+32]
;              (the layout written by gcm_init_clmul: entry 0 and the packed
;              hi^lo qwords)
; Out:   [rcx] = byte-reversed, reduced product.
; Clobb: xmm0-xmm5, flags. Only volatile XMM registers are used, so nothing
;        is saved.
; Raw-encoded instructions (DB):
;   102,15,56,0,197        pshufb xmm0,xmm5   (byte-reverse via $L$bswap_mask)
;   102,15,58,68,194,0     pclmulqdq xmm0,xmm2,00h
;   102,15,58,68,202,17    pclmulqdq xmm1,xmm2,11h
;   102,15,58,68,220,0     pclmulqdq xmm3,xmm4,00h
; Notes: $L$_gmult_clmul is the entry label that gcm_gmult_avx jumps to.
;-----------------------------------------------------------------------
| 846 PUBLIC gcm_gmult_clmul |
| 847 |
| 848 ALIGN 16 |
| 849 gcm_gmult_clmul PROC PUBLIC |
| 850 $L$_gmult_clmul:: |
| 851 movdqu xmm0,XMMWORD PTR[rcx] |
| 852 movdqa xmm5,XMMWORD PTR[$L$bswap_mask] |
| 853 movdqu xmm2,XMMWORD PTR[rdx] |
| 854 movdqu xmm4,XMMWORD PTR[32+rdx] |
| 855 DB 102,15,56,0,197 |
; Three-multiply product: low*low in xmm0, high*high in xmm1, and
; (hi^lo of the value) * (low qword of [rdx+32]) in xmm3; the two pxor turn
; xmm3 into the middle term.
| 856 movdqa xmm1,xmm0 |
| 857 pshufd xmm3,xmm0,78 |
| 858 pxor xmm3,xmm0 |
| 859 DB 102,15,58,68,194,0 |
| 860 DB 102,15,58,68,202,17 |
| 861 DB 102,15,58,68,220,0 |
| 862 pxor xmm3,xmm0 |
| 863 pxor xmm3,xmm1 |
| 864 |
; Fold the middle term into the 256-bit product xmm1:xmm0.
| 865 movdqa xmm4,xmm3 |
| 866 psrldq xmm3,8 |
| 867 pslldq xmm4,8 |
| 868 pxor xmm1,xmm3 |
| 869 pxor xmm0,xmm4 |
| 870 |
; Reduction, first phase (left shifts by 5, 1 and 57 combined with xors).
| 871 movdqa xmm4,xmm0 |
| 872 movdqa xmm3,xmm0 |
| 873 psllq xmm0,5 |
| 874 pxor xmm3,xmm0 |
| 875 psllq xmm0,1 |
| 876 pxor xmm0,xmm3 |
| 877 psllq xmm0,57 |
| 878 movdqa xmm3,xmm0 |
| 879 pslldq xmm0,8 |
| 880 psrldq xmm3,8 |
| 881 pxor xmm0,xmm4 |
| 882 pxor xmm1,xmm3 |
| 883 |
| 884 |
; Reduction, second phase (right shifts by 1, 5 and 1 combined with xors).
| 885 movdqa xmm4,xmm0 |
| 886 psrlq xmm0,1 |
| 887 pxor xmm1,xmm4 |
| 888 pxor xmm4,xmm0 |
| 889 psrlq xmm0,5 |
| 890 pxor xmm0,xmm4 |
| 891 psrlq xmm0,1 |
| 892 pxor xmm0,xmm1 |
; Byte-reverse back and store over the input.
| 893 DB 102,15,56,0,197 |
| 894 movdqu XMMWORD PTR[rcx],xmm0 |
| 895 DB 0F3h,0C3h ;repret |
| 896 gcm_gmult_clmul ENDP |
;-----------------------------------------------------------------------
; gcm_ghash_clmul - fold a run of 16-byte input blocks into a 16-byte value
; using carry-less multiplication, 4, 2 or 1 block(s) per step.
; ABI:   Microsoft x64, arguments used directly (no register remapping).
; In:    rcx = 16-byte buffer, read at entry and rewritten at $L$done
;              (presumably Xi - confirm)
;        rdx = table pointer; reads 16-byte entries at +0, +16, +32, +48, +64, +80
;              (the layout written by gcm_init_clmul)
;        r8  = input pointer, advanced as blocks are consumed
;        r9  = byte count; used as a remaining-length counter
;              (presumably a non-zero multiple of 16 - confirm)
; Out:   [rcx] = updated value.
; Stack: 168-byte frame holding xmm6-xmm15 (nonvolatile under this ABI),
;        restored at $L$done.
; Clobb: rax, r8, r9, xmm0-xmm5, flags.
; Raw-encoded instructions (DB):
;   048h,08dh,060h,0e0h            lea rsp,[rax-20h]
;   00fh,029h,070h,0e0h / 078h,0f0h    movaps [rax-20h],xmm6 / [rax-10h],xmm7
;   044h,00fh,029h,...             movaps [rax+00h..70h],xmm8..xmm15
;   102,[REX,]15,56,0,modrm        pshufb xmm,xmm
;   102,[REX,]15,58,68,modrm,imm   pclmulqdq xmm,xmm,imm
;                                  imm 0 = low*low qwords, 17 = high*high,
;                                  16 = low qword of dest * high qword of source
;   102,76,15,110,200              movq xmm9,rax
; Register roles: xmm10 = $L$bswap_mask; xmm2 = [rdx], xmm6 = [rdx+16],
;        xmm14 = [rdx+48], xmm15 = [rdx+64]; xmm7 alternates between
;        [rdx+32] and [rdx+80] (the packed hi^lo qwords).
; Notes: $L$_ghash_clmul is the entry label that gcm_ghash_avx jumps to.
;        The $L$SEH_begin/$L$SEH_end labels are presumably tied to unwind
;        data outside this view.
;-----------------------------------------------------------------------
| 897 PUBLIC gcm_ghash_clmul |
| 898 |
| 899 ALIGN 32 |
| 900 gcm_ghash_clmul PROC PUBLIC |
| 901 $L$_ghash_clmul:: |
; rax = rsp-136 serves as the base for the DB-encoded saves below; after
; "lea rsp,[rax-20h]" the frame is 168 bytes with xmm6 at [rsp] and xmm15
; at [rsp+144], matching the reloads at $L$done.
| 902 lea rax,QWORD PTR[((-136))+rsp] |
| 903 $L$SEH_begin_gcm_ghash_clmul:: |
| 904 |
| 905 DB 048h,08dh,060h,0e0h |
| 906 DB 00fh,029h,070h,0e0h |
| 907 DB 00fh,029h,078h,0f0h |
| 908 DB 044h,00fh,029h,000h |
| 909 DB 044h,00fh,029h,048h,010h |
| 910 DB 044h,00fh,029h,050h,020h |
| 911 DB 044h,00fh,029h,058h,030h |
| 912 DB 044h,00fh,029h,060h,040h |
| 913 DB 044h,00fh,029h,068h,050h |
| 914 DB 044h,00fh,029h,070h,060h |
| 915 DB 044h,00fh,029h,078h,070h |
| 916 movdqa xmm10,XMMWORD PTR[$L$bswap_mask] |
| 917 |
; Load the running value and byte-reverse it (pshufb xmm0,xmm10).
| 918 movdqu xmm0,XMMWORD PTR[rcx] |
| 919 movdqu xmm2,XMMWORD PTR[rdx] |
| 920 movdqu xmm7,XMMWORD PTR[32+rdx] |
| 921 DB 102,65,15,56,0,194 |
| 922 |
; r9 = length - 16. Zero means exactly one block: go straight to $L$odd_tail.
| 923 sub r9,010h |
| 924 jz $L$odd_tail |
| 925 |
| 926 movdqu xmm6,XMMWORD PTR[16+rdx] |
| 927 mov eax,DWORD PTR[((OPENSSL_ia32cap_P+4))] |
; Fewer than 64 bytes in total (r9 < 30h): use the 2-block path.
| 928 cmp r9,030h |
| 929 jb $L$skip4x |
| 930 |
; Capability gate: mask 71303168 = 04400000h (bits 22 and 26); when only bit 22
; (4194304 = 00400000h) is set, the 4-block path is skipped. Presumably these
; are the CPUID MOVBE/XSAVE bits, used to single out a CPU family where the
; 4x path is not worthwhile - confirm.
| 931 and eax,71303168 |
| 932 cmp eax,4194304 |
| 933 je $L$skip4x |
| 934 |
; 4-block path. rax is loaded with a constant that $L$mod4_loop moves into
; xmm9 and uses as a pshufb lookup table during reduction.
| 935 sub r9,030h |
| 936 mov rax,0A040608020C0E000h |
| 937 movdqu xmm14,XMMWORD PTR[48+rdx] |
| 938 movdqu xmm15,XMMWORD PTR[64+rdx] |
| 939 |
| 940 |
| 941 |
| 942 |
; Prime the pipeline with the first four blocks: blocks 3 and 2 are
; multiplied by [rdx] and [rdx+16], block 1 by [rdx+48]; block 0 is xored
; into the running value, whose multiply by [rdx+64] is issued at the top of
; $L$mod4_loop / $L$tail4x.
| 943 movdqu xmm3,XMMWORD PTR[48+r8] |
| 944 movdqu xmm11,XMMWORD PTR[32+r8] |
| 945 DB 102,65,15,56,0,218 |
| 946 DB 102,69,15,56,0,218 |
| 947 movdqa xmm5,xmm3 |
| 948 pshufd xmm4,xmm3,78 |
| 949 pxor xmm4,xmm3 |
| 950 DB 102,15,58,68,218,0 |
| 951 DB 102,15,58,68,234,17 |
| 952 DB 102,15,58,68,231,0 |
| 953 |
| 954 movdqa xmm13,xmm11 |
| 955 pshufd xmm12,xmm11,78 |
| 956 pxor xmm12,xmm11 |
| 957 DB 102,68,15,58,68,222,0 |
| 958 DB 102,68,15,58,68,238,17 |
| 959 DB 102,68,15,58,68,231,16 |
| 960 xorps xmm3,xmm11 |
| 961 xorps xmm5,xmm13 |
| 962 movups xmm7,XMMWORD PTR[80+rdx] |
| 963 xorps xmm4,xmm12 |
| 964 |
| 965 movdqu xmm11,XMMWORD PTR[16+r8] |
| 966 movdqu xmm8,XMMWORD PTR[r8] |
| 967 DB 102,69,15,56,0,218 |
| 968 DB 102,69,15,56,0,194 |
| 969 movdqa xmm13,xmm11 |
| 970 pshufd xmm12,xmm11,78 |
| 971 pxor xmm0,xmm8 |
| 972 pxor xmm12,xmm11 |
| 973 DB 102,69,15,58,68,222,0 |
| 974 movdqa xmm1,xmm0 |
| 975 pshufd xmm8,xmm0,78 |
| 976 pxor xmm8,xmm0 |
| 977 DB 102,69,15,58,68,238,17 |
| 978 DB 102,68,15,58,68,231,0 |
| 979 xorps xmm3,xmm11 |
| 980 xorps xmm5,xmm13 |
| 981 |
; Advance past the four blocks; a borrow means fewer than 64 bytes remain,
; so finish in $L$tail4x without entering the loop.
| 982 lea r8,QWORD PTR[64+r8] |
| 983 sub r9,040h |
| 984 jc $L$tail4x |
| 985 |
| 986 jmp $L$mod4_loop |
| 987 ALIGN 32 |
| 988 $L$mod4_loop:: |
; Steady state, 64 bytes per iteration: finishes the multiply/reduction for
; the previous group of four blocks while loading, byte-reversing and
; starting the multiplies for the next four. The instruction interleaving
; is deliberate; do not reorder.
| 989 DB 102,65,15,58,68,199,0 |
| 990 xorps xmm4,xmm12 |
| 991 movdqu xmm11,XMMWORD PTR[48+r8] |
| 992 DB 102,69,15,56,0,218 |
| 993 DB 102,65,15,58,68,207,17 |
| 994 xorps xmm0,xmm3 |
| 995 movdqu xmm3,XMMWORD PTR[32+r8] |
| 996 movdqa xmm13,xmm11 |
| 997 DB 102,68,15,58,68,199,16 |
| 998 pshufd xmm12,xmm11,78 |
| 999 xorps xmm1,xmm5 |
| 1000 pxor xmm12,xmm11 |
| 1001 DB 102,65,15,56,0,218 |
| 1002 movups xmm7,XMMWORD PTR[32+rdx] |
| 1003 xorps xmm8,xmm4 |
| 1004 DB 102,68,15,58,68,218,0 |
| 1005 pshufd xmm4,xmm3,78 |
| 1006 |
| 1007 pxor xmm8,xmm0 |
| 1008 movdqa xmm5,xmm3 |
| 1009 pxor xmm8,xmm1 |
| 1010 pxor xmm4,xmm3 |
| 1011 movdqa xmm9,xmm8 |
| 1012 DB 102,68,15,58,68,234,17 |
| 1013 pslldq xmm8,8 |
| 1014 psrldq xmm9,8 |
| 1015 pxor xmm0,xmm8 |
; Reduction, first phase: xmm8 = xmm0 masked with $L$7_mask is used as the
; pshufb index into the 8-byte table carried in rax (movq xmm9,rax).
| 1016 movdqa xmm8,XMMWORD PTR[$L$7_mask] |
| 1017 pxor xmm1,xmm9 |
| 1018 DB 102,76,15,110,200 |
| 1019 |
| 1020 pand xmm8,xmm0 |
| 1021 DB 102,69,15,56,0,200 |
| 1022 pxor xmm9,xmm0 |
| 1023 DB 102,68,15,58,68,231,0 |
| 1024 psllq xmm9,57 |
| 1025 movdqa xmm8,xmm9 |
| 1026 pslldq xmm9,8 |
| 1027 DB 102,15,58,68,222,0 |
| 1028 psrldq xmm8,8 |
| 1029 pxor xmm0,xmm9 |
| 1030 pxor xmm1,xmm8 |
| 1031 movdqu xmm8,XMMWORD PTR[r8] |
| 1032 |
; Reduction, second phase (right shifts by 1, 5 and 1), interleaved with the
; loads and multiplies for the next group.
| 1033 movdqa xmm9,xmm0 |
| 1034 psrlq xmm0,1 |
| 1035 DB 102,15,58,68,238,17 |
| 1036 xorps xmm3,xmm11 |
| 1037 movdqu xmm11,XMMWORD PTR[16+r8] |
| 1038 DB 102,69,15,56,0,218 |
| 1039 DB 102,15,58,68,231,16 |
| 1040 xorps xmm5,xmm13 |
| 1041 movups xmm7,XMMWORD PTR[80+rdx] |
| 1042 DB 102,69,15,56,0,194 |
| 1043 pxor xmm1,xmm9 |
| 1044 pxor xmm9,xmm0 |
| 1045 psrlq xmm0,5 |
| 1046 |
| 1047 movdqa xmm13,xmm11 |
| 1048 pxor xmm4,xmm12 |
| 1049 pshufd xmm12,xmm11,78 |
| 1050 pxor xmm0,xmm9 |
| 1051 pxor xmm1,xmm8 |
| 1052 pxor xmm12,xmm11 |
| 1053 DB 102,69,15,58,68,222,0 |
| 1054 psrlq xmm0,1 |
| 1055 pxor xmm0,xmm1 |
| 1056 movdqa xmm1,xmm0 |
| 1057 DB 102,69,15,58,68,238,17 |
| 1058 xorps xmm3,xmm11 |
| 1059 pshufd xmm8,xmm0,78 |
| 1060 pxor xmm8,xmm0 |
| 1061 |
| 1062 DB 102,68,15,58,68,231,0 |
| 1063 xorps xmm5,xmm13 |
| 1064 |
| 1065 lea r8,QWORD PTR[64+r8] |
| 1066 sub r9,040h |
| 1067 jnc $L$mod4_loop |
| 1068 |
| 1069 $L$tail4x:: |
; Drain the 4-block pipeline: finish the pending multiplies, combine the
; partial products and reduce.
| 1070 DB 102,65,15,58,68,199,0 |
| 1071 DB 102,65,15,58,68,207,17 |
| 1072 DB 102,68,15,58,68,199,16 |
| 1073 xorps xmm4,xmm12 |
| 1074 xorps xmm0,xmm3 |
| 1075 xorps xmm1,xmm5 |
| 1076 pxor xmm1,xmm0 |
| 1077 pxor xmm8,xmm4 |
| 1078 |
| 1079 pxor xmm8,xmm1 |
| 1080 pxor xmm1,xmm0 |
| 1081 |
| 1082 movdqa xmm9,xmm8 |
| 1083 psrldq xmm8,8 |
| 1084 pslldq xmm9,8 |
| 1085 pxor xmm1,xmm8 |
| 1086 pxor xmm0,xmm9 |
| 1087 |
| 1088 movdqa xmm4,xmm0 |
| 1089 movdqa xmm3,xmm0 |
| 1090 psllq xmm0,5 |
| 1091 pxor xmm3,xmm0 |
| 1092 psllq xmm0,1 |
| 1093 pxor xmm0,xmm3 |
| 1094 psllq xmm0,57 |
| 1095 movdqa xmm3,xmm0 |
| 1096 pslldq xmm0,8 |
| 1097 psrldq xmm3,8 |
| 1098 pxor xmm0,xmm4 |
| 1099 pxor xmm1,xmm3 |
| 1100 |
| 1101 |
| 1102 movdqa xmm4,xmm0 |
| 1103 psrlq xmm0,1 |
| 1104 pxor xmm1,xmm4 |
| 1105 pxor xmm4,xmm0 |
| 1106 psrlq xmm0,5 |
| 1107 pxor xmm0,xmm4 |
| 1108 psrlq xmm0,1 |
| 1109 pxor xmm0,xmm1 |
; Undo the last subtraction: r9 = bytes still unprocessed. Zero -> done.
; Otherwise reload xmm7 = [rdx+32]; exactly 16 bytes left -> $L$odd_tail,
; more -> fall into the 2-block path.
| 1110 add r9,040h |
| 1111 jz $L$done |
| 1112 movdqu xmm7,XMMWORD PTR[32+rdx] |
| 1113 sub r9,010h |
| 1114 jz $L$odd_tail |
| 1115 $L$skip4x:: |
| 1116 |
| 1117 |
| 1118 |
| 1119 |
| 1120 |
; 2-block path. Here r9 = remaining bytes - 16. Block 0 is xored into the
; running value; block 1 is multiplied by [rdx] right away.
| 1121 movdqu xmm8,XMMWORD PTR[r8] |
| 1122 movdqu xmm3,XMMWORD PTR[16+r8] |
| 1123 DB 102,69,15,56,0,194 |
| 1124 DB 102,65,15,56,0,218 |
| 1125 pxor xmm0,xmm8 |
| 1126 |
| 1127 movdqa xmm5,xmm3 |
| 1128 pshufd xmm4,xmm3,78 |
| 1129 pxor xmm4,xmm3 |
| 1130 DB 102,15,58,68,218,0 |
| 1131 DB 102,15,58,68,234,17 |
| 1132 DB 102,15,58,68,231,0 |
| 1133 |
| 1134 lea r8,QWORD PTR[32+r8] |
| 1135 nop |
| 1136 sub r9,020h |
| 1137 jbe $L$even_tail |
| 1138 nop |
| 1139 jmp $L$mod_loop |
| 1140 |
| 1141 ALIGN 32 |
| 1142 $L$mod_loop:: |
; Steady state, 32 bytes per iteration: multiply the running value by
; [rdx+16], add the product already computed for the second block, reduce,
; and start on the next pair of blocks in between.
| 1143 movdqa xmm1,xmm0 |
| 1144 movdqa xmm8,xmm4 |
| 1145 pshufd xmm4,xmm0,78 |
| 1146 pxor xmm4,xmm0 |
| 1147 |
| 1148 DB 102,15,58,68,198,0 |
| 1149 DB 102,15,58,68,206,17 |
| 1150 DB 102,15,58,68,231,16 |
| 1151 |
| 1152 pxor xmm0,xmm3 |
| 1153 pxor xmm1,xmm5 |
| 1154 movdqu xmm9,XMMWORD PTR[r8] |
| 1155 pxor xmm8,xmm0 |
| 1156 DB 102,69,15,56,0,202 |
| 1157 movdqu xmm3,XMMWORD PTR[16+r8] |
| 1158 |
| 1159 pxor xmm8,xmm1 |
| 1160 pxor xmm1,xmm9 |
| 1161 pxor xmm4,xmm8 |
| 1162 DB 102,65,15,56,0,218 |
| 1163 movdqa xmm8,xmm4 |
| 1164 psrldq xmm8,8 |
| 1165 pslldq xmm4,8 |
| 1166 pxor xmm1,xmm8 |
| 1167 pxor xmm0,xmm4 |
| 1168 |
| 1169 movdqa xmm5,xmm3 |
| 1170 |
| 1171 movdqa xmm9,xmm0 |
| 1172 movdqa xmm8,xmm0 |
| 1173 psllq xmm0,5 |
| 1174 pxor xmm8,xmm0 |
| 1175 DB 102,15,58,68,218,0 |
| 1176 psllq xmm0,1 |
| 1177 pxor xmm0,xmm8 |
| 1178 psllq xmm0,57 |
| 1179 movdqa xmm8,xmm0 |
| 1180 pslldq xmm0,8 |
| 1181 psrldq xmm8,8 |
| 1182 pxor xmm0,xmm9 |
| 1183 pshufd xmm4,xmm5,78 |
| 1184 pxor xmm1,xmm8 |
| 1185 pxor xmm4,xmm5 |
| 1186 |
| 1187 movdqa xmm9,xmm0 |
| 1188 psrlq xmm0,1 |
| 1189 DB 102,15,58,68,234,17 |
| 1190 pxor xmm1,xmm9 |
| 1191 pxor xmm9,xmm0 |
| 1192 psrlq xmm0,5 |
| 1193 pxor xmm0,xmm9 |
| 1194 lea r8,QWORD PTR[32+r8] |
| 1195 psrlq xmm0,1 |
| 1196 DB 102,15,58,68,231,0 |
| 1197 pxor xmm0,xmm1 |
| 1198 |
| 1199 sub r9,020h |
| 1200 ja $L$mod_loop |
| 1201 |
| 1202 $L$even_tail:: |
; Finish the last pair: same multiply/combine/reduce as the loop body but
; without loading further input.
| 1203 movdqa xmm1,xmm0 |
| 1204 movdqa xmm8,xmm4 |
| 1205 pshufd xmm4,xmm0,78 |
| 1206 pxor xmm4,xmm0 |
| 1207 |
| 1208 DB 102,15,58,68,198,0 |
| 1209 DB 102,15,58,68,206,17 |
| 1210 DB 102,15,58,68,231,16 |
| 1211 |
| 1212 pxor xmm0,xmm3 |
| 1213 pxor xmm1,xmm5 |
| 1214 pxor xmm8,xmm0 |
| 1215 pxor xmm8,xmm1 |
| 1216 pxor xmm4,xmm8 |
| 1217 movdqa xmm8,xmm4 |
| 1218 psrldq xmm8,8 |
| 1219 pslldq xmm4,8 |
| 1220 pxor xmm1,xmm8 |
| 1221 pxor xmm0,xmm4 |
| 1222 |
| 1223 movdqa xmm4,xmm0 |
| 1224 movdqa xmm3,xmm0 |
| 1225 psllq xmm0,5 |
| 1226 pxor xmm3,xmm0 |
| 1227 psllq xmm0,1 |
| 1228 pxor xmm0,xmm3 |
| 1229 psllq xmm0,57 |
| 1230 movdqa xmm3,xmm0 |
| 1231 pslldq xmm0,8 |
| 1232 psrldq xmm3,8 |
| 1233 pxor xmm0,xmm4 |
| 1234 pxor xmm1,xmm3 |
| 1235 |
| 1236 |
| 1237 movdqa xmm4,xmm0 |
| 1238 psrlq xmm0,1 |
| 1239 pxor xmm1,xmm4 |
| 1240 pxor xmm4,xmm0 |
| 1241 psrlq xmm0,5 |
| 1242 pxor xmm0,xmm4 |
| 1243 psrlq xmm0,1 |
| 1244 pxor xmm0,xmm1 |
; r9 == 0 here means one more 16-byte block is left (r9 carries a -16 bias);
; any other value means the input is exhausted.
| 1245 test r9,r9 |
| 1246 jnz $L$done |
| 1247 |
| 1248 $L$odd_tail:: |
; Single trailing block: xor it into the running value and multiply by
; [rdx] (xmm2), with xmm7 = [rdx+32] supplying the hi^lo operand.
| 1249 movdqu xmm8,XMMWORD PTR[r8] |
| 1250 DB 102,69,15,56,0,194 |
| 1251 pxor xmm0,xmm8 |
| 1252 movdqa xmm1,xmm0 |
| 1253 pshufd xmm3,xmm0,78 |
| 1254 pxor xmm3,xmm0 |
| 1255 DB 102,15,58,68,194,0 |
| 1256 DB 102,15,58,68,202,17 |
| 1257 DB 102,15,58,68,223,0 |
| 1258 pxor xmm3,xmm0 |
| 1259 pxor xmm3,xmm1 |
| 1260 |
| 1261 movdqa xmm4,xmm3 |
| 1262 psrldq xmm3,8 |
| 1263 pslldq xmm4,8 |
| 1264 pxor xmm1,xmm3 |
| 1265 pxor xmm0,xmm4 |
| 1266 |
| 1267 movdqa xmm4,xmm0 |
| 1268 movdqa xmm3,xmm0 |
| 1269 psllq xmm0,5 |
| 1270 pxor xmm3,xmm0 |
| 1271 psllq xmm0,1 |
| 1272 pxor xmm0,xmm3 |
| 1273 psllq xmm0,57 |
| 1274 movdqa xmm3,xmm0 |
| 1275 pslldq xmm0,8 |
| 1276 psrldq xmm3,8 |
| 1277 pxor xmm0,xmm4 |
| 1278 pxor xmm1,xmm3 |
| 1279 |
| 1280 |
| 1281 movdqa xmm4,xmm0 |
| 1282 psrlq xmm0,1 |
| 1283 pxor xmm1,xmm4 |
| 1284 pxor xmm4,xmm0 |
| 1285 psrlq xmm0,5 |
| 1286 pxor xmm0,xmm4 |
| 1287 psrlq xmm0,1 |
| 1288 pxor xmm0,xmm1 |
| 1289 $L$done:: |
; Byte-reverse the result back, store it, restore xmm6-xmm15 and release
; the 168-byte frame.
| 1290 DB 102,65,15,56,0,194 |
| 1291 movdqu XMMWORD PTR[rcx],xmm0 |
| 1292 movaps xmm6,XMMWORD PTR[rsp] |
| 1293 movaps xmm7,XMMWORD PTR[16+rsp] |
| 1294 movaps xmm8,XMMWORD PTR[32+rsp] |
| 1295 movaps xmm9,XMMWORD PTR[48+rsp] |
| 1296 movaps xmm10,XMMWORD PTR[64+rsp] |
| 1297 movaps xmm11,XMMWORD PTR[80+rsp] |
| 1298 movaps xmm12,XMMWORD PTR[96+rsp] |
| 1299 movaps xmm13,XMMWORD PTR[112+rsp] |
| 1300 movaps xmm14,XMMWORD PTR[128+rsp] |
| 1301 movaps xmm15,XMMWORD PTR[144+rsp] |
| 1302 lea rsp,QWORD PTR[168+rsp] |
| 1303 $L$SEH_end_gcm_ghash_clmul:: |
| 1304 DB 0F3h,0C3h ;repret |
| 1305 gcm_ghash_clmul ENDP |
; gcm_init_avx: exported entry point whose entire body is a jump to
; $L$_init_clmul. That label is defined outside this block (presumably inside
; the CLMUL init routine -- confirm). The stub contains no AVX instructions
; of its own.
| 1306 PUBLIC gcm_init_avx |
| 1307 |
| 1308 ALIGN 32 |
| 1309 gcm_init_avx PROC PUBLIC |
; Plain jmp, not call: no registers or stack are touched here, so the target
; sees the caller's arguments and return address unchanged.
| 1310 jmp $L$_init_clmul |
| 1311 gcm_init_avx ENDP |
; gcm_gmult_avx: exported entry point whose entire body is a jump to
; $L$_gmult_clmul. That label is defined outside this block (presumably inside
; the CLMUL multiply routine -- confirm). The stub contains no AVX
; instructions of its own.
| 1312 PUBLIC gcm_gmult_avx |
| 1313 |
| 1314 ALIGN 32 |
| 1315 gcm_gmult_avx PROC PUBLIC |
; Plain jmp, not call: no registers or stack are touched here, so the target
; sees the caller's arguments and return address unchanged.
| 1316 jmp $L$_gmult_clmul |
| 1317 gcm_gmult_avx ENDP |
; gcm_ghash_avx: exported entry point whose entire body is a jump to
; $L$_ghash_clmul. That label is defined outside this block (presumably inside
; gcm_ghash_clmul -- confirm). The stub contains no AVX instructions of its
; own.
| 1318 PUBLIC gcm_ghash_avx |
| 1319 |
| 1320 ALIGN 32 |
| 1321 gcm_ghash_avx PROC PUBLIC |
; Plain jmp, not call: no registers or stack are touched here, so the target
; sees the caller's arguments and return address unchanged.
| 1322 jmp $L$_ghash_clmul |
| 1323 gcm_ghash_avx ENDP |
; Constant pool, kept in the code segment and aligned to 64 bytes.
| 1324 ALIGN 64 |
; 16 byte indices in descending order (15..0). Used as a pshufb control this
; reverses the byte order of a 128-bit register. Presumably the mask behind
; the pshufb byte swaps in the CLMUL routines -- the load site is not visible
; in this section, confirm there.
| 1325 $L$bswap_mask:: |
| 1326 DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 |
; 16-byte constant: lowest byte 01h, highest byte 0C2h, zero in between.
; NOTE(review): by its name this is the GHASH reduction constant; its use is
; outside this section.
| 1327 $L$0x1c2_polynomial:: |
| 1328 DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0c2h |
; Two qwords, each with value 7 (dwords 7,0 | 7,0).
| 1329 $L$7_mask:: |
| 1330 DD 7,0,7,0 |
; Two qwords: 7 and 450 (= 1C2h).
| 1331 $L$7_mask_poly:: |
| 1332 DD 7,0,450,0 |
| 1333 ALIGN 64 |
| 1334 |
; $L$rem_4bit: 16 qword entries, each written as a DD pair (low dword, high
; dword). The low dword is always 0, so only the top 32 bits of each qword are
; populated (entry 1 = 471859200 = 1C200000h in the high dword).
; gcm_gmult_4bit loads this table's address into r11 and XORs the entry at
; [index*8 + r11] into r9, where the index has been masked to 4 bits.
| 1335 $L$rem_4bit:: |
| 1336 DD 0,0,0,471859200,0,943718400,0,610271232 |
| 1337 DD 0,1887436800,0,1822425088,0,1220542464,0,1423966208 |
| 1338 DD 0,3774873600,0,4246732800,0,3644850176,0,3311403008 |
| 1339 DD 0,2441084928,0,2376073216,0,2847932416,0,3051356160 |
| 1340 |
; $L$rem_8bit: 256 16-bit entries (32 rows of 8 words), i.e. one entry per
; possible byte value. Entry 1 is 01C2h.
; NOTE(review): the consumer is not visible in this section -- presumably the
; table-driven gcm_ghash_4bit routine; confirm the indexing there.
| 1341 $L$rem_8bit:: |
| 1342 DW 00000h,001C2h,00384h,00246h,00708h,006CAh,0048Ch,0054Eh |
| 1343 DW 00E10h,00FD2h,00D94h,00C56h,00918h,008DAh,00A9Ch,00B5Eh |
| 1344 DW 01C20h,01DE2h,01FA4h,01E66h,01B28h,01AEAh,018ACh,0196Eh |
| 1345 DW 01230h,013F2h,011B4h,01076h,01538h,014FAh,016BCh,0177Eh |
| 1346 DW 03840h,03982h,03BC4h,03A06h,03F48h,03E8Ah,03CCCh,03D0Eh |
| 1347 DW 03650h,03792h,035D4h,03416h,03158h,0309Ah,032DCh,0331Eh |
| 1348 DW 02460h,025A2h,027E4h,02626h,02368h,022AAh,020ECh,0212Eh |
| 1349 DW 02A70h,02BB2h,029F4h,02836h,02D78h,02CBAh,02EFCh,02F3Eh |
| 1350 DW 07080h,07142h,07304h,072C6h,07788h,0764Ah,0740Ch,075CEh |
| 1351 DW 07E90h,07F52h,07D14h,07CD6h,07998h,0785Ah,07A1Ch,07BDEh |
| 1352 DW 06CA0h,06D62h,06F24h,06EE6h,06BA8h,06A6Ah,0682Ch,069EEh |
| 1353 DW 062B0h,06372h,06134h,060F6h,065B8h,0647Ah,0663Ch,067FEh |
| 1354 DW 048C0h,04902h,04B44h,04A86h,04FC8h,04E0Ah,04C4Ch,04D8Eh |
| 1355 DW 046D0h,04712h,04554h,04496h,041D8h,0401Ah,0425Ch,0439Eh |
| 1356 DW 054E0h,05522h,05764h,056A6h,053E8h,0522Ah,0506Ch,051AEh |
| 1357 DW 05AF0h,05B32h,05974h,058B6h,05DF8h,05C3Ah,05E7Ch,05FBEh |
| 1358 DW 0E100h,0E0C2h,0E284h,0E346h,0E608h,0E7CAh,0E58Ch,0E44Eh |
| 1359 DW 0EF10h,0EED2h,0EC94h,0ED56h,0E818h,0E9DAh,0EB9Ch,0EA5Eh |
| 1360 DW 0FD20h,0FCE2h,0FEA4h,0FF66h,0FA28h,0FBEAh,0F9ACh,0F86Eh |
| 1361 DW 0F330h,0F2F2h,0F0B4h,0F176h,0F438h,0F5FAh,0F7BCh,0F67Eh |
| 1362 DW 0D940h,0D882h,0DAC4h,0DB06h,0DE48h,0DF8Ah,0DDCCh,0DC0Eh |
| 1363 DW 0D750h,0D692h,0D4D4h,0D516h,0D058h,0D19Ah,0D3DCh,0D21Eh |
| 1364 DW 0C560h,0C4A2h,0C6E4h,0C726h,0C268h,0C3AAh,0C1ECh,0C02Eh |
| 1365 DW 0CB70h,0CAB2h,0C8F4h,0C936h,0CC78h,0CDBAh,0CFFCh,0CE3Eh |
| 1366 DW 09180h,09042h,09204h,093C6h,09688h,0974Ah,0950Ch,094CEh |
| 1367 DW 09F90h,09E52h,09C14h,09DD6h,09898h,0995Ah,09B1Ch,09ADEh |
| 1368 DW 08DA0h,08C62h,08E24h,08FE6h,08AA8h,08B6Ah,0892Ch,088EEh |
| 1369 DW 083B0h,08272h,08034h,081F6h,084B8h,0857Ah,0873Ch,086FEh |
| 1370 DW 0A9C0h,0A802h,0AA44h,0AB86h,0AEC8h,0AF0Ah,0AD4Ch,0AC8Eh |
| 1371 DW 0A7D0h,0A612h,0A454h,0A596h,0A0D8h,0A11Ah,0A35Ch,0A29Eh |
| 1372 DW 0B5E0h,0B422h,0B664h,0B7A6h,0B2E8h,0B32Ah,0B16Ch,0B0AEh |
| 1373 DW 0BBF0h,0BA32h,0B874h,0B9B6h,0BCF8h,0BD3Ah,0BF7Ch,0BEBEh |
| 1374 |
; NUL-terminated ASCII identification string embedded in the object:
; "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"
| 1375 DB 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52 |
| 1376 DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 |
| 1377 DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 |
| 1378 DB 114,103,62,0 |
| 1379 ALIGN 64 |
; se_handler: Win64 language-specific exception handler named by the .xdata
; records of gcm_gmult_4bit and gcm_ghash_4bit.
;
; Register use follows the Win64 handler convention: r8 = CONTEXT*,
; r9 = DISPATCHER_CONTEXT*. The numeric displacements below are read as the
; winnt.h field offsets of those two structures (CONTEXT: 120 Rax, 144 Rbx,
; 152 Rsp, 160 Rbp, 168 Rsi, 176 Rdi, 216 R12, 248 Rip; DISPATCHER_CONTEXT:
; 0 ControlPc, 8 ImageBase, 16 FunctionEntry, 24 EstablisherFrame,
; 40 ContextRecord, 56 HandlerData) -- confirm against the SDK headers.
;
; The handler patches the CONTEXT so that it describes the faulting function's
; caller (rsp, rsi, rdi, and rbx/rbp/r12 when the fault is inside the body),
; copies it to the dispatcher's context record, calls RtlVirtualUnwind, and
; returns 1 in eax (ExceptionContinueSearch).
| 1380 EXTERN __imp_RtlVirtualUnwind:NEAR |
| 1381 |
| 1382 ALIGN 16 |
| 1383 se_handler PROC PRIVATE |
; Preserve all callee-saved GPRs and the flags (9 pushes = 72 bytes), then
; reserve 64 bytes: 32 bytes of home space plus four stack-argument slots for
; the RtlVirtualUnwind call. With the return address this keeps rsp 16-byte
; aligned at the call.
| 1384 push rsi |
| 1385 push rdi |
| 1386 push rbx |
| 1387 push rbp |
| 1388 push r12 |
| 1389 push r13 |
| 1390 push r14 |
| 1391 push r15 |
| 1392 pushfq |
| 1393 sub rsp,64 |
| 1394 |
; rax = context->Rax, rbx = context->Rip (address of the fault).
| 1395 mov rax,QWORD PTR[120+r8] |
| 1396 mov rbx,QWORD PTR[248+r8] |
| 1397 |
; rsi = disp->ImageBase, r11 = disp->HandlerData (the two label RVAs stored
; after the handler RVA in .xdata).
| 1398 mov rsi,QWORD PTR[8+r9] |
| 1399 mov r11,QWORD PTR[56+r9] |
| 1400 |
; r10 = ImageBase + HandlerData[0] = end-of-prologue label.
; Fault before that label: keep rax = context->Rax. In gcm_gmult_4bit the
; prologue does "mov rax,rsp" before its pushes, so rax is the entry rsp.
| 1401 mov r10d,DWORD PTR[r11] |
| 1402 lea r10,QWORD PTR[r10*1+rsi] |
| 1403 cmp rbx,r10 |
| 1404 jb $L$in_prologue |
| 1405 |
; Past the prologue: start from the stack pointer at the fault instead.
| 1406 mov rax,QWORD PTR[152+r8] |
| 1407 |
; r10 = ImageBase + HandlerData[1] = epilogue label.
; Fault at or after that label: context->Rsp is used as the entry rsp as-is
; (presumably the saved registers are already restored there -- the epilogues
; are not visible in this section).
| 1408 mov r10d,DWORD PTR[4+r11] |
| 1409 lea r10,QWORD PTR[r10*1+rsi] |
| 1410 cmp rbx,r10 |
| 1411 jae $L$in_prologue |
| 1412 |
; Fault inside the body: step over three pushed registers (3*8 = 24 bytes) to
; reach the entry rsp.
; NOTE(review): the fixed +24 is only correct if rsp at the fault equals
; entry rsp - 24. That matches the three pushes of gcm_gmult_4bit;
; gcm_ghash_4bit shares this handler, so confirm it allocates no extra stack
; between its prologue and epilogue labels.
| 1413 lea rax,QWORD PTR[24+rax] |
| 1414 
; Put the saved rbx / rbp / r12 (pushed in that order) back into the CONTEXT.
| 1415 mov rbx,QWORD PTR[((-8))+rax] |
| 1416 mov rbp,QWORD PTR[((-16))+rax] |
| 1417 mov r12,QWORD PTR[((-24))+rax] |
| 1418 mov QWORD PTR[144+r8],rbx |
| 1419 mov QWORD PTR[160+r8],rbp |
| 1420 mov QWORD PTR[216+r8],r12 |
| 1421 |
; Here rax = rsp as it was on entry to the faulting function. The WIN64
; prologue stored the caller's rdi at [8+rsp] and rsi at [16+rsp]; reload them
; and publish rsp / rsi / rdi in the CONTEXT.
| 1422 $L$in_prologue:: |
| 1423 mov rdi,QWORD PTR[8+rax] |
| 1424 mov rsi,QWORD PTR[16+rax] |
| 1425 mov QWORD PTR[152+r8],rax |
| 1426 mov QWORD PTR[168+r8],rsi |
| 1427 mov QWORD PTR[176+r8],rdi |
| 1428 |
; Copy the patched CONTEXT (154 qwords = 1232 bytes) to disp->ContextRecord.
; The DD encodes the bytes FC F3 48 A5: cld / rep movsq.
| 1429 mov rdi,QWORD PTR[40+r9] |
| 1430 mov rsi,r8 |
| 1431 mov ecx,154 |
| 1432 DD 0a548f3fch |
| 1433 |
; RtlVirtualUnwind(0 /* rcx: handler type */, ImageBase, ControlPc,
;                  FunctionEntry, ContextRecord, &HandlerData,
;                  &EstablisherFrame, NULL)
; The first four arguments go in rcx/rdx/r8/r9, the remaining four on the
; stack above the 32-byte home space.
| 1434 mov rsi,r9 |
| 1435 xor rcx,rcx |
| 1436 mov rdx,QWORD PTR[8+rsi] |
| 1437 mov r8,QWORD PTR[rsi] |
| 1438 mov r9,QWORD PTR[16+rsi] |
| 1439 mov r10,QWORD PTR[40+rsi] |
| 1440 lea r11,QWORD PTR[56+rsi] |
| 1441 lea r12,QWORD PTR[24+rsi] |
| 1442 mov QWORD PTR[32+rsp],r10 |
| 1443 mov QWORD PTR[40+rsp],r11 |
| 1444 mov QWORD PTR[48+rsp],r12 |
| 1445 mov QWORD PTR[56+rsp],rcx |
| 1446 call QWORD PTR[__imp_RtlVirtualUnwind] |
| 1447 |
; Return 1 (ExceptionContinueSearch), then undo the frame in reverse order of
; the pushes above.
| 1448 mov eax,1 |
| 1449 add rsp,64 |
| 1450 popfq |
| 1451 pop r15 |
| 1452 pop r14 |
| 1453 pop r13 |
| 1454 pop r12 |
| 1455 pop rbp |
| 1456 pop rbx |
| 1457 pop rdi |
| 1458 pop rsi |
| 1459 DB 0F3h,0C3h ;repret |
| 1460 se_handler ENDP |
| 1461 |
| 1462 .text$ ENDS |
; .pdata: Win64 function table. Each entry is three image-relative addresses:
; function start label, function end label, and the .xdata unwind-info label.
; Four functions are registered here: gcm_gmult_4bit, gcm_ghash_4bit,
; gcm_init_clmul and gcm_ghash_clmul.
| 1463 .pdata SEGMENT READONLY ALIGN(4) |
| 1464 ALIGN 4 |
| 1465 DD imagerel $L$SEH_begin_gcm_gmult_4bit |
| 1466 DD imagerel $L$SEH_end_gcm_gmult_4bit |
| 1467 DD imagerel $L$SEH_info_gcm_gmult_4bit |
| 1468 |
| 1469 DD imagerel $L$SEH_begin_gcm_ghash_4bit |
| 1470 DD imagerel $L$SEH_end_gcm_ghash_4bit |
| 1471 DD imagerel $L$SEH_info_gcm_ghash_4bit |
| 1472 |
| 1473 DD imagerel $L$SEH_begin_gcm_init_clmul |
| 1474 DD imagerel $L$SEH_end_gcm_init_clmul |
| 1475 DD imagerel $L$SEH_info_gcm_init_clmul |
| 1476 |
| 1477 DD imagerel $L$SEH_begin_gcm_ghash_clmul |
| 1478 DD imagerel $L$SEH_end_gcm_ghash_clmul |
| 1479 DD imagerel $L$SEH_info_gcm_ghash_clmul |
| 1480 .pdata ENDS |
; .xdata: unwind information referenced from .pdata. The byte decoding in the
; comments below follows the Win64 UNWIND_INFO / UNWIND_CODE layout.
| 1481 .xdata SEGMENT READONLY ALIGN(8) |
| 1482 ALIGN 8 |
; The two 4-bit routines use handler-based unwinding. Header byte 9 = version 1
; with the exception-handler flag; prologue size and unwind-code count are 0.
; It is followed by the handler RVA and two label RVAs (end of prologue, start
; of epilogue) that se_handler reads through HandlerData[0] and [1].
| 1483 $L$SEH_info_gcm_gmult_4bit:: |
| 1484 DB 9,0,0,0 |
| 1485 DD imagerel se_handler |
| 1486 DD imagerel $L$gmult_prologue,imagerel $L$gmult_epilogue |
| 1487 $L$SEH_info_gcm_ghash_4bit:: |
| 1488 DB 9,0,0,0 |
| 1489 DD imagerel se_handler |
| 1490 DD imagerel $L$ghash_prologue,imagerel $L$ghash_epilogue |
; gcm_init_clmul: version 1, no handler, 8-byte prologue, 3 unwind-code slots.
;   offset 8: save xmm6 at [rsp+0]   (op 8, two slots)
;   offset 4: allocate 24 bytes      (op 2, info 2 -> 2*8+8)
| 1491 $L$SEH_info_gcm_init_clmul:: |
| 1492 DB 001h,008h,003h,000h |
| 1493 DB 008h,068h,000h,000h |
| 1494 DB 004h,022h,000h,000h |
; gcm_ghash_clmul: version 1, no handler, 33h-byte prologue, 16h (22) slots.
; Ten xmm saves, listed from the last one executed (xmm15 at [rsp+9*16]) back
; to the first (xmm6 at [rsp+0]); the third byte of each row is the slot
; offset in 16-byte units. The final row is a large allocation of
; 15h*8 = 168 bytes. These match the movaps restores and the
; "lea rsp,[168+rsp]" in the function's epilogue.
| 1495 $L$SEH_info_gcm_ghash_clmul:: |
| 1496 DB 001h,033h,016h,000h |
| 1497 DB 033h,0f8h,009h,000h |
| 1498 DB 02eh,0e8h,008h,000h |
| 1499 DB 029h,0d8h,007h,000h |
| 1500 DB 024h,0c8h,006h,000h |
| 1501 DB 01fh,0b8h,005h,000h |
| 1502 DB 01ah,0a8h,004h,000h |
| 1503 DB 015h,098h,003h,000h |
| 1504 DB 010h,088h,002h,000h |
| 1505 DB 00ch,078h,001h,000h |
| 1506 DB 008h,068h,000h,000h |
| 1507 DB 004h,001h,015h,000h |
| 1508 |
| 1509 .xdata ENDS |
| 1510 END |
OLD | NEW |