OLD | NEW |
| (Empty) |
1 default rel | |
2 %define XMMWORD | |
3 %define YMMWORD | |
4 %define ZMMWORD | |
5 section .text code align=64 | |
6 | |
7 EXTERN OPENSSL_ia32cap_P | |
8 | |
; ----------------------------------------------------------------------
; gcm_gmult_4bit (Win64 entry point)
;
; In:   rcx -> 16-byte block; copied to rdi. It is read one byte at a time
;              (offset 15 first, then 14 down to 0) and overwritten at the
;              end with the two byte-swapped result qwords.
;       rdx -> lookup table; copied to rsi. It is indexed with byte offsets
;              that are multiples of 16 (nibble << 4), 16 bytes per entry.
; Uses: r11 -> $L$rem_4bit, indexed by the 4 bits shifted out of r8.
;       r9:r8 hold the running 128-bit value (r9 = qword at +0, r8 = +8).
; Saves: the caller's rdi/rsi are parked in the two slots above the return
;       address on entry and reloaded before returning. rbx, rbp and r12
;       are pushed; only rbx is reloaded in the epilogue because rbp and
;       r12 are never written between the pushes and the stack release.
; ----------------------------------------------------------------------
9 global gcm_gmult_4bit | |
10 | |
11 ALIGN 16 | |
12 gcm_gmult_4bit: | |
13 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
14 mov QWORD[16+rsp],rsi | |
15 mov rax,rsp | |
16 $L$SEH_begin_gcm_gmult_4bit: | |
17 mov rdi,rcx | |
18 mov rsi,rdx | |
19 | |
20 | |
21 push rbx | |
22 push rbp | |
23 push r12 | |
24 $L$gmult_prologue: | |
25 | |
; Seed the state from the table entry selected by the low nibble of byte 15
; (al = byte << 4); bl keeps the high nibble of the same byte (byte & 0xf0).
; rcx = 14 is the index of the next byte to fetch.
26 movzx r8,BYTE[15+rdi] | |
27 lea r11,[$L$rem_4bit] | |
28 xor rax,rax | |
29 xor rbx,rbx | |
30 mov al,r8b | |
31 mov bl,r8b | |
32 shl al,4 | |
33 mov rcx,14 | |
34 mov r8,QWORD[8+rax*1+rsi] | |
35 mov r9,QWORD[rax*1+rsi] | |
36 and bl,0xf0 | |
37 mov rdx,r8 | |
38 jmp NEAR $L$oop1 | |
39 | |
40 ALIGN 16 | |
; Each pass performs two 4-bit steps. A step shifts r9:r8 right by 4 as one
; 128-bit quantity (r10 = r9 << 60 carries the bits across), xors in the
; table entry at rbx (first step) or rax (second step), and xors into r9 the
; $L$rem_4bit entry picked by the 4 bits that were shifted out (rdx & 0xf).
; The first step also fetches the next byte at [rcx+rdi]; the loop exits to
; $L$break1 once rcx has been decremented below zero.
41 $L$oop1: | |
42 shr r8,4 | |
43 and rdx,0xf | |
44 mov r10,r9 | |
45 mov al,BYTE[rcx*1+rdi] | |
46 shr r9,4 | |
47 xor r8,QWORD[8+rbx*1+rsi] | |
48 shl r10,60 | |
49 xor r9,QWORD[rbx*1+rsi] | |
50 mov bl,al | |
51 xor r9,QWORD[rdx*8+r11] | |
52 mov rdx,r8 | |
53 shl al,4 | |
54 xor r8,r10 | |
55 dec rcx | |
56 js NEAR $L$break1 | |
57 | |
58 shr r8,4 | |
59 and rdx,0xf | |
60 mov r10,r9 | |
61 shr r9,4 | |
62 xor r8,QWORD[8+rax*1+rsi] | |
63 shl r10,60 | |
64 xor r9,QWORD[rax*1+rsi] | |
65 and bl,0xf0 | |
66 xor r9,QWORD[rdx*8+r11] | |
67 mov rdx,r8 | |
68 xor r8,r10 | |
69 jmp NEAR $L$oop1 | |
70 | |
71 ALIGN 16 | |
; Final two steps for the last byte fetched (offset 0): first the rax entry,
; then the rbx entry, followed by the byte swap and the store back to [rdi].
72 $L$break1: | |
73 shr r8,4 | |
74 and rdx,0xf | |
75 mov r10,r9 | |
76 shr r9,4 | |
77 xor r8,QWORD[8+rax*1+rsi] | |
78 shl r10,60 | |
79 xor r9,QWORD[rax*1+rsi] | |
80 and bl,0xf0 | |
81 xor r9,QWORD[rdx*8+r11] | |
82 mov rdx,r8 | |
83 xor r8,r10 | |
84 | |
85 shr r8,4 | |
86 and rdx,0xf | |
87 mov r10,r9 | |
88 shr r9,4 | |
89 xor r8,QWORD[8+rbx*1+rsi] | |
90 shl r10,60 | |
91 xor r9,QWORD[rbx*1+rsi] | |
92 xor r8,r10 | |
93 xor r9,QWORD[rdx*8+r11] | |
94 | |
95 bswap r8 | |
96 bswap r9 | |
97 mov QWORD[8+rdi],r8 | |
98 mov QWORD[rdi],r9 | |
99 | |
; Stack layout here: [rsp] = r12, [8+rsp] = rbp, [16+rsp] = rbx.
100 mov rbx,QWORD[16+rsp] | |
101 lea rsp,[24+rsp] | |
102 $L$gmult_epilogue: | |
103 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
104 mov rsi,QWORD[16+rsp] | |
105 DB 0F3h,0C3h ;repret | |
106 $L$SEH_end_gcm_gmult_4bit: | |
; ----------------------------------------------------------------------
; gcm_ghash_4bit (Win64 entry point)
;
; In:   rcx -> 16-byte state block, copied to rdi (read, rewritten every
;              iteration, and holding the final result on return)
;       rdx -> table of 16 entries x 16 bytes, copied to rsi
;       r8  -> input data, kept in r14
;       r9  =  input length in bytes, kept in r15 and turned into an end
;              pointer by "add r15,r14"
; Stack: 280 bytes of scratch below six pushed registers:
;       [rsp+0   .. rsp+15 ]  per entry: low byte of the +8 qword, << 4
;       [rsp+16  .. rsp+143]  per entry: ((+8 qword) >> 4) | ((+0 qword) << 60)
;       [rsp+144 .. rsp+271]  per entry: (+0 qword) >> 4
;       rbp = rsp+144, so the last two areas are reached as [rbp-128+8*i]
;       and [rbp+8*i].
; Uses: r11 -> $L$rem_8bit, indexed by a byte value (2 bytes per entry).
; Note: the block loop tests its bound at the bottom (cmp r14,r15 / jb), so
;       the body always runs at least once.
; ----------------------------------------------------------------------
107 global gcm_ghash_4bit | |
108 | |
109 ALIGN 16 | |
110 gcm_ghash_4bit: | |
111 mov QWORD[8+rsp],rdi ;WIN64 prologue | |
112 mov QWORD[16+rsp],rsi | |
113 mov rax,rsp | |
114 $L$SEH_begin_gcm_ghash_4bit: | |
115 mov rdi,rcx | |
116 mov rsi,rdx | |
117 mov rdx,r8 | |
118 mov rcx,r9 | |
119 | |
120 | |
121 push rbx | |
122 push rbp | |
123 push r12 | |
124 push r13 | |
125 push r14 | |
126 push r15 | |
127 sub rsp,280 | |
128 $L$ghash_prologue: | |
; rsi is biased by +128 while the stack tables are built, so all 16 entries
; are addressed with displacements in -128..+120; the bias is removed again
; by "add rsi,-128" once the tables are complete.
129 mov r14,rdx | |
130 mov r15,rcx | |
131 sub rsi,-128 | |
132 lea rbp,[((16+128))+rsp] | |
133 xor edx,edx | |
; Unrolled table build: the same load / shift / store pattern is repeated for
; each of the 16 entries, alternating between the register pairs r8/rax and
; r9/rbx.
134 mov r8,QWORD[((0+0-128))+rsi] | |
135 mov rax,QWORD[((0+8-128))+rsi] | |
136 mov dl,al | |
137 shr rax,4 | |
138 mov r10,r8 | |
139 shr r8,4 | |
140 mov r9,QWORD[((16+0-128))+rsi] | |
141 shl dl,4 | |
142 mov rbx,QWORD[((16+8-128))+rsi] | |
143 shl r10,60 | |
144 mov BYTE[rsp],dl | |
145 or rax,r10 | |
146 mov dl,bl | |
147 shr rbx,4 | |
148 mov r10,r9 | |
149 shr r9,4 | |
150 mov QWORD[rbp],r8 | |
151 mov r8,QWORD[((32+0-128))+rsi] | |
152 shl dl,4 | |
153 mov QWORD[((0-128))+rbp],rax | |
154 mov rax,QWORD[((32+8-128))+rsi] | |
155 shl r10,60 | |
156 mov BYTE[1+rsp],dl | |
157 or rbx,r10 | |
158 mov dl,al | |
159 shr rax,4 | |
160 mov r10,r8 | |
161 shr r8,4 | |
162 mov QWORD[8+rbp],r9 | |
163 mov r9,QWORD[((48+0-128))+rsi] | |
164 shl dl,4 | |
165 mov QWORD[((8-128))+rbp],rbx | |
166 mov rbx,QWORD[((48+8-128))+rsi] | |
167 shl r10,60 | |
168 mov BYTE[2+rsp],dl | |
169 or rax,r10 | |
170 mov dl,bl | |
171 shr rbx,4 | |
172 mov r10,r9 | |
173 shr r9,4 | |
174 mov QWORD[16+rbp],r8 | |
175 mov r8,QWORD[((64+0-128))+rsi] | |
176 shl dl,4 | |
177 mov QWORD[((16-128))+rbp],rax | |
178 mov rax,QWORD[((64+8-128))+rsi] | |
179 shl r10,60 | |
180 mov BYTE[3+rsp],dl | |
181 or rbx,r10 | |
182 mov dl,al | |
183 shr rax,4 | |
184 mov r10,r8 | |
185 shr r8,4 | |
186 mov QWORD[24+rbp],r9 | |
187 mov r9,QWORD[((80+0-128))+rsi] | |
188 shl dl,4 | |
189 mov QWORD[((24-128))+rbp],rbx | |
190 mov rbx,QWORD[((80+8-128))+rsi] | |
191 shl r10,60 | |
192 mov BYTE[4+rsp],dl | |
193 or rax,r10 | |
194 mov dl,bl | |
195 shr rbx,4 | |
196 mov r10,r9 | |
197 shr r9,4 | |
198 mov QWORD[32+rbp],r8 | |
199 mov r8,QWORD[((96+0-128))+rsi] | |
200 shl dl,4 | |
201 mov QWORD[((32-128))+rbp],rax | |
202 mov rax,QWORD[((96+8-128))+rsi] | |
203 shl r10,60 | |
204 mov BYTE[5+rsp],dl | |
205 or rbx,r10 | |
206 mov dl,al | |
207 shr rax,4 | |
208 mov r10,r8 | |
209 shr r8,4 | |
210 mov QWORD[40+rbp],r9 | |
211 mov r9,QWORD[((112+0-128))+rsi] | |
212 shl dl,4 | |
213 mov QWORD[((40-128))+rbp],rbx | |
214 mov rbx,QWORD[((112+8-128))+rsi] | |
215 shl r10,60 | |
216 mov BYTE[6+rsp],dl | |
217 or rax,r10 | |
218 mov dl,bl | |
219 shr rbx,4 | |
220 mov r10,r9 | |
221 shr r9,4 | |
222 mov QWORD[48+rbp],r8 | |
223 mov r8,QWORD[((128+0-128))+rsi] | |
224 shl dl,4 | |
225 mov QWORD[((48-128))+rbp],rax | |
226 mov rax,QWORD[((128+8-128))+rsi] | |
227 shl r10,60 | |
228 mov BYTE[7+rsp],dl | |
229 or rbx,r10 | |
230 mov dl,al | |
231 shr rax,4 | |
232 mov r10,r8 | |
233 shr r8,4 | |
234 mov QWORD[56+rbp],r9 | |
235 mov r9,QWORD[((144+0-128))+rsi] | |
236 shl dl,4 | |
237 mov QWORD[((56-128))+rbp],rbx | |
238 mov rbx,QWORD[((144+8-128))+rsi] | |
239 shl r10,60 | |
240 mov BYTE[8+rsp],dl | |
241 or rax,r10 | |
242 mov dl,bl | |
243 shr rbx,4 | |
244 mov r10,r9 | |
245 shr r9,4 | |
246 mov QWORD[64+rbp],r8 | |
247 mov r8,QWORD[((160+0-128))+rsi] | |
248 shl dl,4 | |
249 mov QWORD[((64-128))+rbp],rax | |
250 mov rax,QWORD[((160+8-128))+rsi] | |
251 shl r10,60 | |
252 mov BYTE[9+rsp],dl | |
253 or rbx,r10 | |
254 mov dl,al | |
255 shr rax,4 | |
256 mov r10,r8 | |
257 shr r8,4 | |
258 mov QWORD[72+rbp],r9 | |
259 mov r9,QWORD[((176+0-128))+rsi] | |
260 shl dl,4 | |
261 mov QWORD[((72-128))+rbp],rbx | |
262 mov rbx,QWORD[((176+8-128))+rsi] | |
263 shl r10,60 | |
264 mov BYTE[10+rsp],dl | |
265 or rax,r10 | |
266 mov dl,bl | |
267 shr rbx,4 | |
268 mov r10,r9 | |
269 shr r9,4 | |
270 mov QWORD[80+rbp],r8 | |
271 mov r8,QWORD[((192+0-128))+rsi] | |
272 shl dl,4 | |
273 mov QWORD[((80-128))+rbp],rax | |
274 mov rax,QWORD[((192+8-128))+rsi] | |
275 shl r10,60 | |
276 mov BYTE[11+rsp],dl | |
277 or rbx,r10 | |
278 mov dl,al | |
279 shr rax,4 | |
280 mov r10,r8 | |
281 shr r8,4 | |
282 mov QWORD[88+rbp],r9 | |
283 mov r9,QWORD[((208+0-128))+rsi] | |
284 shl dl,4 | |
285 mov QWORD[((88-128))+rbp],rbx | |
286 mov rbx,QWORD[((208+8-128))+rsi] | |
287 shl r10,60 | |
288 mov BYTE[12+rsp],dl | |
289 or rax,r10 | |
290 mov dl,bl | |
291 shr rbx,4 | |
292 mov r10,r9 | |
293 shr r9,4 | |
294 mov QWORD[96+rbp],r8 | |
295 mov r8,QWORD[((224+0-128))+rsi] | |
296 shl dl,4 | |
297 mov QWORD[((96-128))+rbp],rax | |
298 mov rax,QWORD[((224+8-128))+rsi] | |
299 shl r10,60 | |
300 mov BYTE[13+rsp],dl | |
301 or rbx,r10 | |
302 mov dl,al | |
303 shr rax,4 | |
304 mov r10,r8 | |
305 shr r8,4 | |
306 mov QWORD[104+rbp],r9 | |
307 mov r9,QWORD[((240+0-128))+rsi] | |
308 shl dl,4 | |
309 mov QWORD[((104-128))+rbp],rbx | |
310 mov rbx,QWORD[((240+8-128))+rsi] | |
311 shl r10,60 | |
312 mov BYTE[14+rsp],dl | |
313 or rax,r10 | |
314 mov dl,bl | |
315 shr rbx,4 | |
316 mov r10,r9 | |
317 shr r9,4 | |
318 mov QWORD[112+rbp],r8 | |
319 shl dl,4 | |
320 mov QWORD[((112-128))+rbp],rax | |
321 shl r10,60 | |
322 mov BYTE[15+rsp],dl | |
323 or rbx,r10 | |
324 mov QWORD[120+rbp],r9 | |
325 mov QWORD[((120-128))+rbp],rbx | |
; Tables are ready: remove the rsi bias, load the current state into r9:r8
; (r9 = qword at +0, r8 = qword at +8) and convert r15 into an end pointer.
326 add rsi,-128 | |
327 mov r8,QWORD[8+rdi] | |
328 mov r9,QWORD[rdi] | |
329 add r15,r14 | |
330 lea r11,[$L$rem_8bit] | |
331 jmp NEAR $L$outer_loop | |
332 ALIGN 16 | |
; One iteration consumes 16 input bytes at [r14]. The input block is xored
; into the state and the result is stored to [rdi]; the bytes are then pulled
; back out of that stored copy one dword at a time (rdx >> 32 first, then the
; dwords at [8+rdi], [4+rdi] and [rdi]) and rotated through dl with
; "rol edx,8". For each byte, al = byte << 4 indexes the table at rsi and
; ebx/ecx = byte >> 4 indexes the stack tables; r9:r8 is shifted right by 8
; per step and a $L$rem_8bit word (shifted to bits 48..63) is folded into r9.
333 $L$outer_loop: | |
334 xor r9,QWORD[r14] | |
335 mov rdx,QWORD[8+r14] | |
336 lea r14,[16+r14] | |
337 xor rdx,r8 | |
338 mov QWORD[rdi],r9 | |
339 mov QWORD[8+rdi],rdx | |
340 shr rdx,32 | |
341 xor rax,rax | |
342 rol edx,8 | |
343 mov al,dl | |
344 movzx ebx,dl | |
345 shl al,4 | |
346 shr ebx,4 | |
347 rol edx,8 | |
348 mov r8,QWORD[8+rax*1+rsi] | |
349 mov r9,QWORD[rax*1+rsi] | |
350 mov al,dl | |
351 movzx ecx,dl | |
352 shl al,4 | |
353 movzx r12,BYTE[rbx*1+rsp] | |
354 shr ecx,4 | |
355 xor r12,r8 | |
356 mov r10,r9 | |
357 shr r8,8 | |
358 movzx r12,r12b | |
359 shr r9,8 | |
360 xor r8,QWORD[((-128))+rbx*8+rbp] | |
361 shl r10,56 | |
362 xor r9,QWORD[rbx*8+rbp] | |
363 rol edx,8 | |
364 xor r8,QWORD[8+rax*1+rsi] | |
365 xor r9,QWORD[rax*1+rsi] | |
366 mov al,dl | |
367 xor r8,r10 | |
368 movzx r12,WORD[r12*2+r11] | |
369 movzx ebx,dl | |
370 shl al,4 | |
371 movzx r13,BYTE[rcx*1+rsp] | |
372 shr ebx,4 | |
373 shl r12,48 | |
374 xor r13,r8 | |
375 mov r10,r9 | |
376 xor r9,r12 | |
377 shr r8,8 | |
378 movzx r13,r13b | |
379 shr r9,8 | |
380 xor r8,QWORD[((-128))+rcx*8+rbp] | |
381 shl r10,56 | |
382 xor r9,QWORD[rcx*8+rbp] | |
383 rol edx,8 | |
384 xor r8,QWORD[8+rax*1+rsi] | |
385 xor r9,QWORD[rax*1+rsi] | |
386 mov al,dl | |
387 xor r8,r10 | |
388 movzx r13,WORD[r13*2+r11] | |
389 movzx ecx,dl | |
390 shl al,4 | |
391 movzx r12,BYTE[rbx*1+rsp] | |
392 shr ecx,4 | |
393 shl r13,48 | |
394 xor r12,r8 | |
395 mov r10,r9 | |
396 xor r9,r13 | |
397 shr r8,8 | |
398 movzx r12,r12b | |
399 mov edx,DWORD[8+rdi] | |
400 shr r9,8 | |
401 xor r8,QWORD[((-128))+rbx*8+rbp] | |
402 shl r10,56 | |
403 xor r9,QWORD[rbx*8+rbp] | |
404 rol edx,8 | |
405 xor r8,QWORD[8+rax*1+rsi] | |
406 xor r9,QWORD[rax*1+rsi] | |
407 mov al,dl | |
408 xor r8,r10 | |
409 movzx r12,WORD[r12*2+r11] | |
410 movzx ebx,dl | |
411 shl al,4 | |
412 movzx r13,BYTE[rcx*1+rsp] | |
413 shr ebx,4 | |
414 shl r12,48 | |
415 xor r13,r8 | |
416 mov r10,r9 | |
417 xor r9,r12 | |
418 shr r8,8 | |
419 movzx r13,r13b | |
420 shr r9,8 | |
421 xor r8,QWORD[((-128))+rcx*8+rbp] | |
422 shl r10,56 | |
423 xor r9,QWORD[rcx*8+rbp] | |
424 rol edx,8 | |
425 xor r8,QWORD[8+rax*1+rsi] | |
426 xor r9,QWORD[rax*1+rsi] | |
427 mov al,dl | |
428 xor r8,r10 | |
429 movzx r13,WORD[r13*2+r11] | |
430 movzx ecx,dl | |
431 shl al,4 | |
432 movzx r12,BYTE[rbx*1+rsp] | |
433 shr ecx,4 | |
434 shl r13,48 | |
435 xor r12,r8 | |
436 mov r10,r9 | |
437 xor r9,r13 | |
438 shr r8,8 | |
439 movzx r12,r12b | |
440 shr r9,8 | |
441 xor r8,QWORD[((-128))+rbx*8+rbp] | |
442 shl r10,56 | |
443 xor r9,QWORD[rbx*8+rbp] | |
444 rol edx,8 | |
445 xor r8,QWORD[8+rax*1+rsi] | |
446 xor r9,QWORD[rax*1+rsi] | |
447 mov al,dl | |
448 xor r8,r10 | |
449 movzx r12,WORD[r12*2+r11] | |
450 movzx ebx,dl | |
451 shl al,4 | |
452 movzx r13,BYTE[rcx*1+rsp] | |
453 shr ebx,4 | |
454 shl r12,48 | |
455 xor r13,r8 | |
456 mov r10,r9 | |
457 xor r9,r12 | |
458 shr r8,8 | |
459 movzx r13,r13b | |
460 shr r9,8 | |
461 xor r8,QWORD[((-128))+rcx*8+rbp] | |
462 shl r10,56 | |
463 xor r9,QWORD[rcx*8+rbp] | |
464 rol edx,8 | |
465 xor r8,QWORD[8+rax*1+rsi] | |
466 xor r9,QWORD[rax*1+rsi] | |
467 mov al,dl | |
468 xor r8,r10 | |
469 movzx r13,WORD[r13*2+r11] | |
470 movzx ecx,dl | |
471 shl al,4 | |
472 movzx r12,BYTE[rbx*1+rsp] | |
473 shr ecx,4 | |
474 shl r13,48 | |
475 xor r12,r8 | |
476 mov r10,r9 | |
477 xor r9,r13 | |
478 shr r8,8 | |
479 movzx r12,r12b | |
480 mov edx,DWORD[4+rdi] | |
481 shr r9,8 | |
482 xor r8,QWORD[((-128))+rbx*8+rbp] | |
483 shl r10,56 | |
484 xor r9,QWORD[rbx*8+rbp] | |
485 rol edx,8 | |
486 xor r8,QWORD[8+rax*1+rsi] | |
487 xor r9,QWORD[rax*1+rsi] | |
488 mov al,dl | |
489 xor r8,r10 | |
490 movzx r12,WORD[r12*2+r11] | |
491 movzx ebx,dl | |
492 shl al,4 | |
493 movzx r13,BYTE[rcx*1+rsp] | |
494 shr ebx,4 | |
495 shl r12,48 | |
496 xor r13,r8 | |
497 mov r10,r9 | |
498 xor r9,r12 | |
499 shr r8,8 | |
500 movzx r13,r13b | |
501 shr r9,8 | |
502 xor r8,QWORD[((-128))+rcx*8+rbp] | |
503 shl r10,56 | |
504 xor r9,QWORD[rcx*8+rbp] | |
505 rol edx,8 | |
506 xor r8,QWORD[8+rax*1+rsi] | |
507 xor r9,QWORD[rax*1+rsi] | |
508 mov al,dl | |
509 xor r8,r10 | |
510 movzx r13,WORD[r13*2+r11] | |
511 movzx ecx,dl | |
512 shl al,4 | |
513 movzx r12,BYTE[rbx*1+rsp] | |
514 shr ecx,4 | |
515 shl r13,48 | |
516 xor r12,r8 | |
517 mov r10,r9 | |
518 xor r9,r13 | |
519 shr r8,8 | |
520 movzx r12,r12b | |
521 shr r9,8 | |
522 xor r8,QWORD[((-128))+rbx*8+rbp] | |
523 shl r10,56 | |
524 xor r9,QWORD[rbx*8+rbp] | |
525 rol edx,8 | |
526 xor r8,QWORD[8+rax*1+rsi] | |
527 xor r9,QWORD[rax*1+rsi] | |
528 mov al,dl | |
529 xor r8,r10 | |
530 movzx r12,WORD[r12*2+r11] | |
531 movzx ebx,dl | |
532 shl al,4 | |
533 movzx r13,BYTE[rcx*1+rsp] | |
534 shr ebx,4 | |
535 shl r12,48 | |
536 xor r13,r8 | |
537 mov r10,r9 | |
538 xor r9,r12 | |
539 shr r8,8 | |
540 movzx r13,r13b | |
541 shr r9,8 | |
542 xor r8,QWORD[((-128))+rcx*8+rbp] | |
543 shl r10,56 | |
544 xor r9,QWORD[rcx*8+rbp] | |
545 rol edx,8 | |
546 xor r8,QWORD[8+rax*1+rsi] | |
547 xor r9,QWORD[rax*1+rsi] | |
548 mov al,dl | |
549 xor r8,r10 | |
550 movzx r13,WORD[r13*2+r11] | |
551 movzx ecx,dl | |
552 shl al,4 | |
553 movzx r12,BYTE[rbx*1+rsp] | |
554 shr ecx,4 | |
555 shl r13,48 | |
556 xor r12,r8 | |
557 mov r10,r9 | |
558 xor r9,r13 | |
559 shr r8,8 | |
560 movzx r12,r12b | |
561 mov edx,DWORD[rdi] | |
562 shr r9,8 | |
563 xor r8,QWORD[((-128))+rbx*8+rbp] | |
564 shl r10,56 | |
565 xor r9,QWORD[rbx*8+rbp] | |
566 rol edx,8 | |
567 xor r8,QWORD[8+rax*1+rsi] | |
568 xor r9,QWORD[rax*1+rsi] | |
569 mov al,dl | |
570 xor r8,r10 | |
571 movzx r12,WORD[r12*2+r11] | |
572 movzx ebx,dl | |
573 shl al,4 | |
574 movzx r13,BYTE[rcx*1+rsp] | |
575 shr ebx,4 | |
576 shl r12,48 | |
577 xor r13,r8 | |
578 mov r10,r9 | |
579 xor r9,r12 | |
580 shr r8,8 | |
581 movzx r13,r13b | |
582 shr r9,8 | |
583 xor r8,QWORD[((-128))+rcx*8+rbp] | |
584 shl r10,56 | |
585 xor r9,QWORD[rcx*8+rbp] | |
586 rol edx,8 | |
587 xor r8,QWORD[8+rax*1+rsi] | |
588 xor r9,QWORD[rax*1+rsi] | |
589 mov al,dl | |
590 xor r8,r10 | |
591 movzx r13,WORD[r13*2+r11] | |
592 movzx ecx,dl | |
593 shl al,4 | |
594 movzx r12,BYTE[rbx*1+rsp] | |
595 shr ecx,4 | |
596 shl r13,48 | |
597 xor r12,r8 | |
598 mov r10,r9 | |
599 xor r9,r13 | |
600 shr r8,8 | |
601 movzx r12,r12b | |
602 shr r9,8 | |
603 xor r8,QWORD[((-128))+rbx*8+rbp] | |
604 shl r10,56 | |
605 xor r9,QWORD[rbx*8+rbp] | |
606 rol edx,8 | |
607 xor r8,QWORD[8+rax*1+rsi] | |
608 xor r9,QWORD[rax*1+rsi] | |
609 mov al,dl | |
610 xor r8,r10 | |
611 movzx r12,WORD[r12*2+r11] | |
612 movzx ebx,dl | |
613 shl al,4 | |
614 movzx r13,BYTE[rcx*1+rsp] | |
615 shr ebx,4 | |
616 shl r12,48 | |
617 xor r13,r8 | |
618 mov r10,r9 | |
619 xor r9,r12 | |
620 shr r8,8 | |
621 movzx r13,r13b | |
622 shr r9,8 | |
623 xor r8,QWORD[((-128))+rcx*8+rbp] | |
624 shl r10,56 | |
625 xor r9,QWORD[rcx*8+rbp] | |
626 rol edx,8 | |
627 xor r8,QWORD[8+rax*1+rsi] | |
628 xor r9,QWORD[rax*1+rsi] | |
629 mov al,dl | |
630 xor r8,r10 | |
631 movzx r13,WORD[r13*2+r11] | |
632 movzx ecx,dl | |
633 shl al,4 | |
634 movzx r12,BYTE[rbx*1+rsp] | |
; Last byte of the block: ecx keeps the high nibble in place (byte & 0xf0)
; instead of byte >> 4, because it is used below as a byte offset into the
; table at rsi rather than as an index into the stack tables.
635 and ecx,240 | |
636 shl r13,48 | |
637 xor r12,r8 | |
638 mov r10,r9 | |
639 xor r9,r13 | |
640 shr r8,8 | |
641 movzx r12,r12b | |
; NOTE(review): the next load reads the dword 4 bytes below rdi, and edx is
; not consumed before rdx is overwritten at the top of $L$outer_loop or the
; function returns. It looks like a leftover of the unrolled load pattern --
; confirm against the code generator before touching it.
642 mov edx,DWORD[((-4))+rdi] | |
643 shr r9,8 | |
644 xor r8,QWORD[((-128))+rbx*8+rbp] | |
645 shl r10,56 | |
646 xor r9,QWORD[rbx*8+rbp] | |
647 movzx r12,WORD[r12*2+r11] | |
648 xor r8,QWORD[8+rax*1+rsi] | |
649 xor r9,QWORD[rax*1+rsi] | |
650 shl r12,48 | |
651 xor r8,r10 | |
652 xor r9,r12 | |
; Final 4-bit step for the block, then byte-swap both halves of the state.
653 movzx r13,r8b | |
654 shr r8,4 | |
655 mov r10,r9 | |
656 shl r13b,4 | |
657 shr r9,4 | |
658 xor r8,QWORD[8+rcx*1+rsi] | |
659 movzx r13,WORD[r13*2+r11] | |
660 shl r10,60 | |
661 xor r9,QWORD[rcx*1+rsi] | |
662 xor r8,r10 | |
663 shl r13,48 | |
664 bswap r8 | |
665 xor r9,r13 | |
666 bswap r9 | |
; Unsigned compare: continue while the input pointer is below the end pointer.
667 cmp r14,r15 | |
668 jb NEAR $L$outer_loop | |
669 mov QWORD[8+rdi],r8 | |
670 mov QWORD[rdi],r9 | |
671 | |
; Skip the 280-byte scratch area and reload the six registers pushed in the
; prologue (reverse push order), then release the whole frame.
672 lea rsi,[280+rsp] | |
673 mov r15,QWORD[rsi] | |
674 mov r14,QWORD[8+rsi] | |
675 mov r13,QWORD[16+rsi] | |
676 mov r12,QWORD[24+rsi] | |
677 mov rbp,QWORD[32+rsi] | |
678 mov rbx,QWORD[40+rsi] | |
679 lea rsp,[48+rsi] | |
680 $L$ghash_epilogue: | |
681 mov rdi,QWORD[8+rsp] ;WIN64 epilogue | |
682 mov rsi,QWORD[16+rsp] | |
683 DB 0F3h,0C3h ;repret | |
684 $L$SEH_end_gcm_ghash_4bit: | |
; ----------------------------------------------------------------------
; gcm_init_clmul (also reached through $L$_init_clmul)
;
; In:   rdx -> 16 bytes of input, loaded unaligned into xmm2
;       rcx -> output table; six 16-byte slots are written at offsets
;              0, 16, 32, 48, 64 and 80
; Stack: 24 bytes are reserved and xmm6 is saved at [rsp]; both are undone
;       just before the return. rcx/rdx are used directly as the Win64
;       argument registers.
; Encoded-by-hand instructions (DB lines), decoded from their opcode bytes:
;       0x48,0x83,0xec,0x18      sub    rsp,0x18
;       0x0f,0x29,0x34,0x24      movaps XMMWORD[rsp],xmm6
;       102,15,58,68,194,0       pclmulqdq xmm0,xmm2,0x00
;       102,15,58,68,202,17      pclmulqdq xmm1,xmm2,0x11
;       102,15,58,68,222,0       pclmulqdq xmm3,xmm6,0x00
;       102,15,58,15,227,8       palignr   xmm4,xmm3,8
; ----------------------------------------------------------------------
685 global gcm_init_clmul | |
686 | |
687 ALIGN 16 | |
688 gcm_init_clmul: | |
689 $L$_init_clmul: | |
690 $L$SEH_begin_gcm_init_clmul: | |
691 | |
692 DB 0x48,0x83,0xec,0x18 | |
693 DB 0x0f,0x29,0x34,0x24 | |
694 movdqu xmm2,XMMWORD[rdx] | |
695 pshufd xmm2,xmm2,78 | |
696 | |
697 | |
; Shift the 128-bit value in xmm2 left by one bit (psllq plus the carry bit
; moved across the qword boundary), and conditionally xor in the constant at
; $L$0x1c2_polynomial. xmm5 is an all-ones / all-zeros mask derived from the
; sign of the top dword (pshufd 255 + pcmpgtd against zero).
698 pshufd xmm4,xmm2,255 | |
699 movdqa xmm3,xmm2 | |
700 psllq xmm2,1 | |
701 pxor xmm5,xmm5 | |
702 psrlq xmm3,63 | |
703 pcmpgtd xmm5,xmm4 | |
704 pslldq xmm3,8 | |
705 por xmm2,xmm3 | |
706 | |
707 | |
708 pand xmm5,XMMWORD[$L$0x1c2_polynomial] | |
709 pxor xmm2,xmm5 | |
710 | |
711 | |
; xmm6 = xmm2 with its two qwords xored together (pshufd 78 swaps the
; halves); it is the second operand of the third carry-less multiply in each
; multiply-and-reduce sequence below.
712 pshufd xmm6,xmm2,78 | |
713 movdqa xmm0,xmm2 | |
714 pxor xmm6,xmm2 | |
715 movdqa xmm1,xmm0 | |
716 pshufd xmm3,xmm0,78 | |
717 pxor xmm3,xmm0 | |
718 DB 102,15,58,68,194,0 | |
719 DB 102,15,58,68,202,17 | |
720 DB 102,15,58,68,222,0 | |
721 pxor xmm3,xmm0 | |
722 pxor xmm3,xmm1 | |
723 | |
724 movdqa xmm4,xmm3 | |
725 psrldq xmm3,8 | |
726 pslldq xmm4,8 | |
727 pxor xmm1,xmm3 | |
728 pxor xmm0,xmm4 | |
729 | |
730 movdqa xmm4,xmm0 | |
731 movdqa xmm3,xmm0 | |
732 psllq xmm0,5 | |
733 pxor xmm3,xmm0 | |
734 psllq xmm0,1 | |
735 pxor xmm0,xmm3 | |
736 psllq xmm0,57 | |
737 movdqa xmm3,xmm0 | |
738 pslldq xmm0,8 | |
739 psrldq xmm3,8 | |
740 pxor xmm0,xmm4 | |
741 pxor xmm1,xmm3 | |
742 | |
743 | |
744 movdqa xmm4,xmm0 | |
745 psrlq xmm0,1 | |
746 pxor xmm1,xmm4 | |
747 pxor xmm4,xmm0 | |
748 psrlq xmm0,5 | |
749 pxor xmm0,xmm4 | |
750 psrlq xmm0,1 | |
751 pxor xmm0,xmm1 | |
; Store slots 0 and 1 (xmm2 and the product in xmm0) and, in slot 2, the
; palignr combination of their half-xored forms (xmm3, xmm4).
752 pshufd xmm3,xmm2,78 | |
753 pshufd xmm4,xmm0,78 | |
754 pxor xmm3,xmm2 | |
755 movdqu XMMWORD[rcx],xmm2 | |
756 pxor xmm4,xmm0 | |
757 movdqu XMMWORD[16+rcx],xmm0 | |
758 DB 102,15,58,15,227,8 | |
759 movdqu XMMWORD[32+rcx],xmm4 | |
; Second multiply-and-reduce by xmm2; the result is kept in xmm5 for slot 3.
760 movdqa xmm1,xmm0 | |
761 pshufd xmm3,xmm0,78 | |
762 pxor xmm3,xmm0 | |
763 DB 102,15,58,68,194,0 | |
764 DB 102,15,58,68,202,17 | |
765 DB 102,15,58,68,222,0 | |
766 pxor xmm3,xmm0 | |
767 pxor xmm3,xmm1 | |
768 | |
769 movdqa xmm4,xmm3 | |
770 psrldq xmm3,8 | |
771 pslldq xmm4,8 | |
772 pxor xmm1,xmm3 | |
773 pxor xmm0,xmm4 | |
774 | |
775 movdqa xmm4,xmm0 | |
776 movdqa xmm3,xmm0 | |
777 psllq xmm0,5 | |
778 pxor xmm3,xmm0 | |
779 psllq xmm0,1 | |
780 pxor xmm0,xmm3 | |
781 psllq xmm0,57 | |
782 movdqa xmm3,xmm0 | |
783 pslldq xmm0,8 | |
784 psrldq xmm3,8 | |
785 pxor xmm0,xmm4 | |
786 pxor xmm1,xmm3 | |
787 | |
788 | |
789 movdqa xmm4,xmm0 | |
790 psrlq xmm0,1 | |
791 pxor xmm1,xmm4 | |
792 pxor xmm4,xmm0 | |
793 psrlq xmm0,5 | |
794 pxor xmm0,xmm4 | |
795 psrlq xmm0,1 | |
796 pxor xmm0,xmm1 | |
; Third multiply-and-reduce by xmm2; the result stays in xmm0 for slot 4.
797 movdqa xmm5,xmm0 | |
798 movdqa xmm1,xmm0 | |
799 pshufd xmm3,xmm0,78 | |
800 pxor xmm3,xmm0 | |
801 DB 102,15,58,68,194,0 | |
802 DB 102,15,58,68,202,17 | |
803 DB 102,15,58,68,222,0 | |
804 pxor xmm3,xmm0 | |
805 pxor xmm3,xmm1 | |
806 | |
807 movdqa xmm4,xmm3 | |
808 psrldq xmm3,8 | |
809 pslldq xmm4,8 | |
810 pxor xmm1,xmm3 | |
811 pxor xmm0,xmm4 | |
812 | |
813 movdqa xmm4,xmm0 | |
814 movdqa xmm3,xmm0 | |
815 psllq xmm0,5 | |
816 pxor xmm3,xmm0 | |
817 psllq xmm0,1 | |
818 pxor xmm0,xmm3 | |
819 psllq xmm0,57 | |
820 movdqa xmm3,xmm0 | |
821 pslldq xmm0,8 | |
822 psrldq xmm3,8 | |
823 pxor xmm0,xmm4 | |
824 pxor xmm1,xmm3 | |
825 | |
826 | |
827 movdqa xmm4,xmm0 | |
828 psrlq xmm0,1 | |
829 pxor xmm1,xmm4 | |
830 pxor xmm4,xmm0 | |
831 psrlq xmm0,5 | |
832 pxor xmm0,xmm4 | |
833 psrlq xmm0,1 | |
834 pxor xmm0,xmm1 | |
; Store slots 3, 4 and 5 the same way as slots 0, 1 and 2.
835 pshufd xmm3,xmm5,78 | |
836 pshufd xmm4,xmm0,78 | |
837 pxor xmm3,xmm5 | |
838 movdqu XMMWORD[48+rcx],xmm5 | |
839 pxor xmm4,xmm0 | |
840 movdqu XMMWORD[64+rcx],xmm0 | |
841 DB 102,15,58,15,227,8 | |
842 movdqu XMMWORD[80+rcx],xmm4 | |
; Restore xmm6 and release the 24 bytes reserved on entry.
843 movaps xmm6,XMMWORD[rsp] | |
844 lea rsp,[24+rsp] | |
845 $L$SEH_end_gcm_init_clmul: | |
846 DB 0F3h,0C3h ;repret | |
847 | |
; ----------------------------------------------------------------------
; gcm_gmult_clmul (also reached through $L$_gmult_clmul)
;
; In:   rcx -> 16-byte block, loaded into xmm0 and overwritten with the
;              result
;       rdx -> table; the 16-byte slots at offsets 0 and 32 are read
; The block is byte-reversed with $L$bswap_mask on the way in and again on
; the way out. Only xmm0..xmm5 are written; no stack space is used.
; Encoded-by-hand instructions (DB lines), decoded from their opcode bytes:
;       102,15,56,0,197          pshufb    xmm0,xmm5
;       102,15,58,68,194,0       pclmulqdq xmm0,xmm2,0x00
;       102,15,58,68,202,17      pclmulqdq xmm1,xmm2,0x11
;       102,15,58,68,220,0       pclmulqdq xmm3,xmm4,0x00
; ----------------------------------------------------------------------
848 global gcm_gmult_clmul | |
849 | |
850 ALIGN 16 | |
851 gcm_gmult_clmul: | |
852 $L$_gmult_clmul: | |
853 movdqu xmm0,XMMWORD[rcx] | |
854 movdqa xmm5,XMMWORD[$L$bswap_mask] | |
855 movdqu xmm2,XMMWORD[rdx] | |
856 movdqu xmm4,XMMWORD[32+rdx] | |
857 DB 102,15,56,0,197 | |
; Three carry-less multiplies: low halves, high halves, and the xored halves
; (xmm3 = hi ^ lo of xmm0) against the low qword of slot 2 (xmm4).
858 movdqa xmm1,xmm0 | |
859 pshufd xmm3,xmm0,78 | |
860 pxor xmm3,xmm0 | |
861 DB 102,15,58,68,194,0 | |
862 DB 102,15,58,68,202,17 | |
863 DB 102,15,58,68,220,0 | |
864 pxor xmm3,xmm0 | |
865 pxor xmm3,xmm1 | |
866 | |
; Fold the middle product into the low (xmm0) and high (xmm1) halves.
867 movdqa xmm4,xmm3 | |
868 psrldq xmm3,8 | |
869 pslldq xmm4,8 | |
870 pxor xmm1,xmm3 | |
871 pxor xmm0,xmm4 | |
872 | |
; Shift-and-xor reduction of the 256-bit product xmm1:xmm0 down to 128 bits.
873 movdqa xmm4,xmm0 | |
874 movdqa xmm3,xmm0 | |
875 psllq xmm0,5 | |
876 pxor xmm3,xmm0 | |
877 psllq xmm0,1 | |
878 pxor xmm0,xmm3 | |
879 psllq xmm0,57 | |
880 movdqa xmm3,xmm0 | |
881 pslldq xmm0,8 | |
882 psrldq xmm3,8 | |
883 pxor xmm0,xmm4 | |
884 pxor xmm1,xmm3 | |
885 | |
886 | |
887 movdqa xmm4,xmm0 | |
888 psrlq xmm0,1 | |
889 pxor xmm1,xmm4 | |
890 pxor xmm4,xmm0 | |
891 psrlq xmm0,5 | |
892 pxor xmm0,xmm4 | |
893 psrlq xmm0,1 | |
894 pxor xmm0,xmm1 | |
895 DB 102,15,56,0,197 | |
896 movdqu XMMWORD[rcx],xmm0 | |
897 DB 0F3h,0C3h ;repret | |
898 | |
; ----------------------------------------------------------------------
; gcm_ghash_clmul (also reached through $L$_ghash_clmul)
;
; In:   rcx -> 16-byte state block, loaded into xmm0 at entry and stored
;              back at $L$done
;       rdx -> table; the 16-byte slots at offsets 0, 16, 32, 48, 64 and 80
;              are read
;       r8  -> input data
;       r9  =  remaining input length in bytes
; Stack: 168 bytes are reserved (rax = rsp-136, then rsp = rax-32) and
;       xmm6..xmm15 are saved there; they are reloaded from [rsp+0] ..
;       [rsp+144] before the return.
; Paths: $L$mod4_loop / $L$tail4x consume 64 bytes per pass, $L$mod_loop /
;       $L$even_tail consume 32 bytes per pass, $L$odd_tail consumes the
;       last 16 bytes.
; Encoded-by-hand prologue (DB lines), decoded from their opcode bytes:
;       0x48,0x8d,0x60,0xe0           lea    rsp,[rax-0x20]
;       0x0f,0x29,0x70,0xe0           movaps [rax-0x20],xmm6
;       0x0f,0x29,0x78,0xf0           movaps [rax-0x10],xmm7
;       0x44,0x0f,0x29,0x00           movaps [rax],xmm8
;       0x44,0x0f,0x29,0x48,0x10 ...  movaps [rax+0x10..0x70],xmm9..xmm15
; The remaining DB lines are hand-encoded SSE instructions: the
; 102,..,15,56,0,.. forms are pshufb and the 102,..,15,58,68,.. forms are
; pclmulqdq (the last byte is the immediate).
; ----------------------------------------------------------------------
899 global gcm_ghash_clmul | |
900 | |
901 ALIGN 32 | |
902 gcm_ghash_clmul: | |
903 $L$_ghash_clmul: | |
904 lea rax,[((-136))+rsp] | |
905 $L$SEH_begin_gcm_ghash_clmul: | |
906 | |
907 DB 0x48,0x8d,0x60,0xe0 | |
908 DB 0x0f,0x29,0x70,0xe0 | |
909 DB 0x0f,0x29,0x78,0xf0 | |
910 DB 0x44,0x0f,0x29,0x00 | |
911 DB 0x44,0x0f,0x29,0x48,0x10 | |
912 DB 0x44,0x0f,0x29,0x50,0x20 | |
913 DB 0x44,0x0f,0x29,0x58,0x30 | |
914 DB 0x44,0x0f,0x29,0x60,0x40 | |
915 DB 0x44,0x0f,0x29,0x68,0x50 | |
916 DB 0x44,0x0f,0x29,0x70,0x60 | |
917 DB 0x44,0x0f,0x29,0x78,0x70 | |
; xmm10 holds the byte-reversal mask for the whole routine; xmm0 is the
; running state (byte-reversed here and reversed back at $L$done).
918 movdqa xmm10,XMMWORD[$L$bswap_mask] | |
919 | |
920 movdqu xmm0,XMMWORD[rcx] | |
921 movdqu xmm2,XMMWORD[rdx] | |
922 movdqu xmm7,XMMWORD[32+rdx] | |
923 DB 102,65,15,56,0,194 | |
924 | |
; Exactly 16 bytes of input: go straight to the single-block tail.
925 sub r9,0x10 | |
926 jz NEAR $L$odd_tail | |
927 | |
928 movdqu xmm6,XMMWORD[16+rdx] | |
929 mov eax,DWORD[((OPENSSL_ia32cap_P+4))] | |
; Fewer than 0x30 bytes left after the first 16: not enough for the 4-block
; path.
930 cmp r9,0x30 | |
931 jb NEAR $L$skip4x | |
932 | |
; The 4-block path is also skipped when, of capability bits 22 and 26
; (mask 71303168 = 0x04400000), only bit 22 is set (4194304 = 0x00400000).
; NOTE(review): presumably a CPU-model heuristic -- confirm the meaning of
; these bits against the OPENSSL_ia32cap_P layout.
933 and eax,71303168 | |
934 cmp eax,4194304 | |
935 je NEAR $L$skip4x | |
936 | |
937 sub r9,0x30 | |
938 mov rax,0xA040608020C0E000 | |
939 movdqu xmm14,XMMWORD[48+rdx] | |
940 movdqu xmm15,XMMWORD[64+rdx] | |
941 | |
942 | |
943 | |
944 | |
; Prime the 4-block pipeline with the first 64 bytes of input.
945 movdqu xmm3,XMMWORD[48+r8] | |
946 movdqu xmm11,XMMWORD[32+r8] | |
947 DB 102,65,15,56,0,218 | |
948 DB 102,69,15,56,0,218 | |
949 movdqa xmm5,xmm3 | |
950 pshufd xmm4,xmm3,78 | |
951 pxor xmm4,xmm3 | |
952 DB 102,15,58,68,218,0 | |
953 DB 102,15,58,68,234,17 | |
954 DB 102,15,58,68,231,0 | |
955 | |
956 movdqa xmm13,xmm11 | |
957 pshufd xmm12,xmm11,78 | |
958 pxor xmm12,xmm11 | |
959 DB 102,68,15,58,68,222,0 | |
960 DB 102,68,15,58,68,238,17 | |
961 DB 102,68,15,58,68,231,16 | |
962 xorps xmm3,xmm11 | |
963 xorps xmm5,xmm13 | |
964 movups xmm7,XMMWORD[80+rdx] | |
965 xorps xmm4,xmm12 | |
966 | |
967 movdqu xmm11,XMMWORD[16+r8] | |
968 movdqu xmm8,XMMWORD[r8] | |
969 DB 102,69,15,56,0,218 | |
970 DB 102,69,15,56,0,194 | |
971 movdqa xmm13,xmm11 | |
972 pshufd xmm12,xmm11,78 | |
973 pxor xmm0,xmm8 | |
974 pxor xmm12,xmm11 | |
975 DB 102,69,15,58,68,222,0 | |
976 movdqa xmm1,xmm0 | |
977 pshufd xmm8,xmm0,78 | |
978 pxor xmm8,xmm0 | |
979 DB 102,69,15,58,68,238,17 | |
980 DB 102,68,15,58,68,231,0 | |
981 xorps xmm3,xmm11 | |
982 xorps xmm5,xmm13 | |
983 | |
984 lea r8,[64+r8] | |
985 sub r9,0x40 | |
986 jc NEAR $L$tail4x | |
987 | |
988 jmp NEAR $L$mod4_loop | |
989 ALIGN 32 | |
; Steady state of the 4-block path: each pass finishes the multiply and
; reduction for the previous 64 bytes while loading and starting the
; multiplies for the next 64 bytes at [r8]. The loop continues while
; "sub r9,0x40" does not borrow.
990 $L$mod4_loop: | |
991 DB 102,65,15,58,68,199,0 | |
992 xorps xmm4,xmm12 | |
993 movdqu xmm11,XMMWORD[48+r8] | |
994 DB 102,69,15,56,0,218 | |
995 DB 102,65,15,58,68,207,17 | |
996 xorps xmm0,xmm3 | |
997 movdqu xmm3,XMMWORD[32+r8] | |
998 movdqa xmm13,xmm11 | |
999 DB 102,68,15,58,68,199,16 | |
1000 pshufd xmm12,xmm11,78 | |
1001 xorps xmm1,xmm5 | |
1002 pxor xmm12,xmm11 | |
1003 DB 102,65,15,56,0,218 | |
1004 movups xmm7,XMMWORD[32+rdx] | |
1005 xorps xmm8,xmm4 | |
1006 DB 102,68,15,58,68,218,0 | |
1007 pshufd xmm4,xmm3,78 | |
1008 | |
1009 pxor xmm8,xmm0 | |
1010 movdqa xmm5,xmm3 | |
1011 pxor xmm8,xmm1 | |
1012 pxor xmm4,xmm3 | |
1013 movdqa xmm9,xmm8 | |
1014 DB 102,68,15,58,68,234,17 | |
1015 pslldq xmm8,8 | |
1016 psrldq xmm9,8 | |
1017 pxor xmm0,xmm8 | |
1018 movdqa xmm8,XMMWORD[$L$7_mask] | |
1019 pxor xmm1,xmm9 | |
; 102,76,15,110,200 decodes to "movq xmm9,rax": rax still holds the
; 0xA040608020C0E000 constant loaded before the loop, which then serves as a
; byte lookup table for the pshufb that follows (index = xmm0 & $L$7_mask).
1020 DB 102,76,15,110,200 | |
1021 | |
1022 pand xmm8,xmm0 | |
1023 DB 102,69,15,56,0,200 | |
1024 pxor xmm9,xmm0 | |
1025 DB 102,68,15,58,68,231,0 | |
1026 psllq xmm9,57 | |
1027 movdqa xmm8,xmm9 | |
1028 pslldq xmm9,8 | |
1029 DB 102,15,58,68,222,0 | |
1030 psrldq xmm8,8 | |
1031 pxor xmm0,xmm9 | |
1032 pxor xmm1,xmm8 | |
1033 movdqu xmm8,XMMWORD[r8] | |
1034 | |
1035 movdqa xmm9,xmm0 | |
1036 psrlq xmm0,1 | |
1037 DB 102,15,58,68,238,17 | |
1038 xorps xmm3,xmm11 | |
1039 movdqu xmm11,XMMWORD[16+r8] | |
1040 DB 102,69,15,56,0,218 | |
1041 DB 102,15,58,68,231,16 | |
1042 xorps xmm5,xmm13 | |
1043 movups xmm7,XMMWORD[80+rdx] | |
1044 DB 102,69,15,56,0,194 | |
1045 pxor xmm1,xmm9 | |
1046 pxor xmm9,xmm0 | |
1047 psrlq xmm0,5 | |
1048 | |
1049 movdqa xmm13,xmm11 | |
1050 pxor xmm4,xmm12 | |
1051 pshufd xmm12,xmm11,78 | |
1052 pxor xmm0,xmm9 | |
1053 pxor xmm1,xmm8 | |
1054 pxor xmm12,xmm11 | |
1055 DB 102,69,15,58,68,222,0 | |
1056 psrlq xmm0,1 | |
1057 pxor xmm0,xmm1 | |
1058 movdqa xmm1,xmm0 | |
1059 DB 102,69,15,58,68,238,17 | |
1060 xorps xmm3,xmm11 | |
1061 pshufd xmm8,xmm0,78 | |
1062 pxor xmm8,xmm0 | |
1063 | |
1064 DB 102,68,15,58,68,231,0 | |
1065 xorps xmm5,xmm13 | |
1066 | |
1067 lea r8,[64+r8] | |
1068 sub r9,0x40 | |
1069 jnc NEAR $L$mod4_loop | |
1070 | |
; Drain the 4-block pipeline: finish the pending multiplies, combine and
; reduce into xmm0. "add r9,0x40" then recovers the count of bytes still
; unprocessed (0 -> $L$done, 16 -> $L$odd_tail, otherwise fall into the
; 2-block path).
1071 $L$tail4x: | |
1072 DB 102,65,15,58,68,199,0 | |
1073 DB 102,65,15,58,68,207,17 | |
1074 DB 102,68,15,58,68,199,16 | |
1075 xorps xmm4,xmm12 | |
1076 xorps xmm0,xmm3 | |
1077 xorps xmm1,xmm5 | |
1078 pxor xmm1,xmm0 | |
1079 pxor xmm8,xmm4 | |
1080 | |
1081 pxor xmm8,xmm1 | |
1082 pxor xmm1,xmm0 | |
1083 | |
1084 movdqa xmm9,xmm8 | |
1085 psrldq xmm8,8 | |
1086 pslldq xmm9,8 | |
1087 pxor xmm1,xmm8 | |
1088 pxor xmm0,xmm9 | |
1089 | |
1090 movdqa xmm4,xmm0 | |
1091 movdqa xmm3,xmm0 | |
1092 psllq xmm0,5 | |
1093 pxor xmm3,xmm0 | |
1094 psllq xmm0,1 | |
1095 pxor xmm0,xmm3 | |
1096 psllq xmm0,57 | |
1097 movdqa xmm3,xmm0 | |
1098 pslldq xmm0,8 | |
1099 psrldq xmm3,8 | |
1100 pxor xmm0,xmm4 | |
1101 pxor xmm1,xmm3 | |
1102 | |
1103 | |
1104 movdqa xmm4,xmm0 | |
1105 psrlq xmm0,1 | |
1106 pxor xmm1,xmm4 | |
1107 pxor xmm4,xmm0 | |
1108 psrlq xmm0,5 | |
1109 pxor xmm0,xmm4 | |
1110 psrlq xmm0,1 | |
1111 pxor xmm0,xmm1 | |
1112 add r9,0x40 | |
1113 jz NEAR $L$done | |
; xmm7 was repointed at slot 80 inside the 4-block path; reload slot 32 for
; the 2-block and single-block code below.
1114 movdqu xmm7,XMMWORD[32+rdx] | |
1115 sub r9,0x10 | |
1116 jz NEAR $L$odd_tail | |
; 2-block path: load 32 bytes, xor the first block into the state, and start
; the multiplies for the second block.
1117 $L$skip4x: | |
1118 | |
1119 | |
1120 | |
1121 | |
1122 | |
1123 movdqu xmm8,XMMWORD[r8] | |
1124 movdqu xmm3,XMMWORD[16+r8] | |
1125 DB 102,69,15,56,0,194 | |
1126 DB 102,65,15,56,0,218 | |
1127 pxor xmm0,xmm8 | |
1128 | |
1129 movdqa xmm5,xmm3 | |
1130 pshufd xmm4,xmm3,78 | |
1131 pxor xmm4,xmm3 | |
1132 DB 102,15,58,68,218,0 | |
1133 DB 102,15,58,68,234,17 | |
1134 DB 102,15,58,68,231,0 | |
1135 | |
1136 lea r8,[32+r8] | |
1137 nop | |
1138 sub r9,0x20 | |
1139 jbe NEAR $L$even_tail | |
1140 nop | |
1141 jmp NEAR $L$mod_loop | |
1142 | |
1143 ALIGN 32 | |
; Steady state of the 2-block path: finish the previous pair (multiply by
; slot 16 and reduce) while loading and starting the next 32 bytes at [r8].
; The loop continues while r9 stays above zero after "sub r9,0x20".
1144 $L$mod_loop: | |
1145 movdqa xmm1,xmm0 | |
1146 movdqa xmm8,xmm4 | |
1147 pshufd xmm4,xmm0,78 | |
1148 pxor xmm4,xmm0 | |
1149 | |
1150 DB 102,15,58,68,198,0 | |
1151 DB 102,15,58,68,206,17 | |
1152 DB 102,15,58,68,231,16 | |
1153 | |
1154 pxor xmm0,xmm3 | |
1155 pxor xmm1,xmm5 | |
1156 movdqu xmm9,XMMWORD[r8] | |
1157 pxor xmm8,xmm0 | |
1158 DB 102,69,15,56,0,202 | |
1159 movdqu xmm3,XMMWORD[16+r8] | |
1160 | |
1161 pxor xmm8,xmm1 | |
1162 pxor xmm1,xmm9 | |
1163 pxor xmm4,xmm8 | |
1164 DB 102,65,15,56,0,218 | |
1165 movdqa xmm8,xmm4 | |
1166 psrldq xmm8,8 | |
1167 pslldq xmm4,8 | |
1168 pxor xmm1,xmm8 | |
1169 pxor xmm0,xmm4 | |
1170 | |
1171 movdqa xmm5,xmm3 | |
1172 | |
1173 movdqa xmm9,xmm0 | |
1174 movdqa xmm8,xmm0 | |
1175 psllq xmm0,5 | |
1176 pxor xmm8,xmm0 | |
1177 DB 102,15,58,68,218,0 | |
1178 psllq xmm0,1 | |
1179 pxor xmm0,xmm8 | |
1180 psllq xmm0,57 | |
1181 movdqa xmm8,xmm0 | |
1182 pslldq xmm0,8 | |
1183 psrldq xmm8,8 | |
1184 pxor xmm0,xmm9 | |
1185 pshufd xmm4,xmm5,78 | |
1186 pxor xmm1,xmm8 | |
1187 pxor xmm4,xmm5 | |
1188 | |
1189 movdqa xmm9,xmm0 | |
1190 psrlq xmm0,1 | |
1191 DB 102,15,58,68,234,17 | |
1192 pxor xmm1,xmm9 | |
1193 pxor xmm9,xmm0 | |
1194 psrlq xmm0,5 | |
1195 pxor xmm0,xmm9 | |
1196 lea r8,[32+r8] | |
1197 psrlq xmm0,1 | |
1198 DB 102,15,58,68,231,0 | |
1199 pxor xmm0,xmm1 | |
1200 | |
1201 sub r9,0x20 | |
1202 ja NEAR $L$mod_loop | |
1203 | |
; Finish the last pending pair of blocks and reduce into xmm0.
1204 $L$even_tail: | |
1205 movdqa xmm1,xmm0 | |
1206 movdqa xmm8,xmm4 | |
1207 pshufd xmm4,xmm0,78 | |
1208 pxor xmm4,xmm0 | |
1209 | |
1210 DB 102,15,58,68,198,0 | |
1211 DB 102,15,58,68,206,17 | |
1212 DB 102,15,58,68,231,16 | |
1213 | |
1214 pxor xmm0,xmm3 | |
1215 pxor xmm1,xmm5 | |
1216 pxor xmm8,xmm0 | |
1217 pxor xmm8,xmm1 | |
1218 pxor xmm4,xmm8 | |
1219 movdqa xmm8,xmm4 | |
1220 psrldq xmm8,8 | |
1221 pslldq xmm4,8 | |
1222 pxor xmm1,xmm8 | |
1223 pxor xmm0,xmm4 | |
1224 | |
1225 movdqa xmm4,xmm0 | |
1226 movdqa xmm3,xmm0 | |
1227 psllq xmm0,5 | |
1228 pxor xmm3,xmm0 | |
1229 psllq xmm0,1 | |
1230 pxor xmm0,xmm3 | |
1231 psllq xmm0,57 | |
1232 movdqa xmm3,xmm0 | |
1233 pslldq xmm0,8 | |
1234 psrldq xmm3,8 | |
1235 pxor xmm0,xmm4 | |
1236 pxor xmm1,xmm3 | |
1237 | |
1238 | |
1239 movdqa xmm4,xmm0 | |
1240 psrlq xmm0,1 | |
1241 pxor xmm1,xmm4 | |
1242 pxor xmm4,xmm0 | |
1243 psrlq xmm0,5 | |
1244 pxor xmm0,xmm4 | |
1245 psrlq xmm0,1 | |
1246 pxor xmm0,xmm1 | |
; r9 is the counter left by the last "sub r9,0x20": zero falls through to the
; single-block tail, non-zero means all input has been consumed.
1247 test r9,r9 | |
1248 jnz NEAR $L$done | |
1249 | |
; Single remaining 16-byte block: xor into the state, multiply by slot 0
; (xmm2, with xmm7 for the middle product) and reduce.
1250 $L$odd_tail: | |
1251 movdqu xmm8,XMMWORD[r8] | |
1252 DB 102,69,15,56,0,194 | |
1253 pxor xmm0,xmm8 | |
1254 movdqa xmm1,xmm0 | |
1255 pshufd xmm3,xmm0,78 | |
1256 pxor xmm3,xmm0 | |
1257 DB 102,15,58,68,194,0 | |
1258 DB 102,15,58,68,202,17 | |
1259 DB 102,15,58,68,223,0 | |
1260 pxor xmm3,xmm0 | |
1261 pxor xmm3,xmm1 | |
1262 | |
1263 movdqa xmm4,xmm3 | |
1264 psrldq xmm3,8 | |
1265 pslldq xmm4,8 | |
1266 pxor xmm1,xmm3 | |
1267 pxor xmm0,xmm4 | |
1268 | |
1269 movdqa xmm4,xmm0 | |
1270 movdqa xmm3,xmm0 | |
1271 psllq xmm0,5 | |
1272 pxor xmm3,xmm0 | |
1273 psllq xmm0,1 | |
1274 pxor xmm0,xmm3 | |
1275 psllq xmm0,57 | |
1276 movdqa xmm3,xmm0 | |
1277 pslldq xmm0,8 | |
1278 psrldq xmm3,8 | |
1279 pxor xmm0,xmm4 | |
1280 pxor xmm1,xmm3 | |
1281 | |
1282 | |
1283 movdqa xmm4,xmm0 | |
1284 psrlq xmm0,1 | |
1285 pxor xmm1,xmm4 | |
1286 pxor xmm4,xmm0 | |
1287 psrlq xmm0,5 | |
1288 pxor xmm0,xmm4 | |
1289 psrlq xmm0,1 | |
1290 pxor xmm0,xmm1 | |
; Byte-reverse the state back, store it, restore xmm6..xmm15 and release the
; 168-byte frame.
1291 $L$done: | |
1292 DB 102,65,15,56,0,194 | |
1293 movdqu XMMWORD[rcx],xmm0 | |
1294 movaps xmm6,XMMWORD[rsp] | |
1295 movaps xmm7,XMMWORD[16+rsp] | |
1296 movaps xmm8,XMMWORD[32+rsp] | |
1297 movaps xmm9,XMMWORD[48+rsp] | |
1298 movaps xmm10,XMMWORD[64+rsp] | |
1299 movaps xmm11,XMMWORD[80+rsp] | |
1300 movaps xmm12,XMMWORD[96+rsp] | |
1301 movaps xmm13,XMMWORD[112+rsp] | |
1302 movaps xmm14,XMMWORD[128+rsp] | |
1303 movaps xmm15,XMMWORD[144+rsp] | |
1304 lea rsp,[168+rsp] | |
1305 $L$SEH_end_gcm_ghash_clmul: | |
1306 DB 0F3h,0C3h ;repret | |
1307 | |
; gcm_init_avx: exported alias with no code of its own. It jumps straight to
; $L$_init_clmul, so arguments and results are those of gcm_init_clmul.
1308 global gcm_init_avx | |
1309 | |
1310 ALIGN 32 | |
1311 gcm_init_avx: | |
1312 jmp NEAR $L$_init_clmul | |
1313 | |
; gcm_gmult_avx: exported alias with no code of its own. It jumps straight to
; $L$_gmult_clmul, so arguments and results are those of gcm_gmult_clmul.
1314 global gcm_gmult_avx | |
1315 | |
1316 ALIGN 32 | |
1317 gcm_gmult_avx: | |
1318 jmp NEAR $L$_gmult_clmul | |
1319 | |
; gcm_ghash_avx: exported alias with no code of its own. It jumps straight to
; $L$_ghash_clmul, so arguments and results are those of gcm_ghash_clmul.
1320 global gcm_ghash_avx | |
1321 | |
1322 ALIGN 32 | |
1323 gcm_ghash_avx: | |
1324 jmp NEAR $L$_ghash_clmul | |
1325 | |
; Read-only constants shared by the routines in this file (64-byte aligned).
1326 ALIGN 64 | |
; Byte indices 15..0: used as a pshufb control to reverse the 16 bytes of a
; block (loaded by gcm_gmult_clmul and gcm_ghash_clmul).
1327 $L$bswap_mask: | |
1328 DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 | |
; 128-bit constant with byte 0 = 1 and byte 15 = 0xc2; masked and xored into
; the shifted input by gcm_init_clmul.
1329 $L$0x1c2_polynomial: | |
1330 DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 | |
; Value 7 in the low dword of each qword; and-ed with xmm0 inside
; $L$mod4_loop of gcm_ghash_clmul.
1331 $L$7_mask: | |
1332 DD 7,0,7,0 | |
; Low qword = 7, high qword = 450 (0x1c2). Not referenced by any code in the
; portion of the file reviewed here.
1333 $L$7_mask_poly: | |
1334 DD 7,0,450,0 | |
1335 ALIGN 64 | |
1336 | |
; 16 qword entries, each emitted as a (low dword = 0, high dword) pair.
; Indexed as [rdx*8+r11] with rdx in 0..15 by gcm_gmult_4bit.
1337 $L$rem_4bit: | |
1338 DD 0,0,0,471859200,0,943718400,0,610271232 | |
1339 DD 0,1887436800,0,1822425088,0,1220542464,0,1423966208 | |
1340 DD 0,3774873600,0,4246732800,0,3644850176,0,3311403008 | |
1341 DD 0,2441084928,0,2376073216,0,2847932416,0,3051356160 | |
1342 | |
; $L$rem_8bit: 256 16-bit entries (32 rows of 8). Entry 0 is 0x0000 and
; entry 1 is 0x01C2.
; NOTE(review): not referenced inside this excerpt -- presumably indexed by a
; byte value in the 4-bit GHASH routine; confirm at the use site.
1343 $L$rem_8bit: | |
1344 DW 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E | |
1345 DW 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E | |
1346 DW 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E | |
1347 DW 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E | |
1348 DW 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E | |
1349 DW 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E | |
1350 DW 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E | |
1351 DW 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E | |
1352 DW 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE | |
1353 DW 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE | |
1354 DW 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE | |
1355 DW 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE | |
1356 DW 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E | |
1357 DW 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E | |
1358 DW 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE | |
1359 DW 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE | |
1360 DW 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E | |
1361 DW 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E | |
1362 DW 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E | |
1363 DW 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E | |
1364 DW 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E | |
1365 DW 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E | |
1366 DW 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E | |
1367 DW 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E | |
1368 DW 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE | |
1369 DW 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE | |
1370 DW 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE | |
1371 DW 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE | |
1372 DW 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E | |
1373 DW 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E | |
1374 DW 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE | |
1375 DW 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE | |
1376 | |
; NUL-terminated ASCII identification banner (51 characters plus the
; terminator, 52 bytes in total), followed by padding to a 64-byte boundary.
1377 DB "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>",0 | |
1381 ALIGN 64 | |
1382 EXTERN __imp_RtlVirtualUnwind | |
1383 | |
; se_handler: exception handler named by the .xdata records of
; gcm_gmult_4bit and gcm_ghash_4bit.
;
; Inputs read here: r8 = register-context record, r9 = dispatcher record.
; Field names quoted in the comments below follow the documented Win64
; CONTEXT / DISPATCHER_CONTEXT layouts.
; NOTE(review): confirm the numeric offsets against winnt.h.
;
; What it does:
;   1. Uses the two image-relative bounds stored as handler data to decide
;      whether the faulting Rip lies in the function body (after the
;      prologue label and before the epilogue label).
;   2. If so, recovers rbx, rbp and r12 from the three stack slots the
;      function pushed, and writes them into the context record.
;   3. In every case, recovers rdi and rsi from [8+rax] / [16+rax] (the
;      slots the WIN64 prologue stored them in) and fixes up Rsp.
;   4. Copies the context record and calls RtlVirtualUnwind.
; Returns eax = 1 (ExceptionContinueSearch in the documented enum).
1384 ALIGN 16 | |
1385 se_handler: | |
; Preserve every register this handler modifies, plus the flags.
1386 push rsi | |
1387 push rdi | |
1388 push rbx | |
1389 push rbp | |
1390 push r12 | |
1391 push r13 | |
1392 push r14 | |
1393 push r15 | |
1394 pushfq | |
; 64 bytes: 32 bytes of home space for the callee plus the four stack
; arguments stored at [32+rsp]..[56+rsp] further down.
1395 sub rsp,64 | |
1396 | |
; rax = context->Rax, rbx = context->Rip.
; gcm_gmult_4bit executes `mov rax,rsp` on entry, so Rax carries the entry
; stack pointer while the prologue is running.
1397 mov rax,QWORD[120+r8] | |
1398 mov rbx,QWORD[248+r8] | |
1399 | |
; rsi = disp->ImageBase, r11 = disp->HandlerData.
1400 mov rsi,QWORD[8+r9] | |
1401 mov r11,QWORD[56+r9] | |
1402 | |
; HandlerData[0] is the image-relative prologue label; rebase it and compare.
; Rip below it means the pushes have not all happened yet.
1403 mov r10d,DWORD[r11] | |
1404 lea r10,[r10*1+rsi] | |
1405 cmp rbx,r10 | |
1406 jb NEAR $L$in_prologue | |
1407 | |
; Past the prologue: work from context->Rsp instead.
1408 mov rax,QWORD[152+r8] | |
1409 | |
; HandlerData[1] is the image-relative epilogue label. Rip at or beyond it
; takes the same path as the in-prologue case (no register recovery).
1410 mov r10d,DWORD[4+r11] | |
1411 lea r10,[r10*1+rsi] | |
1412 cmp rbx,r10 | |
1413 jae NEAR $L$in_prologue | |
1414 | |
; In the body: step over the three 8-byte pushes (rbx, rbp, r12).
1415 lea rax,[24+rax] | |
1416 | |
; Reload the pushed values (rbx was pushed first, so it sits highest) and
; store them into context->Rbx, ->Rbp and ->R12.
1417 mov rbx,QWORD[((-8))+rax] | |
1418 mov rbp,QWORD[((-16))+rax] | |
1419 mov r12,QWORD[((-24))+rax] | |
1420 mov QWORD[144+r8],rbx | |
1421 mov QWORD[160+r8],rbp | |
1422 mov QWORD[216+r8],r12 | |
1423 | |
1424 $L$in_prologue: | |
; rax is the stack pointer value at function entry. The WIN64 prologue saved
; rdi at [8+rsp] and rsi at [16+rsp]; fetch them and update context->Rsp,
; ->Rsi and ->Rdi.
1425 mov rdi,QWORD[8+rax] | |
1426 mov rsi,QWORD[16+rax] | |
1427 mov QWORD[152+r8],rax | |
1428 mov QWORD[168+r8],rsi | |
1429 mov QWORD[176+r8],rdi | |
1430 | |
; Copy 154 qwords (1232 bytes) from the context record in r8 to
; disp->ContextRecord.
; The DD below encodes bytes FC F3 48 A5 = `cld` followed by `rep movsq`.
1431 mov rdi,QWORD[40+r9] | |
1432 mov rsi,r8 | |
1433 mov ecx,154 | |
1434 DD 0xa548f3fc | |
1435 | |
; Build the RtlVirtualUnwind call from the dispatcher record:
;   rcx = 0, rdx = [8+disp], r8 = [disp], r9 = [16+disp],
;   stack args = [40+disp], &disp[56], &disp[24], 0.
; NOTE(review): per the documented prototype these are HandlerType,
; ImageBase, ControlPc, FunctionEntry, ContextRecord, &HandlerData,
; &EstablisherFrame and ContextPointers = NULL; confirm.
1436 mov rsi,r9 | |
1437 xor rcx,rcx | |
1438 mov rdx,QWORD[8+rsi] | |
1439 mov r8,QWORD[rsi] | |
1440 mov r9,QWORD[16+rsi] | |
1441 mov r10,QWORD[40+rsi] | |
1442 lea r11,[56+rsi] | |
1443 lea r12,[24+rsi] | |
1444 mov QWORD[32+rsp],r10 | |
1445 mov QWORD[40+rsp],r11 | |
1446 mov QWORD[48+rsp],r12 | |
1447 mov QWORD[56+rsp],rcx | |
1448 call QWORD[__imp_RtlVirtualUnwind] | |
1449 | |
; Return value 1, then undo the frame in reverse order. popfq restores the
; flags saved on entry, including the direction flag changed by `cld`.
1450 mov eax,1 | |
1451 add rsp,64 | |
1452 popfq | |
1453 pop r15 | |
1454 pop r14 | |
1455 pop r13 | |
1456 pop r12 | |
1457 pop rbp | |
1458 pop rbx | |
1459 pop rdi | |
1460 pop rsi | |
1461 DB 0F3h,0C3h ;repret | |
1462 | |
1463 | |
; .pdata: function table. Each entry is three image-relative 32-bit values:
; the function's begin label, its end label, and the label of its
; unwind-info record in .xdata (the documented RUNTIME_FUNCTION shape).
; Four functions are registered: gcm_gmult_4bit, gcm_ghash_4bit,
; gcm_init_clmul and gcm_ghash_clmul.
1464 section .pdata rdata align=4 | |
1465 ALIGN 4 | |
1466 DD $L$SEH_begin_gcm_gmult_4bit wrt ..imagebase | |
1467 DD $L$SEH_end_gcm_gmult_4bit wrt ..imagebase | |
1468 DD $L$SEH_info_gcm_gmult_4bit wrt ..imagebase | |
1469 | |
1470 DD $L$SEH_begin_gcm_ghash_4bit wrt ..imagebase | |
1471 DD $L$SEH_end_gcm_ghash_4bit wrt ..imagebase | |
1472 DD $L$SEH_info_gcm_ghash_4bit wrt ..imagebase | |
1473 | |
1474 DD $L$SEH_begin_gcm_init_clmul wrt ..imagebase | |
1475 DD $L$SEH_end_gcm_init_clmul wrt ..imagebase | |
1476 DD $L$SEH_info_gcm_init_clmul wrt ..imagebase | |
1477 | |
1478 DD $L$SEH_begin_gcm_ghash_clmul wrt ..imagebase | |
1479 DD $L$SEH_end_gcm_ghash_clmul wrt ..imagebase | |
1480 DD $L$SEH_info_gcm_ghash_clmul wrt ..imagebase | |
; .xdata: unwind-info records referenced from the .pdata function table.
;
; $L$SEH_info_gcm_gmult_4bit
;   DB 9,0,0,0  first byte 9 = version 1 in the low three bits and flag
;               value 1 in the upper five (UNW_FLAG_EHANDLER in the
;               documented UNWIND_INFO layout -- confirm); prologue size,
;               unwind-code count and frame register are all zero.
;   DD          image-relative address of the handler, se_handler.
;   DD          handler data: image-relative prologue and epilogue labels.
;               se_handler reads them at offsets +0 and +4 and compares the
;               faulting Rip against each.
1481 section .xdata rdata align=8 | |
1482 ALIGN 8 | |
1483 $L$SEH_info_gcm_gmult_4bit: | |
1484 DB 9,0,0,0 | |
1485 DD se_handler wrt ..imagebase | |
1486 DD $L$gmult_prologue wrt ..imagebase,$L$gmult_epilogue wrt ..imagebase | |
; $L$SEH_info_gcm_ghash_4bit
;   DB 9,0,0,0  first byte 9 = version 1 in the low three bits and flag
;               value 1 in the upper five (UNW_FLAG_EHANDLER in the
;               documented UNWIND_INFO layout -- confirm); prologue size,
;               unwind-code count and frame register are all zero.
;   DD          image-relative address of the handler, se_handler.
;   DD          handler data: image-relative prologue and epilogue labels.
;               se_handler reads them at offsets +0 and +4 and compares the
;               faulting Rip against each.
1487 $L$SEH_info_gcm_ghash_4bit: | |
1488 DB 9,0,0,0 | |
1489 DD se_handler wrt ..imagebase | |
1490 DD $L$ghash_prologue wrt ..imagebase,$L$ghash_epilogue wrt ..imagebase | |
; $L$SEH_info_gcm_init_clmul: table-driven unwind record with no handler.
; Decoding below follows the documented UNWIND_INFO / UNWIND_CODE encoding.
; NOTE(review): confirm against the prologue of gcm_init_clmul, which is
; outside this excerpt.
1491 $L$SEH_info_gcm_init_clmul: | |
; Header: version 1 / flags 0, prologue size 8 bytes, 3 code slots,
; no frame register.
1492 DB 0x01,0x08,0x03,0x00 | |
; Prologue offset 8: op 8, register 6 (save xmm6 as 128 bits) at scaled
; offset 0.
1493 DB 0x08,0x68,0x00,0x00 | |
; Prologue offset 4: op 2 with info 2 (small stack allocation of
; 2*8+8 = 24 bytes); the trailing two bytes pad the odd slot count.
1494 DB 0x04,0x22,0x00,0x00 | |
; $L$SEH_info_gcm_ghash_clmul: table-driven unwind record with no handler.
; Decoding below follows the documented UNWIND_INFO / UNWIND_CODE encoding.
; It agrees with the function's epilogue, which reloads xmm6..xmm15 from
; [rsp+0]..[rsp+144] and then releases 168 bytes of stack.
1495 $L$SEH_info_gcm_ghash_clmul: | |
; Header: version 1 / flags 0, prologue size 0x33 bytes, 0x16 = 22 code
; slots, no frame register.
1496 DB 0x01,0x33,0x16,0x00 | |
; Ten two-slot entries: byte 0 = prologue offset, byte 1 = (register << 4)
; | op 8 (save 128-bit XMM), bytes 2-3 = stack offset / 16.
; xmm15 at 9*16 = 144 bytes down to xmm6 at 0.
1497 DB 0x33,0xf8,0x09,0x00 | |
1498 DB 0x2e,0xe8,0x08,0x00 | |
1499 DB 0x29,0xd8,0x07,0x00 | |
1500 DB 0x24,0xc8,0x06,0x00 | |
1501 DB 0x1f,0xb8,0x05,0x00 | |
1502 DB 0x1a,0xa8,0x04,0x00 | |
1503 DB 0x15,0x98,0x03,0x00 | |
1504 DB 0x10,0x88,0x02,0x00 | |
1505 DB 0x0c,0x78,0x01,0x00 | |
1506 DB 0x08,0x68,0x00,0x00 | |
; Prologue offset 4: op 1 with info 0 (large stack allocation); the next
; 16-bit value 0x15 is the size / 8, i.e. 21*8 = 168 bytes.
1507 DB 0x04,0x01,0x15,0x00 | |
OLD | NEW |