OLD | NEW |
| (Empty) |
1 .text | |
2 | |
# ----------------------------------------------------------------------
# _mul_1x1 -- 64x64 -> 128-bit carry-less (XOR instead of ADD) multiply.
#
# File-local helper with a private register contract (not a C-callable
# function):
#   In:    %rax = multiplicand a
#          %rbp = multiplier b (consumed: shifted right 4 bits per step)
#          %r8  = nibble mask; only read here (the caller in this file
#                 loads 15, which the 16-entry table below requires)
#   Out:   %rax = low 64 bits of the product, %rdx = high 64 bits
#   Clobb: %rbx, %rcx, %rsi, %rdi, %rbp, %r9-%r14, %xmm0, %xmm1, flags.
#          None of these are saved here, including the callee-saved
#          %rbx/%rbp/%r12-%r14; the caller must preserve them.
#   Stack: 136 bytes; 0..120(%rsp) holds a 16-entry qword table where
#          tab[i] = (a with its top 3 bits cleared) times the 4-bit
#          polynomial i.
#
# Method: b is processed one nibble at a time through the table. Even
# nibbles are accumulated in %xmm0 using byte shifts, odd nibbles in
# %rdx:%rax using 4-bit-offset shifts. The top 3 bits of a, which would
# overflow 64-bit table entries, are handled separately up front.
# ----------------------------------------------------------------------
3 .type _mul_1x1,@function | |
4 .align 16 | |
5 _mul_1x1: | |
6 subq $128+8,%rsp | |
# r9 = a with bits 63..61 cleared (a1); rsi = a<<1, rdi = a<<2, r12 = a<<3 (a8).
7 movq $-1,%r9 | |
8 leaq (%rax,%rax,1),%rsi | |
9 shrq $3,%r9 | |
10 leaq (,%rax,4),%rdi | |
11 andq %rax,%r9 | |
12 leaq (,%rax,8),%r12 | |
# Arithmetic shifts turn bits 63, 62 and 61 of a into all-ones/all-zero
# masks in rax, rsi and rdi; each mask selects b, which is then shifted
# by 63/62/61 to form that bit's contribution to the 128-bit result.
13 sarq $63,%rax | |
14 leaq (%r9,%r9,1),%r10 | |
15 sarq $63,%rsi | |
16 leaq (,%r9,4),%r11 | |
17 andq %rbp,%rax | |
18 sarq $63,%rdi | |
19 movq %rax,%rdx | |
20 shlq $63,%rax | |
21 andq %rbp,%rsi | |
22 shrq $1,%rdx | |
23 movq %rsi,%rcx | |
24 shlq $62,%rsi | |
25 andq %rbp,%rdi | |
26 shrq $2,%rcx | |
27 xorq %rsi,%rax | |
28 movq %rdi,%rbx | |
29 shlq $61,%rdi | |
30 xorq %rcx,%rdx | |
31 shrq $3,%rbx | |
32 xorq %rdi,%rax | |
33 xorq %rbx,%rdx | |
34 | |
# Build the table. With a1=r9, a2=r10 (a1<<1), a4=r11 (a1<<2), a8=r12:
# tab[0]=0, tab[1]=a1, tab[2]=a2, tab[3]=a1^a2.
35 movq %r9,%r13 | |
36 movq $0,0(%rsp) | |
37 xorq %r10,%r13 | |
38 movq %r9,8(%rsp) | |
39 movq %r11,%r14 | |
40 movq %r10,16(%rsp) | |
41 xorq %r12,%r14 | |
42 movq %r13,24(%rsp) | |
43 | |
# tab[4..7]: the previous four entries each XORed with a4.
44 xorq %r11,%r9 | |
45 movq %r11,32(%rsp) | |
46 xorq %r11,%r10 | |
47 movq %r9,40(%rsp) | |
48 xorq %r11,%r13 | |
49 movq %r10,48(%rsp) | |
50 xorq %r14,%r9 | |
51 movq %r13,56(%rsp) | |
52 xorq %r14,%r10 | |
53 | |
# tab[8..11]: r14 = a4^a8, so XORing it into the tab[5..7] values
# cancels a4 and adds a8.
54 movq %r12,64(%rsp) | |
55 xorq %r14,%r13 | |
56 movq %r9,72(%rsp) | |
57 xorq %r11,%r9 | |
58 movq %r10,80(%rsp) | |
59 xorq %r11,%r10 | |
60 movq %r13,88(%rsp) | |
61 | |
# tab[12..15]: a4 XORed back in. Interleaved with this, the indices for
# the first nibbles of b are extracted (rsi = nibble 0, rdi = nibble 1).
62 xorq %r11,%r13 | |
63 movq %r14,96(%rsp) | |
64 movq %r8,%rsi | |
65 movq %r9,104(%rsp) | |
66 andq %rbp,%rsi | |
67 movq %r10,112(%rsp) | |
68 shrq $4,%rbp | |
69 movq %r13,120(%rsp) | |
70 movq %r8,%rdi | |
71 andq %rbp,%rdi | |
72 shrq $4,%rbp | |
73 | |
# Nibble 0 seeds xmm0 unshifted. From here on the code is software
# pipelined: each step looks up the next table index (rsi for the SSE
# stream, rdi for the integer stream) while folding in the previous one.
74 movq (%rsp,%rsi,8),%xmm0 | |
75 movq %r8,%rsi | |
76 andq %rbp,%rsi | |
77 shrq $4,%rbp | |
# Nibble 1 -> rdx:rax at bit offset 4; nibble 2 -> xmm0 at byte offset 1.
78 movq (%rsp,%rdi,8),%rcx | |
79 movq %r8,%rdi | |
80 movq %rcx,%rbx | |
81 shlq $4,%rcx | |
82 andq %rbp,%rdi | |
83 movq (%rsp,%rsi,8),%xmm1 | |
84 shrq $60,%rbx | |
85 xorq %rcx,%rax | |
86 pslldq $1,%xmm1 | |
87 movq %r8,%rsi | |
88 shrq $4,%rbp | |
89 xorq %rbx,%rdx | |
90 andq %rbp,%rsi | |
91 shrq $4,%rbp | |
92 pxor %xmm1,%xmm0 | |
# Nibble 3 -> bit offset 12; nibble 4 -> byte offset 2.
93 movq (%rsp,%rdi,8),%rcx | |
94 movq %r8,%rdi | |
95 movq %rcx,%rbx | |
96 shlq $12,%rcx | |
97 andq %rbp,%rdi | |
98 movq (%rsp,%rsi,8),%xmm1 | |
99 shrq $52,%rbx | |
100 xorq %rcx,%rax | |
101 pslldq $2,%xmm1 | |
102 movq %r8,%rsi | |
103 shrq $4,%rbp | |
104 xorq %rbx,%rdx | |
105 andq %rbp,%rsi | |
106 shrq $4,%rbp | |
107 pxor %xmm1,%xmm0 | |
# Nibble 5 -> bit offset 20; nibble 6 -> byte offset 3.
108 movq (%rsp,%rdi,8),%rcx | |
109 movq %r8,%rdi | |
110 movq %rcx,%rbx | |
111 shlq $20,%rcx | |
112 andq %rbp,%rdi | |
113 movq (%rsp,%rsi,8),%xmm1 | |
114 shrq $44,%rbx | |
115 xorq %rcx,%rax | |
116 pslldq $3,%xmm1 | |
117 movq %r8,%rsi | |
118 shrq $4,%rbp | |
119 xorq %rbx,%rdx | |
120 andq %rbp,%rsi | |
121 shrq $4,%rbp | |
122 pxor %xmm1,%xmm0 | |
# Nibble 7 -> bit offset 28; nibble 8 -> byte offset 4.
123 movq (%rsp,%rdi,8),%rcx | |
124 movq %r8,%rdi | |
125 movq %rcx,%rbx | |
126 shlq $28,%rcx | |
127 andq %rbp,%rdi | |
128 movq (%rsp,%rsi,8),%xmm1 | |
129 shrq $36,%rbx | |
130 xorq %rcx,%rax | |
131 pslldq $4,%xmm1 | |
132 movq %r8,%rsi | |
133 shrq $4,%rbp | |
134 xorq %rbx,%rdx | |
135 andq %rbp,%rsi | |
136 shrq $4,%rbp | |
137 pxor %xmm1,%xmm0 | |
# Nibble 9 -> bit offset 36; nibble 10 -> byte offset 5.
138 movq (%rsp,%rdi,8),%rcx | |
139 movq %r8,%rdi | |
140 movq %rcx,%rbx | |
141 shlq $36,%rcx | |
142 andq %rbp,%rdi | |
143 movq (%rsp,%rsi,8),%xmm1 | |
144 shrq $28,%rbx | |
145 xorq %rcx,%rax | |
146 pslldq $5,%xmm1 | |
147 movq %r8,%rsi | |
148 shrq $4,%rbp | |
149 xorq %rbx,%rdx | |
150 andq %rbp,%rsi | |
151 shrq $4,%rbp | |
152 pxor %xmm1,%xmm0 | |
# Nibble 11 -> bit offset 44; nibble 12 -> byte offset 6.
153 movq (%rsp,%rdi,8),%rcx | |
154 movq %r8,%rdi | |
155 movq %rcx,%rbx | |
156 shlq $44,%rcx | |
157 andq %rbp,%rdi | |
158 movq (%rsp,%rsi,8),%xmm1 | |
159 shrq $20,%rbx | |
160 xorq %rcx,%rax | |
161 pslldq $6,%xmm1 | |
162 movq %r8,%rsi | |
163 shrq $4,%rbp | |
164 xorq %rbx,%rdx | |
165 andq %rbp,%rsi | |
166 shrq $4,%rbp | |
167 pxor %xmm1,%xmm0 | |
# Nibble 13 -> bit offset 52; nibble 14 -> byte offset 7.
168 movq (%rsp,%rdi,8),%rcx | |
169 movq %r8,%rdi | |
170 movq %rcx,%rbx | |
171 shlq $52,%rcx | |
172 andq %rbp,%rdi | |
173 movq (%rsp,%rsi,8),%xmm1 | |
174 shrq $12,%rbx | |
175 xorq %rcx,%rax | |
176 pslldq $7,%xmm1 | |
# NOTE(review): the rsi index computed by the next four rsi/rbp
# instructions is never used for a lookup; rsi is overwritten by the
# xmm0 -> rsi move below. Looks like a leftover of the unrolled pattern.
177 movq %r8,%rsi | |
178 shrq $4,%rbp | |
179 xorq %rbx,%rdx | |
180 andq %rbp,%rsi | |
181 shrq $4,%rbp | |
182 pxor %xmm1,%xmm0 | |
# Nibble 15 (last) -> bit offset 60.
183 movq (%rsp,%rdi,8),%rcx | |
184 movq %rcx,%rbx | |
185 shlq $60,%rcx | |
# Hand-encoded: 66 48 0F 7E C6 = movq %xmm0,%rsi (low half of SSE sum).
186 .byte 102,72,15,126,198 | |
187 shrq $4,%rbx | |
188 xorq %rcx,%rax | |
189 psrldq $8,%xmm0 | |
190 xorq %rbx,%rdx | |
# Hand-encoded: 66 48 0F 7E C7 = movq %xmm0,%rdi (high half of SSE sum).
191 .byte 102,72,15,126,199 | |
# Merge the SSE accumulator into the integer accumulator rdx:rax.
192 xorq %rsi,%rax | |
193 xorq %rdi,%rdx | |
194 | |
195 addq $128+8,%rsp | |
# F3 C3 = "rep ret".
196 .byte 0xf3,0xc3 | |
197 .Lend_mul_1x1: | |
198 .size _mul_1x1,.-_mul_1x1 | |
199 | |
# ----------------------------------------------------------------------
# bn_GF2m_mul_2x2 -- 128x128 -> 256-bit carry-less multiplication.
#
# Register usage as seen in the code (System V argument registers):
#   %rdi       = pointer to four 64-bit result words (least significant
#                word at offset 0)
#   %rsi:%rdx  = first operand,  high:low 64-bit words
#   %rcx:%r8   = second operand, high:low 64-bit words
# Presumably matches a C prototype of the form
#   void bn_GF2m_mul_2x2(BN_ULONG *r, a1, a0, b1, b0)
# -- TODO confirm against the C declaration.
#
# Both paths use Karatsuba: three 64x64 products
#   hi = a1*b1, lo = a0*b0, mid = (a1^a0)*(b1^b0)
# and result = hi<<128 ^ (mid^hi^lo)<<64 ^ lo.
#
# Path selection: bit 33 of the 64-bit load from OPENSSL_ia32cap_P
# chooses the pclmulqdq path (presumably the PCLMULQDQ capability flag);
# otherwise the table-driven _mul_1x1 helper is called three times.
# ----------------------------------------------------------------------
200 .globl bn_GF2m_mul_2x2 | |
201 .type bn_GF2m_mul_2x2,@function | |
202 .align 16 | |
203 bn_GF2m_mul_2x2: | |
204 movq OPENSSL_ia32cap_P(%rip),%rax | |
205 btq $33,%rax | |
206 jnc .Lvanilla_mul_2x2 | |
207 | |
# Hand-encoded GPR -> XMM moves (66 REX.W 0F 6E /r):
#   movq %rsi,%xmm0 (a1); movq %rcx,%xmm1 (b1);
#   movq %rdx,%xmm2 (a0); movq %r8,%xmm3  (b0)
208 .byte 102,72,15,110,198 | |
209 .byte 102,72,15,110,201 | |
210 .byte 102,72,15,110,210 | |
211 .byte 102,73,15,110,216 | |
212 movdqa %xmm0,%xmm4 | |
213 movdqa %xmm1,%xmm5 | |
# pclmulqdq $0,%xmm1,%xmm0 : xmm0 = a1*b1
214 .byte 102,15,58,68,193,0 | |
215 pxor %xmm2,%xmm4 | |
216 pxor %xmm3,%xmm5 | |
# pclmulqdq $0,%xmm3,%xmm2 : xmm2 = a0*b0
217 .byte 102,15,58,68,211,0 | |
# pclmulqdq $0,%xmm5,%xmm4 : xmm4 = (a1^a0)*(b1^b0)
218 .byte 102,15,58,68,229,0 | |
# xmm4 = middle term = mid ^ hi ^ lo
219 xorps %xmm0,%xmm4 | |
220 xorps %xmm2,%xmm4 | |
# Split the middle term: its low qword goes into the upper half of the
# low 128 bits, its high qword into the lower half of the high 128 bits.
221 movdqa %xmm4,%xmm5 | |
222 pslldq $8,%xmm4 | |
223 psrldq $8,%xmm5 | |
224 pxor %xmm4,%xmm2 | |
225 pxor %xmm5,%xmm0 | |
# Unaligned stores: result words 0..1 from xmm2, words 2..3 from xmm0.
226 movdqu %xmm2,0(%rdi) | |
227 movdqu %xmm0,16(%rdi) | |
# F3 C3 = "rep ret".
228 .byte 0xf3,0xc3 | |
229 | |
230 .align 16 | |
# Fallback path. 136-byte frame layout:
#   0..24(%rsp)   : lo product (0,8) and hi product (16,24)
#   32..64(%rsp)  : spilled arguments rdi, rsi, rdx, rcx, r8
#   80..112(%rsp) : saved r14, r13, r12, rbp, rbx (all clobbered by
#                   _mul_1x1)
231 .Lvanilla_mul_2x2: | |
232 leaq -136(%rsp),%rsp | |
233 movq %r14,80(%rsp) | |
234 movq %r13,88(%rsp) | |
235 movq %r12,96(%rsp) | |
236 movq %rbp,104(%rsp) | |
237 movq %rbx,112(%rsp) | |
238 .Lbody_mul_2x2: | |
239 movq %rdi,32(%rsp) | |
240 movq %rsi,40(%rsp) | |
241 movq %rdx,48(%rsp) | |
242 movq %rcx,56(%rsp) | |
243 movq %r8,64(%rsp) | |
244 | |
# r8 = 0xf nibble mask for _mul_1x1; it is only read there, so it stays
# valid across all three calls. First product: hi = a1*b1.
245 movq $15,%r8 | |
246 movq %rsi,%rax | |
247 movq %rcx,%rbp | |
248 call _mul_1x1 | |
249 movq %rax,16(%rsp) | |
250 movq %rdx,24(%rsp) | |
251 | |
# Second product: lo = a0*b0.
252 movq 48(%rsp),%rax | |
253 movq 64(%rsp),%rbp | |
254 call _mul_1x1 | |
255 movq %rax,0(%rsp) | |
256 movq %rdx,8(%rsp) | |
257 | |
# Third product: mid = (a1^a0)*(b1^b0), left in rdx:rax.
258 movq 40(%rsp),%rax | |
259 movq 56(%rsp),%rbp | |
260 xorq 48(%rsp),%rax | |
261 xorq 64(%rsp),%rbp | |
262 call _mul_1x1 | |
# Reload: rcx:rbx = lo product, rsi:rdi = hi product, rbp = result pointer.
263 movq 0(%rsp),%rbx | |
264 movq 8(%rsp),%rcx | |
265 movq 16(%rsp),%rdi | |
266 movq 24(%rsp),%rsi | |
267 movq 32(%rsp),%rbp | |
268 | |
# Karatsuba recombination. Final values:
#   r[0] = lo.lo
#   r[1] = mid.lo ^ lo.lo ^ lo.hi ^ hi.lo
#   r[2] = mid.hi ^ lo.hi ^ hi.lo ^ hi.hi
#   r[3] = hi.hi
269 xorq %rdx,%rax | |
270 xorq %rcx,%rdx | |
271 xorq %rbx,%rax | |
272 movq %rbx,0(%rbp) | |
273 xorq %rdi,%rdx | |
274 movq %rsi,24(%rbp) | |
275 xorq %rsi,%rax | |
276 xorq %rsi,%rdx | |
277 xorq %rdx,%rax | |
278 movq %rdx,16(%rbp) | |
279 movq %rax,8(%rbp) | |
280 | |
# Restore callee-saved registers and release the frame.
281 movq 80(%rsp),%r14 | |
282 movq 88(%rsp),%r13 | |
283 movq 96(%rsp),%r12 | |
284 movq 104(%rsp),%rbp | |
285 movq 112(%rsp),%rbx | |
286 leaq 136(%rsp),%rsp | |
# F3 C3 = "rep ret".
287 .byte 0xf3,0xc3 | |
288 .Lend_mul_2x2: | |
289 .size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 | |
# NUL-terminated ASCII identification string embedded in the object:
# "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
290 .byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,11
1,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,3
2,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,
62,0 | |
291 .align 16 | |
OLD | NEW |