OLD | NEW |
| (Empty) |
#!/usr/local/bin/perl
#
# Generator script: uses the perlasm helpers from x86asm.pl to emit the
# comba multiply and square routines for 8-word and 4-word operands.
# $ARGV[0] and the script path are handed straight to &asm_init.

# Everything up to and including the last '/' or '\' of the script path;
# stays undefined when the path has no directory component.
($dir) = ($0 =~ m/(.*[\/\\])[^\/\\]+$/);

# Search for x86asm.pl beside this script first, then in ../../perlasm.
push(@INC,"${dir}");
push(@INC,"${dir}../../perlasm");
require "x86asm.pl";

&asm_init($ARGV[0],$0);

# The order of these calls is the order of the routines in the output.
&bn_mul_comba("bn_mul_comba8",8);
&bn_mul_comba("bn_mul_comba4",4);
&bn_sqr_comba("bn_sqr_comba8",8);
&bn_sqr_comba("bn_sqr_comba4",4);

&asm_finish();
15 | |
# Emit one multiply-accumulate step of a product column.
#
# The two operands are expected to be in eax and edx already; their
# product is added into the three-register accumulator $c0 (low),
# $c1 (middle) and $c2 (carry overflow).
#
# Arguments:
#   $a, $b        registers used as base for a[] and b[] in &DWP operands
#   $ai, $bi      word indices of this product (only used in the comment)
#   $c0,$c1,$c2   accumulator registers, low to high
#   $pos          selects the loads/stores woven into the add/adc chain:
#                   <0  none
#                    0  more products follow in this column:
#                       eax <- a[$na], edx <- b[$nb]
#                    1  last product of the column: r[$i] <- $c0 (r is
#                       fetched with &wparam(0)), then eax <- a[$na] and
#                       edx <- b[$nb] for the next column
#                   >1  last product of the routine: r[$i] <- $c0 only;
#                       eax is left holding &wparam(0)
#   $i            index of the result word stored when $pos > 0
#   $na, $nb      word indices of the operands to preload
#
# The loads are deliberately interleaved with the add/adc instructions
# (presumably for instruction scheduling); do not reorder the calls below.
sub mul_add_c
{
    local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("mul a[$ai]*b[$bi]");

    &mul("edx");
    &add($c0,"eax");
    if ($pos == 0)
    {
        # eax is free again: fetch the next a[] operand.
        &mov("eax",&DWP($na*4,$a,"",0));
    }
    elsif ($pos > 0)
    {
        # Column finished: eax becomes the pointer to r[].
        &mov("eax",&wparam(0));
    }

    &adc($c1,"edx");
    if ($pos == 0 || $pos == 1)
    {
        # edx is free again: fetch the next b[] operand.
        &mov("edx",&DWP($nb*4,$b,"",0));
    }

    &adc($c2,0);
    if ($pos > 0)
    {
        # Store the finished result word through the r[] pointer in eax.
        &mov(&DWP($i*4,"eax","",0),$c0);
        if ($pos == 1)
        {
            # Another column follows: replace the r[] pointer in eax
            # with the next a[] operand.
            &mov("eax",&DWP($na*4,$a,"",0));
        }
    }
}
43 | |
# Emit one accumulate step of a squaring column in which the product is
# added once (the caller in this file uses it for the a[k]*a[k] term).
#
# eax (and edx when $ai != $bi) must already hold the operand(s). The
# product is added into the accumulator $c0 (low), $c1 (middle) and
# $c2 (carry overflow).
#
# Arguments:
#   $r            register used as base for r[] in &DWP operands
#   $a            register used as base for a[] in &DWP operands
#   $ai, $bi      word indices of the product; equal indices multiply
#                 eax by itself, otherwise eax by edx
#   $c0,$c1,$c2   accumulator registers, low to high
#   $pos          0  preload eax <- a[$na] after the first add
#                 1  preload edx <- a[$nb] (only if $na != $nb), store
#                    r[$i] <- $c0, then preload eax <- a[$na]
#                >1  store r[$i] <- $c0 only
#                <0  no loads or stores
#   $i            index of the result word stored when $pos > 0
#   $na, $nb      word indices of the operands to preload
#
# The loads are interleaved with the add/adc instructions on purpose;
# do not reorder the calls below.
sub sqr_add_c
{
    local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("sqr a[$ai]*a[$bi]");

    # A diagonal term only needs eax; anything else multiplies by edx.
    &mul(($ai == $bi) ? "eax" : "edx");

    &add($c0,"eax");
    if ($pos == 0)
    {
        &mov("eax",&DWP($na*4,$a,"",0));
    }

    &adc($c1,"edx");
    if ($pos == 1 && $na != $nb)
    {
        # When $na == $nb the next step squares eax, so edx is not needed.
        &mov("edx",&DWP($nb*4,$a,"",0));
    }

    &adc($c2,0);
    if ($pos > 0)
    {
        # Column finished: write the result word out.
        &mov(&DWP($i*4,$r,"",0),$c0);
        if ($pos == 1)
        {
            # Another column follows: fetch its first a[] operand.
            &mov("eax",&DWP($na*4,$a,"",0));
        }
    }
}
72 | |
# Emit one accumulate step of a squaring column in which the product is
# added twice (the caller in this file uses it for the off-diagonal
# a[i]*a[j] terms).
#
# eax and edx must already hold the operands. The edx:eax product is
# doubled in place, with the bit shifted out of edx collected in $c2,
# and then added into the accumulator $c0 (low), $c1 (middle) and
# $c2 (carry overflow).
#
# Arguments:
#   $r            register used as base for r[] in &DWP operands
#   $a            register used as base for a[] in &DWP operands
#   $ai, $bi      word indices of the product; equal indices multiply
#                 eax by itself, otherwise eax by edx
#   $c0,$c1,$c2   accumulator registers, low to high
#   $pos          0 or 1  preload eax <- a[$na]
#                 >0      store r[$i] <- $c0
#                 <=1     preload edx <- a[$nb], but only if $na != $nb
#   $i            index of the result word stored when $pos > 0
#   $na, $nb      word indices of the operands to preload
#
# The loads are interleaved with the add/adc instructions on purpose;
# do not reorder the calls below.
sub sqr_add_c2
{
    local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("sqr a[$ai]*a[$bi]");

    &mul(($ai == $bi) ? "eax" : "edx");

    # Double the 64-bit product; the bit that falls out goes to $c2.
    &add("eax","eax");
    &adc("edx","edx");
    &adc($c2,0);

    # Accumulate the doubled product.
    &add($c0,"eax");
    &adc($c1,"edx");
    if ($pos == 0 || $pos == 1)
    {
        # eax has been consumed: fetch the next a[] operand.
        &mov("eax",&DWP($na*4,$a,"",0));
    }
    &adc($c2,0);

    if ($pos > 0)
    {
        # Column finished: write the result word out.
        &mov(&DWP($i*4,$r,"",0),$c0);
    }
    unless ($pos > 1 || $na == $nb)
    {
        # When $na == $nb the next step squares eax, so edx is not needed.
        &mov("edx",&DWP($nb*4,$a,"",0));
    }
}
104 | |
# Emit the routine $name, which multiplies two $num-word operands and
# writes the 2*$num-word product, one result word (column) at a time.
#
# Arguments:
#   $name   name of the emitted routine
#   $num    number of 32-bit words in each operand
#
# In the emitted routine &wparam(1) is loaded into esi (a[]) and
# &wparam(2) into edi (b[]). The result pointer &wparam(0) is only
# fetched inside mul_add_c when a column is finished. ebx/ecx/ebp form
# a three-word accumulator whose roles rotate after every column.
#
# All loop temporaries are localized, so calling this sub does not
# disturb package globals of the same names.
sub bn_mul_comba
{
    local($name,$num)=@_;
    local($a,$b,$c0,$c1,$c2);
    local($i,$j,$as,$bs,$be,$ai,$bi);
    local($tot,$end,$v,$na,$nb);

    &function_begin_B($name,"");

    $c0="ebx";
    $c1="ecx";
    $c2="ebp";
    $a="esi";
    $b="edi";

    # $as / $bs: indices of the first a[] / b[] word of the current column.
    # $be: index of the last b[] word of the current column.
    $as=0;
    $bs=0;
    $be=0;
    # Index of the last column handled inside the loop; word $tot+1 of the
    # result is whatever is left in $c0 afterwards.
    $tot=$num+$num-1;

    &push("esi");
    &mov($a,&wparam(1));
    &push("edi");
    &mov($b,&wparam(2));
    &push("ebp");
    &push("ebx");

    &xor($c0,$c0);
    &mov("eax",&DWP(0,$a,"",0));    # preload a[0]
    &xor($c1,$c1);
    &mov("edx",&DWP(0,$b,"",0));    # preload b[0]

    for ($i=0; $i<$tot; $i++)
    {
        $ai=$as;
        $bi=$bs;
        $end=$be+1;

        &comment("################## Calculate word $i");

        for ($j=$bs; $j<$end; $j++)
        {
            # The top accumulator word starts each column at zero.
            &xor($c2,$c2) if ($j == $bs);

            if (($j+1) == $end)
            {
                # Last product of this column: 1 = more columns follow,
                # 2 = this is the final column. The operands to preload
                # are the first pair of the next column.
                $v=(($i+1) == $tot) ? 2 : 1;
                $na=$as+($i < ($num-1));
                $nb=$bs+($i >= ($num-1));
            }
            else
            {
                # Walk down a[] and up b[] within the column.
                $v=0;
                $na=($ai-1);
                $nb=($bi+1);
            }

            &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
            if ($v)
            {
                &comment("saved r[$i]");
                # The stored low word drops out; the upper two words
                # become the low two of the next column.
                ($c0,$c1,$c2)=($c1,$c2,$c0);
            }
            $ai--;
            $bi++;
        }

        # First half: columns grow by starting one word further into a[].
        # Second half: columns shrink by starting one word further into b[].
        $as++ if ($i < ($num-1));
        $bs++ if ($i >= ($num-1));
        $be++ if ($i < ($num-1));
    }

    &comment("save r[$i]");
    # eax still holds &wparam(0): mul_add_c loads it for the final
    # column ($v == 2) and does not overwrite it afterwards.
    &mov(&DWP($i*4,"eax","",0),$c0);

    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
}
195 | |
# Emit the routine $name, which squares a $num-word operand and writes
# the 2*$num-word result, one result word (column) at a time.
#
# Arguments:
#   $name   name of the emitted routine
#   $num    number of 32-bit words in the operand
#
# In the emitted routine &wparam(0) is loaded into edi (r[]) and
# &wparam(1) into esi (a[]). ebx/ecx/ebp form a three-word accumulator
# whose roles rotate after every column. Each column is walked only as
# far as its middle: off-diagonal products go through sqr_add_c2 and
# the a[k]*a[k] term, when the column has one, through sqr_add_c.
#
# All loop temporaries are localized, so calling this sub does not
# disturb package globals of the same names.
sub bn_sqr_comba
{
    local($name,$num)=@_;
    local($r,$a,$c0,$c1,$c2);
    local($i,$j,$as,$bs,$be,$ai,$bi);
    local($tot,$end,$v,$na,$nb);

    &function_begin_B($name,"");

    $c0="ebx";
    $c1="ecx";
    $c2="ebp";
    $a="esi";
    $r="edi";

    &push("esi");
    &push("edi");
    &push("ebp");
    &push("ebx");
    &mov($r,&wparam(0));
    &mov($a,&wparam(1));
    &xor($c0,$c0);
    &xor($c1,$c1);
    &mov("eax",&DWP(0,$a,"",0));    # preload a[0]

    # $as / $bs: indices of the two a[] words that open the current column.
    # $be: upper bound for the inner loop counter.
    $as=0;
    $bs=0;
    $be=0;
    # Index of the last column handled inside the loop; word $tot+1 of the
    # result is whatever is left in $c0 afterwards.
    $tot=$num+$num-1;

    for ($i=0; $i<$tot; $i++)
    {
        $ai=$as;
        $bi=$bs;
        $end=$be+1;

        &comment("############### Calculate word $i");
        for ($j=$bs; $j<$end; $j++)
        {
            # The top accumulator word starts each column at zero.
            &xor($c2,$c2) if ($j == $bs);

            if (($ai-1) < ($bi+1))
            {
                # The two indices would meet or cross on the next step,
                # so this is the last product of the column:
                # 1 = more columns follow, 2 = this is the final column.
                # The operands to preload open the next column.
                $v=(($i+1) == $tot) ? 2 : 1;
                $na=$as+($i < ($num-1));
                $nb=$bs+($i >= ($num-1));
            }
            else
            {
                # Keep walking towards the middle of the column.
                $v=0;
                $na=$ai-1;
                $nb=$bi+1;
            }

            if ($ai == $bi)
            {
                &sqr_add_c($r,$a,$ai,$bi,
                    $c0,$c1,$c2,$v,$i,$na,$nb);
            }
            else
            {
                &sqr_add_c2($r,$a,$ai,$bi,
                    $c0,$c1,$c2,$v,$i,$na,$nb);
            }

            if ($v)
            {
                &comment("saved r[$i]");
                # The stored low word drops out; the upper two words
                # become the low two of the next column.
                ($c0,$c1,$c2)=($c1,$c2,$c0);
                last;
            }
            $ai--;
            $bi++;
        }

        # First half: columns grow by raising the first index.
        # Second half: columns shrink by raising the second index.
        $as++ if ($i < ($num-1));
        $bs++ if ($i >= ($num-1));
        $be++ if ($i < ($num-1));
    }

    # Store the remaining top word of the result.
    &mov(&DWP($i*4,$r,"",0),$c0);
    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
}
OLD | NEW |