| OLD | NEW |
| (Empty) |
#!/usr/local/bin/perl
#
# Generates x86 assembly for the fixed-size Comba multiply and square
# routines (4- and 8-word variants) through the helpers in x86asm.pl.
#
# The first command-line argument and the script path are handed to
# asm_init(); what it does with them is defined in x86asm.pl.

# Directory part of the script path, including its trailing separator.
# When $0 has no directory component the match fails, $dir stays undefined
# and the two interpolations below expand to "" and "../../perlasm".
($dir) = ($0 =~ m/(.*[\/\\])[^\/\\]+$/);
push(@INC, "${dir}", "${dir}../../perlasm");
require "x86asm.pl";

&asm_init($ARGV[0], $0);

# The four routines are emitted in exactly this order.
&bn_mul_comba("bn_mul_comba8", 8);
&bn_mul_comba("bn_mul_comba4", 4);
&bn_sqr_comba("bn_sqr_comba8", 8);
&bn_sqr_comba("bn_sqr_comba4", 4);

&asm_finish();
| 15 | |
# mul_add_c - emit one multiply-accumulate step of the unrolled multiply.
#
# The generated code multiplies eax by edx (both must already hold the
# operands) and adds the 64-bit product into the three-register accumulator
# $c2:$c1:$c0.  Loads for the following step and the store of the finished
# result word are interleaved between the arithmetic instructions.
#
# Arguments:
#   $a, $b        registers holding the base addresses of a[] and b[]
#   $ai, $bi      indices of the words being multiplied (only used in the
#                 emitted comment)
#   $c0,$c1,$c2   accumulator registers, least significant first
#   $pos          scheduling mode:
#                   0   more products follow for this result word:
#                       load a[$na] into eax and b[$nb] into edx
#                   1   last product of this result word: store $c0 to
#                       r[$i], then load a[$na] / b[$nb] for the next word
#                   >1  very last product: store $c0 to r[$i], load nothing
#                 any other value emits the arithmetic only
#   $i            index of the result word stored when $pos > 0
#   $na, $nb      indices of the a[] / b[] words needed by the next step
#
# When $pos > 0, eax is overwritten with wparam(0), which is then used as the
# base address of r[].
#
# NOTE(review): the parameters are kept as `local` rather than `my`, as in
# the original; confirm nothing in x86asm.pl depends on seeing them through
# dynamic scope before changing that.
sub mul_add_c {
    local($a, $ai, $b, $bi, $c0, $c1, $c2, $pos, $i, $na, $nb) = @_;

    &comment("mul a[$ai]*b[$bi]");

    &mul("edx");
    &add($c0, "eax");

    # eax is free once the low half has been added: reuse it either for the
    # next a[] word or for the result pointer.
    if ($pos == 0) {
        &mov("eax", &DWP(($na)*4, $a, "", 0));
    }
    elsif ($pos > 0) {
        &mov("eax", &wparam(0));
    }

    &adc($c1, "edx");

    # edx is free once the high half has been added.
    if ($pos == 0 || $pos == 1) {
        &mov("edx", &DWP(($nb)*4, $b, "", 0));
    }

    &adc($c2, 0);

    if ($pos > 0) {
        # Result word $i is complete: write it out through the r[] pointer
        # that was loaded into eax above.
        &mov(&DWP($i*4, "eax", "", 0), $c0);

        # Unless this was the very last product, fetch the first a[] word of
        # the next result word.
        &mov("eax", &DWP(($na)*4, $a, "", 0)) if $pos == 1;
    }
}
| 43 | |
# sqr_add_c - emit one multiply-accumulate step of the unrolled squaring in
# which the product is added once (not doubled).
#
# The generated code multiplies eax by itself when $ai == $bi, otherwise by
# edx, and adds the 64-bit product into the accumulator $c2:$c1:$c0.  The
# operand registers must already be loaded.
#
# Arguments:
#   $r            register holding the base address of the result r[]
#   $a            register holding the base address of a[]
#   $ai, $bi      indices of the words being multiplied
#   $c0,$c1,$c2   accumulator registers, least significant first
#   $pos          scheduling mode:
#                   0   load a[$na] into eax for the next step
#                   1   store $c0 to r[$i], then load a[$na] into eax and,
#                       if $na != $nb, a[$nb] into edx
#                   >1  store $c0 to r[$i], load nothing
#   $i            index of the result word stored when $pos > 0
#   $na, $nb      indices of the a[] words needed by the next step
#
# NOTE(review): the parameters are kept as `local` rather than `my`, as in
# the original; confirm nothing in x86asm.pl depends on seeing them through
# dynamic scope before changing that.
sub sqr_add_c {
    local($r, $a, $ai, $bi, $c0, $c1, $c2, $pos, $i, $na, $nb) = @_;

    &comment("sqr a[$ai]*a[$bi]");

    # A diagonal term only needs eax; otherwise the second operand is in edx.
    &mul(($ai == $bi) ? "eax" : "edx");

    &add($c0, "eax");
    if ($pos == 0) {
        &mov("eax", &DWP(($na)*4, $a, "", 0));
    }

    &adc($c1, "edx");
    # edx is only reloaded when the next step multiplies two different words.
    if (($pos == 1) && ($na != $nb)) {
        &mov("edx", &DWP(($nb)*4, $a, "", 0));
    }

    &adc($c2, 0);

    if ($pos > 0) {
        # Result word $i is complete.
        &mov(&DWP($i*4, $r, "", 0), $c0);

        # Fetch a[$na] for the first product of the next result word.
        &mov("eax", &DWP(($na)*4, $a, "", 0)) if $pos == 1;
    }
}
| 72 | |
# sqr_add_c2 - emit one multiply-accumulate step of the unrolled squaring in
# which the product is added twice.
#
# The generated code multiplies eax by itself when $ai == $bi, otherwise by
# edx, doubles the 64-bit product in edx:eax (the bit shifted out goes into
# $c2) and then adds it into the accumulator $c2:$c1:$c0.  The operand
# registers must already be loaded.
#
# Arguments:
#   $r            register holding the base address of the result r[]
#   $a            register holding the base address of a[]
#   $ai, $bi      indices of the words being multiplied
#   $c0,$c1,$c2   accumulator registers, least significant first
#   $pos          scheduling mode:
#                   0   load a[$na] into eax and, if $na != $nb, a[$nb]
#                       into edx
#                   1   as 0, and additionally store $c0 to r[$i]
#                   >1  store $c0 to r[$i], load nothing
#   $i            index of the result word stored when $pos > 0
#   $na, $nb      indices of the a[] words needed by the next step
#
# NOTE(review): the parameters are kept as `local` rather than `my`, as in
# the original; confirm nothing in x86asm.pl depends on seeing them through
# dynamic scope before changing that.
sub sqr_add_c2 {
    local($r, $a, $ai, $bi, $c0, $c1, $c2, $pos, $i, $na, $nb) = @_;

    &comment("sqr a[$ai]*a[$bi]");

    &mul(($ai == $bi) ? "eax" : "edx");

    # Double the product in place; the carry out of edx lands in $c2.
    &add("eax", "eax");
    &adc("edx", "edx");
    &adc($c2, 0);

    # Accumulate the doubled product.  The eax reload sits between the two
    # adc instructions; mov leaves the carry flag untouched.
    &add($c0, "eax");
    &adc($c1, "edx");
    if ($pos == 0 || $pos == 1) {
        &mov("eax", &DWP(($na)*4, $a, "", 0));
    }
    &adc($c2, 0);

    # Result word $i is complete once the last product has been added.
    if ($pos > 0) {
        &mov(&DWP($i*4, $r, "", 0), $c0);
    }

    # edx is only reloaded when the next step multiplies two different words.
    if (($pos <= 1) && ($na != $nb)) {
        &mov("edx", &DWP(($nb)*4, $a, "", 0));
    }
}
| 104 | |
# bn_mul_comba - emit a fully unrolled $num x $num word multiplication.
#
# Arguments:
#   $name   name of the generated function
#   $num    number of 32-bit words in each operand (must be >= 1; this file
#           uses 4 and 8)
#
# The generated function takes its operands from wparam(0..2): wparam(1) is
# loaded as the base of a[], wparam(2) as the base of b[], and wparam(0) is
# used by mul_add_c as the base of the result r[], which receives 2*$num
# words.
#
# Result word $i is the sum of every product a[$ai]*b[$bi] with
# $ai + $bi == $i.  The sum is kept in a three-register accumulator
# ($c0 lowest); after a word is stored the register roles rotate so the two
# upper registers become the low part of the next word.
#
# Dies if $num is smaller than 1, before any code is emitted.
#
# NOTE(review): variables are kept as `local` rather than `my`, as in the
# original; confirm nothing in x86asm.pl depends on seeing them through
# dynamic scope before changing that.
sub bn_mul_comba {
    local($name, $num) = @_;
    local($a, $b, $c0, $c1, $c2);
    local($i, $j, $as, $bs, $be, $ai, $bi);
    local($tot, $end, $v, $na, $nb);

    die "bn_mul_comba: word count must be at least 1\n" if $num < 1;

    &function_begin_B($name, "");

    $c0 = "ebx";
    $c1 = "ecx";
    $c2 = "ebp";
    $a  = "esi";
    $b  = "edi";

    # $as / $bs: a[] and b[] indices of the first product of the current
    # result word; $be: b[] index of its last product.
    $as = 0;
    $bs = 0;
    $be = 0;
    $tot = $num + $num - 1;

    # The register saves and argument loads are emitted interleaved, in
    # this exact order.
    &push("esi");
    &mov($a, &wparam(1));
    &push("edi");
    &mov($b, &wparam(2));
    &push("ebp");
    &push("ebx");

    &xor($c0, $c0);
    &mov("eax", &DWP(0, $a, "", 0));    # preload a[0]
    &xor($c1, $c1);
    &mov("edx", &DWP(0, $b, "", 0));    # preload b[0]

    for ($i = 0; $i < $tot; $i++) {
        $ai  = $as;
        $bi  = $bs;
        $end = $be + 1;

        &comment("################## Calculate word $i");

        for ($j = $bs; $j < $end; $j++) {
            # The top accumulator register starts each result word at zero.
            &xor($c2, $c2) if ($j == $bs);

            if (($j + 1) == $end) {
                # Last product of this word: 1 = store and preload for the
                # next word, 2 = store only (nothing follows).
                $v  = (($i + 1) == $tot) ? 2 : 1;
                # The next word starts one a[] word further while the low
                # half is being produced, one b[] word further afterwards.
                $na = $as + (($i <  ($num - 1)) ? 1 : 0);
                $nb = $bs + (($i >= ($num - 1)) ? 1 : 0);
            }
            else {
                # More products follow for this word: step along it.
                $v  = 0;
                $na = $ai - 1;
                $nb = $bi + 1;
            }

            &mul_add_c($a, $ai, $b, $bi, $c0, $c1, $c2, $v, $i, $na, $nb);

            if ($v) {
                &comment("saved r[$i]");
                # r[$i] has been written by mul_add_c; rotate the
                # accumulator registers for the next word.
                ($c0, $c1, $c2) = ($c1, $c2, $c0);
            }
            $ai--;
            $bi++;
        }

        if ($i < ($num - 1)) {
            $as++;
            $be++;
        }
        else {
            $bs++;
        }
    }

    # $i now equals $tot.  The remaining accumulator word is the top result
    # word; eax still holds the r[] pointer loaded by the final mul_add_c.
    &comment("save r[$i]");
    &mov(&DWP($i*4, "eax", "", 0), $c0);

    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
}
| 195 | |
# bn_sqr_comba - emit a fully unrolled squaring of a $num-word number.
#
# Arguments:
#   $name   name of the generated function
#   $num    number of 32-bit words in the operand (must be >= 1; this file
#           uses 4 and 8)
#
# The generated function loads wparam(0) as the base of the result r[] and
# wparam(1) as the base of a[]; r[] receives 2*$num words.
#
# For result word $i only the products a[$ai]*a[$bi] with $ai + $bi == $i
# and $ai >= $bi are generated.  A product with $ai == $bi is added once
# (sqr_add_c); every other product is added doubled (sqr_add_c2).  The sum is
# kept in a three-register accumulator ($c0 lowest) whose register roles
# rotate after each stored word.
#
# Dies if $num is smaller than 1, before any code is emitted.
#
# NOTE(review): variables are kept as `local` rather than `my`, as in the
# original; confirm nothing in x86asm.pl depends on seeing them through
# dynamic scope before changing that.
sub bn_sqr_comba {
    local($name, $num) = @_;
    local($r, $a, $c0, $c1, $c2);
    local($i, $j, $as, $bs, $be, $ai, $bi);
    local($tot, $end, $v, $na, $nb);

    die "bn_sqr_comba: word count must be at least 1\n" if $num < 1;

    &function_begin_B($name, "");

    $c0 = "ebx";
    $c1 = "ecx";
    $c2 = "ebp";
    $a  = "esi";
    $r  = "edi";

    &push("esi");
    &push("edi");
    &push("ebp");
    &push("ebx");
    &mov($r, &wparam(0));
    &mov($a, &wparam(1));
    &xor($c0, $c0);
    &xor($c1, $c1);
    &mov("eax", &DWP(0, $a, "", 0));    # preload a[0]

    # $as / $bs: indices of the first product of the current result word;
    # $be: upper bound for the second index.
    $as = 0;
    $bs = 0;
    $be = 0;
    $tot = $num + $num - 1;

    for ($i = 0; $i < $tot; $i++) {
        $ai  = $as;
        $bi  = $bs;
        $end = $be + 1;

        &comment("############### Calculate word $i");

        for ($j = $bs; $j < $end; $j++) {
            # The top accumulator register starts each result word at zero.
            &xor($c2, $c2) if ($j == $bs);

            if (($ai - 1) < ($bi + 1)) {
                # The next pair would have $ai < $bi, so this is the last
                # product needed for this word: 1 = store and preload for
                # the next word, 2 = store only (nothing follows).
                $v  = (($i + 1) == $tot) ? 2 : 1;
                $na = $as + (($i <  ($num - 1)) ? 1 : 0);
                $nb = $bs + (($i >= ($num - 1)) ? 1 : 0);
            }
            else {
                # More products follow for this word: step along it.
                $v  = 0;
                $na = $ai - 1;
                $nb = $bi + 1;
            }

            if ($ai == $bi) {
                &sqr_add_c($r, $a, $ai, $bi,
                    $c0, $c1, $c2, $v, $i, $na, $nb);
            }
            else {
                &sqr_add_c2($r, $a, $ai, $bi,
                    $c0, $c1, $c2, $v, $i, $na, $nb);
            }

            if ($v) {
                &comment("saved r[$i]");
                # r[$i] has been written by the helper; rotate the
                # accumulator registers and skip the remaining pairs.
                ($c0, $c1, $c2) = ($c1, $c2, $c0);
                last;
            }
            $ai--;
            $bi++;
        }

        if ($i < ($num - 1)) {
            $as++;
            $be++;
        }
        else {
            $bs++;
        }
    }

    # $i now equals $tot: the remaining accumulator word is the top result
    # word.
    &mov(&DWP($i*4, $r, "", 0), $c0);
    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
}
| OLD | NEW |