| OLD | NEW |
| (Empty) |
| 1 dnl IA-64 mpn_mul_1, mpn_mul_1c -- Multiply a limb vector with a limb and | |
| 2 dnl store the result in a second limb vector. | |
| 3 | |
| 4 dnl Copyright 2000, 2001, 2002, 2003, 2004, 2006, 2007 Free Software | |
| 5 dnl Foundation, Inc. | |
| 6 | |
| 7 dnl This file is part of the GNU MP Library. | |
| 8 | |
| 9 dnl The GNU MP Library is free software; you can redistribute it and/or modify | |
| 10 dnl it under the terms of the GNU Lesser General Public License as published | |
| 11 dnl by the Free Software Foundation; either version 3 of the License, or (at | |
| 12 dnl your option) any later version. | |
| 13 | |
| 14 dnl The GNU MP Library is distributed in the hope that it will be useful, but | |
| 15 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
| 16 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public | |
| 17 dnl License for more details. | |
| 18 | |
| 19 dnl You should have received a copy of the GNU Lesser General Public License | |
| 20 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. | |
| 21 | |
| 22 include(`../config.m4') | |
| 23 | |
| 24 C cycles/limb | |
| 25 C Itanium: 4.0 | |
| 26 C Itanium 2: 2.0 | |
| 27 | |
| 28 C TODO | |
| 29 C * Further optimize feed-in and wind-down code, both for speed and code size. | |
| 30 C * Handle low limb input and results specially, using a common stf8 in the | |
| 31 C epilogue. | |
| 32 C * Use 1 c/l carry propagation scheme in wind-down code. | |
| 33 C * Use extra pointer register for `up' to speed up feed-in loads. | |
| 34 C * Work out final differences with addmul_1.asm. | |
| 35 | |
| 36 C INPUT PARAMETERS | |
| 37 define(`rp', `r32') C destination limb vector, written with stf8 and st8 | |
| 38 define(`up', `r33') C source limb vector, read with post-incrementing ldf8 | |
| 39 define(`n', `r34') C limb count; n mod 4 selects the feed-in path | |
| 40 define(`vl', `r35') C multiplier limb, moved into f6 | |
| 41 define(`cy', `r36') C carry-in limb, moved into f9; for mpn_mul_1c | |
| 42 | |
| 43 ASM_START() | |
| 44 PROLOGUE(mpn_mul_1) | |
| 45 .prologue | |
| 46 .save ar.lc, r2 C unwind annotation: the entry value of ar.lc is kept in r2 | |
| 47 .body | |
| 48 C mpn_mul_1: store the low limbs of {up,n} * vl at rp; the high limb is left in r8. | |
| 49 ifdef(`HAVE_ABI_32', | |
| 50 ` addp4 rp = 0, rp C M I | |
| 51 addp4 up = 0, up C M I | |
| 52 zxt4 n = n C I | |
| 53 ;; | |
| 54 ') | |
| 55 {.mfi | |
| 56 adds r15 = -1, n C M I -- r15 = n - 1 | |
| 57 mov f9 = f0 C F -- f9 is the addend of the first product; f0 reads as zero | |
| 58 mov.i r2 = ar.lc C I0 -- save ar.lc, restored before every br.ret | |
| 59 } | |
| 60 {.mmi | |
| 61 ldf8 f7 = [up], 8 C M -- f7 = first source limb, up advances by 8 | |
| 62 nop.m 0 C M | |
| 63 and r14 = 3, n C M I -- r14 = n mod 4 | |
| 64 ;; | |
| 65 } | |
| 66 .Lcommon: C mpn_mul_1c branches here with f9 = cy | |
| 67 {.mii | |
| 68 setf.sig f6 = vl C M2 M3 -- f6 = multiplier for every xma | |
| 69 shr.u r31 = r15, 2 C I0 -- r31 = (n - 1) / 4 | |
| 70 cmp.eq p10, p0 = 0, r14 C M I -- p10: n mod 4 == 0 | |
| 71 } | |
| 72 {.mii | |
| 73 cmp.eq p11, p0 = 2, r14 C M I -- p11: n mod 4 == 2 | |
| 74 cmp.eq p12, p0 = 3, r14 C M I -- p12: n mod 4 == 3 | |
| 75 nop.i 0 C I | |
| 76 ;; | |
| 77 } | |
| 78 {.mii | |
| 79 cmp.ne p6, p7 = r0, r0 C M I -- p6 = 0, p7 = 1: no carry pending | |
| 80 mov.i ar.lc = r31 C I0 -- counter consumed by the br.cloop instructions | |
| 81 cmp.ne p8, p9 = r0, r0 C M I -- p8 = 0, p9 = 1: no carry pending | |
| 82 } | |
| 83 {.bbb | |
| 84 (p10) br.dptk .Lb00 C B | |
| 85 (p11) br.dptk .Lb10 C B | |
| 86 (p12) br.dptk .Lb11 C B -- otherwise n mod 4 == 1: fall into .Lb01 | |
| 87 ;; | |
| 88 } | |
| 89 | |
| 90 .Lb01: mov r20 = 0 C feed-in for n mod 4 == 1; r20 = 0 is added to the first low half at .LL01 | |
| 91 br.cloop.dptk .grt1 C B -- taken when ar.lc != 0, i.e. n >= 5 | |
| 92 C n == 1: a single product, finished without entering the loop | |
| 93 xma.l f39 = f7, f6, f9 C F -- low 64 bits of f7 * f6 + f9 | |
| 94 xma.hu f43 = f7, f6, f9 C F -- high 64 bits of f7 * f6 + f9 | |
| 95 ;; | |
| 96 getf.sig r8 = f43 C M2 -- high limb left in r8 | |
| 97 stf8 [rp] = f39 C M2 M3 -- low limb stored at rp | |
| 98 mov.i ar.lc = r2 C I0 -- restore the saved ar.lc | |
| 99 br.ret.sptk.many b0 C B | |
| 100 C n >= 5: load four more limbs into f32..f35 and form the first product | |
| 101 .grt1: | |
| 102 ldf8 f32 = [up], 8 | |
| 103 ;; | |
| 104 ldf8 f33 = [up], 8 | |
| 105 ;; | |
| 106 ldf8 f34 = [up], 8 | |
| 107 xma.l f39 = f7, f6, f9 | |
| 108 xma.hu f43 = f7, f6, f9 | |
| 109 ;; | |
| 110 ldf8 f35 = [up], 8 | |
| 111 br.cloop.dptk .grt5 C taken when n >= 9 | |
| 112 C n == 5: form the other four products, then finish in the wind-down code | |
| 113 xma.l f36 = f32, f6, f0 | |
| 114 xma.hu f40 = f32, f6, f0 | |
| 115 ;; | |
| 116 stf8 [rp] = f39, 8 C first result limb is the low half of the first product | |
| 117 xma.l f37 = f33, f6, f0 | |
| 118 xma.hu f41 = f33, f6, f0 | |
| 119 ;; | |
| 120 getf.sig r21 = f43 | |
| 121 getf.sig r18 = f36 | |
| 122 xma.l f38 = f34, f6, f0 | |
| 123 xma.hu f42 = f34, f6, f0 | |
| 124 ;; | |
| 125 getf.sig r22 = f40 | |
| 126 getf.sig r19 = f37 | |
| 127 xma.l f39 = f35, f6, f0 | |
| 128 xma.hu f43 = f35, f6, f0 | |
| 129 ;; | |
| 130 getf.sig r23 = f41 | |
| 131 getf.sig r16 = f38 | |
| 132 br .Lcj5 | |
| 133 C n >= 9: keep loading ahead and enter the main loop at .LL01 | |
| 134 .grt5: | |
| 135 xma.l f36 = f32, f6, f0 | |
| 136 xma.hu f40 = f32, f6, f0 | |
| 137 ;; | |
| 138 getf.sig r17 = f39 | |
| 139 ldf8 f32 = [up], 8 | |
| 140 xma.l f37 = f33, f6, f0 | |
| 141 xma.hu f41 = f33, f6, f0 | |
| 142 ;; | |
| 143 getf.sig r21 = f43 | |
| 144 ldf8 f33 = [up], 8 | |
| 145 xma.l f38 = f34, f6, f0 | |
| 146 ;; | |
| 147 getf.sig r18 = f36 | |
| 148 xma.hu f42 = f34, f6, f0 | |
| 149 ;; | |
| 150 getf.sig r22 = f40 | |
| 151 ldf8 f34 = [up], 8 | |
| 152 xma.l f39 = f35, f6, f0 | |
| 153 ;; | |
| 154 getf.sig r19 = f37 | |
| 155 xma.hu f43 = f35, f6, f0 | |
| 156 br .LL01 | |
| 157 | |
| 158 | |
| 159 .Lb10: ldf8 f35 = [up], 8 C feed-in for n mod 4 == 2; f35 = second source limb | |
| 160 mov r23 = 0 C added to the first low half at .LL10 | |
| 161 br.cloop.dptk .grt2 C taken when ar.lc != 0, i.e. n >= 6 | |
| 162 C n == 2: two products, finished without entering the loop | |
| 163 xma.l f38 = f7, f6, f9 | |
| 164 xma.hu f42 = f7, f6, f9 | |
| 165 ;; | |
| 166 stf8 [rp] = f38, 8 | |
| 167 xma.l f39 = f35, f6, f42 C the first high half is the addend, so no integer add is needed | |
| 168 xma.hu f43 = f35, f6, f42 | |
| 169 ;; | |
| 170 getf.sig r8 = f43 C high limb left in r8 | |
| 171 stf8 [rp] = f39 | |
| 172 mov.i ar.lc = r2 C restore the saved ar.lc | |
| 173 br.ret.sptk.many b0 | |
| 174 | |
| 175 C n >= 6: load four more limbs and form the first two products | |
| 176 .grt2: | |
| 177 ldf8 f32 = [up], 8 | |
| 178 ;; | |
| 179 ldf8 f33 = [up], 8 | |
| 180 xma.l f38 = f7, f6, f9 | |
| 181 xma.hu f42 = f7, f6, f9 | |
| 182 ;; | |
| 183 ldf8 f34 = [up], 8 | |
| 184 xma.l f39 = f35, f6, f0 | |
| 185 xma.hu f43 = f35, f6, f0 | |
| 186 ;; | |
| 187 ldf8 f35 = [up], 8 | |
| 188 br.cloop.dptk .grt6 C taken when n >= 10 | |
| 189 C n == 6: form the other four products, then finish in the wind-down code | |
| 190 stf8 [rp] = f38, 8 C first result limb is the low half of the first product | |
| 191 xma.l f36 = f32, f6, f0 | |
| 192 xma.hu f40 = f32, f6, f0 | |
| 193 ;; | |
| 194 getf.sig r20 = f42 | |
| 195 getf.sig r17 = f39 | |
| 196 xma.l f37 = f33, f6, f0 | |
| 197 xma.hu f41 = f33, f6, f0 | |
| 198 ;; | |
| 199 getf.sig r21 = f43 | |
| 200 getf.sig r18 = f36 | |
| 201 xma.l f38 = f34, f6, f0 | |
| 202 xma.hu f42 = f34, f6, f0 | |
| 203 ;; | |
| 204 getf.sig r22 = f40 | |
| 205 getf.sig r19 = f37 | |
| 206 xma.l f39 = f35, f6, f0 | |
| 207 xma.hu f43 = f35, f6, f0 | |
| 208 br .Lcj6 | |
| 209 C n >= 10: keep loading ahead and enter the main loop at .LL10 | |
| 210 .grt6: | |
| 211 getf.sig r16 = f38 | |
| 212 xma.l f36 = f32, f6, f0 | |
| 213 xma.hu f40 = f32, f6, f0 | |
| 214 ;; | |
| 215 getf.sig r20 = f42 | |
| 216 ldf8 f32 = [up], 8 | |
| 217 xma.l f37 = f33, f6, f0 | |
| 218 ;; | |
| 219 getf.sig r17 = f39 | |
| 220 xma.hu f41 = f33, f6, f0 | |
| 221 ;; | |
| 222 getf.sig r21 = f43 | |
| 223 ldf8 f33 = [up], 8 | |
| 224 xma.l f38 = f34, f6, f0 | |
| 225 ;; | |
| 226 getf.sig r18 = f36 | |
| 227 xma.hu f42 = f34, f6, f0 | |
| 228 br .LL10 | |
| 229 | |
| 230 | |
| 231 .Lb11: ldf8 f34 = [up], 8 C feed-in for n mod 4 == 3; f34, f35 = second and third source limbs | |
| 232 mov r22 = 0 C added to the first low half at .LL11 | |
| 233 ;; | |
| 234 ldf8 f35 = [up], 8 | |
| 235 br.cloop.dptk .grt3 C taken when ar.lc != 0, i.e. n >= 7 | |
| 236 ;; | |
| 237 C n == 3: form all three products, then finish in the wind-down code | |
| 238 xma.l f37 = f7, f6, f9 | |
| 239 xma.hu f41 = f7, f6, f9 | |
| 240 xma.l f38 = f34, f6, f0 | |
| 241 xma.hu f42 = f34, f6, f0 | |
| 242 xma.l f39 = f35, f6, f0 | |
| 243 xma.hu f43 = f35, f6, f0 | |
| 244 ;; | |
| 245 getf.sig r23 = f41 | |
| 246 stf8 [rp] = f37, 8 C first result limb is the low half of the first product | |
| 247 getf.sig r16 = f38 | |
| 248 getf.sig r20 = f42 | |
| 249 getf.sig r17 = f39 | |
| 250 getf.sig r8 = f43 C high half of the last product; the final carry is added at the end | |
| 251 br .Lcj3 | |
| 252 C n >= 7: load four more limbs and form the first three products | |
| 253 .grt3: | |
| 254 ldf8 f32 = [up], 8 | |
| 255 xma.l f37 = f7, f6, f9 | |
| 256 xma.hu f41 = f7, f6, f9 | |
| 257 ;; | |
| 258 ldf8 f33 = [up], 8 | |
| 259 xma.l f38 = f34, f6, f0 | |
| 260 xma.hu f42 = f34, f6, f0 | |
| 261 ;; | |
| 262 getf.sig r19 = f37 | |
| 263 ldf8 f34 = [up], 8 | |
| 264 xma.l f39 = f35, f6, f0 | |
| 265 xma.hu f43 = f35, f6, f0 | |
| 266 ;; | |
| 267 getf.sig r23 = f41 | |
| 268 ldf8 f35 = [up], 8 | |
| 269 br.cloop.dptk .grt7 C taken when n >= 11 | |
| 270 C n == 7: finish in the wind-down code at .Lcj7 | |
| 271 getf.sig r16 = f38 | |
| 272 xma.l f36 = f32, f6, f0 | |
| 273 getf.sig r20 = f42 | |
| 274 xma.hu f40 = f32, f6, f0 | |
| 275 ;; | |
| 276 getf.sig r17 = f39 | |
| 277 xma.l f37 = f33, f6, f0 | |
| 278 getf.sig r21 = f43 | |
| 279 xma.hu f41 = f33, f6, f0 | |
| 280 ;; | |
| 281 getf.sig r18 = f36 | |
| 282 st8 [rp] = r19, 8 C first result limb is the low half of the first product | |
| 283 xma.l f38 = f34, f6, f0 | |
| 284 xma.hu f42 = f34, f6, f0 | |
| 285 br .Lcj7 | |
| 286 C n >= 11: keep loading ahead and enter the main loop at .LL11 | |
| 287 .grt7: | |
| 288 getf.sig r16 = f38 | |
| 289 xma.l f36 = f32, f6, f0 | |
| 290 xma.hu f40 = f32, f6, f0 | |
| 291 ;; | |
| 292 getf.sig r20 = f42 | |
| 293 ldf8 f32 = [up], 8 | |
| 294 xma.l f37 = f33, f6, f0 | |
| 295 ;; | |
| 296 getf.sig r17 = f39 | |
| 297 xma.hu f41 = f33, f6, f0 | |
| 298 br .LL11 | |
| 299 | |
| 300 | |
| 301 .Lb00: ldf8 f33 = [up], 8 C feed-in for n mod 4 == 0; f33..f35 = second to fourth source limbs | |
| 302 mov r21 = 0 C added to the first low half at .LL00 | |
| 303 ;; | |
| 304 ldf8 f34 = [up], 8 | |
| 305 ;; | |
| 306 ldf8 f35 = [up], 8 | |
| 307 xma.l f36 = f7, f6, f9 | |
| 308 xma.hu f40 = f7, f6, f9 | |
| 309 br.cloop.dptk .grt4 C taken when ar.lc != 0, i.e. n >= 8 | |
| 310 C n == 4: form the other three products, then finish in the wind-down code | |
| 311 xma.l f37 = f33, f6, f0 | |
| 312 xma.hu f41 = f33, f6, f0 | |
| 313 xma.l f38 = f34, f6, f0 | |
| 314 xma.hu f42 = f34, f6, f0 | |
| 315 ;; | |
| 316 getf.sig r22 = f40 | |
| 317 stf8 [rp] = f36, 8 C first result limb is the low half of the first product | |
| 318 xma.l f39 = f35, f6, f0 | |
| 319 getf.sig r19 = f37 | |
| 320 xma.hu f43 = f35, f6, f0 | |
| 321 ;; | |
| 322 getf.sig r23 = f41 | |
| 323 getf.sig r16 = f38 | |
| 324 getf.sig r20 = f42 | |
| 325 getf.sig r17 = f39 | |
| 326 br .Lcj4 | |
| 327 C n >= 8: load four more limbs and form products two to four | |
| 328 .grt4: | |
| 329 ldf8 f32 = [up], 8 | |
| 330 xma.l f37 = f33, f6, f0 | |
| 331 xma.hu f41 = f33, f6, f0 | |
| 332 ;; | |
| 333 getf.sig r18 = f36 | |
| 334 ldf8 f33 = [up], 8 | |
| 335 xma.l f38 = f34, f6, f0 | |
| 336 xma.hu f42 = f34, f6, f0 | |
| 337 ;; | |
| 338 getf.sig r22 = f40 | |
| 339 ldf8 f34 = [up], 8 | |
| 340 xma.l f39 = f35, f6, f0 | |
| 341 ;; | |
| 342 getf.sig r19 = f37 | |
| 343 getf.sig r23 = f41 | |
| 344 xma.hu f43 = f35, f6, f0 | |
| 345 ldf8 f35 = [up], 8 | |
| 346 br.cloop.dptk .grt8 C taken when n >= 12 | |
| 347 C n == 8: finish in the wind-down code at .Lcj8 | |
| 348 getf.sig r16 = f38 | |
| 349 xma.l f36 = f32, f6, f0 | |
| 350 getf.sig r20 = f42 | |
| 351 xma.hu f40 = f32, f6, f0 | |
| 352 ;; | |
| 353 getf.sig r17 = f39 | |
| 354 st8 [rp] = r18, 8 C first result limb is the low half of the first product | |
| 355 xma.l f37 = f33, f6, f0 | |
| 356 xma.hu f41 = f33, f6, f0 | |
| 357 br .Lcj8 | |
| 358 C n >= 12: enter the main loop at .LL00 | |
| 359 .grt8: | |
| 360 getf.sig r16 = f38 | |
| 361 xma.l f36 = f32, f6, f0 | |
| 362 xma.hu f40 = f32, f6, f0 | |
| 363 br .LL00 | |
| 364 | |
| 365 | |
| 366 C *** MAIN LOOP START *** | |
| 367 ALIGN(32) | |
| 368 .Loop: C four result limbs per iteration; first entered at .LL00, .LL11, .LL10 or .LL01 | |
| 369 .pred.rel "mutex",p6,p7 | |
| 370 getf.sig r16 = f38 C r16..r19 receive low product halves, r20..r23 high halves | |
| 371 xma.l f36 = f32, f6, f0 C low half of a new product | |
| 372 (p6) cmp.leu p8, p9 = r24, r17 C carry out of the last sum when a carry came in | |
| 373 st8 [rp] = r24, 8 C store the finished result limb | |
| 374 xma.hu f40 = f32, f6, f0 C high half of the same product | |
| 375 (p7) cmp.ltu p8, p9 = r24, r17 C carry out of the last sum when no carry came in | |
| 376 ;; | |
| 377 .LL00: C entry from the n mod 4 == 0 feed-in | |
| 378 .pred.rel "mutex",p8,p9 | |
| 379 getf.sig r20 = f42 | |
| 380 (p8) add r24 = r18, r21, 1 C low half + previous high half + carry | |
| 381 nop.b 0 | |
| 382 ldf8 f32 = [up], 8 C next source limb for the xma pair at .Loop | |
| 383 (p9) add r24 = r18, r21 C same sum without carry | |
| 384 nop.b 0 | |
| 385 ;; | |
| 386 .pred.rel "mutex",p8,p9 | |
| 387 getf.sig r17 = f39 | |
| 388 xma.l f37 = f33, f6, f0 | |
| 389 (p8) cmp.leu p6, p7 = r24, r18 | |
| 390 st8 [rp] = r24, 8 | |
| 391 xma.hu f41 = f33, f6, f0 | |
| 392 (p9) cmp.ltu p6, p7 = r24, r18 | |
| 393 ;; | |
| 394 .LL11: C entry from the n mod 4 == 3 feed-in | |
| 395 .pred.rel "mutex",p6,p7 | |
| 396 getf.sig r21 = f43 | |
| 397 (p6) add r24 = r19, r22, 1 | |
| 398 nop.b 0 | |
| 399 ldf8 f33 = [up], 8 | |
| 400 (p7) add r24 = r19, r22 | |
| 401 nop.b 0 | |
| 402 ;; | |
| 403 .pred.rel "mutex",p6,p7 | |
| 404 getf.sig r18 = f36 | |
| 405 xma.l f38 = f34, f6, f0 | |
| 406 (p6) cmp.leu p8, p9 = r24, r19 | |
| 407 st8 [rp] = r24, 8 | |
| 408 xma.hu f42 = f34, f6, f0 | |
| 409 (p7) cmp.ltu p8, p9 = r24, r19 | |
| 410 ;; | |
| 411 .LL10: C entry from the n mod 4 == 2 feed-in | |
| 412 .pred.rel "mutex",p8,p9 | |
| 413 getf.sig r22 = f40 | |
| 414 (p8) add r24 = r16, r23, 1 | |
| 415 nop.b 0 | |
| 416 ldf8 f34 = [up], 8 | |
| 417 (p9) add r24 = r16, r23 | |
| 418 nop.b 0 | |
| 419 ;; | |
| 420 .pred.rel "mutex",p8,p9 | |
| 421 getf.sig r19 = f37 | |
| 422 xma.l f39 = f35, f6, f0 | |
| 423 (p8) cmp.leu p6, p7 = r24, r16 | |
| 424 st8 [rp] = r24, 8 | |
| 425 xma.hu f43 = f35, f6, f0 | |
| 426 (p9) cmp.ltu p6, p7 = r24, r16 | |
| 427 ;; | |
| 428 .LL01: C entry from the n mod 4 == 1 feed-in | |
| 429 .pred.rel "mutex",p6,p7 | |
| 430 getf.sig r23 = f41 | |
| 431 (p6) add r24 = r17, r20, 1 | |
| 432 nop.b 0 | |
| 433 ldf8 f35 = [up], 8 | |
| 434 (p7) add r24 = r17, r20 | |
| 435 br.cloop.dptk .Loop C repeat while ar.lc != 0; falls into the wind-down code otherwise | |
| 436 C *** MAIN LOOP END *** | |
| 437 ;; | |
| 438 | |
| 439 .Lcj9: C wind-down: same add and carry steps as the loop, with no further loads | |
| 440 .pred.rel "mutex",p6,p7 | |
| 441 getf.sig r16 = f38 | |
| 442 xma.l f36 = f32, f6, f0 | |
| 443 (p6) cmp.leu p8, p9 = r24, r17 | |
| 444 st8 [rp] = r24, 8 | |
| 445 xma.hu f40 = f32, f6, f0 | |
| 446 (p7) cmp.ltu p8, p9 = r24, r17 | |
| 447 ;; | |
| 448 .pred.rel "mutex",p8,p9 | |
| 449 getf.sig r20 = f42 | |
| 450 (p8) add r24 = r18, r21, 1 | |
| 451 (p9) add r24 = r18, r21 | |
| 452 ;; | |
| 453 .pred.rel "mutex",p8,p9 | |
| 454 getf.sig r17 = f39 | |
| 455 xma.l f37 = f33, f6, f0 | |
| 456 (p8) cmp.leu p6, p7 = r24, r18 | |
| 457 st8 [rp] = r24, 8 | |
| 458 xma.hu f41 = f33, f6, f0 | |
| 459 (p9) cmp.ltu p6, p7 = r24, r18 | |
| 460 ;; | |
| 461 .Lcj8: C entry for n == 8 | |
| 462 .pred.rel "mutex",p6,p7 | |
| 463 getf.sig r21 = f43 | |
| 464 (p6) add r24 = r19, r22, 1 | |
| 465 (p7) add r24 = r19, r22 | |
| 466 ;; | |
| 467 .pred.rel "mutex",p6,p7 | |
| 468 getf.sig r18 = f36 | |
| 469 xma.l f38 = f34, f6, f0 | |
| 470 (p6) cmp.leu p8, p9 = r24, r19 | |
| 471 st8 [rp] = r24, 8 | |
| 472 xma.hu f42 = f34, f6, f0 | |
| 473 (p7) cmp.ltu p8, p9 = r24, r19 | |
| 474 ;; | |
| 475 .Lcj7: C entry for n == 7 | |
| 476 .pred.rel "mutex",p8,p9 | |
| 477 getf.sig r22 = f40 | |
| 478 (p8) add r24 = r16, r23, 1 | |
| 479 (p9) add r24 = r16, r23 | |
| 480 ;; | |
| 481 .pred.rel "mutex",p8,p9 | |
| 482 getf.sig r19 = f37 | |
| 483 xma.l f39 = f35, f6, f0 C last product formed in the wind-down code | |
| 484 (p8) cmp.leu p6, p7 = r24, r16 | |
| 485 st8 [rp] = r24, 8 | |
| 486 xma.hu f43 = f35, f6, f0 | |
| 487 (p9) cmp.ltu p6, p7 = r24, r16 | |
| 488 ;; | |
| 489 .Lcj6: C entry for n == 6 | |
| 490 .pred.rel "mutex",p6,p7 | |
| 491 getf.sig r23 = f41 | |
| 492 (p6) add r24 = r17, r20, 1 | |
| 493 (p7) add r24 = r17, r20 | |
| 494 ;; | |
| 495 .pred.rel "mutex",p6,p7 | |
| 496 (p6) cmp.leu p8, p9 = r24, r17 | |
| 497 (p7) cmp.ltu p8, p9 = r24, r17 | |
| 498 getf.sig r16 = f38 | |
| 499 st8 [rp] = r24, 8 | |
| 500 ;; | |
| 501 .Lcj5: C entry for n == 5 | |
| 502 .pred.rel "mutex",p8,p9 | |
| 503 getf.sig r20 = f42 | |
| 504 (p8) add r24 = r18, r21, 1 | |
| 505 (p9) add r24 = r18, r21 | |
| 506 ;; | |
| 507 .pred.rel "mutex",p8,p9 | |
| 508 (p8) cmp.leu p6, p7 = r24, r18 | |
| 509 (p9) cmp.ltu p6, p7 = r24, r18 | |
| 510 getf.sig r17 = f39 | |
| 511 st8 [rp] = r24, 8 | |
| 512 ;; | |
| 513 .Lcj4: C entry for n == 4 | |
| 514 .pred.rel "mutex",p6,p7 | |
| 515 getf.sig r8 = f43 C high half of the last product | |
| 516 (p6) add r24 = r19, r22, 1 | |
| 517 (p7) add r24 = r19, r22 | |
| 518 ;; | |
| 519 .pred.rel "mutex",p6,p7 | |
| 520 st8 [rp] = r24, 8 | |
| 521 (p6) cmp.leu p8, p9 = r24, r19 | |
| 522 (p7) cmp.ltu p8, p9 = r24, r19 | |
| 523 ;; | |
| 524 .Lcj3: C entry for n == 3, which has already set r8 | |
| 525 .pred.rel "mutex",p8,p9 | |
| 526 (p8) add r24 = r16, r23, 1 | |
| 527 (p9) add r24 = r16, r23 | |
| 528 ;; | |
| 529 .pred.rel "mutex",p8,p9 | |
| 530 st8 [rp] = r24, 8 | |
| 531 (p8) cmp.leu p6, p7 = r24, r16 | |
| 532 (p9) cmp.ltu p6, p7 = r24, r16 | |
| 533 ;; | |
| 534 .Lcj2: C last result limb | |
| 535 .pred.rel "mutex",p6,p7 | |
| 536 (p6) add r24 = r17, r20, 1 | |
| 537 (p7) add r24 = r17, r20 | |
| 538 ;; | |
| 539 .pred.rel "mutex",p6,p7 | |
| 540 st8 [rp] = r24, 8 | |
| 541 (p6) cmp.leu p8, p9 = r24, r17 | |
| 542 (p7) cmp.ltu p8, p9 = r24, r17 | |
| 543 ;; | |
| 544 .pred.rel "mutex",p8,p9 | |
| 545 (p8) add r8 = 1, r8 C fold the final carry into the high limb in r8 | |
| 546 mov.i ar.lc = r2 C restore the saved ar.lc | |
| 547 br.ret.sptk.many b0 | |
| 548 EPILOGUE() | |
| 549 | |
| 550 PROLOGUE(mpn_mul_1c) | |
| 551 .prologue | |
| 552 .save ar.lc, r2 C unwind annotation: the entry value of ar.lc is kept in r2 | |
| 553 .body | |
| 554 C mpn_mul_1c: same entry sequence as mpn_mul_1, except that f9 is loaded from cy. | |
| 555 ifdef(`HAVE_ABI_32', | |
| 556 ` addp4 rp = 0, rp C M I | |
| 557 addp4 up = 0, up C M I | |
| 558 zxt4 n = n C I | |
| 559 ;; | |
| 560 ') | |
| 561 {.mmi | |
| 562 adds r15 = -1, n C M I -- r15 = n - 1 | |
| 563 setf.sig f9 = cy C M2 M3 -- f9 = carry-in, the addend of the first product | |
| 564 mov.i r2 = ar.lc C I0 -- save ar.lc, restored before every br.ret | |
| 565 } | |
| 566 {.mmb | |
| 567 ldf8 f7 = [up], 8 C M -- f7 = first source limb, up advances by 8 | |
| 568 and r14 = 3, n C M I -- r14 = n mod 4 | |
| 569 br.sptk .Lcommon C continue in the dispatch and loop code inside mpn_mul_1 | |
| 570 ;; | |
| 571 } | |
| 572 EPILOGUE() | |
| 573 ASM_END() | |
| OLD | NEW |