| OLD | NEW |
| (Empty) |
| 1 dnl x86 __gmpn_addmul_1 and mpn_submul_1 (for 386 and 486) -- Multiply a limb | |
| 2 dnl vector with a limb and add the result to, or subtract it from, a second limb vector. | |
| 3 | |
| 4 dnl Copyright 1992, 1994, 1997, 1999, 2000, 2001, 2002, 2005 Free Software | |
| 5 dnl Foundation, Inc. | |
| 6 dnl | |
| 7 dnl This file is part of the GNU MP Library. | |
| 8 dnl | |
| 9 dnl The GNU MP Library is free software; you can redistribute it and/or | |
| 10 dnl modify it under the terms of the GNU Lesser General Public License as | |
| 11 dnl published by the Free Software Foundation; either version 3 of the | |
| 12 dnl License, or (at your option) any later version. | |
| 13 dnl | |
| 14 dnl The GNU MP Library is distributed in the hope that it will be useful, | |
| 15 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 16 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 17 dnl Lesser General Public License for more details. | |
| 18 dnl | |
| 19 dnl You should have received a copy of the GNU Lesser General Public License | |
| 20 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. | |
| 21 | |
| 22 include(`../config.m4') | |
| 23 | |
| 24 | |
| 25 C cycles/limb | |
| 26 C P5: 14.75 | |
| 27 C P6 (model 0-8,10-12) 7.5 | |
| 28 C P6 model 9 (Banias) | |
| 29 C P6 model 13 (Dothan) 6.75 | |
| 30 C P4 model 0 (Willamette) 24.0 | |
| 31 C P4 model 1 (?) 24.0 | |
| 32 C P4 model 2 (Northwood) 24.0 | |
| 33 C P4 model 3 (Prescott) | |
| 34 C P4 model 4 (Nocona) | |
| 35 C K6: 12.5 | |
| 36 C K7: 5.25 | |
| 37 C K8: | |
| 38 C Select the operation at build time: OPERATION_addmul_1 defines M4_inst as addl and M4_function_1 as mpn_addmul_1; | |
| 39 C OPERATION_submul_1 defines them as subl and mpn_submul_1; if neither is defined, m4_error is invoked. | |
| 40 ifdef(`OPERATION_addmul_1',` | |
| 41 define(M4_inst, addl) | |
| 42 define(M4_function_1, mpn_addmul_1) | |
| 43 | |
| 44 ',`ifdef(`OPERATION_submul_1',` | |
| 45 define(M4_inst, subl) | |
| 46 define(M4_function_1, mpn_submul_1) | |
| 47 | |
| 48 ',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1 | |
| 49 ')')') | |
| 50 | |
| 51 MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1) C NOTE(review): macro is defined outside this file; presumably announces both functions this source can produce - confirm | |
| 52 | |
| 53 | |
| 54 C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, | |
| 55 C mp_limb_t mult); | |
| 56 | |
| 57 define(PARAM_MULTIPLIER, `FRAME+16(%esp)') C mult, 4th argument; FRAME is the byte count pushed since entry (set by the deflit lines in the function) | |
| 58 define(PARAM_SIZE, `FRAME+12(%esp)') C size, 3rd argument | |
| 59 define(PARAM_SRC, `FRAME+8(%esp)') C src, 2nd argument | |
| 60 define(PARAM_DST, `FRAME+4(%esp)') C dst, 1st argument | |
| 61 | |
| 62 TEXT | |
| 63 ALIGN(8) | |
| 64 C Multiply the size-limb vector at src by mult and add it to (M4_inst = addl) or subtract it from (M4_inst = subl) the vector at dst, in place; eax holds the final carry (or borrow) limb at ret. | |
| 65 PROLOGUE(M4_function_1) | |
| 66 deflit(`FRAME',0) C nothing pushed yet | |
| 67 C Register use: edi = dst pointer, esi = src pointer, ecx = loop counter, ebx/ebp = carry limb, edx:eax = product of one src limb and mult. | |
| 68 pushl %edi C save the four registers overwritten below; popped in reverse order before ret | |
| 69 pushl %esi | |
| 70 pushl %ebx | |
| 71 pushl %ebp | |
| 72 deflit(`FRAME',16) C four 4-byte pushes moved esp by 16; keeps the PARAM_ offsets pointing at the arguments | |
| 73 | |
| 74 movl PARAM_DST,%edi C edi = dst | |
| 75 movl PARAM_SRC,%esi C esi = src | |
| 76 movl PARAM_SIZE,%ecx C ecx = size | |
| 77 | |
| 78 xorl %ebx,%ebx C ebx = carry limb, starts at 0 | |
| 79 andl $3,%ecx C ecx = size mod 4: limbs handled one at a time before the 4-way unrolled loop | |
| 80 jz L(end0) C size is a multiple of 4 (including 0): skip the single-limb loop | |
| 81 | |
| 82 L(oop0): C single-limb loop: one limb per iteration | |
| 83 movl (%esi),%eax C eax = next src limb | |
| 84 mull PARAM_MULTIPLIER C edx:eax = src limb * mult | |
| 85 leal 4(%esi),%esi C advance src by one limb | |
| 86 addl %ebx,%eax C low product + incoming carry limb | |
| 87 movl $0,%ebx C clear ebx with movl rather than xorl so CF from the addl survives | |
| 88 adcl %ebx,%edx C high product + carry out of the low add | |
| 89 M4_inst %eax,(%edi) C add to / subtract from the dst limb in memory | |
| 90 adcl %edx,%ebx C propagate carry into cylimb | |
| 91 | |
| 92 leal 4(%edi),%edi C advance dst by one limb | |
| 93 decl %ecx | |
| 94 jnz L(oop0) | |
| 95 | |
| 96 L(end0): | |
| 97 movl PARAM_SIZE,%ecx C reload size; ecx was used as the counter above | |
| 98 shrl $2,%ecx C ecx = size / 4: number of 4-limb groups | |
| 99 jz L(end) C fewer than 4 limbs in total: nothing left to do | |
| 100 C Unrolled by 4. ebx and ebp alternate as the carry limb. Each M4_inst is placed after the following mull (which overwrites CF) so that the adcl right behind it consumes the carry/borrow of the memory update. | |
| 101 ALIGN(8) | |
| 102 L(oop): movl (%esi),%eax C limb 0 | |
| 103 mull PARAM_MULTIPLIER | |
| 104 addl %eax,%ebx C ebx = low product + incoming carry limb | |
| 105 movl $0,%ebp C movl leaves CF intact | |
| 106 adcl %edx,%ebp C ebp = high product + carry: pending carry limb | |
| 107 | |
| 108 movl 4(%esi),%eax C limb 1 | |
| 109 mull PARAM_MULTIPLIER | |
| 110 M4_inst %ebx,(%edi) C update dst limb 0; its CF feeds the adcl below | |
| 111 adcl %eax,%ebp C new lo + cylimb | |
| 112 movl $0,%ebx | |
| 113 adcl %edx,%ebx C ebx = high product + carry: pending carry limb | |
| 114 | |
| 115 movl 8(%esi),%eax C limb 2 | |
| 116 mull PARAM_MULTIPLIER | |
| 117 M4_inst %ebp,4(%edi) C update dst limb 1; its CF feeds the adcl below | |
| 118 adcl %eax,%ebx C new lo + cylimb | |
| 119 movl $0,%ebp | |
| 120 adcl %edx,%ebp C ebp = high product + carry: pending carry limb | |
| 121 | |
| 122 movl 12(%esi),%eax C limb 3 | |
| 123 mull PARAM_MULTIPLIER | |
| 124 M4_inst %ebx,8(%edi) C update dst limb 2; its CF feeds the adcl below | |
| 125 adcl %eax,%ebp C new lo + cylimb | |
| 126 movl $0,%ebx | |
| 127 adcl %edx,%ebx C ebx = high product + carry: carry limb for the next group | |
| 128 | |
| 129 M4_inst %ebp,12(%edi) C update dst limb 3 | |
| 130 adcl $0,%ebx C propagate carry into cylimb | |
| 131 | |
| 132 leal 16(%esi),%esi C advance src by four limbs | |
| 133 leal 16(%edi),%edi C advance dst by four limbs | |
| 134 decl %ecx | |
| 135 jnz L(oop) | |
| 136 | |
| 137 L(end): movl %ebx,%eax C eax = final carry limb (the mp_limb_t result in the C prototype) | |
| 138 | |
| 139 popl %ebp C restore the saved registers in reverse push order | |
| 140 popl %ebx | |
| 141 popl %esi | |
| 142 popl %edi | |
| 143 ret | |
| 144 | |
| 145 EPILOGUE() | |
| OLD | NEW |