| Index: gcc/gmp/mpn/x86/p6/lshsub_n.asm
|
| diff --git a/gcc/gmp/mpn/x86/p6/lshsub_n.asm b/gcc/gmp/mpn/x86/p6/lshsub_n.asm
|
| deleted file mode 100644
|
| index a3086bdbc20abc593776b45e5e6a0382f337a3e0..0000000000000000000000000000000000000000
|
| --- a/gcc/gmp/mpn/x86/p6/lshsub_n.asm
|
| +++ /dev/null
|
| @@ -1,158 +0,0 @@
|
| -dnl Intel P6 mpn_lshsub_n -- mpn papillion support.
|
| -
|
| -dnl Copyright 2006 Free Software Foundation, Inc.
|
| -dnl
|
| -dnl This file is part of the GNU MP Library.
|
| -dnl
|
| -dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
| -dnl it under the terms of the GNU Lesser General Public License as published
|
| -dnl by the Free Software Foundation; either version 3 of the License, or (at
|
| -dnl your option) any later version.
|
| -dnl
|
| -dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
| -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
| -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
| -dnl License for more details.
|
| -dnl
|
| -dnl You should have received a copy of the GNU Lesser General Public License
|
| -dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
|
| -
|
| -include(`../config.m4')
|
| -
|
| -C P6/13: 3.35 cycles/limb (separate mpn_sub_n + mpn_lshift needs 4.12)
|
| -
|
| -C (1) The loop is not scheduled in any way, and scheduling attempts have not
|
| -C improved speed on P6/13. Presumably, the K7 will want scheduling, if it
|
| -C at all wants to use MMX.
|
| -C (2) We could save a register by not alternatingly using eax and edx in the
|
| -C loop.
|
| -
|
| -define(`rp', `%edi') C result pointer: 1st stack argument, base of every store
|
| -define(`up', `%esi') C minuend pointer: 2nd stack argument, read by the mov loads
|
| -define(`vp', `%ebx') C subtrahend pointer: 3rd stack argument, read by the sbb loads
|
| -define(`n', `%ecx') C limb count: 4th stack argument, negated and used as the index
|
| -define(`cnt', `%mm7') C holds 32 minus the 5th stack argument, the psrlq count
|
| -
|
| -ASM_START()
|
| - TEXT
|
| - ALIGN(16)
|
| - ALIGN(16)
|
| -
|
| -PROLOGUE(mpn_lshsub_n)
|
| - push %edi C mpn_lshsub_n(rp, up, vp, n, cnt): rp[] = (up[] - vp[]) << cnt
|
| - push %esi C over n 32-bit limbs, n >= 1 (entry skips the jecxz test).
|
| - push %ebx C Returns (borrow << cnt) | (top difference limb >> (32-cnt)).
|
| -
|
| - mov 16(%esp), rp C arguments start at 16(%esp): 3 pushes + return address
|
| - mov 20(%esp), up
|
| - mov 24(%esp), vp
|
| - mov 28(%esp), n
|
| - mov $32, %eax
|
| - sub 32(%esp), %eax C eax = 32 - shift count (5th argument)
|
| - movd %eax, cnt C right-shift count applied to each 64-bit limb pair
|
| -
|
| - lea -4(up,n,4), up C point one limb below the end of each operand so that
|
| - lea -4(vp,n,4), vp C the loop displacements run 4..32 and are never zero
|
| - lea -4(rp,n,4), rp C (the computed jump needs equal-sized blocks, see L(ent))
|
| -
|
| - neg n
|
| - mov n, %eax
|
| - and $-8, n C n = -(limb count rounded up to a multiple of 8)
|
| - and $7, %eax C eax = x, the number of leading blocks to skip
|
| - shl %eax C eax = 2x
|
| - lea (%eax,%eax,4), %edx C edx = 10x
|
| -ifdef(`PIC',`
|
| - call L(pic_calc) C pushes the address of L(here)
|
| -L(here):
|
| -',`
|
| - lea L(ent)(%eax,%edx,2), %eax C eax = L(ent) + 22x
|
| -')
|
| -
|
| - pxor %mm1, %mm1 C no limb precedes the first one: zeros are shifted in
|
| - pxor %mm0, %mm0
|
| -
|
| - jmp *%eax C enter the unrolled loop x blocks after L(ent)
|
| -
|
| -ifdef(`PIC',`
|
| -L(pic_calc):
|
| - C See mpn/x86/README about old gas bugs
|
| - lea (%eax,%edx,2), %eax C eax = 22x
|
| - add $L(ent)-L(here), %eax
|
| - add (%esp), %eax C (%esp) is the return address, i.e. L(here)
|
| - ret_internal
|
| -')
|
| -
|
| -L(end): C compute (cy<<cnt) | (edx>>(32-cnt))
|
| - sbb %eax, %eax C eax = -CF, the borrow out of the last sbb
|
| - neg %eax C eax = borrow as 0 or 1
|
| - mov 32(%esp), %ecx C reload the shift count; n is zero and no longer needed
|
| - shld %cl, %edx, %eax C edx = top difference limb, written by the last block
|
| -
|
| - emms C clear MMX state before returning
|
| -
|
| - pop %ebx
|
| - pop %esi
|
| - pop %edi
|
| - ret
|
| - ALIGN(16)
|
| -L(top): jecxz L(end) C n == 0: all limbs done; jecxz leaves CF untouched
|
| -L(ent): mov 4(up,n,4), %eax C Each of the 8 blocks must assemble to exactly 22
|
| - sbb 4(vp,n,4), %eax C bytes for the computed jump above. gas omits a zero
|
| - movd %eax, %mm0 C displacement, which would shorten a block to 19.
|
| - punpckldq %mm0, %mm1 C mm1 = new limb : previous limb
|
| - psrlq %mm7, %mm1 C low half = new << cnt | previous >> (32-cnt)
|
| - movd %mm1, 4(rp,n,4)
|
| -
|
| - mov 8(up,n,4), %edx C odd blocks mirror the even ones with edx/mm1 and
|
| - sbb 8(vp,n,4), %edx C eax/mm0 swapped; CF carries the borrow between them
|
| - movd %edx, %mm1
|
| - punpckldq %mm1, %mm0
|
| - psrlq %mm7, %mm0
|
| - movd %mm0, 8(rp,n,4)
|
| -
|
| - mov 12(up,n,4), %eax
|
| - sbb 12(vp,n,4), %eax
|
| - movd %eax, %mm0
|
| - punpckldq %mm0, %mm1
|
| - psrlq %mm7, %mm1
|
| - movd %mm1, 12(rp,n,4)
|
| -
|
| - mov 16(up,n,4), %edx
|
| - sbb 16(vp,n,4), %edx
|
| - movd %edx, %mm1
|
| - punpckldq %mm1, %mm0
|
| - psrlq %mm7, %mm0
|
| - movd %mm0, 16(rp,n,4)
|
| -
|
| - mov 20(up,n,4), %eax
|
| - sbb 20(vp,n,4), %eax
|
| - movd %eax, %mm0
|
| - punpckldq %mm0, %mm1
|
| - psrlq %mm7, %mm1
|
| - movd %mm1, 20(rp,n,4)
|
| -
|
| - mov 24(up,n,4), %edx
|
| - sbb 24(vp,n,4), %edx
|
| - movd %edx, %mm1
|
| - punpckldq %mm1, %mm0
|
| - psrlq %mm7, %mm0
|
| - movd %mm0, 24(rp,n,4)
|
| -
|
| - mov 28(up,n,4), %eax
|
| - sbb 28(vp,n,4), %eax
|
| - movd %eax, %mm0
|
| - punpckldq %mm0, %mm1
|
| - psrlq %mm7, %mm1
|
| - movd %mm1, 28(rp,n,4)
|
| -
|
| - mov 32(up,n,4), %edx C last block always ends in edx, which L(end) relies on
|
| - sbb 32(vp,n,4), %edx
|
| - movd %edx, %mm1
|
| - punpckldq %mm1, %mm0
|
| - psrlq %mm7, %mm0
|
| - movd %mm0, 32(rp,n,4)
|
| -
|
| - lea 8(n), n C advance 8 limbs; lea keeps the borrow in CF intact
|
| - jmp L(top)
|
| -
|
| -EPILOGUE()
|
|
|