Index: gcc/gmp/mpn/x86/p6/lshsub_n.asm |
diff --git a/gcc/gmp/mpn/x86/p6/lshsub_n.asm b/gcc/gmp/mpn/x86/p6/lshsub_n.asm |
deleted file mode 100644 |
index a3086bdbc20abc593776b45e5e6a0382f337a3e0..0000000000000000000000000000000000000000 |
--- a/gcc/gmp/mpn/x86/p6/lshsub_n.asm |
+++ /dev/null |
@@ -1,158 +0,0 @@ |
-dnl Intel P6 mpn_lshsub_n -- mpn papillion support. |
- |
-dnl Copyright 2006 Free Software Foundation, Inc. |
-dnl |
-dnl This file is part of the GNU MP Library. |
-dnl |
-dnl The GNU MP Library is free software; you can redistribute it and/or modify |
-dnl it under the terms of the GNU Lesser General Public License as published |
-dnl by the Free Software Foundation; either version 3 of the License, or (at |
-dnl your option) any later version. |
-dnl |
-dnl The GNU MP Library is distributed in the hope that it will be useful, but |
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
-dnl License for more details. |
-dnl |
-dnl You should have received a copy of the GNU Lesser General Public License |
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. |
- |
-include(`../config.m4') |
- |
-C P6/13: 3.35 cycles/limb (separate mpn_sub_n + mpn_lshift needs 4.12) |
- |
-C (1) The loop is not scheduled in any way, and scheduling attempts have not |
-C improved speed on P6/13. Presumably, the K7 will want scheduling, if it |
-C at all wants to use MMX. |
-C (2) We could save a register by not alternatingly using eax and edx in the |
-C loop. |
- |
-define(`rp', `%edi') C destination pointer, loaded from the 1st stack argument; the loop stores through it |
-define(`up', `%esi') C minuend pointer, loaded from the 2nd stack argument |
-define(`vp', `%ebx') C subtrahend pointer, loaded from the 3rd stack argument |
-define(`n', `%ecx') C limb count from the 4th stack argument; negated and used as the loop index |
-define(`cnt', `%mm7') C holds 32 minus the 5th stack argument, the right-shift count for psrlq |
- |
-ASM_START() |
- |
- TEXT |
- ALIGN(16) |
-C mpn_lshsub_n(rp, up, vp, n, cnt): rp[i] = limb i of ((up[] - vp[]) << cnt) for i < n; returns (borrow << cnt) | (top difference limb >> (32-cnt)). |
-PROLOGUE(mpn_lshsub_n) |
- push %edi C save the registers that serve as rp, up and vp |
- push %esi |
- push %ebx |
-C Three pushes plus the return address: the five arguments are at 16(%esp) .. 32(%esp). |
- mov 16(%esp), rp |
- mov 20(%esp), up |
- mov 24(%esp), vp |
- mov 28(%esp), n C n = 0 is not handled: the entry jump would run all eight blocks |
- mov $32, %eax |
- sub 32(%esp), %eax |
- movd %eax, cnt C mm7 = 32-cnt, the right-shift count used by every psrlq |
- |
- lea -4(up,n,4), up C Point 4 bytes short of the end of each operand so that the |
- lea -4(vp,n,4), vp C eight blocks below use displacements 4..32.  A displacement |
- lea -4(rp,n,4), rp C of 0 is dropped by gas, which made block 0 19 bytes, not 22. |
- |
- neg n C index runs from -n up towards zero |
- mov n, %eax |
- and $-8, n C round the index down to a multiple of 8 |
- and $7, %eax C eax = x, the block (0..7) at which the loop is entered |
- shl %eax C eax = 2x; CF ends up clear since x <= 7 |
- lea (%eax,%eax,4), %edx C edx = 10x |
-ifdef(`PIC',` |
- call L(pic_calc) |
-L(here): |
-',` |
- lea L(ent)(%eax,%edx,2), %eax C eax = L(ent) + 22x, each block being 22 bytes |
-') |
- |
- pxor %mm1, %mm1 C mm0 = mm1 = 0: the entry block takes one of them as the |
- pxor %mm0, %mm0 C previous limb, so zero bits enter the lowest result limb |
- |
- jmp *%eax C enter the unrolled loop at block x; pxor and lea leave CF alone |
- |
-ifdef(`PIC',` |
-L(pic_calc): |
- C See mpn/x86/README about old gas bugs |
- lea (%eax,%edx,2), %eax C eax = 22x |
- add $L(ent)-L(here), %eax |
- add (%esp), %eax C (%esp) is the return address L(here); CF stays clear unless this sum wraps |
- ret_internal |
-') |
- |
-L(end): C compute (cy<<cnt) | (edx>>(32-cnt)) |
- sbb %eax, %eax C eax = -CF, CF being the borrow out of the last sbb |
- neg %eax C eax = borrow, 0 or 1 |
- mov 32(%esp), %ecx C reload cnt; ecx is free because n has reached zero |
- shld %cl, %edx, %eax C edx = top difference limb, left there by block 7 |
- |
- emms C reset the MMX state before returning |
- |
- pop %ebx |
- pop %esi |
- pop %edi |
- ret |
- ALIGN(16) |
-L(top): jecxz L(end) C n == 0: all limbs done; jecxz does not touch CF |
-L(ent): mov 4(up,n,4), %eax C block 0; every block must assemble to exactly 22 bytes |
- sbb 4(vp,n,4), %eax C eax = up limb - vp limb - borrow |
- movd %eax, %mm0 C mm0 keeps the unshifted limb for the next block |
- punpckldq %mm0, %mm1 C mm1 = new limb : previous limb |
- psrlq %mm7, %mm1 C low half = (new << cnt) | (previous >> (32-cnt)) |
- movd %mm1, 4(rp,n,4) |
- |
- mov 8(up,n,4), %edx C block 1: same step with eax/edx and mm0/mm1 swapped |
- sbb 8(vp,n,4), %edx |
- movd %edx, %mm1 |
- punpckldq %mm1, %mm0 |
- psrlq %mm7, %mm0 |
- movd %mm0, 8(rp,n,4) |
- |
- mov 12(up,n,4), %eax C block 2 |
- sbb 12(vp,n,4), %eax |
- movd %eax, %mm0 |
- punpckldq %mm0, %mm1 |
- psrlq %mm7, %mm1 |
- movd %mm1, 12(rp,n,4) |
- |
- mov 16(up,n,4), %edx C block 3 |
- sbb 16(vp,n,4), %edx |
- movd %edx, %mm1 |
- punpckldq %mm1, %mm0 |
- psrlq %mm7, %mm0 |
- movd %mm0, 16(rp,n,4) |
- |
- mov 20(up,n,4), %eax C block 4 |
- sbb 20(vp,n,4), %eax |
- movd %eax, %mm0 |
- punpckldq %mm0, %mm1 |
- psrlq %mm7, %mm1 |
- movd %mm1, 20(rp,n,4) |
- |
- mov 24(up,n,4), %edx C block 5 |
- sbb 24(vp,n,4), %edx |
- movd %edx, %mm1 |
- punpckldq %mm1, %mm0 |
- psrlq %mm7, %mm0 |
- movd %mm0, 24(rp,n,4) |
- |
- mov 28(up,n,4), %eax C block 6 |
- sbb 28(vp,n,4), %eax |
- movd %eax, %mm0 |
- punpckldq %mm0, %mm1 |
- psrlq %mm7, %mm1 |
- movd %mm1, 28(rp,n,4) |
- |
- mov 32(up,n,4), %edx C block 7: leaves the highest limb so far in edx for L(end) |
- sbb 32(vp,n,4), %edx |
- movd %edx, %mm1 |
- punpckldq %mm1, %mm0 |
- psrlq %mm7, %mm0 |
- movd %mm0, 32(rp,n,4) |
- |
- lea 8(n), n C advance 8 limbs; lea keeps the borrow in CF intact |
- jmp L(top) |
- |
-EPILOGUE() |