Index: gcc/gmp/mpn/x86/pentium4/sse2/mod_34lsub1.asm |
diff --git a/gcc/gmp/mpn/x86/pentium4/sse2/mod_34lsub1.asm b/gcc/gmp/mpn/x86/pentium4/sse2/mod_34lsub1.asm |
deleted file mode 100644 |
index 1598b41785d5d5ff4ab5eaed32ae4b8748cf7868..0000000000000000000000000000000000000000 |
--- a/gcc/gmp/mpn/x86/pentium4/sse2/mod_34lsub1.asm |
+++ /dev/null |
@@ -1,164 +0,0 @@ |
-dnl Intel Pentium 4 mpn_mod_34lsub1 -- remainder modulo 2^24-1. |
- |
-dnl Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc. |
-dnl |
-dnl This file is part of the GNU MP Library. |
-dnl |
-dnl The GNU MP Library is free software; you can redistribute it and/or |
-dnl modify it under the terms of the GNU Lesser General Public License as |
-dnl published by the Free Software Foundation; either version 3 of the |
-dnl License, or (at your option) any later version. |
-dnl |
-dnl The GNU MP Library is distributed in the hope that it will be useful, |
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of |
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
-dnl Lesser General Public License for more details. |
-dnl |
-dnl You should have received a copy of the GNU Lesser General Public License |
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. |
- |
-include(`../config.m4') |
- |
- |
-C Pentium4: 1.0 cycles/limb |
- |
- |
-C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size) |
-C |
-C Enhancements: |
-C |
-C There might be a couple of cycles to save by using plain integer code for |
-C more small sizes. 2 limbs measures about 20 cycles, but 3 limbs jumps to |
-C about 46 (inclusive of some function call overheads). |
- |
-defframe(PARAM_SIZE, 8) |
-defframe(PARAM_SRC, 4) |
- |
-dnl re-use parameter space: SAVE_EBX and SAVE_ESI alias the two parameter slots, but neither alias is referenced by the code in this file |
-define(SAVE_EBX, `PARAM_SRC') |
-define(SAVE_ESI, `PARAM_SIZE') |
- |
- TEXT |
- ALIGN(16) |
-PROLOGUE(mpn_mod_34lsub1) |
-deflit(`FRAME',0) |
- |
- movl PARAM_SIZE, %ecx |
- movl PARAM_SRC, %edx |
- movl (%edx), %eax C eax = src[0] |
- |
- subl $2, %ecx C ecx = size-2 |
- ja L(three_or_more) C size above 2 |
- jne L(one) C size below 2, eax = src[0] is returned as is |
- |
- movl 4(%edx), %edx C size is 2, edx = src[1] |
- movl %eax, %ecx |
- shrl $24, %eax C src[0] high 8 bits, 2^24 is 1 mod 2^24-1 |
- |
- andl $0x00FFFFFF, %ecx C src[0] low 24 bits |
- addl %ecx, %eax |
- |
- movl %edx, %ecx |
- shll $8, %edx |
- |
- shrl $16, %ecx C src[1] high 16 bits, 2^48 is 1 mod 2^24-1 |
- addl %ecx, %eax |
- |
- andl $0x00FFFF00, %edx C src[1] low 16 bits, 2^32 is 2^8 mod 2^24-1 |
- addl %edx, %eax |
- |
-L(one): |
- ret |
- |
- |
-L(three_or_more): |
- pxor %mm0, %mm0 C zero the three running sums |
- pxor %mm1, %mm1 |
- pxor %mm2, %mm2 |
- |
- pcmpeqd %mm7, %mm7 |
- psrlq $32, %mm7 C 0x00000000FFFFFFFF, low 32 bits |
- |
- pcmpeqd %mm6, %mm6 |
- psrlq $40, %mm6 C 0x0000000000FFFFFF, low 24 bits |
- |
-L(top): |
- C eax |
- C ebx |
- C ecx counter, size-2 to 0, -1 or -2 |
- C edx src, incrementing |
- C |
- C mm0 sum 0mod3 |
- C mm1 sum 1mod3 |
- C mm2 sum 2mod3 |
- C mm3 |
- C mm4 |
- C mm5 |
- C mm6 0x0000000000FFFFFF |
- C mm7 0x00000000FFFFFFFF |
- |
- movd (%edx), %mm3 C limb with index 0 mod 3 |
- paddq %mm3, %mm0 |
- |
- movd 4(%edx), %mm3 C limb with index 1 mod 3 |
- paddq %mm3, %mm1 |
- |
- movd 8(%edx), %mm3 C limb with index 2 mod 3 |
- paddq %mm3, %mm2 |
- |
- addl $12, %edx |
- subl $3, %ecx |
- ja L(top) C loop while at least 3 limbs remain |
- |
- |
- C ecx is -2, -1 or 0 representing 0, 1 or 2 more limbs, respectively |
- |
- addl $1, %ecx C sets SF and ZF for the two branches below |
- js L(combine) C 0 more |
- |
- movd (%edx), %mm3 |
- paddq %mm3, %mm0 |
- |
- jz L(combine) C 1 more, flags are still those from the addl |
- |
- movd 4(%edx), %mm3 |
- paddq %mm3, %mm1 |
- |
-L(combine): |
- movq %mm7, %mm3 C low halves |
- pand %mm0, %mm3 |
- |
- movq %mm7, %mm4 |
- pand %mm1, %mm4 |
- |
- movq %mm7, %mm5 |
- pand %mm2, %mm5 |
- |
- psrlq $32, %mm0 C high halves |
- psrlq $32, %mm1 |
- psrlq $32, %mm2 |
- |
- paddq %mm0, %mm4 C fold high halves to give 33 bits each |
- paddq %mm1, %mm5 C high half of each sum joins the next sum up |
- paddq %mm2, %mm3 C and the 2mod3 high half wraps round to 0mod3 |
- |
- psllq $8, %mm4 C combine at respective offsets |
- psllq $16, %mm5 |
- paddq %mm4, %mm3 |
- paddq %mm5, %mm3 C 0x000cxxxxxxxxxxxx, 50 bits |
- |
- pand %mm3, %mm6 C fold at 24 bits |
- psrlq $24, %mm3 |
- |
- paddq %mm6, %mm3 C congruent mod 2^24-1 but not fully reduced |
- movd %mm3, %eax C return value, low dword of the folded sum |
- |
- ASSERT(z, C nothing left in high dword |
- `psrlq $32, %mm3 |
- movd %mm3, %ecx |
- orl %ecx, %ecx') |
- |
- emms C clear MMX state before returning |
- ret |
- |
-EPILOGUE() |