| Index: gcc/gmp/mpn/x86/k6/mmx/logops_n.asm
|
| diff --git a/gcc/gmp/mpn/x86/k6/mmx/logops_n.asm b/gcc/gmp/mpn/x86/k6/mmx/logops_n.asm
|
| deleted file mode 100644
|
| index a6272131a2b0cca40f3fe301b0269c678b2f93a7..0000000000000000000000000000000000000000
|
| --- a/gcc/gmp/mpn/x86/k6/mmx/logops_n.asm
|
| +++ /dev/null
|
| @@ -1,215 +0,0 @@
|
| -dnl AMD K6-2 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
|
| -dnl mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
|
| -
|
| -dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
|
| -dnl
|
| -dnl This file is part of the GNU MP Library.
|
| -dnl
|
| -dnl The GNU MP Library is free software; you can redistribute it and/or
|
| -dnl modify it under the terms of the GNU Lesser General Public License as
|
| -dnl published by the Free Software Foundation; either version 3 of the
|
| -dnl License, or (at your option) any later version.
|
| -dnl
|
| -dnl The GNU MP Library is distributed in the hope that it will be useful,
|
| -dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| -dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| -dnl Lesser General Public License for more details.
|
| -dnl
|
| -dnl You should have received a copy of the GNU Lesser General Public License
|
| -dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
|
| -
|
| -include(`../config.m4')
|
| -
|
| -NAILS_SUPPORT(0-31)
|
| -
|
| -
|
| -C alignment dst/src1/src2, A=0mod8, N=4mod8
|
| -C A/A/A A/A/N A/N/A A/N/N N/A/A N/A/N N/N/A N/N/N
|
| -C
|
| -C K6-2 1.2 1.5 1.5 1.2 1.2 1.5 1.5 1.2 and,andn,ior,xor
|
| -C K6-2 1.5 1.75 2.0 1.75 1.75 2.0 1.75 1.5 iorn,xnor
|
| -C K6-2 1.75 2.0 2.0 2.0 2.0 2.0 2.0 1.75 nand,nior
|
| -C
|
| -C K6 1.5 1.68 1.75 1.2 1.75 1.75 1.68 1.5 and,andn,ior,xor
|
| -C K6 2.0 2.0 2.25 2.25 2.25 2.25 2.0 2.0 iorn,xnor
|
| -C K6 2.0 2.25 2.25 2.25 2.25 2.25 2.25 2.0 nand,nior
|
| -
|
| -
|
| -dnl M4_p and M4_i are the MMX and integer instructions
|
| -dnl M4_*_neg_dst means whether to negate the final result before writing
|
| -dnl M4_*_neg_src2 means whether to negate the src2 values before using them
|
| -
|
| -define(M4_choose_op,
|
| -m4_assert_numargs(7)
|
| -`ifdef(`OPERATION_$1',`
|
| -define(`M4_function', `mpn_$1')
|
| -define(`M4_operation', `$1')
|
| -define(`M4_p', `$2')
|
| -define(`M4_p_neg_dst', `$3')
|
| -define(`M4_p_neg_src2',`$4')
|
| -define(`M4_i', `$5')
|
| -define(`M4_i_neg_dst', `$6')
|
| -define(`M4_i_neg_src2',`$7')
|
| -')')
|
| -
|
| -dnl xnor is done in "iorn" style because it's a touch faster than "nior"
|
| -dnl style (the two are equivalent for xor).
|
| -dnl
|
| -dnl pandn can't be used with nails.
|
| -
|
| -M4_choose_op( and_n, pand,0,0, andl,0,0)
|
| -ifelse(GMP_NAIL_BITS,0,
|
| -`M4_choose_op(andn_n, pandn,0,0, andl,0,1)',
|
| -`M4_choose_op(andn_n, pand,0,1, andl,0,1)')
|
| -M4_choose_op( nand_n, pand,1,0, andl,1,0)
|
| -M4_choose_op( ior_n, por,0,0, orl,0,0)
|
| -M4_choose_op( iorn_n, por,0,1, orl,0,1)
|
| -M4_choose_op( nior_n, por,1,0, orl,1,0)
|
| -M4_choose_op( xor_n, pxor,0,0, xorl,0,0)
|
| -M4_choose_op( xnor_n, pxor,0,1, xorl,0,1)
|
| -
|
| -ifdef(`M4_function',,
|
| -`m4_error(`Unrecognised or undefined OPERATION symbol
|
| -')')
|
| -
|
| -MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
|
| -
|
| -
|
| -C void M4_function (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
|
| -C mp_size_t size);
|
| -C
|
| -C Do src1,size M4_operation src2,size, storing the result in dst,size.
|
| -C
|
| -C Unaligned movq loads and stores are a bit slower than aligned ones. The
|
| -C test at the start of the routine checks the alignment of src1 and if
|
| -C necessary processes one limb separately at the low end to make it aligned.
|
| -C
|
| -C The raw speeds without this alignment switch are as follows.
|
| -C
|
| -C alignment dst/src1/src2, A=0mod8, N=4mod8
|
| -C A/A/A A/A/N A/N/A A/N/N N/A/A N/A/N N/N/A N/N/N
|
| -C
|
| -C K6 1.5 2.0 1.5 2.0 and,andn,ior,xor
|
| -C K6 1.75 2.2 2.0 2.28 iorn,xnor
|
| -C K6 2.0 2.25 2.35 2.28 nand,nior
|
| -C
|
| -C
|
| -C Future:
|
| -C
|
| -C K6 can do one 64-bit load per cycle so each of these routines should be
|
| -C able to approach 1.0 c/l, if aligned. The basic and/andn/ior/xor might be
|
| -C able to get 1.0 with just a 4 limb loop, being 3 instructions per 2 limbs.
|
| -C The others are 4 instructions per 2 limbs, and so can only approach 1.0
|
| -C because there's nowhere to hide some loop control.
|
| -
|
| -defframe(PARAM_SIZE,16)
|
| -defframe(PARAM_SRC2,12)
|
| -defframe(PARAM_SRC1,8)
|
| -defframe(PARAM_DST, 4)
|
| -deflit(`FRAME',0)
|
| -
|
| - TEXT
|
| - ALIGN(32)
|
| -PROLOGUE(M4_function)
|
| - movl PARAM_SIZE, %ecx
|
| - pushl %ebx FRAME_pushl()
|
| -
|
| - movl PARAM_SRC1, %eax
|
| -
|
| - movl PARAM_SRC2, %ebx
|
| - cmpl $1, %ecx
|
| -
|
| - movl PARAM_DST, %edx
|
| - ja L(two_or_more)
|
| -
|
| -
|
| - movl (%ebx), %ecx
|
| - popl %ebx
|
| -ifelse(M4_i_neg_src2,1,`notl_or_xorl_GMP_NUMB_MASK( %ecx)')
|
| - M4_i (%eax), %ecx
|
| -ifelse(M4_i_neg_dst,1,` notl_or_xorl_GMP_NUMB_MASK( %ecx)')
|
| - movl %ecx, (%edx)
|
| -
|
| - ret
|
| -
|
| -
|
| -L(two_or_more):
|
| - C eax src1
|
| - C ebx src2
|
| - C ecx size
|
| - C edx dst
|
| - C esi
|
| - C edi
|
| - C ebp
|
| -
|
| - pushl %esi FRAME_pushl()
|
| - testl $4, %eax
|
| - jz L(alignment_ok)
|
| -
|
| - movl (%ebx), %esi
|
| - addl $4, %ebx
|
| -ifelse(M4_i_neg_src2,1,`notl_or_xorl_GMP_NUMB_MASK( %esi)')
|
| - M4_i (%eax), %esi
|
| - addl $4, %eax
|
| -ifelse(M4_i_neg_dst,1,` notl_or_xorl_GMP_NUMB_MASK( %esi)')
|
| - movl %esi, (%edx)
|
| - addl $4, %edx
|
| - decl %ecx
|
| -
|
| -L(alignment_ok):
|
| - movl %ecx, %esi
|
| - shrl %ecx
|
| - jnz L(still_two_or_more)
|
| -
|
| - movl (%ebx), %ecx
|
| - popl %esi
|
| -ifelse(M4_i_neg_src2,1,`notl_or_xorl_GMP_NUMB_MASK( %ecx)')
|
| - M4_i (%eax), %ecx
|
| -ifelse(M4_i_neg_dst,1,` notl_or_xorl_GMP_NUMB_MASK( %ecx)')
|
| - popl %ebx
|
| - movl %ecx, (%edx)
|
| - ret
|
| -
|
| -
|
| -L(still_two_or_more):
|
| -ifelse(eval(M4_p_neg_src2 || M4_p_neg_dst),1,`
|
| - pcmpeqd %mm7, %mm7 C all ones
|
| -ifelse(GMP_NAIL_BITS,0,,`psrld $GMP_NAIL_BITS, %mm7') C clear nails
|
| -')
|
| -
|
| - ALIGN(16)
|
| -L(top):
|
| - C eax src1
|
| - C ebx src2
|
| - C ecx counter
|
| - C edx dst
|
| - C esi
|
| - C edi
|
| - C ebp
|
| - C
|
| - C carry bit is low of size
|
| -
|
| - movq -8(%ebx,%ecx,8), %mm0
|
| -ifelse(M4_p_neg_src2,1,`pxor %mm7, %mm0')
|
| - M4_p -8(%eax,%ecx,8), %mm0
|
| -ifelse(M4_p_neg_dst,1,` pxor %mm7, %mm0')
|
| - movq %mm0, -8(%edx,%ecx,8)
|
| -
|
| - loop L(top)
|
| -
|
| -
|
| - jnc L(no_extra)
|
| -
|
| - movl -4(%ebx,%esi,4), %ebx
|
| -ifelse(M4_i_neg_src2,1,`notl_or_xorl_GMP_NUMB_MASK( %ebx)')
|
| - M4_i -4(%eax,%esi,4), %ebx
|
| -ifelse(M4_i_neg_dst,1,` notl_or_xorl_GMP_NUMB_MASK( %ebx)')
|
| - movl %ebx, -4(%edx,%esi,4)
|
| -L(no_extra):
|
| -
|
| - popl %esi
|
| - popl %ebx
|
| - emms_or_femms
|
| - ret
|
| -
|
| -EPILOGUE()
|
|
|