gcc/gmp/mpn/x86/k6/mmx/logops_n.asm - Issue 3050029: [gcc] GCC 4.5.0=>4.5.1

Unified Diff: gcc/gmp/mpn/x86/k6/mmx/logops_n.asm

Issue 3050029: [gcc] GCC 4.5.0=>4.5.1 (Closed) Base URL: ssh://git@gitrw.chromium.org:9222/nacl-toolchain.git

Patch Set: Created 10 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: gcc/gmp/mpn/x86/k6/mmx/logops_n.asm

diff --git a/gcc/gmp/mpn/x86/k6/mmx/logops_n.asm b/gcc/gmp/mpn/x86/k6/mmx/logops_n.asm

deleted file mode 100644

index a6272131a2b0cca40f3fe301b0269c678b2f93a7..0000000000000000000000000000000000000000

--- a/gcc/gmp/mpn/x86/k6/mmx/logops_n.asm

+++ /dev/null

@@ -1,215 +0,0 @@

-dnl AMD K6-2 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,

-dnl mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.

-dnl

-dnl This file is part of the GNU MP Library.

-dnl

-dnl The GNU MP Library is free software; you can redistribute it and/or

-dnl modify it under the terms of the GNU Lesser General Public License as

-dnl published by the Free Software Foundation; either version 3 of the

-dnl License, or (at your option) any later version.

-dnl

-dnl The GNU MP Library is distributed in the hope that it will be useful,

-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of

-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

-dnl Lesser General Public License for more details.

-dnl

-dnl You should have received a copy of the GNU Lesser General Public License

-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.

-include(`../config.m4')

-NAILS_SUPPORT(0-31)

-C           alignment dst/src1/src2, A=0mod8, N=4mod8

-C       A/A/A  A/A/N  A/N/A  A/N/N  N/A/A  N/A/N  N/N/A  N/N/N

-C

-C K6-2   1.2    1.5    1.5    1.2    1.2    1.5    1.5    1.2   and,andn,ior,xor

-C K6-2   1.5    1.75   2.0    1.75   1.75   2.0    1.75   1.5   iorn,xnor

-C K6-2   1.75   2.0    2.0    2.0    2.0    2.0    2.0    1.75  nand,nior

-C

-C K6     1.5    1.68   1.75   1.2    1.75   1.75   1.68   1.5   and,andn,ior,xor

-C K6     2.0    2.0    2.25   2.25   2.25   2.25   2.0    2.0   iorn,xnor

-C K6     2.0    2.25   2.25   2.25   2.25   2.25   2.25   2.0   nand,nior

-dnl M4_p and M4_i are the MMX and integer instructions

-dnl M4_*_neg_dst means whether to negate the final result before writing

-dnl M4_*_neg_src2 means whether to negate the src2 values before using them

-dnl Usage: M4_choose_op(operation, p, p_neg_dst, p_neg_src2,

-dnl                                 i, i_neg_dst, i_neg_src2)

-dnl

-dnl If the symbol OPERATION_<operation> is defined, record this operation as

-dnl the one being built: M4_function becomes mpn_<operation>, M4_operation

-dnl becomes the bare operation name, and the remaining six arguments are

-dnl stored in M4_p, M4_p_neg_dst, M4_p_neg_src2, M4_i, M4_i_neg_dst and

-dnl M4_i_neg_src2.  If the symbol is not defined the call expands to nothing.

-dnl Exactly seven arguments are required (checked by m4_assert_numargs).

-define(M4_choose_op,

-m4_assert_numargs(7)

-`ifdef(`OPERATION_$1',`

-define(`M4_function', `mpn_$1')

-define(`M4_operation', `$1')

-define(`M4_p', `$2')

-define(`M4_p_neg_dst', `$3')

-define(`M4_p_neg_src2',`$4')

-define(`M4_i', `$5')

-define(`M4_i_neg_dst', `$6')

-define(`M4_i_neg_src2',`$7')

-')')

-dnl xnor is done in "iorn" style because it's a touch faster than "nior"

-dnl style (the two are equivalent for xor).

-dnl

-dnl pandn can't be used with nails.

-dnl Operation table.  Columns of each M4_choose_op call, in order:

-dnl   operation, MMX insn, MMX neg_dst, MMX neg_src2,

-dnl              integer insn, integer neg_dst, integer neg_src2

-dnl Only the entry whose OPERATION_<operation> symbol is defined has any

-dnl effect, since M4_choose_op is a no-op otherwise.

-dnl

-dnl andn_n has two variants: with no nail bits the MMX side uses pandn

-dnl directly with no separate negation, otherwise it uses pand with src2

-dnl negated first.  The integer side is the same in both variants.

-dnl

-dnl The final ifdef raises an m4 error if none of the entries matched, i.e.

-dnl M4_function was never defined.

-M4_choose_op( and_n, pand,0,0, andl,0,0)

-ifelse(GMP_NAIL_BITS,0,

-`M4_choose_op(andn_n, pandn,0,0, andl,0,1)',

-`M4_choose_op(andn_n, pand,0,1, andl,0,1)')

-M4_choose_op( nand_n, pand,1,0, andl,1,0)

-M4_choose_op( ior_n, por,0,0, orl,0,0)

-M4_choose_op( iorn_n, por,0,1, orl,0,1)

-M4_choose_op( nior_n, por,1,0, orl,1,0)

-M4_choose_op( xor_n, pxor,0,0, xorl,0,0)

-M4_choose_op( xnor_n, pxor,0,1, xorl,0,1)

-ifdef(`M4_function',,

-`m4_error(`Unrecognised or undefined OPERATION symbol

-')')

-MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)

-C void M4_function (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,

-C mp_size_t size);

-C

-C Do src1,size M4_operation src2,size, storing the result in dst,size.

-C

-C Unaligned movq loads and stores are a bit slower than aligned ones. The

-C test at the start of the routine checks the alignment of src1 and if

-C necessary processes one limb separately at the low end to make it aligned.

-C

-C The raw speeds without this alignment switch are as follows.

-C

-C alignment dst/src1/src2, A=0mod8, N=4mod8

-C A/A/A A/A/N A/N/A A/N/N N/A/A N/A/N N/N/A N/N/N

-C

-C K6 1.5 2.0 1.5 2.0 and,andn,ior,xor

-C K6 1.75 2.2 2.0 2.28 iorn,xnor

-C K6 2.0 2.25 2.35 2.28 nand,nior

-C

-C Future:

-C

-C K6 can do one 64-bit load per cycle so each of these routines should be

-C able to approach 1.0 c/l, if aligned. The basic and/andn/ior/xor might be

-C able to get 1.0 with just a 4 limb loop, being 3 instructions per 2 limbs.

-C The others are 4 instructions per 2 limbs, and so can only approach 1.0

-C because there's nowhere to hide some loop control.

-C Names for the four stack arguments, at offsets 4, 8, 12 and 16 in the

-C order dst, src1, src2, size.

-C NOTE(review): defframe, deflit and FRAME_pushl are defined outside this

-C file; presumably the offsets are relative to the stack pointer at entry

-C and FRAME tracks later pushes so the PARAM_ names stay valid -- confirm

-C against the macro definitions.

-defframe(PARAM_SIZE,16)

-defframe(PARAM_SRC2,12)

-defframe(PARAM_SRC1,8)

-defframe(PARAM_DST, 4)

-deflit(`FRAME',0)

-C Body of the selected logical operation.  Overall shape:

-C   1. size not above 1: one limb with the integer instruction, return.

-C   2. if bit 2 of the src1 address is set, do one limb with the integer

-C      instruction and step all three pointers past it.

-C   3. if only one limb is left, do it with the integer instruction, return.

-C   4. otherwise process two limbs per iteration with MMX, then one final

-C      limb with the integer instruction if the remaining size was odd.

-C Only the alignment of src1 is examined; dst and src2 are used as given.

- TEXT

- ALIGN(32)

-PROLOGUE(M4_function)

-C Load the arguments: ecx = size, eax = src1, ebx = src2, edx = dst.  ebx is

-C pushed first and is popped again on every return path.

- movl PARAM_SIZE, %ecx

- pushl %ebx FRAME_pushl()

- movl PARAM_SRC1, %eax

- movl PARAM_SRC2, %ebx

-C The flags from this compare are consumed by the ja two instructions later;

-C the movl in between does not alter them.

- cmpl $1, %ecx

- movl PARAM_DST, %edx

- ja L(two_or_more)

-C Single limb case.  The src2 limb is fetched into ecx before ebx is

-C restored, so the pop can happen straight away.

-C NOTE(review): the compare is unsigned, so size 0 also lands here and one

-C limb would be read and written; presumably callers always pass size of at

-C least 1 -- confirm.

- movl (%ebx), %ecx

- popl %ebx

-C Optional complement of the src2 limb, emitted only when M4_i_neg_src2 is 1.

-ifelse(M4_i_neg_src2,1,`notl_or_xorl_GMP_NUMB_MASK( %ecx)')

- M4_i (%eax), %ecx

-C Optional complement of the result, emitted only when M4_i_neg_dst is 1.

-ifelse(M4_i_neg_dst,1,` notl_or_xorl_GMP_NUMB_MASK( %ecx)')

- movl %ecx, (%edx)

- ret

-L(two_or_more):

- C eax src1

- C ebx src2

- C ecx size

- C edx dst

- C esi

- C edi

- C ebp

- pushl %esi FRAME_pushl()

-C Bit 2 of the src1 address clear: skip the single limb step below.

- testl $4, %eax

- jz L(alignment_ok)

-C Process one limb with the integer instruction, advancing src2, src1 and

-C dst by 4 bytes each and reducing the size by one.

- movl (%ebx), %esi

- addl $4, %ebx

-ifelse(M4_i_neg_src2,1,`notl_or_xorl_GMP_NUMB_MASK( %esi)')

- M4_i (%eax), %esi

- addl $4, %eax

-ifelse(M4_i_neg_dst,1,` notl_or_xorl_GMP_NUMB_MASK( %esi)')

- movl %esi, (%edx)

- addl $4, %edx

- decl %ecx

-L(alignment_ok):

-C esi keeps the remaining size in limbs.  The shift turns ecx into the

-C number of limb pairs and leaves the low bit of the size in the carry flag.

- movl %ecx, %esi

- shrl %ecx

- jnz L(still_two_or_more)

-C No complete pair remains, so exactly one limb is left.  Handle it with the

-C integer instruction and return; no MMX instruction has been executed on

-C this path and there is no emms_or_femms here.

- movl (%ebx), %ecx

- popl %esi

-ifelse(M4_i_neg_src2,1,`notl_or_xorl_GMP_NUMB_MASK( %ecx)')

- M4_i (%eax), %ecx

-ifelse(M4_i_neg_dst,1,` notl_or_xorl_GMP_NUMB_MASK( %ecx)')

- popl %ebx

- movl %ecx, (%edx)

- ret

-L(still_two_or_more):

-C When either MMX negation flag is 1, build the complement mask in mm7 for

-C the pxor instructions in the loop.

-ifelse(eval(M4_p_neg_src2 || M4_p_neg_dst),1,`

- pcmpeqd %mm7, %mm7 C all ones

-ifelse(GMP_NAIL_BITS,0,,`psrld $GMP_NAIL_BITS, %mm7') C clear nails

-')

- ALIGN(16)

-L(top):

- C eax src1

- C ebx src2

- C ecx counter

- C edx dst

- C esi

- C edi

- C ebp

- C

- C carry bit is low of size

-C Each iteration handles the 8 bytes at offset 8*ecx-8 from each pointer, so

-C the pairs are processed from the highest index down to the lowest as ecx

-C counts down to zero.

- movq -8(%ebx,%ecx,8), %mm0

-ifelse(M4_p_neg_src2,1,`pxor %mm7, %mm0')

- M4_p -8(%eax,%ecx,8), %mm0

-ifelse(M4_p_neg_dst,1,` pxor %mm7, %mm0')

- movq %mm0, -8(%edx,%ecx,8)

- loop L(top)

-C This branch consumes the carry flag set by the shrl before the loop, so

-C nothing in the loop body may modify it.  Carry clear means the size was

-C even and there is no leftover limb.

- jnc L(no_extra)

-C Odd size: the leftover limb is the last one, at index esi-1.  ebx is

-C reused as the data register here; its saved value is popped below.

- movl -4(%ebx,%esi,4), %ebx

-ifelse(M4_i_neg_src2,1,`notl_or_xorl_GMP_NUMB_MASK( %ebx)')

- M4_i -4(%eax,%esi,4), %ebx

-ifelse(M4_i_neg_dst,1,` notl_or_xorl_GMP_NUMB_MASK( %ebx)')

- movl %ebx, -4(%edx,%esi,4)

-L(no_extra):

-C Restore the two saved registers in reverse push order, then leave MMX

-C state before returning.

- popl %esi

- popl %ebx

- emms_or_femms

- ret

-EPILOGUE()

« no previous file with comments | « gcc/gmp/mpn/x86/k6/mmx/dive_1.asm ('k') | gcc/gmp/mpn/x86/k6/mode1o.asm » ('j') | no next file with comments »