gcc/gmp/mpn/x86/pentium/lshift.asm - Issue 3050029: [gcc] GCC 4.5.0=>4.5.1

Unified Diff: gcc/gmp/mpn/x86/pentium/lshift.asm

Issue 3050029: [gcc] GCC 4.5.0=>4.5.1 (Closed) Base URL: ssh://git@gitrw.chromium.org:9222/nacl-toolchain.git

Patch Set: Created 10 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: gcc/gmp/mpn/x86/pentium/lshift.asm

diff --git a/gcc/gmp/mpn/x86/pentium/lshift.asm b/gcc/gmp/mpn/x86/pentium/lshift.asm

deleted file mode 100644

index ece51e06d3a855481d64290a3e791e031d7237e8..0000000000000000000000000000000000000000

--- a/gcc/gmp/mpn/x86/pentium/lshift.asm

+++ /dev/null

@@ -1,233 +0,0 @@

-dnl Intel Pentium mpn_lshift -- mpn left shift.

-dnl Foundation, Inc.

-dnl

-dnl This file is part of the GNU MP Library.

-dnl

-dnl The GNU MP Library is free software; you can redistribute it and/or

-dnl modify it under the terms of the GNU Lesser General Public License as

-dnl published by the Free Software Foundation; either version 3 of the

-dnl License, or (at your option) any later version.

-dnl

-dnl The GNU MP Library is distributed in the hope that it will be useful,

-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of

-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

-dnl Lesser General Public License for more details.

-dnl

-dnl You should have received a copy of the GNU Lesser General Public License

-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.

-include(`../config.m4')

-C cycles/limb

-C P5,P54: 6.0

-C P55: 5.375

-C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,

-C unsigned shift);

-C

-C The main shift-by-N loop should run at 5.375 c/l and that's what P55 does,

-C but P5 and P54 run only at 6.0 c/l, which is 4 cycles lost somewhere.

-defframe(PARAM_SHIFT,16)

-defframe(PARAM_SIZE, 12)

-defframe(PARAM_SRC, 8)

-defframe(PARAM_DST, 4)

- TEXT

- ALIGN(8)

-PROLOGUE(mpn_lshift)

- pushl %edi

- pushl %esi

- pushl %ebx

- pushl %ebp

-deflit(`FRAME',16)

- movl PARAM_DST,%edi

- movl PARAM_SRC,%esi

- movl PARAM_SIZE,%ebp

- movl PARAM_SHIFT,%ecx

-C We can use faster code for shift-by-1 under certain conditions.

- cmp $1,%ecx

- jne L(normal)

- leal 4(%esi),%eax

- cmpl %edi,%eax

- jnc L(special) C jump if s_ptr + 1 >= res_ptr

- leal (%esi,%ebp,4),%eax

- cmpl %eax,%edi

- jnc L(special) C jump if res_ptr >= s_ptr + size

-L(normal):

- leal -4(%edi,%ebp,4),%edi

- leal -4(%esi,%ebp,4),%esi

- movl (%esi),%edx

- subl $4,%esi

- xorl %eax,%eax

- shldl( %cl, %edx, %eax) C compute carry limb

- pushl %eax C push carry limb onto stack

- decl %ebp

- pushl %ebp

- shrl $3,%ebp

- jz L(end)

- movl (%edi),%eax C fetch destination cache line

- ALIGN(4)

-L(oop): movl -28(%edi),%eax C fetch destination cache line

- movl %edx,%ebx

- movl (%esi),%eax

- movl -4(%esi),%edx

- shldl( %cl, %eax, %ebx)

- shldl( %cl, %edx, %eax)

- movl %ebx,(%edi)

- movl %eax,-4(%edi)

- movl -8(%esi),%ebx

- movl -12(%esi),%eax

- shldl( %cl, %ebx, %edx)

- shldl( %cl, %eax, %ebx)

- movl %edx,-8(%edi)

- movl %ebx,-12(%edi)

- movl -16(%esi),%edx

- movl -20(%esi),%ebx

- shldl( %cl, %edx, %eax)

- shldl( %cl, %ebx, %edx)

- movl %eax,-16(%edi)

- movl %edx,-20(%edi)

- movl -24(%esi),%eax

- movl -28(%esi),%edx

- shldl( %cl, %eax, %ebx)

- shldl( %cl, %edx, %eax)

- movl %ebx,-24(%edi)

- movl %eax,-28(%edi)

- subl $32,%esi

- subl $32,%edi

- decl %ebp

- jnz L(oop)

-L(end): popl %ebp

- andl $7,%ebp

- jz L(end2)

-L(oop2):

- movl (%esi),%eax

- shldl( %cl,%eax,%edx)

- movl %edx,(%edi)

- movl %eax,%edx

- subl $4,%esi

- subl $4,%edi

- decl %ebp

- jnz L(oop2)

-L(end2):

- shll %cl,%edx C compute least significant limb

- movl %edx,(%edi) C store it

- popl %eax C pop carry limb

- popl %ebp

- popl %ebx

- popl %esi

- popl %edi

- ret

-C We loop from least significant end of the arrays, which is only

-C permissable if the source and destination don't overlap, since the

-C function is documented to work for overlapping source and destination.

-L(special):

- movl (%esi),%edx

- addl $4,%esi

- decl %ebp

- pushl %ebp

- shrl $3,%ebp

- addl %edx,%edx

- incl %ebp

- decl %ebp

- jz L(Lend)

- movl (%edi),%eax C fetch destination cache line

- ALIGN(4)

-L(Loop):

- movl 28(%edi),%eax C fetch destination cache line

- movl %edx,%ebx

- movl (%esi),%eax

- movl 4(%esi),%edx

- adcl %eax,%eax

- movl %ebx,(%edi)

- adcl %edx,%edx

- movl %eax,4(%edi)

- movl 8(%esi),%ebx

- movl 12(%esi),%eax

- adcl %ebx,%ebx

- movl %edx,8(%edi)

- adcl %eax,%eax

- movl %ebx,12(%edi)

- movl 16(%esi),%edx

- movl 20(%esi),%ebx

- adcl %edx,%edx

- movl %eax,16(%edi)

- adcl %ebx,%ebx

- movl %edx,20(%edi)

- movl 24(%esi),%eax

- movl 28(%esi),%edx

- adcl %eax,%eax

- movl %ebx,24(%edi)

- adcl %edx,%edx

- movl %eax,28(%edi)

- leal 32(%esi),%esi C use leal not to clobber carry

- leal 32(%edi),%edi

- decl %ebp

- jnz L(Loop)

-L(Lend):

- popl %ebp

- sbbl %eax,%eax C save carry in %eax

- andl $7,%ebp

- jz L(Lend2)

- addl %eax,%eax C restore carry from eax

-L(Loop2):

- movl %edx,%ebx

- movl (%esi),%edx

- adcl %edx,%edx

- movl %ebx,(%edi)

- leal 4(%esi),%esi C use leal not to clobber carry

- leal 4(%edi),%edi

- decl %ebp

- jnz L(Loop2)

- jmp L(L1)

-L(Lend2):

- addl %eax,%eax C restore carry from eax

-L(L1): movl %edx,(%edi) C store last limb

- sbbl %eax,%eax

- negl %eax

- popl %ebp

- popl %ebx

- popl %esi

- popl %edi

- ret

-EPILOGUE()

« no previous file with comments | « gcc/gmp/mpn/x86/pentium/hamdist.asm ('k') | gcc/gmp/mpn/x86/pentium/mmx/hamdist.asm » ('j') | no next file with comments »