Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(116)

Unified Diff: gcc/gmp/mpn/x86/sqr_basecase.asm

Issue 3050029: [gcc] GCC 4.5.0=>4.5.1 (Closed) Base URL: ssh://git@gitrw.chromium.org:9222/nacl-toolchain.git
Patch Set: Created 10 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « gcc/gmp/mpn/x86/rshift.asm ('k') | gcc/gmp/mpn/x86/t-zdisp2.pl » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: gcc/gmp/mpn/x86/sqr_basecase.asm
diff --git a/gcc/gmp/mpn/x86/sqr_basecase.asm b/gcc/gmp/mpn/x86/sqr_basecase.asm
deleted file mode 100644
index 9a7e13327b8c41c91ec01d28bc0a89cad846b069..0000000000000000000000000000000000000000
--- a/gcc/gmp/mpn/x86/sqr_basecase.asm
+++ /dev/null
@@ -1,348 +0,0 @@
-dnl x86 generic mpn_sqr_basecase -- square an mpn number.
-
-dnl Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
-dnl
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-dnl
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-
-C cycles/crossproduct cycles/triangleproduct
-C P5:
-C P6:
-C K6:
-C K7:
-C P4:
-
-
-C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
-C
-C The algorithm is basically the same as mpn/generic/sqr_basecase.c, but a
-C lot of function call overheads are avoided, especially when the size is
-C small.
-C
-C The mul1 loop is not unrolled like mul_1.asm, it doesn't seem worth the
-C code size to do so here.
-C
-C Enhancements:
-C
-C The addmul loop here is also not unrolled like aorsmul_1.asm and
-C mul_basecase.asm are. Perhaps it should be done. It'd add to the
-C complexity, but if it's worth doing in the other places then it should be
-C worthwhile here.
-C
-C A fully-unrolled style like other sqr_basecase.asm versions (k6, k7, p6)
-C might be worth considering. That'd add quite a bit to the code size, but
-C only as much as is used would be dragged into L1 cache.
-
-defframe(PARAM_SIZE,12)
-defframe(PARAM_SRC, 8)
-defframe(PARAM_DST, 4)
-
- TEXT
- ALIGN(8)
-PROLOGUE(mpn_sqr_basecase)
-deflit(`FRAME',0)
-
- movl PARAM_SIZE, %edx
-
- movl PARAM_SRC, %eax
-
- cmpl $2, %edx
- movl PARAM_DST, %ecx
-
- je L(two_limbs)
- ja L(three_or_more)
-
-
-C -----------------------------------------------------------------------------
-C one limb only
- C eax src
- C ebx
- C ecx dst
- C edx
-
- movl (%eax), %eax
- mull %eax
- movl %eax, (%ecx)
- movl %edx, 4(%ecx)
- ret
-
-
-C -----------------------------------------------------------------------------
- ALIGN(8)
-L(two_limbs):
- C eax src
- C ebx
- C ecx dst
- C edx
-
- pushl %ebx
- pushl %ebp
-
- movl %eax, %ebx
- movl (%eax), %eax
-
- mull %eax C src[0]^2
-
- pushl %esi
- pushl %edi
-
- movl %edx, %esi C dst[1]
- movl %eax, (%ecx) C dst[0]
-
- movl 4(%ebx), %eax
- mull %eax C src[1]^2
-
- movl %eax, %edi C dst[2]
- movl %edx, %ebp C dst[3]
-
- movl (%ebx), %eax
- mull 4(%ebx) C src[0]*src[1]
-
- addl %eax, %esi
-
- adcl %edx, %edi
-
- adcl $0, %ebp
- addl %esi, %eax
-
- adcl %edi, %edx
- movl %eax, 4(%ecx)
-
- adcl $0, %ebp
-
- movl %edx, 8(%ecx)
- movl %ebp, 12(%ecx)
-
- popl %edi
- popl %esi
-
- popl %ebp
- popl %ebx
-
- ret
-
-
-C -----------------------------------------------------------------------------
- ALIGN(8)
-L(three_or_more):
-deflit(`FRAME',0)
- C eax src
- C ebx
- C ecx dst
- C edx size
-
- pushl %ebx FRAME_pushl()
- pushl %edi FRAME_pushl()
-
- pushl %esi FRAME_pushl()
- pushl %ebp FRAME_pushl()
-
- leal (%ecx,%edx,4), %edi C &dst[size], end of this mul1
- leal (%eax,%edx,4), %esi C &src[size]
-
-C First multiply src[0]*src[1..size-1] and store at dst[1..size].
-
- movl (%eax), %ebp C src[0], multiplier
- movl %edx, %ecx
-
- negl %ecx C -size
- xorl %ebx, %ebx C clear carry limb
-
- incl %ecx C -(size-1)
-
-L(mul1):
- C eax scratch
- C ebx carry
- C ecx counter, limbs, negative
- C edx scratch
- C esi &src[size]
- C edi &dst[size]
- C ebp multiplier
-
- movl (%esi,%ecx,4), %eax
- mull %ebp
- addl %eax, %ebx
- adcl $0, %edx
- movl %ebx, (%edi,%ecx,4)
- movl %edx, %ebx
- incl %ecx
- jnz L(mul1)
-
- movl %ebx, (%edi)
-
-
- C Add products src[n]*src[n+1..size-1] at dst[2*n-1...], for
- C n=1..size-2.
- C
- C The last product src[size-2]*src[size-1], which is the end corner
- C of the product triangle, is handled separately at the end to save
- C looping overhead. If size is 3 then it's only this that needs to
- C be done.
- C
- C In the outer loop %esi is a constant, and %edi just advances by 1
- C limb each time. The size of the operation decreases by 1 limb
- C each time.
-
- C eax
- C ebx carry (needing carry flag added)
- C ecx
- C edx
- C esi &src[size]
- C edi &dst[size]
- C ebp
-
- movl PARAM_SIZE, %ecx
- subl $3, %ecx
- jz L(corner)
-
- negl %ecx
-
-dnl re-use parameter space
-define(VAR_OUTER,`PARAM_DST')
-
-L(outer):
- C eax
- C ebx
- C ecx outer loop counter, -(size-3) to -1
- C edx
- C esi &src[size]
- C edi dst, pointing at stored carry limb of previous loop
- C ebp
-
- movl %ecx, VAR_OUTER
- addl $4, %edi C advance dst end
-
- movl -8(%esi,%ecx,4), %ebp C next multiplier
- subl $1, %ecx
-
- xorl %ebx, %ebx C initial carry limb
-
-L(inner):
- C eax scratch
- C ebx carry (needing carry flag added)
- C ecx counter, -n-1 to -1
- C edx scratch
- C esi &src[size]
- C edi dst end of this addmul
- C ebp multiplier
-
- movl (%esi,%ecx,4), %eax
- mull %ebp
- addl %ebx, %eax
- adcl $0, %edx
- addl %eax, (%edi,%ecx,4)
- adcl $0, %edx
- movl %edx, %ebx
- addl $1, %ecx
- jl L(inner)
-
-
- movl %ebx, (%edi)
- movl VAR_OUTER, %ecx
- incl %ecx
- jnz L(outer)
-
-
-L(corner):
- C esi &src[size]
- C edi &dst[2*size-3]
-
- movl -4(%esi), %eax
- mull -8(%esi) C src[size-1]*src[size-2]
- addl %eax, 0(%edi)
- adcl $0, %edx
- movl %edx, 4(%edi) C dst high limb
-
-
-C -----------------------------------------------------------------------------
-C Left shift of dst[1..2*size-2], high bit shifted out becomes dst[2*size-1].
-
- movl PARAM_SIZE, %eax
- negl %eax
- addl $1, %eax C -(size-1) and clear carry
-
-L(lshift):
- C eax counter, negative
- C ebx (not used here; limbs are rotated in place in dst)
- C ecx
- C edx
- C esi
- C edi &dst[2*size-3]
- C ebp
-
- rcll 8(%edi,%eax,8)
- rcll 12(%edi,%eax,8)
- incl %eax
- jnz L(lshift)
-
-
- adcl %eax, %eax C high bit out
- movl %eax, 8(%edi) C dst most significant limb
-
-
-C Now add in the squares on the diagonal, namely src[0]^2, src[1]^2, ...,
-C src[size-1]^2. dst[0] hasn't been set at all yet, and just gets the
-C low limb of src[0]^2.
-
- movl PARAM_SRC, %esi
- movl (%esi), %eax C src[0]
- mull %eax C src[0]^2
-
- movl PARAM_SIZE, %ecx
- leal (%esi,%ecx,4), %esi C src end
-
- negl %ecx C -size
- movl %edx, %ebx C initial carry
-
- movl %eax, 12(%edi,%ecx,8) C dst[0]
- incl %ecx C -(size-1)
-
-L(diag):
- C eax scratch (low product)
- C ebx carry limb
- C ecx counter, -(size-1) to -1
- C edx scratch (high product)
- C esi &src[size]
- C edi &dst[2*size-3]
- C ebp (not used here; dst limbs are updated directly in memory)
-
- movl (%esi,%ecx,4), %eax
- mull %eax
-
- addl %ebx, 8(%edi,%ecx,8)
- movl %edx, %ebx
-
- adcl %eax, 12(%edi,%ecx,8)
- adcl $0, %ebx
-
- incl %ecx
- jnz L(diag)
-
-
- addl %ebx, 8(%edi) C dst most significant limb
-
- popl %ebp
- popl %esi
-
- popl %edi
- popl %ebx
-
- ret
-
-EPILOGUE()
« no previous file with comments | « gcc/gmp/mpn/x86/rshift.asm ('k') | gcc/gmp/mpn/x86/t-zdisp2.pl » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698