Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(9)

Unified Diff: gcc/gmp/mpn/x86/k7/gcd_1.asm

Issue 3050029: [gcc] GCC 4.5.0=>4.5.1 (Closed) Base URL: ssh://git@gitrw.chromium.org:9222/nacl-toolchain.git
Patch Set: Created 10 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « gcc/gmp/mpn/x86/k7/dive_1.asm ('k') | gcc/gmp/mpn/x86/k7/mmx/com_n.asm » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: gcc/gmp/mpn/x86/k7/gcd_1.asm
diff --git a/gcc/gmp/mpn/x86/k7/gcd_1.asm b/gcc/gmp/mpn/x86/k7/gcd_1.asm
deleted file mode 100644
index f912f43730411d68d3d1443fc26973b6285f0c23..0000000000000000000000000000000000000000
--- a/gcc/gmp/mpn/x86/k7/gcd_1.asm
+++ /dev/null
@@ -1,369 +0,0 @@
-dnl AMD K7 mpn_gcd_1 -- mpn by 1 gcd.
-
-dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
-dnl
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or
-dnl modify it under the terms of the GNU Lesser General Public License as
-dnl published by the Free Software Foundation; either version 3 of the
-dnl License, or (at your option) any later version.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C K7: 6.75 cycles/bit (approx) 1x1 gcd
-C 11.0 cycles/limb Nx1 reduction (modexact_1_odd)
-
-
-dnl Reduce using x%y if x is more than DIV_THRESHOLD bits bigger than y,
-dnl where x is the larger of the two. See tune/README for more.
-dnl
-dnl divl at 40 cycles compared to the gcd at about 7 cycles/bitpair
-dnl suggests 40/7*2=11.4 but 7 seems to be about right.
-dnl The code takes the divl path when y <= x>>DIV_THRESHOLD (unsigned compare).
-deflit(DIV_THRESHOLD, 7)
-
-
-C table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
-C
-C This is mixed in with the code, but as per the k7 optimization manual it's
-C a full cache line and suitably aligned so it won't get swapped between
-C code and data. Having it in TEXT rather than RODATA saves needing a GOT
-C entry when PIC.
-C
-C Actually, there doesn't seem to be a measurable difference between this in
-C its own cache line or plonked in the middle of the code. Presumably
-C since TEXT is read-only there are no worries about coherency.
-C MASK keeps the low 6 bits of x as the table index (see the andl at L(strip_x_entry)).
-deflit(MASK, 63)
-deflit(MAXSHIFT, 6)
-C Entry 0 is MAXSHIFT; the forloop then emits one byte for each i from 1 to MASK.
- TEXT
- ALIGN(64)
-L(table):
- .byte MAXSHIFT
-forloop(i,1,MASK,
-` .byte m4_count_trailing_zeros(i)
-')
-
-
-C mp_limb_t mpn_gcd_1 (mp_srcptr src, mp_size_t size, mp_limb_t limb);
-C Requires limb != 0 and size >= 1 (see the ASSERTs). The result is left in eax.
-C ebx and esi are always saved and restored, edi only for PIC, ebp only for size > 1.
-defframe(PARAM_LIMB, 12)
-defframe(PARAM_SIZE, 8)
-defframe(PARAM_SRC, 4)
-C Frame slots for the saved registers and for the arguments of the modexact call.
-defframe(SAVE_EBX, -4)
-defframe(SAVE_ESI, -8)
-defframe(SAVE_EDI, -12)
-defframe(SAVE_EBP, -16)
-defframe(CALL_DIVISOR,-20)
-defframe(CALL_SIZE, -24)
-defframe(CALL_SRC, -28)
-C 28 bytes = the seven 4-byte slots above.
-deflit(STACK_SPACE, 28)
-
- TEXT
- ALIGN(16)
-
-PROLOGUE(mpn_gcd_1)
-deflit(`FRAME',0)
-
- ASSERT(ne, `cmpl $0, PARAM_LIMB') C y!=0
- ASSERT(ae, `cmpl $1, PARAM_SIZE') C size>=1
-
- movl PARAM_SRC, %eax C src pointer
- movl PARAM_LIMB, %edx C y
- subl $STACK_SPACE, %esp deflit(`FRAME',STACK_SPACE)
-
- movl %esi, SAVE_ESI
- movl %ebx, SAVE_EBX
-
- movl (%eax), %esi C src low limb
-C PIC only: the helper loads its return address, L(here), and the addl turns it into L(table).
-ifdef(`PIC',`
- movl %edi, SAVE_EDI
- call L(movl_eip_to_edi)
-L(here):
- addl $L(table)-L(here), %edi
-')
-C ebx = x (src low limb), esi = x|y, ecx = -1 so that the first incl gives 0.
- movl %esi, %ebx
- orl %edx, %esi C x|y
- movl $-1, %ecx
-C Count the trailing zero bits shared by x and y: shift x|y until a 1 bit reaches carry.
-L(twos):
- incl %ecx
- shrl %esi
- jnc L(twos) C 3/4 chance of x or y odd already
-C Strip the common twos from x and y and keep the count in esi for the final shift.
- shrl %cl, %ebx
- shrl %cl, %edx
- movl %ecx, %esi C common twos
-C size > 1 goes to the multi-limb reduction, otherwise fall into the 1x1 gcd.
- movl PARAM_SIZE, %ecx
- cmpl $1, %ecx
- ja L(divide)
-
-
- C eax
- C ebx x
- C ecx
- C edx y
- C esi common twos
- C edi [PIC] L(table)
- C ebp
-C Flags below are from y - x; carry (below) means y < x.
- movl %edx, %eax
- cmpl %ebx, %edx
-
- cmovb( %ebx, %eax) C swap to make x bigger than y
- cmovb( %edx, %ebx)
-
-
-L(strip_y):
- C eax x
- C ebx y
- C ecx
- C edx
- C esi common twos
- C edi [PIC] L(table)
- C ebp
-C Shift y right until a 1 bit falls into carry, then rcll rotates that bit back in.
- ASSERT(nz,`orl %ebx,%ebx')
- shrl %ebx
- jnc L(strip_y)
- rcll %ebx
-
-
- C eax x
- C ebx y (odd)
- C ecx
- C edx
- C esi common twos
- C edi [PIC] L(table)
- C ebp
-C Keep x in ecx and y in edx while eax is used to compare x>>DIV_THRESHOLD with y.
- movl %eax, %ecx
- movl %ebx, %edx
- shrl $DIV_THRESHOLD, %eax
-
- cmpl %eax, %ebx
- movl %ecx, %eax
- ja L(strip_x_entry) C do x%y if x much bigger than y
-
-C Here y <= x>>DIV_THRESHOLD: one divl of 0:x by y leaves x mod y in edx.
- xorl %edx, %edx
-
- divl %ebx
-C orl sets ZF when the remainder is zero; the two movl below leave the flags alone.
- orl %edx, %edx
- movl %edx, %ecx C remainder -> x, in ecx since L(strip_x) reloads eax from ecx
- movl %ebx, %edx C y
-C Zero remainder: y is the answer. Otherwise carry on with x = remainder.
- jz L(done_ebx)
- jmp L(strip_x)
-
-
- C Offset 0x9D here for non-PIC. About 0.4 cycles/bit is saved by
- C ensuring the end of the jnz at the end of this loop doesn't cross
- C into the next cache line at 0xC0.
- C
- C PIC on the other hand is offset 0xAC here and extends to 0xC9, so
- C it crosses but doesn't suffer any measurable slowdown.
-
-L(top):
- C eax x
- C ebx y-x
- C ecx x-y
- C edx y
- C esi twos, for use at end
- C edi [PIC] L(table)
-C Carry here is from the subl %edx,%ecx at the loop bottom, i.e. x < y.
- cmovc( %ebx, %ecx) C if x-y gave carry, use x and y-x
- cmovc( %eax, %edx)
-C L(strip_x) expects the new x in ecx and y in edx; L(strip_x_entry) needs eax = ecx = x.
-L(strip_x):
- movl %ecx, %eax
-L(strip_x_entry):
- andl $MASK, %ecx
-
- ASSERT(nz, `orl %eax, %eax')
-
-ifdef(`PIC',`
- movb (%ecx,%edi), %cl
-',`
- movb L(table) (%ecx), %cl
-')
-C cl = table entry for the low 6 bits of x; MAXSHIFT means they were all zero, so go again.
- shrl %cl, %eax
- cmpb $MAXSHIFT, %cl
-
- movl %eax, %ecx
- movl %edx, %ebx
- je L(strip_x)
-
- ASSERT(nz, `testl $1, %eax') C both odd
- ASSERT(nz, `testl $1, %edx')
-C ebx = y-x and ecx = x-y; if they are zero then x == y and eax already holds the answer.
- subl %eax, %ebx
- subl %edx, %ecx
- jnz L(top)
-
-C Common exit: eax = odd result; shift the common twos back in and restore registers.
-L(done):
- movl %esi, %ecx
- movl SAVE_ESI, %esi
-ifdef(`PIC',`
- movl SAVE_EDI, %edi
-')
-
- shll %cl, %eax
- movl SAVE_EBX, %ebx
- addl $FRAME, %esp
-
- ret
-
-
-
-C -----------------------------------------------------------------------------
-C two or more limbs
-
-dnl MODEXACT_THRESHOLD is the size at which it's better to call
-dnl mpn_modexact_1_odd than do an inline loop.
-
-deflit(MODEXACT_THRESHOLD, ifdef(`PIC',6,5))
-
-L(divide):
- C eax src
- C ebx
- C ecx size
- C edx y
- C esi common twos
- C edi [PIC] L(table)
- C ebp
-C Make y odd: shift until a 1 bit reaches carry, then leal rebuilds 2*edx+1 in ebx.
-L(divide_strip_y):
- ASSERT(nz,`orl %edx,%edx')
- shrl %edx
- jnc L(divide_strip_y)
- leal 1(%edx,%edx), %ebx C y now odd
-C Save ebp and use it as the src pointer; eax = src[size-1].
- movl %ebp, SAVE_EBP
- movl %eax, %ebp
- movl -4(%eax,%ecx,4), %eax C src high limb
-
- cmp $MODEXACT_THRESHOLD, %ecx
- jae L(modexact)
-C Small sizes: reduce src mod y inline, one divl per limb from the high limb down.
- cmpl %ebx, %eax C high cmp divisor
- movl $0, %edx
-
- cmovc( %eax, %edx) C skip a div if high<divisor
- sbbl $0, %ecx
-
-
-L(divide_top):
- C eax scratch (quotient)
- C ebx y
- C ecx counter (size to 1, inclusive)
- C edx carry (remainder)
- C esi common twos
- C edi [PIC] L(table)
- C ebp src
-
- movl -4(%ebp,%ecx,4), %eax
-
- divl %ebx
-
- decl %ecx
- jnz L(divide_top)
-
-
- C eax
- C ebx y (odd)
- C ecx
- C edx x
- C esi common twos
- C edi [PIC] L(table)
- C ebp
-C x = final remainder, copied to eax and ecx; zero means y is the answer.
- orl %edx, %edx
- movl SAVE_EBP, %ebp
- movl %edx, %eax
-
- movl %edx, %ecx
- movl %ebx, %edx
- jnz L(strip_x_entry)
-
-C Answer is y (held in ebx): move it to eax and take the common exit.
-L(done_ebx):
- movl %ebx, %eax
- jmp L(done)
-
-
-
-L(modexact):
- C eax
- C ebx y
- C ecx size
- C edx
- C esi common twos
- C edi [PIC] L(table)
- C ebp src
-
-ifdef(`PIC',`
- movl %ebp, CALL_SRC
- movl %ebx, %ebp C y
- movl %edi, %ebx C L(table)
-
- addl $_GLOBAL_OFFSET_TABLE_+[.-L(table)], %ebx
- movl %ebp, CALL_DIVISOR
- movl %ecx, CALL_SIZE
-
- call GSYM_PREFIX`'mpn_modexact_1_odd@PLT
-',`
-dnl non-PIC
- movl %ebx, CALL_DIVISOR
- movl %ebp, CALL_SRC
- movl %ecx, CALL_SIZE
-
- call GSYM_PREFIX`'mpn_modexact_1_odd
-')
-
- C eax x
- C ebx [non-PIC] y
- C ecx
- C edx
- C esi common twos
- C edi [PIC] L(table)
- C ebp [PIC] y
-C x = value returned by the call; zero means y is the answer, else continue in the 1x1 loop.
- orl %eax, %eax
- movl ifdef(`PIC',`%ebp',`%ebx'), %edx
- movl SAVE_EBP, %ebp
-
- movl %eax, %ecx
- jnz L(strip_x_entry)
-
- movl %edx, %eax
- jmp L(done)
-
-
-ifdef(`PIC', `
-L(movl_eip_to_edi):
- movl (%esp), %edi
- ret_internal
-')
-
-EPILOGUE()
« no previous file with comments | « gcc/gmp/mpn/x86/k7/dive_1.asm ('k') | gcc/gmp/mpn/x86/k7/mmx/com_n.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698