Index: gcc/gmp/mpn/x86_64/divrem_2.asm |
diff --git a/gcc/gmp/mpn/x86_64/divrem_2.asm b/gcc/gmp/mpn/x86_64/divrem_2.asm |
deleted file mode 100644 |
index 37053ba88d92abc86b51c29b824dd739e07d6df5..0000000000000000000000000000000000000000 |
--- a/gcc/gmp/mpn/x86_64/divrem_2.asm |
+++ /dev/null |
@@ -1,239 +0,0 @@ |
-dnl x86-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number. |
- |
-dnl Copyright 2007, 2008 Free Software Foundation, Inc. |
- |
-dnl This file is part of the GNU MP Library. |
- |
-dnl The GNU MP Library is free software; you can redistribute it and/or modify |
-dnl it under the terms of the GNU Lesser General Public License as published |
-dnl by the Free Software Foundation; either version 3 of the License, or (at |
-dnl your option) any later version. |
- |
-dnl The GNU MP Library is distributed in the hope that it will be useful, but |
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
-dnl License for more details. |
- |
-dnl You should have received a copy of the GNU Lesser General Public License |
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. |
- |
-include(`../config.m4') |
- |
- |
-C norm frac |
-C K8 20 20 |
-C P4 73 73 |
-C P6-15 37 37 |
- |
-C TODO |
-C * Perhaps compute the inverse without relying on divq? Could either use |
-C Newton's method and mulq, or perhaps the faster fdiv. |
-C * The loop has not been carefully tuned, nor analysed for critical path |
-C length. It seems that 20 c/l is a bit long, compared to the 13 c/l for |
-C mpn_divrem_1. |
-C * Clean up. This code is really crude. |
- |
- |
-C INPUT PARAMETERS |
-define(`qp', `%rdi') |
-define(`fn', `%rsi') |
-define(`up_param', `%rdx') |
-define(`un_param', `%rcx') |
-define(`dp', `%r8') |
- |
-define(`dinv', `%r9') |
- |
- |
-C rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15 |
-C cnt qp d dinv |
- |
-ASM_START() |
- TEXT |
- ALIGN(16) |
-PROLOGUE(mpn_divrem_2) |
- |
- push %r15 |
- lea (%rdx,%rcx,8), %rax |
- push %r14 |
- push %r13 |
- mov %rsi, %r13 |
- push %r12 |
- lea -24(%rax), %r12 |
- push %rbp |
- mov %rdi, %rbp |
- push %rbx |
- mov 8(%r8), %r11 |
- mov -8(%rax), %r9 |
- mov (%r8), %r8 |
- mov -16(%rax), %r10 |
- xor R32(%r15), R32(%r15) |
- cmp %r9, %r11 |
- ja L(2) |
- setb %dl |
- cmp %r10, %r8 |
- setbe %al |
- or %al, %dl |
- jne L(23) |
-L(2): |
- lea -3(%rcx,%r13), %rbx C un + fn - 3 |
- test %rbx, %rbx |
- js L(6) |
- mov %r11, %rdx |
- mov $-1, %rax |
- not %rdx |
- div %r11 |
- mov %r11, %rdx |
- mov %rax, %rdi |
- imul %rax, %rdx |
- mov %rdx, %r14 |
- mul %r8 |
- mov %rdx, %rcx |
- mov $-1, %rdx |
- add %r8, %r14 |
- adc $0, %rdx |
- add %rcx, %r14 |
- adc $0, %rdx |
- js L(8) |
-L(18): |
- dec %rdi |
- sub %r11, %r14 |
- sbb $0, %rdx |
- jns L(18) |
-L(8): |
- |
-C rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15 |
-C n2 un n1 dinv qp d0 d1 up fn msl |
-C n2 un -d1 n1 dinv XX XX |
- |
-ifdef(`NEW',` |
- lea (%rbp,%rbx,8), %rbp |
- mov %rbx, %rcx C un |
- mov %r9, %rbx |
- mov %rdi, %r9 C di |
- mov %r10, %r14 |
- mov %r11, %rsi |
- neg %rsi C -d1 |
- ALIGN(16) |
-L(loop): |
- mov %r9, %rax C di ncp |
- mul %rbx C 0, 18 |
- add %r14, %rax C 4 |
- mov %rax, %r10 C q0 5 |
- adc %rbx, %rdx C 5 |
- mov %rdx, %rdi C q 6 |
- imul %rsi, %rdx C 6 |
- mov %r8, %rax C ncp |
- lea (%rdx, %r14), %rbx C n1 -= ... 7 |
- mul %rdi C 7 |
- xor R32(%r14), R32(%r14) C |
- cmp %rcx, %r13 C |
- jg L(19) C |
- mov (%r12), %r14 C |
- sub $8, %r12 C |
-L(19): sub %r8, %r14 C ncp |
- sbb %r11, %rbx C 9 |
- sub %rax, %r14 C 11 |
- sbb %rdx, %rbx C 12 |
- inc %rdi C 7 |
- xor R32(%rdx), R32(%rdx) C |
- cmp %r10, %rbx C 13 |
- mov %r8, %rax C d1 ncp |
- adc $-1, %rdx C mask 14 |
- add %rdx, %rdi C q-- 15 |
- and %rdx, %rax C d0 or 0 15 |
- and %r11, %rdx C d1 or 0 15 |
- add %rax, %r14 C 16 |
- adc %rdx, %rbx C 16 |
- cmp %r11, %rbx C 17 |
- jae L(fix) C |
-L(bck): mov %rdi, (%rbp) C |
- sub $8, %rbp C |
- dec %rcx |
- jns L(loop) |
- |
- mov %r14, %r10 |
- mov %rbx, %r9 |
-',` |
- lea (%rbp,%rbx,8), %rbp |
- mov %rbx, %rcx |
- mov %r9, %rax |
- mov %r10, %rsi |
- ALIGN(16) |
-L(loop): |
- mov %rax, %r14 C 0, 19 |
- mul %rdi C 0 |
- mov %r11, %r9 C 1 |
- add %rsi, %rax C 4 |
- mov %rax, %rbx C q0 5 |
- adc %r14, %rdx C q 5 |
- lea 1(%rdx), %r10 C 6 |
- mov %rdx, %rax C 6 |
- imul %rdx, %r9 C 6 |
- sub %r9, %rsi C 10 |
- xor R32(%r9), R32(%r9) C |
- mul %r8 C 7 |
- cmp %rcx, %r13 C |
- jg L(13) C |
- mov (%r12), %r9 C |
- sub $8, %r12 C |
-L(13): sub %r8, %r9 C ncp |
- sbb %r11, %rsi C 11 |
- sub %rax, %r9 C 11 |
- sbb %rdx, %rsi C 12 |
- cmp %rbx, %rsi C 13 |
- sbb %rax, %rax C 14 |
- not %rax C 15 |
- add %rax, %r10 C 16 |
- mov %r8, %rbx C ncp |
- and %rax, %rbx C 16 |
- and %r11, %rax C 16 |
- add %rbx, %r9 C 17 |
- adc %rsi, %rax C 18 |
- cmp %rax, %r11 C 19 |
- jbe L(fix) C |
-L(bck): mov %r10, (%rbp) C |
- sub $8, %rbp C |
- mov %r9, %rsi C 18 |
- dec %rcx |
- jns L(loop) |
- |
- mov %rsi, %r10 |
- mov %rax, %r9 |
-') |
-L(6): |
- mov %r10, 8(%r12) |
- mov %r9, 16(%r12) |
- pop %rbx |
- pop %rbp |
- pop %r12 |
- pop %r13 |
- pop %r14 |
- mov %r15, %rax |
- pop %r15 |
- ret |
- |
-L(23): inc R32(%r15) |
- sub %r8, %r10 |
- sbb %r11, %r9 |
- jmp L(2) |
- |
-ifdef(`NEW',` |
-L(fix): seta %dl |
- cmp %r8, %r14 |
- setae %al |
- orb %dl, %al |
- je L(bck) |
- inc %rdi |
- sub %r8, %r14 |
- sbb %r11, %rbx |
- jmp L(bck) |
-',` |
-L(fix): jb L(88) |
- cmp %r8, %r9 |
- jb L(bck) |
-L(88): inc %r10 |
- sub %r8, %r9 |
- sbb %r11, %rax |
- jmp L(bck) |
-') |
-EPILOGUE() |