Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(424)

Side by Side Diff: gcc/gmp/mpn/x86_64/divrem_2.asm

Issue 3050029: [gcc] GCC 4.5.0=>4.5.1 (Closed) Base URL: ssh://git@gitrw.chromium.org:9222/nacl-toolchain.git
Patch Set: Created 10 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « gcc/gmp/mpn/x86_64/core2/gmp-mparam.h ('k') | gcc/gmp/mpn/x86_64/invert_limb.asm » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 dnl x86-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
2
3 dnl Copyright 2007, 2008 Free Software Foundation, Inc.
4
5 dnl This file is part of the GNU MP Library.
6
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of the GNU Lesser General Public License as published
9 dnl by the Free Software Foundation; either version 3 of the License, or (at
10 dnl your option) any later version.
11
12 dnl The GNU MP Library is distributed in the hope that it will be useful, but
13 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 dnl License for more details.
16
17 dnl You should have received a copy of the GNU Lesser General Public License
18 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
19
20 include(`../config.m4')
21
22
23 C norm frac
24 C K8 20 20
25 C P4 73 73
26 C P6-15 37 37
27
28 C TODO
29 C * Perhaps compute the inverse without relying on divq? Could either use
30 C Newton's method and mulq, or perhaps the faster fdiv.
31 C * The loop has not been carefully tuned, nor analysed for critical path
32 C length. It seems that 20 c/l is a bit long, compared to the 13 c/l for
33 C mpn_divrem_1.
34 C * Clean up. This code is really crude.
35
36
37 C INPUT PARAMETERS
38 define(`qp', `%rdi') C quotient limbs are stored through this pointer
39 define(`fn', `%rsi') C count of extra low quotient limbs produced from zero numerator limbs
40 define(`up_param', `%rdx') C numerator limb pointer; the remainder is written back here
41 define(`un_param', `%rcx') C numerator limb count
42 define(`dp', `%r8') C divisor pointer: low limb at 0(dp), high limb at 8(dp)
43 C NOTE(review): the body below uses raw register names, not these macros; on entry r9 is not an input and is overwritten with a numerator limb.
44 define(`dinv', `%r9') C name for the divisor inverse; the body keeps it in rdi (in r9 only in the NEW variant)
45
46
47 C rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15
48 C cnt qp d dinv
49
50 ASM_START()
51 TEXT
52 ALIGN(16)
53 PROLOGUE(mpn_divrem_2)
54 C Divides the un limbs at up, extended by fn zero limbs, by the 2-limb divisor at dp: stores un+fn-2 quotient limbs through qp, writes the 2-limb remainder back into up, returns the top quotient limb (0 or 1) in rax.
55 push %r15
56 lea (%rdx,%rcx,8), %rax C rax = up + 8*un, just past the top numerator limb
57 push %r14
58 push %r13
59 mov %rsi, %r13 C r13 = fn
60 push %r12
61 lea -24(%rax), %r12 C r12 = address of up[un-3], the next limb to fetch
62 push %rbp
63 mov %rdi, %rbp C rbp = qp
64 push %rbx
65 mov 8(%r8), %r11 C r11 = d1, high divisor limb
66 mov -8(%rax), %r9 C r9 = up[un-1], high numerator limb
67 mov (%r8), %r8 C r8 = d0, low divisor limb
68 mov -16(%rax), %r10 C r10 = up[un-2]
69 xor R32(%r15), R32(%r15) C r15 = 0, becomes the return value
70 cmp %r9, %r11
71 ja L(2) C d1 > r9: top two limbs are already below d
72 setb %dl C dl = (d1 < r9)
73 cmp %r10, %r8
74 setbe %al C al = (d0 <= r10)
75 or %al, %dl
76 jne L(23) C top two limbs >= d: subtract d once first
77 L(2):
78 lea -3(%rcx,%r13), %rbx C un + fn - 3
79 test %rbx, %rbx
80 js L(6) C negative: the loop has no quotient limbs to produce
81 mov %r11, %rdx
82 mov $-1, %rax
83 not %rdx C rdx:rax = (~d1):(all ones)
84 div %r11 C rax = inverse estimate from d1 alone (the divq mentioned in the TODO)
85 mov %r11, %rdx
86 mov %rax, %rdi C rdi = inverse
87 imul %rax, %rdx C rdx = low limb of d1 * inverse
88 mov %rdx, %r14
89 mul %r8 C rdx:rax = inverse * d0
90 mov %rdx, %rcx C rcx = high limb of inverse * d0
91 mov $-1, %rdx C rdx = -1, high word of the accumulator rdx:r14
92 add %r8, %r14 C accumulator += d0
93 adc $0, %rdx
94 add %rcx, %r14 C accumulator += high limb of inverse * d0
95 adc $0, %rdx
96 js L(8) C accumulator still negative: inverse is kept as is
97 L(18): C NOTE(review): presumably refines the inverse to account for d0 - confirm
98 dec %rdi C inverse--
99 sub %r11, %r14 C accumulator -= d1
100 sbb $0, %rdx
101 jns L(18) C repeat until the accumulator turns negative
102 L(8):
103
104 C rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15
105 C n2 un n1 dinv qp d0 d1 up fn msl
106 C n2 un -d1 n1 dinv XX XX
107 C Quotient loop in two variants selected by the m4 symbol NEW; each pass stores one quotient limb, from qp[un+fn-3] downwards.
108 ifdef(`NEW',`
109 lea (%rbp,%rbx,8), %rbp C rbp = address of qp[un+fn-3]
110 mov %rbx, %rcx C un
111 mov %r9, %rbx C rbx = high remainder limb
112 mov %rdi, %r9 C di
113 mov %r10, %r14 C r14 = low remainder limb
114 mov %r11, %rsi
115 neg %rsi C -d1
116 ALIGN(16)
117 L(loop):
118 mov %r9, %rax C di ncp
119 mul %rbx C 0, 18
120 add %r14, %rax C 4
121 mov %rax, %r10 C q0 5
122 adc %rbx, %rdx C 5
123 mov %rdx, %rdi C q 6
124 imul %rsi, %rdx C 6
125 mov %r8, %rax C ncp
126 lea (%rdx, %r14), %rbx C n1 -= ... 7
127 mul %rdi C 7
128 xor R32(%r14), R32(%r14) C next limb = 0 unless loaded below
129 cmp %rcx, %r13 C fn > counter? (signed compare)
130 jg L(19) C yes: keep the zero limb
131 mov (%r12), %r14 C fetch the next numerator limb
132 sub $8, %r12 C move the fetch pointer one limb down
133 L(19): sub %r8, %r14 C ncp
134 sbb %r11, %rbx C 9
135 sub %rax, %r14 C 11
136 sbb %rdx, %rbx C 12
137 inc %rdi C 7
138 xor R32(%rdx), R32(%rdx) C
139 cmp %r10, %rbx C 13
140 mov %r8, %rax C d0 ncp
141 adc $-1, %rdx C mask 14
142 add %rdx, %rdi C q-- 15
143 and %rdx, %rax C d0 or 0 15
144 and %r11, %rdx C d1 or 0 15
145 add %rax, %r14 C 16
146 adc %rdx, %rbx C 16
147 cmp %r11, %rbx C 17
148 jae L(fix) C high remainder limb >= d1: take the fix path
149 L(bck): mov %rdi, (%rbp) C store the quotient limb
150 sub $8, %rbp C next lower quotient slot
151 dec %rcx
152 jns L(loop)
153
154 mov %r14, %r10 C low remainder limb for the stores after the ifdef
155 mov %rbx, %r9 C high remainder limb for the stores after the ifdef
156 ',`
157 lea (%rbp,%rbx,8), %rbp C rbp = address of qp[un+fn-3]
158 mov %rbx, %rcx C rcx = loop counter, un+fn-3 down to 0
159 mov %r9, %rax C rax = high remainder limb
160 mov %r10, %rsi C rsi = low remainder limb
161 ALIGN(16)
162 L(loop):
163 mov %rax, %r14 C 0, 19
164 mul %rdi C 0
165 mov %r11, %r9 C 1
166 add %rsi, %rax C 4
167 mov %rax, %rbx C q0 5
168 adc %r14, %rdx C q 5
169 lea 1(%rdx), %r10 C 6
170 mov %rdx, %rax C 6
171 imul %rdx, %r9 C 6
172 sub %r9, %rsi C 10
173 xor R32(%r9), R32(%r9) C next limb = 0 unless loaded below
174 mul %r8 C 7
175 cmp %rcx, %r13 C fn > counter? (signed compare)
176 jg L(13) C yes: keep the zero limb
177 mov (%r12), %r9 C fetch the next numerator limb
178 sub $8, %r12 C move the fetch pointer one limb down
179 L(13): sub %r8, %r9 C ncp
180 sbb %r11, %rsi C 11
181 sub %rax, %r9 C 11
182 sbb %rdx, %rsi C 12
183 cmp %rbx, %rsi C 13
184 sbb %rax, %rax C 14
185 not %rax C 15
186 add %rax, %r10 C 16
187 mov %r8, %rbx C ncp
188 and %rax, %rbx C 16
189 and %r11, %rax C 16
190 add %rbx, %r9 C 17
191 adc %rsi, %rax C 18
192 cmp %rax, %r11 C 19
193 jbe L(fix) C d1 <= high remainder limb: take the fix path
194 L(bck): mov %r10, (%rbp) C store the quotient limb
195 sub $8, %rbp C next lower quotient slot
196 mov %r9, %rsi C 18
197 dec %rcx
198 jns L(loop)
199
200 mov %rsi, %r10 C low remainder limb for the stores after the ifdef
201 mov %rax, %r9 C high remainder limb for the stores after the ifdef
202 ')
203 L(6):
204 mov %r10, 8(%r12) C write the low remainder limb back into the up area
205 mov %r9, 16(%r12) C write the high remainder limb back into the up area
206 pop %rbx
207 pop %rbp
208 pop %r12
209 pop %r13
210 pop %r14
211 mov %r15, %rax C return value: 0, or 1 if L(23) ran
212 pop %r15
213 ret
214 C Reached once from the entry check when the top two numerator limbs are >= d.
215 L(23): inc R32(%r15) C return value becomes 1
216 sub %r8, %r10 C r9:r10 -= d1:d0
217 sbb %r11, %r9
218 jmp L(2)
219 C Fix paths: entered with the flags of the last compare in the loop; add one to the quotient limb and subtract d if the remainder is still >= d.
220 ifdef(`NEW',`
221 L(fix): seta %dl C dl = (high remainder limb > d1)
222 cmp %r8, %r14
223 setae %al C al = (low remainder limb >= d0)
224 orb %dl, %al
225 je L(bck) C neither holds: remainder < d, nothing to fix
226 inc %rdi C quotient limb + 1
227 sub %r8, %r14 C remainder -= d1:d0
228 sbb %r11, %rbx
229 jmp L(bck)
230 ',`
231 L(fix): jb L(88) C d1 < high remainder limb: fix unconditionally
232 cmp %r8, %r9 C high limbs equal: compare the low limb with d0
233 jb L(bck) C low limb < d0: remainder < d, nothing to fix
234 L(88): inc %r10 C quotient limb + 1
235 sub %r8, %r9 C remainder -= d1:d0
236 sbb %r11, %rax
237 jmp L(bck)
238 ')
239 EPILOGUE()
OLDNEW
« no previous file with comments | « gcc/gmp/mpn/x86_64/core2/gmp-mparam.h ('k') | gcc/gmp/mpn/x86_64/invert_limb.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698