Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(11)

Side by Side Diff: gcc/gmp/mpn/alpha/divrem_2.asm

Issue 3050029: [gcc] GCC 4.5.0=>4.5.1 (Closed) Base URL: ssh://git@gitrw.chromium.org:9222/nacl-toolchain.git
Patch Set: Created 10 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « gcc/gmp/mpn/alpha/diveby3.asm ('k') | gcc/gmp/mpn/alpha/ev5/add_n.asm » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 dnl Alpha mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
2
3 dnl Copyright 2007, 2008 Free Software Foundation, Inc.
4
5 dnl This file is part of the GNU MP Library.
6
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of the GNU Lesser General Public License as published
9 dnl by the Free Software Foundation; either version 3 of the License, or (at
10 dnl your option) any later version.
11
12 dnl The GNU MP Library is distributed in the hope that it will be useful, but
13 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 dnl License for more details.
16
17 dnl You should have received a copy of the GNU Lesser General Public License
18 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
19
20 include(`../config.m4')
21
22 C norm frac
23 C ev4
24 C ev5 70 70
25 C ev6 29 29
26
27 C TODO
28 C * Perhaps inline mpn_invert_limb, that would allow us to not save/restore
29 C any registers (thus save ~10 cycles per call).
30 C * Use negated d1 and/or d0 to speed carry propagation. Might save a cycle
31 C or two.
32 C * Check cluster delays (for ev6). We very likely could save some cycles.
33 C * Use branch-free code for computing di.
34 C * CAVEAT: We rely on r19 not being clobbered by mpn_invert_limb call.
35
36 C INPUT PARAMETERS
37 define(`qp', `r16')
38 define(`fn', `r17')
39 define(`up_param', `r18')
40 define(`un_param', `r19')
41 define(`dp', `r20')
42 C mpn_divrem_2: divide an mpn number by a normalized 2-limb number; the value most_significant_q_limb is returned in r0.
43 ASM_START()
44 PROLOGUE(mpn_divrem_2)
45 ldgp r29, 0(r27) C load gp (r29) relative to r27
46 lda r30, -80(r30) C allocate an 80-byte stack frame
47 stq r26, 0(r30) C save return address
48 stq r9, 8(r30) C save r9-r13 and r15; all are overwritten below and reloaded before ret
49 stq r10, 16(r30)
50 stq r11, 24(r30)
51 stq r12, 32(r30)
52 stq r13, 40(r30)
53 C stq r14, 48(r30)
54 stq r15, 56(r30)
55 .prologue 1
56 stq r16, 64(r30) C spill qp; r16 is reused as the mpn_invert_limb argument
57 bis r31, r17, r15 C r15 = fn
58 s8addq r19, r18, r13 C r13 = up_param + 8*un_param
59 lda r13, -24(r13) C back up 3 limbs, so 16(r13) and 8(r13) are the two top limbs
60 ldq r12, 8(r20) C d1 = dp[1]
61 ldq r10, 0(r20) C d0 = dp[0]
62 ldq r11, 16(r13) C n2 = top dividend limb
63 ldq r9, 8(r13) C n1 = second dividend limb
64 C If (n2,n1) >= (d1,d0), subtract the divisor once and set most_significant_q_limb = 1.
65 bis r31, r31, r3 C most_significant_q_limb = 0
66 cmpult r11, r12, r1 C n2 < d1 ?
67 bne r1, L(L8)
68 cmpule r11, r12, r1 C n2 <= d1 (n2 == d1 here, since n2 < d1 was excluded)
69 cmpult r9, r10, r2 C r2 = (n1 < d0); also the borrow for n1 -= d0 below
70 and r1, r2, r1
71 bne r1, L(L8)
72 subq r11, r12, r11 C n2 -= d1
73 subq r11, r2, r11 C n2 -= borrow
74 subq r9, r10, r9 C n1 -= d0
75 lda r3, 1(r31) C most_significant_q_limb = 1
76 L(L8): stq r3, 72(r30) C spill most_significant_q_limb; reloaded into r0 at exit
77 C r19 = fn + un_param - 3 is the loop counter; the loop is skipped when it is negative.
78 addq r15, r19, r19
79 lda r19, -3(r19)
80 blt r19, L(L10)
81 bis r31, r12, r16 C argument for mpn_invert_limb: d1
82 jsr r26, mpn_invert_limb C result di in r0; r19 is live across this call and is not saved (see CAVEAT in the TODO list)
83 ldgp r29, 0(r26) C reload gp after the call
84 mulq r0, r12, r4 C t0 = LO(di * d1)
85 umulh r0, r10, r2 C s1 = HI(di * d0)
86 addq r4, r10, r4 C t0 += d0
87 cmpule r10, r4, r7 C !(t0 < d0): 1 when t0 += d0 did not carry
88 addq r4, r2, r4 C t0 += s1
89 cmpult r4, r2, r1 C cy for: t0 += s1
90 subq r1, r7, r7 C t1 (-1, 0, or 1)
91 blt r7, L(L42) C di needs no adjustment when t1 < 0
92 L(L22): C adjust di downwards until t1 becomes negative
93 lda r0, -1(r0) C di--
94 cmpult r4, r12, r1 C cy for: t0 -= d1 (below)
95 subq r7, r1, r7 C t1 -= cy
96 subq r4, r12, r4 C t0 -= d1
97 bge r7, L(L22)
98 L(L42):
99 ldq r16, 64(r30) C reload qp
100 s8addq r19, r16, r16 C r16 = qp + 8*r19; quotient limbs are stored downwards from here
101 ALIGN(16)
102 L(loop):
103 mulq r11, r0, r5 C q0 (early)
104 umulh r11, r0, r6 C q (early)
105 addq r5, r9, r8 C q0 += n1
106 addq r6, r11, r6 C q += n2
107 cmpult r8, r5, r1 C cy for: q0 += n1
108 addq r6, r1, r6 C q += cy
109 unop
110 mulq r12, r6, r1 C LO(d1 * q)
111 umulh r10, r6, r7 C t1 = HI(d0 * q)
112 subq r9, r1, r9 C n1 -= LO(d1 * q)
113 mulq r10, r6, r4 C t0 = LO(d0 * q)
114 unop
115 cmple r15, r19, r5 C r5 = (fn <= r19); when 0 the load below is skipped and r5 itself serves as n0 = 0
116 beq r5, L(L31)
117 ldq r5, 0(r13) C n0 = next lower dividend limb
118 lda r13, -8(r13) C step the dividend pointer down one limb
119 L(L31): subq r9, r12, r9 C n1 -= d1
120 cmpult r5, r10, r1 C cy for: n0 -= d0 (below)
121 subq r9, r1, r9 C n1 -= cy
122 subq r5, r10, r5 C n0 -= d0
123 subq r9, r7, r9 C n1 -= t1
124 cmpult r5, r4, r1 C cy for: n0 -= t0 (below)
125 subq r9, r1, r2 C n1 -= cy (result kept in r2)
126 subq r5, r4, r5 C n0 -= t0
127 cmpult r2, r8, r1 C (n1 < q0)
128 addq r6, r1, r6 C q += cond
129 lda r1, -1(r1) C -(n1 >= q0)
130 and r1, r10, r4 C mask & d0
131 addq r5, r4, r9 C n0 += mask & d0
132 and r1, r12, r1 C mask & d1
133 cmpult r9, r5, r11 C cy for: n0 += mask & d0
134 addq r2, r1, r1 C n1 += mask & d1
135 addq r1, r11, r11 C n1 += cy
136 cmpult r11, r12, r1 C r1 = (n1 < d1)
137 beq r1, L(fix) C n1 >= d1: go check whether one more subtraction of the divisor is needed
138 L(bck): stq r6, 0(r16) C store quotient limb q
139 lda r16, -8(r16) C step the quotient pointer down one limb
140 lda r19, -1(r19) C one fewer quotient limb to produce
141 bge r19, L(loop)
142 C Store the two remainder limbs, restore saved registers and return most_significant_q_limb.
143 L(L10): stq r9, 8(r13) C store low remainder limb
144 stq r11, 16(r13) C store high remainder limb
145 ldq r0, 72(r30) C return value: most_significant_q_limb
146 ldq r26, 0(r30) C reload return address
147 ldq r9, 8(r30) C reload the registers saved in the prologue
148 ldq r10, 16(r30)
149 ldq r11, 24(r30)
150 ldq r12, 32(r30)
151 ldq r13, 40(r30)
152 C ldq r14, 48(r30)
153 ldq r15, 56(r30)
154 lda r30, 80(r30) C release the stack frame
155 ret r31, (r26), 1
156 C Out-of-line correction: if (r11,r9) >= (d1,d0), subtract the divisor once more and increment q.
157 L(fix): cmpule r11, r12, r1 C r11 <= d1 (r11 == d1 here, since r11 < d1 was excluded)
158 cmpult r9, r10, r2 C r2 = (r9 < d0); also the borrow for r9 -= d0 below
159 and r1, r2, r1
160 bne r1, L(bck) C remainder already below the divisor, no correction
161 subq r11, r12, r11 C high limb -= d1
162 subq r11, r2, r11 C high limb -= borrow
163 subq r9, r10, r9 C low limb -= d0
164 lda r6, 1(r6) C q++
165 br L(bck)
166 EPILOGUE()
167 ASM_END()
OLDNEW
« no previous file with comments | « gcc/gmp/mpn/alpha/diveby3.asm ('k') | gcc/gmp/mpn/alpha/ev5/add_n.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698