Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(449)

Side by Side Diff: gcc/gmp/mpn/x86/k7/mod_34lsub1.asm

Issue 3050029: [gcc] GCC 4.5.0=>4.5.1 (Closed) Base URL: ssh://git@gitrw.chromium.org:9222/nacl-toolchain.git
Patch Set: Created 10 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « gcc/gmp/mpn/x86/k7/mmx/mod_1.asm ('k') | gcc/gmp/mpn/x86/mod_1.asm » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 dnl AMD K7 mpn_mod_34lsub1 -- remainder modulo 2^24-1.
2
3 dnl Copyright 2000, 2001, 2002, 2004, 2005, 2008 Free Software Foundation,
4 dnl Inc.
5 dnl
6 dnl This file is part of the GNU MP Library.
7 dnl
8 dnl The GNU MP Library is free software; you can redistribute it and/or
9 dnl modify it under the terms of the GNU Lesser General Public License as
10 dnl published by the Free Software Foundation; either version 3 of the
11 dnl License, or (at your option) any later version.
12 dnl
13 dnl The GNU MP Library is distributed in the hope that it will be useful,
14 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
15 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 dnl Lesser General Public License for more details.
17 dnl
18 dnl You should have received a copy of the GNU Lesser General Public License
19 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
20
21 include(`../config.m4')
22
23
24 C cycles/limb
25 C Athlon: 1
26 C Hammer: 1
27
28
29 C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
30 C
31 C The loop form below and the 64 byte code alignment seem necessary for the
32 C claimed speed. This is a bit strange, since normally k7 isn't very
33 C sensitive to such things. Perhaps there has to be 6 instructions in the
34 C first 16 bytes for the BTB entry or something.
35
36 defframe(PARAM_SIZE, 8) C second argument, size (limb count), per the signature comment
37 defframe(PARAM_SRC, 4) C first argument, src (pointer to the limbs)
38
39 dnl re-use parameter space: once size has been loaded into a register its slot is free to hold the saved edi
40 define(SAVE_EDI, `PARAM_SIZE')
41
42 TEXT
43 ALIGN(64)
44 PROLOGUE(mpn_mod_34lsub1)
45 deflit(`FRAME',0)
46 C Returns in eax a sum of pieces of the limbs at src that is congruent to them mod 2^24-1; no final reduction is done here
47 movl PARAM_SIZE, %ecx
48 movl PARAM_SRC, %edx
49
50 subl $2, %ecx C ecx = size-2; the flags from this pick the path below
51 ja L(three_or_more) C size-2 > 0 unsigned, ie. size >= 3
52
53 movl (%edx), %eax C eax = src[0]; movl leaves the subl flags intact
54 jb L(one) C borrow from the subl, size < 2: return src[0] as is
55 C size == 2: fold both limbs using 2^24 == 1 mod 2^24-1
56 movl 4(%edx), %ecx C ecx = src[1]
57 movl %eax, %edx
58 shrl $24, %eax C src[0] high (top 8 bits)
59
60 andl $0xFFFFFF, %edx C src[0] low (bottom 24 bits)
61 addl %edx, %eax
62 movl %ecx, %edx
63
64 andl $0xFFFF, %ecx
65 shrl $16, %edx C src[1] high (weight 2^48 == 1)
66 addl %edx, %eax
67
68 shll $8, %ecx C src[1] low (weight 2^32 == 2^8)
69 addl %ecx, %eax
70
71 L(one):
72 ret
73
74
75 L(three_or_more):
76 C eax
77 C ebx
78 C ecx size-2
79 C edx src
80 C esi
81 C edi
82
83 pushl %ebx FRAME_pushl()
84 xorl %eax, %eax
85 xorl %ebx, %ebx
86
87 movl %edi, SAVE_EDI C size is already in ecx, so its stack slot can hold edi
88 pushl %esi FRAME_pushl()
89 xorl %esi, %esi C and clear carry flag
90
91
92 C code offset 0x40 at this point
93 L(top):
94 C eax acc 0mod3
95 C ebx acc 1mod3
96 C ecx counter, limbs
97 C edx src
98 C esi acc 2mod3
99 C edi
100 C leal and decl below do not touch the carry flag, so the adcl chain carries from one limb into the next
101 leal 24(%edx), %edx
102 leal -2(%ecx), %ecx
103 adcl -24(%edx), %eax
104 adcl -20(%edx), %ebx
105 adcl -16(%edx), %esi
106
107 decl %ecx
108 jng L(done_loop) C ecx <= 0: at most 2 limbs left
109
110 leal -2(%ecx), %ecx
111 adcl -12(%edx), %eax
112 adcl -8(%edx), %ebx
113 adcl -4(%edx), %esi
114
115 decl %ecx
116 jg L(top) C ecx > 0: 3 or more limbs left
117
118
119 leal 12(%edx), %edx C step past the second group so the tail offsets match the early exit above
120
121
122 L(done_loop):
123 C ecx is -2, -1 or 0 representing 0, 1 or 2 more limbs, respectively
124
125 incl %ecx
126 movl $0xFFFFFFFF, %edi C -1: a pending carry is out of acc 2mod3
127 js L(combine)
128
129 adcl -12(%edx), %eax
130 decl %ecx
131 movl $0xFFFFFF00, %edi C -0x100: a pending carry is out of acc 0mod3
132 js L(combine)
133
134 adcl -8(%edx), %ebx
135 movl $0xFFFF0000, %edi C -0x10000: a pending carry is out of acc 1mod3
136
137
138 L(combine):
139 C eax acc 0mod3
140 C ebx acc 1mod3
141 C ecx
142 C edx
143 C esi acc 2mod3
144 C edi mask
145
146 sbbl %ecx, %ecx C carry: ecx = -1 if set, else 0
147 movl %eax, %edx C 0mod3
148 shrl $24, %eax C 0mod3 high
149
150 andl %edi, %ecx C carry masked
151 andl $0x00FFFFFF, %edx C 0mod3 low
152 movl %ebx, %edi C 1mod3
153
154 subl %ecx, %eax C apply carry (subtracting the mask adds 1, 0x100 or 0x10000)
155 shrl $16, %ebx C 1mod3 high
156 andl $0xFFFF, %edi
157
158 addl %edx, %eax C apply 0mod3 low
159 movl %esi, %edx C 2mod3
160 shll $8, %edi C 1mod3 low
161
162 addl %ebx, %eax C apply 1mod3 high
163 shrl $8, %esi C 2mod3 high
164 movzbl %dl, %edx C 2mod3 low
165
166 addl %edi, %eax C apply 1mod3 low
167 shll $16, %edx C 2mod3 low
168
169 addl %esi, %eax C apply 2mod3 high
170 popl %esi FRAME_popl()
171
172 movl SAVE_EDI, %edi C restore edi from the re-used parameter slot
173 addl %edx, %eax C apply 2mod3 low
174 popl %ebx FRAME_popl()
175
176 ret
177
178 EPILOGUE()
OLDNEW
« no previous file with comments | « gcc/gmp/mpn/x86/k7/mmx/mod_1.asm ('k') | gcc/gmp/mpn/x86/mod_1.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698