Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(220)

Side by Side Diff: simd/jcsample-mmx.asm

Issue 1953443002: Update to libjpeg_turbo 1.4.90 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libjpeg_turbo.git@master
Patch Set: Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « simd/jcsample-altivec.c ('k') | simd/jcsample-sse2.asm » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 ;
2 ; jcsample-mmx.asm - downsampling (MMX)
3 ;
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 ;
6 ; Based on
7 ; x86 SIMD extension for IJG JPEG library
8 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
10 ;
11 ; This file should be assembled with NASM (Netwide Assembler); it
12 ; can *not* be assembled with Microsoft's MASM or any compatible
13 ; assembler (including Borland's Turbo Assembler).
14 ; NASM is available from http://nasm.sourceforge.net/ or
15 ; http://sourceforge.net/project/showfiles.php?group_id=6208
16 ;
17 ; [TAB8]
18
19 %include "jsimdext.inc"
20
21 ; --------------------------------------------------------------------------
22 SECTION SEG_TEXT ; emit what follows into the section named by SEG_TEXT (defined outside this file; presumably by jsimdext.inc)
23 BITS 32 ; assemble the code below as 32-bit
24 ;
25 ; Downsample pixel values of a single component.
26 ; This version handles the common case of 2:1 horizontal and 1:1 vertical,
27 ; without smoothing.
28 ;
29 ; GLOBAL(void)
30 ; jsimd_h2v1_downsample_mmx (JDIMENSION image_width, int max_v_samp_factor,
31 ; JDIMENSION v_samp_factor, JDIMENSION width_blocks,
32 ; JSAMPARRAY input_data, JSAMPARRAY output_data);
33 ; Each output byte is (even_byte + odd_byte + bias) >> 1 for one horizontal input pair,
34 ; with the bias alternating 0,1 across output columns. Arguments are read from the stack via ebp.

35 %define img_width(b) (b)+8 ; JDIMENSION image_width
36 %define max_v_samp(b) (b)+12 ; int max_v_samp_factor
37 %define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor
38 %define width_blks(b) (b)+20 ; JDIMENSION width_blocks
39 %define input_data(b) (b)+24 ; JSAMPARRAY input_data
40 %define output_data(b) (b)+28 ; JSAMPARRAY output_data
41 ; Clobbers eax, ecx, edx, flags, mm0-mm3, mm6 and mm7; esi and edi are saved and restored.
42 align 16
43 global EXTN(jsimd_h2v1_downsample_mmx)
44
45 EXTN(jsimd_h2v1_downsample_mmx):
46 push ebp
47 mov ebp,esp ; ebp = frame base; the argument macros above index from it
48 ; push ebx ; unused
49 ; push ecx ; need not be preserved
50 ; push edx ; need not be preserved
51 push esi ; esi/edi are restored at .return
52 push edi
53
54 mov ecx, JDIMENSION [width_blks(ebp)]
55 shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols)
56 jz near .return ; zero output columns: nothing to do
57
58 mov edx, JDIMENSION [img_width(ebp)] ; edx = image_width (valid input bytes per row)
59
60 ; -- expand_right_edge: pad each input row out to output_cols*2 bytes with its last sample
61
62 push ecx ; save output_cols; popped at .expand_end
63 shl ecx,1 ; output_cols * 2
64 sub ecx,edx ; ecx = padding bytes needed per row
65 jle short .expand_end ; rows are already wide enough
66
67 mov eax, INT [max_v_samp(ebp)] ; eax = number of input rows to pad
68 test eax,eax
69 jle short .expand_end
70
71 cld ; rep stosb must store toward increasing addresses
72 mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
73 alignx 16,7
74 .expandloop:
75 push eax ; al is overwritten below, so the row counter is kept on the stack
76 push ecx ; rep stosb consumes ecx
77
78 mov edi, JSAMPROW [esi] ; edi = start of the current row
79 add edi,edx ; edi = first byte past the last real sample
80 mov al, JSAMPLE [edi-1] ; al = last real sample of the row
81
82 rep stosb ; replicate al into the ecx padding bytes
83
84 pop ecx
85 pop eax
86
87 add esi, byte SIZEOF_JSAMPROW ; advance to the next row pointer
88 dec eax
89 jg short .expandloop
90
91 .expand_end:
92 pop ecx ; output_cols
93
94 ; -- h2v1_downsample
95
96 mov eax, JDIMENSION [v_samp(ebp)] ; rowctr
97 test eax,eax
98 jle near .return ; no rows; no MMX register has been written yet, so skipping emms is fine
99
100 mov edx, 0x00010000 ; bias pattern
101 movd mm7,edx
102 pcmpeqw mm6,mm6 ; mm6 = all ones
103 punpckldq mm7,mm7 ; mm7={0, 1, 0, 1}
104 psrlw mm6,BYTE_BIT ; mm6={0xFF 0x00 0xFF 0x00 ..}
105
106 mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
107 mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
108 alignx 16,7
109 .rowloop:
110 push ecx ; column loop counts ecx down to zero; keep output_cols
111 push edi ; keep the output row-pointer cursor
112 push esi ; keep the input row-pointer cursor
113
114 mov esi, JSAMPROW [esi] ; inptr
115 mov edi, JSAMPROW [edi] ; outptr
116 alignx 16,7
117 .columnloop: ; each pass: two quadwords of input -> one quadword of output
118
119 movq mm0, MMWORD [esi+0*SIZEOF_MMWORD]
120 movq mm1, MMWORD [esi+1*SIZEOF_MMWORD]
121 movq mm2,mm0 ; copies used to extract the odd-indexed bytes
122 movq mm3,mm1
123
124 pand mm0,mm6 ; mm0 = even-indexed bytes, one per word
125 psrlw mm2,BYTE_BIT ; mm2 = odd-indexed bytes, one per word
126 pand mm1,mm6
127 psrlw mm3,BYTE_BIT
128
129 paddw mm0,mm2 ; sum of each horizontal pair
130 paddw mm1,mm3
131 paddw mm0,mm7 ; add the alternating 0/1 bias
132 paddw mm1,mm7
133 psrlw mm0,1 ; divide by 2
134 psrlw mm1,1
135
136 packuswb mm0,mm1 ; narrow the eight word results back to bytes
137
138 movq MMWORD [edi+0*SIZEOF_MMWORD], mm0
139
140 add esi, byte 2*SIZEOF_MMWORD ; inptr
141 add edi, byte 1*SIZEOF_MMWORD ; outptr
142 sub ecx, byte SIZEOF_MMWORD ; outcol
143 jnz short .columnloop
144
145 pop esi
146 pop edi
147 pop ecx
148
149 add esi, byte SIZEOF_JSAMPROW ; input_data
150 add edi, byte SIZEOF_JSAMPROW ; output_data
151 dec eax ; rowctr
152 jg short .rowloop
153
154 emms ; empty MMX state
155
156 .return:
157 pop edi
158 pop esi
159 ; pop edx ; need not be preserved
160 ; pop ecx ; need not be preserved
161 ; pop ebx ; unused
162 pop ebp
163 ret
164
165 ; --------------------------------------------------------------------------
166 ;
167 ; Downsample pixel values of a single component.
168 ; This version handles the standard case of 2:1 horizontal and 2:1 vertical,
169 ; without smoothing.
170 ;
171 ; GLOBAL(void)
172 ; jsimd_h2v2_downsample_mmx (JDIMENSION image_width, int max_v_samp_factor,
173 ; JDIMENSION v_samp_factor, JDIMENSION width_blocks,
174 ; JSAMPARRAY input_data, JSAMPARRAY output_data);
175 ; Each output byte is (sum of a 2x2 input block + bias) >> 2, taken from two adjacent input rows,
176 ; with the bias alternating 1,2 across output columns. Arguments are read from the stack via ebp.

177 %define img_width(b) (b)+8 ; JDIMENSION image_width
178 %define max_v_samp(b) (b)+12 ; int max_v_samp_factor
179 %define v_samp(b) (b)+16 ; JDIMENSION v_samp_factor
180 %define width_blks(b) (b)+20 ; JDIMENSION width_blocks
181 %define input_data(b) (b)+24 ; JSAMPARRAY input_data
182 %define output_data(b) (b)+28 ; JSAMPARRAY output_data
183 ; Clobbers eax, ecx, edx, flags and mm0-mm7; esi and edi are saved and restored.
184 align 16
185 global EXTN(jsimd_h2v2_downsample_mmx)
186
187 EXTN(jsimd_h2v2_downsample_mmx):
188 push ebp
189 mov ebp,esp ; ebp = frame base; the argument macros above index from it
190 ; push ebx ; unused
191 ; push ecx ; need not be preserved
192 ; push edx ; need not be preserved
193 push esi ; esi/edi are restored at .return
194 push edi
195
196 mov ecx, JDIMENSION [width_blks(ebp)]
197 shl ecx,3 ; imul ecx,DCTSIZE (ecx = output_cols)
198 jz near .return ; zero output columns: nothing to do
199
200 mov edx, JDIMENSION [img_width(ebp)] ; edx = image_width (valid input bytes per row)
201
202 ; -- expand_right_edge: pad each input row out to output_cols*2 bytes with its last sample
203
204 push ecx ; save output_cols; popped at .expand_end
205 shl ecx,1 ; output_cols * 2
206 sub ecx,edx ; ecx = padding bytes needed per row
207 jle short .expand_end ; rows are already wide enough
208
209 mov eax, INT [max_v_samp(ebp)] ; eax = number of input rows to pad
210 test eax,eax
211 jle short .expand_end
212
213 cld ; rep stosb must store toward increasing addresses
214 mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
215 alignx 16,7
216 .expandloop:
217 push eax ; al is overwritten below, so the row counter is kept on the stack
218 push ecx ; rep stosb consumes ecx
219
220 mov edi, JSAMPROW [esi] ; edi = start of the current row
221 add edi,edx ; edi = first byte past the last real sample
222 mov al, JSAMPLE [edi-1] ; al = last real sample of the row
223
224 rep stosb ; replicate al into the ecx padding bytes
225
226 pop ecx
227 pop eax
228
229 add esi, byte SIZEOF_JSAMPROW ; advance to the next row pointer
230 dec eax
231 jg short .expandloop
232
233 .expand_end:
234 pop ecx ; output_cols
235
236 ; -- h2v2_downsample
237
238 mov eax, JDIMENSION [v_samp(ebp)] ; rowctr
239 test eax,eax
240 jle near .return ; no rows; no MMX register has been written yet, so skipping emms is fine
241
242 mov edx, 0x00020001 ; bias pattern
243 movd mm7,edx
244 pcmpeqw mm6,mm6 ; mm6 = all ones
245 punpckldq mm7,mm7 ; mm7={1, 2, 1, 2}
246 psrlw mm6,BYTE_BIT ; mm6={0xFF 0x00 0xFF 0x00 ..}
247
248 mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
249 mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
250 alignx 16,7
251 .rowloop:
252 push ecx ; column loop counts ecx down to zero; keep output_cols
253 push edi ; keep the output row-pointer cursor
254 push esi ; keep the input row-pointer cursor
255
256 mov edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0
257 mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1
258 mov edi, JSAMPROW [edi] ; outptr
259 alignx 16,7
260 .columnloop: ; each pass: two quadwords from each of two rows -> one quadword of output
261
262 movq mm0, MMWORD [edx+0*SIZEOF_MMWORD]
263 movq mm1, MMWORD [esi+0*SIZEOF_MMWORD]
264 movq mm2, MMWORD [edx+1*SIZEOF_MMWORD]
265 movq mm3, MMWORD [esi+1*SIZEOF_MMWORD]
266
267 movq mm4,mm0 ; first quadword of each row: split into even/odd bytes
268 movq mm5,mm1
269 pand mm0,mm6 ; even-indexed bytes, one per word
270 psrlw mm4,BYTE_BIT ; odd-indexed bytes, one per word
271 pand mm1,mm6
272 psrlw mm5,BYTE_BIT
273 paddw mm0,mm4 ; mm0 = horizontal pair sums, row 0
274 paddw mm1,mm5 ; mm1 = horizontal pair sums, row 1
275
276 movq mm4,mm2 ; second quadword of each row: same treatment
277 movq mm5,mm3
278 pand mm2,mm6
279 psrlw mm4,BYTE_BIT
280 pand mm3,mm6
281 psrlw mm5,BYTE_BIT
282 paddw mm2,mm4
283 paddw mm3,mm5
284
285 paddw mm0,mm1 ; add the two rows: 2x2 block sums
286 paddw mm2,mm3
287 paddw mm0,mm7 ; add the alternating 1/2 bias
288 paddw mm2,mm7
289 psrlw mm0,2 ; divide by 4
290 psrlw mm2,2
291
292 packuswb mm0,mm2 ; narrow the eight word results back to bytes
293
294 movq MMWORD [edi+0*SIZEOF_MMWORD], mm0
295
296 add edx, byte 2*SIZEOF_MMWORD ; inptr0
297 add esi, byte 2*SIZEOF_MMWORD ; inptr1
298 add edi, byte 1*SIZEOF_MMWORD ; outptr
299 sub ecx, byte SIZEOF_MMWORD ; outcol
300 jnz near .columnloop
301
302 pop esi
303 pop edi
304 pop ecx
305
306 add esi, byte 2*SIZEOF_JSAMPROW ; input_data
307 add edi, byte 1*SIZEOF_JSAMPROW ; output_data
308 dec eax ; rowctr
309 jg near .rowloop
310
311 emms ; empty MMX state
312
313 .return:
314 pop edi
315 pop esi
316 ; pop edx ; need not be preserved
317 ; pop ecx ; need not be preserved
318 ; pop ebx ; unused
319 pop ebp
320 ret
321
322 ; Pad the end of this section to a 16-byte boundary. For some reason, the OS X
323 ; linker does not honor the request to align the segment unless we do this.
324 align 16
OLDNEW
« no previous file with comments | « simd/jcsample-altivec.c ('k') | simd/jcsample-sse2.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698