Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(431)

Side by Side Diff: simd/jcsamss2-64.asm

Issue 1347093003: Rewrite the JDIMENSION argument management for x86 64-bit assembly to (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libjpeg_turbo.git@master
Patch Set: README.chromium Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « simd/jcqnts2i-64.asm ('k') | simd/jdclrss2-64.asm » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 ; 1 ;
2 ; jcsamss2-64.asm - downsampling (64-bit SSE2) 2 ; jcsamss2-64.asm - downsampling (64-bit SSE2)
3 ; 3 ;
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 ; Copyright 2009 D. R. Commander 5 ; Copyright 2009 D. R. Commander
6 ; 6 ;
7 ; Based on 7 ; Based on
8 ; x86 SIMD extension for IJG JPEG library 8 ; x86 SIMD extension for IJG JPEG library
9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru.
10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
42 42
43 align 16 43 align 16
44 global EXTN(jsimd_h2v1_downsample_sse2) PRIVATE 44 global EXTN(jsimd_h2v1_downsample_sse2) PRIVATE
45 45
46 EXTN(jsimd_h2v1_downsample_sse2): 46 EXTN(jsimd_h2v1_downsample_sse2):
47 push rbp 47 push rbp
48 mov rax,rsp 48 mov rax,rsp
49 mov rbp,rsp 49 mov rbp,rsp
50 collect_args 50 collect_args
51 51
52 » mov rcx, r13 52 » mov ecx, r13d
53 shl rcx,3 ; imul rcx,DCTSIZE (rcx = output_cols) 53 shl rcx,3 ; imul rcx,DCTSIZE (rcx = output_cols)
54 jz near .return 54 jz near .return
55 55
56 » mov rdx, r10 56 » mov edx, r10d
57 57
58 ; -- expand_right_edge 58 ; -- expand_right_edge
59 59
60 push rcx 60 push rcx
61 shl rcx,1 ; output_cols * 2 61 shl rcx,1 ; output_cols * 2
62 sub rcx,rdx 62 sub rcx,rdx
63 jle short .expand_end 63 jle short .expand_end
64 64
65 mov rax, r11 65 mov rax, r11
66 test rax,rax 66 test rax,rax
(...skipping 16 matching lines...) Expand all
83 83
84 add rsi, byte SIZEOF_JSAMPROW 84 add rsi, byte SIZEOF_JSAMPROW
85 dec rax 85 dec rax
86 jg short .expandloop 86 jg short .expandloop
87 87
88 .expand_end: 88 .expand_end:
89 pop rcx ; output_cols 89 pop rcx ; output_cols
90 90
91 ; -- h2v1_downsample 91 ; -- h2v1_downsample
92 92
93 » mov» rax, r12» ; rowctr 93 » mov» eax, r12d» ; rowctr
94 test eax,eax 94 test eax,eax
95 jle near .return 95 jle near .return
96 96
97 mov rdx, 0x00010000 ; bias pattern 97 mov rdx, 0x00010000 ; bias pattern
98 movd xmm7,edx 98 movd xmm7,edx
99 pcmpeqw xmm6,xmm6 99 pcmpeqw xmm6,xmm6
100 pshufd xmm7,xmm7,0x00 ; xmm7={0, 1, 0, 1, 0, 1, 0, 1} 100 pshufd xmm7,xmm7,0x00 ; xmm7={0, 1, 0, 1, 0, 1, 0, 1}
101 psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} 101 psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..}
102 102
103 mov rsi, r14 ; input_data 103 mov rsi, r14 ; input_data
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
186 186
187 align 16 187 align 16
188 global EXTN(jsimd_h2v2_downsample_sse2) PRIVATE 188 global EXTN(jsimd_h2v2_downsample_sse2) PRIVATE
189 189
190 EXTN(jsimd_h2v2_downsample_sse2): 190 EXTN(jsimd_h2v2_downsample_sse2):
191 push rbp 191 push rbp
192 mov rax,rsp 192 mov rax,rsp
193 mov rbp,rsp 193 mov rbp,rsp
194 collect_args 194 collect_args
195 195
196 » mov» rcx, r13 196 » mov» ecx, r13d
197 shl rcx,3 ; imul rcx,DCTSIZE (rcx = output_cols) 197 shl rcx,3 ; imul rcx,DCTSIZE (rcx = output_cols)
198 jz near .return 198 jz near .return
199 199
200 mov rdx, r10 200 mov rdx, r10
201 201
202 ; -- expand_right_edge 202 ; -- expand_right_edge
203 203
204 push rcx 204 push rcx
205 shl rcx,1 ; output_cols * 2 205 shl rcx,1 ; output_cols * 2
206 sub rcx,rdx 206 sub rcx,rdx
(...skipping 20 matching lines...) Expand all
227 227
228 add rsi, byte SIZEOF_JSAMPROW 228 add rsi, byte SIZEOF_JSAMPROW
229 dec rax 229 dec rax
230 jg short .expandloop 230 jg short .expandloop
231 231
232 .expand_end: 232 .expand_end:
233 pop rcx ; output_cols 233 pop rcx ; output_cols
234 234
235 ; -- h2v2_downsample 235 ; -- h2v2_downsample
236 236
237 » mov» rax, r12» ; rowctr 237 » mov» eax, r12d» ; rowctr
238 test rax,rax 238 test rax,rax
239 jle near .return 239 jle near .return
240 240
241 mov rdx, 0x00020001 ; bias pattern 241 mov rdx, 0x00020001 ; bias pattern
242 movd xmm7,edx 242 movd xmm7,edx
243 pcmpeqw xmm6,xmm6 243 pcmpeqw xmm6,xmm6
244 pshufd xmm7,xmm7,0x00 ; xmm7={1, 2, 1, 2, 1, 2, 1, 2} 244 pshufd xmm7,xmm7,0x00 ; xmm7={1, 2, 1, 2, 1, 2, 1, 2}
245 psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..} 245 psrlw xmm6,BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..}
246 246
247 mov rsi, r14 ; input_data 247 mov rsi, r14 ; input_data
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
321 jg near .rowloop 321 jg near .rowloop
322 322
323 .return: 323 .return:
324 uncollect_args 324 uncollect_args
325 pop rbp 325 pop rbp
326 ret 326 ret
327 327
328 ; For some reason, the OS X linker does not honor the request to align the 328 ; For some reason, the OS X linker does not honor the request to align the
329 ; segment unless we do this. 329 ; segment unless we do this.
330 align 16 330 align 16
OLDNEW
« no previous file with comments | « simd/jcqnts2i-64.asm ('k') | simd/jdclrss2-64.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698