OLD | NEW |
1 ; | 1 ; |
2 ; jquanti.asm - sample data conversion and quantization (64-bit SSE2) | 2 ; jquanti.asm - sample data conversion and quantization (64-bit SSE2) |
3 ; | 3 ; |
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
5 ; Copyright 2009 D. R. Commander | 5 ; Copyright 2009 D. R. Commander |
6 ; | 6 ; |
7 ; Based on | 7 ; Based on |
8 ; x86 SIMD extension for IJG JPEG library | 8 ; x86 SIMD extension for IJG JPEG library |
9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
(...skipping 18 matching lines...) Expand all Loading... |
29 ; GLOBAL(void) | 29 ; GLOBAL(void) |
30 ; jsimd_convsamp_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col, | 30 ; jsimd_convsamp_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col, |
31 ; DCTELEM *workspace); | 31 ; DCTELEM *workspace); |
32 ; | 32 ; |
33 | 33 |
34 ; r10 = JSAMPARRAY sample_data | 34 ; r10 = JSAMPARRAY sample_data |
35 ; r11 = JDIMENSION start_col | 35 ; r11 = JDIMENSION start_col |
36 ; r12 = DCTELEM *workspace | 36 ; r12 = DCTELEM *workspace |
37 | 37 |
38 align 16 | 38 align 16 |
39 global EXTN(jsimd_convsamp_sse2) | 39 global EXTN(jsimd_convsamp_sse2) PRIVATE |
40 | 40 |
41 EXTN(jsimd_convsamp_sse2): | 41 EXTN(jsimd_convsamp_sse2): |
42 push rbp | 42 push rbp |
43 mov rax,rsp | 43 mov rax,rsp |
44 mov rbp,rsp | 44 mov rbp,rsp |
45 collect_args | 45 collect_args |
46 push rbx | 46 push rbx |
47 | 47 |
48 pxor xmm6,xmm6 ; xmm6=(all 0's) | 48 pxor xmm6,xmm6 ; xmm6=(all 0's) |
49 pcmpeqw xmm7,xmm7 | 49 pcmpeqw xmm7,xmm7 |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
105 | 105 |
106 %define RECIPROCAL(m,n,b) XMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM) | 106 %define RECIPROCAL(m,n,b) XMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM) |
107 %define CORRECTION(m,n,b) XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM) | 107 %define CORRECTION(m,n,b) XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM) |
108 %define SCALE(m,n,b) XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM) | 108 %define SCALE(m,n,b) XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM) |
109 | 109 |
110 ; r10 = JCOEFPTR coef_block | 110 ; r10 = JCOEFPTR coef_block |
111 ; r11 = DCTELEM *divisors | 111 ; r11 = DCTELEM *divisors |
112 ; r12 = DCTELEM *workspace | 112 ; r12 = DCTELEM *workspace |
113 | 113 |
114 align 16 | 114 align 16 |
115 global EXTN(jsimd_quantize_sse2) | 115 global EXTN(jsimd_quantize_sse2) PRIVATE |
116 | 116 |
117 EXTN(jsimd_quantize_sse2): | 117 EXTN(jsimd_quantize_sse2): |
118 push rbp | 118 push rbp |
119 mov rax,rsp | 119 mov rax,rsp |
120 mov rbp,rsp | 120 mov rbp,rsp |
121 collect_args | 121 collect_args |
122 | 122 |
123 mov rsi, r12 | 123 mov rsi, r12 |
124 mov rdx, r11 | 124 mov rdx, r11 |
125 mov rdi, r10 | 125 mov rdi, r10 |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
178 dec rax | 178 dec rax |
179 jnz near .quantloop | 179 jnz near .quantloop |
180 | 180 |
181 uncollect_args | 181 uncollect_args |
182 pop rbp | 182 pop rbp |
183 ret | 183 ret |
184 | 184 |
185 ; For some reason, the OS X linker does not honor the request to align the | 185 ; For some reason, the OS X linker does not honor the request to align the |
186 ; segment unless we do this. | 186 ; segment unless we do this. |
187 align 16 | 187 align 16 |
OLD | NEW |