OLD | NEW |
1 ; | 1 ; |
2 ; jquantf.asm - sample data conversion and quantization (64-bit SSE & SSE2) | 2 ; jquantf.asm - sample data conversion and quantization (64-bit SSE & SSE2) |
3 ; | 3 ; |
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
5 ; Copyright 2009 D. R. Commander | 5 ; Copyright 2009 D. R. Commander |
6 ; | 6 ; |
7 ; Based on | 7 ; Based on |
8 ; x86 SIMD extension for IJG JPEG library | 8 ; x86 SIMD extension for IJG JPEG library |
9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
(...skipping 18 matching lines...) Expand all Loading... |
29 ; GLOBAL(void) | 29 ; GLOBAL(void) |
30 ; jsimd_convsamp_float_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col, | 30 ; jsimd_convsamp_float_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col, |
31 ; FAST_FLOAT *workspace); | 31 ; FAST_FLOAT *workspace); |
32 ; | 32 ; |
33 | 33 |
34 ; r10 = JSAMPARRAY sample_data | 34 ; r10 = JSAMPARRAY sample_data |
35 ; r11 = JDIMENSION start_col | 35 ; r11 = JDIMENSION start_col |
36 ; r12 = FAST_FLOAT *workspace | 36 ; r12 = FAST_FLOAT *workspace |
37 | 37 |
38 align 16 | 38 align 16 |
39 global EXTN(jsimd_convsamp_float_sse2) | 39 global EXTN(jsimd_convsamp_float_sse2) PRIVATE |
40 | 40 |
41 EXTN(jsimd_convsamp_float_sse2): | 41 EXTN(jsimd_convsamp_float_sse2): |
42 push rbp | 42 push rbp |
43 mov rax,rsp | 43 mov rax,rsp |
44 mov rbp,rsp | 44 mov rbp,rsp |
45 collect_args | 45 collect_args |
46 push rbx | 46 push rbx |
47 | 47 |
48 pcmpeqw xmm7,xmm7 | 48 pcmpeqw xmm7,xmm7 |
49 psllw xmm7,7 | 49 psllw xmm7,7 |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
103 ; GLOBAL(void) | 103 ; GLOBAL(void) |
104 ; jsimd_quantize_float_sse2 (JCOEFPTR coef_block, FAST_FLOAT *divisors, | 104 ; jsimd_quantize_float_sse2 (JCOEFPTR coef_block, FAST_FLOAT *divisors, |
105 ; FAST_FLOAT *workspace); | 105 ; FAST_FLOAT *workspace); |
106 ; | 106 ; |
107 | 107 |
108 ; r10 = JCOEFPTR coef_block | 108 ; r10 = JCOEFPTR coef_block |
109 ; r11 = FAST_FLOAT *divisors | 109 ; r11 = FAST_FLOAT *divisors |
110 ; r12 = FAST_FLOAT *workspace | 110 ; r12 = FAST_FLOAT *workspace |
111 | 111 |
112 align 16 | 112 align 16 |
113 global EXTN(jsimd_quantize_float_sse2) | 113 global EXTN(jsimd_quantize_float_sse2) PRIVATE |
114 | 114 |
115 EXTN(jsimd_quantize_float_sse2): | 115 EXTN(jsimd_quantize_float_sse2): |
116 push rbp | 116 push rbp |
117 mov rax,rsp | 117 mov rax,rsp |
118 mov rbp,rsp | 118 mov rbp,rsp |
119 collect_args | 119 collect_args |
120 | 120 |
121 mov rsi, r12 | 121 mov rsi, r12 |
122 mov rdx, r11 | 122 mov rdx, r11 |
123 mov rdi, r10 | 123 mov rdi, r10 |
(...skipping 25 matching lines...) Expand all Loading... |
149 dec rax | 149 dec rax |
150 jnz short .quantloop | 150 jnz short .quantloop |
151 | 151 |
152 uncollect_args | 152 uncollect_args |
153 pop rbp | 153 pop rbp |
154 ret | 154 ret |
155 | 155 |
156 ; For some reason, the OS X linker does not honor the request to align the | 156 ; For some reason, the OS X linker does not honor the request to align the |
157 ; segment unless we do this. | 157 ; segment unless we do this. |
158 align 16 | 158 align 16 |
OLD | NEW |