| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; jquantf.asm - sample data conversion and quantization (64-bit SSE & SSE2) | 2 ; jquantf.asm - sample data conversion and quantization (64-bit SSE & SSE2) |
| 3 ; | 3 ; |
| 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 ; Copyright 2009 D. R. Commander | 5 ; Copyright 2009 D. R. Commander |
| 6 ; | 6 ; |
| 7 ; Based on | 7 ; Based on |
| 8 ; x86 SIMD extension for IJG JPEG library | 8 ; x86 SIMD extension for IJG JPEG library |
| 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
| (...skipping 18 matching lines...) |
| 29 ; GLOBAL(void) | 29 ; GLOBAL(void) |
| 30 ; jsimd_convsamp_float_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col, | 30 ; jsimd_convsamp_float_sse2 (JSAMPARRAY sample_data, JDIMENSION start_col, |
| 31 ; FAST_FLOAT *workspace); | 31 ; FAST_FLOAT *workspace); |
| 32 ; | 32 ; |
| 33 | 33 |
| 34 ; r10 = JSAMPARRAY sample_data | 34 ; r10 = JSAMPARRAY sample_data |
| 35 ; r11 = JDIMENSION start_col | 35 ; r11 = JDIMENSION start_col |
| 36 ; r12 = FAST_FLOAT *workspace | 36 ; r12 = FAST_FLOAT *workspace |
| 37 | 37 |
| 38 align 16 | 38 align 16 |
| 39 global EXTN(jsimd_convsamp_float_sse2) | 39 global EXTN(jsimd_convsamp_float_sse2) PRIVATE |
| 40 | 40 |
| 41 EXTN(jsimd_convsamp_float_sse2): | 41 EXTN(jsimd_convsamp_float_sse2): |
| 42 push rbp | 42 push rbp |
| 43 mov rax,rsp | 43 mov rax,rsp |
| 44 mov rbp,rsp | 44 mov rbp,rsp |
| 45 collect_args | 45 collect_args |
| 46 push rbx | 46 push rbx |
| 47 | 47 |
| 48 pcmpeqw xmm7,xmm7 | 48 pcmpeqw xmm7,xmm7 |
| 49 psllw xmm7,7 | 49 psllw xmm7,7 |
| (...skipping 53 matching lines...) |
| 103 ; GLOBAL(void) | 103 ; GLOBAL(void) |
| 104 ; jsimd_quantize_float_sse2 (JCOEFPTR coef_block, FAST_FLOAT *divisors, | 104 ; jsimd_quantize_float_sse2 (JCOEFPTR coef_block, FAST_FLOAT *divisors, |
| 105 ; FAST_FLOAT *workspace); | 105 ; FAST_FLOAT *workspace); |
| 106 ; | 106 ; |
| 107 | 107 |
| 108 ; r10 = JCOEFPTR coef_block | 108 ; r10 = JCOEFPTR coef_block |
| 109 ; r11 = FAST_FLOAT *divisors | 109 ; r11 = FAST_FLOAT *divisors |
| 110 ; r12 = FAST_FLOAT *workspace | 110 ; r12 = FAST_FLOAT *workspace |
| 111 | 111 |
| 112 align 16 | 112 align 16 |
| 113 global EXTN(jsimd_quantize_float_sse2) | 113 global EXTN(jsimd_quantize_float_sse2) PRIVATE |
| 114 | 114 |
| 115 EXTN(jsimd_quantize_float_sse2): | 115 EXTN(jsimd_quantize_float_sse2): |
| 116 push rbp | 116 push rbp |
| 117 mov rax,rsp | 117 mov rax,rsp |
| 118 mov rbp,rsp | 118 mov rbp,rsp |
| 119 collect_args | 119 collect_args |
| 120 | 120 |
| 121 mov rsi, r12 | 121 mov rsi, r12 |
| 122 mov rdx, r11 | 122 mov rdx, r11 |
| 123 mov rdi, r10 | 123 mov rdi, r10 |
| (...skipping 25 matching lines...) |
| 149 dec rax | 149 dec rax |
| 150 jnz short .quantloop | 150 jnz short .quantloop |
| 151 | 151 |
| 152 uncollect_args | 152 uncollect_args |
| 153 pop rbp | 153 pop rbp |
| 154 ret | 154 ret |
| 155 | 155 |
| 156 ; For some reason, the OS X linker does not honor the request to align the | 156 ; For some reason, the OS X linker does not honor the request to align the |
| 157 ; segment unless we do this. | 157 ; segment unless we do this. |
| 158 align 16 | 158 align 16 |
| OLD | NEW |