| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; jcqnts2i-64.asm - sample data conversion and quantization (64-bit SSE2) | 2 ; jcqnts2i-64.asm - sample data conversion and quantization (64-bit SSE2) |
| 3 ; | 3 ; |
| 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 ; Copyright 2009 D. R. Commander | 5 ; Copyright 2009 D. R. Commander |
| 6 ; | 6 ; |
| 7 ; Based on | 7 ; Based on |
| 8 ; x86 SIMD extension for IJG JPEG library | 8 ; x86 SIMD extension for IJG JPEG library |
| 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 43 mov rax,rsp | 43 mov rax,rsp |
| 44 mov rbp,rsp | 44 mov rbp,rsp |
| 45 collect_args | 45 collect_args |
| 46 push rbx | 46 push rbx |
| 47 | 47 |
| 48 pxor xmm6,xmm6 ; xmm6=(all 0's) | 48 pxor xmm6,xmm6 ; xmm6=(all 0's) |
| 49 pcmpeqw xmm7,xmm7 | 49 pcmpeqw xmm7,xmm7 |
| 50 psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} | 50 psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} |
| 51 | 51 |
| 52 mov rsi, r10 | 52 mov rsi, r10 |
| 53 » mov rax, r11 | 53 » mov eax, r11d |
| 54 mov rdi, r12 | 54 mov rdi, r12 |
| 55 mov rcx, DCTSIZE/4 | 55 mov rcx, DCTSIZE/4 |
| 56 .convloop: | 56 .convloop: |
| 57 mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) | 57 mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) |
| 58 mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) | 58 mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) |
| 59 | 59 |
| 60 movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm0=(01234567) | 60 movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm0=(01234567) |
| 61 movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm1=(89ABCDEF) | 61 movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm1=(89ABCDEF) |
| 62 | 62 |
| 63 mov rbx, JSAMPROW [rsi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) | 63 mov rbx, JSAMPROW [rsi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) |
| (...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 178 dec rax | 178 dec rax |
| 179 jnz near .quantloop | 179 jnz near .quantloop |
| 180 | 180 |
| 181 uncollect_args | 181 uncollect_args |
| 182 pop rbp | 182 pop rbp |
| 183 ret | 183 ret |
| 184 | 184 |
| 185 ; For some reason, the OS X linker does not honor the request to align the | 185 ; For some reason, the OS X linker does not honor the request to align the |
| 186 ; segment unless we do this. | 186 ; segment unless we do this. |
| 187 align 16 | 187 align 16 |
| OLD | NEW |