OLD | NEW |
1 ; | 1 ; |
2 ; jcqnts2i-64.asm - sample data conversion and quantization (64-bit SSE2) | 2 ; jcqnts2i-64.asm - sample data conversion and quantization (64-bit SSE2) |
3 ; | 3 ; |
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
5 ; Copyright 2009 D. R. Commander | 5 ; Copyright 2009 D. R. Commander |
6 ; | 6 ; |
7 ; Based on | 7 ; Based on |
8 ; x86 SIMD extension for IJG JPEG library | 8 ; x86 SIMD extension for IJG JPEG library |
9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
43 mov rax,rsp | 43 mov rax,rsp |
44 mov rbp,rsp | 44 mov rbp,rsp |
45 collect_args | 45 collect_args |
46 push rbx | 46 push rbx |
47 | 47 |
48 pxor xmm6,xmm6 ; xmm6=(all 0's) | 48 pxor xmm6,xmm6 ; xmm6=(all 0's) |
49 pcmpeqw xmm7,xmm7 | 49 pcmpeqw xmm7,xmm7 |
50 psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} | 50 psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..} |
51 | 51 |
52 mov rsi, r10 | 52 mov rsi, r10 |
53 » mov rax, r11 | 53 » mov eax, r11d |
54 mov rdi, r12 | 54 mov rdi, r12 |
55 mov rcx, DCTSIZE/4 | 55 mov rcx, DCTSIZE/4 |
56 .convloop: | 56 .convloop: |
57 mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) | 57 mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) |
58 mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) | 58 mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) |
59 | 59 |
60 movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm0=(01234567) | 60 movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] ; xmm0=(01234567) |
61 movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm1=(89ABCDEF) | 61 movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] ; xmm1=(89ABCDEF) |
62 | 62 |
63 mov rbx, JSAMPROW [rsi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) | 63 mov rbx, JSAMPROW [rsi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *) |
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
178 dec rax | 178 dec rax |
179 jnz near .quantloop | 179 jnz near .quantloop |
180 | 180 |
181 uncollect_args | 181 uncollect_args |
182 pop rbp | 182 pop rbp |
183 ret | 183 ret |
184 | 184 |
185 ; For some reason, the OS X linker does not honor the request to align the | 185 ; For some reason, the OS X linker does not honor the request to align the |
186 ; segment unless we do this. | 186 ; segment unless we do this. |
187 align 16 | 187 align 16 |
OLD | NEW |