OLD | NEW |
1 ; | 1 ; |
2 ; jcqnts2f-64.asm - sample data conversion and quantization (64-bit SSE & SSE2) | 2 ; jcqnts2f-64.asm - sample data conversion and quantization (64-bit SSE & SSE2) |
3 ; | 3 ; |
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
5 ; Copyright 2009 D. R. Commander | 5 ; Copyright 2009 D. R. Commander |
6 ; | 6 ; |
7 ; Based on | 7 ; Based on |
8 ; x86 SIMD extension for IJG JPEG library | 8 ; x86 SIMD extension for IJG JPEG library |
9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
43 mov rax,rsp | 43 mov rax,rsp |
44 mov rbp,rsp | 44 mov rbp,rsp |
45 collect_args | 45 collect_args |
46 push rbx | 46 push rbx |
47 | 47 |
48 pcmpeqw xmm7,xmm7 | 48 pcmpeqw xmm7,xmm7 |
49 psllw xmm7,7 | 49 psllw xmm7,7 |
50 packsswb xmm7,xmm7 ; xmm7 = PB_CENTERJSAMPLE (0x808080..) | 50 packsswb xmm7,xmm7 ; xmm7 = PB_CENTERJSAMPLE (0x808080..) |
51 | 51 |
52 mov rsi, r10 | 52 mov rsi, r10 |
53 » mov» rax, r11 | 53 » mov» eax, r11d |
54 mov rdi, r12 | 54 mov rdi, r12 |
55 mov rcx, DCTSIZE/2 | 55 mov rcx, DCTSIZE/2 |
56 .convloop: | 56 .convloop: |
57 mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) | 57 mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) |
58 mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) | 58 mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) |
59 | 59 |
60 movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] | 60 movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] |
61 movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] | 61 movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] |
62 | 62 |
63 psubb xmm0,xmm7 ; xmm0=(01234567) | 63 psubb xmm0,xmm7 ; xmm0=(01234567) |
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
149 dec rax | 149 dec rax |
150 jnz short .quantloop | 150 jnz short .quantloop |
151 | 151 |
152 uncollect_args | 152 uncollect_args |
153 pop rbp | 153 pop rbp |
154 ret | 154 ret |
155 | 155 |
156 ; For some reason, the OS X linker does not honor the request to align the | 156 ; For some reason, the OS X linker does not honor the request to align the |
157 ; segment unless we do this. | 157 ; segment unless we do this. |
158 align 16 | 158 align 16 |
OLD | NEW |