| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; jcqnts2f-64.asm - sample data conversion and quantization (64-bit SSE & SSE2) | 2 ; jcqnts2f-64.asm - sample data conversion and quantization (64-bit SSE & SSE2) |
| 3 ; | 3 ; |
| 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 ; Copyright 2009 D. R. Commander | 5 ; Copyright 2009 D. R. Commander |
| 6 ; | 6 ; |
| 7 ; Based on | 7 ; Based on |
| 8 ; x86 SIMD extension for IJG JPEG library | 8 ; x86 SIMD extension for IJG JPEG library |
| 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
| (...skipping 32 matching lines...) | (...skipping 32 matching lines...) |
| 43 mov rax,rsp | 43 mov rax,rsp |
| 44 mov rbp,rsp | 44 mov rbp,rsp |
| 45 collect_args | 45 collect_args |
| 46 push rbx | 46 push rbx |
| 47 | 47 |
| 48 pcmpeqw xmm7,xmm7 | 48 pcmpeqw xmm7,xmm7 |
| 49 psllw xmm7,7 | 49 psllw xmm7,7 |
| 50 packsswb xmm7,xmm7 ; xmm7 = PB_CENTERJSAMPLE (0x808080..) | 50 packsswb xmm7,xmm7 ; xmm7 = PB_CENTERJSAMPLE (0x808080..) |
| 51 | 51 |
| 52 mov rsi, r10 | 52 mov rsi, r10 |
| 53 mov rax, r11 | 53 mov eax, r11d |
| 54 mov rdi, r12 | 54 mov rdi, r12 |
| 55 mov rcx, DCTSIZE/2 | 55 mov rcx, DCTSIZE/2 |
| 56 .convloop: | 56 .convloop: |
| 57 mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) | 57 mov rbx, JSAMPROW [rsi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *) |
| 58 mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) | 58 mov rdx, JSAMPROW [rsi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *) |
| 59 | 59 |
| 60 movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] | 60 movq xmm0, XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE] |
| 61 movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] | 61 movq xmm1, XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE] |
| 62 | 62 |
| 63 psubb xmm0,xmm7 ; xmm0=(01234567) | 63 psubb xmm0,xmm7 ; xmm0=(01234567) |
| (...skipping 85 matching lines...) | (...skipping 85 matching lines...) |
| 149 dec rax | 149 dec rax |
| 150 jnz short .quantloop | 150 jnz short .quantloop |
| 151 | 151 |
| 152 uncollect_args | 152 uncollect_args |
| 153 pop rbp | 153 pop rbp |
| 154 ret | 154 ret |
| 155 | 155 |
| 156 ; For some reason, the OS X linker does not honor the request to align the | 156 ; For some reason, the OS X linker does not honor the request to align the |
| 157 ; segment unless we do this. | 157 ; segment unless we do this. |
| 158 align 16 | 158 align 16 |
| OLD | NEW |