| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; jfdctflt.asm - floating-point FDCT (64-bit SSE) | 2 ; jfdctflt.asm - floating-point FDCT (64-bit SSE) |
| 3 ; | 3 ; |
| 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 ; Copyright 2009 D. R. Commander | 5 ; Copyright 2009 D. R. Commander |
| 6 ; | 6 ; |
| 7 ; Based on | 7 ; Based on |
| 8 ; x86 SIMD extension for IJG JPEG library | 8 ; x86 SIMD extension for IJG JPEG library |
| 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
| (...skipping 20 matching lines...) Expand all Loading... |
| 31 %endmacro | 31 %endmacro |
| 32 | 32 |
| 33 %macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7), i.e. the upper two dwords of %1 followed by the upper two dwords of %2 (shufps selector 0xEE) | 33 %macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7), i.e. the upper two dwords of %1 followed by the upper two dwords of %2 (shufps selector 0xEE) |
| 34 shufps %1,%2,0xEE | 34 shufps %1,%2,0xEE |
| 35 %endmacro | 35 %endmacro |
| 36 | 36 |
| 37 ; -------------------------------------------------------------------------- | 37 ; -------------------------------------------------------------------------- |
| 38 SECTION SEG_CONST | 38 SECTION SEG_CONST |
| 39 | 39 |
| 40 alignz 16 | 40 alignz 16 |
| 41 global EXTN(jconst_fdct_float_sse) | 41 global EXTN(jconst_fdct_float_sse) PRIVATE |
| 42 | 42 |
| 43 EXTN(jconst_fdct_float_sse): | 43 EXTN(jconst_fdct_float_sse): |
| 44 | 44 |
| 45 PD_0_382 times 4 dd 0.382683432365089771728460 ; sin(pi/8), stored as 4 identical dwords (one 16-byte vector) | 45 PD_0_382 times 4 dd 0.382683432365089771728460 ; sin(pi/8), stored as 4 identical dwords (one 16-byte vector) |
| 46 PD_0_707 times 4 dd 0.707106781186547524400844 ; cos(pi/4) = 1/sqrt(2), 4 identical dwords | 46 PD_0_707 times 4 dd 0.707106781186547524400844 ; cos(pi/4) = 1/sqrt(2), 4 identical dwords |
| 47 PD_0_541 times 4 dd 0.541196100146196984399723 ; sqrt(2)*sin(pi/8), 4 identical dwords | 47 PD_0_541 times 4 dd 0.541196100146196984399723 ; sqrt(2)*sin(pi/8), 4 identical dwords |
| 48 PD_1_306 times 4 dd 1.306562964876376527856643 ; sqrt(2)*cos(pi/8), 4 identical dwords | 48 PD_1_306 times 4 dd 1.306562964876376527856643 ; sqrt(2)*cos(pi/8), 4 identical dwords |
| 49 | 49 |
| 50 alignz 16 | 50 alignz 16 |
| 51 | 51 |
| 52 ; -------------------------------------------------------------------------- | 52 ; -------------------------------------------------------------------------- |
| 53 SECTION SEG_TEXT | 53 SECTION SEG_TEXT |
| 54 BITS 64 | 54 BITS 64 |
| 55 ; | 55 ; |
| 56 ; Perform the forward DCT on one block of samples. | 56 ; Perform the forward DCT on one block of samples. |
| 57 ; | 57 ; |
| 58 ; GLOBAL(void) | 58 ; GLOBAL(void) |
| 59 ; jsimd_fdct_float_sse (FAST_FLOAT *data) | 59 ; jsimd_fdct_float_sse (FAST_FLOAT *data) |
| 60 ; | 60 ; |
| 61 | 61 |
| 62 ; r10 = FAST_FLOAT *data | 62 ; r10 = FAST_FLOAT *data |
| 63 | 63 |
| 64 %define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] | 64 %define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] |
| 65 %define WK_NUM 2 | 65 %define WK_NUM 2 |
| 66 | 66 |
| 67 align 16 | 67 align 16 |
| 68 global EXTN(jsimd_fdct_float_sse) | 68 global EXTN(jsimd_fdct_float_sse) PRIVATE |
| 69 | 69 |
| 70 EXTN(jsimd_fdct_float_sse): | 70 EXTN(jsimd_fdct_float_sse): |
| 71 push rbp | 71 push rbp |
| 72 mov rax,rsp ; rax = rsp as it stands right after push rbp (the unaligned frame base) | 72 mov rax,rsp ; rax = rsp as it stands right after push rbp (the unaligned frame base) |
| 73 sub rsp, byte 4 | 73 sub rsp, byte 4 |
| 74 and rsp, byte (-SIZEOF_XMMWORD) ; round rsp down to a SIZEOF_XMMWORD multiple (align to 128 bits) | 74 and rsp, byte (-SIZEOF_XMMWORD) ; round rsp down to a SIZEOF_XMMWORD multiple (align to 128 bits) |
| 75 mov [rsp],rax | 75 mov [rsp],rax |
| 76 mov rbp,rsp ; rbp = aligned frame base - [rbp] holds the unaligned rsp saved from rax | 76 mov rbp,rsp ; rbp = aligned frame base - [rbp] holds the unaligned rsp saved from rax |
| 77 lea rsp, [wk(0)] | 77 lea rsp, [wk(0)] |
| 78 collect_args | 78 collect_args |
| (...skipping 270 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 349 | 349 |
| 350 uncollect_args | 350 uncollect_args |
| 351 mov rsp,rbp ; rsp <- aligned frame base (assumes rbp is unchanged since the prologue) | 351 mov rsp,rbp ; rsp <- aligned frame base (assumes rbp is unchanged since the prologue) |
| 352 pop rsp ; rsp <- unaligned rsp that the prologue stored at [rbp] - it points at the pushed rbp | 352 pop rsp ; rsp <- unaligned rsp that the prologue stored at [rbp] - it points at the pushed rbp |
| 353 pop rbp | 353 pop rbp |
| 354 ret | 354 ret |
| 355 | 355 |
| 356 ; For some reason, the OS X linker does not honor the request to align the | 356 ; For some reason, the OS X linker does not honor the request to align the |
| 357 ; segment unless we do this. | 357 ; segment unless we do this. |
| 358 align 16 | 358 align 16 |
| OLD | NEW |