| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; jfdctflt.asm - floating-point FDCT (SSE) | 2 ; jfdctflt.asm - floating-point FDCT (SSE) |
| 3 ; | 3 ; |
| 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 ; | 5 ; |
| 6 ; Based on | 6 ; Based on |
| 7 ; x86 SIMD extension for IJG JPEG library | 7 ; x86 SIMD extension for IJG JPEG library |
| 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
| 10 ; | 10 ; |
| (...skipping 19 matching lines...) Expand all Loading... |
| 30 %endmacro | 30 %endmacro |
| 31 | 31 |
| 32 %macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) | 32 %macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) |
| 33 shufps %1,%2,0xEE | 33 shufps %1,%2,0xEE |
| 34 %endmacro | 34 %endmacro |
| 35 | 35 |
| 36 ; -------------------------------------------------------------------------- | 36 ; -------------------------------------------------------------------------- |
| 37 SECTION SEG_CONST | 37 SECTION SEG_CONST |
| 38 | 38 |
| 39 alignz 16 | 39 alignz 16 |
| 40 global EXTN(jconst_fdct_float_sse) | 40 global EXTN(jconst_fdct_float_sse) PRIVATE |
| 41 | 41 |
| 42 EXTN(jconst_fdct_float_sse): | 42 EXTN(jconst_fdct_float_sse): |
| 43 | 43 |
| 44 PD_0_382 times 4 dd 0.382683432365089771728460 | 44 PD_0_382 times 4 dd 0.382683432365089771728460 |
| 45 PD_0_707 times 4 dd 0.707106781186547524400844 | 45 PD_0_707 times 4 dd 0.707106781186547524400844 |
| 46 PD_0_541 times 4 dd 0.541196100146196984399723 | 46 PD_0_541 times 4 dd 0.541196100146196984399723 |
| 47 PD_1_306 times 4 dd 1.306562964876376527856643 | 47 PD_1_306 times 4 dd 1.306562964876376527856643 |
| 48 | 48 |
| 49 alignz 16 | 49 alignz 16 |
| 50 | 50 |
| 51 ; -------------------------------------------------------------------------- | 51 ; -------------------------------------------------------------------------- |
| 52 SECTION SEG_TEXT | 52 SECTION SEG_TEXT |
| 53 BITS 32 | 53 BITS 32 |
| 54 ; | 54 ; |
| 55 ; Perform the forward DCT on one block of samples. | 55 ; Perform the forward DCT on one block of samples. |
| 56 ; | 56 ; |
| 57 ; GLOBAL(void) | 57 ; GLOBAL(void) |
| 58 ; jsimd_fdct_float_sse (FAST_FLOAT *data) | 58 ; jsimd_fdct_float_sse (FAST_FLOAT *data) |
| 59 ; | 59 ; |
| 60 | 60 |
| 61 %define data(b) (b)+8 ; FAST_FLOAT *data | 61 %define data(b) (b)+8 ; FAST_FLOAT *data |
| 62 | 62 |
| 63 %define original_ebp ebp+0 | 63 %define original_ebp ebp+0 |
| 64 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] | 64 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] |
| 65 %define WK_NUM 2 | 65 %define WK_NUM 2 |
| 66 | 66 |
| 67 align 16 | 67 align 16 |
| 68 global EXTN(jsimd_fdct_float_sse) | 68 global EXTN(jsimd_fdct_float_sse) PRIVATE |
| 69 | 69 |
| 70 EXTN(jsimd_fdct_float_sse): | 70 EXTN(jsimd_fdct_float_sse): |
| 71 push ebp | 71 push ebp |
| 72 mov eax,esp ; eax = original ebp | 72 mov eax,esp ; eax = original ebp |
| 73 sub esp, byte 4 | 73 sub esp, byte 4 |
| 74 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits | 74 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits |
| 75 mov [esp],eax | 75 mov [esp],eax |
| 76 mov ebp,esp ; ebp = aligned ebp | 76 mov ebp,esp ; ebp = aligned ebp |
| 77 lea esp, [wk(0)] | 77 lea esp, [wk(0)] |
| 78 pushpic ebx | 78 pushpic ebx |
| (...skipping 282 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 361 ; pop ecx ; need not be preserved | 361 ; pop ecx ; need not be preserved |
| 362 poppic ebx | 362 poppic ebx |
| 363 mov esp,ebp ; esp <- aligned ebp | 363 mov esp,ebp ; esp <- aligned ebp |
| 364 pop esp ; esp <- original ebp | 364 pop esp ; esp <- original ebp |
| 365 pop ebp | 365 pop ebp |
| 366 ret | 366 ret |
| 367 | 367 |
| 368 ; For some reason, the OS X linker does not honor the request to align the | 368 ; For some reason, the OS X linker does not honor the request to align the |
| 369 ; segment unless we do this. | 369 ; segment unless we do this. |
| 370 align 16 | 370 align 16 |
| OLD | NEW |