| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; jidctflt.asm - floating-point IDCT (SSE & MMX) | 2 ; jidctflt.asm - floating-point IDCT (SSE & MMX) |
| 3 ; | 3 ; |
| 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 ; | 5 ; |
| 6 ; Based on | 6 ; Based on |
| 7 ; x86 SIMD extension for IJG JPEG library | 7 ; x86 SIMD extension for IJG JPEG library |
| 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
| 10 ; | 10 ; |
| (...skipping 19 matching lines...) Expand all Loading... |
| 30 %endmacro | 30 %endmacro |
| 31 | 31 |
| 32 %macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7), i.e. the upper two dwords of %1 followed by the upper two dwords of %2 (shufps selector 0xEE) | 32 %macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7), i.e. the upper two dwords of %1 followed by the upper two dwords of %2 (shufps selector 0xEE) |
| 33 shufps %1,%2,0xEE | 33 shufps %1,%2,0xEE |
| 34 %endmacro | 34 %endmacro |
| 35 | 35 |
| 36 ; -------------------------------------------------------------------------- | 36 ; -------------------------------------------------------------------------- |
| 37 SECTION SEG_CONST | 37 SECTION SEG_CONST |
| 38 | 38 |
| 39 alignz 16 | 39 alignz 16 |
| 40 global EXTN(jconst_idct_float_sse) | 40 global EXTN(jconst_idct_float_sse) PRIVATE |
| 41 | 41 |
| 42 EXTN(jconst_idct_float_sse): | 42 EXTN(jconst_idct_float_sse): |
| 43 | 43 |
| 44 PD_1_414 times 4 dd 1.414213562373095048801689 | 44 PD_1_414 times 4 dd 1.414213562373095048801689 |
| 45 PD_1_847 times 4 dd 1.847759065022573512256366 | 45 PD_1_847 times 4 dd 1.847759065022573512256366 |
| 46 PD_1_082 times 4 dd 1.082392200292393968799446 | 46 PD_1_082 times 4 dd 1.082392200292393968799446 |
| 47 PD_M2_613 times 4 dd -2.613125929752753055713286 | 47 PD_M2_613 times 4 dd -2.613125929752753055713286 |
| 48 PD_0_125 times 4 dd 0.125 ; 1/8 = 2^-3 (exactly representable); four dword copies fill one 16-byte vector | 48 PD_0_125 times 4 dd 0.125 ; 1/8 = 2^-3 (exactly representable); four dword copies fill one 16-byte vector |
| 49 PB_CENTERJSAMP times 8 db CENTERJSAMPLE | 49 PB_CENTERJSAMP times 8 db CENTERJSAMPLE |
| 50 | 50 |
| (...skipping 15 matching lines...) Expand all Loading... |
| 66 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf | 66 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf |
| 67 %define output_col(b) (b)+20 ; JDIMENSION output_col | 67 %define output_col(b) (b)+20 ; JDIMENSION output_col |
| 68 | 68 |
| 69 %define original_ebp ebp+0 | 69 %define original_ebp ebp+0 |
| 70 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] | 70 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] |
| 71 %define WK_NUM 2 | 71 %define WK_NUM 2 |
| 72 %define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT | 72 %define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT |
| 73 ; FAST_FLOAT workspace[DCTSIZE2] | 73 ; FAST_FLOAT workspace[DCTSIZE2] |
| 74 | 74 |
| 75 align 16 | 75 align 16 |
| 76 global EXTN(jsimd_idct_float_sse) | 76 global EXTN(jsimd_idct_float_sse) PRIVATE |
| 77 | 77 |
| 78 EXTN(jsimd_idct_float_sse): | 78 EXTN(jsimd_idct_float_sse): |
| 79 push ebp | 79 push ebp |
| 80 mov eax,esp ; eax = original ebp | 80 mov eax,esp ; eax = original ebp |
| 81 sub esp, byte 4 | 81 sub esp, byte 4 |
| 82 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits | 82 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits |
| 83 mov [esp],eax | 83 mov [esp],eax |
| 84 mov ebp,esp ; ebp = aligned ebp | 84 mov ebp,esp ; ebp = aligned ebp |
| 85 lea esp, [workspace] | 85 lea esp, [workspace] |
| 86 push ebx | 86 push ebx |
| (...skipping 476 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 563 ; pop ecx ; need not be preserved | 563 ; pop ecx ; need not be preserved |
| 564 pop ebx | 564 pop ebx |
| 565 mov esp,ebp ; esp <- aligned ebp | 565 mov esp,ebp ; esp <- aligned ebp |
| 566 pop esp ; esp <- original ebp | 566 pop esp ; esp <- original ebp |
| 567 pop ebp | 567 pop ebp |
| 568 ret | 568 ret |
| 569 | 569 |
| 570 ; For some reason, the OS X linker does not honor the request to align the | 570 ; For some reason, the OS X linker does not honor the request to align the |
| 571 ; segment unless we do this. | 571 ; segment unless we do this. |
| 572 align 16 | 572 align 16 |
| OLD | NEW |