| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; jidctflt.asm - floating-point IDCT (3DNow! & MMX) | 2 ; jidctflt.asm - floating-point IDCT (3DNow! & MMX) |
| 3 ; | 3 ; |
| 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 ; | 5 ; |
| 6 ; Based on | 6 ; Based on |
| 7 ; x86 SIMD extension for IJG JPEG library | 7 ; x86 SIMD extension for IJG JPEG library |
| 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
| 10 ; | 10 ; |
| 11 ; This file should be assembled with NASM (Netwide Assembler), | 11 ; This file should be assembled with NASM (Netwide Assembler), |
| 12 ; can *not* be assembled with Microsoft's MASM or any compatible | 12 ; can *not* be assembled with Microsoft's MASM or any compatible |
| 13 ; assembler (including Borland's Turbo Assembler). | 13 ; assembler (including Borland's Turbo Assembler). |
| 14 ; NASM is available from http://nasm.sourceforge.net/ or | 14 ; NASM is available from http://nasm.sourceforge.net/ or |
| 15 ; http://sourceforge.net/project/showfiles.php?group_id=6208 | 15 ; http://sourceforge.net/project/showfiles.php?group_id=6208 |
| 16 ; | 16 ; |
| 17 ; This file contains a floating-point implementation of the inverse DCT | 17 ; This file contains a floating-point implementation of the inverse DCT |
| 18 ; (Discrete Cosine Transform). The following code is based directly on | 18 ; (Discrete Cosine Transform). The following code is based directly on |
| 19 ; the IJG's original jidctflt.c; see the jidctflt.c for more details. | 19 ; the IJG's original jidctflt.c; see the jidctflt.c for more details. |
| 20 ; | 20 ; |
| 21 ; [TAB8] | 21 ; [TAB8] |
| 22 | 22 |
| 23 %include "jsimdext.inc" | 23 %include "jsimdext.inc" |
| 24 %include "jdct.inc" | 24 %include "jdct.inc" |
| 25 | 25 |
| 26 ; -------------------------------------------------------------------------- | 26 ; -------------------------------------------------------------------------- |
| 27 SECTION SEG_CONST | 27 SECTION SEG_CONST |
| 28 | 28 |
| 29 alignz 16 | 29 alignz 16 |
| 30 global EXTN(jconst_idct_float_3dnow) | 30 global EXTN(jconst_idct_float_3dnow) PRIVATE |
| 31 | 31 |
| 32 EXTN(jconst_idct_float_3dnow): | 32 EXTN(jconst_idct_float_3dnow): |
| 33 | 33 |
| 34 PD_1_414 times 2 dd 1.414213562373095048801689 | 34 PD_1_414 times 2 dd 1.414213562373095048801689 |
| 35 PD_1_847 times 2 dd 1.847759065022573512256366 | 35 PD_1_847 times 2 dd 1.847759065022573512256366 |
| 36 PD_1_082 times 2 dd 1.082392200292393968799446 | 36 PD_1_082 times 2 dd 1.082392200292393968799446 |
| 37 PD_2_613 times 2 dd 2.613125929752753055713286 | 37 PD_2_613 times 2 dd 2.613125929752753055713286 |
| 38 PD_RNDINT_MAGIC times 2 dd 100663296.0 ; (float)(0x00C00000 << 3) | 38 PD_RNDINT_MAGIC times 2 dd 100663296.0 ; (float)(0x00C00000 << 3) |
| 39 PB_CENTERJSAMP times 8 db CENTERJSAMPLE | 39 PB_CENTERJSAMP times 8 db CENTERJSAMPLE |
| 40 | 40 |
| (...skipping 15 matching lines...) |
| 56 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf | 56 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf |
| 57 %define output_col(b) (b)+20 ; JDIMENSION output_col | 57 %define output_col(b) (b)+20 ; JDIMENSION output_col |
| 58 | 58 |
| 59 %define original_ebp ebp+0 | 59 %define original_ebp ebp+0 |
| 60 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] | 60 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] |
| 61 %define WK_NUM 2 | 61 %define WK_NUM 2 |
| 62 %define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT | 62 %define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT |
| 63 ; FAST_FLOAT workspace[DCTSIZE2] | 63 ; FAST_FLOAT workspace[DCTSIZE2] |
| 64 | 64 |
| 65 align 16 | 65 align 16 |
| 66 global EXTN(jsimd_idct_float_3dnow) | 66 global EXTN(jsimd_idct_float_3dnow) PRIVATE |
| 67 | 67 |
| 68 EXTN(jsimd_idct_float_3dnow): | 68 EXTN(jsimd_idct_float_3dnow): |
| 69 push ebp | 69 push ebp |
| 70 mov eax,esp ; eax = original ebp | 70 mov eax,esp ; eax = original ebp |
| 71 sub esp, byte 4 | 71 sub esp, byte 4 |
| 72 and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits | 72 and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits |
| 73 mov [esp],eax | 73 mov [esp],eax |
| 74 mov ebp,esp ; ebp = aligned ebp | 74 mov ebp,esp ; ebp = aligned ebp |
| 75 lea esp, [workspace] | 75 lea esp, [workspace] |
| 76 push ebx | 76 push ebx |
| (...skipping 366 matching lines...) |
| 443 ; pop ecx ; need not be preserved | 443 ; pop ecx ; need not be preserved |
| 444 pop ebx | 444 pop ebx |
| 445 mov esp,ebp ; esp <- aligned ebp | 445 mov esp,ebp ; esp <- aligned ebp |
| 446 pop esp ; esp <- original ebp | 446 pop esp ; esp <- original ebp |
| 447 pop ebp | 447 pop ebp |
| 448 ret | 448 ret |
| 449 | 449 |
| 450 ; For some reason, the OS X linker does not honor the request to align the | 450 ; For some reason, the OS X linker does not honor the request to align the |
| 451 ; segment unless we do this. | 451 ; segment unless we do this. |
| 452 align 16 | 452 align 16 |
| OLD | NEW |