| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; jimmxfst.asm - fast integer IDCT (MMX) | 2 ; jimmxfst.asm - fast integer IDCT (MMX) |
| 3 ; | 3 ; |
| 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 ; | 5 ; |
| 6 ; Based on | 6 ; Based on |
| 7 ; x86 SIMD extension for IJG JPEG library | 7 ; x86 SIMD extension for IJG JPEG library |
| 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
| 10 ; | 10 ; |
| (...skipping 41 matching lines...) |
| 52 ; -------------------------------------------------------------------------- | 52 ; -------------------------------------------------------------------------- |
| 53 SECTION SEG_CONST | 53 SECTION SEG_CONST |
| 54 | 54 |
| 55 ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) | 55 ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) |
| 56 ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) | 56 ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) |
| 57 | 57 |
| 58 %define PRE_MULTIPLY_SCALE_BITS 2 | 58 %define PRE_MULTIPLY_SCALE_BITS 2 |
| 59 %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) | 59 %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
| 60 | 60 |
| 61 alignz 16 | 61 alignz 16 |
| 62 » global» EXTN(jconst_idct_ifast_mmx) | 62 » global» EXTN(jconst_idct_ifast_mmx) PRIVATE |
| 63 | 63 |
| 64 EXTN(jconst_idct_ifast_mmx): | 64 EXTN(jconst_idct_ifast_mmx): |
| 65 | 65 |
| 66 PW_F1414 times 4 dw F_1_414 << CONST_SHIFT | 66 PW_F1414 times 4 dw F_1_414 << CONST_SHIFT |
| 67 PW_F1847 times 4 dw F_1_847 << CONST_SHIFT | 67 PW_F1847 times 4 dw F_1_847 << CONST_SHIFT |
| 68 PW_MF1613 times 4 dw -F_1_613 << CONST_SHIFT | 68 PW_MF1613 times 4 dw -F_1_613 << CONST_SHIFT |
| 69 PW_F1082 times 4 dw F_1_082 << CONST_SHIFT | 69 PW_F1082 times 4 dw F_1_082 << CONST_SHIFT |
| 70 PB_CENTERJSAMP times 8 db CENTERJSAMPLE | 70 PB_CENTERJSAMP times 8 db CENTERJSAMPLE |
| 71 | 71 |
| 72 alignz 16 | 72 alignz 16 |
| (...skipping 14 matching lines...) |
| 87 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf | 87 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf |
| 88 %define output_col(b) (b)+20 ; JDIMENSION output_col | 88 %define output_col(b) (b)+20 ; JDIMENSION output_col |
| 89 | 89 |
| 90 %define original_ebp ebp+0 | 90 %define original_ebp ebp+0 |
| 91 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] | 91 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] |
| 92 %define WK_NUM 2 | 92 %define WK_NUM 2 |
| 93 %define workspace wk(0)-DCTSIZE2*SIZEOF_JCOEF | 93 %define workspace wk(0)-DCTSIZE2*SIZEOF_JCOEF |
| 94 ; JCOEF workspace[DCTSIZE2] | 94 ; JCOEF workspace[DCTSIZE2] |
| 95 | 95 |
| 96 align 16 | 96 align 16 |
| 97 » global» EXTN(jsimd_idct_ifast_mmx) | 97 » global» EXTN(jsimd_idct_ifast_mmx) PRIVATE |
| 98 | 98 |
| 99 EXTN(jsimd_idct_ifast_mmx): | 99 EXTN(jsimd_idct_ifast_mmx): |
| 100 push ebp | 100 push ebp |
| 101 mov eax,esp ; eax = original ebp | 101 mov eax,esp ; eax = original ebp |
| 102 sub esp, byte 4 | 102 sub esp, byte 4 |
| 103 and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits | 103 and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits |
| 104 mov [esp],eax | 104 mov [esp],eax |
| 105 mov ebp,esp ; ebp = aligned ebp | 105 mov ebp,esp ; ebp = aligned ebp |
| 106 lea esp, [workspace] | 106 lea esp, [workspace] |
| 107 push ebx | 107 push ebx |
| (...skipping 383 matching lines...) |
| 491 ; pop ecx ; need not be preserved | 491 ; pop ecx ; need not be preserved |
| 492 pop ebx | 492 pop ebx |
| 493 mov esp,ebp ; esp <- aligned ebp | 493 mov esp,ebp ; esp <- aligned ebp |
| 494 pop esp ; esp <- original ebp | 494 pop esp ; esp <- original ebp |
| 495 pop ebp | 495 pop ebp |
| 496 ret | 496 ret |
| 497 | 497 |
| 498 ; For some reason, the OS X linker does not honor the request to align the | 498 ; For some reason, the OS X linker does not honor the request to align the |
| 499 ; segment unless we do this. | 499 ; segment unless we do this. |
| 500 align 16 | 500 align 16 |
| OLD | NEW |