| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; jidctfst.asm - fast integer IDCT (SSE2) | 2 ; jidctfst.asm - fast integer IDCT (SSE2) |
| 3 ; | 3 ; |
| 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 ; | 5 ; |
| 6 ; Based on | 6 ; Based on |
| 7 ; x86 SIMD extension for IJG JPEG library | 7 ; x86 SIMD extension for IJG JPEG library |
| 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
| 10 ; | 10 ; |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 52 ; -------------------------------------------------------------------------- | 52 ; -------------------------------------------------------------------------- |
| 53 SECTION SEG_CONST | 53 SECTION SEG_CONST |
| 54 | 54 |
| 55 ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) | 55 ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) |
| 56 ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) | 56 ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) |
| 57 | 57 |
| 58 %define PRE_MULTIPLY_SCALE_BITS 2 | 58 %define PRE_MULTIPLY_SCALE_BITS 2 |
| 59 %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) | 59 %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
| 60 | 60 |
| 61 alignz 16 | 61 alignz 16 |
| 62 global EXTN(jconst_idct_ifast_sse2) | 62 global EXTN(jconst_idct_ifast_sse2) PRIVATE |
| 63 | 63 |
| 64 EXTN(jconst_idct_ifast_sse2): | 64 EXTN(jconst_idct_ifast_sse2): |
| 65 | 65 |
| 66 PW_F1414 times 8 dw F_1_414 << CONST_SHIFT | 66 PW_F1414 times 8 dw F_1_414 << CONST_SHIFT |
| 67 PW_F1847 times 8 dw F_1_847 << CONST_SHIFT | 67 PW_F1847 times 8 dw F_1_847 << CONST_SHIFT |
| 68 PW_MF1613 times 8 dw -F_1_613 << CONST_SHIFT | 68 PW_MF1613 times 8 dw -F_1_613 << CONST_SHIFT |
| 69 PW_F1082 times 8 dw F_1_082 << CONST_SHIFT | 69 PW_F1082 times 8 dw F_1_082 << CONST_SHIFT |
| 70 PB_CENTERJSAMP times 16 db CENTERJSAMPLE | 70 PB_CENTERJSAMP times 16 db CENTERJSAMPLE |
| 71 | 71 |
| 72 alignz 16 | 72 alignz 16 |
| (...skipping 12 matching lines...) Expand all Loading... |
| 85 %define dct_table(b) (b)+8 ; jpeg_component_info *compptr | 85 %define dct_table(b) (b)+8 ; jpeg_component_info *compptr |
| 86 %define coef_block(b) (b)+12 ; JCOEFPTR coef_block | 86 %define coef_block(b) (b)+12 ; JCOEFPTR coef_block |
| 87 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf | 87 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf |
| 88 %define output_col(b) (b)+20 ; JDIMENSION output_col | 88 %define output_col(b) (b)+20 ; JDIMENSION output_col |
| 89 | 89 |
| 90 %define original_ebp ebp+0 | 90 %define original_ebp ebp+0 |
| 91 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] | 91 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] |
| 92 %define WK_NUM 2 | 92 %define WK_NUM 2 |
| 93 | 93 |
| 94 align 16 | 94 align 16 |
| 95 global EXTN(jsimd_idct_ifast_sse2) | 95 global EXTN(jsimd_idct_ifast_sse2) PRIVATE |
| 96 | 96 |
| 97 EXTN(jsimd_idct_ifast_sse2): | 97 EXTN(jsimd_idct_ifast_sse2): |
| 98 push ebp | 98 push ebp |
| 99 mov eax,esp ; eax = original ebp | 99 mov eax,esp ; eax = original ebp |
| 100 sub esp, byte 4 | 100 sub esp, byte 4 |
| 101 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits | 101 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits |
| 102 mov [esp],eax | 102 mov [esp],eax |
| 103 mov ebp,esp ; ebp = aligned ebp | 103 mov ebp,esp ; ebp = aligned ebp |
| 104 lea esp, [wk(0)] | 104 lea esp, [wk(0)] |
| 105 pushpic ebx | 105 pushpic ebx |
| (...skipping 387 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 493 ; pop ecx ; unused | 493 ; pop ecx ; unused |
| 494 poppic ebx | 494 poppic ebx |
| 495 mov esp,ebp ; esp <- aligned ebp | 495 mov esp,ebp ; esp <- aligned ebp |
| 496 pop esp ; esp <- original ebp | 496 pop esp ; esp <- original ebp |
| 497 pop ebp | 497 pop ebp |
| 498 ret | 498 ret |
| 499 | 499 |
| 500 ; For some reason, the OS X linker does not honor the request to align the | 500 ; For some reason, the OS X linker does not honor the request to align the |
| 501 ; segment unless we do this. | 501 ; segment unless we do this. |
| 502 align 16 | 502 align 16 |
| OLD | NEW |