| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; jidctred.asm - reduced-size IDCT (SSE2) | 2 ; jidctred.asm - reduced-size IDCT (SSE2) |
| 3 ; | 3 ; |
| 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 ; | 5 ; |
| 6 ; Based on | 6 ; Based on |
| 7 ; x86 SIMD extension for IJG JPEG library | 7 ; x86 SIMD extension for IJG JPEG library |
| 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
| 10 ; | 10 ; |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 65 F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) | 65 F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) |
| 66 F_2_172 equ DESCALE(2332956230,30-CONST_BITS) ; FIX(2.172734803) | 66 F_2_172 equ DESCALE(2332956230,30-CONST_BITS) ; FIX(2.172734803) |
| 67 F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) | 67 F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) |
| 68 F_3_624 equ DESCALE(3891787747,30-CONST_BITS) ; FIX(3.624509785) | 68 F_3_624 equ DESCALE(3891787747,30-CONST_BITS) ; FIX(3.624509785) |
| 69 %endif | 69 %endif |
| 70 | 70 |
| 71 ; -------------------------------------------------------------------------- | 71 ; -------------------------------------------------------------------------- |
| 72 SECTION SEG_CONST | 72 SECTION SEG_CONST |
| 73 | 73 |
| 74 alignz 16 | 74 alignz 16 |
| 75 global EXTN(jconst_idct_red_sse2) | 75 global EXTN(jconst_idct_red_sse2) PRIVATE |
| 76 | 76 |
| 77 EXTN(jconst_idct_red_sse2): | 77 EXTN(jconst_idct_red_sse2): |
| 78 | 78 |
| 79 PW_F184_MF076 times 4 dw F_1_847,-F_0_765 | 79 PW_F184_MF076 times 4 dw F_1_847,-F_0_765 |
| 80 PW_F256_F089 times 4 dw F_2_562, F_0_899 | 80 PW_F256_F089 times 4 dw F_2_562, F_0_899 |
| 81 PW_F106_MF217 times 4 dw F_1_061,-F_2_172 | 81 PW_F106_MF217 times 4 dw F_1_061,-F_2_172 |
| 82 PW_MF060_MF050 times 4 dw -F_0_601,-F_0_509 | 82 PW_MF060_MF050 times 4 dw -F_0_601,-F_0_509 |
| 83 PW_F145_MF021 times 4 dw F_1_451,-F_0_211 | 83 PW_F145_MF021 times 4 dw F_1_451,-F_0_211 |
| 84 PW_F362_MF127 times 4 dw F_3_624,-F_1_272 | 84 PW_F362_MF127 times 4 dw F_3_624,-F_1_272 |
| 85 PW_F085_MF072 times 4 dw F_0_850,-F_0_720 | 85 PW_F085_MF072 times 4 dw F_0_850,-F_0_720 |
| (...skipping 20 matching lines...) Expand all Loading... |
| 106 %define dct_table(b) (b)+8 ; void *dct_table | 106 %define dct_table(b) (b)+8 ; void *dct_table |
| 107 %define coef_block(b) (b)+12 ; JCOEFPTR coef_block | 107 %define coef_block(b) (b)+12 ; JCOEFPTR coef_block |
| 108 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf | 108 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf |
| 109 %define output_col(b) (b)+20 ; JDIMENSION output_col | 109 %define output_col(b) (b)+20 ; JDIMENSION output_col |
| 110 | 110 |
| 111 %define original_ebp ebp+0 | 111 %define original_ebp ebp+0 |
| 112 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] | 112 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] |
| 113 %define WK_NUM 2 | 113 %define WK_NUM 2 |
| 114 | 114 |
| 115 align 16 | 115 align 16 |
| 116 global EXTN(jsimd_idct_4x4_sse2) | 116 global EXTN(jsimd_idct_4x4_sse2) PRIVATE |
| 117 | 117 |
| 118 EXTN(jsimd_idct_4x4_sse2): | 118 EXTN(jsimd_idct_4x4_sse2): |
| 119 push ebp | 119 push ebp |
| 120 mov eax,esp ; eax = original ebp | 120 mov eax,esp ; eax = original ebp |
| 121 sub esp, byte 4 | 121 sub esp, byte 4 |
| 122 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits | 122 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits |
| 123 mov [esp],eax | 123 mov [esp],eax |
| 124 mov ebp,esp ; ebp = aligned ebp | 124 mov ebp,esp ; ebp = aligned ebp |
| 125 lea esp, [wk(0)] | 125 lea esp, [wk(0)] |
| 126 pushpic ebx | 126 pushpic ebx |
| (...skipping 290 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 417 ; jsimd_idct_2x2_sse2 (void *dct_table, JCOEFPTR coef_block, | 417 ; jsimd_idct_2x2_sse2 (void *dct_table, JCOEFPTR coef_block, |
| 418 ; JSAMPARRAY output_buf, JDIMENSION output_col) | 418 ; JSAMPARRAY output_buf, JDIMENSION output_col) |
| 419 ; | 419 ; |
| 420 | 420 |
| 421 %define dct_table(b) (b)+8 ; void *dct_table | 421 %define dct_table(b) (b)+8 ; void *dct_table |
| 422 %define coef_block(b) (b)+12 ; JCOEFPTR coef_block | 422 %define coef_block(b) (b)+12 ; JCOEFPTR coef_block |
| 423 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf | 423 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf |
| 424 %define output_col(b) (b)+20 ; JDIMENSION output_col | 424 %define output_col(b) (b)+20 ; JDIMENSION output_col |
| 425 | 425 |
| 426 align 16 | 426 align 16 |
| 427 global EXTN(jsimd_idct_2x2_sse2) | 427 global EXTN(jsimd_idct_2x2_sse2) PRIVATE |
| 428 | 428 |
| 429 EXTN(jsimd_idct_2x2_sse2): | 429 EXTN(jsimd_idct_2x2_sse2): |
| 430 push ebp | 430 push ebp |
| 431 mov ebp,esp | 431 mov ebp,esp |
| 432 push ebx | 432 push ebx |
| 433 ; push ecx ; need not be preserved | 433 ; push ecx ; need not be preserved |
| 434 ; push edx ; need not be preserved | 434 ; push edx ; need not be preserved |
| 435 push esi | 435 push esi |
| 436 push edi | 436 push edi |
| 437 | 437 |
| (...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 585 pop esi | 585 pop esi |
| 586 ; pop edx ; need not be preserved | 586 ; pop edx ; need not be preserved |
| 587 ; pop ecx ; need not be preserved | 587 ; pop ecx ; need not be preserved |
| 588 pop ebx | 588 pop ebx |
| 589 pop ebp | 589 pop ebp |
| 590 ret | 590 ret |
| 591 | 591 |
| 592 ; For some reason, the OS X linker does not honor the request to align the | 592 ; For some reason, the OS X linker does not honor the request to align the |
| 593 ; segment unless we do this. | 593 ; segment unless we do this. |
| 594 align 16 | 594 align 16 |
| OLD | NEW |