OLD | NEW |
1 ; | 1 ; |
2 ; jiss2red.asm - reduced-size IDCT (SSE2) | 2 ; jiss2red.asm - reduced-size IDCT (SSE2) |
3 ; | 3 ; |
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
5 ; | 5 ; |
6 ; Based on | 6 ; Based on |
7 ; x86 SIMD extension for IJG JPEG library | 7 ; x86 SIMD extension for IJG JPEG library |
8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
10 ; | 10 ; |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
65 F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) | 65 F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) |
66 F_2_172 equ DESCALE(2332956230,30-CONST_BITS) ; FIX(2.172734803) | 66 F_2_172 equ DESCALE(2332956230,30-CONST_BITS) ; FIX(2.172734803) |
67 F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) | 67 F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) |
68 F_3_624 equ DESCALE(3891787747,30-CONST_BITS) ; FIX(3.624509785) | 68 F_3_624 equ DESCALE(3891787747,30-CONST_BITS) ; FIX(3.624509785) |
69 %endif | 69 %endif |
70 | 70 |
71 ; -------------------------------------------------------------------------- | 71 ; -------------------------------------------------------------------------- |
72 SECTION SEG_CONST | 72 SECTION SEG_CONST |
73 | 73 |
74 alignz 16 | 74 alignz 16 |
75 » global» EXTN(jconst_idct_red_sse2) | 75 » global» EXTN(jconst_idct_red_sse2) PRIVATE |
76 | 76 |
77 EXTN(jconst_idct_red_sse2): | 77 EXTN(jconst_idct_red_sse2): |
78 | 78 |
79 PW_F184_MF076 times 4 dw F_1_847,-F_0_765 | 79 PW_F184_MF076 times 4 dw F_1_847,-F_0_765 |
80 PW_F256_F089 times 4 dw F_2_562, F_0_899 | 80 PW_F256_F089 times 4 dw F_2_562, F_0_899 |
81 PW_F106_MF217 times 4 dw F_1_061,-F_2_172 | 81 PW_F106_MF217 times 4 dw F_1_061,-F_2_172 |
82 PW_MF060_MF050 times 4 dw -F_0_601,-F_0_509 | 82 PW_MF060_MF050 times 4 dw -F_0_601,-F_0_509 |
83 PW_F145_MF021 times 4 dw F_1_451,-F_0_211 | 83 PW_F145_MF021 times 4 dw F_1_451,-F_0_211 |
84 PW_F362_MF127 times 4 dw F_3_624,-F_1_272 | 84 PW_F362_MF127 times 4 dw F_3_624,-F_1_272 |
85 PW_F085_MF072 times 4 dw F_0_850,-F_0_720 | 85 PW_F085_MF072 times 4 dw F_0_850,-F_0_720 |
(...skipping 20 matching lines...) Expand all Loading... |
106 %define dct_table(b) (b)+8 ; void * dct_table | 106 %define dct_table(b) (b)+8 ; void * dct_table |
107 %define coef_block(b) (b)+12 ; JCOEFPTR coef_block | 107 %define coef_block(b) (b)+12 ; JCOEFPTR coef_block |
108 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf | 108 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf |
109 %define output_col(b) (b)+20 ; JDIMENSION output_col | 109 %define output_col(b) (b)+20 ; JDIMENSION output_col |
110 | 110 |
111 %define original_ebp ebp+0 | 111 %define original_ebp ebp+0 |
112 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] | 112 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] |
113 %define WK_NUM 2 | 113 %define WK_NUM 2 |
114 | 114 |
115 align 16 | 115 align 16 |
116 » global» EXTN(jsimd_idct_4x4_sse2) | 116 » global» EXTN(jsimd_idct_4x4_sse2) PRIVATE |
117 | 117 |
118 EXTN(jsimd_idct_4x4_sse2): | 118 EXTN(jsimd_idct_4x4_sse2): |
119 push ebp | 119 push ebp |
120 mov eax,esp ; eax = original ebp | 120 mov eax,esp ; eax = original ebp |
121 sub esp, byte 4 | 121 sub esp, byte 4 |
122 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits | 122 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits |
123 mov [esp],eax | 123 mov [esp],eax |
124 mov ebp,esp ; ebp = aligned ebp | 124 mov ebp,esp ; ebp = aligned ebp |
125 lea esp, [wk(0)] | 125 lea esp, [wk(0)] |
126 pushpic ebx | 126 pushpic ebx |
(...skipping 290 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
417 ; jsimd_idct_2x2_sse2 (void * dct_table, JCOEFPTR coef_block, | 417 ; jsimd_idct_2x2_sse2 (void * dct_table, JCOEFPTR coef_block, |
418 ; JSAMPARRAY output_buf, JDIMENSION output_col) | 418 ; JSAMPARRAY output_buf, JDIMENSION output_col) |
419 ; | 419 ; |
420 | 420 |
421 %define dct_table(b) (b)+8 ; void * dct_table | 421 %define dct_table(b) (b)+8 ; void * dct_table |
422 %define coef_block(b) (b)+12 ; JCOEFPTR coef_block | 422 %define coef_block(b) (b)+12 ; JCOEFPTR coef_block |
423 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf | 423 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf |
424 %define output_col(b) (b)+20 ; JDIMENSION output_col | 424 %define output_col(b) (b)+20 ; JDIMENSION output_col |
425 | 425 |
426 align 16 | 426 align 16 |
427 » global» EXTN(jsimd_idct_2x2_sse2) | 427 » global» EXTN(jsimd_idct_2x2_sse2) PRIVATE |
428 | 428 |
429 EXTN(jsimd_idct_2x2_sse2): | 429 EXTN(jsimd_idct_2x2_sse2): |
430 push ebp | 430 push ebp |
431 mov ebp,esp | 431 mov ebp,esp |
432 push ebx | 432 push ebx |
433 ; push ecx ; need not be preserved | 433 ; push ecx ; need not be preserved |
434 ; push edx ; need not be preserved | 434 ; push edx ; need not be preserved |
435 push esi | 435 push esi |
436 push edi | 436 push edi |
437 | 437 |
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
585 pop esi | 585 pop esi |
586 ; pop edx ; need not be preserved | 586 ; pop edx ; need not be preserved |
587 ; pop ecx ; need not be preserved | 587 ; pop ecx ; need not be preserved |
588 pop ebx | 588 pop ebx |
589 pop ebp | 589 pop ebp |
590 ret | 590 ret |
591 | 591 |
592 ; For some reason, the OS X linker does not honor the request to align the | 592 ; For some reason, the OS X linker does not honor the request to align the |
593 ; segment unless we do this. | 593 ; segment unless we do this. |
594 align 16 | 594 align 16 |
OLD | NEW |