OLD | NEW |
1 ; | 1 ; |
2 ; jiss2int.asm - accurate integer IDCT (SSE2) | 2 ; jiss2int.asm - accurate integer IDCT (SSE2) |
3 ; | 3 ; |
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
5 ; | 5 ; |
6 ; Based on | 6 ; Based on |
7 ; x86 SIMD extension for IJG JPEG library | 7 ; x86 SIMD extension for IJG JPEG library |
8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
10 ; | 10 ; |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
59 F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) | 59 F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) |
60 F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) | 60 F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) |
61 F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) | 61 F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) |
62 F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) | 62 F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) |
63 %endif | 63 %endif |
64 | 64 |
65 ; -------------------------------------------------------------------------- | 65 ; -------------------------------------------------------------------------- |
66 SECTION SEG_CONST | 66 SECTION SEG_CONST |
67 | 67 |
68 alignz 16 | 68 alignz 16 |
69 » global» EXTN(jconst_idct_islow_sse2) | 69 » global» EXTN(jconst_idct_islow_sse2) PRIVATE |
70 | 70 |
71 EXTN(jconst_idct_islow_sse2): | 71 EXTN(jconst_idct_islow_sse2): |
72 | 72 |
73 PW_F130_F054 times 4 dw (F_0_541+F_0_765), F_0_541 | 73 PW_F130_F054 times 4 dw (F_0_541+F_0_765), F_0_541 |
74 PW_F054_MF130 times 4 dw F_0_541, (F_0_541-F_1_847) | 74 PW_F054_MF130 times 4 dw F_0_541, (F_0_541-F_1_847) |
75 PW_MF078_F117 times 4 dw (F_1_175-F_1_961), F_1_175 | 75 PW_MF078_F117 times 4 dw (F_1_175-F_1_961), F_1_175 |
76 PW_F117_F078 times 4 dw F_1_175, (F_1_175-F_0_390) | 76 PW_F117_F078 times 4 dw F_1_175, (F_1_175-F_0_390) |
77 PW_MF060_MF089 times 4 dw (F_0_298-F_0_899),-F_0_899 | 77 PW_MF060_MF089 times 4 dw (F_0_298-F_0_899),-F_0_899 |
78 PW_MF089_F060 times 4 dw -F_0_899, (F_1_501-F_0_899) | 78 PW_MF089_F060 times 4 dw -F_0_899, (F_1_501-F_0_899) |
79 PW_MF050_MF256 times 4 dw (F_2_053-F_2_562),-F_2_562 | 79 PW_MF050_MF256 times 4 dw (F_2_053-F_2_562),-F_2_562 |
(...skipping 18 matching lines...) Expand all Loading... |
98 %define dct_table(b) (b)+8 ; jpeg_component_info * compptr | 98 %define dct_table(b) (b)+8 ; jpeg_component_info * compptr |
99 %define coef_block(b) (b)+12 ; JCOEFPTR coef_block | 99 %define coef_block(b) (b)+12 ; JCOEFPTR coef_block |
100 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf | 100 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf |
101 %define output_col(b) (b)+20 ; JDIMENSION output_col | 101 %define output_col(b) (b)+20 ; JDIMENSION output_col |
102 | 102 |
103 %define original_ebp ebp+0 | 103 %define original_ebp ebp+0 |
104 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] | 104 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] |
105 %define WK_NUM 12 | 105 %define WK_NUM 12 |
106 | 106 |
107 align 16 | 107 align 16 |
108 » global» EXTN(jsimd_idct_islow_sse2) | 108 » global» EXTN(jsimd_idct_islow_sse2) PRIVATE |
109 | 109 |
110 EXTN(jsimd_idct_islow_sse2): | 110 EXTN(jsimd_idct_islow_sse2): |
111 push ebp | 111 push ebp |
112 mov eax,esp ; eax = original ebp | 112 mov eax,esp ; eax = original ebp |
113 sub esp, byte 4 | 113 sub esp, byte 4 |
114 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits | 114 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits |
115 mov [esp],eax | 115 mov [esp],eax |
116 mov ebp,esp ; ebp = aligned ebp | 116 mov ebp,esp ; ebp = aligned ebp |
117 lea esp, [wk(0)] | 117 lea esp, [wk(0)] |
118 pushpic ebx | 118 pushpic ebx |
(...skipping 731 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
850 ; pop ecx ; unused | 850 ; pop ecx ; unused |
851 poppic ebx | 851 poppic ebx |
852 mov esp,ebp ; esp <- aligned ebp | 852 mov esp,ebp ; esp <- aligned ebp |
853 pop esp ; esp <- original ebp | 853 pop esp ; esp <- original ebp |
854 pop ebp | 854 pop ebp |
855 ret | 855 ret |
856 | 856 |
857 ; For some reason, the OS X linker does not honor the request to align the | 857 ; For some reason, the OS X linker does not honor the request to align the |
858 ; segment unless we do this. | 858 ; segment unless we do this. |
859 align 16 | 859 align 16 |
OLD | NEW |