OLD | NEW |
1 ; | 1 ; |
2 ; jiss2int-64.asm - accurate integer IDCT (64-bit SSE2) | 2 ; jiss2int-64.asm - accurate integer IDCT (64-bit SSE2) |
3 ; | 3 ; |
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
5 ; Copyright 2009 D. R. Commander | 5 ; Copyright 2009 D. R. Commander |
6 ; | 6 ; |
7 ; Based on | 7 ; Based on |
8 ; x86 SIMD extension for IJG JPEG library | 8 ; x86 SIMD extension for IJG JPEG library |
9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
60 F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) | 60 F_1_961 equ DESCALE(2106220350,30-CONST_BITS) ; FIX(1.961570560) |
61 F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) | 61 F_2_053 equ DESCALE(2204520673,30-CONST_BITS) ; FIX(2.053119869) |
62 F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) | 62 F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) |
63 F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) | 63 F_3_072 equ DESCALE(3299298341,30-CONST_BITS) ; FIX(3.072711026) |
64 %endif | 64 %endif |
65 | 65 |
66 ; -------------------------------------------------------------------------- | 66 ; -------------------------------------------------------------------------- |
67 SECTION SEG_CONST | 67 SECTION SEG_CONST |
68 | 68 |
69 alignz 16 | 69 alignz 16 |
70 » global» EXTN(jconst_idct_islow_sse2) | 70 » global» EXTN(jconst_idct_islow_sse2) PRIVATE |
71 | 71 |
72 EXTN(jconst_idct_islow_sse2): | 72 EXTN(jconst_idct_islow_sse2): |
73 | 73 |
74 PW_F130_F054 times 4 dw (F_0_541+F_0_765), F_0_541 | 74 PW_F130_F054 times 4 dw (F_0_541+F_0_765), F_0_541 |
75 PW_F054_MF130 times 4 dw F_0_541, (F_0_541-F_1_847) | 75 PW_F054_MF130 times 4 dw F_0_541, (F_0_541-F_1_847) |
76 PW_MF078_F117 times 4 dw (F_1_175-F_1_961), F_1_175 | 76 PW_MF078_F117 times 4 dw (F_1_175-F_1_961), F_1_175 |
77 PW_F117_F078 times 4 dw F_1_175, (F_1_175-F_0_390) | 77 PW_F117_F078 times 4 dw F_1_175, (F_1_175-F_0_390) |
78 PW_MF060_MF089 times 4 dw (F_0_298-F_0_899),-F_0_899 | 78 PW_MF060_MF089 times 4 dw (F_0_298-F_0_899),-F_0_899 |
79 PW_MF089_F060 times 4 dw -F_0_899, (F_1_501-F_0_899) | 79 PW_MF089_F060 times 4 dw -F_0_899, (F_1_501-F_0_899) |
80 PW_MF050_MF256 times 4 dw (F_2_053-F_2_562),-F_2_562 | 80 PW_MF050_MF256 times 4 dw (F_2_053-F_2_562),-F_2_562 |
(...skipping 18 matching lines...) Expand all Loading... |
99 ; r10 = jpeg_component_info * compptr | 99 ; r10 = jpeg_component_info * compptr |
100 ; r11 = JCOEFPTR coef_block | 100 ; r11 = JCOEFPTR coef_block |
101 ; r12 = JSAMPARRAY output_buf | 101 ; r12 = JSAMPARRAY output_buf |
102 ; r13 = JDIMENSION output_col | 102 ; r13 = JDIMENSION output_col |
103 | 103 |
104 %define original_rbp rbp+0 | 104 %define original_rbp rbp+0 |
105 %define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] | 105 %define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] |
106 %define WK_NUM 12 | 106 %define WK_NUM 12 |
107 | 107 |
108 align 16 | 108 align 16 |
109 » global» EXTN(jsimd_idct_islow_sse2) | 109 » global» EXTN(jsimd_idct_islow_sse2) PRIVATE |
110 | 110 |
111 EXTN(jsimd_idct_islow_sse2): | 111 EXTN(jsimd_idct_islow_sse2): |
112 push rbp | 112 push rbp |
113 mov rax,rsp ; rax = original rbp | 113 mov rax,rsp ; rax = original rbp |
114 sub rsp, byte 4 | 114 sub rsp, byte 4 |
115 and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits | 115 and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits |
116 mov [rsp],rax | 116 mov [rsp],rax |
117 mov rbp,rsp ; rbp = aligned rbp | 117 mov rbp,rsp ; rbp = aligned rbp |
118 lea rsp, [wk(0)] | 118 lea rsp, [wk(0)] |
119 collect_args | 119 collect_args |
(...skipping 719 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
839 | 839 |
840 uncollect_args | 840 uncollect_args |
841 mov rsp,rbp ; rsp <- aligned rbp | 841 mov rsp,rbp ; rsp <- aligned rbp |
842 pop rsp ; rsp <- original rbp | 842 pop rsp ; rsp <- original rbp |
843 pop rbp | 843 pop rbp |
844 ret | 844 ret |
845 | 845 |
846 ; For some reason, the OS X linker does not honor the request to align the | 846 ; For some reason, the OS X linker does not honor the request to align the |
847 ; segment unless we do this. | 847 ; segment unless we do this. |
848 align 16 | 848 align 16 |
OLD | NEW |