OLD | NEW |
1 ; | 1 ; |
2 ; jchuff-sse2.asm - Huffman entropy encoding (SSE2) | 2 ; jchuff-sse2.asm - Huffman entropy encoding (SSE2) |
3 ; | 3 ; |
4 ; Copyright 2009-2011, 2014-2016 D. R. Commander. | 4 ; Copyright 2009-2011, 2014-2016 D. R. Commander. |
5 ; Copyright 2015 Matthieu Darbois | 5 ; Copyright 2015 Matthieu Darbois |
6 ; | 6 ; |
7 ; Based on | 7 ; Based on |
8 ; x86 SIMD extension for IJG JPEG library | 8 ; x86 SIMD extension for IJG JPEG library |
9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
11 ; | 11 ; |
12 ; This file should be assembled with NASM (Netwide Assembler), | 12 ; This file should be assembled with NASM (Netwide Assembler), |
13 ; can *not* be assembled with Microsoft's MASM or any compatible | 13 ; can *not* be assembled with Microsoft's MASM or any compatible |
14 ; assembler (including Borland's Turbo Assembler). | 14 ; assembler (including Borland's Turbo Assembler). |
15 ; NASM is available from http://nasm.sourceforge.net/ or | 15 ; NASM is available from http://nasm.sourceforge.net/ or |
16 ; http://sourceforge.net/project/showfiles.php?group_id=6208 | 16 ; http://sourceforge.net/project/showfiles.php?group_id=6208 |
17 ; | 17 ; |
18 ; This file contains an SSE2 implementation for Huffman coding of one block. | 18 ; This file contains an SSE2 implementation for Huffman coding of one block. |
19 ; The following code is based directly on jchuff.c; see jchuff.c for more | 19 ; The following code is based directly on jchuff.c; see jchuff.c for more |
20 ; details. | 20 ; details. |
21 ; | 21 ; |
22 ; [TAB8] | 22 ; [TAB8] |
23 | 23 |
24 %include "jsimdext.inc" | 24 %include "jsimdext.inc" |
25 | 25 |
26 ; -------------------------------------------------------------------------- | 26 ; -------------------------------------------------------------------------- |
; Constant-data section for the Huffman encoder. | ; Constant-data section for the Huffman encoder. |
27 SECTION SEG_CONST | 27 SECTION SEG_CONST |
28 | 28 |
29 alignz 16 | 29 alignz 16 |
; Export the label defined below. | ; NEW adds PRIVATE (presumably a visibility macro from jsimdext.inc -- TODO confirm). |
30 global EXTN(jconst_huff_encode_one_block) | 30 global EXTN(jconst_huff_encode_one_block) PRIVATE |
31 | 31 |
; Whatever jpeg_nbits_table.inc emits starts at this label. | ; Whatever jpeg_nbits_table.inc emits starts at this label. |
32 EXTN(jconst_huff_encode_one_block): | 32 EXTN(jconst_huff_encode_one_block): |
33 | 33 |
34 %include "jpeg_nbits_table.inc" | 34 %include "jpeg_nbits_table.inc" |
35 | 35 |
; Pad the end of the constant data to a 16-byte boundary. | ; Pad the end of the constant data to a 16-byte boundary. |
36 alignz 16 | 36 alignz 16 |
37 | 37 |
38 ; -------------------------------------------------------------------------- | 38 ; -------------------------------------------------------------------------- |
39 SECTION SEG_TEXT | 39 SECTION SEG_TEXT |
40 BITS 32 | 40 BITS 32 |
(...skipping 132 matching lines...)
173 %define temp buffer+SIZEOF_DWORD | 173 %define temp buffer+SIZEOF_DWORD |
174 %define temp2 temp+SIZEOF_DWORD | 174 %define temp2 temp+SIZEOF_DWORD |
175 %define temp3 temp2+SIZEOF_DWORD | 175 %define temp3 temp2+SIZEOF_DWORD |
176 %define temp4 temp3+SIZEOF_DWORD | 176 %define temp4 temp3+SIZEOF_DWORD |
177 %define temp5 temp4+SIZEOF_DWORD | 177 %define temp5 temp4+SIZEOF_DWORD |
178 %define gotptr temp5+SIZEOF_DWORD ; void *gotptr | 178 %define gotptr temp5+SIZEOF_DWORD ; void *gotptr |
179 %define put_buffer ebx | 179 %define put_buffer ebx |
180 %define put_bits edi | 180 %define put_bits edi |
181 | 181 |
182 align 16 | 182 align 16 |
183 global EXTN(jsimd_huff_encode_one_block_sse2) | 183 global EXTN(jsimd_huff_encode_one_block_sse2) PRIVATE |
184 | 184 |
185 EXTN(jsimd_huff_encode_one_block_sse2): | 185 EXTN(jsimd_huff_encode_one_block_sse2): |
186 push ebp | 186 push ebp |
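187 mov eax,esp ; eax = original ebp (i.e. esp right after "push ebp", before alignment) | 187 mov eax,esp ; eax = original ebp (i.e. esp right after "push ebp", before alignment) |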
188 sub esp, byte 4 | 188 sub esp, byte 4 |
189 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits | 189 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits |
190 mov [esp],eax | 190 mov [esp],eax |
191 mov ebp,esp ; ebp = aligned ebp | 191 mov ebp,esp ; ebp = aligned ebp |
192 sub esp, temp5+9*SIZEOF_DWORD-pad | 192 sub esp, temp5+9*SIZEOF_DWORD-pad |
193 push ebx | 193 push ebx |
(...skipping 224 matching lines...)
418 pop ecx | 418 pop ecx |
419 pop ebx | 419 pop ebx |
420 mov esp,ebp ; esp <- aligned ebp | 420 mov esp,ebp ; esp <- aligned ebp |
421 pop esp ; esp <- original ebp (the value the prologue stored at [aligned ebp]) | 421 pop esp ; esp <- original ebp (the value the prologue stored at [aligned ebp]) |
422 pop ebp | 422 pop ebp |
423 ret | 423 ret |
424 | 424 |
425 ; For some reason, the OS X linker does not honor the request to align the | 425 ; For some reason, the OS X linker does not honor the request to align the |
426 ; segment unless we do this. | 426 ; segment unless we do this. |
427 align 16 | 427 align 16 |
OLD | NEW |