| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; jchuff-sse2.asm - Huffman entropy encoding (SSE2) | 2 ; jchuff-sse2.asm - Huffman entropy encoding (SSE2) |
| 3 ; | 3 ; |
| 4 ; Copyright 2009-2011, 2014-2016 D. R. Commander. | 4 ; Copyright 2009-2011, 2014-2016 D. R. Commander. |
| 5 ; Copyright 2015 Matthieu Darbois | 5 ; Copyright 2015 Matthieu Darbois |
| 6 ; | 6 ; |
| 7 ; Based on | 7 ; Based on |
| 8 ; x86 SIMD extension for IJG JPEG library | 8 ; x86 SIMD extension for IJG JPEG library |
| 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
| 11 ; | 11 ; |
| 12 ; This file should be assembled with NASM (Netwide Assembler); it | 12 ; This file should be assembled with NASM (Netwide Assembler); it |
| 13 ; can *not* be assembled with Microsoft's MASM or any compatible | 13 ; can *not* be assembled with Microsoft's MASM or any compatible |
| 14 ; assembler (including Borland's Turbo Assembler). | 14 ; assembler (including Borland's Turbo Assembler). |
| 15 ; NASM is available from http://nasm.sourceforge.net/ or | 15 ; NASM is available from http://nasm.sourceforge.net/ or |
| 16 ; http://sourceforge.net/project/showfiles.php?group_id=6208 | 16 ; http://sourceforge.net/project/showfiles.php?group_id=6208 |
| 17 ; | 17 ; |
| 18 ; This file contains an SSE2 implementation for Huffman coding of one block. | 18 ; This file contains an SSE2 implementation for Huffman coding of one block. |
| 19 ; The following code is based directly on jchuff.c; see jchuff.c for more | 19 ; The following code is based directly on jchuff.c; see jchuff.c for more |
| 20 ; details. | 20 ; details. |
| 21 ; | 21 ; |
| 22 ; [TAB8] | 22 ; [TAB8] |
| 23 | 23 |
| 24 %include "jsimdext.inc" | 24 %include "jsimdext.inc" |
| 25 | 25 |
| 26 ; -------------------------------------------------------------------------- | 26 ; -------------------------------------------------------------------------- |
| 27 SECTION SEG_CONST | 27 SECTION SEG_CONST |
| 28 | 28 |
| 29 alignz 16 | 29 alignz 16 |
| 30 global EXTN(jconst_huff_encode_one_block) | 30 global EXTN(jconst_huff_encode_one_block) PRIVATE |
| 31 | 31 |
| 32 EXTN(jconst_huff_encode_one_block): | 32 EXTN(jconst_huff_encode_one_block): |
| 33 | 33 |
| 34 %include "jpeg_nbits_table.inc" | 34 %include "jpeg_nbits_table.inc" |
| 35 | 35 |
| 36 alignz 16 | 36 alignz 16 |
| 37 | 37 |
| 38 ; -------------------------------------------------------------------------- | 38 ; -------------------------------------------------------------------------- |
| 39 SECTION SEG_TEXT | 39 SECTION SEG_TEXT |
| 40 BITS 32 | 40 BITS 32 |
| (...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 173 %define temp buffer+SIZEOF_DWORD | 173 %define temp buffer+SIZEOF_DWORD |
| 174 %define temp2 temp+SIZEOF_DWORD | 174 %define temp2 temp+SIZEOF_DWORD |
| 175 %define temp3 temp2+SIZEOF_DWORD | 175 %define temp3 temp2+SIZEOF_DWORD |
| 176 %define temp4 temp3+SIZEOF_DWORD | 176 %define temp4 temp3+SIZEOF_DWORD |
| 177 %define temp5 temp4+SIZEOF_DWORD | 177 %define temp5 temp4+SIZEOF_DWORD |
| 178 %define gotptr temp5+SIZEOF_DWORD ; void *gotptr | 178 %define gotptr temp5+SIZEOF_DWORD ; void *gotptr |
| 179 %define put_buffer ebx | 179 %define put_buffer ebx |
| 180 %define put_bits edi | 180 %define put_bits edi |
| 181 | 181 |
| 182 align 16 | 182 align 16 |
| 183 global EXTN(jsimd_huff_encode_one_block_sse2) | 183 global EXTN(jsimd_huff_encode_one_block_sse2) PRIVATE |
| 184 | 184 |
| 185 EXTN(jsimd_huff_encode_one_block_sse2): | 185 EXTN(jsimd_huff_encode_one_block_sse2): |
| 186 push ebp | 186 push ebp |
| 187 mov eax,esp ; eax = original ebp | 187 mov eax,esp ; eax = original ebp |
| 188 sub esp, byte 4 | 188 sub esp, byte 4 |
| 189 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits | 189 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits |
| 190 mov [esp],eax | 190 mov [esp],eax |
| 191 mov ebp,esp ; ebp = aligned ebp | 191 mov ebp,esp ; ebp = aligned ebp |
| 192 sub esp, temp5+9*SIZEOF_DWORD-pad | 192 sub esp, temp5+9*SIZEOF_DWORD-pad |
| 193 push ebx | 193 push ebx |
| (...skipping 224 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 418 pop ecx | 418 pop ecx |
| 419 pop ebx | 419 pop ebx |
| 420 mov esp,ebp ; esp <- aligned ebp | 420 mov esp,ebp ; esp <- aligned ebp |
| 421 pop esp ; esp <- original ebp | 421 pop esp ; esp <- original ebp |
| 422 pop ebp | 422 pop ebp |
| 423 ret | 423 ret |
| 424 | 424 |
| 425 ; For some reason, the OS X linker does not honor the request to align the | 425 ; For some reason, the OS X linker does not honor the request to align the |
| 426 ; segment unless we do this. | 426 ; segment unless we do this. |
| 427 align 16 | 427 align 16 |
| OLD | NEW |