OLD | NEW |
1 ; | 1 ; |
2 ; jchuff-sse2-64.asm - Huffman entropy encoding (64-bit SSE2) | 2 ; jchuff-sse2-64.asm - Huffman entropy encoding (64-bit SSE2) |
3 ; | 3 ; |
4 ; Copyright 2009-2011, 2014-2016 D. R. Commander. | 4 ; Copyright 2009-2011, 2014-2016 D. R. Commander. |
5 ; Copyright 2015 Matthieu Darbois | 5 ; Copyright 2015 Matthieu Darbois |
6 ; | 6 ; |
7 ; Based on | 7 ; Based on |
8 ; x86 SIMD extension for IJG JPEG library | 8 ; x86 SIMD extension for IJG JPEG library |
9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
11 ; | 11 ; |
12 ; This file should be assembled with NASM (Netwide Assembler), | 12 ; This file should be assembled with NASM (Netwide Assembler), |
13 ; can *not* be assembled with Microsoft's MASM or any compatible | 13 ; can *not* be assembled with Microsoft's MASM or any compatible |
14 ; assembler (including Borland's Turbo Assembler). | 14 ; assembler (including Borland's Turbo Assembler). |
15 ; NASM is available from http://nasm.sourceforge.net/ or | 15 ; NASM is available from http://nasm.sourceforge.net/ or |
16 ; http://sourceforge.net/project/showfiles.php?group_id=6208 | 16 ; http://sourceforge.net/project/showfiles.php?group_id=6208 |
17 ; | 17 ; |
18 ; This file contains an SSE2 implementation for Huffman coding of one block. | 18 ; This file contains an SSE2 implementation for Huffman coding of one block. |
19 ; The following code is based directly on jchuff.c; see jchuff.c for more | 19 ; The following code is based directly on jchuff.c; see jchuff.c for more |
20 ; details. | 20 ; details. |
21 ; | 21 ; |
22 ; [TAB8] | 22 ; [TAB8] |
23 | 23 |
24 %include "jsimdext.inc" | 24 %include "jsimdext.inc" |
25 | 25 |
26 ; -------------------------------------------------------------------------- | 26 ; -------------------------------------------------------------------------- |
27 SECTION SEG_CONST | 27 SECTION SEG_CONST |
28 | 28 |
29 alignz 16 | 29 alignz 16 |
30 global EXTN(jconst_huff_encode_one_block) | 30 global EXTN(jconst_huff_encode_one_block) PRIVATE |
31 | 31 |
32 EXTN(jconst_huff_encode_one_block): | 32 EXTN(jconst_huff_encode_one_block): |
33 | 33 |
34 %include "jpeg_nbits_table.inc" | 34 %include "jpeg_nbits_table.inc" |
35 | 35 |
36 alignz 16 | 36 alignz 16 |
37 | 37 |
38 ; -------------------------------------------------------------------------- | 38 ; -------------------------------------------------------------------------- |
39 SECTION SEG_TEXT | 39 SECTION SEG_TEXT |
40 BITS 64 | 40 BITS 64 |
(...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
177 ; r14 = c_derived_tbl *dctbl | 177 ; r14 = c_derived_tbl *dctbl |
178 ; r15 = c_derived_tbl *actbl | 178 ; r15 = c_derived_tbl *actbl |
179 | 179 |
180 %define t1 rbp-(DCTSIZE2*SIZEOF_WORD) | 180 %define t1 rbp-(DCTSIZE2*SIZEOF_WORD) |
181 %define t2 t1-(DCTSIZE2*SIZEOF_WORD) | 181 %define t2 t1-(DCTSIZE2*SIZEOF_WORD) |
182 %define put_buffer r8 | 182 %define put_buffer r8 |
183 %define put_bits r9d | 183 %define put_bits r9d |
184 %define buffer rax | 184 %define buffer rax |
185 | 185 |
186 align 16 | 186 align 16 |
187 global EXTN(jsimd_huff_encode_one_block_sse2) | 187 global EXTN(jsimd_huff_encode_one_block_sse2) PRIVATE |
188 | 188 |
189 EXTN(jsimd_huff_encode_one_block_sse2): | 189 EXTN(jsimd_huff_encode_one_block_sse2): |
190 push rbp | 190 push rbp |
191 mov rax,rsp ; rax = rsp after push rbp (pre-alignment rsp) | 191 mov rax,rsp ; rax = rsp after push rbp (pre-alignment rsp) |
192 sub rsp, byte 4 | 192 sub rsp, byte 4 |
193 and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits | 193 and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits |
194 mov [rsp],rax | 194 mov [rsp],rax |
195 mov rbp,rsp ; rbp = aligned rsp (base for t1/t2) | 195 mov rbp,rsp ; rbp = aligned rsp (base for t1/t2) |
196 lea rsp, [t2] | 196 lea rsp, [t2] |
197 collect_args | 197 collect_args |
(...skipping 154 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
352 %endif | 352 %endif |
353 uncollect_args | 353 uncollect_args |
354 mov rsp,rbp ; rsp <- aligned frame base | 354 mov rsp,rbp ; rsp <- aligned frame base |
355 pop rsp ; rsp <- pre-alignment rsp saved at [rbp] by the prologue | 355 pop rsp ; rsp <- pre-alignment rsp saved at [rbp] by the prologue |
356 pop rbp | 356 pop rbp |
357 ret | 357 ret |
358 | 358 |
359 ; For some reason, the OS X linker does not honor the request to align the | 359 ; For some reason, the OS X linker does not honor the request to align the |
360 ; segment unless we do this. | 360 ; segment unless we do this. |
361 align 16 | 361 align 16 |
OLD | NEW |