| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; jidctflt.asm - floating-point IDCT (64-bit SSE & SSE2) | 2 ; jidctflt.asm - floating-point IDCT (64-bit SSE & SSE2) |
| 3 ; | 3 ; |
| 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 ; Copyright 2009 D. R. Commander | 5 ; Copyright 2009 D. R. Commander |
| 6 ; | 6 ; |
| 7 ; Based on | 7 ; Based on |
| 8 ; x86 SIMD extension for IJG JPEG library | 8 ; x86 SIMD extension for IJG JPEG library |
| 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
| (...skipping 20 matching lines...) |
| 31 %endmacro | 31 %endmacro |
| 32 | 32 |
| 33 %macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) | 33 %macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) |
| 34 shufps %1,%2,0xEE | 34 shufps %1,%2,0xEE |
| 35 %endmacro | 35 %endmacro |
| 36 | 36 |
| 37 ; -------------------------------------------------------------------------- | 37 ; -------------------------------------------------------------------------- |
| 38 SECTION SEG_CONST | 38 SECTION SEG_CONST |
| 39 | 39 |
| 40 alignz 16 | 40 alignz 16 |
| 41 global EXTN(jconst_idct_float_sse2) | 41 global EXTN(jconst_idct_float_sse2) PRIVATE |
| 42 | 42 |
| 43 EXTN(jconst_idct_float_sse2): | 43 EXTN(jconst_idct_float_sse2): |
| 44 | 44 |
| 45 PD_1_414 times 4 dd 1.414213562373095048801689 | 45 PD_1_414 times 4 dd 1.414213562373095048801689 |
| 46 PD_1_847 times 4 dd 1.847759065022573512256366 | 46 PD_1_847 times 4 dd 1.847759065022573512256366 |
| 47 PD_1_082 times 4 dd 1.082392200292393968799446 | 47 PD_1_082 times 4 dd 1.082392200292393968799446 |
| 48 PD_M2_613 times 4 dd -2.613125929752753055713286 | 48 PD_M2_613 times 4 dd -2.613125929752753055713286 |
| 49 PD_RNDINT_MAGIC times 4 dd 100663296.0 ; (float)(0x00C00000 << 3) | 49 PD_RNDINT_MAGIC times 4 dd 100663296.0 ; (float)(0x00C00000 << 3) |
| 50 PB_CENTERJSAMP times 16 db CENTERJSAMPLE | 50 PB_CENTERJSAMP times 16 db CENTERJSAMPLE |
| 51 | 51 |
| (...skipping 15 matching lines...) |
| 67 ; r12 = JSAMPARRAY output_buf | 67 ; r12 = JSAMPARRAY output_buf |
| 68 ; r13 = JDIMENSION output_col | 68 ; r13 = JDIMENSION output_col |
| 69 | 69 |
| 70 %define original_rbp rbp+0 | 70 %define original_rbp rbp+0 |
| 71 %define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] | 71 %define wk(i) rbp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] |
| 72 %define WK_NUM 2 | 72 %define WK_NUM 2 |
| 73 %define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT | 73 %define workspace wk(0)-DCTSIZE2*SIZEOF_FAST_FLOAT |
| 74 ; FAST_FLOAT workspace[DCTSIZE2] | 74 ; FAST_FLOAT workspace[DCTSIZE2] |
| 75 | 75 |
| 76 align 16 | 76 align 16 |
| 77 global EXTN(jsimd_idct_float_sse2) | 77 global EXTN(jsimd_idct_float_sse2) PRIVATE |
| 78 | 78 |
| 79 EXTN(jsimd_idct_float_sse2): | 79 EXTN(jsimd_idct_float_sse2): |
| 80 push rbp | 80 push rbp |
| 81 mov rax,rsp ; rax = original rbp | 81 mov rax,rsp ; rax = original rbp |
| 82 sub rsp, byte 4 | 82 sub rsp, byte 4 |
| 83 and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits | 83 and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits |
| 84 mov [rsp],rax | 84 mov [rsp],rax |
| 85 mov rbp,rsp ; rbp = aligned rbp | 85 mov rbp,rsp ; rbp = aligned rbp |
| 86 lea rsp, [workspace] | 86 lea rsp, [workspace] |
| 87 collect_args | 87 collect_args |
| (...skipping 386 matching lines...) |
| 474 pop rbx | 474 pop rbx |
| 475 uncollect_args | 475 uncollect_args |
| 476 mov rsp,rbp ; rsp <- aligned rbp | 476 mov rsp,rbp ; rsp <- aligned rbp |
| 477 pop rsp ; rsp <- original rbp | 477 pop rsp ; rsp <- original rbp |
| 478 pop rbp | 478 pop rbp |
| 479 ret | 479 ret |
| 480 | 480 |
| 481 ; For some reason, the OS X linker does not honor the request to align the | 481 ; For some reason, the OS X linker does not honor the request to align the |
| 482 ; segment unless we do this. | 482 ; segment unless we do this. |
| 483 align 16 | 483 align 16 |
| OLD | NEW |