OLD | NEW |
1 ; | 1 ; |
2 ; jfdctflt.asm - floating-point FDCT (SSE) | 2 ; jfdctflt.asm - floating-point FDCT (SSE) |
3 ; | 3 ; |
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
5 ; | 5 ; |
6 ; Based on | 6 ; Based on |
7 ; x86 SIMD extension for IJG JPEG library | 7 ; x86 SIMD extension for IJG JPEG library |
8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
10 ; | 10 ; |
(...skipping 19 matching lines...) Expand all Loading... |
30 %endmacro | 30 %endmacro |
31 | 31 |
; unpckhps2 dst, src
;   Two-operand helper built on a single shufps; the result overwrites %1.
;   Selector 0xEE is binary 11 10 11 10: the low two result lanes are lanes
;   2 and 3 of %1, the high two result lanes are lanes 2 and 3 of %2. In other
;   words, only the upper halves of both operands are kept.
32 %macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) | 32 %macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7) |
33 shufps %1,%2,0xEE | 33 shufps %1,%2,0xEE |
34 %endmacro | 34 %endmacro |
35 | 35 |
36 ; -------------------------------------------------------------------------- | 36 ; -------------------------------------------------------------------------- |
; Constant table for the float FDCT routine, exported under the label
; jconst_fdct_float_sse. The NEW column adds PRIVATE to the global line; that
; is the only difference between the two revisions in this section.
; NOTE(review): alignz, EXTN, SEG_CONST and PRIVATE are not defined in the
; visible lines -- presumably they come from the shared include; confirm.
37 SECTION SEG_CONST | 37 SECTION SEG_CONST |
38 | 38 |
39 alignz 16 | 39 alignz 16 |
40 global EXTN(jconst_fdct_float_sse) | 40 global EXTN(jconst_fdct_float_sse) PRIVATE |
41 | 41 |
42 EXTN(jconst_fdct_float_sse): | 42 EXTN(jconst_fdct_float_sse): |
43 | 43 |
; Each entry is "times 4 dd <float>": the same 32-bit single-precision value
; repeated four times, i.e. one 16-byte row per constant.
; Numerically the values equal:
;   PD_0_382 = sin(pi/8)            PD_0_707 = 1/sqrt(2)
;   PD_0_541 = sqrt(2)*sin(pi/8)    PD_1_306 = sqrt(2)*cos(pi/8)
; NOTE(review): how the rows are consumed is in the elided part of the
; routine; verify against the body before relying on these identities.
44 PD_0_382 times 4 dd 0.382683432365089771728460 | 44 PD_0_382 times 4 dd 0.382683432365089771728460 |
45 PD_0_707 times 4 dd 0.707106781186547524400844 | 45 PD_0_707 times 4 dd 0.707106781186547524400844 |
46 PD_0_541 times 4 dd 0.541196100146196984399723 | 46 PD_0_541 times 4 dd 0.541196100146196984399723 |
47 PD_1_306 times 4 dd 1.306562964876376527856643 | 47 PD_1_306 times 4 dd 1.306562964876376527856643 |
48 | 48 |
49 alignz 16 | 49 alignz 16 |
50 | 50 |
51 ; -------------------------------------------------------------------------- | 51 ; -------------------------------------------------------------------------- |
52 SECTION SEG_TEXT | 52 SECTION SEG_TEXT |
53 BITS 32 | 53 BITS 32 |
54 ; | 54 ; |
55 ; Perform the forward DCT on one block of samples. | 55 ; Perform the forward DCT on one block of samples. |
56 ; | 56 ; |
57 ; GLOBAL(void) | 57 ; GLOBAL(void) |
58 ; jsimd_fdct_float_sse (FAST_FLOAT *data) | 58 ; jsimd_fdct_float_sse (FAST_FLOAT *data) |
59 ; | 59 ; |
60 | 60 |
; Frame-layout helpers used by this routine.
;   data(b)      : b+8. If b is the stack pointer value captured right after
;                  "push ebp" (the value the prologue stores at original_ebp),
;                  then b+0 is the pushed ebp, b+4 the return address and b+8
;                  the first stack argument.
;                  NOTE(review): the use of data() is in the elided body; confirm.
;   original_ebp : ebp+0, the slot where the prologue stores the pre-alignment
;                  stack pointer.
;   wk(i)        : i-th xmmword of a WK_NUM-entry scratch array placed directly
;                  below the aligned ebp; wk(0) is the lowest address.
61 %define data(b) (b)+8 ; FAST_FLOAT *data | 61 %define data(b) (b)+8 ; FAST_FLOAT *data |
62 | 62 |
63 %define original_ebp ebp+0 | 63 %define original_ebp ebp+0 |
64 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] | 64 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] |
65 %define WK_NUM 2 | 65 %define WK_NUM 2 |
66 | 66 |
; The NEW column adds PRIVATE to the global declaration below.
; NOTE(review): PRIVATE and EXTN are not defined in the visible lines --
; presumably macros from the shared include; confirm.
67 align 16 | 67 align 16 |
68 global EXTN(jsimd_fdct_float_sse) | 68 global EXTN(jsimd_fdct_float_sse) PRIVATE |
69 | 69 |
70 EXTN(jsimd_fdct_float_sse): | 70 EXTN(jsimd_fdct_float_sse): |
; Prologue: build a frame whose base is a multiple of SIZEOF_XMMWORD.
;   1. Save the caller's ebp and remember the resulting esp in eax.
;   2. Reserve one dword, then round esp down to the alignment boundary.
;   3. Store the remembered esp at the aligned slot and make it the new ebp,
;      so [original_ebp] leads back to the unaligned frame.
;   4. Move esp down to wk(0), reserving the WK_NUM xmmword scratch slots.
; pushpic is defined outside the visible lines; it is paired with poppic in
; the epilogue.
71 push ebp | 71 push ebp |
72 mov eax,esp ; eax = original ebp | 72 mov eax,esp ; eax = original ebp |
73 sub esp, byte 4 | 73 sub esp, byte 4 |
74 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits | 74 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits |
75 mov [esp],eax | 75 mov [esp],eax |
76 mov ebp,esp ; ebp = aligned ebp | 76 mov ebp,esp ; ebp = aligned ebp |
77 lea esp, [wk(0)] | 77 lea esp, [wk(0)] |
78 pushpic ebx | 78 pushpic ebx |
; The routine body (282 lines) is elided by the diff viewer at this point.
(...skipping 282 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
; Epilogue: undo the prologue in reverse order.
;   "mov esp,ebp" points esp at the aligned slot holding the pre-alignment
;   stack pointer; "pop esp" loads that value into esp, which discards both
;   the scratch area and the alignment padding; "pop ebp" then restores the
;   caller's ebp before returning.
361 ; pop ecx ; need not be preserved | 361 ; pop ecx ; need not be preserved |
362 poppic ebx | 362 poppic ebx |
363 mov esp,ebp ; esp <- aligned ebp | 363 mov esp,ebp ; esp <- aligned ebp |
364 pop esp ; esp <- original ebp | 364 pop esp ; esp <- original ebp |
365 pop ebp | 365 pop ebp |
366 ret | 366 ret |
367 | 367 |
368 ; For some reason, the OS X linker does not honor the request to align the | 368 ; For some reason, the OS X linker does not honor the request to align the |
369 ; segment unless we do this. | 369 ; segment unless we do this. |
370 align 16 | 370 align 16 |
OLD | NEW |