| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; jfmmxfst.asm - fast integer FDCT (MMX) | 2 ; jfmmxfst.asm - fast integer FDCT (MMX) |
| 3 ; | 3 ; |
| 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 ; | 5 ; |
| 6 ; Based on | 6 ; Based on |
| 7 ; x86 SIMD extension for IJG JPEG library | 7 ; x86 SIMD extension for IJG JPEG library |
| 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
| 10 ; | 10 ; |
| (...skipping 34 matching lines...) | (...skipping 34 matching lines...) |
| 45 ; -------------------------------------------------------------------------- | 45 ; -------------------------------------------------------------------------- |
| 46 SECTION SEG_CONST | 46 SECTION SEG_CONST |
| 47 | 47 |
| 48 ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) | 48 ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) |
| 49 ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) | 49 ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) |
| 50 | 50 |
| 51 %define PRE_MULTIPLY_SCALE_BITS 2 | 51 %define PRE_MULTIPLY_SCALE_BITS 2 |
| 52 %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) | 52 %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
| 53 | 53 |
| 54 alignz 16 | 54 alignz 16 |
| 55 » global» EXTN(jconst_fdct_ifast_mmx) | 55 » global» EXTN(jconst_fdct_ifast_mmx) PRIVATE |
| 56 | 56 |
| 57 EXTN(jconst_fdct_ifast_mmx): | 57 EXTN(jconst_fdct_ifast_mmx): |
| 58 | 58 |
| 59 PW_F0707 times 4 dw F_0_707 << CONST_SHIFT | 59 PW_F0707 times 4 dw F_0_707 << CONST_SHIFT |
| 60 PW_F0382 times 4 dw F_0_382 << CONST_SHIFT | 60 PW_F0382 times 4 dw F_0_382 << CONST_SHIFT |
| 61 PW_F0541 times 4 dw F_0_541 << CONST_SHIFT | 61 PW_F0541 times 4 dw F_0_541 << CONST_SHIFT |
| 62 PW_F1306 times 4 dw F_1_306 << CONST_SHIFT | 62 PW_F1306 times 4 dw F_1_306 << CONST_SHIFT |
| 63 | 63 |
| 64 alignz 16 | 64 alignz 16 |
| 65 | 65 |
| 66 ; -------------------------------------------------------------------------- | 66 ; -------------------------------------------------------------------------- |
| 67 SECTION SEG_TEXT | 67 SECTION SEG_TEXT |
| 68 BITS 32 | 68 BITS 32 |
| 69 ; | 69 ; |
| 70 ; Perform the forward DCT on one block of samples. | 70 ; Perform the forward DCT on one block of samples. |
| 71 ; | 71 ; |
| 72 ; GLOBAL(void) | 72 ; GLOBAL(void) |
| 73 ; jsimd_fdct_ifast_mmx (DCTELEM * data) | 73 ; jsimd_fdct_ifast_mmx (DCTELEM * data) |
| 74 ; | 74 ; |
| 75 | 75 |
| 76 %define data(b) (b)+8 ; DCTELEM * data | 76 %define data(b) (b)+8 ; DCTELEM * data |
| 77 | 77 |
| 78 %define original_ebp ebp+0 | 78 %define original_ebp ebp+0 |
| 79 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] | 79 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] |
| 80 %define WK_NUM 2 | 80 %define WK_NUM 2 |
| 81 | 81 |
| 82 align 16 | 82 align 16 |
| 83 » global» EXTN(jsimd_fdct_ifast_mmx) | 83 » global» EXTN(jsimd_fdct_ifast_mmx) PRIVATE |
| 84 | 84 |
| 85 EXTN(jsimd_fdct_ifast_mmx): | 85 EXTN(jsimd_fdct_ifast_mmx): |
| 86 push ebp | 86 push ebp |
| 87 mov eax,esp ; eax = original ebp | 87 mov eax,esp ; eax = original ebp |
| 88 sub esp, byte 4 | 88 sub esp, byte 4 |
| 89 and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits | 89 and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits |
| 90 mov [esp],eax | 90 mov [esp],eax |
| 91 mov ebp,esp ; ebp = aligned ebp | 91 mov ebp,esp ; ebp = aligned ebp |
| 92 lea esp, [wk(0)] | 92 lea esp, [wk(0)] |
| 93 pushpic ebx | 93 pushpic ebx |
| (...skipping 294 matching lines...) | (...skipping 294 matching lines...) |
| 388 ; pop ecx ; need not be preserved | 388 ; pop ecx ; need not be preserved |
| 389 poppic ebx | 389 poppic ebx |
| 390 mov esp,ebp ; esp <- aligned ebp | 390 mov esp,ebp ; esp <- aligned ebp |
| 391 pop esp ; esp <- original ebp | 391 pop esp ; esp <- original ebp |
| 392 pop ebp | 392 pop ebp |
| 393 ret | 393 ret |
| 394 | 394 |
| 395 ; For some reason, the OS X linker does not honor the request to align the | 395 ; For some reason, the OS X linker does not honor the request to align the |
| 396 ; segment unless we do this. | 396 ; segment unless we do this. |
| 397 align 16 | 397 align 16 |
| OLD | NEW |