OLD | NEW |
1 ; | 1 ; |
2 ; jimmxfst.asm - fast integer IDCT (MMX) | 2 ; jimmxfst.asm - fast integer IDCT (MMX) |
3 ; | 3 ; |
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
5 ; | 5 ; |
6 ; Based on | 6 ; Based on |
7 ; x86 SIMD extension for IJG JPEG library | 7 ; x86 SIMD extension for IJG JPEG library |
8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
10 ; | 10 ; |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
52 ; -------------------------------------------------------------------------- | 52 ; -------------------------------------------------------------------------- |
53 SECTION SEG_CONST | 53 SECTION SEG_CONST |
54 | 54 |
55 ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) | 55 ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow) |
56 ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) | 56 ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw) |
57 | 57 |
58 %define PRE_MULTIPLY_SCALE_BITS 2 | 58 %define PRE_MULTIPLY_SCALE_BITS 2 |
59 %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) | 59 %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS) |
60 | 60 |
61 alignz 16 | 61 alignz 16 |
62 » global» EXTN(jconst_idct_ifast_mmx) | 62 » global» EXTN(jconst_idct_ifast_mmx) PRIVATE |
63 | 63 |
64 EXTN(jconst_idct_ifast_mmx): | 64 EXTN(jconst_idct_ifast_mmx): |
65 | 65 |
66 PW_F1414 times 4 dw F_1_414 << CONST_SHIFT | 66 PW_F1414 times 4 dw F_1_414 << CONST_SHIFT |
67 PW_F1847 times 4 dw F_1_847 << CONST_SHIFT | 67 PW_F1847 times 4 dw F_1_847 << CONST_SHIFT |
68 PW_MF1613 times 4 dw -F_1_613 << CONST_SHIFT | 68 PW_MF1613 times 4 dw -F_1_613 << CONST_SHIFT |
69 PW_F1082 times 4 dw F_1_082 << CONST_SHIFT | 69 PW_F1082 times 4 dw F_1_082 << CONST_SHIFT |
70 PB_CENTERJSAMP times 8 db CENTERJSAMPLE | 70 PB_CENTERJSAMP times 8 db CENTERJSAMPLE |
71 | 71 |
72 alignz 16 | 72 alignz 16 |
(...skipping 14 matching lines...) Expand all Loading... |
87 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf | 87 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf |
88 %define output_col(b) (b)+20 ; JDIMENSION output_col | 88 %define output_col(b) (b)+20 ; JDIMENSION output_col |
89 | 89 |
90 %define original_ebp ebp+0 | 90 %define original_ebp ebp+0 |
91 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] | 91 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] |
92 %define WK_NUM 2 | 92 %define WK_NUM 2 |
93 %define workspace wk(0)-DCTSIZE2*SIZEOF_JCOEF | 93 %define workspace wk(0)-DCTSIZE2*SIZEOF_JCOEF |
94 ; JCOEF workspace[DCTSIZE2] | 94 ; JCOEF workspace[DCTSIZE2] |
95 | 95 |
96 align 16 | 96 align 16 |
97 » global» EXTN(jsimd_idct_ifast_mmx) | 97 » global» EXTN(jsimd_idct_ifast_mmx) PRIVATE |
98 | 98 |
99 EXTN(jsimd_idct_ifast_mmx): | 99 EXTN(jsimd_idct_ifast_mmx): |
100 push ebp | 100 push ebp |
101 mov eax,esp ; eax = original ebp | 101 mov eax,esp ; eax = original ebp |
102 sub esp, byte 4 | 102 sub esp, byte 4 |
103 and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits | 103 and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits |
104 mov [esp],eax | 104 mov [esp],eax |
105 mov ebp,esp ; ebp = aligned ebp | 105 mov ebp,esp ; ebp = aligned ebp |
106 lea esp, [workspace] | 106 lea esp, [workspace] |
107 push ebx | 107 push ebx |
(...skipping 383 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
491 ; pop ecx ; need not be preserved | 491 ; pop ecx ; need not be preserved |
492 pop ebx | 492 pop ebx |
493 mov esp,ebp ; esp <- aligned ebp | 493 mov esp,ebp ; esp <- aligned ebp |
494 pop esp ; esp <- original ebp | 494 pop esp ; esp <- original ebp |
495 pop ebp | 495 pop ebp |
496 ret | 496 ret |
497 | 497 |
498 ; For some reason, the OS X linker does not honor the request to align the | 498 ; For some reason, the OS X linker does not honor the request to align the |
499 ; segment unless we do this. | 499 ; segment unless we do this. |
500 align 16 | 500 align 16 |
OLD | NEW |