OLD | NEW |
1 ; | 1 ; |
2 ; jimmxred.asm - reduced-size IDCT (MMX) | 2 ; jimmxred.asm - reduced-size IDCT (MMX) |
3 ; | 3 ; |
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
5 ; | 5 ; |
6 ; Based on | 6 ; Based on |
7 ; x86 SIMD extension for IJG JPEG library | 7 ; x86 SIMD extension for IJG JPEG library |
8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
10 ; | 10 ; |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
65 F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) | 65 F_1_847 equ DESCALE(1984016188,30-CONST_BITS) ; FIX(1.847759065) |
66 F_2_172 equ DESCALE(2332956230,30-CONST_BITS) ; FIX(2.172734803) | 66 F_2_172 equ DESCALE(2332956230,30-CONST_BITS) ; FIX(2.172734803) |
67 F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) | 67 F_2_562 equ DESCALE(2751909506,30-CONST_BITS) ; FIX(2.562915447) |
68 F_3_624 equ DESCALE(3891787747,30-CONST_BITS) ; FIX(3.624509785) | 68 F_3_624 equ DESCALE(3891787747,30-CONST_BITS) ; FIX(3.624509785) |
69 %endif | 69 %endif |
70 | 70 |
71 ; -------------------------------------------------------------------------- | 71 ; -------------------------------------------------------------------------- |
72 SECTION SEG_CONST | 72 SECTION SEG_CONST |
73 | 73 |
74 alignz 16 | 74 alignz 16 |
75 » global» EXTN(jconst_idct_red_mmx) | 75 » global» EXTN(jconst_idct_red_mmx) PRIVATE |
76 | 76 |
77 EXTN(jconst_idct_red_mmx): | 77 EXTN(jconst_idct_red_mmx): |
78 | 78 |
79 PW_F184_MF076 times 2 dw F_1_847,-F_0_765 | 79 PW_F184_MF076 times 2 dw F_1_847,-F_0_765 |
80 PW_F256_F089 times 2 dw F_2_562, F_0_899 | 80 PW_F256_F089 times 2 dw F_2_562, F_0_899 |
81 PW_F106_MF217 times 2 dw F_1_061,-F_2_172 | 81 PW_F106_MF217 times 2 dw F_1_061,-F_2_172 |
82 PW_MF060_MF050 times 2 dw -F_0_601,-F_0_509 | 82 PW_MF060_MF050 times 2 dw -F_0_601,-F_0_509 |
83 PW_F145_MF021 times 2 dw F_1_451,-F_0_211 | 83 PW_F145_MF021 times 2 dw F_1_451,-F_0_211 |
84 PW_F362_MF127 times 2 dw F_3_624,-F_1_272 | 84 PW_F362_MF127 times 2 dw F_3_624,-F_1_272 |
85 PW_F085_MF072 times 2 dw F_0_850,-F_0_720 | 85 PW_F085_MF072 times 2 dw F_0_850,-F_0_720 |
(...skipping 22 matching lines...) Expand all Loading... |
108 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf | 108 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf |
109 %define output_col(b) (b)+20 ; JDIMENSION output_col | 109 %define output_col(b) (b)+20 ; JDIMENSION output_col |
110 | 110 |
111 %define original_ebp ebp+0 | 111 %define original_ebp ebp+0 |
112 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] | 112 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] |
113 %define WK_NUM 2 | 113 %define WK_NUM 2 |
114 %define workspace wk(0)-DCTSIZE2*SIZEOF_JCOEF | 114 %define workspace wk(0)-DCTSIZE2*SIZEOF_JCOEF |
115 ; JCOEF workspace[DCTSIZE2] | 115 ; JCOEF workspace[DCTSIZE2] |
116 | 116 |
117 align 16 | 117 align 16 |
118 » global» EXTN(jsimd_idct_4x4_mmx) | 118 » global» EXTN(jsimd_idct_4x4_mmx) PRIVATE |
119 | 119 |
120 EXTN(jsimd_idct_4x4_mmx): | 120 EXTN(jsimd_idct_4x4_mmx): |
121 push ebp | 121 push ebp |
122 mov eax,esp ; eax = original ebp | 122 mov eax,esp ; eax = original ebp |
123 sub esp, byte 4 | 123 sub esp, byte 4 |
124 and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits | 124 and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits |
125 mov [esp],eax | 125 mov [esp],eax |
126 mov ebp,esp ; ebp = aligned ebp | 126 mov ebp,esp ; ebp = aligned ebp |
127 lea esp, [workspace] | 127 lea esp, [workspace] |
128 pushpic ebx | 128 pushpic ebx |
(...skipping 367 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
496 ; jsimd_idct_2x2_mmx (void * dct_table, JCOEFPTR coef_block, | 496 ; jsimd_idct_2x2_mmx (void * dct_table, JCOEFPTR coef_block, |
497 ; JSAMPARRAY output_buf, JDIMENSION output_col) | 497 ; JSAMPARRAY output_buf, JDIMENSION output_col) |
498 ; | 498 ; |
499 | 499 |
500 %define dct_table(b) (b)+8 ; void * dct_table | 500 %define dct_table(b) (b)+8 ; void * dct_table |
501 %define coef_block(b) (b)+12 ; JCOEFPTR coef_block | 501 %define coef_block(b) (b)+12 ; JCOEFPTR coef_block |
502 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf | 502 %define output_buf(b) (b)+16 ; JSAMPARRAY output_buf |
503 %define output_col(b) (b)+20 ; JDIMENSION output_col | 503 %define output_col(b) (b)+20 ; JDIMENSION output_col |
504 | 504 |
505 align 16 | 505 align 16 |
506 » global» EXTN(jsimd_idct_2x2_mmx) | 506 » global» EXTN(jsimd_idct_2x2_mmx) PRIVATE |
507 | 507 |
508 EXTN(jsimd_idct_2x2_mmx): | 508 EXTN(jsimd_idct_2x2_mmx): |
509 push ebp | 509 push ebp |
510 mov ebp,esp | 510 mov ebp,esp |
511 push ebx | 511 push ebx |
512 ; push ecx ; need not be preserved | 512 ; push ecx ; need not be preserved |
513 ; push edx ; need not be preserved | 513 ; push edx ; need not be preserved |
514 push esi | 514 push esi |
515 push edi | 515 push edi |
516 | 516 |
(...skipping 180 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
697 pop esi | 697 pop esi |
698 ; pop edx ; need not be preserved | 698 ; pop edx ; need not be preserved |
699 ; pop ecx ; need not be preserved | 699 ; pop ecx ; need not be preserved |
700 pop ebx | 700 pop ebx |
701 pop ebp | 701 pop ebp |
702 ret | 702 ret |
703 | 703 |
704 ; For some reason, the OS X linker does not honor the request to align the | 704 ; For some reason, the OS X linker does not honor the request to align the |
705 ; segment unless we do this. | 705 ; segment unless we do this. |
706 align 16 | 706 align 16 |
OLD | NEW |