OLD | NEW |
1 ; | 1 ; |
2 ; jdsample.asm - upsampling (MMX) | 2 ; jdsample.asm - upsampling (MMX) |
3 ; | 3 ; |
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
5 ; | 5 ; |
6 ; Based on | 6 ; Based on |
7 ; x86 SIMD extension for IJG JPEG library | 7 ; x86 SIMD extension for IJG JPEG library |
8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
10 ; | 10 ; |
11 ; This file should be assembled with NASM (Netwide Assembler), | 11 ; This file should be assembled with NASM (Netwide Assembler), |
12 ; can *not* be assembled with Microsoft's MASM or any compatible | 12 ; can *not* be assembled with Microsoft's MASM or any compatible |
13 ; assembler (including Borland's Turbo Assembler). | 13 ; assembler (including Borland's Turbo Assembler). |
14 ; NASM is available from http://nasm.sourceforge.net/ or | 14 ; NASM is available from http://nasm.sourceforge.net/ or |
15 ; http://sourceforge.net/project/showfiles.php?group_id=6208 | 15 ; http://sourceforge.net/project/showfiles.php?group_id=6208 |
16 ; | 16 ; |
17 ; [TAB8] | 17 ; [TAB8] |
18 | 18 |
19 %include "jsimdext.inc" | 19 %include "jsimdext.inc" |
20 | 20 |
21 ; -------------------------------------------------------------------------- | 21 ; -------------------------------------------------------------------------- |
22 SECTION SEG_CONST | 22 SECTION SEG_CONST |
23 | 23 |
24 alignz 16 | 24 alignz 16 |
; Constant table exported under the label jconst_fancy_upsample_mmx.
; Each PW_* entry is emitted as "times 4 dw N": four consecutive 16-bit
; words that all hold the value N (8 bytes per entry).
; In this side-by-side view the only OLD/NEW difference inside this block is
; the PRIVATE qualifier appended to the global declaration. PRIVATE is not
; defined anywhere in the visible source -- presumably it is supplied by
; jsimdext.inc; TODO confirm before relying on its meaning.
25 global EXTN(jconst_fancy_upsample_mmx) | 25 global EXTN(jconst_fancy_upsample_mmx) PRIVATE |
26 | 26 |
27 EXTN(jconst_fancy_upsample_mmx): | 27 EXTN(jconst_fancy_upsample_mmx): |
28 | 28 |
29 PW_ONE times 4 dw 1 | 29 PW_ONE times 4 dw 1 |
30 PW_TWO times 4 dw 2 | 30 PW_TWO times 4 dw 2 |
31 PW_THREE times 4 dw 3 | 31 PW_THREE times 4 dw 3 |
32 PW_SEVEN times 4 dw 7 | 32 PW_SEVEN times 4 dw 7 |
33 PW_EIGHT times 4 dw 8 | 33 PW_EIGHT times 4 dw 8 |
34 | 34 |
35 alignz 16 | 35 alignz 16 |
(...skipping 15 matching lines...) Expand all Loading... |
51 ; JSAMPARRAY input_data, | 51 ; JSAMPARRAY input_data, |
52 ; JSAMPARRAY *output_data_ptr); | 52 ; JSAMPARRAY *output_data_ptr); |
53 ; | 53 ; |
54 | 54 |
55 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor | 55 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor |
56 %define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width | 56 %define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width |
57 %define input_data(b) (b)+16 ; JSAMPARRAY input_data | 57 %define input_data(b) (b)+16 ; JSAMPARRAY input_data |
58 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr | 58 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr |
59 | 59 |
60 align 16 | 60 align 16 |
61 global EXTN(jsimd_h2v1_fancy_upsample_mmx) | 61 global EXTN(jsimd_h2v1_fancy_upsample_mmx) PRIVATE |
62 | 62 |
63 EXTN(jsimd_h2v1_fancy_upsample_mmx): | 63 EXTN(jsimd_h2v1_fancy_upsample_mmx): |
64 push ebp | 64 push ebp |
65 mov ebp,esp | 65 mov ebp,esp |
66 pushpic ebx | 66 pushpic ebx |
67 ; push ecx ; need not be preserved | 67 ; push ecx ; need not be preserved |
68 ; push edx ; need not be preserved | 68 ; push edx ; need not be preserved |
69 push esi | 69 push esi |
70 push edi | 70 push edi |
71 | 71 |
(...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
209 %define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width | 209 %define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width |
210 %define input_data(b) (b)+16 ; JSAMPARRAY input_data | 210 %define input_data(b) (b)+16 ; JSAMPARRAY input_data |
211 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr | 211 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr |
212 | 212 |
213 %define original_ebp ebp+0 | 213 %define original_ebp ebp+0 |
214 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] | 214 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] |
215 %define WK_NUM 4 | 215 %define WK_NUM 4 |
216 %define gotptr wk(0)-SIZEOF_POINTER ; void *gotptr | 216 %define gotptr wk(0)-SIZEOF_POINTER ; void *gotptr |
217 | 217 |
218 align 16 | 218 align 16 |
219 global EXTN(jsimd_h2v2_fancy_upsample_mmx) | 219 global EXTN(jsimd_h2v2_fancy_upsample_mmx) PRIVATE |
220 | 220 |
221 EXTN(jsimd_h2v2_fancy_upsample_mmx): | 221 EXTN(jsimd_h2v2_fancy_upsample_mmx): |
222 push ebp | 222 push ebp |
223 mov eax,esp ; eax = original ebp | 223 mov eax,esp ; eax = original ebp |
224 sub esp, byte 4 | 224 sub esp, byte 4 |
225 and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits | 225 and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits |
226 mov [esp],eax | 226 mov [esp],eax |
227 mov ebp,esp ; ebp = aligned ebp | 227 mov ebp,esp ; ebp = aligned ebp |
228 lea esp, [wk(0)] | 228 lea esp, [wk(0)] |
229 pushpic eax ; make room for the GOT address | 229 pushpic eax ; make room for the GOT address |
(...skipping 305 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
535 ; JSAMPARRAY input_data, | 535 ; JSAMPARRAY input_data, |
536 ; JSAMPARRAY *output_data_ptr); | 536 ; JSAMPARRAY *output_data_ptr); |
537 ; | 537 ; |
538 | 538 |
539 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor | 539 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor |
540 %define output_width(b) (b)+12 ; JDIMENSION output_width | 540 %define output_width(b) (b)+12 ; JDIMENSION output_width |
541 %define input_data(b) (b)+16 ; JSAMPARRAY input_data | 541 %define input_data(b) (b)+16 ; JSAMPARRAY input_data |
542 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr | 542 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr |
543 | 543 |
544 align 16 | 544 align 16 |
545 global EXTN(jsimd_h2v1_upsample_mmx) | 545 global EXTN(jsimd_h2v1_upsample_mmx) PRIVATE |
546 | 546 |
547 EXTN(jsimd_h2v1_upsample_mmx): | 547 EXTN(jsimd_h2v1_upsample_mmx): |
548 push ebp | 548 push ebp |
549 mov ebp,esp | 549 mov ebp,esp |
550 ; push ebx ; unused | 550 ; push ebx ; unused |
551 ; push ecx ; need not be preserved | 551 ; push ecx ; need not be preserved |
552 ; push edx ; need not be preserved | 552 ; push edx ; need not be preserved |
553 push esi | 553 push esi |
554 push edi | 554 push edi |
555 | 555 |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
636 ; JSAMPARRAY input_data, | 636 ; JSAMPARRAY input_data, |
637 ; JSAMPARRAY *output_data_ptr); | 637 ; JSAMPARRAY *output_data_ptr); |
638 ; | 638 ; |
639 | 639 |
640 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor | 640 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor |
641 %define output_width(b) (b)+12 ; JDIMENSION output_width | 641 %define output_width(b) (b)+12 ; JDIMENSION output_width |
642 %define input_data(b) (b)+16 ; JSAMPARRAY input_data | 642 %define input_data(b) (b)+16 ; JSAMPARRAY input_data |
643 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr | 643 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr |
644 | 644 |
645 align 16 | 645 align 16 |
646 global EXTN(jsimd_h2v2_upsample_mmx) | 646 global EXTN(jsimd_h2v2_upsample_mmx) PRIVATE |
647 | 647 |
648 EXTN(jsimd_h2v2_upsample_mmx): | 648 EXTN(jsimd_h2v2_upsample_mmx): |
649 push ebp | 649 push ebp |
650 mov ebp,esp | 650 mov ebp,esp |
651 push ebx | 651 push ebx |
652 ; push ecx ; need not be preserved | 652 ; push ecx ; need not be preserved |
653 ; push edx ; need not be preserved | 653 ; push edx ; need not be preserved |
654 push esi | 654 push esi |
655 push edi | 655 push edi |
656 | 656 |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
728 pop esi | 728 pop esi |
729 ; pop edx ; need not be preserved | 729 ; pop edx ; need not be preserved |
730 ; pop ecx ; need not be preserved | 730 ; pop ecx ; need not be preserved |
731 pop ebx | 731 pop ebx |
732 pop ebp | 732 pop ebp |
733 ret | 733 ret |
734 | 734 |
735 ; For some reason, the OS X linker does not honor the request to align the | 735 ; For some reason, the OS X linker does not honor the request to align the |
736 ; segment unless we do this. | 736 ; segment unless we do this. |
737 align 16 | 737 align 16 |
OLD | NEW |