OLD | NEW |
1 ; | 1 ; |
2 ; jdsample.asm - upsampling (SSE2) | 2 ; jdsample.asm - upsampling (SSE2) |
3 ; | 3 ; |
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
5 ; | 5 ; |
6 ; Based on | 6 ; Based on |
7 ; x86 SIMD extension for IJG JPEG library | 7 ; x86 SIMD extension for IJG JPEG library |
8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
10 ; | 10 ; |
11 ; This file should be assembled with NASM (Netwide Assembler); it | 11 ; This file should be assembled with NASM (Netwide Assembler); it |
12 ; can *not* be assembled with Microsoft's MASM or any compatible | 12 ; can *not* be assembled with Microsoft's MASM or any compatible |
13 ; assembler (including Borland's Turbo Assembler). | 13 ; assembler (including Borland's Turbo Assembler). |
14 ; NASM is available from http://nasm.sourceforge.net/ or | 14 ; NASM is available from http://nasm.sourceforge.net/ or |
15 ; http://sourceforge.net/project/showfiles.php?group_id=6208 | 15 ; http://sourceforge.net/project/showfiles.php?group_id=6208 |
16 ; | 16 ; |
17 ; [TAB8] | 17 ; [TAB8] |
18 | 18 |
19 %include "jsimdext.inc" | 19 %include "jsimdext.inc" |
20 | 20 |
21 ; -------------------------------------------------------------------------- | 21 ; -------------------------------------------------------------------------- |
22 SECTION SEG_CONST | 22 SECTION SEG_CONST |
23 | 23 |
24 alignz 16 | 24 alignz 16 |
25 global EXTN(jconst_fancy_upsample_sse2) | 25 global EXTN(jconst_fancy_upsample_sse2) PRIVATE |
26 | 26 |
27 EXTN(jconst_fancy_upsample_sse2): | 27 EXTN(jconst_fancy_upsample_sse2): |
28 | 28 |
29 PW_ONE times 8 dw 1 | 29 PW_ONE times 8 dw 1 |
30 PW_TWO times 8 dw 2 | 30 PW_TWO times 8 dw 2 |
31 PW_THREE times 8 dw 3 | 31 PW_THREE times 8 dw 3 |
32 PW_SEVEN times 8 dw 7 | 32 PW_SEVEN times 8 dw 7 |
33 PW_EIGHT times 8 dw 8 | 33 PW_EIGHT times 8 dw 8 |
34 | 34 |
35 alignz 16 | 35 alignz 16 |
(...skipping 15 matching lines...) Expand all Loading... |
51 ; JSAMPARRAY input_data, | 51 ; JSAMPARRAY input_data, |
52 ; JSAMPARRAY *output_data_ptr); | 52 ; JSAMPARRAY *output_data_ptr); |
53 ; | 53 ; |
54 | 54 |
55 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor | 55 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor |
56 %define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width | 56 %define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width |
57 %define input_data(b) (b)+16 ; JSAMPARRAY input_data | 57 %define input_data(b) (b)+16 ; JSAMPARRAY input_data |
58 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr | 58 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr |
59 | 59 |
60 align 16 | 60 align 16 |
61 global EXTN(jsimd_h2v1_fancy_upsample_sse2) | 61 global EXTN(jsimd_h2v1_fancy_upsample_sse2) PRIVATE |
62 | 62 |
63 EXTN(jsimd_h2v1_fancy_upsample_sse2): | 63 EXTN(jsimd_h2v1_fancy_upsample_sse2): |
64 push ebp | 64 push ebp |
65 mov ebp,esp | 65 mov ebp,esp |
66 pushpic ebx | 66 pushpic ebx |
67 ; push ecx ; need not be preserved | 67 ; push ecx ; need not be preserved |
68 ; push edx ; need not be preserved | 68 ; push edx ; need not be preserved |
69 push esi | 69 push esi |
70 push edi | 70 push edi |
71 | 71 |
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
207 %define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width | 207 %define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width |
208 %define input_data(b) (b)+16 ; JSAMPARRAY input_data | 208 %define input_data(b) (b)+16 ; JSAMPARRAY input_data |
209 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr | 209 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr |
210 | 210 |
211 %define original_ebp ebp+0 | 211 %define original_ebp ebp+0 |
212 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] | 212 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] |
213 %define WK_NUM 4 | 213 %define WK_NUM 4 |
214 %define gotptr wk(0)-SIZEOF_POINTER ; void *gotptr | 214 %define gotptr wk(0)-SIZEOF_POINTER ; void *gotptr |
215 | 215 |
216 align 16 | 216 align 16 |
217 global EXTN(jsimd_h2v2_fancy_upsample_sse2) | 217 global EXTN(jsimd_h2v2_fancy_upsample_sse2) PRIVATE |
218 | 218 |
219 EXTN(jsimd_h2v2_fancy_upsample_sse2): | 219 EXTN(jsimd_h2v2_fancy_upsample_sse2): |
220 push ebp | 220 push ebp |
221 mov eax,esp ; eax = original ebp | 221 mov eax,esp ; eax = original ebp |
222 sub esp, byte 4 | 222 sub esp, byte 4 |
223 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits | 223 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits |
224 mov [esp],eax | 224 mov [esp],eax |
225 mov ebp,esp ; ebp = aligned ebp | 225 mov ebp,esp ; ebp = aligned ebp |
226 lea esp, [wk(0)] | 226 lea esp, [wk(0)] |
227 pushpic eax ; make a room for GOT address | 227 pushpic eax ; make a room for GOT address |
(...skipping 303 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
531 ; JSAMPARRAY input_data, | 531 ; JSAMPARRAY input_data, |
532 ; JSAMPARRAY *output_data_ptr); | 532 ; JSAMPARRAY *output_data_ptr); |
533 ; | 533 ; |
534 | 534 |
535 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor | 535 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor |
536 %define output_width(b) (b)+12 ; JDIMENSION output_width | 536 %define output_width(b) (b)+12 ; JDIMENSION output_width |
537 %define input_data(b) (b)+16 ; JSAMPARRAY input_data | 537 %define input_data(b) (b)+16 ; JSAMPARRAY input_data |
538 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr | 538 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr |
539 | 539 |
540 align 16 | 540 align 16 |
541 global EXTN(jsimd_h2v1_upsample_sse2) | 541 global EXTN(jsimd_h2v1_upsample_sse2) PRIVATE |
542 | 542 |
543 EXTN(jsimd_h2v1_upsample_sse2): | 543 EXTN(jsimd_h2v1_upsample_sse2): |
544 push ebp | 544 push ebp |
545 mov ebp,esp | 545 mov ebp,esp |
546 ; push ebx ; unused | 546 ; push ebx ; unused |
547 ; push ecx ; need not be preserved | 547 ; push ecx ; need not be preserved |
548 ; push edx ; need not be preserved | 548 ; push edx ; need not be preserved |
549 push esi | 549 push esi |
550 push edi | 550 push edi |
551 | 551 |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
630 ; JSAMPARRAY input_data, | 630 ; JSAMPARRAY input_data, |
631 ; JSAMPARRAY *output_data_ptr); | 631 ; JSAMPARRAY *output_data_ptr); |
632 ; | 632 ; |
633 | 633 |
634 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor | 634 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor |
635 %define output_width(b) (b)+12 ; JDIMENSION output_width | 635 %define output_width(b) (b)+12 ; JDIMENSION output_width |
636 %define input_data(b) (b)+16 ; JSAMPARRAY input_data | 636 %define input_data(b) (b)+16 ; JSAMPARRAY input_data |
637 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr | 637 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr |
638 | 638 |
639 align 16 | 639 align 16 |
640 global EXTN(jsimd_h2v2_upsample_sse2) | 640 global EXTN(jsimd_h2v2_upsample_sse2) PRIVATE |
641 | 641 |
642 EXTN(jsimd_h2v2_upsample_sse2): | 642 EXTN(jsimd_h2v2_upsample_sse2): |
643 push ebp | 643 push ebp |
644 mov ebp,esp | 644 mov ebp,esp |
645 push ebx | 645 push ebx |
646 ; push ecx ; need not be preserved | 646 ; push ecx ; need not be preserved |
647 ; push edx ; need not be preserved | 647 ; push edx ; need not be preserved |
648 push esi | 648 push esi |
649 push edi | 649 push edi |
650 | 650 |
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
720 pop esi | 720 pop esi |
721 ; pop edx ; need not be preserved | 721 ; pop edx ; need not be preserved |
722 ; pop ecx ; need not be preserved | 722 ; pop ecx ; need not be preserved |
723 pop ebx | 723 pop ebx |
724 pop ebp | 724 pop ebp |
725 ret | 725 ret |
726 | 726 |
727 ; For some reason, the OS X linker does not honor the request to align the | 727 ; For some reason, the OS X linker does not honor the request to align the |
728 ; segment unless we do this. | 728 ; segment unless we do this. |
729 align 16 | 729 align 16 |
OLD | NEW |