| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; jdsample.asm - upsampling (MMX) | 2 ; jdsample.asm - upsampling (MMX) |
| 3 ; | 3 ; |
| 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 ; | 5 ; |
| 6 ; Based on | 6 ; Based on |
| 7 ; x86 SIMD extension for IJG JPEG library | 7 ; x86 SIMD extension for IJG JPEG library |
| 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
| 10 ; | 10 ; |
| 11 ; This file should be assembled with NASM (Netwide Assembler), | 11 ; This file should be assembled with NASM (Netwide Assembler), |
| 12 ; can *not* be assembled with Microsoft's MASM or any compatible | 12 ; can *not* be assembled with Microsoft's MASM or any compatible |
| 13 ; assembler (including Borland's Turbo Assembler). | 13 ; assembler (including Borland's Turbo Assembler). |
| 14 ; NASM is available from http://nasm.sourceforge.net/ or | 14 ; NASM is available from http://nasm.sourceforge.net/ or |
| 15 ; http://sourceforge.net/project/showfiles.php?group_id=6208 | 15 ; http://sourceforge.net/project/showfiles.php?group_id=6208 |
| 16 ; | 16 ; |
| 17 ; [TAB8] | 17 ; [TAB8] |
| 18 | 18 |
| 19 %include "jsimdext.inc" | 19 %include "jsimdext.inc" |
| 20 | 20 |
| 21 ; -------------------------------------------------------------------------- | 21 ; -------------------------------------------------------------------------- |
| 22 SECTION SEG_CONST | 22 SECTION SEG_CONST |
| 23 | 23 |
| 24 alignz 16 | 24 alignz 16 |
| 25 global EXTN(jconst_fancy_upsample_mmx) | 25 global EXTN(jconst_fancy_upsample_mmx) PRIVATE |
| 26 | 26 |
| 27 EXTN(jconst_fancy_upsample_mmx): | 27 EXTN(jconst_fancy_upsample_mmx): |
| 28 | 28 |
| 29 PW_ONE times 4 dw 1 | 29 PW_ONE times 4 dw 1 |
| 30 PW_TWO times 4 dw 2 | 30 PW_TWO times 4 dw 2 |
| 31 PW_THREE times 4 dw 3 | 31 PW_THREE times 4 dw 3 |
| 32 PW_SEVEN times 4 dw 7 | 32 PW_SEVEN times 4 dw 7 |
| 33 PW_EIGHT times 4 dw 8 | 33 PW_EIGHT times 4 dw 8 |
| 34 | 34 |
| 35 alignz 16 | 35 alignz 16 |
| (...skipping 15 matching lines...) | (...skipping 15 matching lines...) |
| 51 ; JSAMPARRAY input_data, | 51 ; JSAMPARRAY input_data, |
| 52 ; JSAMPARRAY *output_data_ptr); | 52 ; JSAMPARRAY *output_data_ptr); |
| 53 ; | 53 ; |
| 54 | 54 |
| 55 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor | 55 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor |
| 56 %define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width | 56 %define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width |
| 57 %define input_data(b) (b)+16 ; JSAMPARRAY input_data | 57 %define input_data(b) (b)+16 ; JSAMPARRAY input_data |
| 58 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr | 58 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr |
| 59 | 59 |
| 60 align 16 | 60 align 16 |
| 61 global EXTN(jsimd_h2v1_fancy_upsample_mmx) | 61 global EXTN(jsimd_h2v1_fancy_upsample_mmx) PRIVATE |
| 62 | 62 |
| 63 EXTN(jsimd_h2v1_fancy_upsample_mmx): | 63 EXTN(jsimd_h2v1_fancy_upsample_mmx): |
| 64 push ebp | 64 push ebp |
| 65 mov ebp,esp | 65 mov ebp,esp |
| 66 pushpic ebx | 66 pushpic ebx |
| 67 ; push ecx ; need not be preserved | 67 ; push ecx ; need not be preserved |
| 68 ; push edx ; need not be preserved | 68 ; push edx ; need not be preserved |
| 69 push esi | 69 push esi |
| 70 push edi | 70 push edi |
| 71 | 71 |
| (...skipping 137 matching lines...) | (...skipping 137 matching lines...) |
| 209 %define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width | 209 %define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width |
| 210 %define input_data(b) (b)+16 ; JSAMPARRAY input_data | 210 %define input_data(b) (b)+16 ; JSAMPARRAY input_data |
| 211 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr | 211 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr |
| 212 | 212 |
| 213 %define original_ebp ebp+0 | 213 %define original_ebp ebp+0 |
| 214 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] | 214 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM] |
| 215 %define WK_NUM 4 | 215 %define WK_NUM 4 |
| 216 %define gotptr wk(0)-SIZEOF_POINTER ; void *gotptr | 216 %define gotptr wk(0)-SIZEOF_POINTER ; void *gotptr |
| 217 | 217 |
| 218 align 16 | 218 align 16 |
| 219 global EXTN(jsimd_h2v2_fancy_upsample_mmx) | 219 global EXTN(jsimd_h2v2_fancy_upsample_mmx) PRIVATE |
| 220 | 220 |
| 221 EXTN(jsimd_h2v2_fancy_upsample_mmx): | 221 EXTN(jsimd_h2v2_fancy_upsample_mmx): |
| 222 push ebp | 222 push ebp |
| 223 mov eax,esp ; eax = original ebp | 223 mov eax,esp ; eax = original ebp |
| 224 sub esp, byte 4 | 224 sub esp, byte 4 |
| 225 and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits | 225 and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits |
| 226 mov [esp],eax | 226 mov [esp],eax |
| 227 mov ebp,esp ; ebp = aligned ebp | 227 mov ebp,esp ; ebp = aligned ebp |
| 228 lea esp, [wk(0)] | 228 lea esp, [wk(0)] |
| 229 pushpic eax ; make a room for GOT address | 229 pushpic eax ; make a room for GOT address |
| (...skipping 305 matching lines...) | (...skipping 305 matching lines...) |
| 535 ; JSAMPARRAY input_data, | 535 ; JSAMPARRAY input_data, |
| 536 ; JSAMPARRAY *output_data_ptr); | 536 ; JSAMPARRAY *output_data_ptr); |
| 537 ; | 537 ; |
| 538 | 538 |
| 539 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor | 539 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor |
| 540 %define output_width(b) (b)+12 ; JDIMENSION output_width | 540 %define output_width(b) (b)+12 ; JDIMENSION output_width |
| 541 %define input_data(b) (b)+16 ; JSAMPARRAY input_data | 541 %define input_data(b) (b)+16 ; JSAMPARRAY input_data |
| 542 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr | 542 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr |
| 543 | 543 |
| 544 align 16 | 544 align 16 |
| 545 global EXTN(jsimd_h2v1_upsample_mmx) | 545 global EXTN(jsimd_h2v1_upsample_mmx) PRIVATE |
| 546 | 546 |
| 547 EXTN(jsimd_h2v1_upsample_mmx): | 547 EXTN(jsimd_h2v1_upsample_mmx): |
| 548 push ebp | 548 push ebp |
| 549 mov ebp,esp | 549 mov ebp,esp |
| 550 ; push ebx ; unused | 550 ; push ebx ; unused |
| 551 ; push ecx ; need not be preserved | 551 ; push ecx ; need not be preserved |
| 552 ; push edx ; need not be preserved | 552 ; push edx ; need not be preserved |
| 553 push esi | 553 push esi |
| 554 push edi | 554 push edi |
| 555 | 555 |
| (...skipping 80 matching lines...) | (...skipping 80 matching lines...) |
| 636 ; JSAMPARRAY input_data, | 636 ; JSAMPARRAY input_data, |
| 637 ; JSAMPARRAY *output_data_ptr); | 637 ; JSAMPARRAY *output_data_ptr); |
| 638 ; | 638 ; |
| 639 | 639 |
| 640 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor | 640 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor |
| 641 %define output_width(b) (b)+12 ; JDIMENSION output_width | 641 %define output_width(b) (b)+12 ; JDIMENSION output_width |
| 642 %define input_data(b) (b)+16 ; JSAMPARRAY input_data | 642 %define input_data(b) (b)+16 ; JSAMPARRAY input_data |
| 643 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr | 643 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr |
| 644 | 644 |
| 645 align 16 | 645 align 16 |
| 646 global EXTN(jsimd_h2v2_upsample_mmx) | 646 global EXTN(jsimd_h2v2_upsample_mmx) PRIVATE |
| 647 | 647 |
| 648 EXTN(jsimd_h2v2_upsample_mmx): | 648 EXTN(jsimd_h2v2_upsample_mmx): |
| 649 push ebp | 649 push ebp |
| 650 mov ebp,esp | 650 mov ebp,esp |
| 651 push ebx | 651 push ebx |
| 652 ; push ecx ; need not be preserved | 652 ; push ecx ; need not be preserved |
| 653 ; push edx ; need not be preserved | 653 ; push edx ; need not be preserved |
| 654 push esi | 654 push esi |
| 655 push edi | 655 push edi |
| 656 | 656 |
| (...skipping 71 matching lines...) | (...skipping 71 matching lines...) |
| 728 pop esi | 728 pop esi |
| 729 ; pop edx ; need not be preserved | 729 ; pop edx ; need not be preserved |
| 730 ; pop ecx ; need not be preserved | 730 ; pop ecx ; need not be preserved |
| 731 pop ebx | 731 pop ebx |
| 732 pop ebp | 732 pop ebp |
| 733 ret | 733 ret |
| 734 | 734 |
| 735 ; For some reason, the OS X linker does not honor the request to align the | 735 ; For some reason, the OS X linker does not honor the request to align the |
| 736 ; segment unless we do this. | 736 ; segment unless we do this. |
| 737 align 16 | 737 align 16 |
| OLD | NEW |