| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; jdsample.asm - upsampling (SSE2) | 2 ; jdsample.asm - upsampling (SSE2) |
| 3 ; | 3 ; |
| 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 ; | 5 ; |
| 6 ; Based on | 6 ; Based on |
| 7 ; x86 SIMD extension for IJG JPEG library | 7 ; x86 SIMD extension for IJG JPEG library |
| 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
| 10 ; | 10 ; |
| 11 ; This file should be assembled with NASM (Netwide Assembler); it | 11 ; This file should be assembled with NASM (Netwide Assembler); it |
| 12 ; can *not* be assembled with Microsoft's MASM or any compatible | 12 ; can *not* be assembled with Microsoft's MASM or any compatible |
| 13 ; assembler (including Borland's Turbo Assembler). | 13 ; assembler (including Borland's Turbo Assembler). |
| 14 ; NASM is available from http://nasm.sourceforge.net/ or | 14 ; NASM is available from http://nasm.sourceforge.net/ or |
| 15 ; http://sourceforge.net/project/showfiles.php?group_id=6208 | 15 ; http://sourceforge.net/project/showfiles.php?group_id=6208 |
| 16 ; | 16 ; |
| 17 ; [TAB8] | 17 ; [TAB8] |
| 18 | 18 |
| 19 %include "jsimdext.inc" | 19 %include "jsimdext.inc" |
| 20 | 20 |
| 21 ; -------------------------------------------------------------------------- | 21 ; -------------------------------------------------------------------------- |
| 22 SECTION SEG_CONST | 22 SECTION SEG_CONST |
| 23 | 23 |
| 24 alignz 16 | 24 alignz 16 |
| 25 global EXTN(jconst_fancy_upsample_sse2) | 25 global EXTN(jconst_fancy_upsample_sse2) PRIVATE |
| 26 | 26 |
| 27 EXTN(jconst_fancy_upsample_sse2): | 27 EXTN(jconst_fancy_upsample_sse2): |
| 28 | 28 |
| 29 PW_ONE times 8 dw 1 | 29 PW_ONE times 8 dw 1 |
| 30 PW_TWO times 8 dw 2 | 30 PW_TWO times 8 dw 2 |
| 31 PW_THREE times 8 dw 3 | 31 PW_THREE times 8 dw 3 |
| 32 PW_SEVEN times 8 dw 7 | 32 PW_SEVEN times 8 dw 7 |
| 33 PW_EIGHT times 8 dw 8 | 33 PW_EIGHT times 8 dw 8 |
| 34 | 34 |
| 35 alignz 16 | 35 alignz 16 |
| (...skipping 15 matching lines...) |
| 51 ; JSAMPARRAY input_data, | 51 ; JSAMPARRAY input_data, |
| 52 ; JSAMPARRAY *output_data_ptr); | 52 ; JSAMPARRAY *output_data_ptr); |
| 53 ; | 53 ; |
| 54 | 54 |
| 55 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor | 55 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor |
| 56 %define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width | 56 %define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width |
| 57 %define input_data(b) (b)+16 ; JSAMPARRAY input_data | 57 %define input_data(b) (b)+16 ; JSAMPARRAY input_data |
| 58 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr | 58 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr |
| 59 | 59 |
| 60 align 16 | 60 align 16 |
| 61 global EXTN(jsimd_h2v1_fancy_upsample_sse2) | 61 global EXTN(jsimd_h2v1_fancy_upsample_sse2) PRIVATE |
| 62 | 62 |
| 63 EXTN(jsimd_h2v1_fancy_upsample_sse2): | 63 EXTN(jsimd_h2v1_fancy_upsample_sse2): |
| 64 push ebp | 64 push ebp |
| 65 mov ebp,esp | 65 mov ebp,esp |
| 66 pushpic ebx | 66 pushpic ebx |
| 67 ; push ecx ; need not be preserved | 67 ; push ecx ; need not be preserved |
| 68 ; push edx ; need not be preserved | 68 ; push edx ; need not be preserved |
| 69 push esi | 69 push esi |
| 70 push edi | 70 push edi |
| 71 | 71 |
| (...skipping 135 matching lines...) |
| 207 %define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width | 207 %define downsamp_width(b) (b)+12 ; JDIMENSION downsampled_width |
| 208 %define input_data(b) (b)+16 ; JSAMPARRAY input_data | 208 %define input_data(b) (b)+16 ; JSAMPARRAY input_data |
| 209 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr | 209 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr |
| 210 | 210 |
| 211 %define original_ebp ebp+0 | 211 %define original_ebp ebp+0 |
| 212 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] | 212 %define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM] |
| 213 %define WK_NUM 4 | 213 %define WK_NUM 4 |
| 214 %define gotptr wk(0)-SIZEOF_POINTER ; void *gotptr | 214 %define gotptr wk(0)-SIZEOF_POINTER ; void *gotptr |
| 215 | 215 |
| 216 align 16 | 216 align 16 |
| 217 global EXTN(jsimd_h2v2_fancy_upsample_sse2) | 217 global EXTN(jsimd_h2v2_fancy_upsample_sse2) PRIVATE |
| 218 | 218 |
| 219 EXTN(jsimd_h2v2_fancy_upsample_sse2): | 219 EXTN(jsimd_h2v2_fancy_upsample_sse2): |
| 220 push ebp | 220 push ebp |
| 221 mov eax,esp ; eax = original ebp | 221 mov eax,esp ; eax = original ebp |
| 222 sub esp, byte 4 | 222 sub esp, byte 4 |
| 223 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits | 223 and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits |
| 224 mov [esp],eax | 224 mov [esp],eax |
| 225 mov ebp,esp ; ebp = aligned ebp | 225 mov ebp,esp ; ebp = aligned ebp |
| 226 lea esp, [wk(0)] | 226 lea esp, [wk(0)] |
| 227 pushpic eax ; make a room for GOT address | 227 pushpic eax ; make a room for GOT address |
| (...skipping 303 matching lines...) |
| 531 ; JSAMPARRAY input_data, | 531 ; JSAMPARRAY input_data, |
| 532 ; JSAMPARRAY *output_data_ptr); | 532 ; JSAMPARRAY *output_data_ptr); |
| 533 ; | 533 ; |
| 534 | 534 |
| 535 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor | 535 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor |
| 536 %define output_width(b) (b)+12 ; JDIMENSION output_width | 536 %define output_width(b) (b)+12 ; JDIMENSION output_width |
| 537 %define input_data(b) (b)+16 ; JSAMPARRAY input_data | 537 %define input_data(b) (b)+16 ; JSAMPARRAY input_data |
| 538 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr | 538 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr |
| 539 | 539 |
| 540 align 16 | 540 align 16 |
| 541 global EXTN(jsimd_h2v1_upsample_sse2) | 541 global EXTN(jsimd_h2v1_upsample_sse2) PRIVATE |
| 542 | 542 |
| 543 EXTN(jsimd_h2v1_upsample_sse2): | 543 EXTN(jsimd_h2v1_upsample_sse2): |
| 544 push ebp | 544 push ebp |
| 545 mov ebp,esp | 545 mov ebp,esp |
| 546 ; push ebx ; unused | 546 ; push ebx ; unused |
| 547 ; push ecx ; need not be preserved | 547 ; push ecx ; need not be preserved |
| 548 ; push edx ; need not be preserved | 548 ; push edx ; need not be preserved |
| 549 push esi | 549 push esi |
| 550 push edi | 550 push edi |
| 551 | 551 |
| (...skipping 78 matching lines...) |
| 630 ; JSAMPARRAY input_data, | 630 ; JSAMPARRAY input_data, |
| 631 ; JSAMPARRAY *output_data_ptr); | 631 ; JSAMPARRAY *output_data_ptr); |
| 632 ; | 632 ; |
| 633 | 633 |
| 634 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor | 634 %define max_v_samp(b) (b)+8 ; int max_v_samp_factor |
| 635 %define output_width(b) (b)+12 ; JDIMENSION output_width | 635 %define output_width(b) (b)+12 ; JDIMENSION output_width |
| 636 %define input_data(b) (b)+16 ; JSAMPARRAY input_data | 636 %define input_data(b) (b)+16 ; JSAMPARRAY input_data |
| 637 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr | 637 %define output_data_ptr(b) (b)+20 ; JSAMPARRAY *output_data_ptr |
| 638 | 638 |
| 639 align 16 | 639 align 16 |
| 640 global EXTN(jsimd_h2v2_upsample_sse2) | 640 global EXTN(jsimd_h2v2_upsample_sse2) PRIVATE |
| 641 | 641 |
| 642 EXTN(jsimd_h2v2_upsample_sse2): | 642 EXTN(jsimd_h2v2_upsample_sse2): |
| 643 push ebp | 643 push ebp |
| 644 mov ebp,esp | 644 mov ebp,esp |
| 645 push ebx | 645 push ebx |
| 646 ; push ecx ; need not be preserved | 646 ; push ecx ; need not be preserved |
| 647 ; push edx ; need not be preserved | 647 ; push edx ; need not be preserved |
| 648 push esi | 648 push esi |
| 649 push edi | 649 push edi |
| 650 | 650 |
| (...skipping 69 matching lines...) |
| 720 pop esi | 720 pop esi |
| 721 ; pop edx ; need not be preserved | 721 ; pop edx ; need not be preserved |
| 722 ; pop ecx ; need not be preserved | 722 ; pop ecx ; need not be preserved |
| 723 pop ebx | 723 pop ebx |
| 724 pop ebp | 724 pop ebp |
| 725 ret | 725 ret |
| 726 | 726 |
| 727 ; For some reason, the OS X linker does not honor the request to align the | 727 ; For some reason, the OS X linker does not honor the request to align the |
| 728 ; segment unless we do this. | 728 ; segment unless we do this. |
| 729 align 16 | 729 align 16 |
| OLD | NEW |