| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; jdmrgss2.asm - merged upsampling/color conversion (SSE2) | 2 ; jdmrgss2.asm - merged upsampling/color conversion (SSE2) |
| 3 ; | 3 ; |
| 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 ; | 5 ; |
| 6 ; Based on | 6 ; Based on |
| 7 ; x86 SIMD extension for IJG JPEG library | 7 ; x86 SIMD extension for IJG JPEG library |
| 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
| 10 ; | 10 ; |
| 11 ; This file should be assembled with NASM (Netwide Assembler), | 11 ; This file should be assembled with NASM (Netwide Assembler), |
| 12 ; can *not* be assembled with Microsoft's MASM or any compatible | 12 ; can *not* be assembled with Microsoft's MASM or any compatible |
| 13 ; assembler (including Borland's Turbo Assembler). | 13 ; assembler (including Borland's Turbo Assembler). |
| 14 ; NASM is available from http://nasm.sourceforge.net/ or | 14 ; NASM is available from http://nasm.sourceforge.net/ or |
| 15 ; http://sourceforge.net/project/showfiles.php?group_id=6208 | 15 ; http://sourceforge.net/project/showfiles.php?group_id=6208 |
| 16 ; | 16 ; |
| 17 ; [TAB8] | 17 ; [TAB8] |
| 18 | 18 |
| 19 %include "jcolsamp.inc" | 19 %include "jcolsamp.inc" |
| 20 | 20 |
| 21 ; -------------------------------------------------------------------------- | 21 ; -------------------------------------------------------------------------- |
| 22 %ifndef NEED_SECTION |
| 23 %define NEED_SECTION |
| 22 SECTION SEG_TEXT | 24 SECTION SEG_TEXT |
| 23 BITS 32 | 25 BITS 32 |
| 26 %endif |
| 27 |
| 24 ; | 28 ; |
| 25 ; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. | 29 ; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. |
| 26 ; | 30 ; |
| 27 ; GLOBAL(void) | 31 ; GLOBAL(void) |
| 28 ; jsimd_h2v1_merged_upsample_sse2 (JDIMENSION output_width, | 32 ; jsimd_h2v1_merged_upsample_sse2 (JDIMENSION output_width, |
| 29 ; JSAMPIMAGE input_buf, | 33 ; JSAMPIMAGE input_buf, |
| 30 ; JDIMENSION in_row_group_ctr, | 34 ; JDIMENSION in_row_group_ctr, |
| 31 ; JSAMPARRAY output_buf); | 35 ; JSAMPARRAY output_buf); |
| 32 ; | 36 ; |
| 33 | 37 |
| (...skipping 437 matching lines...) |
| 471 add edi, byte SIZEOF_XMMWORD ; outptr | 475 add edi, byte SIZEOF_XMMWORD ; outptr |
| 472 movdqa xmmA,xmmD | 476 movdqa xmmA,xmmD |
| 473 sub ecx, byte SIZEOF_XMMWORD/4 | 477 sub ecx, byte SIZEOF_XMMWORD/4 |
| 474 .column_st15: | 478 .column_st15: |
| 475 %ifdef STRICT_MEMORY_ACCESS | 479 %ifdef STRICT_MEMORY_ACCESS |
| 476 ; Store two pixels (8 bytes) of xmmA to the output when it has enough | 480 ; Store two pixels (8 bytes) of xmmA to the output when it has enough |
| 477 ; space. | 481 ; space. |
| 478 cmp ecx, byte SIZEOF_XMMWORD/8 | 482 cmp ecx, byte SIZEOF_XMMWORD/8 |
| 479 jb short .column_st7 | 483 jb short .column_st7 |
| 480 movq MMWORD [edi], xmmA | 484 movq MMWORD [edi], xmmA |
| 481 » add» edi, byte SIZEOF_XMMWORD/2 | 485 » add» edi, byte SIZEOF_XMMWORD/8*4 |
| 482 sub ecx, byte SIZEOF_XMMWORD/8 | 486 sub ecx, byte SIZEOF_XMMWORD/8 |
| 483 » psrldq» xmmA, 64 | 487 » psrldq» xmmA, SIZEOF_XMMWORD/8*4 |
| 484 .column_st7: | 488 .column_st7: |
| 485 ; Store one pixel (4 bytes) of xmmA to the output when it has enough | 489 ; Store one pixel (4 bytes) of xmmA to the output when it has enough |
| 486 ; space. | 490 ; space. |
| 487 test ecx, ecx | 491 test ecx, ecx |
| 488 jz short .endcolumn | 492 jz short .endcolumn |
| 489 movd DWORD [edi], xmmA | 493 movd DWORD [edi], xmmA |
| 490 %else | 494 %else |
| 491 cmp ecx, byte SIZEOF_XMMWORD/16 | 495 cmp ecx, byte SIZEOF_XMMWORD/16 |
| 492 jb short .endcolumn | 496 jb short .endcolumn |
| 493 mov eax,ecx | 497 mov eax,ecx |
| (...skipping 114 matching lines...) |
| 608 pop esi | 612 pop esi |
| 609 ; pop edx ; need not be preserved | 613 ; pop edx ; need not be preserved |
| 610 ; pop ecx ; need not be preserved | 614 ; pop ecx ; need not be preserved |
| 611 pop ebx | 615 pop ebx |
| 612 pop ebp | 616 pop ebp |
| 613 ret | 617 ret |
| 614 | 618 |
| 615 ; For some reason, the OS X linker does not honor the request to align the | 619 ; For some reason, the OS X linker does not honor the request to align the |
| 616 ; segment unless we do this. | 620 ; segment unless we do this. |
| 617 align 16 | 621 align 16 |
| OLD | NEW |