OLD | NEW |
1 ; | 1 ; |
2 ; jdmrgss2.asm - merged upsampling/color conversion (SSE2) | 2 ; jdmrgss2.asm - merged upsampling/color conversion (SSE2) |
3 ; | 3 ; |
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
5 ; | 5 ; |
6 ; Based on | 6 ; Based on |
7 ; x86 SIMD extension for IJG JPEG library | 7 ; x86 SIMD extension for IJG JPEG library |
8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 8 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 9 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
10 ; | 10 ; |
11 ; This file should be assembled with NASM (Netwide Assembler), | 11 ; This file should be assembled with NASM (Netwide Assembler), |
12 ; can *not* be assembled with Microsoft's MASM or any compatible | 12 ; can *not* be assembled with Microsoft's MASM or any compatible |
13 ; assembler (including Borland's Turbo Assembler). | 13 ; assembler (including Borland's Turbo Assembler). |
14 ; NASM is available from http://nasm.sourceforge.net/ or | 14 ; NASM is available from http://nasm.sourceforge.net/ or |
15 ; http://sourceforge.net/project/showfiles.php?group_id=6208 | 15 ; http://sourceforge.net/project/showfiles.php?group_id=6208 |
16 ; | 16 ; |
17 ; [TAB8] | 17 ; [TAB8] |
18 | 18 |
19 %include "jcolsamp.inc" | 19 %include "jcolsamp.inc" |
20 | 20 |
21 ; -------------------------------------------------------------------------- | 21 ; -------------------------------------------------------------------------- |
| 22 %ifndef NEED_SECTION |
| 23 %define NEED_SECTION |
22 SECTION SEG_TEXT | 24 SECTION SEG_TEXT |
23 BITS 32 | 25 BITS 32 |
| 26 %endif |
| 27 |
24 ; | 28 ; |
25 ; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. | 29 ; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical. |
26 ; | 30 ; |
27 ; GLOBAL(void) | 31 ; GLOBAL(void) |
28 ; jsimd_h2v1_merged_upsample_sse2 (JDIMENSION output_width, | 32 ; jsimd_h2v1_merged_upsample_sse2 (JDIMENSION output_width, |
29 ; JSAMPIMAGE input_buf, | 33 ; JSAMPIMAGE input_buf, |
30 ; JDIMENSION in_row_group_ctr, | 34 ; JDIMENSION in_row_group_ctr, |
31 ; JSAMPARRAY output_buf); | 35 ; JSAMPARRAY output_buf); |
32 ; | 36 ; |
33 | 37 |
(...skipping 437 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
471 add edi, byte SIZEOF_XMMWORD ; outptr | 475 add edi, byte SIZEOF_XMMWORD ; outptr |
472 movdqa xmmA,xmmD | 476 movdqa xmmA,xmmD |
473 sub ecx, byte SIZEOF_XMMWORD/4 | 477 sub ecx, byte SIZEOF_XMMWORD/4 |
474 .column_st15: | 478 .column_st15: |
475 %ifdef STRICT_MEMORY_ACCESS | 479 %ifdef STRICT_MEMORY_ACCESS |
476 ; Store two pixels (8 bytes) of xmmA to the output when it has enough | 480 ; Store two pixels (8 bytes) of xmmA to the output when it has enough |
477 ; space. | 481 ; space. |
478 cmp ecx, byte SIZEOF_XMMWORD/8 | 482 cmp ecx, byte SIZEOF_XMMWORD/8 |
479 jb short .column_st7 | 483 jb short .column_st7 |
480 movq MMWORD [edi], xmmA | 484 movq MMWORD [edi], xmmA |
481 » add» edi, byte SIZEOF_XMMWORD/2 | 485 » add» edi, byte SIZEOF_XMMWORD/8*4 |
482 sub ecx, byte SIZEOF_XMMWORD/8 | 486 sub ecx, byte SIZEOF_XMMWORD/8 |
483 » psrldq» xmmA, 64 | 487 » psrldq» xmmA, SIZEOF_XMMWORD/8*4 |
484 .column_st7: | 488 .column_st7: |
485 ; Store one pixel (4 bytes) of xmmA to the output when it has enough | 489 ; Store one pixel (4 bytes) of xmmA to the output when it has enough |
486 ; space. | 490 ; space. |
487 test ecx, ecx | 491 test ecx, ecx |
488 jz short .endcolumn | 492 jz short .endcolumn |
489 movd DWORD [edi], xmmA | 493 movd DWORD [edi], xmmA |
490 %else | 494 %else |
491 cmp ecx, byte SIZEOF_XMMWORD/16 | 495 cmp ecx, byte SIZEOF_XMMWORD/16 |
492 jb short .endcolumn | 496 jb short .endcolumn |
493 mov eax,ecx | 497 mov eax,ecx |
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
608 pop esi | 612 pop esi |
609 ; pop edx ; need not be preserved | 613 ; pop edx ; need not be preserved |
610 ; pop ecx ; need not be preserved | 614 ; pop ecx ; need not be preserved |
611 pop ebx | 615 pop ebx |
612 pop ebp | 616 pop ebp |
613 ret | 617 ret |
614 | 618 |
615 ; For some reason, the OS X linker does not honor the request to align the | 619 ; For some reason, the OS X linker does not honor the request to align the |
616 ; segment unless we do this. | 620 ; segment unless we do this. |
617 align 16 | 621 align 16 |
OLD | NEW |