| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; jdmrgss2-64.asm - merged upsampling/color conversion (64-bit SSE2) | 2 ; jdmrgss2-64.asm - merged upsampling/color conversion (64-bit SSE2) |
| 3 ; | 3 ; |
| 4 ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 ; Copyright 2009, 2012 D. R. Commander | 5 ; Copyright 2009, 2012 D. R. Commander |
| 6 ; | 6 ; |
| 7 ; Based on | 7 ; Based on |
| 8 ; x86 SIMD extension for IJG JPEG library | 8 ; x86 SIMD extension for IJG JPEG library |
| 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 45 push rbp | 45 push rbp |
| 46 mov rax,rsp ; rax = unaligned frame pointer (rsp after push rbp) | 46 mov rax,rsp ; rax = unaligned frame pointer (rsp after push rbp) |
| 47 sub rsp, byte 4 | 47 sub rsp, byte 4 |
| 48 and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits | 48 and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits |
| 49 mov [rsp],rax | 49 mov [rsp],rax |
| 50 mov rbp,rsp ; rbp = aligned rbp | 50 mov rbp,rsp ; rbp = aligned rbp |
| 51 lea rsp, [wk(0)] | 51 lea rsp, [wk(0)] |
| 52 collect_args | 52 collect_args |
| 53 push rbx | 53 push rbx |
| 54 | 54 |
| 55 » mov» rcx, r10» ; col | 55 » mov» ecx, r10d» ; col |
| 56 test rcx,rcx | 56 test rcx,rcx |
| 57 jz near .return | 57 jz near .return |
| 58 | 58 |
| 59 push rcx | 59 push rcx |
| 60 | 60 |
| 61 mov rdi, r11 | 61 mov rdi, r11 |
| 62 » mov» rcx, r12 | 62 » mov» ecx, r12d |
| 63 mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] | 63 mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] |
| 64 mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] | 64 mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] |
| 65 mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] | 65 mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] |
| 66 mov rdi, r13 | 66 mov rdi, r13 |
| 67 mov rsi, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW] ; inptr0 | 67 mov rsi, JSAMPROW [rsi+rcx*SIZEOF_JSAMPROW] ; inptr0 |
| 68 mov rbx, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW] ; inptr1 | 68 mov rbx, JSAMPROW [rbx+rcx*SIZEOF_JSAMPROW] ; inptr1 |
| 69 mov rdx, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW] ; inptr2 | 69 mov rdx, JSAMPROW [rdx+rcx*SIZEOF_JSAMPROW] ; inptr2 |
| 70 mov rdi, JSAMPROW [rdi] ; outptr | 70 mov rdi, JSAMPROW [rdi] ; outptr |
| 71 | 71 |
| 72 pop rcx ; col | 72 pop rcx ; col |
| (...skipping 375 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 448 align 16 | 448 align 16 |
| 449 global EXTN(jsimd_h2v2_merged_upsample_sse2) PRIVATE | 449 global EXTN(jsimd_h2v2_merged_upsample_sse2) PRIVATE |
| 450 | 450 |
| 451 EXTN(jsimd_h2v2_merged_upsample_sse2): | 451 EXTN(jsimd_h2v2_merged_upsample_sse2): |
| 452 push rbp | 452 push rbp |
| 453 mov rax,rsp | 453 mov rax,rsp |
| 454 mov rbp,rsp | 454 mov rbp,rsp |
| 455 collect_args | 455 collect_args |
| 456 push rbx | 456 push rbx |
| 457 | 457 |
| 458 » mov» rax, r10 | 458 » mov» eax, r10d |
| 459 | 459 |
| 460 mov rdi, r11 | 460 mov rdi, r11 |
| 461 » mov» rcx, r12 | 461 » mov» ecx, r12d |
| 462 mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] | 462 mov rsi, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY] |
| 463 mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] | 463 mov rbx, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY] |
| 464 mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] | 464 mov rdx, JSAMPARRAY [rdi+2*SIZEOF_JSAMPARRAY] |
| 465 mov rdi, r13 | 465 mov rdi, r13 |
| 466 lea rsi, [rsi+rcx*SIZEOF_JSAMPROW] | 466 lea rsi, [rsi+rcx*SIZEOF_JSAMPROW] |
| 467 | 467 |
| 468 push rdx ; inptr2 | 468 push rdx ; inptr2 |
| 469 push rbx ; inptr1 | 469 push rbx ; inptr1 |
| 470 push rsi ; inptr00 | 470 push rsi ; inptr00 |
| 471 mov rbx,rsp | 471 mov rbx,rsp |
| (...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 529 pop rdx | 529 pop rdx |
| 530 | 530 |
| 531 pop rbx | 531 pop rbx |
| 532 uncollect_args | 532 uncollect_args |
| 533 pop rbp | 533 pop rbp |
| 534 ret | 534 ret |
| 535 | 535 |
| 536 ; For some reason, the OS X linker does not honor the request to align the | 536 ; For some reason, the OS X linker does not honor the request to align the |
| 537 ; segment unless we do this. | 537 ; segment unless we do this. |
| 538 align 16 | 538 align 16 |
| OLD | NEW |