| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; jiss2int-64.asm - accurate integer IDCT (64-bit SSE2) | 2 ; jiss2int-64.asm - accurate integer IDCT (64-bit SSE2) |
| 3 ; | 3 ; |
| 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 ; Copyright 2009 D. R. Commander | 5 ; Copyright 2009 D. R. Commander |
| 6 ; | 6 ; |
| 7 ; Based on | 7 ; Based on |
| 8 ; x86 SIMD extension for IJG JPEG library | 8 ; x86 SIMD extension for IJG JPEG library |
| 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
| (...skipping 497 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 508 | 508 |
| 509 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] | 509 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] |
| 510 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] | 510 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] |
| 511 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] | 511 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] |
| 512 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] | 512 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] |
| 513 | 513 |
| 514 ; ---- Pass 2: process rows from work array, store into output array. | 514 ; ---- Pass 2: process rows from work array, store into output array. |
| 515 | 515 |
| 516 mov rax, [original_rbp] | 516 mov rax, [original_rbp] |
| 517 mov rdi, r12 ; (JSAMPROW *) | 517 mov rdi, r12 ; (JSAMPROW *) |
| 518 » mov» rax, r13 | 518 » mov» eax, r13d |
| 519 | 519 |
| 520 ; -- Even part | 520 ; -- Even part |
| 521 | 521 |
| 522 ; xmm7=col0, xmm1=col2, xmm0=col4, xmm2=col6 | 522 ; xmm7=col0, xmm1=col2, xmm0=col4, xmm2=col6 |
| 523 | 523 |
| 524 ; (Original) | 524 ; (Original) |
| 525 ; z1 = (z2 + z3) * 0.541196100; | 525 ; z1 = (z2 + z3) * 0.541196100; |
| 526 ; tmp2 = z1 + z3 * -1.847759065; | 526 ; tmp2 = z1 + z3 * -1.847759065; |
| 527 ; tmp3 = z1 + z2 * 0.765366865; | 527 ; tmp3 = z1 + z2 * 0.765366865; |
| 528 ; | 528 ; |
| (...skipping 310 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 839 | 839 |
| 840 uncollect_args | 840 uncollect_args |
| 841 mov rsp,rbp ; rsp <- aligned rbp | 841 mov rsp,rbp ; rsp <- aligned rbp |
| 842 pop rsp ; rsp <- original rbp | 842 pop rsp ; rsp <- original rbp |
| 843 pop rbp | 843 pop rbp |
| 844 ret | 844 ret |
| 845 | 845 |
| 846 ; For some reason, the OS X linker does not honor the request to align the | 846 ; For some reason, the OS X linker does not honor the request to align the |
| 847 ; segment unless we do this. | 847 ; segment unless we do this. |
| 848 align 16 | 848 align 16 |
| OLD | NEW |