| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; jiss2fst-64.asm - fast integer IDCT (64-bit SSE2) | 2 ; jiss2fst-64.asm - fast integer IDCT (64-bit SSE2) |
| 3 ; | 3 ; |
| 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 ; Copyright 2009 D. R. Commander | 5 ; Copyright 2009 D. R. Commander |
| 6 ; | 6 ; |
| 7 ; Based on | 7 ; Based on |
| 8 ; x86 SIMD extension for IJG JPEG library | 8 ; x86 SIMD extension for IJG JPEG library |
| 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
| (...skipping 305 matching lines...) | (...skipping 305 matching lines...) |
| 316 | 316 |
| 317 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] | 317 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] |
| 318 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] | 318 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] |
| 319 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] | 319 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] |
| 320 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] | 320 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] |
| 321 | 321 |
| 322 ; ---- Pass 2: process rows from work array, store into output array. | 322 ; ---- Pass 2: process rows from work array, store into output array. |
| 323 | 323 |
| 324 mov rax, [original_rbp] | 324 mov rax, [original_rbp] |
| 325 mov rdi, r12 ; (JSAMPROW *) | 325 mov rdi, r12 ; (JSAMPROW *) |
| 326 » mov» rax, r13 | 326 » mov» eax, r13d |
| 327 | 327 |
| 328 ; -- Even part | 328 ; -- Even part |
| 329 | 329 |
| 330 ; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6 | 330 ; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6 |
| 331 | 331 |
| 332 movdqa xmm2,xmm6 | 332 movdqa xmm2,xmm6 |
| 333 movdqa xmm0,xmm5 | 333 movdqa xmm0,xmm5 |
| 334 psubw xmm6,xmm1 ; xmm6=tmp11 | 334 psubw xmm6,xmm1 ; xmm6=tmp11 |
| 335 psubw xmm5,xmm3 | 335 psubw xmm5,xmm3 |
| 336 paddw xmm2,xmm1 ; xmm2=tmp10 | 336 paddw xmm2,xmm1 ; xmm2=tmp10 |
| (...skipping 146 matching lines...) | (...skipping 146 matching lines...) |
| 483 uncollect_args | 483 uncollect_args |
| 484 mov rsp,rbp ; rsp <- aligned rbp | 484 mov rsp,rbp ; rsp <- aligned rbp |
| 485 pop rsp ; rsp <- original rbp | 485 pop rsp ; rsp <- original rbp |
| 486 pop rbp | 486 pop rbp |
| 487 ret | 487 ret |
| 488 ret | 488 ret |
| 489 | 489 |
| 490 ; For some reason, the OS X linker does not honor the request to align the | 490 ; For some reason, the OS X linker does not honor the request to align the |
| 491 ; segment unless we do this. | 491 ; segment unless we do this. |
| 492 align 16 | 492 align 16 |
| OLD | NEW |