OLD | NEW |
1 ; | 1 ; |
2 ; jiss2fst-64.asm - fast integer IDCT (64-bit SSE2) | 2 ; jiss2fst-64.asm - fast integer IDCT (64-bit SSE2) |
3 ; | 3 ; |
4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB | 4 ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
5 ; Copyright 2009 D. R. Commander | 5 ; Copyright 2009 D. R. Commander |
6 ; | 6 ; |
7 ; Based on | 7 ; Based on |
8 ; x86 SIMD extension for IJG JPEG library | 8 ; x86 SIMD extension for IJG JPEG library |
9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. | 9 ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc | 10 ; For conditions of distribution and use, see copyright notice in jsimdext.inc |
(...skipping 305 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
316 | 316 |
; Four prefetch hints (NTA variant) at 32-byte steps, starting at
; rsi + DCTSIZE2*SIZEOF_JCOEF.
; NOTE(review): presumably rsi points at the current coefficient block, so
; these warm the block that follows it -- confirm against the code above
; this excerpt.
317 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] | 317 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32] |
318 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] | 318 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32] |
319 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] | 319 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32] |
320 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] | 320 prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32] |
321 | 321 |
322 ; ---- Pass 2: process rows from work array, store into output array. | 322 ; ---- Pass 2: process rows from work array, store into output array. |
323 | 323 |
; Pass 2 setup: rdi receives r12 (annotated as the JSAMPROW * pointer) and
; rax receives the value held in r13.
; NOTE(review): the load from [original_rbp] into rax on row 324 is
; overwritten on row 326 with no read of rax in between, so it appears to be
; a dead load -- confirm and consider removing it in a follow-up change.
324 mov rax, [original_rbp] | 324 mov rax, [original_rbp] |
325 mov rdi, r12 ; (JSAMPROW *) | 325 mov rdi, r12 ; (JSAMPROW *) |
; NOTE(review): row 326 is the only row in this excerpt whose OLD and NEW
; columns differ. OLD copies all 64 bits of r13 into rax; NEW writes eax from
; r13d, which takes only the low 32 bits and zero-extends them into rax.
; Presumably r13 carries a 32-bit argument whose upper half is not guaranteed
; to be clean -- TODO confirm against the argument-collection code.
326 » mov» rax, r13 | 326 » mov» eax, r13d |
327 | 327 |
328 ; -- Even part | 328 ; -- Even part |
329 | 329 |
330 ; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6 | 330 ; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6 |
331 | 331 |
; Start of the even-part butterflies on packed 16-bit words, using the
; register assignment given in the comment on row 330:
;   xmm2 = col0 + col4  (tmp10)
;   xmm6 = col0 - col4  (tmp11)
;   xmm5 = col2 - col6
; xmm0 is left holding an unmodified copy of col2; presumably it is consumed
; by the rows elided after this excerpt.
332 movdqa xmm2,xmm6 | 332 movdqa xmm2,xmm6 |
333 movdqa xmm0,xmm5 | 333 movdqa xmm0,xmm5 |
334 psubw xmm6,xmm1 ; xmm6=tmp11 | 334 psubw xmm6,xmm1 ; xmm6=tmp11 |
335 psubw xmm5,xmm3 | 335 psubw xmm5,xmm3 |
336 paddw xmm2,xmm1 ; xmm2=tmp10 | 336 paddw xmm2,xmm1 ; xmm2=tmp10 |
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
; Function epilogue. uncollect_args is a macro defined outside this excerpt
; (presumably it restores the registers saved when the arguments were
; collected -- confirm in the include file). After it, rsp is reset to the
; aligned frame base held in rbp, the value stored at that location is popped
; back into rsp, and the caller's rbp is popped before returning.
483 uncollect_args | 483 uncollect_args |
484 mov rsp,rbp ; rsp <- aligned rbp | 484 mov rsp,rbp ; rsp <- aligned rbp |
485 pop rsp ; rsp <- original rbp | 485 pop rsp ; rsp <- original rbp |
486 pop rbp | 486 pop rbp |
487 ret | 487 ret |
; NOTE(review): this second ret directly follows an unconditional ret with no
; label between them, so it looks unreachable -- likely a leftover duplicate
; that can be dropped in a follow-up change.
488 ret | 488 ret |
489 | 489 |
490 ; For some reason, the OS X linker does not honor the request to align the | 490 ; For some reason, the OS X linker does not honor the request to align the |
491 ; segment unless we do this. | 491 ; segment unless we do this. |
492 align 16 | 492 align 16 |
OLD | NEW |