OLD | NEW |
(Empty) | |
| 1 ; |
| 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; |
| 10 |
| 11 |
| 12 %include "vpx_ports/x86_abi_support.asm" |
| 13 |
| 14 |
;-----------------------------------------------------------------------
;void vp8_copy32xn_sse2(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *dst_ptr,
;    int  dst_stride,
;    int height);
;
; Copies `height` rows of 32 bytes each from src_ptr to dst_ptr, stepping
; src_stride / dst_stride bytes between consecutive rows.
;
; Source rows are loaded with movdqu (no alignment requirement).
; Destination rows are stored with movdqa, so dst_ptr and dst_stride must
; keep every destination row 16-byte aligned.
;
; Rows are copied four at a time while at least four remain, then one at
; a time. A height <= 0 copies nothing.
;
; Register use after the prolog:
;   rsi = current source row         rax = src_stride (sign-extended)
;   rdi = current destination row    rdx = dst_stride (sign-extended)
;   rcx = rows still to copy         xmm0-xmm7 = row data in flight
;
; sym(), arg(), SHADOW_ARGS_TO_STACK, SAVE_XMM, RESTORE_XMM and
; UNSHADOW_ARGS come from vpx_ports/x86_abi_support.asm.
;-----------------------------------------------------------------------
global sym(vp8_copy32xn_sse2) PRIVATE
sym(vp8_copy32xn_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    SAVE_XMM 7
    push        rsi
    push        rdi
    ; end prolog

    mov             rsi,        arg(0) ;src_ptr
    mov             rdi,        arg(2) ;dst_ptr

    movsxd          rax,        dword ptr arg(1) ;src_stride
    movsxd          rdx,        dword ptr arg(3) ;dst_stride
    movsxd          rcx,        dword ptr arg(4) ;height

    ; The unrolled loop always moves four rows, so it must not be entered
    ; with fewer than four rows left (signed compare: height is an int).
    cmp             rcx,        4
    jl              .block_copy_sse2_tail

.block_copy_sse2_loopx4:
    ; invariant: rcx >= 4 here
    ; load rows 0 and 1
    movdqu          xmm0,       XMMWORD PTR [rsi]
    movdqu          xmm1,       XMMWORD PTR [rsi + 16]
    movdqu          xmm2,       XMMWORD PTR [rsi + rax]
    movdqu          xmm3,       XMMWORD PTR [rsi + rax + 16]

    lea             rsi,        [rsi+rax*2]

    ; load rows 2 and 3
    movdqu          xmm4,       XMMWORD PTR [rsi]
    movdqu          xmm5,       XMMWORD PTR [rsi + 16]
    movdqu          xmm6,       XMMWORD PTR [rsi + rax]
    movdqu          xmm7,       XMMWORD PTR [rsi + rax + 16]

    lea             rsi,        [rsi+rax*2]

    ; store rows 0 and 1 (aligned stores)
    movdqa          XMMWORD PTR [rdi], xmm0
    movdqa          XMMWORD PTR [rdi + 16], xmm1
    movdqa          XMMWORD PTR [rdi + rdx], xmm2
    movdqa          XMMWORD PTR [rdi + rdx + 16], xmm3

    lea             rdi,        [rdi+rdx*2]

    ; store rows 2 and 3 (aligned stores)
    movdqa          XMMWORD PTR [rdi], xmm4
    movdqa          XMMWORD PTR [rdi + 16], xmm5
    movdqa          XMMWORD PTR [rdi + rdx], xmm6
    movdqa          XMMWORD PTR [rdi + rdx + 16], xmm7

    lea             rdi,        [rdi+rdx*2]

    sub             rcx,        4
    cmp             rcx,        4
    jge             .block_copy_sse2_loopx4

.block_copy_sse2_tail:
    ; rcx < 4 here; leave if nothing remains. jle also rejects a negative
    ; count so the one-row loop below can never start with rcx <= 0.
    test            rcx,        rcx
    jle             .copy_is_done

.block_copy_sse2_loop:
    ; invariant: rcx > 0 here; copy one 32-byte row per iteration
    movdqu          xmm0,       XMMWORD PTR [rsi]
    movdqu          xmm1,       XMMWORD PTR [rsi + 16]
    lea             rsi,        [rsi+rax]

    movdqa          XMMWORD PTR [rdi], xmm0
    movdqa          XMMWORD PTR [rdi + 16], xmm1
    lea             rdi,        [rdi+rdx]

    sub             rcx,        1
    jne             .block_copy_sse2_loop

.copy_is_done:
    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
OLD | NEW |