Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(110)

Side by Side Diff: source/libvpx/vp8/encoder/x86/sad_sse3.asm

Issue 7671004: Update libvpx snapshot to v0.9.7-p1 (Cayuga). (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: '' Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « source/libvpx/vp8/encoder/x86/sad_sse2.asm ('k') | source/libvpx/vp8/encoder/x86/sad_ssse3.asm » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 ; 1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
11 %include "vpx_ports/x86_abi_support.asm" 11 %include "vpx_ports/x86_abi_support.asm"
12 12
13 %macro STACK_FRAME_CREATE_X3 0 13 %macro STACK_FRAME_CREATE_X3 0
14 %if ABI_IS_32BIT 14 %if ABI_IS_32BIT
15 %define src_ptr rsi 15 %define src_ptr rsi
16 %define src_stride rax 16 %define src_stride rax
17 %define ref_ptr rdi 17 %define ref_ptr rdi
18 %define ref_stride rdx 18 %define ref_stride rdx
19 %define end_ptr rcx 19 %define end_ptr rcx
20 %define ret_var rbx 20 %define ret_var rbx
21 %define result_ptr arg(4) 21 %define result_ptr arg(4)
22 %define max_err arg(4) 22 %define max_err arg(4)
23 %define height dword ptr arg(4)
23 push rbp 24 push rbp
24 mov rbp, rsp 25 mov rbp, rsp
25 push rsi 26 push rsi
26 push rdi 27 push rdi
27 push rbx 28 push rbx
28 29
29 mov rsi, arg(0) ; src_ptr 30 mov rsi, arg(0) ; src_ptr
30 mov rdi, arg(2) ; ref_ptr 31 mov rdi, arg(2) ; ref_ptr
31 32
32 movsxd rax, dword ptr arg(1) ; src_stride 33 movsxd rax, dword ptr arg(1) ; src_stride
33 movsxd rdx, dword ptr arg(3) ; ref_stride 34 movsxd rdx, dword ptr arg(3) ; ref_stride
34 %else 35 %else
35 %ifidn __OUTPUT_FORMAT__,x64 36 %ifidn __OUTPUT_FORMAT__,x64
37 SAVE_XMM 7, u
36 %define src_ptr rcx 38 %define src_ptr rcx
37 %define src_stride rdx 39 %define src_stride rdx
38 %define ref_ptr r8 40 %define ref_ptr r8
39 %define ref_stride r9 41 %define ref_stride r9
40 %define end_ptr r10 42 %define end_ptr r10
41 %define ret_var r11 43 %define ret_var r11
42 %define result_ptr [rsp+8+4*8] 44 %define result_ptr [rsp+xmm_stack_space+8+4*8]
43 %define max_err [rsp+8+4*8] 45 %define max_err [rsp+xmm_stack_space+8+4*8]
46 %define height dword ptr [rsp+xmm_stack_space+8+4*8]
44 %else 47 %else
45 %define src_ptr rdi 48 %define src_ptr rdi
46 %define src_stride rsi 49 %define src_stride rsi
47 %define ref_ptr rdx 50 %define ref_ptr rdx
48 %define ref_stride rcx 51 %define ref_stride rcx
49 %define end_ptr r9 52 %define end_ptr r9
50 %define ret_var r10 53 %define ret_var r10
51 %define result_ptr r8 54 %define result_ptr r8
52 %define max_err r8 55 %define max_err r8
56 %define height r8
53 %endif 57 %endif
54 %endif 58 %endif
55 59
56 %endmacro 60 %endmacro
57 61
58 %macro STACK_FRAME_DESTROY_X3 0 62 %macro STACK_FRAME_DESTROY_X3 0
59 %define src_ptr 63 %define src_ptr
60 %define src_stride 64 %define src_stride
61 %define ref_ptr 65 %define ref_ptr
62 %define ref_stride 66 %define ref_stride
63 %define end_ptr 67 %define end_ptr
64 %define ret_var 68 %define ret_var
65 %define result_ptr 69 %define result_ptr
66 %define max_err 70 %define max_err
71 %define height
67 72
68 %if ABI_IS_32BIT 73 %if ABI_IS_32BIT
69 pop rbx 74 pop rbx
70 pop rdi 75 pop rdi
71 pop rsi 76 pop rsi
72 pop rbp 77 pop rbp
73 %else 78 %else
74 %ifidn __OUTPUT_FORMAT__,x64 79 %ifidn __OUTPUT_FORMAT__,x64
80 RESTORE_XMM
75 %endif 81 %endif
76 %endif 82 %endif
77 ret 83 ret
78 %endmacro 84 %endmacro
79 85
80 %macro STACK_FRAME_CREATE_X4 0 86 %macro STACK_FRAME_CREATE_X4 0
81 %if ABI_IS_32BIT 87 %if ABI_IS_32BIT
82 %define src_ptr rsi 88 %define src_ptr rsi
83 %define src_stride rax 89 %define src_stride rax
84 %define r0_ptr rcx 90 %define r0_ptr rcx
(...skipping 14 matching lines...) Expand all
99 LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi 105 LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
100 106
101 mov rsi, arg(0) ; src_ptr 107 mov rsi, arg(0) ; src_ptr
102 108
103 movsxd rbx, dword ptr arg(1) ; src_stride 109 movsxd rbx, dword ptr arg(1) ; src_stride
104 movsxd rbp, dword ptr arg(3) ; ref_stride 110 movsxd rbp, dword ptr arg(3) ; ref_stride
105 111
106 xchg rbx, rax 112 xchg rbx, rax
107 %else 113 %else
108 %ifidn __OUTPUT_FORMAT__,x64 114 %ifidn __OUTPUT_FORMAT__,x64
115 SAVE_XMM 7, u
109 %define src_ptr rcx 116 %define src_ptr rcx
110 %define src_stride rdx 117 %define src_stride rdx
111 %define r0_ptr rsi 118 %define r0_ptr rsi
112 %define r1_ptr r10 119 %define r1_ptr r10
113 %define r2_ptr r11 120 %define r2_ptr r11
114 %define r3_ptr r8 121 %define r3_ptr r8
115 %define ref_stride r9 122 %define ref_stride r9
116 %define result_ptr [rsp+16+4*8] 123 %define result_ptr [rsp+xmm_stack_space+16+4*8]
117 push rsi 124 push rsi
118 125
119 LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr 126 LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr
120 %else 127 %else
121 %define src_ptr rdi 128 %define src_ptr rdi
122 %define src_stride rsi 129 %define src_stride rsi
123 %define r0_ptr r9 130 %define r0_ptr r9
124 %define r1_ptr r10 131 %define r1_ptr r10
125 %define r2_ptr r11 132 %define r2_ptr r11
126 %define r3_ptr rdx 133 %define r3_ptr rdx
(...skipping 17 matching lines...) Expand all
144 %define result_ptr 151 %define result_ptr
145 152
146 %if ABI_IS_32BIT 153 %if ABI_IS_32BIT
147 pop rbx 154 pop rbx
148 pop rdi 155 pop rdi
149 pop rsi 156 pop rsi
150 pop rbp 157 pop rbp
151 %else 158 %else
152 %ifidn __OUTPUT_FORMAT__,x64 159 %ifidn __OUTPUT_FORMAT__,x64
153 pop rsi 160 pop rsi
161 RESTORE_XMM
154 %endif 162 %endif
155 %endif 163 %endif
156 ret 164 ret
157 %endmacro 165 %endmacro
158 166
159 %macro PROCESS_16X2X3 5 167 %macro PROCESS_16X2X3 5
160 %if %1==0 168 %if %1==0
161 movdqa xmm0, XMMWORD PTR [%2] 169 movdqa xmm0, XMMWORD PTR [%2]
162 lddqu xmm5, XMMWORD PTR [%3] 170 lddqu xmm5, XMMWORD PTR [%3]
163 lddqu xmm6, XMMWORD PTR [%3+1] 171 lddqu xmm6, XMMWORD PTR [%3+1]
(...skipping 457 matching lines...) Expand 10 before | Expand all | Expand 10 after
621 sub end_ptr, 1 629 sub end_ptr, 1
622 jne .vp8_sad16x16_sse3_loop 630 jne .vp8_sad16x16_sse3_loop
623 631
624 movq xmm0, xmm7 632 movq xmm0, xmm7
625 psrldq xmm7, 8 633 psrldq xmm7, 8
626 paddw xmm0, xmm7 634 paddw xmm0, xmm7
627 movq rax, xmm0 635 movq rax, xmm0
628 636
629 STACK_FRAME_DESTROY_X3 637 STACK_FRAME_DESTROY_X3
630 638
639 ;void vp8_copy32xn_sse3(
640 ; unsigned char *src_ptr,   - first source row; read with unaligned loads (movdqu)
641 ; int src_stride,           - byte step from one source row to the next
642 ; unsigned char *dst_ptr,   - first destination row; written with aligned stores (movdqa)
643 ; int dst_stride,           - byte step from one destination row to the next
644 ; int height);              - number of 32-byte rows to copy
645 global sym(vp8_copy32xn_sse3)
646 sym(vp8_copy32xn_sse3):
647 ; Copies `height` rows of 32 bytes (two XMM registers per row) from src_ptr to dst_ptr: groups of four rows first, then any leftover rows one at a time.
648 STACK_FRAME_CREATE_X3 ; binds src_ptr/src_stride/ref_ptr/ref_stride/end_ptr/height for the target ABI; ref_ptr/ref_stride name the 3rd/4th arguments, i.e. dst_ptr/dst_stride here
649 ; NOTE(review): the 4-row loop below is entered without testing height first, so four rows are always copied; callers presumably pass height >= 4 - confirm.
650 block_copy_sse3_loopx4: ; main loop: four rows per iteration (label is not '.'-local)
651 lea end_ptr, [src_ptr+src_stride*2] ; end_ptr is used as scratch: address of source row 2
652
653 movdqu xmm0, XMMWORD PTR [src_ptr] ; row 0, bytes 0-15
654 movdqu xmm1, XMMWORD PTR [src_ptr + 16] ; row 0, bytes 16-31
655 movdqu xmm2, XMMWORD PTR [src_ptr + src_stride] ; row 1, bytes 0-15
656 movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16] ; row 1, bytes 16-31
657 movdqu xmm4, XMMWORD PTR [end_ptr] ; row 2, bytes 0-15
658 movdqu xmm5, XMMWORD PTR [end_ptr + 16] ; row 2, bytes 16-31
659 movdqu xmm6, XMMWORD PTR [end_ptr + src_stride] ; row 3, bytes 0-15
660 movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16] ; row 3, bytes 16-31
661
662 lea src_ptr, [src_ptr+src_stride*4] ; advance the source by the four rows just loaded
663
664 lea end_ptr, [ref_ptr+ref_stride*2] ; scratch again: address of destination row 2
665 ; movdqa faults on a misaligned address, so every destination row address below must be 16-byte aligned.
666 movdqa XMMWORD PTR [ref_ptr], xmm0 ; row 0, bytes 0-15
667 movdqa XMMWORD PTR [ref_ptr + 16], xmm1 ; row 0, bytes 16-31
668 movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2 ; row 1, bytes 0-15
669 movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3 ; row 1, bytes 16-31
670 movdqa XMMWORD PTR [end_ptr], xmm4 ; row 2, bytes 0-15
671 movdqa XMMWORD PTR [end_ptr + 16], xmm5 ; row 2, bytes 16-31
672 movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6 ; row 3, bytes 0-15
673 movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7 ; row 3, bytes 16-31
674
675 lea ref_ptr, [ref_ptr+ref_stride*4] ; advance the destination by the four rows just stored
676 ; NOTE(review): where STACK_FRAME_CREATE_X3 defines height as r8, the operations below are 64-bit on an int argument - confirm the upper 32 bits are clean.
677 sub height, 4 ; four rows done
678 cmp height, 4 ; is a full group of four still left?
679 jge block_copy_sse3_loopx4 ; signed compare: loop while height >= 4
680
681 ;Check whether any rows remain to be copied.
682 cmp height, 0
683 je copy_is_done ; height was a multiple of 4: nothing left over
684
685 block_copy_sse3_loop: ; tail loop: one 32-byte row per iteration
686 movdqu xmm0, XMMWORD PTR [src_ptr] ; bytes 0-15 of the row
687 movdqu xmm1, XMMWORD PTR [src_ptr + 16] ; bytes 16-31 of the row
688 lea src_ptr, [src_ptr+src_stride] ; next source row
689
690 movdqa XMMWORD PTR [ref_ptr], xmm0 ; aligned store, bytes 0-15
691 movdqa XMMWORD PTR [ref_ptr + 16], xmm1 ; aligned store, bytes 16-31
692 lea ref_ptr, [ref_ptr+ref_stride] ; next destination row
693
694 sub height, 1 ; sets ZF when the remaining-row count reaches zero
695 jne block_copy_sse3_loop ; keep going until the count is exactly zero
696
697 copy_is_done:
698 STACK_FRAME_DESTROY_X3 ; clears the aliases, undoes the prologue for the target ABI, and emits ret
699
631 ;void vp8_sad16x16x4d_sse3( 700 ;void vp8_sad16x16x4d_sse3(
632 ; unsigned char *src_ptr, 701 ; unsigned char *src_ptr,
633 ; int src_stride, 702 ; int src_stride,
634 ; unsigned char *ref_ptr_base, 703 ; unsigned char *ref_ptr_base,
635 ; int ref_stride, 704 ; int ref_stride,
636 ; int *results) 705 ; int *results)
637 global sym(vp8_sad16x16x4d_sse3) 706 global sym(vp8_sad16x16x4d_sse3)
638 sym(vp8_sad16x16x4d_sse3): 707 sym(vp8_sad16x16x4d_sse3):
639 708
640 STACK_FRAME_CREATE_X4 709 STACK_FRAME_CREATE_X4
(...skipping 240 matching lines...) Expand 10 before | Expand all | Expand 10 after
881 punpcklbw mm2, mm1 950 punpcklbw mm2, mm1
882 951
883 movd [rsi+8], mm7 952 movd [rsi+8], mm7
884 psadbw mm2, mm0 953 psadbw mm2, mm0
885 954
886 paddw mm2, mm6 955 paddw mm2, mm6
887 movd [rsi+12], mm2 956 movd [rsi+12], mm2
888 957
889 958
890 STACK_FRAME_DESTROY_X4 959 STACK_FRAME_DESTROY_X4
960
OLDNEW
« no previous file with comments | « source/libvpx/vp8/encoder/x86/sad_sse2.asm ('k') | source/libvpx/vp8/encoder/x86/sad_ssse3.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698