OLD | NEW |
1 ; | 1 ; |
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 ; | 3 ; |
4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
9 ; | 9 ; |
10 | 10 |
| 11 %define program_name vpx |
| 12 |
11 %include "third_party/x86inc/x86inc.asm" | 13 %include "third_party/x86inc/x86inc.asm" |
12 | 14 |
13 SECTION .text | 15 SECTION .text |
14 | 16 |
15 ; PROCESS_4x2x4 first, off_{first,second}_{src,ref}, advance_at_end | 17 ; PROCESS_4x2x4 first, off_{first,second}_{src,ref}, advance_at_end |
16 %macro PROCESS_4x2x4 5-6 0 | 18 %macro PROCESS_4x2x4 5-6 0 |
17 movd m0, [srcq +%2] | 19 movd m0, [srcq +%2] |
18 %if %1 == 1 | 20 %if %1 == 1 |
19 movd m6, [ref1q+%3] | 21 movd m6, [ref1q+%3] |
20 movd m4, [ref2q+%3] | 22 movd m4, [ref2q+%3] |
(...skipping 139 matching lines...)
160 PROCESS_16x2x4 %1, %2, %3, %2 + 16, %3 + 16 | 162 PROCESS_16x2x4 %1, %2, %3, %2 + 16, %3 + 16 |
161 PROCESS_16x2x4 0, %4, %5, %4 + 16, %5 + 16, %6 | 163 PROCESS_16x2x4 0, %4, %5, %4 + 16, %5 + 16, %6 |
162 %endmacro | 164 %endmacro |
163 | 165 |
164 ; PROCESS_64x2x4 first, off_{first,second}_{src,ref}, advance_at_end | 166 ; PROCESS_64x2x4 first, off_{first,second}_{src,ref}, advance_at_end |
165 %macro PROCESS_64x2x4 5-6 0 | 167 %macro PROCESS_64x2x4 5-6 0 |
166 PROCESS_32x2x4 %1, %2, %3, %2 + 32, %3 + 32 | 168 PROCESS_32x2x4 %1, %2, %3, %2 + 32, %3 + 32 |
167 PROCESS_32x2x4 0, %4, %5, %4 + 32, %5 + 32, %6 | 169 PROCESS_32x2x4 0, %4, %5, %4 + 32, %5 + 32, %6 |
168 %endmacro | 170 %endmacro |
169 | 171 |
170 ; void vp9_sadNxNx4d_sse2(uint8_t *src, int src_stride, | 172 ; void vpx_sadNxNx4d_sse2(uint8_t *src, int src_stride, |
171 ; uint8_t *ref[4], int ref_stride, | 173 ; uint8_t *ref[4], int ref_stride, |
172 ; unsigned int res[4]); | 174 ; uint32_t res[4]); |
173 ; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8 | 175 ; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8 |
174 %macro SADNXN4D 2 | 176 %macro SADNXN4D 2 |
175 %if UNIX64 | 177 %if UNIX64 |
176 cglobal sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \ | 178 cglobal sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \ |
177 res, ref2, ref3, ref4 | 179 res, ref2, ref3, ref4 |
178 %else | 180 %else |
179 cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \ | 181 cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \ |
180 ref2, ref3, ref4 | 182 ref2, ref3, ref4 |
181 %endif | 183 %endif |
182 movsxdifnidn src_strideq, src_strided | 184 movsxdifnidn src_strideq, src_strided |
(...skipping 39 matching lines...)
222 SADNXN4D 16, 32 | 224 SADNXN4D 16, 32 |
223 SADNXN4D 16, 16 | 225 SADNXN4D 16, 16 |
224 SADNXN4D 16, 8 | 226 SADNXN4D 16, 8 |
225 SADNXN4D 8, 16 | 227 SADNXN4D 8, 16 |
226 SADNXN4D 8, 8 | 228 SADNXN4D 8, 8 |
227 SADNXN4D 8, 4 | 229 SADNXN4D 8, 4 |
228 | 230 |
229 INIT_MMX sse | 231 INIT_MMX sse |
230 SADNXN4D 4, 8 | 232 SADNXN4D 4, 8 |
231 SADNXN4D 4, 4 | 233 SADNXN4D 4, 4 |
OLD | NEW |