| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 ; | 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; | 9 ; |
| 10 | 10 |
| 11 %define program_name vpx |
| 12 |
| 11 %include "third_party/x86inc/x86inc.asm" | 13 %include "third_party/x86inc/x86inc.asm" |
| 12 | 14 |
| 13 SECTION .text | 15 SECTION .text |
| 14 | 16 |
| 15 ; PROCESS_4x2x4 first, off_{first,second}_{src,ref}, advance_at_end | 17 ; PROCESS_4x2x4 first, off_{first,second}_{src,ref}, advance_at_end |
| 16 %macro PROCESS_4x2x4 5-6 0 | 18 %macro PROCESS_4x2x4 5-6 0 |
| 17 movd m0, [srcq +%2] | 19 movd m0, [srcq +%2] |
| 18 %if %1 == 1 | 20 %if %1 == 1 |
| 19 movd m6, [ref1q+%3] | 21 movd m6, [ref1q+%3] |
| 20 movd m4, [ref2q+%3] | 22 movd m4, [ref2q+%3] |
| (... 139 matching lines skipped ...) |
| 160 PROCESS_16x2x4 %1, %2, %3, %2 + 16, %3 + 16 | 162 PROCESS_16x2x4 %1, %2, %3, %2 + 16, %3 + 16 |
| 161 PROCESS_16x2x4 0, %4, %5, %4 + 16, %5 + 16, %6 | 163 PROCESS_16x2x4 0, %4, %5, %4 + 16, %5 + 16, %6 |
| 162 %endmacro | 164 %endmacro |
| 163 | 165 |
| 164 ; PROCESS_64x2x4 first, off_{first,second}_{src,ref}, advance_at_end -- expands to two PROCESS_32x2x4 invocations, each covering its src/ref offset and that offset + 32; 'first' is forwarded only to the first invocation (the second gets 0) and the optional advance_at_end (6th argument, default 0) only to the second | 166 ; PROCESS_64x2x4 first, off_{first,second}_{src,ref}, advance_at_end -- expands to two PROCESS_32x2x4 invocations, each covering its src/ref offset and that offset + 32; 'first' is forwarded only to the first invocation (the second gets 0) and the optional advance_at_end (6th argument, default 0) only to the second |
| 165 %macro PROCESS_64x2x4 5-6 0 | 167 %macro PROCESS_64x2x4 5-6 0 |
| 166 PROCESS_32x2x4 %1, %2, %3, %2 + 32, %3 + 32 | 168 PROCESS_32x2x4 %1, %2, %3, %2 + 32, %3 + 32 |
| 167 PROCESS_32x2x4 0, %4, %5, %4 + 32, %5 + 32, %6 | 169 PROCESS_32x2x4 0, %4, %5, %4 + 32, %5 + 32, %6 |
| 168 %endmacro | 170 %endmacro |
| 169 | 171 |
| 170 ; void vp9_sadNxNx4d_sse2(uint8_t *src, int src_stride, | 172 ; void vpx_sadNxNx4d_sse2(uint8_t *src, int src_stride, |
| 171 ; uint8_t *ref[4], int ref_stride, | 173 ; uint8_t *ref[4], int ref_stride, |
| 172 ; unsigned int res[4]); | 174 ; uint32_t res[4]); |
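| 173 ; where NxN is any block size instantiated with SADNXN4D below (e.g. 64x64, 32x32, 16x32, 16x16, 16x8, 8x16, 8x8, 8x4, 4x8 or 4x4) | 175 ; where NxN is any block size instantiated with SADNXN4D below (e.g. 64x64, 32x32, 16x32, 16x16, 16x8, 8x16, 8x8, 8x4, 4x8 or 4x4) |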
| 174 %macro SADNXN4D 2 | 176 %macro SADNXN4D 2 |
| 175 %if UNIX64 | 177 %if UNIX64 |
| 176 cglobal sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \ | 178 cglobal sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \ |
| 177 res, ref2, ref3, ref4 | 179 res, ref2, ref3, ref4 |
| 178 %else | 180 %else |
| 179 cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \ | 181 cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \ |
| 180 ref2, ref3, ref4 | 182 ref2, ref3, ref4 |
| 181 %endif | 183 %endif |
| 182 movsxdifnidn src_strideq, src_strided | 184 movsxdifnidn src_strideq, src_strided |
| (... 39 matching lines skipped ...) |
| 222 SADNXN4D 16, 32 | 224 SADNXN4D 16, 32 |
| 223 SADNXN4D 16, 16 | 225 SADNXN4D 16, 16 |
| 224 SADNXN4D 16, 8 | 226 SADNXN4D 16, 8 |
| 225 SADNXN4D 8, 16 | 227 SADNXN4D 8, 16 |
| 226 SADNXN4D 8, 8 | 228 SADNXN4D 8, 8 |
| 227 SADNXN4D 8, 4 | 229 SADNXN4D 8, 4 |
| 228 | 230 |
| 229 INIT_MMX sse | 231 INIT_MMX sse |
| 230 SADNXN4D 4, 8 | 232 SADNXN4D 4, 8 |
| 231 SADNXN4D 4, 4 | 233 SADNXN4D 4, 4 |
| OLD | NEW |