OLD | NEW |
1 ; | 1 ; |
2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
3 ; | 3 ; |
4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
9 ; | 9 ; |
10 | 10 |
(...skipping 197 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
208 HIGH_PROCESS_32x2x4 %1, %2, %3, (%2 + 32), (%3 + 32) | 208 HIGH_PROCESS_32x2x4 %1, %2, %3, (%2 + 32), (%3 + 32) |
209 HIGH_PROCESS_32x2x4 0, %4, %5, (%4 + 32), (%5 + 32), %6 | 209 HIGH_PROCESS_32x2x4 0, %4, %5, (%4 + 32), (%5 + 32), %6 |
210 %endmacro | 210 %endmacro |
211 | 211 |
212 ; void vp9_highbd_sadNxNx4d_sse2(uint8_t *src, int src_stride, | 212 ; void vp9_highbd_sadNxNx4d_sse2(uint8_t *src, int src_stride, |
213 ; uint8_t *ref[4], int ref_stride, | 213 ; uint8_t *ref[4], int ref_stride, |
214 ; unsigned int res[4]); | 214 ; unsigned int res[4]); |
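215 ; where NxN is any size instantiated below, e.g. 64x64, 32x32, 32x16, 16x32, 16x16, 16x8, 8x16, 8x8, 8x4, 4x8 or 4x4 | 215 ; where NxN is any size instantiated below, e.g. 64x64, 32x32, 32x16, 16x32, 16x16, 16x8, 8x16, 8x8, 8x4, 4x8 or 4x4 |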
216 %macro HIGH_SADNXN4D 2 | 216 %macro HIGH_SADNXN4D 2 |
217 %if UNIX64 | 217 %if UNIX64 |
218 cglobal highbd_sad%1x%2x4d, 5, 9, 8, src, src_stride, ref1, ref_stride, \ | 218 cglobal highbd_sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \ |
219 res, ref2, ref3, ref4, one | 219 res, ref2, ref3, ref4 |
220 %else | 220 %else |
221 cglobal highbd_sad%1x%2x4d, 4, 8, 8, src, src_stride, ref1, ref_stride, \ | 221 cglobal highbd_sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \ |
222 ref2, ref3, ref4, one | 222 ref2, ref3, ref4 |
223 %endif | 223 %endif |
224 | 224 |
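| 225 ; m1 = 0x00010001 broadcast to all four dwords (every 16-bit word = 1); src is borrowed as the scratch GPR and restored by the push/pop |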
| 226 push srcq |
| 227 mov srcd, 0x00010001 |
| 228 movd m1, srcd |
| 229 pshufd m1, m1, 0x0 |
| 230 pop srcq |
| 231 |
225 movsxdifnidn src_strideq, src_strided | 232 movsxdifnidn src_strideq, src_strided |
226 movsxdifnidn ref_strideq, ref_strided | 233 movsxdifnidn ref_strideq, ref_strided |
227 mov ref2q, [ref1q+gprsize*1] | 234 mov ref2q, [ref1q+gprsize*1] |
228 mov ref3q, [ref1q+gprsize*2] | 235 mov ref3q, [ref1q+gprsize*2] |
229 mov ref4q, [ref1q+gprsize*3] | 236 mov ref4q, [ref1q+gprsize*3] |
230 mov ref1q, [ref1q+gprsize*0] | 237 mov ref1q, [ref1q+gprsize*0] |
231 | 238 |
232 ; convert byte pointers to short pointers | 239 ; convert byte pointers to short pointers |
233 shl srcq, 1 | 240 shl srcq, 1 |
234 shl ref2q, 1 | 241 shl ref2q, 1 |
235 shl ref3q, 1 | 242 shl ref3q, 1 |
236 shl ref4q, 1 | 243 shl ref4q, 1 |
237 shl ref1q, 1 | 244 shl ref1q, 1 |
238 | 245 |
239 mov oned, 0x00010001 | |
240 movd m1, oned | |
241 pshufd m1, m1, 0x0 | |
242 | |
243 HIGH_PROCESS_%1x2x4 1, 0, 0, src_strideq, ref_strideq, 1 | 246 HIGH_PROCESS_%1x2x4 1, 0, 0, src_strideq, ref_strideq, 1 |
244 %rep (%2-4)/2 | 247 %rep (%2-4)/2 |
245 HIGH_PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 1 | 248 HIGH_PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 1 |
246 %endrep | 249 %endrep |
247 HIGH_PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 0 | 250 HIGH_PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 0 |
248 ; N.B. HIGH_PROCESS outputs dwords (32 bits) | 251 ; N.B. HIGH_PROCESS outputs dwords (32 bits) |
249 ; so in high bit depth even the smallest width (4) needs 128bits i.e. XMM | 252 ; so in high bit depth even the smallest width (4) needs 128bits i.e. XMM |
250 movhlps m0, m4 | 253 movhlps m0, m4 |
251 movhlps m1, m5 | 254 movhlps m1, m5 |
252 movhlps m2, m6 | 255 movhlps m2, m6 |
(...skipping 22 matching lines...) Expand all Loading... |
275 HIGH_SADNXN4D 32, 32 | 278 HIGH_SADNXN4D 32, 32 |
276 HIGH_SADNXN4D 32, 16 | 279 HIGH_SADNXN4D 32, 16 |
277 HIGH_SADNXN4D 16, 32 | 280 HIGH_SADNXN4D 16, 32 |
278 HIGH_SADNXN4D 16, 16 | 281 HIGH_SADNXN4D 16, 16 |
279 HIGH_SADNXN4D 16, 8 | 282 HIGH_SADNXN4D 16, 8 |
280 HIGH_SADNXN4D 8, 16 | 283 HIGH_SADNXN4D 8, 16 |
281 HIGH_SADNXN4D 8, 8 | 284 HIGH_SADNXN4D 8, 8 |
282 HIGH_SADNXN4D 8, 4 | 285 HIGH_SADNXN4D 8, 4 |
283 HIGH_SADNXN4D 4, 8 | 286 HIGH_SADNXN4D 4, 8 |
284 HIGH_SADNXN4D 4, 4 | 287 HIGH_SADNXN4D 4, 4 |
OLD | NEW |