Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(71)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm

Issue 958693004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 ; 1 ;
2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
(...skipping 197 matching lines...) Expand 10 before | Expand all | Expand 10 after
208 HIGH_PROCESS_32x2x4 %1, %2, %3, (%2 + 32), (%3 + 32) 208 HIGH_PROCESS_32x2x4 %1, %2, %3, (%2 + 32), (%3 + 32)
209 HIGH_PROCESS_32x2x4 0, %4, %5, (%4 + 32), (%5 + 32), %6 209 HIGH_PROCESS_32x2x4 0, %4, %5, (%4 + 32), (%5 + 32), %6
210 %endmacro 210 %endmacro
211 211
212 ; void vp9_highbd_sadNxNx4d_sse2(uint8_t *src, int src_stride, 212 ; void vp9_highbd_sadNxNx4d_sse2(uint8_t *src, int src_stride,
213 ; uint8_t *ref[4], int ref_stride, 213 ; uint8_t *ref[4], int ref_stride,
214 ; unsigned int res[4]); 214 ; unsigned int res[4]);
215 ; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8 215 ; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8
216 %macro HIGH_SADNXN4D 2 216 %macro HIGH_SADNXN4D 2
217 %if UNIX64 217 %if UNIX64
218 cglobal highbd_sad%1x%2x4d, 5, 9, 8, src, src_stride, ref1, ref_stride, \ 218 cglobal highbd_sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
219 res, ref2, ref3, ref4, one 219 res, ref2, ref3, ref4
220 %else 220 %else
221 cglobal highbd_sad%1x%2x4d, 4, 8, 8, src, src_stride, ref1, ref_stride, \ 221 cglobal highbd_sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
222 ref2, ref3, ref4, one 222 ref2, ref3, ref4
223 %endif 223 %endif
224 224
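225 ; set every dword of m1 to 0x00010001 (each 16-bit word = 1); srcq is borrowed as scratch and restored by the pop below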
226 push srcq
227 mov srcd, 0x00010001
228 movd m1, srcd
229 pshufd m1, m1, 0x0
230 pop srcq
231
225 movsxdifnidn src_strideq, src_strided 232 movsxdifnidn src_strideq, src_strided
226 movsxdifnidn ref_strideq, ref_strided 233 movsxdifnidn ref_strideq, ref_strided
227 mov ref2q, [ref1q+gprsize*1] 234 mov ref2q, [ref1q+gprsize*1]
228 mov ref3q, [ref1q+gprsize*2] 235 mov ref3q, [ref1q+gprsize*2]
229 mov ref4q, [ref1q+gprsize*3] 236 mov ref4q, [ref1q+gprsize*3]
230 mov ref1q, [ref1q+gprsize*0] 237 mov ref1q, [ref1q+gprsize*0]
231 238
232 ; convert byte pointers to short pointers 239 ; convert byte pointers to short pointers
233 shl srcq, 1 240 shl srcq, 1
234 shl ref2q, 1 241 shl ref2q, 1
235 shl ref3q, 1 242 shl ref3q, 1
236 shl ref4q, 1 243 shl ref4q, 1
237 shl ref1q, 1 244 shl ref1q, 1
238 245
239 mov oned, 0x00010001
240 movd m1, oned
241 pshufd m1, m1, 0x0
242
243 HIGH_PROCESS_%1x2x4 1, 0, 0, src_strideq, ref_strideq, 1 246 HIGH_PROCESS_%1x2x4 1, 0, 0, src_strideq, ref_strideq, 1
244 %rep (%2-4)/2 247 %rep (%2-4)/2
245 HIGH_PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 1 248 HIGH_PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 1
246 %endrep 249 %endrep
247 HIGH_PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 0 250 HIGH_PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 0
248 ; N.B. HIGH_PROCESS outputs dwords (32 bits) 251 ; N.B. HIGH_PROCESS outputs dwords (32 bits)
249 ; so in high bit depth even the smallest width (4) needs 128bits i.e. XMM 252 ; so in high bit depth even the smallest width (4) needs 128bits i.e. XMM
250 movhlps m0, m4 253 movhlps m0, m4
251 movhlps m1, m5 254 movhlps m1, m5
252 movhlps m2, m6 255 movhlps m2, m6
(...skipping 22 matching lines...) Expand all
275 HIGH_SADNXN4D 32, 32 278 HIGH_SADNXN4D 32, 32
276 HIGH_SADNXN4D 32, 16 279 HIGH_SADNXN4D 32, 16
277 HIGH_SADNXN4D 16, 32 280 HIGH_SADNXN4D 16, 32
278 HIGH_SADNXN4D 16, 16 281 HIGH_SADNXN4D 16, 16
279 HIGH_SADNXN4D 16, 8 282 HIGH_SADNXN4D 16, 8
280 HIGH_SADNXN4D 8, 16 283 HIGH_SADNXN4D 8, 16
281 HIGH_SADNXN4D 8, 8 284 HIGH_SADNXN4D 8, 8
282 HIGH_SADNXN4D 8, 4 285 HIGH_SADNXN4D 8, 4
283 HIGH_SADNXN4D 4, 8 286 HIGH_SADNXN4D 4, 8
284 HIGH_SADNXN4D 4, 4 287 HIGH_SADNXN4D 4, 4
OLDNEW
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c ('k') | source/libvpx/vp9/encoder/x86/vp9_highbd_subpel_variance.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698