Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(28)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_error_sse2.asm

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: only update to last night's LKGR Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 ; 1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
65 movq rax, m4 65 movq rax, m4
66 movq [sszq], m6 66 movq [sszq], m6
67 %else 67 %else
68 mov eax, sszm 68 mov eax, sszm
69 pshufd m5, m4, 0x1 69 pshufd m5, m4, 0x1
70 movq [eax], m6 70 movq [eax], m6
71 movd eax, m4 71 movd eax, m4
72 movd edx, m5 72 movd edx, m5
73 %endif 73 %endif
74 RET 74 RET
75
76 ; Compute the sum of squared differences between two int16_t vectors.
77 ; int64_t vp9_block_error_fp(int16_t *coeff, int16_t *dqcoeff,
78 ; intptr_t block_size)
; Returns the 64-bit sum in rax (x86-64) or in edx:eax (x86-32).
; The loop below always runs at least once and consumes mmsize int16_t
; elements per iteration with no tail handling, so block_size is expected to
; be a positive multiple of mmsize.
; NOTE(review): mova is an aligned access, so both buffers are presumably
; mmsize-byte aligned -- confirm against callers.
79
80 INIT_XMM sse2
81 cglobal block_error_fp, 3, 3, 6, uqc, dqc, size ; 3 args, 3 GPRs, 6 XMM regs (m0-m5)
82 pxor m4, m4 ; sse accumulator
83 pxor m5, m5 ; dedicated zero register
84 lea uqcq, [uqcq+sizeq*2] ; uqc = one past the last coeff element (2 bytes each)
85 lea dqcq, [dqcq+sizeq*2] ; dqc = one past the last dqcoeff element
86 neg sizeq ; negative element index that counts up towards zero
87 .loop:
88 mova m2, [uqcq+sizeq*2] ; coeff, first register's worth of words
89 mova m0, [dqcq+sizeq*2] ; dqcoeff, first register's worth of words
90 mova m3, [uqcq+sizeq*2+mmsize] ; coeff, second register's worth
91 mova m1, [dqcq+sizeq*2+mmsize] ; dqcoeff, second register's worth
92 psubw m0, m2 ; per-word error: dqcoeff - coeff
93 psubw m1, m3
94 ; individual errors are max. 15bit+sign, so squares are 30bit, and
95 ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit)
96 pmaddwd m0, m0 ; square each word, add adjacent pairs into dwords
97 pmaddwd m1, m1
98 ; accumulate in 64bit
99 punpckldq m3, m0, m5 ; m3 = low dwords of m0 interleaved with zero (zero-extended to qwords)
100 punpckhdq m0, m5 ; m0 = high dwords of m0 zero-extended to qwords
101 paddq m4, m3
102 punpckldq m3, m1, m5 ; same widening for the second register (m3 reused as scratch)
103 paddq m4, m0
104 punpckhdq m1, m5
105 paddq m4, m3
106 paddq m4, m1
107 add sizeq, mmsize ; 2*mmsize bytes were consumed = mmsize int16_t elements
108 jl .loop ; keep going while the index is still negative
109
110 ; accumulate horizontally and store in return value
111 movhlps m5, m4 ; m5 low qword = m4 high qword (zero register no longer needed)
112 paddq m4, m5 ; low qword of m4 = total of both 64-bit lanes
113 %if ARCH_X86_64
114 movq rax, m4 ; 64-bit result returned directly in rax
115 %else
116 pshufd m5, m4, 0x1 ; dword 0 of m5 = dword 1 of m4 (upper half of the result)
117 movd eax, m4 ; low 32 bits of the result
118 movd edx, m5 ; high 32 bits of the result (returned as edx:eax)
119 %endif
120 RET
OLDNEW
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c ('k') | source/libvpx/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698