Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(170)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm

Issue 812033011: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 ; 1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
11 %include "third_party/x86inc/x86inc.asm" 11 %include "third_party/x86inc/x86inc.asm"
12 12
13 SECTION_RODATA 13 SECTION_RODATA
14 pw_1: times 8 dw 1 14 pw_1: times 8 dw 1
15 15
16 SECTION .text 16 SECTION .text
17 17
18 %macro QUANTIZE_FN 2 18 %macro QUANTIZE_FN 2
19 cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ 19 cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
20 shift, qcoeff, dqcoeff, dequant, zbin_oq, \ 20 shift, qcoeff, dqcoeff, dequant, \
21 eob, scan, iscan 21 eob, scan, iscan
22 cmp dword skipm, 0 22 cmp dword skipm, 0
23 jne .blank 23 jne .blank
24 24
25 ; actual quantize loop - setup pointers, rounders, etc. 25 ; actual quantize loop - setup pointers, rounders, etc.
26 movifnidn coeffq, coeffmp 26 movifnidn coeffq, coeffmp
27 movifnidn ncoeffq, ncoeffmp 27 movifnidn ncoeffq, ncoeffmp
28 mov r2, dequantmp 28 mov r2, dequantmp
29 movifnidn zbinq, zbinmp 29 movifnidn zbinq, zbinmp
30 movifnidn roundq, roundmp 30 movifnidn roundq, roundmp
31 movifnidn quantq, quantmp 31 movifnidn quantq, quantmp
32 movd m4, dword zbin_oqm ; m4 = zbin_oq
33 mova m0, [zbinq] ; m0 = zbin 32 mova m0, [zbinq] ; m0 = zbin
34 punpcklwd m4, m4
35 mova m1, [roundq] ; m1 = round 33 mova m1, [roundq] ; m1 = round
36 pshufd m4, m4, 0
37 mova m2, [quantq] ; m2 = quant 34 mova m2, [quantq] ; m2 = quant
38 paddw m0, m4 ; m0 = zbin + zbin_oq
39 %ifidn %1, b_32x32 35 %ifidn %1, b_32x32
40 pcmpeqw m5, m5 36 pcmpeqw m5, m5
41 psrlw m5, 15 37 psrlw m5, 15
42 paddw m0, m5 38 paddw m0, m5
43 paddw m1, m5 39 paddw m1, m5
44 psrlw m0, 1 ; m0 = (m0 + 1) / 2 40 psrlw m0, 1 ; m0 = (m0 + 1) / 2
45 psrlw m1, 1 ; m1 = (m1 + 1) / 2 41 psrlw m1, 1 ; m1 = (m1 + 1) / 2
46 %endif 42 %endif
47 mova m3, [r2q] ; m3 = dequant 43 mova m3, [r2q] ; m3 = dequant
48 psubw m0, [pw_1] 44 psubw m0, [pw_1]
49 mov r2, shiftmp 45 mov r2, shiftmp
50 mov r3, qcoeffmp 46 mov r3, qcoeffmp
51 mova m4, [r2] ; m4 = shift 47 mova m4, [r2] ; m4 = shift
52 mov r4, dqcoeffmp 48 mov r4, dqcoeffmp
53 mov r5, iscanmp 49 mov r5, iscanmp
54 %ifidn %1, b_32x32 50 %ifidn %1, b_32x32
55 psllw m4, 1 51 psllw m4, 1
56 %endif 52 %endif
57 pxor m5, m5 ; m5 = dedicated zero 53 pxor m5, m5 ; m5 = dedicated zero
58 DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob 54 DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob
59 lea coeffq, [ coeffq+ncoeffq*2] 55 lea coeffq, [ coeffq+ncoeffq*2]
60 lea iscanq, [ iscanq+ncoeffq*2] 56 lea iscanq, [ iscanq+ncoeffq*2]
61 lea qcoeffq, [ qcoeffq+ncoeffq*2] 57 lea qcoeffq, [ qcoeffq+ncoeffq*2]
62 lea dqcoeffq, [dqcoeffq+ncoeffq*2] 58 lea dqcoeffq, [dqcoeffq+ncoeffq*2]
63 neg ncoeffq 59 neg ncoeffq
64 60
65 ; get DC and first 15 AC coeffs 61 ; get DC and first 15 AC coeffs
66 mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i] 62 mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
67 mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] 63 mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
68 pabsw m6, m9 ; m6 = abs(m9) 64 pabsw m6, m9 ; m6 = abs(m9)
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
115 jz .accumulate_eob 111 jz .accumulate_eob
116 112
117 .ac_only_loop: 113 .ac_only_loop:
118 mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i] 114 mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
119 mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] 115 mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
120 pabsw m6, m9 ; m6 = abs(m9) 116 pabsw m6, m9 ; m6 = abs(m9)
121 pabsw m11, m10 ; m11 = abs(m10) 117 pabsw m11, m10 ; m11 = abs(m10)
122 pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin 118 pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
123 pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin 119 pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
124 %ifidn %1, b_32x32 120 %ifidn %1, b_32x32
125 pmovmskb r6, m7 121 pmovmskb r6d, m7
126 pmovmskb r2, m12 122 pmovmskb r2d, m12
127 or r6, r2 123 or r6, r2
128 jz .skip_iter 124 jz .skip_iter
129 %endif 125 %endif
130 paddsw m6, m1 ; m6 += round 126 paddsw m6, m1 ; m6 += round
131 paddsw m11, m1 ; m11 += round 127 paddsw m11, m1 ; m11 += round
132 pmulhw m14, m6, m2 ; m14 = m6*q>>16 128 pmulhw m14, m6, m2 ; m14 = m6*q>>16
133 pmulhw m13, m11, m2 ; m13 = m11*q>>16 129 pmulhw m13, m11, m2 ; m13 = m11*q>>16
134 paddw m14, m6 ; m14 += m6 130 paddw m14, m6 ; m14 += m6
135 paddw m13, m11 ; m13 += m11 131 paddw m13, m11 ; m13 += m11
136 pmulhw m14, m4 ; m14 = m14*qsh>>16 132 pmulhw m14, m4 ; m14 = m14*qsh>>16
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
213 mov word [eobq], 0 209 mov word [eobq], 0
214 RET 210 RET
215 %endmacro 211 %endmacro
216 212
217 INIT_XMM ssse3 213 INIT_XMM ssse3
218 QUANTIZE_FN b, 7 214 QUANTIZE_FN b, 7
219 QUANTIZE_FN b_32x32, 7 215 QUANTIZE_FN b_32x32, 7
220 216
221 %macro QUANTIZE_FP 2 217 %macro QUANTIZE_FP 2
222 cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ 218 cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
223 shift, qcoeff, dqcoeff, dequant, zbin_oq, \ 219 shift, qcoeff, dqcoeff, dequant, \
224 eob, scan, iscan 220 eob, scan, iscan
225 cmp dword skipm, 0 221 cmp dword skipm, 0
226 jne .blank 222 jne .blank
227 223
228 ; actual quantize loop - setup pointers, rounders, etc. 224 ; actual quantize loop - setup pointers, rounders, etc.
229 movifnidn coeffq, coeffmp 225 movifnidn coeffq, coeffmp
230 movifnidn ncoeffq, ncoeffmp 226 movifnidn ncoeffq, ncoeffmp
231 mov r2, dequantmp 227 mov r2, dequantmp
232 movifnidn zbinq, zbinmp 228 movifnidn zbinq, zbinmp
233 movifnidn roundq, roundmp 229 movifnidn roundq, roundmp
234 movifnidn quantq, quantmp 230 movifnidn quantq, quantmp
235 mova m1, [roundq] ; m1 = round 231 mova m1, [roundq] ; m1 = round
236 mova m2, [quantq] ; m2 = quant 232 mova m2, [quantq] ; m2 = quant
237 %ifidn %1, fp_32x32 233 %ifidn %1, fp_32x32
238 pcmpeqw m5, m5 234 pcmpeqw m5, m5
239 psrlw m5, 15 235 psrlw m5, 15
240 paddw m1, m5 236 paddw m1, m5
241 psrlw m1, 1 ; m1 = (m1 + 1) / 2 237 psrlw m1, 1 ; m1 = (m1 + 1) / 2
242 %endif 238 %endif
243 mova m3, [r2q] ; m3 = dequant 239 mova m3, [r2q] ; m3 = dequant
244 mov r3, qcoeffmp 240 mov r3, qcoeffmp
245 mov r4, dqcoeffmp 241 mov r4, dqcoeffmp
246 mov r5, iscanmp 242 mov r5, iscanmp
247 %ifidn %1, fp_32x32 243 %ifidn %1, fp_32x32
248 psllw m2, 1 244 psllw m2, 1
249 %endif 245 %endif
250 pxor m5, m5 ; m5 = dedicated zero 246 pxor m5, m5 ; m5 = dedicated zero
251 DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob 247 DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob
252 lea coeffq, [ coeffq+ncoeffq*2] 248 lea coeffq, [ coeffq+ncoeffq*2]
253 lea iscanq, [ iscanq+ncoeffq*2] 249 lea iscanq, [ iscanq+ncoeffq*2]
254 lea qcoeffq, [ qcoeffq+ncoeffq*2] 250 lea qcoeffq, [ qcoeffq+ncoeffq*2]
255 lea dqcoeffq, [dqcoeffq+ncoeffq*2] 251 lea dqcoeffq, [dqcoeffq+ncoeffq*2]
256 neg ncoeffq 252 neg ncoeffq
257 253
258 ; get DC and first 15 AC coeffs 254 ; get DC and first 15 AC coeffs
259 mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i] 255 mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
260 mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] 256 mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
261 pabsw m6, m9 ; m6 = abs(m9) 257 pabsw m6, m9 ; m6 = abs(m9)
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
301 jz .accumulate_eob 297 jz .accumulate_eob
302 298
303 .ac_only_loop: 299 .ac_only_loop:
304 mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i] 300 mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
305 mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] 301 mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
306 pabsw m6, m9 ; m6 = abs(m9) 302 pabsw m6, m9 ; m6 = abs(m9)
307 pabsw m11, m10 ; m11 = abs(m10) 303 pabsw m11, m10 ; m11 = abs(m10)
308 %ifidn %1, fp_32x32 304 %ifidn %1, fp_32x32
309 pcmpgtw m7, m6, m0 305 pcmpgtw m7, m6, m0
310 pcmpgtw m12, m11, m0 306 pcmpgtw m12, m11, m0
311 pmovmskb r6, m7 307 pmovmskb r6d, m7
312 pmovmskb r2, m12 308 pmovmskb r2d, m12
313 309
314 or r6, r2 310 or r6, r2
315 jz .skip_iter 311 jz .skip_iter
316 %endif 312 %endif
317 pcmpeqw m7, m7 313 pcmpeqw m7, m7
318 314
319 paddsw m6, m1 ; m6 += round 315 paddsw m6, m1 ; m6 += round
320 paddsw m11, m1 ; m11 += round 316 paddsw m11, m1 ; m11 += round
321 pmulhw m14, m6, m2 ; m14 = m6*q>>16 317 pmulhw m14, m6, m2 ; m14 = m6*q>>16
322 pmulhw m13, m11, m2 ; m13 = m11*q>>16 318 pmulhw m13, m11, m2 ; m13 = m11*q>>16
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
393 mova [qcoeffq+ncoeffq*2+16], m7 389 mova [qcoeffq+ncoeffq*2+16], m7
394 add ncoeffq, mmsize 390 add ncoeffq, mmsize
395 jl .blank_loop 391 jl .blank_loop
396 mov word [eobq], 0 392 mov word [eobq], 0
397 RET 393 RET
398 %endmacro 394 %endmacro
399 395
400 INIT_XMM ssse3 396 INIT_XMM ssse3
401 QUANTIZE_FP fp, 7 397 QUANTIZE_FP fp, 7
402 QUANTIZE_FP fp_32x32, 7 398 QUANTIZE_FP fp_32x32, 7
OLD | NEW
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c ('k') | source/libvpx/vp9/encoder/x86/vp9_subpel_variance.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698