Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(286)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: only update to last night's LKGR. Created 5 years, 7 months ago.
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 ; 1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
(...skipping 264 matching lines...) Expand 10 before | Expand all | Expand 10 after
275 %endif 275 %endif
276 pmullw m8, m3 ; r4[i] = r3[i] * q 276 pmullw m8, m3 ; r4[i] = r3[i] * q
277 punpckhqdq m3, m3 277 punpckhqdq m3, m3
278 pmullw m13, m3 ; r4[i] = r3[i] * q 278 pmullw m13, m3 ; r4[i] = r3[i] * q
279 %ifidn %1, fp_32x32 279 %ifidn %1, fp_32x32
280 psrlw m8, 1 280 psrlw m8, 1
281 psrlw m13, 1 281 psrlw m13, 1
282 psignw m8, m9 282 psignw m8, m9
283 psignw m13, m10 283 psignw m13, m10
284 psrlw m0, m3, 2 284 psrlw m0, m3, 2
285 %else
286 psrlw m0, m3, 1
285 %endif 287 %endif
286 mova [r4q+ncoeffq*2+ 0], m8 288 mova [r4q+ncoeffq*2+ 0], m8
287 mova [r4q+ncoeffq*2+16], m13 289 mova [r4q+ncoeffq*2+16], m13
288 pcmpeqw m8, m5 ; m8 = c[i] == 0 290 pcmpeqw m8, m5 ; m8 = c[i] == 0
289 pcmpeqw m13, m5 ; m13 = c[i] == 0 291 pcmpeqw m13, m5 ; m13 = c[i] == 0
290 mova m6, [ r5q+ncoeffq*2+ 0] ; m6 = scan[i] 292 mova m6, [ r5q+ncoeffq*2+ 0] ; m6 = scan[i]
291 mova m11, [ r5q+ncoeffq*2+16] ; m11 = scan[i] 293 mova m11, [ r5q+ncoeffq*2+16] ; m11 = scan[i]
292 psubw m6, m7 ; m6 = scan[i] + 1 294 psubw m6, m7 ; m6 = scan[i] + 1
293 psubw m11, m7 ; m11 = scan[i] + 1 295 psubw m11, m7 ; m11 = scan[i] + 1
294 pandn m8, m6 ; m8 = max(eob) 296 pandn m8, m6 ; m8 = max(eob)
295 pandn m13, m11 ; m13 = max(eob) 297 pandn m13, m11 ; m13 = max(eob)
296 pmaxsw m8, m13 298 pmaxsw m8, m13
297 add ncoeffq, mmsize 299 add ncoeffq, mmsize
298 jz .accumulate_eob 300 jz .accumulate_eob
299 301
300 .ac_only_loop: 302 .ac_only_loop:
301 mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i] 303 mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
302 mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] 304 mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
303 pabsw m6, m9 ; m6 = abs(m9) 305 pabsw m6, m9 ; m6 = abs(m9)
304 pabsw m11, m10 ; m11 = abs(m10) 306 pabsw m11, m10 ; m11 = abs(m10)
305 %ifidn %1, fp_32x32 307
306 pcmpgtw m7, m6, m0 308 pcmpgtw m7, m6, m0
307 pcmpgtw m12, m11, m0 309 pcmpgtw m12, m11, m0
308 pmovmskb r6d, m7 310 pmovmskb r6d, m7
309 pmovmskb r2d, m12 311 pmovmskb r2d, m12
310 312
311 or r6, r2 313 or r6, r2
312 jz .skip_iter 314 jz .skip_iter
313 %endif 315
314 pcmpeqw m7, m7 316 pcmpeqw m7, m7
315 317
316 paddsw m6, m1 ; m6 += round 318 paddsw m6, m1 ; m6 += round
317 paddsw m11, m1 ; m11 += round 319 paddsw m11, m1 ; m11 += round
318 pmulhw m14, m6, m2 ; m14 = m6*q>>16 320 pmulhw m14, m6, m2 ; m14 = m6*q>>16
319 pmulhw m13, m11, m2 ; m13 = m11*q>>16 321 pmulhw m13, m11, m2 ; m13 = m11*q>>16
320 psignw m14, m9 ; m14 = reinsert sign 322 psignw m14, m9 ; m14 = reinsert sign
321 psignw m13, m10 ; m13 = reinsert sign 323 psignw m13, m10 ; m13 = reinsert sign
322 mova [r3q+ncoeffq*2+ 0], m14 324 mova [r3q+ncoeffq*2+ 0], m14
323 mova [r3q+ncoeffq*2+16], m13 325 mova [r3q+ncoeffq*2+16], m13
(...skipping 17 matching lines...) Expand all
341 mova m11, [ r5q+ncoeffq*2+16] ; m11 = scan[i] 343 mova m11, [ r5q+ncoeffq*2+16] ; m11 = scan[i]
342 psubw m6, m7 ; m6 = scan[i] + 1 344 psubw m6, m7 ; m6 = scan[i] + 1
343 psubw m11, m7 ; m11 = scan[i] + 1 345 psubw m11, m7 ; m11 = scan[i] + 1
344 pandn m14, m6 ; m14 = max(eob) 346 pandn m14, m6 ; m14 = max(eob)
345 pandn m13, m11 ; m13 = max(eob) 347 pandn m13, m11 ; m13 = max(eob)
346 pmaxsw m8, m14 348 pmaxsw m8, m14
347 pmaxsw m8, m13 349 pmaxsw m8, m13
348 add ncoeffq, mmsize 350 add ncoeffq, mmsize
349 jl .ac_only_loop 351 jl .ac_only_loop
350 352
351 %ifidn %1, fp_32x32
352 jmp .accumulate_eob 353 jmp .accumulate_eob
353 .skip_iter: 354 .skip_iter:
354 mova [r3q+ncoeffq*2+ 0], m5 355 mova [r3q+ncoeffq*2+ 0], m5
355 mova [r3q+ncoeffq*2+16], m5 356 mova [r3q+ncoeffq*2+16], m5
356 mova [r4q+ncoeffq*2+ 0], m5 357 mova [r4q+ncoeffq*2+ 0], m5
357 mova [r4q+ncoeffq*2+16], m5 358 mova [r4q+ncoeffq*2+16], m5
358 add ncoeffq, mmsize 359 add ncoeffq, mmsize
359 jl .ac_only_loop 360 jl .ac_only_loop
360 %endif
361 361
362 .accumulate_eob: 362 .accumulate_eob:
363 ; horizontally accumulate/max eobs and write into [eob] memory pointer 363 ; horizontally accumulate/max eobs and write into [eob] memory pointer
364 mov r2, eobmp 364 mov r2, eobmp
365 pshufd m7, m8, 0xe 365 pshufd m7, m8, 0xe
366 pmaxsw m8, m7 366 pmaxsw m8, m7
367 pshuflw m7, m8, 0xe 367 pshuflw m7, m8, 0xe
368 pmaxsw m8, m7 368 pmaxsw m8, m7
369 pshuflw m7, m8, 0x1 369 pshuflw m7, m8, 0x1
370 pmaxsw m8, m7 370 pmaxsw m8, m7
(...skipping 19 matching lines...) Expand all
390 mova [r2q+ncoeffq*2+16], m7 390 mova [r2q+ncoeffq*2+16], m7
391 add ncoeffq, mmsize 391 add ncoeffq, mmsize
392 jl .blank_loop 392 jl .blank_loop
393 mov word [r3q], 0 393 mov word [r3q], 0
394 RET 394 RET
395 %endmacro 395 %endmacro
396 396
397 INIT_XMM ssse3 397 INIT_XMM ssse3
398 QUANTIZE_FP fp, 7 398 QUANTIZE_FP fp, 7
399 QUANTIZE_FP fp_32x32, 7 399 QUANTIZE_FP fp_32x32, 7
OLD | NEW
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c ('k') | source/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698