Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(31)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: only update to last night's LKGR Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 275 matching lines...) Expand 10 before | Expand all | Expand 10 after
286 } 286 }
287 287
288 iscan_ptr += n_coeffs; 288 iscan_ptr += n_coeffs;
289 qcoeff_ptr += n_coeffs; 289 qcoeff_ptr += n_coeffs;
290 dqcoeff_ptr += n_coeffs; 290 dqcoeff_ptr += n_coeffs;
291 n_coeffs = -n_coeffs; 291 n_coeffs = -n_coeffs;
292 zero = _mm_setzero_si128(); 292 zero = _mm_setzero_si128();
293 293
294 if (!skip_block) { 294 if (!skip_block) {
295 __m128i eob; 295 __m128i eob;
296 __m128i round, quant, dequant; 296 __m128i round, quant, dequant, thr;
297 int16_t nzflag;
297 { 298 {
298 __m128i coeff0, coeff1; 299 __m128i coeff0, coeff1;
299 300
300 // Setup global values 301 // Setup global values
301 { 302 {
302 round = _mm_load_si128((const __m128i*)round_ptr); 303 round = _mm_load_si128((const __m128i*)round_ptr);
303 quant = _mm_load_si128((const __m128i*)quant_ptr); 304 quant = _mm_load_si128((const __m128i*)quant_ptr);
304 dequant = _mm_load_si128((const __m128i*)dequant_ptr); 305 dequant = _mm_load_si128((const __m128i*)dequant_ptr);
305 } 306 }
306 307
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
361 iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1); 362 iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
362 eob = _mm_and_si128(iscan0, nzero_coeff0); 363 eob = _mm_and_si128(iscan0, nzero_coeff0);
363 eob1 = _mm_and_si128(iscan1, nzero_coeff1); 364 eob1 = _mm_and_si128(iscan1, nzero_coeff1);
364 eob = _mm_max_epi16(eob, eob1); 365 eob = _mm_max_epi16(eob, eob1);
365 } 366 }
366 n_coeffs += 8 * 2; 367 n_coeffs += 8 * 2;
367 } 368 }
368 369
369 // AC only loop 370 // AC only loop
370 index = 2; 371 index = 2;
372 thr = _mm_srai_epi16(dequant, 1);
371 while (n_coeffs < 0) { 373 while (n_coeffs < 0) {
372 __m128i coeff0, coeff1; 374 __m128i coeff0, coeff1;
373 { 375 {
374 __m128i coeff0_sign, coeff1_sign; 376 __m128i coeff0_sign, coeff1_sign;
375 __m128i qcoeff0, qcoeff1; 377 __m128i qcoeff0, qcoeff1;
376 __m128i qtmp0, qtmp1; 378 __m128i qtmp0, qtmp1;
377 379
378 assert(index < (int)(sizeof(in) / sizeof(in[0])) - 1); 380 assert(index < (int)(sizeof(in) / sizeof(in[0])) - 1);
379 coeff0 = *in[index]; 381 coeff0 = *in[index];
380 coeff1 = *in[index + 1]; 382 coeff1 = *in[index + 1];
381 383
382 // Poor man's sign extract 384 // Poor man's sign extract
383 coeff0_sign = _mm_srai_epi16(coeff0, 15); 385 coeff0_sign = _mm_srai_epi16(coeff0, 15);
384 coeff1_sign = _mm_srai_epi16(coeff1, 15); 386 coeff1_sign = _mm_srai_epi16(coeff1, 15);
385 qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign); 387 qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign);
386 qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign); 388 qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign);
387 qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); 389 qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
388 qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); 390 qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
389 391
390 qcoeff0 = _mm_adds_epi16(qcoeff0, round); 392 nzflag = _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff0, thr)) |
391 qcoeff1 = _mm_adds_epi16(qcoeff1, round); 393 _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
392 qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
393 qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
394 394
395 // Reinsert signs 395 if (nzflag) {
396 qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign); 396 qcoeff0 = _mm_adds_epi16(qcoeff0, round);
397 qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign); 397 qcoeff1 = _mm_adds_epi16(qcoeff1, round);
398 qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); 398 qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
399 qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); 399 qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
400 400
401 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0); 401 // Reinsert signs
402 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1); 402 qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign);
403 qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign);
404 qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
405 qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
403 406
404 coeff0 = _mm_mullo_epi16(qcoeff0, dequant); 407 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
405 coeff1 = _mm_mullo_epi16(qcoeff1, dequant); 408 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
406 409
407 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0); 410 coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
408 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1); 411 coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
412
413 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
414 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
415 } else {
416 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
417 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
418
419 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
420 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
421 }
409 } 422 }
410 423
411 { 424 if (nzflag) {
412 // Scan for eob 425 // Scan for eob
413 __m128i zero_coeff0, zero_coeff1; 426 __m128i zero_coeff0, zero_coeff1;
414 __m128i nzero_coeff0, nzero_coeff1; 427 __m128i nzero_coeff0, nzero_coeff1;
415 __m128i iscan0, iscan1; 428 __m128i iscan0, iscan1;
416 __m128i eob0, eob1; 429 __m128i eob0, eob1;
417 zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero); 430 zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero);
418 zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero); 431 zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
419 nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero); 432 nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
420 nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero); 433 nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
421 iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs)); 434 iscan0 = _mm_load_si128((const __m128i*)(iscan_ptr + n_coeffs));
(...skipping 25 matching lines...) Expand all
447 do { 460 do {
448 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero); 461 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
449 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero); 462 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
450 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero); 463 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
451 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero); 464 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
452 n_coeffs += 8 * 2; 465 n_coeffs += 8 * 2;
453 } while (n_coeffs < 0); 466 } while (n_coeffs < 0);
454 *eob_ptr = 0; 467 *eob_ptr = 0;
455 } 468 }
456 } 469 }
OLDNEW
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_dct_impl_sse2.c ('k') | source/libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698