Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(151)

Side by Side Diff: source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c

Issue 341293003: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include <assert.h> 11 #include "vp9/common/x86/vp9_idct_intrin_sse2.h"
12 #include <emmintrin.h> // SSE2
13 #include "./vpx_config.h"
14 #include "vpx/vpx_integer.h"
15 #include "vp9/common/vp9_common.h"
16 #include "vp9/common/vp9_idct.h"
17 12
18 #define RECON_AND_STORE4X4(dest, in_x) \ 13 #define RECON_AND_STORE4X4(dest, in_x) \
19 { \ 14 { \
20 __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); \ 15 __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); \
21 d0 = _mm_unpacklo_epi8(d0, zero); \ 16 d0 = _mm_unpacklo_epi8(d0, zero); \
22 d0 = _mm_add_epi16(in_x, d0); \ 17 d0 = _mm_add_epi16(in_x, d0); \
23 d0 = _mm_packus_epi16(d0, d0); \ 18 d0 = _mm_packus_epi16(d0, d0); \
24 *(int *)dest = _mm_cvtsi128_si32(d0); \ 19 *(int *)dest = _mm_cvtsi128_si32(d0); \
25 dest += stride; \ 20 dest += stride; \
26 } 21 }
(...skipping 346 matching lines...) Expand 10 before | Expand all | Expand 10 after
373 const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); \ 368 const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); \
374 const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); \ 369 const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); \
375 const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); \ 370 const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); \
376 \ 371 \
377 out0 = _mm_unpacklo_epi64(tr1_0, tr1_4); \ 372 out0 = _mm_unpacklo_epi64(tr1_0, tr1_4); \
378 out1 = _mm_unpackhi_epi64(tr1_0, tr1_4); \ 373 out1 = _mm_unpackhi_epi64(tr1_0, tr1_4); \
379 out2 = _mm_unpacklo_epi64(tr1_2, tr1_6); \ 374 out2 = _mm_unpacklo_epi64(tr1_2, tr1_6); \
380 out3 = _mm_unpackhi_epi64(tr1_2, tr1_6); \ 375 out3 = _mm_unpackhi_epi64(tr1_2, tr1_6); \
381 } 376 }
382 377
// Transposes the 4x4 block of 16-bit values held in the low 64 bits of
// in0..in3 (one 4-wide row per register).  NOTE(review): the out0/out1
// parameters are never used -- the results are written back into in0
// (transposed rows 0 and 1) and in1 (transposed rows 2 and 3); the
// parameter list appears to mirror TRANSPOSE_8X8_10 for symmetry only.
#define TRANSPOSE_8X4(in0, in1, in2, in3, out0, out1) \
  { \
    const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \
    const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \
    \
    in0 = _mm_unpacklo_epi32(tr0_0, tr0_1); /* i1 i0 */ \
    in1 = _mm_unpackhi_epi32(tr0_0, tr0_1); /* i3 i2 */ \
  }
391
392 #define TRANSPOSE_8X8_10(in0, in1, in2, in3, out0, out1) \ 378 #define TRANSPOSE_8X8_10(in0, in1, in2, in3, out0, out1) \
393 { \ 379 { \
394 const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \ 380 const __m128i tr0_0 = _mm_unpacklo_epi16(in0, in1); \
395 const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \ 381 const __m128i tr0_1 = _mm_unpacklo_epi16(in2, in3); \
396 out0 = _mm_unpacklo_epi32(tr0_0, tr0_1); \ 382 out0 = _mm_unpacklo_epi32(tr0_0, tr0_1); \
397 out1 = _mm_unpackhi_epi32(tr0_0, tr0_1); \ 383 out1 = _mm_unpackhi_epi32(tr0_0, tr0_1); \
398 } 384 }
399 385
400 // Define Macro for multiplying elements by constants and adding them together. 386 // Define Macro for multiplying elements by constants and adding them together.
401 #define MULTIPLICATION_AND_ADD(lo_0, hi_0, lo_1, hi_1, \ 387 #define MULTIPLICATION_AND_ADD(lo_0, hi_0, lo_1, hi_1, \
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after
520 out0 = _mm_adds_epi16(stp1_0, stp2_7); \ 506 out0 = _mm_adds_epi16(stp1_0, stp2_7); \
521 out1 = _mm_adds_epi16(stp1_1, stp1_6); \ 507 out1 = _mm_adds_epi16(stp1_1, stp1_6); \
522 out2 = _mm_adds_epi16(stp1_2, stp1_5); \ 508 out2 = _mm_adds_epi16(stp1_2, stp1_5); \
523 out3 = _mm_adds_epi16(stp1_3, stp2_4); \ 509 out3 = _mm_adds_epi16(stp1_3, stp2_4); \
524 out4 = _mm_subs_epi16(stp1_3, stp2_4); \ 510 out4 = _mm_subs_epi16(stp1_3, stp2_4); \
525 out5 = _mm_subs_epi16(stp1_2, stp1_5); \ 511 out5 = _mm_subs_epi16(stp1_2, stp1_5); \
526 out6 = _mm_subs_epi16(stp1_1, stp1_6); \ 512 out6 = _mm_subs_epi16(stp1_1, stp1_6); \
527 out7 = _mm_subs_epi16(stp1_0, stp2_7); \ 513 out7 = _mm_subs_epi16(stp1_0, stp2_7); \
528 } 514 }
529 515
// Reconstruction helper: adds the eight 16-bit residuals in |in_x| to
// eight destination pixels, saturates the sums to [0, 255], stores them
// back, and advances |dest| by one row.  Relies on |zero| (an all-zero
// __m128i) and |stride| being in scope at the call site, and mutates
// |dest|.  Wrapped in do { } while (0) so the expansion is a single
// statement and is safe inside an unbraced if/else (the original bare
// { } block followed by the call-site ';' breaks an if/else chain).
#define RECON_AND_STORE(dest, in_x) \
  do { \
    __m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); /* load 8 pixels */ \
    d0 = _mm_unpacklo_epi8(d0, zero);                /* widen to 16 bit */ \
    d0 = _mm_add_epi16(in_x, d0);                    /* add residual */ \
    d0 = _mm_packus_epi16(d0, d0);                   /* clamp to uint8 */ \
    _mm_storel_epi64((__m128i *)(dest), d0);         /* store 8 pixels */ \
    dest += stride; \
  } while (0)
539
540 void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) { 516 void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
541 const __m128i zero = _mm_setzero_si128(); 517 const __m128i zero = _mm_setzero_si128();
542 const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); 518 const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
543 const __m128i final_rounding = _mm_set1_epi16(1<<4); 519 const __m128i final_rounding = _mm_set1_epi16(1<<4);
544 const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); 520 const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
545 const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64); 521 const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
546 const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64); 522 const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64);
547 const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64); 523 const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64);
548 const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64); 524 const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
549 const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); 525 const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
620 RECON_AND_STORE(dest, dc_value); 596 RECON_AND_STORE(dest, dc_value);
621 RECON_AND_STORE(dest, dc_value); 597 RECON_AND_STORE(dest, dc_value);
622 RECON_AND_STORE(dest, dc_value); 598 RECON_AND_STORE(dest, dc_value);
623 RECON_AND_STORE(dest, dc_value); 599 RECON_AND_STORE(dest, dc_value);
624 RECON_AND_STORE(dest, dc_value); 600 RECON_AND_STORE(dest, dc_value);
625 RECON_AND_STORE(dest, dc_value); 601 RECON_AND_STORE(dest, dc_value);
626 RECON_AND_STORE(dest, dc_value); 602 RECON_AND_STORE(dest, dc_value);
627 RECON_AND_STORE(dest, dc_value); 603 RECON_AND_STORE(dest, dc_value);
628 } 604 }
629 605
630 // perform 8x8 transpose
631 static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) {
632 const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]);
633 const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]);
634 const __m128i tr0_2 = _mm_unpackhi_epi16(in[0], in[1]);
635 const __m128i tr0_3 = _mm_unpackhi_epi16(in[2], in[3]);
636 const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]);
637 const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]);
638 const __m128i tr0_6 = _mm_unpackhi_epi16(in[4], in[5]);
639 const __m128i tr0_7 = _mm_unpackhi_epi16(in[6], in[7]);
640
641 const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
642 const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_4, tr0_5);
643 const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
644 const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_4, tr0_5);
645 const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_2, tr0_3);
646 const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
647 const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_2, tr0_3);
648 const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
649
650 res[0] = _mm_unpacklo_epi64(tr1_0, tr1_1);
651 res[1] = _mm_unpackhi_epi64(tr1_0, tr1_1);
652 res[2] = _mm_unpacklo_epi64(tr1_2, tr1_3);
653 res[3] = _mm_unpackhi_epi64(tr1_2, tr1_3);
654 res[4] = _mm_unpacklo_epi64(tr1_4, tr1_5);
655 res[5] = _mm_unpackhi_epi64(tr1_4, tr1_5);
656 res[6] = _mm_unpacklo_epi64(tr1_6, tr1_7);
657 res[7] = _mm_unpackhi_epi64(tr1_6, tr1_7);
658 }
659
// Transposes the 8x4 tile held in the low 64 bits (four 16-bit values)
// of in[0..7]: out[0..3] receive the four full-width (8-lane) rows of
// the transpose.  The high halves of the inputs are ignored, and only
// out[0..3] are written.
static INLINE void array_transpose_4X8(__m128i *in, __m128i * out) {
  // Interleave 16-bit lanes of adjacent row pairs (low halves only).
  const __m128i tr0_0 = _mm_unpacklo_epi16(in[0], in[1]);
  const __m128i tr0_1 = _mm_unpacklo_epi16(in[2], in[3]);
  const __m128i tr0_4 = _mm_unpacklo_epi16(in[4], in[5]);
  const __m128i tr0_5 = _mm_unpacklo_epi16(in[6], in[7]);

  // Interleave 32-bit pairs to gather column elements.
  const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
  const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
  const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
  const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);

  // Combine 64-bit halves into the four transposed rows.
  out[0] = _mm_unpacklo_epi64(tr1_0, tr1_4);
  out[1] = _mm_unpackhi_epi64(tr1_0, tr1_4);
  out[2] = _mm_unpacklo_epi64(tr1_2, tr1_6);
  out[3] = _mm_unpackhi_epi64(tr1_2, tr1_6);
}
676
677 static void idct8_sse2(__m128i *in) { 606 static void idct8_sse2(__m128i *in) {
678 const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); 607 const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
679 const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64); 608 const __m128i stg1_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
680 const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64); 609 const __m128i stg1_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
681 const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64); 610 const __m128i stg1_2 = pair_set_epi16(-cospi_20_64, cospi_12_64);
682 const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64); 611 const __m128i stg1_3 = pair_set_epi16(cospi_12_64, cospi_20_64);
683 const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64); 612 const __m128i stg2_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
684 const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64); 613 const __m128i stg2_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
685 const __m128i stg2_2 = pair_set_epi16(cospi_24_64, -cospi_8_64); 614 const __m128i stg2_2 = pair_set_epi16(cospi_24_64, -cospi_8_64);
686 const __m128i stg2_3 = pair_set_epi16(cospi_8_64, cospi_24_64); 615 const __m128i stg2_3 = pair_set_epi16(cospi_8_64, cospi_24_64);
(...skipping 879 matching lines...) Expand 10 before | Expand all | Expand 10 after
1566 RECON_AND_STORE(dest, dc_value); 1495 RECON_AND_STORE(dest, dc_value);
1567 RECON_AND_STORE(dest, dc_value); 1496 RECON_AND_STORE(dest, dc_value);
1568 RECON_AND_STORE(dest, dc_value); 1497 RECON_AND_STORE(dest, dc_value);
1569 RECON_AND_STORE(dest, dc_value); 1498 RECON_AND_STORE(dest, dc_value);
1570 RECON_AND_STORE(dest, dc_value); 1499 RECON_AND_STORE(dest, dc_value);
1571 RECON_AND_STORE(dest, dc_value); 1500 RECON_AND_STORE(dest, dc_value);
1572 dest += 8 - (stride * 16); 1501 dest += 8 - (stride * 16);
1573 } 1502 }
1574 } 1503 }
1575 1504
1576 static INLINE void array_transpose_16x16(__m128i *res0, __m128i *res1) {
1577 __m128i tbuf[8];
1578 array_transpose_8x8(res0, res0);
1579 array_transpose_8x8(res1, tbuf);
1580 array_transpose_8x8(res0 + 8, res1);
1581 array_transpose_8x8(res1 + 8, res1 + 8);
1582
1583 res0[8] = tbuf[0];
1584 res0[9] = tbuf[1];
1585 res0[10] = tbuf[2];
1586 res0[11] = tbuf[3];
1587 res0[12] = tbuf[4];
1588 res0[13] = tbuf[5];
1589 res0[14] = tbuf[6];
1590 res0[15] = tbuf[7];
1591 }
1592
1593 static void iadst16_8col(__m128i *in) { 1505 static void iadst16_8col(__m128i *in) {
1594 // perform 16x16 1-D ADST for 8 columns 1506 // perform 16x16 1-D ADST for 8 columns
1595 __m128i s[16], x[16], u[32], v[32]; 1507 __m128i s[16], x[16], u[32], v[32];
1596 const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64); 1508 const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64);
1597 const __m128i k__cospi_p31_m01 = pair_set_epi16(cospi_31_64, -cospi_1_64); 1509 const __m128i k__cospi_p31_m01 = pair_set_epi16(cospi_31_64, -cospi_1_64);
1598 const __m128i k__cospi_p05_p27 = pair_set_epi16(cospi_5_64, cospi_27_64); 1510 const __m128i k__cospi_p05_p27 = pair_set_epi16(cospi_5_64, cospi_27_64);
1599 const __m128i k__cospi_p27_m05 = pair_set_epi16(cospi_27_64, -cospi_5_64); 1511 const __m128i k__cospi_p27_m05 = pair_set_epi16(cospi_27_64, -cospi_5_64);
1600 const __m128i k__cospi_p09_p23 = pair_set_epi16(cospi_9_64, cospi_23_64); 1512 const __m128i k__cospi_p09_p23 = pair_set_epi16(cospi_9_64, cospi_23_64);
1601 const __m128i k__cospi_p23_m09 = pair_set_epi16(cospi_23_64, -cospi_9_64); 1513 const __m128i k__cospi_p23_m09 = pair_set_epi16(cospi_23_64, -cospi_9_64);
1602 const __m128i k__cospi_p13_p19 = pair_set_epi16(cospi_13_64, cospi_19_64); 1514 const __m128i k__cospi_p13_p19 = pair_set_epi16(cospi_13_64, cospi_19_64);
(...skipping 806 matching lines...) Expand 10 before | Expand all | Expand 10 after
2409 idct16_8col(in0); 2321 idct16_8col(in0);
2410 idct16_8col(in1); 2322 idct16_8col(in1);
2411 } 2323 }
2412 2324
2413 static void iadst16_sse2(__m128i *in0, __m128i *in1) { 2325 static void iadst16_sse2(__m128i *in0, __m128i *in1) {
2414 array_transpose_16x16(in0, in1); 2326 array_transpose_16x16(in0, in1);
2415 iadst16_8col(in0); 2327 iadst16_8col(in0);
2416 iadst16_8col(in1); 2328 iadst16_8col(in1);
2417 } 2329 }
2418 2330
2419 static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) {
2420 in[0] = _mm_load_si128((const __m128i *)(input + 0 * 16));
2421 in[1] = _mm_load_si128((const __m128i *)(input + 1 * 16));
2422 in[2] = _mm_load_si128((const __m128i *)(input + 2 * 16));
2423 in[3] = _mm_load_si128((const __m128i *)(input + 3 * 16));
2424 in[4] = _mm_load_si128((const __m128i *)(input + 4 * 16));
2425 in[5] = _mm_load_si128((const __m128i *)(input + 5 * 16));
2426 in[6] = _mm_load_si128((const __m128i *)(input + 6 * 16));
2427 in[7] = _mm_load_si128((const __m128i *)(input + 7 * 16));
2428
2429 in[8] = _mm_load_si128((const __m128i *)(input + 8 * 16));
2430 in[9] = _mm_load_si128((const __m128i *)(input + 9 * 16));
2431 in[10] = _mm_load_si128((const __m128i *)(input + 10 * 16));
2432 in[11] = _mm_load_si128((const __m128i *)(input + 11 * 16));
2433 in[12] = _mm_load_si128((const __m128i *)(input + 12 * 16));
2434 in[13] = _mm_load_si128((const __m128i *)(input + 13 * 16));
2435 in[14] = _mm_load_si128((const __m128i *)(input + 14 * 16));
2436 in[15] = _mm_load_si128((const __m128i *)(input + 15 * 16));
2437 }
2438
2439 static INLINE void write_buffer_8x16(uint8_t *dest, __m128i *in, int stride) {
2440 const __m128i final_rounding = _mm_set1_epi16(1<<5);
2441 const __m128i zero = _mm_setzero_si128();
2442 // Final rounding and shift
2443 in[0] = _mm_adds_epi16(in[0], final_rounding);
2444 in[1] = _mm_adds_epi16(in[1], final_rounding);
2445 in[2] = _mm_adds_epi16(in[2], final_rounding);
2446 in[3] = _mm_adds_epi16(in[3], final_rounding);
2447 in[4] = _mm_adds_epi16(in[4], final_rounding);
2448 in[5] = _mm_adds_epi16(in[5], final_rounding);
2449 in[6] = _mm_adds_epi16(in[6], final_rounding);
2450 in[7] = _mm_adds_epi16(in[7], final_rounding);
2451 in[8] = _mm_adds_epi16(in[8], final_rounding);
2452 in[9] = _mm_adds_epi16(in[9], final_rounding);
2453 in[10] = _mm_adds_epi16(in[10], final_rounding);
2454 in[11] = _mm_adds_epi16(in[11], final_rounding);
2455 in[12] = _mm_adds_epi16(in[12], final_rounding);
2456 in[13] = _mm_adds_epi16(in[13], final_rounding);
2457 in[14] = _mm_adds_epi16(in[14], final_rounding);
2458 in[15] = _mm_adds_epi16(in[15], final_rounding);
2459
2460 in[0] = _mm_srai_epi16(in[0], 6);
2461 in[1] = _mm_srai_epi16(in[1], 6);
2462 in[2] = _mm_srai_epi16(in[2], 6);
2463 in[3] = _mm_srai_epi16(in[3], 6);
2464 in[4] = _mm_srai_epi16(in[4], 6);
2465 in[5] = _mm_srai_epi16(in[5], 6);
2466 in[6] = _mm_srai_epi16(in[6], 6);
2467 in[7] = _mm_srai_epi16(in[7], 6);
2468 in[8] = _mm_srai_epi16(in[8], 6);
2469 in[9] = _mm_srai_epi16(in[9], 6);
2470 in[10] = _mm_srai_epi16(in[10], 6);
2471 in[11] = _mm_srai_epi16(in[11], 6);
2472 in[12] = _mm_srai_epi16(in[12], 6);
2473 in[13] = _mm_srai_epi16(in[13], 6);
2474 in[14] = _mm_srai_epi16(in[14], 6);
2475 in[15] = _mm_srai_epi16(in[15], 6);
2476
2477 RECON_AND_STORE(dest, in[0]);
2478 RECON_AND_STORE(dest, in[1]);
2479 RECON_AND_STORE(dest, in[2]);
2480 RECON_AND_STORE(dest, in[3]);
2481 RECON_AND_STORE(dest, in[4]);
2482 RECON_AND_STORE(dest, in[5]);
2483 RECON_AND_STORE(dest, in[6]);
2484 RECON_AND_STORE(dest, in[7]);
2485 RECON_AND_STORE(dest, in[8]);
2486 RECON_AND_STORE(dest, in[9]);
2487 RECON_AND_STORE(dest, in[10]);
2488 RECON_AND_STORE(dest, in[11]);
2489 RECON_AND_STORE(dest, in[12]);
2490 RECON_AND_STORE(dest, in[13]);
2491 RECON_AND_STORE(dest, in[14]);
2492 RECON_AND_STORE(dest, in[15]);
2493 }
2494
2495 void vp9_iht16x16_256_add_sse2(const int16_t *input, uint8_t *dest, int stride, 2331 void vp9_iht16x16_256_add_sse2(const int16_t *input, uint8_t *dest, int stride,
2496 int tx_type) { 2332 int tx_type) {
2497 __m128i in0[16], in1[16]; 2333 __m128i in0[16], in1[16];
2498 2334
2499 load_buffer_8x16(input, in0); 2335 load_buffer_8x16(input, in0);
2500 input += 8; 2336 input += 8;
2501 load_buffer_8x16(input, in1); 2337 load_buffer_8x16(input, in1);
2502 2338
2503 switch (tx_type) { 2339 switch (tx_type) {
2504 case 0: // DCT_DCT 2340 case 0: // DCT_DCT
(...skipping 1644 matching lines...) Expand 10 before | Expand all | Expand 10 after
4149 RECON_AND_STORE(dest, dc_value); 3985 RECON_AND_STORE(dest, dc_value);
4150 RECON_AND_STORE(dest, dc_value); 3986 RECON_AND_STORE(dest, dc_value);
4151 RECON_AND_STORE(dest, dc_value); 3987 RECON_AND_STORE(dest, dc_value);
4152 RECON_AND_STORE(dest, dc_value); 3988 RECON_AND_STORE(dest, dc_value);
4153 RECON_AND_STORE(dest, dc_value); 3989 RECON_AND_STORE(dest, dc_value);
4154 RECON_AND_STORE(dest, dc_value); 3990 RECON_AND_STORE(dest, dc_value);
4155 RECON_AND_STORE(dest, dc_value); 3991 RECON_AND_STORE(dest, dc_value);
4156 dest += 8 - (stride * 32); 3992 dest += 8 - (stride * 32);
4157 } 3993 }
4158 } 3994 }
OLDNEW
« no previous file with comments | « source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.h ('k') | source/libvpx/vp9/common/x86/vp9_idct_intrin_ssse3.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698