Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(236)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c

Issue 958693004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 694 matching lines...) Expand 10 before | Expand all | Expand 10 after
705 in[1] = _mm_slli_epi16(in[1], 2); 705 in[1] = _mm_slli_epi16(in[1], 2);
706 in[2] = _mm_slli_epi16(in[2], 2); 706 in[2] = _mm_slli_epi16(in[2], 2);
707 in[3] = _mm_slli_epi16(in[3], 2); 707 in[3] = _mm_slli_epi16(in[3], 2);
708 in[4] = _mm_slli_epi16(in[4], 2); 708 in[4] = _mm_slli_epi16(in[4], 2);
709 in[5] = _mm_slli_epi16(in[5], 2); 709 in[5] = _mm_slli_epi16(in[5], 2);
710 in[6] = _mm_slli_epi16(in[6], 2); 710 in[6] = _mm_slli_epi16(in[6], 2);
711 in[7] = _mm_slli_epi16(in[7], 2); 711 in[7] = _mm_slli_epi16(in[7], 2);
712 } 712 }
713 713
714 // right shift and rounding 714 // right shift and rounding
715 static INLINE void right_shift_8x8(__m128i *res, int const bit) { 715 static INLINE void right_shift_8x8(__m128i *res, const int bit) {
716 const __m128i kOne = _mm_set1_epi16(1);
717 const int bit_m02 = bit - 2;
718 __m128i sign0 = _mm_srai_epi16(res[0], 15); 716 __m128i sign0 = _mm_srai_epi16(res[0], 15);
719 __m128i sign1 = _mm_srai_epi16(res[1], 15); 717 __m128i sign1 = _mm_srai_epi16(res[1], 15);
720 __m128i sign2 = _mm_srai_epi16(res[2], 15); 718 __m128i sign2 = _mm_srai_epi16(res[2], 15);
721 __m128i sign3 = _mm_srai_epi16(res[3], 15); 719 __m128i sign3 = _mm_srai_epi16(res[3], 15);
722 __m128i sign4 = _mm_srai_epi16(res[4], 15); 720 __m128i sign4 = _mm_srai_epi16(res[4], 15);
723 __m128i sign5 = _mm_srai_epi16(res[5], 15); 721 __m128i sign5 = _mm_srai_epi16(res[5], 15);
724 __m128i sign6 = _mm_srai_epi16(res[6], 15); 722 __m128i sign6 = _mm_srai_epi16(res[6], 15);
725 __m128i sign7 = _mm_srai_epi16(res[7], 15); 723 __m128i sign7 = _mm_srai_epi16(res[7], 15);
726 724
727 if (bit_m02 >= 0) { 725 if (bit == 2) {
728 __m128i k_const_rounding = _mm_slli_epi16(kOne, bit_m02); 726 const __m128i const_rounding = _mm_set1_epi16(1);
729 res[0] = _mm_add_epi16(res[0], k_const_rounding); 727 res[0] = _mm_add_epi16(res[0], const_rounding);
730 res[1] = _mm_add_epi16(res[1], k_const_rounding); 728 res[1] = _mm_add_epi16(res[1], const_rounding);
731 res[2] = _mm_add_epi16(res[2], k_const_rounding); 729 res[2] = _mm_add_epi16(res[2], const_rounding);
732 res[3] = _mm_add_epi16(res[3], k_const_rounding); 730 res[3] = _mm_add_epi16(res[3], const_rounding);
733 res[4] = _mm_add_epi16(res[4], k_const_rounding); 731 res[4] = _mm_add_epi16(res[4], const_rounding);
734 res[5] = _mm_add_epi16(res[5], k_const_rounding); 732 res[5] = _mm_add_epi16(res[5], const_rounding);
735 res[6] = _mm_add_epi16(res[6], k_const_rounding); 733 res[6] = _mm_add_epi16(res[6], const_rounding);
736 res[7] = _mm_add_epi16(res[7], k_const_rounding); 734 res[7] = _mm_add_epi16(res[7], const_rounding);
737 } 735 }
738 736
739 res[0] = _mm_sub_epi16(res[0], sign0); 737 res[0] = _mm_sub_epi16(res[0], sign0);
740 res[1] = _mm_sub_epi16(res[1], sign1); 738 res[1] = _mm_sub_epi16(res[1], sign1);
741 res[2] = _mm_sub_epi16(res[2], sign2); 739 res[2] = _mm_sub_epi16(res[2], sign2);
742 res[3] = _mm_sub_epi16(res[3], sign3); 740 res[3] = _mm_sub_epi16(res[3], sign3);
743 res[4] = _mm_sub_epi16(res[4], sign4); 741 res[4] = _mm_sub_epi16(res[4], sign4);
744 res[5] = _mm_sub_epi16(res[5], sign5); 742 res[5] = _mm_sub_epi16(res[5], sign5);
745 res[6] = _mm_sub_epi16(res[6], sign6); 743 res[6] = _mm_sub_epi16(res[6], sign6);
746 res[7] = _mm_sub_epi16(res[7], sign7); 744 res[7] = _mm_sub_epi16(res[7], sign7);
747 745
748 res[0] = _mm_srai_epi16(res[0], bit); 746 if (bit == 1) {
749 res[1] = _mm_srai_epi16(res[1], bit); 747 res[0] = _mm_srai_epi16(res[0], 1);
750 res[2] = _mm_srai_epi16(res[2], bit); 748 res[1] = _mm_srai_epi16(res[1], 1);
751 res[3] = _mm_srai_epi16(res[3], bit); 749 res[2] = _mm_srai_epi16(res[2], 1);
752 res[4] = _mm_srai_epi16(res[4], bit); 750 res[3] = _mm_srai_epi16(res[3], 1);
753 res[5] = _mm_srai_epi16(res[5], bit); 751 res[4] = _mm_srai_epi16(res[4], 1);
754 res[6] = _mm_srai_epi16(res[6], bit); 752 res[5] = _mm_srai_epi16(res[5], 1);
755 res[7] = _mm_srai_epi16(res[7], bit); 753 res[6] = _mm_srai_epi16(res[6], 1);
754 res[7] = _mm_srai_epi16(res[7], 1);
755 } else {
756 res[0] = _mm_srai_epi16(res[0], 2);
757 res[1] = _mm_srai_epi16(res[1], 2);
758 res[2] = _mm_srai_epi16(res[2], 2);
759 res[3] = _mm_srai_epi16(res[3], 2);
760 res[4] = _mm_srai_epi16(res[4], 2);
761 res[5] = _mm_srai_epi16(res[5], 2);
762 res[6] = _mm_srai_epi16(res[6], 2);
763 res[7] = _mm_srai_epi16(res[7], 2);
764 }
756 } 765 }
757 766
758 // write 8x8 array 767 // write 8x8 array
759 static INLINE void write_buffer_8x8(tran_low_t *output, __m128i *res, 768 static INLINE void write_buffer_8x8(tran_low_t *output, __m128i *res,
760 int stride) { 769 int stride) {
761 store_output(&res[0], (output + 0 * stride)); 770 store_output(&res[0], (output + 0 * stride));
762 store_output(&res[1], (output + 1 * stride)); 771 store_output(&res[1], (output + 1 * stride));
763 store_output(&res[2], (output + 2 * stride)); 772 store_output(&res[2], (output + 2 * stride));
764 store_output(&res[3], (output + 3 * stride)); 773 store_output(&res[3], (output + 3 * stride));
765 store_output(&res[4], (output + 4 * stride)); 774 store_output(&res[4], (output + 4 * stride));
(...skipping 1641 matching lines...) Expand 10 before | Expand all | Expand 10 after
2407 2416
2408 #define FDCT32x32_2D vp9_highbd_fdct32x32_sse2 2417 #define FDCT32x32_2D vp9_highbd_fdct32x32_sse2
2409 #define FDCT32x32_HIGH_PRECISION 1 2418 #define FDCT32x32_HIGH_PRECISION 1
2410 #include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT 2419 #include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT
2411 #undef FDCT32x32_2D 2420 #undef FDCT32x32_2D
2412 #undef FDCT32x32_HIGH_PRECISION 2421 #undef FDCT32x32_HIGH_PRECISION
2413 2422
2414 #undef DCT_HIGH_BIT_DEPTH 2423 #undef DCT_HIGH_BIT_DEPTH
2415 2424
2416 #endif // CONFIG_VP9_HIGHBITDEPTH 2425 #endif // CONFIG_VP9_HIGHBITDEPTH
OLDNEW
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c ('k') | source/libvpx/vp9/encoder/x86/vp9_highbd_sad4d_sse2.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698