Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(635)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c

Issue 1162573005: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include <assert.h> 11 #include <assert.h>
12 #include <emmintrin.h> // SSE2 12 #include <emmintrin.h> // SSE2
13
14 #include "./vp9_rtcd.h"
13 #include "vp9/common/vp9_idct.h" // for cospi constants 15 #include "vp9/common/vp9_idct.h" // for cospi constants
14 #include "vp9/encoder/vp9_dct.h" 16 #include "vp9/encoder/vp9_dct.h"
15 #include "vp9/encoder/x86/vp9_dct_sse2.h" 17 #include "vp9/encoder/x86/vp9_dct_sse2.h"
16 #include "vpx_ports/mem.h" 18 #include "vpx_ports/mem.h"
17 19
18 void vp9_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) { 20 void vp9_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
19 __m128i in0, in1; 21 __m128i in0, in1;
20 __m128i tmp; 22 __m128i tmp;
21 const __m128i zero = _mm_setzero_si128(); 23 const __m128i zero = _mm_setzero_si128();
22 in0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride)); 24 in0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
89 res[0] = _mm_unpacklo_epi32(tr0_0, tr0_1); 91 res[0] = _mm_unpacklo_epi32(tr0_0, tr0_1);
90 res[2] = _mm_unpackhi_epi32(tr0_0, tr0_1); 92 res[2] = _mm_unpackhi_epi32(tr0_0, tr0_1);
91 93
92 // 00 10 20 30 01 11 21 31 94 // 00 10 20 30 01 11 21 31
93 // 02 12 22 32 03 13 23 33 95 // 02 12 22 32 03 13 23 33
94 // only use the first 4 16-bit integers 96 // only use the first 4 16-bit integers
95 res[1] = _mm_unpackhi_epi64(res[0], res[0]); 97 res[1] = _mm_unpackhi_epi64(res[0], res[0]);
96 res[3] = _mm_unpackhi_epi64(res[2], res[2]); 98 res[3] = _mm_unpackhi_epi64(res[2], res[2]);
97 } 99 }
98 100
99 void fdct4_sse2(__m128i *in) { 101 static void fdct4_sse2(__m128i *in) {
100 const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); 102 const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
101 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); 103 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
102 const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); 104 const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
103 const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); 105 const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64);
104 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); 106 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
105 107
106 __m128i u[4], v[4]; 108 __m128i u[4], v[4];
107 u[0]=_mm_unpacklo_epi16(in[0], in[1]); 109 u[0]=_mm_unpacklo_epi16(in[0], in[1]);
108 u[1]=_mm_unpacklo_epi16(in[3], in[2]); 110 u[1]=_mm_unpacklo_epi16(in[3], in[2]);
109 111
(...skipping 12 matching lines...) Expand all
122 u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); 124 u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
123 u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); 125 u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
124 u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); 126 u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
125 u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); 127 u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
126 128
127 in[0] = _mm_packs_epi32(u[0], u[1]); 129 in[0] = _mm_packs_epi32(u[0], u[1]);
128 in[1] = _mm_packs_epi32(u[2], u[3]); 130 in[1] = _mm_packs_epi32(u[2], u[3]);
129 transpose_4x4(in); 131 transpose_4x4(in);
130 } 132 }
131 133
132 void fadst4_sse2(__m128i *in) { 134 static void fadst4_sse2(__m128i *in) {
133 const __m128i k__sinpi_p01_p02 = pair_set_epi16(sinpi_1_9, sinpi_2_9); 135 const __m128i k__sinpi_p01_p02 = pair_set_epi16(sinpi_1_9, sinpi_2_9);
134 const __m128i k__sinpi_p04_m01 = pair_set_epi16(sinpi_4_9, -sinpi_1_9); 136 const __m128i k__sinpi_p04_m01 = pair_set_epi16(sinpi_4_9, -sinpi_1_9);
135 const __m128i k__sinpi_p03_p04 = pair_set_epi16(sinpi_3_9, sinpi_4_9); 137 const __m128i k__sinpi_p03_p04 = pair_set_epi16(sinpi_3_9, sinpi_4_9);
136 const __m128i k__sinpi_m03_p02 = pair_set_epi16(-sinpi_3_9, sinpi_2_9); 138 const __m128i k__sinpi_m03_p02 = pair_set_epi16(-sinpi_3_9, sinpi_2_9);
137 const __m128i k__sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi_3_9); 139 const __m128i k__sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi_3_9);
138 const __m128i kZero = _mm_set1_epi16(0); 140 const __m128i kZero = _mm_set1_epi16(0);
139 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); 141 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
140 __m128i u[8], v[8]; 142 __m128i u[8], v[8];
141 __m128i in7 = _mm_add_epi16(in[0], in[1]); 143 __m128i in7 = _mm_add_epi16(in[0], in[1]);
142 144
(...skipping 681 matching lines...) Expand 10 before | Expand all | Expand 10 after
824 // 00 10 20 30 40 50 60 70 826 // 00 10 20 30 40 50 60 70
825 // 01 11 21 31 41 51 61 71 827 // 01 11 21 31 41 51 61 71
826 // 02 12 22 32 42 52 62 72 828 // 02 12 22 32 42 52 62 72
827 // 03 13 23 33 43 53 63 73 829 // 03 13 23 33 43 53 63 73
828 // 04 14 24 34 44 54 64 74 830 // 04 14 24 34 44 54 64 74
829 // 05 15 25 35 45 55 65 75 831 // 05 15 25 35 45 55 65 75
830 // 06 16 26 36 46 56 66 76 832 // 06 16 26 36 46 56 66 76
831 // 07 17 27 37 47 57 67 77 833 // 07 17 27 37 47 57 67 77
832 } 834 }
833 835
834 void fdct8_sse2(__m128i *in) { 836 static void fdct8_sse2(__m128i *in) {
835 // constants 837 // constants
836 const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); 838 const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
837 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); 839 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
838 const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); 840 const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
839 const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); 841 const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
840 const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64); 842 const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64);
841 const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64); 843 const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
842 const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64); 844 const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64);
843 const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64); 845 const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64);
844 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); 846 const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after
964 966
965 in[1] = _mm_packs_epi32(v0, v1); 967 in[1] = _mm_packs_epi32(v0, v1);
966 in[3] = _mm_packs_epi32(v4, v5); 968 in[3] = _mm_packs_epi32(v4, v5);
967 in[5] = _mm_packs_epi32(v2, v3); 969 in[5] = _mm_packs_epi32(v2, v3);
968 in[7] = _mm_packs_epi32(v6, v7); 970 in[7] = _mm_packs_epi32(v6, v7);
969 971
970 // transpose 972 // transpose
971 array_transpose_8x8(in, in); 973 array_transpose_8x8(in, in);
972 } 974 }
973 975
974 void fadst8_sse2(__m128i *in) { 976 static void fadst8_sse2(__m128i *in) {
975 // Constants 977 // Constants
976 const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); 978 const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
977 const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); 979 const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
978 const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); 980 const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64);
979 const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64); 981 const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64);
980 const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64); 982 const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64);
981 const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64); 983 const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64);
982 const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); 984 const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64);
983 const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64); 985 const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64);
984 const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); 986 const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
(...skipping 361 matching lines...) Expand 10 before | Expand all | Expand 10 after
1346 } 1348 }
1347 1349
1348 static INLINE void right_shift_16x16(__m128i *res0, __m128i *res1) { 1350 static INLINE void right_shift_16x16(__m128i *res0, __m128i *res1) {
1349 // perform rounding operations 1351 // perform rounding operations
1350 right_shift_8x8(res0, 2); 1352 right_shift_8x8(res0, 2);
1351 right_shift_8x8(res0 + 8, 2); 1353 right_shift_8x8(res0 + 8, 2);
1352 right_shift_8x8(res1, 2); 1354 right_shift_8x8(res1, 2);
1353 right_shift_8x8(res1 + 8, 2); 1355 right_shift_8x8(res1 + 8, 2);
1354 } 1356 }
1355 1357
1356 void fdct16_8col(__m128i *in) { 1358 static void fdct16_8col(__m128i *in) {
1357 // perform 16x16 1-D DCT for 8 columns 1359 // perform 16x16 1-D DCT for 8 columns
1358 __m128i i[8], s[8], p[8], t[8], u[16], v[16]; 1360 __m128i i[8], s[8], p[8], t[8], u[16], v[16];
1359 const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); 1361 const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
1360 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); 1362 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
1361 const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); 1363 const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64);
1362 const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); 1364 const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64);
1363 const __m128i k__cospi_p08_m24 = pair_set_epi16(cospi_8_64, -cospi_24_64); 1365 const __m128i k__cospi_p08_m24 = pair_set_epi16(cospi_8_64, -cospi_24_64);
1364 const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); 1366 const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64);
1365 const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64); 1367 const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64);
1366 const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64); 1368 const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64);
(...skipping 301 matching lines...) Expand 10 before | Expand all | Expand 10 after
1668 in[1] = _mm_packs_epi32(v[0], v[1]); 1670 in[1] = _mm_packs_epi32(v[0], v[1]);
1669 in[9] = _mm_packs_epi32(v[2], v[3]); 1671 in[9] = _mm_packs_epi32(v[2], v[3]);
1670 in[5] = _mm_packs_epi32(v[4], v[5]); 1672 in[5] = _mm_packs_epi32(v[4], v[5]);
1671 in[13] = _mm_packs_epi32(v[6], v[7]); 1673 in[13] = _mm_packs_epi32(v[6], v[7]);
1672 in[3] = _mm_packs_epi32(v[8], v[9]); 1674 in[3] = _mm_packs_epi32(v[8], v[9]);
1673 in[11] = _mm_packs_epi32(v[10], v[11]); 1675 in[11] = _mm_packs_epi32(v[10], v[11]);
1674 in[7] = _mm_packs_epi32(v[12], v[13]); 1676 in[7] = _mm_packs_epi32(v[12], v[13]);
1675 in[15] = _mm_packs_epi32(v[14], v[15]); 1677 in[15] = _mm_packs_epi32(v[14], v[15]);
1676 } 1678 }
1677 1679
1678 void fadst16_8col(__m128i *in) { 1680 static void fadst16_8col(__m128i *in) {
1679 // perform 16x16 1-D ADST for 8 columns 1681 // perform 16x16 1-D ADST for 8 columns
1680 __m128i s[16], x[16], u[32], v[32]; 1682 __m128i s[16], x[16], u[32], v[32];
1681 const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64); 1683 const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64);
1682 const __m128i k__cospi_p31_m01 = pair_set_epi16(cospi_31_64, -cospi_1_64); 1684 const __m128i k__cospi_p31_m01 = pair_set_epi16(cospi_31_64, -cospi_1_64);
1683 const __m128i k__cospi_p05_p27 = pair_set_epi16(cospi_5_64, cospi_27_64); 1685 const __m128i k__cospi_p05_p27 = pair_set_epi16(cospi_5_64, cospi_27_64);
1684 const __m128i k__cospi_p27_m05 = pair_set_epi16(cospi_27_64, -cospi_5_64); 1686 const __m128i k__cospi_p27_m05 = pair_set_epi16(cospi_27_64, -cospi_5_64);
1685 const __m128i k__cospi_p09_p23 = pair_set_epi16(cospi_9_64, cospi_23_64); 1687 const __m128i k__cospi_p09_p23 = pair_set_epi16(cospi_9_64, cospi_23_64);
1686 const __m128i k__cospi_p23_m09 = pair_set_epi16(cospi_23_64, -cospi_9_64); 1688 const __m128i k__cospi_p23_m09 = pair_set_epi16(cospi_23_64, -cospi_9_64);
1687 const __m128i k__cospi_p13_p19 = pair_set_epi16(cospi_13_64, cospi_19_64); 1689 const __m128i k__cospi_p13_p19 = pair_set_epi16(cospi_13_64, cospi_19_64);
1688 const __m128i k__cospi_p19_m13 = pair_set_epi16(cospi_19_64, -cospi_13_64); 1690 const __m128i k__cospi_p19_m13 = pair_set_epi16(cospi_19_64, -cospi_13_64);
(...skipping 449 matching lines...) Expand 10 before | Expand all | Expand 10 after
2138 in[8] = _mm_packs_epi32(v[2], v[3]); 2140 in[8] = _mm_packs_epi32(v[2], v[3]);
2139 in[9] = _mm_packs_epi32(v[10], v[11]); 2141 in[9] = _mm_packs_epi32(v[10], v[11]);
2140 in[10] = _mm_packs_epi32(v[14], v[15]); 2142 in[10] = _mm_packs_epi32(v[14], v[15]);
2141 in[11] = _mm_packs_epi32(v[6], v[7]); 2143 in[11] = _mm_packs_epi32(v[6], v[7]);
2142 in[12] = s[5]; 2144 in[12] = s[5];
2143 in[13] = _mm_sub_epi16(kZero, s[13]); 2145 in[13] = _mm_sub_epi16(kZero, s[13]);
2144 in[14] = s[9]; 2146 in[14] = s[9];
2145 in[15] = _mm_sub_epi16(kZero, s[1]); 2147 in[15] = _mm_sub_epi16(kZero, s[1]);
2146 } 2148 }
2147 2149
2148 void fdct16_sse2(__m128i *in0, __m128i *in1) { 2150 static void fdct16_sse2(__m128i *in0, __m128i *in1) {
2149 fdct16_8col(in0); 2151 fdct16_8col(in0);
2150 fdct16_8col(in1); 2152 fdct16_8col(in1);
2151 array_transpose_16x16(in0, in1); 2153 array_transpose_16x16(in0, in1);
2152 } 2154 }
2153 2155
2154 void fadst16_sse2(__m128i *in0, __m128i *in1) { 2156 static void fadst16_sse2(__m128i *in0, __m128i *in1) {
2155 fadst16_8col(in0); 2157 fadst16_8col(in0);
2156 fadst16_8col(in1); 2158 fadst16_8col(in1);
2157 array_transpose_16x16(in0, in1); 2159 array_transpose_16x16(in0, in1);
2158 } 2160 }
2159 2161
2160 void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output, 2162 void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output,
2161 int stride, int tx_type) { 2163 int stride, int tx_type) {
2162 __m128i in0[16], in1[16]; 2164 __m128i in0[16], in1[16];
2163 2165
2164 switch (tx_type) { 2166 switch (tx_type) {
(...skipping 162 matching lines...) Expand 10 before | Expand all | Expand 10 after
2327 for (i = 0; i < 8; ++i) { 2329 for (i = 0; i < 8; ++i) {
2328 for (j = 0; j < 8; ++j) 2330 for (j = 0; j < 8; ++j)
2329 temp_in[j] = out[j + i * 8]; 2331 temp_in[j] = out[j + i * 8];
2330 ht.rows(temp_in, temp_out); 2332 ht.rows(temp_in, temp_out);
2331 for (j = 0; j < 8; ++j) 2333 for (j = 0; j < 8; ++j)
2332 output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1; 2334 output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
2333 } 2335 }
2334 } 2336 }
2335 } 2337 }
2336 2338
2337 void vp9_highbd_fht16x16_sse2(int16_t *input, tran_low_t *output, 2339 void vp9_highbd_fht16x16_sse2(const int16_t *input, tran_low_t *output,
2338 int stride, int tx_type) { 2340 int stride, int tx_type) {
2339 if (tx_type == DCT_DCT) { 2341 if (tx_type == DCT_DCT) {
2340 vp9_highbd_fdct16x16_sse2(input, output, stride); 2342 vp9_highbd_fdct16x16_sse2(input, output, stride);
2341 } else { 2343 } else {
2342 tran_low_t out[256]; 2344 tran_low_t out[256];
2343 tran_low_t *outptr = &out[0]; 2345 tran_low_t *outptr = &out[0];
2344 int i, j; 2346 int i, j;
2345 tran_low_t temp_in[16], temp_out[16]; 2347 tran_low_t temp_in[16], temp_out[16];
2346 const transform_2d ht = FHT_16[tx_type]; 2348 const transform_2d ht = FHT_16[tx_type];
2347 2349
(...skipping 13 matching lines...) Expand all
2361 ht.rows(temp_in, temp_out); 2363 ht.rows(temp_in, temp_out);
2362 for (j = 0; j < 16; ++j) 2364 for (j = 0; j < 16; ++j)
2363 output[j + i * 16] = temp_out[j]; 2365 output[j + i * 16] = temp_out[j];
2364 } 2366 }
2365 } 2367 }
2366 } 2368 }
2367 #endif // CONFIG_VP9_HIGHBITDEPTH 2369 #endif // CONFIG_VP9_HIGHBITDEPTH
2368 2370
2369 /* 2371 /*
2370 * The DCTnxn functions are defined using the macros below. The main code for 2372 * The DCTnxn functions are defined using the macros below. The main code for
2371 * them is in separate files (vp9/encoder/x86/vp9_dct_impl_sse2.c & 2373 * them is in separate files (vp9/encoder/x86/vp9_dct_sse2_impl.h &
2372 * vp9/encoder/x86/vp9_dct32x32_sse2.c) which are used by both the 8 bit code 2374 * vp9/encoder/x86/vp9_dct32x32_sse2_impl.h) which are used by both the 8 bit code
2373 * and the high bit depth code. 2375 * and the high bit depth code.
2374 */ 2376 */
2375 2377
2376 #define DCT_HIGH_BIT_DEPTH 0 2378 #define DCT_HIGH_BIT_DEPTH 0
2377 2379
2378 #define FDCT4x4_2D vp9_fdct4x4_sse2 2380 #define FDCT4x4_2D vp9_fdct4x4_sse2
2379 #define FDCT8x8_2D vp9_fdct8x8_sse2 2381 #define FDCT8x8_2D vp9_fdct8x8_sse2
2380 #define FDCT16x16_2D vp9_fdct16x16_sse2 2382 #define FDCT16x16_2D vp9_fdct16x16_sse2
2381 #include "vp9/encoder/x86/vp9_dct_impl_sse2.c" 2383 #include "vp9/encoder/x86/vp9_dct_sse2_impl.h"
2382 #undef FDCT4x4_2D 2384 #undef FDCT4x4_2D
2383 #undef FDCT8x8_2D 2385 #undef FDCT8x8_2D
2384 #undef FDCT16x16_2D 2386 #undef FDCT16x16_2D
2385 2387
2386 #define FDCT32x32_2D vp9_fdct32x32_rd_sse2 2388 #define FDCT32x32_2D vp9_fdct32x32_rd_sse2
2387 #define FDCT32x32_HIGH_PRECISION 0 2389 #define FDCT32x32_HIGH_PRECISION 0
2388 #include "vp9/encoder/x86/vp9_dct32x32_sse2.c" 2390 #include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h"
2389 #undef FDCT32x32_2D 2391 #undef FDCT32x32_2D
2390 #undef FDCT32x32_HIGH_PRECISION 2392 #undef FDCT32x32_HIGH_PRECISION
2391 2393
2392 #define FDCT32x32_2D vp9_fdct32x32_sse2 2394 #define FDCT32x32_2D vp9_fdct32x32_sse2
2393 #define FDCT32x32_HIGH_PRECISION 1 2395 #define FDCT32x32_HIGH_PRECISION 1
2394 #include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT 2396 #include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h" // NOLINT
2395 #undef FDCT32x32_2D 2397 #undef FDCT32x32_2D
2396 #undef FDCT32x32_HIGH_PRECISION 2398 #undef FDCT32x32_HIGH_PRECISION
2397 2399
2398 #undef DCT_HIGH_BIT_DEPTH 2400 #undef DCT_HIGH_BIT_DEPTH
2399 2401
2400 2402
2401 #if CONFIG_VP9_HIGHBITDEPTH 2403 #if CONFIG_VP9_HIGHBITDEPTH
2402 2404
2403 #define DCT_HIGH_BIT_DEPTH 1 2405 #define DCT_HIGH_BIT_DEPTH 1
2404 2406
2405 #define FDCT4x4_2D vp9_highbd_fdct4x4_sse2 2407 #define FDCT4x4_2D vp9_highbd_fdct4x4_sse2
2406 #define FDCT8x8_2D vp9_highbd_fdct8x8_sse2 2408 #define FDCT8x8_2D vp9_highbd_fdct8x8_sse2
2407 #define FDCT16x16_2D vp9_highbd_fdct16x16_sse2 2409 #define FDCT16x16_2D vp9_highbd_fdct16x16_sse2
2408 #include "vp9/encoder/x86/vp9_dct_impl_sse2.c" // NOLINT 2410 #include "vp9/encoder/x86/vp9_dct_sse2_impl.h" // NOLINT
2409 #undef FDCT4x4_2D 2411 #undef FDCT4x4_2D
2410 #undef FDCT8x8_2D 2412 #undef FDCT8x8_2D
2411 #undef FDCT16x16_2D 2413 #undef FDCT16x16_2D
2412 2414
2413 #define FDCT32x32_2D vp9_highbd_fdct32x32_rd_sse2 2415 #define FDCT32x32_2D vp9_highbd_fdct32x32_rd_sse2
2414 #define FDCT32x32_HIGH_PRECISION 0 2416 #define FDCT32x32_HIGH_PRECISION 0
2415 #include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT 2417 #include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h" // NOLINT
2416 #undef FDCT32x32_2D 2418 #undef FDCT32x32_2D
2417 #undef FDCT32x32_HIGH_PRECISION 2419 #undef FDCT32x32_HIGH_PRECISION
2418 2420
2419 #define FDCT32x32_2D vp9_highbd_fdct32x32_sse2 2421 #define FDCT32x32_2D vp9_highbd_fdct32x32_sse2
2420 #define FDCT32x32_HIGH_PRECISION 1 2422 #define FDCT32x32_HIGH_PRECISION 1
2421 #include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT 2423 #include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h" // NOLINT
2422 #undef FDCT32x32_2D 2424 #undef FDCT32x32_2D
2423 #undef FDCT32x32_HIGH_PRECISION 2425 #undef FDCT32x32_HIGH_PRECISION
2424 2426
2425 #undef DCT_HIGH_BIT_DEPTH 2427 #undef DCT_HIGH_BIT_DEPTH
2426 2428
2427 #endif // CONFIG_VP9_HIGHBITDEPTH 2429 #endif // CONFIG_VP9_HIGHBITDEPTH
OLDNEW
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_dct_impl_sse2.c ('k') | source/libvpx/vp9/encoder/x86/vp9_dct_sse2_impl.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698