Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(37)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c

Issue 168343002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: libvpx: Pull from upstream Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 224 matching lines...) Expand 10 before | Expand all | Expand 10 after
235 u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); 235 u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
236 u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); 236 u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
237 u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); 237 u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
238 u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); 238 u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
239 239
240 in[0] = _mm_packs_epi32(u[0], u[2]); 240 in[0] = _mm_packs_epi32(u[0], u[2]);
241 in[1] = _mm_packs_epi32(u[1], u[3]); 241 in[1] = _mm_packs_epi32(u[1], u[3]);
242 transpose_4x4(in); 242 transpose_4x4(in);
243 } 243 }
244 244
245 void vp9_short_fht4x4_sse2(const int16_t *input, int16_t *output, 245 void vp9_fht4x4_sse2(const int16_t *input, int16_t *output,
246 int stride, int tx_type) { 246 int stride, int tx_type) {
247 __m128i in[4]; 247 __m128i in[4];
248 load_buffer_4x4(input, in, stride); 248
249 switch (tx_type) { 249 switch (tx_type) {
250 case 0: // DCT_DCT 250 case DCT_DCT:
251 fdct4_sse2(in); 251 vp9_fdct4x4_sse2(input, output, stride);
252 fdct4_sse2(in);
253 break; 252 break;
254 case 1: // ADST_DCT 253 case ADST_DCT:
254 load_buffer_4x4(input, in, stride);
255 fadst4_sse2(in); 255 fadst4_sse2(in);
256 fdct4_sse2(in); 256 fdct4_sse2(in);
257 write_buffer_4x4(output, in);
257 break; 258 break;
258 case 2: // DCT_ADST 259 case DCT_ADST:
260 load_buffer_4x4(input, in, stride);
259 fdct4_sse2(in); 261 fdct4_sse2(in);
260 fadst4_sse2(in); 262 fadst4_sse2(in);
263 write_buffer_4x4(output, in);
261 break; 264 break;
262 case 3: // ADST_ADST 265 case ADST_ADST:
266 load_buffer_4x4(input, in, stride);
263 fadst4_sse2(in); 267 fadst4_sse2(in);
264 fadst4_sse2(in); 268 fadst4_sse2(in);
269 write_buffer_4x4(output, in);
265 break; 270 break;
266 default: 271 default:
267 assert(0); 272 assert(0);
268 break; 273 break;
269 } 274 }
270 write_buffer_4x4(output, in);
271 } 275 }
272 276
273 void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) { 277 void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
274 int pass; 278 int pass;
275 // Constants 279 // Constants
276 // When we use them, in one case, they are all the same. In all others 280 // When we use them, in one case, they are all the same. In all others
277 // it's a pair of them that we need to repeat four times. This is done 281 // it's a pair of them that we need to repeat four times. This is done
278 // by constructing the 32 bit constant corresponding to that pair. 282 // by constructing the 32 bit constant corresponding to that pair.
279 const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); 283 const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64);
280 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); 284 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
(...skipping 738 matching lines...) Expand 10 before | Expand all | Expand 10 after
1019 in[3] = _mm_sub_epi16(k__const_0, s2); 1023 in[3] = _mm_sub_epi16(k__const_0, s2);
1020 in[4] = s3; 1024 in[4] = s3;
1021 in[5] = _mm_sub_epi16(k__const_0, s7); 1025 in[5] = _mm_sub_epi16(k__const_0, s7);
1022 in[6] = s5; 1026 in[6] = s5;
1023 in[7] = _mm_sub_epi16(k__const_0, s1); 1027 in[7] = _mm_sub_epi16(k__const_0, s1);
1024 1028
1025 // transpose 1029 // transpose
1026 array_transpose_8x8(in, in); 1030 array_transpose_8x8(in, in);
1027 } 1031 }
1028 1032
1029 void vp9_short_fht8x8_sse2(const int16_t *input, int16_t *output, 1033 void vp9_fht8x8_sse2(const int16_t *input, int16_t *output,
1030 int stride, int tx_type) { 1034 int stride, int tx_type) {
1031 __m128i in[8]; 1035 __m128i in[8];
1032 load_buffer_8x8(input, in, stride); 1036
1033 switch (tx_type) { 1037 switch (tx_type) {
1034 case 0: // DCT_DCT 1038 case DCT_DCT:
1035 fdct8_sse2(in); 1039 vp9_fdct8x8_sse2(input, output, stride);
1036 fdct8_sse2(in);
1037 break; 1040 break;
1038 case 1: // ADST_DCT 1041 case ADST_DCT:
1042 load_buffer_8x8(input, in, stride);
1039 fadst8_sse2(in); 1043 fadst8_sse2(in);
1040 fdct8_sse2(in); 1044 fdct8_sse2(in);
1045 right_shift_8x8(in, 1);
1046 write_buffer_8x8(output, in, 8);
1041 break; 1047 break;
1042 case 2: // DCT_ADST 1048 case DCT_ADST:
1049 load_buffer_8x8(input, in, stride);
1043 fdct8_sse2(in); 1050 fdct8_sse2(in);
1044 fadst8_sse2(in); 1051 fadst8_sse2(in);
1052 right_shift_8x8(in, 1);
1053 write_buffer_8x8(output, in, 8);
1045 break; 1054 break;
1046 case 3: // ADST_ADST 1055 case ADST_ADST:
1056 load_buffer_8x8(input, in, stride);
1047 fadst8_sse2(in); 1057 fadst8_sse2(in);
1048 fadst8_sse2(in); 1058 fadst8_sse2(in);
1059 right_shift_8x8(in, 1);
1060 write_buffer_8x8(output, in, 8);
1049 break; 1061 break;
1050 default: 1062 default:
1051 assert(0); 1063 assert(0);
1052 break; 1064 break;
1053 } 1065 }
1054 right_shift_8x8(in, 1);
1055 write_buffer_8x8(output, in, 8);
1056 } 1066 }
1057 1067
1058 void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) { 1068 void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
1059 // The 2D transform is done with two passes which are actually pretty 1069 // The 2D transform is done with two passes which are actually pretty
1060 // similar. In the first one, we transform the columns and transpose 1070 // similar. In the first one, we transform the columns and transpose
1061 // the results. In the second one, we transform the rows. To achieve that, 1071 // the results. In the second one, we transform the rows. To achieve that,
1062 // as the first pass results are transposed, we transpose the columns (that 1072 // as the first pass results are transposed, we transpose the columns (that
1063 // is the transposed rows) and transpose the results (so that it goes back 1073 // is the transposed rows) and transpose the results (so that it goes back
1064 // in normal/row positions). 1074 // in normal/row positions).
1065 int pass; 1075 int pass;
(...skipping 1459 matching lines...) Expand 10 before | Expand all | Expand 10 after
2525 fdct16_8col(in1); 2535 fdct16_8col(in1);
2526 array_transpose_16x16(in0, in1); 2536 array_transpose_16x16(in0, in1);
2527 } 2537 }
2528 2538
2529 void fadst16_sse2(__m128i *in0, __m128i *in1) { 2539 void fadst16_sse2(__m128i *in0, __m128i *in1) {
2530 fadst16_8col(in0); 2540 fadst16_8col(in0);
2531 fadst16_8col(in1); 2541 fadst16_8col(in1);
2532 array_transpose_16x16(in0, in1); 2542 array_transpose_16x16(in0, in1);
2533 } 2543 }
2534 2544
2535 void vp9_short_fht16x16_sse2(const int16_t *input, int16_t *output, 2545 void vp9_fht16x16_sse2(const int16_t *input, int16_t *output,
2536 int stride, int tx_type) { 2546 int stride, int tx_type) {
2537 __m128i in0[16], in1[16]; 2547 __m128i in0[16], in1[16];
2538 load_buffer_16x16(input, in0, in1, stride); 2548
2539 switch (tx_type) { 2549 switch (tx_type) {
2540 case 0: // DCT_DCT 2550 case DCT_DCT:
2541 fdct16_sse2(in0, in1); 2551 vp9_fdct16x16_sse2(input, output, stride);
2542 right_shift_16x16(in0, in1);
2543 fdct16_sse2(in0, in1);
2544 break; 2552 break;
2545 case 1: // ADST_DCT 2553 case ADST_DCT:
2554 load_buffer_16x16(input, in0, in1, stride);
2546 fadst16_sse2(in0, in1); 2555 fadst16_sse2(in0, in1);
2547 right_shift_16x16(in0, in1); 2556 right_shift_16x16(in0, in1);
2548 fdct16_sse2(in0, in1); 2557 fdct16_sse2(in0, in1);
2558 write_buffer_16x16(output, in0, in1, 16);
2549 break; 2559 break;
2550 case 2: // DCT_ADST 2560 case DCT_ADST:
2561 load_buffer_16x16(input, in0, in1, stride);
2551 fdct16_sse2(in0, in1); 2562 fdct16_sse2(in0, in1);
2552 right_shift_16x16(in0, in1); 2563 right_shift_16x16(in0, in1);
2553 fadst16_sse2(in0, in1); 2564 fadst16_sse2(in0, in1);
2565 write_buffer_16x16(output, in0, in1, 16);
2554 break; 2566 break;
2555 case 3: // ADST_ADST 2567 case ADST_ADST:
2568 load_buffer_16x16(input, in0, in1, stride);
2556 fadst16_sse2(in0, in1); 2569 fadst16_sse2(in0, in1);
2557 right_shift_16x16(in0, in1); 2570 right_shift_16x16(in0, in1);
2558 fadst16_sse2(in0, in1); 2571 fadst16_sse2(in0, in1);
2572 write_buffer_16x16(output, in0, in1, 16);
2559 break; 2573 break;
2560 default: 2574 default:
2561 assert(0); 2575 assert(0);
2562 break; 2576 break;
2563 } 2577 }
2564 write_buffer_16x16(output, in0, in1, 16);
2565 } 2578 }
2566 2579
2567 #define FDCT32x32_2D vp9_fdct32x32_rd_sse2 2580 #define FDCT32x32_2D vp9_fdct32x32_rd_sse2
2568 #define FDCT32x32_HIGH_PRECISION 0 2581 #define FDCT32x32_HIGH_PRECISION 0
2569 #include "vp9/encoder/x86/vp9_dct32x32_sse2.c" 2582 #include "vp9/encoder/x86/vp9_dct32x32_sse2.c"
2570 #undef FDCT32x32_2D 2583 #undef FDCT32x32_2D
2571 #undef FDCT32x32_HIGH_PRECISION 2584 #undef FDCT32x32_HIGH_PRECISION
2572 2585
2573 #define FDCT32x32_2D vp9_fdct32x32_sse2 2586 #define FDCT32x32_2D vp9_fdct32x32_sse2
2574 #define FDCT32x32_HIGH_PRECISION 1 2587 #define FDCT32x32_HIGH_PRECISION 1
2575 #include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT 2588 #include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT
2576 #undef FDCT32x32_2D 2589 #undef FDCT32x32_2D
2577 #undef FDCT32x32_HIGH_PRECISION 2590 #undef FDCT32x32_HIGH_PRECISION
OLDNEW
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_dct_avx2.c ('k') | source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698