| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 236 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 247 in1 = _mm_add_epi32(sum, in0); | 247 in1 = _mm_add_epi32(sum, in0); |
| 248 store_output(&in1, output); | 248 store_output(&in1, output); |
| 249 } | 249 } |
| 250 | 250 |
| 251 void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride, | 251 void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride, |
| 252 int16_t* coeff_ptr, intptr_t n_coeffs, | 252 int16_t* coeff_ptr, intptr_t n_coeffs, |
| 253 int skip_block, const int16_t* zbin_ptr, | 253 int skip_block, const int16_t* zbin_ptr, |
| 254 const int16_t* round_ptr, const int16_t* quant_ptr, | 254 const int16_t* round_ptr, const int16_t* quant_ptr, |
| 255 const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr, | 255 const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr, |
| 256 int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, | 256 int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, |
| 257 int zbin_oq_value, uint16_t* eob_ptr, | 257 uint16_t* eob_ptr, |
| 258 const int16_t* scan_ptr, | 258 const int16_t* scan_ptr, |
| 259 const int16_t* iscan_ptr) { | 259 const int16_t* iscan_ptr) { |
| 260 __m128i zero; | 260 __m128i zero; |
| 261 int pass; | 261 int pass; |
| 262 // Constants | 262 // Constants |
| 263 // When we use them, in one case, they are all the same. In all others | 263 // When we use them, in one case, they are all the same. In all others |
| 264 // it's a pair of them that we need to repeat four times. This is done | 264 // it's a pair of them that we need to repeat four times. This is done |
| 265 // by constructing the 32 bit constant corresponding to that pair. | 265 // by constructing the 32 bit constant corresponding to that pair. |
| 266 const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); | 266 const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); |
| 267 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); | 267 const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); |
| (...skipping 12 matching lines...) Expand all Loading... |
| 280 __m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride)); | 280 __m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride)); |
| 281 __m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride)); | 281 __m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride)); |
| 282 __m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride)); | 282 __m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride)); |
| 283 __m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride)); | 283 __m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride)); |
| 284 __m128i *in[8]; | 284 __m128i *in[8]; |
| 285 int index = 0; | 285 int index = 0; |
| 286 | 286 |
| 287 (void)scan_ptr; | 287 (void)scan_ptr; |
| 288 (void)zbin_ptr; | 288 (void)zbin_ptr; |
| 289 (void)quant_shift_ptr; | 289 (void)quant_shift_ptr; |
| 290 (void)zbin_oq_value; | |
| 291 (void)coeff_ptr; | 290 (void)coeff_ptr; |
| 292 | 291 |
| 293 // Pre-condition input (shift by two) | 292 // Pre-condition input (shift by two) |
| 294 in0 = _mm_slli_epi16(in0, 2); | 293 in0 = _mm_slli_epi16(in0, 2); |
| 295 in1 = _mm_slli_epi16(in1, 2); | 294 in1 = _mm_slli_epi16(in1, 2); |
| 296 in2 = _mm_slli_epi16(in2, 2); | 295 in2 = _mm_slli_epi16(in2, 2); |
| 297 in3 = _mm_slli_epi16(in3, 2); | 296 in3 = _mm_slli_epi16(in3, 2); |
| 298 in4 = _mm_slli_epi16(in4, 2); | 297 in4 = _mm_slli_epi16(in4, 2); |
| 299 in5 = _mm_slli_epi16(in5, 2); | 298 in5 = _mm_slli_epi16(in5, 2); |
| 300 in6 = _mm_slli_epi16(in6, 2); | 299 in6 = _mm_slli_epi16(in6, 2); |
| (...skipping 2107 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2408 | 2407 |
| 2409 #define FDCT32x32_2D vp9_highbd_fdct32x32_sse2 | 2408 #define FDCT32x32_2D vp9_highbd_fdct32x32_sse2 |
| 2410 #define FDCT32x32_HIGH_PRECISION 1 | 2409 #define FDCT32x32_HIGH_PRECISION 1 |
| 2411 #include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT | 2410 #include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT |
| 2412 #undef FDCT32x32_2D | 2411 #undef FDCT32x32_2D |
| 2413 #undef FDCT32x32_HIGH_PRECISION | 2412 #undef FDCT32x32_HIGH_PRECISION |
| 2414 | 2413 |
| 2415 #undef DCT_HIGH_BIT_DEPTH | 2414 #undef DCT_HIGH_BIT_DEPTH |
| 2416 | 2415 |
| 2417 #endif // CONFIG_VP9_HIGHBITDEPTH | 2416 #endif // CONFIG_VP9_HIGHBITDEPTH |
| OLD | NEW |