source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c - Issue 812033011: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c

Issue 812033011: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #if defined(_MSC_VER) && _MSC_VER <= 1500	11 #if defined(_MSC_VER) && _MSC_VER <= 1500

12 // Need to include math.h before calling tmmintrin.h/intrin.h	12 // Need to include math.h before calling tmmintrin.h/intrin.h

13 // in certain versions of MSVS.	13 // in certain versions of MSVS.

14 #include <math.h>	14 #include <math.h>

15 #endif	15 #endif

16 #include <tmmintrin.h> // SSSE3	16 #include <tmmintrin.h> // SSSE3

17 #include "vp9/common/x86/vp9_idct_intrin_sse2.h"	17 #include "vp9/common/x86/vp9_idct_intrin_sse2.h"

18	18

19 void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,	19 void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,

20 int16_t* coeff_ptr, intptr_t n_coeffs,	20 int16_t* coeff_ptr, intptr_t n_coeffs,

21 int skip_block, const int16_t* zbin_ptr,	21 int skip_block, const int16_t* zbin_ptr,

22 const int16_t* round_ptr, const int16_t* quant_ptr,	22 const int16_t* round_ptr, const int16_t* quant_ptr,

23 const int16_t* quant_shift_ptr,	23 const int16_t* quant_shift_ptr,

24 int16_t* qcoeff_ptr,	24 int16_t* qcoeff_ptr,

25 int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,	25 int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,

26 int zbin_oq_value, uint16_t* eob_ptr,	26 uint16_t* eob_ptr,

27 const int16_t* scan_ptr,	27 const int16_t* scan_ptr,

28 const int16_t* iscan_ptr) {	28 const int16_t* iscan_ptr) {

29 __m128i zero;	29 __m128i zero;

30 int pass;	30 int pass;

31 // Constants	31 // Constants

32 // When we use them, in one case, they are all the same. In all others	32 // When we use them, in one case, they are all the same. In all others

33 // it's a pair of them that we need to repeat four times. This is done	33 // it's a pair of them that we need to repeat four times. This is done

34 // by constructing the 32 bit constant corresponding to that pair.	34 // by constructing the 32 bit constant corresponding to that pair.

35 const __m128i k__dual_p16_p16 = dual_set_epi16(23170, 23170);	35 const __m128i k__dual_p16_p16 = dual_set_epi16(23170, 23170);

36 const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);	36 const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);

(...skipping 13 matching lines...) Expand all Loading...
50 __m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride));	50 __m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride));

51 __m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride));	51 __m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride));

52 __m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride));	52 __m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride));

53 __m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride));	53 __m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride));

54 __m128i *in[8];	54 __m128i *in[8];

55 int index = 0;	55 int index = 0;

56	56

57 (void)scan_ptr;	57 (void)scan_ptr;

58 (void)zbin_ptr;	58 (void)zbin_ptr;

59 (void)quant_shift_ptr;	59 (void)quant_shift_ptr;

60 (void)zbin_oq_value;

61 (void)coeff_ptr;	60 (void)coeff_ptr;

62	61

63 // Pre-condition input (shift by two)	62 // Pre-condition input (shift by two)

64 in0 = _mm_slli_epi16(in0, 2);	63 in0 = _mm_slli_epi16(in0, 2);

65 in1 = _mm_slli_epi16(in1, 2);	64 in1 = _mm_slli_epi16(in1, 2);

66 in2 = _mm_slli_epi16(in2, 2);	65 in2 = _mm_slli_epi16(in2, 2);

67 in3 = _mm_slli_epi16(in3, 2);	66 in3 = _mm_slli_epi16(in3, 2);

68 in4 = _mm_slli_epi16(in4, 2);	67 in4 = _mm_slli_epi16(in4, 2);

69 in5 = _mm_slli_epi16(in5, 2);	68 in5 = _mm_slli_epi16(in5, 2);

70 in6 = _mm_slli_epi16(in6, 2);	69 in6 = _mm_slli_epi16(in6, 2);

(...skipping 418 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
489 do {	488 do {

490 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);	489 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);

491 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);	490 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);

492 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);	491 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);

493 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);	492 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);

494 n_coeffs += 8 * 2;	493 n_coeffs += 8 * 2;

495 } while (n_coeffs < 0);	494 } while (n_coeffs < 0);

496 *eob_ptr = 0;	495 *eob_ptr = 0;

497 }	496 }

498 }	497 }

OLD	NEW

« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c ('k') | source/libvpx/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c » ('j') | no next file with comments »