OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "vpx_dsp/mips/inv_txfm_msa.h" | 11 #include "vpx_dsp/mips/inv_txfm_msa.h" |
12 | 12 |
/* Full inverse 8x8 DCT for a block with (up to) all 64 non-zero
 * coefficients, using MIPS MSA vector intrinsics.
 *
 * input      : 64 int16_t transform coefficients, row-major, stride 8.
 * dst        : 8x8 destination pixel block; the reconstructed residual is
 *              ADDED to the existing pixels (prediction), with saturation
 *              handled inside VP9_ADDBLK_ST8x4_UB.
 * dst_stride : byte stride between destination rows.
 *
 * The 2-D transform is computed as two 1-D passes separated by
 * transposes: transpose -> 1-D idct (rows) -> transpose -> 1-D idct
 * (columns), followed by the final rounding shift.
 */
void vpx_idct8x8_64_add_msa(const int16_t *input, uint8_t *dst,
                            int32_t dst_stride) {
  /* Eight vectors of eight int16 lanes: one vector per row of the block. */
  v8i16 in0, in1, in2, in3, in4, in5, in6, in7;

  /* load vector elements of 8x8 block */
  LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);

  /* rows transform: transpose so each vector holds one column, letting the
   * 1-D idct macro process all eight rows in parallel across lanes */
  TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
                     in0, in1, in2, in3, in4, in5, in6, in7);
  /* 1D idct8x8 (first pass) */
  VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
                 in0, in1, in2, in3, in4, in5, in6, in7);
  /* columns transform: transpose back for the second pass */
  TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
                     in0, in1, in2, in3, in4, in5, in6, in7);
  /* 1D idct8x8 (second pass) */
  VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
                 in0, in1, in2, in3, in4, in5, in6, in7);
  /* final rounding (add 2^4, divide by 2^5) and shift */
  SRARI_H4_SH(in0, in1, in2, in3, 5);
  SRARI_H4_SH(in4, in5, in6, in7, 5);
  /* add block and store 8x8, four rows at a time */
  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
  dst += (4 * dst_stride);
  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
}
40 | 40 |
41 void vp9_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst, | 41 void vpx_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst, |
42 int32_t dst_stride) { | 42 int32_t dst_stride) { |
43 v8i16 in0, in1, in2, in3, in4, in5, in6, in7; | 43 v8i16 in0, in1, in2, in3, in4, in5, in6, in7; |
44 v8i16 s0, s1, s2, s3, s4, s5, s6, s7, k0, k1, k2, k3, m0, m1, m2, m3; | 44 v8i16 s0, s1, s2, s3, s4, s5, s6, s7, k0, k1, k2, k3, m0, m1, m2, m3; |
45 v4i32 tmp0, tmp1, tmp2, tmp3; | 45 v4i32 tmp0, tmp1, tmp2, tmp3; |
46 v8i16 zero = { 0 }; | 46 v8i16 zero = { 0 }; |
47 | 47 |
48 /* load vector elements of 8x8 block */ | 48 /* load vector elements of 8x8 block */ |
49 LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7); | 49 LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7); |
50 TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); | 50 TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); |
51 | 51 |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
92 /* final rounding (add 2^4, divide by 2^5) and shift */ | 92 /* final rounding (add 2^4, divide by 2^5) and shift */ |
93 SRARI_H4_SH(in0, in1, in2, in3, 5); | 93 SRARI_H4_SH(in0, in1, in2, in3, 5); |
94 SRARI_H4_SH(in4, in5, in6, in7, 5); | 94 SRARI_H4_SH(in4, in5, in6, in7, 5); |
95 | 95 |
96 /* add block and store 8x8 */ | 96 /* add block and store 8x8 */ |
97 VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3); | 97 VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3); |
98 dst += (4 * dst_stride); | 98 dst += (4 * dst_stride); |
99 VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7); | 99 VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7); |
100 } | 100 } |
101 | 101 |
/* DC-only inverse 8x8 DCT shortcut: used when only the DC coefficient
 * (input[0]) is non-zero, so every output sample receives the same value.
 *
 * input      : transform coefficients; only input[0] is read.
 * dst        : 8x8 destination block; the constant DC value is ADDED to
 *              the existing pixels via VP9_ADDBLK_ST8x4_UB.
 * dst_stride : byte stride between destination rows.
 */
void vpx_idct8x8_1_add_msa(const int16_t *input, uint8_t *dst,
                           int32_t dst_stride) {
  int16_t out;
  int32_t val;
  v8i16 vec;

  /* Two scalar multiplies by cospi_16_64 replicate the DC path of the
   * row pass and the column pass of the full transform. */
  out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS);
  out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS);
  /* Same final rounding shift (by 5) the full idct8x8 applies. */
  val = ROUND_POWER_OF_TWO(out, 5);
  /* Broadcast the DC value into all eight int16 lanes. */
  vec = __msa_fill_h(val);

  /* Add the constant to all 8 rows, four rows per store macro. */
  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec);
  dst += (4 * dst_stride);
  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec);
}
OLD | NEW |