source/libvpx/vpx_dsp/mips/idct4x4_msa.c - Issue 1302353004: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vpx_dsp/mips/idct4x4_msa.c

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

11 #include "vpx_dsp/mips/inv_txfm_msa.h"	11 #include "vpx_dsp/mips/inv_txfm_msa.h"

12	12

13 void vp9_iwht4x4_16_add_msa(const int16_t *input, uint8_t *dst,	13 void vpx_iwht4x4_16_add_msa(const int16_t *input, uint8_t *dst,

14 int32_t dst_stride) {	14 int32_t dst_stride) {

15 v8i16 in0, in1, in2, in3;	15 v8i16 in0, in1, in2, in3;

16 v4i32 in0_r, in1_r, in2_r, in3_r, in4_r;	16 v4i32 in0_r, in1_r, in2_r, in3_r, in4_r;

17	17

18 /* load vector elements of 4x4 block */	18 /* load vector elements of 4x4 block */

19 LD4x4_SH(input, in0, in2, in3, in1);	19 LD4x4_SH(input, in0, in2, in3, in1);

20 TRANSPOSE4x4_SH_SH(in0, in2, in3, in1, in0, in2, in3, in1);	20 TRANSPOSE4x4_SH_SH(in0, in2, in3, in1, in0, in2, in3, in1);

21 UNPCK_R_SH_SW(in0, in0_r);	21 UNPCK_R_SH_SW(in0, in0_r);

22 UNPCK_R_SH_SW(in2, in2_r);	22 UNPCK_R_SH_SW(in2, in2_r);

23 UNPCK_R_SH_SW(in3, in3_r);	23 UNPCK_R_SH_SW(in3, in3_r);

(...skipping 16 matching lines...) Expand all Loading...
40 in3_r = in4_r - in3_r;	40 in3_r = in4_r - in3_r;

41 in1_r = in4_r - in1_r;	41 in1_r = in4_r - in1_r;

42 in0_r -= in3_r;	42 in0_r -= in3_r;

43 in2_r += in1_r;	43 in2_r += in1_r;

44	44

45 PCKEV_H4_SH(in0_r, in0_r, in1_r, in1_r, in2_r, in2_r, in3_r, in3_r,	45 PCKEV_H4_SH(in0_r, in0_r, in1_r, in1_r, in2_r, in2_r, in3_r, in3_r,

46 in0, in1, in2, in3);	46 in0, in1, in2, in3);

47 ADDBLK_ST4x4_UB(in0, in3, in1, in2, dst, dst_stride);	47 ADDBLK_ST4x4_UB(in0, in3, in1, in2, dst, dst_stride);

48 }	48 }

49	49

50 void vp9_iwht4x4_1_add_msa(const int16_t *input, uint8_t *dst,	50 void vpx_iwht4x4_1_add_msa(const int16_t *input, uint8_t *dst,

51 int32_t dst_stride) {	51 int32_t dst_stride) {

52 int16_t a1, e1;	52 int16_t a1, e1;

53 v8i16 in1, in0 = { 0 };	53 v8i16 in1, in0 = { 0 };

54	54

55 a1 = input[0] >> UNIT_QUANT_SHIFT;	55 a1 = input[0] >> UNIT_QUANT_SHIFT;

56 e1 = a1 >> 1;	56 e1 = a1 >> 1;

57 a1 -= e1;	57 a1 -= e1;

58	58

59 in0 = __msa_insert_h(in0, 0, a1);	59 in0 = __msa_insert_h(in0, 0, a1);

60 in0 = __msa_insert_h(in0, 1, e1);	60 in0 = __msa_insert_h(in0, 1, e1);

61 in0 = __msa_insert_h(in0, 2, e1);	61 in0 = __msa_insert_h(in0, 2, e1);

62 in0 = __msa_insert_h(in0, 3, e1);	62 in0 = __msa_insert_h(in0, 3, e1);

63	63

64 in1 = in0 >> 1;	64 in1 = in0 >> 1;

65 in0 -= in1;	65 in0 -= in1;

66	66

67 ADDBLK_ST4x4_UB(in0, in1, in1, in1, dst, dst_stride);	67 ADDBLK_ST4x4_UB(in0, in1, in1, in1, dst, dst_stride);

68 }	68 }

69	69

70 void vp9_idct4x4_16_add_msa(const int16_t *input, uint8_t *dst,	70 void vpx_idct4x4_16_add_msa(const int16_t *input, uint8_t *dst,

71 int32_t dst_stride) {	71 int32_t dst_stride) {

72 v8i16 in0, in1, in2, in3;	72 v8i16 in0, in1, in2, in3;

73	73

74 /* load vector elements of 4x4 block */	74 /* load vector elements of 4x4 block */

75 LD4x4_SH(input, in0, in1, in2, in3);	75 LD4x4_SH(input, in0, in1, in2, in3);

76 /* rows */	76 /* rows */

77 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);	77 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);

78 VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);	78 VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);

79 /* columns */	79 /* columns */

80 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);	80 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);

81 VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);	81 VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);

82 /* rounding (add 2^3, divide by 2^4) */	82 /* rounding (add 2^3, divide by 2^4) */

83 SRARI_H4_SH(in0, in1, in2, in3, 4);	83 SRARI_H4_SH(in0, in1, in2, in3, 4);

84 ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride);	84 ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride);

85 }	85 }

86	86

87 void vp9_idct4x4_1_add_msa(const int16_t *input, uint8_t *dst,	87 void vpx_idct4x4_1_add_msa(const int16_t *input, uint8_t *dst,

88 int32_t dst_stride) {	88 int32_t dst_stride) {

89 int16_t out;	89 int16_t out;

90 v8i16 vec;	90 v8i16 vec;

91	91

92 out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS);	92 out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS);

93 out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS);	93 out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS);

94 out = ROUND_POWER_OF_TWO(out, 4);	94 out = ROUND_POWER_OF_TWO(out, 4);

95 vec = __msa_fill_h(out);	95 vec = __msa_fill_h(out);

96	96

97 ADDBLK_ST4x4_UB(vec, vec, vec, vec, dst, dst_stride);	97 ADDBLK_ST4x4_UB(vec, vec, vec, vec, dst, dst_stride);

98 }	98 }

OLD	NEW

« no previous file with comments | « source/libvpx/vpx_dsp/mips/idct32x32_msa.c ('k') | source/libvpx/vpx_dsp/mips/idct8x8_msa.c » ('j') | no next file with comments »