OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "vpx_dsp/mips/inv_txfm_msa.h" | 11 #include "vpx_dsp/mips/inv_txfm_msa.h" |
12 | 12 |
13 void vp9_iwht4x4_16_add_msa(const int16_t *input, uint8_t *dst, | 13 void vpx_iwht4x4_16_add_msa(const int16_t *input, uint8_t *dst, |
14 int32_t dst_stride) { | 14 int32_t dst_stride) { |
15 v8i16 in0, in1, in2, in3; | 15 v8i16 in0, in1, in2, in3; |
16 v4i32 in0_r, in1_r, in2_r, in3_r, in4_r; | 16 v4i32 in0_r, in1_r, in2_r, in3_r, in4_r; |
17 | 17 |
18 /* load vector elements of 4x4 block */ | 18 /* load vector elements of 4x4 block */ |
19 LD4x4_SH(input, in0, in2, in3, in1); | 19 LD4x4_SH(input, in0, in2, in3, in1); |
20 TRANSPOSE4x4_SH_SH(in0, in2, in3, in1, in0, in2, in3, in1); | 20 TRANSPOSE4x4_SH_SH(in0, in2, in3, in1, in0, in2, in3, in1); |
21 UNPCK_R_SH_SW(in0, in0_r); | 21 UNPCK_R_SH_SW(in0, in0_r); |
22 UNPCK_R_SH_SW(in2, in2_r); | 22 UNPCK_R_SH_SW(in2, in2_r); |
23 UNPCK_R_SH_SW(in3, in3_r); | 23 UNPCK_R_SH_SW(in3, in3_r); |
(...skipping 16 matching lines...) Expand all Loading... |
40 in3_r = in4_r - in3_r; | 40 in3_r = in4_r - in3_r; |
41 in1_r = in4_r - in1_r; | 41 in1_r = in4_r - in1_r; |
42 in0_r -= in3_r; | 42 in0_r -= in3_r; |
43 in2_r += in1_r; | 43 in2_r += in1_r; |
44 | 44 |
45 PCKEV_H4_SH(in0_r, in0_r, in1_r, in1_r, in2_r, in2_r, in3_r, in3_r, | 45 PCKEV_H4_SH(in0_r, in0_r, in1_r, in1_r, in2_r, in2_r, in3_r, in3_r, |
46 in0, in1, in2, in3); | 46 in0, in1, in2, in3); |
47 ADDBLK_ST4x4_UB(in0, in3, in1, in2, dst, dst_stride); | 47 ADDBLK_ST4x4_UB(in0, in3, in1, in2, dst, dst_stride); |
48 } | 48 } |
49 | 49 |
/*
 * 4x4 inverse WHT (going by the function name) for the case where only the
 * first coefficient is used: input[0] is the only element read here.
 *
 * Each row below is a side-by-side diff row: old text on the left, new text
 * on the right. The only difference shown for this function is the
 * vp9_ to vpx_ prefix change on its name.
 *
 * input      - coefficient buffer; only input[0] is read.
 * dst        - destination pointer, forwarded to ADDBLK_ST4x4_UB.
 * dst_stride - stride, forwarded unchanged to ADDBLK_ST4x4_UB.
 */
50 void vp9_iwht4x4_1_add_msa(const int16_t *input, uint8_t *dst, | 50 void vpx_iwht4x4_1_add_msa(const int16_t *input, uint8_t *dst, |
51 int32_t dst_stride) { | 51 int32_t dst_stride) { |
52 int16_t a1, e1; | 52 int16_t a1, e1; |
53 v8i16 in1, in0 = { 0 }; | 53 v8i16 in1, in0 = { 0 }; |
54 | 54 |
/* Shift the first coefficient right by UNIT_QUANT_SHIFT, then split it:
 * e1 = a1 >> 1 and a1 becomes a1 - e1. */
55 a1 = input[0] >> UNIT_QUANT_SHIFT; | 55 a1 = input[0] >> UNIT_QUANT_SHIFT; |
56 e1 = a1 >> 1; | 56 e1 = a1 >> 1; |
57 a1 -= e1; | 57 a1 -= e1; |
58 | 58 |
/* Lane 0 receives a1 and lanes 1..3 receive e1; the remaining lanes keep the
 * zero from the initializer of in0. */
59 in0 = __msa_insert_h(in0, 0, a1); | 59 in0 = __msa_insert_h(in0, 0, a1); |
60 in0 = __msa_insert_h(in0, 1, e1); | 60 in0 = __msa_insert_h(in0, 1, e1); |
61 in0 = __msa_insert_h(in0, 2, e1); | 61 in0 = __msa_insert_h(in0, 2, e1); |
62 in0 = __msa_insert_h(in0, 3, e1); | 62 in0 = __msa_insert_h(in0, 3, e1); |
63 | 63 |
/* Same split applied per lane: in1 = in0 >> 1, then in0 becomes in0 - in1. */
64 in1 = in0 >> 1; | 64 in1 = in0 >> 1; |
65 in0 -= in1; | 65 in0 -= in1; |
66 | 66 |
/* in0 is passed as the first vector and in1 as the other three.
 * NOTE(review): presumably one vector per output row, added to the pixels at
 * dst and stored back; confirm against the ADDBLK_ST4x4_UB definition. */
67 ADDBLK_ST4x4_UB(in0, in1, in1, in1, dst, dst_stride); | 67 ADDBLK_ST4x4_UB(in0, in1, in1, in1, dst, dst_stride); |
68 } | 68 } |
69 | 69 |
/*
 * 4x4 inverse DCT (going by the function name) over a full block of
 * coefficients: two VP9_IDCT4x4 passes, each preceded by a 4x4 transpose,
 * followed by a rounding shift by 4 and ADDBLK_ST4x4_UB on dst.
 *
 * Each row below is a side-by-side diff row: old text on the left, new text
 * on the right. The only difference shown for this function is the
 * vp9_ to vpx_ prefix change on its name; the VP9_IDCT4x4 macro name is the
 * same in both columns.
 *
 * input      - coefficient buffer read through LD4x4_SH.
 * dst        - destination pointer, forwarded to ADDBLK_ST4x4_UB.
 * dst_stride - stride, forwarded unchanged to ADDBLK_ST4x4_UB.
 */
70 void vp9_idct4x4_16_add_msa(const int16_t *input, uint8_t *dst, | 70 void vpx_idct4x4_16_add_msa(const int16_t *input, uint8_t *dst, |
71 int32_t dst_stride) { | 71 int32_t dst_stride) { |
72 v8i16 in0, in1, in2, in3; | 72 v8i16 in0, in1, in2, in3; |
73 | 73 |
74 /* load vector elements of 4x4 block */ | 74 /* load vector elements of 4x4 block */ |
75 LD4x4_SH(input, in0, in1, in2, in3); | 75 LD4x4_SH(input, in0, in1, in2, in3); |
/* First pass: transpose in place, then run VP9_IDCT4x4 with the same four
 * vectors as both inputs and outputs. */
76 /* rows */ | 76 /* rows */ |
77 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); | 77 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); |
78 VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3); | 78 VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3); |
/* Second pass: identical transpose + VP9_IDCT4x4 sequence on the result. */
79 /* columns */ | 79 /* columns */ |
80 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); | 80 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); |
81 VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3); | 81 VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3); |
82 /* rounding (add 2^3, divide by 2^4) */ | 82 /* rounding (add 2^3, divide by 2^4) */ |
83 SRARI_H4_SH(in0, in1, in2, in3, 4); | 83 SRARI_H4_SH(in0, in1, in2, in3, 4); |
/* NOTE(review): presumably adds the four vectors to the 4x4 pixels at dst and
 * stores them back; confirm against the ADDBLK_ST4x4_UB definition. */
84 ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride); | 84 ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride); |
85 } | 85 } |
86 | 86 |
/*
 * 4x4 inverse DCT (going by the function name) for the case where only the
 * first coefficient is used: input[0] is the only element read here. A single
 * scalar is computed, replicated into a vector, and that vector is passed
 * four times to ADDBLK_ST4x4_UB.
 *
 * Each row below is a side-by-side diff row: old text on the left, new text
 * on the right. The only difference shown for this function is the
 * vp9_ to vpx_ prefix change on its name.
 *
 * input      - coefficient buffer; only input[0] is read.
 * dst        - destination pointer, forwarded to ADDBLK_ST4x4_UB.
 * dst_stride - stride, forwarded unchanged to ADDBLK_ST4x4_UB.
 */
87 void vp9_idct4x4_1_add_msa(const int16_t *input, uint8_t *dst, | 87 void vpx_idct4x4_1_add_msa(const int16_t *input, uint8_t *dst, |
88 int32_t dst_stride) { | 88 int32_t dst_stride) { |
89 int16_t out; | 89 int16_t out; |
90 v8i16 vec; | 90 v8i16 vec; |
91 | 91 |
/* Multiply by cospi_16_64 and apply ROUND_POWER_OF_TWO with DCT_CONST_BITS
 * twice in a row, then apply ROUND_POWER_OF_TWO with 4. Each intermediate is
 * stored back into the int16_t variable out.
 * NOTE(review): ROUND_POWER_OF_TWO is presumably a rounding right shift;
 * confirm against its definition. */
92 out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS); | 92 out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS); |
93 out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS); | 93 out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS); |
94 out = ROUND_POWER_OF_TWO(out, 4); | 94 out = ROUND_POWER_OF_TWO(out, 4); |
/* Replicate the scalar result into the vector vec. */
95 vec = __msa_fill_h(out); | 95 vec = __msa_fill_h(out); |
96 | 96 |
/* The same vector is supplied for all four vector arguments. */
97 ADDBLK_ST4x4_UB(vec, vec, vec, vec, dst, dst_stride); | 97 ADDBLK_ST4x4_UB(vec, vec, vec, vec, dst, dst_stride); |
98 } | 98 } |
OLD | NEW |