OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "vpx_dsp/mips/inv_txfm_msa.h" | 11 #include "vpx_dsp/mips/inv_txfm_msa.h" |
12 | 12 |
/* Full inverse 8x8 DCT for a block with (up to) all 64 non-zero
 * coefficients, using MIPS MSA vector intrinsics.
 *
 * input      : 64 int16_t transform coefficients, row-major, stride 8.
 * dst        : 8x8 destination pixel block; the reconstructed residual is
 *              ADDED to the existing pixels (prediction), with saturation
 *              handled inside VP9_ADDBLK_ST8x4_UB.
 * dst_stride : byte stride between destination rows.
 *
 * The 2-D transform is computed as two 1-D passes separated by
 * transposes: transpose -> 1-D idct (rows) -> transpose -> 1-D idct
 * (columns), followed by the final rounding shift.
 */
void vpx_idct8x8_64_add_msa(const int16_t *input, uint8_t *dst,
                            int32_t dst_stride) {
  /* Eight vectors of eight int16 lanes: one vector per row of the block. */
  v8i16 in0, in1, in2, in3, in4, in5, in6, in7;

  /* load vector elements of 8x8 block */
  LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);

  /* rows transform: transpose so each vector holds one column, letting the
   * 1-D idct macro process all eight rows in parallel across lanes */
  TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
                     in0, in1, in2, in3, in4, in5, in6, in7);
  /* 1D idct8x8 (first pass) */
  VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
                 in0, in1, in2, in3, in4, in5, in6, in7);
  /* columns transform: transpose back for the second pass */
  TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
                     in0, in1, in2, in3, in4, in5, in6, in7);
  /* 1D idct8x8 (second pass) */
  VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
                 in0, in1, in2, in3, in4, in5, in6, in7);
  /* final rounding (add 2^4, divide by 2^5) and shift */
  SRARI_H4_SH(in0, in1, in2, in3, 5);
  SRARI_H4_SH(in4, in5, in6, in7, 5);
  /* add block and store 8x8, four rows at a time */
  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
  dst += (4 * dst_stride);
  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
}
40 | 40 |
41 void vp9_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst, | 41 void vpx_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst, |
42 int32_t dst_stride) { | 42 int32_t dst_stride) { |
43 v8i16 in0, in1, in2, in3, in4, in5, in6, in7; | 43 v8i16 in0, in1, in2, in3, in4, in5, in6, in7; |
44 v8i16 s0, s1, s2, s3, s4, s5, s6, s7, k0, k1, k2, k3, m0, m1, m2, m3; | 44 v8i16 s0, s1, s2, s3, s4, s5, s6, s7, k0, k1, k2, k3, m0, m1, m2, m3; |
45 v4i32 tmp0, tmp1, tmp2, tmp3; | 45 v4i32 tmp0, tmp1, tmp2, tmp3; |
46 v8i16 zero = { 0 }; | 46 v8i16 zero = { 0 }; |
47 | 47 |
48 /* load vector elements of 8x8 block */ | 48 /* load vector elements of 8x8 block */ |
49 LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7); | 49 LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7); |
50 TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); | 50 TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); |
51 | 51 |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
92 /* final rounding (add 2^4, divide by 2^5) and shift */ | 92 /* final rounding (add 2^4, divide by 2^5) and shift */ |
93 SRARI_H4_SH(in0, in1, in2, in3, 5); | 93 SRARI_H4_SH(in0, in1, in2, in3, 5); |
94 SRARI_H4_SH(in4, in5, in6, in7, 5); | 94 SRARI_H4_SH(in4, in5, in6, in7, 5); |
95 | 95 |
96 /* add block and store 8x8 */ | 96 /* add block and store 8x8 */ |
97 VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3); | 97 VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3); |
98 dst += (4 * dst_stride); | 98 dst += (4 * dst_stride); |
99 VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7); | 99 VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7); |
100 } | 100 } |
101 | 101 |
/* DC-only inverse 8x8 DCT shortcut: used when only the DC coefficient
 * (input[0]) is non-zero, so every output sample receives the same value.
 *
 * input      : transform coefficients; only input[0] is read.
 * dst        : 8x8 destination block; the constant DC value is ADDED to
 *              the existing pixels via VP9_ADDBLK_ST8x4_UB.
 * dst_stride : byte stride between destination rows.
 */
void vpx_idct8x8_1_add_msa(const int16_t *input, uint8_t *dst,
                           int32_t dst_stride) {
  int16_t out;
  int32_t val;
  v8i16 vec;

  /* Two scalar multiplies by cospi_16_64 replicate the DC path of the
   * row pass and the column pass of the full transform. */
  out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS);
  out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS);
  /* Same final rounding shift (by 5) the full idct8x8 applies. */
  val = ROUND_POWER_OF_TWO(out, 5);
  /* Broadcast the DC value into all eight int16 lanes. */
  vec = __msa_fill_h(val);

  /* Add the constant to all 8 rows, four rows per store macro. */
  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec);
  dst += (4 * dst_stride);
  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec);
}
OLD | NEW |