| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "vpx_dsp/mips/inv_txfm_msa.h" | 11 #include "vpx_dsp/mips/inv_txfm_msa.h" |
| 12 | 12 |
| 13 void vp9_iwht4x4_16_add_msa(const int16_t *input, uint8_t *dst, | 13 void vpx_iwht4x4_16_add_msa(const int16_t *input, uint8_t *dst, |
| 14 int32_t dst_stride) { | 14 int32_t dst_stride) { |
| 15 v8i16 in0, in1, in2, in3; | 15 v8i16 in0, in1, in2, in3; |
| 16 v4i32 in0_r, in1_r, in2_r, in3_r, in4_r; | 16 v4i32 in0_r, in1_r, in2_r, in3_r, in4_r; |
| 17 | 17 |
| 18 /* load vector elements of 4x4 block */ | 18 /* load vector elements of 4x4 block */ |
| 19 LD4x4_SH(input, in0, in2, in3, in1); | 19 LD4x4_SH(input, in0, in2, in3, in1); |
| 20 TRANSPOSE4x4_SH_SH(in0, in2, in3, in1, in0, in2, in3, in1); | 20 TRANSPOSE4x4_SH_SH(in0, in2, in3, in1, in0, in2, in3, in1); |
| 21 UNPCK_R_SH_SW(in0, in0_r); | 21 UNPCK_R_SH_SW(in0, in0_r); |
| 22 UNPCK_R_SH_SW(in2, in2_r); | 22 UNPCK_R_SH_SW(in2, in2_r); |
| 23 UNPCK_R_SH_SW(in3, in3_r); | 23 UNPCK_R_SH_SW(in3, in3_r); |
| (...skipping 16 matching lines...) |
| 40 in3_r = in4_r - in3_r; | 40 in3_r = in4_r - in3_r; |
| 41 in1_r = in4_r - in1_r; | 41 in1_r = in4_r - in1_r; |
| 42 in0_r -= in3_r; | 42 in0_r -= in3_r; |
| 43 in2_r += in1_r; | 43 in2_r += in1_r; |
| 44 | 44 |
| 45 PCKEV_H4_SH(in0_r, in0_r, in1_r, in1_r, in2_r, in2_r, in3_r, in3_r, | 45 PCKEV_H4_SH(in0_r, in0_r, in1_r, in1_r, in2_r, in2_r, in3_r, in3_r, |
| 46 in0, in1, in2, in3); | 46 in0, in1, in2, in3); |
| 47 ADDBLK_ST4x4_UB(in0, in3, in1, in2, dst, dst_stride); | 47 ADDBLK_ST4x4_UB(in0, in3, in1, in2, dst, dst_stride); |
| 48 } | 48 } |
| 49 | 49 |
| 50 void vp9_iwht4x4_1_add_msa(const int16_t *input, uint8_t *dst, | 50 void vpx_iwht4x4_1_add_msa(const int16_t *input, uint8_t *dst, |
| 51 int32_t dst_stride) { | 51 int32_t dst_stride) { |
| 52 int16_t a1, e1; | 52 int16_t a1, e1; |
| 53 v8i16 in1, in0 = { 0 }; | 53 v8i16 in1, in0 = { 0 }; |
| 54 | 54 |
| 55 a1 = input[0] >> UNIT_QUANT_SHIFT; | 55 a1 = input[0] >> UNIT_QUANT_SHIFT; |
| 56 e1 = a1 >> 1; | 56 e1 = a1 >> 1; |
| 57 a1 -= e1; | 57 a1 -= e1; |
| 58 | 58 |
| 59 in0 = __msa_insert_h(in0, 0, a1); | 59 in0 = __msa_insert_h(in0, 0, a1); |
| 60 in0 = __msa_insert_h(in0, 1, e1); | 60 in0 = __msa_insert_h(in0, 1, e1); |
| 61 in0 = __msa_insert_h(in0, 2, e1); | 61 in0 = __msa_insert_h(in0, 2, e1); |
| 62 in0 = __msa_insert_h(in0, 3, e1); | 62 in0 = __msa_insert_h(in0, 3, e1); |
| 63 | 63 |
| 64 in1 = in0 >> 1; | 64 in1 = in0 >> 1; |
| 65 in0 -= in1; | 65 in0 -= in1; |
| 66 | 66 |
| 67 ADDBLK_ST4x4_UB(in0, in1, in1, in1, dst, dst_stride); | 67 ADDBLK_ST4x4_UB(in0, in1, in1, in1, dst, dst_stride); |
| 68 } | 68 } |
| 69 | 69 |
| 70 void vp9_idct4x4_16_add_msa(const int16_t *input, uint8_t *dst, | 70 void vpx_idct4x4_16_add_msa(const int16_t *input, uint8_t *dst, |
| 71 int32_t dst_stride) { | 71 int32_t dst_stride) { |
| 72 v8i16 in0, in1, in2, in3; | 72 v8i16 in0, in1, in2, in3; |
| 73 | 73 |
| 74 /* load vector elements of 4x4 block */ | 74 /* load vector elements of 4x4 block */ |
| 75 LD4x4_SH(input, in0, in1, in2, in3); | 75 LD4x4_SH(input, in0, in1, in2, in3); |
| 76 /* rows */ | 76 /* rows */ |
| 77 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); | 77 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); |
| 78 VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3); | 78 VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3); |
| 79 /* columns */ | 79 /* columns */ |
| 80 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); | 80 TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); |
| 81 VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3); | 81 VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3); |
| 82 /* rounding (add 2^3, divide by 2^4) */ | 82 /* rounding (add 2^3, divide by 2^4) */ |
| 83 SRARI_H4_SH(in0, in1, in2, in3, 4); | 83 SRARI_H4_SH(in0, in1, in2, in3, 4); |
| 84 ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride); | 84 ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride); |
| 85 } | 85 } |
| 86 | 86 |
| 87 void vp9_idct4x4_1_add_msa(const int16_t *input, uint8_t *dst, | 87 void vpx_idct4x4_1_add_msa(const int16_t *input, uint8_t *dst, |
| 88 int32_t dst_stride) { | 88 int32_t dst_stride) { |
| 89 int16_t out; | 89 int16_t out; |
| 90 v8i16 vec; | 90 v8i16 vec; |
| 91 | 91 |
| 92 out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS); | 92 out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS); |
| 93 out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS); | 93 out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS); |
| 94 out = ROUND_POWER_OF_TWO(out, 4); | 94 out = ROUND_POWER_OF_TWO(out, 4); |
| 95 vec = __msa_fill_h(out); | 95 vec = __msa_fill_h(out); |
| 96 | 96 |
| 97 ADDBLK_ST4x4_UB(vec, vec, vec, vec, dst, dst_stride); | 97 ADDBLK_ST4x4_UB(vec, vec, vec, vec, dst, dst_stride); |
| 98 } | 98 } |
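
For readers who don't know the MIPS MSA intrinsics, a plain-C sketch of the DC-only inverse WHT path (vpx_iwht4x4_1_add_msa above) follows. It assumes UNIT_QUANT_SHIFT is 2, as elsewhere in libvpx, and uses a hypothetical clip_pixel helper that clamps to [0, 255]; it is a sketch of the arithmetic, not the project's reference implementation.

#include <stdint.h>

/* Clamp a pixel sum to the valid 8-bit range (hypothetical helper). */
static uint8_t clip_pixel(int val) {
  return (uint8_t)(val < 0 ? 0 : (val > 255 ? 255 : val));
}

/* Scalar sketch of the DC-only 4x4 inverse WHT-and-add: split the DC
 * coefficient into a1 (column 0) and e1 (columns 1-3) of an intermediate
 * row, then each intermediate value t adds t - (t >> 1) to row 0 and
 * t >> 1 to rows 1-3 of the destination block. */
static void iwht4x4_1_add_sketch(const int16_t *input, uint8_t *dst,
                                 int dst_stride) {
  int tmp[4], c;
  int a1 = input[0] >> 2;  /* assumed UNIT_QUANT_SHIFT == 2 */
  int e1 = a1 >> 1;
  a1 -= e1;
  tmp[0] = a1;
  tmp[1] = tmp[2] = tmp[3] = e1;
  for (c = 0; c < 4; ++c) {
    const int t = tmp[c];
    const int lo = t >> 1;  /* added to rows 1-3 */
    const int hi = t - lo;  /* added to row 0 */
    dst[0 * dst_stride + c] = clip_pixel(dst[0 * dst_stride + c] + hi);
    dst[1 * dst_stride + c] = clip_pixel(dst[1 * dst_stride + c] + lo);
    dst[2 * dst_stride + c] = clip_pixel(dst[2 * dst_stride + c] + lo);
    dst[3 * dst_stride + c] = clip_pixel(dst[3 * dst_stride + c] + lo);
  }
}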
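Similarly, a minimal scalar sketch of the DC-only IDCT path (vpx_idct4x4_1_add_msa above), assuming cospi_16_64 = 11585 and DCT_CONST_BITS = 14 as used elsewhere in libvpx, and reusing the clip_pixel helper from the previous sketch; the MSA version vectorizes only the fill (__msa_fill_h) and the clipped add/store (ADDBLK_ST4x4_UB).

#define DCT_CONST_BITS 14
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))

/* Scalar sketch of the DC-only 4x4 IDCT-and-add: scale the DC coefficient
 * by cospi_16_64 once per transform dimension, round by 2^4, then add the
 * resulting constant to every pixel of the 4x4 destination block. */
static void idct4x4_1_add_sketch(const int16_t *input, uint8_t *dst,
                                 int dst_stride) {
  const int cospi_16_64 = 11585;  /* round(2^14 * cos(pi/4)) */
  int out = ROUND_POWER_OF_TWO(input[0] * cospi_16_64, DCT_CONST_BITS);
  int r, c;
  out = ROUND_POWER_OF_TWO(out * cospi_16_64, DCT_CONST_BITS);
  out = ROUND_POWER_OF_TWO(out, 4);
  for (r = 0; r < 4; ++r, dst += dst_stride)
    for (c = 0; c < 4; ++c) dst[c] = clip_pixel(dst[c] + out);
}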