OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ |
| 10 |
| 11 #include "libyuv/scale_row.h" |
| 12 |
| 13 // This module is for GCC MSA |
| 14 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |
| 15 #include "libyuv/macros_msa.h" |
| 16 |
| 17 #ifdef __cplusplus |
| 18 namespace libyuv { |
| 19 extern "C" { |
| 20 #endif |
| 21 |
| 22 void ScaleARGBRowDown2_MSA(const uint8_t* src_argb, |
| 23 ptrdiff_t src_stride, |
| 24 uint8_t* dst_argb, |
| 25 int dst_width) { |
| 26 int x; |
| 27 v16u8 src0, src1, dst0; |
| 28 |
| 29 for (x = 0; x < dst_width; x += 4) { |
| 30 src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); |
| 31 src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); |
| 32 dst0 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0); |
| 33 ST_UB(dst0, dst_argb); |
| 34 src_argb += 32; |
| 35 dst_argb += 16; |
| 36 } |
| 37 } |
| 38 |
| 39 void ScaleARGBRowDown2Linear_MSA(const uint8_t* src_argb, |
| 40 ptrdiff_t src_stride, |
| 41 uint8_t* dst_argb, |
| 42 int dst_width) { |
| 43 int x; |
| 44 v16u8 src0, src1, vec0, vec1, dst0; |
| 45 |
| 46 for (x = 0; x < dst_width; x += 4) { |
| 47 src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); |
| 48 src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); |
| 49 vec0 = (v16u8)__msa_pckev_w((v4i32)src1, (v4i32)src0); |
| 50 vec1 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0); |
| 51 dst0 = (v16u8)__msa_aver_u_b((v16u8)vec0, (v16u8)vec1); |
| 52 ST_UB(dst0, dst_argb); |
| 53 src_argb += 32; |
| 54 dst_argb += 16; |
| 55 } |
| 56 } |
| 57 |
| 58 void ScaleARGBRowDown2Box_MSA(const uint8_t* src_argb, |
| 59 ptrdiff_t src_stride, |
| 60 uint8_t* dst_argb, |
| 61 int dst_width) { |
| 62 int x; |
| 63 const uint8_t* s = src_argb; |
| 64 const uint8_t* t = src_argb + src_stride; |
| 65 v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0; |
| 66 v8u16 reg0, reg1, reg2, reg3; |
| 67 v16i8 shuffler = {0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15}; |
| 68 |
| 69 for (x = 0; x < dst_width; x += 4) { |
| 70 src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); |
| 71 src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); |
| 72 src2 = (v16u8)__msa_ld_b((v16i8*)t, 0); |
| 73 src3 = (v16u8)__msa_ld_b((v16i8*)t, 16); |
| 74 vec0 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src0, (v16i8)src0); |
| 75 vec1 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src1, (v16i8)src1); |
| 76 vec2 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src2, (v16i8)src2); |
| 77 vec3 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src3, (v16i8)src3); |
| 78 reg0 = __msa_hadd_u_h(vec0, vec0); |
| 79 reg1 = __msa_hadd_u_h(vec1, vec1); |
| 80 reg2 = __msa_hadd_u_h(vec2, vec2); |
| 81 reg3 = __msa_hadd_u_h(vec3, vec3); |
| 82 reg0 += reg2; |
| 83 reg1 += reg3; |
| 84 reg0 = (v8u16)__msa_srari_h((v8i16)reg0, 2); |
| 85 reg1 = (v8u16)__msa_srari_h((v8i16)reg1, 2); |
| 86 dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); |
| 87 ST_UB(dst0, dst_argb); |
| 88 s += 32; |
| 89 t += 32; |
| 90 dst_argb += 16; |
| 91 } |
| 92 } |
| 93 |
// Point-samples an ARGB row at a fixed pixel step: one 32-bit pixel is copied
// from every src_stepx-th source pixel (src_stepx * 4 bytes apart).
// Each pass copies 4 pixels (16 destination bytes), so output is always
// written in whole groups of 4 pixels.
// src_stride is accepted for signature compatibility but is not read.
void ScaleARGBRowDownEven_MSA(const uint8_t* src_argb,
                              ptrdiff_t src_stride,
                              int32_t src_stepx,
                              uint8_t* dst_argb,
                              int dst_width) {
  int remaining;
  int32_t byte_step = src_stepx * 4;  // distance between samples in bytes
  int32_t pix0, pix1, pix2, pix3;

  for (remaining = dst_width; remaining > 0; remaining -= 4) {
    // Read all four samples before storing any of them.
    pix0 = LW(src_argb);
    pix1 = LW(src_argb + byte_step);
    pix2 = LW(src_argb + byte_step * 2);
    pix3 = LW(src_argb + byte_step * 3);

    SW(pix0, dst_argb);
    SW(pix1, dst_argb + 4);
    SW(pix2, dst_argb + 8);
    SW(pix3, dst_argb + 12);

    dst_argb += 16;
    src_argb += byte_step * 4;
  }
}
| 116 |
| 117 void ScaleARGBRowDownEvenBox_MSA(const uint8* src_argb, |
| 118 ptrdiff_t src_stride, |
| 119 int src_stepx, |
| 120 uint8* dst_argb, |
| 121 int dst_width) { |
| 122 int x; |
| 123 const uint8* nxt_argb = src_argb + src_stride; |
| 124 int32_t stepx = src_stepx * 4; |
| 125 int64_t data0, data1, data2, data3; |
| 126 v16u8 src0 = {0}, src1 = {0}, src2 = {0}, src3 = {0}; |
| 127 v16u8 vec0, vec1, vec2, vec3; |
| 128 v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; |
| 129 v16u8 dst0; |
| 130 |
| 131 for (x = 0; x < dst_width; x += 4) { |
| 132 data0 = LD(src_argb); |
| 133 data1 = LD(src_argb + stepx); |
| 134 data2 = LD(src_argb + stepx * 2); |
| 135 data3 = LD(src_argb + stepx * 3); |
| 136 src0 = (v16u8)__msa_insert_d((v2i64)src0, 0, data0); |
| 137 src0 = (v16u8)__msa_insert_d((v2i64)src0, 1, data1); |
| 138 src1 = (v16u8)__msa_insert_d((v2i64)src1, 0, data2); |
| 139 src1 = (v16u8)__msa_insert_d((v2i64)src1, 1, data3); |
| 140 data0 = LD(nxt_argb); |
| 141 data1 = LD(nxt_argb + stepx); |
| 142 data2 = LD(nxt_argb + stepx * 2); |
| 143 data3 = LD(nxt_argb + stepx * 3); |
| 144 src2 = (v16u8)__msa_insert_d((v2i64)src2, 0, data0); |
| 145 src2 = (v16u8)__msa_insert_d((v2i64)src2, 1, data1); |
| 146 src3 = (v16u8)__msa_insert_d((v2i64)src3, 0, data2); |
| 147 src3 = (v16u8)__msa_insert_d((v2i64)src3, 1, data3); |
| 148 vec0 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src0); |
| 149 vec1 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src1); |
| 150 vec2 = (v16u8)__msa_ilvl_b((v16i8)src2, (v16i8)src0); |
| 151 vec3 = (v16u8)__msa_ilvl_b((v16i8)src3, (v16i8)src1); |
| 152 reg0 = __msa_hadd_u_h(vec0, vec0); |
| 153 reg1 = __msa_hadd_u_h(vec1, vec1); |
| 154 reg2 = __msa_hadd_u_h(vec2, vec2); |
| 155 reg3 = __msa_hadd_u_h(vec3, vec3); |
| 156 reg4 = (v8u16)__msa_pckev_d((v2i64)reg2, (v2i64)reg0); |
| 157 reg5 = (v8u16)__msa_pckev_d((v2i64)reg3, (v2i64)reg1); |
| 158 reg6 = (v8u16)__msa_pckod_d((v2i64)reg2, (v2i64)reg0); |
| 159 reg7 = (v8u16)__msa_pckod_d((v2i64)reg3, (v2i64)reg1); |
| 160 reg4 += reg6; |
| 161 reg5 += reg7; |
| 162 reg4 = (v8u16)__msa_srari_h((v8i16)reg4, 2); |
| 163 reg5 = (v8u16)__msa_srari_h((v8i16)reg5, 2); |
| 164 dst0 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4); |
| 165 ST_UB(dst0, dst_argb); |
| 166 src_argb += stepx * 4; |
| 167 nxt_argb += stepx * 4; |
| 168 dst_argb += 16; |
| 169 } |
| 170 } |
| 171 |
| 172 #ifdef __cplusplus |
| 173 } // extern "C" |
| 174 } // namespace libyuv |
| 175 #endif |
| 176 |
| 177 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |
OLD | NEW |