| OLD | NEW |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef WebGLImageConversionMSA_h | 5 #ifndef WebGLImageConversionMSA_h |
| 6 #define WebGLImageConversionMSA_h | 6 #define WebGLImageConversionMSA_h |
| 7 | 7 |
| 8 #if HAVE(MIPS_MSA_INTRINSICS) | 8 #if HAVE(MIPS_MSA_INTRINSICS) |
| 9 | 9 |
| 10 #include "platform/cpu/mips/CommonMacrosMSA.h" | 10 #include "platform/cpu/mips/CommonMacrosMSA.h" |
| 11 | 11 |
| 12 namespace blink { | 12 namespace blink { |
| 13 | 13 |
| 14 namespace SIMD { | 14 namespace SIMD { |
| 15 | 15 |
// Splits eight packed 16-bit RGBA5551 pixels (one v8u16 vector) into four
// v8u16 vectors holding one channel each.
//
//   packed : input vector; per halfword R = bits 15..11, G = bits 10..6,
//            B = bits 5..1, A = bit 0 (as implied by the shifts/masks below).
//   red / green / blue : receive the 5-bit channel widened to 8 bits as
//            (c << 3) | (c & 7).
//   alpha  : receives the result of comparing the alpha bit against 1
//            (presumably an all-ones halfword when the bit is set -- confirm
//            against CEQI_H in CommonMacrosMSA.h).
//
// The macro assigns to cnst31, cnst7 and cnst1 without declaring them, so the
// caller must have v8u16 variables with exactly those names in scope.
#define SEPERATE_RGBA_FRM_16BIT_5551INPUT(packed, red, green, blue, alpha) \
    cnst31 = (v8u16)__msa_ldi_h(0x1F);                                     \
    cnst7 = (v8u16)__msa_ldi_h(0x7);                                       \
    cnst1 = (v8u16)__msa_ldi_h(0x1);                                       \
    red = (v8u16)SRLI_H(packed, 11);                                       \
    green = ((v8u16)SRLI_H(packed, 6)) & cnst31;                           \
    blue = ((v8u16)SRLI_H(packed, 1)) & cnst31;                            \
    alpha = packed & cnst1;                                                \
    red = ((v8u16)SLLI_H(red, 3)) | (red & cnst7);                         \
    green = ((v8u16)SLLI_H(green, 3)) | (green & cnst7);                   \
    blue = ((v8u16)SLLI_H(blue, 3)) | (blue & cnst7);                      \
    alpha = (v8u16)CEQI_H((v8i16)alpha, 1);
| 28 | 28 |
// Splits eight packed 16-bit RGBA4444 pixels into two byte vectors and widens
// every 4-bit channel to 8 bits by replicating the nibble ((n << 4) | n).
//
//   packed    : input vector, reinterpreted as 16 bytes.
//   highNibs  : receives the widened high nibble of every byte (named
//               "rb" by the original author: the R and B channels).
//   lowNibs   : receives the widened low nibble of every byte (the G and A
//               channels).
#define SEPERATE_RGBA_FRM_16BIT_4444INPUT(packed, highNibs, lowNibs) \
    highNibs = (v16u8)SRLI_B((v16u8)packed, 4);                      \
    lowNibs = ANDI_B((v16u8)packed, 15);                             \
    highNibs = ((v16u8)SLLI_B(highNibs, 4)) | highNibs;              \
    lowNibs = ((v16u8)SLLI_B(lowNibs, 4)) | lowNibs;
| 34 |
| 29 ALWAYS_INLINE void unpackOneRowOfRGBA5551ToRGBA8MSA(const uint16_t*& source, uin
t8_t*& destination, unsigned& pixelsPerRow) | 35 ALWAYS_INLINE void unpackOneRowOfRGBA5551ToRGBA8MSA(const uint16_t*& source, uin
t8_t*& destination, unsigned& pixelsPerRow) |
| 30 { | 36 { |
| 31 unsigned i; | 37 unsigned i; |
| 32 v8u16 src0, src1, src2, src3; | 38 v8u16 src0, src1, src2, src3; |
| 33 v8u16 src0r, src0g, src0b, src0a, src1r, src1g, src1b, src1a; | 39 v8u16 src0r, src0g, src0b, src0a, src1r, src1g, src1b, src1a; |
| 34 v8u16 src2r, src2g, src2b, src2a, src3r, src3g, src3b, src3a; | 40 v8u16 src2r, src2g, src2b, src2a, src3r, src3g, src3b, src3a; |
| 35 v8u16 cnst31, cnst7, cnst1; | 41 v8u16 cnst31, cnst7, cnst1; |
| 36 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; | 42 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; |
| 37 v16u8 dst8, dst9, dst10, dst11, dst12, dst13, dst14, dst15; | 43 v16u8 dst8, dst9, dst10, dst11, dst12, dst13, dst14, dst15; |
| 38 v16u8 out0, out1, out2, out3, out4, out5, out6, out7; | 44 v16u8 out0, out1, out2, out3, out4, out5, out6, out7; |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 92 ILVRL_B2_UB(src0g, src0r, dst0, dst1); | 98 ILVRL_B2_UB(src0g, src0r, dst0, dst1); |
| 93 ILVRL_B2_UB(src0a, src0b, dst2, dst3); | 99 ILVRL_B2_UB(src0a, src0b, dst2, dst3); |
| 94 ILVEV_H2_UB(dst0, dst2, dst1, dst3, out0, out1); | 100 ILVEV_H2_UB(dst0, dst2, dst1, dst3, out0, out1); |
| 95 ST_UB2(out0, out1, destination, 16); | 101 ST_UB2(out0, out1, destination, 16); |
| 96 } | 102 } |
| 97 } | 103 } |
| 98 | 104 |
| 99 pixelsPerRow &= 7; | 105 pixelsPerRow &= 7; |
| 100 } | 106 } |
| 101 | 107 |
| 108 ALWAYS_INLINE void unpackOneRowOfBGRA8LittleToRGBA8MSA(const uint32_t*& source,
uint32_t*& destination, unsigned& pixelsPerRow) |
| 109 { |
| 110 unsigned i; |
| 111 v16u8 src0, src1, src2, src3, src4, src5, src6, src7; |
| 112 v16u8 src8, src9, src10, src11, src12, src13, src14, src15; |
| 113 |
| 114 for (i = (pixelsPerRow >> 6); i--;) { |
| 115 LD_UB8(source, 4, src0, src1, src2, src3, src4, src5, src6, src7); |
| 116 LD_UB8(source, 4, src8, src9, src10, src11, src12, src13, src14, src15); |
| 117 SHF_B4_UB(src0, src1, src2, src3, 198); |
| 118 SHF_B4_UB(src4, src5, src6, src7, 198); |
| 119 SHF_B4_UB(src8, src9, src10, src11, 198); |
| 120 SHF_B4_UB(src12, src13, src14, src15, 198); |
| 121 ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, destination, 4); |
| 122 ST_UB8(src8, src9, src10, src11, src12, src13, src14, src15, destination
, 4); |
| 123 } |
| 124 |
| 125 if (pixelsPerRow & 63) { |
| 126 if (pixelsPerRow & 32) { |
| 127 if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) { |
| 128 LD_UB8(source, 4, src0, src1, src2, src3, src4, src5, src6, src7
); |
| 129 LD_UB6(source, 4, src8, src9, src10, src11, src12, src13); |
| 130 SHF_B4_UB(src0, src1, src2, src3, 198); |
| 131 SHF_B4_UB(src4, src5, src6, src7, 198); |
| 132 SHF_B4_UB(src8, src9, src10, src11, 198); |
| 133 SHF_B2_UB(src12, src13, 198); |
| 134 ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, destinati
on, 4); |
| 135 ST_UB6(src8, src9, src10, src11, src12, src13, destination, 4); |
| 136 } else if (pixelsPerRow & 16) { |
| 137 LD_UB8(source, 4, src0, src1, src2, src3, src4, src5, src6, src7
); |
| 138 LD_UB4(source, 4, src8, src9, src10, src11); |
| 139 SHF_B4_UB(src0, src1, src2, src3, 198); |
| 140 SHF_B4_UB(src4, src5, src6, src7, 198); |
| 141 SHF_B4_UB(src8, src9, src10, src11, 198); |
| 142 ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, destinati
on, 4); |
| 143 ST_UB4(src8, src9, src10, src11, destination, 4); |
| 144 } else if (pixelsPerRow & 8) { |
| 145 LD_UB8(source, 4, src0, src1, src2, src3, src4, src5, src6, src7
); |
| 146 LD_UB2(source, 4, src8, src9); |
| 147 SHF_B4_UB(src0, src1, src2, src3, 198); |
| 148 SHF_B4_UB(src4, src5, src6, src7, 198); |
| 149 SHF_B2_UB(src8, src9, 198); |
| 150 ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, destinati
on, 4); |
| 151 ST_UB2(src8, src9, destination, 4); |
| 152 } else { |
| 153 LD_UB8(source, 4, src0, src1, src2, src3, src4, src5, src6, src7
); |
| 154 SHF_B4_UB(src0, src1, src2, src3, 198); |
| 155 SHF_B4_UB(src4, src5, src6, src7, 198); |
| 156 ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, destinati
on, 4); |
| 157 } |
| 158 } else if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) { |
| 159 LD_UB6(source, 4, src0, src1, src2, src3, src4, src5); |
| 160 SHF_B4_UB(src0, src1, src2, src3, 198); |
| 161 SHF_B2_UB(src4, src5, 198); |
| 162 ST_UB6(src0, src1, src2, src3, src4, src5, destination, 4); |
| 163 } else if (pixelsPerRow & 16) { |
| 164 LD_UB4(source, 4, src0, src1, src2, src3); |
| 165 SHF_B4_UB(src0, src1, src2, src3, 198); |
| 166 ST_UB4(src0, src1, src2, src3, destination, 4); |
| 167 } else if (pixelsPerRow & 8) { |
| 168 LD_UB2(source, 4, src0, src1); |
| 169 SHF_B2_UB(src0, src1, 198); |
| 170 ST_UB2(src0, src1, destination, 4); |
| 171 } |
| 172 |
| 173 if (pixelsPerRow & 4) { |
| 174 src0 = LD_UB(source); |
| 175 source += 4; |
| 176 src0 = (v16u8)__msa_shf_b((v16i8)src0, 198); |
| 177 ST_UB(src0, destination); |
| 178 destination += 4; |
| 179 } |
| 180 } |
| 181 |
| 182 pixelsPerRow &= 3; |
| 183 } |
| 184 |
| 185 ALWAYS_INLINE void unpackOneRowOfRGBA4444ToRGBA8MSA(const uint16_t*& source, uin
t8_t*& destination, unsigned& pixelsPerRow) |
| 186 { |
| 187 unsigned i; |
| 188 v8u16 src0, src1, src2, src3; |
| 189 v16u8 src0rb, src0ga, src1rb, src1ga, src2rb, src2ga, src3rb, src3ga; |
| 190 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; |
| 191 v16u8 out0, out1, out2, out3, out4, out5, out6, out7; |
| 192 |
| 193 for (i = (pixelsPerRow >> 5); i--;) { |
| 194 LD_UH4(source, 8, src0, src1, src2, src3); |
| 195 SEPERATE_RGBA_FRM_16BIT_4444INPUT(src0, src0rb, src0ga); |
| 196 SEPERATE_RGBA_FRM_16BIT_4444INPUT(src1, src1rb, src1ga); |
| 197 SEPERATE_RGBA_FRM_16BIT_4444INPUT(src2, src2rb, src2ga); |
| 198 SEPERATE_RGBA_FRM_16BIT_4444INPUT(src3, src3rb, src3ga); |
| 199 ILVODEV_B2_UB(src0ga, src0rb, dst0, dst1); |
| 200 ILVODEV_B2_UB(src1ga, src1rb, dst2, dst3); |
| 201 ILVODEV_B2_UB(src2ga, src2rb, dst4, dst5); |
| 202 ILVODEV_B2_UB(src3ga, src3rb, dst6, dst7); |
| 203 ILVRL_H2_UB(dst1, dst0, out0, out1); |
| 204 ILVRL_H2_UB(dst3, dst2, out2, out3); |
| 205 ILVRL_H2_UB(dst5, dst4, out4, out5); |
| 206 ILVRL_H2_UB(dst7, dst6, out6, out7); |
| 207 ST_UB8(out0, out1, out2, out3, out4, out5, out6, out7, destination, 16); |
| 208 } |
| 209 |
| 210 if (pixelsPerRow & 31) { |
| 211 if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) { |
| 212 LD_UH3(source, 8, src0, src1, src2); |
| 213 SEPERATE_RGBA_FRM_16BIT_4444INPUT(src0, src0rb, src0ga); |
| 214 SEPERATE_RGBA_FRM_16BIT_4444INPUT(src1, src1rb, src1ga); |
| 215 SEPERATE_RGBA_FRM_16BIT_4444INPUT(src2, src2rb, src2ga); |
| 216 ILVODEV_B2_UB(src0ga, src0rb, dst0, dst1); |
| 217 ILVODEV_B2_UB(src1ga, src1rb, dst2, dst3); |
| 218 ILVODEV_B2_UB(src2ga, src2rb, dst4, dst5); |
| 219 ILVRL_H2_UB(dst1, dst0, out0, out1); |
| 220 ILVRL_H2_UB(dst3, dst2, out2, out3); |
| 221 ILVRL_H2_UB(dst5, dst4, out4, out5); |
| 222 ST_UB6(out0, out1, out2, out3, out4, out5, destination, 16); |
| 223 } else if (pixelsPerRow & 16) { |
| 224 LD_UH2(source, 8, src0, src1); |
| 225 SEPERATE_RGBA_FRM_16BIT_4444INPUT(src0, src0rb, src0ga); |
| 226 SEPERATE_RGBA_FRM_16BIT_4444INPUT(src1, src1rb, src1ga); |
| 227 ILVODEV_B2_UB(src0ga, src0rb, dst0, dst1); |
| 228 ILVODEV_B2_UB(src1ga, src1rb, dst2, dst3); |
| 229 ILVRL_H2_UB(dst1, dst0, out0, out1); |
| 230 ILVRL_H2_UB(dst3, dst2, out2, out3); |
| 231 ST_UB4(out0, out1, out2, out3, destination, 16); |
| 232 } else if (pixelsPerRow & 8) { |
| 233 src0 = LD_UH(source); |
| 234 source += 8; |
| 235 SEPERATE_RGBA_FRM_16BIT_4444INPUT(src0, src0rb, src0ga); |
| 236 ILVODEV_B2_UB(src0ga, src0rb, dst0, dst1); |
| 237 ILVRL_H2_UB(dst1, dst0, out0, out1); |
| 238 ST_UB2(out0, out1, destination, 16); |
| 239 } |
| 240 } |
| 241 |
| 242 pixelsPerRow &= 7; |
| 243 } |
| 244 |
| 245 ALWAYS_INLINE void packOneRowOfRGBA8LittleToRGBA8MSA(const uint8_t*& source, uin
t8_t*& destination, unsigned& pixelsPerRow) |
| 246 { |
| 247 unsigned i; |
| 248 v16u8 src0, src1, src2, src3, out0, out1, out2, out3; |
| 249 v16u8 src0R, src1R, src2R, src3R, src0G, src1G, src2G, src3G; |
| 250 v16u8 src0B, src1B, src2B, src3B, src0A, src1A, src2A, src3A; |
| 251 v16u8 dst0R, dst1R, dst2R, dst3R, dst0G, dst1G, dst2G, dst3G; |
| 252 v16u8 dst0B, dst1B, dst2B, dst3B, dst0A, dst1A, dst2A, dst3A; |
| 253 v16u8 dst0RG, dst1RG, dst2RG, dst3RG, dst0BA, dst1BA, dst2BA, dst3BA; |
| 254 v4f32 fsrc0R, fsrc1R, fsrc2R, fsrc3R, fsrc0G, fsrc1G, fsrc2G, fsrc3G; |
| 255 v4f32 fsrc0B, fsrc1B, fsrc2B, fsrc3B, fsrc0A, fsrc1A, fsrc2A, fsrc3A; |
| 256 v4u32 vCnst255 = (v4u32) __msa_ldi_w(255); |
| 257 v16u8 alphaMask = {0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255}; |
| 258 v4f32 vfCnst255 = __msa_ffint_u_w(vCnst255); |
| 259 |
| 260 for (i = (pixelsPerRow >> 4); i--;) { |
| 261 LD_UB4(source, 16, src0, src1, src2, src3); |
| 262 CEQI_B4_UB(src0, src1, src2, src3, 0, src0A, src1A, src2A, src3A); |
| 263 src0A = __msa_bmnz_v(src0, alphaMask, src0A); |
| 264 src1A = __msa_bmnz_v(src1, alphaMask, src1A); |
| 265 src2A = __msa_bmnz_v(src2, alphaMask, src2A); |
| 266 src3A = __msa_bmnz_v(src3, alphaMask, src3A); |
| 267 AND_V4_UB(src0A, src1A, src2A, src3A, alphaMask, src0A, src1A, src2A, sr
c3A); |
| 268 src0A = SLDI_UB(src0A, src0A, 3); |
| 269 src1A = SLDI_UB(src1A, src1A, 3); |
| 270 src2A = SLDI_UB(src2A, src2A, 3); |
| 271 src3A = SLDI_UB(src3A, src3A, 3); |
| 272 FFINTU_W4_SP(src0A, src1A, src2A, src3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A)
; |
| 273 DIV4(vfCnst255, fsrc0A, vfCnst255, fsrc1A, vfCnst255, fsrc2A, vfCnst255,
fsrc3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A); |
| 274 AND_V4_UB(src0, src1, src2, src3, vCnst255, src0R, src1R, src2R, src3R); |
| 275 FFINTU_W4_SP(src0R, src1R, src2R, src3R, fsrc0R, fsrc1R, fsrc2R, fsrc3R)
; |
| 276 MUL4(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fsrc2R, fsrc2A, fsrc3R, fsrc3A, fsr
c0R, fsrc1R, fsrc2R, fsrc3R); |
| 277 src0G = SLDI_UB(src0, src0, 1); |
| 278 src1G = SLDI_UB(src1, src1, 1); |
| 279 src2G = SLDI_UB(src2, src2, 1); |
| 280 src3G = SLDI_UB(src3, src3, 1); |
| 281 AND_V4_UB(src0G, src1G, src2G, src3G, vCnst255, src0G, src1G, src2G, src
3G); |
| 282 FFINTU_W4_SP(src0G, src1G, src2G, src3G, fsrc0G, fsrc1G, fsrc2G, fsrc3G)
; |
| 283 MUL4(fsrc0G, fsrc0A, fsrc1G, fsrc1A, fsrc2G, fsrc2A, fsrc3G, fsrc3A, fsr
c0G, fsrc1G, fsrc2G, fsrc3G); |
| 284 src0B = SLDI_UB(src0, src0, 2); |
| 285 src1B = SLDI_UB(src1, src1, 2); |
| 286 src2B = SLDI_UB(src2, src2, 2); |
| 287 src3B = SLDI_UB(src3, src3, 2); |
| 288 AND_V4_UB(src0B, src1B, src2B, src3B, vCnst255, src0B, src1B, src2B, src
3B); |
| 289 FFINTU_W4_SP(src0B, src1B, src2B, src3B, fsrc0B, fsrc1B, fsrc2B, fsrc3B)
; |
| 290 MUL4(fsrc0B, fsrc0A, fsrc1B, fsrc1A, fsrc2B, fsrc2A, fsrc3B, fsrc3A, fsr
c0B, fsrc1B, fsrc2B, fsrc3B); |
| 291 FTRUNCU_W4_UB(fsrc0R, fsrc1R, fsrc2R, fsrc3R, dst0R, dst1R, dst2R, dst3R
); |
| 292 FTRUNCU_W4_UB(fsrc0G, fsrc1G, fsrc2G, fsrc3G, dst0G, dst1G, dst2G, dst3G
); |
| 293 FTRUNCU_W4_UB(fsrc0B, fsrc1B, fsrc2B, fsrc3B, dst0B, dst1B, dst2B, dst3B
); |
| 294 dst0A = SLDI_UB(src0, src0, 3); |
| 295 dst1A = SLDI_UB(src1, src1, 3); |
| 296 dst2A = SLDI_UB(src2, src2, 3); |
| 297 dst3A = SLDI_UB(src3, src3, 3); |
| 298 ILVEV_B2_UB(dst0R, dst0G, dst1R, dst1G, dst0RG, dst1RG); |
| 299 ILVEV_B2_UB(dst2R, dst2G, dst3R, dst3G, dst2RG, dst3RG); |
| 300 ILVEV_B2_UB(dst0B, dst0A, dst1B, dst1A, dst0BA, dst1BA); |
| 301 ILVEV_B2_UB(dst2B, dst2A, dst3B, dst3A, dst2BA, dst3BA); |
| 302 ILVEV_H2_UB(dst0RG, dst0BA, dst1RG, dst1BA, out0, out1); |
| 303 ILVEV_H2_UB(dst2RG, dst2BA, dst3RG, dst3BA, out2, out3); |
| 304 ST_UB4(out0, out1, out2, out3, destination, 16); |
| 305 } |
| 306 |
| 307 if (pixelsPerRow & 15) { |
| 308 if (pixelsPerRow & 8) { |
| 309 LD_UB2(source, 16, src0, src1); |
| 310 CEQI_B2_UB(src0, src1, 0, src0A, src1A); |
| 311 src0A = __msa_bmnz_v(src0, alphaMask, src0A); |
| 312 src1A = __msa_bmnz_v(src1, alphaMask, src1A); |
| 313 AND_V2_UB(src0A, src1A, alphaMask, src0A, src1A); |
| 314 src0A = SLDI_UB(src0A, src0A, 3); |
| 315 src1A = SLDI_UB(src1A, src1A, 3); |
| 316 FFINTU_W2_SP(src0A, src1A, fsrc0A, fsrc1A); |
| 317 DIV2(vfCnst255, fsrc0A, vfCnst255, fsrc1A, fsrc0A, fsrc1A); |
| 318 AND_V2_UB(src0, src1, vCnst255, src0R, src1R); |
| 319 FFINTU_W2_SP(src0R, src1R, fsrc0R, fsrc1R); |
| 320 MUL2(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fsrc0R, fsrc1R); |
| 321 src0G = SLDI_UB(src0, src0, 1); |
| 322 src1G = SLDI_UB(src1, src1, 1); |
| 323 AND_V2_UB(src0G, src1G, vCnst255, src0G, src1G); |
| 324 FFINTU_W2_SP(src0G, src1G, fsrc0G, fsrc1G); |
| 325 MUL2(fsrc0G, fsrc0A, fsrc1G, fsrc1A, fsrc0G, fsrc1G); |
| 326 src0B = SLDI_UB(src0, src0, 2); |
| 327 src1B = SLDI_UB(src1, src1, 2); |
| 328 AND_V2_UB(src0B, src1B, vCnst255, src0B, src1B); |
| 329 FFINTU_W2_SP(src0B, src1B, fsrc0B, fsrc1B); |
| 330 MUL2(fsrc0B, fsrc0A, fsrc1B, fsrc1A, fsrc0B, fsrc1B); |
| 331 FTRUNCU_W2_UB(fsrc0R, fsrc1R, dst0R, dst1R); |
| 332 FTRUNCU_W2_UB(fsrc0G, fsrc1G, dst0G, dst1G); |
| 333 FTRUNCU_W2_UB(fsrc0B, fsrc1B, dst0B, dst1B); |
| 334 dst0A = SLDI_UB(src0, src0, 3); |
| 335 dst1A = SLDI_UB(src1, src1, 3); |
| 336 ILVEV_B2_UB(dst0R, dst0G, dst1R, dst1G, dst0RG, dst1RG); |
| 337 ILVEV_B2_UB(dst0B, dst0A, dst1B, dst1A, dst0BA, dst1BA); |
| 338 ILVEV_H2_UB(dst0RG, dst0BA, dst1RG, dst1BA, out0, out1); |
| 339 ST_UB2(out0, out1, destination, 16); |
| 340 } |
| 341 |
| 342 if (pixelsPerRow & 4) { |
| 343 src0 = LD_UB(source); |
| 344 source += 16; |
| 345 src0A = CEQI_B(src0, 0); |
| 346 src0A = __msa_bmnz_v(src0, alphaMask, src0A); |
| 347 src0A = src0A & alphaMask; |
| 348 src0A = SLDI_UB(src0A, src0A, 3); |
| 349 fsrc0A = __msa_ffint_u_w((v4u32)src0A); |
| 350 fsrc0A = vfCnst255 / fsrc0A; |
| 351 src0R = src0 & (v16u8)vCnst255; |
| 352 fsrc0R = __msa_ffint_u_w((v4u32)src0R); |
| 353 fsrc0R *= fsrc0A; |
| 354 src0G = SLDI_UB(src0, src0, 1); |
| 355 src0G &= (v16u8)vCnst255; |
| 356 fsrc0G = __msa_ffint_u_w((v4u32)src0G); |
| 357 fsrc0G *= fsrc0A; |
| 358 src0B = SLDI_UB(src0, src0, 2); |
| 359 src0B &= (v16u8)vCnst255; |
| 360 fsrc0B = __msa_ffint_u_w((v4u32)src0B); |
| 361 fsrc0B *= fsrc0A; |
| 362 dst0R = (v16u8)__msa_ftrunc_u_w(fsrc0R); |
| 363 dst0G = (v16u8)__msa_ftrunc_u_w(fsrc0G); |
| 364 dst0B = (v16u8)__msa_ftrunc_u_w(fsrc0B); |
| 365 dst0A = SLDI_UB(src0, src0, 3); |
| 366 dst0RG = (v16u8)__msa_ilvev_b((v16i8)dst0G, (v16i8)dst0R); |
| 367 dst0BA = (v16u8)__msa_ilvev_b((v16i8)dst0A, (v16i8)dst0B); |
| 368 out0 = (v16u8)__msa_ilvev_h((v8i16)dst0BA, (v8i16)dst0RG); |
| 369 ST_UB(out0, destination); |
| 370 destination += 16; |
| 371 } |
| 372 } |
| 373 |
| 374 pixelsPerRow &= 3; |
| 375 } |
| 376 |
| 377 ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort5551MSA(const uint8_t*& sourc
e, uint16_t*& destination, unsigned& pixelsPerRow) |
| 378 { |
| 379 unsigned i; |
| 380 v16u8 src0, src1, src2, src3, src4, src5, src6, src7; |
| 381 v16u8 src0r, src0b, src1r, src1b, src2r, src2b, src3r, src3b; |
| 382 v16u8 src0g = { 0 }, src0a = { 0 }, src1g = { 0 }, src1a = { 0 }; |
| 383 v16u8 src2g = { 0 }, src2a = { 0 }, src3g = { 0 }, src3a = { 0 }; |
| 384 v16u8 src0gt, src1gt, src2gt, src3gt; |
| 385 v8u16 dst0, dst1, dst2, dst3; |
| 386 |
| 387 for (i = (pixelsPerRow >> 5); i--;) { |
| 388 LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src7); |
| 389 PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0r, src1r
, src2r, src3r); |
| 390 PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0b, src1b
, src2b, src3b); |
| 391 SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1); |
| 392 SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1); |
| 393 SLDI_B2_UB(src0a, src1a, src0b, src1b, src0a, src1a, 1); |
| 394 SLDI_B2_UB(src2a, src3a, src2b, src3b, src2a, src3a, 1); |
| 395 src0gt = (v16u8)SLLI_B(src0g, 3); |
| 396 src1gt = (v16u8)SLLI_B(src1g, 3); |
| 397 src2gt = (v16u8)SLLI_B(src2g, 3); |
| 398 src3gt = (v16u8)SLLI_B(src3g, 3); |
| 399 SRLI_B4_UB(src0g, src1g, src2g, src3g, 5); |
| 400 SRLI_B4_UB(src0b, src1b, src2b, src3b, 2); |
| 401 SRLI_B4_UB(src0a, src1a, src2a, src3a, 7); |
| 402 BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2); |
| 403 BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2); |
| 404 BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 5); |
| 405 BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 5); |
| 406 BINSRI_B2_UB(src0b, src0a, src1b, src1a, src0b, src1b, 0); |
| 407 BINSRI_B2_UB(src2b, src2a, src3b, src3a, src2b, src3b, 0); |
| 408 ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1); |
| 409 ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3); |
| 410 ST_UH4(dst0, dst1, dst2, dst3, destination, 8); |
| 411 } |
| 412 |
| 413 if (pixelsPerRow & 31) { |
| 414 if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) { |
| 415 LD_UB6(source, 16, src0, src1, src2, src3, src4, src5); |
| 416 PCKEV_H3_UB(src1, src0, src3, src2, src5, src4, src0r, src1r, src2r)
; |
| 417 PCKOD_H3_UB(src1, src0, src3, src2, src5, src4, src0b, src1b, src2b)
; |
| 418 SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1); |
| 419 SLDI_B2_UB(src2g, src0a, src2r, src0b, src2g, src0a, 1); |
| 420 SLDI_B2_UB(src1a, src2a, src1b, src2b, src1a, src2a, 1); |
| 421 src0gt = (v16u8)SLLI_B(src0g, 3); |
| 422 src1gt = (v16u8)SLLI_B(src1g, 3); |
| 423 src2gt = (v16u8)SLLI_B(src2g, 3); |
| 424 SRLI_B3_UB(src0g, src1g, src2g, 5); |
| 425 SRLI_B3_UB(src0b, src1b, src2b, 2); |
| 426 SRLI_B3_UB(src0a, src1a, src2a, 7); |
| 427 BINSRI_B3_UB(src0r, src0g, src1r, src1g, src2r, src2g, src0r, src1r,
src2r, 2); |
| 428 BINSRI_B3_UB(src0gt, src0b, src1gt, src1b, src2gt, src2b, src0b, src
1b, src2b, 5); |
| 429 BINSRI_B3_UB(src0b, src0a, src1b, src1a, src2b, src2a, src0b, src1b,
src2b, 0); |
| 430 ILVEV_B3_UH(src0b, src0r, src1b, src1r, src2b, src2r, dst0, dst1, ds
t2); |
| 431 ST_UH3(dst0, dst1, dst2, destination, 8); |
| 432 } else if (pixelsPerRow & 16) { |
| 433 LD_UB4(source, 16, src0, src1, src2, src3); |
| 434 PCKEV_H2_UB(src1, src0, src3, src2, src0r, src1r); |
| 435 PCKOD_H2_UB(src1, src0, src3, src2, src0b, src1b); |
| 436 SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1); |
| 437 SLDI_B2_UB(src0a, src1a, src0b, src1b, src0a, src1a, 1); |
| 438 src0gt = (v16u8)SLLI_B(src0g, 3); |
| 439 src1gt = (v16u8)SLLI_B(src1g, 3); |
| 440 SRLI_B2_UB(src0g, src1g, 5); |
| 441 SRLI_B2_UB(src0b, src1b, 2); |
| 442 SRLI_B2_UB(src0a, src1a, 7); |
| 443 BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2); |
| 444 BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 5); |
| 445 BINSRI_B2_UB(src0b, src0a, src1b, src1a, src0b, src1b, 0); |
| 446 ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1); |
| 447 ST_UH2(dst0, dst1, destination, 8); |
| 448 } else if (pixelsPerRow & 8) { |
| 449 LD_UB2(source, 16, src0, src1); |
| 450 src0r = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0); |
| 451 src0b = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0); |
| 452 SLDI_B2_UB(src0g, src0a, src0r, src0b, src0g, src0a, 1); |
| 453 src0gt = (v16u8)SLLI_B(src0g, 3); |
| 454 src0g = (v16u8)SRLI_B(src0g, 5); |
| 455 src0b = (v16u8)SRLI_B(src0b, 2); |
| 456 src0a = (v16u8)SRLI_B(src0a, 7); |
| 457 src0r = (v16u8)__msa_binsri_b((v16u8)src0r, (v16u8)src0g, 2); |
| 458 src0b = (v16u8)__msa_binsri_b((v16u8)src0gt, (v16u8)src0b, 5); |
| 459 src0b = (v16u8)__msa_binsri_b((v16u8)src0b, (v16u8)src0a, 0); |
| 460 dst0 = (v8u16)__msa_ilvev_b((v16i8)src0r, (v16i8)src0b); |
| 461 ST_UH(dst0, destination); |
| 462 destination += 8; |
| 463 } |
| 464 } |
| 465 |
| 466 pixelsPerRow &= 7; |
| 467 } |
| 468 |
| 469 ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort565MSA(const uint8_t*& source
, uint16_t*& destination, unsigned& pixelsPerRow) |
| 470 { |
| 471 unsigned i; |
| 472 v16u8 src0, src1, src2, src3, src4, src5, src6, src7; |
| 473 v16u8 src0r, src0b, src1r, src1b, src2r, src2b, src3r, src3b; |
| 474 v16u8 src0g = { 0 }, src1g = { 0 }, src2g = { 0 }, src3g = { 0 }; |
| 475 v16u8 src0gt, src1gt, src2gt, src3gt; |
| 476 v8u16 dst0, dst1, dst2, dst3; |
| 477 |
| 478 for (i = (pixelsPerRow >> 6); i--;) { |
| 479 LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src7); |
| 480 PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0r, src1r
, src2r, src3r); |
| 481 PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0b, src1b
, src2b, src3b); |
| 482 SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1); |
| 483 SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1); |
| 484 src0gt = (v16u8)SLLI_B(src0g, 3); |
| 485 src1gt = (v16u8)SLLI_B(src1g, 3); |
| 486 src2gt = (v16u8)SLLI_B(src2g, 3); |
| 487 src3gt = (v16u8)SLLI_B(src3g, 3); |
| 488 SRLI_B4_UB(src0g, src1g, src2g, src3g, 5); |
| 489 SRLI_B4_UB(src0b, src1b, src2b, src3b, 3); |
| 490 BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2); |
| 491 BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2); |
| 492 BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4); |
| 493 BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 4); |
| 494 ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1); |
| 495 ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3); |
| 496 LD_UB4(source, 16, src0, src1, src2, src3); |
| 497 ST_UH4(dst0, dst1, dst2, dst3, destination, 8); |
| 498 LD_UB4(source, 16, src4, src5, src6, src7); |
| 499 PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0r, src1r
, src2r, src3r); |
| 500 PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0b, src1b
, src2b, src3b); |
| 501 SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1); |
| 502 SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1); |
| 503 src0gt = (v16u8)SLLI_B(src0g, 3); |
| 504 src1gt = (v16u8)SLLI_B(src1g, 3); |
| 505 src2gt = (v16u8)SLLI_B(src2g, 3); |
| 506 src3gt = (v16u8)SLLI_B(src3g, 3); |
| 507 SRLI_B4_UB(src0g, src1g, src2g, src3g, 5); |
| 508 SRLI_B4_UB(src0b, src1b, src2b, src3b, 3); |
| 509 BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2); |
| 510 BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2); |
| 511 BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4); |
| 512 BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 4); |
| 513 ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1); |
| 514 ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3); |
| 515 ST_UH4(dst0, dst1, dst2, dst3, destination, 8); |
| 516 } |
| 517 |
| 518 if (pixelsPerRow & 63) { |
| 519 if (pixelsPerRow & 32) { |
| 520 if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) { |
| 521 LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src
7); |
| 522 PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0
r, src1r, src2r, src3r); |
| 523 PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0
b, src1b, src2b, src3b); |
| 524 SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1); |
| 525 SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1); |
| 526 src0gt = (v16u8)SLLI_B(src0g, 3); |
| 527 src1gt = (v16u8)SLLI_B(src1g, 3); |
| 528 src2gt = (v16u8)SLLI_B(src2g, 3); |
| 529 src3gt = (v16u8)SLLI_B(src3g, 3); |
| 530 SRLI_B4_UB(src0g, src1g, src2g, src3g, 5); |
| 531 SRLI_B4_UB(src0b, src1b, src2b, src3b, 3); |
| 532 BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2); |
| 533 BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2); |
| 534 BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4); |
| 535 BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 4); |
| 536 ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1); |
| 537 ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3); |
| 538 LD_UB6(source, 16, src0, src1, src2, src3, src4, src5); |
| 539 ST_UH4(dst0, dst1, dst2, dst3, destination, 8); |
| 540 PCKEV_H3_UB(src1, src0, src3, src2, src5, src4, src0r, src1r, sr
c2r); |
| 541 PCKOD_H3_UB(src1, src0, src3, src2, src5, src4, src0b, src1b, sr
c2b); |
| 542 src0g = SLDI_UB(src0g, src0r, 1); |
| 543 src1g = SLDI_UB(src1g, src1r, 1); |
| 544 src2g = SLDI_UB(src2g, src2r, 1); |
| 545 src0gt = (v16u8)SLLI_B(src0g, 3); |
| 546 src1gt = (v16u8)SLLI_B(src1g, 3); |
| 547 src2gt = (v16u8)SLLI_B(src2g, 3); |
| 548 SRLI_B3_UB(src0g, src1g, src2g, 5); |
| 549 SRLI_B3_UB(src0b, src1b, src2b, 3); |
| 550 BINSRI_B3_UB(src0r, src0g, src1r, src1g, src2r, src2g, src0r, sr
c1r, src2r, 2); |
| 551 BINSRI_B3_UB(src0gt, src0b, src1gt, src1b, src2gt, src2b, src0b,
src1b, src2b, 4); |
| 552 ILVEV_B3_UH(src0b, src0r, src1b, src1r, src2b, src2r, dst0, dst1
, dst2); |
| 553 ST_UH3(dst0, dst1, dst2, destination, 8); |
| 554 } else if (pixelsPerRow & 16) { |
| 555 LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src
7); |
| 556 PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0
r, src1r, src2r, src3r); |
| 557 PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0
b, src1b, src2b, src3b); |
| 558 SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1); |
| 559 SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1); |
| 560 src0gt = (v16u8)SLLI_B(src0g, 3); |
| 561 src1gt = (v16u8)SLLI_B(src1g, 3); |
| 562 src2gt = (v16u8)SLLI_B(src2g, 3); |
| 563 src3gt = (v16u8)SLLI_B(src3g, 3); |
| 564 SRLI_B4_UB(src0g, src1g, src2g, src3g, 5); |
| 565 SRLI_B4_UB(src0b, src1b, src2b, src3b, 3); |
| 566 BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2); |
| 567 BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2); |
| 568 BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4); |
| 569 BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 4); |
| 570 ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1); |
| 571 ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3); |
| 572 LD_UB4(source, 16, src0, src1, src2, src3); |
| 573 ST_UH4(dst0, dst1, dst2, dst3, destination, 8); |
| 574 PCKEV_H2_UB(src1, src0, src3, src2, src0r, src1r); |
| 575 PCKOD_H2_UB(src1, src0, src3, src2, src0b, src1b); |
| 576 SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1); |
| 577 src0gt = (v16u8)SLLI_B(src0g, 3); |
| 578 src1gt = (v16u8)SLLI_B(src1g, 3); |
| 579 SRLI_B2_UB(src0g, src1g, 5); |
| 580 SRLI_B2_UB(src0b, src1b, 3); |
| 581 BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2); |
| 582 BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4); |
| 583 ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1); |
| 584 ST_UH2(dst0, dst1, destination, 8); |
| 585 } else if (pixelsPerRow & 8) { |
| 586 LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src
7); |
| 587 PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0
r, src1r, src2r, src3r); |
| 588 PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0
b, src1b, src2b, src3b); |
| 589 SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1); |
| 590 SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1); |
| 591 src0gt = (v16u8)SLLI_B(src0g, 3); |
| 592 src1gt = (v16u8)SLLI_B(src1g, 3); |
| 593 src2gt = (v16u8)SLLI_B(src2g, 3); |
| 594 src3gt = (v16u8)SLLI_B(src3g, 3); |
| 595 SRLI_B4_UB(src0g, src1g, src2g, src3g, 5); |
| 596 SRLI_B4_UB(src0b, src1b, src2b, src3b, 3); |
| 597 BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2); |
| 598 BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2); |
| 599 BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4); |
| 600 BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 4); |
| 601 ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1); |
| 602 ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3); |
| 603 LD_UB2(source, 16, src0, src1); |
| 604 ST_UH4(dst0, dst1, dst2, dst3, destination, 8); |
| 605 src0r = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0); |
| 606 src0b = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0); |
| 607 src0g = SLDI_UB(src0g, src0r, 1); |
| 608 src0gt = (v16u8)SLLI_B(src0g, 3); |
| 609 src0g = (v16u8)SRLI_B(src0g, 5); |
| 610 src0b = (v16u8)SRLI_B(src0b, 3); |
| 611 src0r = (v16u8)__msa_binsri_b((v16u8)src0r, (v16u8)src0g, 2); |
| 612 src0b = (v16u8)__msa_binsri_b((v16u8)src0gt, (v16u8)src0b, 4); |
| 613 dst0 = (v8u16)__msa_ilvev_b((v16i8)src0r, (v16i8)src0b); |
| 614 ST_UH(dst0, destination); |
| 615 destination += 8; |
| 616 } else { |
| 617 LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src
7); |
| 618 PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0
r, src1r, src2r, src3r); |
| 619 PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0
b, src1b, src2b, src3b); |
| 620 SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1); |
| 621 SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1); |
| 622 src0gt = (v16u8)SLLI_B(src0g, 3); |
| 623 src1gt = (v16u8)SLLI_B(src1g, 3); |
| 624 src2gt = (v16u8)SLLI_B(src2g, 3); |
| 625 src3gt = (v16u8)SLLI_B(src3g, 3); |
| 626 SRLI_B4_UB(src0g, src1g, src2g, src3g, 5); |
| 627 SRLI_B4_UB(src0b, src1b, src2b, src3b, 3); |
| 628 BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2); |
| 629 BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2); |
| 630 BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4); |
| 631 BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 4); |
| 632 ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1); |
| 633 ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3); |
| 634 ST_UH4(dst0, dst1, dst2, dst3, destination, 8); |
| 635 } |
| 636 } else if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) { |
| 637 LD_UB6(source, 16, src0, src1, src2, src3, src4, src5); |
| 638 PCKEV_H3_UB(src1, src0, src3, src2, src5, src4, src0r, src1r, src2r)
; |
| 639 PCKOD_H3_UB(src1, src0, src3, src2, src5, src4, src0b, src1b, src2b)
; |
| 640 src0g = SLDI_UB(src0g, src0r, 1); |
| 641 src1g = SLDI_UB(src1g, src1r, 1); |
| 642 src2g = SLDI_UB(src2g, src2r, 1); |
| 643 src0gt = (v16u8)SLLI_B(src0g, 3); |
| 644 src1gt = (v16u8)SLLI_B(src1g, 3); |
| 645 src2gt = (v16u8)SLLI_B(src2g, 3); |
| 646 SRLI_B3_UB(src0g, src1g, src2g, 5); |
| 647 SRLI_B3_UB(src0b, src1b, src2b, 3); |
| 648 BINSRI_B3_UB(src0r, src0g, src1r, src1g, src2r, src2g, src0r, src1r,
src2r, 2); |
| 649 BINSRI_B3_UB(src0gt, src0b, src1gt, src1b, src2gt, src2b, src0b, src
1b, src2b, 4); |
| 650 ILVEV_B3_UH(src0b, src0r, src1b, src1r, src2b, src2r, dst0, dst1, ds
t2); |
| 651 ST_UH3(dst0, dst1, dst2, destination, 8); |
| 652 } else if (pixelsPerRow & 16) { |
| 653 LD_UB4(source, 16, src0, src1, src2, src3); |
| 654 PCKEV_H2_UB(src1, src0, src3, src2, src0r, src1r); |
| 655 PCKOD_H2_UB(src1, src0, src3, src2, src0b, src1b); |
| 656 SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1); |
| 657 src0gt = (v16u8)SLLI_B(src0g, 3); |
| 658 src1gt = (v16u8)SLLI_B(src1g, 3); |
| 659 SRLI_B2_UB(src0g, src1g, 5); |
| 660 SRLI_B2_UB(src0b, src1b, 3); |
| 661 BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2); |
| 662 BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4); |
| 663 ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1); |
| 664 ST_UH2(dst0, dst1, destination, 8); |
| 665 } else if (pixelsPerRow & 8) { |
| 666 LD_UB2(source, 16, src0, src1); |
| 667 src0r = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0); |
| 668 src0b = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0); |
| 669 src0g = SLDI_UB(src0g, src0r, 1); |
| 670 src0gt = (v16u8)SLLI_B(src0g, 3); |
| 671 src0g = (v16u8)SRLI_B(src0g, 5); |
| 672 src0b = (v16u8)SRLI_B(src0b, 3); |
| 673 src0r = (v16u8)__msa_binsri_b((v16u8)src0r, (v16u8)src0g, 2); |
| 674 src0b = (v16u8)__msa_binsri_b((v16u8)src0gt, (v16u8)src0b, 4); |
| 675 dst0 = (v8u16)__msa_ilvev_b((v16i8)src0r, (v16i8)src0b); |
| 676 ST_UH(dst0, destination); |
| 677 destination += 8; |
| 678 } |
| 679 } |
| 680 |
| 681 pixelsPerRow &= 7; |
| 682 } |
| 102 } // namespace SIMD | 683 } // namespace SIMD |
| 103 | 684 |
| 104 } // namespace blink | 685 } // namespace blink |
| 105 | 686 |
| 106 #endif // HAVE(MIPS_MSA_INTRINSICS) | 687 #endif // HAVE(MIPS_MSA_INTRINSICS) |
| 107 | 688 |
| 108 #endif // WebGLImageConversionMSA_h | 689 #endif // WebGLImageConversionMSA_h |
| OLD | NEW |