Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(74)

Unified Diff: third_party/WebKit/Source/platform/graphics/cpu/mips/WebGLImageConversionMSA.h

Issue 2304183002: Add MSA (MIPS SIMD Arch) optimized WebGL image conversion functions (Closed)
Patch Set: Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/WebKit/Source/platform/graphics/cpu/mips/WebGLImageConversionMSA.h
diff --git a/third_party/WebKit/Source/platform/graphics/cpu/mips/WebGLImageConversionMSA.h b/third_party/WebKit/Source/platform/graphics/cpu/mips/WebGLImageConversionMSA.h
index 0019526a7e93184c3a3523716b27a349918b4f56..eb93aca7f98766be6da8768e8fced1e53182d64f 100644
--- a/third_party/WebKit/Source/platform/graphics/cpu/mips/WebGLImageConversionMSA.h
+++ b/third_party/WebKit/Source/platform/graphics/cpu/mips/WebGLImageConversionMSA.h
@@ -26,6 +26,12 @@ namespace SIMD {
out_b = ((v8u16)SLLI_H(out_b, 3)) | (out_b & cnst7); \
out_a = (v8u16)CEQI_H((v8i16)out_a, 1); \
+#define SEPERATE_RGBA_FRM_16BIT_4444INPUT(in, out_rb, out_ga) /* Splits every byte of `in` into its upper nibble (out_rb) and lower nibble (out_ga), then widens each nibble to a full byte by repeating it (0xN -> 0xNN). SRLI_B/SLLI_B/ANDI_B are defined elsewhere and are assumed to be per-byte shift/and-immediate wrappers - TODO confirm. */ \
+ out_rb = (v16u8)SRLI_B((v16u8)in, 4); /* upper nibble of each byte moved down to bits 3..0 */ \
+ out_ga = ANDI_B((v16u8)in, 15); /* lower nibble of each byte (mask 15 == 0x0F) */ \
+ out_rb = ((v16u8)SLLI_B(out_rb, 4)) | out_rb; /* place the same nibble in bits 7..4 as well */ \
+ out_ga = ((v16u8)SLLI_B(out_ga, 4)) | out_ga; /* same nibble replication for the low-nibble lanes */ \
+
ALWAYS_INLINE void unpackOneRowOfRGBA5551ToRGBA8MSA(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
unsigned i;
@@ -99,6 +105,581 @@ ALWAYS_INLINE void unpackOneRowOfRGBA5551ToRGBA8MSA(const uint16_t*& source, uin
pixelsPerRow &= 7;
}
+ALWAYS_INLINE void unpackOneRowOfBGRA8LittleToRGBA8MSA(const uint32_t*& source, uint32_t*& destination, unsigned& pixelsPerRow) // MSA fast path: byte-shuffles whole vectors of 32-bit pixels from source into destination and leaves only the leftover pixel count (0..3) in pixelsPerRow.
+{ // All three parameters are references, so the caller observes the updated values. NOTE(review): the multi-vector LD_UB*/ST_UB* macros are assumed to advance the pointer they are handed (they are invoked back-to-back on the same pointer) - confirm in the MSA macro header.
+ unsigned i;
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+ v16u8 src8, src9, src10, src11, src12, src13, src14, src15;
+
+ for (i = (pixelsPerRow >> 6); i--;) { // one iteration per 64 pixels: 16 vectors of 4 pixels each
+ LD_UB8(source, 4, src0, src1, src2, src3, src4, src5, src6, src7);
+ LD_UB8(source, 4, src8, src9, src10, src11, src12, src13, src14, src15);
+ SHF_B4_UB(src0, src1, src2, src3, 198); // 198 == 0xC6 == 0b11'00'01'10; per the MSA shf.b encoding this should select bytes 2,1,0,3 of every 4-byte group, i.e. swap bytes 0 and 2 - TODO confirm
+ SHF_B4_UB(src4, src5, src6, src7, 198);
+ SHF_B4_UB(src8, src9, src10, src11, 198);
+ SHF_B4_UB(src12, src13, src14, src15, 198);
+ ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, destination, 4);
+ ST_UB8(src8, src9, src10, src11, src12, src13, src14, src15, destination, 4);
+ }
+
+ if (pixelsPerRow & 63) { // fewer than 64 pixels left: dispatch on the 32/16/8/4 bits of the count
+ if (pixelsPerRow & 32) {
+ if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) { // 32 + 16 + 8 = 56 pixels -> 14 vectors
+ LD_UB8(source, 4, src0, src1, src2, src3, src4, src5, src6, src7);
+ LD_UB6(source, 4, src8, src9, src10, src11, src12, src13);
+ SHF_B4_UB(src0, src1, src2, src3, 198);
+ SHF_B4_UB(src4, src5, src6, src7, 198);
+ SHF_B4_UB(src8, src9, src10, src11, 198);
+ SHF_B2_UB(src12, src13, 198);
+ ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, destination, 4);
+ ST_UB6(src8, src9, src10, src11, src12, src13, destination, 4);
+ } else if (pixelsPerRow & 16) { // 32 + 16 = 48 pixels -> 12 vectors
+ LD_UB8(source, 4, src0, src1, src2, src3, src4, src5, src6, src7);
+ LD_UB4(source, 4, src8, src9, src10, src11);
+ SHF_B4_UB(src0, src1, src2, src3, 198);
+ SHF_B4_UB(src4, src5, src6, src7, 198);
+ SHF_B4_UB(src8, src9, src10, src11, 198);
+ ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, destination, 4);
+ ST_UB4(src8, src9, src10, src11, destination, 4);
+ } else if (pixelsPerRow & 8) { // 32 + 8 = 40 pixels -> 10 vectors
+ LD_UB8(source, 4, src0, src1, src2, src3, src4, src5, src6, src7);
+ LD_UB2(source, 4, src8, src9);
+ SHF_B4_UB(src0, src1, src2, src3, 198);
+ SHF_B4_UB(src4, src5, src6, src7, 198);
+ SHF_B2_UB(src8, src9, 198);
+ ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, destination, 4);
+ ST_UB2(src8, src9, destination, 4);
+ } else { // exactly the 32-pixel bit -> 8 vectors
+ LD_UB8(source, 4, src0, src1, src2, src3, src4, src5, src6, src7);
+ SHF_B4_UB(src0, src1, src2, src3, 198);
+ SHF_B4_UB(src4, src5, src6, src7, 198);
+ ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, destination, 4);
+ }
+ } else if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) { // 16 + 8 = 24 pixels -> 6 vectors
+ LD_UB6(source, 4, src0, src1, src2, src3, src4, src5);
+ SHF_B4_UB(src0, src1, src2, src3, 198);
+ SHF_B2_UB(src4, src5, 198);
+ ST_UB6(src0, src1, src2, src3, src4, src5, destination, 4);
+ } else if (pixelsPerRow & 16) { // 16 pixels -> 4 vectors
+ LD_UB4(source, 4, src0, src1, src2, src3);
+ SHF_B4_UB(src0, src1, src2, src3, 198);
+ ST_UB4(src0, src1, src2, src3, destination, 4);
+ } else if (pixelsPerRow & 8) { // 8 pixels -> 2 vectors
+ LD_UB2(source, 4, src0, src1);
+ SHF_B2_UB(src0, src1, 198);
+ ST_UB2(src0, src1, destination, 4);
+ }
+
+ if (pixelsPerRow & 4) { // one more vector of 4 pixels; checked independently of the branches above
+ src0 = LD_UB(source);
+ source += 4; // the single-vector load/store forms are followed by a manual pointer bump of 4 pixels
+ src0 = (v16u8)__msa_shf_b((v16i8)src0, 198);
+ ST_UB(src0, destination);
+ destination += 4;
+ }
+ }
+
+ pixelsPerRow &= 3; // 0..3 pixels are not converted here; presumably the caller finishes them with scalar code - verify against caller
+}
+
+ALWAYS_INLINE void unpackOneRowOfRGBA4444ToRGBA8MSA(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) // MSA fast path: expands 16-bit pixels (four 4-bit fields) into 4 bytes per pixel, 8 pixels per input vector, and leaves the leftover count (0..7) in pixelsPerRow.
+{ // All three parameters are references and are updated in place. NOTE(review): LD_UH2/3/4 and ST_UB2/4/6/8 are assumed to advance the pointer they are given - confirm in the MSA macro header.
+ unsigned i;
+ v8u16 src0, src1, src2, src3;
+ v16u8 src0rb, src0ga, src1rb, src1ga, src2rb, src2ga, src3rb, src3ga;
+ v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+ v16u8 out0, out1, out2, out3, out4, out5, out6, out7;
+
+ for (i = (pixelsPerRow >> 5); i--;) { // one iteration per 32 pixels: 4 input vectors -> 8 output vectors
+ LD_UH4(source, 8, src0, src1, src2, src3);
+ SEPERATE_RGBA_FRM_16BIT_4444INPUT(src0, src0rb, src0ga); // high nibbles -> *rb, low nibbles -> *ga, each replicated to a full byte
+ SEPERATE_RGBA_FRM_16BIT_4444INPUT(src1, src1rb, src1ga);
+ SEPERATE_RGBA_FRM_16BIT_4444INPUT(src2, src2rb, src2ga);
+ SEPERATE_RGBA_FRM_16BIT_4444INPUT(src3, src3rb, src3ga);
+ ILVODEV_B2_UB(src0ga, src0rb, dst0, dst1); // re-interleaves the two nibble vectors; exact lane order depends on ILVODEV_B2_UB, defined elsewhere
+ ILVODEV_B2_UB(src1ga, src1rb, dst2, dst3);
+ ILVODEV_B2_UB(src2ga, src2rb, dst4, dst5);
+ ILVODEV_B2_UB(src3ga, src3rb, dst6, dst7);
+ ILVRL_H2_UB(dst1, dst0, out0, out1); // merges the halfword pairs into two output vectors per input vector (ILVRL_H2_UB is defined elsewhere)
+ ILVRL_H2_UB(dst3, dst2, out2, out3);
+ ILVRL_H2_UB(dst5, dst4, out4, out5);
+ ILVRL_H2_UB(dst7, dst6, out6, out7);
+ ST_UB8(out0, out1, out2, out3, out4, out5, out6, out7, destination, 16);
+ }
+
+ if (pixelsPerRow & 31) { // fewer than 32 pixels left: dispatch on the 16/8 bits of the count
+ if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) { // 24 pixels: 3 input vectors -> 6 output vectors
+ LD_UH3(source, 8, src0, src1, src2);
+ SEPERATE_RGBA_FRM_16BIT_4444INPUT(src0, src0rb, src0ga);
+ SEPERATE_RGBA_FRM_16BIT_4444INPUT(src1, src1rb, src1ga);
+ SEPERATE_RGBA_FRM_16BIT_4444INPUT(src2, src2rb, src2ga);
+ ILVODEV_B2_UB(src0ga, src0rb, dst0, dst1);
+ ILVODEV_B2_UB(src1ga, src1rb, dst2, dst3);
+ ILVODEV_B2_UB(src2ga, src2rb, dst4, dst5);
+ ILVRL_H2_UB(dst1, dst0, out0, out1);
+ ILVRL_H2_UB(dst3, dst2, out2, out3);
+ ILVRL_H2_UB(dst5, dst4, out4, out5);
+ ST_UB6(out0, out1, out2, out3, out4, out5, destination, 16);
+ } else if (pixelsPerRow & 16) { // 16 pixels: 2 input vectors -> 4 output vectors
+ LD_UH2(source, 8, src0, src1);
+ SEPERATE_RGBA_FRM_16BIT_4444INPUT(src0, src0rb, src0ga);
+ SEPERATE_RGBA_FRM_16BIT_4444INPUT(src1, src1rb, src1ga);
+ ILVODEV_B2_UB(src0ga, src0rb, dst0, dst1);
+ ILVODEV_B2_UB(src1ga, src1rb, dst2, dst3);
+ ILVRL_H2_UB(dst1, dst0, out0, out1);
+ ILVRL_H2_UB(dst3, dst2, out2, out3);
+ ST_UB4(out0, out1, out2, out3, destination, 16);
+ } else if (pixelsPerRow & 8) { // 8 pixels: 1 input vector -> 2 output vectors
+ src0 = LD_UH(source);
+ source += 8; // the single-vector load is followed by a manual bump of 8 pixels
+ SEPERATE_RGBA_FRM_16BIT_4444INPUT(src0, src0rb, src0ga);
+ ILVODEV_B2_UB(src0ga, src0rb, dst0, dst1);
+ ILVRL_H2_UB(dst1, dst0, out0, out1);
+ ST_UB2(out0, out1, destination, 16);
+ }
+ }
+
+ pixelsPerRow &= 7; // 0..7 pixels are not converted here; presumably the caller finishes them with scalar code - verify against caller
+}
+
+ALWAYS_INLINE void packOneRowOfRGBA8LittleToRGBA8MSA(const uint8_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) // MSA fast path: for each 4-byte pixel, multiplies the R, G and B lanes by 255 / A (a zero A is treated as 255, so the factor is 1), truncates to integer, and copies A through unchanged. Leaves the leftover count (0..3) in pixelsPerRow.
+{ // All three parameters are references and are updated in place. The 4-pixel tail below spells the arithmetic out with plain operators; the 16- and 8-pixel paths run the same sequence through the *4 / *2 helper macros (defined elsewhere, assumed to advance the pointers they load from / store to - TODO confirm).
+ unsigned i;
+ v16u8 src0, src1, src2, src3, out0, out1, out2, out3;
+ v16u8 src0R, src1R, src2R, src3R, src0G, src1G, src2G, src3G;
+ v16u8 src0B, src1B, src2B, src3B, src0A, src1A, src2A, src3A;
+ v16u8 dst0R, dst1R, dst2R, dst3R, dst0G, dst1G, dst2G, dst3G;
+ v16u8 dst0B, dst1B, dst2B, dst3B, dst0A, dst1A, dst2A, dst3A;
+ v16u8 dst0RG, dst1RG, dst2RG, dst3RG, dst0BA, dst1BA, dst2BA, dst3BA;
+ v4f32 fsrc0R, fsrc1R, fsrc2R, fsrc3R, fsrc0G, fsrc1G, fsrc2G, fsrc3G;
+ v4f32 fsrc0B, fsrc1B, fsrc2B, fsrc3B, fsrc0A, fsrc1A, fsrc2A, fsrc3A;
+ v4u32 vCnst255 = (v4u32) __msa_ldi_w(255); // 255 in every 32-bit lane; doubles as the "keep the low byte of each pixel word" mask below
+ v16u8 alphaMask = {0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255}; // 255 in byte 3 of every 4-byte pixel, 0 elsewhere
+ v4f32 vfCnst255 = __msa_ffint_u_w(vCnst255); // the same 255 converted to float, used as the dividend
+
+ for (i = (pixelsPerRow >> 4); i--;) { // one iteration per 16 pixels: 4 vectors of 4 pixels each
+ LD_UB4(source, 16, src0, src1, src2, src3);
+ CEQI_B4_UB(src0, src1, src2, src3, 0, src0A, src1A, src2A, src3A); // presumably a per-byte "== 0" mask - confirm against the macro
+ src0A = __msa_bmnz_v(src0, alphaMask, src0A); // presumably: where the mask is set take alphaMask's bits, so a zero alpha byte becomes 255 - TODO confirm bmnz.v operand order
+ src1A = __msa_bmnz_v(src1, alphaMask, src1A);
+ src2A = __msa_bmnz_v(src2, alphaMask, src2A);
+ src3A = __msa_bmnz_v(src3, alphaMask, src3A);
+ AND_V4_UB(src0A, src1A, src2A, src3A, alphaMask, src0A, src1A, src2A, src3A); // keep only byte 3 of each pixel
+ src0A = SLDI_UB(src0A, src0A, 3); // presumably slides byte 3 down to byte 0 of each word so the word holds the alpha value - confirm
+ src1A = SLDI_UB(src1A, src1A, 3);
+ src2A = SLDI_UB(src2A, src2A, 3);
+ src3A = SLDI_UB(src3A, src3A, 3);
+ FFINTU_W4_SP(src0A, src1A, src2A, src3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
+ DIV4(vfCnst255, fsrc0A, vfCnst255, fsrc1A, vfCnst255, fsrc2A, vfCnst255, fsrc3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A); // fsrcNA becomes the per-pixel factor; the 4-pixel tail writes this as vfCnst255 / fsrc0A
+ AND_V4_UB(src0, src1, src2, src3, vCnst255, src0R, src1R, src2R, src3R); // low byte of each pixel word
+ FFINTU_W4_SP(src0R, src1R, src2R, src3R, fsrc0R, fsrc1R, fsrc2R, fsrc3R);
+ MUL4(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fsrc2R, fsrc2A, fsrc3R, fsrc3A, fsrc0R, fsrc1R, fsrc2R, fsrc3R);
+ src0G = SLDI_UB(src0, src0, 1); // slide by 1 byte, then mask: next byte of each pixel
+ src1G = SLDI_UB(src1, src1, 1);
+ src2G = SLDI_UB(src2, src2, 1);
+ src3G = SLDI_UB(src3, src3, 1);
+ AND_V4_UB(src0G, src1G, src2G, src3G, vCnst255, src0G, src1G, src2G, src3G);
+ FFINTU_W4_SP(src0G, src1G, src2G, src3G, fsrc0G, fsrc1G, fsrc2G, fsrc3G);
+ MUL4(fsrc0G, fsrc0A, fsrc1G, fsrc1A, fsrc2G, fsrc2A, fsrc3G, fsrc3A, fsrc0G, fsrc1G, fsrc2G, fsrc3G);
+ src0B = SLDI_UB(src0, src0, 2); // slide by 2 bytes, then mask
+ src1B = SLDI_UB(src1, src1, 2);
+ src2B = SLDI_UB(src2, src2, 2);
+ src3B = SLDI_UB(src3, src3, 2);
+ AND_V4_UB(src0B, src1B, src2B, src3B, vCnst255, src0B, src1B, src2B, src3B);
+ FFINTU_W4_SP(src0B, src1B, src2B, src3B, fsrc0B, fsrc1B, fsrc2B, fsrc3B);
+ MUL4(fsrc0B, fsrc0A, fsrc1B, fsrc1A, fsrc2B, fsrc2A, fsrc3B, fsrc3A, fsrc0B, fsrc1B, fsrc2B, fsrc3B);
+ FTRUNCU_W4_UB(fsrc0R, fsrc1R, fsrc2R, fsrc3R, dst0R, dst1R, dst2R, dst3R); // float -> unsigned integer by truncation (the tail uses __msa_ftrunc_u_w)
+ FTRUNCU_W4_UB(fsrc0G, fsrc1G, fsrc2G, fsrc3G, dst0G, dst1G, dst2G, dst3G);
+ FTRUNCU_W4_UB(fsrc0B, fsrc1B, fsrc2B, fsrc3B, dst0B, dst1B, dst2B, dst3B);
+ dst0A = SLDI_UB(src0, src0, 3); // alpha is taken from the unmodified input, not from the zero-patched copy
+ dst1A = SLDI_UB(src1, src1, 3);
+ dst2A = SLDI_UB(src2, src2, 3);
+ dst3A = SLDI_UB(src3, src3, 3);
+ ILVEV_B2_UB(dst0R, dst0G, dst1R, dst1G, dst0RG, dst1RG); // re-pack: even bytes of R and G into RG pairs, B and A into BA pairs, then pairs into pixels
+ ILVEV_B2_UB(dst2R, dst2G, dst3R, dst3G, dst2RG, dst3RG);
+ ILVEV_B2_UB(dst0B, dst0A, dst1B, dst1A, dst0BA, dst1BA);
+ ILVEV_B2_UB(dst2B, dst2A, dst3B, dst3A, dst2BA, dst3BA);
+ ILVEV_H2_UB(dst0RG, dst0BA, dst1RG, dst1BA, out0, out1);
+ ILVEV_H2_UB(dst2RG, dst2BA, dst3RG, dst3BA, out2, out3);
+ ST_UB4(out0, out1, out2, out3, destination, 16);
+ }
+
+ if (pixelsPerRow & 15) { // fewer than 16 pixels left: the 8 and 4 bits are handled one after the other
+ if (pixelsPerRow & 8) { // 8 pixels: same sequence as the main loop on 2 vectors
+ LD_UB2(source, 16, src0, src1);
+ CEQI_B2_UB(src0, src1, 0, src0A, src1A);
+ src0A = __msa_bmnz_v(src0, alphaMask, src0A);
+ src1A = __msa_bmnz_v(src1, alphaMask, src1A);
+ AND_V2_UB(src0A, src1A, alphaMask, src0A, src1A);
+ src0A = SLDI_UB(src0A, src0A, 3);
+ src1A = SLDI_UB(src1A, src1A, 3);
+ FFINTU_W2_SP(src0A, src1A, fsrc0A, fsrc1A);
+ DIV2(vfCnst255, fsrc0A, vfCnst255, fsrc1A, fsrc0A, fsrc1A);
+ AND_V2_UB(src0, src1, vCnst255, src0R, src1R);
+ FFINTU_W2_SP(src0R, src1R, fsrc0R, fsrc1R);
+ MUL2(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fsrc0R, fsrc1R);
+ src0G = SLDI_UB(src0, src0, 1);
+ src1G = SLDI_UB(src1, src1, 1);
+ AND_V2_UB(src0G, src1G, vCnst255, src0G, src1G);
+ FFINTU_W2_SP(src0G, src1G, fsrc0G, fsrc1G);
+ MUL2(fsrc0G, fsrc0A, fsrc1G, fsrc1A, fsrc0G, fsrc1G);
+ src0B = SLDI_UB(src0, src0, 2);
+ src1B = SLDI_UB(src1, src1, 2);
+ AND_V2_UB(src0B, src1B, vCnst255, src0B, src1B);
+ FFINTU_W2_SP(src0B, src1B, fsrc0B, fsrc1B);
+ MUL2(fsrc0B, fsrc0A, fsrc1B, fsrc1A, fsrc0B, fsrc1B);
+ FTRUNCU_W2_UB(fsrc0R, fsrc1R, dst0R, dst1R);
+ FTRUNCU_W2_UB(fsrc0G, fsrc1G, dst0G, dst1G);
+ FTRUNCU_W2_UB(fsrc0B, fsrc1B, dst0B, dst1B);
+ dst0A = SLDI_UB(src0, src0, 3);
+ dst1A = SLDI_UB(src1, src1, 3);
+ ILVEV_B2_UB(dst0R, dst0G, dst1R, dst1G, dst0RG, dst1RG);
+ ILVEV_B2_UB(dst0B, dst0A, dst1B, dst1A, dst0BA, dst1BA);
+ ILVEV_H2_UB(dst0RG, dst0BA, dst1RG, dst1BA, out0, out1);
+ ST_UB2(out0, out1, destination, 16);
+ }
+
+ if (pixelsPerRow & 4) { // 4 pixels: one vector, written with raw intrinsics and operators
+ src0 = LD_UB(source);
+ source += 16; // manual bump: 4 pixels * 4 bytes
+ src0A = CEQI_B(src0, 0);
+ src0A = __msa_bmnz_v(src0, alphaMask, src0A);
+ src0A = src0A & alphaMask;
+ src0A = SLDI_UB(src0A, src0A, 3);
+ fsrc0A = __msa_ffint_u_w((v4u32)src0A);
+ fsrc0A = vfCnst255 / fsrc0A; // per-pixel factor 255 / alpha
+ src0R = src0 & (v16u8)vCnst255;
+ fsrc0R = __msa_ffint_u_w((v4u32)src0R);
+ fsrc0R *= fsrc0A;
+ src0G = SLDI_UB(src0, src0, 1);
+ src0G &= (v16u8)vCnst255;
+ fsrc0G = __msa_ffint_u_w((v4u32)src0G);
+ fsrc0G *= fsrc0A;
+ src0B = SLDI_UB(src0, src0, 2);
+ src0B &= (v16u8)vCnst255;
+ fsrc0B = __msa_ffint_u_w((v4u32)src0B);
+ fsrc0B *= fsrc0A;
+ dst0R = (v16u8)__msa_ftrunc_u_w(fsrc0R);
+ dst0G = (v16u8)__msa_ftrunc_u_w(fsrc0G);
+ dst0B = (v16u8)__msa_ftrunc_u_w(fsrc0B);
+ dst0A = SLDI_UB(src0, src0, 3);
+ dst0RG = (v16u8)__msa_ilvev_b((v16i8)dst0G, (v16i8)dst0R);
+ dst0BA = (v16u8)__msa_ilvev_b((v16i8)dst0A, (v16i8)dst0B);
+ out0 = (v16u8)__msa_ilvev_h((v8i16)dst0BA, (v8i16)dst0RG);
+ ST_UB(out0, destination);
+ destination += 16;
+ }
+ }
+
+ pixelsPerRow &= 3; // 0..3 pixels are not converted here; presumably the caller finishes them with scalar code - verify against caller
+}
+
+ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort5551MSA(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow) // MSA fast path: packs 4-byte pixels into one 16-bit value each, 8 pixels per output vector, and leaves the leftover count (0..7) in pixelsPerRow.
+{ // All three parameters are references and are updated in place. NOTE(review): the multi-vector LD_UB*/ST_UH* macros are assumed to advance the pointer they are given - confirm in the MSA macro header.
+ unsigned i;
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+ v16u8 src0r, src0b, src1r, src1b, src2r, src2b, src3r, src3b;
+ v16u8 src0g = { 0 }, src0a = { 0 }, src1g = { 0 }, src1a = { 0 }; // zero-initialised because they are read as an input operand of the first SLDI below
+ v16u8 src2g = { 0 }, src2a = { 0 }, src3g = { 0 }, src3a = { 0 };
+ v16u8 src0gt, src1gt, src2gt, src3gt;
+ v8u16 dst0, dst1, dst2, dst3;
+
+ for (i = (pixelsPerRow >> 5); i--;) { // one iteration per 32 pixels: 8 input vectors -> 4 output vectors
+ LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src7);
+ PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0r, src1r, src2r, src3r); // even halfwords of each vector pair; presumably the first two bytes of every pixel - confirm
+ PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0b, src1b, src2b, src3b); // odd halfwords; presumably the last two bytes of every pixel - confirm
+ SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1); // slide by one byte so the second byte of each halfword lines up with the first
+ SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1);
+ SLDI_B2_UB(src0a, src1a, src0b, src1b, src0a, src1a, 1);
+ SLDI_B2_UB(src2a, src3a, src2b, src3b, src2a, src3a, 1);
+ src0gt = (v16u8)SLLI_B(src0g, 3); // g << 3 kept separately; it is the base of the low output byte
+ src1gt = (v16u8)SLLI_B(src1g, 3);
+ src2gt = (v16u8)SLLI_B(src2g, 3);
+ src3gt = (v16u8)SLLI_B(src3g, 3);
+ SRLI_B4_UB(src0g, src1g, src2g, src3g, 5); // g >> 5: top 3 bits
+ SRLI_B4_UB(src0b, src1b, src2b, src3b, 2); // b >> 2
+ SRLI_B4_UB(src0a, src1a, src2a, src3a, 7); // a >> 7: top bit only
+ BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2); // NOTE(review): bit-insert immediates 2 / 5 / 0 look like they build the bytes RRRRRGGG and GGBBBBBA - confirm against binsri.b semantics
+ BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2);
+ BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 5);
+ BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 5);
+ BINSRI_B2_UB(src0b, src0a, src1b, src1a, src0b, src1b, 0);
+ BINSRI_B2_UB(src2b, src2a, src3b, src3a, src2b, src3b, 0);
+ ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1); // combine the even bytes of the two partial results into 16-bit outputs
+ ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3);
+ ST_UH4(dst0, dst1, dst2, dst3, destination, 8);
+ }
+
+ if (pixelsPerRow & 31) { // fewer than 32 pixels left: dispatch on the 16/8 bits of the count
+ if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) { // 24 pixels: 6 input vectors -> 3 output vectors
+ LD_UB6(source, 16, src0, src1, src2, src3, src4, src5);
+ PCKEV_H3_UB(src1, src0, src3, src2, src5, src4, src0r, src1r, src2r);
+ PCKOD_H3_UB(src1, src0, src3, src2, src5, src4, src0b, src1b, src2b);
+ SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1);
+ SLDI_B2_UB(src2g, src0a, src2r, src0b, src2g, src0a, 1); // the pairs are (src2g from src2r) and (src0a from src0b): three g slides and three a slides spread over three two-way macros
+ SLDI_B2_UB(src1a, src2a, src1b, src2b, src1a, src2a, 1);
+ src0gt = (v16u8)SLLI_B(src0g, 3);
+ src1gt = (v16u8)SLLI_B(src1g, 3);
+ src2gt = (v16u8)SLLI_B(src2g, 3);
+ SRLI_B3_UB(src0g, src1g, src2g, 5);
+ SRLI_B3_UB(src0b, src1b, src2b, 2);
+ SRLI_B3_UB(src0a, src1a, src2a, 7);
+ BINSRI_B3_UB(src0r, src0g, src1r, src1g, src2r, src2g, src0r, src1r, src2r, 2);
+ BINSRI_B3_UB(src0gt, src0b, src1gt, src1b, src2gt, src2b, src0b, src1b, src2b, 5);
+ BINSRI_B3_UB(src0b, src0a, src1b, src1a, src2b, src2a, src0b, src1b, src2b, 0);
+ ILVEV_B3_UH(src0b, src0r, src1b, src1r, src2b, src2r, dst0, dst1, dst2);
+ ST_UH3(dst0, dst1, dst2, destination, 8);
+ } else if (pixelsPerRow & 16) { // 16 pixels: 4 input vectors -> 2 output vectors
+ LD_UB4(source, 16, src0, src1, src2, src3);
+ PCKEV_H2_UB(src1, src0, src3, src2, src0r, src1r);
+ PCKOD_H2_UB(src1, src0, src3, src2, src0b, src1b);
+ SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1);
+ SLDI_B2_UB(src0a, src1a, src0b, src1b, src0a, src1a, 1);
+ src0gt = (v16u8)SLLI_B(src0g, 3);
+ src1gt = (v16u8)SLLI_B(src1g, 3);
+ SRLI_B2_UB(src0g, src1g, 5);
+ SRLI_B2_UB(src0b, src1b, 2);
+ SRLI_B2_UB(src0a, src1a, 7);
+ BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2);
+ BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 5);
+ BINSRI_B2_UB(src0b, src0a, src1b, src1a, src0b, src1b, 0);
+ ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1);
+ ST_UH2(dst0, dst1, destination, 8);
+ } else if (pixelsPerRow & 8) { // 8 pixels: 2 input vectors -> 1 output vector, using raw intrinsics
+ LD_UB2(source, 16, src0, src1);
+ src0r = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0);
+ src0b = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0);
+ SLDI_B2_UB(src0g, src0a, src0r, src0b, src0g, src0a, 1);
+ src0gt = (v16u8)SLLI_B(src0g, 3);
+ src0g = (v16u8)SRLI_B(src0g, 5);
+ src0b = (v16u8)SRLI_B(src0b, 2);
+ src0a = (v16u8)SRLI_B(src0a, 7);
+ src0r = (v16u8)__msa_binsri_b((v16u8)src0r, (v16u8)src0g, 2);
+ src0b = (v16u8)__msa_binsri_b((v16u8)src0gt, (v16u8)src0b, 5);
+ src0b = (v16u8)__msa_binsri_b((v16u8)src0b, (v16u8)src0a, 0);
+ dst0 = (v8u16)__msa_ilvev_b((v16i8)src0r, (v16i8)src0b); // raw intrinsic takes (src0r, src0b) where the ILVEV_B2_UH call sites pass (src0b, src0r); presumably the macro swaps its operands - confirm
+ ST_UH(dst0, destination);
+ destination += 8; // the single-vector store is followed by a manual bump of 8 pixels
+ }
+ }
+
+ pixelsPerRow &= 7; // 0..7 pixels are not converted here; presumably the caller finishes them with scalar code - verify against caller
+}
+
+ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort565MSA(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow) // MSA fast path: packs 4-byte pixels into one 16-bit value each (the fourth byte of each pixel is never used), 8 pixels per output vector, and leaves the leftover count (0..7) in pixelsPerRow.
+{ // All three parameters are references and are updated in place. NOTE(review): the multi-vector LD_UB*/ST_UH* macros are assumed to advance the pointer they are given - confirm in the MSA macro header.
+ unsigned i;
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+ v16u8 src0r, src0b, src1r, src1b, src2r, src2b, src3r, src3b;
+ v16u8 src0g = { 0 }, src1g = { 0 }, src2g = { 0 }, src3g = { 0 }; // zero-initialised because they are read as an input operand of the first SLDI below
+ v16u8 src0gt, src1gt, src2gt, src3gt;
+ v8u16 dst0, dst1, dst2, dst3;
+
+ for (i = (pixelsPerRow >> 6); i--;) { // one iteration per 64 pixels, processed as two 32-pixel halves
+ LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src7);
+ PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0r, src1r, src2r, src3r); // even halfwords of each vector pair; presumably the first two bytes of every pixel - confirm
+ PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0b, src1b, src2b, src3b); // odd halfwords; presumably the last two bytes of every pixel - confirm
+ SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1); // slide by one byte so the second byte of each halfword lines up with the first
+ SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1);
+ src0gt = (v16u8)SLLI_B(src0g, 3); // g << 3 kept separately; it is the base of the low output byte
+ src1gt = (v16u8)SLLI_B(src1g, 3);
+ src2gt = (v16u8)SLLI_B(src2g, 3);
+ src3gt = (v16u8)SLLI_B(src3g, 3);
+ SRLI_B4_UB(src0g, src1g, src2g, src3g, 5); // g >> 5: top 3 bits
+ SRLI_B4_UB(src0b, src1b, src2b, src3b, 3); // b >> 3: top 5 bits
+ BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2); // NOTE(review): bit-insert immediates 2 / 4 look like they build the bytes RRRRRGGG and GGGBBBBB - confirm against binsri.b semantics
+ BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2);
+ BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4);
+ BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 4);
+ ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1); // combine the even bytes of the two partial results into 16-bit outputs
+ ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3);
+ LD_UB4(source, 16, src0, src1, src2, src3); // loads for the second half are interleaved with the store of the first half
+ ST_UH4(dst0, dst1, dst2, dst3, destination, 8);
+ LD_UB4(source, 16, src4, src5, src6, src7);
+ PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0r, src1r, src2r, src3r);
+ PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0b, src1b, src2b, src3b);
+ SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1);
+ SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1);
+ src0gt = (v16u8)SLLI_B(src0g, 3);
+ src1gt = (v16u8)SLLI_B(src1g, 3);
+ src2gt = (v16u8)SLLI_B(src2g, 3);
+ src3gt = (v16u8)SLLI_B(src3g, 3);
+ SRLI_B4_UB(src0g, src1g, src2g, src3g, 5);
+ SRLI_B4_UB(src0b, src1b, src2b, src3b, 3);
+ BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2);
+ BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2);
+ BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4);
+ BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 4);
+ ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1);
+ ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3);
+ ST_UH4(dst0, dst1, dst2, dst3, destination, 8);
+ }
+
+ if (pixelsPerRow & 63) { // fewer than 64 pixels left: dispatch on the 32/16/8 bits of the count
+ if (pixelsPerRow & 32) { // every branch below first converts 32 pixels, then the 24/16/8/0 that follow
+ if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) { // 32 + 24 = 56 pixels
+ LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src7);
+ PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0r, src1r, src2r, src3r);
+ PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0b, src1b, src2b, src3b);
+ SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1);
+ SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1);
+ src0gt = (v16u8)SLLI_B(src0g, 3);
+ src1gt = (v16u8)SLLI_B(src1g, 3);
+ src2gt = (v16u8)SLLI_B(src2g, 3);
+ src3gt = (v16u8)SLLI_B(src3g, 3);
+ SRLI_B4_UB(src0g, src1g, src2g, src3g, 5);
+ SRLI_B4_UB(src0b, src1b, src2b, src3b, 3);
+ BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2);
+ BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2);
+ BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4);
+ BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 4);
+ ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1);
+ ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3);
+ LD_UB6(source, 16, src0, src1, src2, src3, src4, src5); // next 24 pixels are loaded before the first 32 are stored
+ ST_UH4(dst0, dst1, dst2, dst3, destination, 8);
+ PCKEV_H3_UB(src1, src0, src3, src2, src5, src4, src0r, src1r, src2r);
+ PCKOD_H3_UB(src1, src0, src3, src2, src5, src4, src0b, src1b, src2b);
+ src0g = SLDI_UB(src0g, src0r, 1);
+ src1g = SLDI_UB(src1g, src1r, 1);
+ src2g = SLDI_UB(src2g, src2r, 1);
+ src0gt = (v16u8)SLLI_B(src0g, 3);
+ src1gt = (v16u8)SLLI_B(src1g, 3);
+ src2gt = (v16u8)SLLI_B(src2g, 3);
+ SRLI_B3_UB(src0g, src1g, src2g, 5);
+ SRLI_B3_UB(src0b, src1b, src2b, 3);
+ BINSRI_B3_UB(src0r, src0g, src1r, src1g, src2r, src2g, src0r, src1r, src2r, 2);
+ BINSRI_B3_UB(src0gt, src0b, src1gt, src1b, src2gt, src2b, src0b, src1b, src2b, 4);
+ ILVEV_B3_UH(src0b, src0r, src1b, src1r, src2b, src2r, dst0, dst1, dst2);
+ ST_UH3(dst0, dst1, dst2, destination, 8);
+ } else if (pixelsPerRow & 16) { // 32 + 16 = 48 pixels
+ LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src7);
+ PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0r, src1r, src2r, src3r);
+ PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0b, src1b, src2b, src3b);
+ SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1);
+ SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1);
+ src0gt = (v16u8)SLLI_B(src0g, 3);
+ src1gt = (v16u8)SLLI_B(src1g, 3);
+ src2gt = (v16u8)SLLI_B(src2g, 3);
+ src3gt = (v16u8)SLLI_B(src3g, 3);
+ SRLI_B4_UB(src0g, src1g, src2g, src3g, 5);
+ SRLI_B4_UB(src0b, src1b, src2b, src3b, 3);
+ BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2);
+ BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2);
+ BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4);
+ BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 4);
+ ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1);
+ ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3);
+ LD_UB4(source, 16, src0, src1, src2, src3); // next 16 pixels are loaded before the first 32 are stored
+ ST_UH4(dst0, dst1, dst2, dst3, destination, 8);
+ PCKEV_H2_UB(src1, src0, src3, src2, src0r, src1r);
+ PCKOD_H2_UB(src1, src0, src3, src2, src0b, src1b);
+ SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1);
+ src0gt = (v16u8)SLLI_B(src0g, 3);
+ src1gt = (v16u8)SLLI_B(src1g, 3);
+ SRLI_B2_UB(src0g, src1g, 5);
+ SRLI_B2_UB(src0b, src1b, 3);
+ BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2);
+ BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4);
+ ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1);
+ ST_UH2(dst0, dst1, destination, 8);
+ } else if (pixelsPerRow & 8) { // 32 + 8 = 40 pixels
+ LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src7);
+ PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0r, src1r, src2r, src3r);
+ PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0b, src1b, src2b, src3b);
+ SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1);
+ SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1);
+ src0gt = (v16u8)SLLI_B(src0g, 3);
+ src1gt = (v16u8)SLLI_B(src1g, 3);
+ src2gt = (v16u8)SLLI_B(src2g, 3);
+ src3gt = (v16u8)SLLI_B(src3g, 3);
+ SRLI_B4_UB(src0g, src1g, src2g, src3g, 5);
+ SRLI_B4_UB(src0b, src1b, src2b, src3b, 3);
+ BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2);
+ BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2);
+ BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4);
+ BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 4);
+ ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1);
+ ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3);
+ LD_UB2(source, 16, src0, src1); // next 8 pixels are loaded before the first 32 are stored
+ ST_UH4(dst0, dst1, dst2, dst3, destination, 8);
+ src0r = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0);
+ src0b = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0);
+ src0g = SLDI_UB(src0g, src0r, 1);
+ src0gt = (v16u8)SLLI_B(src0g, 3);
+ src0g = (v16u8)SRLI_B(src0g, 5);
+ src0b = (v16u8)SRLI_B(src0b, 3);
+ src0r = (v16u8)__msa_binsri_b((v16u8)src0r, (v16u8)src0g, 2);
+ src0b = (v16u8)__msa_binsri_b((v16u8)src0gt, (v16u8)src0b, 4);
+ dst0 = (v8u16)__msa_ilvev_b((v16i8)src0r, (v16i8)src0b);
+ ST_UH(dst0, destination);
+ destination += 8; // the single-vector store is followed by a manual bump of 8 pixels
+ } else { // exactly the 32-pixel bit
+ LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src7);
+ PCKEV_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0r, src1r, src2r, src3r);
+ PCKOD_H4_UB(src1, src0, src3, src2, src5, src4, src7, src6, src0b, src1b, src2b, src3b);
+ SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1);
+ SLDI_B2_UB(src2g, src3g, src2r, src3r, src2g, src3g, 1);
+ src0gt = (v16u8)SLLI_B(src0g, 3);
+ src1gt = (v16u8)SLLI_B(src1g, 3);
+ src2gt = (v16u8)SLLI_B(src2g, 3);
+ src3gt = (v16u8)SLLI_B(src3g, 3);
+ SRLI_B4_UB(src0g, src1g, src2g, src3g, 5);
+ SRLI_B4_UB(src0b, src1b, src2b, src3b, 3);
+ BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2);
+ BINSRI_B2_UB(src2r, src2g, src3r, src3g, src2r, src3r, 2);
+ BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4);
+ BINSRI_B2_UB(src2gt, src2b, src3gt, src3b, src2b, src3b, 4);
+ ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1);
+ ILVEV_B2_UH(src2b, src2r, src3b, src3r, dst2, dst3);
+ ST_UH4(dst0, dst1, dst2, dst3, destination, 8);
+ }
+ } else if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) { // 24 pixels: 6 input vectors -> 3 output vectors
+ LD_UB6(source, 16, src0, src1, src2, src3, src4, src5);
+ PCKEV_H3_UB(src1, src0, src3, src2, src5, src4, src0r, src1r, src2r);
+ PCKOD_H3_UB(src1, src0, src3, src2, src5, src4, src0b, src1b, src2b);
+ src0g = SLDI_UB(src0g, src0r, 1);
+ src1g = SLDI_UB(src1g, src1r, 1);
+ src2g = SLDI_UB(src2g, src2r, 1);
+ src0gt = (v16u8)SLLI_B(src0g, 3);
+ src1gt = (v16u8)SLLI_B(src1g, 3);
+ src2gt = (v16u8)SLLI_B(src2g, 3);
+ SRLI_B3_UB(src0g, src1g, src2g, 5);
+ SRLI_B3_UB(src0b, src1b, src2b, 3);
+ BINSRI_B3_UB(src0r, src0g, src1r, src1g, src2r, src2g, src0r, src1r, src2r, 2);
+ BINSRI_B3_UB(src0gt, src0b, src1gt, src1b, src2gt, src2b, src0b, src1b, src2b, 4);
+ ILVEV_B3_UH(src0b, src0r, src1b, src1r, src2b, src2r, dst0, dst1, dst2);
+ ST_UH3(dst0, dst1, dst2, destination, 8);
+ } else if (pixelsPerRow & 16) { // 16 pixels: 4 input vectors -> 2 output vectors
+ LD_UB4(source, 16, src0, src1, src2, src3);
+ PCKEV_H2_UB(src1, src0, src3, src2, src0r, src1r);
+ PCKOD_H2_UB(src1, src0, src3, src2, src0b, src1b);
+ SLDI_B2_UB(src0g, src1g, src0r, src1r, src0g, src1g, 1);
+ src0gt = (v16u8)SLLI_B(src0g, 3);
+ src1gt = (v16u8)SLLI_B(src1g, 3);
+ SRLI_B2_UB(src0g, src1g, 5);
+ SRLI_B2_UB(src0b, src1b, 3);
+ BINSRI_B2_UB(src0r, src0g, src1r, src1g, src0r, src1r, 2);
+ BINSRI_B2_UB(src0gt, src0b, src1gt, src1b, src0b, src1b, 4);
+ ILVEV_B2_UH(src0b, src0r, src1b, src1r, dst0, dst1);
+ ST_UH2(dst0, dst1, destination, 8);
+ } else if (pixelsPerRow & 8) { // 8 pixels: 2 input vectors -> 1 output vector, using raw intrinsics
+ LD_UB2(source, 16, src0, src1);
+ src0r = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0);
+ src0b = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0);
+ src0g = SLDI_UB(src0g, src0r, 1);
+ src0gt = (v16u8)SLLI_B(src0g, 3);
+ src0g = (v16u8)SRLI_B(src0g, 5);
+ src0b = (v16u8)SRLI_B(src0b, 3);
+ src0r = (v16u8)__msa_binsri_b((v16u8)src0r, (v16u8)src0g, 2);
+ src0b = (v16u8)__msa_binsri_b((v16u8)src0gt, (v16u8)src0b, 4);
+ dst0 = (v8u16)__msa_ilvev_b((v16i8)src0r, (v16i8)src0b);
+ ST_UH(dst0, destination);
+ destination += 8;
+ }
+ }
+
+ pixelsPerRow &= 7; // 0..7 pixels are not converted here; presumably the caller finishes them with scalar code - verify against caller
+}
} // namespace SIMD
} // namespace blink

Powered by Google App Engine
This is Rietveld 408576698