Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(16)

Unified Diff: source/row_msa.cc

Issue 2520003004: Add MSA optimized ARGBToRGB565Row_MSA, ARGBToARGB1555Row_MSA, ARGBToARGB4444Row_MSA, ARGBToUV444Row… (Closed)
Patch Set: Clang format Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/row_any.cc ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/row_msa.cc
diff --git a/source/row_msa.cc b/source/row_msa.cc
index 130bc2d2c084477e49d2d3e51ced09523dbcf646..f47871fe7767030ff14d85ef60985ad9682f31f8 100644
--- a/source/row_msa.cc
+++ b/source/row_msa.cc
@@ -788,6 +788,175 @@ void ARGBToRAWRow_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {
}
}
+void ARGBToRGB565Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {  // Packs 4-byte pixels from src_argb into 2-byte values in dst_rgb, 8 pixels per loop pass.
+ int x;
+ v16u8 src0, src1, dst0;
+ v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v16i8 zero = {0};  // All-zero vector; used as the fill operand of the byte slides below.
+
+ for (x = 0; x < width; x += 8) {  // No tail handling here: each pass reads 32 bytes and writes 16, even if fewer than 8 pixels remain.
+ src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);  // Pixels 0-3.
+ src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);  // Pixels 4-7.
+ vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3);  // Every byte >> 3: top 5 bits of each channel land in bits 4..0.
+ vec1 = (v16u8)__msa_slli_b((v16i8)src0, 3);  // Every byte << 3: bits 4..2 of each channel land in bits 7..5.
+ vec2 = (v16u8)__msa_srai_b((v16i8)src0, 5);  // Every byte >> 5: top 3 bits of each channel land in bits 2..0.
+ vec4 = (v16u8)__msa_srai_b((v16i8)src1, 3);  // Same three shifts for pixels 4-7.
+ vec5 = (v16u8)__msa_slli_b((v16i8)src1, 3);
+ vec6 = (v16u8)__msa_srai_b((v16i8)src1, 5);
+ vec1 = (v16u8)__msa_sldi_b(zero, (v16i8)vec1, 1);  // Slide down one byte so channel 1 of each pixel lines up with channel 0 (presumably G over B; assumes B,G,R,A byte order - confirm).
+ vec2 = (v16u8)__msa_sldi_b(zero, (v16i8)vec2, 1);
+ vec5 = (v16u8)__msa_sldi_b(zero, (v16i8)vec5, 1);
+ vec6 = (v16u8)__msa_sldi_b(zero, (v16i8)vec6, 1);
+ vec3 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 2);  // Slide down two bytes so channel 2 of each pixel lines up with channel 0.
+ vec7 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 2);
+ vec0 = __msa_binsli_b(vec0, vec1, 2);  // Low output byte: top 3 bits taken from vec1, low 5 bits kept from vec0.
+ vec1 = __msa_binsli_b(vec2, vec3, 4);  // High output byte: top 5 bits taken from vec3, low 3 bits kept from vec2.
+ vec4 = __msa_binsli_b(vec4, vec5, 2);  // Same two merges for pixels 4-7.
+ vec5 = __msa_binsli_b(vec6, vec7, 4);
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0);  // Pair each low byte with its high byte; only the even halfwords hold finished pixels.
+ vec4 = (v16u8)__msa_ilvev_b((v16i8)vec5, (v16i8)vec4);
+ dst0 = (v16u8)__msa_pckev_h((v8i16)vec4, (v8i16)vec0);  // Keep the even halfwords: pixels 0-3 from vec0, then pixels 4-7 from vec4.
+ ST_UB(dst0, dst_rgb);  // Store the 16 result bytes (ST_UB is defined outside this view).
+ src_argb += 32;
+ dst_rgb += 16;
+ }
+}
+
+void ARGBToARGB1555Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {  // Packs 4-byte pixels from src_argb into 2-byte values in dst_rgb, 8 pixels per loop pass.
+ int x;
+ v16u8 src0, src1, dst0;
+ v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
+ v16i8 zero = {0};  // All-zero vector; used as the fill operand of the byte slides below.
+
+ for (x = 0; x < width; x += 8) {  // No tail handling here: each pass reads 32 bytes and writes 16, even if fewer than 8 pixels remain.
+ src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);  // Pixels 0-3.
+ src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);  // Pixels 4-7.
+ vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3);  // Every byte >> 3: top 5 bits of each channel land in bits 4..0.
+ vec1 = (v16u8)__msa_slli_b((v16i8)src0, 2);  // Every byte << 2: bits 5..3 of each channel land in bits 7..5.
+ vec2 = (v16u8)__msa_srai_b((v16i8)vec0, 3);  // A further >> 3 (6 in total): top 2 bits of each channel land in bits 1..0.
+ vec1 = (v16u8)__msa_sldi_b(zero, (v16i8)vec1, 1);  // Slide down one byte so channel 1 of each pixel lines up with channel 0 (presumably G over B; assumes B,G,R,A byte order - confirm).
+ vec2 = (v16u8)__msa_sldi_b(zero, (v16i8)vec2, 1);
+ vec3 = (v16u8)__msa_srai_b((v16i8)src0, 1);  // Every byte >> 1: top 5 bits of each channel land in bits 6..2.
+ vec5 = (v16u8)__msa_srai_b((v16i8)src1, 3);  // Same sequence for pixels 4-7.
+ vec6 = (v16u8)__msa_slli_b((v16i8)src1, 2);
+ vec7 = (v16u8)__msa_srai_b((v16i8)vec5, 3);
+ vec6 = (v16u8)__msa_sldi_b(zero, (v16i8)vec6, 1);
+ vec7 = (v16u8)__msa_sldi_b(zero, (v16i8)vec7, 1);
+ vec8 = (v16u8)__msa_srai_b((v16i8)src1, 1);
+ vec3 = (v16u8)__msa_sldi_b(zero, (v16i8)vec3, 2);  // Slide down two bytes so channel 2 of each pixel lines up with channel 0.
+ vec8 = (v16u8)__msa_sldi_b(zero, (v16i8)vec8, 2);
+ vec4 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 3);  // Slide down three bytes so channel 3 of each pixel lines up with channel 0.
+ vec9 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 3);
+ vec0 = __msa_binsli_b(vec0, vec1, 2);  // Low output byte: top 3 bits taken from vec1, low 5 bits kept from vec0.
+ vec5 = __msa_binsli_b(vec5, vec6, 2);
+ vec1 = __msa_binsli_b(vec2, vec3, 5);  // High output byte: top 6 bits taken from vec3, low 2 bits kept from vec2.
+ vec6 = __msa_binsli_b(vec7, vec8, 5);
+ vec1 = __msa_binsli_b(vec1, vec4, 0);  // Replace only the top bit with the top bit of channel 3 (presumably alpha).
+ vec6 = __msa_binsli_b(vec6, vec9, 0);
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0);  // Pair each low byte with its high byte; only the even halfwords hold finished pixels.
+ vec1 = (v16u8)__msa_ilvev_b((v16i8)vec6, (v16i8)vec5);
+ dst0 = (v16u8)__msa_pckev_h((v8i16)vec1, (v8i16)vec0);  // Keep the even halfwords: pixels 0-3 from vec0, then pixels 4-7 from vec1.
+ ST_UB(dst0, dst_rgb);  // Store the 16 result bytes (ST_UB is defined outside this view).
+ src_argb += 32;
+ dst_rgb += 16;
+ }
+}
+
+void ARGBToARGB4444Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {  // Packs 4-byte pixels from src_argb into 2-byte values (top 4 bits of every channel) in dst_rgb, 8 pixels per loop pass.
+ int x;
+ v16u8 src0, src1;
+ v16u8 vec0, vec1;
+ v16u8 dst0;
+ v16i8 zero = {0};  // All-zero vector; used as the fill operand of the byte slides below.
+
+ for (x = 0; x < width; x += 8) {  // No tail handling here: each pass reads 32 bytes and writes 16, even if fewer than 8 pixels remain.
+ src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);  // Pixels 0-3.
+ src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);  // Pixels 4-7.
+ vec0 = (v16u8)__msa_srai_b((v16i8)src0, 4);  // Every byte >> 4: top nibble of each channel lands in the low nibble.
+ vec1 = (v16u8)__msa_srai_b((v16i8)src1, 4);
+ src0 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 1);  // Slide down one byte so each byte's upper neighbour lines up with it.
+ src1 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 1);
+ vec0 = __msa_binsli_b(vec0, src0, 3);  // Top 4 bits taken from the neighbour byte, low 4 bits kept from the shifted byte.
+ vec1 = __msa_binsli_b(vec1, src1, 3);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);  // Keep the even bytes (channel pairs 0+1 and 2+3): pixels 0-3 from vec0, then pixels 4-7 from vec1.
+ ST_UB(dst0, dst_rgb);  // Store the 16 result bytes (ST_UB is defined outside this view).
+ src_argb += 32;
+ dst_rgb += 16;
+ }
+}
+
+void ARGBToUV444Row_MSA(const uint8* src_argb,  // Computes one dst_u byte and one dst_v byte per 4-byte source pixel, 16 pixels per loop pass.
+ uint8* dst_u,
+ uint8* dst_v,
+ int32 width) {
+ int32 x;
+ v16u8 src0, src1, src2, src3, reg0, reg1, reg2, reg3, dst0, dst1;
+ v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v8u16 vec8, vec9, vec10, vec11;
+ v8u16 const_112 = (v8u16)__msa_ldi_h(112);  // Weight applied to channel 0 for U and to channel 2 for V.
fbarchard1 2016/11/22 18:46:15 note these constants will need to change for other
+ v8u16 const_74 = (v8u16)__msa_ldi_h(74);  // Channel 1 weight subtracted for U.
+ v8u16 const_38 = (v8u16)__msa_ldi_h(38);  // Channel 2 weight subtracted for U.
+ v8u16 const_94 = (v8u16)__msa_ldi_h(94);  // Channel 1 weight subtracted for V.
+ v8u16 const_18 = (v8u16)__msa_ldi_h(18);  // Channel 0 weight subtracted for V.
+ v8u16 const_32896 = (v8u16)__msa_fill_h(32896);  // 0x8080, added to both sums before the >> 8.
+ v16i8 zero = {0};  // All-zero vector; interleaved below to widen bytes to 16 bits.
+
+ for (x = width; x > 0; x -= 16) {  // No tail handling here: each pass reads 64 bytes and writes 16 to each output, even if fewer than 16 pixels remain.
+ src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);  // Pixels 0-3.
+ src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);  // Pixels 4-7.
+ src2 = (v16u8)__msa_ld_b((v16i8*)src_argb, 32);  // Pixels 8-11.
+ src3 = (v16u8)__msa_ld_b((v16i8*)src_argb, 48);  // Pixels 12-15.
+ reg0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);  // Even bytes (channels 0 and 2) of pixels 0-7.
+ reg1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);  // Even bytes (channels 0 and 2) of pixels 8-15.
+ reg2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);  // Odd bytes (channels 1 and 3) of pixels 0-7.
+ reg3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);  // Odd bytes (channels 1 and 3) of pixels 8-15.
+ src0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);  // Channel 0 of all 16 pixels (presumably blue; assumes B,G,R,A byte order - confirm).
+ src1 = (v16u8)__msa_pckev_b((v16i8)reg3, (v16i8)reg2);  // Channel 1 of all 16 pixels (presumably green).
+ src2 = (v16u8)__msa_pckod_b((v16i8)reg1, (v16i8)reg0);  // Channel 2 of all 16 pixels (presumably red); channel 3 is not used.
+ vec0 = (v8u16)__msa_ilvr_b(zero, (v16i8)src0);  // Widen to 16 bits by interleaving with zero: pixels 0-7 ...
+ vec1 = (v8u16)__msa_ilvl_b(zero, (v16i8)src0);  // ... and pixels 8-15.
+ vec2 = (v8u16)__msa_ilvr_b(zero, (v16i8)src1);
+ vec3 = (v8u16)__msa_ilvl_b(zero, (v16i8)src1);
+ vec4 = (v8u16)__msa_ilvr_b(zero, (v16i8)src2);
+ vec5 = (v8u16)__msa_ilvl_b(zero, (v16i8)src2);
+ vec10 = vec0 * const_18;  // V terms: 18 * channel 0.
+ vec11 = vec1 * const_18;
+ vec8 = vec2 * const_94;  // V terms: 94 * channel 1.
+ vec9 = vec3 * const_94;
+ vec6 = vec4 * const_112;  // V terms: 112 * channel 2.
+ vec7 = vec5 * const_112;
+ vec0 *= const_112;  // U terms: 112 * channel 0.
+ vec1 *= const_112;
+ vec2 *= const_74;  // U terms: 74 * channel 1.
+ vec3 *= const_74;
+ vec4 *= const_38;  // U terms: 38 * channel 2.
+ vec5 *= const_38;
+ vec8 += vec10;  // V amount to subtract: 94 * ch1 + 18 * ch0.
+ vec9 += vec11;
+ vec6 += const_32896;  // Add 0x8080 before subtracting.
+ vec7 += const_32896;
+ vec0 += const_32896;
+ vec1 += const_32896;
+ vec2 += vec4;  // U amount to subtract: 74 * ch1 + 38 * ch2.
+ vec3 += vec5;
+ vec0 -= vec2;  // U sum: 112 * ch0 - 74 * ch1 - 38 * ch2 + 0x8080.
+ vec1 -= vec3;
+ vec6 -= vec8;  // V sum: 112 * ch2 - 94 * ch1 - 18 * ch0 + 0x8080.
+ vec7 -= vec9;
+ vec0 = (v8u16)__msa_srai_h((v8i16)vec0, 8);  // >> 8 moves the high byte of each sum into the low byte.
+ vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8);
+ vec6 = (v8u16)__msa_srai_h((v8i16)vec6, 8);
+ vec7 = (v8u16)__msa_srai_h((v8i16)vec7, 8);
+ dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);  // Keep the low byte of each halfword: 16 U values.
+ dst1 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6);  // Keep the low byte of each halfword: 16 V values.
+ ST_UB(dst0, dst_u);  // Store 16 bytes to each output (ST_UB is defined outside this view).
+ ST_UB(dst1, dst_v);
+ src_argb += 64;
+ dst_u += 16;
+ dst_v += 16;
+ }
+}
+
void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444,
uint8* dst_argb,
int width) {
« no previous file with comments | « source/row_any.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698