Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(270)

Side by Side Diff: source/row_msa.cc

Issue 2487913004: Add MSA optimized ARGBToRGB24Row_MSA and ARGBToRAWRow_MSA functions (Closed)
Patch Set: Added missed code Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_any.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 211 matching lines...) Expand 10 before | Expand all | Expand 10 after
222 YUVTORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, 222 YUVTORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
223 vec_br, vec_yg, vec0, vec1, vec2); 223 vec_br, vec_yg, vec0, vec1, vec2);
224 STOREARGB(vec0, vec1, vec2, const_255, rgb_buf); 224 STOREARGB(vec0, vec1, vec2, const_255, rgb_buf);
225 src_y += 8; 225 src_y += 8;
226 src_u += 4; 226 src_u += 4;
227 src_v += 4; 227 src_v += 4;
228 rgb_buf += 32; 228 rgb_buf += 32;
229 } 229 }
230 } 230 }
231 231
232 void YUVTORGBARow_MSA(const uint8* src_y, 232 void I422ToRGBARow_MSA(const uint8* src_y,
233 const uint8* src_u, 233 const uint8* src_u,
234 const uint8* src_v, 234 const uint8* src_v,
235 uint8* rgb_buf, 235 uint8* rgb_buf,
236 const struct YuvConstants* yuvconstants, 236 const struct YuvConstants* yuvconstants,
237 int width) { 237 int width) {
238 int x; 238 int x;
239 v16u8 src0, src1, src2; 239 v16u8 src0, src1, src2;
240 v8i16 vec0, vec1, vec2; 240 v8i16 vec0, vec1, vec2;
241 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; 241 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
242 v16u8 const_255 = (v16u8)__msa_ldi_b(255); 242 v16u8 const_255 = (v16u8)__msa_ldi_b(255);
243 243
244 YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, 244 YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
245 vec_br, vec_yg); 245 vec_br, vec_yg);
246 246
247 for (x = 0; x < width; x += 8) { 247 for (x = 0; x < width; x += 8) {
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
282 src3 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src3); 282 src3 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src3);
283 STOREARGB(vec0, vec1, vec2, src3, rgb_buf); 283 STOREARGB(vec0, vec1, vec2, src3, rgb_buf);
284 src_y += 8; 284 src_y += 8;
285 src_u += 4; 285 src_u += 4;
286 src_v += 4; 286 src_v += 4;
287 src_a += 8; 287 src_a += 8;
288 rgb_buf += 32; 288 rgb_buf += 32;
289 } 289 }
290 } 290 }
291 291
292 void YUVTORGB24Row_MSA(const uint8* src_y, 292 void I422ToRGB24Row_MSA(const uint8* src_y,
293 const uint8* src_u, 293 const uint8* src_u,
294 const uint8* src_v, 294 const uint8* src_v,
295 uint8* rgb_buf, 295 uint8* rgb_buf,
296 const struct YuvConstants* yuvconstants, 296 const struct YuvConstants* yuvconstants,
297 int32 width) { 297 int32 width) {
298 int x; 298 int x;
299 int64 data_u, data_v; 299 int64 data_u, data_v;
300 v16u8 src0, src1, src2, src3, src4, src5, dst0, dst1, dst2; 300 v16u8 src0, src1, src2, src3, src4, src5, dst0, dst1, dst2;
301 v8i16 vec0, vec1, vec2, vec3, vec4, vec5; 301 v8i16 vec0, vec1, vec2, vec3, vec4, vec5;
302 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; 302 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
303 v16u8 reg0, reg1, reg2, reg3; 303 v16u8 reg0, reg1, reg2, reg3;
304 v2i64 zero = {0}; 304 v2i64 zero = {0};
305 v16i8 shuffler0 = {0, 1, 16, 2, 3, 17, 4, 5, 18, 6, 7, 19, 8, 9, 20, 10}; 305 v16i8 shuffler0 = {0, 1, 16, 2, 3, 17, 4, 5, 18, 6, 7, 19, 8, 9, 20, 10};
306 v16i8 shuffler1 = {0, 21, 1, 2, 22, 3, 4, 23, 5, 6, 24, 7, 8, 25, 9, 10}; 306 v16i8 shuffler1 = {0, 21, 1, 2, 22, 3, 4, 23, 5, 6, 24, 7, 8, 25, 9, 10};
307 v16i8 shuffler2 = {26, 6, 7, 27, 8, 9, 28, 10, 307 v16i8 shuffler2 = {26, 6, 7, 27, 8, 9, 28, 10,
(...skipping 25 matching lines...) Expand all
333 ST_UB2(dst0, dst1, rgb_buf, 16); 333 ST_UB2(dst0, dst1, rgb_buf, 16);
334 ST_UB(dst2, (rgb_buf + 32)); 334 ST_UB(dst2, (rgb_buf + 32));
335 src_y += 16; 335 src_y += 16;
336 src_u += 8; 336 src_u += 8;
337 src_v += 8; 337 src_v += 8;
338 rgb_buf += 48; 338 rgb_buf += 48;
339 } 339 }
340 } 340 }
341 341
342 // TODO(fbarchard): Consider AND instead of shift to isolate 5 upper bits of R. 342 // TODO(fbarchard): Consider AND instead of shift to isolate 5 upper bits of R.
343 void YUVTORGB565Row_MSA(const uint8* src_y, 343 void I422ToRGB565Row_MSA(const uint8* src_y,
344 const uint8* src_u, 344 const uint8* src_u,
345 const uint8* src_v, 345 const uint8* src_v,
346 uint8* dst_rgb565, 346 uint8* dst_rgb565,
347 const struct YuvConstants* yuvconstants, 347 const struct YuvConstants* yuvconstants,
348 int width) { 348 int width) {
349 int x; 349 int x;
350 v16u8 src0, src1, src2, dst0; 350 v16u8 src0, src1, src2, dst0;
351 v8i16 vec0, vec1, vec2; 351 v8i16 vec0, vec1, vec2;
352 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; 352 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
353 353
354 YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, 354 YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg,
355 vec_br, vec_yg); 355 vec_br, vec_yg);
356 356
357 for (x = 0; x < width; x += 8) { 357 for (x = 0; x < width; x += 8) {
358 READYUV422(src_y, src_u, src_v, src0, src1, src2); 358 READYUV422(src_y, src_u, src_v, src0, src1, src2);
(...skipping 374 matching lines...) Expand 10 before | Expand all | Expand 10 after
733 dst1 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4); 733 dst1 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4);
734 ST_UB(dst0, dst_u); 734 ST_UB(dst0, dst_u);
735 ST_UB(dst1, dst_v); 735 ST_UB(dst1, dst_v);
736 src_argb0 += 128; 736 src_argb0 += 128;
737 src_argb0_next += 128; 737 src_argb0_next += 128;
738 dst_u += 16; 738 dst_u += 16;
739 dst_v += 16; 739 dst_v += 16;
740 } 740 }
741 } 741 }
742 742
// ARGBToRGB24Row_MSA: packs a row of 4-byte pixels into 3-byte pixels,
// keeping bytes 0, 1, 2 of each source pixel in their original order and
// dropping byte 3 (see the shuffle tables below).
//
// src_argb: source row; 64 bytes (16 pixels) are consumed per iteration.
// dst_rgb:  destination row; 48 bytes (16 pixels) are produced per iteration.
// width:    pixel count. The loop steps 16 pixels at a time and has no tail
//           handling, so a width that is not a multiple of 16 still processes
//           a full final group of 16 pixels.
//           NOTE(review): presumably callers guarantee a multiple of 16 or
//           handle the remainder elsewhere (e.g. row_any.cc) -- confirm.
743 void ARGBToRGB24Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {
744 int x;
745 v16u8 src0, src1, src2, src3, dst0, dst1, dst2;
// Shuffle control tables, one per 16-byte output vector. Every listed index
// skips the values 3, 7, 11, ... (the fourth byte of each source pixel).
// NOTE(review): per MSA VSHF.B, indices 0-15 should select bytes from the
// last __msa_vshf_b operand and 16-31 from the middle operand -- confirm
// against the MSA manual.
746 v16i8 shuffler0 = {0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18, 20};
747 v16i8 shuffler1 = { 5, 6, 8, 9, 10, 12, 13, 14,
748 16, 17, 18, 20, 21, 22, 24, 25};
749 v16i8 shuffler2 = {10, 12, 13, 14, 16, 17, 18, 20,
750 21, 22, 24, 25, 26, 28, 29, 30};
751
752 for (x = 0; x < width; x += 16) {
// Load four consecutive 16-byte vectors (byte offsets 0, 16, 32, 48).
753 src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
754 src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
755 src2 = (v16u8)__msa_ld_b((v16i8*)src_argb, 32);
756 src3 = (v16u8)__msa_ld_b((v16i8*)src_argb, 48);
// Each output vector is assembled from two adjacent input vectors, because
// the 3-byte pixels straddle the 16-byte input boundaries.
757 dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0);
758 dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1);
759 dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2);
// NOTE(review): ST_UB2 is defined outside this view; presumably it stores
// dst0 at dst_rgb and dst1 at dst_rgb + 16 -- confirm.
760 ST_UB2(dst0, dst1, dst_rgb, 16);
761 ST_UB(dst2, (dst_rgb + 32));
// 16 pixels: 64 source bytes in, 48 destination bytes out.
762 src_argb += 64;
763 dst_rgb += 48;
764 }
765 }
766
// ARGBToRAWRow_MSA: packs a row of 4-byte pixels into 3-byte pixels,
// emitting bytes 2, 1, 0 of each source pixel (i.e. the first three bytes in
// reversed order) and dropping byte 3 (see the shuffle tables below).
//
// src_argb: source row; 64 bytes (16 pixels) are consumed per iteration.
// dst_rgb:  destination row; 48 bytes (16 pixels) are produced per iteration.
// width:    pixel count. The loop steps 16 pixels at a time and has no tail
//           handling, so a width that is not a multiple of 16 still processes
//           a full final group of 16 pixels.
//           NOTE(review): presumably callers guarantee a multiple of 16 or
//           handle the remainder elsewhere (e.g. row_any.cc) -- confirm.
767 void ARGBToRAWRow_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {
768 int x;
769 v16u8 src0, src1, src2, src3, dst0, dst1, dst2;
// Shuffle control tables, one per 16-byte output vector. Within each source
// pixel the indices run high-to-low (2, 1, 0, then 6, 5, 4, ...), which
// reverses the three kept bytes; the fourth byte of each pixel is never
// referenced.
// NOTE(review): per MSA VSHF.B, indices 0-15 should select bytes from the
// last __msa_vshf_b operand and 16-31 from the middle operand -- confirm
// against the MSA manual.
770 v16i8 shuffler0 = {2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, 18, 17, 16, 22};
771 v16i8 shuffler1 = { 5, 4, 10, 9, 8, 14, 13, 12,
772 18, 17, 16, 22, 21, 20, 26, 25};
773 v16i8 shuffler2 = { 8, 14, 13, 12, 18, 17, 16, 22,
774 21, 20, 26, 25, 24, 30, 29, 28};
775
776 for (x = 0; x < width; x += 16) {
// Load four consecutive 16-byte vectors (byte offsets 0, 16, 32, 48).
777 src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
778 src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
779 src2 = (v16u8)__msa_ld_b((v16i8*)src_argb, 32);
780 src3 = (v16u8)__msa_ld_b((v16i8*)src_argb, 48);
// Each output vector is assembled from two adjacent input vectors, because
// the 3-byte pixels straddle the 16-byte input boundaries.
781 dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0);
782 dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1);
783 dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2);
// NOTE(review): ST_UB2 is defined outside this view; presumably it stores
// dst0 at dst_rgb and dst1 at dst_rgb + 16 -- confirm.
784 ST_UB2(dst0, dst1, dst_rgb, 16);
785 ST_UB(dst2, (dst_rgb + 32));
// 16 pixels: 64 source bytes in, 48 destination bytes out.
786 src_argb += 64;
787 dst_rgb += 48;
788 }
789 }
790
743 void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, 791 void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444,
744 uint8* dst_argb, 792 uint8* dst_argb,
745 int width) { 793 int width) {
746 int x; 794 int x;
747 v16u8 src0, src1; 795 v16u8 src0, src1;
748 v8u16 vec0, vec1, vec2, vec3; 796 v8u16 vec0, vec1, vec2, vec3;
749 v16u8 dst0, dst1, dst2, dst3; 797 v16u8 dst0, dst1, dst2, dst3;
750 798
751 for (x = 0; x < width; x += 16) { 799 for (x = 0; x < width; x += 16) {
752 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 0); 800 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 0);
(...skipping 15 matching lines...) Expand all
768 dst_argb += 64; 816 dst_argb += 64;
769 } 817 }
770 } 818 }
771 819
772 #ifdef __cplusplus 820 #ifdef __cplusplus
773 } // extern "C" 821 } // extern "C"
774 } // namespace libyuv 822 } // namespace libyuv
775 #endif 823 #endif
776 824
777 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) 825 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
OLDNEW
« no previous file with comments | « source/row_any.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698