Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5)

Side by Side Diff: source/row_msa.cc

Issue 2529983002: Add MSA optimized ARGB Multiply/Add/Subtract row functions (Closed)
Patch Set: Corrected patchset files Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_any.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 939 matching lines...) Expand 10 before | Expand all | Expand 10 after
950 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); 950 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
951 dst1 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6); 951 dst1 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6);
952 ST_UB(dst0, dst_u); 952 ST_UB(dst0, dst_u);
953 ST_UB(dst1, dst_v); 953 ST_UB(dst1, dst_v);
954 src_argb += 64; 954 src_argb += 64;
955 dst_u += 16; 955 dst_u += 16;
956 dst_v += 16; 956 dst_v += 16;
957 } 957 }
958 } 958 }
959 959
// Multiplies two rows of ARGB pixels byte by byte using MSA vectors and
// writes the product row to dst_argb. For source bytes a (src_argb0) and
// b (src_argb1) each output byte is ((a * 0x0101) * b) >> 16.
//   src_argb0, src_argb1: input rows, read 16 bytes per loop iteration.
//   dst_argb: output row, written 16 bytes per loop iteration.
//   width: pixel count; the loop advances 4 pixels (16 bytes) at a time.
// NOTE(review): no tail handling is visible here, so a width that is not a
// multiple of 4 still reads/writes a whole 16-byte vector on the last
// iteration - presumably the callers guarantee suitable widths; confirm.
960 void ARGBMultiplyRow_MSA(const uint8* src_argb0,
961 const uint8* src_argb1,
962 uint8* dst_argb,
963 int width) {
964 int x;
965 v16u8 src0, src1, dst0;
966 v8u16 vec0, vec1, vec2, vec3;
967 v4u32 reg0, reg1, reg2, reg3;
968 v8i16 zero = {0};  // all-zero vector used to zero-extend lanes below
969
970 for (x = 0; x < width; x += 4) {
971 src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0);
972 src1 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 0);
973 vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0);  // src0 interleaved with itself: each 16-bit lane holds the byte twice (a * 0x0101)
974 vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0);
975 vec2 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src1);  // src1 interleaved with zero: bytes widened to 16 bits
976 vec3 = (v8u16)__msa_ilvl_b((v16i8)zero, (v16i8)src1);
977 reg0 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec0);  // widen the replicated src0 values to 32-bit lanes
978 reg1 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec0);
979 reg2 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec1);
980 reg3 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec1);
981 reg0 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec2);  // 32-bit product with the widened src1 values
982 reg1 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec2);
983 reg2 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec3);
984 reg3 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec3);
985 reg0 = (v4u32)__msa_srai_w((v4i32)reg0, 16);  // >> 16; the product is at most 0xFFFF * 0xFF, so the sign bit is never set
986 reg1 = (v4u32)__msa_srai_w((v4i32)reg1, 16);
987 reg2 = (v4u32)__msa_srai_w((v4i32)reg2, 16);
988 reg3 = (v4u32)__msa_srai_w((v4i32)reg3, 16);
989 vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0);  // keep the even (low) halfword of every 32-bit lane
990 vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
991 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);  // keep the even (low) byte of every halfword: back to 16 bytes
992 ST_UB(dst0, dst_argb);  // ST_UB is defined outside this view; presumably a 16-byte vector store - confirm
993 src_argb0 += 16;
994 src_argb1 += 16;
995 dst_argb += 16;
996 }
997 }
998
// Adds two rows of ARGB pixels byte by byte with unsigned saturation
// (__msa_adds_u_b) and writes the sums to dst_argb.
//   src_argb0, src_argb1: input rows, read 32 bytes per loop iteration.
//   dst_argb: output row, advanced 32 bytes per loop iteration.
//   width: pixel count; the loop advances 8 pixels (32 bytes) at a time.
// NOTE(review): no tail handling is visible here, so a width that is not a
// multiple of 8 still processes two whole vectors on the last iteration -
// presumably the callers guarantee suitable widths; confirm.
999 void ARGBAddRow_MSA(const uint8* src_argb0,
1000 const uint8* src_argb1,
1001 uint8* dst_argb,
1002 int width) {
1003 int x;
1004 v16u8 src0, src1, src2, src3, dst0, dst1;
1005
1006 for (x = 0; x < width; x += 8) {
1007 src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0);
1008 src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16);
1009 src2 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 0);
1010 src3 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 16);
1011 dst0 = __msa_adds_u_b(src0, src2);  // unsigned saturating add of the first 16 bytes of each row
1012 dst1 = __msa_adds_u_b(src1, src3);  // same for the bytes at offset 16
1013 ST_UB2(dst0, dst1, dst_argb, 16);  // ST_UB2 is defined outside this view; presumably stores both vectors 16 bytes apart - confirm
1014 src_argb0 += 32;
1015 src_argb1 += 32;
1016 dst_argb += 32;
1017 }
1018 }
1019
// Subtracts src_argb1 from src_argb0 byte by byte with unsigned saturation
// (__msa_subs_u_b) and writes the differences to dst_argb.
//   src_argb0: minuend row, read 32 bytes per loop iteration.
//   src_argb1: subtrahend row, read 32 bytes per loop iteration.
//   dst_argb: output row, advanced 32 bytes per loop iteration.
//   width: pixel count; the loop advances 8 pixels (32 bytes) at a time.
// NOTE(review): no tail handling is visible here, so a width that is not a
// multiple of 8 still processes two whole vectors on the last iteration -
// presumably the callers guarantee suitable widths; confirm.
1020 void ARGBSubtractRow_MSA(const uint8* src_argb0,
1021 const uint8* src_argb1,
1022 uint8* dst_argb,
1023 int width) {
1024 int x;
1025 v16u8 src0, src1, src2, src3, dst0, dst1;
1026
1027 for (x = 0; x < width; x += 8) {
1028 src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0);
1029 src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16);
1030 src2 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 0);
1031 src3 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 16);
1032 dst0 = __msa_subs_u_b(src0, src2);  // unsigned saturating src_argb0 - src_argb1 for the first 16 bytes
1033 dst1 = __msa_subs_u_b(src1, src3);  // same for the bytes at offset 16
1034 ST_UB2(dst0, dst1, dst_argb, 16);  // ST_UB2 is defined outside this view; presumably stores both vectors 16 bytes apart - confirm
1035 src_argb0 += 32;
1036 src_argb1 += 32;
1037 dst_argb += 32;
1038 }
1039 }
1040
960 void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, 1041 void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444,
961 uint8* dst_argb, 1042 uint8* dst_argb,
962 int width) { 1043 int width) {
963 int x; 1044 int x;
964 v16u8 src0, src1; 1045 v16u8 src0, src1;
965 v8u16 vec0, vec1, vec2, vec3; 1046 v8u16 vec0, vec1, vec2, vec3;
966 v16u8 dst0, dst1, dst2, dst3; 1047 v16u8 dst0, dst1, dst2, dst3;
967 1048
968 for (x = 0; x < width; x += 16) { 1049 for (x = 0; x < width; x += 16) {
969 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 0); 1050 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 0);
(...skipping 15 matching lines...) Expand all
985 dst_argb += 64; 1066 dst_argb += 64;
986 } 1067 }
987 } 1068 }
988 1069
989 #ifdef __cplusplus 1070 #ifdef __cplusplus
990 } // extern "C" 1071 } // extern "C"
991 } // namespace libyuv 1072 } // namespace libyuv
992 #endif 1073 #endif
993 1074
994 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) 1075 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
OLD | NEW
« no previous file with comments | « source/row_any.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698