Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(196)

Side by Side Diff: src/opts/SkBlitRow_opts_SSE2.cpp

Issue 202903004: Xfermode: SSE2 implementation of multiply_modeproc (Closed) Base URL: https://skia.googlesource.com/skia.git@xfermode
Patch Set: fix buildbot failure Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/core/SkXfermode.cpp ('k') | src/opts/SkColor_opts_SSE2.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2012 The Android Open Source Project 2 * Copyright 2012 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 8
9 #include "SkBlitRow_opts_SSE2.h" 9 #include "SkBlitRow_opts_SSE2.h"
10 #include "SkBitmapProcState_opts_SSE2.h" 10 #include "SkBitmapProcState_opts_SSE2.h"
(...skipping 896 matching lines...) Expand 10 before | Expand all | Expand 10 after
907 // Calculate result b. 907 // Calculate result b.
908 __m128i b1 = _mm_srli_epi32(src_pixel1, 908 __m128i b1 = _mm_srli_epi32(src_pixel1,
909 SK_B32_SHIFT + (8 - SK_B16_BITS)); 909 SK_B32_SHIFT + (8 - SK_B16_BITS));
910 b1 = _mm_and_si128(b1, b16_mask); 910 b1 = _mm_and_si128(b1, b16_mask);
911 __m128i b2 = _mm_srli_epi32(src_pixel2, 911 __m128i b2 = _mm_srli_epi32(src_pixel2,
912 SK_B32_SHIFT + (8 - SK_B16_BITS)); 912 SK_B32_SHIFT + (8 - SK_B16_BITS));
913 b2 = _mm_and_si128(b2, b16_mask); 913 b2 = _mm_and_si128(b2, b16_mask);
914 __m128i b = _mm_packs_epi32(b1, b2); 914 __m128i b = _mm_packs_epi32(b1, b2);
915 915
916 // Store 8 16-bit colors in dst. 916 // Store 8 16-bit colors in dst.
917 __m128i d_pixel = SkPackRGB16_SSE(r, g, b); 917 __m128i d_pixel = SkPackRGB16_SSE2(r, g, b);
918 _mm_store_si128(d++, d_pixel); 918 _mm_store_si128(d++, d_pixel);
919 count -= 8; 919 count -= 8;
920 } 920 }
921 src = reinterpret_cast<const SkPMColor*>(s); 921 src = reinterpret_cast<const SkPMColor*>(s);
922 dst = reinterpret_cast<uint16_t*>(d); 922 dst = reinterpret_cast<uint16_t*>(d);
923 } 923 }
924 924
925 if (count > 0) { 925 if (count > 0) {
926 do { 926 do {
927 SkPMColor c = *src++; 927 SkPMColor c = *src++;
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
976 if (src_cmp1 == 0xFFFF && src_cmp2 == 0xFFFF) { 976 if (src_cmp1 == 0xFFFF && src_cmp2 == 0xFFFF) {
977 d++; 977 d++;
978 count -= 8; 978 count -= 8;
979 continue; 979 continue;
980 } 980 }
981 981
982 // Load 8 pixels of dst. 982 // Load 8 pixels of dst.
983 __m128i dst_pixel = _mm_load_si128(d); 983 __m128i dst_pixel = _mm_load_si128(d);
984 984
985 // Extract A from src. 985 // Extract A from src.
986 __m128i sa1 = _mm_slli_epi32(src_pixel1,(24 - SK_A32_SHIFT)); 986 __m128i sa1 = _mm_slli_epi32(src_pixel1, (24 - SK_A32_SHIFT));
987 sa1 = _mm_srli_epi32(sa1, 24); 987 sa1 = _mm_srli_epi32(sa1, 24);
988 __m128i sa2 = _mm_slli_epi32(src_pixel2,(24 - SK_A32_SHIFT)); 988 __m128i sa2 = _mm_slli_epi32(src_pixel2, (24 - SK_A32_SHIFT));
989 sa2 = _mm_srli_epi32(sa2, 24); 989 sa2 = _mm_srli_epi32(sa2, 24);
990 __m128i sa = _mm_packs_epi32(sa1, sa2); 990 __m128i sa = _mm_packs_epi32(sa1, sa2);
991 991
992 // Extract R from src. 992 // Extract R from src.
993 __m128i sr1 = _mm_slli_epi32(src_pixel1,(24 - SK_R32_SHIFT)); 993 __m128i sr1 = _mm_slli_epi32(src_pixel1, (24 - SK_R32_SHIFT));
994 sr1 = _mm_srli_epi32(sr1, 24); 994 sr1 = _mm_srli_epi32(sr1, 24);
995 __m128i sr2 = _mm_slli_epi32(src_pixel2,(24 - SK_R32_SHIFT)); 995 __m128i sr2 = _mm_slli_epi32(src_pixel2, (24 - SK_R32_SHIFT));
996 sr2 = _mm_srli_epi32(sr2, 24); 996 sr2 = _mm_srli_epi32(sr2, 24);
997 __m128i sr = _mm_packs_epi32(sr1, sr2); 997 __m128i sr = _mm_packs_epi32(sr1, sr2);
998 998
999 // Extract G from src. 999 // Extract G from src.
1000 __m128i sg1 = _mm_slli_epi32(src_pixel1,(24 - SK_G32_SHIFT)); 1000 __m128i sg1 = _mm_slli_epi32(src_pixel1, (24 - SK_G32_SHIFT));
1001 sg1 = _mm_srli_epi32(sg1, 24); 1001 sg1 = _mm_srli_epi32(sg1, 24);
1002 __m128i sg2 = _mm_slli_epi32(src_pixel2,(24 - SK_G32_SHIFT)); 1002 __m128i sg2 = _mm_slli_epi32(src_pixel2, (24 - SK_G32_SHIFT));
1003 sg2 = _mm_srli_epi32(sg2, 24); 1003 sg2 = _mm_srli_epi32(sg2, 24);
1004 __m128i sg = _mm_packs_epi32(sg1, sg2); 1004 __m128i sg = _mm_packs_epi32(sg1, sg2);
1005 1005
1006 // Extract B from src. 1006 // Extract B from src.
1007 __m128i sb1 = _mm_slli_epi32(src_pixel1,(24 - SK_B32_SHIFT)); 1007 __m128i sb1 = _mm_slli_epi32(src_pixel1, (24 - SK_B32_SHIFT));
1008 sb1 = _mm_srli_epi32(sb1, 24); 1008 sb1 = _mm_srli_epi32(sb1, 24);
1009 __m128i sb2 = _mm_slli_epi32(src_pixel2,(24 - SK_B32_SHIFT)); 1009 __m128i sb2 = _mm_slli_epi32(src_pixel2, (24 - SK_B32_SHIFT));
1010 sb2 = _mm_srli_epi32(sb2, 24); 1010 sb2 = _mm_srli_epi32(sb2, 24);
1011 __m128i sb = _mm_packs_epi32(sb1, sb2); 1011 __m128i sb = _mm_packs_epi32(sb1, sb2);
1012 1012
1013 // Extract R G B from dst. 1013 // Extract R G B from dst.
1014 __m128i dr = _mm_srli_epi16(dst_pixel,SK_R16_SHIFT); 1014 __m128i dr = _mm_srli_epi16(dst_pixel, SK_R16_SHIFT);
1015 dr = _mm_and_si128(dr, r16_mask); 1015 dr = _mm_and_si128(dr, r16_mask);
1016 __m128i dg = _mm_srli_epi16(dst_pixel,SK_G16_SHIFT); 1016 __m128i dg = _mm_srli_epi16(dst_pixel, SK_G16_SHIFT);
1017 dg = _mm_and_si128(dg, g16_mask); 1017 dg = _mm_and_si128(dg, g16_mask);
1018 __m128i db = _mm_srli_epi16(dst_pixel,SK_B16_SHIFT); 1018 __m128i db = _mm_srli_epi16(dst_pixel, SK_B16_SHIFT);
1019 db = _mm_and_si128(db, b16_mask); 1019 db = _mm_and_si128(db, b16_mask);
1020 1020
1021 __m128i isa = _mm_sub_epi16(var255, sa); // 255 - sa 1021 __m128i isa = _mm_sub_epi16(var255, sa); // 255 - sa
1022 1022
1023 // Calculate R G B of result. 1023 // Calculate R G B of result.
1024 // Original algorithm is in SkSrcOver32To16(). 1024 // Original algorithm is in SkSrcOver32To16().
1025 dr = _mm_add_epi16(sr, SkMul16ShiftRound_SSE(dr, isa, SK_R16_BITS)); 1025 dr = _mm_add_epi16(sr, SkMul16ShiftRound_SSE2(dr, isa, SK_R16_BITS)) ;
1026 dr = _mm_srli_epi16(dr, 8 - SK_R16_BITS); 1026 dr = _mm_srli_epi16(dr, 8 - SK_R16_BITS);
1027 dg = _mm_add_epi16(sg, SkMul16ShiftRound_SSE(dg, isa, SK_G16_BITS)); 1027 dg = _mm_add_epi16(sg, SkMul16ShiftRound_SSE2(dg, isa, SK_G16_BITS)) ;
1028 dg = _mm_srli_epi16(dg, 8 - SK_G16_BITS); 1028 dg = _mm_srli_epi16(dg, 8 - SK_G16_BITS);
1029 db = _mm_add_epi16(sb, SkMul16ShiftRound_SSE(db, isa, SK_B16_BITS)); 1029 db = _mm_add_epi16(sb, SkMul16ShiftRound_SSE2(db, isa, SK_B16_BITS)) ;
1030 db = _mm_srli_epi16(db, 8 - SK_B16_BITS); 1030 db = _mm_srli_epi16(db, 8 - SK_B16_BITS);
1031 1031
1032 // Pack R G B into 16-bit color. 1032 // Pack R G B into 16-bit color.
1033 __m128i d_pixel = SkPackRGB16_SSE(dr, dg, db); 1033 __m128i d_pixel = SkPackRGB16_SSE2(dr, dg, db);
1034 1034
1035 // Store 8 16-bit colors in dst. 1035 // Store 8 16-bit colors in dst.
1036 _mm_store_si128(d++, d_pixel); 1036 _mm_store_si128(d++, d_pixel);
1037 count -= 8; 1037 count -= 8;
1038 } 1038 }
1039 1039
1040 src = reinterpret_cast<const SkPMColor*>(s); 1040 src = reinterpret_cast<const SkPMColor*>(s);
1041 dst = reinterpret_cast<uint16_t*>(d); 1041 dst = reinterpret_cast<uint16_t*>(d);
1042 } 1042 }
1043 1043
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
1136 sb2 = _mm_srli_epi32(sb2, 24); 1136 sb2 = _mm_srli_epi32(sb2, 24);
1137 __m128i sb = _mm_packs_epi32(sb1, sb2); 1137 __m128i sb = _mm_packs_epi32(sb1, sb2);
1138 1138
1139 // SkDITHER_R32To565(sb, dither) 1139 // SkDITHER_R32To565(sb, dither)
1140 __m128i sb_offset = _mm_srli_epi16(sb, 5); 1140 __m128i sb_offset = _mm_srli_epi16(sb, 5);
1141 sb = _mm_add_epi16(sb, dither); 1141 sb = _mm_add_epi16(sb, dither);
1142 sb = _mm_sub_epi16(sb, sb_offset); 1142 sb = _mm_sub_epi16(sb, sb_offset);
1143 sb = _mm_srli_epi16(sb, SK_B32_BITS - SK_B16_BITS); 1143 sb = _mm_srli_epi16(sb, SK_B32_BITS - SK_B16_BITS);
1144 1144
1145 // Pack and store 16-bit dst pixel. 1145 // Pack and store 16-bit dst pixel.
1146 __m128i d_pixel = SkPackRGB16_SSE(sr, sg, sb); 1146 __m128i d_pixel = SkPackRGB16_SSE2(sr, sg, sb);
1147 _mm_store_si128(d++, d_pixel); 1147 _mm_store_si128(d++, d_pixel);
1148 1148
1149 count -= 8; 1149 count -= 8;
1150 x += 8; 1150 x += 8;
1151 } 1151 }
1152 1152
1153 src = reinterpret_cast<const SkPMColor*>(s); 1153 src = reinterpret_cast<const SkPMColor*>(s);
1154 dst = reinterpret_cast<uint16_t*>(d); 1154 dst = reinterpret_cast<uint16_t*>(d);
1155 } 1155 }
1156 1156
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
1235 __m128i g16_mask = _mm_set1_epi16(SK_G16_MASK); 1235 __m128i g16_mask = _mm_set1_epi16(SK_G16_MASK);
1236 __m128i b16_mask = _mm_set1_epi16(SK_B16_MASK); 1236 __m128i b16_mask = _mm_set1_epi16(SK_B16_MASK);
1237 1237
1238 while (count >= 8) { 1238 while (count >= 8) {
1239 // Load 8 pixels of src and dst. 1239 // Load 8 pixels of src and dst.
1240 __m128i src_pixel1 = _mm_loadu_si128(s++); 1240 __m128i src_pixel1 = _mm_loadu_si128(s++);
1241 __m128i src_pixel2 = _mm_loadu_si128(s++); 1241 __m128i src_pixel2 = _mm_loadu_si128(s++);
1242 __m128i dst_pixel = _mm_load_si128(d); 1242 __m128i dst_pixel = _mm_load_si128(d);
1243 1243
1244 // Extract A from src. 1244 // Extract A from src.
1245 __m128i sa1 = _mm_slli_epi32(src_pixel1,(24 - SK_A32_SHIFT)); 1245 __m128i sa1 = _mm_slli_epi32(src_pixel1, (24 - SK_A32_SHIFT));
1246 sa1 = _mm_srli_epi32(sa1, 24); 1246 sa1 = _mm_srli_epi32(sa1, 24);
1247 __m128i sa2 = _mm_slli_epi32(src_pixel2,(24 - SK_A32_SHIFT)); 1247 __m128i sa2 = _mm_slli_epi32(src_pixel2, (24 - SK_A32_SHIFT));
1248 sa2 = _mm_srli_epi32(sa2, 24); 1248 sa2 = _mm_srli_epi32(sa2, 24);
1249 __m128i sa = _mm_packs_epi32(sa1, sa2); 1249 __m128i sa = _mm_packs_epi32(sa1, sa2);
1250 1250
1251 // Calculate current dither value. 1251 // Calculate current dither value.
1252 dither_cur = _mm_mullo_epi16(dither, 1252 dither_cur = _mm_mullo_epi16(dither,
1253 _mm_add_epi16(sa, _mm_set1_epi16(1))); 1253 _mm_add_epi16(sa, _mm_set1_epi16(1)));
1254 dither_cur = _mm_srli_epi16(dither_cur, 8); 1254 dither_cur = _mm_srli_epi16(dither_cur, 8);
1255 1255
1256 // Extract R from src. 1256 // Extract R from src.
1257 __m128i sr1 = _mm_slli_epi32(src_pixel1, (24 - SK_R32_SHIFT)); 1257 __m128i sr1 = _mm_slli_epi32(src_pixel1, (24 - SK_R32_SHIFT));
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
1316 1316
1317 dg = _mm_mullo_epi16(dg, isa); 1317 dg = _mm_mullo_epi16(dg, isa);
1318 dg = _mm_add_epi16(dg, sg); 1318 dg = _mm_add_epi16(dg, sg);
1319 dg = _mm_srli_epi16(dg, 5); 1319 dg = _mm_srli_epi16(dg, 5);
1320 1320
1321 db = _mm_mullo_epi16(db, isa); 1321 db = _mm_mullo_epi16(db, isa);
1322 db = _mm_add_epi16(db, sb); 1322 db = _mm_add_epi16(db, sb);
1323 db = _mm_srli_epi16(db, 5); 1323 db = _mm_srli_epi16(db, 5);
1324 1324
1325 // Package and store dst pixel. 1325 // Package and store dst pixel.
1326 __m128i d_pixel = SkPackRGB16_SSE(dr, dg, db); 1326 __m128i d_pixel = SkPackRGB16_SSE2(dr, dg, db);
1327 _mm_store_si128(d++, d_pixel); 1327 _mm_store_si128(d++, d_pixel);
1328 1328
1329 count -= 8; 1329 count -= 8;
1330 x += 8; 1330 x += 8;
1331 } 1331 }
1332 1332
1333 src = reinterpret_cast<const SkPMColor*>(s); 1333 src = reinterpret_cast<const SkPMColor*>(s);
1334 dst = reinterpret_cast<uint16_t*>(d); 1334 dst = reinterpret_cast<uint16_t*>(d);
1335 } 1335 }
1336 1336
(...skipping 18 matching lines...) Expand all
1355 uint32_t dst_expanded = SkExpand_rgb_16(*dst); 1355 uint32_t dst_expanded = SkExpand_rgb_16(*dst);
1356 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); 1356 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3);
1357 // now src and dst expanded are in g:11 r:10 x:1 b:10 1357 // now src and dst expanded are in g:11 r:10 x:1 b:10
1358 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); 1358 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);
1359 } 1359 }
1360 dst += 1; 1360 dst += 1;
1361 DITHER_INC_X(x); 1361 DITHER_INC_X(x);
1362 } while (--count != 0); 1362 } while (--count != 0);
1363 } 1363 }
1364 } 1364 }
OLDNEW
« no previous file with comments | « src/core/SkXfermode.cpp ('k') | src/opts/SkColor_opts_SSE2.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698