Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(243)

Side by Side Diff: src/opts/SkBlitRow_opts_SSE2.cpp

Issue 179443003: SSE2 implementation of S32A_D565_Opaque_Dither (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright 2012 The Android Open Source Project 2 * Copyright 2012 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 8
9 #include "SkBlitRow_opts_SSE2.h" 9 #include "SkBlitRow_opts_SSE2.h"
10 #include "SkBitmapProcState_opts_SSE2.h" 10 #include "SkBitmapProcState_opts_SSE2.h"
11 #include "SkColorPriv.h" 11 #include "SkColorPriv.h"
12 #include "SkColor_opts_SSE2.h" 12 #include "SkColor_opts_SSE2.h"
13 #include "SkDither.h"
13 #include "SkUtils.h" 14 #include "SkUtils.h"
14 15
15 #include <emmintrin.h> 16 #include <emmintrin.h>
16 17
17 /* SSE2 version of S32_Blend_BlitRow32() 18 /* SSE2 version of S32_Blend_BlitRow32()
18 * portable version is in core/SkBlitRow_D32.cpp 19 * portable version is in core/SkBlitRow_D32.cpp
19 */ 20 */
20 void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, 21 void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,
21 const SkPMColor* SK_RESTRICT src, 22 const SkPMColor* SK_RESTRICT src,
22 int count, U8CPU alpha) { 23 int count, U8CPU alpha) {
(...skipping 1021 matching lines...) Expand 10 before | Expand all | Expand 10 after
1044 do { 1045 do {
1045 SkPMColor c = *src++; 1046 SkPMColor c = *src++;
1046 SkPMColorAssert(c); 1047 SkPMColorAssert(c);
1047 if (c) { 1048 if (c) {
1048 *dst = SkSrcOver32To16(c, *dst); 1049 *dst = SkSrcOver32To16(c, *dst);
1049 } 1050 }
1050 dst += 1; 1051 dst += 1;
1051 } while (--count != 0); 1052 } while (--count != 0);
1052 } 1053 }
1053 } 1054 }
1055
1056 /* SSE2 version of S32A_D565_Opaque_Dither()
1057 * portable version is in core/SkBlitRow_D16.cpp
 *
 * Blends `count` 32-bit SkPMColor source pixels over 16-bit 565 destination
 * pixels, adding an alpha-scaled dither value to each source channel first.
 *
 * dst   - destination row; read and written in place.
 * src   - source row; only read.
 * count - number of pixels; the function returns immediately if count <= 0.
 * alpha - asserted to be 255 and otherwise unused.
 * x, y  - position used to pick the dither value: the vector set-up indexes
 *         the dither table with (y & 3) and (x .. x+3) & 3.
 *
 * Structure when count >= 8:
 *   1. scalar loop until dst is 16-byte aligned,
 *   2. SSE2 loop handling 8 pixels per iteration (unaligned src loads,
 *      aligned dst load/store),
 *   3. scalar loop for whatever is left.
 * With fewer than 8 pixels only the final scalar loop runs.
 *
 * NOTE(review): the SSE2 loop has no `if (c)` early-out like the scalar
 * loops; presumably a zero source pixel leaves dst unchanged through the
 * arithmetic (isa == 32, then >> 5) - confirm against SkPackRGB16_SSE.
1058 */
1059 void S32A_D565_Opaque_Dither_SSE(uint16_t* SK_RESTRICT dst,
1060 const SkPMColor* SK_RESTRICT src,
1061 int count, U8CPU alpha, int x, int y) {
1062 SkASSERT(255 == alpha);
1063 // Nothing to do for an empty (or negative) run.
1064 if (count <= 0) {
1065 return;
1066 }
1067 // Vector path needs at least 8 pixels; first walk dst to a 16-byte boundary with scalar code.
1068 if (count >= 8) {
1069 while (((size_t)dst & 0x0F) != 0) {
1070 DITHER_565_SCAN(y);
1071 SkPMColor c = *src++;
1072 SkPMColorAssert(c);
1073 if (c) {
1074 unsigned a = SkGetPackedA32(c);
1075
1076 int d = SkAlphaMul(DITHER_VALUE(x), SkAlpha255To256(a));
1077
1078 unsigned sr = SkGetPackedR32(c);
1079 unsigned sg = SkGetPackedG32(c);
1080 unsigned sb = SkGetPackedB32(c);
1081 sr = SkDITHER_R32_FOR_565(sr, d);
1082 sg = SkDITHER_G32_FOR_565(sg, d);
1083 sb = SkDITHER_B32_FOR_565(sb, d);
1084
1085 uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2);
1086 uint32_t dst_expanded = SkExpand_rgb_16(*dst);
1087 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3);
1088 // now src and dst expanded are in g:11 r:10 x:1 b:10
1089 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);
1090 }
1091 dst += 1;
1092 DITHER_INC_X(x);
1093 count--;
1094 }
1095 // Dither values for columns x..x+3 (index masked with & 3), duplicated into lanes 4..7.
1096 unsigned short dither_value[8];
1097 __m128i dither, dither_cur;
1098 #ifdef ENABLE_DITHER_MATRIX_4X4
1099 const uint8_t* dither_scan = gDitherMatrix_3Bit_4X4[(y) & 3];
1100 dither_value[0] = dither_value[4] = dither_scan[(x) & 3];
1101 dither_value[1] = dither_value[5] = dither_scan[(x + 1) & 3];
1102 dither_value[2] = dither_value[6] = dither_scan[(x + 2) & 3];
1103 dither_value[3] = dither_value[7] = dither_scan[(x + 3) & 3];
1104 #else
1105 const uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3];
1106 dither_value[0] = dither_value[4] = (dither_scan
1107 >> (((x) & 3) << 2)) & 0xF;
1108 dither_value[1] = dither_value[5] = (dither_scan
1109 >> (((x + 1) & 3) << 2)) & 0xF;
1110 dither_value[2] = dither_value[6] = (dither_scan
1111 >> (((x + 2) & 3) << 2)) & 0xF;
1112 dither_value[3] = dither_value[7] = (dither_scan
1113 >> (((x + 3) & 3) << 2)) & 0xF;
1114 #endif
1115 dither = _mm_loadu_si128((__m128i*) dither_value);
1116 // src is read with unaligned loads below; dst was aligned by the scalar loop above.
1117 const __m128i* s = reinterpret_cast<const __m128i*>(src);
1118 __m128i* d = reinterpret_cast<__m128i*>(dst);
1119 __m128i var256 = _mm_set1_epi16(256);
1120 __m128i r16_mask = _mm_set1_epi16(SK_R16_MASK);
1121 __m128i g16_mask = _mm_set1_epi16(SK_G16_MASK);
1122 __m128i b16_mask = _mm_set1_epi16(SK_B16_MASK);
1123
1124 while (count >= 8) {
1125 // Load 8 pixels of src and dst.
1126 __m128i src_pixel1 = _mm_loadu_si128(s++);
1127 __m128i src_pixel2 = _mm_loadu_si128(s++);
1128 __m128i dst_pixel = _mm_load_si128(d);
1129
1130 // Extract A from src.
1131 __m128i sa1 = _mm_slli_epi32(src_pixel1,(24 - SK_A32_SHIFT));
1132 sa1 = _mm_srli_epi32(sa1, 24);
1133 __m128i sa2 = _mm_slli_epi32(src_pixel2,(24 - SK_A32_SHIFT));
1134 sa2 = _mm_srli_epi32(sa2, 24);
1135 __m128i sa = _mm_packs_epi32(sa1, sa2);
1136
1137 // Calculate current dither value: (dither * (sa + 1)) >> 8 per 16-bit lane.
1138 dither_cur = _mm_mullo_epi16(dither,
1139 _mm_add_epi16(sa, _mm_set1_epi16(1)));
1140 dither_cur = _mm_srli_epi16(dither_cur, 8);
1141
1142 // Extract R from src.
1143 __m128i sr1 = _mm_slli_epi32(src_pixel1, (24 - SK_R32_SHIFT));
1144 sr1 = _mm_srli_epi32(sr1, 24);
1145 __m128i sr2 = _mm_slli_epi32(src_pixel2, (24 - SK_R32_SHIFT));
1146 sr2 = _mm_srli_epi32(sr2, 24);
1147 __m128i sr = _mm_packs_epi32(sr1, sr2);
1148
1149 // sr = SkDITHER_R32_FOR_565(sr, d), computed here as sr + d - (sr >> 5).
1150 __m128i sr_offset = _mm_srli_epi16(sr, 5);
1151 sr = _mm_add_epi16(sr, dither_cur);
1152 sr = _mm_sub_epi16(sr, sr_offset);
1153
1154 // Expand sr (same << 2 scale as sb; the scalar path's extra << 11 is the field position).
1155 sr = _mm_slli_epi16(sr, 2);
1156
1157 // Extract G from src.
1158 __m128i sg1 = _mm_slli_epi32(src_pixel1, (24 - SK_G32_SHIFT));
1159 sg1 = _mm_srli_epi32(sg1, 24);
1160 __m128i sg2 = _mm_slli_epi32(src_pixel2, (24 - SK_G32_SHIFT));
1161 sg2 = _mm_srli_epi32(sg2, 24);
1162 __m128i sg = _mm_packs_epi32(sg1, sg2);
1163
1164 // sg = SkDITHER_G32_FOR_565(sg, d), computed here as sg + (d >> 1) - (sg >> 6).
1165 __m128i sg_offset = _mm_srli_epi16(sg, 6);
1166 sg = _mm_add_epi16(sg, _mm_srli_epi16(dither_cur, 1));
1167 sg = _mm_sub_epi16(sg, sg_offset);
1168
1169 // Expand sg (green is shifted by 3, red and blue by 2).
1170 sg = _mm_slli_epi16(sg, 3);
1171
1172 // Extract B from src.
1173 __m128i sb1 = _mm_slli_epi32(src_pixel1, (24 - SK_B32_SHIFT));
1174 sb1 = _mm_srli_epi32(sb1, 24);
1175 __m128i sb2 = _mm_slli_epi32(src_pixel2, (24 - SK_B32_SHIFT));
1176 sb2 = _mm_srli_epi32(sb2, 24);
1177 __m128i sb = _mm_packs_epi32(sb1, sb2);
1178
1179 // sb = SkDITHER_B32_FOR_565(sb, d), computed here as sb + d - (sb >> 5).
1180 __m128i sb_offset = _mm_srli_epi16(sb, 5);
1181 sb = _mm_add_epi16(sb, dither_cur);
1182 sb = _mm_sub_epi16(sb, sb_offset);
1183
1184 // Expand sb.
1185 sb = _mm_slli_epi16(sb, 2);
1186
1187 // Extract R G B from dst.
1188 __m128i dr = _mm_srli_epi16(dst_pixel, SK_R16_SHIFT);
1189 dr = _mm_and_si128(dr, r16_mask);
1190 __m128i dg = _mm_srli_epi16(dst_pixel, SK_G16_SHIFT);
1191 dg = _mm_and_si128(dg, g16_mask);
1192 __m128i db = _mm_srli_epi16(dst_pixel, SK_B16_SHIFT);
1193 db = _mm_and_si128(db, b16_mask);
1194
1195 // SkAlpha255To256(255 - a) >> 3, computed here as (256 - sa) >> 3.
1196 __m128i isa = _mm_sub_epi16(var256, sa);
1197 isa = _mm_srli_epi16(isa, 3);
1198 // Per channel: (dst * isa + expanded src) >> 5.
1199 dr = _mm_mullo_epi16(dr, isa);
1200 dr = _mm_add_epi16(dr, sr);
1201 dr = _mm_srli_epi16(dr, 5);
1202
1203 dg = _mm_mullo_epi16(dg, isa);
1204 dg = _mm_add_epi16(dg, sg);
1205 dg = _mm_srli_epi16(dg, 5);
1206
1207 db = _mm_mullo_epi16(db, isa);
1208 db = _mm_add_epi16(db, sb);
1209 db = _mm_srli_epi16(db, 5);
1210
1211 // Pack and store dst pixel.
1212 __m128i d_pixel = SkPackRGB16_SSE(dr, dg, db);
1213 _mm_store_si128(d++, d_pixel);
1214 // Advancing x by 8 leaves (x & 3) unchanged, so `dither` stays valid for the next iteration.
1215 count -= 8;
1216 x += 8;
1217 }
1218 // Hand the advanced positions back to the scalar tail below.
1219 src = reinterpret_cast<const SkPMColor*>(s);
1220 dst = reinterpret_cast<uint16_t*>(d);
1221 }
1222 // Scalar tail: at this point fewer than 8 pixels remain.
1223 if (count > 0) {
1224 DITHER_565_SCAN(y);
1225 do {
1226 SkPMColor c = *src++;
1227 SkPMColorAssert(c);
1228 if (c) {
1229 unsigned a = SkGetPackedA32(c);
1230
1231 int d = SkAlphaMul(DITHER_VALUE(x), SkAlpha255To256(a));
1232
1233 unsigned sr = SkGetPackedR32(c);
1234 unsigned sg = SkGetPackedG32(c);
1235 unsigned sb = SkGetPackedB32(c);
1236 sr = SkDITHER_R32_FOR_565(sr, d);
1237 sg = SkDITHER_G32_FOR_565(sg, d);
1238 sb = SkDITHER_B32_FOR_565(sb, d);
1239
1240 uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2);
1241 uint32_t dst_expanded = SkExpand_rgb_16(*dst);
1242 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3);
1243 // now src and dst expanded are in g:11 r:10 x:1 b:10
1244 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);
1245 }
1246 dst += 1;
1247 DITHER_INC_X(x);
1248 } while (--count != 0);
1249 }
1250 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698