Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(885)

Side by Side Diff: src/opts/SkBlitRow_opts_SSE2.cpp

Issue 179443003: SSE2 implementation of S32A_D565_Opaque_Dither (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: fix style Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkBlitRow_opts_SSE2.h ('k') | src/opts/opts_check_SSE2.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright 2012 The Android Open Source Project 2 * Copyright 2012 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 8
9 #include "SkBlitRow_opts_SSE2.h" 9 #include "SkBlitRow_opts_SSE2.h"
10 #include "SkBitmapProcState_opts_SSE2.h" 10 #include "SkBitmapProcState_opts_SSE2.h"
(...skipping 1148 matching lines...) Expand 10 before | Expand all | Expand 10 after
1159 do { 1159 do {
1160 SkPMColor c = *src++; 1160 SkPMColor c = *src++;
1161 SkPMColorAssert(c); 1161 SkPMColorAssert(c);
1162 1162
1163 unsigned dither = DITHER_VALUE(x); 1163 unsigned dither = DITHER_VALUE(x);
1164 *dst++ = SkDitherRGB32To565(c, dither); 1164 *dst++ = SkDitherRGB32To565(c, dither);
1165 DITHER_INC_X(x); 1165 DITHER_INC_X(x);
1166 } while (--count != 0); 1166 } while (--count != 0);
1167 } 1167 }
1168 } 1168 }
1169
1170 /* SSE2 version of S32A_D565_Opaque_Dither()
1171 * portable version is in core/SkBlitRow_D16.cpp
 *
 * Blends count source pixels (SkPMColor, 32 bits each) from src onto the
 * 16-bit destination pixels at dst, adding a dither term that is scaled by
 * each source pixel's alpha. The work is done in three phases:
 *   1. If count >= 8, a scalar loop runs until dst is 16-byte aligned.
 *   2. A SIMD loop handles 8 pixels per iteration (unaligned loads from src,
 *      aligned load/store on dst) while at least 8 pixels remain.
 *   3. A scalar loop handles whatever is left (or everything when count < 8).
 *
 * dst   - destination pixels, read and written in place.
 * src   - source pixels, read only.
 * count - number of pixels; the function returns immediately if <= 0.
 * alpha - asserted to be 255; not otherwise used.
 * x, y  - position used to pick the dither entry: row (y & 3), column (x & 3).
1172 */
1173 void S32A_D565_Opaque_Dither_SSE2(uint16_t* SK_RESTRICT dst,
1174 const SkPMColor* SK_RESTRICT src,
1175 int count, U8CPU alpha, int x, int y) {
1176 SkASSERT(255 == alpha); // caller must pass alpha == 255; alpha is not read anywhere else
1177
1178 if (count <= 0) { // nothing to draw
1179 return;
1180 }
1181
1182 if (count >= 8) { // the SIMD path is attempted only when at least 8 pixels are available
1183 while (((size_t)dst & 0x0F) != 0) { // scalar prologue: step one pixel at a time until dst is 16-byte aligned
1184 DITHER_565_SCAN(y); // macro defined elsewhere; presumably sets up the dither row for y that DITHER_VALUE reads - TODO confirm
1185 SkPMColor c = *src++;
1186 SkPMColorAssert(c);
1187 if (c) { // a source pixel of 0 leaves *dst untouched
1188 unsigned a = SkGetPackedA32(c);
1189
1190 int d = SkAlphaMul(DITHER_VALUE(x), SkAlpha255To256(a)); // dither value scaled by source alpha (SIMD loop below uses (dither * (a + 1)) >> 8 for this step)
1191
1192 unsigned sr = SkGetPackedR32(c);
1193 unsigned sg = SkGetPackedG32(c);
1194 unsigned sb = SkGetPackedB32(c);
1195 sr = SkDITHER_R32_FOR_565(sr, d);
1196 sg = SkDITHER_G32_FOR_565(sg, d);
1197 sb = SkDITHER_B32_FOR_565(sb, d);
1198
1199 uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2); // place the three dithered channels in separate bit fields of one 32-bit word
1200 uint32_t dst_expanded = SkExpand_rgb_16(*dst);
1201 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); // scale dst by the inverse source alpha, reduced by 3 bits
1202 // now src and dst expanded are in g:11 r:10 x:1 b:10
1203 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); // add all channels at once, drop 5 bits, repack to 16 bits
1204 }
1205 dst += 1;
1206 DITHER_INC_X(x);
1207 count--;
1208 }
1209
1210 unsigned short dither_value[8]; // 4 dither entries repeated twice, one per 16-bit SIMD lane
1211 __m128i dither, dither_cur;
1212 #ifdef ENABLE_DITHER_MATRIX_4X4
1213 const uint8_t* dither_scan = gDitherMatrix_3Bit_4X4[(y) & 3]; // table row selected by y & 3
1214 dither_value[0] = dither_value[4] = dither_scan[(x) & 3];
1215 dither_value[1] = dither_value[5] = dither_scan[(x + 1) & 3];
1216 dither_value[2] = dither_value[6] = dither_scan[(x + 2) & 3];
1217 dither_value[3] = dither_value[7] = dither_scan[(x + 3) & 3];
1218 #else
1219 const uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3]; // row packed into 16 bits; each entry is pulled out below as a 4-bit field
1220 dither_value[0] = dither_value[4] = (dither_scan
1221 >> (((x) & 3) << 2)) & 0xF;
1222 dither_value[1] = dither_value[5] = (dither_scan
1223 >> (((x + 1) & 3) << 2)) & 0xF;
1224 dither_value[2] = dither_value[6] = (dither_scan
1225 >> (((x + 2) & 3) << 2)) & 0xF;
1226 dither_value[3] = dither_value[7] = (dither_scan
1227 >> (((x + 3) & 3) << 2)) & 0xF;
1228 #endif
1229 dither = _mm_loadu_si128((__m128i*) dither_value); // unaligned load: dither_value is a plain local array. Loaded once: x advances by 8 per iteration, so (x & 3) never changes
1230
1231 const __m128i* s = reinterpret_cast<const __m128i*>(src);
1232 __m128i* d = reinterpret_cast<__m128i*>(dst);
1233 __m128i var256 = _mm_set1_epi16(256);
1234 __m128i r16_mask = _mm_set1_epi16(SK_R16_MASK);
1235 __m128i g16_mask = _mm_set1_epi16(SK_G16_MASK);
1236 __m128i b16_mask = _mm_set1_epi16(SK_B16_MASK);
1237
1238 while (count >= 8) {
1239 // Load 8 pixels of src (two unaligned 4-pixel loads) and 8 pixels of dst (one aligned load; the prologue aligned dst).
1240 __m128i src_pixel1 = _mm_loadu_si128(s++);
1241 __m128i src_pixel2 = _mm_loadu_si128(s++);
1242 __m128i dst_pixel = _mm_load_si128(d);
1243
1244 // Extract A from src: shift left then right by 24 isolates the 8-bit channel in each 32-bit lane; the pack merges both halves into eight 16-bit lanes (values are <= 255, so the signed saturation never triggers).
1245 __m128i sa1 = _mm_slli_epi32(src_pixel1,(24 - SK_A32_SHIFT));
1246 sa1 = _mm_srli_epi32(sa1, 24);
1247 __m128i sa2 = _mm_slli_epi32(src_pixel2,(24 - SK_A32_SHIFT));
1248 sa2 = _mm_srli_epi32(sa2, 24);
1249 __m128i sa = _mm_packs_epi32(sa1, sa2);
1250
1251 // Calculate current dither value: (dither * (sa + 1)) >> 8 per lane.
1252 dither_cur = _mm_mullo_epi16(dither,
1253 _mm_add_epi16(sa, _mm_set1_epi16(1)));
1254 dither_cur = _mm_srli_epi16(dither_cur, 8);
1255
1256 // Extract R from src (same shift/pack pattern as A).
1257 __m128i sr1 = _mm_slli_epi32(src_pixel1, (24 - SK_R32_SHIFT));
1258 sr1 = _mm_srli_epi32(sr1, 24);
1259 __m128i sr2 = _mm_slli_epi32(src_pixel2, (24 - SK_R32_SHIFT));
1260 sr2 = _mm_srli_epi32(sr2, 24);
1261 __m128i sr = _mm_packs_epi32(sr1, sr2);
1262
1263 // SkDITHER_R32_FOR_565(sr, d), computed here as sr + dither_cur - (sr >> 5)
1264 __m128i sr_offset = _mm_srli_epi16(sr, 5);
1265 sr = _mm_add_epi16(sr, dither_cur);
1266 sr = _mm_sub_epi16(sr, sr_offset);
1267
1268 // Expand sr (2 extra bits, removed again by the final >> 5 together with the dst scaling).
1269 sr = _mm_slli_epi16(sr, 2);
1270
1271 // Extract G from src (same shift/pack pattern as A).
1272 __m128i sg1 = _mm_slli_epi32(src_pixel1, (24 - SK_G32_SHIFT));
1273 sg1 = _mm_srli_epi32(sg1, 24);
1274 __m128i sg2 = _mm_slli_epi32(src_pixel2, (24 - SK_G32_SHIFT));
1275 sg2 = _mm_srli_epi32(sg2, 24);
1276 __m128i sg = _mm_packs_epi32(sg1, sg2);
1277
1278 // sg = SkDITHER_G32_FOR_565(sg, d), computed here as sg + (dither_cur >> 1) - (sg >> 6).
1279 __m128i sg_offset = _mm_srli_epi16(sg, 6);
1280 sg = _mm_add_epi16(sg, _mm_srli_epi16(dither_cur, 1));
1281 sg = _mm_sub_epi16(sg, sg_offset);
1282
1283 // Expand sg (3 extra bits; green uses one more bit than red and blue).
1284 sg = _mm_slli_epi16(sg, 3);
1285
1286 // Extract B from src (same shift/pack pattern as A).
1287 __m128i sb1 = _mm_slli_epi32(src_pixel1, (24 - SK_B32_SHIFT));
1288 sb1 = _mm_srli_epi32(sb1, 24);
1289 __m128i sb2 = _mm_slli_epi32(src_pixel2, (24 - SK_B32_SHIFT));
1290 sb2 = _mm_srli_epi32(sb2, 24);
1291 __m128i sb = _mm_packs_epi32(sb1, sb2);
1292
1293 // sb = SkDITHER_B32_FOR_565(sb, d), computed here as sb + dither_cur - (sb >> 5).
1294 __m128i sb_offset = _mm_srli_epi16(sb, 5);
1295 sb = _mm_add_epi16(sb, dither_cur);
1296 sb = _mm_sub_epi16(sb, sb_offset);
1297
1298 // Expand sb (2 extra bits, as for red).
1299 sb = _mm_slli_epi16(sb, 2);
1300
1301 // Extract R G B from dst: shift each channel down to bit 0 and mask it.
1302 __m128i dr = _mm_srli_epi16(dst_pixel, SK_R16_SHIFT);
1303 dr = _mm_and_si128(dr, r16_mask);
1304 __m128i dg = _mm_srli_epi16(dst_pixel, SK_G16_SHIFT);
1305 dg = _mm_and_si128(dg, g16_mask);
1306 __m128i db = _mm_srli_epi16(dst_pixel, SK_B16_SHIFT);
1307 db = _mm_and_si128(db, b16_mask);
1308
1309 // SkAlpha255To256(255 - a) >> 3, computed here as (256 - sa) >> 3
1310 __m128i isa = _mm_sub_epi16(var256, sa);
1311 isa = _mm_srli_epi16(isa, 3);
1312
1313 dr = _mm_mullo_epi16(dr, isa); // per channel: (dst * isa + expanded src) >> 5
1314 dr = _mm_add_epi16(dr, sr);
1315 dr = _mm_srli_epi16(dr, 5);
1316
1317 dg = _mm_mullo_epi16(dg, isa);
1318 dg = _mm_add_epi16(dg, sg);
1319 dg = _mm_srli_epi16(dg, 5);
1320
1321 db = _mm_mullo_epi16(db, isa);
1322 db = _mm_add_epi16(db, sb);
1323 db = _mm_srli_epi16(db, 5);
1324
1325 // Package and store dst pixel (aligned store; d advances by 8 pixels).
1326 __m128i d_pixel = SkPackRGB16_SSE(dr, dg, db);
1327 _mm_store_si128(d++, d_pixel);
1328
1329 count -= 8;
1330 x += 8; // multiple of 4, so the preloaded dither vector stays valid
1331 }
1332
1333 src = reinterpret_cast<const SkPMColor*>(s); // hand the advanced positions back to the scalar tail below
1334 dst = reinterpret_cast<uint16_t*>(d);
1335 }
1336
1337 if (count > 0) { // scalar tail: leftover pixels after the SIMD loop, or all pixels when count was < 8
1338 DITHER_565_SCAN(y); // macro defined elsewhere; presumably sets up the dither row for y that DITHER_VALUE reads - TODO confirm
1339 do {
1340 SkPMColor c = *src++;
1341 SkPMColorAssert(c);
1342 if (c) { // a source pixel of 0 leaves *dst untouched
1343 unsigned a = SkGetPackedA32(c);
1344
1345 int d = SkAlphaMul(DITHER_VALUE(x), SkAlpha255To256(a)); // dither value scaled by source alpha
1346
1347 unsigned sr = SkGetPackedR32(c);
1348 unsigned sg = SkGetPackedG32(c);
1349 unsigned sb = SkGetPackedB32(c);
1350 sr = SkDITHER_R32_FOR_565(sr, d);
1351 sg = SkDITHER_G32_FOR_565(sg, d);
1352 sb = SkDITHER_B32_FOR_565(sb, d);
1353
1354 uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2); // same packing as in the alignment prologue
1355 uint32_t dst_expanded = SkExpand_rgb_16(*dst);
1356 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); // scale dst by the inverse source alpha, reduced by 3 bits
1357 // now src and dst expanded are in g:11 r:10 x:1 b:10
1358 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); // add all channels at once, drop 5 bits, repack to 16 bits
1359 }
1360 dst += 1;
1361 DITHER_INC_X(x);
1362 } while (--count != 0);
1363 }
1364 }
OLD | NEW
« no previous file with comments | « src/opts/SkBlitRow_opts_SSE2.h ('k') | src/opts/opts_check_SSE2.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698