OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2012 The Android Open Source Project | 2 * Copyright 2012 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 | 8 |
9 #include "SkBlitRow_opts_SSE2.h" | 9 #include "SkBlitRow_opts_SSE2.h" |
10 #include "SkBitmapProcState_opts_SSE2.h" | 10 #include "SkBitmapProcState_opts_SSE2.h" |
(...skipping 1148 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1159 do { | 1159 do { |
1160 SkPMColor c = *src++; | 1160 SkPMColor c = *src++; |
1161 SkPMColorAssert(c); | 1161 SkPMColorAssert(c); |
1162 | 1162 |
1163 unsigned dither = DITHER_VALUE(x); | 1163 unsigned dither = DITHER_VALUE(x); |
1164 *dst++ = SkDitherRGB32To565(c, dither); | 1164 *dst++ = SkDitherRGB32To565(c, dither); |
1165 DITHER_INC_X(x); | 1165 DITHER_INC_X(x); |
1166 } while (--count != 0); | 1166 } while (--count != 0); |
1167 } | 1167 } |
1168 } | 1168 } |
| 1169 |
| 1170 /* SSE2 version of S32A_D565_Opaque_Dither() |
| 1171 * portable version is in core/SkBlitRow_D16.cpp |
| 1172 */ |
| 1173 void S32A_D565_Opaque_Dither_SSE2(uint16_t* SK_RESTRICT dst, |
| 1174 const SkPMColor* SK_RESTRICT src, |
| 1175 int count, U8CPU alpha, int x, int y) { |
| 1176 SkASSERT(255 == alpha); |
| 1177 |
| 1178 if (count <= 0) { |
| 1179 return; |
| 1180 } |
| 1181 |
| 1182 if (count >= 8) { |
| 1183 while (((size_t)dst & 0x0F) != 0) { |
| 1184 DITHER_565_SCAN(y); |
| 1185 SkPMColor c = *src++; |
| 1186 SkPMColorAssert(c); |
| 1187 if (c) { |
| 1188 unsigned a = SkGetPackedA32(c); |
| 1189 |
| 1190 int d = SkAlphaMul(DITHER_VALUE(x), SkAlpha255To256(a)); |
| 1191 |
| 1192 unsigned sr = SkGetPackedR32(c); |
| 1193 unsigned sg = SkGetPackedG32(c); |
| 1194 unsigned sb = SkGetPackedB32(c); |
| 1195 sr = SkDITHER_R32_FOR_565(sr, d); |
| 1196 sg = SkDITHER_G32_FOR_565(sg, d); |
| 1197 sb = SkDITHER_B32_FOR_565(sb, d); |
| 1198 |
| 1199 uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2); |
| 1200 uint32_t dst_expanded = SkExpand_rgb_16(*dst); |
| 1201 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); |
| 1202 // now src and dst expanded are in g:11 r:10 x:1 b:10 |
| 1203 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); |
| 1204 } |
| 1205 dst += 1; |
| 1206 DITHER_INC_X(x); |
| 1207 count--; |
| 1208 } |
| 1209 |
| 1210 unsigned short dither_value[8]; |
| 1211 __m128i dither, dither_cur; |
| 1212 #ifdef ENABLE_DITHER_MATRIX_4X4 |
| 1213 const uint8_t* dither_scan = gDitherMatrix_3Bit_4X4[(y) & 3]; |
| 1214 dither_value[0] = dither_value[4] = dither_scan[(x) & 3]; |
| 1215 dither_value[1] = dither_value[5] = dither_scan[(x + 1) & 3]; |
| 1216 dither_value[2] = dither_value[6] = dither_scan[(x + 2) & 3]; |
| 1217 dither_value[3] = dither_value[7] = dither_scan[(x + 3) & 3]; |
| 1218 #else |
| 1219 const uint16_t dither_scan = gDitherMatrix_3Bit_16[(y) & 3]; |
| 1220 dither_value[0] = dither_value[4] = (dither_scan |
| 1221 >> (((x) & 3) << 2)) & 0xF; |
| 1222 dither_value[1] = dither_value[5] = (dither_scan |
| 1223 >> (((x + 1) & 3) << 2)) & 0xF; |
| 1224 dither_value[2] = dither_value[6] = (dither_scan |
| 1225 >> (((x + 2) & 3) << 2)) & 0xF; |
| 1226 dither_value[3] = dither_value[7] = (dither_scan |
| 1227 >> (((x + 3) & 3) << 2)) & 0xF; |
| 1228 #endif |
| 1229 dither = _mm_loadu_si128((__m128i*) dither_value); |
| 1230 |
| 1231 const __m128i* s = reinterpret_cast<const __m128i*>(src); |
| 1232 __m128i* d = reinterpret_cast<__m128i*>(dst); |
| 1233 __m128i var256 = _mm_set1_epi16(256); |
| 1234 __m128i r16_mask = _mm_set1_epi16(SK_R16_MASK); |
| 1235 __m128i g16_mask = _mm_set1_epi16(SK_G16_MASK); |
| 1236 __m128i b16_mask = _mm_set1_epi16(SK_B16_MASK); |
| 1237 |
| 1238 while (count >= 8) { |
| 1239 // Load 8 pixels of src and dst. |
| 1240 __m128i src_pixel1 = _mm_loadu_si128(s++); |
| 1241 __m128i src_pixel2 = _mm_loadu_si128(s++); |
| 1242 __m128i dst_pixel = _mm_load_si128(d); |
| 1243 |
| 1244 // Extract A from src. |
| 1245 __m128i sa1 = _mm_slli_epi32(src_pixel1,(24 - SK_A32_SHIFT)); |
| 1246 sa1 = _mm_srli_epi32(sa1, 24); |
| 1247 __m128i sa2 = _mm_slli_epi32(src_pixel2,(24 - SK_A32_SHIFT)); |
| 1248 sa2 = _mm_srli_epi32(sa2, 24); |
| 1249 __m128i sa = _mm_packs_epi32(sa1, sa2); |
| 1250 |
| 1251 // Calculate current dither value. |
| 1252 dither_cur = _mm_mullo_epi16(dither, |
| 1253 _mm_add_epi16(sa, _mm_set1_epi16(1))); |
| 1254 dither_cur = _mm_srli_epi16(dither_cur, 8); |
| 1255 |
| 1256 // Extract R from src. |
| 1257 __m128i sr1 = _mm_slli_epi32(src_pixel1, (24 - SK_R32_SHIFT)); |
| 1258 sr1 = _mm_srli_epi32(sr1, 24); |
| 1259 __m128i sr2 = _mm_slli_epi32(src_pixel2, (24 - SK_R32_SHIFT)); |
| 1260 sr2 = _mm_srli_epi32(sr2, 24); |
| 1261 __m128i sr = _mm_packs_epi32(sr1, sr2); |
| 1262 |
| 1263 // SkDITHER_R32_FOR_565(sr, d) |
| 1264 __m128i sr_offset = _mm_srli_epi16(sr, 5); |
| 1265 sr = _mm_add_epi16(sr, dither_cur); |
| 1266 sr = _mm_sub_epi16(sr, sr_offset); |
| 1267 |
| 1268 // Expand sr. |
| 1269 sr = _mm_slli_epi16(sr, 2); |
| 1270 |
| 1271 // Extract G from src. |
| 1272 __m128i sg1 = _mm_slli_epi32(src_pixel1, (24 - SK_G32_SHIFT)); |
| 1273 sg1 = _mm_srli_epi32(sg1, 24); |
| 1274 __m128i sg2 = _mm_slli_epi32(src_pixel2, (24 - SK_G32_SHIFT)); |
| 1275 sg2 = _mm_srli_epi32(sg2, 24); |
| 1276 __m128i sg = _mm_packs_epi32(sg1, sg2); |
| 1277 |
| 1278 // sg = SkDITHER_G32_FOR_565(sg, d). |
| 1279 __m128i sg_offset = _mm_srli_epi16(sg, 6); |
| 1280 sg = _mm_add_epi16(sg, _mm_srli_epi16(dither_cur, 1)); |
| 1281 sg = _mm_sub_epi16(sg, sg_offset); |
| 1282 |
| 1283 // Expand sg. |
| 1284 sg = _mm_slli_epi16(sg, 3); |
| 1285 |
| 1286 // Extract B from src. |
| 1287 __m128i sb1 = _mm_slli_epi32(src_pixel1, (24 - SK_B32_SHIFT)); |
| 1288 sb1 = _mm_srli_epi32(sb1, 24); |
| 1289 __m128i sb2 = _mm_slli_epi32(src_pixel2, (24 - SK_B32_SHIFT)); |
| 1290 sb2 = _mm_srli_epi32(sb2, 24); |
| 1291 __m128i sb = _mm_packs_epi32(sb1, sb2); |
| 1292 |
| 1293 // sb = SkDITHER_B32_FOR_565(sb, d). |
| 1294 __m128i sb_offset = _mm_srli_epi16(sb, 5); |
| 1295 sb = _mm_add_epi16(sb, dither_cur); |
| 1296 sb = _mm_sub_epi16(sb, sb_offset); |
| 1297 |
| 1298 // Expand sb. |
| 1299 sb = _mm_slli_epi16(sb, 2); |
| 1300 |
| 1301 // Extract R G B from dst. |
| 1302 __m128i dr = _mm_srli_epi16(dst_pixel, SK_R16_SHIFT); |
| 1303 dr = _mm_and_si128(dr, r16_mask); |
| 1304 __m128i dg = _mm_srli_epi16(dst_pixel, SK_G16_SHIFT); |
| 1305 dg = _mm_and_si128(dg, g16_mask); |
| 1306 __m128i db = _mm_srli_epi16(dst_pixel, SK_B16_SHIFT); |
| 1307 db = _mm_and_si128(db, b16_mask); |
| 1308 |
| 1309 // SkAlpha255To256(255 - a) >> 3 |
| 1310 __m128i isa = _mm_sub_epi16(var256, sa); |
| 1311 isa = _mm_srli_epi16(isa, 3); |
| 1312 |
| 1313 dr = _mm_mullo_epi16(dr, isa); |
| 1314 dr = _mm_add_epi16(dr, sr); |
| 1315 dr = _mm_srli_epi16(dr, 5); |
| 1316 |
| 1317 dg = _mm_mullo_epi16(dg, isa); |
| 1318 dg = _mm_add_epi16(dg, sg); |
| 1319 dg = _mm_srli_epi16(dg, 5); |
| 1320 |
| 1321 db = _mm_mullo_epi16(db, isa); |
| 1322 db = _mm_add_epi16(db, sb); |
| 1323 db = _mm_srli_epi16(db, 5); |
| 1324 |
| 1325 // Package and store dst pixel. |
| 1326 __m128i d_pixel = SkPackRGB16_SSE(dr, dg, db); |
| 1327 _mm_store_si128(d++, d_pixel); |
| 1328 |
| 1329 count -= 8; |
| 1330 x += 8; |
| 1331 } |
| 1332 |
| 1333 src = reinterpret_cast<const SkPMColor*>(s); |
| 1334 dst = reinterpret_cast<uint16_t*>(d); |
| 1335 } |
| 1336 |
| 1337 if (count > 0) { |
| 1338 DITHER_565_SCAN(y); |
| 1339 do { |
| 1340 SkPMColor c = *src++; |
| 1341 SkPMColorAssert(c); |
| 1342 if (c) { |
| 1343 unsigned a = SkGetPackedA32(c); |
| 1344 |
| 1345 int d = SkAlphaMul(DITHER_VALUE(x), SkAlpha255To256(a)); |
| 1346 |
| 1347 unsigned sr = SkGetPackedR32(c); |
| 1348 unsigned sg = SkGetPackedG32(c); |
| 1349 unsigned sb = SkGetPackedB32(c); |
| 1350 sr = SkDITHER_R32_FOR_565(sr, d); |
| 1351 sg = SkDITHER_G32_FOR_565(sg, d); |
| 1352 sb = SkDITHER_B32_FOR_565(sb, d); |
| 1353 |
| 1354 uint32_t src_expanded = (sg << 24) | (sr << 13) | (sb << 2); |
| 1355 uint32_t dst_expanded = SkExpand_rgb_16(*dst); |
| 1356 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); |
| 1357 // now src and dst expanded are in g:11 r:10 x:1 b:10 |
| 1358 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); |
| 1359 } |
| 1360 dst += 1; |
| 1361 DITHER_INC_X(x); |
| 1362 } while (--count != 0); |
| 1363 } |
| 1364 } |
OLD | NEW |