OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2012 The Android Open Source Project | 2 * Copyright 2012 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include <emmintrin.h> | 8 #include <emmintrin.h> |
9 #include "SkBitmapProcState_opts_SSE2.h" | 9 #include "SkBitmapProcState_opts_SSE2.h" |
10 #include "SkBlitRow_opts_SSE2.h" | 10 #include "SkBlitRow_opts_SSE2.h" |
(...skipping 271 matching lines...)
282 | 282 |
283 while (count > 0) { | 283 while (count > 0) { |
284 *dst = color + SkAlphaMulQ(*src, scale); | 284 *dst = color + SkAlphaMulQ(*src, scale); |
285 src += 1; | 285 src += 1; |
286 dst += 1; | 286 dst += 1; |
287 count--; | 287 count--; |
288 } | 288 } |
289 } | 289 } |
290 } | 290 } |
291 | 291 |
| 292 void Color32A_D565_SSE2(uint16_t dst[], SkPMColor src, int count, int x, int y)
{ |
| 293 SkASSERT(count > 0); |
| 294 |
| 295 uint32_t src_expand = (SkGetPackedG32(src) << 24) | |
| 296 (SkGetPackedR32(src) << 13) | |
| 297 (SkGetPackedB32(src) << 2); |
| 298 unsigned scale = SkAlpha255To256(0xFF - SkGetPackedA32(src)) >> 3; |
| 299 |
| 300 // Check if we have enough pixels to run SIMD |
| 301 if (count >= (int)(8 + (((16 - (size_t)dst) & 0x0F) >> 1))) { |
| 302 __m128i* dst_wide; |
| 303 const __m128i src_R_wide = _mm_set1_epi16(SkGetPackedR32(src) << 2); |
| 304 const __m128i src_G_wide = _mm_set1_epi16(SkGetPackedG32(src) << 3); |
| 305 const __m128i src_B_wide = _mm_set1_epi16(SkGetPackedB32(src) << 2); |
| 306 const __m128i scale_wide = _mm_set1_epi16(scale); |
| 307 const __m128i mask_blue = _mm_set1_epi16(SK_B16_MASK); |
| 308 const __m128i mask_green = _mm_set1_epi16(SK_G16_MASK << SK_G16_SHIFT); |
| 309 |
| 310 // Align dst to an even 16 byte address (0-7 pixels) |
| 311 while (((((size_t)dst) & 0x0F) != 0) && (count > 0)) { |
| 312 *dst = SkBlend32_RGB16(src_expand, *dst, scale); |
| 313 dst += 1; |
| 314 count--; |
| 315 } |
| 316 |
| 317 dst_wide = reinterpret_cast<__m128i*>(dst); |
| 318 do { |
| 319 // Load eight RGB565 pixels |
| 320 __m128i pixels = _mm_load_si128(dst_wide); |
| 321 |
| 322 // Mask out sub-pixels |
| 323 __m128i pixel_R = _mm_srli_epi16(pixels, SK_R16_SHIFT); |
| 324 __m128i pixel_G = _mm_slli_epi16(pixels, SK_R16_BITS); |
| 325 pixel_G = _mm_srli_epi16(pixel_G, SK_R16_BITS + SK_B16_BITS); |
| 326 __m128i pixel_B = _mm_and_si128(pixels, mask_blue); |
| 327 |
| 328 // Scale with alpha |
| 329 pixel_R = _mm_mullo_epi16(pixel_R, scale_wide); |
| 330 pixel_G = _mm_mullo_epi16(pixel_G, scale_wide); |
| 331 pixel_B = _mm_mullo_epi16(pixel_B, scale_wide); |
| 332 |
| 333 // Add src_X_wide and shift down again |
| 334 pixel_R = _mm_add_epi16(pixel_R, src_R_wide); |
| 335 pixel_R = _mm_srli_epi16(pixel_R, 5); |
| 336 pixel_G = _mm_add_epi16(pixel_G, src_G_wide); |
| 337 pixel_B = _mm_add_epi16(pixel_B, src_B_wide); |
| 338 pixel_B = _mm_srli_epi16(pixel_B, 5); |
| 339 |
| 340 // Combine into RGB565 and store |
| 341 pixel_R = _mm_slli_epi16(pixel_R, SK_R16_SHIFT); |
| 342 pixel_G = _mm_and_si128(pixel_G, mask_green); |
| 343 pixels = _mm_or_si128(pixel_R, pixel_G); |
| 344 pixels = _mm_or_si128(pixels, pixel_B); |
| 345 _mm_store_si128(dst_wide, pixels); |
| 346 count -= 8; |
| 347 dst_wide++; |
| 348 } while (count >= 8); |
| 349 |
| 350 dst = reinterpret_cast<uint16_t*>(dst_wide); |
| 351 } |
| 352 |
| 353 // Small loop to handle remaining pixels. |
| 354 while (count > 0) { |
| 355 *dst = SkBlend32_RGB16(src_expand, *dst, scale); |
| 356 dst += 1; |
| 357 count--; |
| 358 } |
| 359 } |
| 360 |
292 void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* maskPtr, | 361 void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* maskPtr, |
293 size_t maskRB, SkColor origColor, | 362 size_t maskRB, SkColor origColor, |
294 int width, int height) { | 363 int width, int height) { |
295 SkPMColor color = SkPreMultiplyColor(origColor); | 364 SkPMColor color = SkPreMultiplyColor(origColor); |
296 size_t dstOffset = dstRB - (width << 2); | 365 size_t dstOffset = dstRB - (width << 2); |
297 size_t maskOffset = maskRB - width; | 366 size_t maskOffset = maskRB - width; |
298 SkPMColor* dst = (SkPMColor *)device; | 367 SkPMColor* dst = (SkPMColor *)device; |
299 const uint8_t* mask = (const uint8_t*)maskPtr; | 368 const uint8_t* mask = (const uint8_t*)maskPtr; |
300 do { | 369 do { |
301 int count = width; | 370 int count = width; |
(...skipping 844 matching lines...)
1146 uint32_t dst_expanded = SkExpand_rgb_16(*dst); | 1215 uint32_t dst_expanded = SkExpand_rgb_16(*dst); |
1147 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); | 1216 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); |
1148 // now src and dst expanded are in g:11 r:10 x:1 b:10 | 1217 // now src and dst expanded are in g:11 r:10 x:1 b:10 |
1149 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); | 1218 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); |
1150 } | 1219 } |
1151 dst += 1; | 1220 dst += 1; |
1152 DITHER_INC_X(x); | 1221 DITHER_INC_X(x); |
1153 } while (--count != 0); | 1222 } while (--count != 0); |
1154 } | 1223 } |
1155 } | 1224 } |
OLD | NEW |