| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2012 The Android Open Source Project | 2 * Copyright 2012 The Android Open Source Project |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 | 8 #include <emmintrin.h> |
| 9 #include "SkBitmapProcState_opts_SSE2.h" |
| 9 #include "SkBlitRow_opts_SSE2.h" | 10 #include "SkBlitRow_opts_SSE2.h" |
| 10 #include "SkBitmapProcState_opts_SSE2.h" | |
| 11 #include "SkColorPriv.h" | 11 #include "SkColorPriv.h" |
| 12 #include "SkColor_opts_SSE2.h" | 12 #include "SkColor_opts_SSE2.h" |
| 13 #include "SkDither.h" | 13 #include "SkDither.h" |
| 14 #include "SkUtils.h" | 14 #include "SkUtils.h" |
| 15 | 15 |
| 16 #include <emmintrin.h> | |
| 17 | |
| 18 /* SSE2 version of S32_Blend_BlitRow32() | 16 /* SSE2 version of S32_Blend_BlitRow32() |
| 19 * portable version is in core/SkBlitRow_D32.cpp | 17 * portable version is in core/SkBlitRow_D32.cpp |
| 20 */ | 18 */ |
| 21 void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, | 19 void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, |
| 22 const SkPMColor* SK_RESTRICT src, | 20 const SkPMColor* SK_RESTRICT src, |
| 23 int count, U8CPU alpha) { | 21 int count, U8CPU alpha) { |
| 24 SkASSERT(alpha <= 255); | 22 SkASSERT(alpha <= 255); |
| 25 if (count <= 0) { | 23 if (count <= 0) { |
| 26 return; | 24 return; |
| 27 } | 25 } |
| (...skipping 144 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 172 // Combine back into RGBA. | 170 // Combine back into RGBA. |
| 173 dst_pixel = _mm_or_si128(dst_rb, dst_ag); | 171 dst_pixel = _mm_or_si128(dst_rb, dst_ag); |
| 174 | 172 |
| 175 // Add result | 173 // Add result |
| 176 __m128i result = _mm_add_epi8(src_pixel, dst_pixel); | 174 __m128i result = _mm_add_epi8(src_pixel, dst_pixel); |
| 177 _mm_store_si128(d, result); | 175 _mm_store_si128(d, result); |
| 178 s++; | 176 s++; |
| 179 d++; | 177 d++; |
| 180 count -= 4; | 178 count -= 4; |
| 181 } | 179 } |
| 182 #else | 180 #else |
| 183 __m128i rb_mask = _mm_set1_epi32(0x00FF00FF); | 181 __m128i rb_mask = _mm_set1_epi32(0x00FF00FF); |
| 184 __m128i c_256 = _mm_set1_epi16(0x0100); // 8 copies of 256 (16-bit) | 182 __m128i c_256 = _mm_set1_epi16(0x0100); // 8 copies of 256 (16-bit) |
| 185 while (count >= 4) { | 183 while (count >= 4) { |
| 186 // Load 4 pixels | 184 // Load 4 pixels |
| 187 __m128i src_pixel = _mm_loadu_si128(s); | 185 __m128i src_pixel = _mm_loadu_si128(s); |
| 188 __m128i dst_pixel = _mm_load_si128(d); | 186 __m128i dst_pixel = _mm_load_si128(d); |
| 189 | 187 |
| 190 __m128i dst_rb = _mm_and_si128(rb_mask, dst_pixel); | 188 __m128i dst_rb = _mm_and_si128(rb_mask, dst_pixel); |
| 191 __m128i dst_ag = _mm_srli_epi16(dst_pixel, 8); | 189 __m128i dst_ag = _mm_srli_epi16(dst_pixel, 8); |
| 192 | 190 |
| (...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 335 dst++; | 333 dst++; |
| 336 count--; | 334 count--; |
| 337 } | 335 } |
| 338 } | 336 } |
| 339 | 337 |
| 340 /* SSE2 version of Color32() | 338 /* SSE2 version of Color32() |
| 341 * portable version is in core/SkBlitRow_D32.cpp | 339 * portable version is in core/SkBlitRow_D32.cpp |
| 342 */ | 340 */ |
| 343 void Color32_SSE2(SkPMColor dst[], const SkPMColor src[], int count, | 341 void Color32_SSE2(SkPMColor dst[], const SkPMColor src[], int count, |
| 344 SkPMColor color) { | 342 SkPMColor color) { |
| 345 | |
| 346 if (count <= 0) { | 343 if (count <= 0) { |
| 347 return; | 344 return; |
| 348 } | 345 } |
| 349 | 346 |
| 350 if (0 == color) { | 347 if (0 == color) { |
| 351 if (src != dst) { | 348 if (src != dst) { |
| 352 memcpy(dst, src, count * sizeof(SkPMColor)); | 349 memcpy(dst, src, count * sizeof(SkPMColor)); |
| 353 } | 350 } |
| 354 return; | 351 return; |
| 355 } | 352 } |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 399 __m128i result = _mm_add_epi8(color_wide, src_pixel); | 396 __m128i result = _mm_add_epi8(color_wide, src_pixel); |
| 400 | 397 |
| 401 // Store result. | 398 // Store result. |
| 402 _mm_store_si128(d, result); | 399 _mm_store_si128(d, result); |
| 403 s++; | 400 s++; |
| 404 d++; | 401 d++; |
| 405 count -= 4; | 402 count -= 4; |
| 406 } | 403 } |
| 407 src = reinterpret_cast<const SkPMColor*>(s); | 404 src = reinterpret_cast<const SkPMColor*>(s); |
| 408 dst = reinterpret_cast<SkPMColor*>(d); | 405 dst = reinterpret_cast<SkPMColor*>(d); |
| 409 } | 406 } |
| 410 | 407 |
| 411 while (count > 0) { | 408 while (count > 0) { |
| 412 *dst = color + SkAlphaMulQ(*src, scale); | 409 *dst = color + SkAlphaMulQ(*src, scale); |
| 413 src += 1; | 410 src += 1; |
| 414 dst += 1; | 411 dst += 1; |
| 415 count--; | 412 count--; |
| 416 } | 413 } |
| 417 } | 414 } |
| 418 } | 415 } |
| 419 | 416 |
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 497 // Add two pixels into result. | 494 // Add two pixels into result. |
| 498 __m128i result = _mm_add_epi8(tmp_src_pixel, dst_pixel); | 495 __m128i result = _mm_add_epi8(tmp_src_pixel, dst_pixel); |
| 499 _mm_store_si128(d, result); | 496 _mm_store_si128(d, result); |
| 500 // load the next 4 pixel | 497 // load the next 4 pixel |
| 501 mask = mask + 4; | 498 mask = mask + 4; |
| 502 d++; | 499 d++; |
| 503 count -= 4; | 500 count -= 4; |
| 504 } | 501 } |
| 505 dst = reinterpret_cast<SkPMColor *>(d); | 502 dst = reinterpret_cast<SkPMColor *>(d); |
| 506 } | 503 } |
| 507 while(count > 0) { | 504 while (count > 0) { |
| 508 *dst= SkBlendARGB32(color, *dst, *mask); | 505 *dst= SkBlendARGB32(color, *dst, *mask); |
| 509 dst += 1; | 506 dst += 1; |
| 510 mask++; | 507 mask++; |
| 511 count --; | 508 count --; |
| 512 } | 509 } |
| 513 dst = (SkPMColor *)((char*)dst + dstOffset); | 510 dst = (SkPMColor *)((char*)dst + dstOffset); |
| 514 mask += maskOffset; | 511 mask += maskOffset; |
| 515 } while (--height != 0); | 512 } while (--height != 0); |
| 516 } | 513 } |
| 517 | 514 |
| (...skipping 837 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1355 uint32_t dst_expanded = SkExpand_rgb_16(*dst); | 1352 uint32_t dst_expanded = SkExpand_rgb_16(*dst); |
| 1356 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); | 1353 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); |
| 1357 // now src and dst expanded are in g:11 r:10 x:1 b:10 | 1354 // now src and dst expanded are in g:11 r:10 x:1 b:10 |
| 1358 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); | 1355 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); |
| 1359 } | 1356 } |
| 1360 dst += 1; | 1357 dst += 1; |
| 1361 DITHER_INC_X(x); | 1358 DITHER_INC_X(x); |
| 1362 } while (--count != 0); | 1359 } while (--count != 0); |
| 1363 } | 1360 } |
| 1364 } | 1361 } |
| OLD | NEW |