| OLD | NEW | 
|---|---|
| 1 /* | 1 /* | 
| 2  * Copyright 2012 The Android Open Source Project | 2  * Copyright 2012 The Android Open Source Project | 
| 3  * | 3  * | 
| 4  * Use of this source code is governed by a BSD-style license that can be | 4  * Use of this source code is governed by a BSD-style license that can be | 
| 5  * found in the LICENSE file. | 5  * found in the LICENSE file. | 
| 6  */ | 6  */ | 
| 7 | 7 | 
| 8 | 8 #include <emmintrin.h> | 
|  | 9 #include "SkBitmapProcState_opts_SSE2.h" | 
| 9 #include "SkBlitRow_opts_SSE2.h" | 10 #include "SkBlitRow_opts_SSE2.h" | 
| 10 #include "SkBitmapProcState_opts_SSE2.h" |  | 
| 11 #include "SkColorPriv.h" | 11 #include "SkColorPriv.h" | 
| 12 #include "SkColor_opts_SSE2.h" | 12 #include "SkColor_opts_SSE2.h" | 
| 13 #include "SkDither.h" | 13 #include "SkDither.h" | 
| 14 #include "SkUtils.h" | 14 #include "SkUtils.h" | 
| 15 | 15 | 
| 16 #include <emmintrin.h> |  | 
| 17 |  | 
| 18 /* SSE2 version of S32_Blend_BlitRow32() | 16 /* SSE2 version of S32_Blend_BlitRow32() | 
| 19  * portable version is in core/SkBlitRow_D32.cpp | 17  * portable version is in core/SkBlitRow_D32.cpp | 
| 20  */ | 18  */ | 
| 21 void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, | 19 void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, | 
| 22                               const SkPMColor* SK_RESTRICT src, | 20                               const SkPMColor* SK_RESTRICT src, | 
| 23                               int count, U8CPU alpha) { | 21                               int count, U8CPU alpha) { | 
| 24     SkASSERT(alpha <= 255); | 22     SkASSERT(alpha <= 255); | 
| 25     if (count <= 0) { | 23     if (count <= 0) { | 
| 26         return; | 24         return; | 
| 27     } | 25     } | 
| (...skipping 144 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 172             // Combine back into RGBA. | 170             // Combine back into RGBA. | 
| 173             dst_pixel = _mm_or_si128(dst_rb, dst_ag); | 171             dst_pixel = _mm_or_si128(dst_rb, dst_ag); | 
| 174 | 172 | 
| 175             // Add result | 173             // Add result | 
| 176             __m128i result = _mm_add_epi8(src_pixel, dst_pixel); | 174             __m128i result = _mm_add_epi8(src_pixel, dst_pixel); | 
| 177             _mm_store_si128(d, result); | 175             _mm_store_si128(d, result); | 
| 178             s++; | 176             s++; | 
| 179             d++; | 177             d++; | 
| 180             count -= 4; | 178             count -= 4; | 
| 181         } | 179         } | 
| 182     #else | 180 #else | 
| 183         __m128i rb_mask = _mm_set1_epi32(0x00FF00FF); | 181         __m128i rb_mask = _mm_set1_epi32(0x00FF00FF); | 
| 184         __m128i c_256 = _mm_set1_epi16(0x0100);  // 8 copies of 256 (16-bit) | 182         __m128i c_256 = _mm_set1_epi16(0x0100);  // 8 copies of 256 (16-bit) | 
| 185         while (count >= 4) { | 183         while (count >= 4) { | 
| 186             // Load 4 pixels | 184             // Load 4 pixels | 
| 187             __m128i src_pixel = _mm_loadu_si128(s); | 185             __m128i src_pixel = _mm_loadu_si128(s); | 
| 188             __m128i dst_pixel = _mm_load_si128(d); | 186             __m128i dst_pixel = _mm_load_si128(d); | 
| 189 | 187 | 
| 190             __m128i dst_rb = _mm_and_si128(rb_mask, dst_pixel); | 188             __m128i dst_rb = _mm_and_si128(rb_mask, dst_pixel); | 
| 191             __m128i dst_ag = _mm_srli_epi16(dst_pixel, 8); | 189             __m128i dst_ag = _mm_srli_epi16(dst_pixel, 8); | 
| 192 | 190 | 
| (...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 335         dst++; | 333         dst++; | 
| 336         count--; | 334         count--; | 
| 337     } | 335     } | 
| 338 } | 336 } | 
| 339 | 337 | 
| 340 /* SSE2 version of Color32() | 338 /* SSE2 version of Color32() | 
| 341  * portable version is in core/SkBlitRow_D32.cpp | 339  * portable version is in core/SkBlitRow_D32.cpp | 
| 342  */ | 340  */ | 
| 343 void Color32_SSE2(SkPMColor dst[], const SkPMColor src[], int count, | 341 void Color32_SSE2(SkPMColor dst[], const SkPMColor src[], int count, | 
| 344                   SkPMColor color) { | 342                   SkPMColor color) { | 
| 345 |  | 
| 346     if (count <= 0) { | 343     if (count <= 0) { | 
| 347         return; | 344         return; | 
| 348     } | 345     } | 
| 349 | 346 | 
| 350     if (0 == color) { | 347     if (0 == color) { | 
| 351         if (src != dst) { | 348         if (src != dst) { | 
| 352             memcpy(dst, src, count * sizeof(SkPMColor)); | 349             memcpy(dst, src, count * sizeof(SkPMColor)); | 
| 353         } | 350         } | 
| 354         return; | 351         return; | 
| 355     } | 352     } | 
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 399                 __m128i result = _mm_add_epi8(color_wide, src_pixel); | 396                 __m128i result = _mm_add_epi8(color_wide, src_pixel); | 
| 400 | 397 | 
| 401                 // Store result. | 398                 // Store result. | 
| 402                 _mm_store_si128(d, result); | 399                 _mm_store_si128(d, result); | 
| 403                 s++; | 400                 s++; | 
| 404                 d++; | 401                 d++; | 
| 405                 count -= 4; | 402                 count -= 4; | 
| 406             } | 403             } | 
| 407             src = reinterpret_cast<const SkPMColor*>(s); | 404             src = reinterpret_cast<const SkPMColor*>(s); | 
| 408             dst = reinterpret_cast<SkPMColor*>(d); | 405             dst = reinterpret_cast<SkPMColor*>(d); | 
| 409          } | 406         } | 
| 410 | 407 | 
| 411         while (count > 0) { | 408         while (count > 0) { | 
| 412             *dst = color + SkAlphaMulQ(*src, scale); | 409             *dst = color + SkAlphaMulQ(*src, scale); | 
| 413             src += 1; | 410             src += 1; | 
| 414             dst += 1; | 411             dst += 1; | 
| 415             count--; | 412             count--; | 
| 416         } | 413         } | 
| 417     } | 414     } | 
| 418 } | 415 } | 
| 419 | 416 | 
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 497                 // Add two pixels into result. | 494                 // Add two pixels into result. | 
| 498                 __m128i result = _mm_add_epi8(tmp_src_pixel, dst_pixel); | 495                 __m128i result = _mm_add_epi8(tmp_src_pixel, dst_pixel); | 
| 499                 _mm_store_si128(d, result); | 496                 _mm_store_si128(d, result); | 
| 500                 // load the next 4 pixel | 497                 // load the next 4 pixel | 
| 501                 mask = mask + 4; | 498                 mask = mask + 4; | 
| 502                 d++; | 499                 d++; | 
| 503                 count -= 4; | 500                 count -= 4; | 
| 504             } | 501             } | 
| 505             dst = reinterpret_cast<SkPMColor *>(d); | 502             dst = reinterpret_cast<SkPMColor *>(d); | 
| 506         } | 503         } | 
| 507         while(count > 0) { | 504         while (count > 0) { | 
| 508             *dst= SkBlendARGB32(color, *dst, *mask); | 505             *dst= SkBlendARGB32(color, *dst, *mask); | 
| 509             dst += 1; | 506             dst += 1; | 
| 510             mask++; | 507             mask++; | 
| 511             count --; | 508             count --; | 
| 512         } | 509         } | 
| 513         dst = (SkPMColor *)((char*)dst + dstOffset); | 510         dst = (SkPMColor *)((char*)dst + dstOffset); | 
| 514         mask += maskOffset; | 511         mask += maskOffset; | 
| 515     } while (--height != 0); | 512     } while (--height != 0); | 
| 516 } | 513 } | 
| 517 | 514 | 
| (...skipping 837 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 1355                 uint32_t dst_expanded = SkExpand_rgb_16(*dst); | 1352                 uint32_t dst_expanded = SkExpand_rgb_16(*dst); | 
| 1356                 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); | 1353                 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); | 
| 1357                 // now src and dst expanded are in g:11 r:10 x:1 b:10 | 1354                 // now src and dst expanded are in g:11 r:10 x:1 b:10 | 
| 1358                 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); | 1355                 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); | 
| 1359             } | 1356             } | 
| 1360             dst += 1; | 1357             dst += 1; | 
| 1361             DITHER_INC_X(x); | 1358             DITHER_INC_X(x); | 
| 1362         } while (--count != 0); | 1359         } while (--count != 0); | 
| 1363     } | 1360     } | 
| 1364 } | 1361 } | 
| OLD | NEW | 
|---|---|