Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkSwizzler_opts_DEFINED | 8 #ifndef SkSwizzler_opts_DEFINED |
| 9 #define SkSwizzler_opts_DEFINED | 9 #define SkSwizzler_opts_DEFINED |
| 10 | 10 |
| (...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 91 static void gray_to_RGB1_portable(uint32_t dst[], const void* vsrc, int count) { | 91 static void gray_to_RGB1_portable(uint32_t dst[], const void* vsrc, int count) { |
| 92 const uint8_t* src = (const uint8_t*)vsrc; | 92 const uint8_t* src = (const uint8_t*)vsrc; |
| 93 for (int i = 0; i < count; i++) { | 93 for (int i = 0; i < count; i++) { |
| 94 dst[i] = (uint32_t)0xFF << 24 | 94 dst[i] = (uint32_t)0xFF << 24 |
| 95 | (uint32_t)src[i] << 16 | 95 | (uint32_t)src[i] << 16 |
| 96 | (uint32_t)src[i] << 8 | 96 | (uint32_t)src[i] << 8 |
| 97 | (uint32_t)src[i] << 0; | 97 | (uint32_t)src[i] << 0; |
| 98 } | 98 } |
| 99 } | 99 } |
| 100 | 100 |
| 101 static void grayA_to_RGBA_portable(uint32_t dst[], const void* vsrc, int count) { | |
| 102 const uint8_t* src = (const uint8_t*)vsrc; | |
| 103 for (int i = 0; i < count; i++) { | |
| 104 uint8_t g = src[0], | |
| 105 a = src[1]; | |
| 106 src += 2; | |
| 107 dst[i] = (uint32_t)a << 24 | |
| 108 | (uint32_t)g << 16 | |
| 109 | (uint32_t)g << 8 | |
| 110 | (uint32_t)g << 0; | |
| 111 } | |
| 112 } | |
| 113 | |
| 114 static void grayA_to_rgbA_portable(uint32_t dst[], const void* vsrc, int count) { | |
| 115 const uint8_t* src = (const uint8_t*)vsrc; | |
| 116 for (int i = 0; i < count; i++) { | |
| 117 uint8_t g = src[0], | |
| 118 a = src[1]; | |
| 119 src += 2; | |
| 120 g = (g*a+127)/255; | |
| 121 dst[i] = (uint32_t)a << 24 | |
| 122 | (uint32_t)g << 16 | |
| 123 | (uint32_t)g << 8 | |
| 124 | (uint32_t)g << 0; | |
| 125 } | |
| 126 } | |
| 127 | |
| 101 #if defined(SK_ARM_HAS_NEON) | 128 #if defined(SK_ARM_HAS_NEON) |
| 102 | 129 |
| 103 // Rounded divide by 255, (x + 127) / 255 | 130 // Rounded divide by 255, (x + 127) / 255 |
| 104 static uint8x8_t div255_round(uint16x8_t x) { | 131 static uint8x8_t div255_round(uint16x8_t x) { |
| 105 // result = (x + 127) / 255 | 132 // result = (x + 127) / 255 |
| 106 // result = (x + 127) / 256 + error1 | 133 // result = (x + 127) / 256 + error1 |
| 107 // | 134 // |
| 108 // error1 = (x + 127) / (255 * 256) | 135 // error1 = (x + 127) / (255 * 256) |
| 109 // error1 = (x + 127) / (256 * 256) + error2 | 136 // error1 = (x + 127) / (256 * 256) + error2 |
| 110 // | 137 // |
| (...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 304 // Store 8 pixels. | 331 // Store 8 pixels. |
| 305 vst4_u8((uint8_t*) dst, rgba); | 332 vst4_u8((uint8_t*) dst, rgba); |
| 306 src += 8; | 333 src += 8; |
| 307 dst += 8; | 334 dst += 8; |
| 308 count -= 8; | 335 count -= 8; |
| 309 } | 336 } |
| 310 | 337 |
| 311 gray_to_RGB1_portable(dst, src, count); | 338 gray_to_RGB1_portable(dst, src, count); |
| 312 } | 339 } |
| 313 | 340 |
| 341 template <bool kPremul> | |
| 342 static void expand_grayA(uint32_t dst[], const void* vsrc, int count) { | |
| 343 const uint8_t* src = (const uint8_t*) vsrc; | |
| 344 while (count >= 16) { | |
| 345 // Load 16 pixels. | |
| 346 uint8x16x2_t ga = vld2q_u8(src); | |
|
mtklein
2016/02/03 01:08:15
We sure are getting to use all the vldN / vstN, eh…
msarett
2016/02/03 14:48:51
Yeah it's fun to have good uses for all the instructions…
| |
| 347 | |
| 348 // Premultiply if requested. | |
| 349 if (kPremul) { | |
| 350 ga.val[0] = vcombine_u8( | |
| 351 scale(vget_low_u8(ga.val[0]), vget_low_u8(ga.val[1])), | |
| 352 scale(vget_high_u8(ga.val[0]), vget_high_u8(ga.val[1]))); | |
| 353 } | |
| 354 | |
| 355 // Set each of the color channels. | |
| 356 uint8x16x4_t rgba; | |
| 357 rgba.val[0] = ga.val[0]; | |
| 358 rgba.val[1] = ga.val[0]; | |
| 359 rgba.val[2] = ga.val[0]; | |
| 360 rgba.val[3] = ga.val[1]; | |
| 361 | |
| 362 // Store 16 pixels. | |
| 363 vst4q_u8((uint8_t*) dst, rgba); | |
| 364 src += 16*2; | |
| 365 dst += 16; | |
| 366 count -= 16; | |
| 367 } | |
| 368 | |
| 369 if (count >= 8) { | |
| 370 // Load 8 pixels. | |
| 371 uint8x8x2_t ga = vld2_u8(src); | |
| 372 | |
| 373 // Premultiply if requested. | |
| 374 if (kPremul) { | |
| 375 ga.val[0] = scale(ga.val[0], ga.val[1]); | |
| 376 } | |
| 377 | |
| 378 // Set each of the color channels. | |
| 379 uint8x8x4_t rgba; | |
| 380 rgba.val[0] = ga.val[0]; | |
| 381 rgba.val[1] = ga.val[0]; | |
| 382 rgba.val[2] = ga.val[0]; | |
| 383 rgba.val[3] = ga.val[1]; | |
| 384 | |
| 385 // Store 8 pixels. | |
| 386 vst4_u8((uint8_t*) dst, rgba); | |
| 387 src += 8*2; | |
| 388 dst += 8; | |
| 389 count -= 8; | |
| 390 } | |
| 391 | |
| 392 auto proc = kPremul ? grayA_to_rgbA_portable : grayA_to_RGBA_portable; | |
| 393 proc(dst, src, count); | |
| 394 } | |
| 395 | |
| 396 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { | |
| 397 expand_grayA<false>(dst, src, count); | |
| 398 } | |
| 399 | |
| 400 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { | |
| 401 expand_grayA<true>(dst, src, count); | |
| 402 } | |
| 403 | |
| 314 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 | 404 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
| 315 | 405 |
| 316 template <bool kSwapRB> | 406 template <bool kSwapRB> |
| 317 static void premul_should_swapRB(uint32_t* dst, const void* vsrc, int count) { | 407 static void premul_should_swapRB(uint32_t* dst, const void* vsrc, int count) { |
| 318 auto src = (const uint32_t*)vsrc; | 408 auto src = (const uint32_t*)vsrc; |
| 319 | 409 |
| 320 auto premul8 = [](__m128i* lo, __m128i* hi) { | 410 auto premul8 = [](__m128i* lo, __m128i* hi) { |
| 321 const __m128i zeros = _mm_setzero_si128(); | 411 const __m128i zeros = _mm_setzero_si128(); |
| 322 const __m128i _128 = _mm_set1_epi16(128); | 412 const __m128i _128 = _mm_set1_epi16(128); |
| 323 const __m128i _257 = _mm_set1_epi16(257); | 413 const __m128i _257 = _mm_set1_epi16(257); |
| (...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 475 _mm_storeu_si128((__m128i*) (dst + 12), ggga3); | 565 _mm_storeu_si128((__m128i*) (dst + 12), ggga3); |
| 476 | 566 |
| 477 src += 16; | 567 src += 16; |
| 478 dst += 16; | 568 dst += 16; |
| 479 count -= 16; | 569 count -= 16; |
| 480 } | 570 } |
| 481 | 571 |
| 482 gray_to_RGB1_portable(dst, src, count); | 572 gray_to_RGB1_portable(dst, src, count); |
| 483 } | 573 } |
| 484 | 574 |
| 575 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { | |
| 576 grayA_to_RGBA_portable(dst, src, count); | |
| 577 } | |
| 578 | |
| 579 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { | |
| 580 grayA_to_rgbA_portable(dst, src, count); | |
| 581 } | |
| 582 | |
| 485 #else | 583 #else |
| 486 | 584 |
| 487 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { | 585 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { |
| 488 RGBA_to_rgbA_portable(dst, src, count); | 586 RGBA_to_rgbA_portable(dst, src, count); |
| 489 } | 587 } |
| 490 | 588 |
| 491 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) { | 589 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) { |
| 492 RGBA_to_bgrA_portable(dst, src, count); | 590 RGBA_to_bgrA_portable(dst, src, count); |
| 493 } | 591 } |
| 494 | 592 |
| 495 static void RGBA_to_BGRA(uint32_t* dst, const void* src, int count) { | 593 static void RGBA_to_BGRA(uint32_t* dst, const void* src, int count) { |
| 496 RGBA_to_BGRA_portable(dst, src, count); | 594 RGBA_to_BGRA_portable(dst, src, count); |
| 497 } | 595 } |
| 498 | 596 |
| 499 static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) { | 597 static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) { |
| 500 RGB_to_RGB1_portable(dst, src, count); | 598 RGB_to_RGB1_portable(dst, src, count); |
| 501 } | 599 } |
| 502 | 600 |
| 503 static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) { | 601 static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) { |
| 504 RGB_to_BGR1_portable(dst, src, count); | 602 RGB_to_BGR1_portable(dst, src, count); |
| 505 } | 603 } |
| 506 | 604 |
| 507 static void gray_to_RGB1(uint32_t dst[], const void* src, int count) { | 605 static void gray_to_RGB1(uint32_t dst[], const void* src, int count) { |
| 508 gray_to_RGB1_portable(dst, src, count); | 606 gray_to_RGB1_portable(dst, src, count); |
| 509 } | 607 } |
| 510 | 608 |
| 609 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { | |
| 610 grayA_to_RGBA_portable(dst, src, count); | |
| 611 } | |
| 612 | |
| 613 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { | |
| 614 grayA_to_rgbA_portable(dst, src, count); | |
| 615 } | |
| 616 | |
| 511 #endif | 617 #endif |
| 512 | 618 |
| 513 } | 619 } |
| 514 | 620 |
| 515 #endif // SkSwizzler_opts_DEFINED | 621 #endif // SkSwizzler_opts_DEFINED |
| OLD | NEW |