Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(10)

Side by Side Diff: src/opts/SkSwizzler_opts.h

Issue 1663623002: NEON optimizations for GrayAlpha -> RGBA/BGRA Premul/Unpremul (Closed) Base URL: https://skia.googlesource.com/skia.git@gralpha
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkOpts_ssse3.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright 2016 Google Inc. 2 * Copyright 2016 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkSwizzler_opts_DEFINED 8 #ifndef SkSwizzler_opts_DEFINED
9 #define SkSwizzler_opts_DEFINED 9 #define SkSwizzler_opts_DEFINED
10 10
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
91 static void gray_to_RGB1_portable(uint32_t dst[], const void* vsrc, int count) { 91 static void gray_to_RGB1_portable(uint32_t dst[], const void* vsrc, int count) {
92 const uint8_t* src = (const uint8_t*)vsrc; 92 const uint8_t* src = (const uint8_t*)vsrc;
93 for (int i = 0; i < count; i++) { 93 for (int i = 0; i < count; i++) {
94 dst[i] = (uint32_t)0xFF << 24 94 dst[i] = (uint32_t)0xFF << 24
95 | (uint32_t)src[i] << 16 95 | (uint32_t)src[i] << 16
96 | (uint32_t)src[i] << 8 96 | (uint32_t)src[i] << 8
97 | (uint32_t)src[i] << 0; 97 | (uint32_t)src[i] << 0;
98 } 98 }
99 } 99 }
100 100
101 static void grayA_to_RGBA_portable(uint32_t dst[], const void* vsrc, int count) {
102 const uint8_t* src = (const uint8_t*)vsrc;
103 for (int i = 0; i < count; i++) {
104 uint8_t g = src[0],
105 a = src[1];
106 src += 2;
107 dst[i] = (uint32_t)a << 24
108 | (uint32_t)g << 16
109 | (uint32_t)g << 8
110 | (uint32_t)g << 0;
111 }
112 }
113
114 static void grayA_to_rgbA_portable(uint32_t dst[], const void* vsrc, int count) {
115 const uint8_t* src = (const uint8_t*)vsrc;
116 for (int i = 0; i < count; i++) {
117 uint8_t g = src[0],
118 a = src[1];
119 src += 2;
120 g = (g*a+127)/255;
121 dst[i] = (uint32_t)a << 24
122 | (uint32_t)g << 16
123 | (uint32_t)g << 8
124 | (uint32_t)g << 0;
125 }
126 }
127
101 #if defined(SK_ARM_HAS_NEON) 128 #if defined(SK_ARM_HAS_NEON)
102 129
103 // Rounded divide by 255, (x + 127) / 255 130 // Rounded divide by 255, (x + 127) / 255
104 static uint8x8_t div255_round(uint16x8_t x) { 131 static uint8x8_t div255_round(uint16x8_t x) {
105 // result = (x + 127) / 255 132 // result = (x + 127) / 255
106 // result = (x + 127) / 256 + error1 133 // result = (x + 127) / 256 + error1
107 // 134 //
108 // error1 = (x + 127) / (255 * 256) 135 // error1 = (x + 127) / (255 * 256)
109 // error1 = (x + 127) / (256 * 256) + error2 136 // error1 = (x + 127) / (256 * 256) + error2
110 // 137 //
(...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after
304 // Store 8 pixels. 331 // Store 8 pixels.
305 vst4_u8((uint8_t*) dst, rgba); 332 vst4_u8((uint8_t*) dst, rgba);
306 src += 8; 333 src += 8;
307 dst += 8; 334 dst += 8;
308 count -= 8; 335 count -= 8;
309 } 336 }
310 337
311 gray_to_RGB1_portable(dst, src, count); 338 gray_to_RGB1_portable(dst, src, count);
312 } 339 }
313 340
341 template <bool kPremul>
342 static void expand_grayA(uint32_t dst[], const void* vsrc, int count) {
343 const uint8_t* src = (const uint8_t*) vsrc;
344 while (count >= 16) {
345 // Load 16 pixels.
346 uint8x16x2_t ga = vld2q_u8(src);
[review comment] mtklein (2016/02/03 01:08:15): We sure are getting to use all the vldN / vstN, eh… [truncated in collapsed view]
[review comment] msarett (2016/02/03 14:48:51): Yeah it's fun to have good uses for all the instru… [truncated in collapsed view]
347
348 // Premultiply if requested.
349 if (kPremul) {
350 ga.val[0] = vcombine_u8(
351 scale(vget_low_u8(ga.val[0]), vget_low_u8(ga.val[1])),
352 scale(vget_high_u8(ga.val[0]), vget_high_u8(ga.val[1])));
353 }
354
355 // Set each of the color channels.
356 uint8x16x4_t rgba;
357 rgba.val[0] = ga.val[0];
358 rgba.val[1] = ga.val[0];
359 rgba.val[2] = ga.val[0];
360 rgba.val[3] = ga.val[1];
361
362 // Store 16 pixels.
363 vst4q_u8((uint8_t*) dst, rgba);
364 src += 16*2;
365 dst += 16;
366 count -= 16;
367 }
368
369 if (count >= 8) {
370 // Load 8 pixels.
371 uint8x8x2_t ga = vld2_u8(src);
372
373 // Premultiply if requested.
374 if (kPremul) {
375 ga.val[0] = scale(ga.val[0], ga.val[1]);
376 }
377
378 // Set each of the color channels.
379 uint8x8x4_t rgba;
380 rgba.val[0] = ga.val[0];
381 rgba.val[1] = ga.val[0];
382 rgba.val[2] = ga.val[0];
383 rgba.val[3] = ga.val[1];
384
385 // Store 8 pixels.
386 vst4_u8((uint8_t*) dst, rgba);
387 src += 8*2;
388 dst += 8;
389 count -= 8;
390 }
391
392 auto proc = kPremul ? grayA_to_rgbA_portable : grayA_to_RGBA_portable;
393 proc(dst, src, count);
394 }
395
396 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) {
397 expand_grayA<false>(dst, src, count);
398 }
399
400 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) {
401 expand_grayA<true>(dst, src, count);
402 }
403
314 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 404 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
315 405
316 template <bool kSwapRB> 406 template <bool kSwapRB>
317 static void premul_should_swapRB(uint32_t* dst, const void* vsrc, int count) { 407 static void premul_should_swapRB(uint32_t* dst, const void* vsrc, int count) {
318 auto src = (const uint32_t*)vsrc; 408 auto src = (const uint32_t*)vsrc;
319 409
320 auto premul8 = [](__m128i* lo, __m128i* hi) { 410 auto premul8 = [](__m128i* lo, __m128i* hi) {
321 const __m128i zeros = _mm_setzero_si128(); 411 const __m128i zeros = _mm_setzero_si128();
322 const __m128i _128 = _mm_set1_epi16(128); 412 const __m128i _128 = _mm_set1_epi16(128);
323 const __m128i _257 = _mm_set1_epi16(257); 413 const __m128i _257 = _mm_set1_epi16(257);
(...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after
475 _mm_storeu_si128((__m128i*) (dst + 12), ggga3); 565 _mm_storeu_si128((__m128i*) (dst + 12), ggga3);
476 566
477 src += 16; 567 src += 16;
478 dst += 16; 568 dst += 16;
479 count -= 16; 569 count -= 16;
480 } 570 }
481 571
482 gray_to_RGB1_portable(dst, src, count); 572 gray_to_RGB1_portable(dst, src, count);
483 } 573 }
484 574
575 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) {
576 grayA_to_RGBA_portable(dst, src, count);
577 }
578
579 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) {
580 grayA_to_rgbA_portable(dst, src, count);
581 }
582
485 #else 583 #else
486 584
487 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { 585 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) {
488 RGBA_to_rgbA_portable(dst, src, count); 586 RGBA_to_rgbA_portable(dst, src, count);
489 } 587 }
490 588
491 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) { 589 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) {
492 RGBA_to_bgrA_portable(dst, src, count); 590 RGBA_to_bgrA_portable(dst, src, count);
493 } 591 }
494 592
495 static void RGBA_to_BGRA(uint32_t* dst, const void* src, int count) { 593 static void RGBA_to_BGRA(uint32_t* dst, const void* src, int count) {
496 RGBA_to_BGRA_portable(dst, src, count); 594 RGBA_to_BGRA_portable(dst, src, count);
497 } 595 }
498 596
499 static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) { 597 static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) {
500 RGB_to_RGB1_portable(dst, src, count); 598 RGB_to_RGB1_portable(dst, src, count);
501 } 599 }
502 600
503 static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) { 601 static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) {
504 RGB_to_BGR1_portable(dst, src, count); 602 RGB_to_BGR1_portable(dst, src, count);
505 } 603 }
506 604
507 static void gray_to_RGB1(uint32_t dst[], const void* src, int count) { 605 static void gray_to_RGB1(uint32_t dst[], const void* src, int count) {
508 gray_to_RGB1_portable(dst, src, count); 606 gray_to_RGB1_portable(dst, src, count);
509 } 607 }
510 608
609 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) {
610 grayA_to_RGBA_portable(dst, src, count);
611 }
612
613 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) {
614 grayA_to_rgbA_portable(dst, src, count);
615 }
616
511 #endif 617 #endif
512 618
513 } 619 }
514 620
515 #endif // SkSwizzler_opts_DEFINED 621 #endif // SkSwizzler_opts_DEFINED
OLD | NEW
« no previous file with comments | « src/opts/SkOpts_ssse3.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698