Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/opts/SkSwizzler_opts.h

Issue 1656383002: NEON optimizations for gray -> RGBA (or BGRA) conversions (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Fix comments Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkOpts_ssse3.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2016 Google Inc. 2 * Copyright 2016 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkSwizzler_opts_DEFINED 8 #ifndef SkSwizzler_opts_DEFINED
9 #define SkSwizzler_opts_DEFINED 9 #define SkSwizzler_opts_DEFINED
10 10
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
81 g = src[1], 81 g = src[1],
82 b = src[2]; 82 b = src[2];
83 src += 3; 83 src += 3;
84 dst[i] = (uint32_t)0xFF << 24 84 dst[i] = (uint32_t)0xFF << 24
85 | (uint32_t)r << 16 85 | (uint32_t)r << 16
86 | (uint32_t)g << 8 86 | (uint32_t)g << 8
87 | (uint32_t)b << 0; 87 | (uint32_t)b << 0;
88 } 88 }
89 } 89 }
90 90
// Portable expansion of 8-bit gray samples into 32-bit pixels.
//
// dst   - receives one uint32_t per input sample; must have room for `count` entries.
// vsrc  - points at `count` tightly packed uint8_t gray samples.
// count - number of samples to convert; a value <= 0 writes nothing.
//
// Every output word holds 0xFF in bits 24-31 and the gray value replicated in
// bits 16-23, 8-15 and 0-7.
static void gray_to_RGB1_portable(uint32_t dst[], const void* vsrc, int count) {
    const uint8_t* src = (const uint8_t*)vsrc;
    for (int i = 0; i < count; i++) {
        // Widen once so the three shifts below operate on a 32-bit value.
        const uint32_t gray = src[i];
        dst[i] = (uint32_t)0xFF << 24
               | gray << 16
               | gray <<  8
               | gray <<  0;
    }
}
100
91 #if defined(SK_ARM_HAS_NEON) 101 #if defined(SK_ARM_HAS_NEON)
92 102
93 // Rounded divide by 255, (x + 127) / 255 103 // Rounded divide by 255, (x + 127) / 255
94 static uint8x8_t div255_round(uint16x8_t x) { 104 static uint8x8_t div255_round(uint16x8_t x) {
95 // result = (x + 127) / 255 105 // result = (x + 127) / 255
96 // result = (x + 127) / 256 + error1 106 // result = (x + 127) / 256 + error1
97 // 107 //
98 // error1 = (x + 127) / (255 * 256) 108 // error1 = (x + 127) / (255 * 256)
99 // error1 = (x + 127) / (256 * 256) + error2 109 // error1 = (x + 127) / (256 * 256) + error2
100 // 110 //
(...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after
253 } 263 }
254 264
// RGB_to_RGB1 for the SK_ARM_HAS_NEON branch: passes dst, src and count
// unchanged to insert_alpha_should_swaprb with its bool template argument set
// to false. NOTE(review): presumably "false" means R and B are not swapped;
// the template body is outside this excerpt, so confirm there.
255 static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) { 265 static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) {
256 insert_alpha_should_swaprb<false>(dst, src, count); 266 insert_alpha_should_swaprb<false>(dst, src, count);
257 } 267 }
258 268
// RGB_to_BGR1 for the SK_ARM_HAS_NEON branch: passes dst, src and count
// unchanged to insert_alpha_should_swaprb with its bool template argument set
// to true. NOTE(review): presumably "true" requests the R/B swap; the template
// body is outside this excerpt, so confirm there.
259 static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) { 269 static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) {
260 insert_alpha_should_swaprb<true>(dst, src, count); 270 insert_alpha_should_swaprb<true>(dst, src, count);
261 } 271 }
262 272
273 static void gray_to_RGB1(uint32_t dst[], const void* vsrc, int count) {
274 const uint8_t* src = (const uint8_t*) vsrc;
275 while (count >= 16) {
276 // Load 16 pixels.
277 uint8x16_t gray = vld1q_u8(src);
278
279 // Set each of the color channels.
280 uint8x16x4_t rgba;
281 rgba.val[0] = gray;
282 rgba.val[1] = gray;
283 rgba.val[2] = gray;
284 rgba.val[3] = vdupq_n_u8(0xFF);
285
286 // Store 16 pixels.
287 vst4q_u8((uint8_t*) dst, rgba);
288 src += 16;
289 dst += 16;
290 count -= 16;
291 }
292
293 if (count >= 8) {
294 // Load 8 pixels.
295 uint8x8_t gray = vld1_u8(src);
296
297 // Set each of the color channels.
298 uint8x8x4_t rgba;
299 rgba.val[0] = gray;
300 rgba.val[1] = gray;
301 rgba.val[2] = gray;
302 rgba.val[3] = vdup_n_u8(0xFF);
303
304 // Store 8 pixels.
305 vst4_u8((uint8_t*) dst, rgba);
306 src += 8;
307 dst += 8;
308 count -= 8;
309 }
310
311 gray_to_RGB1_portable(dst, src, count);
312 }
313
263 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 314 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
264 315
265 template <bool kSwapRB> 316 template <bool kSwapRB>
266 static void premul_should_swapRB(uint32_t* dst, const void* vsrc, int count) { 317 static void premul_should_swapRB(uint32_t* dst, const void* vsrc, int count) {
267 auto src = (const uint32_t*)vsrc; 318 auto src = (const uint32_t*)vsrc;
268 319
269 auto premul8 = [](__m128i* lo, __m128i* hi) { 320 auto premul8 = [](__m128i* lo, __m128i* hi) {
270 const __m128i zeros = _mm_setzero_si128(); 321 const __m128i zeros = _mm_setzero_si128();
271 const __m128i _128 = _mm_set1_epi16(128); 322 const __m128i _128 = _mm_set1_epi16(128);
272 const __m128i _257 = _mm_set1_epi16(257); 323 const __m128i _257 = _mm_set1_epi16(257);
(...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after
394 } 445 }
395 446
// RGB_to_RGB1 for the SSSE3 branch: passes dst, src and count unchanged to
// insert_alpha_should_swaprb with its bool template argument set to false.
// NOTE(review): presumably "false" means R and B are not swapped; the template
// body is outside this excerpt, so confirm there.
396 static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) { 447 static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) {
397 insert_alpha_should_swaprb<false>(dst, src, count); 448 insert_alpha_should_swaprb<false>(dst, src, count);
398 } 449 }
399 450
// RGB_to_BGR1 for the SSSE3 branch: passes dst, src and count unchanged to
// insert_alpha_should_swaprb with its bool template argument set to true.
// NOTE(review): presumably "true" requests the R/B swap; the template body is
// outside this excerpt, so confirm there.
400 static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) { 451 static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) {
401 insert_alpha_should_swaprb<true>(dst, src, count); 452 insert_alpha_should_swaprb<true>(dst, src, count);
402 } 453 }
403 454
// gray_to_RGB1 for the SSSE3 branch: there is no vectorised gray expansion
// here; the call is forwarded unchanged to gray_to_RGB1_portable.
455 static void gray_to_RGB1(uint32_t dst[], const void* src, int count) {
456 gray_to_RGB1_portable(dst, src, count);
457 }
458
404 #else 459 #else
405 460
// RGBA_to_rgbA for the #else branch (neither the NEON nor the SSSE3 condition
// holds): forwards dst, src and count unchanged to RGBA_to_rgbA_portable,
// which is defined outside this excerpt.
406 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { 461 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) {
407 RGBA_to_rgbA_portable(dst, src, count); 462 RGBA_to_rgbA_portable(dst, src, count);
408 } 463 }
409 464
// RGBA_to_bgrA for the #else branch (neither the NEON nor the SSSE3 condition
// holds): forwards dst, src and count unchanged to RGBA_to_bgrA_portable,
// which is defined outside this excerpt.
410 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) { 465 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) {
411 RGBA_to_bgrA_portable(dst, src, count); 466 RGBA_to_bgrA_portable(dst, src, count);
412 } 467 }
413 468
// RGBA_to_BGRA for the #else branch (neither the NEON nor the SSSE3 condition
// holds): forwards dst, src and count unchanged to RGBA_to_BGRA_portable,
// which is defined outside this excerpt.
414 static void RGBA_to_BGRA(uint32_t* dst, const void* src, int count) { 469 static void RGBA_to_BGRA(uint32_t* dst, const void* src, int count) {
415 RGBA_to_BGRA_portable(dst, src, count); 470 RGBA_to_BGRA_portable(dst, src, count);
416 } 471 }
417 472
// RGB_to_RGB1 for the #else branch (neither the NEON nor the SSSE3 condition
// holds): forwards dst, src and count unchanged to RGB_to_RGB1_portable,
// which is defined outside this excerpt.
418 static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) { 473 static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) {
419 RGB_to_RGB1_portable(dst, src, count); 474 RGB_to_RGB1_portable(dst, src, count);
420 } 475 }
421 476
// RGB_to_BGR1 for the #else branch (neither the NEON nor the SSSE3 condition
// holds): forwards dst, src and count unchanged to RGB_to_BGR1_portable,
// which is defined outside this excerpt.
422 static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) { 477 static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) {
423 RGB_to_BGR1_portable(dst, src, count); 478 RGB_to_BGR1_portable(dst, src, count);
424 } 479 }
425 480
// gray_to_RGB1 for the #else branch (neither the NEON nor the SSSE3 condition
// holds): forwards dst, src and count unchanged to gray_to_RGB1_portable.
481 static void gray_to_RGB1(uint32_t dst[], const void* src, int count) {
482 gray_to_RGB1_portable(dst, src, count);
483 }
484
426 #endif 485 #endif
427 486
428 } 487 }
429 488
430 #endif // SkSwizzler_opts_DEFINED 489 #endif // SkSwizzler_opts_DEFINED
OLDNEW
« no previous file with comments | « src/opts/SkOpts_ssse3.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698