OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkSwizzler_opts_DEFINED | 8 #ifndef SkSwizzler_opts_DEFINED |
9 #define SkSwizzler_opts_DEFINED | 9 #define SkSwizzler_opts_DEFINED |
10 | 10 |
(...skipping 340 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
351 _mm_storeu_si128((__m128i*) dst, bgra); | 351 _mm_storeu_si128((__m128i*) dst, bgra); |
352 | 352 |
353 src += 4; | 353 src += 4; |
354 dst += 4; | 354 dst += 4; |
355 count -= 4; | 355 count -= 4; |
356 } | 356 } |
357 | 357 |
358 RGBA_to_BGRA_portable(dst, src, count); | 358 RGBA_to_BGRA_portable(dst, src, count); |
359 } | 359 } |
360 | 360 |
361 template <bool kSwapRB> | |
362 static void insert_alpha_should_swaprb(uint32_t dst[], const void* vsrc, int count) { | |
363 const uint8_t* src = (const uint8_t*) vsrc; | |
364 | |
365 const __m128i alphaMask = _mm_set1_epi32(0xFF000000); | |
366 __m128i expand; | |
367 const uint8_t X = 0xFF; // Used a placeholder. The value of X is irrelevant. | |
mtklein
2016/01/22 17:51:50
Because X > 127, this actually sets those lanes to
msarett
2016/01/22 18:20:08
Agreed. I was hoping there was something that wou
| |
368 if (kSwapRB) { | |
369 expand = _mm_setr_epi8(2,1,0,X, 5,4,3,X, 8,7,6,X, 11,10,9,X); | |
370 } else { | |
371 expand = _mm_setr_epi8(0,1,2,X, 3,4,5,X, 6,7,8,X, 9,10,11,X); | |
372 } | |
373 | |
374 while (count >= 4) { | |
375 // Load a vector. While this actually contains 5 pixels plus an | |
mtklein
2016/01/22 17:51:50
So isn't this only safe while (count >= 6) ?
mtklein
2016/01/22 18:00:30
Alternatively, since only the load is weird and th
msarett
2016/01/22 18:20:08
I don't understand why it's not always safe?
At t
| |
376 // extra component, we will discard all but the first four pixels on | |
377 // this iteration. | |
378 __m128i rgb = _mm_loadu_si128((const __m128i*) src); | |
379 | |
380 // Expand the first four pixels to RGBX and then mask to RGB(FF). | |
381 __m128i rgba = _mm_or_si128(_mm_shuffle_epi8(rgb, expand), alphaMask); | |
382 | |
383 // Store 4 pixels. | |
384 _mm_storeu_si128((__m128i*) dst, rgba); | |
385 | |
386 src += 4*3; | |
387 dst += 4; | |
388 count -= 4; | |
389 } | |
390 | |
391 // Call portable code to finish up the tail of [0,4) pixels. | |
392 auto proc = kSwapRB ? RGB_to_BGR1_portable : RGB_to_RGB1_portable; | |
393 proc(dst, src, count); | |
394 } | |
395 | |
361 static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) { | 396 static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) { |
362 RGB_to_RGB1_portable(dst, src, count); | 397 insert_alpha_should_swaprb<false>(dst, src, count); |
363 } | 398 } |
364 | 399 |
365 static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) { | 400 static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) { |
366 RGB_to_BGR1_portable(dst, src, count); | 401 insert_alpha_should_swaprb<true>(dst, src, count); |
367 } | 402 } |
368 | 403 |
369 #else | 404 #else |
370 | 405 |
371 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { | 406 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { |
372 RGBA_to_rgbA_portable(dst, src, count); | 407 RGBA_to_rgbA_portable(dst, src, count); |
373 } | 408 } |
374 | 409 |
375 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) { | 410 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) { |
376 RGBA_to_bgrA_portable(dst, src, count); | 411 RGBA_to_bgrA_portable(dst, src, count); |
377 } | 412 } |
378 | 413 |
379 static void RGBA_to_BGRA(uint32_t* dst, const void* src, int count) { | 414 static void RGBA_to_BGRA(uint32_t* dst, const void* src, int count) { |
380 RGBA_to_BGRA_portable(dst, src, count); | 415 RGBA_to_BGRA_portable(dst, src, count); |
381 } | 416 } |
382 | 417 |
383 static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) { | 418 static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) { |
384 RGB_to_RGB1_portable(dst, src, count); | 419 RGB_to_RGB1_portable(dst, src, count); |
385 } | 420 } |
386 | 421 |
387 static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) { | 422 static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) { |
388 RGB_to_BGR1_portable(dst, src, count); | 423 RGB_to_BGR1_portable(dst, src, count); |
389 } | 424 } |
390 | 425 |
391 #endif | 426 #endif |
392 | 427 |
393 } | 428 } |
394 | 429 |
395 #endif // SkSwizzler_opts_DEFINED | 430 #endif // SkSwizzler_opts_DEFINED |
OLD | NEW |