Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkSwizzler_opts_DEFINED | 8 #ifndef SkSwizzler_opts_DEFINED |
| 9 #define SkSwizzler_opts_DEFINED | 9 #define SkSwizzler_opts_DEFINED |
| 10 | 10 |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 53 r = src[i] >> 16, | 53 r = src[i] >> 16, |
| 54 g = src[i] >> 8, | 54 g = src[i] >> 8, |
| 55 b = src[i] >> 0; | 55 b = src[i] >> 0; |
| 56 dst[i] = (uint32_t)a << 24 | 56 dst[i] = (uint32_t)a << 24 |
| 57 | (uint32_t)b << 16 | 57 | (uint32_t)b << 16 |
| 58 | (uint32_t)g << 8 | 58 | (uint32_t)g << 8 |
| 59 | (uint32_t)r << 0; | 59 | (uint32_t)r << 0; |
| 60 } | 60 } |
| 61 } | 61 } |
| 62 | 62 |
// Expands `count` tightly packed 3-byte pixels into 4-byte pixels with an
// opaque alpha byte, keeping the channel order of the input.
//
// Although `src` is typed as uint32_t[], it is read as a byte stream: pixel i
// occupies bytes 3*i .. 3*i+2, so 3 * count bytes are consumed in total.
// Each output word holds the pixel's first byte in bits 0-7, its second byte
// in bits 8-15, its third byte in bits 16-23, and 0xFF in bits 24-31.
// A count of zero (or less) writes nothing.
//
// Review note (msarett, 2016/01/21 22:09:07): "It's a little strange that we
// pass in src as a poi..." (the remainder of this comment was truncated in
// the captured review page).
static void xxx_xxxa_portable(uint32_t dst[], const uint32_t src[], int count) {
    // Reinterpret the source as bytes without discarding its const qualifier.
    const uint8_t* src8 = (const uint8_t*) src;
    for (int i = 0; i < count; i++) {
        uint8_t b = src8[0],
                g = src8[1],
                r = src8[2];
        dst[i] = (uint32_t) b    <<  0
               | (uint32_t) g    <<  8
               | (uint32_t) r    << 16
               | (uint32_t) 0xFF << 24;
        src8 += 3;  // Advance to the next packed 3-byte pixel.
    }
}
| 76 | |
// Expands `count` tightly packed 3-byte pixels into 4-byte pixels with an
// opaque alpha byte, exchanging the first and third channel of every pixel.
//
// Although `src` is typed as uint32_t[], it is read as a byte stream: pixel i
// occupies bytes 3*i .. 3*i+2, so 3 * count bytes are consumed in total.
// Each output word holds the pixel's third byte in bits 0-7, its second byte
// in bits 8-15, its first byte in bits 16-23, and 0xFF in bits 24-31.
// A count of zero (or less) writes nothing.
static void xxx_swaprb_xxxa_portable(uint32_t dst[], const uint32_t src[], int count) {
    // Reinterpret the source as bytes without discarding its const qualifier.
    const uint8_t* src8 = (const uint8_t*) src;
    for (int i = 0; i < count; i++) {
        uint8_t b = src8[0],
                g = src8[1],
                r = src8[2];
        dst[i] = (uint32_t) r    <<  0
               | (uint32_t) g    <<  8
               | (uint32_t) b    << 16
               | (uint32_t) 0xFF << 24;
        src8 += 3;  // Advance to the next packed 3-byte pixel.
    }
}
| 90 | |
| 63 #if defined(SK_ARM_HAS_NEON) | 91 #if defined(SK_ARM_HAS_NEON) |
| 64 | 92 |
| 65 // Rounded divide by 255, (x + 127) / 255 | 93 // Rounded divide by 255, (x + 127) / 255 |
| 66 static uint8x8_t div255_round(uint16x8_t x) { | 94 static uint8x8_t div255_round(uint16x8_t x) { |
| 67 // result = (x + 127) / 255 | 95 // result = (x + 127) / 255 |
| 68 // result = (x + 127) / 256 + error1 | 96 // result = (x + 127) / 256 + error1 |
| 69 // | 97 // |
| 70 // error1 = (x + 127) / (255 * 256) | 98 // error1 = (x + 127) / (255 * 256) |
| 71 // error1 = (x + 127) / (256 * 256) + error2 | 99 // error1 = (x + 127) / (256 * 256) + error2 |
| 72 // | 100 // |
| (...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 161 // Store 8 pixels. | 189 // Store 8 pixels. |
| 162 vst4_u8((uint8_t*) dst, bgra); | 190 vst4_u8((uint8_t*) dst, bgra); |
| 163 src += 8; | 191 src += 8; |
| 164 dst += 8; | 192 dst += 8; |
| 165 count -= 8; | 193 count -= 8; |
| 166 } | 194 } |
| 167 | 195 |
| 168 swaprb_xxxa_portable(dst, src, count); | 196 swaprb_xxxa_portable(dst, src, count); |
| 169 } | 197 } |
| 170 | 198 |
| 199 static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) { | |
| 200 const uint8_t* src8 = (const uint8_t*) src; | |
| 201 while (count >= 16) { | |
| 202 // Load 16 pixels. | |
| 203 uint8x16x3_t bgr = vld3q_u8(src8); | |
| 204 | |
| 205 // Insert an opaque alpha channel. | |
| 206 uint8x16x4_t bgra; | |
| 207 bgra.val[0] = bgr.val[0]; | |
| 208 bgra.val[1] = bgr.val[1]; | |
| 209 bgra.val[2] = bgr.val[2]; | |
| 210 bgra.val[3] = vdupq_n_u8(0xFF); | |
| 211 | |
| 212 // Store 16 pixels. | |
| 213 vst4q_u8((uint8_t*) dst, bgra); | |
| 214 src8 += 48; | |
| 215 dst += 16; | |
| 216 count -= 16; | |
| 217 } | |
| 218 | |
| 219 if (count >= 8) { | |
| 220 // Load 8 pixels. | |
| 221 uint8x8x3_t bgr = vld3_u8(src8); | |
| 222 | |
| 223 // Insert an opaque alpha channel. | |
| 224 uint8x8x4_t bgra; | |
| 225 bgra.val[0] = bgr.val[0]; | |
| 226 bgra.val[1] = bgr.val[1]; | |
| 227 bgra.val[2] = bgr.val[2]; | |
| 228 bgra.val[3] = vdup_n_u8(0xFF); | |
| 229 | |
| 230 // Store 8 pixels. | |
| 231 vst4_u8((uint8_t*) dst, bgra); | |
| 232 src8 += 24; | |
| 233 dst += 8; | |
| 234 count -= 8; | |
| 235 } | |
| 236 | |
| 237 xxx_xxxa_portable(dst, src, count); | |
| 238 } | |
| 239 | |
| 240 static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { | |
| 241 const uint8_t* src8 = (const uint8_t*) src; | |
| 242 while (count >= 16) { | |
| 243 // Load 16 pixels. | |
| 244 uint8x16x3_t bgr = vld3q_u8(src8); | |
| 245 | |
| 246 // Swap r and b and insert an opaque alpha channel. | |
| 247 uint8x16x4_t rgba; | |
| 248 rgba.val[0] = bgr.val[2]; | |
| 249 rgba.val[1] = bgr.val[1]; | |
| 250 rgba.val[2] = bgr.val[0]; | |
| 251 rgba.val[3] = vdupq_n_u8(0xFF); | |
| 252 | |
| 253 // Store 16 pixels. | |
| 254 vst4q_u8((uint8_t*) dst, rgba); | |
| 255 src8 += 48; | |
| 256 dst += 16; | |
| 257 count -= 16; | |
| 258 } | |
| 259 | |
| 260 if (count >= 8) { | |
| 261 // Load 8 pixels. | |
| 262 uint8x8x3_t bgr = vld3_u8(src8); | |
| 263 | |
| 264 // Swap r and b and insert an opaque alpha channel. | |
| 265 uint8x8x4_t rgba; | |
| 266 rgba.val[0] = bgr.val[2]; | |
| 267 rgba.val[1] = bgr.val[1]; | |
| 268 rgba.val[2] = bgr.val[0]; | |
| 269 rgba.val[3] = vdup_n_u8(0xFF); | |
| 270 | |
| 271 // Store 8 pixels. | |
| 272 vst4_u8((uint8_t*) dst, rgba); | |
| 273 src8 += 24; | |
| 274 dst += 8; | |
| 275 count -= 8; | |
| 276 } | |
| 277 | |
| 278 xxx_swaprb_xxxa_portable(dst, src, count); | |
| 279 } | |
| 280 | |
| 171 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 | 281 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
| 172 | 282 |
| 173 template <bool kSwapRB> | 283 template <bool kSwapRB> |
| 174 static void premul_xxxa_should_swaprb(uint32_t dst[], const uint32_t src[], int count) { | 284 static void premul_xxxa_should_swaprb(uint32_t dst[], const uint32_t src[], int count) { |
| 175 | 285 |
| 176 auto premul8 = [](__m128i* lo, __m128i* hi) { | 286 auto premul8 = [](__m128i* lo, __m128i* hi) { |
| 177 const __m128i zeros = _mm_setzero_si128(); | 287 const __m128i zeros = _mm_setzero_si128(); |
| 178 const __m128i _128 = _mm_set1_epi16(128); | 288 const __m128i _128 = _mm_set1_epi16(128); |
| 179 const __m128i _257 = _mm_set1_epi16(257); | 289 const __m128i _257 = _mm_set1_epi16(257); |
| 180 __m128i planar; | 290 __m128i planar; |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 257 _mm_storeu_si128((__m128i*) dst, rgba); | 367 _mm_storeu_si128((__m128i*) dst, rgba); |
| 258 | 368 |
| 259 src += 4; | 369 src += 4; |
| 260 dst += 4; | 370 dst += 4; |
| 261 count -= 4; | 371 count -= 4; |
| 262 } | 372 } |
| 263 | 373 |
| 264 swaprb_xxxa_portable(dst, src, count); | 374 swaprb_xxxa_portable(dst, src, count); |
| 265 } | 375 } |
| 266 | 376 |
| 377 static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) { | |
| 378 xxx_xxxa_portable(dst, src, count); | |
| 379 } | |
| 380 | |
| 381 static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { | |
| 382 xxx_swaprb_xxxa_portable(dst, src, count); | |
| 383 } | |
| 384 | |
| 267 #else | 385 #else |
| 268 | 386 |
| 269 static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) { | 387 static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| 270 premul_xxxa_portable(dst, src, count); | 388 premul_xxxa_portable(dst, src, count); |
| 271 } | 389 } |
| 272 | 390 |
| 273 static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { | 391 static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| 274 premul_swaprb_xxxa_portable(dst, src, count); | 392 premul_swaprb_xxxa_portable(dst, src, count); |
| 275 } | 393 } |
| 276 | 394 |
| 277 static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { | 395 static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
| 278 swaprb_xxxa_portable(dst, src, count); | 396 swaprb_xxxa_portable(dst, src, count); |
| 279 } | 397 } |
| 280 | 398 |
| 399 static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) { | |
| 400 xxx_xxxa_portable(dst, src, count); | |
| 401 } | |
| 402 | |
| 403 static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { | |
| 404 xxx_swaprb_xxxa_portable(dst, src, count); | |
| 405 } | |
| 406 | |
| 281 #endif | 407 #endif |
| 282 | 408 |
| 283 } | 409 } |
| 284 | 410 |
| 285 #endif // SkSwizzler_opts_DEFINED | 411 #endif // SkSwizzler_opts_DEFINED |
| OLD | NEW |