Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(75)

Side by Side Diff: src/opts/SkSwizzler_opts.h

Issue 1618003002: Use NEON optimizations for RGB -> RGB(FF) or BGR(FF) in SkSwizzler (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« src/opts/SkOpts_ssse3.cpp ('K') | « src/opts/SkOpts_ssse3.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2016 Google Inc. 2 * Copyright 2016 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkSwizzler_opts_DEFINED 8 #ifndef SkSwizzler_opts_DEFINED
9 #define SkSwizzler_opts_DEFINED 9 #define SkSwizzler_opts_DEFINED
10 10
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
53 r = src[i] >> 16, 53 r = src[i] >> 16,
54 g = src[i] >> 8, 54 g = src[i] >> 8,
55 b = src[i] >> 0; 55 b = src[i] >> 0;
56 dst[i] = (uint32_t)a << 24 56 dst[i] = (uint32_t)a << 24
57 | (uint32_t)b << 16 57 | (uint32_t)b << 16
58 | (uint32_t)g << 8 58 | (uint32_t)g << 8
59 | (uint32_t)r << 0; 59 | (uint32_t)r << 0;
60 } 60 }
61 } 61 }
62 62
// Expand packed 3-byte pixels into 4-byte pixels with an opaque alpha byte.
//
// src is typed as uint32_t* but is read as a byte stream, 3 bytes per pixel.
// dst receives `count` 32-bit pixels. Channel order is preserved: the first
// source byte lands in bits 0-7, the second in bits 8-15, the third in bits
// 16-23, and bits 24-31 are set to 0xFF. A count <= 0 writes nothing.
//
// Review note (msarett, 2016/01/21 22:09:07; truncated in this capture):
// "It's a little strange that we pass in src as a poi..."
static void xxx_xxxa_portable(uint32_t dst[], const uint32_t src[], int count) {
    // Keep the const qualifier on the byte view, and advance a pointer instead
    // of an int byte index: 3 * count does not fit in an int for large counts.
    const uint8_t* src8 = (const uint8_t*) src;
    for (int i = 0; i < count; i++, src8 += 3) {
        uint8_t b = src8[0],
                g = src8[1],
                r = src8[2];
        dst[i] = (uint32_t) b    <<  0
               | (uint32_t) g    <<  8
               | (uint32_t) r    << 16
               | (uint32_t) 0xFF << 24;
    }
}
76
// Expand packed 3-byte pixels into 4-byte pixels with an opaque alpha byte,
// swapping the first and third channels along the way.
//
// src is typed as uint32_t* but is read as a byte stream, 3 bytes per pixel.
// dst receives `count` 32-bit pixels: the third source byte lands in bits 0-7,
// the second in bits 8-15, the first in bits 16-23, and bits 24-31 are set to
// 0xFF. A count <= 0 writes nothing.
static void xxx_swaprb_xxxa_portable(uint32_t dst[], const uint32_t src[], int count) {
    // Keep the const qualifier on the byte view, and advance a pointer instead
    // of an int byte index: 3 * count does not fit in an int for large counts.
    const uint8_t* src8 = (const uint8_t*) src;
    for (int i = 0; i < count; i++, src8 += 3) {
        uint8_t b = src8[0],
                g = src8[1],
                r = src8[2];
        dst[i] = (uint32_t) r    <<  0
               | (uint32_t) g    <<  8
               | (uint32_t) b    << 16
               | (uint32_t) 0xFF << 24;
    }
}
90
63 #if defined(SK_ARM_HAS_NEON) 91 #if defined(SK_ARM_HAS_NEON)
64 92
65 // Rounded divide by 255, (x + 127) / 255 93 // Rounded divide by 255, (x + 127) / 255
66 static uint8x8_t div255_round(uint16x8_t x) { 94 static uint8x8_t div255_round(uint16x8_t x) {
67 // result = (x + 127) / 255 95 // result = (x + 127) / 255
68 // result = (x + 127) / 256 + error1 96 // result = (x + 127) / 256 + error1
69 // 97 //
70 // error1 = (x + 127) / (255 * 256) 98 // error1 = (x + 127) / (255 * 256)
71 // error1 = (x + 127) / (256 * 256) + error2 99 // error1 = (x + 127) / (256 * 256) + error2
72 // 100 //
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after
161 // Store 8 pixels. 189 // Store 8 pixels.
162 vst4_u8((uint8_t*) dst, bgra); 190 vst4_u8((uint8_t*) dst, bgra);
163 src += 8; 191 src += 8;
164 dst += 8; 192 dst += 8;
165 count -= 8; 193 count -= 8;
166 } 194 }
167 195
168 swaprb_xxxa_portable(dst, src, count); 196 swaprb_xxxa_portable(dst, src, count);
169 } 197 }
170 198
199 static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) {
200 const uint8_t* src8 = (const uint8_t*) src;
201 while (count >= 16) {
202 // Load 16 pixels.
203 uint8x16x3_t bgr = vld3q_u8(src8);
204
205 // Insert an opaque alpha channel.
206 uint8x16x4_t bgra;
207 bgra.val[0] = bgr.val[0];
208 bgra.val[1] = bgr.val[1];
209 bgra.val[2] = bgr.val[2];
210 bgra.val[3] = vdupq_n_u8(0xFF);
211
212 // Store 16 pixels.
213 vst4q_u8((uint8_t*) dst, bgra);
214 src8 += 48;
215 dst += 16;
216 count -= 16;
217 }
218
219 if (count >= 8) {
220 // Load 8 pixels.
221 uint8x8x3_t bgr = vld3_u8(src8);
222
223 // Insert an opaque alpha channel.
224 uint8x8x4_t bgra;
225 bgra.val[0] = bgr.val[0];
226 bgra.val[1] = bgr.val[1];
227 bgra.val[2] = bgr.val[2];
228 bgra.val[3] = vdup_n_u8(0xFF);
229
230 // Store 8 pixels.
231 vst4_u8((uint8_t*) dst, bgra);
232 src8 += 24;
233 dst += 8;
234 count -= 8;
235 }
236
237 xxx_xxxa_portable(dst, src, count);
238 }
239
240 static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
241 const uint8_t* src8 = (const uint8_t*) src;
242 while (count >= 16) {
243 // Load 16 pixels.
244 uint8x16x3_t bgr = vld3q_u8(src8);
245
246 // Swap r and b and insert an opaque alpha channel.
247 uint8x16x4_t rgba;
248 rgba.val[0] = bgr.val[2];
249 rgba.val[1] = bgr.val[1];
250 rgba.val[2] = bgr.val[0];
251 rgba.val[3] = vdupq_n_u8(0xFF);
252
253 // Store 16 pixels.
254 vst4q_u8((uint8_t*) dst, rgba);
255 src8 += 48;
256 dst += 16;
257 count -= 16;
258 }
259
260 if (count >= 8) {
261 // Load 8 pixels.
262 uint8x8x3_t bgr = vld3_u8(src8);
263
264 // Swap r and b and insert an opaque alpha channel.
265 uint8x8x4_t rgba;
266 rgba.val[0] = bgr.val[2];
267 rgba.val[1] = bgr.val[1];
268 rgba.val[2] = bgr.val[0];
269 rgba.val[3] = vdup_n_u8(0xFF);
270
271 // Store 8 pixels.
272 vst4_u8((uint8_t*) dst, rgba);
273 src8 += 24;
274 dst += 8;
275 count -= 8;
276 }
277
278 xxx_swaprb_xxxa_portable(dst, src, count);
279 }
280
171 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 281 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
172 282
173 template <bool kSwapRB> 283 template <bool kSwapRB>
174 static void premul_xxxa_should_swaprb(uint32_t dst[], const uint32_t src[], int count) { 284 static void premul_xxxa_should_swaprb(uint32_t dst[], const uint32_t src[], int count) {
175 285
176 auto premul8 = [](__m128i* lo, __m128i* hi) { 286 auto premul8 = [](__m128i* lo, __m128i* hi) {
177 const __m128i zeros = _mm_setzero_si128(); 287 const __m128i zeros = _mm_setzero_si128();
178 const __m128i _128 = _mm_set1_epi16(128); 288 const __m128i _128 = _mm_set1_epi16(128);
179 const __m128i _257 = _mm_set1_epi16(257); 289 const __m128i _257 = _mm_set1_epi16(257);
180 __m128i planar; 290 __m128i planar;
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
257 _mm_storeu_si128((__m128i*) dst, rgba); 367 _mm_storeu_si128((__m128i*) dst, rgba);
258 368
259 src += 4; 369 src += 4;
260 dst += 4; 370 dst += 4;
261 count -= 4; 371 count -= 4;
262 } 372 }
263 373
264 swaprb_xxxa_portable(dst, src, count); 374 swaprb_xxxa_portable(dst, src, count);
265 } 375 }
266 376
// SSSE3 branch: this patch set adds no vectorized version of the conversion
// here; the call is forwarded unchanged to xxx_xxxa_portable.
377 static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) {
378 xxx_xxxa_portable(dst, src, count);
379 }
380
// SSSE3 branch: this patch set adds no vectorized version of the conversion
// here; the call is forwarded unchanged to xxx_swaprb_xxxa_portable.
381 static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
382 xxx_swaprb_xxxa_portable(dst, src, count);
383 }
384
267 #else 385 #else
268 386
// Fallback branch (neither the NEON nor the SSSE3 branch was selected):
// forwards unchanged to premul_xxxa_portable. Identical on both sides of the diff.
269 static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) { 387 static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) {
270 premul_xxxa_portable(dst, src, count); 388 premul_xxxa_portable(dst, src, count);
271 } 389 }
272 390
// Fallback branch (neither the NEON nor the SSSE3 branch was selected):
// forwards unchanged to premul_swaprb_xxxa_portable. Identical on both sides of the diff.
273 static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { 391 static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
274 premul_swaprb_xxxa_portable(dst, src, count); 392 premul_swaprb_xxxa_portable(dst, src, count);
275 } 393 }
276 394
// Fallback branch (neither the NEON nor the SSSE3 branch was selected):
// forwards unchanged to swaprb_xxxa_portable. Identical on both sides of the diff.
277 static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { 395 static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
278 swaprb_xxxa_portable(dst, src, count); 396 swaprb_xxxa_portable(dst, src, count);
279 } 397 }
280 398
// Fallback branch (neither the NEON nor the SSSE3 branch was selected):
// forwards unchanged to xxx_xxxa_portable. Added by this patch set.
399 static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) {
400 xxx_xxxa_portable(dst, src, count);
401 }
402
// Fallback branch (neither the NEON nor the SSSE3 branch was selected):
// forwards unchanged to xxx_swaprb_xxxa_portable. Added by this patch set.
403 static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
404 xxx_swaprb_xxxa_portable(dst, src, count);
405 }
406
281 #endif 407 #endif
282 408
283 } 409 }
284 410
285 #endif // SkSwizzler_opts_DEFINED 411 #endif // SkSwizzler_opts_DEFINED
OLDNEW
« src/opts/SkOpts_ssse3.cpp ('K') | « src/opts/SkOpts_ssse3.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698