Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/opts/SkBlitRow_opts_SSE2.cpp

Issue 923523002: Replace SSE optimization of Color32A_D565 (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Fixed comment Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkBlitRow_opts_SSE2.h ('k') | src/opts/SkBlitRow_opts_SSE4.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2012 The Android Open Source Project 2 * Copyright 2012 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include <emmintrin.h> 8 #include <emmintrin.h>
9 #include "SkBitmapProcState_opts_SSE2.h" 9 #include "SkBitmapProcState_opts_SSE2.h"
10 #include "SkBlitRow_opts_SSE2.h" 10 #include "SkBlitRow_opts_SSE2.h"
(...skipping 271 matching lines...) Expand 10 before | Expand all | Expand 10 after
282 282
283 while (count > 0) { 283 while (count > 0) {
284 *dst = color + SkAlphaMulQ(*src, scale); 284 *dst = color + SkAlphaMulQ(*src, scale);
285 src += 1; 285 src += 1;
286 dst += 1; 286 dst += 1;
287 count--; 287 count--;
288 } 288 }
289 } 289 }
290 } 290 }
291 291
292 void Color32A_D565_SSE2(uint16_t dst[], SkPMColor src, int count, int x, int y) {
293 SkASSERT(count > 0);
294
295 uint32_t src_expand = (SkGetPackedG32(src) << 24) |
296 (SkGetPackedR32(src) << 13) |
297 (SkGetPackedB32(src) << 2);
298 unsigned scale = SkAlpha255To256(0xFF - SkGetPackedA32(src)) >> 3;
299
300 // Check if we have enough pixels to run SIMD
301 if (count >= (int)(8 + (((16 - (size_t)dst) & 0x0F) >> 1))) {
302 __m128i* dst_wide;
303 const __m128i src_R_wide = _mm_set1_epi16(SkGetPackedR32(src) << 2);
304 const __m128i src_G_wide = _mm_set1_epi16(SkGetPackedG32(src) << 3);
305 const __m128i src_B_wide = _mm_set1_epi16(SkGetPackedB32(src) << 2);
306 const __m128i scale_wide = _mm_set1_epi16(scale);
307 const __m128i mask_blue = _mm_set1_epi16(SK_B16_MASK);
308 const __m128i mask_green = _mm_set1_epi16(SK_G16_MASK << SK_G16_SHIFT);
309
310 // Align dst to a 16-byte boundary (consumes 0-7 pixels)
311 while (((((size_t)dst) & 0x0F) != 0) && (count > 0)) {
312 *dst = SkBlend32_RGB16(src_expand, *dst, scale);
313 dst += 1;
314 count--;
315 }
316
317 dst_wide = reinterpret_cast<__m128i*>(dst);
318 do {
319 // Load eight RGB565 pixels
320 __m128i pixels = _mm_load_si128(dst_wide);
321
322 // Mask out sub-pixels
323 __m128i pixel_R = _mm_srli_epi16(pixels, SK_R16_SHIFT);
324 __m128i pixel_G = _mm_slli_epi16(pixels, SK_R16_BITS);
325 pixel_G = _mm_srli_epi16(pixel_G, SK_R16_BITS + SK_B16_BITS);
326 __m128i pixel_B = _mm_and_si128(pixels, mask_blue);
327
328 // Scale with alpha
329 pixel_R = _mm_mullo_epi16(pixel_R, scale_wide);
330 pixel_G = _mm_mullo_epi16(pixel_G, scale_wide);
331 pixel_B = _mm_mullo_epi16(pixel_B, scale_wide);
332
333 // Add src_X_wide and shift R and B down again (G is not shifted here; it is masked into position below)
334 pixel_R = _mm_add_epi16(pixel_R, src_R_wide);
335 pixel_R = _mm_srli_epi16(pixel_R, 5);
336 pixel_G = _mm_add_epi16(pixel_G, src_G_wide);
337 pixel_B = _mm_add_epi16(pixel_B, src_B_wide);
338 pixel_B = _mm_srli_epi16(pixel_B, 5);
339
340 // Combine into RGB565 and store
341 pixel_R = _mm_slli_epi16(pixel_R, SK_R16_SHIFT);
342 pixel_G = _mm_and_si128(pixel_G, mask_green);
343 pixels = _mm_or_si128(pixel_R, pixel_G);
344 pixels = _mm_or_si128(pixels, pixel_B);
345 _mm_store_si128(dst_wide, pixels);
346 count -= 8;
347 dst_wide++;
348 } while (count >= 8);
349
350 dst = reinterpret_cast<uint16_t*>(dst_wide);
351 }
352
353 // Small loop to handle remaining pixels.
354 while (count > 0) {
355 *dst = SkBlend32_RGB16(src_expand, *dst, scale);
356 dst += 1;
357 count--;
358 }
359 }
360
292 void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* maskPtr, 361 void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* maskPtr,
293 size_t maskRB, SkColor origColor, 362 size_t maskRB, SkColor origColor,
294 int width, int height) { 363 int width, int height) {
295 SkPMColor color = SkPreMultiplyColor(origColor); 364 SkPMColor color = SkPreMultiplyColor(origColor);
296 size_t dstOffset = dstRB - (width << 2); 365 size_t dstOffset = dstRB - (width << 2);
297 size_t maskOffset = maskRB - width; 366 size_t maskOffset = maskRB - width;
298 SkPMColor* dst = (SkPMColor *)device; 367 SkPMColor* dst = (SkPMColor *)device;
299 const uint8_t* mask = (const uint8_t*)maskPtr; 368 const uint8_t* mask = (const uint8_t*)maskPtr;
300 do { 369 do {
301 int count = width; 370 int count = width;
(...skipping 844 matching lines...) Expand 10 before | Expand all | Expand 10 after
1146 uint32_t dst_expanded = SkExpand_rgb_16(*dst); 1215 uint32_t dst_expanded = SkExpand_rgb_16(*dst);
1147 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); 1216 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3);
1148 // now src and dst expanded are in g:11 r:10 x:1 b:10 1217 // now src and dst expanded are in g:11 r:10 x:1 b:10
1149 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); 1218 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);
1150 } 1219 }
1151 dst += 1; 1220 dst += 1;
1152 DITHER_INC_X(x); 1221 DITHER_INC_X(x);
1153 } while (--count != 0); 1222 } while (--count != 0);
1154 } 1223 }
1155 } 1224 }
OLDNEW
« no previous file with comments | « src/opts/SkBlitRow_opts_SSE2.h ('k') | src/opts/SkBlitRow_opts_SSE4.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698