src/opts/SkBlitRow_opts_SSE2.cpp - Issue 923523002: Replace SSE optimization of Color32A_D565

Side by Side Diff: src/opts/SkBlitRow_opts_SSE2.cpp

Issue 923523002: Replace SSE optimization of Color32A_D565 (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: Fixed comment comment Created 5 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright 2012 The Android Open Source Project	2 * Copyright 2012 The Android Open Source Project

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8 #include <emmintrin.h>	8 #include <emmintrin.h>

9 #include "SkBitmapProcState_opts_SSE2.h"	9 #include "SkBitmapProcState_opts_SSE2.h"

10 #include "SkBlitRow_opts_SSE2.h"	10 #include "SkBlitRow_opts_SSE2.h"

(...skipping 271 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
282	282

283 while (count > 0) {	283 while (count > 0) {

284 *dst = color + SkAlphaMulQ(*src, scale);	284 *dst = color + SkAlphaMulQ(*src, scale);

285 src += 1;	285 src += 1;

286 dst += 1;	286 dst += 1;

287 count--;	287 count--;

288 }	288 }

289 }	289 }

290 }	290 }

291	291

	292 void Color32A_D565_SSE2(uint16_t dst[], SkPMColor src, int count, int x, int y) {

	293 SkASSERT(count > 0);

	294

	295 uint32_t src_expand = (SkGetPackedG32(src) << 24) |

	296 (SkGetPackedR32(src) << 13) |

	297 (SkGetPackedB32(src) << 2);

	298 unsigned scale = SkAlpha255To256(0xFF - SkGetPackedA32(src)) >> 3;

	299

	300 // Check if we have enough pixels to run SIMD

	301 if (count >= (int)(8 + (((16 - (size_t)dst) & 0x0F) >> 1))) {

	302 __m128i* dst_wide;

	303 const __m128i src_R_wide = _mm_set1_epi16(SkGetPackedR32(src) << 2);

	304 const __m128i src_G_wide = _mm_set1_epi16(SkGetPackedG32(src) << 3);

	305 const __m128i src_B_wide = _mm_set1_epi16(SkGetPackedB32(src) << 2);

	306 const __m128i scale_wide = _mm_set1_epi16(scale);

	307 const __m128i mask_blue = _mm_set1_epi16(SK_B16_MASK);

	308 const __m128i mask_green = _mm_set1_epi16(SK_G16_MASK << SK_G16_SHIFT);

	309

	310 // Align dst to an even 16 byte address (0-7 pixels)

	311 while (((((size_t)dst) & 0x0F) != 0) && (count > 0)) {

	312 *dst = SkBlend32_RGB16(src_expand, *dst, scale);

	313 dst += 1;

	314 count--;

	315 }

	316

	317 dst_wide = reinterpret_cast<__m128i*>(dst);

	318 do {

	319 // Load eight RGB565 pixels

	320 __m128i pixels = _mm_load_si128(dst_wide);

	321

	322 // Mask out sub-pixels

	323 __m128i pixel_R = _mm_srli_epi16(pixels, SK_R16_SHIFT);

	324 __m128i pixel_G = _mm_slli_epi16(pixels, SK_R16_BITS);

	325 pixel_G = _mm_srli_epi16(pixel_G, SK_R16_BITS + SK_B16_BITS);

	326 __m128i pixel_B = _mm_and_si128(pixels, mask_blue);

	327

	328 // Scale with alpha

	329 pixel_R = _mm_mullo_epi16(pixel_R, scale_wide);

	330 pixel_G = _mm_mullo_epi16(pixel_G, scale_wide);

	331 pixel_B = _mm_mullo_epi16(pixel_B, scale_wide);

	332

	333 // Add src_X_wide and shift down again

	334 pixel_R = _mm_add_epi16(pixel_R, src_R_wide);

	335 pixel_R = _mm_srli_epi16(pixel_R, 5);

	336 pixel_G = _mm_add_epi16(pixel_G, src_G_wide);

	337 pixel_B = _mm_add_epi16(pixel_B, src_B_wide);

	338 pixel_B = _mm_srli_epi16(pixel_B, 5);

	339

	340 // Combine into RGB565 and store

	341 pixel_R = _mm_slli_epi16(pixel_R, SK_R16_SHIFT);

	342 pixel_G = _mm_and_si128(pixel_G, mask_green);

	343 pixels = _mm_or_si128(pixel_R, pixel_G);

	344 pixels = _mm_or_si128(pixels, pixel_B);

	345 _mm_store_si128(dst_wide, pixels);

	346 count -= 8;

	347 dst_wide++;

	348 } while (count >= 8);

	349

	350 dst = reinterpret_cast<uint16_t*>(dst_wide);

	351 }

	352

	353 // Small loop to handle remaining pixels.

	354 while (count > 0) {

	355 *dst = SkBlend32_RGB16(src_expand, *dst, scale);

	356 dst += 1;

	357 count--;

	358 }

	359 }

	360

292 void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* maskPtr,	361 void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* maskPtr,

293 size_t maskRB, SkColor origColor,	362 size_t maskRB, SkColor origColor,

294 int width, int height) {	363 int width, int height) {

295 SkPMColor color = SkPreMultiplyColor(origColor);	364 SkPMColor color = SkPreMultiplyColor(origColor);

296 size_t dstOffset = dstRB - (width << 2);	365 size_t dstOffset = dstRB - (width << 2);

297 size_t maskOffset = maskRB - width;	366 size_t maskOffset = maskRB - width;

298 SkPMColor* dst = (SkPMColor *)device;	367 SkPMColor* dst = (SkPMColor *)device;

299 const uint8_t* mask = (const uint8_t*)maskPtr;	368 const uint8_t* mask = (const uint8_t*)maskPtr;

300 do {	369 do {

301 int count = width;	370 int count = width;

(...skipping 844 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1146 uint32_t dst_expanded = SkExpand_rgb_16(*dst);	1215 uint32_t dst_expanded = SkExpand_rgb_16(*dst);

1147 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3);	1216 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3);

1148 // now src and dst expanded are in g:11 r:10 x:1 b:10	1217 // now src and dst expanded are in g:11 r:10 x:1 b:10

1149 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);	1218 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5);

1150 }	1219 }

1151 dst += 1;	1220 dst += 1;

1152 DITHER_INC_X(x);	1221 DITHER_INC_X(x);

1153 } while (--count != 0);	1222 } while (--count != 0);

1154 }	1223 }

1155 }	1224 }

OLD	NEW

« no previous file with comments | « src/opts/SkBlitRow_opts_SSE2.h ('k') | src/opts/SkBlitRow_opts_SSE4.h » ('j') | no next file with comments »