src/opts/SkBlitRow_opts_arm_neon.cpp - Issue 181523002: ARM Skia NEON patches - 22 - S32_D565_Blend

Side by Side Diff: src/opts/SkBlitRow_opts_arm_neon.cpp

Issue 181523002: ARM Skia NEON patches - 22 - S32_D565_Blend (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: Rebase Created 6 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright 2012 The Android Open Source Project	2 * Copyright 2012 The Android Open Source Project

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8 #include "SkBlitRow_opts_arm_neon.h"	8 #include "SkBlitRow_opts_arm_neon.h"

9	9

10 #include "SkBlitMask.h"	10 #include "SkBlitMask.h"

(...skipping 34 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
45 // Leftovers	45 // Leftovers

46 while (count > 0) {	46 while (count > 0) {

47 SkPMColor c = *src++;	47 SkPMColor c = *src++;

48 SkPMColorAssert(c);	48 SkPMColorAssert(c);

49 *dst = SkPixel32ToPixel16_ToU16(c);	49 *dst = SkPixel32ToPixel16_ToU16(c);

50 dst++;	50 dst++;

51 count--;	51 count--;

52 };	52 };

53 }	53 }

54	54

	55 void S32_D565_Blend_neon(uint16_t* SK_RESTRICT dst,

	56 const SkPMColor* SK_RESTRICT src, int count,

	57 U8CPU alpha, int /*x*/, int /*y*/) {

	58 SkASSERT(255 > alpha);

	59

	60 uint16x8_t vmask_blue, vscale;

	61

	62 // prepare constants

	63 vscale = vdupq_n_u16(SkAlpha255To256(alpha));

	64 vmask_blue = vmovq_n_u16(0x1F);

	65

	66 while (count >= 8) {

	67 uint16x8_t vdst, vdst_r, vdst_g, vdst_b;

	68 uint16x8_t vres_r, vres_g, vres_b;

	69 uint8x8_t vsrc_r, vsrc_g, vsrc_b;

	70

	71 // Load src

	72 {

	73 register uint8x8_t d0 asm("d0");

	74 register uint8x8_t d1 asm("d1");

	75 register uint8x8_t d2 asm("d2");

	76 register uint8x8_t d3 asm("d3");

	77

	78 asm (

	79 "vld4.8 {d0-d3},[%[src]]!"

	80 : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3), [src] "+&r" (src)

	81 :

	82 );

	83 vsrc_g = d1;

	84 #if SK_PMCOLOR_BYTE_ORDER(B,G,R,A)

	85 vsrc_r = d2; vsrc_b = d0;

	86 #elif SK_PMCOLOR_BYTE_ORDER(R,G,B,A)

	87 vsrc_r = d0; vsrc_b = d2;

	88 #endif

	89 }

	90

	91 // Load and unpack dst

	92 vdst = vld1q_u16(dst);

	93 vdst_g = vshlq_n_u16(vdst, 5); // shift green to top of lanes

	94 vdst_b = vandq_u16(vdst, vmask_blue); // extract blue

	95 vdst_r = vshrq_n_u16(vdst, 6+5); // extract red

	96 vdst_g = vshrq_n_u16(vdst_g, 5+5); // extract green

	97

	98 // Shift src to 565

	99 vsrc_r = vshr_n_u8(vsrc_r, 3); // shift red to 565 range

	100 vsrc_g = vshr_n_u8(vsrc_g, 2); // shift green to 565 range

	101 vsrc_b = vshr_n_u8(vsrc_b, 3); // shift blue to 565 range

	102

	103 // Scale src - dst

	104 vres_r = vmovl_u8(vsrc_r) - vdst_r;

	105 vres_g = vmovl_u8(vsrc_g) - vdst_g;

	106 vres_b = vmovl_u8(vsrc_b) - vdst_b;

	107

	108 vres_r = vshrq_n_u16(vres_r * vscale, 8);

	109 vres_g = vshrq_n_u16(vres_g * vscale, 8);

	110 vres_b = vshrq_n_u16(vres_b * vscale, 8);

	111

	112 vres_r += vdst_r;

	113 vres_g += vdst_g;

	114 vres_b += vdst_b;

	115

	116 // Combine

	117 vres_b = vsliq_n_u16(vres_b, vres_g, 5); // insert green into blue

	118 vres_b = vsliq_n_u16(vres_b, vres_r, 6+5); // insert red into green/blue

	119

	120 // Store

	121 vst1q_u16(dst, vres_b);

	122 dst += 8;

	123 count -= 8;

	124 }

	125 if (count > 0) {

	126 int scale = SkAlpha255To256(alpha);

	127 do {

	128 SkPMColor c = *src++;

	129 SkPMColorAssert(c);

	130 uint16_t d = *dst;

	131 *dst++ = SkPackRGB16(

	132 SkAlphaBlend(SkPacked32ToR16(c), SkGetPackedR16(d), scale),

	133 SkAlphaBlend(SkPacked32ToG16(c), SkGetPackedG16(d), scale),

	134 SkAlphaBlend(SkPacked32ToB16(c), SkGetPackedB16(d), scale));

	135 } while (--count != 0);

	136 }

	137 }

	138

55 void S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst,	139 void S32A_D565_Opaque_neon(uint16_t* SK_RESTRICT dst,

56 const SkPMColor* SK_RESTRICT src, int count,	140 const SkPMColor* SK_RESTRICT src, int count,

57 U8CPU alpha, int /*x*/, int /*y*/) {	141 U8CPU alpha, int /*x*/, int /*y*/) {

58 SkASSERT(255 == alpha);	142 SkASSERT(255 == alpha);

59	143

60 if (count >= 8) {	144 if (count >= 8) {

61 uint16_t* SK_RESTRICT keep_dst = 0;	145 uint16_t* SK_RESTRICT keep_dst = 0;

62	146

63 asm volatile (	147 asm volatile (

64 "ands ip, %[count], #7 \n\t"	148 "ands ip, %[count], #7 \n\t"

(...skipping 1313 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1378 dst += 1;	1462 dst += 1;

1379 count--;	1463 count--;

1380 }	1464 }

1381 }	1465 }

1382 }	1466 }

1383	1467

1384 ///////////////////////////////////////////////////////////////////////////////	1468 ///////////////////////////////////////////////////////////////////////////////

1385	1469

1386 const SkBlitRow::Proc sk_blitrow_platform_565_procs_arm_neon[] = {	1470 const SkBlitRow::Proc sk_blitrow_platform_565_procs_arm_neon[] = {

1387 // no dither	1471 // no dither

1388 // NOTE: For the S32_D565_Blend function below, we don't have a special

1389 // version that assumes that each source pixel is opaque. But our

1390 // S32A is still faster than the default, so use it.

1391 S32_D565_Opaque_neon,	1472 S32_D565_Opaque_neon,

1392 S32A_D565_Blend_neon, // really S32_D565_Blend	1473 S32_D565_Blend_neon,

1393 S32A_D565_Opaque_neon,	1474 S32A_D565_Opaque_neon,

1394 S32A_D565_Blend_neon,	1475 S32A_D565_Blend_neon,

1395	1476

1396 // dither	1477 // dither

1397 S32_D565_Opaque_Dither_neon,	1478 S32_D565_Opaque_Dither_neon,

1398 S32_D565_Blend_Dither_neon,	1479 S32_D565_Blend_Dither_neon,

1399 S32A_D565_Opaque_Dither_neon,	1480 S32A_D565_Opaque_Dither_neon,

1400 NULL, // S32A_D565_Blend_Dither	1481 NULL, // S32A_D565_Blend_Dither

1401 };	1482 };

1402	1483

(...skipping 10 matching lines...) Expand all Loading...
1413 * case where we do not inspect the src alpha.	1494 * case where we do not inspect the src alpha.

1414 */	1495 */

1415 #if SK_A32_SHIFT == 24	1496 #if SK_A32_SHIFT == 24

1416 // This proc assumes the alpha value occupies bits 24-32 of each SkPMColor	1497 // This proc assumes the alpha value occupies bits 24-32 of each SkPMColor

1417 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque,	1498 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque,

1418 #else	1499 #else

1419 S32A_Opaque_BlitRow32_neon, // S32A_Opaque,	1500 S32A_Opaque_BlitRow32_neon, // S32A_Opaque,

1420 #endif	1501 #endif

1421 S32A_Blend_BlitRow32_neon // S32A_Blend	1502 S32A_Blend_BlitRow32_neon // S32A_Blend

1422 };	1503 };

OLD	NEW

« no previous file with comments | « expectations/gm/ignored-tests.txt ('k') | no next file » | no next file with comments »