src/core/SkXfermode4f.cpp - Issue 1653943002: unroll srcover_1 for blending a single color

Side by Side Diff: src/core/SkXfermode4f.cpp

Issue 1653943002: unroll srcover_1 for blending a single color (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: remove unneeded templateness, speed up linear a bit more (avoid scaling to unit on dst) Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright 2016 Google Inc.	2 * Copyright 2016 Google Inc.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8 #include "SkPM4fPriv.h"	8 #include "SkPM4fPriv.h"

9 #include "SkUtils.h"	9 #include "SkUtils.h"

10 #include "SkXfermode.h"	10 #include "SkXfermode.h"

(...skipping 17 matching lines...) Expand all Loading...
28 }	28 }

29	29

30 template <DstType D> Sk4f load_dst(SkPMColor dstC) {	30 template <DstType D> Sk4f load_dst(SkPMColor dstC) {

31 return (D == kSRGB_Dst) ? Sk4f_fromS32(dstC) : Sk4f_fromL32(dstC);	31 return (D == kSRGB_Dst) ? Sk4f_fromS32(dstC) : Sk4f_fromL32(dstC);

32 }	32 }

33	33

34 template <DstType D> uint32_t store_dst(const Sk4f& x4) {	34 template <DstType D> uint32_t store_dst(const Sk4f& x4) {

35 return (D == kSRGB_Dst) ? Sk4f_toS32(x4) : Sk4f_toL32(x4);	35 return (D == kSRGB_Dst) ? Sk4f_toS32(x4) : Sk4f_toL32(x4);

36 }	36 }

37	37

	38 static Sk4f linear_unit_to_srgb_255(const Sk4f& l4) {

	39 return linear_to_srgb(l4) * Sk4f(255) + Sk4f(0.5f);

	40 }

	41

38 //////////////////////////////////////////////////////////////////////////////// ///////////////////	42 //////////////////////////////////////////////////////////////////////////////// ///////////////////

39	43

40 static Sk4f scale_255_round(const SkPM4f& pm4) {	44 static Sk4f scale_255_round(const SkPM4f& pm4) {

41 return Sk4f::Load(pm4.fVec) * Sk4f(255) + Sk4f(0.5f);	45 return Sk4f::Load(pm4.fVec) * Sk4f(255) + Sk4f(0.5f);

42 }	46 }

43	47

44 static void pm4f_to_linear_32(SkPMColor dst[], const SkPM4f src[], int count) {	48 static void pm4f_to_linear_32(SkPMColor dst[], const SkPM4f src[], int count) {

45 while (count >= 4) {	49 while (count >= 4) {

46 src[0].assertIsUnit();	50 src[0].assertIsUnit();

47 src[1].assertIsUnit();	51 src[1].assertIsUnit();

(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
247 } else {	251 } else {

248 for (int i = 0; i < count; ++i) {	252 for (int i = 0; i < count; ++i) {

249 Sk4f s4 = Sk4f::Load(src[i].fVec);	253 Sk4f s4 = Sk4f::Load(src[i].fVec);

250 Sk4f d4 = load_dst<D>(dst[i]);	254 Sk4f d4 = load_dst<D>(dst[i]);

251 Sk4f r4 = s4 + d4 * Sk4f(1 - get_alpha(s4));	255 Sk4f r4 = s4 + d4 * Sk4f(1 - get_alpha(s4));

252 dst[i] = store_dst<D>(r4);	256 dst[i] = store_dst<D>(r4);

253 }	257 }

254 }	258 }

255 }	259 }

256	260

257 template <DstType D> void srcover_1(const SkXfermode::PM4fState& state, uint32_t dst[],	261 template <DstType D> void srcover_1(const SkXfermode::PM4fState& state, uint32_t dst[],
mtklein 2016/02/02 18:10:08: At this point I think it'd be clearer to split srcover_1 apart into two explicit specializations:
    template <DstType D>
    void srcover_1(const SkXfermode::PM4fState& state, uint32_t dst[], const SkPM4f& src, int count, const SkAlpha aa[]);
    template <>
    void srcover_1<kLinear_Dst>(const SkXfermode::PM4fState& state, uint32_t dst[], const SkPM4f& src, int count, const SkAlpha aa[]) { ... }
    template <>
    void srcover_1<kSRGB_Dst>(const SkXfermode::PM4fState& state, uint32_t dst[], const SkPM4f& src, int count, const SkAlpha aa[]) { ... }
mtklein 2016/02/02 18:10:46: On 2016/02/02 18:10:08, mtklein wrote:
    > At this point I think it'd be clearer to split srcover_1 apart into two explicit
    > specializations:
    >
    > template <DstType D>
    > void srcover_1(const SkXfermode::PM4fState& state, uint32_t dst[], const SkPM4f&
    > src, int count, const SkAlpha aa[]);
    >
    > template <>
    > void srcover_1<kLinear_Dst>(const SkXfermode::PM4fState& state, uint32_t dst[],
    > const SkPM4f& src, int count, const SkAlpha aa[]) {
    > ...
    > }
    >
    > template <>
    > void srcover_1<kSRGB_Dst>(const SkXfermode::PM4fState& state, uint32_t dst[],
    > const SkPM4f& src, int count, const SkAlpha aa[]) {
    > ...
    > }
    (otherwise lgtm)
258 const SkPM4f& src, int count, const SkAlpha aa[]) {	262 const SkPM4f& src, int count, const SkAlpha aa[]) {

259 Sk4f s4 = Sk4f::Load(src.fVec);	263 Sk4f s4 = Sk4f::Load(src.fVec);

260 Sk4f scale = Sk4f(1 - get_alpha(s4));	264 Sk4f dst_scale = Sk4f(1 - get_alpha(s4));

261	265

262 if (aa) {	266 if (aa) {

263 for (int i = 0; i < count; ++i) {	267 for (int i = 0; i < count; ++i) {

264 unsigned a = aa[i];	268 unsigned a = aa[i];

265 if (0 == a) {	269 if (0 == a) {

266 continue;	270 continue;

267 }	271 }

268 Sk4f d4 = load_dst<D>(dst[i]);	272 Sk4f d4 = load_dst<D>(dst[i]);

269 Sk4f r4;	273 Sk4f r4;

270 if (a != 0xFF) {	274 if (a != 0xFF) {

271 s4 = scale_by_coverage(s4, a);	275 s4 = scale_by_coverage(s4, a);

272 r4 = s4 + d4 * Sk4f(1 - get_alpha(s4));	276 r4 = s4 + d4 * Sk4f(1 - get_alpha(s4));

273 } else {	277 } else {

274 r4 = s4 + d4 * scale;	278 r4 = s4 + d4 * dst_scale;

275 }	279 }

276 dst[i] = store_dst<D>(r4);	280 dst[i] = store_dst<D>(r4);

277 }	281 }

278 } else {	282 } else {

279 for (int i = 0; i < count; ++i) {	283 if (D == kLinear_Dst) {

280 Sk4f d4 = load_dst<D>(dst[i]);	284 // Do the blend math in 255-bias, since the dst bytes are already linear

281 Sk4f r4 = s4 + d4 * scale;	285 s4 = s4 * Sk4f(255) + Sk4f(0.5f); // +0.5 to pre-bias for rounding

282 dst[i] = store_dst<D>(r4);	286 while (count >= 4) {

	287 Sk4f d0 = to_4f(dst[0]);

	288 Sk4f d1 = to_4f(dst[1]);

	289 Sk4f d2 = to_4f(dst[2]);

	290 Sk4f d3 = to_4f(dst[3]);

	291 Sk4f_ToBytes((uint8_t*)dst,

	292 s4 + d0 * dst_scale,

	293 s4 + d1 * dst_scale,

	294 s4 + d2 * dst_scale,

	295 s4 + d3 * dst_scale);

	296 dst += 4;

	297 count -= 4;

	298 }

	299 for (int i = 0; i < count; ++i) {

	300 Sk4f d4 = to_4f(dst[i]);

	301 dst[i] = to_4b(s4 + d4 * dst_scale);

	302 }

	303 } else {

	304 // Do the blend math in unit-bias, since we have to convert to/from dst sRGB

	305 while (count >= 4) {

	306 Sk4f d0 = load_dst<D>(dst[0]);

	307 Sk4f d1 = load_dst<D>(dst[1]);

	308 Sk4f d2 = load_dst<D>(dst[2]);

	309 Sk4f d3 = load_dst<D>(dst[3]);

	310 Sk4f_ToBytes((uint8_t*)dst,

	311 linear_unit_to_srgb_255(s4 + d0 * dst_scale),

	312 linear_unit_to_srgb_255(s4 + d1 * dst_scale),

	313 linear_unit_to_srgb_255(s4 + d2 * dst_scale),

	314 linear_unit_to_srgb_255(s4 + d3 * dst_scale));

	315 dst += 4;

	316 count -= 4;

	317 }

	318 for (int i = 0; i < count; ++i) {

	319 Sk4f d4 = load_dst<D>(dst[i]);

	320 dst[i] = to_4b(linear_unit_to_srgb_255(s4 + d4 * dst_scale));

	321 }

283 }	322 }

284 }	323 }

285 }	324 }

286	325

287 const XferProcPair gProcs_SrcOver[] = {	326 const XferProcPair gProcs_SrcOver[] = {

288 { srcover_1<kLinear_Dst>, srcover_n<kLinear_Dst> }, // linear alpha	327 { srcover_1<kLinear_Dst>, srcover_n<kLinear_Dst> }, // linear alpha

289 { src_1<kLinear_Dst>, src_n<kLinear_Dst> }, // linear opaque [ we are src-mode ]	328 { src_1<kLinear_Dst>, src_n<kLinear_Dst> }, // linear opaque [ we are src-mode ]

290 { srcover_1<kSRGB_Dst>, srcover_n<kSRGB_Dst> }, // srgb alpha	329 { srcover_1<kSRGB_Dst>, srcover_n<kSRGB_Dst> }, // srgb alpha

291 { src_1<kSRGB_Dst>, src_n<kSRGB_Dst> }, // srgb opaque [ we are src-mode ]	330 { src_1<kSRGB_Dst>, src_n<kSRGB_Dst> }, // srgb opaque [ we are src-mode ]

292 };	331 };

(...skipping 25 matching lines...) Expand all Loading...
318	357

319 SkXfermode::PM4fProc1 SkXfermode::getPM4fProc1(uint32_t flags) const {	358 SkXfermode::PM4fProc1 SkXfermode::getPM4fProc1(uint32_t flags) const {

320 Mode mode;	359 Mode mode;

321 return this->asMode(&mode) ? GetPM4fProc1(mode, flags) : xfer_pm4_proc_1;	360 return this->asMode(&mode) ? GetPM4fProc1(mode, flags) : xfer_pm4_proc_1;

322 }	361 }

323	362

324 SkXfermode::PM4fProcN SkXfermode::getPM4fProcN(uint32_t flags) const {	363 SkXfermode::PM4fProcN SkXfermode::getPM4fProcN(uint32_t flags) const {

325 Mode mode;	364 Mode mode;

326 return this->asMode(&mode) ? GetPM4fProcN(mode, flags) : xfer_pm4_proc_n;	365 return this->asMode(&mode) ? GetPM4fProcN(mode, flags) : xfer_pm4_proc_n;

327 }	366 }

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »