src/core/SkLinearBitmapPipeline.cpp - Issue 1886233006: Add 8888 fast SrcOver mode.

Unified Diff: src/core/SkLinearBitmapPipeline.cpp

Issue 1886233006: Add 8888 fast SrcOver mode. (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: Clean CL Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: src/core/SkLinearBitmapPipeline.cpp

diff --git a/src/core/SkLinearBitmapPipeline.cpp b/src/core/SkLinearBitmapPipeline.cpp

index d9b4a8d50432d530774802af049a7f7de7920af2..81783acd0e1b9d602d4a2d94885a55ca8dfc4cb9 100644

--- a/src/core/SkLinearBitmapPipeline.cpp

+++ b/src/core/SkLinearBitmapPipeline.cpp

@@ -555,10 +555,10 @@ private:

// RGBA8888UnitRepeatSrc - A sampler that takes advantage of the fact that the src and destination

// are the same format and do not need any transformations in pixel space. Therefore, there is no

// need to convert them to HiFi pixel format.

-class RGBA8888UnitRepeat final : public SkLinearBitmapPipeline::SampleProcessorInterface,

- public SkLinearBitmapPipeline::DestinationInterface {

+class RGBA8888UnitRepeatSrc final : public SkLinearBitmapPipeline::SampleProcessorInterface,

+ public SkLinearBitmapPipeline::DestinationInterface {

public:

- RGBA8888UnitRepeat(const uint32_t* src, int32_t width)

+ RGBA8888UnitRepeatSrc(const uint32_t* src, int32_t width)

: fSrc{src}, fWidth{width} { }

void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {

@@ -626,6 +626,146 @@ private:

uint32_t* fEnd;

};

+void lerp_pixel(const uint32_t* src, uint32_t* dst) {

mtklein 2016/04/27 16:10:04 Let's not call this lerp. Lerp is a function of t

+ Sk4b srcAsBytes = Sk4b::Load(src);

mtklein 2016/04/27 16:10:04 I think this code would be more readable with fewe

+ Sk4f srcPixel = SkNx_cast<float>(srcAsBytes);

+ srcPixel = sRGBFast::sRGBToLinear(srcPixel / 255.0f);

mtklein 2016/04/27 16:10:04 Try not to write "x / 255.0f". "x *(1/255.0f)" is

+ Sk4b dstAsBytes = Sk4b::Load(dst);

+ Sk4f dstPixel = SkNx_cast<float>(dstAsBytes);

+ dstPixel = sRGBFast::sRGBToLinear(dstPixel / 255.0f);

+ Sk4f invAlpha = 1.0f - Sk4f{srcPixel[3]};

+ dstPixel *= invAlpha;

+ dstPixel += srcPixel;

+ dstPixel = sRGBFast::LinearTosRGB(dstPixel);

+ dstPixel *= 255.0f;

f(malita) 2016/04/27 15:51:17 Can we avoid the 255 divs/mult and lerp in 255-spa

mtklein 2016/04/27 16:10:04 neat.

+ dstAsBytes = SkNx_cast<uint8_t>(dstPixel);

+ dstAsBytes.store(dst);

+void lerp_pixels(const uint32_t* src, uint32_t* dst, size_t count) {

+ while (count --> 0) {

+ lerp_pixel(src, dst);

+ dst += 1;

+ src += 1;

+ }

+uint8_t extract_alpha(const uint32_t* pixel) {

+ const uint8_t* bPixel = reinterpret_cast<const uint8_t*>(pixel);

mtklein 2016/04/27 16:10:04 return *pixel >> 24; ?

+ return bPixel[3];

+// RGBA8888UnitRepeatSrcOver - A sampler that takes advantage of the fact that the src and destination

+// are the same format and do not need any transformations in pixel space. Therefore, there is no

+// need to convert them to HiFi pixel format.

+class RGBA8888UnitRepeatSrcOver final : public SkLinearBitmapPipeline::SampleProcessorInterface,

+ public SkLinearBitmapPipeline::DestinationInterface {

+public:

+ RGBA8888UnitRepeatSrcOver(const uint32_t* src, int32_t width)

+ : fSrc{src}, fWidth{width} { }

+ void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {

+ SkASSERT(fDest + n <= fEnd);

+ // At this point xs and ys should be >= 0, so trunc is the same as floor.

+ Sk4i iXs = SkNx_cast<int>(xs);

+ Sk4i iYs = SkNx_cast<int>(ys);

+ if (n >= 1) blendPixelAt(iXs[0], iYs[0]);

+ if (n >= 2) blendPixelAt(iXs[1], iYs[1]);

+ if (n >= 3) blendPixelAt(iXs[2], iYs[2]);

+ }

+ void VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {

+ SkASSERT(fDest + 4 <= fEnd);

+ Sk4i iXs = SkNx_cast<int>(xs);

+ Sk4i iYs = SkNx_cast<int>(ys);

+ blendPixelAt(iXs[0], iYs[0]);

+ blendPixelAt(iXs[1], iYs[1]);

+ blendPixelAt(iXs[2], iYs[2]);

+ blendPixelAt(iXs[3], iYs[3]);

+ }

+ void pointSpan(Span span) override {

+ if (span.length() != 0.0f) {

+ this->repeatSpan(span, 1);

+ }

+ void repeatSpan(Span span, int32_t repeatCount) override {

+ SkASSERT(fDest + span.count() * repeatCount <= fEnd);

+ SkASSERT(span.count() > 0);

+ SkASSERT(repeatCount > 0);

+ int32_t x = (int32_t)span.startX();

+ int32_t y = (int32_t)span.startY();

+ const uint32_t* beginSpan = this->pixelAddress(x, y);

+ const uint32_t* endSpan = beginSpan + span.count();

+ while (repeatCount-- > 0) {

+ const uint32_t* src = beginSpan;

+ do {

+ const uint32_t* startSegment = src;

+ uint8_t currentAlpha = extract_alpha(src);

+ switch (currentAlpha) {

+ case 0x00:

+ do {

+ src += 1;

+ } while (src < endSpan && extract_alpha(src) == 0x00);

+ break;

+ case 0xFF:

+ do {

+ src += 1;

+ } while (src < endSpan && extract_alpha(src) == 0xFF);

+ memmove(fDest, startSegment, (src - startSegment) * sizeof(uint32_t));

+ break;

+ default:

+ do {

+ src += 1;

+ } while (src < endSpan

+ && extract_alpha(src) != 0xFF

+ && extract_alpha(src) != 0x00);

+ lerp_pixels(startSegment, fDest, src - startSegment);

+ }

+ fDest += src - startSegment;

+ } while (src < endSpan);

+ }

+ SkASSERT(fDest <= fEnd);

+ }

+ void VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) override { SkFAIL("Not Implemented"); }

+ void bilerpSpan(Span span, SkScalar y) override { SkFAIL("Not Implemented"); }

+ void setDestination(void* dst, int count) override {

+ SkASSERT(count > 0);

+ fDest = static_cast<uint32_t*>(dst);

+ fEnd = fDest + count;

+ }

+private:

+ const uint32_t* pixelAddress(int32_t x, int32_t y) {

+ return &fSrc[fWidth * y + x];

+ }

+ void blendPixelAt(int32_t x, int32_t y) {

+ const uint32_t* src = this->pixelAddress(x, y);

+ uint8_t alpha = extract_alpha(src);

+ if (alpha == 0xff) {

+ *fDest = *src;

+ } else if (alpha != 0x00) {

+ lerp_pixel(src, fDest);

mtklein 2016/04/27 16:10:04 It might make things globally simpler to fuse lerp

+ }

+ fDest += 1;

+ };

+ const uint32_t* const fSrc;

+ const int32_t fWidth;

+ uint32_t* fDest;

+ uint32_t* fEnd;

+};

using Blender = SkLinearBitmapPipeline::BlendProcessorInterface;

template<template <typename, typename> class Sampler>

@@ -797,25 +937,24 @@ bool SkLinearBitmapPipeline::ClonePipelineForBlitting(

SkXfermode::Mode xferMode,

const SkImageInfo& dstInfo)

{

+ if (xferMode == SkXfermode::kSrcOver_Mode

+ && srcPixmap.info().alphaType() == kOpaque_SkAlphaType) {

+ xferMode = SkXfermode::kSrc_Mode;

+ }

if (matrixMask & ~SkMatrix::kTranslate_Mask ) { return false; }

if (filterQuality != SkFilterQuality::kNone_SkFilterQuality) { return false; }

if (finalAlpha != 1.0f) { return false; }

if (srcPixmap.info().colorType() != kRGBA_8888_SkColorType

|| dstInfo.colorType() != kRGBA_8888_SkColorType) { return false; }

- if (srcPixmap.info().profileType() != dstInfo.profileType()) { return false; }

+ if (srcPixmap.info().profileType() != kSRGB_SkColorProfileType

+ || dstInfo.profileType() != kSRGB_SkColorProfileType) { return false; }

- if (xTileMode != SkShader::kRepeat_TileMode || yTileMode != SkShader::kRepeat_TileMode) {

+ if (xferMode != SkXfermode::kSrc_Mode && xferMode != SkXfermode::kSrcOver_Mode) {

return false;

}

- if (xferMode == SkXfermode::kSrcOver_Mode

- && srcPixmap.info().alphaType() == kOpaque_SkAlphaType) {

- xferMode = SkXfermode::kSrc_Mode;

- }

- if (xferMode != SkXfermode::kSrc_Mode) { return false; }

new (blitterStorage) SkLinearBitmapPipeline(pipeline, srcPixmap, xferMode, dstInfo);

return true;

@@ -827,18 +966,26 @@ SkLinearBitmapPipeline::SkLinearBitmapPipeline(

SkXfermode::Mode mode,

const SkImageInfo& dstInfo)

{

- SkASSERT(mode == SkXfermode::kSrc_Mode);

+ SkASSERT(mode == SkXfermode::kSrc_Mode || mode == SkXfermode::kSrcOver_Mode);

SkASSERT(srcPixmap.info().colorType() == dstInfo.colorType()

&& srcPixmap.info().colorType() == kRGBA_8888_SkColorType);

- fSampleStage.initSink<RGBA8888UnitRepeat>(srcPixmap.writable_addr32(0, 0), srcPixmap.width());

+ if (mode == SkXfermode::kSrc_Mode) {

+ fSampleStage.initSink<RGBA8888UnitRepeatSrc>(

+ srcPixmap.writable_addr32(0, 0), srcPixmap.rowBytes() / 4);

+ fLastStage = fSampleStage.getInterface<DestinationInterface, RGBA8888UnitRepeatSrc>();

+ } else {

+ fSampleStage.initSink<RGBA8888UnitRepeatSrcOver>(

+ srcPixmap.writable_addr32(0, 0), srcPixmap.rowBytes() / 4);

+ fLastStage = fSampleStage.getInterface<DestinationInterface, RGBA8888UnitRepeatSrcOver>();

+ }

auto sampleStage = fSampleStage.get();

auto tilerStage = pipeline.fTileStage.cloneStageTo(sampleStage, &fTileStage);

tilerStage = (tilerStage != nullptr) ? tilerStage : sampleStage;

auto matrixStage = pipeline.fMatrixStage.cloneStageTo(tilerStage, &fMatrixStage);

matrixStage = (matrixStage != nullptr) ? matrixStage : tilerStage;

fFirstStage = matrixStage;

- fLastStage = fSampleStage.getInterface<DestinationInterface, RGBA8888UnitRepeat>();

}

void SkLinearBitmapPipeline::shadeSpan4f(int x, int y, SkPM4f* dst, int count) {

« no previous file with comments | « no previous file | src/core/SkLinearBitmapPipeline_core.h » ('j') | no next file with comments »