Index: src/core/SkLinearBitmapPipeline.cpp |
diff --git a/src/core/SkLinearBitmapPipeline.cpp b/src/core/SkLinearBitmapPipeline.cpp |
index d9b4a8d50432d530774802af049a7f7de7920af2..81783acd0e1b9d602d4a2d94885a55ca8dfc4cb9 100644 |
--- a/src/core/SkLinearBitmapPipeline.cpp |
+++ b/src/core/SkLinearBitmapPipeline.cpp |
@@ -555,10 +555,10 @@ private: |
// RGBA8888UnitRepeatSrc - A sampler that takes advantage of the fact that the src and destination |
// are the same format and do not need any transformations in pixel space. Therefore, there is no |
// need to convert them to HiFi pixel format. |
-class RGBA8888UnitRepeat final : public SkLinearBitmapPipeline::SampleProcessorInterface, |
- public SkLinearBitmapPipeline::DestinationInterface { |
+class RGBA8888UnitRepeatSrc final : public SkLinearBitmapPipeline::SampleProcessorInterface, |
+ public SkLinearBitmapPipeline::DestinationInterface { |
public: |
- RGBA8888UnitRepeat(const uint32_t* src, int32_t width) |
+ RGBA8888UnitRepeatSrc(const uint32_t* src, int32_t width) |
: fSrc{src}, fWidth{width} { } |
void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { |
@@ -626,6 +626,146 @@ private: |
uint32_t* fEnd; |
}; |
+void lerp_pixel(const uint32_t* src, uint32_t* dst) { |
mtklein
2016/04/27 16:10:04
Let's not call this lerp. Lerp is a function of t
|
+ Sk4b srcAsBytes = Sk4b::Load(src); |
mtklein
2016/04/27 16:10:04
I think this code would be more readable with fewe
|
+ Sk4f srcPixel = SkNx_cast<float>(srcAsBytes); |
+ srcPixel = sRGBFast::sRGBToLinear(srcPixel / 255.0f); |
mtklein
2016/04/27 16:10:04
Try not to write "x / 255.0f".
"x *(1/255.0f)" is
|
+ Sk4b dstAsBytes = Sk4b::Load(dst); |
+ Sk4f dstPixel = SkNx_cast<float>(dstAsBytes); |
+ dstPixel = sRGBFast::sRGBToLinear(dstPixel / 255.0f); |
+ Sk4f invAlpha = 1.0f - Sk4f{srcPixel[3]}; |
+ dstPixel *= invAlpha; |
+ dstPixel += srcPixel; |
+ dstPixel = sRGBFast::LinearTosRGB(dstPixel); |
+ dstPixel *= 255.0f; |
f(malita)
2016/04/27 15:51:17
Can we avoid the 255 divs/mult and lerp in 255-spa
mtklein
2016/04/27 16:10:04
neat.
|
+ dstAsBytes = SkNx_cast<uint8_t>(dstPixel); |
+ dstAsBytes.store(dst); |
+} |
+ |
+void lerp_pixels(const uint32_t* src, uint32_t* dst, size_t count) { |
+ |
+ while (count --> 0) { |
+ lerp_pixel(src, dst); |
+ dst += 1; |
+ src += 1; |
+ } |
+} |
+ |
+uint8_t extract_alpha(const uint32_t* pixel) { |
+ const uint8_t* bPixel = reinterpret_cast<const uint8_t*>(pixel); |
mtklein
2016/04/27 16:10:04
return *pixel >> 24;
?
|
+ return bPixel[3]; |
+} |
+ |
+// RGBA8888UnitRepeatSrcOver - A sampler that takes advantage of the fact that the src and |
+// destination are the same format and do not need any transformations in pixel space. Therefore, |
+// there is no need to convert them to HiFi pixel format. |
+class RGBA8888UnitRepeatSrcOver final : public SkLinearBitmapPipeline::SampleProcessorInterface, |
+ public SkLinearBitmapPipeline::DestinationInterface { |
+public: |
+ RGBA8888UnitRepeatSrcOver(const uint32_t* src, int32_t width) |
+ : fSrc{src}, fWidth{width} { } |
+ |
+ void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { |
+ SkASSERT(fDest + n <= fEnd); |
+ // At this point xs and ys should be >= 0, so trunc is the same as floor. |
+ Sk4i iXs = SkNx_cast<int>(xs); |
+ Sk4i iYs = SkNx_cast<int>(ys); |
+ |
+ if (n >= 1) blendPixelAt(iXs[0], iYs[0]); |
+ if (n >= 2) blendPixelAt(iXs[1], iYs[1]); |
+ if (n >= 3) blendPixelAt(iXs[2], iYs[2]); |
+ } |
+ |
+ void VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { |
+ SkASSERT(fDest + 4 <= fEnd); |
+ Sk4i iXs = SkNx_cast<int>(xs); |
+ Sk4i iYs = SkNx_cast<int>(ys); |
+ blendPixelAt(iXs[0], iYs[0]); |
+ blendPixelAt(iXs[1], iYs[1]); |
+ blendPixelAt(iXs[2], iYs[2]); |
+ blendPixelAt(iXs[3], iYs[3]); |
+ } |
+ |
+ void pointSpan(Span span) override { |
+ if (span.length() != 0.0f) { |
+ this->repeatSpan(span, 1); |
+ } |
+ } |
+ |
+ void repeatSpan(Span span, int32_t repeatCount) override { |
+ SkASSERT(fDest + span.count() * repeatCount <= fEnd); |
+ SkASSERT(span.count() > 0); |
+ SkASSERT(repeatCount > 0); |
+ |
+ int32_t x = (int32_t)span.startX(); |
+ int32_t y = (int32_t)span.startY(); |
+ const uint32_t* beginSpan = this->pixelAddress(x, y); |
+ const uint32_t* endSpan = beginSpan + span.count(); |
+ |
+ while (repeatCount-- > 0) { |
+ const uint32_t* src = beginSpan; |
+ do { |
+ const uint32_t* startSegment = src; |
+ uint8_t currentAlpha = extract_alpha(src); |
+ switch (currentAlpha) { |
+ case 0x00: |
+ do { |
+ src += 1; |
+ } while (src < endSpan && extract_alpha(src) == 0x00); |
+ |
+ break; |
+ case 0xFF: |
+ do { |
+ src += 1; |
+ } while (src < endSpan && extract_alpha(src) == 0xFF); |
+ memmove(fDest, startSegment, (src - startSegment) * sizeof(uint32_t)); |
+ break; |
+ default: |
+ do { |
+ src += 1; |
+ } while (src < endSpan |
+ && extract_alpha(src) != 0xFF |
+ && extract_alpha(src) != 0x00); |
+ lerp_pixels(startSegment, fDest, src - startSegment); |
+ } |
+ fDest += src - startSegment; |
+ } while (src < endSpan); |
+ } |
+ SkASSERT(fDest <= fEnd); |
+ } |
+ |
+ void VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) override { SkFAIL("Not Implemented"); } |
+ |
+ void bilerpSpan(Span span, SkScalar y) override { SkFAIL("Not Implemented"); } |
+ |
+ void setDestination(void* dst, int count) override { |
+ SkASSERT(count > 0); |
+ fDest = static_cast<uint32_t*>(dst); |
+ fEnd = fDest + count; |
+ } |
+ |
+private: |
+ const uint32_t* pixelAddress(int32_t x, int32_t y) { |
+ return &fSrc[fWidth * y + x]; |
+ } |
+ |
+ void blendPixelAt(int32_t x, int32_t y) { |
+ const uint32_t* src = this->pixelAddress(x, y); |
+ uint8_t alpha = extract_alpha(src); |
+ if (alpha == 0xff) { |
+ *fDest = *src; |
+ } else if (alpha != 0x00) { |
+ lerp_pixel(src, fDest); |
mtklein
2016/04/27 16:10:04
It might make things globally simpler to fuse lerp
|
+ } |
+ fDest += 1; |
+ }; |
+ |
+ const uint32_t* const fSrc; |
+ const int32_t fWidth; |
+ uint32_t* fDest; |
+ uint32_t* fEnd; |
+}; |
+ |
using Blender = SkLinearBitmapPipeline::BlendProcessorInterface; |
template<template <typename, typename> class Sampler> |
@@ -797,25 +937,24 @@ bool SkLinearBitmapPipeline::ClonePipelineForBlitting( |
SkXfermode::Mode xferMode, |
const SkImageInfo& dstInfo) |
{ |
+ if (xferMode == SkXfermode::kSrcOver_Mode |
+ && srcPixmap.info().alphaType() == kOpaque_SkAlphaType) { |
+ xferMode = SkXfermode::kSrc_Mode; |
+ } |
+ |
if (matrixMask & ~SkMatrix::kTranslate_Mask ) { return false; } |
if (filterQuality != SkFilterQuality::kNone_SkFilterQuality) { return false; } |
if (finalAlpha != 1.0f) { return false; } |
if (srcPixmap.info().colorType() != kRGBA_8888_SkColorType |
|| dstInfo.colorType() != kRGBA_8888_SkColorType) { return false; } |
- if (srcPixmap.info().profileType() != dstInfo.profileType()) { return false; } |
+ if (srcPixmap.info().profileType() != kSRGB_SkColorProfileType |
+ || dstInfo.profileType() != kSRGB_SkColorProfileType) { return false; } |
- if (xTileMode != SkShader::kRepeat_TileMode || yTileMode != SkShader::kRepeat_TileMode) { |
+ if (xferMode != SkXfermode::kSrc_Mode && xferMode != SkXfermode::kSrcOver_Mode) { |
return false; |
} |
- if (xferMode == SkXfermode::kSrcOver_Mode |
- && srcPixmap.info().alphaType() == kOpaque_SkAlphaType) { |
- xferMode = SkXfermode::kSrc_Mode; |
- } |
- |
- if (xferMode != SkXfermode::kSrc_Mode) { return false; } |
- |
new (blitterStorage) SkLinearBitmapPipeline(pipeline, srcPixmap, xferMode, dstInfo); |
return true; |
@@ -827,18 +966,26 @@ SkLinearBitmapPipeline::SkLinearBitmapPipeline( |
SkXfermode::Mode mode, |
const SkImageInfo& dstInfo) |
{ |
- SkASSERT(mode == SkXfermode::kSrc_Mode); |
+ SkASSERT(mode == SkXfermode::kSrc_Mode || mode == SkXfermode::kSrcOver_Mode); |
SkASSERT(srcPixmap.info().colorType() == dstInfo.colorType() |
&& srcPixmap.info().colorType() == kRGBA_8888_SkColorType); |
- fSampleStage.initSink<RGBA8888UnitRepeat>(srcPixmap.writable_addr32(0, 0), srcPixmap.width()); |
+ if (mode == SkXfermode::kSrc_Mode) { |
+ fSampleStage.initSink<RGBA8888UnitRepeatSrc>( |
+ srcPixmap.writable_addr32(0, 0), srcPixmap.rowBytes() / 4); |
+ fLastStage = fSampleStage.getInterface<DestinationInterface, RGBA8888UnitRepeatSrc>(); |
+ } else { |
+ fSampleStage.initSink<RGBA8888UnitRepeatSrcOver>( |
+ srcPixmap.writable_addr32(0, 0), srcPixmap.rowBytes() / 4); |
+ fLastStage = fSampleStage.getInterface<DestinationInterface, RGBA8888UnitRepeatSrcOver>(); |
+ } |
+ |
auto sampleStage = fSampleStage.get(); |
auto tilerStage = pipeline.fTileStage.cloneStageTo(sampleStage, &fTileStage); |
tilerStage = (tilerStage != nullptr) ? tilerStage : sampleStage; |
auto matrixStage = pipeline.fMatrixStage.cloneStageTo(tilerStage, &fMatrixStage); |
matrixStage = (matrixStage != nullptr) ? matrixStage : tilerStage; |
fFirstStage = matrixStage; |
- fLastStage = fSampleStage.getInterface<DestinationInterface, RGBA8888UnitRepeat>(); |
} |
void SkLinearBitmapPipeline::shadeSpan4f(int x, int y, SkPM4f* dst, int count) { |