Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/core/SkLinearBitmapPipeline_sample.h

Issue 2134893002: Redo Tiling (Closed) Base URL: https://skia.googlesource.com/skia.git@reduce-LBP-sample
Patch Set: Fix perf problem. Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/core/SkLinearBitmapPipeline_core.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2016 Google Inc. 2 * Copyright 2016 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkLinearBitmapPipeline_sampler_DEFINED 8 #ifndef SkLinearBitmapPipeline_sampler_DEFINED
9 #define SkLinearBitmapPipeline_sampler_DEFINED 9 #define SkLinearBitmapPipeline_sampler_DEFINED
10 10
(...skipping 22 matching lines...) Expand all
33 // 33 //
34 // 34 //
35 // Given a pixelxy each is multiplied by a different factor derived from the fra ctional part of x 35 // Given a pixelxy each is multiplied by a different factor derived from the fra ctional part of x
36 // and y: 36 // and y:
37 // * px00 -> (1 - x)(1 - y) = 1 - x - y + xy 37 // * px00 -> (1 - x)(1 - y) = 1 - x - y + xy
38 // * px10 -> x(1 - y) = x - xy 38 // * px10 -> x(1 - y) = x - xy
39 // * px01 -> (1 - x)y = y - xy 39 // * px01 -> (1 - x)y = y - xy
40 // * px11 -> xy 40 // * px11 -> xy
41 // So x * y is calculated first and then used to calculate all the other factors . 41 // So x * y is calculated first and then used to calculate all the other factors .
42 static Sk4s SK_VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10, 42 static Sk4s SK_VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10,
43 Sk4f px01, Sk4f px11) { 43 Sk4f px01, Sk4f px11) {
44 // Calculate fractional xs and ys. 44 // Calculate fractional xs and ys.
45 Sk4s fxs = xs - xs.floor(); 45 Sk4s fxs = xs - xs.floor();
46 Sk4s fys = ys - ys.floor(); 46 Sk4s fys = ys - ys.floor();
47 Sk4s fxys{fxs * fys}; 47 Sk4s fxys{fxs * fys};
48 Sk4f sum = px11 * fxys; 48 Sk4f sum = px11 * fxys;
49 sum = sum + px01 * (fys - fxys); 49 sum = sum + px01 * (fys - fxys);
50 sum = sum + px10 * (fxs - fxys); 50 sum = sum + px10 * (fxs - fxys);
51 sum = sum + px00 * (Sk4f{1.0f} - fxs - fys + fxys); 51 sum = sum + px00 * (Sk4f{1.0f} - fxs - fys + fxys);
52 return sum; 52 return sum;
53 } 53 }
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
127 Sk4f toSk4f(Element pixel) const { 127 Sk4f toSk4f(Element pixel) const {
128 return swizzle_rb( 128 return swizzle_rb(
129 gammaType == kSRGB_SkGammaType ? Sk4f_fromS32(pixel) : Sk4f_f romL32(pixel)); 129 gammaType == kSRGB_SkGammaType ? Sk4f_fromS32(pixel) : Sk4f_f romL32(pixel));
130 } 130 }
131 }; 131 };
132 132
133 template <SkGammaType gammaType> 133 template <SkGammaType gammaType>
134 class PixelConverter<kIndex_8_SkColorType, gammaType> { 134 class PixelConverter<kIndex_8_SkColorType, gammaType> {
135 public: 135 public:
136 using Element = uint8_t; 136 using Element = uint8_t;
137 PixelConverter(const SkPixmap& srcPixmap) { 137 PixelConverter(const SkPixmap& srcPixmap)
138 : fColorTableSize(srcPixmap.ctable()->count()){
138 SkColorTable* skColorTable = srcPixmap.ctable(); 139 SkColorTable* skColorTable = srcPixmap.ctable();
139 SkASSERT(skColorTable != nullptr); 140 SkASSERT(skColorTable != nullptr);
140 141
141 fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get()); 142 fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get());
142 for (int i = 0; i < skColorTable->count(); i++) { 143 for (int i = 0; i < fColorTableSize; i++) {
143 fColorTable[i] = pmcolor_to_rgba<gammaType>((*skColorTable)[i]); 144 fColorTable[i] = pmcolor_to_rgba<gammaType>((*skColorTable)[i]);
144 } 145 }
145 } 146 }
146 147
147 PixelConverter(const PixelConverter& strategy) { 148 PixelConverter(const PixelConverter& strategy)
149 : fColorTableSize{strategy.fColorTableSize}{
148 fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get()); 150 fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get());
149 // TODO: figure out the count. 151 for (int i = 0; i < fColorTableSize; i++) {
150 for (int i = 0; i < 256; i++) {
151 fColorTable[i] = strategy.fColorTable[i]; 152 fColorTable[i] = strategy.fColorTable[i];
152 } 153 }
153 } 154 }
154 155
155 Sk4f toSk4f(Element index) const { 156 Sk4f toSk4f(Element index) const {
156 return fColorTable[index]; 157 return fColorTable[index];
157 } 158 }
158 159
159 private: 160 private:
160 static const size_t kColorTableSize = sizeof(Sk4f[256]) + 12; 161 static const size_t kColorTableSize = sizeof(Sk4f[256]) + 12;
161 162 const int fColorTableSize;
162 SkAutoMalloc fColorTableStorage{kColorTableSize}; 163 SkAutoMalloc fColorTableStorage{kColorTableSize};
163 Sk4f* fColorTable; 164 Sk4f* fColorTable;
164 }; 165 };
165 166
166 template <SkGammaType gammaType> 167 template <SkGammaType gammaType>
167 class PixelConverter<kGray_8_SkColorType, gammaType> { 168 class PixelConverter<kGray_8_SkColorType, gammaType> {
168 public: 169 public:
169 using Element = uint8_t; 170 using Element = uint8_t;
170 PixelConverter(const SkPixmap& srcPixmap) { } 171 PixelConverter(const SkPixmap& srcPixmap) { }
171 172
172 Sk4f toSk4f(Element pixel) const { 173 Sk4f toSk4f(Element pixel) const {
173 float gray = pixel * (1.0f/255.0f); 174 float gray = pixel * (1.0f/255.0f);
(...skipping 14 matching lines...) Expand all
188 return SkHalfToFloat_finite(pixel); 189 return SkHalfToFloat_finite(pixel);
189 } 190 }
190 }; 191 };
191 192
192 class PixelAccessorShim { 193 class PixelAccessorShim {
193 public: 194 public:
194 explicit PixelAccessorShim(SkLinearBitmapPipeline::PixelAccessorInterface* a ccessor) 195 explicit PixelAccessorShim(SkLinearBitmapPipeline::PixelAccessorInterface* a ccessor)
195 : fPixelAccessor(accessor) { } 196 : fPixelAccessor(accessor) { }
196 197
197 void SK_VECTORCALL getFewPixels( 198 void SK_VECTORCALL getFewPixels(
198 int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const { 199 int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const {
199 fPixelAccessor->getFewPixels(n, xs, ys, px0, px1, px2); 200 fPixelAccessor->getFewPixels(n, xs, ys, px0, px1, px2);
200 } 201 }
201 202
202 void SK_VECTORCALL get4Pixels( 203 void SK_VECTORCALL get4Pixels(
203 Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const { 204 Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const {
204 fPixelAccessor->get4Pixels(xs, ys, px0, px1, px2, px3); 205 fPixelAccessor->get4Pixels(xs, ys, px0, px1, px2, px3);
205 } 206 }
206 207
207 void get4Pixels( 208 void get4Pixels(
208 const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const { 209 const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const {
209 fPixelAccessor->get4Pixels(src, index, px0, px1, px2, px3); 210 fPixelAccessor->get4Pixels(src, index, px0, px1, px2, px3);
210 }; 211 };
211 212
212 Sk4f getPixelFromRow(const void* row, int index) const { 213 Sk4f getPixelFromRow(const void* row, int index) const {
213 return fPixelAccessor->getPixelFromRow(row, index); 214 return fPixelAccessor->getPixelFromRow(row, index);
(...skipping 17 matching lines...) Expand all
231 class PixelAccessor final : public SkLinearBitmapPipeline::PixelAccessorInterfac e { 232 class PixelAccessor final : public SkLinearBitmapPipeline::PixelAccessorInterfac e {
232 using Element = typename PixelConverter<colorType, gammaType>::Element; 233 using Element = typename PixelConverter<colorType, gammaType>::Element;
233 public: 234 public:
234 template <typename... Args> 235 template <typename... Args>
235 PixelAccessor(const SkPixmap& srcPixmap, Args&&... args) 236 PixelAccessor(const SkPixmap& srcPixmap, Args&&... args)
236 : fSrc{static_cast<const Element*>(srcPixmap.addr())} 237 : fSrc{static_cast<const Element*>(srcPixmap.addr())}
237 , fWidth{srcPixmap.rowBytesAsPixels()} 238 , fWidth{srcPixmap.rowBytesAsPixels()}
238 , fConverter{srcPixmap, std::move<Args>(args)...} { } 239 , fConverter{srcPixmap, std::move<Args>(args)...} { }
239 240
240 void SK_VECTORCALL getFewPixels ( 241 void SK_VECTORCALL getFewPixels (
241 int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override { 242 int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override {
242 Sk4i XIs = SkNx_cast<int, SkScalar>(xs); 243 Sk4i bufferLoc = ys * fWidth + xs;
243 Sk4i YIs = SkNx_cast<int, SkScalar>(ys);
244 Sk4i bufferLoc = YIs * fWidth + XIs;
245 switch (n) { 244 switch (n) {
246 case 3: 245 case 3:
247 *px2 = this->getPixelAt(bufferLoc[2]); 246 *px2 = this->getPixelAt(bufferLoc[2]);
248 case 2: 247 case 2:
249 *px1 = this->getPixelAt(bufferLoc[1]); 248 *px1 = this->getPixelAt(bufferLoc[1]);
250 case 1: 249 case 1:
251 *px0 = this->getPixelAt(bufferLoc[0]); 250 *px0 = this->getPixelAt(bufferLoc[0]);
252 default: 251 default:
253 break; 252 break;
254 } 253 }
255 } 254 }
256 255
257 void SK_VECTORCALL get4Pixels( 256 void SK_VECTORCALL get4Pixels(
258 Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const over ride { 257 Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const over ride {
259 Sk4i XIs = SkNx_cast<int, SkScalar>(xs); 258 Sk4i bufferLoc = ys * fWidth + xs;
260 Sk4i YIs = SkNx_cast<int, SkScalar>(ys);
261 Sk4i bufferLoc = YIs * fWidth + XIs;
262 *px0 = this->getPixelAt(bufferLoc[0]); 259 *px0 = this->getPixelAt(bufferLoc[0]);
263 *px1 = this->getPixelAt(bufferLoc[1]); 260 *px1 = this->getPixelAt(bufferLoc[1]);
264 *px2 = this->getPixelAt(bufferLoc[2]); 261 *px2 = this->getPixelAt(bufferLoc[2]);
265 *px3 = this->getPixelAt(bufferLoc[3]); 262 *px3 = this->getPixelAt(bufferLoc[3]);
266 } 263 }
267 264
268 void get4Pixels( 265 void get4Pixels(
269 const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override { 266 const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override {
270 *px0 = this->getPixelFromRow(src, index + 0); 267 *px0 = this->getPixelFromRow(src, index + 0);
271 *px1 = this->getPixelFromRow(src, index + 1); 268 *px1 = this->getPixelFromRow(src, index + 1);
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
324 } 321 }
325 322
326 while (count > 0) { 323 while (count > 0) {
327 next->blendPixel(strategy->getPixelFromRow(row, ix)); 324 next->blendPixel(strategy->getPixelFromRow(row, ix));
328 ix -= 1; 325 ix -= 1;
329 count -= 1; 326 count -= 1;
330 } 327 }
331 } 328 }
332 } 329 }
333 330
331 // -- NearestNeighborSampler --------------------------------------------------- --------------------
334 // NearestNeighborSampler - use nearest neighbor filtering to create runs of des tination pixels. 332 // NearestNeighborSampler - use nearest neighbor filtering to create runs of des tination pixels.
335 template<typename Accessor, typename Next> 333 template<typename Accessor, typename Next>
336 class NearestNeighborSampler : public SkLinearBitmapPipeline::SampleProcessorInt erface { 334 class NearestNeighborSampler : public SkLinearBitmapPipeline::SampleProcessorInt erface {
337 public: 335 public:
338 template<typename... Args> 336 template<typename... Args>
339 NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next , Args&& ... args) 337 NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next , Args&& ... args)
340 : fNext{next}, fAccessor{std::forward<Args>(args)...} { } 338 : fNext{next}, fAccessor{std::forward<Args>(args)...} { }
341 339
342 NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next , 340 NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next ,
343 const NearestNeighborSampler& sampler) 341 const NearestNeighborSampler& sampler)
344 : fNext{next}, fAccessor{sampler.fAccessor} { } 342 : fNext{next}, fAccessor{sampler.fAccessor} { }
345 343
346 void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { 344 void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
347 SkASSERT(0 < n && n < 4); 345 SkASSERT(0 < n && n < 4);
348 Sk4f px0, px1, px2; 346 Sk4f px0, px1, px2;
349 fAccessor.getFewPixels(n, xs, ys, &px0, &px1, &px2); 347 fAccessor.getFewPixels(n, SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2);
350 if (n >= 1) fNext->blendPixel(px0); 348 if (n >= 1) fNext->blendPixel(px0);
351 if (n >= 2) fNext->blendPixel(px1); 349 if (n >= 2) fNext->blendPixel(px1);
352 if (n >= 3) fNext->blendPixel(px2); 350 if (n >= 3) fNext->blendPixel(px2);
353 } 351 }
354 352
355 void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { 353 void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {
356 Sk4f px0, px1, px2, px3; 354 Sk4f px0, px1, px2, px3;
357 fAccessor.get4Pixels(xs, ys, &px0, &px1, &px2, &px3); 355 fAccessor.get4Pixels(SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2, &px3);
358 fNext->blend4Pixels(px0, px1, px2, px3); 356 fNext->blend4Pixels(px0, px1, px2, px3);
359 } 357 }
360 358
361 void pointSpan(Span span) override { 359 void pointSpan(Span span) override {
362 SkASSERT(!span.isEmpty()); 360 SkASSERT(!span.isEmpty());
363 SkPoint start; 361 SkPoint start;
364 SkScalar length; 362 SkScalar length;
365 int count; 363 int count;
366 std::tie(start, length, count) = span; 364 std::tie(start, length, count) = span;
367 SkScalar absLength = SkScalarAbs(length); 365 SkScalar absLength = SkScalarAbs(length);
368 if (absLength < (count - 1)) { 366 if (absLength < (count - 1)) {
369 this->spanSlowRate(span); 367 this->spanSlowRate(span);
370 } else if (absLength == (count - 1)) { 368 } else if (absLength == (count - 1)) {
371 src_strategy_blend(span, fNext, &fAccessor); 369 src_strategy_blend(span, fNext, &fAccessor);
372 } else { 370 } else {
373 this->spanFastRate(span); 371 this->spanFastRate(span);
374 } 372 }
375 } 373 }
376 374
377 void repeatSpan(Span span, int32_t repeatCount) override { 375 void repeatSpan(Span span, int32_t repeatCount) override {
378 while (repeatCount > 0) { 376 while (repeatCount > 0) {
379 this->pointSpan(span); 377 this->pointSpan(span);
380 repeatCount--; 378 repeatCount--;
381 } 379 }
382 } 380 }
383 381
384 void SK_VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) override {
385 SkFAIL("Using nearest neighbor sampler, but calling a bilerpEdge.");
386 }
387
388 void bilerpSpan(Span span, SkScalar y) override {
389 SkFAIL("Using nearest neighbor sampler, but calling a bilerpSpan.");
390 }
391
392 private: 382 private:
393 // When moving through source space more slowly than dst space (zoomed in), 383 // When moving through source space more slowly than dst space (zoomed in),
394 // we'll be sampling from the same source pixel more than once. 384 // we'll be sampling from the same source pixel more than once.
395 void spanSlowRate(Span span) { 385 void spanSlowRate(Span span) {
396 SkPoint start; 386 SkPoint start; SkScalar length; int count;
397 SkScalar length;
398 int count;
399 std::tie(start, length, count) = span; 387 std::tie(start, length, count) = span;
400 SkScalar x = X(start); 388 SkScalar x = X(start);
401 SkFixed fx = SkScalarToFixed(x); 389 SkFixed fx = SkScalarToFixed(x);
402 SkScalar dx = length / (count - 1); 390 SkScalar dx = length / (count - 1);
403 SkFixed fdx = SkScalarToFixed(dx); 391 SkFixed fdx = SkScalarToFixed(dx);
404 392
405 const void* row = fAccessor.row((int)std::floor(Y(start))); 393 const void* row = fAccessor.row((int)std::floor(Y(start)));
406 Next* next = fNext; 394 Next* next = fNext;
407 395
408 int ix = SkFixedFloorToInt(fx); 396 int ix = SkFixedFloorToInt(fx);
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
445 // We're moving through source space faster than dst (zoomed out), 433 // We're moving through source space faster than dst (zoomed out),
446 // so we'll never reuse a source pixel or be able to do contiguous loads. 434 // so we'll never reuse a source pixel or be able to do contiguous loads.
447 void spanFastRate(Span span) { 435 void spanFastRate(Span span) {
448 span_fallback(span, this); 436 span_fallback(span, this);
449 } 437 }
450 438
451 Next* const fNext; 439 Next* const fNext;
452 Accessor fAccessor; 440 Accessor fAccessor;
453 }; 441 };
454 442
443 // From an edgeType, the integer value of a pixel vs, and the integer value of the extreme edge
mtklein 2016/07/21 19:19:25 edgeType
herb_g 2016/07/21 19:52:20 Done.
444 // vMax, take the point which might be off the tile by one pixel and either wrap it or pin it to
445 // generate the right pixel. The value vs is on the interval [-1, vMax + 1]. It produces a value
446 // on the interval [0, vMax].
447 // Note: vMax is not width or height, but width-1 or height-1 because it is the largest valid pixel.
448 static inline int adjust_edge(SkShader::TileMode edgeType, int vs, int vMax) {
449 SkASSERT(-1 <= vs && vs <= vMax + 1)
450 switch (edgeType) {
451 case SkShader::kClamp_TileMode:
452 case SkShader::kMirror_TileMode:
453 vs = std::max(vs, 0);
454 vs = std::min(vs, vMax);
455 break;
456 case SkShader::kRepeat_TileMode:
457 vs = (vs <= vMax) ? vs : 0;
458 vs = (vs >= 0) ? vs : vMax;
459 break;
460 }
461 SkASSERT(0 <= vs && vs <= vMax);
462 return vs;
463 }
464
465 // From a sample point on the tile, return the top or left filter value.
466 // The result r should be in the range (0, 1]. Since this represents the weight given to the top
467 // left element, then if x == 0.5 the filter value should be 1.0.
468 // The input sample point must be on the tile, therefore it must be >= 0.
469 static SkScalar sample_to_filter(SkScalar x) {
470 SkASSERT(x >= 0.0f);
471 // The usual form of the top or left edge is x - .5, but since we are working on the unit
472 // square, then x + .5 works just as well. This also guarantees that v > 0.0 allowing the use
473 // of trunc.
474 SkScalar v = x + 0.5f;
475 // Produce the top or left offset, a value on the range [0, 1).
476 SkScalar f = v - SkScalarTruncToScalar(v);
477 // Produce the filter value which is on the range (0, 1].
478 SkScalar r = 1.0f - f;
479 SkASSERT(0.0f < r && r <= 1.0f);
480 return r;
481 }
482
455 // -- BilerpSampler ------------------------------------------------------------ -------------------- 483 // -- BilerpSampler ------------------------------------------------------------ --------------------
456 // BilerpSampler - use a bilerp filter to create runs of destination pixels. 484 // BilerpSampler - use a bilerp filter to create runs of destination pixels.
485 // Note: in the code below, there are two types of points
486 // * sample points - these are the points passed in by pointList* and Spans.
487 // * filter points - are created from a sample point to form the coordinates of the points
488 // to use in the filter and to generate the filter values.
457 template<typename Accessor, typename Next> 489 template<typename Accessor, typename Next>
458 class BilerpSampler : public SkLinearBitmapPipeline::SampleProcessorInterface { 490 class BilerpSampler : public SkLinearBitmapPipeline::SampleProcessorInterface {
459 public: 491 public:
460 template<typename... Args> 492 template<typename... Args>
461 BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, Args&& ... args) 493 BilerpSampler(
462 : fNext{next}, fAccessor{std::forward<Args>(args)...} { } 494 SkLinearBitmapPipeline::BlendProcessorInterface* next,
495 SkISize dimensions,
496 SkShader::TileMode xTile, SkShader::TileMode yTile,
497 Args&& ... args
498 )
499 : fNext{next}
500 , fXEdgeType{xTile}
501 , fXMax{dimensions.width() - 1}
502 , fYEdgeType{yTile}
503 , fYMax{dimensions.height() - 1}
504 , fAccessor{std::forward<Args>(args)...} { }
463 505
464 BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, 506 BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next,
465 const BilerpSampler& sampler) 507 const BilerpSampler& sampler)
466 : fNext{next}, fAccessor{sampler.fAccessor} { } 508 : fNext{next}
467 509 , fXEdgeType{sampler.fXEdgeType}
468 Sk4f bilerpNonEdgePixel(SkScalar x, SkScalar y) { 510 , fXMax{sampler.fXMax}
469 Sk4f px00, px10, px01, px11; 511 , fYEdgeType{sampler.fYEdgeType}
470 512 , fYMax{sampler.fYMax}
471 // bilerp4() expects xs, ys are the top-lefts of the 2x2 kernel. 513 , fAccessor{sampler.fAccessor} { }
472 Sk4f xs = Sk4f{x} - 0.5f;
473 Sk4f ys = Sk4f{y} - 0.5f;
474 Sk4f sampleXs = xs + Sk4f{0.0f, 1.0f, 0.0f, 1.0f};
475 Sk4f sampleYs = ys + Sk4f{0.0f, 0.0f, 1.0f, 1.0f};
476 fAccessor.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11);
477 return bilerp4(xs, ys, px00, px10, px01, px11);
478 }
479 514
480 void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { 515 void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
481 SkASSERT(0 < n && n < 4); 516 SkASSERT(0 < n && n < 4);
482 auto bilerpPixel = [&](int index) { 517 auto bilerpPixel = [&](int index) {
483 return this->bilerpNonEdgePixel(xs[index], ys[index]); 518 return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]});
484 }; 519 };
485 520
486 if (n >= 1) fNext->blendPixel(bilerpPixel(0)); 521 if (n >= 1) fNext->blendPixel(bilerpPixel(0));
487 if (n >= 2) fNext->blendPixel(bilerpPixel(1)); 522 if (n >= 2) fNext->blendPixel(bilerpPixel(1));
488 if (n >= 3) fNext->blendPixel(bilerpPixel(2)); 523 if (n >= 3) fNext->blendPixel(bilerpPixel(2));
489 } 524 }
490 525
491 void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { 526 void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {
492 auto bilerpPixel = [&](int index) { 527 auto bilerpPixel = [&](int index) {
493 return this->bilerpNonEdgePixel(xs[index], ys[index]); 528 return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]});
494 }; 529 };
495 fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bile rpPixel(3)); 530 fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bile rpPixel(3));
496 } 531 }
497 532
498 void pointSpan(Span span) override { 533 void pointSpan(Span span) override {
499 this->bilerpSpan(span, span.startY());
500 }
501
502 void repeatSpan(Span span, int32_t repeatCount) override {
503 while (repeatCount > 0) {
504 this->pointSpan(span);
505 repeatCount--;
506 }
507 }
508
509 void SK_VECTORCALL bilerpEdge(Sk4s sampleXs, Sk4s sampleYs) override {
510 Sk4f px00, px10, px01, px11;
511 Sk4f xs = Sk4f{sampleXs[0]};
512 Sk4f ys = Sk4f{sampleYs[0]};
513 fAccessor.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11);
514 Sk4f pixel = bilerp4(xs, ys, px00, px10, px01, px11);
515 fNext->blendPixel(pixel);
516 }
517
518 void bilerpSpan(Span span, SkScalar y) override {
519 SkASSERT(!span.isEmpty()); 534 SkASSERT(!span.isEmpty());
520 SkPoint start; 535 SkPoint start;
521 SkScalar length; 536 SkScalar length;
522 int count; 537 int count;
523 std::tie(start, length, count) = span; 538 std::tie(start, length, count) = span;
539
540 // Nothing to do.
541 if (count == 0) {
542 return;
543 }
544
545 // Trivial case. No sample points are generated other than start.
546 if (count == 1) {
547 fNext->blendPixel(this->bilerpSamplePoint(start));
548 return;
549 }
550
524 SkScalar absLength = SkScalarAbs(length); 551 SkScalar absLength = SkScalarAbs(length);
mtklein 2016/07/21 19:19:25 May be clearer to calculate dx here and write the
herb_g 2016/07/21 19:52:20 Done.
525 if (absLength == 0.0f) { 552 if (absLength == 0.0f) {
526 this->spanZeroRate(span, y); 553 // length is zero, so clamp an edge pixel.
554 this->spanZeroRate(span);
527 } else if (absLength < (count - 1)) { 555 } else if (absLength < (count - 1)) {
528 this->spanSlowRate(span, y); 556 // 0 < |dx| < 1.
557 this->spanSlowRate(span);
529 } else if (absLength == (count - 1)) { 558 } else if (absLength == (count - 1)) {
530 if (std::fmod(span.startX() - 0.5f, 1.0f) == 0.0f) { 559 if (sample_to_filter(span.startX()) == 1.0f) {
mtklein 2016/07/21 19:19:25 if (s_to_f(x) == 1 && s_to_f(y) == 1) { src_str
herb_g 2016/07/21 19:52:20 Done.
531 if (std::fmod(span.startY() - 0.5f, 1.0f) == 0.0f) { 560 // |dx| == 1.
561 if (sample_to_filter(span.startY()) == 1.0f) {
562 // In fact, the src & dst pixels all line up, so go fast.
532 src_strategy_blend(span, fNext, &fAccessor); 563 src_strategy_blend(span, fNext, &fAccessor);
533 } else { 564 } else {
534 this->spanUnitRateAlignedX(span, y); 565 // |dx| == 1, but still must bilerp because y has fractional offset.
566 this->spanUnitRate(span);
535 } 567 }
536 } else { 568 } else {
537 this->spanUnitRate(span, y); 569 // |dx| == 1, x and y have fractional offsets.
570 this->spanUnitRate(span);
538 } 571 }
539 } else { 572 } else {
540 this->spanFastRate(span, y); 573 // |dx| > 1.
574 if (absLength < 2.0f * (count - 1)) {
mtklein 2016/07/21 19:19:25 might as well lift this if into the containing if
herb_g 2016/07/21 19:52:20 Done.
575 // 1 < |dx| < 2.
576 this->spanMediumRate(span);
577 } else {
578 // |dx| >= 2.
579 this->spanFastRate(span);
580 }
581 }
582 }
583
584 void repeatSpan(Span span, int32_t repeatCount) override {
585 while (repeatCount > 0) {
586 this->pointSpan(span);
587 repeatCount--;
541 } 588 }
542 } 589 }
543 590
544 private: 591 private:
545 void spanZeroRate(Span span, SkScalar y1) { 592
546 SkScalar y0 = span.startY() - 0.5f; 593 // Convert a sample point to the points used by the filter.
547 y1 += 0.5f; 594 void filterPoints(SkPoint sample, Sk4i* filterXs, Sk4i* filterYs) {
548 int iy0 = SkScalarFloorToInt(y0); 595 // May be less than zero. Be careful to use Floor.
549 SkScalar filterY1 = y0 - iy0; 596 int x0 = adjust_edge(fXEdgeType, SkScalarFloorToInt(X(sample) - 0.5), fX Max);
550 SkScalar filterY0 = 1.0f - filterY1; 597 // Always greater than zero. Use the faster Trunc.
551 int iy1 = SkScalarFloorToInt(y1); 598 int x1 = adjust_edge(fXEdgeType, SkScalarTruncToInt(X(sample) + 0.5), fX Max);
552 int ix = SkScalarFloorToInt(span.startX()); 599 int y0 = adjust_edge(fYEdgeType, SkScalarFloorToInt(Y(sample) - 0.5), fY Max);
553 Sk4f pixelY0 = fAccessor.getPixelFromRow(fAccessor.row(iy0), ix); 600 int y1 = adjust_edge(fYEdgeType, SkScalarTruncToInt(Y(sample) + 0.5), fY Max);
554 Sk4f pixelY1 = fAccessor.getPixelFromRow(fAccessor.row(iy1), ix); 601
555 Sk4f filterPixel = pixelY0 * filterY0 + pixelY1 * filterY1; 602 *filterXs = Sk4i{x0, x1, x0, x1};
556 int count = span.count(); 603 *filterYs = Sk4i{y0, y0, y1, y1};
604 }
605
606 // Given a sample point, generate a color by bilerping the four filter points.
607 Sk4f bilerpSamplePoint(SkPoint sample) {
608 Sk4i iXs, iYs;
609 filterPoints(sample, &iXs, &iYs);
610 Sk4f px00, px10, px01, px11;
611 fAccessor.get4Pixels(iXs, iYs, &px00, &px10, &px01, &px11);
612 return bilerp4(Sk4f{X(sample) - 0.5f}, Sk4f{Y(sample) - 0.5f}, px00, px1 0, px01, px11);
613 }
614
615 // Get two pixels at x from row0 and row1.
616 void get2PixelColumn(const void* row0, const void* row1, int x, Sk4f* px0, S k4f* px1) {
617 *px0 = fAccessor.getPixelFromRow(row0, x);
618 *px1 = fAccessor.getPixelFromRow(row1, x);
619 }
620
621 // |dx| == 0. This code assumes that length is zero.
622 void spanZeroRate(Span span) {
623 SkPoint start; SkScalar length; int count;
624 std::tie(start, length, count) = span;
625 SkASSERT(length == 0.0f);
626
627 // Filter for the blending of the top and bottom pixels.
628 SkScalar filterY = sample_to_filter(Y(start));
629
630 // Generate the four filter points from the sample point start. Generate the row* values.
631 Sk4i iXs, iYs;
632 this->filterPoints(start, &iXs, &iYs);
633 const void* const row0 = fAccessor.row(iYs[0]);
634 const void* const row1 = fAccessor.row(iYs[2]);
635
636 // Get the two pixels that make up the clamping pixel.
637 Sk4f pxTop, pxBottom;
638 this->get2PixelColumn(row0, row1, SkScalarFloorToInt(X(start)), &pxTop, &pxBottom);
639 Sk4f pixel = pxTop * filterY + (1.0f - filterY) * pxBottom;
640
557 while (count >= 4) { 641 while (count >= 4) {
558 fNext->blend4Pixels(filterPixel, filterPixel, filterPixel, filterPix el); 642 fNext->blend4Pixels(pixel, pixel, pixel, pixel);
559 count -= 4; 643 count -= 4;
560 } 644 }
561 while (count > 0) { 645 while (count > 0) {
562 fNext->blendPixel(filterPixel); 646 fNext->blendPixel(pixel);
563 count -= 1; 647 count -= 1;
564 } 648 }
565 } 649 }
566 650
567 // When moving through source space more slowly than dst space (zoomed in), 651 // 0 < |dx| < 1. This code reuses the calculations from previous pixels to r educe
568 // we'll be sampling from the same source pixel more than once. 652 // computation. In particular, several destination pixels maybe generated fr om the same four
569 void spanSlowRate(Span span, SkScalar ry1) { 653 // source pixels.
570 SkPoint start; 654 // In the following code a "part" is a combination of two pixels from the sa me column of the
571 SkScalar length; 655 // filter.
572 int count; 656 void spanSlowRate(Span span) {
657 SkPoint start; SkScalar length; int count;
573 std::tie(start, length, count) = span; 658 std::tie(start, length, count) = span;
574 SkFixed fx = SkScalarToFixed(X(start)-0.5f); 659
575 660 // Calculate the distance between each sample point.
576 SkFixed fdx = SkScalarToFixed(length / (count - 1)); 661 const SkScalar dx = length / (count - 1);
577 662 SkASSERT(-1.0f < dx && dx < 1.0f && dx != 0.0f);
578 Sk4f xAdjust; 663
579 if (fdx >= 0) { 664 // Generate the filter values for the top-left corner.
580 xAdjust = Sk4f{-1.0f}; 665 // Note: these values are in filter space; this has implications about h ow to adjust
666 // these values at each step. For example, as the sample point increases, the filter
667 // value decreases, this is because the filter and position are related by
668 // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite
669 // direction of the sample point which is increasing by dx.
670 SkScalar filterX = sample_to_filter(X(start));
671 SkScalar filterY = sample_to_filter(Y(start));
672
673 // Generate the four filter points from the sample point start. Generate the row* values.
674 Sk4i iXs, iYs;
675 this->filterPoints(start, &iXs, &iYs);
676 const void* const row0 = fAccessor.row(iYs[0]);
677 const void* const row1 = fAccessor.row(iYs[2]);
678
679 // Generate part of the filter value at xColumn.
680 auto partAtColumn = [&](int xColumn) {
681 int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
682 Sk4f pxTop, pxBottom;
683 this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom) ;
684 return pxTop * filterY + (1.0f - filterY) * pxBottom;
685 };
686
687 // The leftPart is made up of two pixels from the left column of the filter, right part
688 // is similar. The top and bottom pixels in the *Part are created as a linear blend of
689 // the top and bottom pixels using filterY. See the partAtColumn function above.
690 Sk4f leftPart = partAtColumn(iXs[0]);
691 Sk4f rightPart = partAtColumn(iXs[1]);
692
693 // Create a destination color by blending together a left and right part using filterX.
694 auto bilerp = [&]() {
695 Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX);
696 return check_pixel(pixel);
697 };
698
699 // Send the first pixel to the destination. This simplifies the loop structure so that no
700 // extra pixels are fetched for the last iteration of the loop.
701 fNext->blendPixel(bilerp());
702 count -= 1;
703
704 if (dx > 0.0f) {
705 // * positive direction - generate destination pixels by sliding the filter from left
706 // to right.
707 int rightPartCursor = iXs[1];
708
709 // Advance the filter from left to right. Remember that moving the top-left corner of
710 // the filter to the right actually makes the filter value smaller.
711 auto advanceFilter = [&]() {
712 filterX -= dx;
713 if (filterX <= 0.0f) {
714 filterX += 1.0f;
715 leftPart = rightPart;
716 rightPartCursor += 1;
717 rightPart = partAtColumn(rightPartCursor);
718 }
719 SkASSERT(0.0f < filterX && filterX <= 1.0f);
720
721 return bilerp();
722 };
723
724 while (count >= 4) {
725 Sk4f px0 = advanceFilter(),
726 px1 = advanceFilter(),
727 px2 = advanceFilter(),
728 px3 = advanceFilter();
729 fNext->blend4Pixels(px0, px1, px2, px3);
730 count -= 4;
731 }
732
733 while (count > 0) {
734 fNext->blendPixel(advanceFilter());
735 count -= 1;
736 }
581 } else { 737 } else {
582 xAdjust = Sk4f{1.0f}; 738 // * negative direction - generate destination pixels by sliding the filter from
583 } 739 // right to left.
584 int ix = SkFixedFloorToInt(fx); 740 int leftPartCursor = iXs[0];
585 int ioldx = ix; 741
586 Sk4f x{SkFixedToScalar(fx) - ix}; 742 // Advance the filter from right to left. Remember that moving the t op-left corner of
587 Sk4f dx{SkFixedToScalar(fdx)}; 743 // the filter to the left actually makes the filter value larger.
588 SkScalar ry0 = Y(start) - 0.5f; 744 auto advanceFilter = [&]() {
589 ry1 += 0.5f; 745 filterX -= dx;
mtklein 2016/07/21 19:19:25 // remember, dx < 0
herb_g 2016/07/21 19:52:20 Done.
590 SkScalar yFloor = std::floor(ry0); 746 // At this point filterX may be > 1, and needs to be wrapped bac k on to the filter
591 Sk4f y1 = Sk4f{ry0 - yFloor}; 747 // interval, and the next column in the filter is calculated.
592 Sk4f y0 = Sk4f{1.0f} - y1; 748 if (filterX > 1.0f) {
593 const void* const row0 = fAccessor.row(SkScalarFloorToInt(ry0)); 749 filterX -= 1.0f;
594 const void* const row1 = fAccessor.row(SkScalarFloorToInt(ry1)); 750 rightPart = leftPart;
595 Sk4f fpixel00 = y0 * fAccessor.getPixelFromRow(row0, ix); 751 leftPartCursor -= 1;
596 Sk4f fpixel01 = y1 * fAccessor.getPixelFromRow(row1, ix); 752 leftPart = partAtColumn(leftPartCursor);
597 Sk4f fpixel10 = y0 * fAccessor.getPixelFromRow(row0, ix + 1); 753 }
598 Sk4f fpixel11 = y1 * fAccessor.getPixelFromRow(row1, ix + 1); 754 SkASSERT(0.0f < filterX && filterX <= 1.0f);
599 auto getNextPixel = [&]() { 755
600 if (ix != ioldx) { 756 return bilerp();
601 fpixel00 = fpixel10; 757 };
602 fpixel01 = fpixel11; 758
603 fpixel10 = y0 * fAccessor.getPixelFromRow(row0, ix + 1); 759 while (count >= 4) {
604 fpixel11 = y1 * fAccessor.getPixelFromRow(row1, ix + 1); 760 Sk4f px0 = advanceFilter(),
605 ioldx = ix; 761 px1 = advanceFilter(),
606 x = x + xAdjust; 762 px2 = advanceFilter(),
607 } 763 px3 = advanceFilter();
608 764 fNext->blend4Pixels(px0, px1, px2, px3);
609 Sk4f x0, x1; 765 count -= 4;
610 x0 = Sk4f{1.0f} - x; 766 }
611 x1 = x; 767
612 Sk4f fpixel = x0 * (fpixel00 + fpixel01) + x1 * (fpixel10 + fpixel11 ); 768 while (count > 0) {
613 fx += fdx; 769 fNext->blendPixel(advanceFilter());
614 ix = SkFixedFloorToInt(fx); 770 count -= 1;
615 x = x + dx; 771 }
616 return fpixel; 772 }
617 }; 773 }
618 774
619 while (count >= 4) { 775 // |dx| == 1. Moving through source space at a rate of 1 source pixel per 1 dst pixel.
620 Sk4f fpixel0 = getNextPixel(); 776 // Every filter part is used for two destination pixels, and the code can bu lk load four
621 Sk4f fpixel1 = getNextPixel(); 777 // pixels at a time.
622 Sk4f fpixel2 = getNextPixel(); 778 void spanUnitRate(Span span) {
623 Sk4f fpixel3 = getNextPixel(); 779 SkPoint start; SkScalar length; int count;
624 780 std::tie(start, length, count) = span;
625 fNext->blend4Pixels(fpixel0, fpixel1, fpixel2, fpixel3); 781 SkASSERT(SkScalarAbs(length) == (count - 1));
626 count -= 4; 782
627 } 783 // Calculate the four filter points of start, and use the two different Y values to
628 784 // generate the row pointers.
629 while (count > 0) { 785 Sk4i iXs, iYs;
630 fNext->blendPixel(getNextPixel()); 786 filterPoints(start, &iXs, &iYs);
631 787 const void* row0 = fAccessor.row(iYs[0]);
632 count -= 1; 788 const void* row1 = fAccessor.row(iYs[2]);
633 } 789
634 } 790 // Calculate the filter values for the top-left filter element.
635 791 const SkScalar filterX = sample_to_filter(X(start));
636 // We're moving through source space at a rate of 1 source pixel per 1 dst p ixel. 792 const SkScalar filterY = sample_to_filter(Y(start));
637 // We'll never re-use pixels, but we can at least load contiguous pixels. 793
638 void spanUnitRate(Span span, SkScalar y1) { 794 // Generate part of the filter value at xColumn.
639 y1 += 0.5f; 795 auto partAtColumn = [&](int xColumn) {
640 SkScalar y0 = span.startY() - 0.5f; 796 int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
641 int iy0 = SkScalarFloorToInt(y0); 797 Sk4f pxTop, pxBottom;
642 SkScalar filterY1 = y0 - iy0; 798 this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom) ;
643 SkScalar filterY0 = 1.0f - filterY1; 799 return pxTop * filterY + (1.0f - filterY) * pxBottom;
644 int iy1 = SkScalarFloorToInt(y1); 800 };
645 const void* rowY0 = fAccessor.row(iy0); 801
646 const void* rowY1 = fAccessor.row(iy1); 802 auto get4Parts = [&](int ix, Sk4f* part0, Sk4f* part1, Sk4f* part2, Sk4f * part3) {
647 SkScalar x0 = span.startX() - 0.5f; 803 // Check if the pixels needed are near the edges. If not go fast usi ng bulk pixels,
648 int ix0 = SkScalarFloorToInt(x0); 804 // otherwise be careful.
649 SkScalar filterX1 = x0 - ix0; 805 if (0 <= ix && ix <= fXMax - 3) {
650 SkScalar filterX0 = 1.0f - filterX1; 806 Sk4f px00, px10, px20, px30,
651 807 px01, px11, px21, px31;
652 auto getPixelY0 = [&]() { 808 fAccessor.get4Pixels(row0, ix, &px00, &px10, &px20, &px30);
653 Sk4f px = fAccessor.getPixelFromRow(rowY0, ix0); 809 fAccessor.get4Pixels(row1, ix, &px01, &px11, &px21, &px31);
654 return px * filterY0; 810 *part0 = filterY * px00 + (1.0f - filterY) * px01;
655 }; 811 *part1 = filterY * px10 + (1.0f - filterY) * px11;
656 812 *part2 = filterY * px20 + (1.0f - filterY) * px21;
657 auto getPixelY1 = [&]() { 813 *part3 = filterY * px30 + (1.0f - filterY) * px31;
658 Sk4f px = fAccessor.getPixelFromRow(rowY1, ix0); 814 } else {
659 return px * filterY1; 815 *part0 = partAtColumn(ix + 0);
660 }; 816 *part1 = partAtColumn(ix + 1);
661 817 *part2 = partAtColumn(ix + 2);
662 auto get4PixelsY0 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* p x3) { 818 *part3 = partAtColumn(ix + 3);
663 fAccessor.get4Pixels(rowY0, ix, px0, px1, px2, px3); 819 }
664 *px0 = *px0 * filterY0; 820 };
665 *px1 = *px1 * filterY0; 821
666 *px2 = *px2 * filterY0; 822 auto bilerp = [&](Sk4f& part0, Sk4f& part1) {
667 *px3 = *px3 * filterY0; 823 return part0 * filterX + part1 * (1.0f - filterX);
668 }; 824 };
669 825
670 auto get4PixelsY1 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* p x3) { 826 if (length > 0) {
671 fAccessor.get4Pixels(rowY1, ix, px0, px1, px2, px3); 827 // * positive direction - generate destination pixels by sliding the filter from left
672 *px0 = *px0 * filterY1; 828 // to right.
673 *px1 = *px1 * filterY1; 829
674 *px2 = *px2 * filterY1; 830 // overlapPart is the filter part from the end of the previous four pixels used at
675 *px3 = *px3 * filterY1; 831 // the start of the next four pixels.
676 }; 832 Sk4f overlapPart = partAtColumn(iXs[0]);
677 833 int rightColumnCursor = iXs[1];
678 auto lerp = [&](Sk4f& pixelX0, Sk4f& pixelX1) { 834 while (count >= 4) {
679 return pixelX0 * filterX0 + pixelX1 * filterX1; 835 Sk4f part0, part1, part2, part3;
680 }; 836 get4Parts(rightColumnCursor, &part0, &part1, &part2, &part3);
681 837 Sk4f px0 = bilerp(overlapPart, part0);
682 // Mid making 4 unit rate. 838 Sk4f px1 = bilerp(part0, part1);
683 Sk4f pxB = getPixelY0() + getPixelY1(); 839 Sk4f px2 = bilerp(part1, part2);
684 if (span.length() > 0) { 840 Sk4f px3 = bilerp(part2, part3);
685 int count = span.count(); 841 overlapPart = part3;
686 while (count >= 4) { 842 fNext->blend4Pixels(px0, px1, px2, px3);
687 Sk4f px00, px10, px20, px30; 843 rightColumnCursor += 4;
688 get4PixelsY0(ix0, &px00, &px10, &px20, &px30); 844 count -= 4;
689 Sk4f px01, px11, px21, px31; 845 }
690 get4PixelsY1(ix0, &px01, &px11, &px21, &px31); 846
691 Sk4f pxS0 = px00 + px01; 847 while (count > 0) {
692 Sk4f px0 = lerp(pxB, pxS0); 848 Sk4f rightPart = partAtColumn(rightColumnCursor);
693 Sk4f pxS1 = px10 + px11; 849
694 Sk4f px1 = lerp(pxS0, pxS1); 850 fNext->blendPixel(bilerp(overlapPart, rightPart));
695 Sk4f pxS2 = px20 + px21; 851 overlapPart = rightPart;
696 Sk4f px2 = lerp(pxS1, pxS2); 852 rightColumnCursor += 1;
697 Sk4f pxS3 = px30 + px31;
698 Sk4f px3 = lerp(pxS2, pxS3);
699 pxB = pxS3;
700 fNext->blend4Pixels(px0, px1, px2, px3);
701 ix0 += 4;
702 count -= 4;
703 }
704 while (count > 0) {
705 Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix0);
706 Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix0);
707
708 fNext->blendPixel(lerp(pixelY0, pixelY1));
709 ix0 += 1;
710 count -= 1; 853 count -= 1;
711 } 854 }
712 } else { 855 } else {
713 int count = span.count(); 856 // * negative direction - generate destination pixels by sliding the filter from
714 while (count >= 4) { 857 // right to left.
715 Sk4f px00, px10, px20, px30; 858 Sk4f overlapPart = partAtColumn(iXs[1]);
716 get4PixelsY0(ix0 - 3, &px00, &px10, &px20, &px30); 859 int leftColumnCursor = iXs[0];
717 Sk4f px01, px11, px21, px31; 860
718 get4PixelsY1(ix0 - 3, &px01, &px11, &px21, &px31); 861 while (count >= 4) {
719 Sk4f pxS3 = px30 + px31; 862 Sk4f part0, part1, part2, part3;
720 Sk4f px0 = lerp(pxS3, pxB); 863 get4Parts(leftColumnCursor - 3, &part3, &part2, &part1, &part0);
721 Sk4f pxS2 = px20 + px21; 864 Sk4f px0 = bilerp(part0, overlapPart);
722 Sk4f px1 = lerp(pxS2, pxS3); 865 Sk4f px1 = bilerp(part1, part0);
723 Sk4f pxS1 = px10 + px11; 866 Sk4f px2 = bilerp(part2, part1);
724 Sk4f px2 = lerp(pxS1, pxS2); 867 Sk4f px3 = bilerp(part3, part2);
725 Sk4f pxS0 = px00 + px01; 868 overlapPart = part3;
726 Sk4f px3 = lerp(pxS0, pxS1); 869 fNext->blend4Pixels(px0, px1, px2, px3);
727 pxB = pxS0; 870 leftColumnCursor -= 4;
728 fNext->blend4Pixels(px0, px1, px2, px3); 871 count -= 4;
729 ix0 -= 4; 872 }
730 count -= 4; 873
731 } 874 while (count > 0) {
732 while (count > 0) { 875 Sk4f leftPart = partAtColumn(leftColumnCursor);
733 Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix0); 876
734 Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix0); 877 fNext->blendPixel(bilerp(leftPart, overlapPart));
735 878 overlapPart = leftPart;
736 fNext->blendPixel(lerp(pixelY0, pixelY1)); 879 leftColumnCursor -= 1;
737 ix0 -= 1; 880 count -= 1;
738 count -= 1; 881 }
739 } 882 }
740 } 883 }
741 } 884
742 885 // 1 < |dx| < 2. Going through the source pixels at a faster rate than the d est pixels, but
743 void spanUnitRateAlignedX(Span span, SkScalar y1) { 886 // still slow enough to take advantage of previous calculations.
744 SkScalar y0 = span.startY() - 0.5f; 887 void spanMediumRate(Span span) {
745 y1 += 0.5f; 888 SkPoint start; SkScalar length; int count;
746 int iy0 = SkScalarFloorToInt(y0); 889 std::tie(start, length, count) = span;
747 SkScalar filterY1 = y0 - iy0; 890
748 SkScalar filterY0 = 1.0f - filterY1; 891 // Calculate the distance between each sample point.
749 int iy1 = SkScalarFloorToInt(y1); 892 const SkScalar dx = length / (count - 1);
750 int ix = SkScalarFloorToInt(span.startX()); 893 SkASSERT((-2.0f < dx && dx < -1.0f) || (1.0f < dx && dx < 2.0f));
751 const void* rowY0 = fAccessor.row(iy0); 894
752 const void* rowY1 = fAccessor.row(iy1); 895 // Generate the filter values for the top-left corner.
753 auto lerp = [&](Sk4f* pixelY0, Sk4f* pixelY1) { 896 // Note: these values are in filter space; this has implications about h ow to adjust
754 return *pixelY0 * filterY0 + *pixelY1 * filterY1; 897 // these values at each step. For example, as the sample point increases , the filter
755 }; 898 // value decreases, this is because the filter and position are related by
756 899 // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite
757 if (span.length() > 0) { 900 // direction of the sample point which is increasing by dx.
758 int count = span.count(); 901 SkScalar filterX = sample_to_filter(X(start));
759 while (count >= 4) { 902 SkScalar filterY = sample_to_filter(Y(start));
760 Sk4f px00, px10, px20, px30; 903
761 fAccessor.get4Pixels(rowY0, ix, &px00, &px10, &px20, &px30); 904 // Generate the four filter points from the sample point start. Generate the row* values.
762 Sk4f px01, px11, px21, px31; 905 Sk4i iXs, iYs;
763 fAccessor.get4Pixels(rowY1, ix, &px01, &px11, &px21, &px31); 906 this->filterPoints(start, &iXs, &iYs);
764 fNext->blend4Pixels( 907 const void* const row0 = fAccessor.row(iYs[0]);
765 lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31)); 908 const void* const row1 = fAccessor.row(iYs[2]);
766 ix += 4; 909
767 count -= 4; 910 // Generate part of the filter value at xColumn.
768 } 911 auto partAtColumn = [&](int xColumn) {
769 while (count > 0) { 912 int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
770 Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix); 913 Sk4f pxTop, pxBottom;
771 Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix); 914 this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom) ;
772 915 return pxTop * filterY + (1.0f - filterY) * pxBottom;
773 fNext->blendPixel(lerp(&pixelY0, &pixelY1)); 916 };
774 ix += 1; 917
918 // The leftPart is made up of two pixels from the left column of the filter, right part
919 // is similar. The top and bottom pixels in the *Part are created as a linear blend of
920 // the top and bottom pixels using filterY. See the partAtColumn function above.
921 Sk4f leftPart = partAtColumn(iXs[0]);
922 Sk4f rightPart = partAtColumn(iXs[1]);
923
924 // Create a destination color by blending together a left and right part using filterX.
925 auto bilerp = [&]() {
926 Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX);
927 return check_pixel(pixel);
928 };
929
930 // Send the first pixel to the destination. This simplifies the loop structure so that no
931 // extra pixels are fetched for the last iteration of the loop.
932 fNext->blendPixel(bilerp());
933 count -= 1;
934
935 if (dx > 0.0f) {
936 // * positive direction - generate destination pixels by sliding the filter from left
937 // to right.
938 int rightPartCursor = iXs[1];
939
940 // Advance the filter from left to right. Remember that moving the top-left corner of
941 // the filter to the right actually makes the filter value smaller.
942 auto advanceFilter = [&]() {
943 filterX -= dx;
944 // At this point filterX is less than zero, but might actually be less than -1.
945 if (filterX > -1.0f) {
946 filterX += 1.0f;
947 leftPart = rightPart;
948 rightPartCursor += 1;
949 rightPart = partAtColumn(rightPartCursor);
950 } else {
951 filterX += 2.0f;
952 rightPartCursor += 2;
953 leftPart = partAtColumn(rightPartCursor - 1);
954 rightPart = partAtColumn(rightPartCursor);
955 }
956 SkASSERT(0.0f < filterX && filterX <= 1.0f);
957
958 return bilerp();
959 };
960
961 while (count >= 4) {
962 Sk4f px0 = advanceFilter(),
963 px1 = advanceFilter(),
964 px2 = advanceFilter(),
965 px3 = advanceFilter();
966 fNext->blend4Pixels(px0, px1, px2, px3);
967 count -= 4;
968 }
969
970 while (count > 0) {
971 fNext->blendPixel(advanceFilter());
775 count -= 1; 972 count -= 1;
776 } 973 }
777 } else { 974 } else {
778 int count = span.count(); 975 // * negative direction - generate destination pixels by sliding the filter from
779 while (count >= 4) { 976 // right to left.
780 Sk4f px00, px10, px20, px30; 977 int leftPartCursor = iXs[0];
781 fAccessor.get4Pixels(rowY0, ix - 3, &px30, &px20, &px10, &px00); 978
782 Sk4f px01, px11, px21, px31; 979 auto advanceFilter = [&]() {
783 fAccessor.get4Pixels(rowY1, ix - 3, &px31, &px21, &px11, &px01); 980 filterX -= dx;
mtklein 2016/07/21 19:19:25 // remember, dx < 0, so filterX is increasing.
herb_g 2016/07/21 19:52:20 Done.
784 fNext->blend4Pixels( 981 // At this point, filterX is greater than one, but may actually be greater than two.
785 lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31)); 982 if (filterX < 2.0f) {
786 ix -= 4; 983 filterX -= 1.0f;
787 count -= 4; 984 rightPart = leftPart;
788 } 985 leftPartCursor -= 1;
789 while (count > 0) { 986 leftPart = partAtColumn(leftPartCursor);
790 Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix); 987 } else {
791 Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix); 988 filterX -= 2.0f;
792 989 leftPartCursor -= 2;
793 fNext->blendPixel(lerp(&pixelY0, &pixelY1)); 990 rightPart = partAtColumn(leftPartCursor - 1);
794 ix -= 1; 991 leftPart = partAtColumn(leftPartCursor);
795 count -= 1; 992 }
796 } 993 SkASSERT(0.0f < filterX && filterX <= 1.0f);
797 } 994 return bilerp();
995 };
996
997 while (count >= 4) {
998 Sk4f px0 = advanceFilter(),
999 px1 = advanceFilter(),
1000 px2 = advanceFilter(),
1001 px3 = advanceFilter();
1002 fNext->blend4Pixels(px0, px1, px2, px3);
1003 count -= 4;
1004 }
1005
1006 while (count > 0) {
1007 fNext->blendPixel(advanceFilter());
1008 count -= 1;
1009 }
1010 }
798 } 1011 }
799 1012
800 // We're moving through source space faster than dst (zoomed out), 1013 // We're moving through source space faster than dst (zoomed out),
801 // so we'll never reuse a source pixel or be able to do contiguous loads. 1014 // so we'll never reuse a source pixel or be able to do contiguous loads.
802 void spanFastRate(Span span, SkScalar y1) { 1015 void spanFastRate(Span span) {
803 SkPoint start; 1016 SkPoint start; SkScalar length; int count;
804 SkScalar length;
805 int count;
806 std::tie(start, length, count) = span; 1017 std::tie(start, length, count) = span;
807 SkScalar x = X(start); 1018 SkScalar x = X(start);
808 SkScalar y = Y(start); 1019 SkScalar y = Y(start);
809 1020
810 // In this sampler, it is assumed that if span.StartY() and y1 are the s ame then both 1021 SkScalar dx = length / (count - 1);
811 // y-lines are on the same tile. 1022 while (count > 0) {
812 if (y == y1) { 1023 fNext->blendPixel(this->bilerpSamplePoint(SkPoint{x, y}));
813 // Both y-lines are on the same tile. 1024 x += dx;
814 span_fallback(span, this); 1025 count -= 1;
815 } else {
816 // The y-lines are on different tiles.
817 SkScalar dx = length / (count - 1);
818 Sk4f ys = {y - 0.5f, y - 0.5f, y1 + 0.5f, y1 + 0.5f};
819 while (count > 0) {
820 Sk4f xs = Sk4f{-0.5f, 0.5f, -0.5f, 0.5f} + Sk4f{x};
821 this->bilerpEdge(xs, ys);
822 x += dx;
823 count -= 1;
824 }
825 } 1026 }
826 } 1027 }
827 1028
828 Next* const fNext; 1029 Next* const fNext;
829 Accessor fAccessor; 1030 const SkShader::TileMode fXEdgeType;
1031 const int fXMax;
1032 const SkShader::TileMode fYEdgeType;
1033 const int fYMax;
1034 Accessor fAccessor;
830 }; 1035 };
831 1036
832 } // namespace 1037 } // namespace
833 1038
834 #endif // SkLinearBitmapPipeline_sampler_DEFINED 1039 #endif // SkLinearBitmapPipeline_sampler_DEFINED
OLDNEW
« no previous file with comments | « src/core/SkLinearBitmapPipeline_core.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698