OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkLinearBitmapPipeline_sampler_DEFINED | 8 #ifndef SkLinearBitmapPipeline_sampler_DEFINED |
9 #define SkLinearBitmapPipeline_sampler_DEFINED | 9 #define SkLinearBitmapPipeline_sampler_DEFINED |
10 | 10 |
(...skipping 22 matching lines...) Expand all Loading... | |
33 // | 33 // |
34 // | 34 // |
35 // Given a pixelxy each is multiplied by a different factor derived from the fra ctional part of x | 35 // Given a pixelxy each is multiplied by a different factor derived from the fra ctional part of x |
36 // and y: | 36 // and y: |
37 // * px00 -> (1 - x)(1 - y) = 1 - x - y + xy | 37 // * px00 -> (1 - x)(1 - y) = 1 - x - y + xy |
38 // * px10 -> x(1 - y) = x - xy | 38 // * px10 -> x(1 - y) = x - xy |
39 // * px01 -> (1 - x)y = y - xy | 39 // * px01 -> (1 - x)y = y - xy |
40 // * px11 -> xy | 40 // * px11 -> xy |
41 // So x * y is calculated first and then used to calculate all the other factors . | 41 // So x * y is calculated first and then used to calculate all the other factors . |
42 static Sk4s SK_VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10, | 42 static Sk4s SK_VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10, |
43 Sk4f px01, Sk4f px11) { | 43 Sk4f px01, Sk4f px11) { |
44 // Calculate fractional xs and ys. | 44 // Calculate fractional xs and ys. |
45 Sk4s fxs = xs - xs.floor(); | 45 Sk4s fxs = xs - xs.floor(); |
46 Sk4s fys = ys - ys.floor(); | 46 Sk4s fys = ys - ys.floor(); |
47 Sk4s fxys{fxs * fys}; | 47 Sk4s fxys{fxs * fys}; |
48 Sk4f sum = px11 * fxys; | 48 Sk4f sum = px11 * fxys; |
49 sum = sum + px01 * (fys - fxys); | 49 sum = sum + px01 * (fys - fxys); |
50 sum = sum + px10 * (fxs - fxys); | 50 sum = sum + px10 * (fxs - fxys); |
51 sum = sum + px00 * (Sk4f{1.0f} - fxs - fys + fxys); | 51 sum = sum + px00 * (Sk4f{1.0f} - fxs - fys + fxys); |
52 return sum; | 52 return sum; |
53 } | 53 } |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
127 Sk4f toSk4f(Element pixel) const { | 127 Sk4f toSk4f(Element pixel) const { |
128 return swizzle_rb( | 128 return swizzle_rb( |
129 gammaType == kSRGB_SkGammaType ? Sk4f_fromS32(pixel) : Sk4f_f romL32(pixel)); | 129 gammaType == kSRGB_SkGammaType ? Sk4f_fromS32(pixel) : Sk4f_f romL32(pixel)); |
130 } | 130 } |
131 }; | 131 }; |
132 | 132 |
133 template <SkGammaType gammaType> | 133 template <SkGammaType gammaType> |
134 class PixelConverter<kIndex_8_SkColorType, gammaType> { | 134 class PixelConverter<kIndex_8_SkColorType, gammaType> { |
135 public: | 135 public: |
136 using Element = uint8_t; | 136 using Element = uint8_t; |
137 PixelConverter(const SkPixmap& srcPixmap) { | 137 PixelConverter(const SkPixmap& srcPixmap) |
138 : fColorTableSize(srcPixmap.ctable()->count()){ | |
138 SkColorTable* skColorTable = srcPixmap.ctable(); | 139 SkColorTable* skColorTable = srcPixmap.ctable(); |
139 SkASSERT(skColorTable != nullptr); | 140 SkASSERT(skColorTable != nullptr); |
140 | 141 |
141 fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get()); | 142 fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get()); |
142 for (int i = 0; i < skColorTable->count(); i++) { | 143 for (int i = 0; i < fColorTableSize; i++) { |
143 fColorTable[i] = pmcolor_to_rgba<gammaType>((*skColorTable)[i]); | 144 fColorTable[i] = pmcolor_to_rgba<gammaType>((*skColorTable)[i]); |
144 } | 145 } |
145 } | 146 } |
146 | 147 |
147 PixelConverter(const PixelConverter& strategy) { | 148 PixelConverter(const PixelConverter& strategy) |
149 : fColorTableSize{strategy.fColorTableSize}{ | |
148 fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get()); | 150 fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get()); |
149 // TODO: figure out the count. | 151 for (int i = 0; i < fColorTableSize; i++) { |
150 for (int i = 0; i < 256; i++) { | |
151 fColorTable[i] = strategy.fColorTable[i]; | 152 fColorTable[i] = strategy.fColorTable[i]; |
152 } | 153 } |
153 } | 154 } |
154 | 155 |
155 Sk4f toSk4f(Element index) const { | 156 Sk4f toSk4f(Element index) const { |
156 return fColorTable[index]; | 157 return fColorTable[index]; |
157 } | 158 } |
158 | 159 |
159 private: | 160 private: |
160 static const size_t kColorTableSize = sizeof(Sk4f[256]) + 12; | 161 static const size_t kColorTableSize = sizeof(Sk4f[256]) + 12; |
161 | 162 const int fColorTableSize; |
162 SkAutoMalloc fColorTableStorage{kColorTableSize}; | 163 SkAutoMalloc fColorTableStorage{kColorTableSize}; |
163 Sk4f* fColorTable; | 164 Sk4f* fColorTable; |
164 }; | 165 }; |
165 | 166 |
166 template <SkGammaType gammaType> | 167 template <SkGammaType gammaType> |
167 class PixelConverter<kGray_8_SkColorType, gammaType> { | 168 class PixelConverter<kGray_8_SkColorType, gammaType> { |
168 public: | 169 public: |
169 using Element = uint8_t; | 170 using Element = uint8_t; |
170 PixelConverter(const SkPixmap& srcPixmap) { } | 171 PixelConverter(const SkPixmap& srcPixmap) { } |
171 | 172 |
172 Sk4f toSk4f(Element pixel) const { | 173 Sk4f toSk4f(Element pixel) const { |
173 float gray = pixel * (1.0f/255.0f); | 174 float gray = pixel * (1.0f/255.0f); |
(...skipping 14 matching lines...) Expand all Loading... | |
188 return SkHalfToFloat_finite(pixel); | 189 return SkHalfToFloat_finite(pixel); |
189 } | 190 } |
190 }; | 191 }; |
191 | 192 |
192 class PixelAccessorShim { | 193 class PixelAccessorShim { |
193 public: | 194 public: |
194 explicit PixelAccessorShim(SkLinearBitmapPipeline::PixelAccessorInterface* a ccessor) | 195 explicit PixelAccessorShim(SkLinearBitmapPipeline::PixelAccessorInterface* a ccessor) |
195 : fPixelAccessor(accessor) { } | 196 : fPixelAccessor(accessor) { } |
196 | 197 |
197 void SK_VECTORCALL getFewPixels( | 198 void SK_VECTORCALL getFewPixels( |
198 int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const { | 199 int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const { |
199 fPixelAccessor->getFewPixels(n, xs, ys, px0, px1, px2); | 200 fPixelAccessor->getFewPixels(n, xs, ys, px0, px1, px2); |
200 } | 201 } |
201 | 202 |
202 void SK_VECTORCALL get4Pixels( | 203 void SK_VECTORCALL get4Pixels( |
203 Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const { | 204 Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const { |
204 fPixelAccessor->get4Pixels(xs, ys, px0, px1, px2, px3); | 205 fPixelAccessor->get4Pixels(xs, ys, px0, px1, px2, px3); |
205 } | 206 } |
206 | 207 |
207 void get4Pixels( | 208 void get4Pixels( |
208 const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const { | 209 const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const { |
209 fPixelAccessor->get4Pixels(src, index, px0, px1, px2, px3); | 210 fPixelAccessor->get4Pixels(src, index, px0, px1, px2, px3); |
210 }; | 211 }; |
211 | 212 |
212 Sk4f getPixelFromRow(const void* row, int index) const { | 213 Sk4f getPixelFromRow(const void* row, int index) const { |
213 return fPixelAccessor->getPixelFromRow(row, index); | 214 return fPixelAccessor->getPixelFromRow(row, index); |
(...skipping 17 matching lines...) Expand all Loading... | |
231 class PixelAccessor final : public SkLinearBitmapPipeline::PixelAccessorInterfac e { | 232 class PixelAccessor final : public SkLinearBitmapPipeline::PixelAccessorInterfac e { |
232 using Element = typename PixelConverter<colorType, gammaType>::Element; | 233 using Element = typename PixelConverter<colorType, gammaType>::Element; |
233 public: | 234 public: |
234 template <typename... Args> | 235 template <typename... Args> |
235 PixelAccessor(const SkPixmap& srcPixmap, Args&&... args) | 236 PixelAccessor(const SkPixmap& srcPixmap, Args&&... args) |
236 : fSrc{static_cast<const Element*>(srcPixmap.addr())} | 237 : fSrc{static_cast<const Element*>(srcPixmap.addr())} |
237 , fWidth{srcPixmap.rowBytesAsPixels()} | 238 , fWidth{srcPixmap.rowBytesAsPixels()} |
238 , fConverter{srcPixmap, std::move<Args>(args)...} { } | 239 , fConverter{srcPixmap, std::move<Args>(args)...} { } |
239 | 240 |
240 void SK_VECTORCALL getFewPixels ( | 241 void SK_VECTORCALL getFewPixels ( |
241 int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override { | 242 int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override { |
242 Sk4i XIs = SkNx_cast<int, SkScalar>(xs); | 243 Sk4i bufferLoc = ys * fWidth + xs; |
243 Sk4i YIs = SkNx_cast<int, SkScalar>(ys); | |
244 Sk4i bufferLoc = YIs * fWidth + XIs; | |
245 switch (n) { | 244 switch (n) { |
246 case 3: | 245 case 3: |
247 *px2 = this->getPixelAt(bufferLoc[2]); | 246 *px2 = this->getPixelAt(bufferLoc[2]); |
248 case 2: | 247 case 2: |
249 *px1 = this->getPixelAt(bufferLoc[1]); | 248 *px1 = this->getPixelAt(bufferLoc[1]); |
250 case 1: | 249 case 1: |
251 *px0 = this->getPixelAt(bufferLoc[0]); | 250 *px0 = this->getPixelAt(bufferLoc[0]); |
252 default: | 251 default: |
253 break; | 252 break; |
254 } | 253 } |
255 } | 254 } |
256 | 255 |
257 void SK_VECTORCALL get4Pixels( | 256 void SK_VECTORCALL get4Pixels( |
258 Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const over ride { | 257 Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const over ride { |
259 Sk4i XIs = SkNx_cast<int, SkScalar>(xs); | 258 Sk4i bufferLoc = ys * fWidth + xs; |
260 Sk4i YIs = SkNx_cast<int, SkScalar>(ys); | |
261 Sk4i bufferLoc = YIs * fWidth + XIs; | |
262 *px0 = this->getPixelAt(bufferLoc[0]); | 259 *px0 = this->getPixelAt(bufferLoc[0]); |
263 *px1 = this->getPixelAt(bufferLoc[1]); | 260 *px1 = this->getPixelAt(bufferLoc[1]); |
264 *px2 = this->getPixelAt(bufferLoc[2]); | 261 *px2 = this->getPixelAt(bufferLoc[2]); |
265 *px3 = this->getPixelAt(bufferLoc[3]); | 262 *px3 = this->getPixelAt(bufferLoc[3]); |
266 } | 263 } |
267 | 264 |
268 void get4Pixels( | 265 void get4Pixels( |
269 const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override { | 266 const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override { |
270 *px0 = this->getPixelFromRow(src, index + 0); | 267 *px0 = this->getPixelFromRow(src, index + 0); |
271 *px1 = this->getPixelFromRow(src, index + 1); | 268 *px1 = this->getPixelFromRow(src, index + 1); |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
324 } | 321 } |
325 | 322 |
326 while (count > 0) { | 323 while (count > 0) { |
327 next->blendPixel(strategy->getPixelFromRow(row, ix)); | 324 next->blendPixel(strategy->getPixelFromRow(row, ix)); |
328 ix -= 1; | 325 ix -= 1; |
329 count -= 1; | 326 count -= 1; |
330 } | 327 } |
331 } | 328 } |
332 } | 329 } |
333 | 330 |
331 // -- NearestNeighborSampler --------------------------------------------------- -------------------- | |
334 // NearestNeighborSampler - use nearest neighbor filtering to create runs of des tination pixels. | 332 // NearestNeighborSampler - use nearest neighbor filtering to create runs of des tination pixels. |
335 template<typename Accessor, typename Next> | 333 template<typename Accessor, typename Next> |
336 class NearestNeighborSampler : public SkLinearBitmapPipeline::SampleProcessorInt erface { | 334 class NearestNeighborSampler : public SkLinearBitmapPipeline::SampleProcessorInt erface { |
337 public: | 335 public: |
338 template<typename... Args> | 336 template<typename... Args> |
339 NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next , Args&& ... args) | 337 NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next , Args&& ... args) |
340 : fNext{next}, fAccessor{std::forward<Args>(args)...} { } | 338 : fNext{next}, fAccessor{std::forward<Args>(args)...} { } |
341 | 339 |
342 NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next , | 340 NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next , |
343 const NearestNeighborSampler& sampler) | 341 const NearestNeighborSampler& sampler) |
344 : fNext{next}, fAccessor{sampler.fAccessor} { } | 342 : fNext{next}, fAccessor{sampler.fAccessor} { } |
345 | 343 |
346 void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { | 344 void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { |
347 SkASSERT(0 < n && n < 4); | 345 SkASSERT(0 < n && n < 4); |
348 Sk4f px0, px1, px2; | 346 Sk4f px0, px1, px2; |
349 fAccessor.getFewPixels(n, xs, ys, &px0, &px1, &px2); | 347 fAccessor.getFewPixels(n, SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2); |
350 if (n >= 1) fNext->blendPixel(px0); | 348 if (n >= 1) fNext->blendPixel(px0); |
351 if (n >= 2) fNext->blendPixel(px1); | 349 if (n >= 2) fNext->blendPixel(px1); |
352 if (n >= 3) fNext->blendPixel(px2); | 350 if (n >= 3) fNext->blendPixel(px2); |
353 } | 351 } |
354 | 352 |
355 void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { | 353 void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { |
356 Sk4f px0, px1, px2, px3; | 354 Sk4f px0, px1, px2, px3; |
357 fAccessor.get4Pixels(xs, ys, &px0, &px1, &px2, &px3); | 355 fAccessor.get4Pixels(SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2, &px3); |
358 fNext->blend4Pixels(px0, px1, px2, px3); | 356 fNext->blend4Pixels(px0, px1, px2, px3); |
359 } | 357 } |
360 | 358 |
361 void pointSpan(Span span) override { | 359 void pointSpan(Span span) override { |
362 SkASSERT(!span.isEmpty()); | 360 SkASSERT(!span.isEmpty()); |
363 SkPoint start; | 361 SkPoint start; |
364 SkScalar length; | 362 SkScalar length; |
365 int count; | 363 int count; |
366 std::tie(start, length, count) = span; | 364 std::tie(start, length, count) = span; |
367 SkScalar absLength = SkScalarAbs(length); | 365 SkScalar absLength = SkScalarAbs(length); |
368 if (absLength < (count - 1)) { | 366 if (absLength < (count - 1)) { |
369 this->spanSlowRate(span); | 367 this->spanSlowRate(span); |
370 } else if (absLength == (count - 1)) { | 368 } else if (absLength == (count - 1)) { |
371 src_strategy_blend(span, fNext, &fAccessor); | 369 src_strategy_blend(span, fNext, &fAccessor); |
372 } else { | 370 } else { |
373 this->spanFastRate(span); | 371 this->spanFastRate(span); |
374 } | 372 } |
375 } | 373 } |
376 | 374 |
377 void repeatSpan(Span span, int32_t repeatCount) override { | 375 void repeatSpan(Span span, int32_t repeatCount) override { |
378 while (repeatCount > 0) { | 376 while (repeatCount > 0) { |
379 this->pointSpan(span); | 377 this->pointSpan(span); |
380 repeatCount--; | 378 repeatCount--; |
381 } | 379 } |
382 } | 380 } |
383 | 381 |
384 void SK_VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) override { | |
385 SkFAIL("Using nearest neighbor sampler, but calling a bilerpEdge."); | |
386 } | |
387 | |
388 void bilerpSpan(Span span, SkScalar y) override { | |
389 SkFAIL("Using nearest neighbor sampler, but calling a bilerpSpan."); | |
390 } | |
391 | |
392 private: | 382 private: |
393 // When moving through source space more slowly than dst space (zoomed in), | 383 // When moving through source space more slowly than dst space (zoomed in), |
394 // we'll be sampling from the same source pixel more than once. | 384 // we'll be sampling from the same source pixel more than once. |
395 void spanSlowRate(Span span) { | 385 void spanSlowRate(Span span) { |
396 SkPoint start; | 386 SkPoint start; SkScalar length; int count; |
397 SkScalar length; | |
398 int count; | |
399 std::tie(start, length, count) = span; | 387 std::tie(start, length, count) = span; |
400 SkScalar x = X(start); | 388 SkScalar x = X(start); |
401 SkFixed fx = SkScalarToFixed(x); | 389 SkFixed fx = SkScalarToFixed(x); |
402 SkScalar dx = length / (count - 1); | 390 SkScalar dx = length / (count - 1); |
403 SkFixed fdx = SkScalarToFixed(dx); | 391 SkFixed fdx = SkScalarToFixed(dx); |
404 | 392 |
405 const void* row = fAccessor.row((int)std::floor(Y(start))); | 393 const void* row = fAccessor.row((int)std::floor(Y(start))); |
406 Next* next = fNext; | 394 Next* next = fNext; |
407 | 395 |
408 int ix = SkFixedFloorToInt(fx); | 396 int ix = SkFixedFloorToInt(fx); |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
445 // We're moving through source space faster than dst (zoomed out), | 433 // We're moving through source space faster than dst (zoomed out), |
446 // so we'll never reuse a source pixel or be able to do contiguous loads. | 434 // so we'll never reuse a source pixel or be able to do contiguous loads. |
447 void spanFastRate(Span span) { | 435 void spanFastRate(Span span) { |
448 span_fallback(span, this); | 436 span_fallback(span, this); |
449 } | 437 } |
450 | 438 |
451 Next* const fNext; | 439 Next* const fNext; |
452 Accessor fAccessor; | 440 Accessor fAccessor; |
453 }; | 441 }; |
454 | 442 |
443 // From an edgeType, the integer value of a pixel vs, and the integer value of the extreme edge | |
mtklein
2016/07/21 19:19:25
edgeType
herb_g
2016/07/21 19:52:20
Done.
| |
444 // vMax, take the point which might be off the tile by one pixel and either wrap it or pin it to | |
445 // generate the right pixel. The value vs is on the interval [-1, vMax + 1]. It produces a value | |
446 // on the interval [0, vMax]. | |
447 // Note: vMax is not width or height, but width-1 or height-1 because it is the largest valid pixel. | |
448 static inline int adjust_edge(SkShader::TileMode edgeType, int vs, int vMax) { | |
449 SkASSERT(-1 <= vs && vs <= vMax + 1) | |
450 switch (edgeType) { | |
451 case SkShader::kClamp_TileMode: | |
452 case SkShader::kMirror_TileMode: | |
453 vs = std::max(vs, 0); | |
454 vs = std::min(vs, vMax); | |
455 break; | |
456 case SkShader::kRepeat_TileMode: | |
457 vs = (vs <= vMax) ? vs : 0; | |
458 vs = (vs >= 0) ? vs : vMax; | |
459 break; | |
460 } | |
461 SkASSERT(0 <= vs && vs <= vMax); | |
462 return vs; | |
463 } | |
464 | |
465 // From a sample point on the tile, return the top or left filter value. | |
466 // The result r should be in the range (0, 1]. Since this represents the weight given to the top | |
467 // left element, then if x == 0.5 the filter value should be 1.0. | |
468 // The input sample point must be on the tile, therefore it must be >= 0. | |
469 static SkScalar sample_to_filter(SkScalar x) { | |
470 SkASSERT(x >= 0.0f); | |
471 // The usual form of the top or left edge is x - .5, but since we are working on the unit | |
472 // square, then x + .5 works just as well. This also guarantees that v > 0.0 allowing the use | |
473 // of trunc. | |
474 SkScalar v = x + 0.5f; | |
475 // Produce the top or left offset, a value on the range [0, 1). | |
476 SkScalar f = v - SkScalarTruncToScalar(v); | |
477 // Produce the filter value which is on the range (0, 1]. | |
478 SkScalar r = 1.0f - f; | |
479 SkASSERT(0.0f < r && r <= 1.0f); | |
480 return r; | |
481 } | |
482 | |
455 // -- BilerpSampler ------------------------------------------------------------ -------------------- | 483 // -- BilerpSampler ------------------------------------------------------------ -------------------- |
456 // BilerpSampler - use a bilerp filter to create runs of destination pixels. | 484 // BilerpSampler - use a bilerp filter to create runs of destination pixels. |
485 // Note: in the code below, there are two types of points | |
486 // * sample points - these are the points passed in by pointList* and Spans. | |
487 // * filter points - are created from a sample point to form the coordinates of the points | |
488 // to use in the filter and to generate the filter values. | |
457 template<typename Accessor, typename Next> | 489 template<typename Accessor, typename Next> |
458 class BilerpSampler : public SkLinearBitmapPipeline::SampleProcessorInterface { | 490 class BilerpSampler : public SkLinearBitmapPipeline::SampleProcessorInterface { |
459 public: | 491 public: |
460 template<typename... Args> | 492 template<typename... Args> |
461 BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, Args&& ... args) | 493 BilerpSampler( |
462 : fNext{next}, fAccessor{std::forward<Args>(args)...} { } | 494 SkLinearBitmapPipeline::BlendProcessorInterface* next, |
495 SkISize dimensions, | |
496 SkShader::TileMode xTile, SkShader::TileMode yTile, | |
497 Args&& ... args | |
498 ) | |
499 : fNext{next} | |
500 , fXEdgeType{xTile} | |
501 , fXMax{dimensions.width() - 1} | |
502 , fYEdgeType{yTile} | |
503 , fYMax{dimensions.height() - 1} | |
504 , fAccessor{std::forward<Args>(args)...} { } | |
463 | 505 |
464 BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, | 506 BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, |
465 const BilerpSampler& sampler) | 507 const BilerpSampler& sampler) |
466 : fNext{next}, fAccessor{sampler.fAccessor} { } | 508 : fNext{next} |
467 | 509 , fXEdgeType{sampler.fXEdgeType} |
468 Sk4f bilerpNonEdgePixel(SkScalar x, SkScalar y) { | 510 , fXMax{sampler.fXMax} |
469 Sk4f px00, px10, px01, px11; | 511 , fYEdgeType{sampler.fYEdgeType} |
470 | 512 , fYMax{sampler.fYMax} |
471 // bilerp4() expects xs, ys are the top-lefts of the 2x2 kernel. | 513 , fAccessor{sampler.fAccessor} { } |
472 Sk4f xs = Sk4f{x} - 0.5f; | |
473 Sk4f ys = Sk4f{y} - 0.5f; | |
474 Sk4f sampleXs = xs + Sk4f{0.0f, 1.0f, 0.0f, 1.0f}; | |
475 Sk4f sampleYs = ys + Sk4f{0.0f, 0.0f, 1.0f, 1.0f}; | |
476 fAccessor.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11); | |
477 return bilerp4(xs, ys, px00, px10, px01, px11); | |
478 } | |
479 | 514 |
480 void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { | 515 void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { |
481 SkASSERT(0 < n && n < 4); | 516 SkASSERT(0 < n && n < 4); |
482 auto bilerpPixel = [&](int index) { | 517 auto bilerpPixel = [&](int index) { |
483 return this->bilerpNonEdgePixel(xs[index], ys[index]); | 518 return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]}); |
484 }; | 519 }; |
485 | 520 |
486 if (n >= 1) fNext->blendPixel(bilerpPixel(0)); | 521 if (n >= 1) fNext->blendPixel(bilerpPixel(0)); |
487 if (n >= 2) fNext->blendPixel(bilerpPixel(1)); | 522 if (n >= 2) fNext->blendPixel(bilerpPixel(1)); |
488 if (n >= 3) fNext->blendPixel(bilerpPixel(2)); | 523 if (n >= 3) fNext->blendPixel(bilerpPixel(2)); |
489 } | 524 } |
490 | 525 |
491 void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { | 526 void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { |
492 auto bilerpPixel = [&](int index) { | 527 auto bilerpPixel = [&](int index) { |
493 return this->bilerpNonEdgePixel(xs[index], ys[index]); | 528 return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]}); |
494 }; | 529 }; |
495 fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bile rpPixel(3)); | 530 fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bile rpPixel(3)); |
496 } | 531 } |
497 | 532 |
498 void pointSpan(Span span) override { | 533 void pointSpan(Span span) override { |
499 this->bilerpSpan(span, span.startY()); | |
500 } | |
501 | |
502 void repeatSpan(Span span, int32_t repeatCount) override { | |
503 while (repeatCount > 0) { | |
504 this->pointSpan(span); | |
505 repeatCount--; | |
506 } | |
507 } | |
508 | |
509 void SK_VECTORCALL bilerpEdge(Sk4s sampleXs, Sk4s sampleYs) override { | |
510 Sk4f px00, px10, px01, px11; | |
511 Sk4f xs = Sk4f{sampleXs[0]}; | |
512 Sk4f ys = Sk4f{sampleYs[0]}; | |
513 fAccessor.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11); | |
514 Sk4f pixel = bilerp4(xs, ys, px00, px10, px01, px11); | |
515 fNext->blendPixel(pixel); | |
516 } | |
517 | |
518 void bilerpSpan(Span span, SkScalar y) override { | |
519 SkASSERT(!span.isEmpty()); | 534 SkASSERT(!span.isEmpty()); |
520 SkPoint start; | 535 SkPoint start; |
521 SkScalar length; | 536 SkScalar length; |
522 int count; | 537 int count; |
523 std::tie(start, length, count) = span; | 538 std::tie(start, length, count) = span; |
539 | |
540 // Nothing to do. | |
541 if (count == 0) { | |
542 return; | |
543 } | |
544 | |
545 // Trivial case. No sample points are generated other than start. | |
546 if (count == 1) { | |
547 fNext->blendPixel(this->bilerpSamplePoint(start)); | |
548 return; | |
549 } | |
550 | |
524 SkScalar absLength = SkScalarAbs(length); | 551 SkScalar absLength = SkScalarAbs(length); |
mtklein
2016/07/21 19:19:25
May be clearer to calculate dx here and write the
herb_g
2016/07/21 19:52:20
Done.
| |
525 if (absLength == 0.0f) { | 552 if (absLength == 0.0f) { |
526 this->spanZeroRate(span, y); | 553 // length is zero, so clamp an edge pixel. |
554 this->spanZeroRate(span); | |
527 } else if (absLength < (count - 1)) { | 555 } else if (absLength < (count - 1)) { |
528 this->spanSlowRate(span, y); | 556 // 0 < |dx| < 1. |
557 this->spanSlowRate(span); | |
529 } else if (absLength == (count - 1)) { | 558 } else if (absLength == (count - 1)) { |
530 if (std::fmod(span.startX() - 0.5f, 1.0f) == 0.0f) { | 559 if (sample_to_filter(span.startX()) == 1.0f) { |
mtklein
2016/07/21 19:19:25
if (s_to_f(x) == 1 && s_to_f(y) == 1) {
src_str
herb_g
2016/07/21 19:52:20
Done.
| |
531 if (std::fmod(span.startY() - 0.5f, 1.0f) == 0.0f) { | 560 // |dx| == 1. |
561 if (sample_to_filter(span.startY()) == 1.0f) { | |
562 // In fact, the src & dst pixels all line up, so go fast. | |
532 src_strategy_blend(span, fNext, &fAccessor); | 563 src_strategy_blend(span, fNext, &fAccessor); |
533 } else { | 564 } else { |
534 this->spanUnitRateAlignedX(span, y); | 565 // |dx| == 1, but still must bilerp because y has fractional offset. |
566 this->spanUnitRate(span); | |
535 } | 567 } |
536 } else { | 568 } else { |
537 this->spanUnitRate(span, y); | 569 // |dx| == 1, x and y have fractional offsets. |
570 this->spanUnitRate(span); | |
538 } | 571 } |
539 } else { | 572 } else { |
540 this->spanFastRate(span, y); | 573 // |dx| > 1. |
574 if (absLength < 2.0f * (count - 1)) { | |
mtklein
2016/07/21 19:19:25
might as well lift this if into the containing if
herb_g
2016/07/21 19:52:20
Done.
| |
575 // 1 < |dx| < 2. | |
576 this->spanMediumRate(span); | |
577 } else { | |
578 // |dx| >= 2. | |
579 this->spanFastRate(span); | |
580 } | |
581 } | |
582 } | |
583 | |
584 void repeatSpan(Span span, int32_t repeatCount) override { | |
585 while (repeatCount > 0) { | |
586 this->pointSpan(span); | |
587 repeatCount--; | |
541 } | 588 } |
542 } | 589 } |
543 | 590 |
544 private: | 591 private: |
545 void spanZeroRate(Span span, SkScalar y1) { | 592 |
546 SkScalar y0 = span.startY() - 0.5f; | 593 // Convert a sample point to the points used by the filter. |
547 y1 += 0.5f; | 594 void filterPoints(SkPoint sample, Sk4i* filterXs, Sk4i* filterYs) { |
548 int iy0 = SkScalarFloorToInt(y0); | 595 // May be less than zero. Be careful to use Floor. |
549 SkScalar filterY1 = y0 - iy0; | 596 int x0 = adjust_edge(fXEdgeType, SkScalarFloorToInt(X(sample) - 0.5), fX Max); |
550 SkScalar filterY0 = 1.0f - filterY1; | 597 // Always greater than zero. Use the faster Trunc. |
551 int iy1 = SkScalarFloorToInt(y1); | 598 int x1 = adjust_edge(fXEdgeType, SkScalarTruncToInt(X(sample) + 0.5), fX Max); |
552 int ix = SkScalarFloorToInt(span.startX()); | 599 int y0 = adjust_edge(fYEdgeType, SkScalarFloorToInt(Y(sample) - 0.5), fY Max); |
553 Sk4f pixelY0 = fAccessor.getPixelFromRow(fAccessor.row(iy0), ix); | 600 int y1 = adjust_edge(fYEdgeType, SkScalarTruncToInt(Y(sample) + 0.5), fY Max); |
554 Sk4f pixelY1 = fAccessor.getPixelFromRow(fAccessor.row(iy1), ix); | 601 |
555 Sk4f filterPixel = pixelY0 * filterY0 + pixelY1 * filterY1; | 602 *filterXs = Sk4i{x0, x1, x0, x1}; |
556 int count = span.count(); | 603 *filterYs = Sk4i{y0, y0, y1, y1}; |
604 } | |
605 | |
606 // Given a sample point, generate a color by bilerping the four filter point s. | |
607 Sk4f bilerpSamplePoint(SkPoint sample) { | |
608 Sk4i iXs, iYs; | |
609 filterPoints(sample, &iXs, &iYs); | |
610 Sk4f px00, px10, px01, px11; | |
611 fAccessor.get4Pixels(iXs, iYs, &px00, &px10, &px01, &px11); | |
612 return bilerp4(Sk4f{X(sample) - 0.5f}, Sk4f{Y(sample) - 0.5f}, px00, px1 0, px01, px11); | |
613 } | |
614 | |
615 // Get two pixels at x from row0 and row1. | |
616 void get2PixelColumn(const void* row0, const void* row1, int x, Sk4f* px0, S k4f* px1) { | |
617 *px0 = fAccessor.getPixelFromRow(row0, x); | |
618 *px1 = fAccessor.getPixelFromRow(row1, x); | |
619 } | |
620 | |
621 // |dx| == 0. This code assumes that length is zero. | |
622 void spanZeroRate(Span span) { | |
623 SkPoint start; SkScalar length; int count; | |
624 std::tie(start, length, count) = span; | |
625 SkASSERT(length == 0.0f); | |
626 | |
627 // Filter for the blending of the top and bottom pixels. | |
628 SkScalar filterY = sample_to_filter(Y(start)); | |
629 | |
630 // Generate the four filter points from the sample point start. Generate the row* values. | |
631 Sk4i iXs, iYs; | |
632 this->filterPoints(start, &iXs, &iYs); | |
633 const void* const row0 = fAccessor.row(iYs[0]); | |
634 const void* const row1 = fAccessor.row(iYs[2]); | |
635 | |
636 // Get the two pixels that make up the clamping pixel. | |
637 Sk4f pxTop, pxBottom; | |
638 this->get2PixelColumn(row0, row1, SkScalarFloorToInt(X(start)), &pxTop, &pxBottom); | |
639 Sk4f pixel = pxTop * filterY + (1.0f - filterY) * pxBottom; | |
640 | |
557 while (count >= 4) { | 641 while (count >= 4) { |
558 fNext->blend4Pixels(filterPixel, filterPixel, filterPixel, filterPix el); | 642 fNext->blend4Pixels(pixel, pixel, pixel, pixel); |
559 count -= 4; | 643 count -= 4; |
560 } | 644 } |
561 while (count > 0) { | 645 while (count > 0) { |
562 fNext->blendPixel(filterPixel); | 646 fNext->blendPixel(pixel); |
563 count -= 1; | 647 count -= 1; |
564 } | 648 } |
565 } | 649 } |
566 | 650 |
567 // When moving through source space more slowly than dst space (zoomed in), | 651 // 0 < |dx| < 1. This code reuses the calculations from previous pixels to r educe |
568 // we'll be sampling from the same source pixel more than once. | 652 // computation. In particular, several destination pixels maybe generated fr om the same four |
569 void spanSlowRate(Span span, SkScalar ry1) { | 653 // source pixels. |
570 SkPoint start; | 654 // In the following code a "part" is a combination of two pixels from the sa me column of the |
571 SkScalar length; | 655 // filter. |
572 int count; | 656 void spanSlowRate(Span span) { |
657 SkPoint start; SkScalar length; int count; | |
573 std::tie(start, length, count) = span; | 658 std::tie(start, length, count) = span; |
574 SkFixed fx = SkScalarToFixed(X(start)-0.5f); | 659 |
575 | 660 // Calculate the distance between each sample point. |
576 SkFixed fdx = SkScalarToFixed(length / (count - 1)); | 661 const SkScalar dx = length / (count - 1); |
577 | 662 SkASSERT(-1.0f < dx && dx < 1.0f && dx != 0.0f); |
578 Sk4f xAdjust; | 663 |
579 if (fdx >= 0) { | 664 // Generate the filter values for the top-left corner. |
580 xAdjust = Sk4f{-1.0f}; | 665 // Note: these values are in filter space; this has implications about h ow to adjust |
666 // these values at each step. For example, as the sample point increases , the filter | |
667 // value decreases, this is because the filter and position are related by | |
668 // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite | |
669 // direction of the sample point which is increasing by dx. | |
670 SkScalar filterX = sample_to_filter(X(start)); | |
671 SkScalar filterY = sample_to_filter(Y(start)); | |
672 | |
673 // Generate the four filter points from the sample point start. Generate the row* values. | |
674 Sk4i iXs, iYs; | |
675 this->filterPoints(start, &iXs, &iYs); | |
676 const void* const row0 = fAccessor.row(iYs[0]); | |
677 const void* const row1 = fAccessor.row(iYs[2]); | |
678 | |
679 // Generate part of the filter value at xColumn. | |
680 auto partAtColumn = [&](int xColumn) { | |
681 int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax); | |
682 Sk4f pxTop, pxBottom; | |
683 this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom) ; | |
684 return pxTop * filterY + (1.0f - filterY) * pxBottom; | |
685 }; | |
686 | |
687 // The leftPart is made up of two pixels from the left column of the filter; the right part | |
688 // is similar. The top and bottom pixels in the *Part are created as a linear blend of | |
689 // the top and bottom pixels using filterY. See the partAtColumn functio n above. | |
690 Sk4f leftPart = partAtColumn(iXs[0]); | |
691 Sk4f rightPart = partAtColumn(iXs[1]); | |
692 | |
693 // Create a destination color by blending together a left and right part using filterX. | |
694 auto bilerp = [&]() { | |
695 Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX); | |
696 return check_pixel(pixel); | |
697 }; | |
698 | |
699 // Send the first pixel to the destination. This simplifies the loop str ucture so that no | |
700 // extra pixels are fetched for the last iteration of the loop. | |
701 fNext->blendPixel(bilerp()); | |
702 count -= 1; | |
703 | |
704 if (dx > 0.0f) { | |
705 // * positive direction - generate destination pixels by sliding the filter from left | |
706 // to right. | |
707 int rightPartCursor = iXs[1]; | |
708 | |
709 // Advance the filter from left to right. Remember that moving the t op-left corner of | |
710 // the filter to the right actually makes the filter value smaller. | |
711 auto advanceFilter = [&]() { | |
712 filterX -= dx; | |
713 if (filterX <= 0.0f) { | |
714 filterX += 1.0f; | |
715 leftPart = rightPart; | |
716 rightPartCursor += 1; | |
717 rightPart = partAtColumn(rightPartCursor); | |
718 } | |
719 SkASSERT(0.0f < filterX && filterX <= 1.0f); | |
720 | |
721 return bilerp(); | |
722 }; | |
723 | |
724 while (count >= 4) { | |
725 Sk4f px0 = advanceFilter(), | |
726 px1 = advanceFilter(), | |
727 px2 = advanceFilter(), | |
728 px3 = advanceFilter(); | |
729 fNext->blend4Pixels(px0, px1, px2, px3); | |
730 count -= 4; | |
731 } | |
732 | |
733 while (count > 0) { | |
734 fNext->blendPixel(advanceFilter()); | |
735 count -= 1; | |
736 } | |
581 } else { | 737 } else { |
582 xAdjust = Sk4f{1.0f}; | 738 // * negative direction - generate destination pixels by sliding the filter from |
583 } | 739 // right to left. |
584 int ix = SkFixedFloorToInt(fx); | 740 int leftPartCursor = iXs[0]; |
585 int ioldx = ix; | 741 |
586 Sk4f x{SkFixedToScalar(fx) - ix}; | 742 // Advance the filter from right to left. Remember that moving the t op-left corner of |
587 Sk4f dx{SkFixedToScalar(fdx)}; | 743 // the filter to the left actually makes the filter value larger. |
588 SkScalar ry0 = Y(start) - 0.5f; | 744 auto advanceFilter = [&]() { |
589 ry1 += 0.5f; | 745 filterX -= dx; |
mtklein
2016/07/21 19:19:25
// remember, dx < 0
herb_g
2016/07/21 19:52:20
Done.
| |
590 SkScalar yFloor = std::floor(ry0); | 746 // At this point filterX may be > 1, and needs to be wrapped back on to the filter |
591 Sk4f y1 = Sk4f{ry0 - yFloor}; | 747 // interval, and the next column in the filter is calculated. |
592 Sk4f y0 = Sk4f{1.0f} - y1; | 748 if (filterX > 1.0f) { |
593 const void* const row0 = fAccessor.row(SkScalarFloorToInt(ry0)); | 749 filterX -= 1.0f; |
594 const void* const row1 = fAccessor.row(SkScalarFloorToInt(ry1)); | 750 rightPart = leftPart; |
595 Sk4f fpixel00 = y0 * fAccessor.getPixelFromRow(row0, ix); | 751 leftPartCursor -= 1; |
596 Sk4f fpixel01 = y1 * fAccessor.getPixelFromRow(row1, ix); | 752 leftPart = partAtColumn(leftPartCursor); |
597 Sk4f fpixel10 = y0 * fAccessor.getPixelFromRow(row0, ix + 1); | 753 } |
598 Sk4f fpixel11 = y1 * fAccessor.getPixelFromRow(row1, ix + 1); | 754 SkASSERT(0.0f < filterX && filterX <= 1.0f); |
599 auto getNextPixel = [&]() { | 755 |
600 if (ix != ioldx) { | 756 return bilerp(); |
601 fpixel00 = fpixel10; | 757 }; |
602 fpixel01 = fpixel11; | 758 |
603 fpixel10 = y0 * fAccessor.getPixelFromRow(row0, ix + 1); | 759 while (count >= 4) { |
604 fpixel11 = y1 * fAccessor.getPixelFromRow(row1, ix + 1); | 760 Sk4f px0 = advanceFilter(), |
605 ioldx = ix; | 761 px1 = advanceFilter(), |
606 x = x + xAdjust; | 762 px2 = advanceFilter(), |
607 } | 763 px3 = advanceFilter(); |
608 | 764 fNext->blend4Pixels(px0, px1, px2, px3); |
609 Sk4f x0, x1; | 765 count -= 4; |
610 x0 = Sk4f{1.0f} - x; | 766 } |
611 x1 = x; | 767 |
612 Sk4f fpixel = x0 * (fpixel00 + fpixel01) + x1 * (fpixel10 + fpixel11 ); | 768 while (count > 0) { |
613 fx += fdx; | 769 fNext->blendPixel(advanceFilter()); |
614 ix = SkFixedFloorToInt(fx); | 770 count -= 1; |
615 x = x + dx; | 771 } |
616 return fpixel; | 772 } |
617 }; | 773 } |
618 | 774 |
619 while (count >= 4) { | 775 // |dx| == 1. Moving through source space at a rate of 1 source pixel per 1 dst pixel. |
620 Sk4f fpixel0 = getNextPixel(); | 776 // Every filter part is used for two destination pixels, and the code can bu lk load four |
621 Sk4f fpixel1 = getNextPixel(); | 777 // pixels at a time. |
622 Sk4f fpixel2 = getNextPixel(); | 778 void spanUnitRate(Span span) { |
623 Sk4f fpixel3 = getNextPixel(); | 779 SkPoint start; SkScalar length; int count; |
624 | 780 std::tie(start, length, count) = span; |
625 fNext->blend4Pixels(fpixel0, fpixel1, fpixel2, fpixel3); | 781 SkASSERT(SkScalarAbs(length) == (count - 1)); |
626 count -= 4; | 782 |
627 } | 783 // Calculate the four filter points of start, and use the two different Y values to |
628 | 784 // generate the row pointers. |
629 while (count > 0) { | 785 Sk4i iXs, iYs; |
630 fNext->blendPixel(getNextPixel()); | 786 filterPoints(start, &iXs, &iYs); |
631 | 787 const void* row0 = fAccessor.row(iYs[0]); |
632 count -= 1; | 788 const void* row1 = fAccessor.row(iYs[2]); |
633 } | 789 |
634 } | 790 // Calculate the filter values for the top-left filter element. |
635 | 791 const SkScalar filterX = sample_to_filter(X(start)); |
636 // We're moving through source space at a rate of 1 source pixel per 1 dst pixel. | 792 const SkScalar filterY = sample_to_filter(Y(start)); |
637 // We'll never re-use pixels, but we can at least load contiguous pixels. | 793 |
638 void spanUnitRate(Span span, SkScalar y1) { | 794 // Generate part of the filter value at xColumn. |
639 y1 += 0.5f; | 795 auto partAtColumn = [&](int xColumn) { |
640 SkScalar y0 = span.startY() - 0.5f; | 796 int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax); |
641 int iy0 = SkScalarFloorToInt(y0); | 797 Sk4f pxTop, pxBottom; |
642 SkScalar filterY1 = y0 - iy0; | 798 this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom) ; |
643 SkScalar filterY0 = 1.0f - filterY1; | 799 return pxTop * filterY + (1.0f - filterY) * pxBottom; |
644 int iy1 = SkScalarFloorToInt(y1); | 800 }; |
645 const void* rowY0 = fAccessor.row(iy0); | 801 |
646 const void* rowY1 = fAccessor.row(iy1); | 802 auto get4Parts = [&](int ix, Sk4f* part0, Sk4f* part1, Sk4f* part2, Sk4f * part3) { |
647 SkScalar x0 = span.startX() - 0.5f; | 803 // Check if the pixels needed are near the edges. If not go fast using bulk pixels, |
648 int ix0 = SkScalarFloorToInt(x0); | 804 // otherwise be careful. |
649 SkScalar filterX1 = x0 - ix0; | 805 if (0 <= ix && ix <= fXMax - 3) { |
650 SkScalar filterX0 = 1.0f - filterX1; | 806 Sk4f px00, px10, px20, px30, |
651 | 807 px01, px11, px21, px31; |
652 auto getPixelY0 = [&]() { | 808 fAccessor.get4Pixels(row0, ix, &px00, &px10, &px20, &px30); |
653 Sk4f px = fAccessor.getPixelFromRow(rowY0, ix0); | 809 fAccessor.get4Pixels(row1, ix, &px01, &px11, &px21, &px31); |
654 return px * filterY0; | 810 *part0 = filterY * px00 + (1.0f - filterY) * px01; |
655 }; | 811 *part1 = filterY * px10 + (1.0f - filterY) * px11; |
656 | 812 *part2 = filterY * px20 + (1.0f - filterY) * px21; |
657 auto getPixelY1 = [&]() { | 813 *part3 = filterY * px30 + (1.0f - filterY) * px31; |
658 Sk4f px = fAccessor.getPixelFromRow(rowY1, ix0); | 814 } else { |
659 return px * filterY1; | 815 *part0 = partAtColumn(ix + 0); |
660 }; | 816 *part1 = partAtColumn(ix + 1); |
661 | 817 *part2 = partAtColumn(ix + 2); |
662 auto get4PixelsY0 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* p x3) { | 818 *part3 = partAtColumn(ix + 3); |
663 fAccessor.get4Pixels(rowY0, ix, px0, px1, px2, px3); | 819 } |
664 *px0 = *px0 * filterY0; | 820 }; |
665 *px1 = *px1 * filterY0; | 821 |
666 *px2 = *px2 * filterY0; | 822 auto bilerp = [&](Sk4f& part0, Sk4f& part1) { |
667 *px3 = *px3 * filterY0; | 823 return part0 * filterX + part1 * (1.0f - filterX); |
668 }; | 824 }; |
669 | 825 |
670 auto get4PixelsY1 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* p x3) { | 826 if (length > 0) { |
671 fAccessor.get4Pixels(rowY1, ix, px0, px1, px2, px3); | 827 // * positive direction - generate destination pixels by sliding the filter from left |
672 *px0 = *px0 * filterY1; | 828 // to right. |
673 *px1 = *px1 * filterY1; | 829 |
674 *px2 = *px2 * filterY1; | 830 // overlapPart is the filter part from the end of the previous four pixels used at |
675 *px3 = *px3 * filterY1; | 831 // the start of the next four pixels. |
676 }; | 832 Sk4f overlapPart = partAtColumn(iXs[0]); |
677 | 833 int rightColumnCursor = iXs[1]; |
678 auto lerp = [&](Sk4f& pixelX0, Sk4f& pixelX1) { | 834 while (count >= 4) { |
679 return pixelX0 * filterX0 + pixelX1 * filterX1; | 835 Sk4f part0, part1, part2, part3; |
680 }; | 836 get4Parts(rightColumnCursor, &part0, &part1, &part2, &part3); |
681 | 837 Sk4f px0 = bilerp(overlapPart, part0); |
682 // Mid making 4 unit rate. | 838 Sk4f px1 = bilerp(part0, part1); |
683 Sk4f pxB = getPixelY0() + getPixelY1(); | 839 Sk4f px2 = bilerp(part1, part2); |
684 if (span.length() > 0) { | 840 Sk4f px3 = bilerp(part2, part3); |
685 int count = span.count(); | 841 overlapPart = part3; |
686 while (count >= 4) { | 842 fNext->blend4Pixels(px0, px1, px2, px3); |
687 Sk4f px00, px10, px20, px30; | 843 rightColumnCursor += 4; |
688 get4PixelsY0(ix0, &px00, &px10, &px20, &px30); | 844 count -= 4; |
689 Sk4f px01, px11, px21, px31; | 845 } |
690 get4PixelsY1(ix0, &px01, &px11, &px21, &px31); | 846 |
691 Sk4f pxS0 = px00 + px01; | 847 while (count > 0) { |
692 Sk4f px0 = lerp(pxB, pxS0); | 848 Sk4f rightPart = partAtColumn(rightColumnCursor); |
693 Sk4f pxS1 = px10 + px11; | 849 |
694 Sk4f px1 = lerp(pxS0, pxS1); | 850 fNext->blendPixel(bilerp(overlapPart, rightPart)); |
695 Sk4f pxS2 = px20 + px21; | 851 overlapPart = rightPart; |
696 Sk4f px2 = lerp(pxS1, pxS2); | 852 rightColumnCursor += 1; |
697 Sk4f pxS3 = px30 + px31; | |
698 Sk4f px3 = lerp(pxS2, pxS3); | |
699 pxB = pxS3; | |
700 fNext->blend4Pixels(px0, px1, px2, px3); | |
701 ix0 += 4; | |
702 count -= 4; | |
703 } | |
704 while (count > 0) { | |
705 Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix0); | |
706 Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix0); | |
707 | |
708 fNext->blendPixel(lerp(pixelY0, pixelY1)); | |
709 ix0 += 1; | |
710 count -= 1; | 853 count -= 1; |
711 } | 854 } |
712 } else { | 855 } else { |
713 int count = span.count(); | 856 // * negative direction - generate destination pixels by sliding the filter from |
714 while (count >= 4) { | 857 // right to left. |
715 Sk4f px00, px10, px20, px30; | 858 Sk4f overlapPart = partAtColumn(iXs[1]); |
716 get4PixelsY0(ix0 - 3, &px00, &px10, &px20, &px30); | 859 int leftColumnCursor = iXs[0]; |
717 Sk4f px01, px11, px21, px31; | 860 |
718 get4PixelsY1(ix0 - 3, &px01, &px11, &px21, &px31); | 861 while (count >= 4) { |
719 Sk4f pxS3 = px30 + px31; | 862 Sk4f part0, part1, part2, part3; |
720 Sk4f px0 = lerp(pxS3, pxB); | 863 get4Parts(leftColumnCursor - 3, &part3, &part2, &part1, &part0); |
721 Sk4f pxS2 = px20 + px21; | 864 Sk4f px0 = bilerp(part0, overlapPart); |
722 Sk4f px1 = lerp(pxS2, pxS3); | 865 Sk4f px1 = bilerp(part1, part0); |
723 Sk4f pxS1 = px10 + px11; | 866 Sk4f px2 = bilerp(part2, part1); |
724 Sk4f px2 = lerp(pxS1, pxS2); | 867 Sk4f px3 = bilerp(part3, part2); |
725 Sk4f pxS0 = px00 + px01; | 868 overlapPart = part3; |
726 Sk4f px3 = lerp(pxS0, pxS1); | 869 fNext->blend4Pixels(px0, px1, px2, px3); |
727 pxB = pxS0; | 870 leftColumnCursor -= 4; |
728 fNext->blend4Pixels(px0, px1, px2, px3); | 871 count -= 4; |
729 ix0 -= 4; | 872 } |
730 count -= 4; | 873 |
731 } | 874 while (count > 0) { |
732 while (count > 0) { | 875 Sk4f leftPart = partAtColumn(leftColumnCursor); |
733 Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix0); | 876 |
734 Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix0); | 877 fNext->blendPixel(bilerp(leftPart, overlapPart)); |
735 | 878 overlapPart = leftPart; |
736 fNext->blendPixel(lerp(pixelY0, pixelY1)); | 879 leftColumnCursor -= 1; |
737 ix0 -= 1; | 880 count -= 1; |
738 count -= 1; | 881 } |
739 } | 882 } |
740 } | 883 } |
741 } | 884 |
742 | 885 // 1 < |dx| < 2. Going through the source pixels at a faster rate than the dest pixels, but |
743 void spanUnitRateAlignedX(Span span, SkScalar y1) { | 886 // still slow enough to take advantage of previous calculations. |
744 SkScalar y0 = span.startY() - 0.5f; | 887 void spanMediumRate(Span span) { |
745 y1 += 0.5f; | 888 SkPoint start; SkScalar length; int count; |
746 int iy0 = SkScalarFloorToInt(y0); | 889 std::tie(start, length, count) = span; |
747 SkScalar filterY1 = y0 - iy0; | 890 |
748 SkScalar filterY0 = 1.0f - filterY1; | 891 // Calculate the distance between each sample point. |
749 int iy1 = SkScalarFloorToInt(y1); | 892 const SkScalar dx = length / (count - 1); |
750 int ix = SkScalarFloorToInt(span.startX()); | 893 SkASSERT((-2.0f < dx && dx < -1.0f) || (1.0f < dx && dx < 2.0f)); |
751 const void* rowY0 = fAccessor.row(iy0); | 894 |
752 const void* rowY1 = fAccessor.row(iy1); | 895 // Generate the filter values for the top-left corner. |
753 auto lerp = [&](Sk4f* pixelY0, Sk4f* pixelY1) { | 896 // Note: these values are in filter space; this has implications about how to adjust |
754 return *pixelY0 * filterY0 + *pixelY1 * filterY1; | 897 // these values at each step. For example, as the sample point increases , the filter |
755 }; | 898 // value decreases, this is because the filter and position are related by |
756 | 899 // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite |
757 if (span.length() > 0) { | 900 // direction of the sample point which is increasing by dx. |
758 int count = span.count(); | 901 SkScalar filterX = sample_to_filter(X(start)); |
759 while (count >= 4) { | 902 SkScalar filterY = sample_to_filter(Y(start)); |
760 Sk4f px00, px10, px20, px30; | 903 |
761 fAccessor.get4Pixels(rowY0, ix, &px00, &px10, &px20, &px30); | 904 // Generate the four filter points from the sample point start. Generate the row* values. |
762 Sk4f px01, px11, px21, px31; | 905 Sk4i iXs, iYs; |
763 fAccessor.get4Pixels(rowY1, ix, &px01, &px11, &px21, &px31); | 906 this->filterPoints(start, &iXs, &iYs); |
764 fNext->blend4Pixels( | 907 const void* const row0 = fAccessor.row(iYs[0]); |
765 lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31)); | 908 const void* const row1 = fAccessor.row(iYs[2]); |
766 ix += 4; | 909 |
767 count -= 4; | 910 // Generate part of the filter value at xColumn. |
768 } | 911 auto partAtColumn = [&](int xColumn) { |
769 while (count > 0) { | 912 int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax); |
770 Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix); | 913 Sk4f pxTop, pxBottom; |
771 Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix); | 914 this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom) ; |
772 | 915 return pxTop * filterY + (1.0f - filterY) * pxBottom; |
773 fNext->blendPixel(lerp(&pixelY0, &pixelY1)); | 916 }; |
774 ix += 1; | 917 |
918 // The leftPart is made up of two pixels from the left column of the filter, right part | |
919 // is similar. The top and bottom pixels in the *Part are created as a linear blend of | |
920 // the top and bottom pixels using filterY. See the partAtColumn function above. | |
921 Sk4f leftPart = partAtColumn(iXs[0]); | |
922 Sk4f rightPart = partAtColumn(iXs[1]); | |
923 | |
924 // Create a destination color by blending together a left and right part using filterX. | |
925 auto bilerp = [&]() { | |
926 Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX); | |
927 return check_pixel(pixel); | |
928 }; | |
929 | |
930 // Send the first pixel to the destination. This simplifies the loop structure so that no | |
931 // extra pixels are fetched for the last iteration of the loop. | |
932 fNext->blendPixel(bilerp()); | |
933 count -= 1; | |
934 | |
935 if (dx > 0.0f) { | |
936 // * positive direction - generate destination pixels by sliding the filter from left | |
937 // to right. | |
938 int rightPartCursor = iXs[1]; | |
939 | |
940 // Advance the filter from left to right. Remember that moving the top-left corner of | |
941 // the filter to the right actually makes the filter value smaller. | |
942 auto advanceFilter = [&]() { | |
943 filterX -= dx; | |
944 // At this point filterX is less than zero, but might actually be less than -1. | |
945 if (filterX > -1.0f) { | |
946 filterX += 1.0f; | |
947 leftPart = rightPart; | |
948 rightPartCursor += 1; | |
949 rightPart = partAtColumn(rightPartCursor); | |
950 } else { | |
951 filterX += 2.0f; | |
952 rightPartCursor += 2; | |
953 leftPart = partAtColumn(rightPartCursor - 1); | |
954 rightPart = partAtColumn(rightPartCursor); | |
955 } | |
956 SkASSERT(0.0f < filterX && filterX <= 1.0f); | |
957 | |
958 return bilerp(); | |
959 }; | |
960 | |
961 while (count >= 4) { | |
962 Sk4f px0 = advanceFilter(), | |
963 px1 = advanceFilter(), | |
964 px2 = advanceFilter(), | |
965 px3 = advanceFilter(); | |
966 fNext->blend4Pixels(px0, px1, px2, px3); | |
967 count -= 4; | |
968 } | |
969 | |
970 while (count > 0) { | |
971 fNext->blendPixel(advanceFilter()); | |
775 count -= 1; | 972 count -= 1; |
776 } | 973 } |
777 } else { | 974 } else { |
778 int count = span.count(); | 975 // * negative direction - generate destination pixels by sliding the filter from |
779 while (count >= 4) { | 976 // right to left. |
780 Sk4f px00, px10, px20, px30; | 977 int leftPartCursor = iXs[0]; |
781 fAccessor.get4Pixels(rowY0, ix - 3, &px30, &px20, &px10, &px00); | 978 |
782 Sk4f px01, px11, px21, px31; | 979 auto advanceFilter = [&]() { |
783 fAccessor.get4Pixels(rowY1, ix - 3, &px31, &px21, &px11, &px01); | 980 filterX -= dx; |
mtklein
2016/07/21 19:19:25
// remember, dx < 0, so filterX is increasing.
herb_g
2016/07/21 19:52:20
Done.
| |
784 fNext->blend4Pixels( | 981 // At this point, filterX is greater than one, but may actually be greater than two. |
785 lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31)); | 982 if (filterX < 2.0f) { |
786 ix -= 4; | 983 filterX -= 1.0f; |
787 count -= 4; | 984 rightPart = leftPart; |
788 } | 985 leftPartCursor -= 1; |
789 while (count > 0) { | 986 leftPart = partAtColumn(leftPartCursor); |
790 Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix); | 987 } else { |
791 Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix); | 988 filterX -= 2.0f; |
792 | 989 leftPartCursor -= 2; |
793 fNext->blendPixel(lerp(&pixelY0, &pixelY1)); | 990 rightPart = partAtColumn(leftPartCursor - 1); |
794 ix -= 1; | 991 leftPart = partAtColumn(leftPartCursor); |
795 count -= 1; | 992 } |
796 } | 993 SkASSERT(0.0f < filterX && filterX <= 1.0f); |
797 } | 994 return bilerp(); |
995 }; | |
996 | |
997 while (count >= 4) { | |
998 Sk4f px0 = advanceFilter(), | |
999 px1 = advanceFilter(), | |
1000 px2 = advanceFilter(), | |
1001 px3 = advanceFilter(); | |
1002 fNext->blend4Pixels(px0, px1, px2, px3); | |
1003 count -= 4; | |
1004 } | |
1005 | |
1006 while (count > 0) { | |
1007 fNext->blendPixel(advanceFilter()); | |
1008 count -= 1; | |
1009 } | |
1010 } | |
798 } | 1011 } |
799 | 1012 |
800 // We're moving through source space faster than dst (zoomed out), | 1013 // We're moving through source space faster than dst (zoomed out), |
801 // so we'll never reuse a source pixel or be able to do contiguous loads. | 1014 // so we'll never reuse a source pixel or be able to do contiguous loads. |
802 void spanFastRate(Span span, SkScalar y1) { | 1015 void spanFastRate(Span span) { |
803 SkPoint start; | 1016 SkPoint start; SkScalar length; int count; |
804 SkScalar length; | |
805 int count; | |
806 std::tie(start, length, count) = span; | 1017 std::tie(start, length, count) = span; |
807 SkScalar x = X(start); | 1018 SkScalar x = X(start); |
808 SkScalar y = Y(start); | 1019 SkScalar y = Y(start); |
809 | 1020 |
810 // In this sampler, it is assumed that if span.StartY() and y1 are the same then both | 1021 SkScalar dx = length / (count - 1); |
811 // y-lines are on the same tile. | 1022 while (count > 0) { |
812 if (y == y1) { | 1023 fNext->blendPixel(this->bilerpSamplePoint(SkPoint{x, y})); |
813 // Both y-lines are on the same tile. | 1024 x += dx; |
814 span_fallback(span, this); | 1025 count -= 1; |
815 } else { | |
816 // The y-lines are on different tiles. | |
817 SkScalar dx = length / (count - 1); | |
818 Sk4f ys = {y - 0.5f, y - 0.5f, y1 + 0.5f, y1 + 0.5f}; | |
819 while (count > 0) { | |
820 Sk4f xs = Sk4f{-0.5f, 0.5f, -0.5f, 0.5f} + Sk4f{x}; | |
821 this->bilerpEdge(xs, ys); | |
822 x += dx; | |
823 count -= 1; | |
824 } | |
825 } | 1026 } |
826 } | 1027 } |
827 | 1028 |
828 Next* const fNext; | 1029 Next* const fNext; |
829 Accessor fAccessor; | 1030 const SkShader::TileMode fXEdgeType; |
1031 const int fXMax; | |
1032 const SkShader::TileMode fYEdgeType; | |
1033 const int fYMax; | |
1034 Accessor fAccessor; | |
830 }; | 1035 }; |
831 | 1036 |
832 } // namespace | 1037 } // namespace |
833 | 1038 |
834 #endif // SkLinearBitmapPipeline_sampler_DEFINED | 1039 #endif // SkLinearBitmapPipeline_sampler_DEFINED |
OLD | NEW |