OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
#include <algorithm>
#include <cstdlib>

#include "base/logging.h"
#include "skia/ext/convolver.h"
#include "skia/ext/convolver_SSE2.h"
#include "third_party/skia/include/core/SkSize.h"
#include "third_party/skia/include/core/SkTypes.h"
10 | 12 |
11 namespace skia { | 13 namespace skia { |
12 | 14 |
13 namespace { | 15 namespace { |
14 | 16 |
// Converts the argument to an 8-bit unsigned value by clamping to the range
// 0-255: negative inputs become 0 and inputs above 255 become 255.
inline unsigned char ClampTo8(int a) {
  // Reinterpreting |a| as unsigned maps every negative value to something
  // >= 256, so this one comparison accepts exactly the values 0..255 and
  // avoids the extra check in the common case.
  if (static_cast<unsigned>(a) < 256)
    return static_cast<unsigned char>(a);
  return a < 0 ? 0 : 255;
}
24 | 26 |
| 27 // Takes the value produced by accumulating element-wise product of image with |
| 28 // a kernel and brings it back into range. |
| 29 // All of the filter scaling factors are in fixed point with kShiftBits bits of |
| 30 // fractional part. |
| 31 inline unsigned char BringBackTo8(int a, bool take_absolute) { |
| 32 a >>= ConvolutionFilter1D::kShiftBits; |
| 33 if (take_absolute) |
| 34 a = std::abs(a); |
| 35 return ClampTo8(a); |
| 36 } |
| 37 |
25 // Stores a list of rows in a circular buffer. The usage is you write into it | 38 // Stores a list of rows in a circular buffer. The usage is you write into it |
26 // by calling AdvanceRow. It will keep track of which row in the buffer it | 39 // by calling AdvanceRow. It will keep track of which row in the buffer it |
27 // should use next, and the total number of rows added. | 40 // should use next, and the total number of rows added. |
28 class CircularRowBuffer { | 41 class CircularRowBuffer { |
29 public: | 42 public: |
30 // The number of pixels in each row is given in |source_row_pixel_width|. | 43 // The number of pixels in each row is given in |source_row_pixel_width|. |
31 // The maximum number of rows needed in the buffer is |max_y_filter_size| | 44 // The maximum number of rows needed in the buffer is |max_y_filter_size| |
32 // (we only need to store enough rows for the biggest filter). | 45 // (we only need to store enough rows for the biggest filter). |
33 // | 46 // |
34 // We use the |first_input_row| to compute the coordinates of all of the | 47 // We use the |first_input_row| to compute the coordinates of all of the |
(...skipping 229 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
264 AddFilter(filter_offset, &fixed_values[0], filter_length); | 277 AddFilter(filter_offset, &fixed_values[0], filter_length); |
265 } | 278 } |
266 | 279 |
267 void ConvolutionFilter1D::AddFilter(int filter_offset, | 280 void ConvolutionFilter1D::AddFilter(int filter_offset, |
268 const Fixed* filter_values, | 281 const Fixed* filter_values, |
269 int filter_length) { | 282 int filter_length) { |
270 // It is common for leading/trailing filter values to be zeros. In such | 283 // It is common for leading/trailing filter values to be zeros. In such |
271 // cases it is beneficial to only store the central factors. | 284 // cases it is beneficial to only store the central factors. |
272 // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on | 285 // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on |
273 // a 1080p image this optimization gives a ~10% speed improvement. | 286 // a 1080p image this optimization gives a ~10% speed improvement. |
| 287 int filter_size = filter_length; |
274 int first_non_zero = 0; | 288 int first_non_zero = 0; |
275 while (first_non_zero < filter_length && filter_values[first_non_zero] == 0) | 289 while (first_non_zero < filter_length && filter_values[first_non_zero] == 0) |
276 first_non_zero++; | 290 first_non_zero++; |
277 | 291 |
278 if (first_non_zero < filter_length) { | 292 if (first_non_zero < filter_length) { |
279 // Here we have at least one non-zero factor. | 293 // Here we have at least one non-zero factor. |
280 int last_non_zero = filter_length - 1; | 294 int last_non_zero = filter_length - 1; |
281 while (last_non_zero >= 0 && filter_values[last_non_zero] == 0) | 295 while (last_non_zero >= 0 && filter_values[last_non_zero] == 0) |
282 last_non_zero--; | 296 last_non_zero--; |
283 | 297 |
284 filter_offset += first_non_zero; | 298 filter_offset += first_non_zero; |
285 filter_length = last_non_zero + 1 - first_non_zero; | 299 filter_length = last_non_zero + 1 - first_non_zero; |
286 SkASSERT(filter_length > 0); | 300 SkASSERT(filter_length > 0); |
287 | 301 |
288 for (int i = first_non_zero; i <= last_non_zero; i++) | 302 for (int i = first_non_zero; i <= last_non_zero; i++) |
289 filter_values_.push_back(filter_values[i]); | 303 filter_values_.push_back(filter_values[i]); |
290 } else { | 304 } else { |
291 // Here all the factors were zeroes. | 305 // Here all the factors were zeroes. |
292 filter_length = 0; | 306 filter_length = 0; |
293 } | 307 } |
294 | 308 |
295 FilterInstance instance; | 309 FilterInstance instance; |
296 | 310 |
297 // We pushed filter_length elements onto filter_values_ | 311 // We pushed filter_length elements onto filter_values_ |
298 instance.data_location = (static_cast<int>(filter_values_.size()) - | 312 instance.data_location = (static_cast<int>(filter_values_.size()) - |
299 filter_length); | 313 filter_length); |
300 instance.offset = filter_offset; | 314 instance.offset = filter_offset; |
301 instance.length = filter_length; | 315 instance.trimmed_length = filter_length; |
| 316 instance.length = filter_size; |
302 filters_.push_back(instance); | 317 filters_.push_back(instance); |
303 | 318 |
304 max_filter_ = std::max(max_filter_, filter_length); | 319 max_filter_ = std::max(max_filter_, filter_length); |
305 } | 320 } |
306 | 321 |
| 322 const ConvolutionFilter1D::Fixed* ConvolutionFilter1D::GetSingleFilter( |
| 323 int* specified_filter_length, |
| 324 int* filter_offset, |
| 325 int* filter_length) const { |
| 326 const FilterInstance& filter = filters_[0]; |
| 327 *filter_offset = filter.offset; |
| 328 *filter_length = filter.trimmed_length; |
| 329 *specified_filter_length = filter.length; |
| 330 if (filter.trimmed_length == 0) |
| 331 return NULL; |
| 332 |
| 333 return &filter_values_[filter.data_location]; |
| 334 } |
| 335 |
307 typedef void (*ConvolveVertically_pointer)( | 336 typedef void (*ConvolveVertically_pointer)( |
308 const ConvolutionFilter1D::Fixed* filter_values, | 337 const ConvolutionFilter1D::Fixed* filter_values, |
309 int filter_length, | 338 int filter_length, |
310 unsigned char* const* source_data_rows, | 339 unsigned char* const* source_data_rows, |
311 int pixel_width, | 340 int pixel_width, |
312 unsigned char* out_row, | 341 unsigned char* out_row, |
313 bool has_alpha); | 342 bool has_alpha); |
314 typedef void (*Convolve4RowsHorizontally_pointer)( | 343 typedef void (*Convolve4RowsHorizontally_pointer)( |
315 const unsigned char* src_data[4], | 344 const unsigned char* src_data[4], |
316 const ConvolutionFilter1D& filter, | 345 const ConvolutionFilter1D& filter, |
(...skipping 154 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
471 source_has_alpha); | 500 source_has_alpha); |
472 } else { | 501 } else { |
473 ConvolveVertically(filter_values, filter_length, | 502 ConvolveVertically(filter_values, filter_length, |
474 first_row_for_filter, | 503 first_row_for_filter, |
475 filter_x.num_values(), cur_output_row, | 504 filter_x.num_values(), cur_output_row, |
476 source_has_alpha); | 505 source_has_alpha); |
477 } | 506 } |
478 } | 507 } |
479 } | 508 } |
480 | 509 |
| 510 void SingleChannelConvolveX1D(const unsigned char* source_data, |
| 511 int source_byte_row_stride, |
| 512 int input_channel_index, |
| 513 int input_channel_count, |
| 514 const ConvolutionFilter1D& filter, |
| 515 const SkISize& image_size, |
| 516 unsigned char* output, |
| 517 int output_byte_row_stride, |
| 518 int output_channel_index, |
| 519 int output_channel_count, |
| 520 bool absolute_values) { |
| 521 int filter_offset, filter_length, filter_size; |
| 522 // Very much unlike BGRAConvolve2D, here we expect to have the same filter |
| 523 // for all pixels. |
| 524 const ConvolutionFilter1D::Fixed* filter_values = |
| 525 filter.GetSingleFilter(&filter_size, &filter_offset, &filter_length); |
| 526 |
| 527 if (filter_values == NULL || image_size.width() < filter_size) { |
| 528 NOTREACHED(); |
| 529 return; |
| 530 } |
| 531 |
| 532 int centrepoint = filter_length / 2; |
| 533 if (filter_size - filter_offset != 2 * filter_offset) { |
| 534 // This means the original filter was not symmetrical AND |
| 535 // got clipped from one side more than from the other. |
| 536 centrepoint = filter_size / 2 - filter_offset; |
| 537 } |
| 538 |
| 539 const unsigned char* source_data_row = source_data; |
| 540 unsigned char* output_row = output; |
| 541 |
| 542 for (int r = 0; r < image_size.height(); ++r) { |
| 543 unsigned char* target_byte = output_row + output_channel_index; |
| 544 // Process the lead part, padding image to the left with the first pixel. |
| 545 int c = 0; |
| 546 for (; c < centrepoint; ++c, target_byte += output_channel_count) { |
| 547 int accval = 0; |
| 548 int i = 0; |
| 549 int pixel_byte_index = input_channel_index; |
| 550 for (; i < centrepoint - c; ++i) // Padding part. |
| 551 accval += filter_values[i] * source_data_row[pixel_byte_index]; |
| 552 |
| 553 for (; i < filter_length; ++i, pixel_byte_index += input_channel_count) |
| 554 accval += filter_values[i] * source_data_row[pixel_byte_index]; |
| 555 |
| 556 *target_byte = BringBackTo8(accval, absolute_values); |
| 557 } |
| 558 |
| 559 // Now for the main event. |
| 560 for (; c < image_size.width() - centrepoint; |
| 561 ++c, target_byte += output_channel_count) { |
| 562 int accval = 0; |
| 563 int pixel_byte_index = (c - centrepoint) * input_channel_count + |
| 564 input_channel_index; |
| 565 |
| 566 for (int i = 0; i < filter_length; |
| 567 ++i, pixel_byte_index += input_channel_count) { |
| 568 accval += filter_values[i] * source_data_row[pixel_byte_index]; |
| 569 } |
| 570 |
| 571 *target_byte = BringBackTo8(accval, absolute_values); |
| 572 } |
| 573 |
| 574 for (; c < image_size.width(); ++c, target_byte += output_channel_count) { |
| 575 int accval = 0; |
| 576 int overlap_taps = image_size.width() - c + centrepoint; |
| 577 int pixel_byte_index = (c - centrepoint) * input_channel_count + |
| 578 input_channel_index; |
| 579 int i = 0; |
| 580 for (; i < overlap_taps - 1; ++i, pixel_byte_index += input_channel_count) |
| 581 accval += filter_values[i] * source_data_row[pixel_byte_index]; |
| 582 |
| 583 for (; i < filter_length; ++i) |
| 584 accval += filter_values[i] * source_data_row[pixel_byte_index]; |
| 585 |
| 586 *target_byte = BringBackTo8(accval, absolute_values); |
| 587 } |
| 588 |
| 589 source_data_row += source_byte_row_stride; |
| 590 output_row += output_byte_row_stride; |
| 591 } |
| 592 } |
| 593 |
| 594 void SingleChannelConvolveY1D(const unsigned char* source_data, |
| 595 int source_byte_row_stride, |
| 596 int input_channel_index, |
| 597 int input_channel_count, |
| 598 const ConvolutionFilter1D& filter, |
| 599 const SkISize& image_size, |
| 600 unsigned char* output, |
| 601 int output_byte_row_stride, |
| 602 int output_channel_index, |
| 603 int output_channel_count, |
| 604 bool absolute_values) { |
| 605 int filter_offset, filter_length, filter_size; |
| 606 // Very much unlike BGRAConvolve2D, here we expect to have the same filter |
| 607 // for all pixels. |
| 608 const ConvolutionFilter1D::Fixed* filter_values = |
| 609 filter.GetSingleFilter(&filter_size, &filter_offset, &filter_length); |
| 610 |
| 611 if (filter_values == NULL || image_size.height() < filter_size) { |
| 612 NOTREACHED(); |
| 613 return; |
| 614 } |
| 615 |
| 616 int centrepoint = filter_length / 2; |
| 617 if (filter_size - filter_offset != 2 * filter_offset) { |
| 618 // This means the original filter was not symmetrical AND |
| 619 // got clipped from one side more than from the other. |
| 620 centrepoint = filter_size / 2 - filter_offset; |
| 621 } |
| 622 |
| 623 for (int c = 0; c < image_size.width(); ++c) { |
| 624 unsigned char* target_byte = output + c * output_channel_count + |
| 625 output_channel_index; |
| 626 int r = 0; |
| 627 |
| 628 for (; r < centrepoint; ++r, target_byte += output_byte_row_stride) { |
| 629 int accval = 0; |
| 630 int i = 0; |
| 631 int pixel_byte_index = c * input_channel_count + input_channel_index; |
| 632 |
| 633 for (; i < centrepoint - r; ++i) // Padding part. |
| 634 accval += filter_values[i] * source_data[pixel_byte_index]; |
| 635 |
| 636 for (; i < filter_length; ++i, pixel_byte_index += source_byte_row_stride) |
| 637 accval += filter_values[i] * source_data[pixel_byte_index]; |
| 638 |
| 639 *target_byte = BringBackTo8(accval, absolute_values); |
| 640 } |
| 641 |
| 642 for (; r < image_size.height() - centrepoint; |
| 643 ++r, target_byte += output_byte_row_stride) { |
| 644 int accval = 0; |
| 645 int pixel_byte_index = (r - centrepoint) * source_byte_row_stride + |
| 646 c * input_channel_count + input_channel_index; |
| 647 for (int i = 0; i < filter_length; |
| 648 ++i, pixel_byte_index += source_byte_row_stride) { |
| 649 accval += filter_values[i] * source_data[pixel_byte_index]; |
| 650 } |
| 651 |
| 652 *target_byte = BringBackTo8(accval, absolute_values); |
| 653 } |
| 654 |
| 655 for (; r < image_size.height(); |
| 656 ++r, target_byte += output_byte_row_stride) { |
| 657 int accval = 0; |
| 658 int overlap_taps = image_size.height() - r + centrepoint; |
| 659 int pixel_byte_index = (r - centrepoint) * source_byte_row_stride + |
| 660 c * input_channel_count + input_channel_index; |
| 661 int i = 0; |
| 662 for (; i < overlap_taps - 1; |
| 663 ++i, pixel_byte_index += source_byte_row_stride) { |
| 664 accval += filter_values[i] * source_data[pixel_byte_index]; |
| 665 } |
| 666 |
| 667 for (; i < filter_length; ++i) |
| 668 accval += filter_values[i] * source_data[pixel_byte_index]; |
| 669 |
| 670 *target_byte = BringBackTo8(accval, absolute_values); |
| 671 } |
| 672 } |
| 673 } |
| 674 |
481 } // namespace skia | 675 } // namespace skia |
OLD | NEW |