ui/gfx/content_analysis.cc - Issue 13947013: Complete (but inefficient) implementation of the image retargetting method.

Unified Diff: ui/gfx/content_analysis.cc

Issue 13947013: Complete (but inefficient) implementation of the image retargetting method. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 7 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: ui/gfx/content_analysis.cc

diff --git a/ui/gfx/content_analysis.cc b/ui/gfx/content_analysis.cc

new file mode 100644

index 0000000000000000000000000000000000000000..62a93c39522c061512000c46fe5708153c15ef23

--- /dev/null

+++ b/ui/gfx/content_analysis.cc

@@ -0,0 +1,455 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "ui/gfx/content_analysis.h"

+#include <algorithm>

+#include <cmath>

+#include <deque>

+#include <functional>

+#include <limits>

+#include <numeric>

+#include <vector>

+#include "base/logging.h"

+#include "skia/ext/convolver.h"

+#include "third_party/skia/include/core/SkBitmap.h"

+#include "third_party/skia/include/core/SkSize.h"

+#include "ui/gfx/color_analysis.h"

+namespace {

+template<class InputIterator, class OutputIterator, class Compare>

+void SlidingWindowMinMax(InputIterator first,

+ InputIterator last,

+ OutputIterator output,

+ int window_size,

+ Compare cmp) {

+ typedef std::deque<std::pair<typename InputIterator::value_type, int> >

+ deque_type;

+ deque_type slider;

+ int front_tail_length = window_size / 2;

+ int i = 0;

+ DCHECK(front_tail_length < last - first);

+ // This min-max filter functions the way image filters do. The min/max we

+ // compute is placed in the center of the window. Thus, first we need to

+ // 'pre-load' the window with the slider with right-tail of the filter.

+ for (; first < last && i < front_tail_length; ++i, ++first)

+ slider.push_back(std::make_pair(*first, i));

+ for (; first < last; ++i, ++first, ++output) {

+ while (!slider.empty() && !cmp(slider.back().first, *first))

+ slider.pop_back();

+ slider.push_back(std::make_pair(*first, i));

+ while (slider.front().second <= i - window_size)

+ slider.pop_front();

+ *output = slider.front().first;

+ }

+ // Now at the tail-end we will simply need to use whatever value is left of

+ // the filter to compute the remaining front_tail_length taps in the output.

+ // If input shorter than window, remainder length needs to be adjusted.

+ front_tail_length = std::min(front_tail_length, i);

+ for (;front_tail_length >= 0; --front_tail_length, ++i) {

+ while (slider.front().second <= i - window_size)

+ slider.pop_front();

+ *output = slider.front().first;

+ }

+} // namespace

+namespace color_utils {

+void ApplyGaussianGradientMagnitudeFilter(SkBitmap* input_bitmap,

+ float kernel_sigma) {

+ // The purpose of this function is to highlight salient

+ // (attention-attracting?) features of the image for use in image

+ // retargetting.

+ SkAutoLockPixels source_lock(*input_bitmap);

+ DCHECK(input_bitmap);

+ DCHECK(input_bitmap->getPixels());

+ DCHECK_EQ(SkBitmap::kA8_Config, input_bitmap->config());

+ int tail_length = static_cast<int>(4.0f * kernel_sigma + 0.5f);

+ int kernel_size = tail_length * 2 + 1;

+ float smoother_weights[kernel_size];

+ float sigmasq = kernel_sigma * kernel_sigma;

+ float kernel_sum = 1.0f;

+ smoother_weights[tail_length] = 1.0f;

+ for (int ii = 1; ii <= tail_length; ++ii) {

+ float v = std::exp(-0.5f * ii * ii / sigmasq);

+ smoother_weights[tail_length + ii] = v;

+ smoother_weights[tail_length - ii] = v;

+ kernel_sum += 2.0f * v;

+ }

+ for (int i = 0; i <= kernel_size; ++i)

+ smoother_weights[i] /= kernel_sum;

+ float gradient_weights[kernel_size];

+ gradient_weights[tail_length] = 0.0;

+ for (int ii = 1; ii <= tail_length; ++ii) {

+ float v = sigmasq * smoother_weights[tail_length + ii] / ii;

+ gradient_weights[tail_length + ii] = v;

+ gradient_weights[tail_length - ii] = -v;

+ }

+ skia::ConvolutionFilter1D smoothing_filter;

+ skia::ConvolutionFilter1D gradient_filter;

+ smoothing_filter.AddFilter(0, smoother_weights, kernel_size);

+ gradient_filter.AddFilter(0, gradient_weights, kernel_size);

+ // To perform computations we will need one intermediate buffer. It can

+ // very well be just another bitmap.

+ const SkISize image_size = SkISize::Make(input_bitmap->width(),

+ input_bitmap->height());

+ SkBitmap intermediate;

+ intermediate.setConfig(

+ input_bitmap->config(), image_size.width(), image_size.height());

+ intermediate.allocPixels();

+ skia::SingleChannelConvolve1D_X(

+ static_cast<const uint8*>(input_bitmap->getPixels()),

Stephen White 2013/04/10 15:20:31 Suggestion: use input_bitmap->getAddr8(0, 0) to h

motek. 2013/04/11 16:18:38 Done.

+ static_cast<int>(input_bitmap->rowBytes()),

+ 0, input_bitmap->bytesPerPixel(),

+ smoothing_filter,

+ image_size,

+ static_cast<uint8*>(intermediate.getPixels()),

+ static_cast<int>(intermediate.rowBytes()),

+ 0, intermediate.bytesPerPixel(), false);

+ skia::SingleChannelConvolve1D_Y(

+ static_cast<const uint8*>(intermediate.getPixels()),

+ static_cast<int>(intermediate.rowBytes()),

+ 0, intermediate.bytesPerPixel(),

+ smoothing_filter,

+ image_size,

+ static_cast<uint8*>(input_bitmap->getPixels()),

+ static_cast<int>(input_bitmap->rowBytes()),

+ 0, input_bitmap->bytesPerPixel(), false);

+ // Now the gradient operator (we will need two buffers).

+ SkBitmap intermediate2;

+ intermediate2.setConfig(

+ input_bitmap->config(), image_size.width(), image_size.height());

+ intermediate2.allocPixels();

+ skia::SingleChannelConvolve1D_X(

+ static_cast<const uint8*>(input_bitmap->getPixels()),

+ static_cast<int>(input_bitmap->rowBytes()),

+ 0, input_bitmap->bytesPerPixel(),

+ gradient_filter,

+ image_size,

+ static_cast<uint8*>(intermediate.getPixels()),

+ static_cast<int>(intermediate.rowBytes()),

+ 0, intermediate.bytesPerPixel(), true);

+ skia::SingleChannelConvolve1D_Y(

+ static_cast<const uint8*>(input_bitmap->getPixels()),

+ static_cast<int>(input_bitmap->rowBytes()),

+ 0, input_bitmap->bytesPerPixel(),

+ gradient_filter,

+ image_size,

+ static_cast<uint8*>(intermediate2.getPixels()),

+ static_cast<int>(intermediate2.rowBytes()),

+ 0, intermediate2.bytesPerPixel(), true);

+ uint grad_max = 0;

+ for (int r = 0; r < image_size.height(); ++r) {

+ const uint8* grad_x_row = static_cast<const uint8*>(

+ intermediate.getPixels()) + r * intermediate.rowBytes();

+ const uint8* grad_y_row = static_cast<const uint8*>(

+ intermediate2.getPixels()) + r * intermediate2.rowBytes();

+ for (int c = 0; c < image_size.width(); ++c) {

+ uint grad_x = grad_x_row[c];

+ uint grad_y = grad_y_row[c];

+ grad_max = std::max(grad_max, grad_x * grad_x + grad_y * grad_y);

+ }

+ int bit_shift = 0;

+ if (grad_max > 255)

+ bit_shift = static_cast<int>(

+ std::log10(static_cast<float>(grad_max)) / std::log10(2.0f)) - 7;

Stephen White 2013/04/10 15:20:31 Not that it really matters, but these could probab

motek. 2013/04/11 16:18:38 They certainly could. I need log2 which is not ava

+ for (int r = 0; r < image_size.height(); ++r) {

+ const uint8* grad_x_row = static_cast<const uint8*>(

+ intermediate.getPixels()) + r * intermediate.rowBytes();

Stephen White 2013/04/10 15:20:31 intermediate.getAddr8(0, r) should work here, and

motek. 2013/04/11 16:18:38 Done.

+ const uint8* grad_y_row = static_cast<const uint8*>(

+ intermediate2.getPixels()) + r * intermediate2.rowBytes();

+ uint8* target_row = static_cast<uint8*>(

+ input_bitmap->getPixels()) + r * input_bitmap->rowBytes();

+ for (int c = 0; c < image_size.width(); ++c) {

+ uint grad_x = grad_x_row[c];

+ uint grad_y = grad_y_row[c];

+ target_row[c] = (grad_x * grad_x + grad_y * grad_y) >> bit_shift;

+ }

+void ExtractImageProfileInformation(const SkBitmap& input_bitmap,

+ const gfx::Rect& area,

+ const gfx::Size& target_size,

+ bool apply_log,

+ std::vector<float>* rows,

+ std::vector<float>* columns) {

+ SkAutoLockPixels source_lock(input_bitmap);

+ DCHECK(rows);

+ DCHECK(columns);

+ DCHECK(input_bitmap.getPixels());

+ DCHECK_EQ(SkBitmap::kA8_Config, input_bitmap.config());

+ DCHECK_GE(area.x(), 0);

+ DCHECK_GE(area.y(), 0);

+ DCHECK_LE(area.right(), input_bitmap.width());

+ DCHECK_LE(area.bottom(), input_bitmap.height());

+ // Make sure rows and columns are allocated and initialized to 0.

+ rows->resize(0);

+ columns->resize(0);

+ rows->resize(area.height(), 0);

+ columns->resize(area.width(), 0);

+ for (int r = 0; r < area.height(); ++r) {

+ const uint8* image_row = static_cast<const uint8*>(

+ input_bitmap.getPixels()) +

+ (r + area.y()) * input_bitmap.rowBytes() +

+ area.x(); // Points to the first byte of the row in the rectangle.

+ unsigned row_sum = 0;

+ for (int c = 0; c < area.width(); ++c, ++image_row) {

+ row_sum += *image_row;

+ (*columns)[c] += *image_row;

+ }

+ (*rows)[r] = row_sum;

+ }

+ if (apply_log) {

+ // Generally for processing we will need to take logarithm of this data.

+ // The option not to apply it is left principally as a test seam.

+ std::vector<float>::iterator it;

+ for (it = columns->begin(); it < columns->end(); ++it)

+ *it = std::log(1.0f + *it);

+ for (it = rows->begin(); it < rows->end(); ++it)

+ *it = std::log(1.0f + *it);

+ }

+ if (!target_size.IsEmpty()) {

+ // If the target size is given, profiles should be further processed through

+ // morphological closing. The idea is to close valleys smaller than what

+ // can be seen after scaling down to avoid deforming noticable features

+ // when profiles are used.

+ // Morphological closing is defined as dilation followed by errosion. In

+ // normal-speak: sliding-window maximum followed by minimum.

+ int column_window_size = 1 +

+ 2 * static_cast<int>(0.5f * area.width() / target_size.width() + 0.5);

Stephen White 2013/04/10 15:20:31 0.5 should probably be 0.5f, unless you really nee

motek. 2013/04/11 16:18:38 Done.

+ int row_window_size = 1 +

+ 2 * static_cast<int>(0.5f * area.height() / target_size.height() + 0.5);

Stephen White 2013/04/10 15:20:31 Same here.

motek. 2013/04/11 16:18:38 Done.

+ // Dilate and erode each profile with the given window size.

+ if (column_window_size >= 3) {

+ SlidingWindowMinMax(columns->begin(),

+ columns->end(),

+ columns->begin(),

+ column_window_size,

+ std::greater<float>());

+ SlidingWindowMinMax(columns->begin(),

+ columns->end(),

+ columns->begin(),

+ column_window_size,

+ std::less<float>());

+ }

+ if (row_window_size >= 3) {

+ SlidingWindowMinMax(rows->begin(),

+ rows->end(),

+ rows->begin(),

+ row_window_size,

+ std::greater<float>());

+ SlidingWindowMinMax(rows->begin(),

+ rows->end(),

+ rows->begin(),

+ row_window_size,

+ std::less<float>());

+ }

+float AutoSegmentPeaks(const std::vector<float>& input) {

+ // This is a thresholding operation based on Otsu's method.

+ std::vector<int> histogram(256, 0);

+ std::vector<float>::const_iterator it;

+ float value_min = std::numeric_limits<float>::max();

+ float value_max = std::numeric_limits<float>::min();

+ for (it = input.begin(); it < input.end(); ++it) {

Alexei Svitkine (slow) 2013/04/09 18:02:23 Be consistent whether you use iterators or indices

motek. 2013/04/11 16:18:38 I think I am consistent. I use iterators everywher

+ value_min = std::min(value_min, *it);

+ value_max = std::max(value_max, *it);

+ }

+ if (value_max - value_min <= std::numeric_limits<float>::epsilon() * 100) {

+ // Scaling won't work and there is nothing really to segment anyway.

+ return value_min;

+ }

+ float value_span = value_max - value_min;

+ for (it = input.begin(); it < input.end(); ++it) {

+ float scaled_value = (*it - value_min) / value_span * 255;

+ histogram[static_cast<int>(scaled_value)] += 1;

+ }

+ // Otsu's method seeks to maximize variance between two classes of pixels

+ // correspondng to valleys and peaks of the profile.

+ double w1 = histogram[0]; // Total weight of the first class.

+ double t1 = 0.5 * w1;

+ double w2 = 0;

+ double t2 = 0;

+ for (unsigned i = 1; i < histogram.size(); ++i) {

Alexei Svitkine (slow) 2013/04/09 18:02:23 Use size_t, here and throughout for the indices.

motek. 2013/04/11 16:18:38 Done.

+ w2 += histogram[i];

+ t2 += (0.5 + i) * histogram[i];

+ }

+ int max_index = 0;

+ double m1 = t1 / w1;

+ double m2 = t2 / w2;

+ double max_variance_score = w1 * w2 * (m1 - m2) * (m1 - m2);

+ // Iterate through all possible ways of splitting the histogram.

+ for (unsigned i = 1; i < histogram.size() - 1; i++) {

+ double bin_volume = (0.5 + i) * histogram[i];

+ w1 += histogram[i];

+ w2 -= histogram[i];

+ t2 -= bin_volume;

+ t1 += bin_volume;

+ m1 = t1 / w1;

+ m2 = t2 / w2;

+ double variance_score = w1 * w2 * (m1 - m2) * (m1 - m2);

+ if (variance_score >= max_variance_score) {

+ max_variance_score = variance_score;

+ max_index = i;

+ }

+ // max_index refers to the bin *after* which we need to split. The sought

+ // threshold is the centre of this bin, scaled back to the original range.

+ return value_span * (max_index + 0.5f) / 255.0f + value_min;

+SkBitmap* ComputeDecimatedImage(const SkBitmap& bitmap,

+ const std::vector<bool>& rows,

+ const std::vector<bool>& columns) {

+ SkAutoLockPixels source_lock(bitmap);

+ DCHECK(bitmap.getPixels());

+ DCHECK_GT(bitmap.bytesPerPixel(), 0);

+ DCHECK_EQ(bitmap.width(), static_cast<int>(columns.size()));

+ DCHECK_EQ(bitmap.height(), static_cast<int>(rows.size()));

+ unsigned tgt_row_count = std::count(rows.begin(), rows.end(), true);

+ unsigned tgt_column_count = std::count(columns.begin(), columns.end(), true);

+ if (tgt_row_count == 0 || tgt_column_count == 0)

+ return NULL; // Not quite an error, so no DCHECK. Just return null.

+ if (tgt_row_count == rows.size() && tgt_column_count == columns.size())

+ return NULL; // Equivalent of the situation where the target is empty.

+ // Declare and allocate the target image.

+ SkBitmap target;

+ target.setConfig(bitmap.config(), tgt_column_count, tgt_row_count);

+ target.allocPixels();

+ int tgt_row = 0;

+ for (int r = 0; r < bitmap.height(); ++r) {

+ if (!rows[r])

+ continue; // We can just skip this one.

+ uint8* src_row =

+ static_cast<uint8*>(bitmap.getPixels()) + r * bitmap.rowBytes();

+ uint8* insertion_target =

+ static_cast<uint8*>(target.getPixels()) + tgt_row * target.rowBytes();

+ int left_copy_pixel = -1;

+ for (int c = 0; c < bitmap.width(); ++c) {

+ if (left_copy_pixel < 0 && columns[c]) {

+ left_copy_pixel = c; // Next time we will start copying from here.

+ } else if (left_copy_pixel >= 0 && !columns[c]) {

+ // This closes a fragment we want to copy. We do it now.

+ size_t bytes_to_copy = (c - left_copy_pixel) * bitmap.bytesPerPixel();

+ memcpy(insertion_target,

+ src_row + left_copy_pixel * bitmap.bytesPerPixel(),

+ bytes_to_copy);

+ left_copy_pixel = -1;

+ insertion_target += bytes_to_copy;

+ }

+ // We can still have the tail end to process here.

+ if (left_copy_pixel >= 0) {

+ size_t bytes_to_copy =

+ (bitmap.width() - left_copy_pixel) * bitmap.bytesPerPixel();

+ memcpy(insertion_target,

+ src_row + left_copy_pixel * bitmap.bytesPerPixel(),

+ bytes_to_copy);

+ }

+ tgt_row++;

+ }

+ return new SkBitmap(target);

+SkBitmap* CreateRetargettedThumbnailImage(

+ const SkBitmap& source_bitmap,

+ const gfx::Size& target_size,

+ float kernel_sigma) {

+ // First thing we need for this method is to color-reduce the source_bitmap.

+ SkBitmap reduced_color;

+ reduced_color.setConfig(

+ SkBitmap::kA8_Config, source_bitmap.width(), source_bitmap.height());

+ reduced_color.allocPixels();

+ if (!ComputePrincipalComponentImage(source_bitmap, &reduced_color)) {

+ // CCIR601 luminance conversion vector.

+ gfx::Vector3dF transform(0.299f, 0.587f, 0.114f);

+ if (!ApplyColorReduction(source_bitmap, transform, true, &reduced_color)) {

+ DLOG(WARNING) << "Failed to compute luminance image from a screenshot. "

+ << "Cannot compute retargetted thumbnail.";

+ return NULL;

+ }

+ DLOG(WARNING) << "Could not compute principal color image for a thumbnail. "

+ << "Using luminance instead.";

+ }

+ // Turn 'color-reduced' image into the 'energy' image.

+ ApplyGaussianGradientMagnitudeFilter(&reduced_color, kernel_sigma);

+ // Extract vertical and horizontal projection of image features.

+ std::vector<float> row_profile;

+ std::vector<float> column_profile;

+ ExtractImageProfileInformation(reduced_color,

+ gfx::Rect(reduced_color.width(),

+ reduced_color.height()),

+ target_size,

+ true,

+ &row_profile,

+ &column_profile);

+ float threshold_rows = AutoSegmentPeaks(row_profile);

+ float threshold_columns = AutoSegmentPeaks(column_profile);

+ // Apply thresholding.

+ std::vector<bool> included_rows(row_profile.size(), false);

+ std::transform(row_profile.begin(),

+ row_profile.end(),

+ included_rows.begin(),

+ std::bind2nd(std::greater<float>(), threshold_rows));

+ std::vector<bool> included_columns(column_profile.size(), false);

+ std::transform(column_profile.begin(),

+ column_profile.end(),

+ included_columns.begin(),

+ std::bind2nd(std::greater<float>(), threshold_columns));

+ // Use the original image and computed inclusion vectors to create a resized

+ // image.

+ return ComputeDecimatedImage(source_bitmap, included_rows, included_columns);

+} // color_utils

« skia/ext/convolver.cc ('K') | « ui/gfx/content_analysis.h ('k') | ui/gfx/content_analysis_unittest.cc » ('j') | ui/gfx/content_analysis_unittest.cc » ('J')