Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(92)

Side by Side Diff: ui/gfx/content_analysis.cc

Issue 13947013: Complete (but inefficient) implementation of the image retargetting method. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Addressed reviewers' remarks. Created 7 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "ui/gfx/content_analysis.h"
6
7 #include <algorithm>
8 #include <cmath>
9 #include <deque>
10 #include <functional>
11 #include <limits>
12 #include <numeric>
13 #include <vector>
14
15 #include "base/logging.h"
16 #include "skia/ext/convolver.h"
17 #include "third_party/skia/include/core/SkBitmap.h"
18 #include "third_party/skia/include/core/SkSize.h"
19 #include "ui/gfx/color_analysis.h"
20
21 namespace {
22
23 template<class InputIterator, class OutputIterator, class Compare>
24 void SlidingWindowMinMax(InputIterator first,
25 InputIterator last,
26 OutputIterator output,
27 int window_size,
28 Compare cmp) {
29 typedef std::deque<std::pair<typename InputIterator::value_type, int> >
30 deque_type;
31 deque_type slider;
32 int front_tail_length = window_size / 2;
33 int i = 0;
34 DCHECK(front_tail_length < last - first);
Alexei Svitkine (slow) 2013/04/11 18:25:37 Can you use DCHECK_LT?
motek. 2013/04/12 10:50:59 Done.
35 // This min-max filter functions the way image filters do. The min/max we
36 // compute is placed in the center of the window. Thus, first we need to
37 // 'pre-load' the window with the slider with right-tail of the filter.
38 for (; first < last && i < front_tail_length; ++i, ++first)
39 slider.push_back(std::make_pair(*first, i));
40
41 for (; first < last; ++i, ++first, ++output) {
42 while (!slider.empty() && !cmp(slider.back().first, *first))
43 slider.pop_back();
44 slider.push_back(std::make_pair(*first, i));
45
46 while (slider.front().second <= i - window_size)
47 slider.pop_front();
48 *output = slider.front().first;
49 }
50
51 // Now at the tail-end we will simply need to use whatever value is left of
52 // the filter to compute the remaining front_tail_length taps in the output.
53
54 // If input shorter than window, remainder length needs to be adjusted.
55 front_tail_length = std::min(front_tail_length, i);
56 for (;front_tail_length >= 0; --front_tail_length, ++i) {
Alexei Svitkine (slow) 2013/04/11 18:25:37 Nit: Add a space after the first ;
motek. 2013/04/12 10:50:59 Done.
57 while (slider.front().second <= i - window_size)
58 slider.pop_front();
59 *output = slider.front().first;
60 }
61 }
62
63 } // namespace
64
65 namespace color_utils {
66
67 void ApplyGaussianGradientMagnitudeFilter(SkBitmap* input_bitmap,
68 float kernel_sigma) {
69 // The purpose of this function is to highlight salient
70 // (attention-attracting?) features of the image for use in image
71 // retargetting.
72 SkAutoLockPixels source_lock(*input_bitmap);
73 DCHECK(input_bitmap);
74 DCHECK(input_bitmap->getPixels());
75 DCHECK_EQ(SkBitmap::kA8_Config, input_bitmap->config());
76
77 int tail_length = static_cast<int>(4.0f * kernel_sigma + 0.5f);
78 int kernel_size = tail_length * 2 + 1;
79 float smoother_weights[kernel_size];
80 float sigmasq = kernel_sigma * kernel_sigma;
81 float kernel_sum = 1.0f;
82
83 smoother_weights[tail_length] = 1.0f;
84
85 for (int ii = 1; ii <= tail_length; ++ii) {
86 float v = std::exp(-0.5f * ii * ii / sigmasq);
87 smoother_weights[tail_length + ii] = v;
88 smoother_weights[tail_length - ii] = v;
89 kernel_sum += 2.0f * v;
90 }
91
92 for (int i = 0; i <= kernel_size; ++i)
93 smoother_weights[i] /= kernel_sum;
94
95 float gradient_weights[kernel_size];
96 gradient_weights[tail_length] = 0.0;
97 for (int ii = 1; ii <= tail_length; ++ii) {
98 float v = sigmasq * smoother_weights[tail_length + ii] / ii;
99 gradient_weights[tail_length + ii] = v;
100 gradient_weights[tail_length - ii] = -v;
101 }
102
103 skia::ConvolutionFilter1D smoothing_filter;
104 skia::ConvolutionFilter1D gradient_filter;
105 smoothing_filter.AddFilter(0, smoother_weights, kernel_size);
106 gradient_filter.AddFilter(0, gradient_weights, kernel_size);
107
108 // To perform computations we will need one intermediate buffer. It can
109 // very well be just another bitmap.
110 const SkISize image_size = SkISize::Make(input_bitmap->width(),
111 input_bitmap->height());
112 SkBitmap intermediate;
113 intermediate.setConfig(
114 input_bitmap->config(), image_size.width(), image_size.height());
115 intermediate.allocPixels();
116
117 skia::SingleChannelConvolveX1D(
118 input_bitmap->getAddr8(0, 0),
119 static_cast<int>(input_bitmap->rowBytes()),
120 0, input_bitmap->bytesPerPixel(),
121 smoothing_filter,
122 image_size,
123 intermediate.getAddr8(0, 0),
124 static_cast<int>(intermediate.rowBytes()),
125 0, intermediate.bytesPerPixel(), false);
126 skia::SingleChannelConvolveY1D(
127 intermediate.getAddr8(0, 0),
128 static_cast<int>(intermediate.rowBytes()),
129 0, intermediate.bytesPerPixel(),
130 smoothing_filter,
131 image_size,
132 input_bitmap->getAddr8(0, 0),
133 static_cast<int>(input_bitmap->rowBytes()),
134 0, input_bitmap->bytesPerPixel(), false);
135
136 // Now the gradient operator (we will need two buffers).
137 SkBitmap intermediate2;
138 intermediate2.setConfig(
139 input_bitmap->config(), image_size.width(), image_size.height());
140 intermediate2.allocPixels();
141
142 skia::SingleChannelConvolveX1D(
143 input_bitmap->getAddr8(0, 0),
144 static_cast<int>(input_bitmap->rowBytes()),
145 0, input_bitmap->bytesPerPixel(),
146 gradient_filter,
147 image_size,
148 intermediate.getAddr8(0, 0),
149 static_cast<int>(intermediate.rowBytes()),
150 0, intermediate.bytesPerPixel(), true);
151 skia::SingleChannelConvolveY1D(
152 input_bitmap->getAddr8(0, 0),
153 static_cast<int>(input_bitmap->rowBytes()),
154 0, input_bitmap->bytesPerPixel(),
155 gradient_filter,
156 image_size,
157 intermediate2.getAddr8(0, 0),
158 static_cast<int>(intermediate2.rowBytes()),
159 0, intermediate2.bytesPerPixel(), true);
160
161 uint grad_max = 0;
162 for (int r = 0; r < image_size.height(); ++r) {
163 const uint8* grad_x_row =
164 intermediate.getAddr8(0, 0) + r * intermediate.rowBytes();
165 const uint8* grad_y_row =
166 intermediate2.getAddr8(0, 0) + r * intermediate2.rowBytes();
167 for (int c = 0; c < image_size.width(); ++c) {
168 uint grad_x = grad_x_row[c];
169 uint grad_y = grad_y_row[c];
170 grad_max = std::max(grad_max, grad_x * grad_x + grad_y * grad_y);
171 }
172 }
173
174 int bit_shift = 0;
175 if (grad_max > 255)
176 bit_shift = static_cast<int>(
177 std::log10(static_cast<float>(grad_max)) / std::log10(2.0f)) - 7;
178 for (int r = 0; r < image_size.height(); ++r) {
179 const uint8* grad_x_row =
180 intermediate.getAddr8(0, 0) + r * intermediate.rowBytes();
Stephen White 2013/04/11 17:16:38 Just a suggestion, but this could be intermediate.
motek. 2013/04/12 10:50:59 Done.
181 const uint8* grad_y_row =
182 intermediate2.getAddr8(0, 0) + r * intermediate2.rowBytes();
Stephen White 2013/04/11 17:16:38 Same here.
motek. 2013/04/12 10:50:59 Done.
183 uint8* target_row =
184 input_bitmap->getAddr8(0, 0) + r * input_bitmap->rowBytes();
Stephen White 2013/04/11 17:16:38 Same here.
motek. 2013/04/12 10:50:59 Done.
185 for (int c = 0; c < image_size.width(); ++c) {
186 uint grad_x = grad_x_row[c];
187 uint grad_y = grad_y_row[c];
188 target_row[c] = (grad_x * grad_x + grad_y * grad_y) >> bit_shift;
189 }
190 }
191 }
192
193 void ExtractImageProfileInformation(const SkBitmap& input_bitmap,
194 const gfx::Rect& area,
195 const gfx::Size& target_size,
196 bool apply_log,
197 std::vector<float>* rows,
198 std::vector<float>* columns) {
199 SkAutoLockPixels source_lock(input_bitmap);
200 DCHECK(rows);
201 DCHECK(columns);
202 DCHECK(input_bitmap.getPixels());
203 DCHECK_EQ(SkBitmap::kA8_Config, input_bitmap.config());
204 DCHECK_GE(area.x(), 0);
205 DCHECK_GE(area.y(), 0);
206 DCHECK_LE(area.right(), input_bitmap.width());
207 DCHECK_LE(area.bottom(), input_bitmap.height());
208
209 // Make sure rows and columns are allocated and initialized to 0.
210 rows->resize(0);
211 columns->resize(0);
212 rows->resize(area.height(), 0);
213 columns->resize(area.width(), 0);
214
215 for (int r = 0; r < area.height(); ++r) {
216 const uint8* image_row = input_bitmap.getAddr8(0, 0) +
Stephen White 2013/04/11 17:16:38 Same here.
motek. 2013/04/12 10:50:59 Done.
217 (r + area.y()) * input_bitmap.rowBytes() +
218 area.x(); // Points to the first byte of the row in the rectangle.
219 unsigned row_sum = 0;
220 for (int c = 0; c < area.width(); ++c, ++image_row) {
221 row_sum += *image_row;
222 (*columns)[c] += *image_row;
223 }
224 (*rows)[r] = row_sum;
225 }
226
227 if (apply_log) {
228 // Generally for processing we will need to take logarithm of this data.
229 // The option not to apply it is left principally as a test seam.
230 std::vector<float>::iterator it;
231 for (it = columns->begin(); it < columns->end(); ++it)
232 *it = std::log(1.0f + *it);
233
234 for (it = rows->begin(); it < rows->end(); ++it)
235 *it = std::log(1.0f + *it);
236 }
237
238 if (!target_size.IsEmpty()) {
239 // If the target size is given, profiles should be further processed through
240 // morphological closing. The idea is to close valleys smaller than what
241 // can be seen after scaling down to avoid deforming noticable features
242 // when profiles are used.
243 // Morphological closing is defined as dilation followed by errosion. In
244 // normal-speak: sliding-window maximum followed by minimum.
245 int column_window_size = 1 + 2 *
246 static_cast<int>(0.5f * area.width() / target_size.width() + 0.5f);
247 int row_window_size = 1 + 2 *
248 static_cast<int>(0.5f * area.height() / target_size.height() + 0.5f);
249
250 // Dilate and erode each profile with the given window size.
251 if (column_window_size >= 3) {
252 SlidingWindowMinMax(columns->begin(),
253 columns->end(),
254 columns->begin(),
255 column_window_size,
256 std::greater<float>());
257 SlidingWindowMinMax(columns->begin(),
258 columns->end(),
259 columns->begin(),
260 column_window_size,
261 std::less<float>());
262 }
263
264 if (row_window_size >= 3) {
265 SlidingWindowMinMax(rows->begin(),
266 rows->end(),
267 rows->begin(),
268 row_window_size,
269 std::greater<float>());
270 SlidingWindowMinMax(rows->begin(),
271 rows->end(),
272 rows->begin(),
273 row_window_size,
274 std::less<float>());
275 }
276 }
277 }
278
279 float AutoSegmentPeaks(const std::vector<float>& input) {
280 // This is a thresholding operation based on Otsu's method.
281 std::vector<int> histogram(256, 0);
282 std::vector<float>::const_iterator it;
283
284 float value_min = std::numeric_limits<float>::max();
285 float value_max = std::numeric_limits<float>::min();
286
287 for (it = input.begin(); it < input.end(); ++it) {
288 value_min = std::min(value_min, *it);
289 value_max = std::max(value_max, *it);
290 }
291
292 if (value_max - value_min <= std::numeric_limits<float>::epsilon() * 100) {
293 // Scaling won't work and there is nothing really to segment anyway.
294 return value_min;
295 }
296
297 float value_span = value_max - value_min;
298 for (it = input.begin(); it < input.end(); ++it) {
299 float scaled_value = (*it - value_min) / value_span * 255;
300 histogram[static_cast<int>(scaled_value)] += 1;
301 }
302
303 // Otsu's method seeks to maximize variance between two classes of pixels
304 // correspondng to valleys and peaks of the profile.
305 double w1 = histogram[0]; // Total weight of the first class.
306 double t1 = 0.5 * w1;
307 double w2 = 0;
308 double t2 = 0;
309 for (size_t i = 1; i < histogram.size(); ++i) {
310 w2 += histogram[i];
311 t2 += (0.5 + i) * histogram[i];
312 }
313
314 size_t max_index = 0;
315 double m1 = t1 / w1;
316 double m2 = t2 / w2;
317 double max_variance_score = w1 * w2 * (m1 - m2) * (m1 - m2);
318 // Iterate through all possible ways of splitting the histogram.
319 for (size_t i = 1; i < histogram.size() - 1; i++) {
320 double bin_volume = (0.5 + i) * histogram[i];
321 w1 += histogram[i];
322 w2 -= histogram[i];
323 t2 -= bin_volume;
324 t1 += bin_volume;
325 m1 = t1 / w1;
326 m2 = t2 / w2;
Alexei Svitkine (slow) 2013/04/11 18:25:37 Does the algorithm ensure that w1 and w2 are never
motek. 2013/04/12 10:50:59 I believe it effectively does. First of all, if mi
327 double variance_score = w1 * w2 * (m1 - m2) * (m1 - m2);
328 if (variance_score >= max_variance_score) {
329 max_variance_score = variance_score;
330 max_index = i;
331 }
332 }
333
334 // max_index refers to the bin *after* which we need to split. The sought
335 // threshold is the centre of this bin, scaled back to the original range.
336 return value_span * (max_index + 0.5f) / 255.0f + value_min;
337 }
338
339 SkBitmap* ComputeDecimatedImage(const SkBitmap& bitmap,
340 const std::vector<bool>& rows,
341 const std::vector<bool>& columns) {
342 SkAutoLockPixels source_lock(bitmap);
343 DCHECK(bitmap.getPixels());
344 DCHECK_GT(bitmap.bytesPerPixel(), 0);
345 DCHECK_EQ(bitmap.width(), static_cast<int>(columns.size()));
346 DCHECK_EQ(bitmap.height(), static_cast<int>(rows.size()));
347
348 unsigned tgt_row_count = std::count(rows.begin(), rows.end(), true);
Alexei Svitkine (slow) 2013/04/11 18:25:37 Please don't use abbrevs; target_row_count.
motek. 2013/04/12 10:50:59 Done.
349 unsigned tgt_column_count = std::count(columns.begin(), columns.end(), true);
Alexei Svitkine (slow) 2013/04/11 18:25:37 Please don't use abbrevs; target_column_count.
motek. 2013/04/12 10:50:59 Done.
350
351 if (tgt_row_count == 0 || tgt_column_count == 0)
Alexei Svitkine (slow) 2013/04/11 18:25:37 Put the checks for |target_row_count| right after
motek. 2013/04/12 10:50:59 This isn't quite what the code does. The second ch
352 return NULL; // Not quite an error, so no DCHECK. Just return null.
353
354 if (tgt_row_count == rows.size() && tgt_column_count == columns.size())
355 return NULL; // Equivalent of the situation where the target is empty.
356
357 // Declare and allocate the target image.
358 SkBitmap target;
359 target.setConfig(bitmap.config(), tgt_column_count, tgt_row_count);
360 target.allocPixels();
361
362 int tgt_row = 0;
363 for (int r = 0; r < bitmap.height(); ++r) {
364 if (!rows[r])
365 continue; // We can just skip this one.
366 uint8* src_row =
367 static_cast<uint8*>(bitmap.getPixels()) + r * bitmap.rowBytes();
368 uint8* insertion_target =
369 static_cast<uint8*>(target.getPixels()) + tgt_row * target.rowBytes();
370 int left_copy_pixel = -1;
371 for (int c = 0; c < bitmap.width(); ++c) {
372 if (left_copy_pixel < 0 && columns[c]) {
373 left_copy_pixel = c; // Next time we will start copying from here.
374 } else if (left_copy_pixel >= 0 && !columns[c]) {
375 // This closes a fragment we want to copy. We do it now.
376 size_t bytes_to_copy = (c - left_copy_pixel) * bitmap.bytesPerPixel();
377 memcpy(insertion_target,
378 src_row + left_copy_pixel * bitmap.bytesPerPixel(),
379 bytes_to_copy);
380 left_copy_pixel = -1;
381 insertion_target += bytes_to_copy;
382 }
383 }
384 // We can still have the tail end to process here.
385 if (left_copy_pixel >= 0) {
386 size_t bytes_to_copy =
387 (bitmap.width() - left_copy_pixel) * bitmap.bytesPerPixel();
388 memcpy(insertion_target,
389 src_row + left_copy_pixel * bitmap.bytesPerPixel(),
390 bytes_to_copy);
391 }
392 tgt_row++;
393 }
394
395 return new SkBitmap(target);
396 }
397
398 SkBitmap* CreateRetargettedThumbnailImage(
399 const SkBitmap& source_bitmap,
400 const gfx::Size& target_size,
401 float kernel_sigma) {
402 // First thing we need for this method is to color-reduce the source_bitmap.
403 SkBitmap reduced_color;
404 reduced_color.setConfig(
405 SkBitmap::kA8_Config, source_bitmap.width(), source_bitmap.height());
406 reduced_color.allocPixels();
407
408 if (!ComputePrincipalComponentImage(source_bitmap, &reduced_color)) {
409 // CCIR601 luminance conversion vector.
410 gfx::Vector3dF transform(0.299f, 0.587f, 0.114f);
411 if (!ApplyColorReduction(source_bitmap, transform, true, &reduced_color)) {
412 DLOG(WARNING) << "Failed to compute luminance image from a screenshot. "
413 << "Cannot compute retargetted thumbnail.";
414 return NULL;
415 }
416 DLOG(WARNING) << "Could not compute principal color image for a thumbnail. "
417 << "Using luminance instead.";
418 }
419
420 // Turn 'color-reduced' image into the 'energy' image.
421 ApplyGaussianGradientMagnitudeFilter(&reduced_color, kernel_sigma);
422
423 // Extract vertical and horizontal projection of image features.
424 std::vector<float> row_profile;
425 std::vector<float> column_profile;
426 ExtractImageProfileInformation(reduced_color,
427 gfx::Rect(reduced_color.width(),
428 reduced_color.height()),
429 target_size,
430 true,
431 &row_profile,
432 &column_profile);
433 float threshold_rows = AutoSegmentPeaks(row_profile);
434 float threshold_columns = AutoSegmentPeaks(column_profile);
435
436 // Apply thresholding.
437 std::vector<bool> included_rows(row_profile.size(), false);
438 std::transform(row_profile.begin(),
439 row_profile.end(),
440 included_rows.begin(),
441 std::bind2nd(std::greater<float>(), threshold_rows));
442
443 std::vector<bool> included_columns(column_profile.size(), false);
444 std::transform(column_profile.begin(),
445 column_profile.end(),
446 included_columns.begin(),
447 std::bind2nd(std::greater<float>(), threshold_columns));
448
449 // Use the original image and computed inclusion vectors to create a resized
450 // image.
451 return ComputeDecimatedImage(source_bitmap, included_rows, included_columns);
452 }
453
454 } // color_utils
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698