Chromium Code Reviews| Index: media/filters/wsola_internals.cc |
| diff --git a/media/filters/wsola_internals.cc b/media/filters/wsola_internals.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..27d07a05ac8eb6cd7333aef2af3479bdfeecd05d |
| --- /dev/null |
| +++ b/media/filters/wsola_internals.cc |
| @@ -0,0 +1,246 @@ |
| +// Copyright 2013 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "media/filters/wsola_internals.h" |
| + |
| +#include <algorithm> |
| +#include <cmath> |
| +#include <limits> |
| + |
| +#include "base/logging.h" |
| +#include "base/memory/scoped_ptr.h" |
| +#include "media/base/audio_bus.h" |
| + |
| +namespace media { |
| + |
| +namespace internal { |
| + |
| +bool InInterval(int n, Interval q) { |
| + return n >= q.first && n <= q.second; |
| +} |
| + |
| +float MultiChannelSimilarityMeasure(const float* dot_prod_a_b, |
| + const float* energy_a, |
| + const float* energy_b, |
| + int channels) { |
| + const float kEpsilon = 1e-12; |
| + float similarity_measure = 0; |
| + for (int n = 0; n < channels; ++n) { |
| + similarity_measure += dot_prod_a_b[n] / sqrt(energy_a[n] * energy_b[n] + |
| + kEpsilon); |
| + } |
| + return similarity_measure; |
| +} |
| + |
| +void MultiChannelDotProduct(const AudioBus* a, |
| + int frame_offset_a, |
| + const AudioBus* b, |
| + int frame_offset_b, |
| + int num_frames, |
| + float* dot_product) { |
| + DCHECK_EQ(a->channels(), b->channels()); |
| + DCHECK_GE(frame_offset_a, 0); |
| + DCHECK_GE(frame_offset_b, 0); |
| + DCHECK_LE(frame_offset_a + num_frames, a->frames()); |
| + DCHECK_LE(frame_offset_b + num_frames, b->frames()); |
| + |
| + memset(dot_product, 0, sizeof(*dot_product) * a->channels()); |
| + for (int k = 0; k < a->channels(); ++k) { |
| + const float* ch_a = a->channel(k) + frame_offset_a; |
| + const float* ch_b = b->channel(k) + frame_offset_b; |
| + for (int n = 0; n < num_frames; ++n) { |
| + dot_product[k] += *ch_a++ * *ch_b++; |
| + } |
| + } |
| +} |
| + |
| +void MultiChannelMovingWindowEnergies(const AudioBus* input, |
|
marpan
2013/08/02 17:56:54
The usage of terms: frames, blocks, and windows, m
turaj
2013/08/02 23:45:59
I tired to be more consistent.
On 2013/08/02 17:5
|
| + int frames_per_window, |
| + float* energy) { |
| + int num_blocks = input->frames() - (frames_per_window - 1); |
|
marpan
2013/08/02 17:56:54
Is it more consistent to use "num_frames" here ins
turaj
2013/08/02 23:45:59
But this is really the number of blocks. We have t
marpan
2013/08/06 17:14:10
Ok. Makes sense. You may want to add your comment
|
| + int channels = input->channels(); |
| + |
| + for (int k = 0; k < input->channels(); ++k) { |
| + const float* input_channel = input->channel(k); |
| + |
| + energy[k] = 0; |
| + |
| + // First window of channel |k|. |
| + for (int m = 0; m < frames_per_window; ++m) |
| + energy[k] += input_channel[m] * input_channel[m]; |
|
marpan
2013/08/02 17:56:54
Easier to follow if you put { } around this one-li
turaj
2013/08/02 23:45:59
Done.
|
| + |
| + const float* slide_out = input_channel; |
| + const float* slide_in = &input_channel[frames_per_window]; |
| + for (int n = 1; n < num_blocks; ++n, ++slide_in, ++slide_out) { |
| + energy[k + n * channels] = energy[k + (n - 1) * channels] - *slide_out * |
| + *slide_out + *slide_in * *slide_in; |
| + } |
| + } |
| +} |
| + |
| +// Fit the curve f(x) = a * x^2 + b * x + c such that |
| +// f(-1) = |y[0]| |
| +// f(0) = |y[1]| |
| +// f(1) = |y[2]|. |
| +void CubicInterpolation(const float* y_values, |
| + float* extremum, |
| + float* extremum_value) { |
| + float a = 0.5f * (y_values[2] + y_values[0]) - y_values[1]; |
| + float b = 0.5f * (y_values[2] - y_values[0]); |
| + float c = y_values[1]; |
| + |
| + *extremum = -b / (2.f * a); |
|
marpan
2013/08/02 17:56:54
"a" will never be zero here because this is only c
turaj
2013/08/02 23:45:59
Right, but maybe I should consider two other cases
|
| + *extremum_value = a * (*extremum) * (*extremum) + b * (*extremum) + c; |
| +} |
| + |
| +int DecimatedSearch(int decimation, |
| + Interval exclude_interval, |
| + const AudioBus* target_block, |
| + const AudioBus* search_segment, |
| + const float* energy_target_block, |
| + const float* energy_candid_blocks) { |
| + int channels = search_segment->channels(); |
| + int block_size = target_block->frames(); |
| + int num_candid_frames = search_segment->frames() - (block_size - 1); |
| + scoped_ptr<float[]> dot_prod(new float[channels]); |
| + float similarity[3]; // Three elements for cubic interpolation. |
| + |
| + int n = 0; |
| + MultiChannelDotProduct(target_block, 0, search_segment, n, block_size, |
| + dot_prod.get()); |
| + similarity[0] = MultiChannelSimilarityMeasure( |
| + dot_prod.get(), energy_target_block, &energy_candid_blocks[n * channels], |
| + channels); |
| + |
| + // Set the starting point as optimal point. |
| + float best_similarity = similarity[0]; |
| + int optimal_index = 0; |
| + |
| + n += decimation; |
| + MultiChannelDotProduct(target_block, 0, search_segment, n, block_size, |
| + dot_prod.get()); |
| + similarity[1] = MultiChannelSimilarityMeasure( |
| + dot_prod.get(), energy_target_block, &energy_candid_blocks[n * channels], |
| + channels); |
| + |
| + n += decimation; |
|
marpan
2013/08/02 17:56:54
What if n>=num_candid_frames before entering loop,
turaj
2013/08/02 23:45:59
Although with the setting in AudioRendererAlgorith
|
| + for (; n < num_candid_frames; n += decimation) { |
| + MultiChannelDotProduct(target_block, 0, search_segment, n, block_size, |
| + dot_prod.get()); |
| + |
| + similarity[2] = MultiChannelSimilarityMeasure( |
| + dot_prod.get(), energy_target_block, |
| + &energy_candid_blocks[n * channels], channels); |
| + |
| + if (similarity[1] > similarity[0] && |
| + similarity[1] > similarity[2]) { |
| + // A local maximum is found. Do a cubic interpolation for a better |
| + // estimate of candid maximum. |
| + float normalized_candid_optimal_index; |
| + float candid_best_similarity; |
| + CubicInterpolation(similarity, &normalized_candid_optimal_index, |
| + &candid_best_similarity); |
| + |
| + int candid_optimal_index = n - decimation + |
| + static_cast<int>(floor(normalized_candid_optimal_index * decimation + |
| + 0.5f)); |
| + if (candid_best_similarity > best_similarity && |
| + !InInterval(candid_optimal_index, exclude_interval)) { |
| + optimal_index = candid_optimal_index; |
| + best_similarity = candid_best_similarity; |
| + } |
| + } else if (n + decimation >= num_candid_frames && |
| + similarity[2] > best_similarity && !InInterval(n, exclude_interval)) { |
| + // If this is the end-point and has a better similarity-measure than |
| + // optimal, then we accept it as optimal point. |
| + optimal_index = n; |
| + best_similarity = similarity[2]; |
| + } |
| + memmove(similarity, &similarity[1], 2 * sizeof(*similarity)); |
| + } |
| + return optimal_index; |
| +} |
| + |
| +int PartialSearch(int lim_low, |
| + int lim_high, |
| + Interval exclude_interval, |
| + const AudioBus* target_block, |
| + const AudioBus* search_segment, |
| + const float* energy_target_block, |
| + const float* energy_candid_blocks) { |
| + int channels = search_segment->channels(); |
| + int block_size = target_block->frames(); |
| + scoped_ptr<float[]> dot_prod(new float[channels]); |
| + |
| + float best_similarity = std::numeric_limits<float>::min(); |
| + int optimal_index = 0; |
| + |
| + for (int n = lim_low; n <= lim_high; ++n) { |
| + if (InInterval(n, exclude_interval)) { |
| + continue; |
| + } |
| + MultiChannelDotProduct(target_block, 0, search_segment, n, block_size, |
| + dot_prod.get()); |
| + |
| + float similarity = MultiChannelSimilarityMeasure( |
| + dot_prod.get(), energy_target_block, |
| + &energy_candid_blocks[n * channels], channels); |
| + |
| + if (similarity > best_similarity) { |
| + best_similarity = similarity; |
| + optimal_index = n; |
| + } |
| + } |
| + |
| + return optimal_index; |
| +} |
| + |
| +int OptimalIndex(const AudioBus* search_segment, |
| + const AudioBus* target_block, |
| + Interval exclude_interval) { |
| + int channels = search_segment->channels(); |
| + DCHECK(channels == target_block->channels()); |
| + int block_size = target_block->frames(); |
| + int num_candid_frames = search_segment->frames() - (block_size - 1); |
|
marpan
2013/08/02 17:56:54
Why the subtraction of the second term?, is that a
turaj
2013/08/02 23:45:59
The total frames (samples) in search-block is all
|
| + const int kSearchDecimation = 5; |
| + |
| + scoped_ptr<float[]> energy_target_frame(new float[channels]); |
| + scoped_ptr<float[]> energy_candid_frames( |
| + new float[channels * num_candid_frames]); |
| + |
| + // Energy of all candid frames. |
| + MultiChannelMovingWindowEnergies(search_segment, block_size, |
| + energy_candid_frames.get()); |
| + |
| + // Energy of target frame. |
| + MultiChannelDotProduct(target_block, 0, target_block, 0, |
| + block_size, energy_target_frame.get()); |
| + |
| + int optimal_index = DecimatedSearch(kSearchDecimation, |
| + exclude_interval, target_block, |
| + search_segment, energy_target_frame.get(), |
| + energy_candid_frames.get()); |
| + |
| + int lim_low = std::max(0, optimal_index - kSearchDecimation); |
| + int lim_high = std::min(num_candid_frames - 1, |
| + optimal_index + kSearchDecimation); |
| + return PartialSearch(lim_low, lim_high, exclude_interval, target_block, |
| + search_segment, energy_target_frame.get(), |
| + energy_candid_frames.get()); |
| +} |
| + |
| +void GetSymmetricHanningWindow(int window_length, float* window) { |
| + const float kPi = 3.14159265f; |
| + const float scale = 2.f * kPi / static_cast<float>(window_length); |
| + for (int n = 0; n < window_length; ++n) |
| + window[n] = 0.5 * (1 - cos(n * scale)); |
| +} |
| + |
| +} // namespace internal |
| + |
| +} // namespace media |
| + |
| + |
| + |
| + |