media/filters/wsola_internals.cc - Issue 19111004: Upgrade AudioRendererAlgorithm to use WSOLA,

Unified Diff: media/filters/wsola_internals.cc

Issue 19111004: Upgrade AudioRendererAlgorithm to use WSOLA, (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Avoid malloc in every iteration by defining some member varibles. Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: media/filters/wsola_internals.cc

diff --git a/media/filters/wsola_internals.cc b/media/filters/wsola_internals.cc

new file mode 100644

index 0000000000000000000000000000000000000000..27d07a05ac8eb6cd7333aef2af3479bdfeecd05d

--- /dev/null

+++ b/media/filters/wsola_internals.cc

@@ -0,0 +1,246 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "media/filters/wsola_internals.h"

+#include <algorithm>

+#include <cmath>

+#include <limits>

+#include "base/logging.h"

+#include "base/memory/scoped_ptr.h"

+#include "media/base/audio_bus.h"

+namespace media {

+namespace internal {

+bool InInterval(int n, Interval q) {

+ return n >= q.first && n <= q.second;

+float MultiChannelSimilarityMeasure(const float* dot_prod_a_b,

+ const float* energy_a,

+ const float* energy_b,

+ int channels) {

+ const float kEpsilon = 1e-12;

+ float similarity_measure = 0;

+ for (int n = 0; n < channels; ++n) {

+ similarity_measure += dot_prod_a_b[n] / sqrt(energy_a[n] * energy_b[n] +

+ kEpsilon);

+ }

+ return similarity_measure;

+void MultiChannelDotProduct(const AudioBus* a,

+ int frame_offset_a,

+ const AudioBus* b,

+ int frame_offset_b,

+ int num_frames,

+ float* dot_product) {

+ DCHECK_EQ(a->channels(), b->channels());

+ DCHECK_GE(frame_offset_a, 0);

+ DCHECK_GE(frame_offset_b, 0);

+ DCHECK_LE(frame_offset_a + num_frames, a->frames());

+ DCHECK_LE(frame_offset_b + num_frames, b->frames());

+ memset(dot_product, 0, sizeof(*dot_product) * a->channels());

+ for (int k = 0; k < a->channels(); ++k) {

+ const float* ch_a = a->channel(k) + frame_offset_a;

+ const float* ch_b = b->channel(k) + frame_offset_b;

+ for (int n = 0; n < num_frames; ++n) {

+ dot_product[k] += *ch_a++ * *ch_b++;

+ }

+void MultiChannelMovingWindowEnergies(const AudioBus* input,

marpan 2013/08/02 17:56:54 The usage of terms: frames, blocks, and windows, m

turaj 2013/08/02 23:45:59 I tired to be more consistent. On 2013/08/02 17:5

+ int frames_per_window,

+ float* energy) {

+ int num_blocks = input->frames() - (frames_per_window - 1);

marpan 2013/08/02 17:56:54 Is it more consistent to use "num_frames" here ins

turaj 2013/08/02 23:45:59 But this is really the number of blocks. We have t

marpan 2013/08/06 17:14:10 Ok. Makes sense. You may want to add your comment

+ int channels = input->channels();

+ for (int k = 0; k < input->channels(); ++k) {

+ const float* input_channel = input->channel(k);

+ energy[k] = 0;

+ // First window of channel |k|.

+ for (int m = 0; m < frames_per_window; ++m)

+ energy[k] += input_channel[m] * input_channel[m];

marpan 2013/08/02 17:56:54 Easier to follow if you put { } around this one-li

turaj 2013/08/02 23:45:59 Done.

+ const float* slide_out = input_channel;

+ const float* slide_in = &input_channel[frames_per_window];

+ for (int n = 1; n < num_blocks; ++n, ++slide_in, ++slide_out) {

+ energy[k + n * channels] = energy[k + (n - 1) * channels] - *slide_out *

+ *slide_out + *slide_in * *slide_in;

+ }

+// Fit the curve f(x) = a * x^2 + b * x + c such that

+// f(-1) = |y[0]|

+// f(0) = |y[1]|

+// f(1) = |y[2]|.

+void CubicInterpolation(const float* y_values,

+ float* extremum,

+ float* extremum_value) {

+ float a = 0.5f * (y_values[2] + y_values[0]) - y_values[1];

+ float b = 0.5f * (y_values[2] - y_values[0]);

+ float c = y_values[1];

+ *extremum = -b / (2.f * a);

marpan 2013/08/02 17:56:54 "a" will never be zero here because this is only c

turaj 2013/08/02 23:45:59 Right, but maybe I should consider two other cases

+ *extremum_value = a * (*extremum) * (*extremum) + b * (*extremum) + c;

+int DecimatedSearch(int decimation,

+ Interval exclude_interval,

+ const AudioBus* target_block,

+ const AudioBus* search_segment,

+ const float* energy_target_block,

+ const float* energy_candid_blocks) {

+ int channels = search_segment->channels();

+ int block_size = target_block->frames();

+ int num_candid_frames = search_segment->frames() - (block_size - 1);

+ scoped_ptr<float[]> dot_prod(new float[channels]);

+ float similarity[3]; // Three elements for cubic interpolation.

+ int n = 0;

+ MultiChannelDotProduct(target_block, 0, search_segment, n, block_size,

+ dot_prod.get());

+ similarity[0] = MultiChannelSimilarityMeasure(

+ dot_prod.get(), energy_target_block, &energy_candid_blocks[n * channels],

+ channels);

+ // Set the starting point as optimal point.

+ float best_similarity = similarity[0];

+ int optimal_index = 0;

+ n += decimation;

+ MultiChannelDotProduct(target_block, 0, search_segment, n, block_size,

+ dot_prod.get());

+ similarity[1] = MultiChannelSimilarityMeasure(

+ dot_prod.get(), energy_target_block, &energy_candid_blocks[n * channels],

+ channels);

+ n += decimation;

marpan 2013/08/02 17:56:54 What if n>=num_candid_frames before entering loop,

turaj 2013/08/02 23:45:59 Although with the setting in AudioRendererAlgorith

+ for (; n < num_candid_frames; n += decimation) {

+ MultiChannelDotProduct(target_block, 0, search_segment, n, block_size,

+ dot_prod.get());

+ similarity[2] = MultiChannelSimilarityMeasure(

+ dot_prod.get(), energy_target_block,

+ &energy_candid_blocks[n * channels], channels);

+ if (similarity[1] > similarity[0] &&

+ similarity[1] > similarity[2]) {

+ // A local maximum is found. Do a cubic interpolation for a better

+ // estimate of candid maximum.

+ float normalized_candid_optimal_index;

+ float candid_best_similarity;

+ CubicInterpolation(similarity, &normalized_candid_optimal_index,

+ &candid_best_similarity);

+ int candid_optimal_index = n - decimation +

+ static_cast<int>(floor(normalized_candid_optimal_index * decimation +

+ 0.5f));

+ if (candid_best_similarity > best_similarity &&

+ !InInterval(candid_optimal_index, exclude_interval)) {

+ optimal_index = candid_optimal_index;

+ best_similarity = candid_best_similarity;

+ }

+ } else if (n + decimation >= num_candid_frames &&

+ similarity[2] > best_similarity && !InInterval(n, exclude_interval)) {

+ // If this is the end-point and has a better similarity-measure than

+ // optimal, then we accept it as optimal point.

+ optimal_index = n;

+ best_similarity = similarity[2];

+ }

+ memmove(similarity, &similarity[1], 2 * sizeof(*similarity));

+ }

+ return optimal_index;

+int PartialSearch(int lim_low,

+ int lim_high,

+ Interval exclude_interval,

+ const AudioBus* target_block,

+ const AudioBus* search_segment,

+ const float* energy_target_block,

+ const float* energy_candid_blocks) {

+ int channels = search_segment->channels();

+ int block_size = target_block->frames();

+ scoped_ptr<float[]> dot_prod(new float[channels]);

+ float best_similarity = std::numeric_limits<float>::min();

+ int optimal_index = 0;

+ for (int n = lim_low; n <= lim_high; ++n) {

+ if (InInterval(n, exclude_interval)) {

+ continue;

+ }

+ MultiChannelDotProduct(target_block, 0, search_segment, n, block_size,

+ dot_prod.get());

+ float similarity = MultiChannelSimilarityMeasure(

+ dot_prod.get(), energy_target_block,

+ &energy_candid_blocks[n * channels], channels);

+ if (similarity > best_similarity) {

+ best_similarity = similarity;

+ optimal_index = n;

+ }

+ return optimal_index;

+int OptimalIndex(const AudioBus* search_segment,

+ const AudioBus* target_block,

+ Interval exclude_interval) {

+ int channels = search_segment->channels();

+ DCHECK(channels == target_block->channels());

+ int block_size = target_block->frames();

+ int num_candid_frames = search_segment->frames() - (block_size - 1);

marpan 2013/08/02 17:56:54 Why the subtraction of the second term?, is that a

turaj 2013/08/02 23:45:59 The total frames (samples) in search-block is all

+ const int kSearchDecimation = 5;

+ scoped_ptr<float[]> energy_target_frame(new float[channels]);

+ scoped_ptr<float[]> energy_candid_frames(

+ new float[channels * num_candid_frames]);

+ // Energy of all candid frames.

+ MultiChannelMovingWindowEnergies(search_segment, block_size,

+ energy_candid_frames.get());

+ // Energy of target frame.

+ MultiChannelDotProduct(target_block, 0, target_block, 0,

+ block_size, energy_target_frame.get());

+ int optimal_index = DecimatedSearch(kSearchDecimation,

+ exclude_interval, target_block,

+ search_segment, energy_target_frame.get(),

+ energy_candid_frames.get());

+ int lim_low = std::max(0, optimal_index - kSearchDecimation);

+ int lim_high = std::min(num_candid_frames - 1,

+ optimal_index + kSearchDecimation);

+ return PartialSearch(lim_low, lim_high, exclude_interval, target_block,

+ search_segment, energy_target_frame.get(),

+ energy_candid_frames.get());

+void GetSymmetricHanningWindow(int window_length, float* window) {

+ const float kPi = 3.14159265f;

+ const float scale = 2.f * kPi / static_cast<float>(window_length);

+ for (int n = 0; n < window_length; ++n)

+ window[n] = 0.5 * (1 - cos(n * scale));

+} // namespace internal

+} // namespace media

« media/filters/wsola_internals.h ('K') | « media/filters/wsola_internals.h ('k') | media/media.gyp » ('j') | no next file with comments »