media/filters/wsola_internals.cc - Issue 19111004: Upgrade AudioRendererAlgorithm to use WSOLA,

Side by Side Diff: media/filters/wsola_internals.cc

Issue 19111004: Upgrade AudioRendererAlgorithm to use WSOLA, (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Valgrind warning addressed, and small fixes to unittests. Created 7 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright 2013 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 // MSVC++ requires this to be set before any other includes to get M_PI.

	6 #define _USE_MATH_DEFINES

	7

	8 #include "media/filters/wsola_internals.h"

	9

	10 #include <algorithm>

	11 #include <cmath>

	12 #include <limits>

	13

	14 #include "base/logging.h"

	15 #include "base/memory/scoped_ptr.h"

	16 #include "media/base/audio_bus.h"

	17

	18 namespace media {

	19

	20 namespace internal {

	21

	22 bool InInterval(int n, Interval q) {

	23 return n >= q.first && n <= q.second;

	24 }

	25

	26 float MultiChannelSimilarityMeasure(const float* dot_prod_a_b,

	27 const float* energy_a,

	28 const float* energy_b,

	29 int channels) {

	30 const float kEpsilon = 1e-12;

	31 float similarity_measure = 0;

	32 for (int n = 0; n < channels; ++n) {

	33 similarity_measure += dot_prod_a_b[n] / sqrt(energy_a[n] * energy_b[n] +

	34 kEpsilon);

	35 }

	36 return similarity_measure;

	37 }

	38

	39 void MultiChannelDotProduct(const AudioBus* a,

	40 int frame_offset_a,

	41 const AudioBus* b,

	42 int frame_offset_b,

	43 int num_frames,

	44 float* dot_product) {

	45 DCHECK_EQ(a->channels(), b->channels());

	46 DCHECK_GE(frame_offset_a, 0);

	47 DCHECK_GE(frame_offset_b, 0);

	48 DCHECK_LE(frame_offset_a + num_frames, a->frames());

	49 DCHECK_LE(frame_offset_b + num_frames, b->frames());

	50

	51 memset(dot_product, 0, sizeof(dot_product) a->channels());

	52 for (int k = 0; k < a->channels(); ++k) {

	53 const float* ch_a = a->channel(k) + frame_offset_a;

	54 const float* ch_b = b->channel(k) + frame_offset_b;

	55 for (int n = 0; n < num_frames; ++n) {

	56 dot_product[k] += ch_a++ *ch_b++;

	57 }

	58 }

	59 }

	60

	61 void MultiChannelMovingBlockEnergies(const AudioBus* input,

	62 int frames_per_block,

	63 float* energy) {

	64 int num_blocks = input->frames() - (frames_per_block - 1);

	65 int channels = input->channels();

	66

	67 for (int k = 0; k < input->channels(); ++k) {

	68 const float* input_channel = input->channel(k);

	69

	70 energy[k] = 0;

	71

	72 // First block of channel \|k\|.

	73 for (int m = 0; m < frames_per_block; ++m) {

	74 energy[k] += input_channel[m] * input_channel[m];

	75 }

	76

	77 const float* slide_out = input_channel;

	78 const float* slide_in = input_channel + frames_per_block;

	79 for (int n = 1; n < num_blocks; ++n, ++slide_in, ++slide_out) {

	80 energy[k + n * channels] = energy[k + (n - 1) * channels] - slide_out

	81 slide_out + slide_in * *slide_in;

	82 }

	83 }

	84 }

	85

	86 // Fit the curve f(x) = a * x^2 + b * x + c such that

	87 // f(-1) = \|y[0]\|

	88 // f(0) = \|y[1]\|

	89 // f(1) = \|y[2]\|.

	90 void CubicInterpolation(const float* y_values,

	91 float* extremum,

	92 float* extremum_value) {

	93 float a = 0.5f * (y_values[2] + y_values[0]) - y_values[1];

	94 float b = 0.5f * (y_values[2] - y_values[0]);

	95 float c = y_values[1];

	96

	97 DCHECK_NE(a, 0);

	98 extremum = -b / (2.f a);

	99 extremum_value = a (extremum) (extremum) + b (*extremum) + c;

	100 }

	101

	102 int DecimatedSearch(int decimation,

	103 Interval exclude_interval,

	104 const AudioBus* target_block,

	105 const AudioBus* search_segment,

	106 const float* energy_target_block,

	107 const float* energy_candidate_blocks) {

	108 int channels = search_segment->channels();

	109 int block_size = target_block->frames();

	110 int num_candidate_blocks = search_segment->frames() - (block_size - 1);

	111 scoped_ptr<float[]> dot_prod(new float[channels]);

	112 float similarity[3]; // Three elements for cubic interpolation.

	113

	114 int n = 0;

	115 MultiChannelDotProduct(target_block, 0, search_segment, n, block_size,

	116 dot_prod.get());

	117 similarity[0] = MultiChannelSimilarityMeasure(

	118 dot_prod.get(), energy_target_block,

	119 &energy_candidate_blocks[n * channels], channels);

	120

	121 // Set the starting point as optimal point.

	122 float best_similarity = similarity[0];

	123 int optimal_index = 0;

	124

	125 n += decimation;

	126 if (n >= num_candidate_blocks) {

	127 return 0;

	128 }

	129

	130 MultiChannelDotProduct(target_block, 0, search_segment, n, block_size,

	131 dot_prod.get());

	132 similarity[1] = MultiChannelSimilarityMeasure(

	133 dot_prod.get(), energy_target_block,

	134 &energy_candidate_blocks[n * channels], channels);

	135

	136 n += decimation;

	137 if (n >= num_candidate_blocks) {

	138 // We cannot do any more sampling. Compare these two values and return the

	139 // optimal index.

	140 return similarity[1] > similarity[0] ? decimation : 0;

	141 }

	142

	143 for (; n < num_candidate_blocks; n += decimation) {

	144 MultiChannelDotProduct(target_block, 0, search_segment, n, block_size,

	145 dot_prod.get());

	146

	147 similarity[2] = MultiChannelSimilarityMeasure(

	148 dot_prod.get(), energy_target_block,

	149 &energy_candidate_blocks[n * channels], channels);

	150

	151 if ((similarity[1] > similarity[0] && similarity[1] >= similarity[2]) \|\|

	152 (similarity[1] >= similarity[0] && similarity[1] > similarity[2])) {

	153 // A local maximum is found. Do a cubic interpolation for a better

	154 // estimate of candidate maximum.

	155 float normalized_candidate_index;

	156 float candidate_similarity;

	157 CubicInterpolation(similarity, &normalized_candidate_index,

	158 &candidate_similarity);

	159

	160 int candidate_index = n - decimation + static_cast<int>(

	161 normalized_candidate_index * decimation + 0.5f);

	162 if (candidate_similarity > best_similarity &&

	163 !InInterval(candidate_index, exclude_interval)) {

	164 optimal_index = candidate_index;

	165 best_similarity = candidate_similarity;

	166 }

	167 } else if (n + decimation >= num_candidate_blocks &&

	168 similarity[2] > best_similarity &&

	169 !InInterval(n, exclude_interval)) {

	170 // If this is the end-point and has a better similarity-measure than

	171 // optimal, then we accept it as optimal point.

	172 optimal_index = n;

	173 best_similarity = similarity[2];

	174 }

	175 memmove(similarity, &similarity[1], 2 * sizeof(*similarity));

	176 }

	177 return optimal_index;

	178 }

	179

	180 int FullSearch(int low_limit,

	181 int high_limit,

	182 Interval exclude_interval,

	183 const AudioBus* target_block,

	184 const AudioBus* search_block,

	185 const float* energy_target_block,

	186 const float* energy_candidate_blocks) {

	187 int channels = search_block->channels();

	188 int block_size = target_block->frames();

	189 scoped_ptr<float[]> dot_prod(new float[channels]);

	190

	191 float best_similarity = std::numeric_limits<float>::min();

	192 int optimal_index = 0;

	193

	194 for (int n = low_limit; n <= high_limit; ++n) {

	195 if (InInterval(n, exclude_interval)) {

	196 continue;

	197 }

	198 MultiChannelDotProduct(target_block, 0, search_block, n, block_size,

	199 dot_prod.get());

	200

	201 float similarity = MultiChannelSimilarityMeasure(

	202 dot_prod.get(), energy_target_block,

	203 &energy_candidate_blocks[n * channels], channels);

	204

	205 if (similarity > best_similarity) {

	206 best_similarity = similarity;

	207 optimal_index = n;

	208 }

	209 }

	210

	211 return optimal_index;

	212 }

	213

	214 int OptimalIndex(const AudioBus* search_block,

	215 const AudioBus* target_block,

	216 Interval exclude_interval) {

	217 int channels = search_block->channels();

	218 DCHECK_EQ(channels, target_block->channels());

	219 int target_size = target_block->frames();

	220 int num_candidate_blocks = search_block->frames() - (target_size - 1);

	221

	222 // This is a compromise between complexity reduction and search accuracy. I

	223 // don't have a proof that down sample of order 5 is optimal. One can compute

	224 // a decimation factor that minimizes complexity given the size of

	225 // \|search_block\| and \|target_block\|. However, my experiments show the rate of

	226 // missing the optimal index is significant. This value is chosen

	227 // heuristically based on experiments.

	228 const int kSearchDecimation = 5;

	229

	230 scoped_ptr<float[]> energy_target_block(new float[channels]);

	231 scoped_ptr<float[]> energy_candidate_blocks(

	232 new float[channels * num_candidate_blocks]);

	233

	234 // Energy of all candid frames.

	235 MultiChannelMovingBlockEnergies(search_block, target_size,

	236 energy_candidate_blocks.get());

	237

	238 // Energy of target frame.

	239 MultiChannelDotProduct(target_block, 0, target_block, 0,

	240 target_size, energy_target_block.get());

	241

	242 int optimal_index = DecimatedSearch(kSearchDecimation,

	243 exclude_interval, target_block,

	244 search_block, energy_target_block.get(),

	245 energy_candidate_blocks.get());

	246

	247 int lim_low = std::max(0, optimal_index - kSearchDecimation);

	248 int lim_high = std::min(num_candidate_blocks - 1,

	249 optimal_index + kSearchDecimation);

	250 return FullSearch(lim_low, lim_high, exclude_interval, target_block,

	251 search_block, energy_target_block.get(),

	252 energy_candidate_blocks.get());

	253 }

	254

	255 void GetSymmetricHanningWindow(int window_length, float* window) {

	256 const float scale = 2.0f * M_PI / static_cast<float>(window_length);
	DaleCurtis 2013/08/22 22:36:36 cast should be unnecessary. cast should be unnecessary. turaj 2013/08/23 21:14:09 Done. I suppose M_PI is considered double, and the Done. I suppose M_PI is considered double, and the rhs is evaluated in double then down cast to float. Don't like it that much. On 2013/08/22 22:36:36, DaleCurtis wrote: Show quoted text > cast should be unnecessary.
	257 for (int n = 0; n < window_length; ++n)

	258 window[n] = 0.5f * (1.0f - cosf(n * scale));

	259 }

	260

	261 } // namespace internal

	262

	263 } // namespace media

	264

OLD	NEW

« media/filters/audio_renderer_algorithm_unittest.cc ('K') | « media/filters/wsola_internals.h ('k') | media/media.gyp » ('j') | no next file with comments »