media/filters/audio_renderer_algorithm_util.cc - Issue 19111004: Upgrade AudioRendererAlgorithm to use WSOLA,

Side by Side Diff: media/filters/audio_renderer_algorithm_util.cc

Issue 19111004: Upgrade AudioRendererAlgorithm to use WSOLA, (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
	DaleCurtis 2013/07/16 00:18:40 2013 and no (c). Here and other files. 2013 and no (c). Here and other files. turaj 2013/07/29 22:09:57 Done. Show quoted text On 2013/07/16 00:18:40, DaleCurtis wrote: > 2013 and no (c). Here and other files. Done.
	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "media/filters/audio_renderer_algorithm_util.h"

	6

	7 #include <algorithm>

	8 #include <cmath>

	9 #include <limits>

	10

	11 #include "base/logging.h"

	12 #include "base/memory/scoped_ptr.h"

	13 #include "media/base/audio_bus.h"

	14

	15 namespace media {

	16

	17 bool InInterval(int n, interval q) {
	DaleCurtis 2013/07/16 00:18:40 Types should have the first letter capitalized; so Types should have the first letter capitalized; so Interval instead of interval. turaj 2013/07/29 22:09:57 Done. Show quoted text On 2013/07/16 00:18:40, DaleCurtis wrote: > Types should have the first letter capitalized; so Interval instead of interval. Done.
	18 return n >= q.first && n <= q.second;

	19 }

	20

	21 float MultiChannelSimilarityMeasure(const float* dot_prod_a_b,

	22 const float* energy_a,

	23 const float* energy_b,

	24 int channels) {

	25 float similarity_measure = 0;

	26 for (int n = 0; n < channels; ++n) {

	27 similarity_measure += dot_prod_a_b[n] / sqrt(energy_a[n] * energy_b[n] +
	DaleCurtis 2013/07/16 00:18:40 What is 1e-12 for? Additionally vector_math.h might be a good home for some of the vector math primitives used by these functions. turaj 2013/07/29 22:09:57 It is to prevent dividing by zero. I change it to const. And thank for pointing to vector_math.h, but so far there are only two functions one implementing "a = a + b * scale" and "a = b * scale." I'll use them if I need such functions. On 2013/07/16 00:18:40, DaleCurtis wrote: > What is 1e-12 for? > > Additionally vector_math.h might be a good home for some of the vector math > primitives used by these functions.
	28 1e-12);

	29 }

	30 return similarity_measure;

	31 }

	32

	33 void MultiChannelDotProduct(const AudioBus* a,

	34 int frame_offset_a,

	35 const AudioBus* b,

	36 int frame_offset_b,

	37 int num_frames,

	38 float* dot_product) {

	39 DCHECK(a->channels() == b->channels());
	DaleCurtis 2013/07/16 00:18:40 use DCHECK_EQ, DCHECK_GE, DCHECK_LE etc, instead. use DCHECK_EQ, DCHECK_GE, DCHECK_LE etc, instead. turaj 2013/07/29 22:09:57 Done. Show quoted text On 2013/07/16 00:18:40, DaleCurtis wrote: > use DCHECK_EQ, DCHECK_GE, DCHECK_LE etc, instead. Done.
	40 DCHECK(frame_offset_a >= 0);

	41 DCHECK(frame_offset_b >= 0);

	42 DCHECK(frame_offset_a + num_frames <= a->frames());

	43 DCHECK(frame_offset_b + num_frames <= b->frames());

	44

	45 memset(dot_product, 0, sizeof(*dot_product) * a->channels());

	46 for (int k = 0; k < a->channels(); ++k) {
	ajm 2013/07/23 18:03:28 I suppose using i here and j in the inner loop is more natural.
	47 const float* ch_a = a->channel(k) + frame_offset_a;

	48 const float* ch_b = b->channel(k) + frame_offset_b;

	49 for (int n = 0; n < num_frames; ++n) {

	50 dot_product[k] += *ch_a++ * *ch_b++;
	ajm 2013/07/23 18:03:28 Any reason to prefer ch_a++ over ch_a[n]? Any reason to prefer ch_a++ over ch_a[n]?
	51 }

	52 }

	53 }

	54

	55 void MultiChannelMovingWindowEnergies(const AudioBus* input,

	56 int frames_per_window,

	57 float* energy) {

	58 int num_blocks = input->frames() - (frames_per_window - 1);

	59 int channels = input->channels();

	60

	61 for (int k = 0; k < input->channels(); ++k) {
	ajm 2013/07/23 18:03:28 Again, I think i, j, k ... is more natural. Again, I think i, j, k ... is more natural.
	62 const float* input_channel = input->channel(k);

	63

	64 energy[k] = 0;

	65 // First window of channel |k|.

	66 for(int m = 0; m < frames_per_window; ++m)
	ajm 2013/07/23 18:03:28 for ( for (
	67 energy[k] += input_channel[m] * input_channel[m];

	68

	69 const float* slide_out = input_channel;

	70 const float* slide_in = &input_channel[frames_per_window];

	71 for (int n = 1; n < num_blocks; ++n, ++slide_in, ++slide_out) {
	ajm 2013/07/23 18:03:28 This looks fine, but some comments explaining it could be instructive.
	72 energy[k + n * channels] = energy[k + (n - 1) * channels] - *slide_out *

	73 *slide_out + *slide_in * *slide_in;

	74 }

	75 }

	76 }

	77

	78 // Fit the curve f(x) = a * x^2 + b * x + c such that

	79 // f(-1) = |y[0]|

	80 // f(0) = |y[1]|

	81 // f(1) = |y[2]|.

	82 void CubicInterpol(const float* y_values,

	83 float* extremum,

	84 float* extremum_value) {

	85 float a = 0.5f * (y_values[2] + y_values[0]) - y_values[1];

	86 float b = 0.5f * (y_values[2] - y_values[0]);

	87 float c = y_values[1];

	88

	89 *extremum = -b / (2.f * a);

	90 *extremum_value = a * (*extremum) * (*extremum) + b * (*extremum) + c;

	91 }

	92

	93 int DecimatedSearch(int decimation,

	94 interval exclude_interval,

	95 const AudioBus* target_block,

	96 const AudioBus* search_segment,

	97 const float* energy_target_block,

	98 const float* energy_candid_blocks) {

	99 int channels = search_segment->channels();

	100 int block_size = target_block->frames();

	101 int num_candid_frames = search_segment->frames() - (block_size - 1);

	102 scoped_ptr<float[]> dot_prod(new float[channels]);

	103 float similarity[3]; // Three elements for cubic interpolation.

	104

	105 int n = 0;

	106 MultiChannelDotProduct(target_block, 0, search_segment, n, block_size,

	107 dot_prod.get());

	108 similarity[0] = MultiChannelSimilarityMeasure(

	109 dot_prod.get(), energy_target_block, &energy_candid_blocks[n * channels],

	110 channels);

	111

	112 // Set the starting point as optimal point.

	113 float best_similarity = similarity[0];

	114 int optimal_index = 0;

	115

	116 n += decimation;

	117 MultiChannelDotProduct(target_block, 0, search_segment, n, block_size,

	118 dot_prod.get());

	119 similarity[1] = MultiChannelSimilarityMeasure(

	120 dot_prod.get(), energy_target_block, &energy_candid_blocks[n * channels],

	121 channels);

	122

	123 n += decimation;

	124 for (; n < num_candid_frames; n += decimation) {

	125 MultiChannelDotProduct(target_block, 0, search_segment, n, block_size,

	126 dot_prod.get());

	127

	128 similarity[2] = MultiChannelSimilarityMeasure(

	129 dot_prod.get(), energy_target_block,

	130 &energy_candid_blocks[n * channels], channels);

	131

	132 if (similarity[1] > similarity[0] &&

	133 similarity[1] > similarity[2]) {

	134 // A local maximum is found. Do a cubic interpolation for a better

	135 // estimate of candid maximum.

	136 float normalized_candid_optimal_index;

	137 float candid_best_similarity;

	138 CubicInterpol(similarity, &normalized_candid_optimal_index,

	139 &candid_best_similarity);

	140

	141 int candid_optimal_index = n - decimation +

	142 static_cast<int>(floor(normalized_candid_optimal_index * decimation +

	143 0.5f));

	144 if (candid_best_similarity > best_similarity &&

	145 !InInterval(candid_optimal_index, exclude_interval)) {

	146 optimal_index = candid_optimal_index;

	147 best_similarity = candid_best_similarity;

	148 }

	149 } else if (n + decimation >= num_candid_frames &&

	150 similarity[2] > best_similarity && !InInterval(n, exclude_interval)) {

	151 // If this is the end-point and has a better similarity-measure than

	152 // optimal, then we accept it as optimal point.

	153 optimal_index = n;

	154 best_similarity = similarity[2];

	155 }

	156 memmove(similarity, &similarity[1], 2 * sizeof(*similarity));

	157 }

	158 return optimal_index;

	159 }

	160

	161 int PartialSearch(int lim_low,

	162 int lim_high,

	163 interval exclude_interval,

	164 const AudioBus* target_block,

	165 const AudioBus* search_segment,

	166 const float* energy_target_block,

	167 const float* energy_candid_blocks) {

	168 int channels = search_segment->channels();

	169 int block_size = target_block->frames();

	170 scoped_ptr<float[]> dot_prod(new float[channels]);

	171

	172 float best_similarity = std::numeric_limits<float>::min();

	173 int optimal_index = 0;

	174

	175 for (int n = lim_low; n <= lim_high; ++n) {

	176 if (InInterval(n, exclude_interval)) {

	177 continue;

	178 }

	179 MultiChannelDotProduct(target_block, 0, search_segment, n, block_size,

	180 dot_prod.get());

	181

	182 float similarity = MultiChannelSimilarityMeasure(

	183 dot_prod.get(), energy_target_block,

	184 &energy_candid_blocks[n * channels], channels);

	185

	186 if (similarity > best_similarity) {

	187 best_similarity = similarity;

	188 optimal_index = n;

	189 }

	190 }

	191

	192 return optimal_index;

	193 }

	194

	195 int OptimalIndex(const AudioBus* search_segment,

	196 const AudioBus* target_block,

	197 interval exclude_interval) {

	198 int channels = search_segment->channels();

	199 DCHECK(channels == target_block->channels());

	200 int block_size = target_block->frames();

	201 int num_candid_frames = search_segment->frames() - (block_size - 1);

	202 const int kSearchDecimation = 5;

	203

	204 scoped_ptr<float[]> energy_target_frame(new float[channels]);

	205 scoped_ptr<float[]> energy_candid_frames(

	206 new float[channels * num_candid_frames]);

	207

	208 // Energy of all candid frames.

	209 MultiChannelMovingWindowEnergies(search_segment, block_size,

	210 energy_candid_frames.get());

	211

	212 // Energy of target frame.

	213 MultiChannelDotProduct(target_block, 0, target_block, 0,

	214 block_size, energy_target_frame.get());

	215

	216 int optimal_index = DecimatedSearch(kSearchDecimation,

	217 exclude_interval, target_block,

	218 search_segment, energy_target_frame.get(),

	219 energy_candid_frames.get());

	220

	221 int lim_low = std::max(0, optimal_index - kSearchDecimation);

	222 int lim_high = std::min(num_candid_frames - 1,

	223 optimal_index + kSearchDecimation);

	224 return PartialSearch(lim_low, lim_high, exclude_interval, target_block,

	225 search_segment, energy_target_frame.get(),

	226 energy_candid_frames.get());

	227 }

	228

	229 void HannSym(int window_length, float* window) {

	230 const float kPi = 3.14159265f;

	231 const float scale = 2.f * kPi / static_cast<float>(window_length);

	232 for (int n = 0; n < window_length; ++n)

	233 window[n] = 0.5 * (1 - cos(n * scale));

	234 }

	235

	236 } // namespace media

	237

	238

	239

	240

OLD	NEW

« media/filters/audio_renderer_algorithm_util.h ('K') | « media/filters/audio_renderer_algorithm_util.h ('k') | media/media.gyp » ('j') | no next file with comments »