Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(435)

Side by Side Diff: media/filters/wsola_internals.cc

Issue 19111004: Upgrade AudioRendererAlgorithm to use WSOLA, (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Avoid malloc in every iteration by defining some member variables. Created 7 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "media/filters/wsola_internals.h"
6
7 #include <algorithm>
8 #include <cmath>
9 #include <limits>
10
11 #include "base/logging.h"
12 #include "base/memory/scoped_ptr.h"
13 #include "media/base/audio_bus.h"
14
15 namespace media {
16
17 namespace internal {
18
19 bool InInterval(int n, Interval q) {
20 return n >= q.first && n <= q.second;
21 }
22
// Returns the sum over all |channels| of
//   dot_prod_a_b[ch] / sqrt(energy_a[ch] * energy_b[ch] + epsilon).
// The small epsilon keeps the denominator strictly positive when one of the
// energies is zero.
float MultiChannelSimilarityMeasure(const float* dot_prod_a_b,
                                    const float* energy_a,
                                    const float* energy_b,
                                    int channels) {
  const float kEpsilon = 1e-12;
  float total = 0;
  int ch = 0;
  while (ch < channels) {
    total += dot_prod_a_b[ch] / sqrt(energy_a[ch] * energy_b[ch] + kEpsilon);
    ++ch;
  }
  return total;
}
35
36 void MultiChannelDotProduct(const AudioBus* a,
37 int frame_offset_a,
38 const AudioBus* b,
39 int frame_offset_b,
40 int num_frames,
41 float* dot_product) {
42 DCHECK_EQ(a->channels(), b->channels());
43 DCHECK_GE(frame_offset_a, 0);
44 DCHECK_GE(frame_offset_b, 0);
45 DCHECK_LE(frame_offset_a + num_frames, a->frames());
46 DCHECK_LE(frame_offset_b + num_frames, b->frames());
47
48 memset(dot_product, 0, sizeof(*dot_product) * a->channels());
49 for (int k = 0; k < a->channels(); ++k) {
50 const float* ch_a = a->channel(k) + frame_offset_a;
51 const float* ch_b = b->channel(k) + frame_offset_b;
52 for (int n = 0; n < num_frames; ++n) {
53 dot_product[k] += *ch_a++ * *ch_b++;
54 }
55 }
56 }
57
58 void MultiChannelMovingWindowEnergies(const AudioBus* input,
marpan 2013/08/02 17:56:54 The usage of terms: frames, blocks, and windows, m
turaj 2013/08/02 23:45:59 I tired to be more consistent. On 2013/08/02 17:5
59 int frames_per_window,
60 float* energy) {
61 int num_blocks = input->frames() - (frames_per_window - 1);
marpan 2013/08/02 17:56:54 Is it more consistent to use "num_frames" here ins
turaj 2013/08/02 23:45:59 But this is really the number of blocks. We have t
marpan 2013/08/06 17:14:10 Ok. Makes sense. You may want to add your comment
62 int channels = input->channels();
63
64 for (int k = 0; k < input->channels(); ++k) {
65 const float* input_channel = input->channel(k);
66
67 energy[k] = 0;
68
69 // First window of channel |k|.
70 for (int m = 0; m < frames_per_window; ++m)
71 energy[k] += input_channel[m] * input_channel[m];
marpan 2013/08/02 17:56:54 Easier to follow if you put { } around this one-li
turaj 2013/08/02 23:45:59 Done.
72
73 const float* slide_out = input_channel;
74 const float* slide_in = &input_channel[frames_per_window];
75 for (int n = 1; n < num_blocks; ++n, ++slide_in, ++slide_out) {
76 energy[k + n * channels] = energy[k + (n - 1) * channels] - *slide_out *
77 *slide_out + *slide_in * *slide_in;
78 }
79 }
80 }
81
// Fits the parabola f(x) = a * x^2 + b * x + c such that
//   f(-1) = |y_values[0]|
//   f(0)  = |y_values[1]|
//   f(1)  = |y_values[2]|
// and reports the abscissa of its extremum in |*extremum| and the value of
// the parabola at that point in |*extremum_value|.
//
// Despite the function name, the fitted curve is quadratic, not cubic.
//
// When the three points are collinear the quadratic coefficient is zero and
// the curve has no extremum. In that case the middle point is reported
// (|*extremum| = 0, |*extremum_value| = |y_values[1]|) instead of dividing by
// zero and producing inf/NaN.
void CubicInterpolation(const float* y_values,
                        float* extremum,
                        float* extremum_value) {
  float a = 0.5f * (y_values[2] + y_values[0]) - y_values[1];
  float b = 0.5f * (y_values[2] - y_values[0]);
  float c = y_values[1];

  if (a == 0.f) {
    // Collinear points: fall back to the middle sample.
    *extremum = 0.f;
    *extremum_value = y_values[1];
    return;
  }

  // The vertex of a parabola is at x = -b / (2a).
  *extremum = -b / (2.f * a);
  *extremum_value = a * (*extremum) * (*extremum) + b * (*extremum) + c;
}
96
97 int DecimatedSearch(int decimation,
98 Interval exclude_interval,
99 const AudioBus* target_block,
100 const AudioBus* search_segment,
101 const float* energy_target_block,
102 const float* energy_candid_blocks) {
103 int channels = search_segment->channels();
104 int block_size = target_block->frames();
105 int num_candid_frames = search_segment->frames() - (block_size - 1);
106 scoped_ptr<float[]> dot_prod(new float[channels]);
107 float similarity[3]; // Three elements for cubic interpolation.
108
109 int n = 0;
110 MultiChannelDotProduct(target_block, 0, search_segment, n, block_size,
111 dot_prod.get());
112 similarity[0] = MultiChannelSimilarityMeasure(
113 dot_prod.get(), energy_target_block, &energy_candid_blocks[n * channels],
114 channels);
115
116 // Set the starting point as optimal point.
117 float best_similarity = similarity[0];
118 int optimal_index = 0;
119
120 n += decimation;
121 MultiChannelDotProduct(target_block, 0, search_segment, n, block_size,
122 dot_prod.get());
123 similarity[1] = MultiChannelSimilarityMeasure(
124 dot_prod.get(), energy_target_block, &energy_candid_blocks[n * channels],
125 channels);
126
127 n += decimation;
marpan 2013/08/02 17:56:54 What if n>=num_candid_frames before entering loop,
turaj 2013/08/02 23:45:59 Although with the setting in AudioRendererAlgorith
128 for (; n < num_candid_frames; n += decimation) {
129 MultiChannelDotProduct(target_block, 0, search_segment, n, block_size,
130 dot_prod.get());
131
132 similarity[2] = MultiChannelSimilarityMeasure(
133 dot_prod.get(), energy_target_block,
134 &energy_candid_blocks[n * channels], channels);
135
136 if (similarity[1] > similarity[0] &&
137 similarity[1] > similarity[2]) {
138 // A local maximum is found. Do a cubic interpolation for a better
139 // estimate of candid maximum.
140 float normalized_candid_optimal_index;
141 float candid_best_similarity;
142 CubicInterpolation(similarity, &normalized_candid_optimal_index,
143 &candid_best_similarity);
144
145 int candid_optimal_index = n - decimation +
146 static_cast<int>(floor(normalized_candid_optimal_index * decimation +
147 0.5f));
148 if (candid_best_similarity > best_similarity &&
149 !InInterval(candid_optimal_index, exclude_interval)) {
150 optimal_index = candid_optimal_index;
151 best_similarity = candid_best_similarity;
152 }
153 } else if (n + decimation >= num_candid_frames &&
154 similarity[2] > best_similarity && !InInterval(n, exclude_interval)) {
155 // If this is the end-point and has a better similarity-measure than
156 // optimal, then we accept it as optimal point.
157 optimal_index = n;
158 best_similarity = similarity[2];
159 }
160 memmove(similarity, &similarity[1], 2 * sizeof(*similarity));
161 }
162 return optimal_index;
163 }
164
165 int PartialSearch(int lim_low,
166 int lim_high,
167 Interval exclude_interval,
168 const AudioBus* target_block,
169 const AudioBus* search_segment,
170 const float* energy_target_block,
171 const float* energy_candid_blocks) {
172 int channels = search_segment->channels();
173 int block_size = target_block->frames();
174 scoped_ptr<float[]> dot_prod(new float[channels]);
175
176 float best_similarity = std::numeric_limits<float>::min();
177 int optimal_index = 0;
178
179 for (int n = lim_low; n <= lim_high; ++n) {
180 if (InInterval(n, exclude_interval)) {
181 continue;
182 }
183 MultiChannelDotProduct(target_block, 0, search_segment, n, block_size,
184 dot_prod.get());
185
186 float similarity = MultiChannelSimilarityMeasure(
187 dot_prod.get(), energy_target_block,
188 &energy_candid_blocks[n * channels], channels);
189
190 if (similarity > best_similarity) {
191 best_similarity = similarity;
192 optimal_index = n;
193 }
194 }
195
196 return optimal_index;
197 }
198
199 int OptimalIndex(const AudioBus* search_segment,
200 const AudioBus* target_block,
201 Interval exclude_interval) {
202 int channels = search_segment->channels();
203 DCHECK(channels == target_block->channels());
204 int block_size = target_block->frames();
205 int num_candid_frames = search_segment->frames() - (block_size - 1);
marpan 2013/08/02 17:56:54 Why the subtraction of the second term?, is that a
turaj 2013/08/02 23:45:59 The total frames (samples) in search-block is all
206 const int kSearchDecimation = 5;
207
208 scoped_ptr<float[]> energy_target_frame(new float[channels]);
209 scoped_ptr<float[]> energy_candid_frames(
210 new float[channels * num_candid_frames]);
211
212 // Energy of all candid frames.
213 MultiChannelMovingWindowEnergies(search_segment, block_size,
214 energy_candid_frames.get());
215
216 // Energy of target frame.
217 MultiChannelDotProduct(target_block, 0, target_block, 0,
218 block_size, energy_target_frame.get());
219
220 int optimal_index = DecimatedSearch(kSearchDecimation,
221 exclude_interval, target_block,
222 search_segment, energy_target_frame.get(),
223 energy_candid_frames.get());
224
225 int lim_low = std::max(0, optimal_index - kSearchDecimation);
226 int lim_high = std::min(num_candid_frames - 1,
227 optimal_index + kSearchDecimation);
228 return PartialSearch(lim_low, lim_high, exclude_interval, target_block,
229 search_segment, energy_target_frame.get(),
230 energy_candid_frames.get());
231 }
232
// Fills |window[0 .. window_length - 1]| with the raised-cosine values
//   window[n] = 0.5 * (1 - cos(2 * pi * n / window_length)).
void GetSymmetricHanningWindow(int window_length, float* window) {
  const float kPi = 3.14159265f;
  const float scale = 2.f * kPi / static_cast<float>(window_length);
  float* out = window;
  for (int n = 0; n < window_length; ++n, ++out) {
    *out = 0.5 * (1 - cos(n * scale));
  }
}
239
240 } // namespace internal
241
242 } // namespace media
243
244
245
246
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698