OLD | NEW |
| (Empty) |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "media/cast/audio_sender/audio_encoder.h" | |
6 | |
7 #include <algorithm> | |
8 | |
9 #include "base/bind.h" | |
10 #include "base/bind_helpers.h" | |
11 #include "base/location.h" | |
12 #include "base/stl_util.h" | |
13 #include "base/sys_byteorder.h" | |
14 #include "base/time/time.h" | |
15 #include "media/base/audio_bus.h" | |
16 #include "media/cast/cast_defines.h" | |
17 #include "media/cast/cast_environment.h" | |
18 #include "third_party/opus/src/include/opus.h" | |
19 | |
20 namespace media { | |
21 namespace cast { | |
22 | |
namespace {

// Audio is always packaged at a fixed rate of frames per second.
const int kFramesPerSecond = 100;

// Duration of a single frame, i.e. the inverse of the rate above.  The rate
// must divide 1000 evenly so that this is an exact number of milliseconds.
const int kFrameDurationMillis = 1000 / kFramesPerSecond;

// If incoming audio lags its capture timestamps by more than this many
// milliseconds (three frames' worth), the encoder treats it as an underrun.
const int kUnderrunThresholdMillis = 3 * kFrameDurationMillis;

}  // namespace
35 | |
36 | |
37 // Base class that handles the common problem of feeding one or more AudioBus' | |
38 // data into a buffer and then, once the buffer is full, encoding the signal and | |
39 // emitting an EncodedFrame via the FrameEncodedCallback. | |
40 // | |
41 // Subclasses complete the implementation by handling the actual encoding | |
42 // details. | |
43 class AudioEncoder::ImplBase | |
44 : public base::RefCountedThreadSafe<AudioEncoder::ImplBase> { | |
45 public: | |
46 ImplBase(const scoped_refptr<CastEnvironment>& cast_environment, | |
47 transport::Codec codec, | |
48 int num_channels, | |
49 int sampling_rate, | |
50 const FrameEncodedCallback& callback) | |
51 : cast_environment_(cast_environment), | |
52 codec_(codec), | |
53 num_channels_(num_channels), | |
54 samples_per_frame_(sampling_rate / kFramesPerSecond), | |
55 callback_(callback), | |
56 cast_initialization_status_(STATUS_AUDIO_UNINITIALIZED), | |
57 buffer_fill_end_(0), | |
58 frame_id_(0), | |
59 frame_rtp_timestamp_(0) { | |
60 // Support for max sampling rate of 48KHz, 2 channels, 100 ms duration. | |
61 const int kMaxSamplesTimesChannelsPerFrame = 48 * 2 * 100; | |
62 if (num_channels_ <= 0 || samples_per_frame_ <= 0 || | |
63 sampling_rate % kFramesPerSecond != 0 || | |
64 samples_per_frame_ * num_channels_ > kMaxSamplesTimesChannelsPerFrame) { | |
65 cast_initialization_status_ = STATUS_INVALID_AUDIO_CONFIGURATION; | |
66 } | |
67 } | |
68 | |
69 CastInitializationStatus InitializationResult() const { | |
70 return cast_initialization_status_; | |
71 } | |
72 | |
73 void EncodeAudio(scoped_ptr<AudioBus> audio_bus, | |
74 const base::TimeTicks& recorded_time) { | |
75 DCHECK_EQ(cast_initialization_status_, STATUS_AUDIO_INITIALIZED); | |
76 DCHECK(!recorded_time.is_null()); | |
77 | |
78 // Determine whether |recorded_time| is consistent with the amount of audio | |
79 // data having been processed in the past. Resolve the underrun problem by | |
80 // dropping data from the internal buffer and skipping ahead the next | |
81 // frame's RTP timestamp by the estimated number of frames missed. On the | |
82 // other hand, don't attempt to resolve overruns: A receiver should | |
83 // gracefully deal with an excess of audio data. | |
84 const base::TimeDelta frame_duration = | |
85 base::TimeDelta::FromMilliseconds(kFrameDurationMillis); | |
86 base::TimeDelta buffer_fill_duration = | |
87 buffer_fill_end_ * frame_duration / samples_per_frame_; | |
88 if (!frame_capture_time_.is_null()) { | |
89 const base::TimeDelta amount_ahead_by = | |
90 recorded_time - (frame_capture_time_ + buffer_fill_duration); | |
91 if (amount_ahead_by > | |
92 base::TimeDelta::FromMilliseconds(kUnderrunThresholdMillis)) { | |
93 buffer_fill_end_ = 0; | |
94 buffer_fill_duration = base::TimeDelta(); | |
95 const int64 num_frames_missed = amount_ahead_by / | |
96 base::TimeDelta::FromMilliseconds(kFrameDurationMillis); | |
97 frame_rtp_timestamp_ += | |
98 static_cast<uint32>(num_frames_missed * samples_per_frame_); | |
99 DVLOG(1) << "Skipping RTP timestamp ahead to account for " | |
100 << num_frames_missed * samples_per_frame_ | |
101 << " samples' worth of underrun."; | |
102 } | |
103 } | |
104 frame_capture_time_ = recorded_time - buffer_fill_duration; | |
105 | |
106 // Encode all audio in |audio_bus| into zero or more frames. | |
107 int src_pos = 0; | |
108 while (src_pos < audio_bus->frames()) { | |
109 const int num_samples_to_xfer = std::min( | |
110 samples_per_frame_ - buffer_fill_end_, audio_bus->frames() - src_pos); | |
111 DCHECK_EQ(audio_bus->channels(), num_channels_); | |
112 TransferSamplesIntoBuffer( | |
113 audio_bus.get(), src_pos, buffer_fill_end_, num_samples_to_xfer); | |
114 src_pos += num_samples_to_xfer; | |
115 buffer_fill_end_ += num_samples_to_xfer; | |
116 | |
117 if (buffer_fill_end_ < samples_per_frame_) | |
118 break; | |
119 | |
120 scoped_ptr<transport::EncodedFrame> audio_frame( | |
121 new transport::EncodedFrame()); | |
122 audio_frame->dependency = transport::EncodedFrame::KEY; | |
123 audio_frame->frame_id = frame_id_; | |
124 audio_frame->referenced_frame_id = frame_id_; | |
125 audio_frame->rtp_timestamp = frame_rtp_timestamp_; | |
126 audio_frame->reference_time = frame_capture_time_; | |
127 | |
128 if (EncodeFromFilledBuffer(&audio_frame->data)) { | |
129 cast_environment_->PostTask( | |
130 CastEnvironment::MAIN, | |
131 FROM_HERE, | |
132 base::Bind(callback_, base::Passed(&audio_frame))); | |
133 } | |
134 | |
135 // Reset the internal buffer, frame ID, and timestamps for the next frame. | |
136 buffer_fill_end_ = 0; | |
137 ++frame_id_; | |
138 frame_rtp_timestamp_ += samples_per_frame_; | |
139 frame_capture_time_ += frame_duration; | |
140 } | |
141 } | |
142 | |
143 protected: | |
144 friend class base::RefCountedThreadSafe<ImplBase>; | |
145 virtual ~ImplBase() {} | |
146 | |
147 virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus, | |
148 int source_offset, | |
149 int buffer_fill_offset, | |
150 int num_samples) = 0; | |
151 virtual bool EncodeFromFilledBuffer(std::string* out) = 0; | |
152 | |
153 const scoped_refptr<CastEnvironment> cast_environment_; | |
154 const transport::Codec codec_; | |
155 const int num_channels_; | |
156 const int samples_per_frame_; | |
157 const FrameEncodedCallback callback_; | |
158 | |
159 // Subclass' ctor is expected to set this to STATUS_AUDIO_INITIALIZED. | |
160 CastInitializationStatus cast_initialization_status_; | |
161 | |
162 private: | |
163 // In the case where a call to EncodeAudio() cannot completely fill the | |
164 // buffer, this points to the position at which to populate data in a later | |
165 // call. | |
166 int buffer_fill_end_; | |
167 | |
168 // A counter used to label EncodedFrames. | |
169 uint32 frame_id_; | |
170 | |
171 // The RTP timestamp for the next frame of encoded audio. This is defined as | |
172 // the number of audio samples encoded so far, plus the estimated number of | |
173 // samples that were missed due to data underruns. A receiver uses this value | |
174 // to detect gaps in the audio signal data being provided. Per the spec, RTP | |
175 // timestamp values are allowed to overflow and roll around past zero. | |
176 uint32 frame_rtp_timestamp_; | |
177 | |
178 // The local system time associated with the start of the next frame of | |
179 // encoded audio. This value is passed on to a receiver as a reference clock | |
180 // timestamp for the purposes of synchronizing audio and video. Its | |
181 // progression is expected to drift relative to the elapsed time implied by | |
182 // the RTP timestamps. | |
183 base::TimeTicks frame_capture_time_; | |
184 | |
185 DISALLOW_COPY_AND_ASSIGN(ImplBase); | |
186 }; | |
187 | |
188 class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase { | |
189 public: | |
190 OpusImpl(const scoped_refptr<CastEnvironment>& cast_environment, | |
191 int num_channels, | |
192 int sampling_rate, | |
193 int bitrate, | |
194 const FrameEncodedCallback& callback) | |
195 : ImplBase(cast_environment, | |
196 transport::CODEC_AUDIO_OPUS, | |
197 num_channels, | |
198 sampling_rate, | |
199 callback), | |
200 encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]), | |
201 opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())), | |
202 buffer_(new float[num_channels * samples_per_frame_]) { | |
203 if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED) | |
204 return; | |
205 if (opus_encoder_init(opus_encoder_, | |
206 sampling_rate, | |
207 num_channels, | |
208 OPUS_APPLICATION_AUDIO) != OPUS_OK) { | |
209 ImplBase::cast_initialization_status_ = | |
210 STATUS_INVALID_AUDIO_CONFIGURATION; | |
211 return; | |
212 } | |
213 ImplBase::cast_initialization_status_ = STATUS_AUDIO_INITIALIZED; | |
214 | |
215 if (bitrate <= 0) { | |
216 // Note: As of 2013-10-31, the encoder in "auto bitrate" mode would use a | |
217 // variable bitrate up to 102kbps for 2-channel, 48 kHz audio and a 10 ms | |
218 // frame size. The opus library authors may, of course, adjust this in | |
219 // later versions. | |
220 bitrate = OPUS_AUTO; | |
221 } | |
222 CHECK_EQ(opus_encoder_ctl(opus_encoder_, OPUS_SET_BITRATE(bitrate)), | |
223 OPUS_OK); | |
224 } | |
225 | |
226 private: | |
227 virtual ~OpusImpl() {} | |
228 | |
229 virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus, | |
230 int source_offset, | |
231 int buffer_fill_offset, | |
232 int num_samples) OVERRIDE { | |
233 // Opus requires channel-interleaved samples in a single array. | |
234 for (int ch = 0; ch < audio_bus->channels(); ++ch) { | |
235 const float* src = audio_bus->channel(ch) + source_offset; | |
236 const float* const src_end = src + num_samples; | |
237 float* dest = buffer_.get() + buffer_fill_offset * num_channels_ + ch; | |
238 for (; src < src_end; ++src, dest += num_channels_) | |
239 *dest = *src; | |
240 } | |
241 } | |
242 | |
243 virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE { | |
244 out->resize(kOpusMaxPayloadSize); | |
245 const opus_int32 result = | |
246 opus_encode_float(opus_encoder_, | |
247 buffer_.get(), | |
248 samples_per_frame_, | |
249 reinterpret_cast<uint8*>(string_as_array(out)), | |
250 kOpusMaxPayloadSize); | |
251 if (result > 1) { | |
252 out->resize(result); | |
253 return true; | |
254 } else if (result < 0) { | |
255 LOG(ERROR) << "Error code from opus_encode_float(): " << result; | |
256 return false; | |
257 } else { | |
258 // Do nothing: The documentation says that a return value of zero or | |
259 // one byte means the packet does not need to be transmitted. | |
260 return false; | |
261 } | |
262 } | |
263 | |
264 const scoped_ptr<uint8[]> encoder_memory_; | |
265 OpusEncoder* const opus_encoder_; | |
266 const scoped_ptr<float[]> buffer_; | |
267 | |
268 // This is the recommended value, according to documentation in | |
269 // third_party/opus/src/include/opus.h, so that the Opus encoder does not | |
270 // degrade the audio due to memory constraints. | |
271 // | |
272 // Note: Whereas other RTP implementations do not, the cast library is | |
273 // perfectly capable of transporting larger than MTU-sized audio frames. | |
274 static const int kOpusMaxPayloadSize = 4000; | |
275 | |
276 DISALLOW_COPY_AND_ASSIGN(OpusImpl); | |
277 }; | |
278 | |
279 class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase { | |
280 public: | |
281 Pcm16Impl(const scoped_refptr<CastEnvironment>& cast_environment, | |
282 int num_channels, | |
283 int sampling_rate, | |
284 const FrameEncodedCallback& callback) | |
285 : ImplBase(cast_environment, | |
286 transport::CODEC_AUDIO_PCM16, | |
287 num_channels, | |
288 sampling_rate, | |
289 callback), | |
290 buffer_(new int16[num_channels * samples_per_frame_]) { | |
291 if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED) | |
292 return; | |
293 cast_initialization_status_ = STATUS_AUDIO_INITIALIZED; | |
294 } | |
295 | |
296 private: | |
297 virtual ~Pcm16Impl() {} | |
298 | |
299 virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus, | |
300 int source_offset, | |
301 int buffer_fill_offset, | |
302 int num_samples) OVERRIDE { | |
303 audio_bus->ToInterleavedPartial( | |
304 source_offset, | |
305 num_samples, | |
306 sizeof(int16), | |
307 buffer_.get() + buffer_fill_offset * num_channels_); | |
308 } | |
309 | |
310 virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE { | |
311 // Output 16-bit PCM integers in big-endian byte order. | |
312 out->resize(num_channels_ * samples_per_frame_ * sizeof(int16)); | |
313 const int16* src = buffer_.get(); | |
314 const int16* const src_end = src + num_channels_ * samples_per_frame_; | |
315 uint16* dest = reinterpret_cast<uint16*>(&out->at(0)); | |
316 for (; src < src_end; ++src, ++dest) | |
317 *dest = base::HostToNet16(*src); | |
318 return true; | |
319 } | |
320 | |
321 private: | |
322 const scoped_ptr<int16[]> buffer_; | |
323 | |
324 DISALLOW_COPY_AND_ASSIGN(Pcm16Impl); | |
325 }; | |
326 | |
327 AudioEncoder::AudioEncoder( | |
328 const scoped_refptr<CastEnvironment>& cast_environment, | |
329 int num_channels, | |
330 int sampling_rate, | |
331 int bitrate, | |
332 transport::Codec codec, | |
333 const FrameEncodedCallback& frame_encoded_callback) | |
334 : cast_environment_(cast_environment) { | |
335 // Note: It doesn't matter which thread constructs AudioEncoder, just so long | |
336 // as all calls to InsertAudio() are by the same thread. | |
337 insert_thread_checker_.DetachFromThread(); | |
338 switch (codec) { | |
339 case transport::CODEC_AUDIO_OPUS: | |
340 impl_ = new OpusImpl(cast_environment, | |
341 num_channels, | |
342 sampling_rate, | |
343 bitrate, | |
344 frame_encoded_callback); | |
345 break; | |
346 case transport::CODEC_AUDIO_PCM16: | |
347 impl_ = new Pcm16Impl(cast_environment, | |
348 num_channels, | |
349 sampling_rate, | |
350 frame_encoded_callback); | |
351 break; | |
352 default: | |
353 NOTREACHED() << "Unsupported or unspecified codec for audio encoder"; | |
354 break; | |
355 } | |
356 } | |
357 | |
358 AudioEncoder::~AudioEncoder() {} | |
359 | |
360 CastInitializationStatus AudioEncoder::InitializationResult() const { | |
361 DCHECK(insert_thread_checker_.CalledOnValidThread()); | |
362 if (impl_) { | |
363 return impl_->InitializationResult(); | |
364 } | |
365 return STATUS_UNSUPPORTED_AUDIO_CODEC; | |
366 } | |
367 | |
368 void AudioEncoder::InsertAudio(scoped_ptr<AudioBus> audio_bus, | |
369 const base::TimeTicks& recorded_time) { | |
370 DCHECK(insert_thread_checker_.CalledOnValidThread()); | |
371 DCHECK(audio_bus.get()); | |
372 if (!impl_) { | |
373 NOTREACHED(); | |
374 return; | |
375 } | |
376 cast_environment_->PostTask(CastEnvironment::AUDIO, | |
377 FROM_HERE, | |
378 base::Bind(&AudioEncoder::ImplBase::EncodeAudio, | |
379 impl_, | |
380 base::Passed(&audio_bus), | |
381 recorded_time)); | |
382 } | |
383 | |
384 } // namespace cast | |
385 } // namespace media | |
OLD | NEW |