| OLD | NEW |
| (Empty) |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "media/cast/audio_sender/audio_encoder.h" | |
| 6 | |
| 7 #include <algorithm> | |
| 8 | |
| 9 #include "base/bind.h" | |
| 10 #include "base/bind_helpers.h" | |
| 11 #include "base/location.h" | |
| 12 #include "base/stl_util.h" | |
| 13 #include "base/sys_byteorder.h" | |
| 14 #include "base/time/time.h" | |
| 15 #include "media/base/audio_bus.h" | |
| 16 #include "media/cast/cast_defines.h" | |
| 17 #include "media/cast/cast_environment.h" | |
| 18 #include "third_party/opus/src/include/opus.h" | |
| 19 | |
| 20 namespace media { | |
| 21 namespace cast { | |
| 22 | |
namespace {

// Audio is chopped into a fixed number of frames per second.
const int kFramesPerSecond = 100;

// Length of one frame in milliseconds (the inverse of the rate above). The
// frame rate must divide 1000 evenly so that this is exact.
const int kFrameDurationMillis = 1000 / kFramesPerSecond;

// If incoming audio lags the capture timestamps by more than this many
// milliseconds, the encoder treats the data as arriving too slowly.
const int kUnderrunThresholdMillis = kFrameDurationMillis * 3;

}  // namespace
| 35 | |
| 36 | |
| 37 // Base class that handles the common problem of feeding one or more AudioBus' | |
| 38 // data into a buffer and then, once the buffer is full, encoding the signal and | |
| 39 // emitting an EncodedFrame via the FrameEncodedCallback. | |
| 40 // | |
| 41 // Subclasses complete the implementation by handling the actual encoding | |
| 42 // details. | |
| 43 class AudioEncoder::ImplBase | |
| 44 : public base::RefCountedThreadSafe<AudioEncoder::ImplBase> { | |
| 45 public: | |
| 46 ImplBase(const scoped_refptr<CastEnvironment>& cast_environment, | |
| 47 transport::Codec codec, | |
| 48 int num_channels, | |
| 49 int sampling_rate, | |
| 50 const FrameEncodedCallback& callback) | |
| 51 : cast_environment_(cast_environment), | |
| 52 codec_(codec), | |
| 53 num_channels_(num_channels), | |
| 54 samples_per_frame_(sampling_rate / kFramesPerSecond), | |
| 55 callback_(callback), | |
| 56 cast_initialization_status_(STATUS_AUDIO_UNINITIALIZED), | |
| 57 buffer_fill_end_(0), | |
| 58 frame_id_(0), | |
| 59 frame_rtp_timestamp_(0) { | |
| 60 // Support for max sampling rate of 48KHz, 2 channels, 100 ms duration. | |
| 61 const int kMaxSamplesTimesChannelsPerFrame = 48 * 2 * 100; | |
| 62 if (num_channels_ <= 0 || samples_per_frame_ <= 0 || | |
| 63 sampling_rate % kFramesPerSecond != 0 || | |
| 64 samples_per_frame_ * num_channels_ > kMaxSamplesTimesChannelsPerFrame) { | |
| 65 cast_initialization_status_ = STATUS_INVALID_AUDIO_CONFIGURATION; | |
| 66 } | |
| 67 } | |
| 68 | |
| 69 CastInitializationStatus InitializationResult() const { | |
| 70 return cast_initialization_status_; | |
| 71 } | |
| 72 | |
| 73 void EncodeAudio(scoped_ptr<AudioBus> audio_bus, | |
| 74 const base::TimeTicks& recorded_time) { | |
| 75 DCHECK_EQ(cast_initialization_status_, STATUS_AUDIO_INITIALIZED); | |
| 76 DCHECK(!recorded_time.is_null()); | |
| 77 | |
| 78 // Determine whether |recorded_time| is consistent with the amount of audio | |
| 79 // data having been processed in the past. Resolve the underrun problem by | |
| 80 // dropping data from the internal buffer and skipping ahead the next | |
| 81 // frame's RTP timestamp by the estimated number of frames missed. On the | |
| 82 // other hand, don't attempt to resolve overruns: A receiver should | |
| 83 // gracefully deal with an excess of audio data. | |
| 84 const base::TimeDelta frame_duration = | |
| 85 base::TimeDelta::FromMilliseconds(kFrameDurationMillis); | |
| 86 base::TimeDelta buffer_fill_duration = | |
| 87 buffer_fill_end_ * frame_duration / samples_per_frame_; | |
| 88 if (!frame_capture_time_.is_null()) { | |
| 89 const base::TimeDelta amount_ahead_by = | |
| 90 recorded_time - (frame_capture_time_ + buffer_fill_duration); | |
| 91 if (amount_ahead_by > | |
| 92 base::TimeDelta::FromMilliseconds(kUnderrunThresholdMillis)) { | |
| 93 buffer_fill_end_ = 0; | |
| 94 buffer_fill_duration = base::TimeDelta(); | |
| 95 const int64 num_frames_missed = amount_ahead_by / | |
| 96 base::TimeDelta::FromMilliseconds(kFrameDurationMillis); | |
| 97 frame_rtp_timestamp_ += | |
| 98 static_cast<uint32>(num_frames_missed * samples_per_frame_); | |
| 99 DVLOG(1) << "Skipping RTP timestamp ahead to account for " | |
| 100 << num_frames_missed * samples_per_frame_ | |
| 101 << " samples' worth of underrun."; | |
| 102 } | |
| 103 } | |
| 104 frame_capture_time_ = recorded_time - buffer_fill_duration; | |
| 105 | |
| 106 // Encode all audio in |audio_bus| into zero or more frames. | |
| 107 int src_pos = 0; | |
| 108 while (src_pos < audio_bus->frames()) { | |
| 109 const int num_samples_to_xfer = std::min( | |
| 110 samples_per_frame_ - buffer_fill_end_, audio_bus->frames() - src_pos); | |
| 111 DCHECK_EQ(audio_bus->channels(), num_channels_); | |
| 112 TransferSamplesIntoBuffer( | |
| 113 audio_bus.get(), src_pos, buffer_fill_end_, num_samples_to_xfer); | |
| 114 src_pos += num_samples_to_xfer; | |
| 115 buffer_fill_end_ += num_samples_to_xfer; | |
| 116 | |
| 117 if (buffer_fill_end_ < samples_per_frame_) | |
| 118 break; | |
| 119 | |
| 120 scoped_ptr<transport::EncodedFrame> audio_frame( | |
| 121 new transport::EncodedFrame()); | |
| 122 audio_frame->dependency = transport::EncodedFrame::KEY; | |
| 123 audio_frame->frame_id = frame_id_; | |
| 124 audio_frame->referenced_frame_id = frame_id_; | |
| 125 audio_frame->rtp_timestamp = frame_rtp_timestamp_; | |
| 126 audio_frame->reference_time = frame_capture_time_; | |
| 127 | |
| 128 if (EncodeFromFilledBuffer(&audio_frame->data)) { | |
| 129 cast_environment_->PostTask( | |
| 130 CastEnvironment::MAIN, | |
| 131 FROM_HERE, | |
| 132 base::Bind(callback_, base::Passed(&audio_frame))); | |
| 133 } | |
| 134 | |
| 135 // Reset the internal buffer, frame ID, and timestamps for the next frame. | |
| 136 buffer_fill_end_ = 0; | |
| 137 ++frame_id_; | |
| 138 frame_rtp_timestamp_ += samples_per_frame_; | |
| 139 frame_capture_time_ += frame_duration; | |
| 140 } | |
| 141 } | |
| 142 | |
| 143 protected: | |
| 144 friend class base::RefCountedThreadSafe<ImplBase>; | |
| 145 virtual ~ImplBase() {} | |
| 146 | |
| 147 virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus, | |
| 148 int source_offset, | |
| 149 int buffer_fill_offset, | |
| 150 int num_samples) = 0; | |
| 151 virtual bool EncodeFromFilledBuffer(std::string* out) = 0; | |
| 152 | |
| 153 const scoped_refptr<CastEnvironment> cast_environment_; | |
| 154 const transport::Codec codec_; | |
| 155 const int num_channels_; | |
| 156 const int samples_per_frame_; | |
| 157 const FrameEncodedCallback callback_; | |
| 158 | |
| 159 // Subclass' ctor is expected to set this to STATUS_AUDIO_INITIALIZED. | |
| 160 CastInitializationStatus cast_initialization_status_; | |
| 161 | |
| 162 private: | |
| 163 // In the case where a call to EncodeAudio() cannot completely fill the | |
| 164 // buffer, this points to the position at which to populate data in a later | |
| 165 // call. | |
| 166 int buffer_fill_end_; | |
| 167 | |
| 168 // A counter used to label EncodedFrames. | |
| 169 uint32 frame_id_; | |
| 170 | |
| 171 // The RTP timestamp for the next frame of encoded audio. This is defined as | |
| 172 // the number of audio samples encoded so far, plus the estimated number of | |
| 173 // samples that were missed due to data underruns. A receiver uses this value | |
| 174 // to detect gaps in the audio signal data being provided. Per the spec, RTP | |
| 175 // timestamp values are allowed to overflow and roll around past zero. | |
| 176 uint32 frame_rtp_timestamp_; | |
| 177 | |
| 178 // The local system time associated with the start of the next frame of | |
| 179 // encoded audio. This value is passed on to a receiver as a reference clock | |
| 180 // timestamp for the purposes of synchronizing audio and video. Its | |
| 181 // progression is expected to drift relative to the elapsed time implied by | |
| 182 // the RTP timestamps. | |
| 183 base::TimeTicks frame_capture_time_; | |
| 184 | |
| 185 DISALLOW_COPY_AND_ASSIGN(ImplBase); | |
| 186 }; | |
| 187 | |
| 188 class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase { | |
| 189 public: | |
| 190 OpusImpl(const scoped_refptr<CastEnvironment>& cast_environment, | |
| 191 int num_channels, | |
| 192 int sampling_rate, | |
| 193 int bitrate, | |
| 194 const FrameEncodedCallback& callback) | |
| 195 : ImplBase(cast_environment, | |
| 196 transport::CODEC_AUDIO_OPUS, | |
| 197 num_channels, | |
| 198 sampling_rate, | |
| 199 callback), | |
| 200 encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]), | |
| 201 opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())), | |
| 202 buffer_(new float[num_channels * samples_per_frame_]) { | |
| 203 if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED) | |
| 204 return; | |
| 205 if (opus_encoder_init(opus_encoder_, | |
| 206 sampling_rate, | |
| 207 num_channels, | |
| 208 OPUS_APPLICATION_AUDIO) != OPUS_OK) { | |
| 209 ImplBase::cast_initialization_status_ = | |
| 210 STATUS_INVALID_AUDIO_CONFIGURATION; | |
| 211 return; | |
| 212 } | |
| 213 ImplBase::cast_initialization_status_ = STATUS_AUDIO_INITIALIZED; | |
| 214 | |
| 215 if (bitrate <= 0) { | |
| 216 // Note: As of 2013-10-31, the encoder in "auto bitrate" mode would use a | |
| 217 // variable bitrate up to 102kbps for 2-channel, 48 kHz audio and a 10 ms | |
| 218 // frame size. The opus library authors may, of course, adjust this in | |
| 219 // later versions. | |
| 220 bitrate = OPUS_AUTO; | |
| 221 } | |
| 222 CHECK_EQ(opus_encoder_ctl(opus_encoder_, OPUS_SET_BITRATE(bitrate)), | |
| 223 OPUS_OK); | |
| 224 } | |
| 225 | |
| 226 private: | |
| 227 virtual ~OpusImpl() {} | |
| 228 | |
| 229 virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus, | |
| 230 int source_offset, | |
| 231 int buffer_fill_offset, | |
| 232 int num_samples) OVERRIDE { | |
| 233 // Opus requires channel-interleaved samples in a single array. | |
| 234 for (int ch = 0; ch < audio_bus->channels(); ++ch) { | |
| 235 const float* src = audio_bus->channel(ch) + source_offset; | |
| 236 const float* const src_end = src + num_samples; | |
| 237 float* dest = buffer_.get() + buffer_fill_offset * num_channels_ + ch; | |
| 238 for (; src < src_end; ++src, dest += num_channels_) | |
| 239 *dest = *src; | |
| 240 } | |
| 241 } | |
| 242 | |
| 243 virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE { | |
| 244 out->resize(kOpusMaxPayloadSize); | |
| 245 const opus_int32 result = | |
| 246 opus_encode_float(opus_encoder_, | |
| 247 buffer_.get(), | |
| 248 samples_per_frame_, | |
| 249 reinterpret_cast<uint8*>(string_as_array(out)), | |
| 250 kOpusMaxPayloadSize); | |
| 251 if (result > 1) { | |
| 252 out->resize(result); | |
| 253 return true; | |
| 254 } else if (result < 0) { | |
| 255 LOG(ERROR) << "Error code from opus_encode_float(): " << result; | |
| 256 return false; | |
| 257 } else { | |
| 258 // Do nothing: The documentation says that a return value of zero or | |
| 259 // one byte means the packet does not need to be transmitted. | |
| 260 return false; | |
| 261 } | |
| 262 } | |
| 263 | |
| 264 const scoped_ptr<uint8[]> encoder_memory_; | |
| 265 OpusEncoder* const opus_encoder_; | |
| 266 const scoped_ptr<float[]> buffer_; | |
| 267 | |
| 268 // This is the recommended value, according to documentation in | |
| 269 // third_party/opus/src/include/opus.h, so that the Opus encoder does not | |
| 270 // degrade the audio due to memory constraints. | |
| 271 // | |
| 272 // Note: Whereas other RTP implementations do not, the cast library is | |
| 273 // perfectly capable of transporting larger than MTU-sized audio frames. | |
| 274 static const int kOpusMaxPayloadSize = 4000; | |
| 275 | |
| 276 DISALLOW_COPY_AND_ASSIGN(OpusImpl); | |
| 277 }; | |
| 278 | |
| 279 class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase { | |
| 280 public: | |
| 281 Pcm16Impl(const scoped_refptr<CastEnvironment>& cast_environment, | |
| 282 int num_channels, | |
| 283 int sampling_rate, | |
| 284 const FrameEncodedCallback& callback) | |
| 285 : ImplBase(cast_environment, | |
| 286 transport::CODEC_AUDIO_PCM16, | |
| 287 num_channels, | |
| 288 sampling_rate, | |
| 289 callback), | |
| 290 buffer_(new int16[num_channels * samples_per_frame_]) { | |
| 291 if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED) | |
| 292 return; | |
| 293 cast_initialization_status_ = STATUS_AUDIO_INITIALIZED; | |
| 294 } | |
| 295 | |
| 296 private: | |
| 297 virtual ~Pcm16Impl() {} | |
| 298 | |
| 299 virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus, | |
| 300 int source_offset, | |
| 301 int buffer_fill_offset, | |
| 302 int num_samples) OVERRIDE { | |
| 303 audio_bus->ToInterleavedPartial( | |
| 304 source_offset, | |
| 305 num_samples, | |
| 306 sizeof(int16), | |
| 307 buffer_.get() + buffer_fill_offset * num_channels_); | |
| 308 } | |
| 309 | |
| 310 virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE { | |
| 311 // Output 16-bit PCM integers in big-endian byte order. | |
| 312 out->resize(num_channels_ * samples_per_frame_ * sizeof(int16)); | |
| 313 const int16* src = buffer_.get(); | |
| 314 const int16* const src_end = src + num_channels_ * samples_per_frame_; | |
| 315 uint16* dest = reinterpret_cast<uint16*>(&out->at(0)); | |
| 316 for (; src < src_end; ++src, ++dest) | |
| 317 *dest = base::HostToNet16(*src); | |
| 318 return true; | |
| 319 } | |
| 320 | |
| 321 private: | |
| 322 const scoped_ptr<int16[]> buffer_; | |
| 323 | |
| 324 DISALLOW_COPY_AND_ASSIGN(Pcm16Impl); | |
| 325 }; | |
| 326 | |
| 327 AudioEncoder::AudioEncoder( | |
| 328 const scoped_refptr<CastEnvironment>& cast_environment, | |
| 329 int num_channels, | |
| 330 int sampling_rate, | |
| 331 int bitrate, | |
| 332 transport::Codec codec, | |
| 333 const FrameEncodedCallback& frame_encoded_callback) | |
| 334 : cast_environment_(cast_environment) { | |
| 335 // Note: It doesn't matter which thread constructs AudioEncoder, just so long | |
| 336 // as all calls to InsertAudio() are by the same thread. | |
| 337 insert_thread_checker_.DetachFromThread(); | |
| 338 switch (codec) { | |
| 339 case transport::CODEC_AUDIO_OPUS: | |
| 340 impl_ = new OpusImpl(cast_environment, | |
| 341 num_channels, | |
| 342 sampling_rate, | |
| 343 bitrate, | |
| 344 frame_encoded_callback); | |
| 345 break; | |
| 346 case transport::CODEC_AUDIO_PCM16: | |
| 347 impl_ = new Pcm16Impl(cast_environment, | |
| 348 num_channels, | |
| 349 sampling_rate, | |
| 350 frame_encoded_callback); | |
| 351 break; | |
| 352 default: | |
| 353 NOTREACHED() << "Unsupported or unspecified codec for audio encoder"; | |
| 354 break; | |
| 355 } | |
| 356 } | |
| 357 | |
| 358 AudioEncoder::~AudioEncoder() {} | |
| 359 | |
| 360 CastInitializationStatus AudioEncoder::InitializationResult() const { | |
| 361 DCHECK(insert_thread_checker_.CalledOnValidThread()); | |
| 362 if (impl_) { | |
| 363 return impl_->InitializationResult(); | |
| 364 } | |
| 365 return STATUS_UNSUPPORTED_AUDIO_CODEC; | |
| 366 } | |
| 367 | |
| 368 void AudioEncoder::InsertAudio(scoped_ptr<AudioBus> audio_bus, | |
| 369 const base::TimeTicks& recorded_time) { | |
| 370 DCHECK(insert_thread_checker_.CalledOnValidThread()); | |
| 371 DCHECK(audio_bus.get()); | |
| 372 if (!impl_) { | |
| 373 NOTREACHED(); | |
| 374 return; | |
| 375 } | |
| 376 cast_environment_->PostTask(CastEnvironment::AUDIO, | |
| 377 FROM_HERE, | |
| 378 base::Bind(&AudioEncoder::ImplBase::EncodeAudio, | |
| 379 impl_, | |
| 380 base::Passed(&audio_bus), | |
| 381 recorded_time)); | |
| 382 } | |
| 383 | |
| 384 } // namespace cast | |
| 385 } // namespace media | |
| OLD | NEW |