| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "media/cast/sender/audio_encoder.h" | 5 #include "media/cast/sender/audio_encoder.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 | 8 |
| 9 #include "base/bind.h" | 9 #include "base/bind.h" |
| 10 #include "base/bind_helpers.h" | 10 #include "base/bind_helpers.h" |
| 11 #include "base/location.h" | 11 #include "base/location.h" |
| 12 #include "base/stl_util.h" | 12 #include "base/stl_util.h" |
| 13 #include "base/sys_byteorder.h" | 13 #include "base/sys_byteorder.h" |
| 14 #include "base/time/time.h" | 14 #include "base/time/time.h" |
| 15 #include "media/base/audio_bus.h" | 15 #include "media/base/audio_bus.h" |
| 16 #include "media/cast/cast_defines.h" | 16 #include "media/cast/cast_defines.h" |
| 17 #include "media/cast/cast_environment.h" | 17 #include "media/cast/cast_environment.h" |
| 18 #include "third_party/opus/src/include/opus.h" | 18 #include "third_party/opus/src/include/opus.h" |
| 19 | 19 |
| 20 namespace media { | 20 namespace media { |
| 21 namespace cast { | 21 namespace cast { |
| 22 | 22 |
| 23 namespace { | 23 namespace { |
| 24 | 24 |
| 25 // The fixed number of audio frames per second and, inversely, the duration of | 25 const int kUnderrunSkipThreshold = 3; |
| 26 // one frame's worth of samples. | 26 const int kDefaultFramesPerSecond = 100; |
| 27 const int kFramesPerSecond = 100; | |
| 28 const int kFrameDurationMillis = 1000 / kFramesPerSecond; // No remainder! | |
| 29 | |
| 30 // Threshold used to decide whether audio being delivered to the encoder is | |
| 31 // coming in too slow with respect to the capture timestamps. | |
| 32 const int kUnderrunThresholdMillis = 3 * kFrameDurationMillis; | |
| 33 | 27 |
| 34 } // namespace | 28 } // namespace |
| 35 | 29 |
| 36 | |
| 37 // Base class that handles the common problem of feeding one or more AudioBus' | 30 // Base class that handles the common problem of feeding one or more AudioBus' |
| 38 // data into a buffer and then, once the buffer is full, encoding the signal and | 31 // data into a buffer and then, once the buffer is full, encoding the signal and |
| 39 // emitting an EncodedFrame via the FrameEncodedCallback. | 32 // emitting an EncodedFrame via the FrameEncodedCallback. |
| 40 // | 33 // |
| 41 // Subclasses complete the implementation by handling the actual encoding | 34 // Subclasses complete the implementation by handling the actual encoding |
| 42 // details. | 35 // details. |
| 43 class AudioEncoder::ImplBase | 36 class AudioEncoder::ImplBase |
| 44 : public base::RefCountedThreadSafe<AudioEncoder::ImplBase> { | 37 : public base::RefCountedThreadSafe<AudioEncoder::ImplBase> { |
| 45 public: | 38 public: |
| 46 ImplBase(const scoped_refptr<CastEnvironment>& cast_environment, | 39 ImplBase(const scoped_refptr<CastEnvironment>& cast_environment, |
| 47 Codec codec, | 40 Codec codec, |
| 48 int num_channels, | 41 int num_channels, |
| 49 int sampling_rate, | 42 int sampling_rate, |
| 43 int samples_per_frame, |
| 50 const FrameEncodedCallback& callback) | 44 const FrameEncodedCallback& callback) |
| 51 : cast_environment_(cast_environment), | 45 : cast_environment_(cast_environment), |
| 52 codec_(codec), | 46 codec_(codec), |
| 53 num_channels_(num_channels), | 47 num_channels_(num_channels), |
| 54 samples_per_frame_(sampling_rate / kFramesPerSecond), | 48 samples_per_frame_(samples_per_frame), |
| 55 callback_(callback), | 49 callback_(callback), |
| 56 cast_initialization_status_(STATUS_AUDIO_UNINITIALIZED), | 50 cast_initialization_status_(STATUS_AUDIO_UNINITIALIZED), |
| 51 frame_duration_(base::TimeDelta::FromMicroseconds( |
| 52 base::Time::kMicrosecondsPerSecond * samples_per_frame_ / |
| 53 sampling_rate)), |
| 57 buffer_fill_end_(0), | 54 buffer_fill_end_(0), |
| 58 frame_id_(0), | 55 frame_id_(0), |
| 59 frame_rtp_timestamp_(0), | 56 frame_rtp_timestamp_(0), |
| 60 samples_dropped_from_buffer_(0) { | 57 samples_dropped_from_buffer_(0) { |
| 61 // Support for max sampling rate of 48KHz, 2 channels, 100 ms duration. | 58 // Support for max sampling rate of 48KHz, 2 channels, 100 ms duration. |
| 62 const int kMaxSamplesTimesChannelsPerFrame = 48 * 2 * 100; | 59 const int kMaxSamplesTimesChannelsPerFrame = 48 * 2 * 100; |
| 63 if (num_channels_ <= 0 || samples_per_frame_ <= 0 || | 60 if (num_channels_ <= 0 || samples_per_frame_ <= 0 || |
| 64 sampling_rate % kFramesPerSecond != 0 || | 61 frame_duration_ == base::TimeDelta() || |
| 65 samples_per_frame_ * num_channels_ > kMaxSamplesTimesChannelsPerFrame) { | 62 samples_per_frame_ * num_channels_ > kMaxSamplesTimesChannelsPerFrame) { |
| 66 cast_initialization_status_ = STATUS_INVALID_AUDIO_CONFIGURATION; | 63 cast_initialization_status_ = STATUS_INVALID_AUDIO_CONFIGURATION; |
| 67 } | 64 } |
| 68 } | 65 } |
| 69 | 66 |
| 70 CastInitializationStatus InitializationResult() const { | 67 CastInitializationStatus InitializationResult() const { |
| 71 return cast_initialization_status_; | 68 return cast_initialization_status_; |
| 72 } | 69 } |
| 73 | 70 |
| 74 int samples_per_frame() const { | 71 int samples_per_frame() const { |
| 75 return samples_per_frame_; | 72 return samples_per_frame_; |
| 76 } | 73 } |
| 77 | 74 |
| 75 base::TimeDelta frame_duration() const { return frame_duration_; } |
| 76 |
| 78 void EncodeAudio(scoped_ptr<AudioBus> audio_bus, | 77 void EncodeAudio(scoped_ptr<AudioBus> audio_bus, |
| 79 const base::TimeTicks& recorded_time) { | 78 const base::TimeTicks& recorded_time) { |
| 80 DCHECK_EQ(cast_initialization_status_, STATUS_AUDIO_INITIALIZED); | 79 DCHECK_EQ(cast_initialization_status_, STATUS_AUDIO_INITIALIZED); |
| 81 DCHECK(!recorded_time.is_null()); | 80 DCHECK(!recorded_time.is_null()); |
| 82 | 81 |
| 83 // Determine whether |recorded_time| is consistent with the amount of audio | 82 // Determine whether |recorded_time| is consistent with the amount of audio |
| 84 // data having been processed in the past. Resolve the underrun problem by | 83 // data having been processed in the past. Resolve the underrun problem by |
| 85 // dropping data from the internal buffer and skipping ahead the next | 84 // dropping data from the internal buffer and skipping ahead the next |
| 86 // frame's RTP timestamp by the estimated number of frames missed. On the | 85 // frame's RTP timestamp by the estimated number of frames missed. On the |
| 87 // other hand, don't attempt to resolve overruns: A receiver should | 86 // other hand, don't attempt to resolve overruns: A receiver should |
| 88 // gracefully deal with an excess of audio data. | 87 // gracefully deal with an excess of audio data. |
| 89 const base::TimeDelta frame_duration = | |
| 90 base::TimeDelta::FromMilliseconds(kFrameDurationMillis); | |
| 91 base::TimeDelta buffer_fill_duration = | 88 base::TimeDelta buffer_fill_duration = |
| 92 buffer_fill_end_ * frame_duration / samples_per_frame_; | 89 buffer_fill_end_ * frame_duration_ / samples_per_frame_; |
| 93 if (!frame_capture_time_.is_null()) { | 90 if (!frame_capture_time_.is_null()) { |
| 94 const base::TimeDelta amount_ahead_by = | 91 const base::TimeDelta amount_ahead_by = |
| 95 recorded_time - (frame_capture_time_ + buffer_fill_duration); | 92 recorded_time - (frame_capture_time_ + buffer_fill_duration); |
| 96 if (amount_ahead_by > | 93 const int64 num_frames_missed = amount_ahead_by / frame_duration_; |
| 97 base::TimeDelta::FromMilliseconds(kUnderrunThresholdMillis)) { | 94 if (num_frames_missed > kUnderrunSkipThreshold) { |
| 98 samples_dropped_from_buffer_ += buffer_fill_end_; | 95 samples_dropped_from_buffer_ += buffer_fill_end_; |
| 99 buffer_fill_end_ = 0; | 96 buffer_fill_end_ = 0; |
| 100 buffer_fill_duration = base::TimeDelta(); | 97 buffer_fill_duration = base::TimeDelta(); |
| 101 const int64 num_frames_missed = amount_ahead_by / | |
| 102 base::TimeDelta::FromMilliseconds(kFrameDurationMillis); | |
| 103 frame_rtp_timestamp_ += | 98 frame_rtp_timestamp_ += |
| 104 static_cast<uint32>(num_frames_missed * samples_per_frame_); | 99 static_cast<uint32>(num_frames_missed * samples_per_frame_); |
| 105 DVLOG(1) << "Skipping RTP timestamp ahead to account for " | 100 DVLOG(1) << "Skipping RTP timestamp ahead to account for " |
| 106 << num_frames_missed * samples_per_frame_ | 101 << num_frames_missed * samples_per_frame_ |
| 107 << " samples' worth of underrun."; | 102 << " samples' worth of underrun."; |
| 108 } | 103 } |
| 109 } | 104 } |
| 110 frame_capture_time_ = recorded_time - buffer_fill_duration; | 105 frame_capture_time_ = recorded_time - buffer_fill_duration; |
| 111 | 106 |
| 112 // Encode all audio in |audio_bus| into zero or more frames. | 107 // Encode all audio in |audio_bus| into zero or more frames. |
| (...skipping 25 matching lines...) Expand all Loading... |
| 138 base::Bind(callback_, | 133 base::Bind(callback_, |
| 139 base::Passed(&audio_frame), | 134 base::Passed(&audio_frame), |
| 140 samples_dropped_from_buffer_)); | 135 samples_dropped_from_buffer_)); |
| 141 samples_dropped_from_buffer_ = 0; | 136 samples_dropped_from_buffer_ = 0; |
| 142 } | 137 } |
| 143 | 138 |
| 144 // Reset the internal buffer, frame ID, and timestamps for the next frame. | 139 // Reset the internal buffer, frame ID, and timestamps for the next frame. |
| 145 buffer_fill_end_ = 0; | 140 buffer_fill_end_ = 0; |
| 146 ++frame_id_; | 141 ++frame_id_; |
| 147 frame_rtp_timestamp_ += samples_per_frame_; | 142 frame_rtp_timestamp_ += samples_per_frame_; |
| 148 frame_capture_time_ += frame_duration; | 143 frame_capture_time_ += frame_duration_; |
| 149 } | 144 } |
| 150 } | 145 } |
| 151 | 146 |
| 152 protected: | 147 protected: |
| 153 friend class base::RefCountedThreadSafe<ImplBase>; | 148 friend class base::RefCountedThreadSafe<ImplBase>; |
| 154 virtual ~ImplBase() {} | 149 virtual ~ImplBase() {} |
| 155 | 150 |
| 156 virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus, | 151 virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus, |
| 157 int source_offset, | 152 int source_offset, |
| 158 int buffer_fill_offset, | 153 int buffer_fill_offset, |
| 159 int num_samples) = 0; | 154 int num_samples) = 0; |
| 160 virtual bool EncodeFromFilledBuffer(std::string* out) = 0; | 155 virtual bool EncodeFromFilledBuffer(std::string* out) = 0; |
| 161 | 156 |
| 162 const scoped_refptr<CastEnvironment> cast_environment_; | 157 const scoped_refptr<CastEnvironment> cast_environment_; |
| 163 const Codec codec_; | 158 const Codec codec_; |
| 164 const int num_channels_; | 159 const int num_channels_; |
| 165 const int samples_per_frame_; | 160 const int samples_per_frame_; |
| 166 const FrameEncodedCallback callback_; | 161 const FrameEncodedCallback callback_; |
| 167 | 162 |
| 168 // Subclass' ctor is expected to set this to STATUS_AUDIO_INITIALIZED. | 163 // Subclass' ctor is expected to set this to STATUS_AUDIO_INITIALIZED. |
| 169 CastInitializationStatus cast_initialization_status_; | 164 CastInitializationStatus cast_initialization_status_; |
| 170 | 165 |
| 166 // The duration of one frame of encoded audio samples. Derived from |
| 167 // |samples_per_frame_| and the sampling rate. |
| 168 const base::TimeDelta frame_duration_; |
| 169 |
| 171 private: | 170 private: |
| 172 // In the case where a call to EncodeAudio() cannot completely fill the | 171 // In the case where a call to EncodeAudio() cannot completely fill the |
| 173 // buffer, this points to the position at which to populate data in a later | 172 // buffer, this points to the position at which to populate data in a later |
| 174 // call. | 173 // call. |
| 175 int buffer_fill_end_; | 174 int buffer_fill_end_; |
| 176 | 175 |
| 177 // A counter used to label EncodedFrames. | 176 // A counter used to label EncodedFrames. |
| 178 uint32 frame_id_; | 177 uint32 frame_id_; |
| 179 | 178 |
| 180 // The RTP timestamp for the next frame of encoded audio. This is defined as | 179 // The RTP timestamp for the next frame of encoded audio. This is defined as |
| (...skipping 21 matching lines...) Expand all Loading... |
| 202 public: | 201 public: |
| 203 OpusImpl(const scoped_refptr<CastEnvironment>& cast_environment, | 202 OpusImpl(const scoped_refptr<CastEnvironment>& cast_environment, |
| 204 int num_channels, | 203 int num_channels, |
| 205 int sampling_rate, | 204 int sampling_rate, |
| 206 int bitrate, | 205 int bitrate, |
| 207 const FrameEncodedCallback& callback) | 206 const FrameEncodedCallback& callback) |
| 208 : ImplBase(cast_environment, | 207 : ImplBase(cast_environment, |
| 209 CODEC_AUDIO_OPUS, | 208 CODEC_AUDIO_OPUS, |
| 210 num_channels, | 209 num_channels, |
| 211 sampling_rate, | 210 sampling_rate, |
| 211 sampling_rate / kDefaultFramesPerSecond, /* 10 ms frames */ |
| 212 callback), | 212 callback), |
| 213 encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]), | 213 encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]), |
| 214 opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())), | 214 opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())), |
| 215 buffer_(new float[num_channels * samples_per_frame_]) { | 215 buffer_(new float[num_channels * samples_per_frame_]) { |
| 216 if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED) | 216 if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED || |
| 217 sampling_rate % samples_per_frame_ != 0 || |
| 218 !IsValidFrameDuration(frame_duration_)) { |
| 217 return; | 219 return; |
| 220 } |
| 218 if (opus_encoder_init(opus_encoder_, | 221 if (opus_encoder_init(opus_encoder_, |
| 219 sampling_rate, | 222 sampling_rate, |
| 220 num_channels, | 223 num_channels, |
| 221 OPUS_APPLICATION_AUDIO) != OPUS_OK) { | 224 OPUS_APPLICATION_AUDIO) != OPUS_OK) { |
| 222 ImplBase::cast_initialization_status_ = | 225 ImplBase::cast_initialization_status_ = |
| 223 STATUS_INVALID_AUDIO_CONFIGURATION; | 226 STATUS_INVALID_AUDIO_CONFIGURATION; |
| 224 return; | 227 return; |
| 225 } | 228 } |
| 226 ImplBase::cast_initialization_status_ = STATUS_AUDIO_INITIALIZED; | 229 ImplBase::cast_initialization_status_ = STATUS_AUDIO_INITIALIZED; |
| 227 | 230 |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 267 } else if (result < 0) { | 270 } else if (result < 0) { |
| 268 LOG(ERROR) << "Error code from opus_encode_float(): " << result; | 271 LOG(ERROR) << "Error code from opus_encode_float(): " << result; |
| 269 return false; | 272 return false; |
| 270 } else { | 273 } else { |
| 271 // Do nothing: The documentation says that a return value of zero or | 274 // Do nothing: The documentation says that a return value of zero or |
| 272 // one byte means the packet does not need to be transmitted. | 275 // one byte means the packet does not need to be transmitted. |
| 273 return false; | 276 return false; |
| 274 } | 277 } |
| 275 } | 278 } |
| 276 | 279 |
| 280 static bool IsValidFrameDuration(base::TimeDelta duration) { |
| 281 // See https://tools.ietf.org/html/rfc6716#section-2.1.4 |
| 282 return duration == base::TimeDelta::FromMicroseconds(2500) || |
| 283 duration == base::TimeDelta::FromMilliseconds(5) || |
| 284 duration == base::TimeDelta::FromMilliseconds(10) || |
| 285 duration == base::TimeDelta::FromMilliseconds(20) || |
| 286 duration == base::TimeDelta::FromMilliseconds(40) || |
| 287 duration == base::TimeDelta::FromMilliseconds(60); |
| 288 } |
| 289 |
| 277 const scoped_ptr<uint8[]> encoder_memory_; | 290 const scoped_ptr<uint8[]> encoder_memory_; |
| 278 OpusEncoder* const opus_encoder_; | 291 OpusEncoder* const opus_encoder_; |
| 279 const scoped_ptr<float[]> buffer_; | 292 const scoped_ptr<float[]> buffer_; |
| 280 | 293 |
| 281 // This is the recommended value, according to documentation in | 294 // This is the recommended value, according to documentation in |
| 282 // third_party/opus/src/include/opus.h, so that the Opus encoder does not | 295 // third_party/opus/src/include/opus.h, so that the Opus encoder does not |
| 283 // degrade the audio due to memory constraints. | 296 // degrade the audio due to memory constraints. |
| 284 // | 297 // |
| 285 // Note: Whereas other RTP implementations do not, the cast library is | 298 // Note: Whereas other RTP implementations do not, the cast library is |
| 286 // perfectly capable of transporting larger than MTU-sized audio frames. | 299 // perfectly capable of transporting larger than MTU-sized audio frames. |
| 287 static const int kOpusMaxPayloadSize = 4000; | 300 static const int kOpusMaxPayloadSize = 4000; |
| 288 | 301 |
| 289 DISALLOW_COPY_AND_ASSIGN(OpusImpl); | 302 DISALLOW_COPY_AND_ASSIGN(OpusImpl); |
| 290 }; | 303 }; |
| 291 | 304 |
| 292 class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase { | 305 class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase { |
| 293 public: | 306 public: |
| 294 Pcm16Impl(const scoped_refptr<CastEnvironment>& cast_environment, | 307 Pcm16Impl(const scoped_refptr<CastEnvironment>& cast_environment, |
| 295 int num_channels, | 308 int num_channels, |
| 296 int sampling_rate, | 309 int sampling_rate, |
| 297 const FrameEncodedCallback& callback) | 310 const FrameEncodedCallback& callback) |
| 298 : ImplBase(cast_environment, | 311 : ImplBase(cast_environment, |
| 299 CODEC_AUDIO_PCM16, | 312 CODEC_AUDIO_PCM16, |
| 300 num_channels, | 313 num_channels, |
| 301 sampling_rate, | 314 sampling_rate, |
| 315 sampling_rate / kDefaultFramesPerSecond, /* 10 ms frames */ |
| 302 callback), | 316 callback), |
| 303 buffer_(new int16[num_channels * samples_per_frame_]) { | 317 buffer_(new int16[num_channels * samples_per_frame_]) { |
| 304 if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED) | 318 if (ImplBase::cast_initialization_status_ != STATUS_AUDIO_UNINITIALIZED) |
| 305 return; | 319 return; |
| 306 cast_initialization_status_ = STATUS_AUDIO_INITIALIZED; | 320 cast_initialization_status_ = STATUS_AUDIO_INITIALIZED; |
| 307 } | 321 } |
| 308 | 322 |
| 309 private: | 323 private: |
| 310 ~Pcm16Impl() override {} | 324 ~Pcm16Impl() override {} |
| 311 | 325 |
| (...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 380 | 394 |
| 381 int AudioEncoder::GetSamplesPerFrame() const { | 395 int AudioEncoder::GetSamplesPerFrame() const { |
| 382 DCHECK(insert_thread_checker_.CalledOnValidThread()); | 396 DCHECK(insert_thread_checker_.CalledOnValidThread()); |
| 383 if (InitializationResult() != STATUS_AUDIO_INITIALIZED) { | 397 if (InitializationResult() != STATUS_AUDIO_INITIALIZED) { |
| 384 NOTREACHED(); | 398 NOTREACHED(); |
| 385 return std::numeric_limits<int>::max(); | 399 return std::numeric_limits<int>::max(); |
| 386 } | 400 } |
| 387 return impl_->samples_per_frame(); | 401 return impl_->samples_per_frame(); |
| 388 } | 402 } |
| 389 | 403 |
| 404 base::TimeDelta AudioEncoder::GetFrameDuration() const { |
| 405 DCHECK(insert_thread_checker_.CalledOnValidThread()); |
| 406 if (InitializationResult() != STATUS_AUDIO_INITIALIZED) { |
| 407 NOTREACHED(); |
| 408 return base::TimeDelta(); |
| 409 } |
| 410 return impl_->frame_duration(); |
| 411 } |
| 412 |
| 390 void AudioEncoder::InsertAudio(scoped_ptr<AudioBus> audio_bus, | 413 void AudioEncoder::InsertAudio(scoped_ptr<AudioBus> audio_bus, |
| 391 const base::TimeTicks& recorded_time) { | 414 const base::TimeTicks& recorded_time) { |
| 392 DCHECK(insert_thread_checker_.CalledOnValidThread()); | 415 DCHECK(insert_thread_checker_.CalledOnValidThread()); |
| 393 DCHECK(audio_bus.get()); | 416 DCHECK(audio_bus.get()); |
| 394 if (InitializationResult() != STATUS_AUDIO_INITIALIZED) { | 417 if (InitializationResult() != STATUS_AUDIO_INITIALIZED) { |
| 395 NOTREACHED(); | 418 NOTREACHED(); |
| 396 return; | 419 return; |
| 397 } | 420 } |
| 398 cast_environment_->PostTask(CastEnvironment::AUDIO, | 421 cast_environment_->PostTask(CastEnvironment::AUDIO, |
| 399 FROM_HERE, | 422 FROM_HERE, |
| 400 base::Bind(&AudioEncoder::ImplBase::EncodeAudio, | 423 base::Bind(&AudioEncoder::ImplBase::EncodeAudio, |
| 401 impl_, | 424 impl_, |
| 402 base::Passed(&audio_bus), | 425 base::Passed(&audio_bus), |
| 403 recorded_time)); | 426 recorded_time)); |
| 404 } | 427 } |
| 405 | 428 |
| 406 } // namespace cast | 429 } // namespace cast |
| 407 } // namespace media | 430 } // namespace media |
| OLD | NEW |