Chromium Code Reviews

Side by Side Diff: media/cast/sender/h264_vt_encoder.cc

Issue 450693006: VideoToolbox encoder for cast senders. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Remove useless include and clean up in unit tests. Created 6 years, 1 month ago
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "media/cast/sender/h264_vt_encoder.h"
6
7 #include <string>
8
9 #include "base/big_endian.h"
10 #include "base/bind.h"
11 #include "base/bind_helpers.h"
12 #include "base/location.h"
13 #include "base/logging.h"
14 #include "media/base/mac/corevideo_glue.h"
15 #include "media/base/mac/video_frame_mac.h"
16 #include "media/cast/sender/video_frame_factory.h"
17
18 namespace media {
19 namespace cast {
20
21 namespace {
22
23 // Container for the associated data of a video frame being processed.
24 struct InProgressFrameEncode {
25 const RtpTimestamp rtp_timestamp;
26 const base::TimeTicks reference_time;
27 const VideoEncoder::FrameEncodedCallback frame_encoded_callback;
28
29 InProgressFrameEncode(RtpTimestamp rtp,
30 base::TimeTicks r_time,
31 VideoEncoder::FrameEncodedCallback callback)
32 : rtp_timestamp(rtp),
33 reference_time(r_time),
34 frame_encoded_callback(callback) {}
35 };
36
37 base::ScopedCFTypeRef<CFDictionaryRef> DictionaryWithKeyValue(CFTypeRef key,
38 CFTypeRef value) {
39 CFTypeRef keys[1] = {key};
40 CFTypeRef values[1] = {value};
41 return base::ScopedCFTypeRef<CFDictionaryRef>(CFDictionaryCreate(
42 kCFAllocatorDefault, keys, values, 1, &kCFTypeDictionaryKeyCallBacks,
43 &kCFTypeDictionaryValueCallBacks));
44 }
45
46 template <typename NalSizeType>
47 void CopyNalsToAnnexB(char* avcc_buffer,
48 const size_t avcc_size,
49 std::string* annexb_buffer) {
50 static_assert(sizeof(NalSizeType) == 1 || sizeof(NalSizeType) == 2 ||
51 sizeof(NalSizeType) == 4,
52 "NAL size type has unsupported size");
53 static const char startcode_3[3] = {0, 0, 1};
54 DCHECK(avcc_buffer);
55 DCHECK(annexb_buffer);
56 size_t bytes_left = avcc_size;
57 while (bytes_left > 0) {
58 DCHECK_GT(bytes_left, sizeof(NalSizeType));
59 NalSizeType nal_size;
60 base::ReadBigEndian(avcc_buffer, &nal_size);
61 bytes_left -= sizeof(NalSizeType);
62 avcc_buffer += sizeof(NalSizeType);
63
64 DCHECK_GE(bytes_left, nal_size);
65 annexb_buffer->append(startcode_3, sizeof(startcode_3));
66 annexb_buffer->append(avcc_buffer, nal_size);
67 bytes_left -= nal_size;
68 avcc_buffer += nal_size;
69 }
70 }
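
To make the AVCC-to-Annex B conversion concrete, here is a minimal usage sketch of CopyNalsToAnnexB, assuming a single NAL unit framed with a 4-byte big-endian length prefix (the buffer contents below are hypothetical, chosen only for illustration):

    // One AVCC-framed NAL unit: a 4-byte length prefix (value 2) followed by
    // two bytes of NAL payload.
    char avcc[] = {0x00, 0x00, 0x00, 0x02, 0x09, 0x10};
    std::string annexb;
    CopyNalsToAnnexB<uint32_t>(avcc, sizeof(avcc), &annexb);
    // annexb now holds the 3-byte start code followed by the payload:
    // {0x00, 0x00, 0x01, 0x09, 0x10}.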
71
72 // Copies an H.264 frame stored in a CM sample buffer to an Annex B buffer. For
73 // keyframes, the parameter sets are also copied ahead of the frame data.
74 void CopySampleBufferToAnnexBBuffer(CoreMediaGlue::CMSampleBufferRef sbuf,
75 std::string* annexb_buffer,
76 bool keyframe) {
77 // Perform two passes: one to figure out the total output size, and another to
78 // copy the data after having performed a single output allocation. Note that
79 // we'll reserve a bit more than needed because we count 4 bytes instead of 3
80 // for video NALs.
81
82 OSStatus status;
83
84 // Get the sample buffer's block buffer and format description.
85 auto bb = CoreMediaGlue::CMSampleBufferGetDataBuffer(sbuf);
86 DCHECK(bb);
87 auto fdesc = CoreMediaGlue::CMSampleBufferGetFormatDescription(sbuf);
88 DCHECK(fdesc);
89
90 size_t bb_size = CoreMediaGlue::CMBlockBufferGetDataLength(bb);
91 size_t total_bytes = bb_size;
92
93 size_t pset_count;
94 int nal_size_field_bytes;
95 status = CoreMediaGlue::CMVideoFormatDescriptionGetH264ParameterSetAtIndex(
96 fdesc, 0, nullptr, nullptr, &pset_count, &nal_size_field_bytes);
97 if (status ==
98 CoreMediaGlue::kCMFormatDescriptionBridgeError_InvalidParameter) {
99 DLOG(WARNING) << " assuming 2 parameter sets and a 4-byte NAL length header";
100 pset_count = 2;
101 nal_size_field_bytes = 4;
102 } else if (status != noErr) {
103 DLOG(ERROR)
104 << " CMVideoFormatDescriptionGetH264ParameterSetAtIndex failed: "
105 << status;
106 return;
107 }
108
109 if (keyframe) {
110 const uint8_t* pset;
111 size_t pset_size;
112 for (size_t pset_i = 0; pset_i < pset_count; ++pset_i) {
113 status =
114 CoreMediaGlue::CMVideoFormatDescriptionGetH264ParameterSetAtIndex(
115 fdesc, pset_i, &pset, &pset_size, nullptr, nullptr);
116 if (status != noErr) {
117 DLOG(ERROR)
118 << " CMVideoFormatDescriptionGetH264ParameterSetAtIndex failed: "
119 << status;
120 return;
121 }
122 total_bytes += pset_size + nal_size_field_bytes;
123 }
124 }
125
126 annexb_buffer->reserve(total_bytes);
127
128 // For keyframes, copy all parameter sets ahead of the frame data.
129 if (keyframe) {
130 const uint8_t* pset;
131 size_t pset_size;
132 for (size_t pset_i = 0; pset_i < pset_count; ++pset_i) {
133 status =
134 CoreMediaGlue::CMVideoFormatDescriptionGetH264ParameterSetAtIndex(
135 fdesc, pset_i, &pset, &pset_size, nullptr, nullptr);
136 if (status != noErr) {
137 DLOG(ERROR)
138 << " CMVideoFormatDescriptionGetH264ParameterSetAtIndex failed: "
139 << status;
140 return;
141 }
142 static const char startcode_4[4] = {0, 0, 0, 1};
143 annexb_buffer->append(startcode_4, sizeof(startcode_4));
144 annexb_buffer->append(reinterpret_cast<const char*>(pset), pset_size);
145 }
146 }
147
148 // Block buffers can be composed of non-contiguous chunks. For the sake of
149 // keeping this code simple, flatten non-contiguous block buffers.
150 base::ScopedCFTypeRef<CoreMediaGlue::CMBlockBufferRef> contiguous_bb(
151 bb, base::scoped_policy::RETAIN);
152 if (!CoreMediaGlue::CMBlockBufferIsRangeContiguous(bb, 0, 0)) {
153 contiguous_bb.reset();
154 status = CoreMediaGlue::CMBlockBufferCreateContiguous(
155 kCFAllocatorDefault, bb, kCFAllocatorDefault, nullptr, 0, 0, 0,
156 contiguous_bb.InitializeInto());
157 if (status != noErr) {
158 DLOG(ERROR) << " CMBlockBufferCreateContiguous failed: " << status;
159 return;
160 }
161 }
162
163 // Copy all the NAL units. In the process, convert them from AVCC format
164 // (length header) to Annex B format (start code).
165 char* bb_data;
166 status = CoreMediaGlue::CMBlockBufferGetDataPointer(contiguous_bb, 0, nullptr,
167 nullptr, &bb_data);
168 if (status != noErr) {
169 DLOG(ERROR) << " CMBlockBufferGetDataPointer failed: " << status;
170 return;
171 }
172
173 if (nal_size_field_bytes == 1) {
174 CopyNalsToAnnexB<uint8_t>(bb_data, bb_size, annexb_buffer);
175 } else if (nal_size_field_bytes == 2) {
176 CopyNalsToAnnexB<uint16_t>(bb_data, bb_size, annexb_buffer);
177 } else if (nal_size_field_bytes == 4) {
178 CopyNalsToAnnexB<uint32_t>(bb_data, bb_size, annexb_buffer);
179 } else {
180 NOTREACHED();
181 }
182 }
183
184 // Implementation of the VideoFrameFactory interface using |CVPixelBufferPool|.
185 class VideoFrameFactoryCVPixelBufferPoolImpl : public VideoFrameFactory {
186 public:
187 VideoFrameFactoryCVPixelBufferPoolImpl(
188 const base::ScopedCFTypeRef<CVPixelBufferPoolRef>& pool)
189 : pool_(pool) {}
190
191 ~VideoFrameFactoryCVPixelBufferPoolImpl() override {}
192
193 scoped_refptr<VideoFrame> CreateFrame(base::TimeDelta timestamp) override {
194 base::ScopedCFTypeRef<CVPixelBufferRef> buffer;
195 CHECK_EQ(kCVReturnSuccess,
196 CVPixelBufferPoolCreatePixelBuffer(kCFAllocatorDefault, pool_,
197 buffer.InitializeInto()));
198 return VideoFrame::WrapCVPixelBuffer(buffer, timestamp);
199 }
200
201 private:
202 base::ScopedCFTypeRef<CVPixelBufferPoolRef> pool_;
203
204 DISALLOW_COPY_AND_ASSIGN(VideoFrameFactoryCVPixelBufferPoolImpl);
205 };
206
207 } // namespace
208
209 H264VideoToolboxEncoder::H264VideoToolboxEncoder(
210 scoped_refptr<CastEnvironment> cast_environment,
211 const VideoSenderConfig& video_config,
212 const CastInitializationCallback& initialization_cb)
213 : cast_environment_(cast_environment),
214 videotoolbox_glue_(VideoToolboxGlue::Get()),
215 frame_id_(kStartFrameId),
216 encode_next_frame_as_keyframe_(false) {
217 DCHECK(!initialization_cb.is_null());
218 CastInitializationStatus initialization_status;
219 if (videotoolbox_glue_) {
220 initialization_status = (Initialize(video_config))
221 ? STATUS_VIDEO_INITIALIZED
222 : STATUS_INVALID_VIDEO_CONFIGURATION;
223 } else {
224 LOG(ERROR) << " VideoToolbox is not available";
225 initialization_status = STATUS_HW_VIDEO_ENCODER_NOT_SUPPORTED;
226 }
227 cast_environment_->PostTask(
228 CastEnvironment::MAIN, FROM_HERE,
229 base::Bind(initialization_cb, initialization_status));
230 }
231
232 H264VideoToolboxEncoder::~H264VideoToolboxEncoder() {
233 Teardown();
234 }
235
236 bool H264VideoToolboxEncoder::Initialize(
237 const VideoSenderConfig& video_config) {
238 DCHECK(thread_checker_.CalledOnValidThread());
239 DCHECK(!compression_session_);
240
241 // Note that the encoder object is given to the compression session as the
242 // callback context using a raw pointer. The C API does not allow us to use
243 // a smart pointer, nor is this encoder ref counted. However, this is still
244 // safe, because 1) we own the compression session and 2) we tear it down
245 // safely. When destructing the encoder, the compression session is flushed
246 // and invalidated. Internally, VideoToolbox will join all of its threads
247 // before returning to the client. Therefore, when control returns to us, we
248 // are guaranteed that the output callback will not execute again.
249
250 // On OS X, allow the hardware encoder. Don't require it, since it does not
251 // support all configurations (some of which are used for testing).
252 base::ScopedCFTypeRef<CFDictionaryRef> encoder_spec;
253 #if !defined(OS_IOS)
254 encoder_spec = DictionaryWithKeyValue(
255 videotoolbox_glue_
256 ->kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder(),
257 kCFBooleanTrue);
258 #endif
259
260 VTCompressionSessionRef session;
261 OSStatus status = videotoolbox_glue_->VTCompressionSessionCreate(
262 kCFAllocatorDefault, video_config.width, video_config.height,
263 CoreMediaGlue::kCMVideoCodecType_H264, encoder_spec,
264 nullptr /* sourceImageBufferAttributes */,
265 nullptr /* compressedDataAllocator */,
266 &H264VideoToolboxEncoder::CompressionCallback,
267 reinterpret_cast<void*>(this), &session);
268 if (status != noErr) {
269 DLOG(ERROR) << " VTCompressionSessionCreate failed: " << status;
270 return false;
271 }
272 compression_session_.reset(session);
273
274 ConfigureSession(video_config);
275
276 return true;
277 }
278
279 void H264VideoToolboxEncoder::ConfigureSession(
280 const VideoSenderConfig& video_config) {
281 SetSessionProperty(
282 videotoolbox_glue_->kVTCompressionPropertyKey_ProfileLevel(),
283 videotoolbox_glue_->kVTProfileLevel_H264_Main_AutoLevel());
284 SetSessionProperty(videotoolbox_glue_->kVTCompressionPropertyKey_RealTime(),
285 true);
286 SetSessionProperty(
287 videotoolbox_glue_->kVTCompressionPropertyKey_AllowFrameReordering(),
288 false);
289 SetSessionProperty(
290 videotoolbox_glue_->kVTCompressionPropertyKey_MaxKeyFrameInterval(), 240);
291 SetSessionProperty(
292 videotoolbox_glue_
293 ->kVTCompressionPropertyKey_MaxKeyFrameIntervalDuration(),
294 240);
295 // TODO(jfroy): implement better bitrate control
296 // https://crbug.com/425352
297 SetSessionProperty(
298 videotoolbox_glue_->kVTCompressionPropertyKey_AverageBitRate(),
299 (video_config.min_bitrate + video_config.max_bitrate) / 2);
300 SetSessionProperty(
301 videotoolbox_glue_->kVTCompressionPropertyKey_ExpectedFrameRate(),
302 video_config.max_frame_rate);
303 SetSessionProperty(
304 videotoolbox_glue_->kVTCompressionPropertyKey_ColorPrimaries(),
305 kCVImageBufferColorPrimaries_ITU_R_709_2);
306 SetSessionProperty(
307 videotoolbox_glue_->kVTCompressionPropertyKey_TransferFunction(),
308 kCVImageBufferTransferFunction_ITU_R_709_2);
309 SetSessionProperty(
310 videotoolbox_glue_->kVTCompressionPropertyKey_YCbCrMatrix(),
311 kCVImageBufferYCbCrMatrix_ITU_R_709_2);
312 }
313
314 void H264VideoToolboxEncoder::Teardown() {
315 DCHECK(thread_checker_.CalledOnValidThread());
316
317 // If the compression session exists, invalidate it. This blocks until all
318 // pending output callbacks have returned and any internal threads have
319 // joined, ensuring no output callback ever sees a dangling encoder pointer.
320 if (compression_session_) {
321 videotoolbox_glue_->VTCompressionSessionInvalidate(compression_session_);
322 compression_session_.reset();
323 }
324 }
325
326 bool H264VideoToolboxEncoder::EncodeVideoFrame(
327 const scoped_refptr<media::VideoFrame>& video_frame,
328 const base::TimeTicks& reference_time,
329 const FrameEncodedCallback& frame_encoded_callback) {
330 DCHECK(thread_checker_.CalledOnValidThread());
331 DCHECK(!reference_time.is_null());
332
333 if (!compression_session_) {
334 DLOG(ERROR) << " compression session is null";
335 return false;
336 }
337
338 // Wrap the VideoFrame in a CVPixelBuffer. In all cases, no data will be
339 // copied. If the VideoFrame was created by this encoder's video frame
340 // factory, then the returned CVPixelBuffer will have been obtained from the
341 // compression session's pixel buffer pool. This will eliminate a copy of the
342 // frame into memory visible to the hardware encoder. The VideoFrame's
343 // lifetime is extended for the lifetime of the returned CVPixelBuffer.
344 auto pixel_buffer = media::WrapVideoFrameInCVPixelBuffer(*video_frame);
345 if (!pixel_buffer) {
346 return false;
347 }
348
349 auto timestamp_cm = CoreMediaGlue::CMTimeMake(
350 (reference_time - base::TimeTicks()).InMicroseconds(), USEC_PER_SEC);
351
352 scoped_ptr<InProgressFrameEncode> request(new InProgressFrameEncode(
353 TimeDeltaToRtpDelta(video_frame->timestamp(), kVideoFrequency),
354 reference_time, frame_encoded_callback));
355
356 base::ScopedCFTypeRef<CFDictionaryRef> frame_props;
357 if (encode_next_frame_as_keyframe_) {
358 frame_props = DictionaryWithKeyValue(
359 videotoolbox_glue_->kVTEncodeFrameOptionKey_ForceKeyFrame(),
360 kCFBooleanTrue);
361 encode_next_frame_as_keyframe_ = false;
362 }
363
364 VTEncodeInfoFlags info;
365 OSStatus status = videotoolbox_glue_->VTCompressionSessionEncodeFrame(
366 compression_session_, pixel_buffer, timestamp_cm,
367 CoreMediaGlue::CMTime{0, 0, 0, 0}, frame_props,
368 reinterpret_cast<void*>(request.release()), &info);
369 if (status != noErr) {
370 DLOG(ERROR) << " VTCompressionSessionEncodeFrame failed: " << status;
371 return false;
372 }
373 if ((info & VideoToolboxGlue::kVTEncodeInfo_FrameDropped)) {
374 DLOG(ERROR) << " frame dropped";
375 return false;
376 }
377
378 return true;
379 }
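
As a small worked example of the timestamp conversion performed above (the 2.5 s offset is a hypothetical value): a |reference_time| lying 2.5 seconds after the base::TimeTicks epoch becomes a CMTime of 2,500,000 ticks at a timescale of USEC_PER_SEC (1,000,000), i.e. 2.5 s expressed at microsecond precision:

    // Hypothetical reference time: 2.5 s after the TimeTicks epoch.
    const base::TimeTicks reference_time =
        base::TimeTicks() + base::TimeDelta::FromMicroseconds(2500000);
    const CoreMediaGlue::CMTime timestamp_cm = CoreMediaGlue::CMTimeMake(
        (reference_time - base::TimeTicks()).InMicroseconds(), USEC_PER_SEC);
    // timestamp_cm.value == 2500000, timestamp_cm.timescale == 1000000.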
380
381 void H264VideoToolboxEncoder::SetBitRate(int new_bit_rate) {
382 DCHECK(thread_checker_.CalledOnValidThread());
383 // VideoToolbox does not seem to support bitrate reconfiguration.
384 }
385
386 void H264VideoToolboxEncoder::GenerateKeyFrame() {
387 DCHECK(thread_checker_.CalledOnValidThread());
388 DCHECK(compression_session_);
389
390 encode_next_frame_as_keyframe_ = true;
391 }
392
393 void H264VideoToolboxEncoder::LatestFrameIdToReference(uint32 /*frame_id*/) {
394 // Not supported by VideoToolbox in any meaningful manner.
395 }
396
397 scoped_ptr<VideoFrameFactory>
398 H264VideoToolboxEncoder::CreateVideoFrameFactory() {
399 base::ScopedCFTypeRef<CVPixelBufferPoolRef> pool(
400 videotoolbox_glue_->VTCompressionSessionGetPixelBufferPool(
401 compression_session_),
402 base::scoped_policy::RETAIN);
403 return scoped_ptr<VideoFrameFactory>(
404 new VideoFrameFactoryCVPixelBufferPoolImpl(pool));
405 }
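
A brief usage sketch for the factory returned above (the caller-side names |encoder|, |clock|, and |frame_encoded_callback| are hypothetical): frames created through the factory are backed by the compression session's pixel buffer pool, so EncodeVideoFrame() can hand them to the hardware encoder without an extra copy.

    scoped_ptr<VideoFrameFactory> factory = encoder->CreateVideoFrameFactory();
    scoped_refptr<VideoFrame> frame =
        factory->CreateFrame(base::TimeDelta::FromMilliseconds(33));
    // ... produce pixel data into |frame| ...
    encoder->EncodeVideoFrame(frame, clock->NowTicks(), frame_encoded_callback);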
406
407 bool H264VideoToolboxEncoder::SetSessionProperty(CFStringRef key,
408 int32_t value) {
409 base::ScopedCFTypeRef<CFNumberRef> cfvalue(
410 CFNumberCreate(nullptr, kCFNumberSInt32Type, &value));
411 return videotoolbox_glue_->VTSessionSetProperty(compression_session_, key,
412 cfvalue) == noErr;
413 }
414
415 bool H264VideoToolboxEncoder::SetSessionProperty(CFStringRef key, bool value) {
416 CFBooleanRef cfvalue = (value) ? kCFBooleanTrue : kCFBooleanFalse;
417 return videotoolbox_glue_->VTSessionSetProperty(compression_session_, key,
418 cfvalue) == noErr;
419 }
420
421 bool H264VideoToolboxEncoder::SetSessionProperty(CFStringRef key,
422 CFStringRef value) {
423 return videotoolbox_glue_->VTSessionSetProperty(compression_session_, key,
424 value) == noErr;
425 }
426
427 void H264VideoToolboxEncoder::CompressionCallback(void* encoder_opaque,
428 void* request_opaque,
429 OSStatus status,
430 VTEncodeInfoFlags info,
431 CMSampleBufferRef sbuf) {
432 if (status != noErr) {
433 DLOG(ERROR) << " encoding failed: " << status;
434 return;
435 }
436 if ((info & VideoToolboxGlue::kVTEncodeInfo_FrameDropped)) {
437 DVLOG(2) << " frame dropped";
438 return;
439 }
440
441 auto encoder = reinterpret_cast<H264VideoToolboxEncoder*>(encoder_opaque);
442 const scoped_ptr<InProgressFrameEncode> request(
443 reinterpret_cast<InProgressFrameEncode*>(request_opaque));
444 auto sample_attachments = static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(
445 CoreMediaGlue::CMSampleBufferGetSampleAttachmentsArray(sbuf, true), 0));
446
447 // If the NotSync key is not present, it implies Sync, which indicates a
448 // keyframe (the VideoToolbox documentation is sparse on this point). We could
449 // alternatively use kCMSampleAttachmentKey_DependsOnOthers == false.
450 bool keyframe =
451 !CFDictionaryContainsKey(sample_attachments,
452 CoreMediaGlue::kCMSampleAttachmentKey_NotSync());
453
454 // Increment the encoder-scoped frame id and assign the new value to this
455 // frame. VideoToolbox calls the output callback serially, so this is safe.
456 uint32 frame_id = ++encoder->frame_id_;
457
458 scoped_ptr<EncodedFrame> encoded_frame(new EncodedFrame());
459 encoded_frame->frame_id = frame_id;
460 encoded_frame->reference_time = request->reference_time;
461 encoded_frame->rtp_timestamp = request->rtp_timestamp;
462 if (keyframe) {
463 encoded_frame->dependency = EncodedFrame::KEY;
464 encoded_frame->referenced_frame_id = frame_id;
465 } else {
466 encoded_frame->dependency = EncodedFrame::DEPENDENT;
467 // H.264 supports complex frame reference schemes (multiple reference
468 // frames, slice references, backward and forward references, etc.). Cast
469 // doesn't support forward-referencing frame dependencies or multiple frame
470 // dependencies, so pretend that all frames are only decodable after their
471 // immediately preceding frame is decoded. This ensures a Cast receiver only
472 // attempts to decode the frames sequentially and in order. Furthermore, the
473 // encoder is configured to never use forward references (see
474 // |kVTCompressionPropertyKey_AllowFrameReordering|). There is, however, no
475 // way to prevent the use of multiple reference frames.
476 encoded_frame->referenced_frame_id = frame_id - 1;
477 }
478
479 CopySampleBufferToAnnexBBuffer(sbuf, &encoded_frame->data, keyframe);
480
481 encoder->cast_environment_->PostTask(
482 CastEnvironment::MAIN, FROM_HERE,
483 base::Bind(request->frame_encoded_callback,
484 base::Passed(&encoded_frame)));
485 }
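
To make the dependency bookkeeping above concrete, a short sketch with hypothetical frame ids showing the fields a Cast receiver would see: the keyframe references itself, and the delta frame that follows references only the frame immediately before it.

    EncodedFrame key;
    key.frame_id = 10;
    key.dependency = EncodedFrame::KEY;
    key.referenced_frame_id = 10;  // Keyframes reference themselves.

    EncodedFrame delta;
    delta.frame_id = 11;
    delta.dependency = EncodedFrame::DEPENDENT;
    delta.referenced_frame_id = 10;  // Only the immediately preceding frame.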
486
487 } // namespace cast
488 } // namespace media