Chromium Code Reviews| Index: content/common/gpu/media/v4l2_slice_video_decode_accelerator.h |
| diff --git a/content/common/gpu/media/v4l2_slice_video_decode_accelerator.h b/content/common/gpu/media/v4l2_slice_video_decode_accelerator.h |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..d9333cdd4a02d17f1ec8af2f9c104e64d38b59d0 |
| --- /dev/null |
| +++ b/content/common/gpu/media/v4l2_slice_video_decode_accelerator.h |
| @@ -0,0 +1,509 @@ |
| +// Copyright 2015 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#ifndef CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_ |
| +#define CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_ |
| + |
| +#include <linux/videodev2.h> |
| +#include <list> |
| +#include <map> |
| +#include <queue> |
| +#include <string> |
| +#include <vector> |
| + |
| +#include "base/memory/linked_ptr.h" |
| +#include "base/memory/ref_counted.h" |
| +#include "base/memory/scoped_ptr.h" |
| +#include "base/memory/weak_ptr.h" |
| +#include "base/synchronization/waitable_event.h" |
| +#include "base/threading/thread.h" |
| +#include "content/common/content_export.h" |
| +#include "content/common/gpu/media/h264_decoder.h" |
| +#include "content/common/gpu/media/v4l2_video_device.h" |
| +#include "content/common/gpu/media/vp8_decoder.h" |
| +#include "media/video/video_decode_accelerator.h" |
| + |
| +namespace content { |
| + |
| +// An implementation of VideoDecodeAccelerator that utilizes the V4L2 slice |
| +// level codec API for decoding. The slice level API provides only low-level |
| +// decoding functionality and requires userspace to provide support for parsing |
| +// the input stream and managing decoder state across frames. |
| +class CONTENT_EXPORT V4L2SliceVideoDecodeAccelerator |
| + : public media::VideoDecodeAccelerator { |
| + public: |
| + // Ref-counted handle for a single decode: carries the bitstream id and the |
| + // input/output record indices supplied at construction, and holds references |
| + // to its reference surfaces until it is marked as decoded. |
| + class V4L2DecodeSurface : public base::RefCounted<V4L2DecodeSurface> { |
| + public: |
| + // Type of the callback stored in |release_cb_|; it receives a single int. |
| + // NOTE(review): presumably the output record index, since the enclosing |
| + // class documents ReuseOutputBuffer(int index) as the surface release |
| + // callback -- confirm against the implementation file. |
| + using ReleaseCB = base::Callback<void(int)>; |
| + |
| + V4L2DecodeSurface(int32 bitstream_id, |
| + int input_record, |
| + int output_record, |
| + const ReleaseCB& release_cb); |
| + |
| + // Mark the surface as decoded. This will also release all references, as |
| + // they are not needed anymore. |
| + void SetDecoded(); |
| + bool decoded() const { return decoded_; } |
| + |
| + // Accessors for the values stored in the corresponding members. |
| + int32 bitstream_id() const { return bitstream_id_; } |
| + int input_record() const { return input_record_; } |
| + int output_record() const { return output_record_; } |
| + uint32_t config_store() const { return config_store_; } |
| + |
| + // Take references to each reference surface and keep them until the |
| + // target surface is decoded. |
| + void SetReferenceSurfaces( |
| + const std::vector<scoped_refptr<V4L2DecodeSurface>>& ref_surfaces); |
| + |
| + // Returns a string describing this surface (returned by value). |
| + std::string ToString() const; |

Owen Lin
2015/01/09 09:56:14
const std::string&
Pawel Osciak
2015/01/12 07:18:19
I'm not sure we want a reference, the string is lo
|
| + |
| + private: |
| + // A ref-counted object must only be destroyed when its last reference is |
| + // dropped, so the destructor is reachable solely through base::RefCounted. |
| + friend class base::RefCounted<V4L2DecodeSurface>; |
| + virtual ~V4L2DecodeSurface(); |
| + |
| + int32 bitstream_id_; |
| + int input_record_; |
| + int output_record_; |
| + uint32_t config_store_; |
| + |
| + bool decoded_; |
| + ReleaseCB release_cb_; |
| + |
| + // References taken by SetReferenceSurfaces(). |
| + std::vector<scoped_refptr<V4L2DecodeSurface>> reference_surfaces_; |
| + |
| + DISALLOW_COPY_AND_ASSIGN(V4L2DecodeSurface); |
| + }; |
| + |
| + V4L2SliceVideoDecodeAccelerator( |
| + const scoped_refptr<V4L2Device>& device, |
| + EGLDisplay egl_display, |
| + EGLContext egl_context, |
| + const base::WeakPtr<Client>& io_client_, |
| + const base::Callback<bool(void)>& make_context_current, |
| + const scoped_refptr<base::MessageLoopProxy>& io_message_loop_proxy); |
| + virtual ~V4L2SliceVideoDecodeAccelerator(); |
| + |
| + // media::VideoDecodeAccelerator implementation. |
| + virtual bool Initialize(media::VideoCodecProfile profile, |
| + VideoDecodeAccelerator::Client* client) override; |
| + virtual void Decode(const media::BitstreamBuffer& bitstream_buffer) override; |
| + virtual void AssignPictureBuffers( |
| + const std::vector<media::PictureBuffer>& buffers) override; |
| + virtual void ReusePictureBuffer(int32 picture_buffer_id) override; |
| + virtual void Flush() override; |
| + virtual void Reset() override; |
| + virtual void Destroy() override; |
| + virtual bool CanDecodeOnIOThread() override; |
| + |
| + bool SubmitSlice(int index, const uint8_t* data, size_t size); |
|
Owen Lin
2015/01/09 09:56:14
I am confused, which one feed the input to this cl
Pawel Osciak
2015/01/12 07:18:20
Added docs.
|
| + bool SubmitExtControls(struct v4l2_ext_controls* ext_ctrls); |
| + |
| + private: |
| + // H264Decoder::H264Accelerator implementation that keeps a pointer back to |
| + // the V4L2SliceVideoDecodeAccelerator it was constructed with. |
| + class V4L2H264Accelerator : public H264Decoder::H264Accelerator { |
| + public: |
| + // |v4l2_dec| is stored as a raw pointer. |
| + // NOTE(review): presumably not owned and expected to outlive this object -- |
| + // confirm against the implementation file. |
| + // explicit: a decoder pointer must never convert implicitly to an |
| + // accelerator. |
| + explicit V4L2H264Accelerator(V4L2SliceVideoDecodeAccelerator* v4l2_dec); |
| + virtual ~V4L2H264Accelerator() {} |
| + |
| + // H264Decoder::H264Accelerator implementation. |
| + scoped_refptr<H264Picture> CreateH264Picture() override; |
| + |
| + bool SubmitFrameMetadata(const media::H264SPS* sps, |
| + const media::H264PPS* pps, |
| + const H264DPB& dpb, |
| + const H264Picture::Vector& ref_pic_listp0, |
| + const H264Picture::Vector& ref_pic_listb0, |
| + const H264Picture::Vector& ref_pic_listb1, |
| + const scoped_refptr<H264Picture>& pic) override; |
| + |
| + bool SubmitSlice(const media::H264PPS* pps, |
| + const media::H264SliceHeader* slice_hdr, |
| + const H264Picture::Vector& ref_pic_list0, |
| + const H264Picture::Vector& ref_pic_list1, |
| + const scoped_refptr<H264Picture>& pic, |
| + const uint8_t* data, |
| + size_t size) override; |
| + |
| + bool SubmitDecode(const scoped_refptr<H264Picture>& pic) override; |
| + bool OutputPicture(const scoped_refptr<H264Picture>& pic) override; |
| + |
| + private: |
| + // Conversion helpers between decoder-side H264 types and their V4L2 |
| + // counterparts. NOTE(review): exact semantics live in the implementation |
| + // file; |dst_list| is declared with 32 entries. |
| + void H264PictureListToDPBIndicesList( |
| + const H264Picture::Vector& src_pic_list, |
| + uint8_t dst_list[32]); |
| + |
| + void H264DPBToV4L2DPB( |
| + const H264DPB& dpb, |
| + std::vector<scoped_refptr<V4L2DecodeSurface>>* ref_surfaces); |
| + |
| + scoped_refptr<V4L2DecodeSurface> H264PictureToV4L2DecodeSurface( |
| + const scoped_refptr<H264Picture>& pic); |
| + |
| + size_t num_slices_; |
| + V4L2SliceVideoDecodeAccelerator* v4l2_dec_; |
| + |
| + // TODO(posciak): This should be queried from hardware once supported. |
| + // Capacity of |v4l2_slice_params_|. |
| + static const size_t kMaxSlices = 16; |
| + struct v4l2_ctrl_h264_slice_param v4l2_slice_params_[kMaxSlices]; |
| + struct v4l2_ctrl_h264_decode_param v4l2_decode_param_; |
| + |
| + DISALLOW_COPY_AND_ASSIGN(V4L2H264Accelerator); |
| + }; |
| + |
| + // VP8Decoder::VP8Accelerator implementation that keeps a pointer back to |
| + // the V4L2SliceVideoDecodeAccelerator it was constructed with. |
| + class V4L2VP8Accelerator : public VP8Decoder::VP8Accelerator { |
| + public: |
| + // |v4l2_dec| is stored as a raw pointer. |
| + // NOTE(review): presumably not owned and expected to outlive this object -- |
| + // confirm against the implementation file. |
| + // explicit: a decoder pointer must never convert implicitly to an |
| + // accelerator. |
| + explicit V4L2VP8Accelerator(V4L2SliceVideoDecodeAccelerator* v4l2_dec); |
| + virtual ~V4L2VP8Accelerator() {} |
| + |
| + // VP8Decoder::VP8Accelerator implementation. |
| + scoped_refptr<VP8Picture> CreateVP8Picture() override; |
| + |
| + bool SubmitDecode(const scoped_refptr<VP8Picture>& pic, |
| + const media::VP8FrameHeader* frame_hdr, |
| + const scoped_refptr<VP8Picture>& last_frame, |
| + const scoped_refptr<VP8Picture>& golden_frame, |
| + const scoped_refptr<VP8Picture>& alt_frame) override; |
| + |
| + bool OutputPicture(const scoped_refptr<VP8Picture>& pic) override; |
| + |
| + private: |
| + // Maps a decoder-side VP8Picture to a V4L2DecodeSurface. |
| + // NOTE(review): exact lookup semantics live in the implementation file. |
| + scoped_refptr<V4L2DecodeSurface> VP8PictureToV4L2DecodeSurface( |
| + const scoped_refptr<VP8Picture>& pic); |
| + |
| + V4L2SliceVideoDecodeAccelerator* v4l2_dec_; |
| + |
| + DISALLOW_COPY_AND_ASSIGN(V4L2VP8Accelerator); |
| + }; |
| + |
| + // Record for input buffers; these records are stored in input_buffer_map_, indexed by int. |
| + struct InputRecord { |
| + InputRecord(); |
| + int32 input_id; |
| + void* address; |
| + size_t length; |
| + size_t bytes_used; |
| + bool at_device; |
| + }; |
| + |
| + // Record for output buffers; these records are stored in output_buffer_map_, indexed by int. |
| + struct OutputRecord { |
| + OutputRecord(); |
| + bool at_device; |
| + bool at_client; |
| + int32 picture_id; |
| + EGLImageKHR egl_image; |
| + EGLSyncKHR egl_sync; |
| + bool cleared; |
| + }; |
| + |
| + enum { |
| + // See http://crbug.com/255116. |
| + // Maximum input bitstream buffer size for streams up to 1080p (1024 * 1024). |
| + kInputBufferMaxSizeFor1080p = 1024 * 1024, |

kcwu
2015/01/09 15:36:46
const size_t, not enum.
Pawel Osciak
2015/01/12 07:18:19
Done.
|
| + // Maximum input bitstream buffer size for streams up to 4k: four times the 1080p size. |
| + kInputBufferMaxSizeFor4k = 4 * kInputBufferMaxSizeFor1080p, |
| + }; |
| + |
| + // Recycle V4L2 output buffer with |index|. Used as surface release callback. |
| + void ReuseOutputBuffer(int index); |
| + |
| + // Queue a |dec_surface| to device for decoding. |
| + void Enqueue(const scoped_refptr<V4L2DecodeSurface>& dec_surface); |
| + |
| + // Dequeue any V4L2 buffers available and process. |
| + void Dequeue(); |
| + |
| + // V4L2 QBUF helpers. |
| + bool EnqueueInputRecord(int index, uint32_t config_store); |
| + bool EnqueueOutputRecord(int index); |
| + |
| + // Set input and output formats in hardware. |
| + bool SetupFormats(); |
| + |
| + // Create input and output buffers. |
| + bool CreateInputBuffers(); |
| + bool CreateOutputBuffers(); |
| + |
| + // Destroy input buffers. |
| + void DestroyInputBuffers(); |
| + |
| + // Destroy output buffers and release associated resources (textures, |
| + // EGLImages). If |dismiss| is true, also dismiss the associated |
| + // PictureBuffers. |
| + void DestroyOutputs(bool dismiss); |
| + |
| + // Used by DestroyOutputs. |
| + void DestroyOutputBuffers(); |
| + |
| + // Dismiss all |picture_buffer_ids| via Client::DismissPictureBuffer() |
| + // and signal |done| after finishing. |
| + void DismissPictures(std::vector<int32> picture_buffer_ids, |
| + base::WaitableEvent* done); |
| + |
| + // Task to finish initialization on decoder_thread_. |
| + void InitializeTask(); |
| + |
| + // Surface set change (resolution change) flow. |
| + // If we have no surfaces allocated, just allocate them and return. |
| + // Otherwise mark us as pending for surface set change. |
| + void InitiateSurfaceSetChange(); |
| + // If a surface set change is pending and we are ready, stop the device, |
| + // destroy outputs, releasing resources and dismissing pictures as required, |
| + // followed by allocating a new set for the new resolution/DPB size |
| + // as provided by decoder. Finally, try to resume decoding. |
| + void FinishSurfaceSetChangeIfNeeded(); |
| + |
| + void NotifyError(Error error); |
| + void DestroyTask(); |
| + |
| + // Flush flow when requested by client. |
| + // When Flush() is called, it posts a FlushTask, which checks the input queue. |
| + // If nothing is pending for decode on decoder_input_queue_, we call |
| + // InitiateFlush() directly. Otherwise, we push a dummy BitstreamBufferRef |
| + // onto the decoder_input_queue_ to schedule a flush. When we reach it later |
| + // on, we call InitiateFlush() to perform it at the correct time. |
| + void FlushTask(); |
| + // Tell the decoder to flush all frames, reset it and mark us as scheduled |
| + // for flush, so that we can finish it once all pending decodes are finished. |
| + void InitiateFlush(); |
| + // If all pending frames are decoded and we are waiting to flush, perform it. |
| + // This will send all pending pictures to client and notify the client that |
| + // flush is complete and puts us in a state ready to resume. |
| + void FinishFlushIfNeeded(); |
| + |
| + // Reset flow when requested by client. |
| + // Drop all inputs and reset the decoder and mark us as pending for reset. |
| + void ResetTask(); |
| + // If all pending frames are decoded and we are waiting to reset, perform it. |
| + // This drops all pending outputs (client is not interested anymore), |
| + // notifies the client we are done and puts us in a state ready to resume. |
| + void FinishResetIfNeeded(); |
| + |
| + // Performed on decoder_thread_ as a consequence of poll() on device_poll_thread_ |
| + // returning an event. |
| + void ServiceDeviceTask(); |
| + |
| + // Attempt to start/stop device_poll_thread_. |
| + bool StartDevicePoll(); |
| + bool StopDevicePoll(bool keep_input_state); |
| + |
| + // Runs on device_poll_thread_ to wait for device events. |
| + void DevicePollTask(bool poll_device); |
| + |
| + enum State { |
| + // We are in this state until InitializeTask() finishes successfully. |
| + kUninitialized, |
| + // In this state we can make progress decoding more of the input stream. |
| + kDecoding, |
| + // Transitional state: we are no longer decoding the stream, but are |
| + // performing a flush, reset or resolution change, or destroying ourselves. |
| + kIdle, |
| + // Error state, set when sending NotifyError to client. |
| + kError, |
| + }; |
| + // Sets state on decoder thread if running. |
| + void SetDecoderState(State state); |
| + |
| + enum BufferId { |
| + kFlushBufferId = -2 // Id of the dummy flush buffer that FlushTask() puts on the input queue. |

kcwu
2015/01/09 15:36:46
Give -1 a name as well. Otherwise -2 looks weird.
Pawel Osciak
2015/01/12 07:18:19
Done.
|
| + }; |
| + |
| + void DecodeTask(const media::BitstreamBuffer& bitstream_buffer); |
| + void DecodeBufferTask(); |
| + void ScheduleDecodeBufferTaskIfNeeded(); |
| + bool TrySetNewBistreamBuffer(); |
| + |
| + // Auto-destruction reference for EGLSync (for message-passing). |
| + struct EGLSyncKHRRef; |
| + void ReusePictureBufferTask(int32 picture_buffer_id, |
| + scoped_ptr<EGLSyncKHRRef> egl_sync_ref); |
| + |
| + // Called by accelerator implementations: |
| + // Decode of |dec_surface| is ready to be submitted and all codec-specific |
| + // settings are set in hardware. |
| + void DecodeSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface); |
| + |
| + // |dec_surface| is ready to be output once decoding is finished. |
| + // This can be called before decode is actually done in hardware, and this |
| + // method is responsible for maintaining the order, i.e. the surfaces will |
| + // be output in the same order as the SurfaceReady calls. To do so, the |
| + // surfaces are put on decoder_display_queue_ and sent to output in that |
| + // order once all preceding surfaces are sent. |
| + void SurfaceReady(const scoped_refptr<V4L2DecodeSurface>& dec_surface); |
| + |
| + // Called to actually send |dec_surface| to the client, after it is decoded, |
| + // preserving the order in which it was scheduled via SurfaceReady(). |
| + void OutputSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface); |
| + |
| + // Goes over the |decoder_display_queue_| and sends all buffers from the |
| + // front of the queue that are already decoded to the client, in order. |
| + void TryOutputSurfaces(); |
| + |
| + // Creates a new decode surface or returns nullptr if one is not available. |
| + scoped_refptr<V4L2DecodeSurface> CreateSurface(); |
| + |
| + // Send decoded pictures to PictureReady. |
| + void SendPictureReady(); |
| + |
| + // Callback that indicates a picture has been cleared. |
| + void PictureCleared(); |
| + |
| + size_t input_planes_count_; |
| + size_t output_planes_count_; |
| + |
| + // GPU Child thread message loop. |
| + const scoped_refptr<base::MessageLoopProxy> child_message_loop_proxy_; |
| + |
| + // IO thread message loop. |
| + scoped_refptr<base::MessageLoopProxy> io_message_loop_proxy_; |
| + |
| + // WeakPtr<> pointing to |this| for use in posting tasks from the decoder or |
| + // device worker threads back to the child thread. |
| + base::WeakPtr<V4L2SliceVideoDecodeAccelerator> weak_this_; |
| + |
| + // To expose client callbacks from VideoDecodeAccelerator. |
| + // NOTE: all calls to these objects *MUST* be executed on |
| + // child_message_loop_proxy_. |
| + scoped_ptr<base::WeakPtrFactory<VideoDecodeAccelerator::Client>> |
| + client_ptr_factory_; |
| + base::WeakPtr<VideoDecodeAccelerator::Client> client_; |
| + // Callbacks to |io_client_| must be executed on |io_message_loop_proxy_|. |
| + base::WeakPtr<Client> io_client_; |
| + |
| + // V4L2 device in use. |
| + scoped_refptr<V4L2Device> device_; |
| + |
| + // Thread to communicate with the device on. |
| + base::Thread decoder_thread_; |
| + scoped_refptr<base::MessageLoopProxy> decoder_thread_proxy_; |
| + |
| + // Thread used to poll the device for events. |
| + base::Thread device_poll_thread_; |
| + |
| + // Input queue state. |
| + bool input_streamon_; |
| + // Number of input buffers enqueued to the device. |
| + int input_buffer_queued_count_; |
| + // Input buffers ready to use; LIFO since we don't care about ordering. |
| + std::list<int> free_input_buffers_; |
| + // Mapping of int index to an input buffer record. |
| + std::vector<InputRecord> input_buffer_map_; |
| + |
| + // Output queue state. |
| + bool output_streamon_; |
| + // Number of output buffers enqueued to the device. |
| + int output_buffer_queued_count_; |
| + // Output buffers ready to use. |
| + std::list<int> free_output_buffers_; |
| + // Mapping of int index to an output buffer record. |
| + std::vector<OutputRecord> output_buffer_map_; |
| + |
| + media::VideoCodecProfile video_profile_; |
| + uint32_t output_format_fourcc_; |
| + gfx::Size frame_buffer_size_; |
| + size_t output_dpb_size_; |
| + |
| + struct BitstreamBufferRef; |
| + // Input queue of stream buffers coming from the client. |
| + std::queue<linked_ptr<BitstreamBufferRef>> decoder_input_queue_; |
| + // BitstreamBuffer currently being processed. |
| + scoped_ptr<BitstreamBufferRef> decoder_current_bitstream_buffer_; |
| + |
| + // Queue storing decode surfaces ready to be output as soon as they are |
| + // decoded. The surfaces must be output in order they are queued. |
| + std::queue<scoped_refptr<V4L2DecodeSurface>> decoder_display_queue_; |
| + |
| + // Decoder state. |
| + State state_; |
| + |
| + // If any of these are true, we are waiting for the device to finish decoding |
| + // all previously-queued frames, so we can finish the flush/reset/surface |
| + // change flows. These can stack. |
| + bool decoder_flushing_; |
| + bool decoder_resetting_; |
| + bool surface_set_change_pending_; |
| + |
| + // Hardware accelerators. |
| + // TODO(posciak): Try to have a superclass here if possible. |
| + scoped_ptr<V4L2H264Accelerator> h264_accelerator_; |
| + scoped_ptr<V4L2VP8Accelerator> vp8_accelerator_; |
| + |
| + // Codec-specific software decoder in use. |
| + scoped_ptr<AcceleratedVideoDecoder> decoder_; |
| + |
| + // Surfaces queued to the device; references are kept here while they are being decoded. |
| + using V4L2DecodeSurfaceByOutputId = |
| + std::map<int, scoped_refptr<V4L2DecodeSurface>>; |
| + V4L2DecodeSurfaceByOutputId surfaces_at_device_; |
| + |
| + // Surfaces sent to the client; references are kept here while they are being displayed. |
| + using V4L2DecodeSurfaceByPictureBufferId = |
| + std::map<int32, scoped_refptr<V4L2DecodeSurface>>; |
| + V4L2DecodeSurfaceByPictureBufferId surfaces_at_display_; |
| + |
| + // Record for decoded pictures that can be sent to PictureReady. |
| + struct PictureRecord; |
| + // Pictures that are ready but not sent to PictureReady yet. |
| + std::queue<PictureRecord> pending_picture_ready_; |
| + |
| + // The number of pictures that are sent to PictureReady and will be cleared. |
| + int picture_clearing_count_; |
| + |
| + // Used by the decoder thread to wait for AssignPictureBuffers to arrive |
| + // to avoid races with potential Reset requests. |
| + base::WaitableEvent pictures_assigned_; |
| + |
| + // Make the GL context current callback. |
| + base::Callback<bool(void)> make_context_current_; |
| + |
| + // EGL state |
| + EGLDisplay egl_display_; |
| + EGLContext egl_context_; |
| + |
| + // The WeakPtrFactory for |weak_this_|. |
| + base::WeakPtrFactory<V4L2SliceVideoDecodeAccelerator> weak_this_factory_; |
| + |
| + DISALLOW_COPY_AND_ASSIGN(V4L2SliceVideoDecodeAccelerator); |
| +}; |
| + |
| +// Codec-specific subclasses of software decoder picture classes. |
| +// This allows us to keep decoders oblivious of our implementation details. |
| +// H264Picture that carries a reference to the V4L2DecodeSurface it was |
| +// constructed with. |
| +class V4L2H264Picture : public H264Picture { |
| + public: |
| + // Stores a reference to |dec_surface|. explicit: a surface reference must |
| + // never convert implicitly to a picture. |
| + explicit V4L2H264Picture(const scoped_refptr< |
| + V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface>& dec_surface) |
| + : dec_surface_(dec_surface) {} |
| + virtual ~V4L2H264Picture() {} |
| + |
| + // Returns |this|, typed as V4L2H264Picture. |
| + V4L2H264Picture* AsV4L2H264Picture() override { return this; } |

Owen Lin
2015/01/09 09:56:14
Why we need this ? What it overrides, there is no
Pawel Osciak
2015/01/12 07:18:19
There is one, sorry didn't upload the file for pre
|
| + // Returns a new reference to the surface passed at construction. |
| + scoped_refptr<V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface> |
| + dec_surface() { |
| + return dec_surface_; |
| + } |
| + |
| + private: |
| + scoped_refptr<V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface> |
| + dec_surface_; |
| + |
| + DISALLOW_COPY_AND_ASSIGN(V4L2H264Picture); |
| +}; |
| + |
| +// VP8Picture that carries a reference to the V4L2DecodeSurface it was |
| +// constructed with. |
| +class V4L2VP8Picture : public VP8Picture { |
| + public: |
| + // Stores a reference to |dec_surface|. explicit: a surface reference must |
| + // never convert implicitly to a picture. |
| + explicit V4L2VP8Picture(const scoped_refptr< |
| + V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface>& dec_surface) |
| + : dec_surface_(dec_surface) {} |
| + virtual ~V4L2VP8Picture() {} |
| + |
| + // Returns |this|, typed as V4L2VP8Picture. |
| + V4L2VP8Picture* AsV4L2VP8Picture() override { return this; } |
| + // Returns a new reference to the surface passed at construction. |
| + scoped_refptr<V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface> |
| + dec_surface() { |
| + return dec_surface_; |
| + } |
| + |
| + private: |
| + scoped_refptr<V4L2SliceVideoDecodeAccelerator::V4L2DecodeSurface> |
| + dec_surface_; |
| + |
| + DISALLOW_COPY_AND_ASSIGN(V4L2VP8Picture); |
| +}; |
| + |
| +} // namespace content |
| + |
| +#endif // CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_ |