Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(336)

Unified Diff: content/common/gpu/media/v4l2_slice_video_decode_accelerator.h

Issue 813693006: Add accelerated video decoder interface, VP8 and H.264 implementations and hook up to V4L2SVDA (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: content/common/gpu/media/v4l2_slice_video_decode_accelerator.h
diff --git a/content/common/gpu/media/v4l2_slice_video_decode_accelerator.h b/content/common/gpu/media/v4l2_slice_video_decode_accelerator.h
new file mode 100644
index 0000000000000000000000000000000000000000..52ee106e1aad5d4f5286ed0804d44c10e9ec1ccc
--- /dev/null
+++ b/content/common/gpu/media/v4l2_slice_video_decode_accelerator.h
@@ -0,0 +1,391 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
+#define CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
+
+#include <linux/videodev2.h>
+#include <queue>
+#include <vector>
+
+#include "base/memory/linked_ptr.h"
+#include "base/memory/ref_counted.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/memory/weak_ptr.h"
+#include "base/synchronization/waitable_event.h"
+#include "base/threading/thread.h"
+#include "content/common/content_export.h"
+#include "content/common/gpu/media/h264_decoder.h"
+#include "content/common/gpu/media/v4l2_video_device.h"
+#include "content/common/gpu/media/vp8_decoder.h"
+#include "media/video/video_decode_accelerator.h"
+
+namespace content {
+
+// An implementation of VideoDecodeAccelerator that utilizes the V4L2 slice
+// level codec API for decoding. The slice level API provides only a low-level
+// decoding functionality and requires userspace to provide support for parsing
+// the input stream and managing decoder state across frames.
+class CONTENT_EXPORT V4L2SliceVideoDecodeAccelerator
+ : public media::VideoDecodeAccelerator {
+ public:
+ class V4L2DecodeSurface;
+
+ V4L2SliceVideoDecodeAccelerator(
+ const scoped_refptr<V4L2Device>& device,
+ EGLDisplay egl_display,
+ EGLContext egl_context,
+ const base::WeakPtr<Client>& io_client_,
+ const base::Callback<bool(void)>& make_context_current,
+ const scoped_refptr<base::MessageLoopProxy>& io_message_loop_proxy);
+ virtual ~V4L2SliceVideoDecodeAccelerator() override;
+
+ // media::VideoDecodeAccelerator implementation.
+ bool Initialize(media::VideoCodecProfile profile,
+ VideoDecodeAccelerator::Client* client) override;
+ void Decode(const media::BitstreamBuffer& bitstream_buffer) override;
+ void AssignPictureBuffers(
+ const std::vector<media::PictureBuffer>& buffers) override;
+ void ReusePictureBuffer(int32 picture_buffer_id) override;
+ void Flush() override;
+ void Reset() override;
+ void Destroy() override;
+ bool CanDecodeOnIOThread() override;
+
+ private:
+ class V4L2H264Accelerator;
+ class V4L2VP8Accelerator;
+
+ // Record for input buffers.
+ struct InputRecord {
+ InputRecord();
+ int32 input_id;
+ void* address;
+ size_t length;
+ size_t bytes_used;
+ bool at_device;
+ };
+
+ // Record for output buffers.
+ struct OutputRecord {
+ OutputRecord();
+ bool at_device;
+ bool at_client;
+ int32 picture_id;
+ EGLImageKHR egl_image;
+ EGLSyncKHR egl_sync;
+ bool cleared;
+ };
+
+ // See http://crbug.com/255116.
+ // Input bitstream buffer size for up to 1080p streams.
+ const size_t kInputBufferMaxSizeFor1080p = 1024 * 1024;
+ // Input bitstream buffer size for up to 4k streams.
+ const size_t kInputBufferMaxSizeFor4k = 4 * kInputBufferMaxSizeFor1080p;
+ const size_t kNumInputBuffers = 16;
+
+ //
+ // Below methods are used by accelerator implementations.
+ //
+ // Append slice data in |data| of size |size| to pending hardware
+ // input buffer with |index|. This buffer will be submitted for decode
+ // on the next DecodeSurface(). Return true on success.
+ bool SubmitSlice(int index, const uint8_t* data, size_t size);
+
+ // Submit controls in |ext_ctrls| to hardware. Return true on success.
+ bool SubmitExtControls(struct v4l2_ext_controls* ext_ctrls);
+
+ // Decode of |dec_surface| is ready to be submitted and all codec-specific
+ // settings are set in hardware.
+ void DecodeSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
+
+ // |dec_surface| is ready to be outputted once decode is finished.
+ // This can be called before decode is actually done in hardware, and this
+ // method is responsible for maintaining the ordering, i.e. the surfaces will
+ // be outputted in the same order as SurfaceReady calls. To do so, the
+ // surfaces are put on decoder_display_queue_ and sent to output in that
+ // order once all preceding surfaces are sent.
+ void SurfaceReady(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
+
+ //
+ // Internal methods of this class.
+ //
+ // Recycle V4L2 output buffer with |index|. Used as surface release callback.
+ void ReuseOutputBuffer(int index);
+
+ // Queue a |dec_surface| to device for decoding.
+ void Enqueue(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
+
+ // Dequeue any V4L2 buffers available and process.
+ void Dequeue();
+
+ // V4L2 QBUF helpers.
+ bool EnqueueInputRecord(int index, uint32_t config_store);
+ bool EnqueueOutputRecord(int index);
+
+ // Set input and output formats in hardware.
+ bool SetupFormats();
+
+ // Create input and output buffers.
+ bool CreateInputBuffers();
+ bool CreateOutputBuffers();
+
+ // Destroy input buffers.
+ void DestroyInputBuffers();
+
+ // Destroy output buffers and release associated resources (textures,
+ // EGLImages). If |dismiss| is true, also dismissing the associated
+ // PictureBuffers.
+ bool DestroyOutputs(bool dismiss);
+
+ // Used by DestroyOutputs.
+ bool DestroyOutputBuffers();
+
+ // Dismiss all |picture_buffer_ids| via Client::DismissPictureBuffer()
+ // and signal |done| after finishing.
+ void DismissPictures(std::vector<int32> picture_buffer_ids,
+ base::WaitableEvent* done);
+
+ // Task to finish initialization on decoder_thread_.
+ void InitializeTask();
+
+ // Surface set change (resolution change) flow.
+ // If we have no surfaces allocated, just allocate them and return.
+ // Otherwise mark us as pending for surface set change.
+ void InitiateSurfaceSetChange();
+ // If a surface set change is pending and we are ready, stop the device,
+ // destroy outputs, releasing resources and dismissing pictures as required,
+ // followed by allocating a new set for the new resolution/DPB size
+ // as provided by decoder. Finally, try to resume decoding.
+ void FinishSurfaceSetChangeIfNeeded();
+
+ void NotifyError(Error error);
+ void DestroyTask();
+
+ // Sets the state to kError and notifies client if needed.
+ void SetErrorState(Error error);
+
+ // Flush flow when requested by client.
+ // When Flush() is called, it posts a FlushTask, which checks the input queue.
+ // If nothing is pending for decode on decoder_input_queue_, we call
+ // InitiateFlush() directly. Otherwise, we push a dummy BitstreamBufferRef
+ // onto the decoder_input_queue_ to schedule a flush. When we reach it later
+ // on, we call InitiateFlush() to perform it at the correct time.
+ void FlushTask();
+ // Tell the decoder to flush all frames, reset it and mark us as scheduled
+ // for flush, so that we can finish it once all pending decodes are finished.
+ void InitiateFlush();
+ // If all pending frames are decoded and we are waiting to flush, perform it.
+ // This will send all pending pictures to client and notify the client that
+ // flush is complete and puts us in a state ready to resume.
+ void FinishFlushIfNeeded();
+
+ // Reset flow when requested by client.
+ // Drop all inputs and reset the decoder and mark us as pending for reset.
+ void ResetTask();
+ // If all pending frames are decoded and we are waiting to reset, perform it.
+ // This drops all pending outputs (client is not interested anymore),
+ // notifies the client we are done and puts us in a state ready to resume.
+ void FinishResetIfNeeded();
+
+ // Process pending events if any.
+ void ProcessPendingEventsIfNeeded();
+
+ // Performed on decoder_thread_ as a consequence of poll() on decoder_thread_
+ // returning an event.
+ void ServiceDeviceTask();
+
+ // Schedule poll if we have any buffers queued and the poll thread
+ // is not stopped (on surface set change).
+ void SchedulePollIfNeeded();
+
+ // Attempt to start/stop device_poll_thread_.
+ bool StartDevicePoll();
+ bool StopDevicePoll(bool keep_input_state);
+
+ // Ran on device_poll_thread_ to wait for device events.
+ void DevicePollTask(bool poll_device);
+
+ enum State {
+ // We are in this state until Initialize() returns successfully.
+ // We can't post errors to the client in this state yet.
+ kUninitialized,
+ // Initialize() returned successfully.
+ kInitialized,
+ // This state allows making progress decoding more input stream.
+ kDecoding,
+ // Transitional state when we are not decoding any more stream, but are
+ // performing flush, reset, resolution change or are destroying ourselves.
+ kIdle,
+ // Error state, set when sending NotifyError to client.
+ kError,
+ };
+
+ // Buffer id for flush buffer, queued by FlushTask().
+ const int kFlushBufferId = -2;
+
+ // Handler for Decode() on decoder_thread_.
+ void DecodeTask(const media::BitstreamBuffer& bitstream_buffer);
+
+ // Schedule a new DecodeBufferTask if we are decoding.
+ void ScheduleDecodeBufferTaskIfNeeded();
+
+ // Main decoder loop. Keep decoding the current buffer in decoder_, asking
+ // for more stream via TrySetNewBistreamBuffer() if decoder_ requests so,
+ // and handle other returns from it appropriately.
+ void DecodeBufferTask();
+
+ // Check decoder_input_queue_ for any available buffers to decode and
+ // set the decoder_current_bitstream_buffer_ to the next buffer if one is
+ // available, taking it off the queue. Also set the current stream pointer
+ // in decoder_, and return true.
+ // Return false if no buffers are pending on decoder_input_queue_.
+ bool TrySetNewBistreamBuffer();
+
+ // Auto-destruction reference for EGLSync (for message-passing).
+ struct EGLSyncKHRRef;
+ void ReusePictureBufferTask(int32 picture_buffer_id,
+ scoped_ptr<EGLSyncKHRRef> egl_sync_ref);
+
+ // Called to actually send |dec_surface| to the client, after it is decoded
+ // preserving the order in which it was scheduled via SurfaceReady().
+ void OutputSurface(const scoped_refptr<V4L2DecodeSurface>& dec_surface);
+
+ // Goes over the |decoder_display_queue_| and sends all buffers from the
+ // front of the queue that are already decoded to the client, in order.
+ void TryOutputSurfaces();
+
+ // Creates a new decode surface or returns nullptr if one is not available.
+ scoped_refptr<V4L2DecodeSurface> CreateSurface();
+
+ // Send decoded pictures to PictureReady.
+ void SendPictureReady();
+
+ // Callback that indicates a picture has been cleared.
+ void PictureCleared();
+
+ size_t input_planes_count_;
+ size_t output_planes_count_;
+
+ // GPU Child thread message loop.
+ const scoped_refptr<base::MessageLoopProxy> child_message_loop_proxy_;
+
+ // IO thread message loop.
+ scoped_refptr<base::MessageLoopProxy> io_message_loop_proxy_;
+
+ // WeakPtr<> pointing to |this| for use in posting tasks from the decoder or
+ // device worker threads back to the child thread.
+ base::WeakPtr<V4L2SliceVideoDecodeAccelerator> weak_this_;
+
+ // To expose client callbacks from VideoDecodeAccelerator.
+ // NOTE: all calls to these objects *MUST* be executed on
+ // child_message_loop_proxy_.
+ scoped_ptr<base::WeakPtrFactory<VideoDecodeAccelerator::Client>>
+ client_ptr_factory_;
+ base::WeakPtr<VideoDecodeAccelerator::Client> client_;
+ // Callbacks to |io_client_| must be executed on |io_message_loop_proxy_|.
+ base::WeakPtr<Client> io_client_;
+
+ // V4L2 device in use.
+ scoped_refptr<V4L2Device> device_;
+
+ // Thread to communicate with the device on.
+ base::Thread decoder_thread_;
+ scoped_refptr<base::MessageLoopProxy> decoder_thread_proxy_;
+
+ // Thread used to poll the device for events.
+ base::Thread device_poll_thread_;
+
+ // Input queue state.
+ bool input_streamon_;
+ // Number of input buffers enqueued to the device.
+ int input_buffer_queued_count_;
+ // Input buffers ready to use; LIFO since we don't care about ordering.
+ std::list<int> free_input_buffers_;
+ // Mapping of int index to an input buffer record.
+ std::vector<InputRecord> input_buffer_map_;
+
+ // Output queue state.
+ bool output_streamon_;
+ // Number of output buffers enqueued to the device.
+ int output_buffer_queued_count_;
+ // Output buffers ready to use.
+ std::list<int> free_output_buffers_;
+ // Mapping of int index to an output buffer record.
+ std::vector<OutputRecord> output_buffer_map_;
+
+ media::VideoCodecProfile video_profile_;
+ uint32_t output_format_fourcc_;
+ gfx::Size frame_buffer_size_;
+ size_t output_dpb_size_;
+
+ struct BitstreamBufferRef;
+ // Input queue of stream buffers coming from the client.
+ std::queue<linked_ptr<BitstreamBufferRef>> decoder_input_queue_;
+ // BitstreamBuffer currently being processed.
+ scoped_ptr<BitstreamBufferRef> decoder_current_bitstream_buffer_;
+
+ // Queue storing decode surfaces ready to be output as soon as they are
+ // decoded. The surfaces must be output in order they are queued.
+ std::queue<scoped_refptr<V4L2DecodeSurface>> decoder_display_queue_;
+
+ // Decoder state.
+ State state_;
+
+ // If any of these are true, we are waiting for the device to finish decoding
+ // all previously-queued frames, so we can finish the flush/reset/surface
+ // change flows. These can stack.
+ bool decoder_flushing_;
+ bool decoder_resetting_;
+ bool surface_set_change_pending_;
+
+ // Hardware accelerators.
+ // TODO(posciak): Try to have a superclass here if possible.
+ scoped_ptr<V4L2H264Accelerator> h264_accelerator_;
+ scoped_ptr<V4L2VP8Accelerator> vp8_accelerator_;
+
+ // Codec-specific software decoder in use.
+ scoped_ptr<AcceleratedVideoDecoder> decoder_;
+
+ // Surfaces queued to device to keep references to them while decoded.
+ using V4L2DecodeSurfaceByOutputId =
+ std::map<int, scoped_refptr<V4L2DecodeSurface>>;
+ V4L2DecodeSurfaceByOutputId surfaces_at_device_;
+
+ // Surfaces sent to client to keep references to them while displayed.
+ using V4L2DecodeSurfaceByPictureBufferId =
+ std::map<int32, scoped_refptr<V4L2DecodeSurface>>;
+ V4L2DecodeSurfaceByPictureBufferId surfaces_at_display_;
+
+ // Record for decoded pictures that can be sent to PictureReady.
+ struct PictureRecord;
+ // Pictures that are ready but not sent to PictureReady yet.
+ std::queue<PictureRecord> pending_picture_ready_;
+
+ // The number of pictures that are sent to PictureReady and will be cleared.
+ int picture_clearing_count_;
+
+ // Used by the decoder thread to wait for AssignPictureBuffers to arrive
+ // to avoid races with potential Reset requests.
+ base::WaitableEvent pictures_assigned_;
+
+ // Make the GL context current callback.
+ base::Callback<bool(void)> make_context_current_;
+
+ // EGL state
+ EGLDisplay egl_display_;
+ EGLContext egl_context_;
+
+ // The WeakPtrFactory for |weak_this_|.
+ base::WeakPtrFactory<V4L2SliceVideoDecodeAccelerator> weak_this_factory_;
+
+ DISALLOW_COPY_AND_ASSIGN(V4L2SliceVideoDecodeAccelerator);
+};
+
+class V4L2H264Picture;
+class V4L2VP8Picture;
+
+} // namespace content
+
+#endif // CONTENT_COMMON_GPU_MEDIA_V4L2_SLICE_VIDEO_DECODE_ACCELERATOR_H_
« no previous file with comments | « content/common/gpu/media/v4l2_image_processor.cc ('k') | content/common/gpu/media/v4l2_slice_video_decode_accelerator.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698