Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(527)

Unified Diff: ui/surface/accelerated_surface_transformer_win.cc

Issue 11280318: YUV conversion on the GPU. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Test improvements. Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: ui/surface/accelerated_surface_transformer_win.cc
diff --git a/ui/surface/accelerated_surface_transformer_win.cc b/ui/surface/accelerated_surface_transformer_win.cc
index cfc031b020cf4c693d74e0cde8d830cc6d5eab7a..fb2082c999ac6c0c2e7a31d50d4a13bb8bb07055 100644
--- a/ui/surface/accelerated_surface_transformer_win.cc
+++ b/ui/surface/accelerated_surface_transformer_win.cc
@@ -9,6 +9,7 @@
#include "accelerated_surface_transformer_win_hlsl_compiled.h"
#include "base/debug/trace_event.h"
#include "base/memory/ref_counted.h"
+#include "base/metrics/histogram.h"
#include "base/single_thread_task_runner.h"
#include "base/synchronization/lock.h"
#include "base/synchronization/waitable_event.h"
@@ -21,6 +22,16 @@
using base::win::ScopedComPtr;
using std::vector;
+using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoY8UV44;
+using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertUV44toU2V2;
+using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsOneTexture;
+using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch2Pixels;
+using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch4Pixels;
+using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsOneTextureFlipY;
+using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch4PixelsScale2;
+using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoY;
+using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoU;
+using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoV;
namespace d3d_utils = ui_surface_d3d9_utils;
@@ -37,6 +48,23 @@ const static D3DVERTEXELEMENT9 g_vertexElements[] = {
D3DDECL_END()
};
+class ScopedRenderTargetRestorer {
+ public:
+ ScopedRenderTargetRestorer(IDirect3DDevice9* device,
+ int render_target_id)
+ : device_(device),
+ target_id_(render_target_id) {
+ device_->GetRenderTarget(target_id_, original_render_target_.Receive());
+ }
+ ~ScopedRenderTargetRestorer() {
+ device_->SetRenderTarget(target_id_, original_render_target_);
+ }
+ private:
+ ScopedComPtr<IDirect3DDevice9> device_;
+ int target_id_;
+ ScopedComPtr<IDirect3DSurface9> original_render_target_;
+};
+
// Calculate the number of downsampling passes necessary to transform
// |src_subrect| into |dst_size|, where each pass downsamples the image of
// |src_subrect| by a factor of no more than 2.
@@ -69,45 +97,90 @@ gfx::Size GetHalfSizeNoLessThan(const gfx::Size& size,
std::max(min_size.height(), size.height() / 2));
}
-gfx::Size GetSize(IDirect3DSurface9* surface) {
- D3DSURFACE_DESC surface_description;
- HRESULT hr = surface->GetDesc(&surface_description);
- if (FAILED(hr))
- return gfx::Size(0, 0);
- return gfx::Size(surface_description.Width, surface_description.Height);
-}
-
} // namespace
-
-AcceleratedSurfaceTransformer::AcceleratedSurfaceTransformer() {}
+AcceleratedSurfaceTransformer::AcceleratedSurfaceTransformer()
+ : device_supports_multiple_render_targets_(false) {
+}
bool AcceleratedSurfaceTransformer::Init(IDirect3DDevice9* device) {
- device_ = device;
- if (!InitShaderCombo(
- ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsOneTexture,
- ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsOneTexture,
- SIMPLE_TEXTURE)) {
+ bool result = DoInit(device); // On failure, clean up via ReleaseAll() below.
+ if (!result) {
ReleaseAll();
+ }
+ return result;
+}
+
+bool AcceleratedSurfaceTransformer::DoInit(IDirect3DDevice9* device) {
+ device_ = device;
+
+ {
+ D3DCAPS9 caps;
+ HRESULT hr = device->GetDeviceCaps(&caps);
+ if (FAILED(hr))
+ return false;
+
+ device_supports_multiple_render_targets_ = (caps.NumSimultaneousRTs >= 2);
+
+ // Log statistics about which paths we take.
+ UMA_HISTOGRAM_BOOLEAN("GPU.AcceleratedSurfaceTransformerCanUseMRT",
+ device_supports_multiple_render_targets());
+ }
+
+ if (!InitShaderCombo(ONE_TEXTURE_FLIP_Y,
+ kVsOneTextureFlipY,
+ kPsOneTexture)) {
+ return false;
+ }
+
+ if (device_supports_multiple_render_targets()) {
+ if (!InitShaderCombo(RGB_TO_YV12_FAST__PASS_1_OF_2,
+ kVsFetch4Pixels,
+ kPsConvertRGBtoY8UV44)) {
+ return false;
+ }
+
+ if (!InitShaderCombo(RGB_TO_YV12_FAST__PASS_2_OF_2,
+ kVsFetch2Pixels,
+ kPsConvertUV44toU2V2)) {
+ return false;
+ }
+ }
+
miu 2012/12/27 21:40:17 It doesn't seem like the InitShaderCombo() calls f
ncarter (slow) 2013/01/07 22:49:10 The tests actually needed both paths to work, thou
+ if (!InitShaderCombo(RGB_TO_YV12_SLOW__PASS_1_OF_3,
+ kVsFetch4Pixels,
+ kPsConvertRGBtoY)) {
+ return false;
+ }
+
+ if (!InitShaderCombo(RGB_TO_YV12_SLOW__PASS_2_OF_3,
+ kVsFetch4PixelsScale2,
+ kPsConvertRGBtoU)) {
+ return false;
+ }
+
+ if (!InitShaderCombo(RGB_TO_YV12_SLOW__PASS_3_OF_3,
+ kVsFetch4PixelsScale2,
+ kPsConvertRGBtoV)) {
return false;
}
base::win::ScopedComPtr<IDirect3DVertexDeclaration9> vertex_declaration;
HRESULT hr = device_->CreateVertexDeclaration(g_vertexElements,
vertex_declaration.Receive());
- if (!SUCCEEDED(hr)) {
- ReleaseAll();
+ if (FAILED(hr))
+ return false;
+ hr = device_->SetVertexDeclaration(vertex_declaration);
+ if (FAILED(hr))
return false;
- }
- device_->SetVertexDeclaration(vertex_declaration);
return true;
}
bool AcceleratedSurfaceTransformer::InitShaderCombo(
+ ShaderCombo shader_combo_name,
const BYTE vertex_shader_instructions[],
- const BYTE pixel_shader_instructions[],
- ShaderCombo shader_combo_name) {
+ const BYTE pixel_shader_instructions[]) {
HRESULT hr = device_->CreateVertexShader(
reinterpret_cast<const DWORD*>(vertex_shader_instructions),
vertex_shaders_[shader_combo_name].Receive());
@@ -143,29 +216,40 @@ bool AcceleratedSurfaceTransformer::CopyInverted(
IDirect3DTexture9* src_texture,
IDirect3DSurface9* dst_surface,
const gfx::Size& dst_size) {
- base::win::ScopedComPtr<IDirect3DSurface9> default_color_target;
- device()->GetRenderTarget(0, default_color_target.Receive());
- if (!SetShaderCombo(SIMPLE_TEXTURE))
+ if (!SetShaderCombo(ONE_TEXTURE_FLIP_Y))
return false;
+ ScopedRenderTargetRestorer render_target_restorer(device(), 0);
device()->SetRenderTarget(0, dst_surface);
device()->SetTexture(0, src_texture);
+ device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
apatrick_chromium 2013/01/07 22:56:53 These render states might affect something else us
ncarter (slow) 2013/01/07 23:24:52 AcceleratedSurface isn't doing any 3D rendering at
+ device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
+ device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
+ device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
- D3DVIEWPORT9 viewport = {
apatrick_chromium 2013/01/07 22:56:53 How did you get away without setting the viewport
ncarter (slow) 2013/01/07 23:24:52 http://msdn.microsoft.com/en-us/library/windows/de
- 0, 0,
- dst_size.width(), dst_size.height(),
- 0, 1
- };
- device()->SetViewport(&viewport);
+ DrawScreenAlignedQuad(dst_size);
+
+ // Clear surface references.
+ device()->SetTexture(0, NULL);
+ return true;
+}
- float halfPixelX = -1.0f / dst_size.width();
- float halfPixelY = 1.0f / dst_size.height();
+void AcceleratedSurfaceTransformer::DrawScreenAlignedQuad(
+ const gfx::Size& size) {
+ const float target_size[] = { size.width(), size.height() };
+
+ // Set the uniform shader constant |kRenderTargetSize|, which is bound
+ // to register c0.
+ device()->SetVertexShaderConstantF(0, target_size, arraysize(target_size));
+
+ // We always send down the same vertices. The vertex program will take
+ // care of doing resolution-dependent position adjustment.
Vertex vertices[] = {
- { halfPixelX - 1, halfPixelY + 1, 0.5f, 1, 0, 1 },
- { halfPixelX + 1, halfPixelY + 1, 0.5f, 1, 1, 1 },
- { halfPixelX + 1, halfPixelY - 1, 0.5f, 1, 1, 0 },
- { halfPixelX - 1, halfPixelY - 1, 0.5f, 1, 0, 0 }
+ { -1, +1, 0.5f, 1, 0, 0 },
+ { +1, +1, 0.5f, 1, 1, 0 },
+ { +1, -1, 0.5f, 1, 1, 1 },
+ { -1, -1, 0.5f, 1, 0, 1 }
};
device()->BeginScene();
@@ -175,10 +259,6 @@ bool AcceleratedSurfaceTransformer::CopyInverted(
sizeof(vertices[0]));
device()->EndScene();
- // Clear surface references.
- device()->SetRenderTarget(0, default_color_target);
- device()->SetTexture(0, NULL);
- return true;
}
// Resize an RGB surface using repeated linear interpolation.
@@ -186,8 +266,8 @@ bool AcceleratedSurfaceTransformer::ResizeBilinear(
IDirect3DSurface9* src_surface,
const gfx::Rect& src_subrect,
IDirect3DSurface9* dst_surface) {
- gfx::Size src_size = GetSize(src_surface);
- gfx::Size dst_size = GetSize(dst_surface);
+ gfx::Size src_size = d3d_utils::GetSize(src_surface);
+ gfx::Size dst_size = d3d_utils::GetSize(dst_surface);
if (src_size.IsEmpty() || dst_size.IsEmpty())
return false;
@@ -246,10 +326,212 @@ bool AcceleratedSurfaceTransformer::ResizeBilinear(
return true;
}
+bool AcceleratedSurfaceTransformer::TransformRGBToYV12(
+ IDirect3DTexture9* src_surface,
+ const gfx::Size& dst_size,
+ IDirect3DSurface9** dst_y,
+ IDirect3DSurface9** dst_u,
+ IDirect3DSurface9** dst_v) {
+ gfx::Size packed_y_size;
+ gfx::Size packed_uv_size;
+ if (!AllocYUVBuffers(dst_size, &packed_y_size, &packed_uv_size,
+ dst_y, dst_u, dst_v)) {
+ return false;
+ }
+
+ if (device_supports_multiple_render_targets()) {
+ return TransformRGBToYV12_MRT(src_surface,
+ dst_size,
+ packed_y_size,
+ packed_uv_size,
+ *dst_y,
+ *dst_u,
+ *dst_v);
+ } else {
+ return TransformRGBToYV12_WithoutMRT(src_surface,
+ dst_size,
+ packed_y_size,
+ packed_uv_size,
+ *dst_y,
+ *dst_u,
+ *dst_v);
+ }
+}
+
+bool AcceleratedSurfaceTransformer::AllocYUVBuffers(
+ const gfx::Size& dst_size,
+ gfx::Size* y_size,
+ gfx::Size* uv_size,
+ IDirect3DSurface9** dst_y,
+ IDirect3DSurface9** dst_u,
+ IDirect3DSurface9** dst_v) {
+
+ // Y is full height, packed into 4 components.
+ *y_size = gfx::Size((dst_size.width() + 3) / 4, dst_size.height());
+
+ // U and V are half the size (rounded up) of Y.
+ *uv_size = gfx::Size((y_size->width() + 1) / 2, (y_size->height() + 1) / 2);
+
+ if (!d3d_utils::CreateTemporaryLockableSurface(device(), *y_size, dst_y))
+ return false;
+ if (!d3d_utils::CreateTemporaryLockableSurface(device(), *uv_size, dst_u))
+ return false;
+ if (!d3d_utils::CreateTemporaryLockableSurface(device(), *uv_size, dst_v))
+ return false;
+ return true;
+}
+
+bool AcceleratedSurfaceTransformer::TransformRGBToYV12_MRT(
+ IDirect3DTexture9* src_surface,
+ const gfx::Size& dst_size,
+ const gfx::Size& packed_y_size,
+ const gfx::Size& packed_uv_size,
+ IDirect3DSurface9* dst_y,
+ IDirect3DSurface9* dst_u,
+ IDirect3DSurface9* dst_v) {
+ TRACE_EVENT0("gpu", "RGBToYV12_MRT");
+
+ ScopedRenderTargetRestorer color0_restorer(device(), 0);
+ ScopedRenderTargetRestorer color1_restorer(device(), 1);
+
+ // Create an intermediate surface to hold the UUVV values. This is color
+ // target 1 for the first pass, and texture 0 for the second pass. Its
+ // values are not read afterwards.
+ base::win::ScopedComPtr<IDirect3DTexture9> uv_as_texture;
+ base::win::ScopedComPtr<IDirect3DSurface9> uv_as_surface;
+ if (!d3d_utils::CreateTemporaryRenderTargetTexture(device(),
+ packed_y_size,
+ uv_as_texture.Receive(),
+ uv_as_surface.Receive())) {
+ return false;
+ }
+
+ // Clamping is required if (dst_size.width() % 8 != 0) or if
+ // (dst_size.height() % 2 != 0), so we set it always. Both passes rely on this.
+ device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
+ device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
+
+ /////////////////////////////////////////
+ // Pass 1: RGB --(scaled)--> YYYY + UUVV
+ SetShaderCombo(RGB_TO_YV12_FAST__PASS_1_OF_2);
+
+ // Enable bilinear filtering if scaling is required. The filtering will take
+ // place entirely in the first pass.
+ if (d3d_utils::GetSize(src_surface) != dst_size) {
+ device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
+ device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
+ } else {
+ device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
+ device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
+ }
+
+ device()->SetTexture(0, src_surface);
+ device()->SetRenderTarget(0, dst_y);
+ device()->SetRenderTarget(1, uv_as_surface);
+ DrawScreenAlignedQuad(dst_size);
+
+ /////////////////////////////////////////
+ // Pass 2: UUVV -> UUUU + VVVV
+ SetShaderCombo(RGB_TO_YV12_FAST__PASS_2_OF_2);
+
+ // The second pass uses bilinear minification to achieve vertical scaling,
+ // so enable it always.
+ device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
+ device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
+
+ device()->SetTexture(0, uv_as_texture);
+ device()->SetRenderTarget(0, dst_u);
+ device()->SetRenderTarget(1, dst_v);
+ DrawScreenAlignedQuad(packed_y_size);
+
+ // Clear surface references.
+ device()->SetTexture(0, NULL);
+ return true;
+}
+
+bool AcceleratedSurfaceTransformer::TransformRGBToYV12_WithoutMRT(
+ IDirect3DTexture9* src_surface,
+ const gfx::Size& dst_size,
+ const gfx::Size& packed_y_size,
+ const gfx::Size& packed_uv_size,
+ IDirect3DSurface9* dst_y,
+ IDirect3DSurface9* dst_u,
+ IDirect3DSurface9* dst_v) {
+ TRACE_EVENT0("gpu", "RGBToYV12_WithoutMRT");
+
+ ScopedRenderTargetRestorer color0_restorer(device(), 0);
+
+ base::win::ScopedComPtr<IDirect3DTexture9> scaled_src_surface;
+
+ // If scaling is requested, do it to a temporary texture. The MRT path
+ // gets a scale for free, so we need to support it here too (even though
+ // it's an extra operation).
+ if (d3d_utils::GetSize(src_surface) == dst_size) {
+ scaled_src_surface = src_surface;
+ } else {
+ base::win::ScopedComPtr<IDirect3DSurface9> src_level0;
+ HRESULT hr = src_surface->GetSurfaceLevel(0, src_level0.Receive());
+ if (FAILED(hr))
+ return false;
+
+ base::win::ScopedComPtr<IDirect3DSurface9> dst_level0;
+ if (!d3d_utils::CreateTemporaryRenderTargetTexture(
+ device(), dst_size,
+ scaled_src_surface.Receive(), dst_level0.Receive())) {
+ return false;
+ }
+
+ device()->StretchRect(src_level0, NULL, dst_level0, NULL, D3DTEXF_LINEAR);
apatrick_chromium 2013/01/07 22:56:53 Maybe this isn't an issue for your purposes but if
ncarter (slow) 2013/01/07 23:24:52 Yes, definitely; for the first phase I'm planning
+ }
+
+ // Input texture is the same for all three passes.
+ device()->SetTexture(0, scaled_src_surface);
+
+ // Clamping is required if (dst_size.width() % 8 != 0) or if
+ // (dst_size.height() % 2 != 0), so we set it always. All passes rely on this.
+ device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
+ device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
+
+ /////////////////////
+ // Pass 1: RGB -> Y.
+ SetShaderCombo(RGB_TO_YV12_SLOW__PASS_1_OF_3);
+
+ // Pass 1 just needs point sampling.
+ device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
+ device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
+
+ device()->SetRenderTarget(0, dst_y);
+ DrawScreenAlignedQuad(dst_size);
+
+ // Passes 2 and 3 rely on bilinear minification to downsample U and V.
+ device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
+ device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR);
+
+ /////////////////////
+ // Pass 2: RGB -> U.
+ SetShaderCombo(RGB_TO_YV12_SLOW__PASS_2_OF_3);
+ device()->SetRenderTarget(0, dst_u);
+ DrawScreenAlignedQuad(dst_size);
+
+ /////////////////////
+ // Pass 3: RGB -> V.
+ SetShaderCombo(RGB_TO_YV12_SLOW__PASS_3_OF_3);
+ device()->SetRenderTarget(0, dst_v);
+ DrawScreenAlignedQuad(dst_size);
+
+ // Clear surface references.
+ device()->SetTexture(0, NULL);
+ return true;
+}
+
IDirect3DDevice9* AcceleratedSurfaceTransformer::device() {
return device_;
}
+bool AcceleratedSurfaceTransformer::device_supports_multiple_render_targets() {
miu 2012/12/27 21:40:17 nit: Consider inlining (allowed for simple accesso
ncarter (slow) 2013/01/07 22:49:10 Done.
+ return device_supports_multiple_render_targets_;
+}
+
bool AcceleratedSurfaceTransformer::SetShaderCombo(ShaderCombo combo) {
HRESULT hr = device()->SetVertexShader(vertex_shaders_[combo]);
if (!SUCCEEDED(hr))
@@ -258,4 +540,4 @@ bool AcceleratedSurfaceTransformer::SetShaderCombo(ShaderCombo combo) {
if (!SUCCEEDED(hr))
return false;
return true;
-}
+}

Powered by Google App Engine
This is Rietveld 408576698