Chromium Code Reviews| Index: ui/surface/accelerated_surface_transformer_win.cc |
| diff --git a/ui/surface/accelerated_surface_transformer_win.cc b/ui/surface/accelerated_surface_transformer_win.cc |
| index cfc031b020cf4c693d74e0cde8d830cc6d5eab7a..fb2082c999ac6c0c2e7a31d50d4a13bb8bb07055 100644 |
| --- a/ui/surface/accelerated_surface_transformer_win.cc |
| +++ b/ui/surface/accelerated_surface_transformer_win.cc |
| @@ -9,6 +9,7 @@ |
| #include "accelerated_surface_transformer_win_hlsl_compiled.h" |
| #include "base/debug/trace_event.h" |
| #include "base/memory/ref_counted.h" |
| +#include "base/metrics/histogram.h" |
| #include "base/single_thread_task_runner.h" |
| #include "base/synchronization/lock.h" |
| #include "base/synchronization/waitable_event.h" |
| @@ -21,6 +22,16 @@ |
| using base::win::ScopedComPtr; |
| using std::vector; |
| +using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoY8UV44; |
| +using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertUV44toU2V2; |
| +using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsOneTexture; |
| +using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch2Pixels; |
| +using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch4Pixels; |
| +using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsOneTextureFlipY; |
| +using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch4PixelsScale2; |
| +using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoY; |
| +using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoU; |
| +using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoV; |
| namespace d3d_utils = ui_surface_d3d9_utils; |
| @@ -37,6 +48,23 @@ const static D3DVERTEXELEMENT9 g_vertexElements[] = { |
| D3DDECL_END() |
| }; |
| +class ScopedRenderTargetRestorer { |
| + public: |
| + ScopedRenderTargetRestorer(IDirect3DDevice9* device, |
| + int render_target_id) |
| + : device_(device), |
| + target_id_(render_target_id) { |
| + device_->GetRenderTarget(target_id_, original_render_target_.Receive()); |
| + } |
| + ~ScopedRenderTargetRestorer() { |
| + device_->SetRenderTarget(target_id_, original_render_target_); |
| + } |
| + private: |
| + ScopedComPtr<IDirect3DDevice9> device_; |
| + int target_id_; |
| + ScopedComPtr<IDirect3DSurface9> original_render_target_; |
| +}; |
| + |
| // Calculate the number of downsampling passes necessary to transform |
| // |src_subrect| into |dst_size| by repeatedly downsampling the image of |
| // |src_subrect| by a factor of no more than 2. |
| @@ -69,45 +97,90 @@ gfx::Size GetHalfSizeNoLessThan(const gfx::Size& size, |
| std::max(min_size.height(), size.height() / 2)); |
| } |
| -gfx::Size GetSize(IDirect3DSurface9* surface) { |
| - D3DSURFACE_DESC surface_description; |
| - HRESULT hr = surface->GetDesc(&surface_description); |
| - if (FAILED(hr)) |
| - return gfx::Size(0, 0); |
| - return gfx::Size(surface_description.Width, surface_description.Height); |
| -} |
| - |
| } // namespace |
| - |
| -AcceleratedSurfaceTransformer::AcceleratedSurfaceTransformer() {} |
| +AcceleratedSurfaceTransformer::AcceleratedSurfaceTransformer() |
| + : device_supports_multiple_render_targets_(false) { |
| +} |
| bool AcceleratedSurfaceTransformer::Init(IDirect3DDevice9* device) { |
| - device_ = device; |
| - if (!InitShaderCombo( |
| - ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsOneTexture, |
| - ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsOneTexture, |
| - SIMPLE_TEXTURE)) { |
| + bool result = DoInit(device); // ReleaseAll() is called below on failure. |
| + if (!result) { |
| ReleaseAll(); |
| + } |
| + return result; |
| +} |
| + |
| +bool AcceleratedSurfaceTransformer::DoInit(IDirect3DDevice9* device) { |
| + device_ = device; |
| + |
| + { |
| + D3DCAPS9 caps; |
| + HRESULT hr = device->GetDeviceCaps(&caps); |
| + if (FAILED(hr)) |
| + return false; |
| + |
| + device_supports_multiple_render_targets_ = (caps.NumSimultaneousRTs >= 2); |
| + |
| + // Log statistics about which paths we take. |
| + UMA_HISTOGRAM_BOOLEAN("GPU.AcceleratedSurfaceTransformerCanUseMRT", |
| + device_supports_multiple_render_targets()); |
| + } |
| + |
| + if (!InitShaderCombo(ONE_TEXTURE_FLIP_Y, |
| + kVsOneTextureFlipY, |
| + kPsOneTexture)) { |
| + return false; |
| + } |
| + |
| + if (device_supports_multiple_render_targets()) { |
| + if (!InitShaderCombo(RGB_TO_YV12_FAST__PASS_1_OF_2, |
| + kVsFetch4Pixels, |
| + kPsConvertRGBtoY8UV44)) { |
| + return false; |
| + } |
| + |
| + if (!InitShaderCombo(RGB_TO_YV12_FAST__PASS_2_OF_2, |
| + kVsFetch2Pixels, |
| + kPsConvertUV44toU2V2)) { |
| + return false; |
| + } |
| + } |
| + |
|
miu
2012/12/27 21:40:17
It doesn't seem like the InitShaderCombo() calls f
ncarter (slow)
2013/01/07 22:49:10
The tests actually needed both paths to work, thou
|
| + if (!InitShaderCombo(RGB_TO_YV12_SLOW__PASS_1_OF_3, |
| + kVsFetch4Pixels, |
| + kPsConvertRGBtoY)) { |
| + return false; |
| + } |
| + |
| + if (!InitShaderCombo(RGB_TO_YV12_SLOW__PASS_2_OF_3, |
| + kVsFetch4PixelsScale2, |
| + kPsConvertRGBtoU)) { |
| + return false; |
| + } |
| + |
| + if (!InitShaderCombo(RGB_TO_YV12_SLOW__PASS_3_OF_3, |
| + kVsFetch4PixelsScale2, |
| + kPsConvertRGBtoV)) { |
| return false; |
| } |
| base::win::ScopedComPtr<IDirect3DVertexDeclaration9> vertex_declaration; |
| HRESULT hr = device_->CreateVertexDeclaration(g_vertexElements, |
| vertex_declaration.Receive()); |
| - if (!SUCCEEDED(hr)) { |
| - ReleaseAll(); |
| + if (FAILED(hr)) |
| + return false; |
| + hr = device_->SetVertexDeclaration(vertex_declaration); |
| + if (FAILED(hr)) |
| return false; |
| - } |
| - device_->SetVertexDeclaration(vertex_declaration); |
| return true; |
| } |
| bool AcceleratedSurfaceTransformer::InitShaderCombo( |
| + ShaderCombo shader_combo_name, |
| const BYTE vertex_shader_instructions[], |
| - const BYTE pixel_shader_instructions[], |
| - ShaderCombo shader_combo_name) { |
| + const BYTE pixel_shader_instructions[]) { |
| HRESULT hr = device_->CreateVertexShader( |
| reinterpret_cast<const DWORD*>(vertex_shader_instructions), |
| vertex_shaders_[shader_combo_name].Receive()); |
| @@ -143,29 +216,40 @@ bool AcceleratedSurfaceTransformer::CopyInverted( |
| IDirect3DTexture9* src_texture, |
| IDirect3DSurface9* dst_surface, |
| const gfx::Size& dst_size) { |
| - base::win::ScopedComPtr<IDirect3DSurface9> default_color_target; |
| - device()->GetRenderTarget(0, default_color_target.Receive()); |
| - if (!SetShaderCombo(SIMPLE_TEXTURE)) |
| + if (!SetShaderCombo(ONE_TEXTURE_FLIP_Y)) |
| return false; |
| + ScopedRenderTargetRestorer render_target_restorer(device(), 0); |
| device()->SetRenderTarget(0, dst_surface); |
| device()->SetTexture(0, src_texture); |
| + device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT); |
|
apatrick_chromium
2013/01/07 22:56:53
These render states might affect something else us
ncarter (slow)
2013/01/07 23:24:52
AcceleratedSurface isn't doing any 3D rendering at
|
| + device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); |
| + device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP); |
| + device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP); |
| - D3DVIEWPORT9 viewport = { |
|
apatrick_chromium
2013/01/07 22:56:53
How did you get away without setting the viewport
ncarter (slow)
2013/01/07 23:24:52
http://msdn.microsoft.com/en-us/library/windows/de
|
| - 0, 0, |
| - dst_size.width(), dst_size.height(), |
| - 0, 1 |
| - }; |
| - device()->SetViewport(&viewport); |
| + DrawScreenAlignedQuad(dst_size); |
| + |
| + // Clear surface references. |
| + device()->SetTexture(0, NULL); |
| + return true; |
| +} |
| - float halfPixelX = -1.0f / dst_size.width(); |
| - float halfPixelY = 1.0f / dst_size.height(); |
| +void AcceleratedSurfaceTransformer::DrawScreenAlignedQuad( |
| + const gfx::Size& size) { |
| + const float target_size[] = { size.width(), size.height() }; |
| + |
| + // Set the uniform shader constant |kRenderTargetSize|, which is bound |
| + // to register c0. |
| + device()->SetVertexShaderConstantF(0, target_size, arraysize(target_size)); |
| + |
| + // We always send down the same vertices. The vertex program will take |
| + // care of doing resolution-dependent position adjustment. |
| Vertex vertices[] = { |
| - { halfPixelX - 1, halfPixelY + 1, 0.5f, 1, 0, 1 }, |
| - { halfPixelX + 1, halfPixelY + 1, 0.5f, 1, 1, 1 }, |
| - { halfPixelX + 1, halfPixelY - 1, 0.5f, 1, 1, 0 }, |
| - { halfPixelX - 1, halfPixelY - 1, 0.5f, 1, 0, 0 } |
| + { -1, +1, 0.5f, 1, 0, 0 }, |
| + { +1, +1, 0.5f, 1, 1, 0 }, |
| + { +1, -1, 0.5f, 1, 1, 1 }, |
| + { -1, -1, 0.5f, 1, 0, 1 } |
| }; |
| device()->BeginScene(); |
| @@ -175,10 +259,6 @@ bool AcceleratedSurfaceTransformer::CopyInverted( |
| sizeof(vertices[0])); |
| device()->EndScene(); |
| - // Clear surface references. |
| - device()->SetRenderTarget(0, default_color_target); |
| - device()->SetTexture(0, NULL); |
| - return true; |
| } |
| // Resize an RGB surface using repeated linear interpolation. |
| @@ -186,8 +266,8 @@ bool AcceleratedSurfaceTransformer::ResizeBilinear( |
| IDirect3DSurface9* src_surface, |
| const gfx::Rect& src_subrect, |
| IDirect3DSurface9* dst_surface) { |
| - gfx::Size src_size = GetSize(src_surface); |
| - gfx::Size dst_size = GetSize(dst_surface); |
| + gfx::Size src_size = d3d_utils::GetSize(src_surface); |
| + gfx::Size dst_size = d3d_utils::GetSize(dst_surface); |
| if (src_size.IsEmpty() || dst_size.IsEmpty()) |
| return false; |
| @@ -246,10 +326,212 @@ bool AcceleratedSurfaceTransformer::ResizeBilinear( |
| return true; |
| } |
| +bool AcceleratedSurfaceTransformer::TransformRGBToYV12( |
| + IDirect3DTexture9* src_surface, |
| + const gfx::Size& dst_size, |
| + IDirect3DSurface9** dst_y, |
| + IDirect3DSurface9** dst_u, |
| + IDirect3DSurface9** dst_v) { |
| + gfx::Size packed_y_size; |
| + gfx::Size packed_uv_size; |
| + if (!AllocYUVBuffers(dst_size, &packed_y_size, &packed_uv_size, |
| + dst_y, dst_u, dst_v)) { |
| + return false; |
| + } |
| + |
| + if (device_supports_multiple_render_targets()) { |
| + return TransformRGBToYV12_MRT(src_surface, |
| + dst_size, |
| + packed_y_size, |
| + packed_uv_size, |
| + *dst_y, |
| + *dst_u, |
| + *dst_v); |
| + } else { |
| + return TransformRGBToYV12_WithoutMRT(src_surface, |
| + dst_size, |
| + packed_y_size, |
| + packed_uv_size, |
| + *dst_y, |
| + *dst_u, |
| + *dst_v); |
| + } |
| +} |
| + |
| +bool AcceleratedSurfaceTransformer::AllocYUVBuffers( |
| + const gfx::Size& dst_size, |
| + gfx::Size* y_size, |
| + gfx::Size* uv_size, |
| + IDirect3DSurface9** dst_y, |
| + IDirect3DSurface9** dst_u, |
| + IDirect3DSurface9** dst_v) { |
| + |
| + // Y is full height, packed into 4 components. |
| + *y_size = gfx::Size((dst_size.width() + 3) / 4, dst_size.height()); |
| + |
| + // U and V are half the size (rounded up) of Y. |
| + *uv_size = gfx::Size((y_size->width() + 1) / 2, (y_size->height() + 1) / 2); |
| + |
| + if (!d3d_utils::CreateTemporaryLockableSurface(device(), *y_size, dst_y)) |
| + return false; |
| + if (!d3d_utils::CreateTemporaryLockableSurface(device(), *uv_size, dst_u)) |
| + return false; |
| + if (!d3d_utils::CreateTemporaryLockableSurface(device(), *uv_size, dst_v)) |
| + return false; |
| + return true; |
| +} |
| + |
| +bool AcceleratedSurfaceTransformer::TransformRGBToYV12_MRT( |
| + IDirect3DTexture9* src_surface, |
| + const gfx::Size& dst_size, |
| + const gfx::Size& packed_y_size, |
| + const gfx::Size& packed_uv_size, |
| + IDirect3DSurface9* dst_y, |
| + IDirect3DSurface9* dst_u, |
| + IDirect3DSurface9* dst_v) { |
| + TRACE_EVENT0("gpu", "RGBToYV12_MRT"); |
| + |
| + ScopedRenderTargetRestorer color0_restorer(device(), 0); |
| + ScopedRenderTargetRestorer color1_restorer(device(), 1); |
| + |
| + // Create an intermediate surface to hold the UUVV values. This is color |
| + // target 1 for the first pass, and texture 0 for the second pass. Its |
| + // values are not read afterwards. |
| + base::win::ScopedComPtr<IDirect3DTexture9> uv_as_texture; |
| + base::win::ScopedComPtr<IDirect3DSurface9> uv_as_surface; |
| + if (!d3d_utils::CreateTemporaryRenderTargetTexture(device(), |
| + packed_y_size, |
| + uv_as_texture.Receive(), |
| + uv_as_surface.Receive())) { |
| + return false; |
| + } |
| + |
| + // Clamping is required if (dst_size.width() % 8 != 0) or if |
| + // (dst_size.height() % 2 != 0), so we set it always. Both passes rely on this. |
| + device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP); |
| + device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP); |
| + |
| + ///////////////////////////////////////// |
| + // Pass 1: RGB --(scaled)--> YYYY + UUVV |
| + SetShaderCombo(RGB_TO_YV12_FAST__PASS_1_OF_2); |
| + |
| + // Enable bilinear filtering if scaling is required. The filtering will take |
| + // place entirely in the first pass. |
| + if (d3d_utils::GetSize(src_surface) != dst_size) { |
| + device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); |
| + device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); |
| + } else { |
| + device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT); |
| + device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); |
| + } |
| + |
| + device()->SetTexture(0, src_surface); |
| + device()->SetRenderTarget(0, dst_y); |
| + device()->SetRenderTarget(1, uv_as_surface); |
| + DrawScreenAlignedQuad(dst_size); |
| + |
| + ///////////////////////////////////////// |
| + // Pass 2: UUVV -> UUUU + VVVV |
| + SetShaderCombo(RGB_TO_YV12_FAST__PASS_2_OF_2); |
| + |
| + // The second pass uses bilinear minification to achieve vertical scaling, |
| + // so enable it always. |
| + device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT); |
| + device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); |
| + |
| + device()->SetTexture(0, uv_as_texture); |
| + device()->SetRenderTarget(0, dst_u); |
| + device()->SetRenderTarget(1, dst_v); |
| + DrawScreenAlignedQuad(packed_y_size); |
| + |
| + // Clear surface references. |
| + device()->SetTexture(0, NULL); |
| + return true; |
| +} |
| + |
| +bool AcceleratedSurfaceTransformer::TransformRGBToYV12_WithoutMRT( |
| + IDirect3DTexture9* src_surface, |
| + const gfx::Size& dst_size, |
| + const gfx::Size& packed_y_size, |
| + const gfx::Size& packed_uv_size, |
| + IDirect3DSurface9* dst_y, |
| + IDirect3DSurface9* dst_u, |
| + IDirect3DSurface9* dst_v) { |
| + TRACE_EVENT0("gpu", "RGBToYV12_WithoutMRT"); |
| + |
| + ScopedRenderTargetRestorer color0_restorer(device(), 0); |
| + |
| + base::win::ScopedComPtr<IDirect3DTexture9> scaled_src_surface; |
| + |
| + // If scaling is requested, do it to a temporary texture. The MRT path |
| + // gets a scale for free, so we need to support it here too (even though |
| + // it's an extra operation). |
| + if (d3d_utils::GetSize(src_surface) == dst_size) { |
| + scaled_src_surface = src_surface; |
| + } else { |
| + base::win::ScopedComPtr<IDirect3DSurface9> src_level0; |
| + HRESULT hr = src_surface->GetSurfaceLevel(0, src_level0.Receive()); |
| + if (FAILED(hr)) |
| + return false; |
| + |
| + base::win::ScopedComPtr<IDirect3DSurface9> dst_level0; |
| + if (!d3d_utils::CreateTemporaryRenderTargetTexture( |
| + device(), dst_size, |
| + scaled_src_surface.Receive(), dst_level0.Receive())) { |
| + return false; |
| + } |
| + |
| + device()->StretchRect(src_level0, NULL, dst_level0, NULL, D3DTEXF_LINEAR); |
|
apatrick_chromium
2013/01/07 22:56:53
Maybe this isn't an issue for your purposes but if
ncarter (slow)
2013/01/07 23:24:52
Yes, definitely; for the first phase I'm planning
|
| + } |
| + |
| + // Input texture is the same for all three passes. |
| + device()->SetTexture(0, scaled_src_surface); |
| + |
| + // Clamping is required if (dst_size.width() % 8 != 0) or if |
| + // (dst_size.height() % 2 != 0), so we set it always. All passes rely on this. |
| + device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP); |
| + device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP); |
| + |
| + ///////////////////// |
| + // Pass 1: RGB -> Y. |
| + SetShaderCombo(RGB_TO_YV12_SLOW__PASS_1_OF_3); |
| + |
| + // Pass 1 just needs point sampling. |
| + device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT); |
| + device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); |
| + |
| + device()->SetRenderTarget(0, dst_y); |
| + DrawScreenAlignedQuad(dst_size); |
| + |
| + // Passes 2 and 3 rely on bilinear minification to downsample U and V. |
| + device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT); |
| + device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); |
| + |
| + ///////////////////// |
| + // Pass 2: RGB -> U. |
| + SetShaderCombo(RGB_TO_YV12_SLOW__PASS_2_OF_3); |
| + device()->SetRenderTarget(0, dst_u); |
| + DrawScreenAlignedQuad(dst_size); |
| + |
| + ///////////////////// |
| + // Pass 3: RGB -> V. |
| + SetShaderCombo(RGB_TO_YV12_SLOW__PASS_3_OF_3); |
| + device()->SetRenderTarget(0, dst_v); |
| + DrawScreenAlignedQuad(dst_size); |
| + |
| + // Clear surface references. |
| + device()->SetTexture(0, NULL); |
| + return true; |
| +} |
| + |
| IDirect3DDevice9* AcceleratedSurfaceTransformer::device() { |
| return device_; |
| } |
| +bool AcceleratedSurfaceTransformer::device_supports_multiple_render_targets() { |
|
miu
2012/12/27 21:40:17
nit: Consider inlining (allowed for simple accesso
ncarter (slow)
2013/01/07 22:49:10
Done.
|
| + return device_supports_multiple_render_targets_; |
| +} |
| + |
| bool AcceleratedSurfaceTransformer::SetShaderCombo(ShaderCombo combo) { |
| HRESULT hr = device()->SetVertexShader(vertex_shaders_[combo]); |
| if (!SUCCEEDED(hr)) |
| @@ -258,4 +540,4 @@ bool AcceleratedSurfaceTransformer::SetShaderCombo(ShaderCombo combo) { |
| if (!SUCCEEDED(hr)) |
| return false; |
| return true; |
| -} |
| +} |