Index: ui/surface/accelerated_surface_transformer_win.cc |
diff --git a/ui/surface/accelerated_surface_transformer_win.cc b/ui/surface/accelerated_surface_transformer_win.cc |
index cfc031b020cf4c693d74e0cde8d830cc6d5eab7a..fb2082c999ac6c0c2e7a31d50d4a13bb8bb07055 100644 |
--- a/ui/surface/accelerated_surface_transformer_win.cc |
+++ b/ui/surface/accelerated_surface_transformer_win.cc |
@@ -9,6 +9,7 @@ |
#include "accelerated_surface_transformer_win_hlsl_compiled.h" |
#include "base/debug/trace_event.h" |
#include "base/memory/ref_counted.h" |
+#include "base/metrics/histogram.h" |
#include "base/single_thread_task_runner.h" |
#include "base/synchronization/lock.h" |
#include "base/synchronization/waitable_event.h" |
@@ -21,6 +22,16 @@ |
using base::win::ScopedComPtr; |
using std::vector; |
+using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoY8UV44; |
+using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertUV44toU2V2; |
+using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsOneTexture; |
+using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch2Pixels; |
+using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch4Pixels; |
+using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsOneTextureFlipY; |
+using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsFetch4PixelsScale2; |
+using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoY; |
+using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoU; |
+using ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsConvertRGBtoV; |
namespace d3d_utils = ui_surface_d3d9_utils; |
@@ -37,6 +48,23 @@ const static D3DVERTEXELEMENT9 g_vertexElements[] = { |
D3DDECL_END() |
}; |
+// Remembers which surface is bound to render target slot |render_target_id| |
+// of |device| at construction, and binds that same surface back to the slot |
+// at destruction. Intended to be used as a stack object around code that |
+// temporarily redirects rendering. |
+class ScopedRenderTargetRestorer { |
+ public: |
+  ScopedRenderTargetRestorer(IDirect3DDevice9* device, |
+                             int render_target_id) |
+      : device_(device), |
+        target_id_(render_target_id) { |
+    // The HRESULT is not checked; whatever GetRenderTarget() stored in |
+    // |original_render_target_| is what gets rebound later. |
+    device_->GetRenderTarget(target_id_, original_render_target_.Receive()); |
+  } |
+  ~ScopedRenderTargetRestorer() { |
+    device_->SetRenderTarget(target_id_, original_render_target_); |
+  } |
+ private: |
+  // Not copyable: a copy would rebind the saved target a second time from |
+  // its own destructor. Declared but intentionally left undefined. |
+  ScopedRenderTargetRestorer(const ScopedRenderTargetRestorer&); |
+  void operator=(const ScopedRenderTargetRestorer&); |
+ |
+  ScopedComPtr<IDirect3DDevice9> device_; |
+  int target_id_; |
+  ScopedComPtr<IDirect3DSurface9> original_render_target_; |
+}; |
+ |
// Calculate the number of downsampling steps necessary to transform |
// |src_subrect| into |dst_size|, where each step downsamples the image of |
// |src_subrect| by a factor of no more than 2. |
@@ -69,45 +97,90 @@ gfx::Size GetHalfSizeNoLessThan(const gfx::Size& size, |
std::max(min_size.height(), size.height() / 2)); |
} |
-gfx::Size GetSize(IDirect3DSurface9* surface) { |
- D3DSURFACE_DESC surface_description; |
- HRESULT hr = surface->GetDesc(&surface_description); |
- if (FAILED(hr)) |
- return gfx::Size(0, 0); |
- return gfx::Size(surface_description.Width, surface_description.Height); |
-} |
- |
} // namespace |
- |
-AcceleratedSurfaceTransformer::AcceleratedSurfaceTransformer() {} |
+// Starts with multiple-render-target support marked as unavailable; DoInit() |
+// overwrites the flag from the device caps. |
+AcceleratedSurfaceTransformer::AcceleratedSurfaceTransformer() |
+ : device_supports_multiple_render_targets_(false) { |
+} |
+// Initializes the transformer for |device| by delegating to DoInit(). When |
+// DoInit() reports failure, ReleaseAll() is called before the failure is |
+// returned to the caller. |
bool AcceleratedSurfaceTransformer::Init(IDirect3DDevice9* device) { |
- device_ = device; |
- if (!InitShaderCombo( |
- ui_surface::AcceleratedSurfaceTransformerWinHLSL::kVsOneTexture, |
- ui_surface::AcceleratedSurfaceTransformerWinHLSL::kPsOneTexture, |
- SIMPLE_TEXTURE)) { |
+ bool result = DoInit(device); // All of the setup work happens in DoInit(). |
+ if (!result) { |
ReleaseAll(); |
+ } |
+ return result; |
+} |
+ |
+// Does the actual initialization work for Init(): stores |device|, queries |
+// its caps to decide whether the multiple-render-target path can be used, |
+// initializes the shader combos, and creates and sets the vertex declaration. |
+// Returns false at the first step that fails, without cleaning up. |
+bool AcceleratedSurfaceTransformer::DoInit(IDirect3DDevice9* device) { |
+ device_ = device; |
+ |
+ { |
+ D3DCAPS9 caps; |
+ HRESULT hr = device->GetDeviceCaps(&caps); |
+ if (FAILED(hr)) |
+ return false; |
+ |
+ // The MRT path needs at least two simultaneous render targets. |
+ device_supports_multiple_render_targets_ = (caps.NumSimultaneousRTs >= 2); |
+ |
+ // Log statistics about which paths we take. |
+ UMA_HISTOGRAM_BOOLEAN("GPU.AcceleratedSurfaceTransformerCanUseMRT", |
+ device_supports_multiple_render_targets()); |
+ } |
+ |
+ if (!InitShaderCombo(ONE_TEXTURE_FLIP_Y, |
+ kVsOneTextureFlipY, |
+ kPsOneTexture)) { |
+ return false; |
+ } |
+ |
+ // The two-pass shaders are only initialized when the device reports MRT |
+ // support. |
+ if (device_supports_multiple_render_targets()) { |
+ if (!InitShaderCombo(RGB_TO_YV12_FAST__PASS_1_OF_2, |
+ kVsFetch4Pixels, |
+ kPsConvertRGBtoY8UV44)) { |
+ return false; |
+ } |
+ |
+ if (!InitShaderCombo(RGB_TO_YV12_FAST__PASS_2_OF_2, |
+ kVsFetch2Pixels, |
+ kPsConvertUV44toU2V2)) { |
+ return false; |
+ } |
+ } |
+ |
miu
2012/12/27 21:40:17
It doesn't seem like the InitShaderCombo() calls f
ncarter (slow)
2013/01/07 22:49:10
The tests actually needed both paths to work, thou
|
+ // The three-pass shaders are initialized regardless of MRT support. |
+ if (!InitShaderCombo(RGB_TO_YV12_SLOW__PASS_1_OF_3, |
+ kVsFetch4Pixels, |
+ kPsConvertRGBtoY)) { |
+ return false; |
+ } |
+ |
+ if (!InitShaderCombo(RGB_TO_YV12_SLOW__PASS_2_OF_3, |
+ kVsFetch4PixelsScale2, |
+ kPsConvertRGBtoU)) { |
+ return false; |
+ } |
+ |
+ if (!InitShaderCombo(RGB_TO_YV12_SLOW__PASS_3_OF_3, |
+ kVsFetch4PixelsScale2, |
+ kPsConvertRGBtoV)) { |
return false; |
} |
base::win::ScopedComPtr<IDirect3DVertexDeclaration9> vertex_declaration; |
HRESULT hr = device_->CreateVertexDeclaration(g_vertexElements, |
vertex_declaration.Receive()); |
- if (!SUCCEEDED(hr)) { |
- ReleaseAll(); |
+ if (FAILED(hr)) |
+ return false; |
+ hr = device_->SetVertexDeclaration(vertex_declaration); |
+ if (FAILED(hr)) |
return false; |
- } |
- device_->SetVertexDeclaration(vertex_declaration); |
return true; |
} |
bool AcceleratedSurfaceTransformer::InitShaderCombo( |
+ ShaderCombo shader_combo_name, |
const BYTE vertex_shader_instructions[], |
- const BYTE pixel_shader_instructions[], |
- ShaderCombo shader_combo_name) { |
+ const BYTE pixel_shader_instructions[]) { |
HRESULT hr = device_->CreateVertexShader( |
reinterpret_cast<const DWORD*>(vertex_shader_instructions), |
vertex_shaders_[shader_combo_name].Receive()); |
@@ -143,29 +216,40 @@ bool AcceleratedSurfaceTransformer::CopyInverted( |
IDirect3DTexture9* src_texture, |
IDirect3DSurface9* dst_surface, |
const gfx::Size& dst_size) { |
- base::win::ScopedComPtr<IDirect3DSurface9> default_color_target; |
- device()->GetRenderTarget(0, default_color_target.Receive()); |
- if (!SetShaderCombo(SIMPLE_TEXTURE)) |
+ if (!SetShaderCombo(ONE_TEXTURE_FLIP_Y)) |
return false; |
+ ScopedRenderTargetRestorer render_target_restorer(device(), 0); |
device()->SetRenderTarget(0, dst_surface); |
device()->SetTexture(0, src_texture); |
+ device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT); |
apatrick_chromium
2013/01/07 22:56:53
These render states might affect something else us
ncarter (slow)
2013/01/07 23:24:52
AcceleratedSurface isn't doing any 3D rendering at
|
+ device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); |
+ device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP); |
+ device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP); |
- D3DVIEWPORT9 viewport = { |
apatrick_chromium
2013/01/07 22:56:53
How did you get away without setting the viewport
ncarter (slow)
2013/01/07 23:24:52
http://msdn.microsoft.com/en-us/library/windows/de
|
- 0, 0, |
- dst_size.width(), dst_size.height(), |
- 0, 1 |
- }; |
- device()->SetViewport(&viewport); |
+ DrawScreenAlignedQuad(dst_size); |
+ |
+ // Clear surface references. |
+ device()->SetTexture(0, NULL); |
+ return true; |
+} |
- float halfPixelX = -1.0f / dst_size.width(); |
- float halfPixelY = 1.0f / dst_size.height(); |
+// Uploads |size| to the vertex shader as the constant in register c0 and then |
+// submits the fixed quad vertices below between BeginScene() and EndScene(). |
+void AcceleratedSurfaceTransformer::DrawScreenAlignedQuad( |
+    const gfx::Size& size) { |
+  // SetVertexShaderConstantF() reads whole four-float registers, so the |
+  // source array must hold a complete float4 even though only the first two |
+  // components carry data. |
+  const float target_size[4] = { |
+    static_cast<float>(size.width()), |
+    static_cast<float>(size.height()), |
+    0.0f, |
+    0.0f |
+  }; |
+ |
+  // Set the uniform shader constant |kRenderTargetSize|, which is bound |
+  // to register c0. The final argument is a count of float4 registers, not |
+  // of individual floats. |
+  device()->SetVertexShaderConstantF(0, target_size, 1); |
+ |
+ // We always send down the same vertices. The vertex program will take |
+ // care of doing resolution-dependent position adjustment. |
Vertex vertices[] = { |
- { halfPixelX - 1, halfPixelY + 1, 0.5f, 1, 0, 1 }, |
- { halfPixelX + 1, halfPixelY + 1, 0.5f, 1, 1, 1 }, |
- { halfPixelX + 1, halfPixelY - 1, 0.5f, 1, 1, 0 }, |
- { halfPixelX - 1, halfPixelY - 1, 0.5f, 1, 0, 0 } |
+ { -1, +1, 0.5f, 1, 0, 0 }, |
+ { +1, +1, 0.5f, 1, 1, 0 }, |
+ { +1, -1, 0.5f, 1, 1, 1 }, |
+ { -1, -1, 0.5f, 1, 0, 1 } |
}; |
device()->BeginScene(); |
@@ -175,10 +259,6 @@ bool AcceleratedSurfaceTransformer::CopyInverted( |
sizeof(vertices[0])); |
device()->EndScene(); |
- // Clear surface references. |
- device()->SetRenderTarget(0, default_color_target); |
- device()->SetTexture(0, NULL); |
- return true; |
} |
// Resize an RGB surface using repeated linear interpolation. |
@@ -186,8 +266,8 @@ bool AcceleratedSurfaceTransformer::ResizeBilinear( |
IDirect3DSurface9* src_surface, |
const gfx::Rect& src_subrect, |
IDirect3DSurface9* dst_surface) { |
- gfx::Size src_size = GetSize(src_surface); |
- gfx::Size dst_size = GetSize(dst_surface); |
+ gfx::Size src_size = d3d_utils::GetSize(src_surface); |
+ gfx::Size dst_size = d3d_utils::GetSize(dst_surface); |
if (src_size.IsEmpty() || dst_size.IsEmpty()) |
return false; |
@@ -246,10 +326,212 @@ bool AcceleratedSurfaceTransformer::ResizeBilinear( |
return true; |
} |
+// Allocates the Y, U and V output surfaces for a |dst_size| conversion and |
+// then runs the RGB-to-YV12 conversion of |src_surface| into them, choosing |
+// the MRT implementation when the device supports it and the non-MRT one |
+// otherwise. Returns false if allocation or the chosen conversion fails. |
+bool AcceleratedSurfaceTransformer::TransformRGBToYV12( |
+    IDirect3DTexture9* src_surface, |
+    const gfx::Size& dst_size, |
+    IDirect3DSurface9** dst_y, |
+    IDirect3DSurface9** dst_u, |
+    IDirect3DSurface9** dst_v) { |
+  gfx::Size y_plane_size; |
+  gfx::Size uv_plane_size; |
+  const bool buffers_ready = AllocYUVBuffers( |
+      dst_size, &y_plane_size, &uv_plane_size, dst_y, dst_u, dst_v); |
+  if (!buffers_ready) |
+    return false; |
+ |
+  // Devices without multiple render targets take the three-pass route. |
+  if (!device_supports_multiple_render_targets()) { |
+    return TransformRGBToYV12_WithoutMRT( |
+        src_surface, dst_size, y_plane_size, uv_plane_size, |
+        *dst_y, *dst_u, *dst_v); |
+  } |
+ |
+  return TransformRGBToYV12_MRT( |
+      src_surface, dst_size, y_plane_size, uv_plane_size, |
+      *dst_y, *dst_u, *dst_v); |
+} |
+ |
+// Computes the packed surface sizes for a |dst_size| YV12 image, writes them |
+// to |y_size| and |uv_size|, and creates one lockable surface per plane in |
+// |dst_y|, |dst_u| and |dst_v|. Returns false as soon as one surface cannot |
+// be created; later surfaces are then not attempted. |
+bool AcceleratedSurfaceTransformer::AllocYUVBuffers( |
+    const gfx::Size& dst_size, |
+    gfx::Size* y_size, |
+    gfx::Size* uv_size, |
+    IDirect3DSurface9** dst_y, |
+    IDirect3DSurface9** dst_u, |
+    IDirect3DSurface9** dst_v) { |
+  // Y keeps the full height; its width is divided by 4 (rounded up) because |
+  // the values are packed into 4 components. |
+  const int packed_y_width = (dst_size.width() + 3) / 4; |
+  *y_size = gfx::Size(packed_y_width, dst_size.height()); |
+ |
+  // U and V are each half the size of Y in both dimensions, rounded up. |
+  const int packed_uv_width = (y_size->width() + 1) / 2; |
+  const int packed_uv_height = (y_size->height() + 1) / 2; |
+  *uv_size = gfx::Size(packed_uv_width, packed_uv_height); |
+ |
+  // Short-circuit evaluation stops at the first surface that fails. |
+  return d3d_utils::CreateTemporaryLockableSurface(device(), *y_size, dst_y) && |
+         d3d_utils::CreateTemporaryLockableSurface(device(), *uv_size, dst_u) && |
+         d3d_utils::CreateTemporaryLockableSurface(device(), *uv_size, dst_v); |
+} |
+ |
+// Converts the RGB texture |src_surface| to YV12 in two passes, writing to |
+// two render targets per pass: |
+//   Pass 1: RGB --(scaled)--> YYYY (|dst_y|) + UUVV (intermediate texture). |
+//   Pass 2: UUVV -> UUUU (|dst_u|) + VVVV (|dst_v|). |
+// Returns false if the intermediate texture cannot be created or if either |
+// shader combo cannot be bound. Color targets 0 and 1 are rebound to their |
+// previous surfaces on every exit path. |packed_uv_size| is not used here. |
+bool AcceleratedSurfaceTransformer::TransformRGBToYV12_MRT( |
+    IDirect3DTexture9* src_surface, |
+    const gfx::Size& dst_size, |
+    const gfx::Size& packed_y_size, |
+    const gfx::Size& packed_uv_size, |
+    IDirect3DSurface9* dst_y, |
+    IDirect3DSurface9* dst_u, |
+    IDirect3DSurface9* dst_v) { |
+  TRACE_EVENT0("gpu", "RGBToYV12_MRT"); |
+ |
+  ScopedRenderTargetRestorer color0_restorer(device(), 0); |
+  ScopedRenderTargetRestorer color1_restorer(device(), 1); |
+ |
+  // Create an intermediate surface to hold the UUVV values. This is color |
+  // target 1 for the first pass, and texture 0 for the second pass. Its |
+  // values are not read afterwards. |
+  base::win::ScopedComPtr<IDirect3DTexture9> uv_as_texture; |
+  base::win::ScopedComPtr<IDirect3DSurface9> uv_as_surface; |
+  if (!d3d_utils::CreateTemporaryRenderTargetTexture(device(), |
+                                                     packed_y_size, |
+                                                     uv_as_texture.Receive(), |
+                                                     uv_as_surface.Receive())) { |
+    return false; |
+  } |
+ |
+  // Clamping is required if (dst_size.width() % 8 != 0) or if |
+  // (dst_size.height != 0), so we set it always. Both passes rely on this. |
+  device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP); |
+  device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP); |
+ |
+  ///////////////////////////////////////// |
+  // Pass 1: RGB --(scaled)--> YYYY + UUVV |
+  // Bail out rather than draw with whatever shaders were bound before. |
+  if (!SetShaderCombo(RGB_TO_YV12_FAST__PASS_1_OF_2)) |
+    return false; |
+ |
+  // Enable bilinear filtering if scaling is required. The filtering will take |
+  // place entirely in the first pass. |
+  if (d3d_utils::GetSize(src_surface) != dst_size) { |
+    device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); |
+    device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); |
+  } else { |
+    device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT); |
+    device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); |
+  } |
+ |
+  device()->SetTexture(0, src_surface); |
+  device()->SetRenderTarget(0, dst_y); |
+  device()->SetRenderTarget(1, uv_as_surface); |
+  DrawScreenAlignedQuad(dst_size); |
+ |
+  ///////////////////////////////////////// |
+  // Pass 2: UUVV -> UUUU + VVVV |
+  if (!SetShaderCombo(RGB_TO_YV12_FAST__PASS_2_OF_2)) { |
+    // Texture stage 0 still references |src_surface|; drop it before leaving. |
+    device()->SetTexture(0, NULL); |
+    return false; |
+  } |
+ |
+  // The second pass uses bilinear minification to achieve vertical scaling, |
+  // so enable it always. |
+  device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT); |
+  device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); |
+ |
+  device()->SetTexture(0, uv_as_texture); |
+  device()->SetRenderTarget(0, dst_u); |
+  device()->SetRenderTarget(1, dst_v); |
+  DrawScreenAlignedQuad(packed_y_size); |
+ |
+  // Clear surface references. |
+  device()->SetTexture(0, NULL); |
+  return true; |
+} |
+ |
+// Converts the RGB texture |src_surface| to YV12 using one render target at a |
+// time, in three passes (RGB -> Y, RGB -> U, RGB -> V). When |src_surface| is |
+// not already |dst_size|, it is first stretched into a temporary texture. |
+// Returns false if the temporary texture cannot be obtained, if the stretch |
+// fails, or if a shader combo cannot be bound. Color target 0 is rebound to |
+// its previous surface on every exit path. |packed_y_size| and |
+// |packed_uv_size| are not used here. |
+bool AcceleratedSurfaceTransformer::TransformRGBToYV12_WithoutMRT( |
+    IDirect3DTexture9* src_surface, |
+    const gfx::Size& dst_size, |
+    const gfx::Size& packed_y_size, |
+    const gfx::Size& packed_uv_size, |
+    IDirect3DSurface9* dst_y, |
+    IDirect3DSurface9* dst_u, |
+    IDirect3DSurface9* dst_v) { |
+  TRACE_EVENT0("gpu", "RGBToYV12_WithoutMRT"); |
+ |
+  ScopedRenderTargetRestorer color0_restorer(device(), 0); |
+ |
+  base::win::ScopedComPtr<IDirect3DTexture9> scaled_src_surface; |
+ |
+  // If scaling is requested, do it to a temporary texture. The MRT path |
+  // gets a scale for free, so we need to support it here too (even though |
+  // it's an extra operation). |
+  if (d3d_utils::GetSize(src_surface) == dst_size) { |
+    scaled_src_surface = src_surface; |
+  } else { |
+    base::win::ScopedComPtr<IDirect3DSurface9> src_level0; |
+    HRESULT hr = src_surface->GetSurfaceLevel(0, src_level0.Receive()); |
+    if (FAILED(hr)) |
+      return false; |
+ |
+    base::win::ScopedComPtr<IDirect3DSurface9> dst_level0; |
+    if (!d3d_utils::CreateTemporaryRenderTargetTexture( |
+            device(), dst_size, |
+            scaled_src_surface.Receive(), dst_level0.Receive())) { |
+      return false; |
+    } |
+ |
+    // A failed stretch would leave the temporary texture without the source |
+    // image, so treat it as a conversion failure. |
+    hr = device()->StretchRect(src_level0, NULL, dst_level0, NULL, |
+                               D3DTEXF_LINEAR); |
+    if (FAILED(hr)) |
+      return false; |
apatrick_chromium
2013/01/07 22:56:53
Maybe this isn't an issue for your purposes but if
ncarter (slow)
2013/01/07 23:24:52
Yes, definitely; for the first phase I'm planning
|
+  } |
+ |
+  // Input texture is the same for all three passes. |
+  device()->SetTexture(0, scaled_src_surface); |
+ |
+  // Clamping is required if (dst_size.width() % 8 != 0) or if |
+  // (dst_size.height != 0), so we set it always. All passes rely on this. |
+  device()->SetSamplerState(0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP); |
+  device()->SetSamplerState(0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP); |
+ |
+  ///////////////////// |
+  // Pass 1: RGB -> Y. |
+  // On any bind failure below, unbind the input texture and report failure |
+  // instead of drawing with whatever shaders were bound before. |
+  if (!SetShaderCombo(RGB_TO_YV12_SLOW__PASS_1_OF_3)) { |
+    device()->SetTexture(0, NULL); |
+    return false; |
+  } |
+ |
+  // Pass 1 just needs point sampling. |
+  device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT); |
+  device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); |
+ |
+  device()->SetRenderTarget(0, dst_y); |
+  DrawScreenAlignedQuad(dst_size); |
+ |
+  // Passes 2 and 3 rely on bilinear minification to downsample U and V. |
+  device()->SetSamplerState(0, D3DSAMP_MAGFILTER, D3DTEXF_POINT); |
+  device()->SetSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); |
+ |
+  ///////////////////// |
+  // Pass 2: RGB -> U. |
+  if (!SetShaderCombo(RGB_TO_YV12_SLOW__PASS_2_OF_3)) { |
+    device()->SetTexture(0, NULL); |
+    return false; |
+  } |
+  device()->SetRenderTarget(0, dst_u); |
+  DrawScreenAlignedQuad(dst_size); |
+ |
+  ///////////////////// |
+  // Pass 3: RGB -> V. |
+  if (!SetShaderCombo(RGB_TO_YV12_SLOW__PASS_3_OF_3)) { |
+    device()->SetTexture(0, NULL); |
+    return false; |
+  } |
+  device()->SetRenderTarget(0, dst_v); |
+  DrawScreenAlignedQuad(dst_size); |
+ |
+  // Clear surface references. |
+  device()->SetTexture(0, NULL); |
+  return true; |
+} |
+ |
+// Returns the device pointer held in |device_|. |
IDirect3DDevice9* AcceleratedSurfaceTransformer::device() { |
return device_; |
} |
+// Returns the stored |device_supports_multiple_render_targets_| flag. |
+bool AcceleratedSurfaceTransformer::device_supports_multiple_render_targets() { |
miu
2012/12/27 21:40:17
nit: Consider inlining (allowed for simple accesso
ncarter (slow)
2013/01/07 22:49:10
Done.
|
+ return device_supports_multiple_render_targets_; |
+} |
+ |
bool AcceleratedSurfaceTransformer::SetShaderCombo(ShaderCombo combo) { |
HRESULT hr = device()->SetVertexShader(vertex_shaders_[combo]); |
if (!SUCCEEDED(hr)) |
@@ -258,4 +540,4 @@ bool AcceleratedSurfaceTransformer::SetShaderCombo(ShaderCombo combo) { |
if (!SUCCEEDED(hr)) |
return false; |
return true; |
-} |
+} |