Index: ui/surface/accelerated_surface_win.cc |
=================================================================== |
--- ui/surface/accelerated_surface_win.cc (revision 175160) |
+++ ui/surface/accelerated_surface_win.cc (working copy) |
@@ -8,6 +8,7 @@ |
#include <windows.h> |
#include <algorithm> |
+#include "accelerated_surface_win_hlsl_compiled.h" |
#include "base/bind.h" |
#include "base/bind_helpers.h" |
#include "base/callback.h" |
@@ -29,15 +30,33 @@ |
#include "ui/base/win/hwnd_util.h" |
#include "ui/gfx/rect.h" |
#include "ui/gl/gl_switches.h" |
-#include "ui/surface/accelerated_surface_transformer_win.h" |
-#include "ui/surface/d3d9_utils_win.h" |
-namespace d3d_utils = ui_surface_d3d9_utils; |
+using ui_surface::AcceleratedSurfaceWinHLSL::kVsOneTexture; |
+using ui_surface::AcceleratedSurfaceWinHLSL::kPsOneTexture; |
+ |
+ |
namespace { |
+typedef HRESULT (WINAPI *Direct3DCreate9ExFunc)(UINT sdk_version, |
+ IDirect3D9Ex **d3d); |
+ |
+const wchar_t kD3D9ModuleName[] = L"d3d9.dll"; |
+const char kCreate3D9DeviceExName[] = "Direct3DCreate9Ex"; |
+ |
const char kUseOcclusionQuery[] = "use-occlusion-query"; |
+struct Vertex { |
+ float x, y, z, w; |
+ float u, v; |
+}; |
+ |
+const static D3DVERTEXELEMENT9 g_vertexElements[] = { |
+ { 0, 0, D3DDECLTYPE_FLOAT4, 0, D3DDECLUSAGE_POSITION, 0 }, |
+ { 0, 16, D3DDECLTYPE_FLOAT2, 0, D3DDECLUSAGE_TEXCOORD, 0 }, |
+ D3DDECL_END() |
+}; |
+ |
UINT GetPresentationInterval() { |
if (CommandLine::ForCurrentProcess()->HasSwitch(switches::kDisableGpuVsync)) |
return D3DPRESENT_INTERVAL_IMMEDIATE; |
@@ -49,6 +68,53 @@ |
return CommandLine::ForCurrentProcess()->HasSwitch(kUseOcclusionQuery); |
} |
+// Calculate the number necessary to transform |src_subrect| into |dst_size| |
+// by repeating downsampling of the image of |src_subrect| by a factor no more |
+// than 2. |
+int GetResampleCount(const gfx::Rect& src_subrect, |
+ const gfx::Size& dst_size, |
+ const gfx::Size& back_buffer_size) { |
+ // At least one copy is required, since the back buffer itself is not |
+ // lockable. |
+ int min_resample_count = 1; |
+ int width_count = 0; |
+ int width = src_subrect.width(); |
+ while (width > dst_size.width()) { |
+ ++width_count; |
+ width >>= 1; |
+ } |
+ int height_count = 0; |
+ int height = src_subrect.height(); |
+ while (height > dst_size.height()) { |
+ ++height_count; |
+ height >>= 1; |
+ } |
+ return std::max(std::max(width_count, height_count), |
+ min_resample_count); |
+} |
+ |
+// Returns half the size of |size| no smaller than |min_size|. |
+gfx::Size GetHalfSizeNoLessThan(const gfx::Size& size, |
+ const gfx::Size& min_size) { |
+ return gfx::Size(std::max(min_size.width(), size.width() / 2), |
+ std::max(min_size.height(), size.height() / 2)); |
+} |
+ |
+bool CreateTemporarySurface(IDirect3DDevice9* device, |
+ const gfx::Size& size, |
+ IDirect3DSurface9** surface) { |
+ HRESULT hr = device->CreateRenderTarget( |
+ size.width(), |
+ size.height(), |
+ D3DFMT_A8R8G8B8, |
+ D3DMULTISAMPLE_NONE, |
+ 0, |
+ TRUE, |
+ surface, |
+ NULL); |
+ return SUCCEEDED(hr); |
+} |
+ |
} // namespace |
// A PresentThread is a thread that is dedicated to presenting surfaces to a |
@@ -60,9 +126,6 @@ |
IDirect3DDevice9Ex* device() { return device_.get(); } |
IDirect3DQuery9* query() { return query_.get(); } |
- AcceleratedSurfaceTransformer* surface_transformer() { |
- return &surface_transformer_; |
- } |
void InitDevice(); |
void ResetDevice(); |
@@ -78,11 +141,11 @@ |
base::ScopedNativeLibrary d3d_module_; |
base::win::ScopedComPtr<IDirect3DDevice9Ex> device_; |
+ |
// This query is used to wait until a certain amount of progress has been |
// made by the GPU and it is safe for the producer to modify its shared |
// texture again. |
base::win::ScopedComPtr<IDirect3DQuery9> query_; |
- AcceleratedSurfaceTransformer surface_transformer_; |
DISALLOW_COPY_AND_ASSIGN(PresentThread); |
}; |
@@ -135,7 +198,7 @@ |
return; |
TRACE_EVENT0("gpu", "PresentThread::Init"); |
- d3d_utils::LoadD3D9(&d3d_module_); |
+ d3d_module_.Reset(base::LoadNativeLibrary(FilePath(kD3D9ModuleName), NULL)); |
ResetDevice(); |
} |
@@ -147,32 +210,92 @@ |
query_ = NULL; |
device_ = NULL; |
- if (!d3d_utils::CreateDevice(d3d_module_, |
- D3DDEVTYPE_HAL, |
- GetPresentationInterval(), |
- device_.Receive())) { |
+ Direct3DCreate9ExFunc create_func = reinterpret_cast<Direct3DCreate9ExFunc>( |
+ d3d_module_.GetFunctionPointer(kCreate3D9DeviceExName)); |
+ if (!create_func) |
return; |
- } |
+ base::win::ScopedComPtr<IDirect3D9Ex> d3d; |
+ HRESULT hr = create_func(D3D_SDK_VERSION, d3d.Receive()); |
+ if (FAILED(hr)) |
+ return; |
+ |
+ // Any old window will do to create the device. In practice the window to |
+ // present to is an argument to IDirect3DDevice9::Present. |
+ HWND window = GetShellWindow(); |
+ |
+ D3DPRESENT_PARAMETERS parameters = { 0 }; |
+ parameters.BackBufferWidth = 1; |
+ parameters.BackBufferHeight = 1; |
+ parameters.BackBufferCount = 1; |
+ parameters.BackBufferFormat = D3DFMT_A8R8G8B8; |
+ parameters.hDeviceWindow = window; |
+ parameters.Windowed = TRUE; |
+ parameters.Flags = 0; |
+ parameters.PresentationInterval = GetPresentationInterval(); |
+ parameters.SwapEffect = D3DSWAPEFFECT_COPY; |
+ |
+ hr = d3d->CreateDeviceEx( |
+ D3DADAPTER_DEFAULT, |
+ D3DDEVTYPE_HAL, |
+ window, |
+ D3DCREATE_FPU_PRESERVE | D3DCREATE_SOFTWARE_VERTEXPROCESSING | |
+ D3DCREATE_DISABLE_PSGP_THREADING | D3DCREATE_MULTITHREADED, |
+ ¶meters, |
+ NULL, |
+ device_.Receive()); |
+ if (FAILED(hr)) |
+ return; |
+ |
if (UsingOcclusionQuery()) { |
- HRESULT hr = device_->CreateQuery(D3DQUERYTYPE_OCCLUSION, query_.Receive()); |
+ hr = device_->CreateQuery(D3DQUERYTYPE_OCCLUSION, query_.Receive()); |
if (FAILED(hr)) { |
device_ = NULL; |
return; |
} |
} else { |
- HRESULT hr = device_->CreateQuery(D3DQUERYTYPE_EVENT, query_.Receive()); |
+ hr = device_->CreateQuery(D3DQUERYTYPE_EVENT, query_.Receive()); |
if (FAILED(hr)) { |
device_ = NULL; |
return; |
} |
} |
- if (!surface_transformer_.Init(device_)) { |
+ base::win::ScopedComPtr<IDirect3DVertexShader9> vertex_shader; |
+ hr = device_->CreateVertexShader( |
+ reinterpret_cast<const DWORD*>(kVsOneTexture), |
+ vertex_shader.Receive()); |
+ if (FAILED(hr)) { |
+ device_ = NULL; |
query_ = NULL; |
+ return; |
+ } |
+ |
+ device_->SetVertexShader(vertex_shader); |
+ |
+ base::win::ScopedComPtr<IDirect3DPixelShader9> pixel_shader; |
+ hr = device_->CreatePixelShader( |
+ reinterpret_cast<const DWORD*>(kPsOneTexture), |
+ pixel_shader.Receive()); |
+ |
+ if (FAILED(hr)) { |
device_ = NULL; |
+ query_ = NULL; |
return; |
} |
+ |
+ device_->SetPixelShader(pixel_shader); |
+ |
+ base::win::ScopedComPtr<IDirect3DVertexDeclaration9> vertex_declaration; |
+ hr = device_->CreateVertexDeclaration(g_vertexElements, |
+ vertex_declaration.Receive()); |
+ if (FAILED(hr)) { |
+ device_ = NULL; |
+ query_ = NULL; |
+ return; |
+ } |
+ |
+ device_->SetVertexDeclaration(vertex_declaration); |
} |
void PresentThread::Init() { |
@@ -182,7 +305,6 @@ |
void PresentThread::CleanUp() { |
// The D3D device and query are leaked because destroying the associated D3D |
// query crashes some Intel drivers. |
- surface_transformer_.DetachAll(); |
device_.Detach(); |
query_.Detach(); |
} |
@@ -260,7 +382,6 @@ |
hidden_(true) { |
} |
-// static |
scoped_refptr<AcceleratedPresenter> AcceleratedPresenter::GetForWindow( |
gfx::PluginWindowHandle window) { |
return g_accelerated_presenter_map.Pointer()->GetPresenter(window); |
@@ -349,9 +470,6 @@ |
if (!swap_chain_) |
return false; |
- AcceleratedSurfaceTransformer* gpu_ops = |
- present_thread_->surface_transformer(); |
- |
base::win::ScopedComPtr<IDirect3DSurface9> back_buffer; |
HRESULT hr = swap_chain_->GetBackBuffer(0, |
D3DBACKBUFFER_TYPE_MONO, |
@@ -372,23 +490,65 @@ |
// the requested src subset. Clip to the actual back buffer. |
gfx::Rect src_subrect = requested_src_subrect; |
src_subrect.Intersect(gfx::Rect(back_buffer_size)); |
+ |
+ // Set up intermediate buffers needed for downsampling. |
+ const int resample_count = |
+ GetResampleCount(src_subrect, dst_size, back_buffer_size); |
base::win::ScopedComPtr<IDirect3DSurface9> final_surface; |
- { |
- TRACE_EVENT0("gpu", "CreateTemporaryLockableSurface"); |
- if (!d3d_utils::CreateTemporaryLockableSurface(present_thread_->device(), |
- dst_size, |
- final_surface.Receive())) { |
+ base::win::ScopedComPtr<IDirect3DSurface9> temp_buffer[2]; |
+ if (resample_count == 0) |
+ final_surface = back_buffer; |
+ if (resample_count > 0) { |
+ TRACE_EVENT0("gpu", "CreateTemporarySurface"); |
+ if (!CreateTemporarySurface(present_thread_->device(), |
+ dst_size, |
+ final_surface.Receive())) |
return false; |
- } |
} |
+ const gfx::Size half_size = |
+ GetHalfSizeNoLessThan(src_subrect.size(), dst_size); |
+ if (resample_count > 1) { |
+ TRACE_EVENT0("gpu", "CreateTemporarySurface"); |
+ if (!CreateTemporarySurface(present_thread_->device(), |
+ half_size, |
+ temp_buffer[0].Receive())) |
+ return false; |
+ } |
+ if (resample_count > 2) { |
+ TRACE_EVENT0("gpu", "CreateTemporarySurface"); |
+ const gfx::Size quarter_size = GetHalfSizeNoLessThan(half_size, dst_size); |
+ if (!CreateTemporarySurface(present_thread_->device(), |
+ quarter_size, |
+ temp_buffer[1].Receive())) |
+ return false; |
+ } |
- { |
- // Let the surface transformer start the resize into |final_surface|. |
- TRACE_EVENT0("gpu", "ResizeBilinear"); |
- if (!gpu_ops->ResizeBilinear(back_buffer, src_subrect, final_surface)) |
+ // Repeat downsampling the surface until its size becomes identical to |
+ // |dst_size|. We keep the factor of each downsampling no more than two |
+ // because using a factor more than two can introduce aliasing. |
+ RECT read_rect = src_subrect.ToRECT(); |
+ gfx::Size write_size = half_size; |
+ int read_buffer_index = 1; |
+ int write_buffer_index = 0; |
+ for (int i = 0; i < resample_count; ++i) { |
+ TRACE_EVENT0("gpu", "StretchRect"); |
+ base::win::ScopedComPtr<IDirect3DSurface9> read_buffer = |
+ (i == 0) ? back_buffer : temp_buffer[read_buffer_index]; |
+ base::win::ScopedComPtr<IDirect3DSurface9> write_buffer = |
+ (i == resample_count - 1) ? final_surface : |
+ temp_buffer[write_buffer_index]; |
+ RECT write_rect = gfx::Rect(write_size).ToRECT(); |
+ hr = present_thread_->device()->StretchRect(read_buffer, |
+ &read_rect, |
+ write_buffer, |
+ &write_rect, |
+ D3DTEXF_LINEAR); |
+ if (FAILED(hr)) |
return false; |
+ read_rect = write_rect; |
+ write_size = GetHalfSizeNoLessThan(write_size, dst_size); |
+ std::swap(read_buffer_index, write_buffer_index); |
} |
- |
D3DLOCKED_RECT locked_rect; |
// Empirical evidence seems to suggest that LockRect and memcpy are faster |
@@ -556,13 +716,18 @@ |
} |
if (!source_texture_.get()) { |
- TRACE_EVENT0("gpu", "OpenSharedTexture"); |
- if (!d3d_utils::OpenSharedTexture(present_thread_->device(), |
- surface_handle, |
- size, |
- source_texture_.Receive())) { |
+ TRACE_EVENT0("gpu", "CreateTexture"); |
+ HANDLE handle = reinterpret_cast<HANDLE>(surface_handle); |
+ hr = present_thread_->device()->CreateTexture(size.width(), |
+ size.height(), |
+ 1, |
+ D3DUSAGE_RENDERTARGET, |
+ D3DFMT_A8R8G8B8, |
+ D3DPOOL_DEFAULT, |
+ source_texture_.Receive(), |
+ &handle); |
+ if (FAILED(hr)) |
return; |
- } |
} |
base::win::ScopedComPtr<IDirect3DSurface9> source_surface; |
@@ -589,15 +754,44 @@ |
{ |
TRACE_EVENT0("gpu", "Copy"); |
+ // Use a simple pixel / vertex shader pair to render a quad that flips the |
+ // source texture on the vertical axis. |
+ IDirect3DSurface9 *default_render_target = NULL; |
+ present_thread_->device()->GetRenderTarget(0, &default_render_target); |
+ |
+ present_thread_->device()->SetRenderTarget(0, dest_surface); |
+ present_thread_->device()->SetTexture(0, source_texture_); |
+ |
+ D3DVIEWPORT9 viewport = { |
+ 0, 0, |
+ size.width(), size.height(), |
+ 0, 1 |
+ }; |
+ present_thread_->device()->SetViewport(&viewport); |
+ |
+ float halfPixelX = -1.0f / size.width(); |
+ float halfPixelY = 1.0f / size.height(); |
+ Vertex vertices[] = { |
+ { halfPixelX - 1, halfPixelY + 1, 0.5f, 1, 0, 1 }, |
+ { halfPixelX + 1, halfPixelY + 1, 0.5f, 1, 1, 1 }, |
+ { halfPixelX + 1, halfPixelY - 1, 0.5f, 1, 1, 0 }, |
+ { halfPixelX - 1, halfPixelY - 1, 0.5f, 1, 0, 0 } |
+ }; |
+ |
if (UsingOcclusionQuery()) { |
present_thread_->query()->Issue(D3DISSUE_BEGIN); |
} |
- // Copy while flipping the source texture on the vertical axis. |
- bool result = present_thread_->surface_transformer()->CopyInverted( |
- source_texture_, dest_surface, size); |
- if (!result) |
- return; |
+ present_thread_->device()->BeginScene(); |
+ present_thread_->device()->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, |
+ 2, |
+ vertices, |
+ sizeof(vertices[0])); |
+ present_thread_->device()->EndScene(); |
+ |
+ present_thread_->device()->SetTexture(0, NULL); |
+ present_thread_->device()->SetRenderTarget(0, default_render_target); |
+ default_render_target->Release(); |
} |
hr = present_thread_->query()->Issue(D3DISSUE_END); |