ui/surface/accelerated_surface_transformer_win.hlsl - Issue 11280318: YUV conversion on the GPU.

Unified Diff: ui/surface/accelerated_surface_transformer_win.hlsl

Issue 11280318: YUV conversion on the GPU. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Test improvements. Created 8 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« ui/surface/accelerated_surface_transformer_win.cc ('K') | « ui/surface/accelerated_surface_transformer_win.cc ('k') | ui/surface/accelerated_surface_transformer_win_unittest.cc » ('j') | ui/surface/accelerated_surface_transformer_win_unittest.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: ui/surface/accelerated_surface_transformer_win.hlsl

diff --git a/ui/surface/accelerated_surface_transformer_win.hlsl b/ui/surface/accelerated_surface_transformer_win.hlsl

index 0bd6fda78df22ef6e39b0b06c33911606afcd0bf..90b5cf8abb4d9355488ce0e5e17aa06a2599abf8 100644

--- a/ui/surface/accelerated_surface_transformer_win.hlsl

+++ b/ui/surface/accelerated_surface_transformer_win.hlsl

@@ -1,28 +1,301 @@

-// Use of this source code is governed by a BSD-style license that can be

-// found in the LICENSE file.

-// @gyp_namespace(ui_surface)

-// Compiles into C++ as 'accelerated_surface_transformer_win_hlsl_compiled.h'

-struct Vertex {

- float4 position : POSITION;

- float2 texCoord : TEXCOORD0;

-};

-texture t;

-sampler s;

-// @gyp_compile(vs_2_0, vsOneTexture)

-//

-// Passes a position and texture coordinate to the pixel shader.

-Vertex vsOneTexture(Vertex input) {

- return input;

-};

-// @gyp_compile(ps_2_0, psOneTexture)

-//

-// Samples a texture at the given texture coordinate and returns the result.

-float4 psOneTexture(float2 texCoord : TEXCOORD0) : COLOR0 {

- return tex2D(s, texCoord);

-};

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+// @gyp_namespace(ui_surface)

+// Compiles into C++ as 'accelerated_surface_transformer_win_hlsl_compiled.h'

+struct Vertex {

+ float4 position : POSITION;

+ float2 texCoord : TEXCOORD0;

+};

+texture t;

+sampler s;

+extern uniform float2 kRenderTargetSize : c0;

+// @gyp_compile(vs_2_0, vsOneTextureFlipY)

+//

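+// Flips the texture coordinate vertically, offsets the position by

+// (-1 / width, +1 / height) of |kRenderTargetSize|, and passes the resulting

+// position and texture coordinate to the pixel shader.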

+Vertex vsOneTextureFlipY(Vertex input) {

+ input.texCoord.y = 1 - input.texCoord.y;

+ input.position.x += -1 / kRenderTargetSize.x;

+ input.position.y += 1 / kRenderTargetSize.y;

+ return input;

+};

+// @gyp_compile(ps_2_0, psOneTexture)

+//

+// Samples a texture at the given texture coordinate and returns the result.

+float4 psOneTexture(float2 texCoord : TEXCOORD0) : COLOR0 {

+ return tex2D(s, texCoord);

+};

+// Return |value| rounded up to the nearest multiple of |multiple|.

+float alignTo(float value, float multiple) {

+ // |multiple| is usually a compile-time constant; this check allows

+ // the compiler to avoid the fmod when possible.

+ if (multiple == 1)

+ return value;

+ // Biasing the value provides numeric stability. We expect |value| to

+ // be an integer; this prevents 4.001 from being rounded up to 8.

+ float biased_value = value - 0.5;

+ return biased_value + multiple - fmod(biased_value, multiple);

+float4 packForByteOrder(float4 value) {

+ return value.bgra;

+// Adjust the input vertex to address the correct range of texels. This depends

+// on the value of the shader constant |kRenderTargetSize|, as well as an

+// alignment factor |align| that effectively specifies the footprint of the

+// texel samples done by this shader pass, and is used to correct when that

+// footprint size doesn't align perfectly with the actual input size.

+Vertex adjustForAlignmentAndPacking(Vertex vtx, float2 align) {

+ float src_width = kRenderTargetSize.x;

+ float src_height = kRenderTargetSize.y;

+ // Because our caller expects to be sampling |align.x| many pixels from src at

+ // a time, if src's width isn't evenly divisible by |align.x|, it is necessary

+ // to pretend that the source is slightly bigger than it is.

+ float bloated_src_width = alignTo(src_width, align.x);

+ float bloated_src_height = alignTo(src_height, align.y);

+ // When bloated_src_width != src_width, we'll adjust the texture coordinates

+ // to sample past the edge of the vtx; clamping will produce extra copies of

+ // the last row.

+ float texture_x_scale = bloated_src_width / src_width;

+ float texture_y_scale = bloated_src_height / src_height;

+ // Adjust positions so that we're addressing full fragments in the output, per

+ // the top-left filling convention. The shifts would be equivalent to

+ // 1/dst_width and 1/dst_height, if we were to calculate those explicitly.

+ vtx.position.x -= align.x / bloated_src_width;

+ vtx.position.y += align.y / bloated_src_height;

+ // Apply the texture scale

+ vtx.texCoord.x *= texture_x_scale;

+ vtx.texCoord.y *= texture_y_scale;

+ return vtx;

+///////////////////////////////////////////////////////////////////////

+// RGB24 to YV12 in two passes; writing two 8888 targets each pass.

+//

+// YV12 is full-resolution luma and half-resolution red/blue chroma.

miu 2012/12/27 21:40:17 s/green/blue/

ncarter (slow) 2013/01/07 22:49:10 Done.

+//

+// (original)

+// XRGB XRGB XRGB XRGB XRGB XRGB XRGB XRGB

+// |

+// | (y plane) (temporary)

+// | YYYY YYYY UVUV UVUV

+// +--> { YYYY YYYY + UVUV UVUV }

+// YYYY YYYY UVUV UVUV

+// First YYYY YYYY UVUV UVUV

+// pass YYYY YYYY UVUV UVUV

+// YYYY YYYY UVUV UVUV

+// |

+// | (u plane) (v plane)

+// Second | UUUU VVVV

+// pass +--> { UUUU + VVVV }

+// UUUU VVVV

+//

+///////////////////////////////////////////////////////////////////////

+// Phase one of RGB24->YV12 conversion: vsFetch4Pixels/psConvertRGBtoY8UV44

+//

+// @gyp_compile(vs_2_0, vsFetch4Pixels)

+// @gyp_compile(ps_2_0, psConvertRGBtoY8UV44)

+//

+// Writes four source pixels at a time to a full-size Y plane and a half-width

+// interleaved UV plane. After execution, the Y plane is complete but the UV

+// planes still need to be de-interleaved and vertically scaled.

+//

+void vsFetch4Pixels(in Vertex vertex,

+ out float4 position : POSITION,

+ out float2 texCoord0 : TEXCOORD0,

+ out float2 texCoord1 : TEXCOORD1,

+ out float2 texCoord2 : TEXCOORD2,

+ out float2 texCoord3 : TEXCOORD3) {

+ Vertex adjusted = adjustForAlignmentAndPacking(vertex, float2(4, 1));

+ // Set up four taps, aligned to texel centers if the src's true size is

+ // |kRenderTargetSize|, and doing bilinear interpolation otherwise.

+ float2 one_texel_x = float2(1 / kRenderTargetSize.x, 0);

+ position = adjusted.position;

+ texCoord0 = adjusted.texCoord - 1.5f * one_texel_x;

+ texCoord1 = adjusted.texCoord - 0.5f * one_texel_x;

+ texCoord2 = adjusted.texCoord + 0.5f * one_texel_x;

+ texCoord3 = adjusted.texCoord + 1.5f * one_texel_x;

+};

+struct YV16QuadPixel

+ float4 YYYY : COLOR0;

+ float4 UUVV : COLOR1;

+};

+// Color conversion constants.

+static const float3x1 rgb_to_y = float3x1( +0.257f, +0.504f, +0.098f );

+static const float3x1 rgb_to_u = float3x1( -0.148f, -0.291f, +0.439f );

+static const float3x1 rgb_to_v = float3x1( +0.439f, -0.368f, -0.071f );

+static const float y_bias = 0.0625f;

+static const float uv_bias = 0.5f;

+YV16QuadPixel psConvertRGBtoY8UV44(float2 texCoord0 : TEXCOORD0,

+ float2 texCoord1 : TEXCOORD1,

+ float2 texCoord2 : TEXCOORD2,

+ float2 texCoord3 : TEXCOORD3) {

+ // Color conversion constants.

+ const float3x1 rgb_to_u_half = rgb_to_u / 2;

miu 2012/12/27 21:40:17 Should these be static constants?

ncarter (slow) 2013/01/07 22:49:10 Deleted these; they were cruft.

+ const float3x1 rgb_to_v_half = rgb_to_v / 2;

+ // Load the four texture samples into a matrix.

+ float4x3 rgb_quad_pixel = float4x3(tex2D(s, texCoord0).rgb,

+ tex2D(s, texCoord1).rgb,

+ tex2D(s, texCoord2).rgb,

+ tex2D(s, texCoord3).rgb);

+ // RGB -> Y conversion (x4).

+ float4 yyyy = mul(rgb_quad_pixel, rgb_to_y) + y_bias;

+ // Average adjacent texture samples while converting RGB->UV. This is the same

+ // as color converting then averaging, but slightly less math. These values

+ // will be in the range [-0.439f, +0.439f] and still need to have the bias

+ // term applied.

+ float2x3 rgb_double_pixel = float2x3(rgb_quad_pixel[0] + rgb_quad_pixel[1],

+ rgb_quad_pixel[2] + rgb_quad_pixel[3]);

+ float2 uu = mul(rgb_double_pixel, rgb_to_u / 2);

miu 2012/12/27 21:40:17 Use rgb_to_u_half here (and rgb_to_v_half in next

ncarter (slow) 2013/01/07 22:49:10 The compiler is able to optimize this appropriatel

+ float2 vv = mul(rgb_double_pixel, rgb_to_v / 2);

+ // Package the result to account for BGRA byte ordering.

+ YV16QuadPixel result;

+ result.YYYY = packForByteOrder(yyyy);

+ result.UUVV.xyzw = float4(uu, vv) + uv_bias; // Apply uv bias.

+ return result;

+};

+// Phase two of RGB24->YV12 conversion: vsFetch2Pixels/psConvertUV44toU2V2

+//

+// @gyp_compile(vs_2_0, vsFetch2Pixels)

+// @gyp_compile(ps_2_0, psConvertUV44toU2V2)

+//

+// Deals with UV only. Input is interleaved UV pixels, already scaled

+// horizontally, packed two per RGBA texel. Output is two color planes U and V,

+// packed four to an RGBA pixel.

+//

+// Vertical scaling happens via a half-texel offset and bilinear interpolation

+// during texture sampling.

+void vsFetch2Pixels(in Vertex vertex,

+ out float4 position : POSITION,

+ out float2 texCoord0 : TEXCOORD0,

+ out float2 texCoord1 : TEXCOORD1) {

+ // We fetch two texels in the horizontal direction, and scale by 2 in the

+ // vertical direction.

+ Vertex adjusted = adjustForAlignmentAndPacking(vertex, float2(2, 2));

+ // Setup the two texture coordinates. No need to adjust texCoord.y; it's

+ // already at the mid-way point between the two rows. Horizontally, we'll

+ // fetch two texels so that we have enough data to fill our output.

+ float2 one_texel_x = float2(1 / kRenderTargetSize.x, 0);

+ position = adjusted.position;

+ texCoord0 = adjusted.texCoord - 0.5f * one_texel_x;

+ texCoord1 = adjusted.texCoord + 0.5f * one_texel_x;

+};

+struct UV8QuadPixel {

+ float4 UUUU : COLOR0;

+ float4 VVVV : COLOR1;

+};

+UV8QuadPixel psConvertUV44toU2V2(float2 texCoord0 : TEXCOORD0,

+ float2 texCoord1 : TEXCOORD1) {

+ // We're just sampling two pixels and unswizzling them. There's no need to do

+ // vertical scaling with math, since bilinear interpolation in the sampler

+ // takes care of that.

+ float4 lo_uuvv = tex2D(s, texCoord0);

+ float4 hi_uuvv = tex2D(s, texCoord1);

+ UV8QuadPixel result;

+ result.UUUU = packForByteOrder(float4(lo_uuvv.xy, hi_uuvv.xy));

+ result.VVVV = packForByteOrder(float4(lo_uuvv.zw, hi_uuvv.zw));

+ return result;

+};

+///////////////////////////////////////////////////////////////////////

+// RGB24 to YV12 in three passes, without MRT: one pass per output color plane.

+// The Y pass uses vsFetch4Pixels; the U and V passes use vsFetch4Pixels_Scale2.

+//

+// Note that this technique will not do full bilinear filtering on its RGB

+// input (you'd get correctly filtered Y, but aliasing in U and V).

+//

+// Pass 1: vsFetch4Pixels + psConvertRGBtoY

+// Pass 2: vsFetch4Pixels_Scale2 + psConvertRGBtoU

+// Pass 3: vsFetch4Pixels_Scale2 + psConvertRGBtoV

+//

+// @gyp_compile(vs_2_0, vsFetch4Pixels_Scale2)

+// @gyp_compile(ps_2_0, psConvertRGBtoY)

+// @gyp_compile(ps_2_0, psConvertRGBtoU)

+// @gyp_compile(ps_2_0, psConvertRGBtoV)

+//

+///////////////////////////////////////////////////////////////////////

+void vsFetch4Pixels_Scale2(in Vertex vertex,

+ out float4 position : POSITION,

+ out float2 texCoord0 : TEXCOORD0,

+ out float2 texCoord1 : TEXCOORD1,

+ out float2 texCoord2 : TEXCOORD2,

+ out float2 texCoord3 : TEXCOORD3) {

+ Vertex adjusted = adjustForAlignmentAndPacking(vertex, float2(8, 2));

+ // Set up four taps, each of which samples a 2x2 texel quad at the midpoint.

+ float2 one_texel_x = float2(1 / kRenderTargetSize.x, 0);

+ position = adjusted.position;

+ texCoord0 = adjusted.texCoord - 3 * one_texel_x;

+ texCoord1 = adjusted.texCoord - 1 * one_texel_x;

+ texCoord2 = adjusted.texCoord + 1 * one_texel_x;

+ texCoord3 = adjusted.texCoord + 3 * one_texel_x;

+};

+// RGB -> Y, four samples at a time.

+float4 psConvertRGBtoY(float2 texCoord0 : TEXCOORD0,

+ float2 texCoord1 : TEXCOORD1,

+ float2 texCoord2 : TEXCOORD2,

+ float2 texCoord3 : TEXCOORD3) : COLOR0 {

+ float4x3 rgb_quad_pixel = float4x3(tex2D(s, texCoord0).rgb,

+ tex2D(s, texCoord1).rgb,

+ tex2D(s, texCoord2).rgb,

+ tex2D(s, texCoord3).rgb);

+ return packForByteOrder(mul(rgb_quad_pixel, rgb_to_y) + y_bias);

+// RGB -> U, four samples at a time.

+float4 psConvertRGBtoU(float2 texCoord0 : TEXCOORD0,

+ float2 texCoord1 : TEXCOORD1,

+ float2 texCoord2 : TEXCOORD2,

+ float2 texCoord3 : TEXCOORD3) : COLOR0 {

+ float4x3 rgb_quad_pixel = float4x3(tex2D(s, texCoord0).rgb,

+ tex2D(s, texCoord1).rgb,

+ tex2D(s, texCoord2).rgb,

+ tex2D(s, texCoord3).rgb);

+ return packForByteOrder(mul(rgb_quad_pixel, rgb_to_u) + uv_bias);

+// RGB -> V, four samples at a time.

+float4 psConvertRGBtoV(float2 texCoord0 : TEXCOORD0,

+ float2 texCoord1 : TEXCOORD1,

+ float2 texCoord2 : TEXCOORD2,

+ float2 texCoord3 : TEXCOORD3) : COLOR0 {

+ float4x3 rgb_quad_pixel = float4x3(tex2D(s, texCoord0).rgb,

+ tex2D(s, texCoord1).rgb,

+ tex2D(s, texCoord2).rgb,

+ tex2D(s, texCoord3).rgb);

+ return packForByteOrder(mul(rgb_quad_pixel, rgb_to_v) + uv_bias);