ui/surface/accelerated_surface_transformer_win.hlsl - Issue 11280318: YUV conversion on the GPU.

Side by Side Diff: ui/surface/accelerated_surface_transformer_win.hlsl

Issue 11280318: YUV conversion on the GPU. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: "Yet more line endings." Created 7 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // @gyp_namespace(ui_surface)	5 // @gyp_namespace(ui_surface)

6 // Compiles into C++ as 'accelerated_surface_transformer_win_hlsl_compiled.h'	6 // Compiles into C++ as 'accelerated_surface_transformer_win_hlsl_compiled.h'

7	7

// Vertex record consumed by the vertex shaders in this file (and returned
// as-is by vsOneTexture): a position plus one texture coordinate.
struct Vertex {
  float4 position : POSITION;   // Carried on the POSITION semantic.
  float2 texCoord : TEXCOORD0;  // Carried on the TEXCOORD0 semantic.
};

12	12

// |s| is the sampler passed to every tex2D() call in the shaders shown here.
// |t| is declared alongside it but is not referenced by the code shown here.
texture t;
sampler s;

// Shader constants supplied from outside the shader (extern uniform).
// NOTE(review): the c0 / c1 annotations presumably pin these to vertex shader
// constant registers 0 and 1 -- confirm against the host-side setup code.
//
// kRenderTargetSize: a width/height pair; the shaders use its reciprocal as a
//   one-unit offset and treat it as the source size when aligning.
// kTextureScale: per-axis factor applied to texture coordinates by
//   vsOneTexture.
extern uniform float2 kRenderTargetSize : c0;
extern uniform float2 kTextureScale : c1;

	18

// @gyp_compile(vs_2_0, vsOneTexture)
//
// Forwards a position and texture coordinate to the pixel shader. The texture
// coordinate is scaled about its center (0.5, 0.5) by |kTextureScale|, and the
// position is shifted by -1 / kRenderTargetSize.x in x and by
// +1 / kRenderTargetSize.y in y.
Vertex vsOneTexture(Vertex input) {
  // Texture scale is typically just 1 (to do nothing) or -1 (to flip).
  // Work in a [-1, 1] range centered on the middle of the texture, apply the
  // scale there, then map back to [0, 1].
  float2 centered = input.texCoord - 0.5;
  float2 scaled = 2 * centered * kTextureScale;
  input.texCoord = (scaled + 1) / 2;

  // Position shift derived from the reciprocal of the render target size.
  float2 shift = float2(-1 / kRenderTargetSize.x, 1 / kRenderTargetSize.y);
  input.position.xy += shift;

  return input;
}

22	29

// @gyp_compile(ps_2_0, psOneTexture)
//
// Looks up sampler |s| at the interpolated texture coordinate and outputs the
// sampled color unchanged.
float4 psOneTexture(float2 texCoord : TEXCOORD0) : COLOR0 {
  float4 color = tex2D(s, texCoord);
  return color;
}

	36

	37 // Return \|value\| rounded up to the nearest multiple of \|multiple\|.

	38 float alignTo(float value, float multiple) {

	39 // \|multiple\| is usually a compile-time constant; this check allows

	40 // the compiler to avoid the fmod when possible.

	41 if (multiple == 1)

	42 return value;

	43

	44 // Biasing the value provides numeric stability. We expect \|value\| to

	45 // be an integer; this prevents 4.001 from being rounded up to 8.

	46 float biased_value = value - 0.5;

	47 return biased_value + multiple - fmod(biased_value, multiple);

	48 }

	49

	50 float4 packForByteOrder(float4 value) {

	51 return value.bgra;

	52 }

	53

	54 // Adjust the input vertex to address the correct range of texels. This depends

	55 // on the value of the shader constant \|kRenderTargetSize\|, as well as an

	56 // alignment factor \|align\| that effectively specifies the footprint of the

	57 // texel samples done by this shader pass, and is used to correct when that

	58 // footprint size doesn't align perfectly with the actual input size.

	59 Vertex adjustForAlignmentAndPacking(Vertex vtx, float2 align) {

	60 float src_width = kRenderTargetSize.x;

	61 float src_height = kRenderTargetSize.y;

	62

	63 // Because our caller expects to be sampling \|align.x\| many pixels from src at

	64 // a time, if src's width isn't evenly divisible by \|align.x\|, it is necessary

	65 // to pretend that the source is slightly bigger than it is.

	66 float bloated_src_width = alignTo(src_width, align.x);

	67 float bloated_src_height = alignTo(src_height, align.y);

	68

	69 // When bloated_src_width != src_width, we'll adjust the texture coordinates

	70 // to sample past the edge of the vtx; clamping will produce extra copies of

	71 // the last row.

	72 float texture_x_scale = bloated_src_width / src_width;

	73 float texture_y_scale = bloated_src_height / src_height;

	74

	75 // Adjust positions so that we're addressing full fragments in the output, per

	76 // the top-left filling convention. The shifts would be equivalent to

	77 // 1/dst_width and 1/dst_height, if we were to calculate those explicitly.

	78 vtx.position.x -= align.x / bloated_src_width;

	79 vtx.position.y += align.y / bloated_src_height;

	80

	81 // Apply the texture scale

	82 vtx.texCoord.x *= texture_x_scale;

	83 vtx.texCoord.y *= texture_y_scale;

	84

	85 return vtx;

	86 }

	87

	88 ///////////////////////////////////////////////////////////////////////

	89 // RGB24 to YV12 in two passes; writing two 8888 targets each pass.

	90 //

	91 // YV12 is full-resolution luma and half-resolution blue/red chroma.

	92 //

	93 // (original)

	94 //    XRGB XRGB XRGB XRGB XRGB XRGB XRGB XRGB

	95 //    XRGB XRGB XRGB XRGB XRGB XRGB XRGB XRGB

	96 //    XRGB XRGB XRGB XRGB XRGB XRGB XRGB XRGB

	97 //    XRGB XRGB XRGB XRGB XRGB XRGB XRGB XRGB

	98 //    XRGB XRGB XRGB XRGB XRGB XRGB XRGB XRGB

	99 //    XRGB XRGB XRGB XRGB XRGB XRGB XRGB XRGB

	100 //      |

	101 //      |      (y plane)    (temporary)

	102 //      |      YYYY YYYY     UVUV UVUV

	103 //      +--> { YYYY YYYY  +  UVUV UVUV }

	104 //             YYYY YYYY     UVUV UVUV

	105 //   First     YYYY YYYY     UVUV UVUV

	106 //    pass     YYYY YYYY     UVUV UVUV

	107 //             YYYY YYYY     UVUV UVUV

	108 //                              |

	109 //                              |  (u plane) (v plane)

	110 //   Second                     |      UUUU   VVVV

	111 //     pass                     +--> { UUUU + VVVV }

	112 //                                     UUUU   VVVV

	113 //

	114 ///////////////////////////////////////////////////////////////////////

	115

	116 // Phase one of RGB24->YV12 conversion: vsFetch4Pixels/psConvertRGBtoY8UV44

	117 //

	118 // @gyp_compile(vs_2_0, vsFetch4Pixels)

	119 // @gyp_compile(ps_2_0, psConvertRGBtoY8UV44)

	120 //

	121 // Writes four source pixels at a time to a full-size Y plane and a half-width

	122 // interleaved UV plane. After execution, the Y plane is complete but the UV

	123 // planes still need to be de-interleaved and vertically scaled.

	124 //

	125 void vsFetch4Pixels(in Vertex vertex,

	126 out float4 position : POSITION,

	127 out float2 texCoord0 : TEXCOORD0,

	128 out float2 texCoord1 : TEXCOORD1,

	129 out float2 texCoord2 : TEXCOORD2,

	130 out float2 texCoord3 : TEXCOORD3) {

	131 Vertex adjusted = adjustForAlignmentAndPacking(vertex, float2(4, 1));

	132

	133 // Set up four taps, aligned to texel centers if the src's true size is

	134 // \|kRenderTargetSize\|, and doing bilinear interpolation otherwise.

	135 float2 one_texel_x = float2(1 / kRenderTargetSize.x, 0);

	136 position = adjusted.position;

	137 texCoord0 = adjusted.texCoord - 1.5f * one_texel_x;

	138 texCoord1 = adjusted.texCoord - 0.5f * one_texel_x;

	139 texCoord2 = adjusted.texCoord + 0.5f * one_texel_x;

	140 texCoord3 = adjusted.texCoord + 1.5f * one_texel_x;

	141 };

	142

	143 struct YV16QuadPixel

	144 {

	145 float4 YYYY : COLOR0;

	146 float4 UUVV : COLOR1;

	147 };

	148

	149 // Color conversion constants.

	150 static const float3x1 rgb_to_y = float3x1( +0.257f, +0.504f, +0.098f );

	151 static const float3x1 rgb_to_u = float3x1( -0.148f, -0.291f, +0.439f );

	152 static const float3x1 rgb_to_v = float3x1( +0.439f, -0.368f, -0.071f );

	153 static const float y_bias = 0.0625f;

	154 static const float uv_bias = 0.5f;

	155

	156 YV16QuadPixel psConvertRGBtoY8UV44(float2 texCoord0 : TEXCOORD0,

	157 float2 texCoord1 : TEXCOORD1,

	158 float2 texCoord2 : TEXCOORD2,

	159 float2 texCoord3 : TEXCOORD3) {

	160 // Load the four texture samples into a matrix.

	161 float4x3 rgb_quad_pixel = float4x3(tex2D(s, texCoord0).rgb,

	162 tex2D(s, texCoord1).rgb,

	163 tex2D(s, texCoord2).rgb,

	164 tex2D(s, texCoord3).rgb);

	165

	166 // RGB -> Y conversion (x4).

	167 float4 yyyy = mul(rgb_quad_pixel, rgb_to_y) + y_bias;

	168

	169 // Average adjacent texture samples while converting RGB->UV. This is the same

	170 // as color converting then averaging, but slightly less math. These values

	171 // will be in the range [-0.439f, +0.439f] and still need to have the bias

	172 // term applied.

	173 float2x3 rgb_double_pixel = float2x3(rgb_quad_pixel[0] + rgb_quad_pixel[1],

	174 rgb_quad_pixel[2] + rgb_quad_pixel[3]);

	175 float2 uu = mul(rgb_double_pixel, rgb_to_u / 2);

	176 float2 vv = mul(rgb_double_pixel, rgb_to_v / 2);

	177

	178 // Package the result to account for BGRA byte ordering.

	179 YV16QuadPixel result;

	180 result.YYYY = packForByteOrder(yyyy);

	181 result.UUVV.xyzw = float4(uu, vv) + uv_bias; // Apply uv bias.

	182 return result;

	183 };

	184

	185 // Phase two of RGB24->YV12 conversion: vsFetch2Pixels/psConvertUV44toU2V2

	186 //

	187 // @gyp_compile(vs_2_0, vsFetch2Pixels)

	188 // @gyp_compile(ps_2_0, psConvertUV44toU2V2)

	189 //

	190 // Deals with UV only. Input is interleaved UV pixels, already scaled

	191 // horizontally, packed two per RGBA texel. Output is two color planes U and V,

	192 // packed four to a RGBA pixel.

	193 //

	194 // Vertical scaling happens via a half-texel offset and bilinear interpolation

	195 // during texture sampling.

	196 void vsFetch2Pixels(in Vertex vertex,

	197 out float4 position : POSITION,

	198 out float2 texCoord0 : TEXCOORD0,

	199 out float2 texCoord1 : TEXCOORD1) {

	200 // We fetch two texels in the horizontal direction, and scale by 2 in the

	201 // vertical direction.

	202 Vertex adjusted = adjustForAlignmentAndPacking(vertex, float2(2, 2));

	203

	204 // Setup the two texture coordinates. No need to adjust texCoord.y; it's

	205 // already at the mid-way point between the two rows. Horizontally, we'll

	206 // fetch two texels so that we have enough data to fill our output.

	207 float2 one_texel_x = float2(1 / kRenderTargetSize.x, 0);

	208 position = adjusted.position;

	209 texCoord0 = adjusted.texCoord - 0.5f * one_texel_x;

	210 texCoord1 = adjusted.texCoord + 0.5f * one_texel_x;

	211 };

	212

	213 struct UV8QuadPixel {

	214 float4 UUUU : COLOR0;

	215 float4 VVVV : COLOR1;

	216 };

	217

	218 UV8QuadPixel psConvertUV44toU2V2(float2 texCoord0 : TEXCOORD0,

	219 float2 texCoord1 : TEXCOORD1) {

	220 // We're just sampling two pixels and unswizzling them. There's no need to do

	221 // vertical scaling with math, since bilinear interpolation in the sampler

	222 // takes care of that.

	223 float4 lo_uuvv = tex2D(s, texCoord0);

	224 float4 hi_uuvv = tex2D(s, texCoord1);

	225 UV8QuadPixel result;

	226 result.UUUU = packForByteOrder(float4(lo_uuvv.xy, hi_uuvv.xy));

	227 result.VVVV = packForByteOrder(float4(lo_uuvv.zw, hi_uuvv.zw));

	228 return result;

	229 };

	230

	231

	232 ///////////////////////////////////////////////////////////////////////

	233 // RGB24 to YV12 in three passes, without MRT: one pass per output color plane.

	234 // vsFetch4Pixels drives the Y pass; vsFetch4Pixels_Scale2 drives the U and V passes.

	235 //

	236 // Note that this technique will not do full bilinear filtering on its RGB

	237 // input (you'd get correctly filtered Y, but aliasing in U and V).

	238 //

	239 // Pass 1: vsFetch4Pixels + psConvertRGBtoY

	240 // Pass 2: vsFetch4Pixels_Scale2 + psConvertRGBtoU

	241 // Pass 3: vsFetch4Pixels_Scale2 + psConvertRGBtoV

	242 //

	243 // @gyp_compile(vs_2_0, vsFetch4Pixels_Scale2)

	244 // @gyp_compile(ps_2_0, psConvertRGBtoY)

	245 // @gyp_compile(ps_2_0, psConvertRGBtoU)

	246 // @gyp_compile(ps_2_0, psConvertRGBtoV)

	247 //

	248 ///////////////////////////////////////////////////////////////////////

	249 void vsFetch4Pixels_Scale2(in Vertex vertex,

	250 out float4 position : POSITION,

	251 out float2 texCoord0 : TEXCOORD0,

	252 out float2 texCoord1 : TEXCOORD1,

	253 out float2 texCoord2 : TEXCOORD2,

	254 out float2 texCoord3 : TEXCOORD3) {

	255 Vertex adjusted = adjustForAlignmentAndPacking(vertex, float2(8, 2));

	256

	257 // Set up four taps, each of which samples a 2x2 texel quad at the midpoint.

	258 float2 one_texel_x = float2(1 / kRenderTargetSize.x, 0);

	259 position = adjusted.position;

	260 texCoord0 = adjusted.texCoord - 3 * one_texel_x;

	261 texCoord1 = adjusted.texCoord - 1 * one_texel_x;

	262 texCoord2 = adjusted.texCoord + 1 * one_texel_x;

	263 texCoord3 = adjusted.texCoord + 3 * one_texel_x;

	264 };

	265

	266 // RGB -> Y, four samples at a time.

	267 float4 psConvertRGBtoY(float2 texCoord0 : TEXCOORD0,

	268 float2 texCoord1 : TEXCOORD1,

	269 float2 texCoord2 : TEXCOORD2,

	270 float2 texCoord3 : TEXCOORD3) : COLOR0 {

	271 float4x3 rgb_quad_pixel = float4x3(tex2D(s, texCoord0).rgb,

	272 tex2D(s, texCoord1).rgb,

	273 tex2D(s, texCoord2).rgb,

	274 tex2D(s, texCoord3).rgb);

	275 return packForByteOrder(mul(rgb_quad_pixel, rgb_to_y) + y_bias);

	276 }

	277

	278 // RGB -> U, four samples at a time.

	279 float4 psConvertRGBtoU(float2 texCoord0 : TEXCOORD0,

	280 float2 texCoord1 : TEXCOORD1,

	281 float2 texCoord2 : TEXCOORD2,

	282 float2 texCoord3 : TEXCOORD3) : COLOR0 {

	283 float4x3 rgb_quad_pixel = float4x3(tex2D(s, texCoord0).rgb,

	284 tex2D(s, texCoord1).rgb,

	285 tex2D(s, texCoord2).rgb,

	286 tex2D(s, texCoord3).rgb);

	287 return packForByteOrder(mul(rgb_quad_pixel, rgb_to_u) + uv_bias);

	288 }

	289

	290 // RGB -> V, four samples at a time.

	291 float4 psConvertRGBtoV(float2 texCoord0 : TEXCOORD0,

	292 float2 texCoord1 : TEXCOORD1,

	293 float2 texCoord2 : TEXCOORD2,

	294 float2 texCoord3 : TEXCOORD3) : COLOR0 {

	295 float4x3 rgb_quad_pixel = float4x3(tex2D(s, texCoord0).rgb,

	296 tex2D(s, texCoord1).rgb,

	297 tex2D(s, texCoord2).rgb,

	298 tex2D(s, texCoord3).rgb);

	299 return packForByteOrder(mul(rgb_quad_pixel, rgb_to_v) + uv_bias);

	300 }

OLD	NEW

« no previous file with comments | « ui/surface/accelerated_surface_transformer_win.cc ('k') | ui/surface/accelerated_surface_transformer_win_unittest.cc » ('j') | no next file with comments »