ui/surface/accelerated_surface_transformer_win.hlsl - Issue 11280318: YUV conversion on the GPU.

Side by Side Diff: ui/surface/accelerated_surface_transformer_win.hlsl

Issue 11280318: YUV conversion on the GPU. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Test improvements. Created 7 years, 12 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« ui/surface/accelerated_surface_transformer_win.cc ('K') | « ui/surface/accelerated_surface_transformer_win.cc ('k') | ui/surface/accelerated_surface_transformer_win_unittest.cc » ('j') | ui/surface/accelerated_surface_transformer_win_unittest.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // @gyp_namespace(ui_surface)	5 // @gyp_namespace(ui_surface)

6 // Compiles into C++ as 'accelerated_surface_transformer_win_hlsl_compiled.h'	6 // Compiles into C++ as 'accelerated_surface_transformer_win_hlsl_compiled.h'

7	7

8 struct Vertex {	8 struct Vertex {

9 float4 position : POSITION;	9 float4 position : POSITION;

10 float2 texCoord : TEXCOORD0;	10 float2 texCoord : TEXCOORD0;

11 };	11 };

12	12

13 texture t;	13 texture t;

14 sampler s;	14 sampler s;

15	15 extern uniform float2 kRenderTargetSize : c0;

16 // @gyp_compile(vs_2_0, vsOneTexture)	16

	17 // @gyp_compile(vs_2_0, vsOneTextureFlipY)

17 //	18 //

18 // Passes a position and texture coordinate to the pixel shader.	19 // Passes a position and texture coordinate to the pixel shader.

19 Vertex vsOneTexture(Vertex input) {	20 Vertex vsOneTextureFlipY(Vertex input) {

	21 input.texCoord.y = 1 - input.texCoord.y;

	22 input.position.x += -1 / kRenderTargetSize.x;

	23 input.position.y += 1 / kRenderTargetSize.y;

20 return input;	24 return input;

21 };	25 };

22	26

23 // @gyp_compile(ps_2_0, psOneTexture)	27 // @gyp_compile(ps_2_0, psOneTexture)

24 //	28 //

25 // Samples a texture at the given texture coordinate and returns the result.	29 // Samples a texture at the given texture coordinate and returns the result.

26 float4 psOneTexture(float2 texCoord : TEXCOORD0) : COLOR0 {	30 float4 psOneTexture(float2 texCoord : TEXCOORD0) : COLOR0 {

27 return tex2D(s, texCoord);	31 return tex2D(s, texCoord);

28 };	32 };

	33

	34 // Return |value| rounded up to the nearest multiple of |multiple|.

	35 float alignTo(float value, float multiple) {

	36 // |multiple| is usually a compile-time constant; this check allows

	37 // the compiler to avoid the fmod when possible.

	38 if (multiple == 1)

	39 return value;

	40

	41 // Biasing the value provides numeric stability. We expect |value| to

	42 // be an integer; this prevents 4.001 from being rounded up to 8.

	43 float biased_value = value - 0.5;

	44 return biased_value + multiple - fmod(biased_value, multiple);

	45 }

	46

	47 float4 packForByteOrder(float4 value) {

	48 return value.bgra;

	49 }

	50

	51 // Adjust the input vertex to address the correct range of texels. This depends

	52 // on the value of the shader constant |kRenderTargetSize|, as well as an

	53 // alignment factor |align| that effectively specifies the footprint of the

	54 // texel samples done by this shader pass, and is used to correct when that

	55 // footprint size doesn't align perfectly with the actual input size.

	56 Vertex adjustForAlignmentAndPacking(Vertex vtx, float2 align) {

	57 float src_width = kRenderTargetSize.x;

	58 float src_height = kRenderTargetSize.y;

	59

	60 // Because our caller expects to be sampling |align.x| many pixels from src at

	61 // a time, if src's width isn't evenly divisible by |align.x|, it is necessary

	62 // to pretend that the source is slightly bigger than it is.

	63 float bloated_src_width = alignTo(src_width, align.x);

	64 float bloated_src_height = alignTo(src_height, align.y);

	65

	66 // When bloated_src_width != src_width, we'll adjust the texture coordinates

	67 // to sample past the edge of the vtx; clamping will produce extra copies of

	68 // the last row.

	69 float texture_x_scale = bloated_src_width / src_width;

	70 float texture_y_scale = bloated_src_height / src_height;

	71

	72 // Adjust positions so that we're addressing full fragments in the output, per

	73 // the top-left filling convention. The shifts would be equivalent to

	74 // 1/dst_width and 1/dst_height, if we were to calculate those explicitly.

	75 vtx.position.x -= align.x / bloated_src_width;

	76 vtx.position.y += align.y / bloated_src_height;

	77

	78 // Apply the texture scale

	79 vtx.texCoord.x *= texture_x_scale;

	80 vtx.texCoord.y *= texture_y_scale;

	81

	82 return vtx;

	83 }

	84

	85 ///////////////////////////////////////////////////////////////////////

	86 // RGB24 to YV12 in two passes; writing two 8888 targets each pass.

	87 //

	88 // YV12 is full-resolution luma and half-resolution red/green chroma.
	miu (2012/12/27 21:40:17): s/green/blue/ -- ncarter (slow) (2013/01/07 22:49:10): On 2012/12/27 21:40:17, Yuri wrote: > s/green/blue/ Done.
	89 //

	90 // (original)

	91 // XRGB XRGB XRGB XRGB XRGB XRGB XRGB XRGB

	92 // XRGB XRGB XRGB XRGB XRGB XRGB XRGB XRGB

	93 // XRGB XRGB XRGB XRGB XRGB XRGB XRGB XRGB

	94 // XRGB XRGB XRGB XRGB XRGB XRGB XRGB XRGB

	95 // XRGB XRGB XRGB XRGB XRGB XRGB XRGB XRGB

	96 // XRGB XRGB XRGB XRGB XRGB XRGB XRGB XRGB

	97 //       |

	98 //       |      (y plane)    (temporary)

	99 //       |      YYYY YYYY     UVUV UVUV

	100 //       +--> { YYYY YYYY  +  UVUV UVUV }

	101 //              YYYY YYYY     UVUV UVUV

	102 //  First       YYYY YYYY     UVUV UVUV

	103 //  pass        YYYY YYYY     UVUV UVUV

	104 //              YYYY YYYY     UVUV UVUV

	105 //                               |

	106 //                               |  (u plane) (v plane)

	107 //  Second                       |      UUUU   VVVV

	108 //  pass                         +--> { UUUU + VVVV }

	109 //                                      UUUU   VVVV

	110 //

	111 ///////////////////////////////////////////////////////////////////////

	112

	113 // Phase one of RGB24->YV12 conversion: vsFetch4Pixels/psConvertRGBtoY8UV44

	114 //

	115 // @gyp_compile(vs_2_0, vsFetch4Pixels)

	116 // @gyp_compile(ps_2_0, psConvertRGBtoY8UV44)

	117 //

	118 // Writes four source pixels at a time to a full-size Y plane and a half-width

	119 // interleaved UV plane. After execution, the Y plane is complete but the UV

	120 // planes still need to be de-interleaved and vertically scaled.

	121 //

	122 void vsFetch4Pixels(in Vertex vertex,

	123 out float4 position : POSITION,

	124 out float2 texCoord0 : TEXCOORD0,

	125 out float2 texCoord1 : TEXCOORD1,

	126 out float2 texCoord2 : TEXCOORD2,

	127 out float2 texCoord3 : TEXCOORD3) {

	128 Vertex adjusted = adjustForAlignmentAndPacking(vertex, float2(4, 1));

	129

	130 // Set up four taps, aligned to texel centers if the src's true size is

	131 // |kRenderTargetSize|, and doing bilinear interpolation otherwise.

	132 float2 one_texel_x = float2(1 / kRenderTargetSize.x, 0);

	133 position = adjusted.position;

	134 texCoord0 = adjusted.texCoord - 1.5f * one_texel_x;

	135 texCoord1 = adjusted.texCoord - 0.5f * one_texel_x;

	136 texCoord2 = adjusted.texCoord + 0.5f * one_texel_x;

	137 texCoord3 = adjusted.texCoord + 1.5f * one_texel_x;

	138 };

	139

	140 struct YV16QuadPixel

	141 {

	142 float4 YYYY : COLOR0;

	143 float4 UUVV : COLOR1;

	144 };

	145

	146 // Color conversion constants.

	147 static const float3x1 rgb_to_y = float3x1( +0.257f, +0.504f, +0.098f );

	148 static const float3x1 rgb_to_u = float3x1( -0.148f, -0.291f, +0.439f );

	149 static const float3x1 rgb_to_v = float3x1( +0.439f, -0.368f, -0.071f );

	150 static const float y_bias = 0.0625f;

	151 static const float uv_bias = 0.5f;

	152

	153 YV16QuadPixel psConvertRGBtoY8UV44(float2 texCoord0 : TEXCOORD0,

	154 float2 texCoord1 : TEXCOORD1,

	155 float2 texCoord2 : TEXCOORD2,

	156 float2 texCoord3 : TEXCOORD3) {

	157 // Color conversion constants.

	158 const float3x1 rgb_to_u_half = rgb_to_u / 2;
	miu (2012/12/27 21:40:17): Should these be static constants? -- ncarter (slow) (2013/01/07 22:49:10): On 2012/12/27 21:40:17, Yuri wrote: > Should these be static constants? Deleted these; they were cruft.
	159 const float3x1 rgb_to_v_half = rgb_to_v / 2;

	160

	161 // Load the four texture samples into a matrix.

	162 float4x3 rgb_quad_pixel = float4x3(tex2D(s, texCoord0).rgb,

	163 tex2D(s, texCoord1).rgb,

	164 tex2D(s, texCoord2).rgb,

	165 tex2D(s, texCoord3).rgb);

	166

	167 // RGB -> Y conversion (x4).

	168 float4 yyyy = mul(rgb_quad_pixel, rgb_to_y) + y_bias;

	169

	170 // Average adjacent texture samples while converting RGB->UV. This is the same

	171 // as color converting then averaging, but slightly less math. These values

	172 // will be in the range [-0.439f, +0.439f] and still need to have the bias

	173 // term applied.

	174 float2x3 rgb_double_pixel = float2x3(rgb_quad_pixel[0] + rgb_quad_pixel[1],

	175 rgb_quad_pixel[2] + rgb_quad_pixel[3]);

	176 float2 uu = mul(rgb_double_pixel, rgb_to_u / 2);
	miu (2012/12/27 21:40:17): Use rgb_to_u_half here (and rgb_to_v_half in next statement)? -- ncarter (slow) (2013/01/07 22:49:10): On 2012/12/27 21:40:17, Yuri wrote: > Use rgb_to_u_half here (and rgb_to_v_half in next statement)? The compiler is able to optimize this appropriately; I've eliminated the rgb_to_u_half constants in the interest of readability.
	177 float2 vv = mul(rgb_double_pixel, rgb_to_v / 2);

	178

	179 // Package the result to account for BGRA byte ordering.

	180 YV16QuadPixel result;

	181 result.YYYY = packForByteOrder(yyyy);

	182 result.UUVV.xyzw = float4(uu, vv) + uv_bias; // Apply uv bias.

	183 return result;

	184 };

	185

	186 // Phase two of RGB24->YV12 conversion: vsFetch2Pixels/psConvertUV44toU2V2

	187 //

	188 // @gyp_compile(vs_2_0, vsFetch2Pixels)

	189 // @gyp_compile(ps_2_0, psConvertUV44toU2V2)

	190 //

	191 // Deals with UV only. Input is interleaved UV pixels, already scaled

	192 // horizontally, packed two per RGBA texel. Output is two color planes U and V,

	193 // packed four to a RGBA pixel.

	194 //

	195 // Vertical scaling happens via a half-texel offset and bilinear interpolation

	196 // during texture sampling.

	197 void vsFetch2Pixels(in Vertex vertex,

	198 out float4 position : POSITION,

	199 out float2 texCoord0 : TEXCOORD0,

	200 out float2 texCoord1 : TEXCOORD1) {

	201 // We fetch two texels in the horizontal direction, and scale by 2 in the

	202 // vertical direction.

	203 Vertex adjusted = adjustForAlignmentAndPacking(vertex, float2(2, 2));

	204

	205 // Setup the two texture coordinates. No need to adjust texCoord.y; it's

	206 // already at the mid-way point between the two rows. Horizontally, we'll

	207 // fetch two texels so that we have enough data to fill our output.

	208 float2 one_texel_x = float2(1 / kRenderTargetSize.x, 0);

	209 position = adjusted.position;

	210 texCoord0 = adjusted.texCoord - 0.5f * one_texel_x;

	211 texCoord1 = adjusted.texCoord + 0.5f * one_texel_x;

	212 };

	213

	214 struct UV8QuadPixel {

	215 float4 UUUU : COLOR0;

	216 float4 VVVV : COLOR1;

	217 };

	218

	219 UV8QuadPixel psConvertUV44toU2V2(float2 texCoord0 : TEXCOORD0,

	220 float2 texCoord1 : TEXCOORD1) {

	221 // We're just sampling two pixels and unswizzling them. There's no need to do

	222 // vertical scaling with math, since bilinear interpolation in the sampler

	223 // takes care of that.

	224 float4 lo_uuvv = tex2D(s, texCoord0);

	225 float4 hi_uuvv = tex2D(s, texCoord1);

	226 UV8QuadPixel result;

	227 result.UUUU = packForByteOrder(float4(lo_uuvv.xy, hi_uuvv.xy));

	228 result.VVVV = packForByteOrder(float4(lo_uuvv.zw, hi_uuvv.zw));

	229 return result;

	230 };

	231

	232

	233 ///////////////////////////////////////////////////////////////////////

	234 // RGB24 to YV12 in three passes, without MRT: one pass per output color plane.

	235 // vsFetch4Pixels is the common vertex shader for all three passes.

	236 //

	237 // Note that this technique will not do full bilinear filtering on its RGB

	238 // input (you'd get correctly filtered Y, but aliasing in U and V).

	239 //

	240 // Pass 1: vsFetch4Pixels + psConvertRGBToY

	241 // Pass 2: vsFetch4Pixels_Scale2 + psConvertRGBToU

	242 // Pass 3: vsFetch4Pixels_Scale2 + psConvertRGBToV

	243 //

	244 // @gyp_compile(vs_2_0, vsFetch4Pixels_Scale2)

	245 // @gyp_compile(ps_2_0, psConvertRGBtoY)

	246 // @gyp_compile(ps_2_0, psConvertRGBtoU)

	247 // @gyp_compile(ps_2_0, psConvertRGBtoV)

	248 //

	249 ///////////////////////////////////////////////////////////////////////

	250 void vsFetch4Pixels_Scale2(in Vertex vertex,

	251 out float4 position : POSITION,

	252 out float2 texCoord0 : TEXCOORD0,

	253 out float2 texCoord1 : TEXCOORD1,

	254 out float2 texCoord2 : TEXCOORD2,

	255 out float2 texCoord3 : TEXCOORD3) {

	256 Vertex adjusted = adjustForAlignmentAndPacking(vertex, float2(8, 2));

	257

	258 // Set up four taps, each of which samples a 2x2 texel quad at the midpoint.

	259 float2 one_texel_x = float2(1 / kRenderTargetSize.x, 0);

	260 position = adjusted.position;

	261 texCoord0 = adjusted.texCoord - 3 * one_texel_x;

	262 texCoord1 = adjusted.texCoord - 1 * one_texel_x;

	263 texCoord2 = adjusted.texCoord + 1 * one_texel_x;

	264 texCoord3 = adjusted.texCoord + 3 * one_texel_x;

	265 };

	266

	267 // RGB -> Y, four samples at a time.

	268 float4 psConvertRGBtoY(float2 texCoord0 : TEXCOORD0,

	269 float2 texCoord1 : TEXCOORD1,

	270 float2 texCoord2 : TEXCOORD2,

	271 float2 texCoord3 : TEXCOORD3) : COLOR0 {

	272 float4x3 rgb_quad_pixel = float4x3(tex2D(s, texCoord0).rgb,

	273 tex2D(s, texCoord1).rgb,

	274 tex2D(s, texCoord2).rgb,

	275 tex2D(s, texCoord3).rgb);

	276 return packForByteOrder(mul(rgb_quad_pixel, rgb_to_y) + y_bias);

	277 }

	278

	279 // RGB -> U, four samples at a time.

	280 float4 psConvertRGBtoU(float2 texCoord0 : TEXCOORD0,

	281 float2 texCoord1 : TEXCOORD1,

	282 float2 texCoord2 : TEXCOORD2,

	283 float2 texCoord3 : TEXCOORD3) : COLOR0 {

	284 float4x3 rgb_quad_pixel = float4x3(tex2D(s, texCoord0).rgb,

	285 tex2D(s, texCoord1).rgb,

	286 tex2D(s, texCoord2).rgb,

	287 tex2D(s, texCoord3).rgb);

	288 return packForByteOrder(mul(rgb_quad_pixel, rgb_to_u) + uv_bias);

	289 }

	290

	291 // RGB -> V, four samples at a time.

	292 float4 psConvertRGBtoV(float2 texCoord0 : TEXCOORD0,

	293 float2 texCoord1 : TEXCOORD1,

	294 float2 texCoord2 : TEXCOORD2,

	295 float2 texCoord3 : TEXCOORD3) : COLOR0 {

	296 float4x3 rgb_quad_pixel = float4x3(tex2D(s, texCoord0).rgb,

	297 tex2D(s, texCoord1).rgb,

	298 tex2D(s, texCoord2).rgb,

	299 tex2D(s, texCoord3).rgb);

	300 return packForByteOrder(mul(rgb_quad_pixel, rgb_to_v) + uv_bias);

	301 }

OLD	NEW