Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(314)

Side by Side Diff: gpu/command_buffer/service/gles2_cmd_apply_framebuffer_attachment_cmaa_intel.cc

Issue 2055713003: gpu: Implement GL_INTEL_framebuffer_CMAA via shaders in the GPU Service (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@lkgr
Patch Set: Fix component & windows build. Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "gpu/command_buffer/service/gles2_cmd_apply_framebuffer_attachment_cmaa _intel.h"
6
7 #include "base/logging.h"
8 #include "gpu/command_buffer/service/framebuffer_manager.h"
9 #include "gpu/command_buffer/service/gles2_cmd_decoder.h"
10 #include "ui/gl/gl_context.h"
11 #include "ui/gl/gl_gl_api_implementation.h"
12 #include "ui/gl/gl_version_info.h"
13
14 namespace gpu {
15
16 ApplyFramebufferAttachmentCMAAINTELResourceManager::
17 ApplyFramebufferAttachmentCMAAINTELResourceManager()
18 : initialized_(false),
19 textures_initialized_(false),
20 is_in_gamma_correct_mode_(false),
21 supports_usampler_(true),
22 supports_r8_image_(true),
23 supports_r8_read_format_(true),
24 is_gles31_compatible_(false),
25 frame_id_(0),
26 width_(0),
27 height_(0),
28 copy_to_framebuffer_shader_(0),
29 copy_to_image_shader_(0),
30 edges0_shader_(0),
31 edges1_shader_(0),
32 edges_combine_shader_(0),
33 process_and_apply_shader_(0),
34 debug_display_edges_shader_(0),
35 cmaa_framebuffer_(0),
36 copy_framebuffer_(0),
37 rgba8_texture_(0),
38 working_color_texture_(0),
39 edges0_texture_(0),
40 edges1_texture_(0),
41 mini4_edge_texture_(0),
42 mini4_edge_depth_texture_(0),
43 edges1_shader_result_texture_float4_slot1_(0),
44 edges1_shader_result_texture_(0),
45 edges_combine_shader_result_texture_float4_slot1_(0),
46 process_and_apply_shader_result_texture_float4_slot1_(0),
47 edges_combine_shader_result_texture_slot2_(0),
48 copy_to_image_shader_outTexture_(0) {}
49
50 ApplyFramebufferAttachmentCMAAINTELResourceManager::
51 ~ApplyFramebufferAttachmentCMAAINTELResourceManager() {
52 Destroy();
53 }
54
55 void ApplyFramebufferAttachmentCMAAINTELResourceManager::Initialize(
56 gles2::GLES2Decoder* decoder) {
57 DCHECK(decoder);
58 is_gles31_compatible_ =
59 decoder->GetGLContext()->GetVersionInfo()->IsAtLeastGLES(3, 1);
60
61 copy_to_image_shader_ = CreateProgram("", vert_str_, copy_frag_str_);
62 copy_to_framebuffer_shader_ =
63 CreateProgram("#define OUT_FBO 1\n", vert_str_, copy_frag_str_);
64
65 // Check if RGBA8UI is supported as an FBO colour target with depth.
66 // If not supported, GLSL needs to convert the data to/from float so there is
67 // a small extra cost.
68 {
69 GLuint rgba8ui_texture = 0, depth_texture = 0;
70 glGenTextures(1, &rgba8ui_texture);
71 glBindTexture(GL_TEXTURE_2D, rgba8ui_texture);
72 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_RGBA8UI, 4, 4);
73
74 glGenTextures(1, &depth_texture);
75 glBindTexture(GL_TEXTURE_2D, depth_texture);
76 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_DEPTH_COMPONENT16, 4, 4);
77
78 // Create the FBO
79 GLuint rgba8ui_framebuffer = 0;
80 glGenFramebuffersEXT(1, &rgba8ui_framebuffer);
81 glBindFramebufferEXT(GL_FRAMEBUFFER, rgba8ui_framebuffer);
82
83 // Bind to the FBO to test support
84 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
85 GL_TEXTURE_2D, rgba8ui_texture, 0);
86 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,
87 GL_TEXTURE_2D, depth_texture, 0);
88 GLenum status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER);
89
90 supports_usampler_ = (status == GL_FRAMEBUFFER_COMPLETE);
91
92 glDeleteFramebuffersEXT(1, &rgba8ui_framebuffer);
93 glDeleteTextures(1, &rgba8ui_texture);
94 glDeleteTextures(1, &depth_texture);
95 }
96
97 // Check to see if R8 images are supported
98 // If not supported, images are bound as R32F for write targets, not R8.
99 {
100 GLuint r8_texture = 0;
101 glGenTextures(1, &r8_texture);
102 glBindTexture(GL_TEXTURE_2D, r8_texture);
103 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_R8, 4, 4);
104
105 glGetError(); // reset all previous errors
106 glBindImageTextureEXT(0, r8_texture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R8);
107 if (glGetError() != GL_NO_ERROR)
108 supports_r8_image_ = false;
109
110 glDeleteTextures(1, &r8_texture);
111 }
112
113 // Check if R8 GLSL read formats are supported.
114 // If not supported, r32f is used instead.
115 {
116 const char shader_source[] =
117 "layout(r8) restrict writeonly uniform highp image2D g_r8Image; \n"
118 "void main() \n"
119 "{ \n"
120 " imageStore(g_r8Image, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 0.0)); \n"
121 "} \n";
122
123 GLuint shader = CreateShader(GL_FRAGMENT_SHADER, "", shader_source);
124 supports_r8_read_format_ = (shader != 0);
125 if (shader != 0) {
126 glDeleteShader(shader);
127 }
128 }
129
130 VLOG(1) << "ApplyFramebufferAttachmentCMAAINTEL: "
131 << "Supports USampler is " << (supports_usampler_ ? "true" : "false");
132 VLOG(1) << "ApplyFramebufferAttachmentCMAAINTEL: "
133 << "Supports R8 Images is "
134 << (supports_r8_image_ ? "true" : "false");
135 VLOG(1) << "ApplyFramebufferAttachmentCMAAINTEL: "
136 << "Supports R8 Read Format is "
137 << (supports_r8_read_format_ ? "true" : "false");
138
139 // Create the shaders
140 std::ostringstream defines, edge1, edge2, combineEdges, blur, displayEdges,
141 cmaa_frag;
142
143 cmaa_frag << cmaa_frag_s1_ << cmaa_frag_s2_;
144 std::string cmaa_frag_string = cmaa_frag.str();
145 const char* cmaa_frag_c_str = cmaa_frag_string.c_str();
146
147 if (supports_usampler_) {
148 defines << "#define SUPPORTS_USAMPLER2D\n";
149 }
150
151 if (is_in_gamma_correct_mode_) {
152 defines << "#define IN_GAMMA_CORRECT_MODE\n";
153 }
154
155 if (supports_r8_read_format_) {
156 defines << "#define EDGE_READ_FORMAT r8\n";
157 } else {
158 defines << "#define EDGE_READ_FORMAT r32f\n";
159 }
160
161 displayEdges << defines.str() << "#define DISPLAY_EDGES\n";
162 debug_display_edges_shader_ =
163 CreateProgram(displayEdges.str().c_str(), vert_str_, cmaa_frag_c_str);
164
165 edge1 << defines.str() << "#define DETECT_EDGES1\n";
166 edges0_shader_ =
167 CreateProgram(edge1.str().c_str(), vert_str_, cmaa_frag_c_str);
168
169 edge2 << defines.str() << "#define DETECT_EDGES2\n";
170 edges1_shader_ =
171 CreateProgram(edge2.str().c_str(), vert_str_, cmaa_frag_c_str);
172
173 combineEdges << defines.str() << "#define COMBINE_EDGES\n";
174 edges_combine_shader_ =
175 CreateProgram(combineEdges.str().c_str(), vert_str_, cmaa_frag_c_str);
176
177 blur << defines.str() << "#define BLUR_EDGES\n";
178 process_and_apply_shader_ =
179 CreateProgram(blur.str().c_str(), vert_str_, cmaa_frag_c_str);
180
181 edges1_shader_result_texture_float4_slot1_ =
182 glGetUniformLocation(edges0_shader_, "g_resultTextureFlt4Slot1");
183 edges1_shader_result_texture_ =
184 glGetUniformLocation(edges1_shader_, "g_resultTexture");
185 edges_combine_shader_result_texture_float4_slot1_ =
186 glGetUniformLocation(edges_combine_shader_, "g_resultTextureFlt4Slot1");
187 edges_combine_shader_result_texture_slot2_ =
188 glGetUniformLocation(edges_combine_shader_, "g_resultTextureSlot2");
189 process_and_apply_shader_result_texture_float4_slot1_ = glGetUniformLocation(
190 process_and_apply_shader_, "g_resultTextureFlt4Slot1");
191 copy_to_image_shader_outTexture_ =
192 glGetUniformLocation(copy_to_image_shader_, "outTexture");
193
194 initialized_ = true;
195 }
196
197 void ApplyFramebufferAttachmentCMAAINTELResourceManager::Destroy() {
198 if (!initialized_)
199 return;
200
201 ReleaseTextures();
202
203 glDeleteProgram(copy_to_image_shader_);
204 glDeleteProgram(copy_to_framebuffer_shader_);
205 glDeleteProgram(process_and_apply_shader_);
206 glDeleteProgram(edges_combine_shader_);
207 glDeleteProgram(edges1_shader_);
208 glDeleteProgram(edges0_shader_);
209 glDeleteProgram(debug_display_edges_shader_);
210
211 initialized_ = false;
212 }
213
214 // Apply CMAA(Conservative Morphological Anti-Aliasing) algorithm to the
215 // color attachments of currently bound draw framebuffer.
216 // Reference GL_INTEL_framebuffer_CMAA for details.
217 void ApplyFramebufferAttachmentCMAAINTELResourceManager::
218 ApplyFramebufferAttachmentCMAAINTEL(gles2::GLES2Decoder* decoder,
219 gles2::Framebuffer* framebuffer) {
220 DCHECK(decoder);
221 DCHECK(initialized_);
222 if (!framebuffer)
223 return;
224
225 GLuint last_framebuffer = framebuffer->service_id();
226
227 // Process each color attachment of the current draw framebuffer.
228 uint32_t max_draw_buffers = decoder->GetContextGroup()->max_draw_buffers();
229 for (uint32_t i = 0; i < max_draw_buffers; i++) {
230 const gles2::Framebuffer::Attachment* attachment =
231 framebuffer->GetAttachment(GL_COLOR_ATTACHMENT0 + i);
232 if (attachment && attachment->IsTextureAttachment()) {
233 // Get the texture info.
234 GLuint source_texture_client_id = attachment->object_name();
235 GLuint source_texture = 0;
236 if (!decoder->GetServiceTextureId(source_texture_client_id,
237 &source_texture))
238 continue;
239 GLsizei width = attachment->width();
240 GLsizei height = attachment->height();
241 GLenum internal_format = attachment->internal_format();
242
243 // Resize internal structures - only if needed.
244 OnSize(width, height);
245
246 // CMAA internally expects GL_RGBA8 textures.
247 // Process using a GL_RGBA8 copy if this is not the case.
248 bool do_copy = internal_format != GL_RGBA8;
249
250 // Copy source_texture to rgba8_texture_
251 if (do_copy) {
252 CopyTexture(source_texture, rgba8_texture_, false);
253 }
254
255 // CMAA Effect
256 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, last_framebuffer);
257 if (do_copy) {
258 ApplyCMAAEffectTexture(rgba8_texture_, rgba8_texture_);
259 } else {
260 ApplyCMAAEffectTexture(source_texture, source_texture);
261 }
262
263 // Copy rgba8_texture_ to source_texture
264 if (do_copy) {
265 // Move source_texture to the first color attachment of the copy fbo.
266 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, last_framebuffer);
267 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + i,
268 GL_TEXTURE_2D, 0, 0);
269 glBindFramebufferEXT(GL_FRAMEBUFFER, copy_framebuffer_);
270 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
271 GL_TEXTURE_2D, source_texture, 0);
272
273 CopyTexture(rgba8_texture_, source_texture, true);
274
275 // Restore color attachments
276 glBindFramebufferEXT(GL_FRAMEBUFFER, copy_framebuffer_);
277 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
278 GL_TEXTURE_2D, rgba8_texture_, 0);
279 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, last_framebuffer);
280 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + i,
281 GL_TEXTURE_2D, source_texture, 0);
282 }
283 }
284 }
285
286 // Restore state
287 decoder->RestoreAllAttributes();
288 decoder->RestoreTextureUnitBindings(0);
289 decoder->RestoreTextureUnitBindings(1);
290 decoder->RestoreActiveTexture();
291 decoder->RestoreProgramBindings();
292 decoder->RestoreBufferBindings();
293 decoder->RestoreFramebufferBindings();
294 decoder->RestoreGlobalState();
295 }
296
297 void ApplyFramebufferAttachmentCMAAINTELResourceManager::ApplyCMAAEffectTexture(
298 GLuint source_texture,
299 GLuint dest_texture) {
300 frame_id_++;
301
302 GLuint edge_texture_a;
303 GLuint edge_texture_b;
304
305 // Flip flop - One pass clears the texture that needs clearing for the other
306 // one (actually it's only important that it clears the highest bit)
307 if ((frame_id_ % 2) == 0) {
308 edge_texture_a = edges0_texture_;
309 edge_texture_b = edges1_texture_;
310 } else {
311 edge_texture_a = edges1_texture_;
312 edge_texture_b = edges0_texture_;
313 }
314
315 // Setup the main fbo
316 glBindFramebufferEXT(GL_FRAMEBUFFER, cmaa_framebuffer_);
317 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
318 mini4_edge_texture_, 0);
319 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
320 mini4_edge_depth_texture_, 0);
321 #if DCHECK_IS_ON()
322 GLenum status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER);
323 if (status != GL_FRAMEBUFFER_COMPLETE) {
324 DLOG(ERROR) << "ApplyFramebufferAttachmentCMAAINTEL: "
325 << "Incomplete framebuffer.";
326 Destroy();
327 return;
328 }
329 #endif
330
331 // Setup the viewport to match the fbo
332 glViewport(0, 0, (width_ + 1) / 2, (height_ + 1) / 2);
333 glEnable(GL_DEPTH_TEST);
334
335 // Detect edges Pass 0
336 // - For every pixel detect edges to the right and down and output depth
337 // mask where edges detected (1 - far, for detected, 0-near for empty
338 // pixels)
339
340 // Inputs
341 // g_screenTexture source_texture tex0
342 // Outputs
343 // gl_FragDepth mini4_edge_depth_texture_ fbo.depth
344 // out uvec4 outEdges mini4_edge_texture_ fbo.col
345 // image2D g_resultTextureFlt4Slot1 working_color_texture_ image1
346 GLenum edge_format = supports_r8_image_ ? GL_R8 : GL_R32F;
347
348 {
349 glUseProgram(edges0_shader_);
350 glUniform1f(0, 1.0f);
351 glUniform2f(1, 1.0f / width_, 1.0f / height_);
352 glDepthMask(GL_TRUE);
353 glDepthFunc(GL_ALWAYS);
354 glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
355
356 if (!is_gles31_compatible_) {
357 glUniform1i(edges1_shader_result_texture_float4_slot1_, 1);
358 }
359 glBindImageTextureEXT(1, working_color_texture_, 0, GL_FALSE, 0,
360 GL_WRITE_ONLY, GL_RGBA8);
361
362 glActiveTexture(GL_TEXTURE0);
363 glBindTexture(GL_TEXTURE_2D, source_texture);
364 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
365 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
366
367 glDrawArrays(GL_TRIANGLES, 0, 3);
368 }
369
370 // Detect edges Pass 1 (finish the previous pass edge processing).
371 // Do the culling of non-dominant local edges (leave mainly locally dominant
372 // edges) and merge Right and Bottom edges into TopRightBottomLeft
373
374 // Inputs
375 // g_src0Texture4Uint mini4_edge_texture_ tex1
376 // Outputs
377 // image2D g_resultTexture edge_texture_b image0
378 {
379 glUseProgram(edges1_shader_);
380 glUniform1f(0, 0.0f);
381 glUniform2f(1, 1.0f / width_, 1.0f / height_);
382 glDepthMask(GL_FALSE);
383 glDepthFunc(GL_LESS);
384 glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
385
386 if (!is_gles31_compatible_) {
387 glUniform1i(edges1_shader_result_texture_, 0);
388 }
389 glBindImageTextureEXT(0, edge_texture_b, 0, GL_FALSE, 0, GL_WRITE_ONLY,
390 edge_format);
391
392 glActiveTexture(GL_TEXTURE1);
393 glBindTexture(GL_TEXTURE_2D, mini4_edge_texture_);
394 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
395 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
396
397 glDrawArrays(GL_TRIANGLES, 0, 3);
398 }
399
400 // - Combine RightBottom (.xy) edges from previous pass into
401 // RightBottomLeftTop (.xyzw) edges and output it into the mask (have to
402 // fill in the whole buffer including empty ones for the line length
403 // detection to work correctly).
404 // - On all pixels with any edge, input buffer into a temporary color buffer
405 // needed for correct blending in the next pass (other pixels not needed
406 // so not copied to avoid bandwidth use).
407 // - On all pixels with 2 or more edges output positive depth mask for the
408 // next pass.
409
410 // Inputs
411 // g_src0TextureFlt edge_texture_b tex1 //ps
412 // Outputs
413 // image2D g_resultTextureSlot2 edge_texture_a image2
414 // gl_FragDepth mini4_edge_texture_ fbo.depth
415 {
416 // Combine edges: each pixel will now contain info on all (top, right,
417 // bottom, left) edges; also create depth mask as above depth and mark
418 // potential Z sAND also copy source color data but only on edge pixels
419 glUseProgram(edges_combine_shader_);
420 glUniform1f(0, 1.0f);
421 glUniform2f(1, 1.0f / width_, 1.0f / height_);
422 glDepthMask(GL_TRUE);
423 glDepthFunc(GL_ALWAYS);
424 glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
425
426 if (!is_gles31_compatible_) {
427 glUniform1i(edges_combine_shader_result_texture_float4_slot1_, 1);
428 glUniform1i(edges_combine_shader_result_texture_slot2_, 2);
429 }
430 glBindImageTextureEXT(1, dest_texture, 0, GL_FALSE, 0, GL_WRITE_ONLY,
431 GL_RGBA8);
432 glBindImageTextureEXT(2, edge_texture_a, 0, GL_FALSE, 0, GL_WRITE_ONLY,
433 edge_format);
434
435 glActiveTexture(GL_TEXTURE1);
436 glBindTexture(GL_TEXTURE_2D, edge_texture_b);
437 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
438 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
439
440 glDrawArrays(GL_TRIANGLES, 0, 3);
441 }
442
443 // Using depth mask and [earlydepthstencil] to work on pixels with 2, 3, 4
444 // edges:
445 // - First blend simple blur map for 2,3,4 edge pixels
446 // - Then do the lines (line length counter -should- guarantee no overlap
447 // with other pixels - pixels with 1 edge are excluded in the previous
448 // pass and the pixels with 2 parallel edges are excluded in the simple
449 // blur)
450
451 // Inputs
452 // g_screenTexture working_color_texture_ tex0
453 // g_src0TextureFlt edge_texture_a tex1 //ps
454 // sampled
455 // Outputs
456 // g_resultTextureFlt4Slot1 dest_texture image1
457 // gl_FragDepth mini4_edge_texture_ fbo.depth
458 {
459 glUseProgram(process_and_apply_shader_);
460 glUniform1f(0, 0.0f);
461 glUniform2f(1, 1.0f / width_, 1.0f / height_);
462 glDepthMask(GL_FALSE);
463 glDepthFunc(GL_LESS);
464 glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
465
466 if (!is_gles31_compatible_) {
467 glUniform1i(process_and_apply_shader_result_texture_float4_slot1_, 1);
468 }
469 glBindImageTextureEXT(1, dest_texture, 0, GL_FALSE, 0, GL_WRITE_ONLY,
470 GL_RGBA8);
471
472 glActiveTexture(GL_TEXTURE0);
473 glBindTexture(GL_TEXTURE_2D, working_color_texture_);
474 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
475 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
476
477 glActiveTexture(GL_TEXTURE1);
478 glBindTexture(GL_TEXTURE_2D, edge_texture_a);
479 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
480 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
481
482 glDrawArrays(GL_TRIANGLES, 0, 3);
483 }
484
485 glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
486 glDisable(GL_DEPTH_TEST);
487 glDepthMask(GL_FALSE);
488 glActiveTexture(GL_TEXTURE0);
489 }
490
491 void ApplyFramebufferAttachmentCMAAINTELResourceManager::OnSize(GLint width,
492 GLint height) {
493 if (height_ == height && width_ == width)
494 return;
495
496 ReleaseTextures();
497
498 height_ = height;
499 width_ = width;
500
501 glGenFramebuffersEXT(1, &copy_framebuffer_);
502 glGenTextures(1, &rgba8_texture_);
503 glBindTexture(GL_TEXTURE_2D, rgba8_texture_);
504 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_RGBA8, width, height);
505
506 // Edges texture - R8
507 // OpenGLES has no single component 8/16-bit image support, so needs to be R32
508 // Although CHT does support R8.
509 GLenum edge_format = supports_r8_image_ ? GL_R8 : GL_R32F;
510 glGenTextures(1, &edges0_texture_);
511 glBindTexture(GL_TEXTURE_2D, edges0_texture_);
512 glTexStorage2DEXT(GL_TEXTURE_2D, 1, edge_format, width, height);
513
514 glGenTextures(1, &edges1_texture_);
515 glBindTexture(GL_TEXTURE_2D, edges1_texture_);
516 glTexStorage2DEXT(GL_TEXTURE_2D, 1, edge_format, width, height);
517
518 // Color working texture - RGBA8
519 glGenTextures(1, &working_color_texture_);
520 glBindTexture(GL_TEXTURE_2D, working_color_texture_);
521 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_RGBA8, width, height);
522
523 // Half*half compressed 4-edge-per-pixel texture - RGBA8
524 glGenTextures(1, &mini4_edge_texture_);
525 glBindTexture(GL_TEXTURE_2D, mini4_edge_texture_);
526 GLenum format = GL_RGBA8UI;
527 if (!supports_usampler_) {
528 format = GL_RGBA8;
529 }
530 glTexStorage2DEXT(GL_TEXTURE_2D, 1, format, (width + 1) / 2,
531 (height + 1) / 2);
532
533 // Depth
534 glGenTextures(1, &mini4_edge_depth_texture_);
535 glBindTexture(GL_TEXTURE_2D, mini4_edge_depth_texture_);
536 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_DEPTH_COMPONENT16, (width + 1) / 2,
537 (height + 1) / 2);
538
539 // Create the FBO
540 glGenFramebuffersEXT(1, &cmaa_framebuffer_);
541 glBindFramebufferEXT(GL_FRAMEBUFFER, cmaa_framebuffer_);
542
543 // We need to clear the textures before they are first used.
544 // The algorithm self-clears them later.
545 glViewport(0, 0, width_, height_);
546 glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
547
548 glBindFramebufferEXT(GL_FRAMEBUFFER, cmaa_framebuffer_);
549 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
550 edges0_texture_, 0);
551 glClear(GL_COLOR_BUFFER_BIT);
552
553 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
554 edges1_texture_, 0);
555 glClear(GL_COLOR_BUFFER_BIT);
556
557 textures_initialized_ = true;
558 }
559
560 void ApplyFramebufferAttachmentCMAAINTELResourceManager::ReleaseTextures() {
561 if (textures_initialized_) {
562 glDeleteFramebuffersEXT(1, &copy_framebuffer_);
563 glDeleteFramebuffersEXT(1, &cmaa_framebuffer_);
564 glDeleteTextures(1, &rgba8_texture_);
565 glDeleteTextures(1, &edges0_texture_);
566 glDeleteTextures(1, &edges1_texture_);
567 glDeleteTextures(1, &mini4_edge_texture_);
568 glDeleteTextures(1, &mini4_edge_depth_texture_);
569 glDeleteTextures(1, &working_color_texture_);
570 }
571 textures_initialized_ = false;
572 }
573
574 void ApplyFramebufferAttachmentCMAAINTELResourceManager::CopyTexture(
575 GLint source,
576 GLint dest,
577 bool via_fbo) {
578 glViewport(0, 0, width_, height_);
579 glActiveTexture(GL_TEXTURE0);
580 glBindTexture(GL_TEXTURE_2D, source);
581
582 if (!via_fbo) {
583 glUseProgram(copy_to_image_shader_);
584 if (!is_gles31_compatible_) {
585 glUniform1i(copy_to_image_shader_outTexture_, 0);
586 }
587 glBindImageTextureEXT(0, dest, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8);
588 } else {
589 glDisable(GL_DEPTH_TEST);
590 glDisable(GL_STENCIL_TEST);
591 glDisable(GL_CULL_FACE);
592 glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
593 glDepthMask(GL_FALSE);
594 glDisable(GL_BLEND);
595 glUseProgram(copy_to_framebuffer_shader_);
596 }
597
598 glDrawArrays(GL_TRIANGLES, 0, 3);
599 glUseProgram(0);
600 glBindTexture(GL_TEXTURE_2D, 0);
601 }
602
603 GLuint ApplyFramebufferAttachmentCMAAINTELResourceManager::CreateProgram(
604 const char* defines,
605 const char* vs_source,
606 const char* fs_source) {
607 GLuint program = glCreateProgram();
608
609 GLuint vs = CreateShader(GL_VERTEX_SHADER, defines, vs_source);
610 GLuint fs = CreateShader(GL_FRAGMENT_SHADER, defines, fs_source);
611
612 glAttachShader(program, vs);
613 glDeleteShader(vs);
614 glAttachShader(program, fs);
615 glDeleteShader(fs);
616
617 glLinkProgram(program);
618 GLint link_status;
619 glGetProgramiv(program, GL_LINK_STATUS, &link_status);
620
621 if (link_status == 0) {
622 #if DCHECK_IS_ON()
623 GLint info_log_length;
624 glGetProgramiv(program, GL_INFO_LOG_LENGTH, &info_log_length);
625 std::vector<GLchar> info_log(info_log_length);
626 glGetProgramInfoLog(program, static_cast<GLsizei>(info_log.size()), NULL,
627 &info_log[0]);
628 DLOG(ERROR) << "ApplyFramebufferAttachmentCMAAINTEL: "
629 << "program link failed: " << &info_log[0];
630 #endif
631 glDeleteProgram(program);
632 program = 0;
633 }
634
635 return program;
636 }
637
638 GLuint ApplyFramebufferAttachmentCMAAINTELResourceManager::CreateShader(
639 GLenum type,
640 const char* defines,
641 const char* source) {
642 GLuint shader = glCreateShader(type);
643
644 const char header_es31[] =
645 "#version 310 es \n";
646 const char header_gl30[] =
647 "#version 130 \n"
648 "#extension GL_ARB_shading_language_420pack : require \n"
649 "#extension GL_ARB_texture_gather : require \n"
650 "#extension GL_ARB_explicit_uniform_location : require \n"
651 "#extension GL_ARB_explicit_attrib_location : require \n"
652 "#extension GL_ARB_shader_image_load_store : require \n";
653
654 const char* header = NULL;
655 if (is_gles31_compatible_) {
656 header = header_es31;
657 } else {
658 header = header_gl30;
659 }
660
661 const char* source_array[4] = {header, defines, "\n", source};
662 glShaderSource(shader, 4, source_array, NULL);
663
664 glCompileShader(shader);
665
666 GLint compile_result;
667 glGetShaderiv(shader, GL_COMPILE_STATUS, &compile_result);
668 if (compile_result == 0) {
669 #if DCHECK_IS_ON()
670 GLint info_log_length;
671 glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &info_log_length);
672 std::vector<GLchar> info_log(info_log_length);
673 glGetShaderInfoLog(shader, static_cast<GLsizei>(info_log.size()), NULL,
674 &info_log[0]);
675 DLOG(ERROR) << "ApplyFramebufferAttachmentCMAAINTEL: "
676 << "shader compilation failed: "
677 << (type == GL_VERTEX_SHADER
678 ? "GL_VERTEX_SHADER"
679 : (type == GL_FRAGMENT_SHADER ? "GL_FRAGMENT_SHADER"
680 : "UNKNOWN_SHADER"))
681 << " shader compilation failed: " << &info_log[0];
682 #endif
683 glDeleteShader(shader);
684 shader = 0;
685 }
686
687 return shader;
688 }
689
690 // Shaders used in the CMAA algorithm.
691 const char ApplyFramebufferAttachmentCMAAINTELResourceManager::vert_str_[] =
692 "precision highp float; \n"
693 "layout(location = 0) uniform float g_Depth; \n"
694 "// No input data. \n"
695 "// Verts are autogenerated. \n"
696 "// \n"
697 "// vertexID 0,1,2 should generate \n"
698 "// POS: (-1,-1), (+3,-1), (-1,+3) \n"
699 "// \n"
700 "// This generates a triangle that completely covers the -1->1 viewport \n"
701 "// \n"
702 "void main() \n"
703 "{ \n"
704 " float x = -1.0 + float((gl_VertexID & 1) << 2); \n"
705 " float y = -1.0 + float((gl_VertexID & 2) << 1); \n"
706 " gl_Position = vec4(x, y, g_Depth, 1.0); \n"
707 "} \n"
708 " \n";
709
710 const char ApplyFramebufferAttachmentCMAAINTELResourceManager::cmaa_frag_s1_[] =
711 "precision highp float; \n"
712 "precision highp int; \n"
713 " \n"
714 "#define SETTINGS_ALLOW_SHORT_Zs 1 \n"
715 "#define EDGE_DETECT_THRESHOLD 13.0f \n"
716 " \n"
717 "#define saturate(x) clamp((x), 0.0, 1.0) \n"
718 " \n"
719 "// bind to location 0 \n"
720 "layout(location = 0) uniform float g_Depth; \n"
721 "// bind to a uniform buffer bind point 0 \n"
722 "layout(location = 1) uniform vec2 g_OneOverScreenSize; \n"
723 "#ifndef EDGE_DETECT_THRESHOLD \n"
724 "layout(location = 2) uniform float g_ColorThreshold; \n"
725 "#endif \n"
726 " \n"
727 "#ifdef SUPPORTS_USAMPLER2D \n"
728 "#define USAMPLER usampler2D \n"
729 "#define UVEC4 uvec4 \n"
730 "#define LOAD_UINT(arg) arg \n"
731 "#define STORE_UVEC4(arg) arg \n"
732 "#else \n"
733 "#define USAMPLER sampler2D \n"
734 "#define UVEC4 vec4 \n"
735 "#define LOAD_UINT(arg) uint(arg * 255.0f) \n"
736 "#define STORE_UVEC4(arg) vec4(float(arg.x) / 255.0f, \n"
737 " float(arg.y) / 255.0f, \n"
738 " float(arg.z) / 255.0f, \n"
739 " float(arg.w) / 255.0f) \n"
740 "#endif \n"
741 " \n"
742 "// bind to texture stage 0/1 \n"
743 "layout(binding = 0) uniform highp sampler2D g_screenTexture; \n"
744 "layout(binding = 1) uniform highp sampler2D g_src0TextureFlt; \n"
745 "layout(binding = 1) uniform highp USAMPLER g_src0Texture4Uint; \n"
746 " \n"
747 "// bind to image stage 0/1/2 \n"
748 "#ifdef GL_ES \n"
749 "layout(binding = 0, EDGE_READ_FORMAT) restrict writeonly uniform highp \n"
750 " image2D g_resultTexture; \n"
751 "layout(binding = 1, rgba8) restrict writeonly uniform highp \n"
752 " image2D g_resultTextureFlt4Slot1; \n"
753 "layout(binding = 2, EDGE_READ_FORMAT) restrict writeonly uniform highp \n"
754 " image2D g_resultTextureSlot2; \n"
755 "#else \n"
756 "layout(EDGE_READ_FORMAT) restrict writeonly uniform highp \n"
757 " image2D g_resultTexture; \n"
758 "layout(rgba8) restrict writeonly uniform highp \n"
759 " image2D g_resultTextureFlt4Slot1; \n"
760 "layout(EDGE_READ_FORMAT) restrict writeonly uniform highp \n"
761 " image2D g_resultTextureSlot2; \n"
762 "#endif \n"
763 " \n"
764 "// Constants \n"
765 "const vec4 c_lumWeights = vec4(0.2126f, 0.7152f, 0.0722f, 0.0000f); \n"
766 " \n"
767 "#ifdef EDGE_DETECT_THRESHOLD \n"
768 "const float c_ColorThreshold = 1.0f / EDGE_DETECT_THRESHOLD; \n"
769 "#endif \n"
770 " \n"
771 "// Must be even number; Will work with ~16 pretty good too for \n"
772 "// additional performance, or with ~64 for highest quality. \n"
773 "const int c_maxLineLength = 64; \n"
774 " \n"
775 "const vec4 c_edgeDebugColours[5] = vec4[5](vec4(0.5, 0.5, 0.5, 0.4), \n"
776 " vec4(1.0, 0.1, 1.0, 0.8), \n"
777 " vec4(0.9, 0.0, 0.0, 0.8), \n"
778 " vec4(0.0, 0.9, 0.0, 0.8), \n"
779 " vec4(0.0, 0.0, 0.9, 0.8)); \n"
780 " \n"
781 "// this isn't needed if colour UAV is _SRGB but that doesn't work \n"
782 "// everywhere \n"
783 "#ifdef IN_GAMMA_CORRECT_MODE \n"
784 "///////////////////////////////////////////////////////////////////////\n"
785 "// \n"
786 "// SRGB Helper Functions taken from D3DX_DXGIFormatConvert.inl \n"
787 "float D3DX_FLOAT_to_SRGB(float val) { \n"
788 " if (val < 0.0031308f) \n"
789 " val *= 12.92f; \n"
790 " else { \n"
791 " val = 1.055f * pow(val, 1.0f / 2.4f) - 0.055f; \n"
792 " } \n"
793 " return val; \n"
794 "} \n"
795 "// \n"
796 "vec3 D3DX_FLOAT3_to_SRGB(vec3 val) { \n"
797 " vec3 outVal; \n"
798 " outVal.x = D3DX_FLOAT_to_SRGB(val.x); \n"
799 " outVal.y = D3DX_FLOAT_to_SRGB(val.y); \n"
800 " outVal.z = D3DX_FLOAT_to_SRGB(val.z); \n"
801 " return outVal; \n"
802 "} \n"
803 "// \n"
804 "///////////////////////////////////////////////////////////////////////\n"
805 "#endif // IN_GAMMA_CORRECT_MODE \n"
806 " \n"
807 "// how .rgba channels from the edge texture maps to pixel edges: \n"
808 "// \n"
809 "// A - 0x08 \n"
810 "// |¯¯¯¯¯¯¯¯¯| \n"
811 "// | | \n"
812 "// 0x04 - B | pixel | R - 0x01 \n"
813 "// | | \n"
814 "// |_________| \n"
815 "// G - 0x02 \n"
816 "// \n"
817 "// (A - there's an edge between us and a pixel above us) \n"
818 "// (R - there's an edge between us and a pixel to the right) \n"
819 "// (G - there's an edge between us and a pixel at the bottom) \n"
820 "// (B - there's an edge between us and a pixel to the left) \n"
821 " \n"
822 "// Expecting values of 1 and 0 only! \n"
823 "uint PackEdge(uvec4 edges) { \n"
824 " return (edges.x << 0u) | (edges.y << 1u) | (edges.z << 2u) | \n"
825 " (edges.w << 3u); \n"
826 "} \n"
827 " \n"
828 "uvec4 UnpackEdge(uint value) { \n"
829 " uvec4 ret; \n"
830 " ret.x = (value & 0x01u) != 0u ? 1u : 0u; \n"
831 " ret.y = (value & 0x02u) != 0u ? 1u : 0u; \n"
832 " ret.z = (value & 0x04u) != 0u ? 1u : 0u; \n"
833 " ret.w = (value & 0x08u) != 0u ? 1u : 0u; \n"
834 " return ret; \n"
835 "} \n"
836 " \n"
837 "uint PackZ(const uvec2 screenPos, const bool invertedZShape) { \n"
838 " uint retVal = screenPos.x | (screenPos.y << 15u); \n"
839 " if (invertedZShape) \n"
840 " retVal |= (1u << 30u); \n"
841 " return retVal; \n"
842 "} \n"
843 " \n"
844 "void UnpackZ(uint packedZ, out uvec2 screenPos, \n"
845 " out bool invertedZShape) \n"
846 "{ \n"
847 " screenPos.x = packedZ & 0x7FFFu; \n"
848 " screenPos.y = (packedZ >> 15u) & 0x7FFFu; \n"
849 " invertedZShape = (packedZ >> 30u) == 1u; \n"
850 "} \n"
851 " \n"
852 "uint PackZ(const uvec2 screenPos, \n"
853 " const bool invertedZShape, \n"
854 " const bool horizontal) { \n"
855 " uint retVal = screenPos.x | (screenPos.y << 15u); \n"
856 " if (invertedZShape) \n"
857 " retVal |= (1u << 30u); \n"
858 " if (horizontal) \n"
859 " retVal |= (1u << 31u); \n"
860 " return retVal; \n"
861 "} \n"
862 " \n"
863 "void UnpackZ(uint packedZ, \n"
864 " out uvec2 screenPos, \n"
865 " out bool invertedZShape, \n"
866 " out bool horizontal) { \n"
867 " screenPos.x = packedZ & 0x7FFFu; \n"
868 " screenPos.y = (packedZ >> 15u) & 0x7FFFu; \n"
869 " invertedZShape = (packedZ & (1u << 30u)) != 0u; \n"
870 " horizontal = (packedZ & (1u << 31u)) != 0u; \n"
871 "} \n"
872 " \n"
873 "vec4 PackBlurAAInfo(ivec2 pixelPos, uint shapeType) { \n"
874 " uint packedEdges = uint( \n"
875 " texelFetch(g_src0TextureFlt, pixelPos, 0).r * 255.5); \n"
876 " \n"
877 " float retval = float(packedEdges + (shapeType << 4u)); \n"
878 " \n"
879 " return vec4(retval / 255.0); \n"
880 "} \n"
881 " \n"
882 "void UnpackBlurAAInfo(float packedValue, out uint edges, \n"
883 " out uint shapeType) { \n"
884 " uint packedValueInt = uint(packedValue * 255.5); \n"
885 " edges = packedValueInt & 0xFu; \n"
886 " shapeType = packedValueInt >> 4u; \n"
887 "} \n"
888 " \n"
889 "float EdgeDetectColorCalcDiff(vec3 colorA, vec3 colorB) { \n"
890 "#ifdef IN_BGR_MODE \n"
891 " vec3 LumWeights = c_lumWeights.bgr; \n"
892 "#else \n"
893 " vec3 LumWeights = c_lumWeights.rgb; \n"
894 "#endif \n"
895 " \n"
896 " return dot(abs(colorA.rgb - colorB.rgb), LumWeights); \n"
897 "} \n"
898 " \n"
899 "bool EdgeDetectColor(vec3 colorA, vec3 colorB) { \n"
900 "#ifdef EDGE_DETECT_THRESHOLD \n"
901 " return EdgeDetectColorCalcDiff(colorA, colorB) > c_ColorThreshold; \n"
902 "#else \n"
903 " return EdgeDetectColorCalcDiff(colorA, colorB) > g_ColorThreshold; \n"
904 "#endif \n"
905 "} \n"
906 " \n"
907 "void FindLineLength(out int lineLengthLeft, \n"
908 " out int lineLengthRight, \n"
909 " ivec2 screenPos, \n"
910 " const bool horizontal, \n"
911 " const bool invertedZShape, \n"
912 " const ivec2 stepRight) { \n"
913 " // TODO: there must be a cleaner and faster way to get to these - \n"
914 " // a precalculated array indexing maybe? \n"
915 " uint maskLeft, bitsContinueLeft, maskRight, bitsContinueRight; \n"
916 " { \n"
917 " // Horizontal (vertical is the same, just rotated 90º \n"
918 " // counter-clockwise) \n"
919 " // Inverted Z case: // Normal Z case: \n"
920 " // __ // __ \n"
921 " // X| // X| \n"
922 " // -- // -- \n"
923 " // \n"
924 " uint maskTraceLeft, maskTraceRight; \n"
925 " uint maskStopLeft, maskStopRight; \n"
926 " if (horizontal) { \n"
927 " if (invertedZShape) { \n"
928 " maskTraceLeft = 0x02u; // tracing bottom edge \n"
929 " maskTraceRight = 0x08u; // tracing top edge \n"
930 " } else { \n"
931 " maskTraceLeft = 0x08u; // tracing top edge \n"
932 " maskTraceRight = 0x02u; // tracing bottom edge \n"
933 " } \n"
934 " maskStopLeft = 0x01u; // stop on right edge \n"
935 " maskStopRight = 0x04u; // stop on left edge \n"
936 " } else { \n"
937 " if (invertedZShape) { \n"
938 " maskTraceLeft = 0x01u; // tracing right edge \n"
939 " maskTraceRight = 0x04u; // tracing left edge \n"
940 " } else { \n"
941 " maskTraceLeft = 0x04u; // tracing left edge \n"
942 " maskTraceRight = 0x01u; // tracing right edge \n"
943 " } \n"
944 " maskStopLeft = 0x08u; // stop on top edge \n"
945 " maskStopRight = 0x02u; // stop on bottom edge \n"
946 " } \n"
947 " \n"
948 " maskLeft = maskTraceLeft | maskStopLeft; \n"
949 " bitsContinueLeft = maskTraceLeft; \n"
950 " maskRight = maskTraceRight | maskStopRight; \n"
951 " bitsContinueRight = maskTraceRight; \n"
952 " } \n"
953 "///////////////////////////////////////////////////////////////////////\n"
954 " \n"
955 "#ifdef SETTINGS_ALLOW_SHORT_Zs \n"
956 " int i = 1; \n"
957 "#else \n"
958 " int i = 2; // starting from 2 because we already know it's at least 2\n"
959 "#endif \n"
960 " for (; i < c_maxLineLength; i++) { \n"
961 " uint edgeLeft = uint( \n"
962 " texelFetch(g_src0TextureFlt, \n"
963 " ivec2(screenPos.xy - stepRight * i), 0).r * 255.5); \n"
964 " uint edgeRight = uint( \n"
965 " texelFetch(g_src0TextureFlt, \n"
966 " ivec2(screenPos.xy + stepRight * (i + 1)), \n"
967 " 0).r * 255.5); \n"
968 " \n"
969 " // stop on encountering 'stopping' edge (as defined by masks) \n"
970 " int stopLeft = (edgeLeft & maskLeft) != bitsContinueLeft ? 1 : 0; \n"
971 " int stopRight = \n"
972 " (edgeRight & maskRight) != bitsContinueRight ? 1 : 0; \n"
973 " \n"
974 " if (bool(stopLeft) || bool(stopRight)) { \n"
975 " lineLengthLeft = 1 + i - stopLeft; \n"
976 " lineLengthRight = 1 + i - stopRight; \n"
977 " return; \n"
978 " } \n"
979 " } \n"
980 " lineLengthLeft = lineLengthRight = i; \n"
981 " return; \n"
982 "} \n"
983 " \n"
984 "void ProcessDetectedZ(ivec2 screenPos, bool horizontal, \n"
985 " bool invertedZShape) { \n"
986 " int lineLengthLeft, lineLengthRight; \n"
987 " \n"
988 " ivec2 stepRight = (horizontal) ? (ivec2(1, 0)) : (ivec2(0, -1)); \n"
989 " vec2 blendDir = (horizontal) ? (vec2(0, -1)) : (vec2(-1, 0)); \n"
990 " \n"
991 " FindLineLength(lineLengthLeft, lineLengthRight, screenPos, \n"
992 " horizontal, invertedZShape, stepRight); \n"
993 " \n"
994 " vec2 pixelSize = g_OneOverScreenSize; \n"
995 " \n"
996 " float leftOdd = 0.15 * float(lineLengthLeft % 2); \n"
997 " float rightOdd = 0.15 * float(lineLengthRight % 2); \n"
998 " \n"
999 " int loopFrom = -int((lineLengthLeft + 1) / 2) + 1; \n"
1000 " int loopTo = int((lineLengthRight + 1) / 2); \n"
1001 " \n"
1002 " float totalLength = float(loopTo - loopFrom) + 1.0 - leftOdd - \n"
1003 " rightOdd; \n"
1004 " \n"
1005 " for (int i = loopFrom; i <= loopTo; i++) { \n"
1006 " highp ivec2 pixelPos = screenPos + stepRight * i; \n"
1007 " vec2 pixelPosFlt = vec2(float(pixelPos.x) + 0.5, \n"
1008 " float(pixelPos.y) + 0.5); \n"
1009 " \n"
1010 "#ifdef DEBUG_OUTPUT_AAINFO \n"
1011 " imageStore(g_resultTextureSlot2, pixelPos, \n"
1012 " PackBlurAAInfo(pixelPos, 1u)); \n"
1013 "#endif \n"
1014 " \n"
1015 " float m = (float(i) + 0.5 - leftOdd - float(loopFrom)) / \n"
1016 " totalLength; \n"
1017 " m = saturate(m); \n"
1018 " float k = m - ((i > 0) ? 1.0 : 0.0); \n"
1019 " k = (invertedZShape) ? (-k) : (k); \n"
1020 " \n"
1021 " vec4 color = textureLod(g_screenTexture, \n"
1022 " (pixelPosFlt + blendDir * k) * pixelSize, \n"
1023 " 0.0); \n"
1024 " \n"
1025 "#ifdef IN_GAMMA_CORRECT_MODE \n"
1026 " color.rgb = D3DX_FLOAT3_to_SRGB(color.rgb); \n"
1027 "#endif \n"
1028 " imageStore(g_resultTextureFlt4Slot1, pixelPos, color); \n"
1029 " } \n"
1030 "} \n"
1031 " \n"
1032 "vec4 CalcDbgDisplayColor(const vec4 blurMap) { \n"
1033 " vec3 pixelC = vec3(0.0, 0.0, 0.0); \n"
1034 " vec3 pixelL = vec3(0.0, 0.0, 1.0); \n"
1035 " vec3 pixelT = vec3(1.0, 0.0, 0.0); \n"
1036 " vec3 pixelR = vec3(0.0, 1.0, 0.0); \n"
1037 " vec3 pixelB = vec3(0.8, 0.8, 0.0); \n"
1038 " \n"
1039 " const float centerWeight = 1.0; \n"
1040 " float fromBelowWeight = (1.0 / (1.0 - blurMap.x)) - 1.0; \n"
1041 " float fromAboveWeight = (1.0 / (1.0 - blurMap.y)) - 1.0; \n"
1042 " float fromRightWeight = (1.0 / (1.0 - blurMap.z)) - 1.0; \n"
1043 " float fromLeftWeight = (1.0 / (1.0 - blurMap.w)) - 1.0; \n"
1044 " \n"
1045 " float weightSum = centerWeight + dot(vec4(fromBelowWeight, \n"
1046 " fromAboveWeight, \n"
1047 " fromRightWeight, \n"
1048 " fromLeftWeight), \n"
1049 " vec4(1, 1, 1, 1)); \n"
1050 " \n"
1051 " vec4 pixel; \n"
1052 " \n"
1053 " pixel.rgb = pixelC.rgb + fromAboveWeight * pixelT + \n"
1054 " fromBelowWeight * pixelB + \n"
1055 " fromLeftWeight * pixelL + \n"
1056 " fromRightWeight * pixelR; \n"
1057 " pixel.rgb /= weightSum; \n"
1058 " \n"
1059 " pixel.a = dot(pixel.rgb, vec3(1, 1, 1)) * 100.0; \n"
1060 " \n"
1061 " return saturate(pixel); \n"
1062 "} \n"
1063 " \n"
1064 "#ifdef DETECT_EDGES1 \n"
1065 "layout(location = 0) out UVEC4 outEdges; \n"
1066 "void DetectEdges1() { \n"
1067 " uvec4 outputEdges; \n"
1068 " ivec2 screenPosI = ivec2(gl_FragCoord.xy) * ivec2(2, 2); \n"
1069 " \n"
1070 " // .rgb contains colour, .a contains flag whether to output it to \n"
1071 " // working colour texture \n"
1072 " vec4 pixel00 = texelFetch(g_screenTexture, screenPosI.xy, 0); \n"
1073 " vec4 pixel10 = \n"
1074 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(1, 0));\n"
1075 " vec4 pixel20 = \n"
1076 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(2, 0));\n"
1077 " vec4 pixel01 = \n"
1078 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(0, 1));\n"
1079 " vec4 pixel11 = \n"
1080 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(1, 1));\n"
1081 " vec4 pixel21 = \n"
1082 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(2, 1));\n"
1083 " vec4 pixel02 = \n"
1084 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(0, 2));\n"
1085 " vec4 pixel12 = \n"
1086 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(1, 2));\n"
1087 " \n"
1088 " float storeFlagPixel00 = 0.0; \n"
1089 " float storeFlagPixel10 = 0.0; \n"
1090 " float storeFlagPixel20 = 0.0; \n"
1091 " float storeFlagPixel01 = 0.0; \n"
1092 " float storeFlagPixel11 = 0.0; \n"
1093 " float storeFlagPixel21 = 0.0; \n"
1094 " float storeFlagPixel02 = 0.0; \n"
1095 " float storeFlagPixel12 = 0.0; \n"
1096 " \n"
1097 " vec2 et; \n"
1098 " \n"
1099 "#ifdef EDGE_DETECT_THRESHOLD \n"
1100 " float threshold = c_ColorThreshold; \n"
1101 "#else \n"
1102 " float threshold = g_ColorThreshold; \n"
1103 "#endif \n"
1104 " \n"
1105 " { \n"
1106 " et.x = EdgeDetectColorCalcDiff(pixel00.rgb, pixel10.rgb); \n"
1107 " et.y = EdgeDetectColorCalcDiff(pixel00.rgb, pixel01.rgb); \n"
1108 " et = saturate(et - threshold); \n"
1109 " ivec2 eti = ivec2(et * 15.0 + 0.99); \n"
1110 " outputEdges.x = uint(eti.x | (eti.y << 4)); \n"
1111 " \n"
1112 " storeFlagPixel00 += et.x; \n"
1113 " storeFlagPixel00 += et.y; \n"
1114 " storeFlagPixel10 += et.x; \n"
1115 " storeFlagPixel01 += et.y; \n"
1116 " } \n"
1117 " \n"
1118 " { \n"
1119 " et.x = EdgeDetectColorCalcDiff(pixel10.rgb, pixel20.rgb); \n"
1120 " et.y = EdgeDetectColorCalcDiff(pixel10.rgb, pixel11.rgb); \n"
1121 " et = saturate(et - threshold); \n"
1122 " ivec2 eti = ivec2(et * 15.0 + 0.99); \n"
1123 " outputEdges.y = uint(eti.x | (eti.y << 4)); \n"
1124 " \n"
1125 " storeFlagPixel10 += et.x; \n"
1126 " storeFlagPixel10 += et.y; \n"
1127 " storeFlagPixel20 += et.x; \n"
1128 " storeFlagPixel11 += et.y; \n"
1129 " } \n"
1130 " \n"
1131 " { \n"
1132 " et.x = EdgeDetectColorCalcDiff(pixel01.rgb, pixel11.rgb); \n"
1133 " et.y = EdgeDetectColorCalcDiff(pixel01.rgb, pixel02.rgb); \n"
1134 " et = saturate(et - threshold); \n"
1135 " ivec2 eti = ivec2(et * 15.0 + 0.99); \n"
1136 " outputEdges.z = uint(eti.x | (eti.y << 4)); \n"
1137 " \n"
1138 " storeFlagPixel01 += et.x; \n"
1139 " storeFlagPixel01 += et.y; \n"
1140 " storeFlagPixel11 += et.x; \n"
1141 " storeFlagPixel02 += et.y; \n"
1142 " } \n"
1143 " \n"
1144 " { \n"
1145 " et.x = EdgeDetectColorCalcDiff(pixel11.rgb, pixel21.rgb); \n"
1146 " et.y = EdgeDetectColorCalcDiff(pixel11.rgb, pixel12.rgb); \n"
1147 " et = saturate(et - threshold); \n"
1148 " ivec2 eti = ivec2(et * 15.0 + 0.99); \n"
1149 " outputEdges.w = uint(eti.x | (eti.y << 4)); \n"
1150 " \n"
1151 " storeFlagPixel11 += et.x; \n"
1152 " storeFlagPixel11 += et.y; \n"
1153 " storeFlagPixel21 += et.x; \n"
1154 " storeFlagPixel12 += et.y; \n"
1155 " } \n"
1156 " \n"
1157 " gl_FragDepth = any(bvec4(outputEdges)) ? 1.0 : 0.0; \n"
1158 " \n"
1159 " if (gl_FragDepth != 0.0) { \n"
1160 " if (storeFlagPixel00 != 0.0) \n"
1161 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(0, 0),\n"
1162 " pixel00); \n"
1163 " if (storeFlagPixel10 != 0.0) \n"
1164 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(1, 0),\n"
1165 " pixel10); \n"
1166 " if (storeFlagPixel20 != 0.0) \n"
1167 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(2, 0),\n"
1168 " pixel20); \n"
1169 " if (storeFlagPixel01 != 0.0) \n"
1170 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(0, 1),\n"
1171 " pixel01); \n"
1172 " if (storeFlagPixel02 != 0.0) \n"
1173 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(0, 2),\n"
1174 " pixel02); \n"
1175 " if (storeFlagPixel11 != 0.0) \n"
1176 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(1, 1),\n"
1177 " pixel11); \n"
1178 " if (storeFlagPixel21 != 0.0) \n"
1179 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(2, 1),\n"
1180 " pixel21); \n"
1181 " if (storeFlagPixel12 != 0.0) \n"
1182 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(1, 2),\n"
1183 " pixel12); \n"
1184 " } \n"
1185 " outEdges = STORE_UVEC4(outputEdges); \n"
1186 "} \n"
1187 "#endif // DETECT_EDGES1 \n"
1188 " \n"
1189 "vec2 UnpackThresholds(uint val) { \n"
1190 " return vec2(val & 0x0Fu, val >> 4u) / 15.0f; \n"
1191 "} \n"
1192 "// Keeps edges[0].xy only where >= 0.65 * mean + 0.35 * max of all edges.\n"
1193 "uint PruneNonDominantEdges(vec4 edges[3]) { \n"
1194 " vec4 maxE4 = vec4(0.0, 0.0, 0.0, 0.0); \n"
1195 " // avg: mean of the 3 * 4 edge strengths passed in. \n"
1196 " float avg = 0.0; \n"
1197 " \n"
1198 " for (int i = 0; i < 3; i++) { \n"
1199 " maxE4 = max(maxE4, edges[i]); \n"
1200 " // Accumulate (not assign): every vec4 adds its sum / 12 to the mean.\n"
1201 " avg += dot(edges[i], vec4(1, 1, 1, 1) / (3.0 * 4.0)); \n"
1202 " } \n"
1203 " \n"
1204 " vec2 maxE2 = max(maxE4.xy, maxE4.zw); \n"
1205 " float maxE = max(maxE2.x, maxE2.y); \n"
1206 " \n"
1207 " float threshold = avg * 0.65 + maxE * 0.35; \n"
1208 " \n"
1209 " // threshold = 0.0001; // this disables non-dominant edge pruning! \n"
1210 " \n"
1211 " uint cx = edges[0].x >= threshold ? 1u : 0u; \n"
1212 " uint cy = edges[0].y >= threshold ? 1u : 0u; \n"
1213 " return PackEdge(uvec4(cx, cy, 0, 0)); \n"
1214 "} \n"
1215 " \n"
1216 "void CollectEdges(int offX, \n"
1217 " int offY, \n"
1218 " out vec4 edges[3], \n"
1219 " const uint packedVals[6 * 6]) { \n"
1220 " vec2 pixelP0P0 = UnpackThresholds(packedVals[(offX)*6+(offY)]); \n"
1221 " vec2 pixelP1P0 = UnpackThresholds(packedVals[(offX+1)*6+(offY)]); \n"
1222 " vec2 pixelP0P1 = UnpackThresholds(packedVals[(offX)*6+(offY+1)]); \n"
1223 " vec2 pixelM1P0 = UnpackThresholds(packedVals[(offX-1)*6 +(offY)]); \n"
1224 " vec2 pixelP0M1 = UnpackThresholds(packedVals[(offX)*6+(offY-1)]); \n"
1225 " vec2 pixelP1M1 = UnpackThresholds(packedVals[(offX+1)*6 +(offY-1)]); \n"
1226 " vec2 pixelM1P1 = UnpackThresholds(packedVals[(offX-1)*6+(offY+1)]); \n"
1227 " \n"
1228 " edges[0].x = pixelP0P0.x; \n"
1229 " edges[0].y = pixelP0P0.y; \n"
1230 " edges[0].z = pixelP1P0.x; \n"
1231 " edges[0].w = pixelP1P0.y; \n"
1232 " edges[1].x = pixelP0P1.x; \n"
1233 " edges[1].y = pixelP0P1.y; \n"
1234 " edges[1].z = pixelM1P0.x; \n"
1235 " edges[1].w = pixelM1P0.y; \n"
1236 " edges[2].x = pixelP0M1.x; \n"
1237 " edges[2].y = pixelP0M1.y; \n"
1238 " edges[2].z = pixelP1M1.y; \n"
1239 " edges[2].w = pixelM1P1.x; \n"
1240 "} \n"
1241 "// Pass 2: prunes non-dominant edges per pixel of this fragment's 2x2 quad.\n"
1242 "#ifdef DETECT_EDGES2 \n"
1243 "layout(early_fragment_tests) in; \n"
1244 "void DetectEdges2() { \n"
1245 " ivec2 screenPosI = ivec2(gl_FragCoord.xy); \n"
1246 " \n"
1247 " // source : edge differences from previous pass \n"
1248 " uint packedVals[6 * 6]; \n"
1249 " \n"
1250 " // center pixel (our output) \n"
1251 " UVEC4 packedQ4 = texelFetch(g_src0Texture4Uint, screenPosI.xy, 0); \n"
1252 " packedVals[(2) * 6 + (2)] = LOAD_UINT(packedQ4.x); \n"
1253 " packedVals[(3) * 6 + (2)] = LOAD_UINT(packedQ4.y); \n"
1254 " packedVals[(2) * 6 + (3)] = LOAD_UINT(packedQ4.z); \n"
1255 " packedVals[(3) * 6 + (3)] = LOAD_UINT(packedQ4.w); \n"
1256 " \n"
1257 " vec4 edges[3]; \n"
1258 " if (bool(packedVals[(2) * 6 + (2)]) || \n"
1259 " bool(packedVals[(3) * 6 + (2)])) { \n"
1260 " UVEC4 packedQ1 = texelFetchOffset(g_src0Texture4Uint, \n"
1261 " screenPosI.xy, 0, ivec2(0, -1)); \n"
1262 " packedVals[(2) * 6 + (0)] = LOAD_UINT(packedQ1.x); \n"
1263 " packedVals[(3) * 6 + (0)] = LOAD_UINT(packedQ1.y); \n"
1264 " packedVals[(2) * 6 + (1)] = LOAD_UINT(packedQ1.z); \n"
1265 " packedVals[(3) * 6 + (1)] = LOAD_UINT(packedQ1.w); \n"
1266 " } \n"
1267 " \n"
1268 " if (bool(packedVals[(2) * 6 + (2)]) || \n"
1269 " bool(packedVals[(2) * 6 + (3)])) { \n"
1270 " UVEC4 packedQ3 = texelFetchOffset(g_src0Texture4Uint, \n"
1271 " screenPosI.xy, 0, ivec2(-1, 0)); \n"
1272 " packedVals[(0) * 6 + (2)] = LOAD_UINT(packedQ3.x); \n"
1273 " packedVals[(1) * 6 + (2)] = LOAD_UINT(packedQ3.y); \n"
1274 " packedVals[(0) * 6 + (3)] = LOAD_UINT(packedQ3.z); \n"
1275 " packedVals[(1) * 6 + (3)] = LOAD_UINT(packedQ3.w); \n"
1276 " } \n"
1277 " \n"
1278 " if (bool(packedVals[(2) * 6 + (2)])) { \n"
1279 " CollectEdges(2, 2, edges, packedVals); \n"
1280 " uint pe = PruneNonDominantEdges(edges); \n"
1281 " if (pe != 0u) { \n"
1282 " imageStore(g_resultTexture, 2 * screenPosI.xy + ivec2(0, 0), \n"
1283 " vec4(float(0x80u | pe) / 255.0, 0, 0, 0)); \n"
1284 " } \n"
1285 " } \n"
1286 " \n"
1287 " if (bool(packedVals[(3) * 6 + (2)]) || \n"
1288 " bool(packedVals[(3) * 6 + (3)])) { \n"
1289 " UVEC4 packedQ5 = texelFetchOffset(g_src0Texture4Uint, \n"
1290 " screenPosI.xy, 0, ivec2(1, 0)); \n"
1291 " packedVals[(4) * 6 + (2)] = LOAD_UINT(packedQ5.x); \n"
1292 " packedVals[(5) * 6 + (2)] = LOAD_UINT(packedQ5.y); \n"
1293 " packedVals[(4) * 6 + (3)] = LOAD_UINT(packedQ5.z); \n"
1294 " packedVals[(5) * 6 + (3)] = LOAD_UINT(packedQ5.w); \n"
1295 " } \n"
1296 " \n"
1297 " if (bool(packedVals[(3) * 6 + (2)])) { \n"
1298 " UVEC4 packedQ2 = texelFetchOffset(g_src0Texture4Uint, \n"
1299 " screenPosI.xy, 0, ivec2(1, -1)); \n"
1300 " packedVals[(4) * 6 + (0)] = LOAD_UINT(packedQ2.x); \n"
1301 " packedVals[(5) * 6 + (0)] = LOAD_UINT(packedQ2.y); \n"
1302 " packedVals[(4) * 6 + (1)] = LOAD_UINT(packedQ2.z); \n"
1303 " packedVals[(5) * 6 + (1)] = LOAD_UINT(packedQ2.w); \n"
1304 " \n"
1305 " CollectEdges(3, 2, edges, packedVals); \n"
1306 " uint pe = PruneNonDominantEdges(edges); \n"
1307 " if (pe != 0u) { \n"
1308 " imageStore(g_resultTexture, 2 * screenPosI.xy + ivec2(1, 0), \n"
1309 " vec4(float(0x80u | pe) / 255.0, 0, 0, 0)); \n"
1310 " } \n"
1311 " } \n"
1312 " \n"
1313 " if (bool(packedVals[(2) * 6 + (3)]) || \n"
1314 " bool(packedVals[(3) * 6 + (3)])) { \n"
1315 " UVEC4 packedQ7 = texelFetchOffset(g_src0Texture4Uint, \n"
1316 " screenPosI.xy, 0, ivec2(0, 1)); \n"
1317 " packedVals[(2) * 6 + (4)] = LOAD_UINT(packedQ7.x); \n"
1318 " packedVals[(3) * 6 + (4)] = LOAD_UINT(packedQ7.y); \n"
1319 " packedVals[(2) * 6 + (5)] = LOAD_UINT(packedQ7.z); \n"
1320 " packedVals[(3) * 6 + (5)] = LOAD_UINT(packedQ7.w); \n"
1321 " } \n"
1322 " // Q6 fills columns 0-1, rows 4-5: the quad at offset (-1, +1). \n"
1323 " if (bool(packedVals[(2) * 6 + (3)])) { \n"
1324 " UVEC4 packedQ6 = texelFetchOffset(g_src0Texture4Uint, \n"
1325 " screenPosI.xy, 0, ivec2(-1, 1)); \n"
1326 " packedVals[(0) * 6 + (4)] = LOAD_UINT(packedQ6.x); \n"
1327 " packedVals[(1) * 6 + (4)] = LOAD_UINT(packedQ6.y); \n"
1328 " packedVals[(0) * 6 + (5)] = LOAD_UINT(packedQ6.z); \n"
1329 " packedVals[(1) * 6 + (5)] = LOAD_UINT(packedQ6.w); \n"
1330 " \n"
1331 " CollectEdges(2, 3, edges, packedVals); \n"
1332 " uint pe = PruneNonDominantEdges(edges); \n"
1333 " if (pe != 0u) { \n"
1334 " imageStore(g_resultTexture, 2 * screenPosI.xy + ivec2(0, 1), \n"
1335 " vec4(float(0x80u | pe) / 255.0, 0, 0, 0)); \n"
1336 " } \n"
1337 " } \n"
1338 " \n"
1339 " if (bool(packedVals[(3) * 6 + (3)])) { \n"
1340 " CollectEdges(3, 3, edges, packedVals); \n"
1341 " uint pe = PruneNonDominantEdges(edges); \n"
1342 " if (pe != 0u) { \n"
1343 " imageStore(g_resultTexture, 2 * screenPosI.xy + ivec2(1, 1), \n"
1344 " vec4(float(0x80u | pe) / 255.0, 0, 0, 0)); \n"
1345 " } \n"
1346 " } \n"
1347 "} \n"
1348 "#endif // DETECT_EDGES2 \n"
1349 " \n";
1350
1351 const char ApplyFramebufferAttachmentCMAAINTELResourceManager::cmaa_frag_s2_[] =
1352 "#ifdef COMBINE_EDGES \n"
1353 "void CombineEdges() { // builds full 4-bit edge masks for one 2x2 quad \n"
1354 " ivec3 screenPosIBase = ivec3(ivec2(gl_FragCoord.xy) * 2, 0); \n"
1355 " vec3 screenPosBase = vec3(screenPosIBase); \n"
1356 " uint packedEdgesArray[3 * 3]; \n"
1357 " // Negative results are clamped to 0: float->uint of < 0 is undefined.\n"
1358 " // use only if it has the 'prev frame' flag:[sample * 255.0 - 127.5] \n"
1359 " //-> if it has the last bit flag (128), it's going to stay above 0 \n"
1360 " uvec4 sampA = uvec4(max(vec4(0.0), \n"
1361 " textureGatherOffset(g_src0TextureFlt, \n"
1362 " screenPosBase.xy * g_OneOverScreenSize, \n"
1363 " ivec2(1, 0)) * 255.0 - 127.5)); \n"
1364 " uvec4 sampB = uvec4(max(vec4(0.0), \n"
1365 " textureGatherOffset(g_src0TextureFlt, \n"
1366 " screenPosBase.xy * g_OneOverScreenSize, \n"
1367 " ivec2(0, 1)) * 255.0 - 127.5)); \n"
1368 " uint sampC = uint(max(0.0, \n"
1369 " texelFetchOffset(g_src0TextureFlt, screenPosIBase.xy, 0, \n"
1370 " ivec2(1, 1)).r * 255.0 - 127.5)); \n"
1371 " \n"
1372 " packedEdgesArray[(0) * 3 + (0)] = 0u; \n"
1373 " packedEdgesArray[(1) * 3 + (0)] = sampA.w; \n"
1374 " packedEdgesArray[(2) * 3 + (0)] = sampA.z; \n"
1375 " packedEdgesArray[(1) * 3 + (1)] = sampA.x; \n"
1376 " packedEdgesArray[(2) * 3 + (1)] = sampA.y; \n"
1377 " packedEdgesArray[(0) * 3 + (1)] = sampB.w; \n"
1378 " packedEdgesArray[(0) * 3 + (2)] = sampB.x; \n"
1379 " packedEdgesArray[(1) * 3 + (2)] = sampB.y; \n"
1380 " packedEdgesArray[(2) * 3 + (2)] = sampC; \n"
1381 " \n"
1382 " uvec4 pixelsC = uvec4(packedEdgesArray[(1 + 0) * 3 + (1 + 0)], \n"
1383 " packedEdgesArray[(1 + 1) * 3 + (1 + 0)], \n"
1384 " packedEdgesArray[(1 + 0) * 3 + (1 + 1)], \n"
1385 " packedEdgesArray[(1 + 1) * 3 + (1 + 1)]); \n"
1386 " uvec4 pixelsL = uvec4(packedEdgesArray[(0 + 0) * 3 + (1 + 0)], \n"
1387 " packedEdgesArray[(0 + 1) * 3 + (1 + 0)], \n"
1388 " packedEdgesArray[(0 + 0) * 3 + (1 + 1)], \n"
1389 " packedEdgesArray[(0 + 1) * 3 + (1 + 1)]); \n"
1390 " uvec4 pixelsU = uvec4(packedEdgesArray[(1 + 0) * 3 + (0 + 0)], \n"
1391 " packedEdgesArray[(1 + 1) * 3 + (0 + 0)], \n"
1392 " packedEdgesArray[(1 + 0) * 3 + (0 + 1)], \n"
1393 " packedEdgesArray[(1 + 1) * 3 + (0 + 1)]); \n"
1394 " // Left pixel's bit 0 becomes our bit 2; upper pixel's bit 1, our bit 3.\n"
1395 " uvec4 outEdge4 = \n"
1396 " pixelsC | ((pixelsL & 0x01u) << 2u) | ((pixelsU & 0x02u) << 2u); \n"
1397 " vec4 outEdge4Flt = vec4(outEdge4) / 255.0; \n"
1398 " \n"
1399 " imageStore(g_resultTextureSlot2, screenPosIBase.xy + ivec2(0, 0), \n"
1400 " outEdge4Flt.xxxx); \n"
1401 " imageStore(g_resultTextureSlot2, screenPosIBase.xy + ivec2(1, 0), \n"
1402 " outEdge4Flt.yyyy); \n"
1403 " imageStore(g_resultTextureSlot2, screenPosIBase.xy + ivec2(0, 1), \n"
1404 " outEdge4Flt.zzzz); \n"
1405 " imageStore(g_resultTextureSlot2, screenPosIBase.xy + ivec2(1, 1), \n"
1406 " outEdge4Flt.wwww); \n"
1407 " \n"
1408 " // uvec4 numberOfEdges4 = uvec4(bitCount(outEdge4)); \n"
1409 " // gl_FragDepth = \n"
1410 " // any(greaterThan(numberOfEdges4, uvec4(1))) ? 1.0 : 0.0; \n"
1411 " \n"
1412 " gl_FragDepth = \n"
1413 " any(greaterThan(outEdge4, uvec4(1))) ? 1.0 : 0.0; \n"
1414 "} \n"
1415 "#endif // COMBINE_EDGES \n"
1416 " \n"
1417 "#ifdef BLUR_EDGES \n"
1418 "layout(early_fragment_tests) in; \n"
1419 "void BlurEdges() { \n"
1420 " int _i; \n"
1421 " \n"
1422 " ivec3 screenPosIBase = ivec3(ivec2(gl_FragCoord.xy) * 2, 0); \n"
1423 " vec3 screenPosBase = vec3(screenPosIBase); \n"
1424 " uint forFollowUpCount = 0u; \n"
1425 " ivec4 forFollowUpCoords[4]; \n"
1426 " \n"
1427 " uint packedEdgesArray[4 * 4]; \n"
1428 " \n"
1429 " uvec4 sampA = uvec4( \n"
1430 " textureGatherOffset(g_src0TextureFlt, \n"
1431 " screenPosBase.xy * g_OneOverScreenSize, \n"
1432 " ivec2(0, 0)) *255.5); \n"
1433 " uvec4 sampB = uvec4( \n"
1434 " textureGatherOffset(g_src0TextureFlt, \n"
1435 " screenPosBase.xy * g_OneOverScreenSize, \n"
1436 " ivec2(2, 0)) *255.5); \n"
1437 " uvec4 sampC = uvec4( \n"
1438 " textureGatherOffset(g_src0TextureFlt, \n"
1439 " screenPosBase.xy * g_OneOverScreenSize, \n"
1440 " ivec2(0, 2)) *255.5); \n"
1441 " uvec4 sampD = uvec4( \n"
1442 " textureGatherOffset(g_src0TextureFlt, \n"
1443 " screenPosBase.xy * g_OneOverScreenSize, \n"
1444 " ivec2(2, 2)) *255.5); \n"
1445 " \n"
1446 " packedEdgesArray[(0) * 4 + (0)] = sampA.w; \n"
1447 " packedEdgesArray[(1) * 4 + (0)] = sampA.z; \n"
1448 " packedEdgesArray[(0) * 4 + (1)] = sampA.x; \n"
1449 " packedEdgesArray[(1) * 4 + (1)] = sampA.y; \n"
1450 " packedEdgesArray[(2) * 4 + (0)] = sampB.w; \n"
1451 " packedEdgesArray[(3) * 4 + (0)] = sampB.z; \n"
1452 " packedEdgesArray[(2) * 4 + (1)] = sampB.x; \n"
1453 " packedEdgesArray[(3) * 4 + (1)] = sampB.y; \n"
1454 " packedEdgesArray[(0) * 4 + (2)] = sampC.w; \n"
1455 " packedEdgesArray[(1) * 4 + (2)] = sampC.z; \n"
1456 " packedEdgesArray[(0) * 4 + (3)] = sampC.x; \n"
1457 " packedEdgesArray[(1) * 4 + (3)] = sampC.y; \n"
1458 " packedEdgesArray[(2) * 4 + (2)] = sampD.w; \n"
1459 " packedEdgesArray[(3) * 4 + (2)] = sampD.z; \n"
1460 " packedEdgesArray[(2) * 4 + (3)] = sampD.x; \n"
1461 " packedEdgesArray[(3) * 4 + (3)] = sampD.y; \n"
1462 " \n"
1463 " for (_i = 0; _i < 4; _i++) { \n"
1464 " int _x = _i % 2; \n"
1465 " int _y = _i / 2; \n"
1466 " \n"
1467 " ivec3 screenPosI = screenPosIBase + ivec3(_x, _y, 0); \n"
1468 " \n"
1469 " uint packedEdgesC = packedEdgesArray[(1 + _x) * 4 + (1 + _y)]; \n"
1470 " \n"
1471 " uvec4 edges = UnpackEdge(packedEdgesC); \n"
1472 " vec4 edgesFlt = vec4(edges); \n"
1473 " \n"
1474 " float numberOfEdges = dot(edgesFlt, vec4(1, 1, 1, 1)); \n"
1475 " if (numberOfEdges < 2.0) \n"
1476 " continue; \n"
1477 " \n"
1478 " float fromRight = edgesFlt.r; \n"
1479 " float fromBelow = edgesFlt.g; \n"
1480 " float fromLeft = edgesFlt.b; \n"
1481 " float fromAbove = edgesFlt.a; \n"
1482 " \n"
1483 " vec4 xFroms = vec4(fromBelow, fromAbove, fromRight, fromLeft); \n"
1484 " \n"
1485 " float blurCoeff = 0.0; \n"
1486 " \n"
1487 " // These are additional blurs that complement the main line-based \n"
1488 " // blurring; Unlike line-based, these do not necessarily preserve \n"
1489 " // the total amount of screen colour as they will take \n"
1490 " // neighbouring pixel colours and apply them to the one currently \n"
1491 " // processed. \n"
1492 " \n"
1493 " // 1.) L-like shape. \n"
1494 " // For this shape, the total amount of screen colour will be \n"
1495 " // preserved when this is a part of a (zigzag) diagonal line as the\n"
1496 " // corners from the other side will do the same and take some of \n"
1497 " // the current pixel's colour in return. \n"
1498 " // However, in the case when this is an actual corner, the pixel's \n"
1499 " // colour will be partially overwritten by it's 2 neighbours. \n"
1500 " // if( numberOfEdges > 1.0 ) \n"
1501 " { \n"
1502 " // with value of 0.08, the pixel will retain approx 86% of its \n"
1503 " // colour and the remaining 14% will come from its 2 neighbours \n"
1504 " // (which are likely to be blurred too in the opposite direction)\n"
1505 " blurCoeff = 0.08; \n"
1506 " \n"
1507 " // Only do blending if it's L shape - if we're between two \n"
1508 " // parallel edges, don't do anything \n"
1509 " blurCoeff *= (1.0 - fromBelow * fromAbove) * \n"
1510 " (1.0 - fromRight * fromLeft); \n"
1511 " } \n"
1512 " \n"
1513 " // 2.) U-like shape (surrounded with edges from 3 sides) \n"
1514 " if (numberOfEdges > 2.0) { \n"
1515 " // with value of 0.11, the pixel will retain approx 75% of its \n"
1516 " // colour and the remaining 25% will be picked from its 3 \n"
1517 " // neighbours (which are unlikely to be blurred too but could be)\n"
1518 " blurCoeff = 0.11; \n"
1519 " } \n"
1520 " \n"
1521 " // 3.) Completely surrounded with edges from all 4 sides \n"
1522 " if (numberOfEdges > 3.0) { \n"
1523 " // with value of 0.05, the pixel will retain 83% of its colour \n"
1524 " // and the remaining 17% will come from its 4 neighbours (which \n"
1525 " // are unlikely to be blurred) \n"
1526 " blurCoeff = 0.05; \n"
1527 " } \n"
1528 " \n"
1529 " if (blurCoeff == 0.0) { \n"
1530 " // this avoids Z search below as well but that's ok because a Z \n"
1531 " // shape will also always have some blurCoeff \n"
1532 " continue; \n"
1533 " } \n"
1534 " \n"
1535 " vec4 blurMap = xFroms * blurCoeff; \n"
1536 " \n"
1537 " vec4 pixelC = texelFetch(g_screenTexture, screenPosI.xy, 0); \n"
1538 " \n"
1539 " const float centerWeight = 1.0; \n"
1540 " float fromBelowWeight = blurMap.x; \n"
1541 " float fromAboveWeight = blurMap.y; \n"
1542 " float fromRightWeight = blurMap.z; \n"
1543 " float fromLeftWeight = blurMap.w; \n"
1544 " \n"
1545 " // this would be the proper math for blending if we were handling \n"
1546 " // lines (Zs) and mini kernel smoothing here, but since we're doing\n"
1547 " // lines separately, no need to complicate, just tweak the settings\n"
1548 " // float fromBelowWeight = (1.0 / (1.0 - blurMap.x)) - 1.0; \n"
1549 " // float fromAboveWeight = (1.0 / (1.0 - blurMap.y)) - 1.0; \n"
1550 " // float fromRightWeight = (1.0 / (1.0 - blurMap.z)) - 1.0; \n"
1551 " // float fromLeftWeight = (1.0 / (1.0 - blurMap.w)) - 1.0; \n"
1552 " \n"
1553 " float fourWeightSum = dot(blurMap, vec4(1, 1, 1, 1)); \n"
1554 " float allWeightSum = centerWeight + fourWeightSum; \n"
1555 " \n"
1556 " vec4 color = vec4(0, 0, 0, 0); \n"
1557 " if (fromLeftWeight > 0.0) { \n"
1558 " vec3 pixelL = texelFetchOffset(g_screenTexture, screenPosI.xy, 0,\n"
1559 " ivec2(-1, 0)).rgb; \n"
1560 " color.rgb += fromLeftWeight * pixelL; \n"
1561 " } \n"
1562 " if (fromAboveWeight > 0.0) { \n"
1563 " vec3 pixelT = texelFetchOffset(g_screenTexture, screenPosI.xy, 0,\n"
1564 " ivec2(0, -1)).rgb; \n"
1565 " color.rgb += fromAboveWeight * pixelT; \n"
1566 " } \n"
1567 " if (fromRightWeight > 0.0) { \n"
1568 " vec3 pixelR = texelFetchOffset(g_screenTexture, screenPosI.xy, 0,\n"
1569 " ivec2(1, 0)).rgb; \n"
1570 " color.rgb += fromRightWeight * pixelR; \n"
1571 " } \n"
1572 " if (fromBelowWeight > 0.0) { \n"
1573 " vec3 pixelB = texelFetchOffset(g_screenTexture, screenPosI.xy, 0,\n"
1574 " ivec2(0, 1)).rgb; \n"
1575 " color.rgb += fromBelowWeight * pixelB; \n"
1576 " } \n"
1577 " \n"
1578 " color /= fourWeightSum + 0.0001; \n"
1579 " color.a = 1.0 - centerWeight / allWeightSum; \n"
1580 " \n"
1581 " color.rgb = mix(pixelC.rgb, color.rgb, color.a).rgb; \n"
1582 "#ifdef IN_GAMMA_CORRECT_MODE \n"
1583 " color.rgb = D3DX_FLOAT3_to_SRGB(color.rgb); \n"
1584 "#endif \n"
1585 " \n"
1586 "#ifdef DEBUG_OUTPUT_AAINFO \n"
1587 " imageStore(g_resultTextureSlot2, screenPosI.xy, \n"
1588 " PackBlurAAInfo(screenPosI.xy, uint(numberOfEdges))); \n"
1589 "#endif \n"
1590 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy, \n"
1591 " vec4(color.rgb, pixelC.a)); \n"
1592 " \n"
1593 " if (numberOfEdges == 2.0) { \n"
1594 " uint packedEdgesL = packedEdgesArray[(0 + _x) * 4 + (1 + _y)]; \n"
1595 " uint packedEdgesT = packedEdgesArray[(1 + _x) * 4 + (0 + _y)]; \n"
1596 " uint packedEdgesR = packedEdgesArray[(2 + _x) * 4 + (1 + _y)]; \n"
1597 " uint packedEdgesB = packedEdgesArray[(1 + _x) * 4 + (2 + _y)]; \n"
1598 " \n"
1599 " bool isHorizontalA = ((packedEdgesC) == (0x01u | 0x02u)) && \n"
1600 " ((packedEdgesR & (0x01u | 0x08u)) == (0x08u)); \n"
1601 " bool isHorizontalB = ((packedEdgesC) == (0x01u | 0x08u)) && \n"
1602 " ((packedEdgesR & (0x01u | 0x02u)) == (0x02u)); \n"
1603 " \n"
1604 " bool isHCandidate = isHorizontalA || isHorizontalB; \n"
1605 " \n"
1606 " bool isVerticalA = ((packedEdgesC) == (0x08u | 0x01u)) && \n"
1607 " ((packedEdgesT & (0x08u | 0x04u)) == (0x04u)); \n"
1608 " bool isVerticalB = ((packedEdgesC) == (0x08u | 0x04u)) && \n"
1609 " ((packedEdgesT & (0x08u | 0x01u)) == (0x01u)); \n"
1610 " bool isVCandidate = isVerticalA || isVerticalB; \n"
1611 " \n"
1612 " bool isCandidate = isHCandidate || isVCandidate; \n"
1613 " \n"
1614 " if (!isCandidate) \n"
1615 " continue; \n"
1616 " \n"
1617 " bool horizontal = isHCandidate; \n"
1618 " \n"
1619 " // what if both are candidates? do additional pruning (still not \n"
1620 " // 100% but gets rid of worst case errors) \n"
1621 " if (isHCandidate && isVCandidate) \n"
1622 " horizontal = \n"
1623 " (isHorizontalA && ((packedEdgesL & 0x02u) == 0x02u)) || \n"
1624 " (isHorizontalB && ((packedEdgesL & 0x08u) == 0x08u)); \n"
1625 " \n"
1626 " ivec2 offsetC; \n"
1627 " uint packedEdgesM1P0; \n"
1628 " uint packedEdgesP1P0; \n"
1629 " if (horizontal) { \n"
1630 " packedEdgesM1P0 = packedEdgesL; \n"
1631 " packedEdgesP1P0 = packedEdgesR; \n"
1632 " offsetC = ivec2(2, 0); \n"
1633 " } else { \n"
1634 " packedEdgesM1P0 = packedEdgesB; \n"
1635 " packedEdgesP1P0 = packedEdgesT; \n"
1636 " offsetC = ivec2(0, -2); \n"
1637 " } \n"
1638 " \n"
1639 " uvec4 edgesM1P0 = UnpackEdge(packedEdgesM1P0); \n"
1640 " uvec4 edgesP1P0 = UnpackEdge(packedEdgesP1P0); \n"
1641 " uvec4 edgesP2P0 = UnpackEdge(uint(texelFetch( \n"
1642 " g_src0TextureFlt, screenPosI.xy + offsetC, 0).r * 255.5)); \n"
1643 " \n"
1644 " uvec4 arg0; \n"
1645 " uvec4 arg1; \n"
1646 " uvec4 arg2; \n"
1647 " uvec4 arg3; \n"
1648 " bool arg4; \n"
1649 " \n"
1650 " if (horizontal) { \n"
1651 " arg0 = uvec4(edges); \n"
1652 " arg1 = edgesM1P0; \n"
1653 " arg2 = edgesP1P0; \n"
1654 " arg3 = edgesP2P0; \n"
1655 " arg4 = true; \n"
1656 " } else { \n"
1657 " // Reuse the same code for vertical (used for horizontal above)\n"
1658 " // but rotate input data 90º counter-clockwise, so that: \n"
1659 " // left becomes bottom \n"
1660 " // top becomes left \n"
1661 " // right becomes top \n"
1662 " // bottom becomes right \n"
1663 " \n"
1664 " // we also have to rotate edges, thus .argb \n"
1665 " arg0 = uvec4(edges.argb); \n"
1666 " arg1 = edgesM1P0.argb; \n"
1667 " arg2 = edgesP1P0.argb; \n"
1668 " arg3 = edgesP2P0.argb; \n"
1669 " arg4 = false; \n"
1670 " } \n"
1671 " \n"
1672 " { \n"
1673 " ivec2 screenPos = screenPosI.xy; \n"
1674 " uvec4 _edges = arg0; \n"
1675 " uvec4 _edgesM1P0 = arg1; \n"
1676 " uvec4 _edgesP1P0 = arg2; \n"
1677 " uvec4 _edgesP2P0 = arg3; \n"
1678 " bool horizontal = arg4; \n"
1679 " // Inverted Z case: \n"
1680 " // __ \n"
1681 " // X| \n"
1682 " // ¯¯ \n"
1683 " bool isInvertedZ = false; \n"
1684 " bool isNormalZ = false; \n"
1685 " { \n"
1686 "#ifndef SETTINGS_ALLOW_SHORT_Zs \n"
1687 " // (1u-_edges.a) constraint can be removed; it was added for \n"
1688 " // some rare cases \n"
1689 " uint isZShape = _edges.r * _edges.g * _edgesM1P0.g * \n"
1690 " _edgesP1P0.a *_edgesP2P0.a * (1u - _edges.b) * \n"
1691 " (1u - _edgesP1P0.r) * (1u - _edges.a) * \n"
1692 " (1u - _edgesP1P0.g); \n"
1693 "#else \n"
1694 " uint isZShape = _edges.r * _edges.g * _edgesP1P0.a * \n"
1695 " (1u - _edges.b) * (1u - _edgesP1P0.r) * (1u - _edges.a) *\n"
1696 " (1u - _edgesP1P0.g); \n"
1697 " isZShape *= (_edgesM1P0.g + _edgesP2P0.a); \n"
1698 " // and at least one of these need to be there\n"
1699 "#endif \n"
1700 " if (isZShape > 0u) { \n"
1701 " isInvertedZ = true; \n"
1702 " } \n"
1703 " } \n"
1704 " \n"
1705 " // Normal Z case: \n"
1706 " // __ \n"
1707 " // X| \n"
1708 " // ¯¯ \n"
1709 " { \n"
1710 "#ifndef SETTINGS_ALLOW_SHORT_Zs \n"
1711 " uint isZShape = _edges.r * _edges.a * _edgesM1P0.a * \n"
1712 " _edgesP1P0.g * _edgesP2P0.g * (1u - _edges.b) * \n"
1713 " (1u - _edgesP1P0.r) * (1u - _edges.g) * \n"
1714 " (1u - _edgesP1P0.a); \n"
1715 "#else \n"
1716 " uint isZShape = _edges.r * _edges.a * _edgesP1P0.g * \n"
1717 " (1u - _edges.b) * (1u - _edgesP1P0.r) * (1u - _edges.g) *\n"
1718 " (1u - _edgesP1P0.a); \n"
1719 " isZShape *= \n"
1720 " (_edgesM1P0.a + _edgesP2P0.g); \n"
1721 " // and at least one of these need to be there\n"
1722 "#endif \n"
1723 " \n"
1724 " if (isZShape > 0u) { \n"
1725 " isNormalZ = true; \n"
1726 " } \n"
1727 " } \n"
1728 " \n"
1729 " bool isZ = isInvertedZ || isNormalZ; \n"
1730 " if (isZ) { \n"
1731 " forFollowUpCoords[forFollowUpCount++] = \n"
1732 " ivec4(screenPosI.xy, horizontal, isInvertedZ); \n"
1733 " } \n"
1734 " } \n"
1735 " } \n"
1736 " } \n"
1737 " \n"
1738 " // This code below is the only potential bug with this algorithm : \n"
1739 " // it HAS to be executed after the simple shapes above. It used to be\n"
1740 " // executed as separate compute shader (by storing the packed \n"
1741 " // 'forFollowUpCoords' in an append buffer and consuming it later) \n"
1742 " // but the whole thing (append/consume buffers, using CS) appears to \n"
1743 " // be too inefficient on most hardware. \n"
1744 " // However, it seems to execute fairly efficiently here and without \n"
1745 " // any issues, although there is no 100% guarantee that this code \n"
1746 " // below will execute across all pixels (it has a c_maxLineLength \n"
1747 " // wide kernel) after other shaders processing same pixels have done \n"
1748 " // solving simple shapes. It appears to work regardless, across all \n"
1749 " // hardware; pixels with 1-edge or two opposing edges are ignored by \n"
1750 " // simple shapes anyway and other shapes stop the long line \n"
1751 " // algorithm from executing the only danger appears to be simple \n"
1752 " // shape L's colliding with Z shapes from neighbouring pixels but I \n"
1753 " // couldn't reproduce any problems on any hardware. \n"
1754 " for (uint _i = 0u; _i < forFollowUpCount; _i++) { \n"
1755 " ivec4 data = forFollowUpCoords[_i]; \n"
1756 " ProcessDetectedZ(data.xy, bool(data.z), bool(data.w)); \n"
1757 " } \n"
1758 "} \n"
1759 "#endif // BLUR_EDGES \n"
1760 " \n"
1761 "#ifdef DISPLAY_EDGES \n"
1762 "layout(location = 0) out vec4 color; \n"
1763 "layout(location = 1) out vec4 hasEdges; \n"
1764 "void DisplayEdges() { \n"
1765 " ivec2 screenPosI = ivec2(gl_FragCoord.xy); \n"
1766 " \n"
1767 " uint packedEdges, shapeType; \n"
1768 " UnpackBlurAAInfo(texelFetch(g_src0TextureFlt, screenPosI, 0).r, \n"
1769 " packedEdges, shapeType); \n"
1770 " \n"
1771 " vec4 edges = vec4(UnpackEdge(packedEdges)); \n"
1772 " if (any(greaterThan(edges.xyzw, vec4(0)))) { \n"
1773 "#ifdef IN_BGR_MODE \n"
1774 " color = c_edgeDebugColours[shapeType].bgra; \n"
1775 "#else \n"
1776 " color = c_edgeDebugColours[shapeType]; \n"
1777 "#endif \n"
1778 " hasEdges = vec4(1.0); \n"
1779 " } else { \n"
1780 " color = vec4(0); \n"
1781 " hasEdges = vec4(0.0); \n"
1782 " } \n"
1783 "} \n"
1784 "#endif // DISPLAY_EDGES \n"
1785 " \n"
1786 "void main() { \n"
1787 "#ifdef DETECT_EDGES1 \n"
1788 " DetectEdges1(); \n"
1789 "#endif \n"
1790 "#if defined DETECT_EDGES2 \n"
1791 " DetectEdges2(); \n"
1792 "#endif \n"
1793 "#if defined COMBINE_EDGES \n"
1794 " CombineEdges(); \n"
1795 "#endif \n"
1796 "#if defined BLUR_EDGES \n"
1797 " BlurEdges(); \n"
1798 "#endif \n"
1799 "#if defined DISPLAY_EDGES \n"
1800 " DisplayEdges(); \n"
1801 "#endif \n"
1802 "} \n";
1803
// GLSL fragment shader source that copies a single texel per fragment.
//
// main() converts gl_FragCoord.xy to an integer pixel position, fetches the
// texel at that position (LOD 0) from the sampler inTexture, and then:
//   - when OUT_FBO is defined, writes the texel to the fragment output
//     outColor (location 0);
//   - otherwise, stores the texel into the rgba8 image outTexture at the
//     same pixel position via imageStore().
//
// Both outColor and outTexture are declared regardless of OUT_FBO. When
// GL_ES is defined the image declaration carries an explicit binding = 0 in
// its layout qualifier; in the non-GL_ES branch only the rgba8 format is
// specified and no binding is given in the shader text.
//
// NOTE(review): OUT_FBO is not defined anywhere in this string; presumably
// the caller prepends the #version line and any #define when compiling the
// copy-to-framebuffer / copy-to-image variants -- confirm against the shader
// creation code.
1804 const char
1805 ApplyFramebufferAttachmentCMAAINTELResourceManager::copy_frag_str_[] =
1806 "precision highp float; \n"
1807 "layout(binding = 0) uniform highp sampler2D inTexture; \n"
1808 "layout(location = 0) out vec4 outColor; \n"
1809 "#ifdef GL_ES \n"
1810 "layout(binding = 0, rgba8) restrict writeonly uniform highp \n"
1811 " image2D outTexture; \n"
1812 "#else \n"
1813 "layout(rgba8) restrict writeonly uniform highp image2D outTexture; \n"
1814 "#endif \n"
1815 " \n"
1816 "void main() { \n"
1817 " ivec2 screenPosI = ivec2( gl_FragCoord.xy ); \n"
1818 " vec4 pixel = texelFetch(inTexture, screenPosI, 0); \n"
1819 "#ifdef OUT_FBO \n"
1820 " outColor = pixel; \n"
1821 "#else \n"
1822 " imageStore(outTexture, screenPosI, pixel); \n"
1823 "#endif \n"
1824 "} \n";
1825
1826 } // namespace gpu
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698