OLD | NEW |
(Empty) | |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "gpu/command_buffer/service/gles2_cmd_apply_framebuffer_attachment_cmaa
_intel.h" |
| 6 |
| 7 #include "base/logging.h" |
| 8 #include "gpu/command_buffer/service/framebuffer_manager.h" |
| 9 #include "gpu/command_buffer/service/gles2_cmd_decoder.h" |
| 10 #include "ui/gl/gl_context.h" |
| 11 #include "ui/gl/gl_gl_api_implementation.h" |
| 12 #include "ui/gl/gl_version_info.h" |
| 13 |
| 14 namespace gpu { |
| 15 |
| 16 ApplyFramebufferAttachmentCMAAINTELResourceManager:: |
| 17 ApplyFramebufferAttachmentCMAAINTELResourceManager() |
| 18 : initialized_(false), |
| 19 textures_initialized_(false), |
| 20 is_in_gamma_correct_mode_(false), |
| 21 supports_usampler_(true), |
| 22 supports_r8_image_(true), |
| 23 supports_r8_read_format_(true), |
| 24 is_gles31_compatible_(false), |
| 25 frame_id_(0), |
| 26 width_(0), |
| 27 height_(0), |
| 28 copy_to_framebuffer_shader_(0), |
| 29 copy_to_image_shader_(0), |
| 30 edges0_shader_(0), |
| 31 edges1_shader_(0), |
| 32 edges_combine_shader_(0), |
| 33 process_and_apply_shader_(0), |
| 34 debug_display_edges_shader_(0), |
| 35 cmaa_framebuffer_(0), |
| 36 copy_framebuffer_(0), |
| 37 rgba8_texture_(0), |
| 38 working_color_texture_(0), |
| 39 edges0_texture_(0), |
| 40 edges1_texture_(0), |
| 41 mini4_edge_texture_(0), |
| 42 mini4_edge_depth_texture_(0), |
| 43 edges1_shader_result_texture_float4_slot1_(0), |
| 44 edges1_shader_result_texture_(0), |
| 45 edges_combine_shader_result_texture_float4_slot1_(0), |
| 46 process_and_apply_shader_result_texture_float4_slot1_(0), |
| 47 edges_combine_shader_result_texture_slot2_(0), |
| 48 copy_to_image_shader_outTexture_(0) {} |
| 49 |
| 50 ApplyFramebufferAttachmentCMAAINTELResourceManager:: |
| 51 ~ApplyFramebufferAttachmentCMAAINTELResourceManager() { |
| 52 Destroy(); |
| 53 } |
| 54 |
| 55 void ApplyFramebufferAttachmentCMAAINTELResourceManager::Initialize( |
| 56 gles2::GLES2Decoder* decoder) { |
| 57 DCHECK(decoder); |
| 58 is_gles31_compatible_ = |
| 59 decoder->GetGLContext()->GetVersionInfo()->IsAtLeastGLES(3, 1); |
| 60 |
| 61 copy_to_image_shader_ = CreateProgram("", vert_str_, copy_frag_str_); |
| 62 copy_to_framebuffer_shader_ = |
| 63 CreateProgram("#define OUT_FBO 1\n", vert_str_, copy_frag_str_); |
| 64 |
| 65 // Check if RGBA8UI is supported as an FBO colour target with depth. |
| 66 // If not supported, GLSL needs to convert the data to/from float so there is |
| 67 // a small extra cost. |
| 68 { |
| 69 GLuint rgba8ui_texture = 0, depth_texture = 0; |
| 70 glGenTextures(1, &rgba8ui_texture); |
| 71 glBindTexture(GL_TEXTURE_2D, rgba8ui_texture); |
| 72 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_RGBA8UI, 4, 4); |
| 73 |
| 74 glGenTextures(1, &depth_texture); |
| 75 glBindTexture(GL_TEXTURE_2D, depth_texture); |
| 76 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_DEPTH_COMPONENT16, 4, 4); |
| 77 |
| 78 // Create the FBO |
| 79 GLuint rgba8ui_framebuffer = 0; |
| 80 glGenFramebuffersEXT(1, &rgba8ui_framebuffer); |
| 81 glBindFramebufferEXT(GL_FRAMEBUFFER, rgba8ui_framebuffer); |
| 82 |
| 83 // Bind to the FBO to test support |
| 84 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, |
| 85 GL_TEXTURE_2D, rgba8ui_texture, 0); |
| 86 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, |
| 87 GL_TEXTURE_2D, depth_texture, 0); |
| 88 GLenum status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER); |
| 89 |
| 90 supports_usampler_ = (status == GL_FRAMEBUFFER_COMPLETE); |
| 91 |
| 92 glDeleteFramebuffersEXT(1, &rgba8ui_framebuffer); |
| 93 glDeleteTextures(1, &rgba8ui_texture); |
| 94 glDeleteTextures(1, &depth_texture); |
| 95 } |
| 96 |
| 97 // Check to see if R8 images are supported |
| 98 // If not supported, images are bound as R32F for write targets, not R8. |
| 99 { |
| 100 GLuint r8_texture = 0; |
| 101 glGenTextures(1, &r8_texture); |
| 102 glBindTexture(GL_TEXTURE_2D, r8_texture); |
| 103 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_R8, 4, 4); |
| 104 |
| 105 glGetError(); // reset all previous errors |
| 106 glBindImageTextureEXT(0, r8_texture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R8); |
| 107 if (glGetError() != GL_NO_ERROR) |
| 108 supports_r8_image_ = false; |
| 109 |
| 110 glDeleteTextures(1, &r8_texture); |
| 111 } |
| 112 |
| 113 // Check if R8 GLSL read formats are supported. |
| 114 // If not supported, r32f is used instead. |
| 115 { |
| 116 const char shader_source[] = |
| 117 "layout(r8) restrict writeonly uniform highp image2D g_r8Image; \n" |
| 118 "void main() \n" |
| 119 "{ \n" |
| 120 " imageStore(g_r8Image, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 0.0)); \n" |
| 121 "} \n"; |
| 122 |
| 123 GLuint shader = CreateShader(GL_FRAGMENT_SHADER, "", shader_source); |
| 124 supports_r8_read_format_ = (shader != 0); |
| 125 if (shader != 0) { |
| 126 glDeleteShader(shader); |
| 127 } |
| 128 } |
| 129 |
| 130 VLOG(1) << "ApplyFramebufferAttachmentCMAAINTEL: " |
| 131 << "Supports USampler is " << (supports_usampler_ ? "true" : "false"); |
| 132 VLOG(1) << "ApplyFramebufferAttachmentCMAAINTEL: " |
| 133 << "Supports R8 Images is " |
| 134 << (supports_r8_image_ ? "true" : "false"); |
| 135 VLOG(1) << "ApplyFramebufferAttachmentCMAAINTEL: " |
| 136 << "Supports R8 Read Format is " |
| 137 << (supports_r8_read_format_ ? "true" : "false"); |
| 138 |
| 139 // Create the shaders |
| 140 std::ostringstream defines, edge1, edge2, combineEdges, blur, displayEdges, |
| 141 cmaa_frag; |
| 142 |
| 143 cmaa_frag << cmaa_frag_s1_ << cmaa_frag_s2_; |
| 144 std::string cmaa_frag_string = cmaa_frag.str(); |
| 145 const char* cmaa_frag_c_str = cmaa_frag_string.c_str(); |
| 146 |
| 147 if (supports_usampler_) { |
| 148 defines << "#define SUPPORTS_USAMPLER2D\n"; |
| 149 } |
| 150 |
| 151 if (is_in_gamma_correct_mode_) { |
| 152 defines << "#define IN_GAMMA_CORRECT_MODE\n"; |
| 153 } |
| 154 |
| 155 if (supports_r8_read_format_) { |
| 156 defines << "#define EDGE_READ_FORMAT r8\n"; |
| 157 } else { |
| 158 defines << "#define EDGE_READ_FORMAT r32f\n"; |
| 159 } |
| 160 |
| 161 displayEdges << defines.str() << "#define DISPLAY_EDGES\n"; |
| 162 debug_display_edges_shader_ = |
| 163 CreateProgram(displayEdges.str().c_str(), vert_str_, cmaa_frag_c_str); |
| 164 |
| 165 edge1 << defines.str() << "#define DETECT_EDGES1\n"; |
| 166 edges0_shader_ = |
| 167 CreateProgram(edge1.str().c_str(), vert_str_, cmaa_frag_c_str); |
| 168 |
| 169 edge2 << defines.str() << "#define DETECT_EDGES2\n"; |
| 170 edges1_shader_ = |
| 171 CreateProgram(edge2.str().c_str(), vert_str_, cmaa_frag_c_str); |
| 172 |
| 173 combineEdges << defines.str() << "#define COMBINE_EDGES\n"; |
| 174 edges_combine_shader_ = |
| 175 CreateProgram(combineEdges.str().c_str(), vert_str_, cmaa_frag_c_str); |
| 176 |
| 177 blur << defines.str() << "#define BLUR_EDGES\n"; |
| 178 process_and_apply_shader_ = |
| 179 CreateProgram(blur.str().c_str(), vert_str_, cmaa_frag_c_str); |
| 180 |
| 181 edges1_shader_result_texture_float4_slot1_ = |
| 182 glGetUniformLocation(edges0_shader_, "g_resultTextureFlt4Slot1"); |
| 183 edges1_shader_result_texture_ = |
| 184 glGetUniformLocation(edges1_shader_, "g_resultTexture"); |
| 185 edges_combine_shader_result_texture_float4_slot1_ = |
| 186 glGetUniformLocation(edges_combine_shader_, "g_resultTextureFlt4Slot1"); |
| 187 edges_combine_shader_result_texture_slot2_ = |
| 188 glGetUniformLocation(edges_combine_shader_, "g_resultTextureSlot2"); |
| 189 process_and_apply_shader_result_texture_float4_slot1_ = glGetUniformLocation( |
| 190 process_and_apply_shader_, "g_resultTextureFlt4Slot1"); |
| 191 copy_to_image_shader_outTexture_ = |
| 192 glGetUniformLocation(copy_to_image_shader_, "outTexture"); |
| 193 |
| 194 initialized_ = true; |
| 195 } |
| 196 |
| 197 void ApplyFramebufferAttachmentCMAAINTELResourceManager::Destroy() { |
| 198 if (!initialized_) |
| 199 return; |
| 200 |
| 201 ReleaseTextures(); |
| 202 |
| 203 glDeleteProgram(copy_to_image_shader_); |
| 204 glDeleteProgram(copy_to_framebuffer_shader_); |
| 205 glDeleteProgram(process_and_apply_shader_); |
| 206 glDeleteProgram(edges_combine_shader_); |
| 207 glDeleteProgram(edges1_shader_); |
| 208 glDeleteProgram(edges0_shader_); |
| 209 glDeleteProgram(debug_display_edges_shader_); |
| 210 |
| 211 initialized_ = false; |
| 212 } |
| 213 |
| 214 // Apply CMAA(Conservative Morphological Anti-Aliasing) algorithm to the |
| 215 // color attachments of currently bound draw framebuffer. |
| 216 // Reference GL_INTEL_framebuffer_CMAA for details. |
| 217 void ApplyFramebufferAttachmentCMAAINTELResourceManager:: |
| 218 ApplyFramebufferAttachmentCMAAINTEL(gles2::GLES2Decoder* decoder, |
| 219 gles2::Framebuffer* framebuffer) { |
| 220 DCHECK(decoder); |
| 221 DCHECK(initialized_); |
| 222 if (!framebuffer) |
| 223 return; |
| 224 |
| 225 GLuint last_framebuffer = framebuffer->service_id(); |
| 226 |
| 227 // Process each color attachment of the current draw framebuffer. |
| 228 uint32_t max_draw_buffers = decoder->GetContextGroup()->max_draw_buffers(); |
| 229 for (uint32_t i = 0; i < max_draw_buffers; i++) { |
| 230 const gles2::Framebuffer::Attachment* attachment = |
| 231 framebuffer->GetAttachment(GL_COLOR_ATTACHMENT0 + i); |
| 232 if (attachment && attachment->IsTextureAttachment()) { |
| 233 // Get the texture info. |
| 234 GLuint source_texture_client_id = attachment->object_name(); |
| 235 GLuint source_texture = 0; |
| 236 if (!decoder->GetServiceTextureId(source_texture_client_id, |
| 237 &source_texture)) |
| 238 continue; |
| 239 GLsizei width = attachment->width(); |
| 240 GLsizei height = attachment->height(); |
| 241 GLenum internal_format = attachment->internal_format(); |
| 242 |
| 243 // Resize internal structures - only if needed. |
| 244 OnSize(width, height); |
| 245 |
| 246 // CMAA internally expects GL_RGBA8 textures. |
| 247 // Process using a GL_RGBA8 copy if this is not the case. |
| 248 bool do_copy = internal_format != GL_RGBA8; |
| 249 |
| 250 // Copy source_texture to rgba8_texture_ |
| 251 if (do_copy) { |
| 252 CopyTexture(source_texture, rgba8_texture_, false); |
| 253 } |
| 254 |
| 255 // CMAA Effect |
| 256 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, last_framebuffer); |
| 257 if (do_copy) { |
| 258 ApplyCMAAEffectTexture(rgba8_texture_, rgba8_texture_); |
| 259 } else { |
| 260 ApplyCMAAEffectTexture(source_texture, source_texture); |
| 261 } |
| 262 |
| 263 // Copy rgba8_texture_ to source_texture |
| 264 if (do_copy) { |
| 265 // Move source_texture to the first color attachment of the copy fbo. |
| 266 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, last_framebuffer); |
| 267 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + i, |
| 268 GL_TEXTURE_2D, 0, 0); |
| 269 glBindFramebufferEXT(GL_FRAMEBUFFER, copy_framebuffer_); |
| 270 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, |
| 271 GL_TEXTURE_2D, source_texture, 0); |
| 272 |
| 273 CopyTexture(rgba8_texture_, source_texture, true); |
| 274 |
| 275 // Restore color attachments |
| 276 glBindFramebufferEXT(GL_FRAMEBUFFER, copy_framebuffer_); |
| 277 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, |
| 278 GL_TEXTURE_2D, rgba8_texture_, 0); |
| 279 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, last_framebuffer); |
| 280 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + i, |
| 281 GL_TEXTURE_2D, source_texture, 0); |
| 282 } |
| 283 } |
| 284 } |
| 285 |
| 286 // Restore state |
| 287 decoder->RestoreAllAttributes(); |
| 288 decoder->RestoreTextureUnitBindings(0); |
| 289 decoder->RestoreTextureUnitBindings(1); |
| 290 decoder->RestoreActiveTexture(); |
| 291 decoder->RestoreProgramBindings(); |
| 292 decoder->RestoreBufferBindings(); |
| 293 decoder->RestoreFramebufferBindings(); |
| 294 decoder->RestoreGlobalState(); |
| 295 } |
| 296 |
| 297 void ApplyFramebufferAttachmentCMAAINTELResourceManager::ApplyCMAAEffectTexture( |
| 298 GLuint source_texture, |
| 299 GLuint dest_texture) { |
| 300 frame_id_++; |
| 301 |
| 302 GLuint edge_texture_a; |
| 303 GLuint edge_texture_b; |
| 304 |
| 305 // Flip flop - One pass clears the texture that needs clearing for the other |
| 306 // one (actually it's only important that it clears the highest bit) |
| 307 if ((frame_id_ % 2) == 0) { |
| 308 edge_texture_a = edges0_texture_; |
| 309 edge_texture_b = edges1_texture_; |
| 310 } else { |
| 311 edge_texture_a = edges1_texture_; |
| 312 edge_texture_b = edges0_texture_; |
| 313 } |
| 314 |
| 315 // Setup the main fbo |
| 316 glBindFramebufferEXT(GL_FRAMEBUFFER, cmaa_framebuffer_); |
| 317 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, |
| 318 mini4_edge_texture_, 0); |
| 319 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, |
| 320 mini4_edge_depth_texture_, 0); |
| 321 #if DCHECK_IS_ON() |
| 322 GLenum status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER); |
| 323 if (status != GL_FRAMEBUFFER_COMPLETE) { |
| 324 DLOG(ERROR) << "ApplyFramebufferAttachmentCMAAINTEL: " |
| 325 << "Incomplete framebuffer."; |
| 326 Destroy(); |
| 327 return; |
| 328 } |
| 329 #endif |
| 330 |
| 331 // Setup the viewport to match the fbo |
| 332 glViewport(0, 0, (width_ + 1) / 2, (height_ + 1) / 2); |
| 333 glEnable(GL_DEPTH_TEST); |
| 334 |
| 335 // Detect edges Pass 0 |
| 336 // - For every pixel detect edges to the right and down and output depth |
| 337 // mask where edges detected (1 - far, for detected, 0-near for empty |
| 338 // pixels) |
| 339 |
| 340 // Inputs |
| 341 // g_screenTexture source_texture tex0 |
| 342 // Outputs |
| 343 // gl_FragDepth mini4_edge_depth_texture_ fbo.depth |
| 344 // out uvec4 outEdges mini4_edge_texture_ fbo.col |
| 345 // image2D g_resultTextureFlt4Slot1 working_color_texture_ image1 |
| 346 GLenum edge_format = supports_r8_image_ ? GL_R8 : GL_R32F; |
| 347 |
| 348 { |
| 349 glUseProgram(edges0_shader_); |
| 350 glUniform1f(0, 1.0f); |
| 351 glUniform2f(1, 1.0f / width_, 1.0f / height_); |
| 352 glDepthMask(GL_TRUE); |
| 353 glDepthFunc(GL_ALWAYS); |
| 354 glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); |
| 355 |
| 356 if (!is_gles31_compatible_) { |
| 357 glUniform1i(edges1_shader_result_texture_float4_slot1_, 1); |
| 358 } |
| 359 glBindImageTextureEXT(1, working_color_texture_, 0, GL_FALSE, 0, |
| 360 GL_WRITE_ONLY, GL_RGBA8); |
| 361 |
| 362 glActiveTexture(GL_TEXTURE0); |
| 363 glBindTexture(GL_TEXTURE_2D, source_texture); |
| 364 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); |
| 365 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); |
| 366 |
| 367 glDrawArrays(GL_TRIANGLES, 0, 3); |
| 368 } |
| 369 |
| 370 // Detect edges Pass 1 (finish the previous pass edge processing). |
| 371 // Do the culling of non-dominant local edges (leave mainly locally dominant |
| 372 // edges) and merge Right and Bottom edges into TopRightBottomLeft |
| 373 |
| 374 // Inputs |
| 375 // g_src0Texture4Uint mini4_edge_texture_ tex1 |
| 376 // Outputs |
| 377 // image2D g_resultTexture edge_texture_b image0 |
| 378 { |
| 379 glUseProgram(edges1_shader_); |
| 380 glUniform1f(0, 0.0f); |
| 381 glUniform2f(1, 1.0f / width_, 1.0f / height_); |
| 382 glDepthMask(GL_FALSE); |
| 383 glDepthFunc(GL_LESS); |
| 384 glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); |
| 385 |
| 386 if (!is_gles31_compatible_) { |
| 387 glUniform1i(edges1_shader_result_texture_, 0); |
| 388 } |
| 389 glBindImageTextureEXT(0, edge_texture_b, 0, GL_FALSE, 0, GL_WRITE_ONLY, |
| 390 edge_format); |
| 391 |
| 392 glActiveTexture(GL_TEXTURE1); |
| 393 glBindTexture(GL_TEXTURE_2D, mini4_edge_texture_); |
| 394 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); |
| 395 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); |
| 396 |
| 397 glDrawArrays(GL_TRIANGLES, 0, 3); |
| 398 } |
| 399 |
| 400 // - Combine RightBottom (.xy) edges from previous pass into |
| 401 // RightBottomLeftTop (.xyzw) edges and output it into the mask (have to |
| 402 // fill in the whole buffer including empty ones for the line length |
| 403 // detection to work correctly). |
| 404 // - On all pixels with any edge, input buffer into a temporary color buffer |
| 405 // needed for correct blending in the next pass (other pixels not needed |
| 406 // so not copied to avoid bandwidth use). |
| 407 // - On all pixels with 2 or more edges output positive depth mask for the |
| 408 // next pass. |
| 409 |
| 410 // Inputs |
| 411 // g_src0TextureFlt edge_texture_b tex1 //ps |
| 412 // Outputs |
| 413 // image2D g_resultTextureSlot2 edge_texture_a image2 |
| 414 // gl_FragDepth mini4_edge_texture_ fbo.depth |
| 415 { |
| 416 // Combine edges: each pixel will now contain info on all (top, right, |
| 417 // bottom, left) edges; also create depth mask as above depth and mark |
| 418 // potential Z sAND also copy source color data but only on edge pixels |
| 419 glUseProgram(edges_combine_shader_); |
| 420 glUniform1f(0, 1.0f); |
| 421 glUniform2f(1, 1.0f / width_, 1.0f / height_); |
| 422 glDepthMask(GL_TRUE); |
| 423 glDepthFunc(GL_ALWAYS); |
| 424 glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); |
| 425 |
| 426 if (!is_gles31_compatible_) { |
| 427 glUniform1i(edges_combine_shader_result_texture_float4_slot1_, 1); |
| 428 glUniform1i(edges_combine_shader_result_texture_slot2_, 2); |
| 429 } |
| 430 glBindImageTextureEXT(1, dest_texture, 0, GL_FALSE, 0, GL_WRITE_ONLY, |
| 431 GL_RGBA8); |
| 432 glBindImageTextureEXT(2, edge_texture_a, 0, GL_FALSE, 0, GL_WRITE_ONLY, |
| 433 edge_format); |
| 434 |
| 435 glActiveTexture(GL_TEXTURE1); |
| 436 glBindTexture(GL_TEXTURE_2D, edge_texture_b); |
| 437 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); |
| 438 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); |
| 439 |
| 440 glDrawArrays(GL_TRIANGLES, 0, 3); |
| 441 } |
| 442 |
| 443 // Using depth mask and [earlydepthstencil] to work on pixels with 2, 3, 4 |
| 444 // edges: |
| 445 // - First blend simple blur map for 2,3,4 edge pixels |
| 446 // - Then do the lines (line length counter -should- guarantee no overlap |
| 447 // with other pixels - pixels with 1 edge are excluded in the previous |
| 448 // pass and the pixels with 2 parallel edges are excluded in the simple |
| 449 // blur) |
| 450 |
| 451 // Inputs |
| 452 // g_screenTexture working_color_texture_ tex0 |
| 453 // g_src0TextureFlt edge_texture_a tex1 //ps |
| 454 // sampled |
| 455 // Outputs |
| 456 // g_resultTextureFlt4Slot1 dest_texture image1 |
| 457 // gl_FragDepth mini4_edge_texture_ fbo.depth |
| 458 { |
| 459 glUseProgram(process_and_apply_shader_); |
| 460 glUniform1f(0, 0.0f); |
| 461 glUniform2f(1, 1.0f / width_, 1.0f / height_); |
| 462 glDepthMask(GL_FALSE); |
| 463 glDepthFunc(GL_LESS); |
| 464 glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); |
| 465 |
| 466 if (!is_gles31_compatible_) { |
| 467 glUniform1i(process_and_apply_shader_result_texture_float4_slot1_, 1); |
| 468 } |
| 469 glBindImageTextureEXT(1, dest_texture, 0, GL_FALSE, 0, GL_WRITE_ONLY, |
| 470 GL_RGBA8); |
| 471 |
| 472 glActiveTexture(GL_TEXTURE0); |
| 473 glBindTexture(GL_TEXTURE_2D, working_color_texture_); |
| 474 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); |
| 475 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); |
| 476 |
| 477 glActiveTexture(GL_TEXTURE1); |
| 478 glBindTexture(GL_TEXTURE_2D, edge_texture_a); |
| 479 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); |
| 480 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); |
| 481 |
| 482 glDrawArrays(GL_TRIANGLES, 0, 3); |
| 483 } |
| 484 |
| 485 glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); |
| 486 glDisable(GL_DEPTH_TEST); |
| 487 glDepthMask(GL_FALSE); |
| 488 glActiveTexture(GL_TEXTURE0); |
| 489 } |
| 490 |
| 491 void ApplyFramebufferAttachmentCMAAINTELResourceManager::OnSize(GLint width, |
| 492 GLint height) { |
| 493 if (height_ == height && width_ == width) |
| 494 return; |
| 495 |
| 496 ReleaseTextures(); |
| 497 |
| 498 height_ = height; |
| 499 width_ = width; |
| 500 |
| 501 glGenFramebuffersEXT(1, ©_framebuffer_); |
| 502 glGenTextures(1, &rgba8_texture_); |
| 503 glBindTexture(GL_TEXTURE_2D, rgba8_texture_); |
| 504 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_RGBA8, width, height); |
| 505 |
| 506 // Edges texture - R8 |
| 507 // OpenGLES has no single component 8/16-bit image support, so needs to be R32 |
| 508 // Although CHT does support R8. |
| 509 GLenum edge_format = supports_r8_image_ ? GL_R8 : GL_R32F; |
| 510 glGenTextures(1, &edges0_texture_); |
| 511 glBindTexture(GL_TEXTURE_2D, edges0_texture_); |
| 512 glTexStorage2DEXT(GL_TEXTURE_2D, 1, edge_format, width, height); |
| 513 |
| 514 glGenTextures(1, &edges1_texture_); |
| 515 glBindTexture(GL_TEXTURE_2D, edges1_texture_); |
| 516 glTexStorage2DEXT(GL_TEXTURE_2D, 1, edge_format, width, height); |
| 517 |
| 518 // Color working texture - RGBA8 |
| 519 glGenTextures(1, &working_color_texture_); |
| 520 glBindTexture(GL_TEXTURE_2D, working_color_texture_); |
| 521 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_RGBA8, width, height); |
| 522 |
| 523 // Half*half compressed 4-edge-per-pixel texture - RGBA8 |
| 524 glGenTextures(1, &mini4_edge_texture_); |
| 525 glBindTexture(GL_TEXTURE_2D, mini4_edge_texture_); |
| 526 GLenum format = GL_RGBA8UI; |
| 527 if (!supports_usampler_) { |
| 528 format = GL_RGBA8; |
| 529 } |
| 530 glTexStorage2DEXT(GL_TEXTURE_2D, 1, format, (width + 1) / 2, |
| 531 (height + 1) / 2); |
| 532 |
| 533 // Depth |
| 534 glGenTextures(1, &mini4_edge_depth_texture_); |
| 535 glBindTexture(GL_TEXTURE_2D, mini4_edge_depth_texture_); |
| 536 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_DEPTH_COMPONENT16, (width + 1) / 2, |
| 537 (height + 1) / 2); |
| 538 |
| 539 // Create the FBO |
| 540 glGenFramebuffersEXT(1, &cmaa_framebuffer_); |
| 541 glBindFramebufferEXT(GL_FRAMEBUFFER, cmaa_framebuffer_); |
| 542 |
| 543 // We need to clear the textures before they are first used. |
| 544 // The algorithm self-clears them later. |
| 545 glViewport(0, 0, width_, height_); |
| 546 glClearColor(0.0f, 0.0f, 0.0f, 0.0f); |
| 547 |
| 548 glBindFramebufferEXT(GL_FRAMEBUFFER, cmaa_framebuffer_); |
| 549 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, |
| 550 edges0_texture_, 0); |
| 551 glClear(GL_COLOR_BUFFER_BIT); |
| 552 |
| 553 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, |
| 554 edges1_texture_, 0); |
| 555 glClear(GL_COLOR_BUFFER_BIT); |
| 556 |
| 557 textures_initialized_ = true; |
| 558 } |
| 559 |
| 560 void ApplyFramebufferAttachmentCMAAINTELResourceManager::ReleaseTextures() { |
| 561 if (textures_initialized_) { |
| 562 glDeleteFramebuffersEXT(1, ©_framebuffer_); |
| 563 glDeleteFramebuffersEXT(1, &cmaa_framebuffer_); |
| 564 glDeleteTextures(1, &rgba8_texture_); |
| 565 glDeleteTextures(1, &edges0_texture_); |
| 566 glDeleteTextures(1, &edges1_texture_); |
| 567 glDeleteTextures(1, &mini4_edge_texture_); |
| 568 glDeleteTextures(1, &mini4_edge_depth_texture_); |
| 569 glDeleteTextures(1, &working_color_texture_); |
| 570 } |
| 571 textures_initialized_ = false; |
| 572 } |
| 573 |
| 574 void ApplyFramebufferAttachmentCMAAINTELResourceManager::CopyTexture( |
| 575 GLint source, |
| 576 GLint dest, |
| 577 bool via_fbo) { |
| 578 glViewport(0, 0, width_, height_); |
| 579 glActiveTexture(GL_TEXTURE0); |
| 580 glBindTexture(GL_TEXTURE_2D, source); |
| 581 |
| 582 if (!via_fbo) { |
| 583 glUseProgram(copy_to_image_shader_); |
| 584 if (!is_gles31_compatible_) { |
| 585 glUniform1i(copy_to_image_shader_outTexture_, 0); |
| 586 } |
| 587 glBindImageTextureEXT(0, dest, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8); |
| 588 } else { |
| 589 glDisable(GL_DEPTH_TEST); |
| 590 glDisable(GL_STENCIL_TEST); |
| 591 glDisable(GL_CULL_FACE); |
| 592 glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); |
| 593 glDepthMask(GL_FALSE); |
| 594 glDisable(GL_BLEND); |
| 595 glUseProgram(copy_to_framebuffer_shader_); |
| 596 } |
| 597 |
| 598 glDrawArrays(GL_TRIANGLES, 0, 3); |
| 599 glUseProgram(0); |
| 600 glBindTexture(GL_TEXTURE_2D, 0); |
| 601 } |
| 602 |
| 603 GLuint ApplyFramebufferAttachmentCMAAINTELResourceManager::CreateProgram( |
| 604 const char* defines, |
| 605 const char* vs_source, |
| 606 const char* fs_source) { |
| 607 GLuint program = glCreateProgram(); |
| 608 |
| 609 GLuint vs = CreateShader(GL_VERTEX_SHADER, defines, vs_source); |
| 610 GLuint fs = CreateShader(GL_FRAGMENT_SHADER, defines, fs_source); |
| 611 |
| 612 glAttachShader(program, vs); |
| 613 glDeleteShader(vs); |
| 614 glAttachShader(program, fs); |
| 615 glDeleteShader(fs); |
| 616 |
| 617 glLinkProgram(program); |
| 618 GLint link_status; |
| 619 glGetProgramiv(program, GL_LINK_STATUS, &link_status); |
| 620 |
| 621 if (link_status == 0) { |
| 622 #if DCHECK_IS_ON() |
| 623 GLint info_log_length; |
| 624 glGetProgramiv(program, GL_INFO_LOG_LENGTH, &info_log_length); |
| 625 std::vector<GLchar> info_log(info_log_length); |
| 626 glGetProgramInfoLog(program, static_cast<GLsizei>(info_log.size()), NULL, |
| 627 &info_log[0]); |
| 628 DLOG(ERROR) << "ApplyFramebufferAttachmentCMAAINTEL: " |
| 629 << "program link failed: " << &info_log[0]; |
| 630 #endif |
| 631 glDeleteProgram(program); |
| 632 program = 0; |
| 633 } |
| 634 |
| 635 return program; |
| 636 } |
| 637 |
| 638 GLuint ApplyFramebufferAttachmentCMAAINTELResourceManager::CreateShader( |
| 639 GLenum type, |
| 640 const char* defines, |
| 641 const char* source) { |
| 642 GLuint shader = glCreateShader(type); |
| 643 |
| 644 const char header_es31[] = |
| 645 "#version 310 es \n"; |
| 646 const char header_gl30[] = |
| 647 "#version 130 \n" |
| 648 "#extension GL_ARB_shading_language_420pack : require \n" |
| 649 "#extension GL_ARB_texture_gather : require \n" |
| 650 "#extension GL_ARB_explicit_uniform_location : require \n" |
| 651 "#extension GL_ARB_explicit_attrib_location : require \n" |
| 652 "#extension GL_ARB_shader_image_load_store : require \n"; |
| 653 |
| 654 const char* header = NULL; |
| 655 if (is_gles31_compatible_) { |
| 656 header = header_es31; |
| 657 } else { |
| 658 header = header_gl30; |
| 659 } |
| 660 |
| 661 const char* source_array[4] = {header, defines, "\n", source}; |
| 662 glShaderSource(shader, 4, source_array, NULL); |
| 663 |
| 664 glCompileShader(shader); |
| 665 |
| 666 GLint compile_result; |
| 667 glGetShaderiv(shader, GL_COMPILE_STATUS, &compile_result); |
| 668 if (compile_result == 0) { |
| 669 #if DCHECK_IS_ON() |
| 670 GLint info_log_length; |
| 671 glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &info_log_length); |
| 672 std::vector<GLchar> info_log(info_log_length); |
| 673 glGetShaderInfoLog(shader, static_cast<GLsizei>(info_log.size()), NULL, |
| 674 &info_log[0]); |
| 675 DLOG(ERROR) << "ApplyFramebufferAttachmentCMAAINTEL: " |
| 676 << "shader compilation failed: " |
| 677 << (type == GL_VERTEX_SHADER |
| 678 ? "GL_VERTEX_SHADER" |
| 679 : (type == GL_FRAGMENT_SHADER ? "GL_FRAGMENT_SHADER" |
| 680 : "UNKNOWN_SHADER")) |
| 681 << " shader compilation failed: " << &info_log[0]; |
| 682 #endif |
| 683 glDeleteShader(shader); |
| 684 shader = 0; |
| 685 } |
| 686 |
| 687 return shader; |
| 688 } |
| 689 |
| 690 // Shaders used in the CMAA algorithm. |
| 691 const char ApplyFramebufferAttachmentCMAAINTELResourceManager::vert_str_[] = |
| 692 "precision highp float; \n" |
| 693 "layout(location = 0) uniform float g_Depth; \n" |
| 694 "// No input data. \n" |
| 695 "// Verts are autogenerated. \n" |
| 696 "// \n" |
| 697 "// vertexID 0,1,2 should generate \n" |
| 698 "// POS: (-1,-1), (+3,-1), (-1,+3) \n" |
| 699 "// \n" |
| 700 "// This generates a triangle that completely covers the -1->1 viewport \n" |
| 701 "// \n" |
| 702 "void main() \n" |
| 703 "{ \n" |
| 704 " float x = -1.0 + float((gl_VertexID & 1) << 2); \n" |
| 705 " float y = -1.0 + float((gl_VertexID & 2) << 1); \n" |
| 706 " gl_Position = vec4(x, y, g_Depth, 1.0); \n" |
| 707 "} \n" |
| 708 " \n"; |
| 709 |
| 710 const char ApplyFramebufferAttachmentCMAAINTELResourceManager::cmaa_frag_s1_[] = |
| 711 "precision highp float; \n" |
| 712 "precision highp int; \n" |
| 713 " \n" |
| 714 "#define SETTINGS_ALLOW_SHORT_Zs 1 \n" |
| 715 "#define EDGE_DETECT_THRESHOLD 13.0f \n" |
| 716 " \n" |
| 717 "#define saturate(x) clamp((x), 0.0, 1.0) \n" |
| 718 " \n" |
| 719 "// bind to location 0 \n" |
| 720 "layout(location = 0) uniform float g_Depth; \n" |
| 721 "// bind to a uniform buffer bind point 0 \n" |
| 722 "layout(location = 1) uniform vec2 g_OneOverScreenSize; \n" |
| 723 "#ifndef EDGE_DETECT_THRESHOLD \n" |
| 724 "layout(location = 2) uniform float g_ColorThreshold; \n" |
| 725 "#endif \n" |
| 726 " \n" |
| 727 "#ifdef SUPPORTS_USAMPLER2D \n" |
| 728 "#define USAMPLER usampler2D \n" |
| 729 "#define UVEC4 uvec4 \n" |
| 730 "#define LOAD_UINT(arg) arg \n" |
| 731 "#define STORE_UVEC4(arg) arg \n" |
| 732 "#else \n" |
| 733 "#define USAMPLER sampler2D \n" |
| 734 "#define UVEC4 vec4 \n" |
| 735 "#define LOAD_UINT(arg) uint(arg * 255.0f) \n" |
| 736 "#define STORE_UVEC4(arg) vec4(float(arg.x) / 255.0f, \n" |
| 737 " float(arg.y) / 255.0f, \n" |
| 738 " float(arg.z) / 255.0f, \n" |
| 739 " float(arg.w) / 255.0f) \n" |
| 740 "#endif \n" |
| 741 " \n" |
| 742 "// bind to texture stage 0/1 \n" |
| 743 "layout(binding = 0) uniform highp sampler2D g_screenTexture; \n" |
| 744 "layout(binding = 1) uniform highp sampler2D g_src0TextureFlt; \n" |
| 745 "layout(binding = 1) uniform highp USAMPLER g_src0Texture4Uint; \n" |
| 746 " \n" |
| 747 "// bind to image stage 0/1/2 \n" |
| 748 "#ifdef GL_ES \n" |
| 749 "layout(binding = 0, EDGE_READ_FORMAT) restrict writeonly uniform highp \n" |
| 750 " image2D g_resultTexture; \n" |
| 751 "layout(binding = 1, rgba8) restrict writeonly uniform highp \n" |
| 752 " image2D g_resultTextureFlt4Slot1; \n" |
| 753 "layout(binding = 2, EDGE_READ_FORMAT) restrict writeonly uniform highp \n" |
| 754 " image2D g_resultTextureSlot2; \n" |
| 755 "#else \n" |
| 756 "layout(EDGE_READ_FORMAT) restrict writeonly uniform highp \n" |
| 757 " image2D g_resultTexture; \n" |
| 758 "layout(rgba8) restrict writeonly uniform highp \n" |
| 759 " image2D g_resultTextureFlt4Slot1; \n" |
| 760 "layout(EDGE_READ_FORMAT) restrict writeonly uniform highp \n" |
| 761 " image2D g_resultTextureSlot2; \n" |
| 762 "#endif \n" |
| 763 " \n" |
| 764 "// Constants \n" |
| 765 "const vec4 c_lumWeights = vec4(0.2126f, 0.7152f, 0.0722f, 0.0000f); \n" |
| 766 " \n" |
| 767 "#ifdef EDGE_DETECT_THRESHOLD \n" |
| 768 "const float c_ColorThreshold = 1.0f / EDGE_DETECT_THRESHOLD; \n" |
| 769 "#endif \n" |
| 770 " \n" |
| 771 "// Must be even number; Will work with ~16 pretty good too for \n" |
| 772 "// additional performance, or with ~64 for highest quality. \n" |
| 773 "const int c_maxLineLength = 64; \n" |
| 774 " \n" |
| 775 "const vec4 c_edgeDebugColours[5] = vec4[5](vec4(0.5, 0.5, 0.5, 0.4), \n" |
| 776 " vec4(1.0, 0.1, 1.0, 0.8), \n" |
| 777 " vec4(0.9, 0.0, 0.0, 0.8), \n" |
| 778 " vec4(0.0, 0.9, 0.0, 0.8), \n" |
| 779 " vec4(0.0, 0.0, 0.9, 0.8)); \n" |
| 780 " \n" |
| 781 "// this isn't needed if colour UAV is _SRGB but that doesn't work \n" |
| 782 "// everywhere \n" |
| 783 "#ifdef IN_GAMMA_CORRECT_MODE \n" |
| 784 "///////////////////////////////////////////////////////////////////////\n" |
| 785 "// \n" |
| 786 "// SRGB Helper Functions taken from D3DX_DXGIFormatConvert.inl \n" |
| 787 "float D3DX_FLOAT_to_SRGB(float val) { \n" |
| 788 " if (val < 0.0031308f) \n" |
| 789 " val *= 12.92f; \n" |
| 790 " else { \n" |
| 791 " val = 1.055f * pow(val, 1.0f / 2.4f) - 0.055f; \n" |
| 792 " } \n" |
| 793 " return val; \n" |
| 794 "} \n" |
| 795 "// \n" |
// GLSL: applies D3DX_FLOAT_to_SRGB independently to the x, y and z
// components of |val| and returns the converted vector.
| 796 "vec3 D3DX_FLOAT3_to_SRGB(vec3 val) { \n" |
| 797 " vec3 outVal; \n" |
| 798 " outVal.x = D3DX_FLOAT_to_SRGB(val.x); \n" |
| 799 " outVal.y = D3DX_FLOAT_to_SRGB(val.y); \n" |
| 800 " outVal.z = D3DX_FLOAT_to_SRGB(val.z); \n" |
| 801 " return outVal; \n" |
| 802 "} \n" |
| 803 "// \n" |
| 804 "///////////////////////////////////////////////////////////////////////\n" |
| 805 "#endif // IN_GAMMA_CORRECT_MODE \n" |
| 806 " \n" |
| 807 "// how .rgba channels from the edge texture maps to pixel edges: \n" |
| 808 "// \n" |
| 809 "// A - 0x08 \n" |
| 810 "// |¯¯¯¯¯¯¯¯¯| \n" |
| 811 "// | | \n" |
| 812 "// 0x04 - B | pixel | R - 0x01 \n" |
| 813 "// | | \n" |
| 814 "// |_________| \n" |
| 815 "// G - 0x02 \n" |
| 816 "// \n" |
| 817 "// (A - there's an edge between us and a pixel above us) \n" |
| 818 "// (R - there's an edge between us and a pixel to the right) \n" |
| 819 "// (G - there's an edge between us and a pixel at the bottom) \n" |
| 820 "// (B - there's an edge between us and a pixel to the left) \n" |
| 821 " \n" |
| 822 "// Expecting values of 1 and 0 only! \n" |
// GLSL: packs four edge flags into the low four bits of a uint:
// x -> bit 0 (0x01), y -> bit 1 (0x02), z -> bit 2 (0x04), w -> bit 3 (0x08).
// No masking is applied, so each component must already be 0 or 1.
| 823 "uint PackEdge(uvec4 edges) { \n" |
| 824 " return (edges.x << 0u) | (edges.y << 1u) | (edges.z << 2u) | \n" |
| 825 " (edges.w << 3u); \n" |
| 826 "} \n" |
| 827 " \n" |
// GLSL: inverse of PackEdge. Returns a uvec4 whose x/y/z/w components are 1
// when bits 0x01/0x02/0x04/0x08 of |value| are set, and 0 otherwise. Higher
// bits of |value| are ignored.
| 828 "uvec4 UnpackEdge(uint value) { \n" |
| 829 " uvec4 ret; \n" |
| 830 " ret.x = (value & 0x01u) != 0u ? 1u : 0u; \n" |
| 831 " ret.y = (value & 0x02u) != 0u ? 1u : 0u; \n" |
| 832 " ret.z = (value & 0x04u) != 0u ? 1u : 0u; \n" |
| 833 " ret.w = (value & 0x08u) != 0u ? 1u : 0u; \n" |
| 834 " return ret; \n" |
| 835 "} \n" |
| 836 " \n" |
// GLSL: packs a screen position and the inverted-Z flag into one uint:
// screenPos.x is OR-ed into the low bits, screenPos.y is shifted left by 15,
// and bit 30 is set when |invertedZShape| is true. The coordinates are not
// masked here; the matching UnpackZ reads each one back as a 15-bit field.
| 837 "uint PackZ(const uvec2 screenPos, const bool invertedZShape) { \n" |
| 838 " uint retVal = screenPos.x | (screenPos.y << 15u); \n" |
| 839 " if (invertedZShape) \n" |
| 840 " retVal |= (1u << 30u); \n" |
| 841 " return retVal; \n" |
| 842 "} \n" |
| 843 " \n" |
// GLSL: decodes a value produced by the two-argument PackZ. screenPos.x is
// bits 0..14 and screenPos.y is bits 15..29.
// NOTE(review): invertedZShape is computed as (packedZ >> 30u) == 1u, which
// is true only when bit 30 is set AND bit 31 is clear. A value produced by
// the three-argument PackZ with |horizontal| set would therefore decode as
// not inverted here; use the four-parameter UnpackZ for such values.
| 844 "void UnpackZ(uint packedZ, out uvec2 screenPos, \n" |
| 845 " out bool invertedZShape) \n" |
| 846 "{ \n" |
| 847 " screenPos.x = packedZ & 0x7FFFu; \n" |
| 848 " screenPos.y = (packedZ >> 15u) & 0x7FFFu; \n" |
| 849 " invertedZShape = (packedZ >> 30u) == 1u; \n" |
| 850 "} \n" |
| 851 " \n" |
// GLSL: overload of PackZ that additionally records the orientation.
// Layout: screenPos.x OR-ed into the low bits, screenPos.y shifted left by
// 15, bit 30 = invertedZShape, bit 31 = horizontal. Coordinates are not
// masked before packing.
| 852 "uint PackZ(const uvec2 screenPos, \n" |
| 853 " const bool invertedZShape, \n" |
| 854 " const bool horizontal) { \n" |
| 855 " uint retVal = screenPos.x | (screenPos.y << 15u); \n" |
| 856 " if (invertedZShape) \n" |
| 857 " retVal |= (1u << 30u); \n" |
| 858 " if (horizontal) \n" |
| 859 " retVal |= (1u << 31u); \n" |
| 860 " return retVal; \n" |
| 861 "} \n" |
| 862 " \n" |
// GLSL: decodes a value produced by the three-argument PackZ. screenPos.x is
// bits 0..14, screenPos.y is bits 15..29, invertedZShape is bit 30 and
// horizontal is bit 31; each flag is tested independently of the other.
| 863 "void UnpackZ(uint packedZ, \n" |
| 864 " out uvec2 screenPos, \n" |
| 865 " out bool invertedZShape, \n" |
| 866 " out bool horizontal) { \n" |
| 867 " screenPos.x = packedZ & 0x7FFFu; \n" |
| 868 " screenPos.y = (packedZ >> 15u) & 0x7FFFu; \n" |
| 869 " invertedZShape = (packedZ & (1u << 30u)) != 0u; \n" |
| 870 " horizontal = (packedZ & (1u << 31u)) != 0u; \n" |
| 871 "} \n" |
| 872 " \n" |
// GLSL: builds a debug/AA-info value for |pixelPos|. Reads the red channel
// of g_src0TextureFlt at that texel, converts it to an integer with
// uint(r * 255.5), adds shapeType << 4 on top, and returns the sum divided
// by 255.0 replicated into all four components.
| 873 "vec4 PackBlurAAInfo(ivec2 pixelPos, uint shapeType) { \n" |
| 874 " uint packedEdges = uint( \n" |
| 875 " texelFetch(g_src0TextureFlt, pixelPos, 0).r * 255.5); \n" |
| 876 " \n" |
| 877 " float retval = float(packedEdges + (shapeType << 4u)); \n" |
| 878 " \n" |
| 879 " return vec4(retval / 255.0); \n" |
| 880 "} \n" |
| 881 " \n" |
// GLSL: inverse of PackBlurAAInfo for a single channel value. Converts
// |packedValue| to an integer with uint(packedValue * 255.5), then returns
// the low four bits in |edges| and the remaining upper bits in |shapeType|.
| 882 "void UnpackBlurAAInfo(float packedValue, out uint edges, \n" |
| 883 " out uint shapeType) { \n" |
| 884 " uint packedValueInt = uint(packedValue * 255.5); \n" |
| 885 " edges = packedValueInt & 0xFu; \n" |
| 886 " shapeType = packedValueInt >> 4u; \n" |
| 887 "} \n" |
| 888 " \n" |
// GLSL: returns the weighted difference between two colours: the dot product
// of the per-channel absolute difference with c_lumWeights. The weights are
// swizzled to .bgr when IN_BGR_MODE is defined and used as .rgb otherwise.
| 889 "float EdgeDetectColorCalcDiff(vec3 colorA, vec3 colorB) { \n" |
| 890 "#ifdef IN_BGR_MODE \n" |
| 891 " vec3 LumWeights = c_lumWeights.bgr; \n" |
| 892 "#else \n" |
| 893 " vec3 LumWeights = c_lumWeights.rgb; \n" |
| 894 "#endif \n" |
| 895 " \n" |
| 896 " return dot(abs(colorA.rgb - colorB.rgb), LumWeights); \n" |
| 897 "} \n" |
| 898 " \n" |
// GLSL: returns true when EdgeDetectColorCalcDiff(colorA, colorB) is
// strictly greater than the threshold. The threshold is the compile-time
// c_ColorThreshold when EDGE_DETECT_THRESHOLD is defined, and
// g_ColorThreshold (declared outside this part of the shader) otherwise.
| 899 "bool EdgeDetectColor(vec3 colorA, vec3 colorB) { \n" |
| 900 "#ifdef EDGE_DETECT_THRESHOLD \n" |
| 901 " return EdgeDetectColorCalcDiff(colorA, colorB) > c_ColorThreshold; \n" |
| 902 "#else \n" |
| 903 " return EdgeDetectColorCalcDiff(colorA, colorB) > g_ColorThreshold; \n" |
| 904 "#endif \n" |
| 905 "} \n" |
| 906 " \n" |
// GLSL: measures how far an edge line extends on both sides of |screenPos|.
//   lineLengthLeft / lineLengthRight: outputs, in steps of |stepRight|.
//   horizontal / invertedZShape: select which edge bits are traced and which
//     edge bits terminate the search.
//   stepRight: per-step texel offset; the left search walks -stepRight, the
//     right search walks +stepRight.
// Edge bits are read from the red channel of g_src0TextureFlt as
// uint(r * 255.5) and use the 0x01/0x02/0x04/0x08 layout documented above
// PackEdge.
| 907 "void FindLineLength(out int lineLengthLeft, \n" |
| 908 " out int lineLengthRight, \n" |
| 909 " ivec2 screenPos, \n" |
| 910 " const bool horizontal, \n" |
| 911 " const bool invertedZShape, \n" |
| 912 " const ivec2 stepRight) { \n" |
| 913 " // TODO: there must be a cleaner and faster way to get to these - \n" |
| 914 " // a precalculated array indexing maybe? \n" |
| 915 " uint maskLeft, bitsContinueLeft, maskRight, bitsContinueRight; \n" |
| 916 " { \n" |
| 917 " // Horizontal (vertical is the same, just rotated 90º \n" |
| 918 " // counter-clockwise) \n" |
| 919 " // Inverted Z case: // Normal Z case: \n" |
| 920 " // __ // __ \n" |
| 921 " // X| // X| \n" |
| 922 " // -- // -- \n" |
| 923 " // \n" |
| 924 " uint maskTraceLeft, maskTraceRight; \n" |
| 925 " uint maskStopLeft, maskStopRight; \n" |
| 926 " if (horizontal) { \n" |
| 927 " if (invertedZShape) { \n" |
| 928 " maskTraceLeft = 0x02u; // tracing bottom edge \n" |
| 929 " maskTraceRight = 0x08u; // tracing top edge \n" |
| 930 " } else { \n" |
| 931 " maskTraceLeft = 0x08u; // tracing top edge \n" |
| 932 " maskTraceRight = 0x02u; // tracing bottom edge \n" |
| 933 " } \n" |
| 934 " maskStopLeft = 0x01u; // stop on right edge \n" |
| 935 " maskStopRight = 0x04u; // stop on left edge \n" |
| 936 " } else { \n" |
| 937 " if (invertedZShape) { \n" |
| 938 " maskTraceLeft = 0x01u; // tracing right edge \n" |
| 939 " maskTraceRight = 0x04u; // tracing left edge \n" |
| 940 " } else { \n" |
| 941 " maskTraceLeft = 0x04u; // tracing left edge \n" |
| 942 " maskTraceRight = 0x01u; // tracing right edge \n" |
| 943 " } \n" |
| 944 " maskStopLeft = 0x08u; // stop on top edge \n" |
| 945 " maskStopRight = 0x02u; // stop on bottom edge \n" |
| 946 " } \n" |
| 947 " \n" |
// A sample "continues" the line only if, within trace|stop bits, exactly the
// trace bit is set (trace edge present, stop edge absent).
| 948 " maskLeft = maskTraceLeft | maskStopLeft; \n" |
| 949 " bitsContinueLeft = maskTraceLeft; \n" |
| 950 " maskRight = maskTraceRight | maskStopRight; \n" |
| 951 " bitsContinueRight = maskTraceRight; \n" |
| 952 " } \n" |
| 953 "///////////////////////////////////////////////////////////////////////\n" |
| 954 " \n" |
| 955 "#ifdef SETTINGS_ALLOW_SHORT_Zs \n" |
| 956 " int i = 1; \n" |
| 957 "#else \n" |
| 958 " int i = 2; // starting from 2 because we already know it's at least 2\n" |
| 959 "#endif \n" |
// Both directions advance in lock-step; the left sample is taken i steps
// back and the right sample i + 1 steps forward from screenPos.
| 960 " for (; i < c_maxLineLength; i++) { \n" |
| 961 " uint edgeLeft = uint( \n" |
| 962 " texelFetch(g_src0TextureFlt, \n" |
| 963 " ivec2(screenPos.xy - stepRight * i), 0).r * 255.5); \n" |
| 964 " uint edgeRight = uint( \n" |
| 965 " texelFetch(g_src0TextureFlt, \n" |
| 966 " ivec2(screenPos.xy + stepRight * (i + 1)), \n" |
| 967 " 0).r * 255.5); \n" |
| 968 " \n" |
| 969 " // stop on encountering 'stopping' edge (as defined by masks) \n" |
| 970 " int stopLeft = (edgeLeft & maskLeft) != bitsContinueLeft ? 1 : 0; \n" |
| 971 " int stopRight = \n" |
| 972 " (edgeRight & maskRight) != bitsContinueRight ? 1 : 0; \n" |
| 973 " \n" |
// The search ends as soon as either side stops; the side that stopped
// reports i, the side that could have continued reports i + 1.
| 974 " if (bool(stopLeft) || bool(stopRight)) { \n" |
| 975 " lineLengthLeft = 1 + i - stopLeft; \n" |
| 976 " lineLengthRight = 1 + i - stopRight; \n" |
| 977 " return; \n" |
| 978 " } \n" |
| 979 " } \n" |
// Loop ran to completion: i == c_maxLineLength here, used for both sides.
| 980 " lineLengthLeft = lineLengthRight = i; \n" |
| 981 " return; \n" |
| 982 "} \n" |
| 983 " \n" |
// GLSL: blends the pixels along a detected Z-shaped edge that passes through
// |screenPos|. It measures the line with FindLineLength, then for every
// pixel in [loopFrom, loopTo] along |stepRight| samples g_screenTexture
// (textureLod, level 0) at a position shifted by blendDir * k and stores the
// result in g_resultTextureFlt4Slot1 via imageStore.
//   horizontal: stepRight = (1, 0), blendDir = (0, -1);
//   otherwise:  stepRight = (0, -1), blendDir = (-1, 0).
| 984 "void ProcessDetectedZ(ivec2 screenPos, bool horizontal, \n" |
| 985 " bool invertedZShape) { \n" |
| 986 " int lineLengthLeft, lineLengthRight; \n" |
| 987 " \n" |
| 988 " ivec2 stepRight = (horizontal) ? (ivec2(1, 0)) : (ivec2(0, -1)); \n" |
| 989 " vec2 blendDir = (horizontal) ? (vec2(0, -1)) : (vec2(-1, 0)); \n" |
| 990 " \n" |
| 991 " FindLineLength(lineLengthLeft, lineLengthRight, screenPos, \n" |
| 992 " horizontal, invertedZShape, stepRight); \n" |
| 993 " \n" |
// Used below to turn texel-centre coordinates into normalized texture
// coordinates for textureLod.
| 994 " vec2 pixelSize = g_OneOverScreenSize; \n" |
| 995 " \n" |
// Odd line lengths shorten the blend span by 0.15 on the respective side.
| 996 " float leftOdd = 0.15 * float(lineLengthLeft % 2); \n" |
| 997 " float rightOdd = 0.15 * float(lineLengthRight % 2); \n" |
| 998 " \n" |
// Only about half of each measured length is processed on each side.
| 999 " int loopFrom = -int((lineLengthLeft + 1) / 2) + 1; \n" |
| 1000 " int loopTo = int((lineLengthRight + 1) / 2); \n" |
| 1001 " \n" |
| 1002 " float totalLength = float(loopTo - loopFrom) + 1.0 - leftOdd - \n" |
| 1003 " rightOdd; \n" |
| 1004 " \n" |
| 1005 " for (int i = loopFrom; i <= loopTo; i++) { \n" |
| 1006 " highp ivec2 pixelPos = screenPos + stepRight * i; \n" |
| 1007 " vec2 pixelPosFlt = vec2(float(pixelPos.x) + 0.5, \n" |
| 1008 " float(pixelPos.y) + 0.5); \n" |
| 1009 " \n" |
| 1010 "#ifdef DEBUG_OUTPUT_AAINFO \n" |
| 1011 " imageStore(g_resultTextureSlot2, pixelPos, \n" |
| 1012 " PackBlurAAInfo(pixelPos, 1u)); \n" |
| 1013 "#endif \n" |
| 1014 " \n" |
// m is the pixel's position along the span, clamped by saturate(). k is the
// signed sub-texel shift: m for i <= 0 and m - 1 for i > 0, negated for
// inverted Z shapes.
| 1015 " float m = (float(i) + 0.5 - leftOdd - float(loopFrom)) / \n" |
| 1016 " totalLength; \n" |
| 1017 " m = saturate(m); \n" |
| 1018 " float k = m - ((i > 0) ? 1.0 : 0.0); \n" |
| 1019 " k = (invertedZShape) ? (-k) : (k); \n" |
| 1020 " \n" |
| 1021 " vec4 color = textureLod(g_screenTexture, \n" |
| 1022 " (pixelPosFlt + blendDir * k) * pixelSize, \n" |
| 1023 " 0.0); \n" |
| 1024 " \n" |
| 1025 "#ifdef IN_GAMMA_CORRECT_MODE \n" |
| 1026 " color.rgb = D3DX_FLOAT3_to_SRGB(color.rgb); \n" |
| 1027 "#endif \n" |
| 1028 " imageStore(g_resultTextureFlt4Slot1, pixelPos, color); \n" |
| 1029 " } \n" |
| 1030 "} \n" |
| 1031 " \n" |
// GLSL: turns a four-component blur map into a debug colour. Each component
// b is converted to a weight 1 / (1 - b) - 1 and used to mix fixed colours:
// x -> pixelB (0.8, 0.8, 0), y -> pixelT (1, 0, 0), z -> pixelR (0, 1, 0),
// w -> pixelL (0, 0, 1). The centre colour pixelC is black, so it only
// contributes its weight of 1.0 to the normalization. Alpha is the sum of
// the resulting rgb times 100, and the whole vector goes through saturate().
// NOTE(review): a blurMap component equal to 1.0 makes the weight expression
// divide by zero -- callers presumably keep components below 1; confirm.
| 1032 "vec4 CalcDbgDisplayColor(const vec4 blurMap) { \n" |
| 1033 " vec3 pixelC = vec3(0.0, 0.0, 0.0); \n" |
| 1034 " vec3 pixelL = vec3(0.0, 0.0, 1.0); \n" |
| 1035 " vec3 pixelT = vec3(1.0, 0.0, 0.0); \n" |
| 1036 " vec3 pixelR = vec3(0.0, 1.0, 0.0); \n" |
| 1037 " vec3 pixelB = vec3(0.8, 0.8, 0.0); \n" |
| 1038 " \n" |
| 1039 " const float centerWeight = 1.0; \n" |
| 1040 " float fromBelowWeight = (1.0 / (1.0 - blurMap.x)) - 1.0; \n" |
| 1041 " float fromAboveWeight = (1.0 / (1.0 - blurMap.y)) - 1.0; \n" |
| 1042 " float fromRightWeight = (1.0 / (1.0 - blurMap.z)) - 1.0; \n" |
| 1043 " float fromLeftWeight = (1.0 / (1.0 - blurMap.w)) - 1.0; \n" |
| 1044 " \n" |
| 1045 " float weightSum = centerWeight + dot(vec4(fromBelowWeight, \n" |
| 1046 " fromAboveWeight, \n" |
| 1047 " fromRightWeight, \n" |
| 1048 " fromLeftWeight), \n" |
| 1049 " vec4(1, 1, 1, 1)); \n" |
| 1050 " \n" |
| 1051 " vec4 pixel; \n" |
| 1052 " \n" |
| 1053 " pixel.rgb = pixelC.rgb + fromAboveWeight * pixelT + \n" |
| 1054 " fromBelowWeight * pixelB + \n" |
| 1055 " fromLeftWeight * pixelL + \n" |
| 1056 " fromRightWeight * pixelR; \n" |
| 1057 " pixel.rgb /= weightSum; \n" |
| 1058 " \n" |
| 1059 " pixel.a = dot(pixel.rgb, vec3(1, 1, 1)) * 100.0; \n" |
| 1060 " \n" |
| 1061 " return saturate(pixel); \n" |
| 1062 "} \n" |
| 1063 " \n" |
// GLSL (compiled only when DETECT_EDGES1 is defined): first edge-detection
// pass. Each fragment covers a 2x2 block of g_screenTexture texels starting
// at 2 * gl_FragCoord.xy. For each of the four texels it computes the colour
// difference to the right neighbour (et.x) and to the neighbour at y + 1
// (et.y), subtracts the threshold, clamps with saturate() and quantizes each
// to 4 bits; the pair is packed as x | (y << 4) into one component of
// outEdges (.x = pixel00, .y = pixel10, .z = pixel01, .w = pixel11).
// gl_FragDepth is set to 1.0 when any packed component is non-zero, and in
// that case every fetched texel that took part in an above-threshold
// difference is copied into g_resultTextureFlt4Slot1.
| 1064 "#ifdef DETECT_EDGES1 \n" |
| 1065 "layout(location = 0) out UVEC4 outEdges; \n" |
| 1066 "void DetectEdges1() { \n" |
| 1067 " uvec4 outputEdges; \n" |
| 1068 " ivec2 screenPosI = ivec2(gl_FragCoord.xy) * ivec2(2, 2); \n" |
| 1069 " \n" |
// pixelXY is the texel at offset (X, Y) from screenPosI; (2, 2) is not
// needed and is not fetched.
| 1070 " // .rgb contains colour, .a contains flag whether to output it to \n" |
| 1071 " // working colour texture \n" |
| 1072 " vec4 pixel00 = texelFetch(g_screenTexture, screenPosI.xy, 0); \n" |
| 1073 " vec4 pixel10 = \n" |
| 1074 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(1, 0));\n" |
| 1075 " vec4 pixel20 = \n" |
| 1076 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(2, 0));\n" |
| 1077 " vec4 pixel01 = \n" |
| 1078 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(0, 1));\n" |
| 1079 " vec4 pixel11 = \n" |
| 1080 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(1, 1));\n" |
| 1081 " vec4 pixel21 = \n" |
| 1082 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(2, 1));\n" |
| 1083 " vec4 pixel02 = \n" |
| 1084 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(0, 2));\n" |
| 1085 " vec4 pixel12 = \n" |
| 1086 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(1, 2));\n" |
| 1087 " \n" |
// Accumulators: non-zero once the texel has been on either side of an
// above-threshold difference.
| 1088 " float storeFlagPixel00 = 0.0; \n" |
| 1089 " float storeFlagPixel10 = 0.0; \n" |
| 1090 " float storeFlagPixel20 = 0.0; \n" |
| 1091 " float storeFlagPixel01 = 0.0; \n" |
| 1092 " float storeFlagPixel11 = 0.0; \n" |
| 1093 " float storeFlagPixel21 = 0.0; \n" |
| 1094 " float storeFlagPixel02 = 0.0; \n" |
| 1095 " float storeFlagPixel12 = 0.0; \n" |
| 1096 " \n" |
| 1097 " vec2 et; \n" |
| 1098 " \n" |
| 1099 "#ifdef EDGE_DETECT_THRESHOLD \n" |
| 1100 " float threshold = c_ColorThreshold; \n" |
| 1101 "#else \n" |
| 1102 " float threshold = g_ColorThreshold; \n" |
| 1103 "#endif \n" |
| 1104 " \n" |
// pixel00: differences to pixel10 (x) and pixel01 (y). The "+ 0.99" makes
// any positive remainder quantize to at least 1.
| 1105 " { \n" |
| 1106 " et.x = EdgeDetectColorCalcDiff(pixel00.rgb, pixel10.rgb); \n" |
| 1107 " et.y = EdgeDetectColorCalcDiff(pixel00.rgb, pixel01.rgb); \n" |
| 1108 " et = saturate(et - threshold); \n" |
| 1109 " ivec2 eti = ivec2(et * 15.0 + 0.99); \n" |
| 1110 " outputEdges.x = uint(eti.x | (eti.y << 4)); \n" |
| 1111 " \n" |
| 1112 " storeFlagPixel00 += et.x; \n" |
| 1113 " storeFlagPixel00 += et.y; \n" |
| 1114 " storeFlagPixel10 += et.x; \n" |
| 1115 " storeFlagPixel01 += et.y; \n" |
| 1116 " } \n" |
| 1117 " \n" |
// pixel10: differences to pixel20 (x) and pixel11 (y).
| 1118 " { \n" |
| 1119 " et.x = EdgeDetectColorCalcDiff(pixel10.rgb, pixel20.rgb); \n" |
| 1120 " et.y = EdgeDetectColorCalcDiff(pixel10.rgb, pixel11.rgb); \n" |
| 1121 " et = saturate(et - threshold); \n" |
| 1122 " ivec2 eti = ivec2(et * 15.0 + 0.99); \n" |
| 1123 " outputEdges.y = uint(eti.x | (eti.y << 4)); \n" |
| 1124 " \n" |
| 1125 " storeFlagPixel10 += et.x; \n" |
| 1126 " storeFlagPixel10 += et.y; \n" |
| 1127 " storeFlagPixel20 += et.x; \n" |
| 1128 " storeFlagPixel11 += et.y; \n" |
| 1129 " } \n" |
| 1130 " \n" |
// pixel01: differences to pixel11 (x) and pixel02 (y).
| 1131 " { \n" |
| 1132 " et.x = EdgeDetectColorCalcDiff(pixel01.rgb, pixel11.rgb); \n" |
| 1133 " et.y = EdgeDetectColorCalcDiff(pixel01.rgb, pixel02.rgb); \n" |
| 1134 " et = saturate(et - threshold); \n" |
| 1135 " ivec2 eti = ivec2(et * 15.0 + 0.99); \n" |
| 1136 " outputEdges.z = uint(eti.x | (eti.y << 4)); \n" |
| 1137 " \n" |
| 1138 " storeFlagPixel01 += et.x; \n" |
| 1139 " storeFlagPixel01 += et.y; \n" |
| 1140 " storeFlagPixel11 += et.x; \n" |
| 1141 " storeFlagPixel02 += et.y; \n" |
| 1142 " } \n" |
| 1143 " \n" |
// pixel11: differences to pixel21 (x) and pixel12 (y).
| 1144 " { \n" |
| 1145 " et.x = EdgeDetectColorCalcDiff(pixel11.rgb, pixel21.rgb); \n" |
| 1146 " et.y = EdgeDetectColorCalcDiff(pixel11.rgb, pixel12.rgb); \n" |
| 1147 " et = saturate(et - threshold); \n" |
| 1148 " ivec2 eti = ivec2(et * 15.0 + 0.99); \n" |
| 1149 " outputEdges.w = uint(eti.x | (eti.y << 4)); \n" |
| 1150 " \n" |
| 1151 " storeFlagPixel11 += et.x; \n" |
| 1152 " storeFlagPixel11 += et.y; \n" |
| 1153 " storeFlagPixel21 += et.x; \n" |
| 1154 " storeFlagPixel12 += et.y; \n" |
| 1155 " } \n" |
| 1156 " \n" |
// Depth output marks blocks that contain at least one candidate edge.
| 1157 " gl_FragDepth = any(bvec4(outputEdges)) ? 1.0 : 0.0; \n" |
| 1158 " \n" |
| 1159 " if (gl_FragDepth != 0.0) { \n" |
| 1160 " if (storeFlagPixel00 != 0.0) \n" |
| 1161 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(0, 0),\n" |
| 1162 " pixel00); \n" |
| 1163 " if (storeFlagPixel10 != 0.0) \n" |
| 1164 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(1, 0),\n" |
| 1165 " pixel10); \n" |
| 1166 " if (storeFlagPixel20 != 0.0) \n" |
| 1167 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(2, 0),\n" |
| 1168 " pixel20); \n" |
| 1169 " if (storeFlagPixel01 != 0.0) \n" |
| 1170 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(0, 1),\n" |
| 1171 " pixel01); \n" |
| 1172 " if (storeFlagPixel02 != 0.0) \n" |
| 1173 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(0, 2),\n" |
| 1174 " pixel02); \n" |
| 1175 " if (storeFlagPixel11 != 0.0) \n" |
| 1176 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(1, 1),\n" |
| 1177 " pixel11); \n" |
| 1178 " if (storeFlagPixel21 != 0.0) \n" |
| 1179 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(2, 1),\n" |
| 1180 " pixel21); \n" |
| 1181 " if (storeFlagPixel12 != 0.0) \n" |
| 1182 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(1, 2),\n" |
| 1183 " pixel12); \n" |
| 1184 " } \n" |
| 1185 " outEdges = STORE_UVEC4(outputEdges); \n" |
| 1186 "} \n" |
| 1187 "#endif // DETECT_EDGES1 \n" |
| 1188 " \n" |
// GLSL: splits a packed value into its two 4-bit fields and rescales them:
// .x = (val & 0x0F) / 15, .y = (val >> 4) / 15. This mirrors the
// "eti.x | (eti.y << 4)" packing done in DetectEdges1.
| 1189 "vec2 UnpackThresholds(uint val) { \n" |
| 1190 " return vec2(val & 0x0Fu, val >> 4u) / 15.0f; \n" |
| 1191 "} \n" |
| 1192 " \n" |
// GLSL: decides which of the current pixel's two edges (edges[0].x and
// edges[0].y) survive, by comparing them against a threshold derived from
// the twelve edge strengths passed in. Returns PackEdge(cx, cy, 0, 0), i.e.
// bit 0x01 for edges[0].x and bit 0x02 for edges[0].y.
| 1193 "uint PruneNonDominantEdges(vec4 edges[3]) { \n" |
| 1194 " vec4 maxE4 = vec4(0.0, 0.0, 0.0, 0.0); \n" |
| 1195 " \n" |
| 1196 " float avg = 0.0; \n" |
| 1197 " \n" |
// NOTE(review): avg is assigned, not accumulated, on every iteration, so
// after the loop it equals sum(edges[2]) / 12 rather than the average of all
// twelve values that the divisor (3.0 * 4.0) suggests. "avg +=" may have
// been intended; left unchanged because the 0.65 / 0.35 mix below may be
// tuned against the current behaviour -- confirm before changing.
| 1198 " for (int i = 0; i < 3; i++) { \n" |
| 1199 " maxE4 = max(maxE4, edges[i]); \n" |
| 1200 " \n" |
| 1201 " avg = dot(edges[i], vec4(1, 1, 1, 1) / (3.0 * 4.0)); \n" |
| 1202 " } \n" |
| 1203 " \n" |
// Reduce the component-wise maximum to a single scalar maximum.
| 1204 " vec2 maxE2 = max(maxE4.xy, maxE4.zw); \n" |
| 1205 " float maxE = max(maxE2.x, maxE2.y); \n" |
| 1206 " \n" |
| 1207 " float threshold = avg * 0.65 + maxE * 0.35; \n" |
| 1208 " \n" |
| 1209 " // threshold = 0.0001; // this disables non-dominant edge pruning! \n" |
| 1210 " \n" |
| 1211 " uint cx = edges[0].x >= threshold ? 1u : 0u; \n" |
| 1212 " uint cy = edges[0].y >= threshold ? 1u : 0u; \n" |
| 1213 " return PackEdge(uvec4(cx, cy, 0, 0)); \n" |
| 1214 "} \n" |
| 1215 " \n" |
// GLSL: gathers the twelve edge strengths around grid cell (offX, offY) of
// the 6x6 |packedVals| array (indexed as x * 6 + y) into |edges| for
// PruneNonDominantEdges. Names encode the offset from the cell: P0P0 is the
// cell itself, P1 = +1, M1 = -1 (first pair is x, second is y).
// The indexing reads x - 1, x + 1, y - 1 and y + 1, so offX and offY must be
// in [1, 4] for all accesses to stay inside the array.
| 1216 "void CollectEdges(int offX, \n" |
| 1217 " int offY, \n" |
| 1218 " out vec4 edges[3], \n" |
| 1219 " const uint packedVals[6 * 6]) { \n" |
| 1220 " vec2 pixelP0P0 = UnpackThresholds(packedVals[(offX)*6+(offY)]); \n" |
| 1221 " vec2 pixelP1P0 = UnpackThresholds(packedVals[(offX+1)*6+(offY)]); \n" |
| 1222 " vec2 pixelP0P1 = UnpackThresholds(packedVals[(offX)*6+(offY+1)]); \n" |
| 1223 " vec2 pixelM1P0 = UnpackThresholds(packedVals[(offX-1)*6 +(offY)]); \n" |
| 1224 " vec2 pixelP0M1 = UnpackThresholds(packedVals[(offX)*6+(offY-1)]); \n" |
| 1225 " vec2 pixelP1M1 = UnpackThresholds(packedVals[(offX+1)*6 +(offY-1)]); \n" |
| 1226 " vec2 pixelM1P1 = UnpackThresholds(packedVals[(offX-1)*6+(offY+1)]); \n" |
| 1227 " \n" |
// edges[0].xy are the cell's own two edges; everything else is context.
| 1228 " edges[0].x = pixelP0P0.x; \n" |
| 1229 " edges[0].y = pixelP0P0.y; \n" |
| 1230 " edges[0].z = pixelP1P0.x; \n" |
| 1231 " edges[0].w = pixelP1P0.y; \n" |
| 1232 " edges[1].x = pixelP0P1.x; \n" |
| 1233 " edges[1].y = pixelP0P1.y; \n" |
| 1234 " edges[1].z = pixelM1P0.x; \n" |
| 1235 " edges[1].w = pixelM1P0.y; \n" |
| 1236 " edges[2].x = pixelP0M1.x; \n" |
| 1237 " edges[2].y = pixelP0M1.y; \n" |
// Only one component is taken from each diagonal neighbour.
| 1238 " edges[2].z = pixelP1M1.y; \n" |
| 1239 " edges[2].w = pixelM1P1.x; \n" |
| 1240 "} \n" |
| 1241 " \n" |
// GLSL (compiled only when DETECT_EDGES2 is defined): second edge-detection
// pass. Each fragment reads the packed edge differences written by the
// previous pass from g_src0Texture4Uint (one texel = a 2x2 pixel block) and
// assembles them into a 6x6 grid, packedVals[x * 6 + y], whose centre 2x2
// cells (x, y in 2..3) are this fragment's own block. Neighbouring texels
// are fetched lazily, only when a centre cell that needs them is non-zero:
//   offset ( 0, -1) -> x 2..3, y 0..1      offset ( 1, -1) -> x 4..5, y 0..1
//   offset (-1,  0) -> x 0..1, y 2..3      offset ( 1,  0) -> x 4..5, y 2..3
//   offset (-1,  1) -> x 0..1, y 4..5      offset ( 0,  1) -> x 2..3, y 4..5
// For every non-zero centre cell the surrounding strengths are collected
// (CollectEdges), pruned (PruneNonDominantEdges) and, if any edge survives,
// written to g_resultTexture at the matching full-resolution pixel as
// (0x80 | edges) / 255 in the first component.
| 1242 "#ifdef DETECT_EDGES2 \n" |
| 1243 "layout(early_fragment_tests) in; \n" |
| 1244 "void DetectEdges2() { \n" |
| 1245 " ivec2 screenPosI = ivec2(gl_FragCoord.xy); \n" |
| 1246 " \n" |
| 1247 " // source : edge differences from previous pass \n" |
| 1248 " uint packedVals[6 * 6]; \n" |
| 1249 " \n" |
| 1250 " // center pixel (our output) \n" |
| 1251 " UVEC4 packedQ4 = texelFetch(g_src0Texture4Uint, screenPosI.xy, 0); \n" |
| 1252 " packedVals[(2) * 6 + (2)] = LOAD_UINT(packedQ4.x); \n" |
| 1253 " packedVals[(3) * 6 + (2)] = LOAD_UINT(packedQ4.y); \n" |
| 1254 " packedVals[(2) * 6 + (3)] = LOAD_UINT(packedQ4.z); \n" |
| 1255 " packedVals[(3) * 6 + (3)] = LOAD_UINT(packedQ4.w); \n" |
| 1256 " \n" |
| 1257 " vec4 edges[3]; \n" |
// Block above (y - 1): needed by the two top centre cells.
| 1258 " if (bool(packedVals[(2) * 6 + (2)]) || \n" |
| 1259 " bool(packedVals[(3) * 6 + (2)])) { \n" |
| 1260 " UVEC4 packedQ1 = texelFetchOffset(g_src0Texture4Uint, \n" |
| 1261 " screenPosI.xy, 0, ivec2(0, -1)); \n" |
| 1262 " packedVals[(2) * 6 + (0)] = LOAD_UINT(packedQ1.x); \n" |
| 1263 " packedVals[(3) * 6 + (0)] = LOAD_UINT(packedQ1.y); \n" |
| 1264 " packedVals[(2) * 6 + (1)] = LOAD_UINT(packedQ1.z); \n" |
| 1265 " packedVals[(3) * 6 + (1)] = LOAD_UINT(packedQ1.w); \n" |
| 1266 " } \n" |
| 1267 " \n" |
// Block to the left (x - 1): needed by the two left centre cells.
| 1268 " if (bool(packedVals[(2) * 6 + (2)]) || \n" |
| 1269 " bool(packedVals[(2) * 6 + (3)])) { \n" |
| 1270 " UVEC4 packedQ3 = texelFetchOffset(g_src0Texture4Uint, \n" |
| 1271 " screenPosI.xy, 0, ivec2(-1, 0)); \n" |
| 1272 " packedVals[(0) * 6 + (2)] = LOAD_UINT(packedQ3.x); \n" |
| 1273 " packedVals[(1) * 6 + (2)] = LOAD_UINT(packedQ3.y); \n" |
| 1274 " packedVals[(0) * 6 + (3)] = LOAD_UINT(packedQ3.z); \n" |
| 1275 " packedVals[(1) * 6 + (3)] = LOAD_UINT(packedQ3.w); \n" |
| 1276 " } \n" |
| 1277 " \n" |
// Centre cell (2, 2) -> output pixel (0, 0) of the 2x2 block.
| 1278 " if (bool(packedVals[(2) * 6 + (2)])) { \n" |
| 1279 " CollectEdges(2, 2, edges, packedVals); \n" |
| 1280 " uint pe = PruneNonDominantEdges(edges); \n" |
| 1281 " if (pe != 0u) { \n" |
| 1282 " imageStore(g_resultTexture, 2 * screenPosI.xy + ivec2(0, 0), \n" |
| 1283 " vec4(float(0x80u | pe) / 255.0, 0, 0, 0)); \n" |
| 1284 " } \n" |
| 1285 " } \n" |
| 1286 " \n" |
// Block to the right (x + 1): needed by the two right centre cells.
| 1287 " if (bool(packedVals[(3) * 6 + (2)]) || \n" |
| 1288 " bool(packedVals[(3) * 6 + (3)])) { \n" |
| 1289 " UVEC4 packedQ5 = texelFetchOffset(g_src0Texture4Uint, \n" |
| 1290 " screenPosI.xy, 0, ivec2(1, 0)); \n" |
| 1291 " packedVals[(4) * 6 + (2)] = LOAD_UINT(packedQ5.x); \n" |
| 1292 " packedVals[(5) * 6 + (2)] = LOAD_UINT(packedQ5.y); \n" |
| 1293 " packedVals[(4) * 6 + (3)] = LOAD_UINT(packedQ5.z); \n" |
| 1294 " packedVals[(5) * 6 + (3)] = LOAD_UINT(packedQ5.w); \n" |
| 1295 " } \n" |
| 1296 " \n" |
// Centre cell (3, 2) -> output pixel (1, 0); also needs the upper-right
// diagonal block.
| 1297 " if (bool(packedVals[(3) * 6 + (2)])) { \n" |
| 1298 " UVEC4 packedQ2 = texelFetchOffset(g_src0Texture4Uint, \n" |
| 1299 " screenPosI.xy, 0, ivec2(1, -1)); \n" |
| 1300 " packedVals[(4) * 6 + (0)] = LOAD_UINT(packedQ2.x); \n" |
| 1301 " packedVals[(5) * 6 + (0)] = LOAD_UINT(packedQ2.y); \n" |
| 1302 " packedVals[(4) * 6 + (1)] = LOAD_UINT(packedQ2.z); \n" |
| 1303 " packedVals[(5) * 6 + (1)] = LOAD_UINT(packedQ2.w); \n" |
| 1304 " \n" |
| 1305 " CollectEdges(3, 2, edges, packedVals); \n" |
| 1306 " uint pe = PruneNonDominantEdges(edges); \n" |
| 1307 " if (pe != 0u) { \n" |
| 1308 " imageStore(g_resultTexture, 2 * screenPosI.xy + ivec2(1, 0), \n" |
| 1309 " vec4(float(0x80u | pe) / 255.0, 0, 0, 0)); \n" |
| 1310 " } \n" |
| 1311 " } \n" |
| 1312 " \n" |
// Block below (y + 1): needed by the two bottom centre cells.
| 1313 " if (bool(packedVals[(2) * 6 + (3)]) || \n" |
| 1314 " bool(packedVals[(3) * 6 + (3)])) { \n" |
| 1315 " UVEC4 packedQ7 = texelFetchOffset(g_src0Texture4Uint, \n" |
| 1316 " screenPosI.xy, 0, ivec2(0, 1)); \n" |
| 1317 " packedVals[(2) * 6 + (4)] = LOAD_UINT(packedQ7.x); \n" |
| 1318 " packedVals[(3) * 6 + (4)] = LOAD_UINT(packedQ7.y); \n" |
| 1319 " packedVals[(2) * 6 + (5)] = LOAD_UINT(packedQ7.z); \n" |
| 1320 " packedVals[(3) * 6 + (5)] = LOAD_UINT(packedQ7.w); \n" |
| 1321 " } \n" |
| 1322 " \n" |
// Centre cell (2, 3) -> output pixel (0, 1); also needs the lower-left
// diagonal block. The results fill x 0..1, y 4..5 and are read by
// CollectEdges(2, 3) as the (x - 1, y + 1) neighbour, so the fetch offset
// must be (-1, 1). It was (-1, -1), which sampled the upper-left block.
| 1323 " if (bool(packedVals[(2) * 6 + (3)])) { \n" |
| 1324 " UVEC4 packedQ6 = texelFetchOffset(g_src0Texture4Uint, \n" |
| 1325 " screenPosI.xy, 0, ivec2(-1, 1)); \n" |
| 1326 " packedVals[(0) * 6 + (4)] = LOAD_UINT(packedQ6.x); \n" |
| 1327 " packedVals[(1) * 6 + (4)] = LOAD_UINT(packedQ6.y); \n" |
| 1328 " packedVals[(0) * 6 + (5)] = LOAD_UINT(packedQ6.z); \n" |
| 1329 " packedVals[(1) * 6 + (5)] = LOAD_UINT(packedQ6.w); \n" |
| 1330 " \n" |
| 1331 " CollectEdges(2, 3, edges, packedVals); \n" |
| 1332 " uint pe = PruneNonDominantEdges(edges); \n" |
| 1333 " if (pe != 0u) { \n" |
| 1334 " imageStore(g_resultTexture, 2 * screenPosI.xy + ivec2(0, 1), \n" |
| 1335 " vec4(float(0x80u | pe) / 255.0, 0, 0, 0)); \n" |
| 1336 " } \n" |
| 1337 " } \n" |
| 1338 " \n" |
// Centre cell (3, 3) -> output pixel (1, 1); everything it needs has already
// been loaded by the right and below fetches above.
| 1339 " if (bool(packedVals[(3) * 6 + (3)])) { \n" |
| 1340 " CollectEdges(3, 3, edges, packedVals); \n" |
| 1341 " uint pe = PruneNonDominantEdges(edges); \n" |
| 1342 " if (pe != 0u) { \n" |
| 1343 " imageStore(g_resultTexture, 2 * screenPosI.xy + ivec2(1, 1), \n" |
| 1344 " vec4(float(0x80u | pe) / 255.0, 0, 0, 0)); \n" |
| 1345 " } \n" |
| 1346 " } \n" |
| 1347 "} \n" |
| 1348 "#endif // DETECT_EDGES2 \n" |
| 1349 " \n"; |
| 1350 |
| 1351 const char ApplyFramebufferAttachmentCMAAINTELResourceManager::cmaa_frag_s2_[] = |
// GLSL (compiled only when COMBINE_EDGES is defined): merges per-pixel edge
// bits for a 2x2 pixel block starting at 2 * gl_FragCoord.xy. The input
// (g_src0TextureFlt) stores, per pixel, (0x80 | edges) / 255 as written by
// the previous pass, where edges holds the 0x01 and 0x02 bits. For each of
// the four pixels the result combines its own bits with bit 0x01 of the
// pixel named "L" (shifted to 0x04) and bit 0x02 of the pixel named "U"
// (shifted to 0x08), and is written to g_resultTextureSlot2 as value / 255.
// gl_FragDepth is 1.0 when any of the four combined values is greater than 1.
| 1352 "#ifdef COMBINE_EDGES \n" |
| 1353 "void CombineEdges() { \n" |
| 1354 " ivec3 screenPosIBase = ivec3(ivec2(gl_FragCoord.xy) * 2, 0); \n" |
| 1355 " vec3 screenPosBase = vec3(screenPosIBase); \n" |
| 1356 " uint packedEdgesArray[3 * 3]; \n" |
| 1357 " \n" |
// NOTE(review): for texels without the 0x80 flag the expression
// "x * 255.0 - 127.5" is negative, and GLSL leaves float -> uint conversion
// of negative values undefined; the code appears to rely on such values
// converting to 0 -- confirm on the targeted drivers.
| 1358 " // use only if it has the 'prev frame' flag:[sample * 255.0 - 127.5] \n" |
| 1359 " //-> if it has the last bit flag (128), it's going to stay above 0 \n" |
| 1360 " uvec4 sampA = uvec4( \n" |
| 1361 " textureGatherOffset(g_src0TextureFlt, \n" |
| 1362 " screenPosBase.xy * g_OneOverScreenSize, \n" |
| 1363 " ivec2(1, 0)) * 255.0 - 127.5); \n" |
| 1364 " uvec4 sampB = uvec4( \n" |
| 1365 " textureGatherOffset(g_src0TextureFlt, \n" |
| 1366 " screenPosBase.xy * g_OneOverScreenSize, \n" |
| 1367 " ivec2(0, 1)) * 255.0 - 127.5); \n" |
| 1368 " uint sampC = uint( \n" |
| 1369 " texelFetchOffset(g_src0TextureFlt, screenPosIBase.xy, 0, \n" |
| 1370 " ivec2(1, 1)).r * 255.0 - 127.5); \n" |
| 1371 " \n" |
// 3x3 grid indexed as x * 3 + y; cell (0, 0) is never sampled and is
// cleared explicitly.
| 1372 " packedEdgesArray[(0) * 3 + (0)] = 0u; \n" |
| 1373 " packedEdgesArray[(1) * 3 + (0)] = sampA.w; \n" |
| 1374 " packedEdgesArray[(2) * 3 + (0)] = sampA.z; \n" |
| 1375 " packedEdgesArray[(1) * 3 + (1)] = sampA.x; \n" |
| 1376 " packedEdgesArray[(2) * 3 + (1)] = sampA.y; \n" |
| 1377 " packedEdgesArray[(0) * 3 + (1)] = sampB.w; \n" |
| 1378 " packedEdgesArray[(0) * 3 + (2)] = sampB.x; \n" |
| 1379 " packedEdgesArray[(1) * 3 + (2)] = sampB.y; \n" |
| 1380 " packedEdgesArray[(2) * 3 + (2)] = sampC; \n" |
| 1381 " \n" |
// pixelsC: the four output pixels (grid x, y in 1..2); pixelsL: the cells
// one column lower in x; pixelsU: the cells one row lower in y.
| 1382 " uvec4 pixelsC = uvec4(packedEdgesArray[(1 + 0) * 3 + (1 + 0)], \n" |
| 1383 " packedEdgesArray[(1 + 1) * 3 + (1 + 0)], \n" |
| 1384 " packedEdgesArray[(1 + 0) * 3 + (1 + 1)], \n" |
| 1385 " packedEdgesArray[(1 + 1) * 3 + (1 + 1)]); \n" |
| 1386 " uvec4 pixelsL = uvec4(packedEdgesArray[(0 + 0) * 3 + (1 + 0)], \n" |
| 1387 " packedEdgesArray[(0 + 1) * 3 + (1 + 0)], \n" |
| 1388 " packedEdgesArray[(0 + 0) * 3 + (1 + 1)], \n" |
| 1389 " packedEdgesArray[(0 + 1) * 3 + (1 + 1)]); \n" |
| 1390 " uvec4 pixelsU = uvec4(packedEdgesArray[(1 + 0) * 3 + (0 + 0)], \n" |
| 1391 " packedEdgesArray[(1 + 1) * 3 + (0 + 0)], \n" |
| 1392 " packedEdgesArray[(1 + 0) * 3 + (0 + 1)], \n" |
| 1393 " packedEdgesArray[(1 + 1) * 3 + (0 + 1)]); \n" |
| 1394 " \n" |
| 1395 " uvec4 outEdge4 = \n" |
| 1396 " pixelsC | ((pixelsL & 0x01u) << 2u) | ((pixelsU & 0x02u) << 2u); \n" |
| 1397 " vec4 outEdge4Flt = vec4(outEdge4) / 255.0; \n" |
| 1398 " \n" |
| 1399 " imageStore(g_resultTextureSlot2, screenPosIBase.xy + ivec2(0, 0), \n" |
| 1400 " outEdge4Flt.xxxx); \n" |
| 1401 " imageStore(g_resultTextureSlot2, screenPosIBase.xy + ivec2(1, 0), \n" |
| 1402 " outEdge4Flt.yyyy); \n" |
| 1403 " imageStore(g_resultTextureSlot2, screenPosIBase.xy + ivec2(0, 1), \n" |
| 1404 " outEdge4Flt.zzzz); \n" |
| 1405 " imageStore(g_resultTextureSlot2, screenPosIBase.xy + ivec2(1, 1), \n" |
| 1406 " outEdge4Flt.wwww); \n" |
| 1407 " \n" |
| 1408 " // uvec4 numberOfEdges4 = uvec4(bitCount(outEdge4)); \n" |
| 1409 " // gl_FragDepth = \n" |
| 1410 " // any(greaterThan(numberOfEdges4, uvec4(1))) ? 1.0 : 0.0; \n" |
| 1411 " \n" |
| 1412 " gl_FragDepth = \n" |
| 1413 " any(greaterThan(outEdge4, uvec4(1))) ? 1.0 : 0.0; \n" |
| 1414 "} \n" |
| 1415 "#endif // COMBINE_EDGES \n" |
| 1416 " \n" |
| 1417 "#ifdef BLUR_EDGES \n" |
| 1418 "layout(early_fragment_tests) in; \n" |
| 1419 "void BlurEdges() { \n" |
| 1420 " int _i; \n" |
| 1421 " \n" |
| 1422 " ivec3 screenPosIBase = ivec3(ivec2(gl_FragCoord.xy) * 2, 0); \n" |
| 1423 " vec3 screenPosBase = vec3(screenPosIBase); \n" |
| 1424 " uint forFollowUpCount = 0u; \n" |
| 1425 " ivec4 forFollowUpCoords[4]; \n" |
| 1426 " \n" |
| 1427 " uint packedEdgesArray[4 * 4]; \n" |
| 1428 " \n" |
| 1429 " uvec4 sampA = uvec4( \n" |
| 1430 " textureGatherOffset(g_src0TextureFlt, \n" |
| 1431 " screenPosBase.xy * g_OneOverScreenSize, \n" |
| 1432 " ivec2(0, 0)) *255.5); \n" |
| 1433 " uvec4 sampB = uvec4( \n" |
| 1434 " textureGatherOffset(g_src0TextureFlt, \n" |
| 1435 " screenPosBase.xy * g_OneOverScreenSize, \n" |
| 1436 " ivec2(2, 0)) *255.5); \n" |
| 1437 " uvec4 sampC = uvec4( \n" |
| 1438 " textureGatherOffset(g_src0TextureFlt, \n" |
| 1439 " screenPosBase.xy * g_OneOverScreenSize, \n" |
| 1440 " ivec2(0, 2)) *255.5); \n" |
| 1441 " uvec4 sampD = uvec4( \n" |
| 1442 " textureGatherOffset(g_src0TextureFlt, \n" |
| 1443 " screenPosBase.xy * g_OneOverScreenSize, \n" |
| 1444 " ivec2(2, 2)) *255.5); \n" |
| 1445 " \n" |
| 1446 " packedEdgesArray[(0) * 4 + (0)] = sampA.w; \n" |
| 1447 " packedEdgesArray[(1) * 4 + (0)] = sampA.z; \n" |
| 1448 " packedEdgesArray[(0) * 4 + (1)] = sampA.x; \n" |
| 1449 " packedEdgesArray[(1) * 4 + (1)] = sampA.y; \n" |
| 1450 " packedEdgesArray[(2) * 4 + (0)] = sampB.w; \n" |
| 1451 " packedEdgesArray[(3) * 4 + (0)] = sampB.z; \n" |
| 1452 " packedEdgesArray[(2) * 4 + (1)] = sampB.x; \n" |
| 1453 " packedEdgesArray[(3) * 4 + (1)] = sampB.y; \n" |
| 1454 " packedEdgesArray[(0) * 4 + (2)] = sampC.w; \n" |
| 1455 " packedEdgesArray[(1) * 4 + (2)] = sampC.z; \n" |
| 1456 " packedEdgesArray[(0) * 4 + (3)] = sampC.x; \n" |
| 1457 " packedEdgesArray[(1) * 4 + (3)] = sampC.y; \n" |
| 1458 " packedEdgesArray[(2) * 4 + (2)] = sampD.w; \n" |
| 1459 " packedEdgesArray[(3) * 4 + (2)] = sampD.z; \n" |
| 1460 " packedEdgesArray[(2) * 4 + (3)] = sampD.x; \n" |
| 1461 " packedEdgesArray[(3) * 4 + (3)] = sampD.y; \n" |
| 1462 " \n" |
| 1463 " for (_i = 0; _i < 4; _i++) { \n" |
| 1464 " int _x = _i % 2; \n" |
| 1465 " int _y = _i / 2; \n" |
| 1466 " \n" |
| 1467 " ivec3 screenPosI = screenPosIBase + ivec3(_x, _y, 0); \n" |
| 1468 " \n" |
| 1469 " uint packedEdgesC = packedEdgesArray[(1 + _x) * 4 + (1 + _y)]; \n" |
| 1470 " \n" |
| 1471 " uvec4 edges = UnpackEdge(packedEdgesC); \n" |
| 1472 " vec4 edgesFlt = vec4(edges); \n" |
| 1473 " \n" |
| 1474 " float numberOfEdges = dot(edgesFlt, vec4(1, 1, 1, 1)); \n" |
| 1475 " if (numberOfEdges < 2.0) \n" |
| 1476 " continue; \n" |
| 1477 " \n" |
| 1478 " float fromRight = edgesFlt.r; \n" |
| 1479 " float fromBelow = edgesFlt.g; \n" |
| 1480 " float fromLeft = edgesFlt.b; \n" |
| 1481 " float fromAbove = edgesFlt.a; \n" |
| 1482 " \n" |
| 1483 " vec4 xFroms = vec4(fromBelow, fromAbove, fromRight, fromLeft); \n" |
| 1484 " \n" |
| 1485 " float blurCoeff = 0.0; \n" |
| 1486 " \n" |
| 1487 " // These are additional blurs that complement the main line-based \n" |
| 1488 " // blurring; Unlike line-based, these do not necessarily preserve \n" |
| 1489 " // the total amount of screen colour as they will take \n" |
| 1490 " // neighbouring pixel colours and apply them to the one currently \n" |
| 1491 " // processed. \n" |
| 1492 " \n" |
| 1493 " // 1.) L-like shape. \n" |
| 1494 " // For this shape, the total amount of screen colour will be \n" |
| 1495 " // preserved when this is a part of a (zigzag) diagonal line as the\n" |
| 1496 " // corners from the other side will do the same and take some of \n" |
| 1497 " // the current pixel's colour in return. \n" |
| 1498 " // However, in the case when this is an actual corner, the pixel's \n" |
| 1499 " // colour will be partially overwritten by it's 2 neighbours. \n" |
| 1500 " // if( numberOfEdges > 1.0 ) \n" |
| 1501 " { \n" |
| 1502 " // with value of 0.15, the pixel will retain approx 77% of its \n" |
| 1503 " // colour and the remaining 23% will come from its 2 neighbours \n" |
| 1504 " // (which are likely to be blurred too in the opposite direction)\n" |
| 1505 " blurCoeff = 0.08; \n" |
| 1506 " \n" |
| 1507 " // Only do blending if it's L shape - if we're between two \n" |
| 1508 " // parallel edges, don't do anything \n" |
| 1509 " blurCoeff *= (1.0 - fromBelow * fromAbove) * \n" |
| 1510 " (1.0 - fromRight * fromLeft); \n" |
| 1511 " } \n" |
| 1512 " \n" |
| 1513 " // 2.) U-like shape (surrounded with edges from 3 sides) \n" |
| 1514 " if (numberOfEdges > 2.0) { \n" |
| 1515 " // with value of 0.13, the pixel will retain approx 72% of its \n" |
| 1516 " // colour and the remaining 28% will be picked from its 3 \n" |
| 1517 " // neighbours (which are unlikely to be blurred too but could be)\n" |
| 1518 " blurCoeff = 0.11; \n" |
| 1519 " } \n" |
| 1520 " \n" |
| 1521 " // 3.) Completely surrounded with edges from all 4 sides \n" |
| 1522 " if (numberOfEdges > 3.0) { \n" |
| 1523 " // with value of 0.07, the pixel will retain 78% of its colour \n" |
| 1524 " // and the remaining 22% will come from its 4 neighbours (which \n" |
| 1525 " // are unlikely to be blurred) \n" |
| 1526 " blurCoeff = 0.05; \n" |
| 1527 " } \n" |
| 1528 " \n" |
| 1529 " if (blurCoeff == 0.0) { \n" |
| 1530 " // this avoids Z search below as well but that's ok because a Z \n" |
| 1531 " // shape will also always have some blurCoeff \n" |
| 1532 " continue; \n" |
| 1533 " } \n" |
| 1534 " \n" |
| 1535 " vec4 blurMap = xFroms * blurCoeff; \n" |
| 1536 " \n" |
| 1537 " vec4 pixelC = texelFetch(g_screenTexture, screenPosI.xy, 0); \n" |
| 1538 " \n" |
| 1539 " const float centerWeight = 1.0; \n" |
| 1540 " float fromBelowWeight = blurMap.x; \n" |
| 1541 " float fromAboveWeight = blurMap.y; \n" |
| 1542 " float fromRightWeight = blurMap.z; \n" |
| 1543 " float fromLeftWeight = blurMap.w; \n" |
| 1544 " \n" |
| 1545 " // this would be the proper math for blending if we were handling \n" |
| 1546 " // lines (Zs) and mini kernel smoothing here, but since we're doing\n" |
| 1547 " // lines separately, no need to complicate, just tweak the settings\n" |
| 1548 " // float fromBelowWeight = (1.0 / (1.0 - blurMap.x)) - 1.0; \n" |
| 1549 " // float fromAboveWeight = (1.0 / (1.0 - blurMap.y)) - 1.0; \n" |
| 1550 " // float fromRightWeight = (1.0 / (1.0 - blurMap.z)) - 1.0; \n" |
| 1551 " // float fromLeftWeight = (1.0 / (1.0 - blurMap.w)) - 1.0; \n" |
| 1552 " \n" |
| 1553 " float fourWeightSum = dot(blurMap, vec4(1, 1, 1, 1)); \n" |
| 1554 " float allWeightSum = centerWeight + fourWeightSum; \n" |
| 1555 " \n" |
| 1556 " vec4 color = vec4(0, 0, 0, 0); \n" |
| 1557 " if (fromLeftWeight > 0.0) { \n" |
| 1558 " vec3 pixelL = texelFetchOffset(g_screenTexture, screenPosI.xy, 0,\n" |
| 1559 " ivec2(-1, 0)).rgb; \n" |
| 1560 " color.rgb += fromLeftWeight * pixelL; \n" |
| 1561 " } \n" |
| 1562 " if (fromAboveWeight > 0.0) { \n" |
| 1563 " vec3 pixelT = texelFetchOffset(g_screenTexture, screenPosI.xy, 0,\n" |
| 1564 " ivec2(0, -1)).rgb; \n" |
| 1565 " color.rgb += fromAboveWeight * pixelT; \n" |
| 1566 " } \n" |
| 1567 " if (fromRightWeight > 0.0) { \n" |
| 1568 " vec3 pixelR = texelFetchOffset(g_screenTexture, screenPosI.xy, 0,\n" |
| 1569 " ivec2(1, 0)).rgb; \n" |
| 1570 " color.rgb += fromRightWeight * pixelR; \n" |
| 1571 " } \n" |
| 1572 " if (fromBelowWeight > 0.0) { \n" |
| 1573 " vec3 pixelB = texelFetchOffset(g_screenTexture, screenPosI.xy, 0,\n" |
| 1574 " ivec2(0, 1)).rgb; \n" |
| 1575 " color.rgb += fromBelowWeight * pixelB; \n" |
| 1576 " } \n" |
| 1577 " \n" |
| 1578 " color /= fourWeightSum + 0.0001; \n" |
| 1579 " color.a = 1.0 - centerWeight / allWeightSum; \n" |
| 1580 " \n" |
| 1581 " color.rgb = mix(pixelC.rgb, color.rgb, color.a).rgb; \n" |
| 1582 "#ifdef IN_GAMMA_CORRECT_MODE \n" |
| 1583 " color.rgb = D3DX_FLOAT3_to_SRGB(color.rgb); \n" |
| 1584 "#endif \n" |
| 1585 " \n" |
| 1586 "#ifdef DEBUG_OUTPUT_AAINFO \n" |
| 1587 " imageStore(g_resultTextureSlot2, screenPosI.xy, \n" |
| 1588 " PackBlurAAInfo(screenPosI.xy, uint(numberOfEdges))); \n" |
| 1589 "#endif \n" |
| 1590 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy, \n" |
| 1591 " vec4(color.rgb, pixelC.a)); \n" |
| 1592 " \n" |
| 1593 " if (numberOfEdges == 2.0) { \n" |
| 1594 " uint packedEdgesL = packedEdgesArray[(0 + _x) * 4 + (1 + _y)]; \n" |
| 1595 " uint packedEdgesT = packedEdgesArray[(1 + _x) * 4 + (0 + _y)]; \n" |
| 1596 " uint packedEdgesR = packedEdgesArray[(2 + _x) * 4 + (1 + _y)]; \n" |
| 1597 " uint packedEdgesB = packedEdgesArray[(1 + _x) * 4 + (2 + _y)]; \n" |
| 1598 " \n" |
| 1599 " bool isHorizontalA = ((packedEdgesC) == (0x01u | 0x02u)) && \n" |
| 1600 " ((packedEdgesR & (0x01u | 0x08u)) == (0x08u)); \n" |
| 1601 " bool isHorizontalB = ((packedEdgesC) == (0x01u | 0x08u)) && \n" |
| 1602 " ((packedEdgesR & (0x01u | 0x02u)) == (0x02u)); \n" |
| 1603 " \n" |
| 1604 " bool isHCandidate = isHorizontalA || isHorizontalB; \n" |
| 1605 " \n" |
| 1606 " bool isVerticalA = ((packedEdgesC) == (0x08u | 0x01u)) && \n" |
| 1607 " ((packedEdgesT & (0x08u | 0x04u)) == (0x04u)); \n" |
| 1608 " bool isVerticalB = ((packedEdgesC) == (0x08u | 0x04u)) && \n" |
| 1609 " ((packedEdgesT & (0x08u | 0x01u)) == (0x01u)); \n" |
| 1610 " bool isVCandidate = isVerticalA || isVerticalB; \n" |
| 1611 " \n" |
| 1612 " bool isCandidate = isHCandidate || isVCandidate; \n" |
| 1613 " \n" |
| 1614 " if (!isCandidate) \n" |
| 1615 " continue; \n" |
| 1616 " \n" |
| 1617 " bool horizontal = isHCandidate; \n" |
| 1618 " \n" |
| 1619 " // what if both are candidates? do additional pruning (still not \n" |
| 1620 " // 100% but gets rid of worst case errors) \n" |
| 1621 " if (isHCandidate && isVCandidate) \n" |
| 1622 " horizontal = \n" |
| 1623 " (isHorizontalA && ((packedEdgesL & 0x02u) == 0x02u)) || \n" |
| 1624 " (isHorizontalB && ((packedEdgesL & 0x08u) == 0x08u)); \n" |
| 1625 " \n" |
| 1626 " ivec2 offsetC; \n" |
| 1627 " uint packedEdgesM1P0; \n" |
| 1628 " uint packedEdgesP1P0; \n" |
| 1629 " if (horizontal) { \n" |
| 1630 " packedEdgesM1P0 = packedEdgesL; \n" |
| 1631 " packedEdgesP1P0 = packedEdgesR; \n" |
| 1632 " offsetC = ivec2(2, 0); \n" |
| 1633 " } else { \n" |
| 1634 " packedEdgesM1P0 = packedEdgesB; \n" |
| 1635 " packedEdgesP1P0 = packedEdgesT; \n" |
| 1636 " offsetC = ivec2(0, -2); \n" |
| 1637 " } \n" |
| 1638 " \n" |
| 1639 " uvec4 edgesM1P0 = UnpackEdge(packedEdgesM1P0); \n" |
| 1640 " uvec4 edgesP1P0 = UnpackEdge(packedEdgesP1P0); \n" |
| 1641 " uvec4 edgesP2P0 = UnpackEdge(uint(texelFetch( \n" |
| 1642 " g_src0TextureFlt, screenPosI.xy + offsetC, 0).r * 255.5)); \n" |
| 1643 " \n" |
| 1644 " uvec4 arg0; \n" |
| 1645 " uvec4 arg1; \n" |
| 1646 " uvec4 arg2; \n" |
| 1647 " uvec4 arg3; \n" |
| 1648 " bool arg4; \n" |
| 1649 " \n" |
| 1650 " if (horizontal) { \n" |
| 1651 " arg0 = uvec4(edges); \n" |
| 1652 " arg1 = edgesM1P0; \n" |
| 1653 " arg2 = edgesP1P0; \n" |
| 1654 " arg3 = edgesP2P0; \n" |
| 1655 " arg4 = true; \n" |
| 1656 " } else { \n" |
| 1657 " // Reuse the same code for vertical (used for horizontal above)\n" |
| 1658 " // but rotate input data 90º counter-clockwise, so that: \n" |
| 1659 " // left becomes bottom \n" |
| 1660 " // top becomes left \n" |
| 1661 " // right becomes top \n" |
| 1662 " // bottom becomes right \n" |
| 1663 " \n" |
| 1664 " // we also have to rotate edges, thus .argb \n" |
| 1665 " arg0 = uvec4(edges.argb); \n" |
| 1666 " arg1 = edgesM1P0.argb; \n" |
| 1667 " arg2 = edgesP1P0.argb; \n" |
| 1668 " arg3 = edgesP2P0.argb; \n" |
| 1669 " arg4 = false; \n" |
| 1670 " } \n" |
| 1671 " \n" |
| 1672 " { \n" |
| 1673 " ivec2 screenPos = screenPosI.xy; \n" |
| 1674 " uvec4 _edges = arg0; \n" |
| 1675 " uvec4 _edgesM1P0 = arg1; \n" |
| 1676 " uvec4 _edgesP1P0 = arg2; \n" |
| 1677 " uvec4 _edgesP2P0 = arg3; \n" |
| 1678 " bool horizontal = arg4; \n" |
| 1679 " // Inverted Z case: \n" |
| 1680 " // __ \n" |
| 1681 " // X| \n" |
| 1682 " // ¯¯ \n" |
| 1683 " bool isInvertedZ = false; \n" |
| 1684 " bool isNormalZ = false; \n" |
| 1685 " { \n" |
| 1686 "#ifndef SETTINGS_ALLOW_SHORT_Zs \n" |
| 1687 " // (1u-_edges.a) constraint can be removed; it was added for \n" |
| 1688 " // some rare cases \n" |
| 1689 " uint isZShape = _edges.r * _edges.g * _edgesM1P0.g * \n" |
| 1690 " _edgesP1P0.a *_edgesP2P0.a * (1u - _edges.b) * \n" |
| 1691 " (1u - _edgesP1P0.r) * (1u - _edges.a) * \n" |
| 1692 " (1u - _edgesP1P0.g); \n" |
| 1693 "#else \n" |
| 1694 " uint isZShape = _edges.r * _edges.g * _edgesP1P0.a * \n" |
| 1695 " (1u - _edges.b) * (1u - _edgesP1P0.r) * (1u - _edges.a) *\n" |
| 1696 " (1u - _edgesP1P0.g); \n" |
| 1697 " isZShape *= (_edgesM1P0.g + _edgesP2P0.a); \n" |
| 1698 " // and at least one of these need to be there\n" |
| 1699 "#endif \n" |
| 1700 " if (isZShape > 0u) { \n" |
| 1701 " isInvertedZ = true; \n" |
| 1702 " } \n" |
| 1703 " } \n" |
| 1704 " \n" |
| 1705 " // Normal Z case: \n" |
| 1706 " // __ \n" |
| 1707 " // X| \n" |
| 1708 " // ¯¯ \n" |
| 1709 " { \n" |
| 1710 "#ifndef SETTINGS_ALLOW_SHORT_Zs \n" |
| 1711 " uint isZShape = _edges.r * _edges.a * _edgesM1P0.a * \n" |
| 1712 " _edgesP1P0.g * _edgesP2P0.g * (1u - _edges.b) * \n" |
| 1713 " (1u - _edgesP1P0.r) * (1u - _edges.g) * \n" |
| 1714 " (1u - _edgesP1P0.a); \n" |
| 1715 "#else \n" |
| 1716 " uint isZShape = _edges.r * _edges.a * _edgesP1P0.g * \n" |
| 1717 " (1u - _edges.b) * (1u - _edgesP1P0.r) * (1u - _edges.g) *\n" |
| 1718 " (1u - _edgesP1P0.a); \n" |
| 1719 " isZShape *= \n" |
| 1720 " (_edgesM1P0.a + _edgesP2P0.g); \n" |
| 1721 " // and at least one of these need to be there\n" |
| 1722 "#endif \n" |
| 1723 " \n" |
| 1724 " if (isZShape > 0u) { \n" |
| 1725 " isNormalZ = true; \n" |
| 1726 " } \n" |
| 1727 " } \n" |
| 1728 " \n" |
| 1729 " bool isZ = isInvertedZ || isNormalZ; \n" |
| 1730 " if (isZ) { \n" |
| 1731 " forFollowUpCoords[forFollowUpCount++] = \n" |
| 1732 " ivec4(screenPosI.xy, horizontal, isInvertedZ); \n" |
| 1733 " } \n" |
| 1734 " } \n" |
| 1735 " } \n" |
| 1736 " } \n" |
| 1737 " \n" |
| 1738 " // This code below is the only potential bug with this algorithm : \n" |
| 1739 " // it HAS to be executed after the simple shapes above. It used to be\n" |
| 1740 " // executed as separate compute shader (by storing the packed \n" |
| 1741 " // 'forFollowUpCoords' in an append buffer and consuming it later) \n" |
| 1742 " // but the whole thing (append/consume buffers, using CS) appears to \n" |
| 1743 " // be too inefficient on most hardware. \n" |
| 1744 " // However, it seems to execute fairly efficiently here and without \n" |
| 1745 " // any issues, although there is no 100% guarantee that this code \n" |
| 1746 " // below will execute across all pixels (it has a c_maxLineLength \n" |
| 1747 " // wide kernel) after other shaders processing same pixels have done \n" |
| 1748 " // solving simple shapes. It appears to work regardless, across all \n" |
| 1749 " // hardware; pixels with 1-edge or two opposing edges are ignored by \n" |
| 1750 " // simple shapes anyway and other shapes stop the long line \n" |
| 1751 " // algorithm from executing the only danger appears to be simple \n" |
| 1752 " // shape L's colliding with Z shapes from neighbouring pixels but I \n" |
| 1753 " // couldn't reproduce any problems on any hardware. \n" |
| 1754 " for (uint _i = 0u; _i < forFollowUpCount; _i++) { \n" |
| 1755 " ivec4 data = forFollowUpCoords[_i]; \n" |
| 1756 " ProcessDetectedZ(data.xy, bool(data.z), bool(data.w)); \n" |
| 1757 " } \n" |
| 1758 "} \n" |
| 1759 "#endif // BLUR_EDGES \n" |
| 1760 " \n" |
| 1761 "#ifdef DISPLAY_EDGES \n" |
| 1762 "layout(location = 0) out vec4 color; \n" |
| 1763 "layout(location = 1) out vec4 hasEdges; \n" |
| 1764 "void DisplayEdges() { \n" |
| 1765 " ivec2 screenPosI = ivec2(gl_FragCoord.xy); \n" |
| 1766 " \n" |
| 1767 " uint packedEdges, shapeType; \n" |
| 1768 " UnpackBlurAAInfo(texelFetch(g_src0TextureFlt, screenPosI, 0).r, \n" |
| 1769 " packedEdges, shapeType); \n" |
| 1770 " \n" |
| 1771 " vec4 edges = vec4(UnpackEdge(packedEdges)); \n" |
| 1772 " if (any(greaterThan(edges.xyzw, vec4(0)))) { \n" |
| 1773 "#ifdef IN_BGR_MODE \n" |
| 1774 " color = c_edgeDebugColours[shapeType].bgra; \n" |
| 1775 "#else \n" |
| 1776 " color = c_edgeDebugColours[shapeType]; \n" |
| 1777 "#endif \n" |
| 1778 " hasEdges = vec4(1.0); \n" |
| 1779 " } else { \n" |
| 1780 " color = vec4(0); \n" |
| 1781 " hasEdges = vec4(0.0); \n" |
| 1782 " } \n" |
| 1783 "} \n" |
| 1784 "#endif // DISPLAY_EDGES \n" |
| 1785 " \n" |
| 1786 "void main() { \n" |
| 1787 "#ifdef DETECT_EDGES1 \n" |
| 1788 " DetectEdges1(); \n" |
| 1789 "#endif \n" |
| 1790 "#if defined DETECT_EDGES2 \n" |
| 1791 " DetectEdges2(); \n" |
| 1792 "#endif \n" |
| 1793 "#if defined COMBINE_EDGES \n" |
| 1794 " CombineEdges(); \n" |
| 1795 "#endif \n" |
| 1796 "#if defined BLUR_EDGES \n" |
| 1797 " BlurEdges(); \n" |
| 1798 "#endif \n" |
| 1799 "#if defined DISPLAY_EDGES \n" |
| 1800 " DisplayEdges(); \n" |
| 1801 "#endif \n" |
| 1802 "} \n"; |
| 1803 |
// GLSL fragment-shader source for a 1:1 texel copy.
//
// main() converts gl_FragCoord.xy to an integer coordinate, fetches that texel
// from the sampler `inTexture` at mip level 0 with texelFetch(), and then
// emits it in one of two ways chosen at shader-compile time:
//   * OUT_FBO defined:     the texel is written to the colour output
//                          `outColor` (location 0).
//   * OUT_FBO not defined: the texel is written to the rgba8, write-only
//                          image `outTexture` with imageStore().
// When GL_ES is defined the image uniform is declared with an explicit
// `binding = 0`; otherwise it is declared without a binding qualifier.
//
// NOTE(review): this string contains no #version directive and never defines
// OUT_FBO itself, so both are presumably prepended where the shader is
// compiled - confirm against the shader-creation code.
| 1804 const char |
| 1805 ApplyFramebufferAttachmentCMAAINTELResourceManager::copy_frag_str_[] = |
| 1806 "precision highp float; \n" |
| 1807 "layout(binding = 0) uniform highp sampler2D inTexture; \n" |
| 1808 "layout(location = 0) out vec4 outColor; \n" |
| 1809 "#ifdef GL_ES \n" |
| 1810 "layout(binding = 0, rgba8) restrict writeonly uniform highp \n" |
| 1811 " image2D outTexture; \n" |
| 1812 "#else \n" |
| 1813 "layout(rgba8) restrict writeonly uniform highp image2D outTexture; \n" |
| 1814 "#endif \n" |
| 1815 " \n" |
| 1816 "void main() { \n" |
| 1817 " ivec2 screenPosI = ivec2( gl_FragCoord.xy ); \n" |
| 1818 " vec4 pixel = texelFetch(inTexture, screenPosI, 0); \n" |
| 1819 "#ifdef OUT_FBO \n" |
| 1820 " outColor = pixel; \n" |
| 1821 "#else \n" |
| 1822 " imageStore(outTexture, screenPosI, pixel); \n" |
| 1823 "#endif \n" |
| 1824 "} \n"; |
| 1825 |
| 1826 } // namespace gpu |
OLD | NEW |