Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "gpu/command_buffer/service/gles2_cmd_apply_framebuffer_attachment_cmaa_intel.h" | |
| 6 | |
| 7 #include "base/logging.h" | |
| 8 #include "gpu/command_buffer/service/gles2_cmd_decoder.h" | |
| 9 #include "ui/gl/gl_gl_api_implementation.h" | |
| 10 #include "ui/gl/gl_version_info.h" | |
| 11 | |
| 12 namespace gpu { | |
| 13 | |
| 14 ApplyFramebufferAttachmentCMAAINTELResourceManager:: | |
| 15 ApplyFramebufferAttachmentCMAAINTELResourceManager() | |
| 16 : initialized_(false), | |
| 17 textures_initialized_(false), | |
| 18 is_in_gamma_correct_mode_(false), | |
| 19 supports_usampler_(true), | |
| 20 supports_r8_image_(true), | |
| 21 supports_r8_read_format_(true), | |
| 22 is_gles31_compatible(false), | |
| 23 frame_id_(0), | |
| 24 width_(0), | |
| 25 height_(0), | |
| 26 copy_to_framebuffer_shader_(0), | |
| 27 copy_to_image_shader_(0), | |
| 28 edges0_shader_(0), | |
| 29 edges1_shader_(0), | |
| 30 edges_combine_shader_(0), | |
| 31 process_and_apply_shader_(0), | |
| 32 debug_display_edges_shader_(0), | |
| 33 cmaa_framebuffer_(0), | |
| 34 copy_framebuffer_(0), | |
| 35 rgba8_texture_(0), | |
| 36 working_color_texture_(0), | |
| 37 edges0_texture_(0), | |
| 38 edges1_texture_(0), | |
| 39 mini4_edge_texture_(0), | |
| 40 mini4_edge_depth_texture_(0), | |
| 41 edges1_shader_result_texture_float4_slot1_(0), | |
| 42 edges1_shader_result_texture_(0), | |
| 43 edges_combine_shader_result_texture_float4_slot1_(0), | |
| 44 process_and_apply_shader_result_texture_float4_slot1_(0), | |
| 45 edges_combine_shader_result_texture_slot2_(0), | |
| 46 copy_to_image_shader_outTexture_(0) {} | |
| 47 | |
| 48 ApplyFramebufferAttachmentCMAAINTELResourceManager:: | |
| 49 ~ApplyFramebufferAttachmentCMAAINTELResourceManager() { | |
| 50 Destroy(); | |
| 51 } | |
| 52 | |
| 53 void ApplyFramebufferAttachmentCMAAINTELResourceManager::Initialize( | |
| 54 const gles2::GLES2Decoder* decoder) { | |
| 55 is_gles31_compatible = gl::GetGLVersionInfo()->IsAtLeastGLES(3, 1); | |
| 56 | |
| 57 glGenFramebuffersEXT(1, &copy_framebuffer_); | |
| 58 glGenTextures(1, &rgba8_texture_); | |
| 59 | |
| 60 copy_to_image_shader_ = CreateProgram("", vert_str_, copy_frag_str_); | |
| 61 copy_to_framebuffer_shader_ = | |
| 62 CreateProgram("#define OUT_FBO 1\n", vert_str_, copy_frag_str_); | |
| 63 | |
| 64 // Check if RGBA8UI is supported as an FBO colour target with depth. | |
| 65 // If not supported, GLSL needs to convert the data to/from float so there is | |
| 66 // a small extra cost. | |
| 67 { | |
| 68 GLuint rgba8ui_texture = 0, depth_texture = 0; | |
| 69 glGenTextures(1, &rgba8ui_texture); | |
| 70 glBindTexture(GL_TEXTURE_2D, rgba8ui_texture); | |
| 71 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_RGBA8UI, 4, 4); | |
| 72 | |
| 73 glGenTextures(1, &depth_texture); | |
| 74 glBindTexture(GL_TEXTURE_2D, depth_texture); | |
| 75 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_DEPTH_COMPONENT16, 4, 4); | |
| 76 | |
| 77 // Create the FBO | |
| 78 GLuint rgba8ui_framebuffer = 0; | |
| 79 glGenFramebuffersEXT(1, &rgba8ui_framebuffer); | |
| 80 glBindFramebufferEXT(GL_FRAMEBUFFER, rgba8ui_framebuffer); | |
| 81 | |
| 82 // Bind to the FBO to test support | |
| 83 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, | |
| 84 GL_TEXTURE_2D, rgba8ui_texture, 0); | |
| 85 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, | |
| 86 GL_TEXTURE_2D, depth_texture, 0); | |
| 87 GLenum status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER); | |
| 88 | |
| 89 supports_usampler_ = (status == GL_FRAMEBUFFER_COMPLETE); | |
| 90 | |
| 91 glDeleteFramebuffersEXT(1, &rgba8ui_framebuffer); | |
| 92 glDeleteTextures(1, &rgba8ui_texture); | |
| 93 glDeleteTextures(1, &depth_texture); | |
| 94 } | |
| 95 | |
| 96 // Check to see if R8 images are supported | |
| 97 // If not supported, images are bound as R32F for write targets, not R8. | |
| 98 { | |
| 99 GLuint r8_texture = 0; | |
| 100 glGenTextures(1, &r8_texture); | |
| 101 glBindTexture(GL_TEXTURE_2D, r8_texture); | |
| 102 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_R8, 4, 4); | |
| 103 | |
| 104 glGetError(); // reset all previous errors | |
| 105 glBindImageTextureEXT(0, r8_texture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R8); | |
| 106 if (glGetError() != GL_NO_ERROR) | |
| 107 supports_r8_image_ = false; | |
| 108 | |
| 109 glDeleteTextures(1, &r8_texture); | |
| 110 } | |
| 111 | |
| 112 // Check if R8 GLSL read formats are supported. | |
| 113 // If not supported, r32f is used instead. | |
| 114 { | |
| 115 const char* shader_source = | |
| 116 "layout(r8) restrict writeonly uniform highp image2D g_r8Image; \n" | |
| 117 "void main() \n" | |
| 118 "{ \n" | |
| 119 " imageStore(g_r8Image, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 0.0)); \n" | |
| 120 "} \n"; | |
| 121 | |
| 122 GLuint shader = CreateShader(GL_FRAGMENT_SHADER, "", shader_source); | |
| 123 supports_r8_read_format_ = (shader != 0); | |
| 124 if (shader != 0) { | |
| 125 glDeleteShader(shader); | |
| 126 } | |
| 127 } | |
| 128 | |
| 129 VLOG(1) << "ApplyFramebufferAttachmentCMAAINTEL: " | |
| 130 << "Supports USampler is " << (supports_usampler_ ? "true" : "false"); | |
| 131 VLOG(1) << "ApplyFramebufferAttachmentCMAAINTEL: " | |
| 132 << "Supports R8 Images is " | |
| 133 << (supports_r8_image_ ? "true" : "false"); | |
| 134 VLOG(1) << "ApplyFramebufferAttachmentCMAAINTEL: " | |
| 135 << "Supports R8 Read Format is " | |
| 136 << (supports_r8_read_format_ ? "true" : "false"); | |
| 137 | |
| 138 // Create the shaders | |
| 139 std::ostringstream defines, edge1, edge2, combineEdges, blur, displayEdges; | |
| 140 | |
| 141 if (supports_usampler_) { | |
| 142 defines << "#define SUPPORTS_USAMPLER2D\n"; | |
| 143 } | |
| 144 | |
| 145 if (is_in_gamma_correct_mode_) { | |
| 146 defines << "#define IN_GAMMA_CORRECT_MODE\n"; | |
| 147 } | |
| 148 | |
| 149 if (supports_r8_read_format_) { | |
| 150 defines << "#define EDGE_READ_FORMAT r8\n"; | |
| 151 } else { | |
| 152 defines << "#define EDGE_READ_FORMAT r32f\n"; | |
| 153 } | |
| 154 | |
| 155 displayEdges << defines.str() << "#define DISPLAY_EDGES\n"; | |
| 156 debug_display_edges_shader_ = | |
| 157 CreateProgram(displayEdges.str().c_str(), vert_str_, cmaa_frag_str_); | |
| 158 | |
| 159 edge1 << defines.str() << "#define DETECT_EDGES1\n"; | |
| 160 edges0_shader_ = | |
| 161 CreateProgram(edge1.str().c_str(), vert_str_, cmaa_frag_str_); | |
| 162 | |
| 163 edge2 << defines.str() << "#define DETECT_EDGES2\n"; | |
| 164 edges1_shader_ = | |
| 165 CreateProgram(edge2.str().c_str(), vert_str_, cmaa_frag_str_); | |
| 166 | |
| 167 combineEdges << defines.str() << "#define COMBINE_EDGES\n"; | |
| 168 edges_combine_shader_ = | |
| 169 CreateProgram(combineEdges.str().c_str(), vert_str_, cmaa_frag_str_); | |
| 170 | |
| 171 blur << defines.str() << "#define BLUR_EDGES\n"; | |
| 172 process_and_apply_shader_ = | |
| 173 CreateProgram(blur.str().c_str(), vert_str_, cmaa_frag_str_); | |
| 174 | |
| 175 edges1_shader_result_texture_float4_slot1_ = | |
| 176 glGetUniformLocation(edges0_shader_, "g_resultTextureFlt4Slot1"); | |
| 177 edges1_shader_result_texture_ = | |
| 178 glGetUniformLocation(edges1_shader_, "g_resultTexture"); | |
| 179 edges_combine_shader_result_texture_float4_slot1_ = | |
| 180 glGetUniformLocation(edges_combine_shader_, "g_resultTextureFlt4Slot1"); | |
| 181 edges_combine_shader_result_texture_slot2_ = | |
| 182 glGetUniformLocation(edges_combine_shader_, "g_resultTextureSlot2"); | |
| 183 process_and_apply_shader_result_texture_float4_slot1_ = glGetUniformLocation( | |
| 184 process_and_apply_shader_, "g_resultTextureFlt4Slot1"); | |
| 185 copy_to_image_shader_outTexture_ = | |
| 186 glGetUniformLocation(copy_to_image_shader_, "outTexture"); | |
| 187 | |
| 188 initialized_ = true; | |
| 189 } | |
| 190 | |
| 191 void ApplyFramebufferAttachmentCMAAINTELResourceManager::Destroy() { | |
| 192 if (!initialized_) | |
| 193 return; | |
| 194 | |
| 195 ReleaseTextures(); | |
| 196 | |
| 197 glDeleteProgram(process_and_apply_shader_); | |
| 198 glDeleteProgram(edges_combine_shader_); | |
| 199 glDeleteProgram(edges1_shader_); | |
| 200 glDeleteProgram(edges0_shader_); | |
| 201 glDeleteProgram(debug_display_edges_shader_); | |
| 202 | |
| 203 initialized_ = false; | |
| 204 } | |
| 205 | |
| 206 // Apply CMAA(Conservative Morphological Anti-Aliasing) algorithm to the | |
| 207 // color attachments of currently bound draw framebuffer. | |
| 208 // Reference GL_INTEL_framebuffer_CMAA for details. | |
| 209 void ApplyFramebufferAttachmentCMAAINTELResourceManager:: | |
| 210 ApplyFramebufferAttachmentCMAAINTEL(const gles2::GLES2Decoder* decoder) { | |
| 211 if (!initialized_) | |
| 212 return; | |
| 213 | |
| 214 GLint last_framebuffer = 0; | |
| 215 GLint attachement_type = 0; | |
| 216 GLint source_texture = 0; | |
| 217 GLint texture_level = 0; | |
| 218 GLint width = 0; | |
| 219 GLint height = 0; | |
| 220 GLint internal_format = 0; | |
| 221 GLint max_draw_buffers = 0; | |
| 222 | |
| 223 glGetIntegerv(GL_FRAMEBUFFER_BINDING, &last_framebuffer); | |
| 224 | |
| 225 // Process each color attachment of the current draw framebuffer. | |
| 226 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, last_framebuffer); | |
| 227 glGetIntegerv(GL_MAX_DRAW_BUFFERS, &max_draw_buffers); | |
|
piman
2016/06/13 22:24:43
Here and below: we already have this value, and al
adrian.belgun
2016/06/16 14:53:14
Done, but needed to pass |decoder| without |const|
| |
| 228 for (int i = 0; i < max_draw_buffers; i++) { | |
| 229 glGetFramebufferAttachmentParameterivEXT( | |
| 230 GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + i, | |
| 231 GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE, &attachement_type); | |
| 232 if (attachement_type == GL_TEXTURE) { | |
| 233 // Get the texture width and height. | |
| 234 glGetFramebufferAttachmentParameterivEXT( | |
| 235 GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + i, | |
| 236 GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME, &source_texture); | |
| 237 glGetFramebufferAttachmentParameterivEXT( | |
| 238 GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + i, | |
| 239 GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL, &texture_level); | |
| 240 glBindTexture(GL_TEXTURE_2D, source_texture); | |
| 241 glGetTexLevelParameteriv(GL_TEXTURE_2D, texture_level, GL_TEXTURE_WIDTH, | |
| 242 &width); | |
| 243 glGetTexLevelParameteriv(GL_TEXTURE_2D, texture_level, GL_TEXTURE_HEIGHT, | |
| 244 &height); | |
| 245 glGetTexLevelParameteriv(GL_TEXTURE_2D, texture_level, | |
| 246 GL_TEXTURE_INTERNAL_FORMAT, &internal_format); | |
| 247 | |
| 248 // Resize internal structures - only if needed. | |
| 249 OnSize(width, height); | |
| 250 | |
| 251 // CMAA internally expects GL_RGBA8 textures. | |
| 252 // Process using a GL_RGBA8 copy if this is not the case. | |
| 253 bool do_copy = internal_format != GL_RGBA8; | |
| 254 | |
| 255 // Copy source_texture to rgba8_texture_ | |
| 256 if (do_copy) { | |
| 257 CopyTexture(source_texture, rgba8_texture_, false); | |
| 258 } | |
| 259 | |
| 260 // CMAA Effect | |
| 261 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, last_framebuffer); | |
| 262 if (do_copy) { | |
| 263 ApplyCMMAEffectTexture(1.0f / 13.0f, rgba8_texture_, rgba8_texture_); | |
|
piman
2016/06/13 22:24:43
Can you move this magic value into its own constan
adrian.belgun
2016/06/16 14:53:14
Removed. Was a remnant from an older version. Curr
| |
| 264 } else { | |
| 265 ApplyCMMAEffectTexture(1.0f / 13.0f, source_texture, source_texture); | |
| 266 } | |
| 267 | |
| 268 // Copy rgba8_texture_ to source_texture | |
| 269 if (do_copy) { | |
| 270 // Move source_texture to the first color attachment of the copy fbo. | |
| 271 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, last_framebuffer); | |
| 272 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + i, | |
| 273 GL_TEXTURE_2D, 0, 0); | |
| 274 glBindFramebufferEXT(GL_FRAMEBUFFER, copy_framebuffer_); | |
| 275 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, | |
| 276 GL_TEXTURE_2D, source_texture, 0); | |
| 277 | |
| 278 CopyTexture(rgba8_texture_, source_texture, true); | |
| 279 | |
| 280 // Restore color attachments | |
| 281 glBindFramebufferEXT(GL_FRAMEBUFFER, copy_framebuffer_); | |
| 282 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, | |
| 283 GL_TEXTURE_2D, rgba8_texture_, 0); | |
| 284 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, last_framebuffer); | |
| 285 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + i, | |
| 286 GL_TEXTURE_2D, source_texture, 0); | |
| 287 } | |
| 288 } | |
| 289 } | |
| 290 | |
| 291 // Restore state | |
| 292 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, last_framebuffer); | |
|
piman
2016/06/13 22:24:43
This is redundant with RestoreFramebufferBindings
adrian.belgun
2016/06/16 14:53:14
Done.
| |
| 293 decoder->RestoreAllAttributes(); | |
| 294 decoder->RestoreTextureUnitBindings(0); | |
| 295 decoder->RestoreActiveTexture(); | |
| 296 decoder->RestoreProgramBindings(); | |
| 297 decoder->RestoreBufferBindings(); | |
| 298 decoder->RestoreFramebufferBindings(); | |
| 299 decoder->RestoreGlobalState(); | |
| 300 } | |
| 301 | |
| 302 void ApplyFramebufferAttachmentCMAAINTELResourceManager::ApplyCMMAEffectTexture( | |
| 303 float edge_detection_threshold, | |
| 304 GLuint source_texture, | |
| 305 GLuint dest_texture) { | |
| 306 frame_id_++; | |
| 307 | |
| 308 GLuint edge_texture_a; | |
| 309 GLuint edge_texture_b; | |
| 310 | |
| 311 // Flip flop - One pass clears the texture that needs clearing for the other | |
| 312 // one (actually it's only important that it clears the highest bit) | |
| 313 if ((frame_id_ % 2) == 0) { | |
| 314 edge_texture_a = edges0_texture_; | |
| 315 edge_texture_b = edges1_texture_; | |
| 316 } else { | |
| 317 edge_texture_a = edges1_texture_; | |
| 318 edge_texture_b = edges0_texture_; | |
| 319 } | |
| 320 | |
| 321 // Setup the main fbo | |
| 322 glBindFramebufferEXT(GL_FRAMEBUFFER, cmaa_framebuffer_); | |
| 323 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, | |
| 324 mini4_edge_texture_, 0); | |
| 325 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, | |
| 326 mini4_edge_depth_texture_, 0); | |
| 327 #if DCHECK_IS_ON() | |
| 328 GLenum status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER); | |
| 329 if (status != GL_FRAMEBUFFER_COMPLETE) { | |
| 330 DLOG(ERROR) << "ApplyFramebufferAttachmentCMAAINTEL: " | |
| 331 << "Incomplete framebuffer."; | |
| 332 Destroy(); | |
| 333 return; | |
| 334 } | |
| 335 #endif | |
| 336 | |
| 337 // Setup the viewport to match the fbo | |
| 338 glViewport(0, 0, (int)((width_ + 1) / 2), (int)((height_ + 1) / 2)); | |
|
piman
2016/06/13 22:24:43
nit: no need for (int)
adrian.belgun
2016/06/16 14:53:14
Done.
| |
| 339 glEnable(GL_DEPTH_TEST); | |
| 340 | |
| 341 // Detect edges Pass 0 | |
| 342 // - For every pixel detect edges to the right and down and output depth | |
| 343 // mask where edges detected (1 - far, for detected, 0-near for empty | |
| 344 // pixels) | |
| 345 | |
| 346 // Inputs | |
| 347 // g_screenTexture source_texture tex0 | |
| 348 // Outputs | |
| 349 // gl_FragDepth mini4_edge_depth_texture_ fbo.depth | |
| 350 // out uvec4 outEdges mini4_edge_texture_ fbo.col | |
| 351 // image2D g_resultTextureFlt4Slot1 working_color_texture_ image1 | |
| 352 GLenum edge_format = supports_r8_image_ ? GL_R8 : GL_R32F; | |
| 353 | |
| 354 { | |
| 355 glUseProgram(edges0_shader_); | |
| 356 glUniform1f(0, 1.0f); | |
| 357 glUniform2f(1, 1.0f / width_, 1.0f / height_); | |
| 358 glDepthMask(GL_TRUE); | |
| 359 glDepthFunc(GL_ALWAYS); | |
| 360 glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); | |
| 361 | |
| 362 if (!is_gles31_compatible) { | |
| 363 glUniform1i(edges1_shader_result_texture_float4_slot1_, 1); | |
| 364 } | |
| 365 glBindImageTextureEXT(1, working_color_texture_, 0, GL_FALSE, 0, | |
| 366 GL_WRITE_ONLY, GL_RGBA8); | |
| 367 | |
| 368 glActiveTexture(GL_TEXTURE0); | |
| 369 glBindTexture(GL_TEXTURE_2D, source_texture); | |
| 370 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); | |
| 371 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | |
| 372 | |
| 373 glDrawArrays(GL_TRIANGLES, 0, 3); | |
| 374 } | |
| 375 | |
| 376 // Detect edges Pass 1 (finish the previous pass edge processing). | |
| 377 // Do the culling of non-dominant local edges (leave mainly locally dominant | |
| 378 // edges) and merge Right and Bottom edges into TopRightBottomLeft | |
| 379 | |
| 380 // Inputs | |
| 381 // g_src0Texture4Uint mini4_edge_texture_ tex1 | |
| 382 // Outputs | |
| 383 // image2D g_resultTexture edge_texture_b image0 | |
| 384 { | |
| 385 glUseProgram(edges1_shader_); | |
| 386 glUniform1f(0, 0.0f); | |
| 387 glUniform2f(1, 1.0f / width_, 1.0f / height_); | |
| 388 glDepthMask(GL_FALSE); | |
| 389 glDepthFunc(GL_LESS); | |
| 390 glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); | |
| 391 | |
| 392 if (!is_gles31_compatible) { | |
| 393 glUniform1i(edges1_shader_result_texture_, 0); | |
| 394 } | |
| 395 glBindImageTextureEXT(0, edge_texture_b, 0, GL_FALSE, 0, GL_WRITE_ONLY, | |
| 396 edge_format); | |
| 397 | |
| 398 glActiveTexture(GL_TEXTURE1); | |
| 399 glBindTexture(GL_TEXTURE_2D, mini4_edge_texture_); | |
| 400 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); | |
| 401 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); | |
| 402 | |
| 403 glDrawArrays(GL_TRIANGLES, 0, 3); | |
| 404 } | |
| 405 | |
| 406 // - Combine RightBottom (.xy) edges from previous pass into | |
| 407 // RightBottomLeftTop (.xyzw) edges and output it into the mask (have to | |
| 408 // fill in the whole buffer including empty ones for the line length | |
| 409 // detection to work correctly). | |
| 410 // - On all pixels with any edge, input buffer into a temporary color buffer | |
| 411 // needed for correct blending in the next pass (other pixels not needed | |
| 412 // so not copied to avoid bandwidth use). | |
| 413 // - On all pixels with 2 or more edges output positive depth mask for the | |
| 414 // next pass. | |
| 415 | |
| 416 // Inputs | |
| 417 // g_src0TextureFlt edge_texture_b tex1 //ps | |
| 418 // Outputs | |
| 419 // image2D g_resultTextureSlot2 edge_texture_a image2 | |
| 420 // gl_FragDepth mini4_edge_texture_ fbo.depth | |
| 421 { | |
| 422 // Combine edges: each pixel will now contain info on all (top, right, | |
| 423 // bottom, left) edges; also create depth mask as above depth and mark | |
| 424 // potential Zs AND also copy source color data but only on edge pixels | |
| 425 glUseProgram(edges_combine_shader_); | |
| 426 glUniform1f(0, 1.0f); | |
| 427 glUniform2f(1, 1.0f / width_, 1.0f / height_); | |
| 428 glDepthMask(GL_TRUE); | |
| 429 glDepthFunc(GL_ALWAYS); | |
| 430 glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); | |
| 431 | |
| 432 if (!is_gles31_compatible) { | |
| 433 glUniform1i(edges_combine_shader_result_texture_float4_slot1_, 1); | |
| 434 glUniform1i(edges_combine_shader_result_texture_slot2_, 2); | |
| 435 } | |
| 436 glBindImageTextureEXT(1, dest_texture, 0, GL_FALSE, 0, GL_WRITE_ONLY, | |
| 437 GL_RGBA8); | |
| 438 glBindImageTextureEXT(2, edge_texture_a, 0, GL_FALSE, 0, GL_WRITE_ONLY, | |
| 439 edge_format); | |
| 440 | |
| 441 glActiveTexture(GL_TEXTURE1); | |
| 442 glBindTexture(GL_TEXTURE_2D, edge_texture_b); | |
| 443 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); | |
| 444 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); | |
| 445 | |
| 446 glDrawArrays(GL_TRIANGLES, 0, 3); | |
| 447 } | |
| 448 | |
| 449 // Using depth mask and [earlydepthstencil] to work on pixels with 2, 3, 4 | |
| 450 // edges: | |
| 451 // - First blend simple blur map for 2,3,4 edge pixels | |
| 452 // - Then do the lines (line length counter -should- guarantee no overlap | |
| 453 // with other pixels - pixels with 1 edge are excluded in the previous | |
| 454 // pass and the pixels with 2 parallel edges are excluded in the simple | |
| 455 // blur) | |
| 456 | |
| 457 // Inputs | |
| 458 // g_screenTexture working_color_texture_ tex0 | |
| 459 // g_src0TextureFlt edge_texture_a tex1 //ps | |
| 460 // sampled | |
| 461 // Outputs | |
| 462 // g_resultTextureFlt4Slot1 dest_texture image1 | |
| 463 // gl_FragDepth mini4_edge_texture_ fbo.depth | |
| 464 { | |
| 465 glUseProgram(process_and_apply_shader_); | |
| 466 glUniform1f(0, 0.0f); | |
| 467 glUniform2f(1, 1.0f / width_, 1.0f / height_); | |
| 468 glDepthMask(GL_FALSE); | |
| 469 glDepthFunc(GL_LESS); | |
| 470 glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); | |
| 471 | |
| 472 if (!is_gles31_compatible) { | |
| 473 glUniform1i(process_and_apply_shader_result_texture_float4_slot1_, 1); | |
| 474 } | |
| 475 glBindImageTextureEXT(1, dest_texture, 0, GL_FALSE, 0, GL_WRITE_ONLY, | |
| 476 GL_RGBA8); | |
| 477 | |
| 478 glActiveTexture(GL_TEXTURE0); | |
| 479 glBindTexture(GL_TEXTURE_2D, working_color_texture_); | |
| 480 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); | |
| 481 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | |
| 482 | |
| 483 glActiveTexture(GL_TEXTURE1); | |
| 484 glBindTexture(GL_TEXTURE_2D, edge_texture_a); | |
| 485 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); | |
| 486 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); | |
| 487 | |
| 488 glDrawArrays(GL_TRIANGLES, 0, 3); | |
| 489 } | |
| 490 | |
| 491 glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); | |
| 492 glDisable(GL_DEPTH_TEST); | |
| 493 glDepthMask(GL_FALSE); | |
| 494 glActiveTexture(GL_TEXTURE0); | |
| 495 } | |
| 496 | |
| 497 void ApplyFramebufferAttachmentCMAAINTELResourceManager::OnSize(GLint width, | |
| 498 GLint height) { | |
| 499 if (height_ == height && width_ == width) | |
| 500 return; | |
| 501 | |
| 502 ReleaseTextures(); | |
| 503 | |
| 504 height_ = height; | |
| 505 width_ = width; | |
| 506 | |
| 507 glGenTextures(1, &rgba8_texture_); | |
| 508 glBindTexture(GL_TEXTURE_2D, rgba8_texture_); | |
| 509 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_RGBA8, width, height); | |
| 510 | |
| 511 // Edges texture - R8 | |
| 512 // OpenGLES has no single component 8/16-bit image support, so needs to be R32 | |
| 513 // Although CHT does support R8. | |
| 514 GLenum edge_format = supports_r8_image_ ? GL_R8 : GL_R32F; | |
| 515 glGenTextures(1, &edges0_texture_); | |
| 516 glBindTexture(GL_TEXTURE_2D, edges0_texture_); | |
| 517 glTexStorage2DEXT(GL_TEXTURE_2D, 1, edge_format, width, height); | |
| 518 | |
| 519 glGenTextures(1, &edges1_texture_); | |
| 520 glBindTexture(GL_TEXTURE_2D, edges1_texture_); | |
| 521 glTexStorage2DEXT(GL_TEXTURE_2D, 1, edge_format, width, height); | |
| 522 | |
| 523 // Color working texture - RGBA8 | |
| 524 glGenTextures(1, &working_color_texture_); | |
| 525 glBindTexture(GL_TEXTURE_2D, working_color_texture_); | |
| 526 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_RGBA8, width, height); | |
| 527 | |
| 528 // Half*half compressed 4-edge-per-pixel texture - RGBA8 | |
| 529 glGenTextures(1, &mini4_edge_texture_); | |
| 530 glBindTexture(GL_TEXTURE_2D, mini4_edge_texture_); | |
| 531 GLenum format = GL_RGBA8UI; | |
| 532 if (!supports_usampler_) { | |
| 533 format = GL_RGBA8; | |
| 534 } | |
| 535 glTexStorage2DEXT(GL_TEXTURE_2D, 1, format, (width + 1) / 2, | |
| 536 (height + 1) / 2); | |
| 537 | |
| 538 // Depth | |
| 539 glGenTextures(1, &mini4_edge_depth_texture_); | |
| 540 glBindTexture(GL_TEXTURE_2D, mini4_edge_depth_texture_); | |
| 541 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_DEPTH_COMPONENT16, (width + 1) / 2, | |
| 542 (height + 1) / 2); | |
| 543 | |
| 544 // Create the FBO | |
| 545 glGenFramebuffersEXT(1, &cmaa_framebuffer_); | |
| 546 glBindFramebufferEXT(GL_FRAMEBUFFER, cmaa_framebuffer_); | |
| 547 | |
| 548 // We need to clear the textures before they are first used. | |
| 549 // The algorithm self-clears them later. | |
| 550 glViewport(0, 0, (int)width_, (int)height_); | |
|
piman
2016/06/13 22:24:43
nit: remove (int)
adrian.belgun
2016/06/16 14:53:14
Done.
| |
| 551 glClearColor(0.0f, 0.0f, 0.0f, 0.0f); | |
| 552 | |
| 553 glBindFramebufferEXT(GL_FRAMEBUFFER, cmaa_framebuffer_); | |
| 554 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, | |
| 555 edges0_texture_, 0); | |
| 556 glClear(GL_COLOR_BUFFER_BIT); | |
| 557 | |
| 558 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, | |
| 559 edges1_texture_, 0); | |
| 560 glClear(GL_COLOR_BUFFER_BIT); | |
| 561 | |
| 562 textures_initialized_ = true; | |
| 563 } | |
| 564 | |
| 565 void ApplyFramebufferAttachmentCMAAINTELResourceManager::ReleaseTextures() { | |
| 566 if (textures_initialized_) { | |
| 567 glDeleteFramebuffersEXT(1, &copy_framebuffer_); | |
|
piman
2016/06/13 22:24:43
copy_framebuffer_ and rgba8_texture_ are always cr
adrian.belgun
2016/06/16 14:53:14
Done. Moved copy_framebuffer_ and rgba8_texture_ i
| |
| 568 glDeleteFramebuffersEXT(1, &cmaa_framebuffer_); | |
| 569 glDeleteTextures(1, &rgba8_texture_); | |
| 570 glDeleteTextures(1, &edges0_texture_); | |
| 571 glDeleteTextures(1, &edges1_texture_); | |
| 572 glDeleteTextures(1, &mini4_edge_texture_); | |
| 573 glDeleteTextures(1, &mini4_edge_depth_texture_); | |
| 574 glDeleteTextures(1, &working_color_texture_); | |
| 575 } | |
| 576 textures_initialized_ = false; | |
| 577 } | |
| 578 | |
| 579 void ApplyFramebufferAttachmentCMAAINTELResourceManager::CopyTexture( | |
| 580 GLint source, | |
| 581 GLint dest, | |
| 582 bool via_fbo) { | |
| 583 glViewport(0, 0, width_, height_); | |
| 584 glActiveTexture(GL_TEXTURE0); | |
| 585 glBindTexture(GL_TEXTURE_2D, source); | |
| 586 | |
| 587 if (!via_fbo) { | |
| 588 glUseProgram(copy_to_image_shader_); | |
| 589 if (!is_gles31_compatible) { | |
| 590 glUniform1i(copy_to_image_shader_outTexture_, 0); | |
| 591 } | |
| 592 glBindImageTextureEXT(0, dest, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8); | |
| 593 } else { | |
| 594 glDisable(GL_BLEND); | |
|
piman
2016/06/13 22:24:43
What about other state such as color mask, scissor
adrian.belgun
2016/06/16 14:53:14
Done. Added remaining state flags.
| |
| 595 glUseProgram(copy_to_framebuffer_shader_); | |
| 596 } | |
| 597 | |
| 598 glDrawArrays(GL_TRIANGLES, 0, 3); | |
| 599 glUseProgram(0); | |
| 600 glBindTexture(GL_TEXTURE_2D, 0); | |
| 601 } | |
| 602 | |
| 603 GLuint ApplyFramebufferAttachmentCMAAINTELResourceManager::CreateProgram( | |
| 604 const char* defines, | |
| 605 const char* vs_source, | |
| 606 const char* fs_source) { | |
| 607 GLuint program = glCreateProgram(); | |
| 608 | |
| 609 GLuint vs = CreateShader(GL_VERTEX_SHADER, defines, vs_source); | |
| 610 GLuint fs = CreateShader(GL_FRAGMENT_SHADER, defines, fs_source); | |
| 611 | |
| 612 glAttachShader(program, vs); | |
| 613 glDeleteShader(vs); | |
| 614 glAttachShader(program, fs); | |
| 615 glDeleteShader(fs); | |
| 616 | |
| 617 glLinkProgram(program); | |
| 618 GLint linkStatus; | |
|
piman
2016/06/13 22:24:43
nit: link_status
adrian.belgun
2016/06/16 14:53:14
Done.
| |
| 619 glGetProgramiv(program, GL_LINK_STATUS, &linkStatus); | |
| 620 | |
| 621 if (linkStatus == 0) { | |
| 622 #if DCHECK_IS_ON() | |
| 623 GLint infoLogLength; | |
|
piman
2016/06/13 22:24:43
nit: info_log_length
adrian.belgun
2016/06/16 14:53:14
Done.
| |
| 624 glGetProgramiv(program, GL_INFO_LOG_LENGTH, &infoLogLength); | |
| 625 std::vector<GLchar> infoLog(infoLogLength); | |
|
piman
2016/06/13 22:24:43
nit: info_log
adrian.belgun
2016/06/16 14:53:14
Done.
| |
| 626 glGetProgramInfoLog(program, static_cast<GLsizei>(infoLog.size()), NULL, | |
| 627 &infoLog[0]); | |
| 628 DLOG(ERROR) << "ApplyFramebufferAttachmentCMAAINTEL: " | |
| 629 << "program link failed: " << &infoLog[0]; | |
| 630 #endif | |
| 631 glDeleteProgram(program); | |
| 632 program = 0; | |
| 633 } | |
| 634 | |
| 635 return program; | |
| 636 } | |
| 637 | |
| 638 GLuint ApplyFramebufferAttachmentCMAAINTELResourceManager::CreateShader( | |
| 639 GLenum type, | |
| 640 const char* defines, | |
| 641 const char* source) { | |
| 642 GLuint shader = glCreateShader(type); | |
| 643 | |
| 644 const char* header_es31 = | |
|
piman
2016/06/13 22:24:43
nit: const char header_es31[]
adrian.belgun
2016/06/16 14:53:14
Done.
| |
| 645 "#version 310 es \n"; | |
| 646 const char* header_gl30 = | |
|
piman
2016/06/13 22:24:43
nit: const char header_gl30[]
adrian.belgun
2016/06/16 14:53:14
Done.
| |
| 647 "#version 130 \n" | |
| 648 "#extension GL_ARB_shading_language_420pack : require \n" | |
| 649 "#extension GL_ARB_texture_gather : require \n" | |
| 650 "#extension GL_ARB_explicit_uniform_location : require \n" | |
| 651 "#extension GL_ARB_explicit_attrib_location : require \n" | |
| 652 "#extension GL_ARB_shader_image_load_store : require \n"; | |
| 653 | |
| 654 const char* header = NULL; | |
| 655 if (is_gles31_compatible) { | |
| 656 header = header_es31; | |
| 657 } else { | |
| 658 header = header_gl30; | |
| 659 } | |
| 660 | |
| 661 const char* sourceArray[4] = {header, defines, "\n", source}; | |
|
piman
2016/06/13 22:24:43
nit: source_array
adrian.belgun
2016/06/16 14:53:14
Done.
| |
| 662 glShaderSource(shader, 4, sourceArray, NULL); | |
| 663 | |
| 664 glCompileShader(shader); | |
| 665 | |
| 666 GLint compileResult; | |
|
piman
2016/06/13 22:24:43
nit: compile_result
adrian.belgun
2016/06/16 14:53:14
Done.
| |
| 667 glGetShaderiv(shader, GL_COMPILE_STATUS, &compileResult); | |
| 668 if (compileResult == 0) { | |
| 669 #if DCHECK_IS_ON() | |
| 670 GLint infoLogLength; | |
|
piman
2016/06/13 22:24:43
nit: info_log_length
adrian.belgun
2016/06/16 14:53:14
Done.
| |
| 671 glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &infoLogLength); | |
| 672 std::vector<GLchar> infoLog(infoLogLength); | |
|
piman
2016/06/13 22:24:43
nit: info_log
adrian.belgun
2016/06/16 14:53:14
Done.
| |
| 673 glGetShaderInfoLog(shader, static_cast<GLsizei>(infoLog.size()), NULL, | |
| 674 &infoLog[0]); | |
| 675 DLOG(ERROR) << "ApplyFramebufferAttachmentCMAAINTEL: " | |
| 676 << "shader compilation failed: " | |
| 677 << (type == GL_VERTEX_SHADER | |
| 678 ? "GL_VERTEX_SHADER" | |
| 679 : (type == GL_FRAGMENT_SHADER ? "GL_FRAGMENT_SHADER" | |
| 680 : "UNKNOWN_SHADER")) | |
| 681 << " shader compilation failed: " << &infoLog[0]; | |
| 682 #endif | |
| 683 glDeleteShader(shader); | |
| 684 shader = 0; | |
| 685 } | |
| 686 | |
| 687 return shader; | |
| 688 } | |
| 689 | |
| 690 // Shaders used in the CMAA algorithm. | |
| 691 const char* ApplyFramebufferAttachmentCMAAINTELResourceManager::vert_str_ = | |
| 692 "precision highp float; \n" | |
| 693 "layout(location = 0) uniform float g_Depth; \n" | |
| 694 "// No input data. \n" | |
| 695 "// Verts are autogenerated. \n" | |
| 696 "// \n" | |
| 697 "// vertexID 0,1,2 should generate \n" | |
| 698 "// POS: (-1,-1), (+3,-1), (-1,+3) \n" | |
| 699 "// \n" | |
| 700 "// This generates a triangle that completely covers the -1->1 viewport \n" | |
| 701 "// \n" | |
| 702 "void main() \n" | |
| 703 "{ \n" | |
| 704 " float x = -1.0 + float((gl_VertexID & 1) << 2); \n" | |
| 705 " float y = -1.0 + float((gl_VertexID & 2) << 1); \n" | |
| 706 " gl_Position = vec4(x, y, g_Depth, 1.0); \n" | |
| 707 "} \n" | |
| 708 " \n"; | |
| 709 | |
| 710 const char* ApplyFramebufferAttachmentCMAAINTELResourceManager::cmaa_frag_str_ = | |
| 711 "precision highp float; \n" | |
| 712 "precision highp int; \n" | |
| 713 " \n" | |
| 714 "#define SETTINGS_ALLOW_SHORT_Zs 1 \n" | |
| 715 "#define EDGE_DETECT_THRESHOLD 13.0f \n" | |
| 716 " \n" | |
| 717 "#define saturate(x) clamp((x), 0.0, 1.0) \n" | |
| 718 " \n" | |
| 719 "// bind to location 0 \n" | |
| 720 "layout(location = 0) uniform float g_Depth; \n" | |
| 721 "// bind to a uniform buffer bind point 0 \n" | |
| 722 "layout(location = 1) uniform vec2 g_OneOverScreenSize; \n" | |
| 723 "#ifndef EDGE_DETECT_THRESHOLD \n" | |
| 724 "layout(location = 2) uniform float g_ColorThreshold; \n" | |
| 725 "#endif \n" | |
| 726 " \n" | |
| 727 "#ifdef SUPPORTS_USAMPLER2D \n" | |
| 728 "#define USAMPLER usampler2D \n" | |
| 729 "#define UVEC4 uvec4 \n" | |
| 730 "#define LOAD_UINT(arg) arg \n" | |
| 731 "#define STORE_UVEC4(arg) arg \n" | |
| 732 "#else \n" | |
| 733 "#define USAMPLER sampler2D \n" | |
| 734 "#define UVEC4 vec4 \n" | |
| 735 "#define LOAD_UINT(arg) uint(arg * 255.0f) \n" | |
| 736 "#define STORE_UVEC4(arg) vec4(float(arg.x) / 255.0f, \\\n" | |
| 737 " float(arg.y) / 255.0f, \\\n" | |
| 738 " float(arg.z) / 255.0f, \\\n" | |
| 739 " float(arg.w) / 255.0f) \n" | |
| 740 "#endif \n" | |
| 741 " \n" | |
| 742 "// bind to texture stage 0/1 \n" | |
| 743 "layout(binding = 0) uniform highp sampler2D g_screenTexture; \n" | |
| 744 "layout(binding = 1) uniform highp sampler2D g_src0TextureFlt; \n" | |
| 745 "layout(binding = 1) uniform highp USAMPLER g_src0Texture4Uint; \n" | |
| 746 " \n" | |
| 747 "// bind to image stage 0/1/2 \n" | |
| 748 "#ifdef GL_ES \n" | |
| 749 "layout(binding = 0, EDGE_READ_FORMAT) restrict writeonly uniform highp \n" | |
| 750 " image2D g_resultTexture; \n" | |
| 751 "layout(binding = 1, rgba8) restrict writeonly uniform highp \n" | |
| 752 " image2D g_resultTextureFlt4Slot1; \n" | |
| 753 "layout(binding = 2, EDGE_READ_FORMAT) restrict writeonly uniform highp \n" | |
| 754 " image2D g_resultTextureSlot2; \n" | |
| 755 "#else \n" | |
| 756 "layout(EDGE_READ_FORMAT) restrict writeonly uniform highp \n" | |
| 757 " image2D g_resultTexture; \n" | |
| 758 "layout(rgba8) restrict writeonly uniform highp \n" | |
| 759 " image2D g_resultTextureFlt4Slot1; \n" | |
| 760 "layout(EDGE_READ_FORMAT) restrict writeonly uniform highp \n" | |
| 761 " image2D g_resultTextureSlot2; \n" | |
| 762 "#endif \n" | |
| 763 " \n" | |
| 764 "// Constants \n" | |
| 765 "const vec4 c_lumWeights = vec4(0.2126f, 0.7152f, 0.0722f, 0.0000f); \n" | |
| 766 " \n" | |
| 767 "#ifdef EDGE_DETECT_THRESHOLD \n" | |
| 768 "const float c_ColorThreshold = 1.0f / EDGE_DETECT_THRESHOLD; \n" | |
| 769 "#endif \n" | |
| 770 " \n" | |
| 771 "// Must be even number; Will work with ~16 pretty good too for \n" | |
| 772 "// additional performance, or with ~64 for highest quality. \n" | |
| 773 "const int c_maxLineLength = 64; \n" | |
| 774 " \n" | |
| 775 "const vec4 c_edgeDebugColours[5] = vec4[5](vec4(0.5, 0.5, 0.5, 0.4), \n" | |
| 776 " vec4(1.0, 0.1, 1.0, 0.8), \n" | |
| 777 " vec4(0.9, 0.0, 0.0, 0.8), \n" | |
| 778 " vec4(0.0, 0.9, 0.0, 0.8), \n" | |
| 779 " vec4(0.0, 0.0, 0.9, 0.8)); \n" | |
| 780 " \n" | |
| 781 "// this isn't needed if colour UAV is _SRGB but that doesn't work \n" | |
| 782 "// everywhere \n" | |
| 783 "#ifdef IN_GAMMA_CORRECT_MODE \n" | |
| 784 "///////////////////////////////////////////////////////////////////////\n" | |
| 785 "// \n" | |
| 786 "// SRGB Helper Functions taken from D3DX_DXGIFormatConvert.inl \n" | |
| 787 "float D3DX_FLOAT_to_SRGB(float val) { \n" | |
| 788 " if (val < 0.0031308f) \n" | |
| 789 " val *= 12.92f; \n" | |
| 790 " else { \n" | |
| 791 " val = 1.055f * pow(val, 1.0f / 2.4f) - 0.055f; \n" | |
| 792 " } \n" | |
| 793 " return val; \n" | |
| 794 "} \n" | |
| 795 "// \n" | |
| 796 "vec3 D3DX_FLOAT3_to_SRGB(vec3 val) { \n" | |
| 797 " vec3 outVal; \n" | |
| 798 " outVal.x = D3DX_FLOAT_to_SRGB(val.x); \n" | |
| 799 " outVal.y = D3DX_FLOAT_to_SRGB(val.y); \n" | |
| 800 " outVal.z = D3DX_FLOAT_to_SRGB(val.z); \n" | |
| 801 " return outVal; \n" | |
| 802 "} \n" | |
| 803 "// \n" | |
| 804 "///////////////////////////////////////////////////////////////////////\n" | |
| 805 "#endif // IN_GAMMA_CORRECT_MODE \n" | |
| 806 " \n" | |
| 807 "// how .rgba channels from the edge texture maps to pixel edges: \n" | |
| 808 "// \n" | |
| 809 "// A - 0x08 \n" | |
| 810 "// |¯¯¯¯¯¯¯¯¯| \n" | |
| 811 "// | | \n" | |
| 812 "// 0x04 - B | pixel | R - 0x01 \n" | |
| 813 "// | | \n" | |
| 814 "// |_________| \n" | |
| 815 "// G - 0x02 \n" | |
| 816 "// \n" | |
| 817 "// (A - there's an edge between us and a pixel above us) \n" | |
| 818 "// (R - there's an edge between us and a pixel to the right) \n" | |
| 819 "// (G - there's an edge between us and a pixel at the bottom) \n" | |
| 820 "// (B - there's an edge between us and a pixel to the left) \n" | |
| 821 " \n" | |
| 822 "// Expecting values of 1 and 0 only! \n" | |
| 823 "uint PackEdge(uvec4 edges) { \n" | |
| 824 " return (edges.x << 0u) | (edges.y << 1u) | (edges.z << 2u) | \n" | |
| 825 " (edges.w << 3u); \n" | |
| 826 "} \n" | |
| 827 " \n" | |
| 828 "uvec4 UnpackEdge(uint value) { \n" | |
| 829 " uvec4 ret; \n" | |
| 830 " ret.x = (value & 0x01u) != 0u ? 1u : 0u; \n" | |
| 831 " ret.y = (value & 0x02u) != 0u ? 1u : 0u; \n" | |
| 832 " ret.z = (value & 0x04u) != 0u ? 1u : 0u; \n" | |
| 833 " ret.w = (value & 0x08u) != 0u ? 1u : 0u; \n" | |
| 834 " return ret; \n" | |
| 835 "} \n" | |
| 836 " \n" | |
| 837 "uint PackZ(const uvec2 screenPos, const bool invertedZShape) { \n" | |
| 838 " uint retVal = screenPos.x | (screenPos.y << 15u); \n" | |
| 839 " if (invertedZShape) \n" | |
| 840 " retVal |= (1u << 30u); \n" | |
| 841 " return retVal; \n" | |
| 842 "} \n" | |
| 843 " \n" | |
| 844 "void UnpackZ(uint packedZ, out uvec2 screenPos, \n" | |
| 845 " out bool invertedZShape) \n" | |
| 846 "{ \n" | |
| 847 " screenPos.x = packedZ & 0x7FFFu; \n" | |
| 848 " screenPos.y = (packedZ >> 15u) & 0x7FFFu; \n" | |
| 849 " invertedZShape = (packedZ >> 30u) == 1u; \n" | |
| 850 "} \n" | |
| 851 " \n" | |
| 852 "uint PackZ(const uvec2 screenPos, \n" | |
| 853 " const bool invertedZShape, \n" | |
| 854 " const bool horizontal) { \n" | |
| 855 " uint retVal = screenPos.x | (screenPos.y << 15u); \n" | |
| 856 " if (invertedZShape) \n" | |
| 857 " retVal |= (1u << 30u); \n" | |
| 858 " if (horizontal) \n" | |
| 859 " retVal |= (1u << 31u); \n" | |
| 860 " return retVal; \n" | |
| 861 "} \n" | |
| 862 " \n" | |
| 863 "void UnpackZ(uint packedZ, \n" | |
| 864 " out uvec2 screenPos, \n" | |
| 865 " out bool invertedZShape, \n" | |
| 866 " out bool horizontal) { \n" | |
| 867 " screenPos.x = packedZ & 0x7FFFu; \n" | |
| 868 " screenPos.y = (packedZ >> 15u) & 0x7FFFu; \n" | |
| 869 " invertedZShape = (packedZ & (1u << 30u)) != 0u; \n" | |
| 870 " horizontal = (packedZ & (1u << 31u)) != 0u; \n" | |
| 871 "} \n" | |
| 872 " \n" | |
| 873 "vec4 PackBlurAAInfo(ivec2 pixelPos, uint shapeType) { \n" | |
| 874 " uint packedEdges = uint( \n" | |
| 875 " texelFetch(g_src0TextureFlt, pixelPos, 0).r * 255.5); \n" | |
| 876 " \n" | |
| 877 " float retval = float(packedEdges + (shapeType << 4u)); \n" | |
| 878 " \n" | |
| 879 " return vec4(retval / 255.0); \n" | |
| 880 "} \n" | |
| 881 " \n" | |
| 882 "void UnpackBlurAAInfo(float packedValue, out uint edges, \n" | |
| 883 " out uint shapeType) { \n" | |
| 884 " uint packedValueInt = uint(packedValue * 255.5); \n" | |
| 885 " edges = packedValueInt & 0xFu; \n" | |
| 886 " shapeType = packedValueInt >> 4u; \n" | |
| 887 "} \n" | |
| 888 " \n" | |
| 889 "float EdgeDetectColorCalcDiff(vec3 colorA, vec3 colorB) { \n" | |
| 890 "#ifdef IN_BGR_MODE \n" | |
| 891 " vec3 LumWeights = c_lumWeights.bgr; \n" | |
| 892 "#else \n" | |
| 893 " vec3 LumWeights = c_lumWeights.rgb; \n" | |
| 894 "#endif \n" | |
| 895 " \n" | |
| 896 " return dot(abs(colorA.rgb - colorB.rgb), LumWeights); \n" | |
| 897 "} \n" | |
| 898 " \n" | |
| 899 "bool EdgeDetectColor(vec3 colorA, vec3 colorB) { \n" | |
| 900 "#ifdef EDGE_DETECT_THRESHOLD \n" | |
| 901 " return EdgeDetectColorCalcDiff(colorA, colorB) > c_ColorThreshold; \n" | |
| 902 "#else \n" | |
| 903 " return EdgeDetectColorCalcDiff(colorA, colorB) > g_ColorThreshold; \n" | |
| 904 "#endif \n" | |
| 905 "} \n" | |
| 906 " \n" | |
| 907 "void FindLineLength(out int lineLengthLeft, \n" | |
| 908 " out int lineLengthRight, \n" | |
| 909 " ivec2 screenPos, \n" | |
| 910 " const bool horizontal, \n" | |
| 911 " const bool invertedZShape, \n" | |
| 912 " const ivec2 stepRight) { \n" | |
| 913 " // TODO: there must be a cleaner and faster way to get to these - \n" | |
| 914 " // a precalculated array indexing maybe? \n" | |
| 915 " uint maskLeft, bitsContinueLeft, maskRight, bitsContinueRight; \n" | |
| 916 " { \n" | |
| 917 " // Horizontal (vertical is the same, just rotated 90º \n" | |
| 918 " // counter-clockwise) \n" | |
| 919 " // Inverted Z case: // Normal Z case: \n" | |
| 920 " // __ // __ \n" | |
| 921 " // X| // X| \n" | |
| 922 " // -- // -- \n" | |
| 923 " // \n" | |
| 924 " uint maskTraceLeft, maskTraceRight; \n" | |
| 925 " uint maskStopLeft, maskStopRight; \n" | |
| 926 " if (horizontal) { \n" | |
| 927 " if (invertedZShape) { \n" | |
| 928 " maskTraceLeft = 0x02u; // tracing bottom edge \n" | |
| 929 " maskTraceRight = 0x08u; // tracing top edge \n" | |
| 930 " } else { \n" | |
| 931 " maskTraceLeft = 0x08u; // tracing top edge \n" | |
| 932 " maskTraceRight = 0x02u; // tracing bottom edge \n" | |
| 933 " } \n" | |
| 934 " maskStopLeft = 0x01u; // stop on right edge \n" | |
| 935 " maskStopRight = 0x04u; // stop on left edge \n" | |
| 936 " } else { \n" | |
| 937 " if (invertedZShape) { \n" | |
| 938 " maskTraceLeft = 0x01u; // tracing right edge \n" | |
| 939 " maskTraceRight = 0x04u; // tracing left edge \n" | |
| 940 " } else { \n" | |
| 941 " maskTraceLeft = 0x04u; // tracing left edge \n" | |
| 942 " maskTraceRight = 0x01u; // tracing right edge \n" | |
| 943 " } \n" | |
| 944 " maskStopLeft = 0x08u; // stop on top edge \n" | |
| 945 " maskStopRight = 0x02u; // stop on bottom edge \n" | |
| 946 " } \n" | |
| 947 " \n" | |
| 948 " maskLeft = maskTraceLeft | maskStopLeft; \n" | |
| 949 " bitsContinueLeft = maskTraceLeft; \n" | |
| 950 " maskRight = maskTraceRight | maskStopRight; \n" | |
| 951 " bitsContinueRight = maskTraceRight; \n" | |
| 952 " } \n" | |
| 953 "///////////////////////////////////////////////////////////////////////\n" | |
| 954 " \n" | |
| 955 "#ifdef SETTINGS_ALLOW_SHORT_Zs \n" | |
| 956 " int i = 1; \n" | |
| 957 "#else \n" | |
| 958 " int i = 2; // starting from 2 because we already know it's at least 2\n" | |
| 959 "#endif \n" | |
| 960 " for (; i < c_maxLineLength; i++) { \n" | |
| 961 " uint edgeLeft = uint( \n" | |
| 962 " texelFetch(g_src0TextureFlt, \n" | |
| 963 " ivec2(screenPos.xy - stepRight * i), 0).r * 255.5); \n" | |
| 964 " uint edgeRight = uint( \n" | |
| 965 " texelFetch(g_src0TextureFlt, \n" | |
| 966 " ivec2(screenPos.xy + stepRight * (i + 1)), \n" | |
| 967 " 0).r * 255.5); \n" | |
| 968 " \n" | |
| 969 " // stop on encountering 'stopping' edge (as defined by masks) \n" | |
| 970 " int stopLeft = (edgeLeft & maskLeft) != bitsContinueLeft ? 1 : 0; \n" | |
| 971 " int stopRight = \n" | |
| 972 " (edgeRight & maskRight) != bitsContinueRight ? 1 : 0; \n" | |
| 973 " \n" | |
| 974 " if (bool(stopLeft) || bool(stopRight)) { \n" | |
| 975 " lineLengthLeft = 1 + i - stopLeft; \n" | |
| 976 " lineLengthRight = 1 + i - stopRight; \n" | |
| 977 " return; \n" | |
| 978 " } \n" | |
| 979 " } \n" | |
| 980 " lineLengthLeft = lineLengthRight = i; \n" | |
| 981 " return; \n" | |
| 982 "} \n" | |
| 983 " \n" | |
| 984 "void ProcessDetectedZ(ivec2 screenPos, bool horizontal, \n" | |
| 985 " bool invertedZShape) { \n" | |
| 986 " int lineLengthLeft, lineLengthRight; \n" | |
| 987 " \n" | |
| 988 " ivec2 stepRight = (horizontal) ? (ivec2(1, 0)) : (ivec2(0, -1)); \n" | |
| 989 " vec2 blendDir = (horizontal) ? (vec2(0, -1)) : (vec2(-1, 0)); \n" | |
| 990 " \n" | |
| 991 " FindLineLength(lineLengthLeft, lineLengthRight, screenPos, \n" | |
| 992 " horizontal, invertedZShape, stepRight); \n" | |
| 993 " \n" | |
| 994 " vec2 pixelSize = g_OneOverScreenSize; \n" | |
| 995 " \n" | |
| 996 " float leftOdd = 0.15 * float(lineLengthLeft % 2); \n" | |
| 997 " float rightOdd = 0.15 * float(lineLengthRight % 2); \n" | |
| 998 " \n" | |
| 999 " int loopFrom = -int((lineLengthLeft + 1) / 2) + 1; \n" | |
| 1000 " int loopTo = int((lineLengthRight + 1) / 2); \n" | |
| 1001 " \n" | |
| 1002 " float totalLength = float(loopTo - loopFrom) + 1.0 - leftOdd - \n" | |
| 1003 " rightOdd; \n" | |
| 1004 " \n" | |
| 1005 " for (int i = loopFrom; i <= loopTo; i++) { \n" | |
| 1006 " highp ivec2 pixelPos = screenPos + stepRight * i; \n" | |
| 1007 " vec2 pixelPosFlt = vec2(float(pixelPos.x) + 0.5, \n" | |
| 1008 " float(pixelPos.y) + 0.5); \n" | |
| 1009 " \n" | |
| 1010 "#ifdef DEBUG_OUTPUT_AAINFO \n" | |
| 1011 " imageStore(g_resultTextureSlot2, pixelPos, \n" | |
| 1012 " PackBlurAAInfo(pixelPos, 1u)); \n" | |
| 1013 "#endif \n" | |
| 1014 " \n" | |
| 1015 " float m = (float(i) + 0.5 - leftOdd - float(loopFrom)) / \n" | |
| 1016 " totalLength; \n" | |
| 1017 " m = saturate(m); \n" | |
| 1018 " float k = m - ((i > 0) ? 1.0 : 0.0); \n" | |
| 1019 " k = (invertedZShape) ? (-k) : (k); \n" | |
| 1020 " \n" | |
| 1021 " vec4 color = textureLod(g_screenTexture, \n" | |
| 1022 " (pixelPosFlt + blendDir * k) * pixelSize, \n" | |
| 1023 " 0.0); \n" | |
| 1024 " \n" | |
| 1025 "#ifdef IN_GAMMA_CORRECT_MODE \n" | |
| 1026 " color.rgb = D3DX_FLOAT3_to_SRGB(color.rgb); \n" | |
| 1027 "#endif \n" | |
| 1028 " imageStore(g_resultTextureFlt4Slot1, pixelPos, color); \n" | |
| 1029 " } \n" | |
| 1030 "} \n" | |
| 1031 " \n" | |
| 1032 "vec4 CalcDbgDisplayColor(const vec4 blurMap) { \n" | |
| 1033 " vec3 pixelC = vec3(0.0, 0.0, 0.0); \n" | |
| 1034 " vec3 pixelL = vec3(0.0, 0.0, 1.0); \n" | |
| 1035 " vec3 pixelT = vec3(1.0, 0.0, 0.0); \n" | |
| 1036 " vec3 pixelR = vec3(0.0, 1.0, 0.0); \n" | |
| 1037 " vec3 pixelB = vec3(0.8, 0.8, 0.0); \n" | |
| 1038 " \n" | |
| 1039 " const float centerWeight = 1.0; \n" | |
| 1040 " float fromBelowWeight = (1.0 / (1.0 - blurMap.x)) - 1.0; \n" | |
| 1041 " float fromAboveWeight = (1.0 / (1.0 - blurMap.y)) - 1.0; \n" | |
| 1042 " float fromRightWeight = (1.0 / (1.0 - blurMap.z)) - 1.0; \n" | |
| 1043 " float fromLeftWeight = (1.0 / (1.0 - blurMap.w)) - 1.0; \n" | |
| 1044 " \n" | |
| 1045 " float weightSum = centerWeight + dot(vec4(fromBelowWeight, \n" | |
| 1046 " fromAboveWeight, \n" | |
| 1047 " fromRightWeight, \n" | |
| 1048 " fromLeftWeight), \n" | |
| 1049 " vec4(1, 1, 1, 1)); \n" | |
| 1050 " \n" | |
| 1051 " vec4 pixel; \n" | |
| 1052 " \n" | |
| 1053 " pixel.rgb = pixelC.rgb + fromAboveWeight * pixelT + \n" | |
| 1054 " fromBelowWeight * pixelB + \n" | |
| 1055 " fromLeftWeight * pixelL + \n" | |
| 1056 " fromRightWeight * pixelR; \n" | |
| 1057 " pixel.rgb /= weightSum; \n" | |
| 1058 " \n" | |
| 1059 " pixel.a = dot(pixel.rgb, vec3(1, 1, 1)) * 100.0; \n" | |
| 1060 " \n" | |
| 1061 " return saturate(pixel); \n" | |
| 1062 "} \n" | |
| 1063 " \n" | |
| 1064 "#ifdef DETECT_EDGES1 \n" | |
| 1065 "layout(location = 0) out UVEC4 outEdges; \n" | |
| 1066 "void DetectEdges1() { \n" | |
| 1067 " uvec4 outputEdges; \n" | |
| 1068 " ivec2 screenPosI = ivec2(gl_FragCoord.xy) * ivec2(2, 2); \n" | |
| 1069 " \n" | |
| 1070 " // .rgb contains colour, .a contains flag whether to output it to \n" | |
| 1071 " // working colour texture \n" | |
| 1072 " vec4 pixel00 = texelFetch(g_screenTexture, screenPosI.xy, 0); \n" | |
| 1073 " vec4 pixel10 = \n" | |
| 1074 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(1, 0));\n" | |
| 1075 " vec4 pixel20 = \n" | |
| 1076 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(2, 0));\n" | |
| 1077 " vec4 pixel01 = \n" | |
| 1078 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(0, 1));\n" | |
| 1079 " vec4 pixel11 = \n" | |
| 1080 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(1, 1));\n" | |
| 1081 " vec4 pixel21 = \n" | |
| 1082 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(2, 1));\n" | |
| 1083 " vec4 pixel02 = \n" | |
| 1084 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(0, 2));\n" | |
| 1085 " vec4 pixel12 = \n" | |
| 1086 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(1, 2));\n" | |
| 1087 " \n" | |
| 1088 " float storeFlagPixel00 = 0.0; \n" | |
| 1089 " float storeFlagPixel10 = 0.0; \n" | |
| 1090 " float storeFlagPixel20 = 0.0; \n" | |
| 1091 " float storeFlagPixel01 = 0.0; \n" | |
| 1092 " float storeFlagPixel11 = 0.0; \n" | |
| 1093 " float storeFlagPixel21 = 0.0; \n" | |
| 1094 " float storeFlagPixel02 = 0.0; \n" | |
| 1095 " float storeFlagPixel12 = 0.0; \n" | |
| 1096 " \n" | |
| 1097 " vec2 et; \n" | |
| 1098 " \n" | |
| 1099 "#ifdef EDGE_DETECT_THRESHOLD \n" | |
| 1100 " float threshold = c_ColorThreshold; \n" | |
| 1101 "#else \n" | |
| 1102 " float threshold = g_ColorThreshold; \n" | |
| 1103 "#endif \n" | |
| 1104 " \n" | |
| 1105 " { \n" | |
| 1106 " et.x = EdgeDetectColorCalcDiff(pixel00.rgb, pixel10.rgb); \n" | |
| 1107 " et.y = EdgeDetectColorCalcDiff(pixel00.rgb, pixel01.rgb); \n" | |
| 1108 " et = saturate(et - threshold); \n" | |
| 1109 " ivec2 eti = ivec2(et * 15.0 + 0.99); \n" | |
| 1110 " outputEdges.x = uint(eti.x | (eti.y << 4)); \n" | |
| 1111 " \n" | |
| 1112 " storeFlagPixel00 += et.x; \n" | |
| 1113 " storeFlagPixel00 += et.y; \n" | |
| 1114 " storeFlagPixel10 += et.x; \n" | |
| 1115 " storeFlagPixel01 += et.y; \n" | |
| 1116 " } \n" | |
| 1117 " \n" | |
| 1118 " { \n" | |
| 1119 " et.x = EdgeDetectColorCalcDiff(pixel10.rgb, pixel20.rgb); \n" | |
| 1120 " et.y = EdgeDetectColorCalcDiff(pixel10.rgb, pixel11.rgb); \n" | |
| 1121 " et = saturate(et - threshold); \n" | |
| 1122 " ivec2 eti = ivec2(et * 15.0 + 0.99); \n" | |
| 1123 " outputEdges.y = uint(eti.x | (eti.y << 4)); \n" | |
| 1124 " \n" | |
| 1125 " storeFlagPixel10 += et.x; \n" | |
| 1126 " storeFlagPixel10 += et.y; \n" | |
| 1127 " storeFlagPixel20 += et.x; \n" | |
| 1128 " storeFlagPixel11 += et.y; \n" | |
| 1129 " } \n" | |
| 1130 " \n" | |
| 1131 " { \n" | |
| 1132 " et.x = EdgeDetectColorCalcDiff(pixel01.rgb, pixel11.rgb); \n" | |
| 1133 " et.y = EdgeDetectColorCalcDiff(pixel01.rgb, pixel02.rgb); \n" | |
| 1134 " et = saturate(et - threshold); \n" | |
| 1135 " ivec2 eti = ivec2(et * 15.0 + 0.99); \n" | |
| 1136 " outputEdges.z = uint(eti.x | (eti.y << 4)); \n" | |
| 1137 " \n" | |
| 1138 " storeFlagPixel01 += et.x; \n" | |
| 1139 " storeFlagPixel01 += et.y; \n" | |
| 1140 " storeFlagPixel11 += et.x; \n" | |
| 1141 " storeFlagPixel02 += et.y; \n" | |
| 1142 " } \n" | |
| 1143 " \n" | |
| 1144 " { \n" | |
| 1145 " et.x = EdgeDetectColorCalcDiff(pixel11.rgb, pixel21.rgb); \n" | |
| 1146 " et.y = EdgeDetectColorCalcDiff(pixel11.rgb, pixel12.rgb); \n" | |
| 1147 " et = saturate(et - threshold); \n" | |
| 1148 " ivec2 eti = ivec2(et * 15.0 + 0.99); \n" | |
| 1149 " outputEdges.w = uint(eti.x | (eti.y << 4)); \n" | |
| 1150 " \n" | |
| 1151 " storeFlagPixel11 += et.x; \n" | |
| 1152 " storeFlagPixel11 += et.y; \n" | |
| 1153 " storeFlagPixel21 += et.x; \n" | |
| 1154 " storeFlagPixel12 += et.y; \n" | |
| 1155 " } \n" | |
| 1156 " \n" | |
| 1157 " gl_FragDepth = any(bvec4(outputEdges)) ? 1.0 : 0.0; \n" | |
| 1158 " \n" | |
| 1159 " if (gl_FragDepth != 0.0) { \n" | |
| 1160 " if (storeFlagPixel00 != 0.0) \n" | |
| 1161 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(0, 0),\n" | |
| 1162 " pixel00); \n" | |
| 1163 " if (storeFlagPixel10 != 0.0) \n" | |
| 1164 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(1, 0),\n" | |
| 1165 " pixel10); \n" | |
| 1166 " if (storeFlagPixel20 != 0.0) \n" | |
| 1167 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(2, 0),\n" | |
| 1168 " pixel20); \n" | |
| 1169 " if (storeFlagPixel01 != 0.0) \n" | |
| 1170 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(0, 1),\n" | |
| 1171 " pixel01); \n" | |
| 1172 " if (storeFlagPixel02 != 0.0) \n" | |
| 1173 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(0, 2),\n" | |
| 1174 " pixel02); \n" | |
| 1175 " if (storeFlagPixel11 != 0.0) \n" | |
| 1176 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(1, 1),\n" | |
| 1177 " pixel11); \n" | |
| 1178 " if (storeFlagPixel21 != 0.0) \n" | |
| 1179 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(2, 1),\n" | |
| 1180 " pixel21); \n" | |
| 1181 " if (storeFlagPixel12 != 0.0) \n" | |
| 1182 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(1, 2),\n" | |
| 1183 " pixel12); \n" | |
| 1184 " } \n" | |
| 1185 " outEdges = STORE_UVEC4(outputEdges); \n" | |
| 1186 "} \n" | |
| 1187 "#endif // DETECT_EDGES1 \n" | |
| 1188 " \n" | |
| 1189 "vec2 UnpackThresholds(uint val) { \n" | |
| 1190 " return vec2(val & 0x0Fu, val >> 4u) / 15.0f; \n" | |
| 1191 "} \n" | |
| 1192 " \n" | |
| 1193 "uint PruneNonDominantEdges(vec4 edges[3]) { \n" | |
| 1194 " vec4 maxE4 = vec4(0.0, 0.0, 0.0, 0.0); \n" | |
| 1195 " \n" | |
| 1196 " float avg = 0.0; \n" | |
| 1197 " \n" | |
| 1198 " for (int i = 0; i < 3; i++) { \n" | |
| 1199 " maxE4 = max(maxE4, edges[i]); \n" | |
| 1200 " \n" | |
| 1201 " avg = dot(edges[i], vec4(1, 1, 1, 1) / (3.0 * 4.0)); \n" | |
| 1202 " } \n" | |
| 1203 " \n" | |
| 1204 " vec2 maxE2 = max(maxE4.xy, maxE4.zw); \n" | |
| 1205 " float maxE = max(maxE2.x, maxE2.y); \n" | |
| 1206 " \n" | |
| 1207 " float threshold = avg * 0.65 + maxE * 0.35; \n" | |
| 1208 " \n" | |
| 1209 " // threshold = 0.0001; // this disables non-dominant edge pruning! \n" | |
| 1210 " \n" | |
| 1211 " uint cx = edges[0].x >= threshold ? 1u : 0u; \n" | |
| 1212 " uint cy = edges[0].y >= threshold ? 1u : 0u; \n" | |
| 1213 " return PackEdge(uvec4(cx, cy, 0, 0)); \n" | |
| 1214 "} \n" | |
| 1215 " \n" | |
| 1216 "void CollectEdges(int offX, \n" | |
| 1217 " int offY, \n" | |
| 1218 " out vec4 edges[3], \n" | |
| 1219 " const uint packedVals[6 * 6]) { \n" | |
| 1220 " vec2 pixelP0P0 = UnpackThresholds(packedVals[(offX)*6+(offY)]); \n" | |
| 1221 " vec2 pixelP1P0 = UnpackThresholds(packedVals[(offX+1)*6+(offY)]); \n" | |
| 1222 " vec2 pixelP0P1 = UnpackThresholds(packedVals[(offX)*6+(offY+1)]); \n" | |
| 1223 " vec2 pixelM1P0 = UnpackThresholds(packedVals[(offX-1)*6 +(offY)]); \n" | |
| 1224 " vec2 pixelP0M1 = UnpackThresholds(packedVals[(offX)*6+(offY-1)]); \n" | |
| 1225 " vec2 pixelP1M1 = UnpackThresholds(packedVals[(offX+1)*6 +(offY-1)]); \n" | |
| 1226 " vec2 pixelM1P1 = UnpackThresholds(packedVals[(offX-1)*6+(offY+1)]); \n" | |
| 1227 " \n" | |
| 1228 " edges[0].x = pixelP0P0.x; \n" | |
| 1229 " edges[0].y = pixelP0P0.y; \n" | |
| 1230 " edges[0].z = pixelP1P0.x; \n" | |
| 1231 " edges[0].w = pixelP1P0.y; \n" | |
| 1232 " edges[1].x = pixelP0P1.x; \n" | |
| 1233 " edges[1].y = pixelP0P1.y; \n" | |
| 1234 " edges[1].z = pixelM1P0.x; \n" | |
| 1235 " edges[1].w = pixelM1P0.y; \n" | |
| 1236 " edges[2].x = pixelP0M1.x; \n" | |
| 1237 " edges[2].y = pixelP0M1.y; \n" | |
| 1238 " edges[2].z = pixelP1M1.y; \n" | |
| 1239 " edges[2].w = pixelM1P1.x; \n" | |
| 1240 "} \n" | |
| 1241 " \n" | |
| 1242 "#ifdef DETECT_EDGES2 \n" | |
| 1243 "layout(early_fragment_tests) in; \n" | |
| 1244 "void DetectEdges2() { \n" | |
| 1245 " ivec2 screenPosI = ivec2(gl_FragCoord.xy); \n" | |
| 1246 " \n" | |
| 1247 " // source : edge differences from previous pass \n" | |
| 1248 " uint packedVals[6 * 6]; \n" | |
| 1249 " \n" | |
| 1250 " // center pixel (our output) \n" | |
| 1251 " UVEC4 packedQ4 = texelFetch(g_src0Texture4Uint, screenPosI.xy, 0); \n" | |
| 1252 " packedVals[(2) * 6 + (2)] = LOAD_UINT(packedQ4.x); \n" | |
| 1253 " packedVals[(3) * 6 + (2)] = LOAD_UINT(packedQ4.y); \n" | |
| 1254 " packedVals[(2) * 6 + (3)] = LOAD_UINT(packedQ4.z); \n" | |
| 1255 " packedVals[(3) * 6 + (3)] = LOAD_UINT(packedQ4.w); \n" | |
| 1256 " \n" | |
| 1257 " vec4 edges[3]; \n" | |
| 1258 " if (bool(packedVals[(2) * 6 + (2)]) || \n" | |
| 1259 " bool(packedVals[(3) * 6 + (2)])) { \n" | |
| 1260 " UVEC4 packedQ1 = texelFetchOffset(g_src0Texture4Uint, \n" | |
| 1261 " screenPosI.xy, 0, ivec2(0, -1)); \n" | |
| 1262 " packedVals[(2) * 6 + (0)] = LOAD_UINT(packedQ1.x); \n" | |
| 1263 " packedVals[(3) * 6 + (0)] = LOAD_UINT(packedQ1.y); \n" | |
| 1264 " packedVals[(2) * 6 + (1)] = LOAD_UINT(packedQ1.z); \n" | |
| 1265 " packedVals[(3) * 6 + (1)] = LOAD_UINT(packedQ1.w); \n" | |
| 1266 " } \n" | |
| 1267 " \n" | |
| 1268 " if (bool(packedVals[(2) * 6 + (2)]) || \n" | |
| 1269 " bool(packedVals[(2) * 6 + (3)])) { \n" | |
| 1270 " UVEC4 packedQ3 = texelFetchOffset(g_src0Texture4Uint, \n" | |
| 1271 " screenPosI.xy, 0, ivec2(-1, 0)); \n" | |
| 1272 " packedVals[(0) * 6 + (2)] = LOAD_UINT(packedQ3.x); \n" | |
| 1273 " packedVals[(1) * 6 + (2)] = LOAD_UINT(packedQ3.y); \n" | |
| 1274 " packedVals[(0) * 6 + (3)] = LOAD_UINT(packedQ3.z); \n" | |
| 1275 " packedVals[(1) * 6 + (3)] = LOAD_UINT(packedQ3.w); \n" | |
| 1276 " } \n" | |
| 1277 " \n" | |
| 1278 " if (bool(packedVals[(2) * 6 + (2)])) { \n" | |
| 1279 " CollectEdges(2, 2, edges, packedVals); \n" | |
| 1280 " uint pe = PruneNonDominantEdges(edges); \n" | |
| 1281 " if (pe != 0u) { \n" | |
| 1282 " imageStore(g_resultTexture, 2 * screenPosI.xy + ivec2(0, 0), \n" | |
| 1283 " vec4(float(0x80u | pe) / 255.0, 0, 0, 0)); \n" | |
| 1284 " } \n" | |
| 1285 " } \n" | |
| 1286 " \n" | |
| 1287 " if (bool(packedVals[(3) * 6 + (2)]) || \n" | |
| 1288 " bool(packedVals[(3) * 6 + (3)])) { \n" | |
| 1289 " UVEC4 packedQ5 = texelFetchOffset(g_src0Texture4Uint, \n" | |
| 1290 " screenPosI.xy, 0, ivec2(1, 0)); \n" | |
| 1291 " packedVals[(4) * 6 + (2)] = LOAD_UINT(packedQ5.x); \n" | |
| 1292 " packedVals[(5) * 6 + (2)] = LOAD_UINT(packedQ5.y); \n" | |
| 1293 " packedVals[(4) * 6 + (3)] = LOAD_UINT(packedQ5.z); \n" | |
| 1294 " packedVals[(5) * 6 + (3)] = LOAD_UINT(packedQ5.w); \n" | |
| 1295 " } \n" | |
| 1296 " \n" | |
| 1297 " if (bool(packedVals[(3) * 6 + (2)])) { \n" | |
| 1298 " UVEC4 packedQ2 = texelFetchOffset(g_src0Texture4Uint, \n" | |
| 1299 " screenPosI.xy, 0, ivec2(1, -1)); \n" | |
| 1300 " packedVals[(4) * 6 + (0)] = LOAD_UINT(packedQ2.x); \n" | |
| 1301 " packedVals[(5) * 6 + (0)] = LOAD_UINT(packedQ2.y); \n" | |
| 1302 " packedVals[(4) * 6 + (1)] = LOAD_UINT(packedQ2.z); \n" | |
| 1303 " packedVals[(5) * 6 + (1)] = LOAD_UINT(packedQ2.w); \n" | |
| 1304 " \n" | |
| 1305 " CollectEdges(3, 2, edges, packedVals); \n" | |
| 1306 " uint pe = PruneNonDominantEdges(edges); \n" | |
| 1307 " if (pe != 0u) { \n" | |
| 1308 " imageStore(g_resultTexture, 2 * screenPosI.xy + ivec2(1, 0), \n" | |
| 1309 " vec4(float(0x80u | pe) / 255.0, 0, 0, 0)); \n" | |
| 1310 " } \n" | |
| 1311 " } \n" | |
| 1312 " \n" | |
| 1313 " if (bool(packedVals[(2) * 6 + (3)]) || \n" | |
| 1314 " bool(packedVals[(3) * 6 + (3)])) { \n" | |
| 1315 " UVEC4 packedQ7 = texelFetchOffset(g_src0Texture4Uint, \n" | |
| 1316 " screenPosI.xy, 0, ivec2(0, 1)); \n" | |
| 1317 " packedVals[(2) * 6 + (4)] = LOAD_UINT(packedQ7.x); \n" | |
| 1318 " packedVals[(3) * 6 + (4)] = LOAD_UINT(packedQ7.y); \n" | |
| 1319 " packedVals[(2) * 6 + (5)] = LOAD_UINT(packedQ7.z); \n" | |
| 1320 " packedVals[(3) * 6 + (5)] = LOAD_UINT(packedQ7.w); \n" | |
| 1321 " } \n" | |
| 1322 " \n" | |
| 1323 " if (bool(packedVals[(2) * 6 + (3)])) { \n" | |
| 1324 " UVEC4 packedQ6 = texelFetchOffset(g_src0Texture4Uint, \n" | |
| 1325 " screenPosI.xy, 0, ivec2(-1, -1));\n" | |
| 1326 " packedVals[(0) * 6 + (4)] = LOAD_UINT(packedQ6.x); \n" | |
| 1327 " packedVals[(1) * 6 + (4)] = LOAD_UINT(packedQ6.y); \n" | |
| 1328 " packedVals[(0) * 6 + (5)] = LOAD_UINT(packedQ6.z); \n" | |
| 1329 " packedVals[(1) * 6 + (5)] = LOAD_UINT(packedQ6.w); \n" | |
| 1330 " \n" | |
| 1331 " CollectEdges(2, 3, edges, packedVals); \n" | |
| 1332 " uint pe = PruneNonDominantEdges(edges); \n" | |
| 1333 " if (pe != 0u) { \n" | |
| 1334 " imageStore(g_resultTexture, 2 * screenPosI.xy + ivec2(0, 1), \n" | |
| 1335 " vec4(float(0x80u | pe) / 255.0, 0, 0, 0)); \n" | |
| 1336 " } \n" | |
| 1337 " } \n" | |
| 1338 " \n" | |
| 1339 " if (bool(packedVals[(3) * 6 + (3)])) { \n" | |
| 1340 " CollectEdges(3, 3, edges, packedVals); \n" | |
| 1341 " uint pe = PruneNonDominantEdges(edges); \n" | |
| 1342 " if (pe != 0u) { \n" | |
| 1343 " imageStore(g_resultTexture, 2 * screenPosI.xy + ivec2(1, 1), \n" | |
| 1344 " vec4(float(0x80u | pe) / 255.0, 0, 0, 0)); \n" | |
| 1345 " } \n" | |
| 1346 " } \n" | |
| 1347 "} \n" | |
| 1348 "#endif // DETECT_EDGES2 \n" | |
| 1349 " \n" | |
| 1350 "#ifdef COMBINE_EDGES \n" | |
| 1351 "void CombineEdges() { \n" | |
| 1352 " ivec3 screenPosIBase = ivec3(ivec2(gl_FragCoord.xy) * 2, 0); \n" | |
| 1353 " vec3 screenPosBase = vec3(screenPosIBase); \n" | |
| 1354 " uint packedEdgesArray[3 * 3]; \n" | |
| 1355 " \n" | |
| 1356 " // use only if it has the 'prev frame' flag:[sample * 255.0 - 127.5] \n" | |
| 1357 " //-> if it has the last bit flag (128), it's going to stay above 0 \n" | |
| 1358 " uvec4 sampA = uvec4( \n" | |
| 1359 " textureGatherOffset(g_src0TextureFlt, \n" | |
| 1360 " screenPosBase.xy * g_OneOverScreenSize, \n" | |
| 1361 " ivec2(1, 0)) * 255.0 - 127.5); \n" | |
| 1362 " uvec4 sampB = uvec4( \n" | |
| 1363 " textureGatherOffset(g_src0TextureFlt, \n" | |
| 1364 " screenPosBase.xy * g_OneOverScreenSize, \n" | |
| 1365 " ivec2(0, 1)) * 255.0 - 127.5); \n" | |
| 1366 " uint sampC = uint( \n" | |
| 1367 " texelFetchOffset(g_src0TextureFlt, screenPosIBase.xy, 0, \n" | |
| 1368 " ivec2(1, 1)).r * 255.0 - 127.5); \n" | |
| 1369 " \n" | |
| 1370 " packedEdgesArray[(0) * 3 + (0)] = 0u; \n" | |
| 1371 " packedEdgesArray[(1) * 3 + (0)] = sampA.w; \n" | |
| 1372 " packedEdgesArray[(2) * 3 + (0)] = sampA.z; \n" | |
| 1373 " packedEdgesArray[(1) * 3 + (1)] = sampA.x; \n" | |
| 1374 " packedEdgesArray[(2) * 3 + (1)] = sampA.y; \n" | |
| 1375 " packedEdgesArray[(0) * 3 + (1)] = sampB.w; \n" | |
| 1376 " packedEdgesArray[(0) * 3 + (2)] = sampB.x; \n" | |
| 1377 " packedEdgesArray[(1) * 3 + (2)] = sampB.y; \n" | |
| 1378 " packedEdgesArray[(2) * 3 + (2)] = sampC; \n" | |
| 1379 " \n" | |
| 1380 " uvec4 pixelsC = uvec4(packedEdgesArray[(1 + 0) * 3 + (1 + 0)], \n" | |
| 1381 " packedEdgesArray[(1 + 1) * 3 + (1 + 0)], \n" | |
| 1382 " packedEdgesArray[(1 + 0) * 3 + (1 + 1)], \n" | |
| 1383 " packedEdgesArray[(1 + 1) * 3 + (1 + 1)]); \n" | |
| 1384 " uvec4 pixelsL = uvec4(packedEdgesArray[(0 + 0) * 3 + (1 + 0)], \n" | |
| 1385 " packedEdgesArray[(0 + 1) * 3 + (1 + 0)], \n" | |
| 1386 " packedEdgesArray[(0 + 0) * 3 + (1 + 1)], \n" | |
| 1387 " packedEdgesArray[(0 + 1) * 3 + (1 + 1)]); \n" | |
| 1388 " uvec4 pixelsU = uvec4(packedEdgesArray[(1 + 0) * 3 + (0 + 0)], \n" | |
| 1389 " packedEdgesArray[(1 + 1) * 3 + (0 + 0)], \n" | |
| 1390 " packedEdgesArray[(1 + 0) * 3 + (0 + 1)], \n" | |
| 1391 " packedEdgesArray[(1 + 1) * 3 + (0 + 1)]); \n" | |
| 1392 " \n" | |
| 1393 " uvec4 outEdge4 = \n" | |
| 1394 " pixelsC | ((pixelsL & 0x01u) << 2u) | ((pixelsU & 0x02u) << 2u); \n" | |
| 1395 " vec4 outEdge4Flt = vec4(outEdge4) / 255.0; \n" | |
| 1396 " \n" | |
| 1397 " imageStore(g_resultTextureSlot2, screenPosIBase.xy + ivec2(0, 0), \n" | |
| 1398 " outEdge4Flt.xxxx); \n" | |
| 1399 " imageStore(g_resultTextureSlot2, screenPosIBase.xy + ivec2(1, 0), \n" | |
| 1400 " outEdge4Flt.yyyy); \n" | |
| 1401 " imageStore(g_resultTextureSlot2, screenPosIBase.xy + ivec2(0, 1), \n" | |
| 1402 " outEdge4Flt.zzzz); \n" | |
| 1403 " imageStore(g_resultTextureSlot2, screenPosIBase.xy + ivec2(1, 1), \n" | |
| 1404 " outEdge4Flt.wwww); \n" | |
| 1405 " \n" | |
| 1406 " // uvec4 numberOfEdges4 = uvec4(bitCount(outEdge4)); \n" | |
| 1407 " // gl_FragDepth = \n" | |
| 1408 " // any(greaterThan(numberOfEdges4, uvec4(1))) ? 1.0 : 0.0; \n" | |
| 1409 " \n" | |
| 1410 " gl_FragDepth = \n" | |
| 1411 " any(greaterThan(outEdge4, uvec4(1))) ? 1.0 : 0.0; \n" | |
| 1412 "} \n" | |
| 1413 "#endif // COMBINE_EDGES \n" | |
| 1414 " \n" | |
| 1415 "#ifdef BLUR_EDGES \n" | |
| 1416 "layout(early_fragment_tests) in; \n" | |
| 1417 "void BlurEdges() { \n" | |
| 1418 " int _i; \n" | |
| 1419 " \n" | |
| 1420 " ivec3 screenPosIBase = ivec3(ivec2(gl_FragCoord.xy) * 2, 0); \n" | |
| 1421 " vec3 screenPosBase = vec3(screenPosIBase); \n" | |
| 1422 " uint forFollowUpCount = 0u; \n" | |
| 1423 " ivec4 forFollowUpCoords[4]; \n" | |
| 1424 " \n" | |
| 1425 " uint packedEdgesArray[4 * 4]; \n" | |
| 1426 " \n" | |
| 1427 " uvec4 sampA = uvec4( \n" | |
| 1428 " textureGatherOffset(g_src0TextureFlt, \n" | |
| 1429 " screenPosBase.xy * g_OneOverScreenSize, \n" | |
| 1430 " ivec2(0, 0)) *255.5); \n" | |
| 1431 " uvec4 sampB = uvec4( \n" | |
| 1432 " textureGatherOffset(g_src0TextureFlt, \n" | |
| 1433 " screenPosBase.xy * g_OneOverScreenSize, \n" | |
| 1434 " ivec2(2, 0)) *255.5); \n" | |
| 1435 " uvec4 sampC = uvec4( \n" | |
| 1436 " textureGatherOffset(g_src0TextureFlt, \n" | |
| 1437 " screenPosBase.xy * g_OneOverScreenSize, \n" | |
| 1438 " ivec2(0, 2)) *255.5); \n" | |
| 1439 " uvec4 sampD = uvec4( \n" | |
| 1440 " textureGatherOffset(g_src0TextureFlt, \n" | |
| 1441 " screenPosBase.xy * g_OneOverScreenSize, \n" | |
| 1442 " ivec2(2, 2)) *255.5); \n" | |
| 1443 " \n" | |
| 1444 " packedEdgesArray[(0) * 4 + (0)] = sampA.w; \n" | |
| 1445 " packedEdgesArray[(1) * 4 + (0)] = sampA.z; \n" | |
| 1446 " packedEdgesArray[(0) * 4 + (1)] = sampA.x; \n" | |
| 1447 " packedEdgesArray[(1) * 4 + (1)] = sampA.y; \n" | |
| 1448 " packedEdgesArray[(2) * 4 + (0)] = sampB.w; \n" | |
| 1449 " packedEdgesArray[(3) * 4 + (0)] = sampB.z; \n" | |
| 1450 " packedEdgesArray[(2) * 4 + (1)] = sampB.x; \n" | |
| 1451 " packedEdgesArray[(3) * 4 + (1)] = sampB.y; \n" | |
| 1452 " packedEdgesArray[(0) * 4 + (2)] = sampC.w; \n" | |
| 1453 " packedEdgesArray[(1) * 4 + (2)] = sampC.z; \n" | |
| 1454 " packedEdgesArray[(0) * 4 + (3)] = sampC.x; \n" | |
| 1455 " packedEdgesArray[(1) * 4 + (3)] = sampC.y; \n" | |
| 1456 " packedEdgesArray[(2) * 4 + (2)] = sampD.w; \n" | |
| 1457 " packedEdgesArray[(3) * 4 + (2)] = sampD.z; \n" | |
| 1458 " packedEdgesArray[(2) * 4 + (3)] = sampD.x; \n" | |
| 1459 " packedEdgesArray[(3) * 4 + (3)] = sampD.y; \n" | |
| 1460 " \n" | |
| 1461 " for (_i = 0; _i < 4; _i++) { \n" | |
| 1462 " int _x = _i % 2; \n" | |
| 1463 " int _y = _i / 2; \n" | |
| 1464 " \n" | |
| 1465 " ivec3 screenPosI = screenPosIBase + ivec3(_x, _y, 0); \n" | |
| 1466 " \n" | |
| 1467 " uint packedEdgesC = packedEdgesArray[(1 + _x) * 4 + (1 + _y)]; \n" | |
| 1468 " \n" | |
| 1469 " uvec4 edges = UnpackEdge(packedEdgesC); \n" | |
| 1470 " vec4 edgesFlt = vec4(edges); \n" | |
| 1471 " \n" | |
| 1472 " float numberOfEdges = dot(edgesFlt, vec4(1, 1, 1, 1)); \n" | |
| 1473 " if (numberOfEdges < 2.0) \n" | |
| 1474 " continue; \n" | |
| 1475 " \n" | |
| 1476 " float fromRight = edgesFlt.r; \n" | |
| 1477 " float fromBelow = edgesFlt.g; \n" | |
| 1478 " float fromLeft = edgesFlt.b; \n" | |
| 1479 " float fromAbove = edgesFlt.a; \n" | |
| 1480 " \n" | |
| 1481 " vec4 xFroms = vec4(fromBelow, fromAbove, fromRight, fromLeft); \n" | |
| 1482 " \n" | |
| 1483 " float blurCoeff = 0.0; \n" | |
| 1484 " \n" | |
| 1485 " // These are additional blurs that complement the main line-based \n" | |
| 1486 " // blurring; Unlike line-based, these do not necessarily preserve \n" | |
| 1487 " // the total amount of screen colour as they will take \n" | |
| 1488 " // neighbouring pixel colours and apply them to the one currently \n" | |
| 1489 " // processed. \n" | |
| 1490 " \n" | |
| 1491 " // 1.) L-like shape. \n" | |
| 1492 " // For this shape, the total amount of screen colour will be \n" | |
| 1493 " // preserved when this is a part of a (zigzag) diagonal line as the\n" | |
| 1494 " // corners from the other side will do the same and take some of \n" | |
| 1495 " // the current pixel's colour in return. \n" | |
| 1496 " // However, in the case when this is an actual corner, the pixel's \n" | |
| 1497 " // colour will be partially overwritten by it's 2 neighbours. \n" | |
| 1498 " // if( numberOfEdges > 1.0 ) \n" | |
| 1499 " { \n" | |
| 1500 " // with value of 0.15, the pixel will retain approx 77% of its \n" | |
| 1501 " // colour and the remaining 23% will come from its 2 neighbours \n" | |
| 1502 " // (which are likely to be blurred too in the opposite direction)\n" | |
| 1503 " blurCoeff = 0.08; \n" | |
| 1504 " \n" | |
| 1505 " // Only do blending if it's L shape - if we're between two \n" | |
| 1506 " // parallel edges, don't do anything \n" | |
| 1507 " blurCoeff *= (1.0 - fromBelow * fromAbove) * \n" | |
| 1508 " (1.0 - fromRight * fromLeft); \n" | |
| 1509 " } \n" | |
| 1510 " \n" | |
| 1511 " // 2.) U-like shape (surrounded with edges from 3 sides) \n" | |
| 1512 " if (numberOfEdges > 2.0) { \n" | |
| 1513 " // with value of 0.13, the pixel will retain approx 72% of its \n" | |
| 1514 " // colour and the remaining 28% will be picked from its 3 \n" | |
| 1515 " // neighbours (which are unlikely to be blurred too but could be)\n" | |
| 1516 " blurCoeff = 0.11; \n" | |
| 1517 " } \n" | |
| 1518 " \n" | |
| 1519 " // 3.) Completely surrounded with edges from all 4 sides \n" | |
| 1520 " if (numberOfEdges > 3.0) { \n" | |
| 1521 " // with value of 0.07, the pixel will retain 78% of its colour \n" | |
| 1522 " // and the remaining 22% will come from its 4 neighbours (which \n" | |
| 1523 " // are unlikely to be blurred) \n" | |
| 1524 " blurCoeff = 0.05; \n" | |
| 1525 " } \n" | |
| 1526 " \n" | |
| 1527 " if (blurCoeff == 0.0) { \n" | |
| 1528 " // this avoids Z search below as well but that's ok because a Z \n" | |
| 1529 " // shape will also always have some blurCoeff \n" | |
| 1530 " continue; \n" | |
| 1531 " } \n" | |
| 1532 " \n" | |
| 1533 " vec4 blurMap = xFroms * blurCoeff; \n" | |
| 1534 " \n" | |
| 1535 " vec4 pixelC = texelFetch(g_screenTexture, screenPosI.xy, 0); \n" | |
| 1536 " \n" | |
| 1537 " const float centerWeight = 1.0; \n" | |
| 1538 " float fromBelowWeight = blurMap.x; \n" | |
| 1539 " float fromAboveWeight = blurMap.y; \n" | |
| 1540 " float fromRightWeight = blurMap.z; \n" | |
| 1541 " float fromLeftWeight = blurMap.w; \n" | |
| 1542 " \n" | |
| 1543 " // this would be the proper math for blending if we were handling \n" | |
| 1544 " // lines (Zs) and mini kernel smoothing here, but since we're doing\n" | |
| 1545 " // lines separately, no need to complicate, just tweak the settings\n" | |
| 1546 " // float fromBelowWeight = (1.0 / (1.0 - blurMap.x)) - 1.0; \n" | |
| 1547 " // float fromAboveWeight = (1.0 / (1.0 - blurMap.y)) - 1.0; \n" | |
| 1548 " // float fromRightWeight = (1.0 / (1.0 - blurMap.z)) - 1.0; \n" | |
| 1549 " // float fromLeftWeight = (1.0 / (1.0 - blurMap.w)) - 1.0; \n" | |
| 1550 " \n" | |
| 1551 " float fourWeightSum = dot(blurMap, vec4(1, 1, 1, 1)); \n" | |
| 1552 " float allWeightSum = centerWeight + fourWeightSum; \n" | |
| 1553 " \n" | |
| 1554 " vec4 color = vec4(0, 0, 0, 0); \n" | |
| 1555 " if (fromLeftWeight > 0.0) { \n" | |
| 1556 " vec3 pixelL = texelFetchOffset(g_screenTexture, screenPosI.xy, 0,\n" | |
| 1557 " ivec2(-1, 0)).rgb; \n" | |
| 1558 " color.rgb += fromLeftWeight * pixelL; \n" | |
| 1559 " } \n" | |
| 1560 " if (fromAboveWeight > 0.0) { \n" | |
| 1561 " vec3 pixelT = texelFetchOffset(g_screenTexture, screenPosI.xy, 0,\n" | |
| 1562 " ivec2(0, -1)).rgb; \n" | |
| 1563 " color.rgb += fromAboveWeight * pixelT; \n" | |
| 1564 " } \n" | |
| 1565 " if (fromRightWeight > 0.0) { \n" | |
| 1566 " vec3 pixelR = texelFetchOffset(g_screenTexture, screenPosI.xy, 0,\n" | |
| 1567 " ivec2(1, 0)).rgb; \n" | |
| 1568 " color.rgb += fromRightWeight * pixelR; \n" | |
| 1569 " } \n" | |
| 1570 " if (fromBelowWeight > 0.0) { \n" | |
| 1571 " vec3 pixelB = texelFetchOffset(g_screenTexture, screenPosI.xy, 0,\n" | |
| 1572 " ivec2(0, 1)).rgb; \n" | |
| 1573 " color.rgb += fromBelowWeight * pixelB; \n" | |
| 1574 " } \n" | |
| 1575 " \n" | |
| 1576 " color /= fourWeightSum + 0.0001; \n" | |
| 1577 " color.a = 1.0 - centerWeight / allWeightSum; \n" | |
| 1578 " \n" | |
| 1579 " color.rgb = mix(pixelC.rgb, color.rgb, color.a).rgb; \n" | |
| 1580 "#ifdef IN_GAMMA_CORRECT_MODE \n" | |
| 1581 " color.rgb = D3DX_FLOAT3_to_SRGB(color.rgb); \n" | |
| 1582 "#endif \n" | |
| 1583 " \n" | |
| 1584 "#ifdef DEBUG_OUTPUT_AAINFO \n" | |
| 1585 " imageStore(g_resultTextureSlot2, screenPosI.xy, \n" | |
| 1586 " PackBlurAAInfo(screenPosI.xy, uint(numberOfEdges))); \n" | |
| 1587 "#endif \n" | |
| 1588 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy, \n" | |
| 1589 " vec4(color.rgb, pixelC.a)); \n" | |
| 1590 " \n" | |
| 1591 " if (numberOfEdges == 2.0) { \n" | |
| 1592 " uint packedEdgesL = packedEdgesArray[(0 + _x) * 4 + (1 + _y)]; \n" | |
| 1593 " uint packedEdgesT = packedEdgesArray[(1 + _x) * 4 + (0 + _y)]; \n" | |
| 1594 " uint packedEdgesR = packedEdgesArray[(2 + _x) * 4 + (1 + _y)]; \n" | |
| 1595 " uint packedEdgesB = packedEdgesArray[(1 + _x) * 4 + (2 + _y)]; \n" | |
| 1596 " \n" | |
| 1597 " bool isHorizontalA = ((packedEdgesC) == (0x01u | 0x02u)) && \n" | |
| 1598 " ((packedEdgesR & (0x01u | 0x08u)) == (0x08u)); \n" | |
| 1599 " bool isHorizontalB = ((packedEdgesC) == (0x01u | 0x08u)) && \n" | |
| 1600 " ((packedEdgesR & (0x01u | 0x02u)) == (0x02u)); \n" | |
| 1601 " \n" | |
| 1602 " bool isHCandidate = isHorizontalA || isHorizontalB; \n" | |
| 1603 " \n" | |
| 1604 " bool isVerticalA = ((packedEdgesC) == (0x08u | 0x01u)) && \n" | |
| 1605 " ((packedEdgesT & (0x08u | 0x04u)) == (0x04u)); \n" | |
| 1606 " bool isVerticalB = ((packedEdgesC) == (0x08u | 0x04u)) && \n" | |
| 1607 " ((packedEdgesT & (0x08u | 0x01u)) == (0x01u)); \n" | |
| 1608 " bool isVCandidate = isVerticalA || isVerticalB; \n" | |
| 1609 " \n" | |
| 1610 " bool isCandidate = isHCandidate || isVCandidate; \n" | |
| 1611 " \n" | |
| 1612 " if (!isCandidate) \n" | |
| 1613 " continue; \n" | |
| 1614 " \n" | |
| 1615 " bool horizontal = isHCandidate; \n" | |
| 1616 " \n" | |
| 1617 " // what if both are candidates? do additional pruning (still not \n" | |
| 1618 " // 100% but gets rid of worst case errors) \n" | |
| 1619 " if (isHCandidate && isVCandidate) \n" | |
| 1620 " horizontal = \n" | |
| 1621 " (isHorizontalA && ((packedEdgesL & 0x02u) == 0x02u)) || \n" | |
| 1622 " (isHorizontalB && ((packedEdgesL & 0x08u) == 0x08u)); \n" | |
| 1623 " \n" | |
| 1624 " ivec2 offsetC; \n" | |
| 1625 " uint packedEdgesM1P0; \n" | |
| 1626 " uint packedEdgesP1P0; \n" | |
| 1627 " if (horizontal) { \n" | |
| 1628 " packedEdgesM1P0 = packedEdgesL; \n" | |
| 1629 " packedEdgesP1P0 = packedEdgesR; \n" | |
| 1630 " offsetC = ivec2(2, 0); \n" | |
| 1631 " } else { \n" | |
| 1632 " packedEdgesM1P0 = packedEdgesB; \n" | |
| 1633 " packedEdgesP1P0 = packedEdgesT; \n" | |
| 1634 " offsetC = ivec2(0, -2); \n" | |
| 1635 " } \n" | |
| 1636 " \n" | |
| 1637 " uvec4 edgesM1P0 = UnpackEdge(packedEdgesM1P0); \n" | |
| 1638 " uvec4 edgesP1P0 = UnpackEdge(packedEdgesP1P0); \n" | |
| 1639 " uvec4 edgesP2P0 = UnpackEdge(uint(texelFetch( \n" | |
| 1640 " g_src0TextureFlt, screenPosI.xy + offsetC, 0).r * 255.5)); \n" | |
| 1641 " \n" | |
| 1642 " uvec4 arg0; \n" | |
| 1643 " uvec4 arg1; \n" | |
| 1644 " uvec4 arg2; \n" | |
| 1645 " uvec4 arg3; \n" | |
| 1646 " bool arg4; \n" | |
| 1647 " \n" | |
| 1648 " if (horizontal) { \n" | |
| 1649 " arg0 = uvec4(edges); \n" | |
| 1650 " arg1 = edgesM1P0; \n" | |
| 1651 " arg2 = edgesP1P0; \n" | |
| 1652 " arg3 = edgesP2P0; \n" | |
| 1653 " arg4 = true; \n" | |
| 1654 " } else { \n" | |
| 1655 " // Reuse the same code for vertical (used for horizontal above)\n" | |
| 1656 " // but rotate input data 90º counter-clockwise, so that: \n" | |
| 1657 " // left becomes bottom \n" | |
| 1658 " // top becomes left \n" | |
| 1659 " // right becomes top \n" | |
| 1660 " // bottom becomes right \n" | |
| 1661 " \n" | |
| 1662 " // we also have to rotate edges, thus .argb \n" | |
| 1663 " arg0 = uvec4(edges.argb); \n" | |
| 1664 " arg1 = edgesM1P0.argb; \n" | |
| 1665 " arg2 = edgesP1P0.argb; \n" | |
| 1666 " arg3 = edgesP2P0.argb; \n" | |
| 1667 " arg4 = false; \n" | |
| 1668 " } \n" | |
| 1669 " \n" | |
| 1670 " { \n" | |
| 1671 " ivec2 screenPos = screenPosI.xy; \n" | |
| 1672 " uvec4 _edges = arg0; \n" | |
| 1673 " uvec4 _edgesM1P0 = arg1; \n" | |
| 1674 " uvec4 _edgesP1P0 = arg2; \n" | |
| 1675 " uvec4 _edgesP2P0 = arg3; \n" | |
| 1676 " bool horizontal = arg4; \n" | |
| 1677 " // Inverted Z case: \n" | |
| 1678 " // __ \n" | |
| 1679 " // X| \n" | |
| 1680 " // ¯¯ \n" | |
| 1681 " bool isInvertedZ = false; \n" | |
| 1682 " bool isNormalZ = false; \n" | |
| 1683 " { \n" | |
| 1684 "#ifndef SETTINGS_ALLOW_SHORT_Zs \n" | |
| 1685 " // (1u-_edges.a) constraint can be removed; it was added for \n" | |
| 1686 " // some rare cases \n" | |
| 1687 " uint isZShape = _edges.r * _edges.g * _edgesM1P0.g * \n" | |
| 1688 " _edgesP1P0.a *_edgesP2P0.a * (1u - _edges.b) * \n" | |
| 1689 " (1u - _edgesP1P0.r) * (1u - _edges.a) * \n" | |
| 1690 " (1u - _edgesP1P0.g); \n" | |
| 1691 "#else \n" | |
| 1692 " uint isZShape = _edges.r * _edges.g * _edgesP1P0.a * \n" | |
| 1693 " (1u - _edges.b) * (1u - _edgesP1P0.r) * (1u - _edges.a) *\n" | |
| 1694 " (1u - _edgesP1P0.g); \n" | |
| 1695 " isZShape *= (_edgesM1P0.g + _edgesP2P0.a); \n" | |
| 1696 " // and at least one of these need to be there\n" | |
| 1697 "#endif \n" | |
| 1698 " if (isZShape > 0u) { \n" | |
| 1699 " isInvertedZ = true; \n" | |
| 1700 " } \n" | |
| 1701 " } \n" | |
| 1702 " \n" | |
| 1703 " // Normal Z case: \n" | |
| 1704 " // __ \n" | |
| 1705 " // X| \n" | |
| 1706 " // ¯¯ \n" | |
| 1707 " { \n" | |
| 1708 "#ifndef SETTINGS_ALLOW_SHORT_Zs \n" | |
| 1709 " uint isZShape = _edges.r * _edges.a * _edgesM1P0.a * \n" | |
| 1710 " _edgesP1P0.g * _edgesP2P0.g * (1u - _edges.b) * \n" | |
| 1711 " (1u - _edgesP1P0.r) * (1u - _edges.g) * \n" | |
| 1712 " (1u - _edgesP1P0.a); \n" | |
| 1713 "#else \n" | |
| 1714 " uint isZShape = _edges.r * _edges.a * _edgesP1P0.g * \n" | |
| 1715 " (1u - _edges.b) * (1u - _edgesP1P0.r) * (1u - _edges.g) *\n" | |
| 1716 " (1u - _edgesP1P0.a); \n" | |
| 1717 " isZShape *= \n" | |
| 1718 " (_edgesM1P0.a + _edgesP2P0.g); \n" | |
| 1719 " // and at least one of these need to be there\n" | |
| 1720 "#endif \n" | |
| 1721 " \n" | |
| 1722 " if (isZShape > 0u) { \n" | |
| 1723 " isNormalZ = true; \n" | |
| 1724 " } \n" | |
| 1725 " } \n" | |
| 1726 " \n" | |
| 1727 " bool isZ = isInvertedZ || isNormalZ; \n" | |
| 1728 " if (isZ) { \n" | |
| 1729 " forFollowUpCoords[forFollowUpCount++] = \n" | |
| 1730 " ivec4(screenPosI.xy, horizontal, isInvertedZ); \n" | |
| 1731 " } \n" | |
| 1732 " } \n" | |
| 1733 " } \n" | |
| 1734 " } \n" | |
| 1735 " \n" | |
| 1736 " // This code below is the only potential bug with this algorithm : \n" | |
| 1737 " // it HAS to be executed after the simple shapes above. It used to be\n" | |
| 1738 " // executed as separate compute shader (by storing the packed \n" | |
| 1739 " // 'forFollowUpCoords' in an append buffer and consuming it later) \n" | |
| 1740 " // but the whole thing (append/consume buffers, using CS) appears to \n" | |
| 1741 " // be too inefficient on most hardware. \n" | |
| 1742 " // However, it seems to execute fairly efficiently here and without \n" | |
| 1743 " // any issues, although there is no 100% guarantee that this code \n" | |
| 1744 " // below will execute across all pixels (it has a c_maxLineLength \n" | |
| 1745 " // wide kernel) after other shaders processing same pixels have done \n" | |
| 1746 " // solving simple shapes. It appears to work regardless, across all \n" | |
| 1747 " // hardware; pixels with 1-edge or two opposing edges are ignored by \n" | |
| 1748 " // simple shapes anyway and other shapes stop the long line \n" | |
| 1749 " // algorithm from executing the only danger appears to be simple \n" | |
| 1750 " // shape L's colliding with Z shapes from neighbouring pixels but I \n" | |
| 1751 " // couldn't reproduce any problems on any hardware. \n" | |
| 1752 " for (uint _i = 0u; _i < forFollowUpCount; _i++) { \n" | |
| 1753 " ivec4 data = forFollowUpCoords[_i]; \n" | |
| 1754 " ProcessDetectedZ(data.xy, bool(data.z), bool(data.w)); \n" | |
| 1755 " } \n" | |
| 1756 "} \n" | |
| 1757 "#endif // BLUR_EDGES \n" | |
| 1758 " \n" | |
| 1759 "#ifdef DISPLAY_EDGES \n" | |
| 1760 "layout(location = 0) out vec4 color; \n" | |
| 1761 "layout(location = 1) out vec4 hasEdges; \n" | |
| 1762 "void DisplayEdges() { \n" | |
| 1763 " ivec2 screenPosI = ivec2(gl_FragCoord.xy); \n" | |
| 1764 " \n" | |
| 1765 " uint packedEdges, shapeType; \n" | |
| 1766 " UnpackBlurAAInfo(texelFetch(g_src0TextureFlt, screenPosI, 0).r, \n" | |
| 1767 " packedEdges, shapeType); \n" | |
| 1768 " \n" | |
| 1769 " vec4 edges = vec4(UnpackEdge(packedEdges)); \n" | |
| 1770 " if (any(greaterThan(edges.xyzw, vec4(0)))) { \n" | |
| 1771 "#ifdef IN_BGR_MODE \n" | |
| 1772 " color = c_edgeDebugColours[shapeType].bgra; \n" | |
| 1773 "#else \n" | |
| 1774 " color = c_edgeDebugColours[shapeType]; \n" | |
| 1775 "#endif \n" | |
| 1776 " hasEdges = vec4(1.0); \n" | |
| 1777 " } else { \n" | |
| 1778 " color = vec4(0); \n" | |
| 1779 " hasEdges = vec4(0.0); \n" | |
| 1780 " } \n" | |
| 1781 "} \n" | |
| 1782 "#endif // DISPLAY_EDGES \n" | |
| 1783 " \n" | |
| 1784 "void main() { \n" | |
| 1785 "#ifdef DETECT_EDGES1 \n" | |
| 1786 " DetectEdges1(); \n" | |
| 1787 "#endif \n" | |
| 1788 "#if defined DETECT_EDGES2 \n" | |
| 1789 " DetectEdges2(); \n" | |
| 1790 "#endif \n" | |
| 1791 "#if defined COMBINE_EDGES \n" | |
| 1792 " CombineEdges(); \n" | |
| 1793 "#endif \n" | |
| 1794 "#if defined BLUR_EDGES \n" | |
| 1795 " BlurEdges(); \n" | |
| 1796 "#endif \n" | |
| 1797 "#if defined DISPLAY_EDGES \n" | |
| 1798 " DisplayEdges(); \n" | |
| 1799 "#endif \n" | |
| 1800 "} \n"; | |
| 1801 | |
// GLSL fragment shader source used to copy a texture one texel at a time.
//
// For each fragment, main() converts gl_FragCoord.xy to an integer position,
// reads the texel at that position from `inTexture` with texelFetch (mip
// level 0), and then emits it in one of two ways, selected at shader compile
// time:
//   * OUT_FBO defined:     the texel is written to the fragment output
//                          `outColor` (location 0).
//   * OUT_FBO not defined: the texel is written to the rgba8 image
//                          `outTexture` at the same position via imageStore.
//
// When GL_ES is defined the image uniform is declared with an explicit
// `binding = 0`; otherwise its layout carries only the `rgba8` format.
//
// NOTE(review): this string contains neither a #version directive nor a
// definition of OUT_FBO; presumably both are prepended by the code that
// compiles it (likely once per variant, for copy_to_framebuffer_shader_ and
// copy_to_image_shader_) -- TODO confirm at the compile site.
// NOTE(review): in the non-GL_ES path the image unit for `outTexture` is not
// fixed by the shader; presumably the caller assigns it -- TODO confirm.
| 1802 const char* ApplyFramebufferAttachmentCMAAINTELResourceManager::copy_frag_str_ = | |
| 1803 "precision highp float; \n" | |
| 1804 "layout(binding = 0) uniform highp sampler2D inTexture; \n" | |
| 1805 "layout(location = 0) out vec4 outColor; \n" | |
| 1806 "#ifdef GL_ES \n" | |
| 1807 "layout(binding = 0, rgba8) restrict writeonly uniform highp \n" | |
| 1808 " image2D outTexture; \n" | |
| 1809 "#else \n" | |
| 1810 "layout(rgba8) restrict writeonly uniform highp image2D outTexture; \n" | |
| 1811 "#endif \n" | |
| 1812 " \n" | |
| 1813 "void main() { \n" | |
| 1814 " ivec2 screenPosI = ivec2( gl_FragCoord.xy ); \n" | |
| 1815 " vec4 pixel = texelFetch(inTexture, screenPosI, 0); \n" | |
| 1816 "#ifdef OUT_FBO \n" | |
| 1817 " outColor = pixel; \n" | |
| 1818 "#else \n" | |
| 1819 " imageStore(outTexture, screenPosI, pixel); \n" | |
| 1820 "#endif \n" | |
| 1821 "} \n"; | |
| 1822 | |
| 1823 } // namespace gpu | |
| OLD | NEW |