OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "gpu/command_buffer/service/gles2_cmd_apply_framebuffer_attachment_cmaa_intel.h" | |
6 | |
7 #include "base/logging.h" | |
8 #include "gpu/command_buffer/service/gles2_cmd_decoder.h" | |
9 #include "ui/gl/gl_gl_api_implementation.h" | |
10 #include "ui/gl/gl_version_info.h" | |
11 | |
12 namespace gpu { | |
13 | |
14 ApplyFramebufferAttachmentCMAAINTELResourceManager:: | |
15 ApplyFramebufferAttachmentCMAAINTELResourceManager() | |
16 : initialized_(false), | |
17 textures_initialized_(false), | |
18 is_in_gamma_correct_mode_(false), | |
19 supports_usampler_(true), | |
20 supports_r8_image_(true), | |
21 supports_r8_read_format_(true), | |
22 is_gles31_compatible(false), | |
23 frame_id_(0), | |
24 width_(0), | |
25 height_(0), | |
26 copy_to_framebuffer_shader_(0), | |
27 copy_to_image_shader_(0), | |
28 edges0_shader_(0), | |
29 edges1_shader_(0), | |
30 edges_combine_shader_(0), | |
31 process_and_apply_shader_(0), | |
32 debug_display_edges_shader_(0), | |
33 cmaa_framebuffer_(0), | |
34 copy_framebuffer_(0), | |
35 rgba8_texture_(0), | |
36 working_color_texture_(0), | |
37 edges0_texture_(0), | |
38 edges1_texture_(0), | |
39 mini4_edge_texture_(0), | |
40 mini4_edge_depth_texture_(0), | |
41 edges1_shader_result_texture_float4_slot1_(0), | |
42 edges1_shader_result_texture_(0), | |
43 edges_combine_shader_result_texture_float4_slot1_(0), | |
44 process_and_apply_shader_result_texture_float4_slot1_(0), | |
45 edges_combine_shader_result_texture_slot2_(0), | |
46 copy_to_image_shader_outTexture_(0) {} | |
47 | |
48 ApplyFramebufferAttachmentCMAAINTELResourceManager:: | |
49 ~ApplyFramebufferAttachmentCMAAINTELResourceManager() { | |
50 Destroy(); | |
51 } | |
52 | |
53 void ApplyFramebufferAttachmentCMAAINTELResourceManager::Initialize( | |
54 const gles2::GLES2Decoder* decoder) { | |
55 is_gles31_compatible = gl::GetGLVersionInfo()->IsAtLeastGLES(3, 1); | |
56 | |
57 glGenFramebuffersEXT(1, &copy_framebuffer_); | |
58 glGenTextures(1, &rgba8_texture_); | |
59 | |
60 copy_to_image_shader_ = CreateProgram("", vert_str_, copy_frag_str_); | |
61 copy_to_framebuffer_shader_ = | |
62 CreateProgram("#define OUT_FBO 1\n", vert_str_, copy_frag_str_); | |
63 | |
64 // Check if RGBA8UI is supported as an FBO colour target with depth. | |
65 // If not supported, GLSL needs to convert the data to/from float so there is | |
66 // a small extra cost. | |
67 { | |
68 GLuint rgba8ui_texture = 0, depth_texture = 0; | |
69 glGenTextures(1, &rgba8ui_texture); | |
70 glBindTexture(GL_TEXTURE_2D, rgba8ui_texture); | |
71 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_RGBA8UI, 4, 4); | |
72 | |
73 glGenTextures(1, &depth_texture); | |
74 glBindTexture(GL_TEXTURE_2D, depth_texture); | |
75 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_DEPTH_COMPONENT16, 4, 4); | |
76 | |
77 // Create the FBO | |
78 GLuint rgba8ui_framebuffer = 0; | |
79 glGenFramebuffersEXT(1, &rgba8ui_framebuffer); | |
80 glBindFramebufferEXT(GL_FRAMEBUFFER, rgba8ui_framebuffer); | |
81 | |
82 // Bind to the FBO to test support | |
83 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, | |
84 GL_TEXTURE_2D, rgba8ui_texture, 0); | |
85 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, | |
86 GL_TEXTURE_2D, depth_texture, 0); | |
87 GLenum status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER); | |
88 | |
89 supports_usampler_ = (status == GL_FRAMEBUFFER_COMPLETE); | |
90 | |
91 glDeleteFramebuffersEXT(1, &rgba8ui_framebuffer); | |
92 glDeleteTextures(1, &rgba8ui_texture); | |
93 glDeleteTextures(1, &depth_texture); | |
94 } | |
95 | |
96 // Check to see if R8 images are supported | |
97 // If not supported, images are bound as R32F for write targets, not R8. | |
98 { | |
99 GLuint r8_texture = 0; | |
100 glGenTextures(1, &r8_texture); | |
101 glBindTexture(GL_TEXTURE_2D, r8_texture); | |
102 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_R8, 4, 4); | |
103 | |
104 glGetError(); // reset all previous errors | |
105 glBindImageTextureEXT(0, r8_texture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R8); | |
106 if (glGetError() != GL_NO_ERROR) | |
107 supports_r8_image_ = false; | |
108 | |
109 glDeleteTextures(1, &r8_texture); | |
110 } | |
111 | |
112 // Check if R8 GLSL read formats are supported. | |
113 // If not supported, r32f is used instead. | |
114 { | |
115 const char* shader_source = | |
116 "layout(r8) restrict writeonly uniform highp image2D g_r8Image; \n" | |
117 "void main() \n" | |
118 "{ \n" | |
119 " imageStore(g_r8Image, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 0.0)); \n" | |
120 "} \n"; | |
121 | |
122 GLuint shader = CreateShader(GL_FRAGMENT_SHADER, "", shader_source); | |
123 supports_r8_read_format_ = (shader != 0); | |
124 if (shader != 0) { | |
125 glDeleteShader(shader); | |
126 } | |
127 } | |
128 | |
129 VLOG(1) << "ApplyFramebufferAttachmentCMAAINTEL: " | |
130 << "Supports USampler is " << (supports_usampler_ ? "true" : "false"); | |
131 VLOG(1) << "ApplyFramebufferAttachmentCMAAINTEL: " | |
132 << "Supports R8 Images is " | |
133 << (supports_r8_image_ ? "true" : "false"); | |
134 VLOG(1) << "ApplyFramebufferAttachmentCMAAINTEL: " | |
135 << "Supports R8 Read Format is " | |
136 << (supports_r8_read_format_ ? "true" : "false"); | |
137 | |
138 // Create the shaders | |
139 std::ostringstream defines, edge1, edge2, combineEdges, blur, displayEdges; | |
140 | |
141 if (supports_usampler_) { | |
142 defines << "#define SUPPORTS_USAMPLER2D\n"; | |
143 } | |
144 | |
145 if (is_in_gamma_correct_mode_) { | |
146 defines << "#define IN_GAMMA_CORRECT_MODE\n"; | |
147 } | |
148 | |
149 if (supports_r8_read_format_) { | |
150 defines << "#define EDGE_READ_FORMAT r8\n"; | |
151 } else { | |
152 defines << "#define EDGE_READ_FORMAT r32f\n"; | |
153 } | |
154 | |
155 displayEdges << defines.str() << "#define DISPLAY_EDGES\n"; | |
156 debug_display_edges_shader_ = | |
157 CreateProgram(displayEdges.str().c_str(), vert_str_, cmaa_frag_str_); | |
158 | |
159 edge1 << defines.str() << "#define DETECT_EDGES1\n"; | |
160 edges0_shader_ = | |
161 CreateProgram(edge1.str().c_str(), vert_str_, cmaa_frag_str_); | |
162 | |
163 edge2 << defines.str() << "#define DETECT_EDGES2\n"; | |
164 edges1_shader_ = | |
165 CreateProgram(edge2.str().c_str(), vert_str_, cmaa_frag_str_); | |
166 | |
167 combineEdges << defines.str() << "#define COMBINE_EDGES\n"; | |
168 edges_combine_shader_ = | |
169 CreateProgram(combineEdges.str().c_str(), vert_str_, cmaa_frag_str_); | |
170 | |
171 blur << defines.str() << "#define BLUR_EDGES\n"; | |
172 process_and_apply_shader_ = | |
173 CreateProgram(blur.str().c_str(), vert_str_, cmaa_frag_str_); | |
174 | |
175 edges1_shader_result_texture_float4_slot1_ = | |
176 glGetUniformLocation(edges0_shader_, "g_resultTextureFlt4Slot1"); | |
177 edges1_shader_result_texture_ = | |
178 glGetUniformLocation(edges1_shader_, "g_resultTexture"); | |
179 edges_combine_shader_result_texture_float4_slot1_ = | |
180 glGetUniformLocation(edges_combine_shader_, "g_resultTextureFlt4Slot1"); | |
181 edges_combine_shader_result_texture_slot2_ = | |
182 glGetUniformLocation(edges_combine_shader_, "g_resultTextureSlot2"); | |
183 process_and_apply_shader_result_texture_float4_slot1_ = glGetUniformLocation( | |
184 process_and_apply_shader_, "g_resultTextureFlt4Slot1"); | |
185 copy_to_image_shader_outTexture_ = | |
186 glGetUniformLocation(copy_to_image_shader_, "outTexture"); | |
187 | |
188 initialized_ = true; | |
189 } | |
190 | |
191 void ApplyFramebufferAttachmentCMAAINTELResourceManager::Destroy() { | |
192 if (!initialized_) | |
193 return; | |
194 | |
195 ReleaseTextures(); | |
196 | |
197 glDeleteProgram(process_and_apply_shader_); | |
198 glDeleteProgram(edges_combine_shader_); | |
199 glDeleteProgram(edges1_shader_); | |
200 glDeleteProgram(edges0_shader_); | |
201 glDeleteProgram(debug_display_edges_shader_); | |
202 | |
203 initialized_ = false; | |
204 } | |
205 | |
206 // Apply the CMAA (Conservative Morphological Anti-Aliasing) algorithm to the | |
207 // color attachments of the currently bound draw framebuffer. | |
208 // See GL_INTEL_framebuffer_CMAA for details. | |
209 void ApplyFramebufferAttachmentCMAAINTELResourceManager:: | |
210 ApplyFramebufferAttachmentCMAAINTEL(const gles2::GLES2Decoder* decoder) { | |
211 if (!initialized_) | |
212 return; | |
213 | |
214 GLint last_framebuffer = 0; | |
215 GLint attachement_type = 0; | |
216 GLint source_texture = 0; | |
217 GLint texture_level = 0; | |
218 GLint width = 0; | |
219 GLint height = 0; | |
220 GLint internal_format = 0; | |
221 GLint max_draw_buffers = 0; | |
222 | |
223 glGetIntegerv(GL_FRAMEBUFFER_BINDING, &last_framebuffer); | |
224 | |
225 // Process each color attachment of the current draw framebuffer. | |
226 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, last_framebuffer); | |
227 glGetIntegerv(GL_MAX_DRAW_BUFFERS, &max_draw_buffers); | |
piman
2016/06/13 22:24:43
Here and below: we already have this value, and al
adrian.belgun
2016/06/16 14:53:14
Done, but needed to pass |decoder| without |const|
| |
228 for (int i = 0; i < max_draw_buffers; i++) { | |
229 glGetFramebufferAttachmentParameterivEXT( | |
230 GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + i, | |
231 GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE, &attachement_type); | |
232 if (attachement_type == GL_TEXTURE) { | |
233 // Get the texture width and height. | |
234 glGetFramebufferAttachmentParameterivEXT( | |
235 GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + i, | |
236 GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME, &source_texture); | |
237 glGetFramebufferAttachmentParameterivEXT( | |
238 GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + i, | |
239 GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL, &texture_level); | |
240 glBindTexture(GL_TEXTURE_2D, source_texture); | |
241 glGetTexLevelParameteriv(GL_TEXTURE_2D, texture_level, GL_TEXTURE_WIDTH, | |
242 &width); | |
243 glGetTexLevelParameteriv(GL_TEXTURE_2D, texture_level, GL_TEXTURE_HEIGHT, | |
244 &height); | |
245 glGetTexLevelParameteriv(GL_TEXTURE_2D, texture_level, | |
246 GL_TEXTURE_INTERNAL_FORMAT, &internal_format); | |
247 | |
248 // Resize internal structures - only if needed. | |
249 OnSize(width, height); | |
250 | |
251 // CMAA internally expects GL_RGBA8 textures. | |
252 // Process using a GL_RGBA8 copy if this is not the case. | |
253 bool do_copy = internal_format != GL_RGBA8; | |
254 | |
255 // Copy source_texture to rgba8_texture_ | |
256 if (do_copy) { | |
257 CopyTexture(source_texture, rgba8_texture_, false); | |
258 } | |
259 | |
260 // CMAA Effect | |
261 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, last_framebuffer); | |
262 if (do_copy) { | |
263 ApplyCMMAEffectTexture(1.0f / 13.0f, rgba8_texture_, rgba8_texture_); | |
piman
2016/06/13 22:24:43
Can you move this magic value into its own constan
adrian.belgun
2016/06/16 14:53:14
Removed. Was a remnant from an older version. Curr
| |
264 } else { | |
265 ApplyCMMAEffectTexture(1.0f / 13.0f, source_texture, source_texture); | |
266 } | |
267 | |
268 // Copy rgba8_texture_ to source_texture | |
269 if (do_copy) { | |
270 // Move source_texture to the first color attachment of the copy fbo. | |
271 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, last_framebuffer); | |
272 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + i, | |
273 GL_TEXTURE_2D, 0, 0); | |
274 glBindFramebufferEXT(GL_FRAMEBUFFER, copy_framebuffer_); | |
275 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, | |
276 GL_TEXTURE_2D, source_texture, 0); | |
277 | |
278 CopyTexture(rgba8_texture_, source_texture, true); | |
279 | |
280 // Restore color attachments | |
281 glBindFramebufferEXT(GL_FRAMEBUFFER, copy_framebuffer_); | |
282 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, | |
283 GL_TEXTURE_2D, rgba8_texture_, 0); | |
284 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, last_framebuffer); | |
285 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + i, | |
286 GL_TEXTURE_2D, source_texture, 0); | |
287 } | |
288 } | |
289 } | |
290 | |
291 // Restore state | |
292 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, last_framebuffer); | |
piman
2016/06/13 22:24:43
This is redundant with RestoreFramebufferBindings
adrian.belgun
2016/06/16 14:53:14
Done.
| |
293 decoder->RestoreAllAttributes(); | |
294 decoder->RestoreTextureUnitBindings(0); | |
295 decoder->RestoreActiveTexture(); | |
296 decoder->RestoreProgramBindings(); | |
297 decoder->RestoreBufferBindings(); | |
298 decoder->RestoreFramebufferBindings(); | |
299 decoder->RestoreGlobalState(); | |
300 } | |
301 | |
302 void ApplyFramebufferAttachmentCMAAINTELResourceManager::ApplyCMMAEffectTexture( | |
303 float edge_detection_threshold, | |
304 GLuint source_texture, | |
305 GLuint dest_texture) { | |
306 frame_id_++; | |
307 | |
308 GLuint edge_texture_a; | |
309 GLuint edge_texture_b; | |
310 | |
311 // Flip flop - One pass clears the texture that needs clearing for the other | |
312 // one (actually it's only important that it clears the highest bit) | |
313 if ((frame_id_ % 2) == 0) { | |
314 edge_texture_a = edges0_texture_; | |
315 edge_texture_b = edges1_texture_; | |
316 } else { | |
317 edge_texture_a = edges1_texture_; | |
318 edge_texture_b = edges0_texture_; | |
319 } | |
320 | |
321 // Setup the main fbo | |
322 glBindFramebufferEXT(GL_FRAMEBUFFER, cmaa_framebuffer_); | |
323 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, | |
324 mini4_edge_texture_, 0); | |
325 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, | |
326 mini4_edge_depth_texture_, 0); | |
327 #if DCHECK_IS_ON() | |
328 GLenum status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER); | |
329 if (status != GL_FRAMEBUFFER_COMPLETE) { | |
330 DLOG(ERROR) << "ApplyFramebufferAttachmentCMAAINTEL: " | |
331 << "Incomplete framebuffer."; | |
332 Destroy(); | |
333 return; | |
334 } | |
335 #endif | |
336 | |
337 // Setup the viewport to match the fbo | |
338 glViewport(0, 0, (int)((width_ + 1) / 2), (int)((height_ + 1) / 2)); | |
piman
2016/06/13 22:24:43
nit: no need for (int)
adrian.belgun
2016/06/16 14:53:14
Done.
| |
339 glEnable(GL_DEPTH_TEST); | |
340 | |
341 // Detect edges Pass 0 | |
342 // - For every pixel detect edges to the right and down and output depth | |
343 // mask where edges detected (1 - far, for detected, 0-near for empty | |
344 // pixels) | |
345 | |
346 // Inputs | |
347 // g_screenTexture source_texture tex0 | |
348 // Outputs | |
349 // gl_FragDepth mini4_edge_depth_texture_ fbo.depth | |
350 // out uvec4 outEdges mini4_edge_texture_ fbo.col | |
351 // image2D g_resultTextureFlt4Slot1 working_color_texture_ image1 | |
352 GLenum edge_format = supports_r8_image_ ? GL_R8 : GL_R32F; | |
353 | |
354 { | |
355 glUseProgram(edges0_shader_); | |
356 glUniform1f(0, 1.0f); | |
357 glUniform2f(1, 1.0f / width_, 1.0f / height_); | |
358 glDepthMask(GL_TRUE); | |
359 glDepthFunc(GL_ALWAYS); | |
360 glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); | |
361 | |
362 if (!is_gles31_compatible) { | |
363 glUniform1i(edges1_shader_result_texture_float4_slot1_, 1); | |
364 } | |
365 glBindImageTextureEXT(1, working_color_texture_, 0, GL_FALSE, 0, | |
366 GL_WRITE_ONLY, GL_RGBA8); | |
367 | |
368 glActiveTexture(GL_TEXTURE0); | |
369 glBindTexture(GL_TEXTURE_2D, source_texture); | |
370 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); | |
371 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | |
372 | |
373 glDrawArrays(GL_TRIANGLES, 0, 3); | |
374 } | |
375 | |
376 // Detect edges Pass 1 (finish the previous pass edge processing). | |
377 // Do the culling of non-dominant local edges (leave mainly locally dominant | |
378 // edges) and merge Right and Bottom edges into TopRightBottomLeft | |
379 | |
380 // Inputs | |
381 // g_src0Texture4Uint mini4_edge_texture_ tex1 | |
382 // Outputs | |
383 // image2D g_resultTexture edge_texture_b image0 | |
384 { | |
385 glUseProgram(edges1_shader_); | |
386 glUniform1f(0, 0.0f); | |
387 glUniform2f(1, 1.0f / width_, 1.0f / height_); | |
388 glDepthMask(GL_FALSE); | |
389 glDepthFunc(GL_LESS); | |
390 glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); | |
391 | |
392 if (!is_gles31_compatible) { | |
393 glUniform1i(edges1_shader_result_texture_, 0); | |
394 } | |
395 glBindImageTextureEXT(0, edge_texture_b, 0, GL_FALSE, 0, GL_WRITE_ONLY, | |
396 edge_format); | |
397 | |
398 glActiveTexture(GL_TEXTURE1); | |
399 glBindTexture(GL_TEXTURE_2D, mini4_edge_texture_); | |
400 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); | |
401 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); | |
402 | |
403 glDrawArrays(GL_TRIANGLES, 0, 3); | |
404 } | |
405 | |
406 // - Combine RightBottom (.xy) edges from previous pass into | |
407 // RightBottomLeftTop (.xyzw) edges and output it into the mask (have to | |
408 // fill in the whole buffer including empty ones for the line length | |
409 // detection to work correctly). | |
410 // - On all pixels with any edge, input buffer into a temporary color buffer | |
411 // needed for correct blending in the next pass (other pixels not needed | |
412 // so not copied to avoid bandwidth use). | |
413 // - On all pixels with 2 or more edges output positive depth mask for the | |
414 // next pass. | |
415 | |
416 // Inputs | |
417 // g_src0TextureFlt edge_texture_b tex1 //ps | |
418 // Outputs | |
419 // image2D g_resultTextureSlot2 edge_texture_a image2 | |
420 // gl_FragDepth mini4_edge_texture_ fbo.depth | |
421 { | |
422 // Combine edges: each pixel will now contain info on all (top, right, | |
423 // bottom, left) edges; also create depth mask as above depth and mark | |
424 // potential Zs, and also copy source color data but only on edge pixels | |
425 glUseProgram(edges_combine_shader_); | |
426 glUniform1f(0, 1.0f); | |
427 glUniform2f(1, 1.0f / width_, 1.0f / height_); | |
428 glDepthMask(GL_TRUE); | |
429 glDepthFunc(GL_ALWAYS); | |
430 glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); | |
431 | |
432 if (!is_gles31_compatible) { | |
433 glUniform1i(edges_combine_shader_result_texture_float4_slot1_, 1); | |
434 glUniform1i(edges_combine_shader_result_texture_slot2_, 2); | |
435 } | |
436 glBindImageTextureEXT(1, dest_texture, 0, GL_FALSE, 0, GL_WRITE_ONLY, | |
437 GL_RGBA8); | |
438 glBindImageTextureEXT(2, edge_texture_a, 0, GL_FALSE, 0, GL_WRITE_ONLY, | |
439 edge_format); | |
440 | |
441 glActiveTexture(GL_TEXTURE1); | |
442 glBindTexture(GL_TEXTURE_2D, edge_texture_b); | |
443 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); | |
444 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); | |
445 | |
446 glDrawArrays(GL_TRIANGLES, 0, 3); | |
447 } | |
448 | |
449 // Using depth mask and [earlydepthstencil] to work on pixels with 2, 3, 4 | |
450 // edges: | |
451 // - First blend simple blur map for 2,3,4 edge pixels | |
452 // - Then do the lines (line length counter -should- guarantee no overlap | |
453 // with other pixels - pixels with 1 edge are excluded in the previous | |
454 // pass and the pixels with 2 parallel edges are excluded in the simple | |
455 // blur) | |
456 | |
457 // Inputs | |
458 // g_screenTexture working_color_texture_ tex0 | |
459 // g_src0TextureFlt edge_texture_a tex1 //ps | |
460 // sampled | |
461 // Outputs | |
462 // g_resultTextureFlt4Slot1 dest_texture image1 | |
463 // gl_FragDepth mini4_edge_texture_ fbo.depth | |
464 { | |
465 glUseProgram(process_and_apply_shader_); | |
466 glUniform1f(0, 0.0f); | |
467 glUniform2f(1, 1.0f / width_, 1.0f / height_); | |
468 glDepthMask(GL_FALSE); | |
469 glDepthFunc(GL_LESS); | |
470 glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); | |
471 | |
472 if (!is_gles31_compatible) { | |
473 glUniform1i(process_and_apply_shader_result_texture_float4_slot1_, 1); | |
474 } | |
475 glBindImageTextureEXT(1, dest_texture, 0, GL_FALSE, 0, GL_WRITE_ONLY, | |
476 GL_RGBA8); | |
477 | |
478 glActiveTexture(GL_TEXTURE0); | |
479 glBindTexture(GL_TEXTURE_2D, working_color_texture_); | |
480 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); | |
481 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | |
482 | |
483 glActiveTexture(GL_TEXTURE1); | |
484 glBindTexture(GL_TEXTURE_2D, edge_texture_a); | |
485 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); | |
486 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); | |
487 | |
488 glDrawArrays(GL_TRIANGLES, 0, 3); | |
489 } | |
490 | |
491 glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); | |
492 glDisable(GL_DEPTH_TEST); | |
493 glDepthMask(GL_FALSE); | |
494 glActiveTexture(GL_TEXTURE0); | |
495 } | |
496 | |
497 void ApplyFramebufferAttachmentCMAAINTELResourceManager::OnSize(GLint width, | |
498 GLint height) { | |
499 if (height_ == height && width_ == width) | |
500 return; | |
501 | |
502 ReleaseTextures(); | |
503 | |
504 height_ = height; | |
505 width_ = width; | |
506 | |
507 glGenTextures(1, &rgba8_texture_); | |
508 glBindTexture(GL_TEXTURE_2D, rgba8_texture_); | |
509 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_RGBA8, width, height); | |
510 | |
511 // Edges texture - R8 | |
512 // OpenGLES has no single component 8/16-bit image support, so needs to be R32 | |
513 // Although CHT does support R8. | |
514 GLenum edge_format = supports_r8_image_ ? GL_R8 : GL_R32F; | |
515 glGenTextures(1, &edges0_texture_); | |
516 glBindTexture(GL_TEXTURE_2D, edges0_texture_); | |
517 glTexStorage2DEXT(GL_TEXTURE_2D, 1, edge_format, width, height); | |
518 | |
519 glGenTextures(1, &edges1_texture_); | |
520 glBindTexture(GL_TEXTURE_2D, edges1_texture_); | |
521 glTexStorage2DEXT(GL_TEXTURE_2D, 1, edge_format, width, height); | |
522 | |
523 // Color working texture - RGBA8 | |
524 glGenTextures(1, &working_color_texture_); | |
525 glBindTexture(GL_TEXTURE_2D, working_color_texture_); | |
526 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_RGBA8, width, height); | |
527 | |
528 // Half*half compressed 4-edge-per-pixel texture - RGBA8 | |
529 glGenTextures(1, &mini4_edge_texture_); | |
530 glBindTexture(GL_TEXTURE_2D, mini4_edge_texture_); | |
531 GLenum format = GL_RGBA8UI; | |
532 if (!supports_usampler_) { | |
533 format = GL_RGBA8; | |
534 } | |
535 glTexStorage2DEXT(GL_TEXTURE_2D, 1, format, (width + 1) / 2, | |
536 (height + 1) / 2); | |
537 | |
538 // Depth | |
539 glGenTextures(1, &mini4_edge_depth_texture_); | |
540 glBindTexture(GL_TEXTURE_2D, mini4_edge_depth_texture_); | |
541 glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_DEPTH_COMPONENT16, (width + 1) / 2, | |
542 (height + 1) / 2); | |
543 | |
544 // Create the FBO | |
545 glGenFramebuffersEXT(1, &cmaa_framebuffer_); | |
546 glBindFramebufferEXT(GL_FRAMEBUFFER, cmaa_framebuffer_); | |
547 | |
548 // We need to clear the textures before they are first used. | |
549 // The algorithm self-clears them later. | |
550 glViewport(0, 0, (int)width_, (int)height_); | |
piman
2016/06/13 22:24:43
nit: remove (int)
adrian.belgun
2016/06/16 14:53:14
Done.
| |
551 glClearColor(0.0f, 0.0f, 0.0f, 0.0f); | |
552 | |
553 glBindFramebufferEXT(GL_FRAMEBUFFER, cmaa_framebuffer_); | |
554 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, | |
555 edges0_texture_, 0); | |
556 glClear(GL_COLOR_BUFFER_BIT); | |
557 | |
558 glFramebufferTexture2DEXT(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, | |
559 edges1_texture_, 0); | |
560 glClear(GL_COLOR_BUFFER_BIT); | |
561 | |
562 textures_initialized_ = true; | |
563 } | |
564 | |
565 void ApplyFramebufferAttachmentCMAAINTELResourceManager::ReleaseTextures() { | |
566 if (textures_initialized_) { | |
567 glDeleteFramebuffersEXT(1, &copy_framebuffer_); | |
piman
2016/06/13 22:24:43
copy_framebuffer_ and rgba8_texture_ are always cr
adrian.belgun
2016/06/16 14:53:14
Done. Moved copy_framebuffer_ and rgba8_texture_ i
| |
568 glDeleteFramebuffersEXT(1, &cmaa_framebuffer_); | |
569 glDeleteTextures(1, &rgba8_texture_); | |
570 glDeleteTextures(1, &edges0_texture_); | |
571 glDeleteTextures(1, &edges1_texture_); | |
572 glDeleteTextures(1, &mini4_edge_texture_); | |
573 glDeleteTextures(1, &mini4_edge_depth_texture_); | |
574 glDeleteTextures(1, &working_color_texture_); | |
575 } | |
576 textures_initialized_ = false; | |
577 } | |
578 | |
579 void ApplyFramebufferAttachmentCMAAINTELResourceManager::CopyTexture( | |
580 GLint source, | |
581 GLint dest, | |
582 bool via_fbo) { | |
583 glViewport(0, 0, width_, height_); | |
584 glActiveTexture(GL_TEXTURE0); | |
585 glBindTexture(GL_TEXTURE_2D, source); | |
586 | |
587 if (!via_fbo) { | |
588 glUseProgram(copy_to_image_shader_); | |
589 if (!is_gles31_compatible) { | |
590 glUniform1i(copy_to_image_shader_outTexture_, 0); | |
591 } | |
592 glBindImageTextureEXT(0, dest, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8); | |
593 } else { | |
594 glDisable(GL_BLEND); | |
piman
2016/06/13 22:24:43
What about other state such as color mask, scissor
adrian.belgun
2016/06/16 14:53:14
Done. Added remaining state flags.
| |
595 glUseProgram(copy_to_framebuffer_shader_); | |
596 } | |
597 | |
598 glDrawArrays(GL_TRIANGLES, 0, 3); | |
599 glUseProgram(0); | |
600 glBindTexture(GL_TEXTURE_2D, 0); | |
601 } | |
602 | |
603 GLuint ApplyFramebufferAttachmentCMAAINTELResourceManager::CreateProgram( | |
604 const char* defines, | |
605 const char* vs_source, | |
606 const char* fs_source) { | |
607 GLuint program = glCreateProgram(); | |
608 | |
609 GLuint vs = CreateShader(GL_VERTEX_SHADER, defines, vs_source); | |
610 GLuint fs = CreateShader(GL_FRAGMENT_SHADER, defines, fs_source); | |
611 | |
612 glAttachShader(program, vs); | |
613 glDeleteShader(vs); | |
614 glAttachShader(program, fs); | |
615 glDeleteShader(fs); | |
616 | |
617 glLinkProgram(program); | |
618 GLint linkStatus; | |
piman
2016/06/13 22:24:43
nit: link_status
adrian.belgun
2016/06/16 14:53:14
Done.
| |
619 glGetProgramiv(program, GL_LINK_STATUS, &linkStatus); | |
620 | |
621 if (linkStatus == 0) { | |
622 #if DCHECK_IS_ON() | |
623 GLint infoLogLength; | |
piman
2016/06/13 22:24:43
nit: info_log_length
adrian.belgun
2016/06/16 14:53:14
Done.
| |
624 glGetProgramiv(program, GL_INFO_LOG_LENGTH, &infoLogLength); | |
625 std::vector<GLchar> infoLog(infoLogLength); | |
piman
2016/06/13 22:24:43
nit: info_log
adrian.belgun
2016/06/16 14:53:14
Done.
| |
626 glGetProgramInfoLog(program, static_cast<GLsizei>(infoLog.size()), NULL, | |
627 &infoLog[0]); | |
628 DLOG(ERROR) << "ApplyFramebufferAttachmentCMAAINTEL: " | |
629 << "program link failed: " << &infoLog[0]; | |
630 #endif | |
631 glDeleteProgram(program); | |
632 program = 0; | |
633 } | |
634 | |
635 return program; | |
636 } | |
637 | |
638 GLuint ApplyFramebufferAttachmentCMAAINTELResourceManager::CreateShader( | |
639 GLenum type, | |
640 const char* defines, | |
641 const char* source) { | |
642 GLuint shader = glCreateShader(type); | |
643 | |
644 const char* header_es31 = | |
piman
2016/06/13 22:24:43
nit: const char header_es31[]
adrian.belgun
2016/06/16 14:53:14
Done.
| |
645 "#version 310 es \n"; | |
646 const char* header_gl30 = | |
piman
2016/06/13 22:24:43
nit: const char header_gl30[]
adrian.belgun
2016/06/16 14:53:14
Done.
| |
647 "#version 130 \n" | |
648 "#extension GL_ARB_shading_language_420pack : require \n" | |
649 "#extension GL_ARB_texture_gather : require \n" | |
650 "#extension GL_ARB_explicit_uniform_location : require \n" | |
651 "#extension GL_ARB_explicit_attrib_location : require \n" | |
652 "#extension GL_ARB_shader_image_load_store : require \n"; | |
653 | |
654 const char* header = NULL; | |
655 if (is_gles31_compatible) { | |
656 header = header_es31; | |
657 } else { | |
658 header = header_gl30; | |
659 } | |
660 | |
661 const char* sourceArray[4] = {header, defines, "\n", source}; | |
piman
2016/06/13 22:24:43
nit: source_array
adrian.belgun
2016/06/16 14:53:14
Done.
| |
662 glShaderSource(shader, 4, sourceArray, NULL); | |
663 | |
664 glCompileShader(shader); | |
665 | |
666 GLint compileResult; | |
piman
2016/06/13 22:24:43
nit: compile_result
adrian.belgun
2016/06/16 14:53:14
Done.
| |
667 glGetShaderiv(shader, GL_COMPILE_STATUS, &compileResult); | |
668 if (compileResult == 0) { | |
669 #if DCHECK_IS_ON() | |
670 GLint infoLogLength; | |
piman
2016/06/13 22:24:43
nit: info_log_length
adrian.belgun
2016/06/16 14:53:14
Done.
| |
671 glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &infoLogLength); | |
672 std::vector<GLchar> infoLog(infoLogLength); | |
piman
2016/06/13 22:24:43
nit: info_log
adrian.belgun
2016/06/16 14:53:14
Done.
| |
673 glGetShaderInfoLog(shader, static_cast<GLsizei>(infoLog.size()), NULL, | |
674 &infoLog[0]); | |
675 DLOG(ERROR) << "ApplyFramebufferAttachmentCMAAINTEL: " | |
676 << "shader compilation failed: " | |
677 << (type == GL_VERTEX_SHADER | |
678 ? "GL_VERTEX_SHADER" | |
679 : (type == GL_FRAGMENT_SHADER ? "GL_FRAGMENT_SHADER" | |
680 : "UNKNOWN_SHADER")) | |
681 << " shader compilation failed: " << &infoLog[0]; | |
682 #endif | |
683 glDeleteShader(shader); | |
684 shader = 0; | |
685 } | |
686 | |
687 return shader; | |
688 } | |
689 | |
690 // Shaders used in the CMAA algorithm. | |
691 const char* ApplyFramebufferAttachmentCMAAINTELResourceManager::vert_str_ = | |
692 "precision highp float; \n" | |
693 "layout(location = 0) uniform float g_Depth; \n" | |
694 "// No input data. \n" | |
695 "// Verts are autogenerated. \n" | |
696 "// \n" | |
697 "// vertexID 0,1,2 should generate \n" | |
698 "// POS: (-1,-1), (+3,-1), (-1,+3) \n" | |
699 "// \n" | |
700 "// This generates a triangle that completely covers the -1->1 viewport \n" | |
701 "// \n" | |
702 "void main() \n" | |
703 "{ \n" | |
704 " float x = -1.0 + float((gl_VertexID & 1) << 2); \n" | |
705 " float y = -1.0 + float((gl_VertexID & 2) << 1); \n" | |
706 " gl_Position = vec4(x, y, g_Depth, 1.0); \n" | |
707 "} \n" | |
708 " \n"; | |
709 | |
710 const char* ApplyFramebufferAttachmentCMAAINTELResourceManager::cmaa_frag_str_ = | |
711 "precision highp float; \n" | |
712 "precision highp int; \n" | |
713 " \n" | |
714 "#define SETTINGS_ALLOW_SHORT_Zs 1 \n" | |
715 "#define EDGE_DETECT_THRESHOLD 13.0f \n" | |
716 " \n" | |
717 "#define saturate(x) clamp((x), 0.0, 1.0) \n" | |
718 " \n" | |
719 "// bind to location 0 \n" | |
720 "layout(location = 0) uniform float g_Depth; \n" | |
721 "// bind to a uniform buffer bind point 0 \n" | |
722 "layout(location = 1) uniform vec2 g_OneOverScreenSize; \n" | |
723 "#ifndef EDGE_DETECT_THRESHOLD \n" | |
724 "layout(location = 2) uniform float g_ColorThreshold; \n" | |
725 "#endif \n" | |
726 " \n" | |
727 "#ifdef SUPPORTS_USAMPLER2D \n" | |
728 "#define USAMPLER usampler2D \n" | |
729 "#define UVEC4 uvec4 \n" | |
730 "#define LOAD_UINT(arg) arg \n" | |
731 "#define STORE_UVEC4(arg) arg \n" | |
732 "#else \n" | |
733 "#define USAMPLER sampler2D \n" | |
734 "#define UVEC4 vec4 \n" | |
735 "#define LOAD_UINT(arg) uint(arg * 255.0f) \n" | |
736 "#define STORE_UVEC4(arg) vec4(float(arg.x) / 255.0f, \\\n" | |
737 " float(arg.y) / 255.0f, \\\n" | |
738 " float(arg.z) / 255.0f, \\\n" | |
739 " float(arg.w) / 255.0f) \n" | |
740 "#endif \n" | |
741 " \n" | |
742 "// bind to texture stage 0/1 \n" | |
743 "layout(binding = 0) uniform highp sampler2D g_screenTexture; \n" | |
744 "layout(binding = 1) uniform highp sampler2D g_src0TextureFlt; \n" | |
745 "layout(binding = 1) uniform highp USAMPLER g_src0Texture4Uint; \n" | |
746 " \n" | |
747 "// bind to image stage 0/1/2 \n" | |
748 "#ifdef GL_ES \n" | |
749 "layout(binding = 0, EDGE_READ_FORMAT) restrict writeonly uniform highp \n" | |
750 " image2D g_resultTexture; \n" | |
751 "layout(binding = 1, rgba8) restrict writeonly uniform highp \n" | |
752 " image2D g_resultTextureFlt4Slot1; \n" | |
753 "layout(binding = 2, EDGE_READ_FORMAT) restrict writeonly uniform highp \n" | |
754 " image2D g_resultTextureSlot2; \n" | |
755 "#else \n" | |
756 "layout(EDGE_READ_FORMAT) restrict writeonly uniform highp \n" | |
757 " image2D g_resultTexture; \n" | |
758 "layout(rgba8) restrict writeonly uniform highp \n" | |
759 " image2D g_resultTextureFlt4Slot1; \n" | |
760 "layout(EDGE_READ_FORMAT) restrict writeonly uniform highp \n" | |
761 " image2D g_resultTextureSlot2; \n" | |
762 "#endif \n" | |
763 " \n" | |
764 "// Constants \n" | |
765 "const vec4 c_lumWeights = vec4(0.2126f, 0.7152f, 0.0722f, 0.0000f); \n" | |
766 " \n" | |
767 "#ifdef EDGE_DETECT_THRESHOLD \n" | |
768 "const float c_ColorThreshold = 1.0f / EDGE_DETECT_THRESHOLD; \n" | |
769 "#endif \n" | |
770 " \n" | |
771 "// Must be even number; Will work with ~16 pretty good too for \n" | |
772 "// additional performance, or with ~64 for highest quality. \n" | |
773 "const int c_maxLineLength = 64; \n" | |
774 " \n" | |
775 "const vec4 c_edgeDebugColours[5] = vec4[5](vec4(0.5, 0.5, 0.5, 0.4), \n" | |
776 " vec4(1.0, 0.1, 1.0, 0.8), \n" | |
777 " vec4(0.9, 0.0, 0.0, 0.8), \n" | |
778 " vec4(0.0, 0.9, 0.0, 0.8), \n" | |
779 " vec4(0.0, 0.0, 0.9, 0.8)); \n" | |
780 " \n" | |
781 "// this isn't needed if colour UAV is _SRGB but that doesn't work \n" | |
782 "// everywhere \n" | |
783 "#ifdef IN_GAMMA_CORRECT_MODE \n" | |
784 "///////////////////////////////////////////////////////////////////////\n" | |
785 "// \n" | |
786 "// SRGB Helper Functions taken from D3DX_DXGIFormatConvert.inl \n" | |
787 "float D3DX_FLOAT_to_SRGB(float val) { \n" | |
788 " if (val < 0.0031308f) \n" | |
789 " val *= 12.92f; \n" | |
790 " else { \n" | |
791 " val = 1.055f * pow(val, 1.0f / 2.4f) - 0.055f; \n" | |
792 " } \n" | |
793 " return val; \n" | |
794 "} \n" | |
795 "// \n" | |
796 "vec3 D3DX_FLOAT3_to_SRGB(vec3 val) { \n" | |
797 " vec3 outVal; \n" | |
798 " outVal.x = D3DX_FLOAT_to_SRGB(val.x); \n" | |
799 " outVal.y = D3DX_FLOAT_to_SRGB(val.y); \n" | |
800 " outVal.z = D3DX_FLOAT_to_SRGB(val.z); \n" | |
801 " return outVal; \n" | |
802 "} \n" | |
803 "// \n" | |
804 "///////////////////////////////////////////////////////////////////////\n" | |
805 "#endif // IN_GAMMA_CORRECT_MODE \n" | |
806 " \n" | |
807 "// how .rgba channels from the edge texture maps to pixel edges: \n" | |
808 "// \n" | |
809 "// A - 0x08 \n" | |
810 "// |¯¯¯¯¯¯¯¯¯| \n" | |
811 "// | | \n" | |
812 "// 0x04 - B | pixel | R - 0x01 \n" | |
813 "// | | \n" | |
814 "// |_________| \n" | |
815 "// G - 0x02 \n" | |
816 "// \n" | |
817 "// (A - there's an edge between us and a pixel above us) \n" | |
818 "// (R - there's an edge between us and a pixel to the right) \n" | |
819 "// (G - there's an edge between us and a pixel at the bottom) \n" | |
820 "// (B - there's an edge between us and a pixel to the left) \n" | |
821 " \n" | |
822 "// Expecting values of 1 and 0 only! \n" | |
823 "uint PackEdge(uvec4 edges) { \n" | |
824 " return (edges.x << 0u) | (edges.y << 1u) | (edges.z << 2u) | \n" | |
825 " (edges.w << 3u); \n" | |
826 "} \n" | |
827 " \n" | |
828 "uvec4 UnpackEdge(uint value) { \n" | |
829 " uvec4 ret; \n" | |
830 " ret.x = (value & 0x01u) != 0u ? 1u : 0u; \n" | |
831 " ret.y = (value & 0x02u) != 0u ? 1u : 0u; \n" | |
832 " ret.z = (value & 0x04u) != 0u ? 1u : 0u; \n" | |
833 " ret.w = (value & 0x08u) != 0u ? 1u : 0u; \n" | |
834 " return ret; \n" | |
835 "} \n" | |
836 " \n" | |
837 "uint PackZ(const uvec2 screenPos, const bool invertedZShape) { \n" | |
838 " uint retVal = screenPos.x | (screenPos.y << 15u); \n" | |
839 " if (invertedZShape) \n" | |
840 " retVal |= (1u << 30u); \n" | |
841 " return retVal; \n" | |
842 "} \n" | |
843 " \n" | |
844 "void UnpackZ(uint packedZ, out uvec2 screenPos, \n" | |
845 " out bool invertedZShape) \n" | |
846 "{ \n" | |
847 " screenPos.x = packedZ & 0x7FFFu; \n" | |
848 " screenPos.y = (packedZ >> 15u) & 0x7FFFu; \n" | |
849 " invertedZShape = (packedZ >> 30u) == 1u; \n" | |
850 "} \n" | |
851 " \n" | |
852 "uint PackZ(const uvec2 screenPos, \n" | |
853 " const bool invertedZShape, \n" | |
854 " const bool horizontal) { \n" | |
855 " uint retVal = screenPos.x | (screenPos.y << 15u); \n" | |
856 " if (invertedZShape) \n" | |
857 " retVal |= (1u << 30u); \n" | |
858 " if (horizontal) \n" | |
859 " retVal |= (1u << 31u); \n" | |
860 " return retVal; \n" | |
861 "} \n" | |
862 " \n" | |
863 "void UnpackZ(uint packedZ, \n" | |
864 " out uvec2 screenPos, \n" | |
865 " out bool invertedZShape, \n" | |
866 " out bool horizontal) { \n" | |
867 " screenPos.x = packedZ & 0x7FFFu; \n" | |
868 " screenPos.y = (packedZ >> 15u) & 0x7FFFu; \n" | |
869 " invertedZShape = (packedZ & (1u << 30u)) != 0u; \n" | |
870 " horizontal = (packedZ & (1u << 31u)) != 0u; \n" | |
871 "} \n" | |
872 " \n" | |
873 "vec4 PackBlurAAInfo(ivec2 pixelPos, uint shapeType) { \n" | |
874 " uint packedEdges = uint( \n" | |
875 " texelFetch(g_src0TextureFlt, pixelPos, 0).r * 255.5); \n" | |
876 " \n" | |
877 " float retval = float(packedEdges + (shapeType << 4u)); \n" | |
878 " \n" | |
879 " return vec4(retval / 255.0); \n" | |
880 "} \n" | |
881 " \n" | |
882 "void UnpackBlurAAInfo(float packedValue, out uint edges, \n" | |
883 " out uint shapeType) { \n" | |
884 " uint packedValueInt = uint(packedValue * 255.5); \n" | |
885 " edges = packedValueInt & 0xFu; \n" | |
886 " shapeType = packedValueInt >> 4u; \n" | |
887 "} \n" | |
888 " \n" | |
889 "float EdgeDetectColorCalcDiff(vec3 colorA, vec3 colorB) { \n" | |
890 "#ifdef IN_BGR_MODE \n" | |
891 " vec3 LumWeights = c_lumWeights.bgr; \n" | |
892 "#else \n" | |
893 " vec3 LumWeights = c_lumWeights.rgb; \n" | |
894 "#endif \n" | |
895 " \n" | |
896 " return dot(abs(colorA.rgb - colorB.rgb), LumWeights); \n" | |
897 "} \n" | |
898 " \n" | |
899 "bool EdgeDetectColor(vec3 colorA, vec3 colorB) { \n" | |
900 "#ifdef EDGE_DETECT_THRESHOLD \n" | |
901 " return EdgeDetectColorCalcDiff(colorA, colorB) > c_ColorThreshold; \n" | |
902 "#else \n" | |
903 " return EdgeDetectColorCalcDiff(colorA, colorB) > g_ColorThreshold; \n" | |
904 "#endif \n" | |
905 "} \n" | |
906 " \n" | |
907 "void FindLineLength(out int lineLengthLeft, \n" | |
908 " out int lineLengthRight, \n" | |
909 " ivec2 screenPos, \n" | |
910 " const bool horizontal, \n" | |
911 " const bool invertedZShape, \n" | |
912 " const ivec2 stepRight) { \n" | |
913 " // TODO: there must be a cleaner and faster way to get to these - \n" | |
914 " // a precalculated array indexing maybe? \n" | |
915 " uint maskLeft, bitsContinueLeft, maskRight, bitsContinueRight; \n" | |
916 " { \n" | |
917 " // Horizontal (vertical is the same, just rotated 90º \n" | |
918 " // counter-clockwise) \n" | |
919 " // Inverted Z case: // Normal Z case: \n" | |
920 " // __ // __ \n" | |
921 " // X| // X| \n" | |
922 " // -- // -- \n" | |
923 " // \n" | |
924 " uint maskTraceLeft, maskTraceRight; \n" | |
925 " uint maskStopLeft, maskStopRight; \n" | |
926 " if (horizontal) { \n" | |
927 " if (invertedZShape) { \n" | |
928 " maskTraceLeft = 0x02u; // tracing bottom edge \n" | |
929 " maskTraceRight = 0x08u; // tracing top edge \n" | |
930 " } else { \n" | |
931 " maskTraceLeft = 0x08u; // tracing top edge \n" | |
932 " maskTraceRight = 0x02u; // tracing bottom edge \n" | |
933 " } \n" | |
934 " maskStopLeft = 0x01u; // stop on right edge \n" | |
935 " maskStopRight = 0x04u; // stop on left edge \n" | |
936 " } else { \n" | |
937 " if (invertedZShape) { \n" | |
938 " maskTraceLeft = 0x01u; // tracing right edge \n" | |
939 " maskTraceRight = 0x04u; // tracing left edge \n" | |
940 " } else { \n" | |
941 " maskTraceLeft = 0x04u; // tracing left edge \n" | |
942 " maskTraceRight = 0x01u; // tracing right edge \n" | |
943 " } \n" | |
944 " maskStopLeft = 0x08u; // stop on top edge \n" | |
945 " maskStopRight = 0x02u; // stop on bottom edge \n" | |
946 " } \n" | |
947 " \n" | |
948 " maskLeft = maskTraceLeft | maskStopLeft; \n" | |
949 " bitsContinueLeft = maskTraceLeft; \n" | |
950 " maskRight = maskTraceRight | maskStopRight; \n" | |
951 " bitsContinueRight = maskTraceRight; \n" | |
952 " } \n" | |
953 "///////////////////////////////////////////////////////////////////////\n" | |
954 " \n" | |
955 "#ifdef SETTINGS_ALLOW_SHORT_Zs \n" | |
956 " int i = 1; \n" | |
957 "#else \n" | |
958 " int i = 2; // starting from 2 because we already know it's at least 2\n" | |
959 "#endif \n" | |
960 " for (; i < c_maxLineLength; i++) { \n" | |
961 " uint edgeLeft = uint( \n" | |
962 " texelFetch(g_src0TextureFlt, \n" | |
963 " ivec2(screenPos.xy - stepRight * i), 0).r * 255.5); \n" | |
964 " uint edgeRight = uint( \n" | |
965 " texelFetch(g_src0TextureFlt, \n" | |
966 " ivec2(screenPos.xy + stepRight * (i + 1)), \n" | |
967 " 0).r * 255.5); \n" | |
968 " \n" | |
969 " // stop on encountering 'stopping' edge (as defined by masks) \n" | |
970 " int stopLeft = (edgeLeft & maskLeft) != bitsContinueLeft ? 1 : 0; \n" | |
971 " int stopRight = \n" | |
972 " (edgeRight & maskRight) != bitsContinueRight ? 1 : 0; \n" | |
973 " \n" | |
974 " if (bool(stopLeft) || bool(stopRight)) { \n" | |
975 " lineLengthLeft = 1 + i - stopLeft; \n" | |
976 " lineLengthRight = 1 + i - stopRight; \n" | |
977 " return; \n" | |
978 " } \n" | |
979 " } \n" | |
980 " lineLengthLeft = lineLengthRight = i; \n" | |
981 " return; \n" | |
982 "} \n" | |
983 " \n" | |
984 "void ProcessDetectedZ(ivec2 screenPos, bool horizontal, \n" | |
985 " bool invertedZShape) { \n" | |
986 " int lineLengthLeft, lineLengthRight; \n" | |
987 " \n" | |
988 " ivec2 stepRight = (horizontal) ? (ivec2(1, 0)) : (ivec2(0, -1)); \n" | |
989 " vec2 blendDir = (horizontal) ? (vec2(0, -1)) : (vec2(-1, 0)); \n" | |
990 " \n" | |
991 " FindLineLength(lineLengthLeft, lineLengthRight, screenPos, \n" | |
992 " horizontal, invertedZShape, stepRight); \n" | |
993 " \n" | |
994 " vec2 pixelSize = g_OneOverScreenSize; \n" | |
995 " \n" | |
996 " float leftOdd = 0.15 * float(lineLengthLeft % 2); \n" | |
997 " float rightOdd = 0.15 * float(lineLengthRight % 2); \n" | |
998 " \n" | |
999 " int loopFrom = -int((lineLengthLeft + 1) / 2) + 1; \n" | |
1000 " int loopTo = int((lineLengthRight + 1) / 2); \n" | |
1001 " \n" | |
1002 " float totalLength = float(loopTo - loopFrom) + 1.0 - leftOdd - \n" | |
1003 " rightOdd; \n" | |
1004 " \n" | |
1005 " for (int i = loopFrom; i <= loopTo; i++) { \n" | |
1006 " highp ivec2 pixelPos = screenPos + stepRight * i; \n" | |
1007 " vec2 pixelPosFlt = vec2(float(pixelPos.x) + 0.5, \n" | |
1008 " float(pixelPos.y) + 0.5); \n" | |
1009 " \n" | |
1010 "#ifdef DEBUG_OUTPUT_AAINFO \n" | |
1011 " imageStore(g_resultTextureSlot2, pixelPos, \n" | |
1012 " PackBlurAAInfo(pixelPos, 1u)); \n" | |
1013 "#endif \n" | |
1014 " \n" | |
1015 " float m = (float(i) + 0.5 - leftOdd - float(loopFrom)) / \n" | |
1016 " totalLength; \n" | |
1017 " m = saturate(m); \n" | |
1018 " float k = m - ((i > 0) ? 1.0 : 0.0); \n" | |
1019 " k = (invertedZShape) ? (-k) : (k); \n" | |
1020 " \n" | |
1021 " vec4 color = textureLod(g_screenTexture, \n" | |
1022 " (pixelPosFlt + blendDir * k) * pixelSize, \n" | |
1023 " 0.0); \n" | |
1024 " \n" | |
1025 "#ifdef IN_GAMMA_CORRECT_MODE \n" | |
1026 " color.rgb = D3DX_FLOAT3_to_SRGB(color.rgb); \n" | |
1027 "#endif \n" | |
1028 " imageStore(g_resultTextureFlt4Slot1, pixelPos, color); \n" | |
1029 " } \n" | |
1030 "} \n" | |
1031 " \n" | |
1032 "vec4 CalcDbgDisplayColor(const vec4 blurMap) { \n" | |
1033 " vec3 pixelC = vec3(0.0, 0.0, 0.0); \n" | |
1034 " vec3 pixelL = vec3(0.0, 0.0, 1.0); \n" | |
1035 " vec3 pixelT = vec3(1.0, 0.0, 0.0); \n" | |
1036 " vec3 pixelR = vec3(0.0, 1.0, 0.0); \n" | |
1037 " vec3 pixelB = vec3(0.8, 0.8, 0.0); \n" | |
1038 " \n" | |
1039 " const float centerWeight = 1.0; \n" | |
1040 " float fromBelowWeight = (1.0 / (1.0 - blurMap.x)) - 1.0; \n" | |
1041 " float fromAboveWeight = (1.0 / (1.0 - blurMap.y)) - 1.0; \n" | |
1042 " float fromRightWeight = (1.0 / (1.0 - blurMap.z)) - 1.0; \n" | |
1043 " float fromLeftWeight = (1.0 / (1.0 - blurMap.w)) - 1.0; \n" | |
1044 " \n" | |
1045 " float weightSum = centerWeight + dot(vec4(fromBelowWeight, \n" | |
1046 " fromAboveWeight, \n" | |
1047 " fromRightWeight, \n" | |
1048 " fromLeftWeight), \n" | |
1049 " vec4(1, 1, 1, 1)); \n" | |
1050 " \n" | |
1051 " vec4 pixel; \n" | |
1052 " \n" | |
1053 " pixel.rgb = pixelC.rgb + fromAboveWeight * pixelT + \n" | |
1054 " fromBelowWeight * pixelB + \n" | |
1055 " fromLeftWeight * pixelL + \n" | |
1056 " fromRightWeight * pixelR; \n" | |
1057 " pixel.rgb /= weightSum; \n" | |
1058 " \n" | |
1059 " pixel.a = dot(pixel.rgb, vec3(1, 1, 1)) * 100.0; \n" | |
1060 " \n" | |
1061 " return saturate(pixel); \n" | |
1062 "} \n" | |
1063 " \n" | |
1064 "#ifdef DETECT_EDGES1 \n" | |
1065 "layout(location = 0) out UVEC4 outEdges; \n" | |
1066 "void DetectEdges1() { \n" | |
1067 " uvec4 outputEdges; \n" | |
1068 " ivec2 screenPosI = ivec2(gl_FragCoord.xy) * ivec2(2, 2); \n" | |
1069 " \n" | |
1070 " // .rgb contains colour, .a contains flag whether to output it to \n" | |
1071 " // working colour texture \n" | |
1072 " vec4 pixel00 = texelFetch(g_screenTexture, screenPosI.xy, 0); \n" | |
1073 " vec4 pixel10 = \n" | |
1074 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(1, 0));\n" | |
1075 " vec4 pixel20 = \n" | |
1076 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(2, 0));\n" | |
1077 " vec4 pixel01 = \n" | |
1078 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(0, 1));\n" | |
1079 " vec4 pixel11 = \n" | |
1080 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(1, 1));\n" | |
1081 " vec4 pixel21 = \n" | |
1082 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(2, 1));\n" | |
1083 " vec4 pixel02 = \n" | |
1084 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(0, 2));\n" | |
1085 " vec4 pixel12 = \n" | |
1086 " texelFetchOffset(g_screenTexture, screenPosI.xy, 0, ivec2(1, 2));\n" | |
1087 " \n" | |
1088 " float storeFlagPixel00 = 0.0; \n" | |
1089 " float storeFlagPixel10 = 0.0; \n" | |
1090 " float storeFlagPixel20 = 0.0; \n" | |
1091 " float storeFlagPixel01 = 0.0; \n" | |
1092 " float storeFlagPixel11 = 0.0; \n" | |
1093 " float storeFlagPixel21 = 0.0; \n" | |
1094 " float storeFlagPixel02 = 0.0; \n" | |
1095 " float storeFlagPixel12 = 0.0; \n" | |
1096 " \n" | |
1097 " vec2 et; \n" | |
1098 " \n" | |
1099 "#ifdef EDGE_DETECT_THRESHOLD \n" | |
1100 " float threshold = c_ColorThreshold; \n" | |
1101 "#else \n" | |
1102 " float threshold = g_ColorThreshold; \n" | |
1103 "#endif \n" | |
1104 " \n" | |
1105 " { \n" | |
1106 " et.x = EdgeDetectColorCalcDiff(pixel00.rgb, pixel10.rgb); \n" | |
1107 " et.y = EdgeDetectColorCalcDiff(pixel00.rgb, pixel01.rgb); \n" | |
1108 " et = saturate(et - threshold); \n" | |
1109 " ivec2 eti = ivec2(et * 15.0 + 0.99); \n" | |
1110 " outputEdges.x = uint(eti.x | (eti.y << 4)); \n" | |
1111 " \n" | |
1112 " storeFlagPixel00 += et.x; \n" | |
1113 " storeFlagPixel00 += et.y; \n" | |
1114 " storeFlagPixel10 += et.x; \n" | |
1115 " storeFlagPixel01 += et.y; \n" | |
1116 " } \n" | |
1117 " \n" | |
1118 " { \n" | |
1119 " et.x = EdgeDetectColorCalcDiff(pixel10.rgb, pixel20.rgb); \n" | |
1120 " et.y = EdgeDetectColorCalcDiff(pixel10.rgb, pixel11.rgb); \n" | |
1121 " et = saturate(et - threshold); \n" | |
1122 " ivec2 eti = ivec2(et * 15.0 + 0.99); \n" | |
1123 " outputEdges.y = uint(eti.x | (eti.y << 4)); \n" | |
1124 " \n" | |
1125 " storeFlagPixel10 += et.x; \n" | |
1126 " storeFlagPixel10 += et.y; \n" | |
1127 " storeFlagPixel20 += et.x; \n" | |
1128 " storeFlagPixel11 += et.y; \n" | |
1129 " } \n" | |
1130 " \n" | |
1131 " { \n" | |
1132 " et.x = EdgeDetectColorCalcDiff(pixel01.rgb, pixel11.rgb); \n" | |
1133 " et.y = EdgeDetectColorCalcDiff(pixel01.rgb, pixel02.rgb); \n" | |
1134 " et = saturate(et - threshold); \n" | |
1135 " ivec2 eti = ivec2(et * 15.0 + 0.99); \n" | |
1136 " outputEdges.z = uint(eti.x | (eti.y << 4)); \n" | |
1137 " \n" | |
1138 " storeFlagPixel01 += et.x; \n" | |
1139 " storeFlagPixel01 += et.y; \n" | |
1140 " storeFlagPixel11 += et.x; \n" | |
1141 " storeFlagPixel02 += et.y; \n" | |
1142 " } \n" | |
1143 " \n" | |
1144 " { \n" | |
1145 " et.x = EdgeDetectColorCalcDiff(pixel11.rgb, pixel21.rgb); \n" | |
1146 " et.y = EdgeDetectColorCalcDiff(pixel11.rgb, pixel12.rgb); \n" | |
1147 " et = saturate(et - threshold); \n" | |
1148 " ivec2 eti = ivec2(et * 15.0 + 0.99); \n" | |
1149 " outputEdges.w = uint(eti.x | (eti.y << 4)); \n" | |
1150 " \n" | |
1151 " storeFlagPixel11 += et.x; \n" | |
1152 " storeFlagPixel11 += et.y; \n" | |
1153 " storeFlagPixel21 += et.x; \n" | |
1154 " storeFlagPixel12 += et.y; \n" | |
1155 " } \n" | |
1156 " \n" | |
1157 " gl_FragDepth = any(bvec4(outputEdges)) ? 1.0 : 0.0; \n" | |
1158 " \n" | |
1159 " if (gl_FragDepth != 0.0) { \n" | |
1160 " if (storeFlagPixel00 != 0.0) \n" | |
1161 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(0, 0),\n" | |
1162 " pixel00); \n" | |
1163 " if (storeFlagPixel10 != 0.0) \n" | |
1164 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(1, 0),\n" | |
1165 " pixel10); \n" | |
1166 " if (storeFlagPixel20 != 0.0) \n" | |
1167 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(2, 0),\n" | |
1168 " pixel20); \n" | |
1169 " if (storeFlagPixel01 != 0.0) \n" | |
1170 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(0, 1),\n" | |
1171 " pixel01); \n" | |
1172 " if (storeFlagPixel02 != 0.0) \n" | |
1173 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(0, 2),\n" | |
1174 " pixel02); \n" | |
1175 " if (storeFlagPixel11 != 0.0) \n" | |
1176 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(1, 1),\n" | |
1177 " pixel11); \n" | |
1178 " if (storeFlagPixel21 != 0.0) \n" | |
1179 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(2, 1),\n" | |
1180 " pixel21); \n" | |
1181 " if (storeFlagPixel12 != 0.0) \n" | |
1182 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy + ivec2(1, 2),\n" | |
1183 " pixel12); \n" | |
1184 " } \n" | |
1185 " outEdges = STORE_UVEC4(outputEdges); \n" | |
1186 "} \n" | |
1187 "#endif // DETECT_EDGES1 \n" | |
1188 " \n" | |
1189 "vec2 UnpackThresholds(uint val) { \n" | |
1190 " return vec2(val & 0x0Fu, val >> 4u) / 15.0f; \n" | |
1191 "} \n" | |
1192 " \n" | |
1193 "uint PruneNonDominantEdges(vec4 edges[3]) { \n" | |
1194 " vec4 maxE4 = vec4(0.0, 0.0, 0.0, 0.0); \n" | |
1195 " // avg: mean of all 3 * 4 edge strengths; maxE4: per-lane maxima \n" | |
1196 " float avg = 0.0; \n" | |
1197 " \n" | |
1198 " for (int i = 0; i < 3; i++) { \n" | |
1199 " maxE4 = max(maxE4, edges[i]); \n" | |
1200 " // accumulate: plain assignment would keep only edges[2] \n" | |
1201 " avg += dot(edges[i], vec4(1, 1, 1, 1) / (3.0 * 4.0)); \n" | |
1202 " } \n" | |
1203 " \n" | |
1204 " vec2 maxE2 = max(maxE4.xy, maxE4.zw); \n" | |
1205 " float maxE = max(maxE2.x, maxE2.y); \n" | |
1206 " \n" | |
1207 " float threshold = avg * 0.65 + maxE * 0.35; \n" | |
1208 " \n" | |
1209 " // threshold = 0.0001; // this disables non-dominant edge pruning! \n" | |
1210 " // keep edges[0].x / edges[0].y (bits 0x01 / 0x02) if >= threshold \n" | |
1211 " uint cx = edges[0].x >= threshold ? 1u : 0u; \n" | |
1212 " uint cy = edges[0].y >= threshold ? 1u : 0u; \n" | |
1213 " return PackEdge(uvec4(cx, cy, 0, 0)); \n" | |
1214 "} \n" | |
1215 " \n" | |
1216 "void CollectEdges(int offX, \n" | |
1217 " int offY, \n" | |
1218 " out vec4 edges[3], \n" | |
1219 " const uint packedVals[6 * 6]) { \n" | |
1220 " vec2 pixelP0P0 = UnpackThresholds(packedVals[(offX)*6+(offY)]); \n" | |
1221 " vec2 pixelP1P0 = UnpackThresholds(packedVals[(offX+1)*6+(offY)]); \n" | |
1222 " vec2 pixelP0P1 = UnpackThresholds(packedVals[(offX)*6+(offY+1)]); \n" | |
1223 " vec2 pixelM1P0 = UnpackThresholds(packedVals[(offX-1)*6 +(offY)]); \n" | |
1224 " vec2 pixelP0M1 = UnpackThresholds(packedVals[(offX)*6+(offY-1)]); \n" | |
1225 " vec2 pixelP1M1 = UnpackThresholds(packedVals[(offX+1)*6 +(offY-1)]); \n" | |
1226 " vec2 pixelM1P1 = UnpackThresholds(packedVals[(offX-1)*6+(offY+1)]); \n" | |
1227 " \n" | |
1228 " edges[0].x = pixelP0P0.x; \n" | |
1229 " edges[0].y = pixelP0P0.y; \n" | |
1230 " edges[0].z = pixelP1P0.x; \n" | |
1231 " edges[0].w = pixelP1P0.y; \n" | |
1232 " edges[1].x = pixelP0P1.x; \n" | |
1233 " edges[1].y = pixelP0P1.y; \n" | |
1234 " edges[1].z = pixelM1P0.x; \n" | |
1235 " edges[1].w = pixelM1P0.y; \n" | |
1236 " edges[2].x = pixelP0M1.x; \n" | |
1237 " edges[2].y = pixelP0M1.y; \n" | |
1238 " edges[2].z = pixelP1M1.y; \n" | |
1239 " edges[2].w = pixelM1P1.x; \n" | |
1240 "} \n" | |
1241 " \n" | |
1242 "#ifdef DETECT_EDGES2 \n" | |
1243 "layout(early_fragment_tests) in; \n" | |
1244 "void DetectEdges2() { \n" | |
1245 " ivec2 screenPosI = ivec2(gl_FragCoord.xy); \n" | |
1246 " // packedVals[x * 6 + y]: 6x6 window, one 2x2 block per fetch \n" | |
1247 " // source : edge differences from previous pass \n" | |
1248 " uint packedVals[6 * 6]; \n" | |
1249 " \n" | |
1250 " // center pixel (our output) \n" | |
1251 " UVEC4 packedQ4 = texelFetch(g_src0Texture4Uint, screenPosI.xy, 0); \n" | |
1252 " packedVals[(2) * 6 + (2)] = LOAD_UINT(packedQ4.x); \n" | |
1253 " packedVals[(3) * 6 + (2)] = LOAD_UINT(packedQ4.y); \n" | |
1254 " packedVals[(2) * 6 + (3)] = LOAD_UINT(packedQ4.z); \n" | |
1255 " packedVals[(3) * 6 + (3)] = LOAD_UINT(packedQ4.w); \n" | |
1256 " \n" | |
1257 " vec4 edges[3]; \n" | |
1258 " if (bool(packedVals[(2) * 6 + (2)]) || \n" | |
1259 " bool(packedVals[(3) * 6 + (2)])) { \n" | |
1260 " UVEC4 packedQ1 = texelFetchOffset(g_src0Texture4Uint, \n" | |
1261 " screenPosI.xy, 0, ivec2(0, -1)); \n" | |
1262 " packedVals[(2) * 6 + (0)] = LOAD_UINT(packedQ1.x); \n" | |
1263 " packedVals[(3) * 6 + (0)] = LOAD_UINT(packedQ1.y); \n" | |
1264 " packedVals[(2) * 6 + (1)] = LOAD_UINT(packedQ1.z); \n" | |
1265 " packedVals[(3) * 6 + (1)] = LOAD_UINT(packedQ1.w); \n" | |
1266 " } \n" | |
1267 " \n" | |
1268 " if (bool(packedVals[(2) * 6 + (2)]) || \n" | |
1269 " bool(packedVals[(2) * 6 + (3)])) { \n" | |
1270 " UVEC4 packedQ3 = texelFetchOffset(g_src0Texture4Uint, \n" | |
1271 " screenPosI.xy, 0, ivec2(-1, 0)); \n" | |
1272 " packedVals[(0) * 6 + (2)] = LOAD_UINT(packedQ3.x); \n" | |
1273 " packedVals[(1) * 6 + (2)] = LOAD_UINT(packedQ3.y); \n" | |
1274 " packedVals[(0) * 6 + (3)] = LOAD_UINT(packedQ3.z); \n" | |
1275 " packedVals[(1) * 6 + (3)] = LOAD_UINT(packedQ3.w); \n" | |
1276 " } \n" | |
1277 " \n" | |
1278 " if (bool(packedVals[(2) * 6 + (2)])) { \n" | |
1279 " CollectEdges(2, 2, edges, packedVals); \n" | |
1280 " uint pe = PruneNonDominantEdges(edges); \n" | |
1281 " if (pe != 0u) { \n" | |
1282 " imageStore(g_resultTexture, 2 * screenPosI.xy + ivec2(0, 0), \n" | |
1283 " vec4(float(0x80u | pe) / 255.0, 0, 0, 0)); \n" | |
1284 " } \n" | |
1285 " } \n" | |
1286 " \n" | |
1287 " if (bool(packedVals[(3) * 6 + (2)]) || \n" | |
1288 " bool(packedVals[(3) * 6 + (3)])) { \n" | |
1289 " UVEC4 packedQ5 = texelFetchOffset(g_src0Texture4Uint, \n" | |
1290 " screenPosI.xy, 0, ivec2(1, 0)); \n" | |
1291 " packedVals[(4) * 6 + (2)] = LOAD_UINT(packedQ5.x); \n" | |
1292 " packedVals[(5) * 6 + (2)] = LOAD_UINT(packedQ5.y); \n" | |
1293 " packedVals[(4) * 6 + (3)] = LOAD_UINT(packedQ5.z); \n" | |
1294 " packedVals[(5) * 6 + (3)] = LOAD_UINT(packedQ5.w); \n" | |
1295 " } \n" | |
1296 " \n" | |
1297 " if (bool(packedVals[(3) * 6 + (2)])) { \n" | |
1298 " UVEC4 packedQ2 = texelFetchOffset(g_src0Texture4Uint, \n" | |
1299 " screenPosI.xy, 0, ivec2(1, -1)); \n" | |
1300 " packedVals[(4) * 6 + (0)] = LOAD_UINT(packedQ2.x); \n" | |
1301 " packedVals[(5) * 6 + (0)] = LOAD_UINT(packedQ2.y); \n" | |
1302 " packedVals[(4) * 6 + (1)] = LOAD_UINT(packedQ2.z); \n" | |
1303 " packedVals[(5) * 6 + (1)] = LOAD_UINT(packedQ2.w); \n" | |
1304 " \n" | |
1305 " CollectEdges(3, 2, edges, packedVals); \n" | |
1306 " uint pe = PruneNonDominantEdges(edges); \n" | |
1307 " if (pe != 0u) { \n" | |
1308 " imageStore(g_resultTexture, 2 * screenPosI.xy + ivec2(1, 0), \n" | |
1309 " vec4(float(0x80u | pe) / 255.0, 0, 0, 0)); \n" | |
1310 " } \n" | |
1311 " } \n" | |
1312 " \n" | |
1313 " if (bool(packedVals[(2) * 6 + (3)]) || \n" | |
1314 " bool(packedVals[(3) * 6 + (3)])) { \n" | |
1315 " UVEC4 packedQ7 = texelFetchOffset(g_src0Texture4Uint, \n" | |
1316 " screenPosI.xy, 0, ivec2(0, 1)); \n" | |
1317 " packedVals[(2) * 6 + (4)] = LOAD_UINT(packedQ7.x); \n" | |
1318 " packedVals[(3) * 6 + (4)] = LOAD_UINT(packedQ7.y); \n" | |
1319 " packedVals[(2) * 6 + (5)] = LOAD_UINT(packedQ7.z); \n" | |
1320 " packedVals[(3) * 6 + (5)] = LOAD_UINT(packedQ7.w); \n" | |
1321 " } \n" | |
1322 " // columns 0-1, rows 4-5 come from the block at offset (-1, +1) \n" | |
1323 " if (bool(packedVals[(2) * 6 + (3)])) { \n" | |
1324 " UVEC4 packedQ6 = texelFetchOffset(g_src0Texture4Uint, \n" | |
1325 " screenPosI.xy, 0, ivec2(-1, 1)); \n" | |
1326 " packedVals[(0) * 6 + (4)] = LOAD_UINT(packedQ6.x); \n" | |
1327 " packedVals[(1) * 6 + (4)] = LOAD_UINT(packedQ6.y); \n" | |
1328 " packedVals[(0) * 6 + (5)] = LOAD_UINT(packedQ6.z); \n" | |
1329 " packedVals[(1) * 6 + (5)] = LOAD_UINT(packedQ6.w); \n" | |
1330 " \n" | |
1331 " CollectEdges(2, 3, edges, packedVals); \n" | |
1332 " uint pe = PruneNonDominantEdges(edges); \n" | |
1333 " if (pe != 0u) { \n" | |
1334 " imageStore(g_resultTexture, 2 * screenPosI.xy + ivec2(0, 1), \n" | |
1335 " vec4(float(0x80u | pe) / 255.0, 0, 0, 0)); \n" | |
1336 " } \n" | |
1337 " } \n" | |
1338 " \n" | |
1339 " if (bool(packedVals[(3) * 6 + (3)])) { \n" | |
1340 " CollectEdges(3, 3, edges, packedVals); \n" | |
1341 " uint pe = PruneNonDominantEdges(edges); \n" | |
1342 " if (pe != 0u) { \n" | |
1343 " imageStore(g_resultTexture, 2 * screenPosI.xy + ivec2(1, 1), \n" | |
1344 " vec4(float(0x80u | pe) / 255.0, 0, 0, 0)); \n" | |
1345 " } \n" | |
1346 " } \n" | |
1347 "} \n" | |
1348 "#endif // DETECT_EDGES2 \n" | |
1349 " \n" | |
1350 "#ifdef COMBINE_EDGES \n" | |
1351 "void CombineEdges() { \n" | |
1352 " ivec3 screenPosIBase = ivec3(ivec2(gl_FragCoord.xy) * 2, 0); \n" | |
1353 " vec3 screenPosBase = vec3(screenPosIBase); \n" | |
1354 " uint packedEdgesArray[3 * 3]; \n" | |
1355 " \n" | |
1356 " // use only if it has the 'prev frame' flag:[sample * 255.0 - 127.5] \n" | |
1357 " //-> if it has the last bit flag (128), it's going to stay above 0 \n" | |
1358 " uvec4 sampA = uvec4( \n" | |
1359 " textureGatherOffset(g_src0TextureFlt, \n" | |
1360 " screenPosBase.xy * g_OneOverScreenSize, \n" | |
1361 " ivec2(1, 0)) * 255.0 - 127.5); \n" | |
1362 " uvec4 sampB = uvec4( \n" | |
1363 " textureGatherOffset(g_src0TextureFlt, \n" | |
1364 " screenPosBase.xy * g_OneOverScreenSize, \n" | |
1365 " ivec2(0, 1)) * 255.0 - 127.5); \n" | |
1366 " uint sampC = uint( \n" | |
1367 " texelFetchOffset(g_src0TextureFlt, screenPosIBase.xy, 0, \n" | |
1368 " ivec2(1, 1)).r * 255.0 - 127.5); \n" | |
1369 " \n" | |
1370 " packedEdgesArray[(0) * 3 + (0)] = 0u; \n" | |
1371 " packedEdgesArray[(1) * 3 + (0)] = sampA.w; \n" | |
1372 " packedEdgesArray[(2) * 3 + (0)] = sampA.z; \n" | |
1373 " packedEdgesArray[(1) * 3 + (1)] = sampA.x; \n" | |
1374 " packedEdgesArray[(2) * 3 + (1)] = sampA.y; \n" | |
1375 " packedEdgesArray[(0) * 3 + (1)] = sampB.w; \n" | |
1376 " packedEdgesArray[(0) * 3 + (2)] = sampB.x; \n" | |
1377 " packedEdgesArray[(1) * 3 + (2)] = sampB.y; \n" | |
1378 " packedEdgesArray[(2) * 3 + (2)] = sampC; \n" | |
1379 " \n" | |
1380 " uvec4 pixelsC = uvec4(packedEdgesArray[(1 + 0) * 3 + (1 + 0)], \n" | |
1381 " packedEdgesArray[(1 + 1) * 3 + (1 + 0)], \n" | |
1382 " packedEdgesArray[(1 + 0) * 3 + (1 + 1)], \n" | |
1383 " packedEdgesArray[(1 + 1) * 3 + (1 + 1)]); \n" | |
1384 " uvec4 pixelsL = uvec4(packedEdgesArray[(0 + 0) * 3 + (1 + 0)], \n" | |
1385 " packedEdgesArray[(0 + 1) * 3 + (1 + 0)], \n" | |
1386 " packedEdgesArray[(0 + 0) * 3 + (1 + 1)], \n" | |
1387 " packedEdgesArray[(0 + 1) * 3 + (1 + 1)]); \n" | |
1388 " uvec4 pixelsU = uvec4(packedEdgesArray[(1 + 0) * 3 + (0 + 0)], \n" | |
1389 " packedEdgesArray[(1 + 1) * 3 + (0 + 0)], \n" | |
1390 " packedEdgesArray[(1 + 0) * 3 + (0 + 1)], \n" | |
1391 " packedEdgesArray[(1 + 1) * 3 + (0 + 1)]); \n" | |
1392 " \n" | |
1393 " uvec4 outEdge4 = \n" | |
1394 " pixelsC | ((pixelsL & 0x01u) << 2u) | ((pixelsU & 0x02u) << 2u); \n" | |
1395 " vec4 outEdge4Flt = vec4(outEdge4) / 255.0; \n" | |
1396 " \n" | |
1397 " imageStore(g_resultTextureSlot2, screenPosIBase.xy + ivec2(0, 0), \n" | |
1398 " outEdge4Flt.xxxx); \n" | |
1399 " imageStore(g_resultTextureSlot2, screenPosIBase.xy + ivec2(1, 0), \n" | |
1400 " outEdge4Flt.yyyy); \n" | |
1401 " imageStore(g_resultTextureSlot2, screenPosIBase.xy + ivec2(0, 1), \n" | |
1402 " outEdge4Flt.zzzz); \n" | |
1403 " imageStore(g_resultTextureSlot2, screenPosIBase.xy + ivec2(1, 1), \n" | |
1404 " outEdge4Flt.wwww); \n" | |
1405 " \n" | |
1406 " // uvec4 numberOfEdges4 = uvec4(bitCount(outEdge4)); \n" | |
1407 " // gl_FragDepth = \n" | |
1408 " // any(greaterThan(numberOfEdges4, uvec4(1))) ? 1.0 : 0.0; \n" | |
1409 " \n" | |
1410 " gl_FragDepth = \n" | |
1411 " any(greaterThan(outEdge4, uvec4(1))) ? 1.0 : 0.0; \n" | |
1412 "} \n" | |
1413 "#endif // COMBINE_EDGES \n" | |
1414 " \n" | |
1415 "#ifdef BLUR_EDGES \n" | |
1416 "layout(early_fragment_tests) in; \n" | |
1417 "void BlurEdges() { \n" | |
1418 " int _i; \n" | |
1419 " \n" | |
1420 " ivec3 screenPosIBase = ivec3(ivec2(gl_FragCoord.xy) * 2, 0); \n" | |
1421 " vec3 screenPosBase = vec3(screenPosIBase); \n" | |
1422 " uint forFollowUpCount = 0u; \n" | |
1423 " ivec4 forFollowUpCoords[4]; \n" | |
1424 " \n" | |
1425 " uint packedEdgesArray[4 * 4]; \n" | |
1426 " \n" | |
1427 " uvec4 sampA = uvec4( \n" | |
1428 " textureGatherOffset(g_src0TextureFlt, \n" | |
1429 " screenPosBase.xy * g_OneOverScreenSize, \n" | |
1430 " ivec2(0, 0)) *255.5); \n" | |
1431 " uvec4 sampB = uvec4( \n" | |
1432 " textureGatherOffset(g_src0TextureFlt, \n" | |
1433 " screenPosBase.xy * g_OneOverScreenSize, \n" | |
1434 " ivec2(2, 0)) *255.5); \n" | |
1435 " uvec4 sampC = uvec4( \n" | |
1436 " textureGatherOffset(g_src0TextureFlt, \n" | |
1437 " screenPosBase.xy * g_OneOverScreenSize, \n" | |
1438 " ivec2(0, 2)) *255.5); \n" | |
1439 " uvec4 sampD = uvec4( \n" | |
1440 " textureGatherOffset(g_src0TextureFlt, \n" | |
1441 " screenPosBase.xy * g_OneOverScreenSize, \n" | |
1442 " ivec2(2, 2)) *255.5); \n" | |
1443 " \n" | |
1444 " packedEdgesArray[(0) * 4 + (0)] = sampA.w; \n" | |
1445 " packedEdgesArray[(1) * 4 + (0)] = sampA.z; \n" | |
1446 " packedEdgesArray[(0) * 4 + (1)] = sampA.x; \n" | |
1447 " packedEdgesArray[(1) * 4 + (1)] = sampA.y; \n" | |
1448 " packedEdgesArray[(2) * 4 + (0)] = sampB.w; \n" | |
1449 " packedEdgesArray[(3) * 4 + (0)] = sampB.z; \n" | |
1450 " packedEdgesArray[(2) * 4 + (1)] = sampB.x; \n" | |
1451 " packedEdgesArray[(3) * 4 + (1)] = sampB.y; \n" | |
1452 " packedEdgesArray[(0) * 4 + (2)] = sampC.w; \n" | |
1453 " packedEdgesArray[(1) * 4 + (2)] = sampC.z; \n" | |
1454 " packedEdgesArray[(0) * 4 + (3)] = sampC.x; \n" | |
1455 " packedEdgesArray[(1) * 4 + (3)] = sampC.y; \n" | |
1456 " packedEdgesArray[(2) * 4 + (2)] = sampD.w; \n" | |
1457 " packedEdgesArray[(3) * 4 + (2)] = sampD.z; \n" | |
1458 " packedEdgesArray[(2) * 4 + (3)] = sampD.x; \n" | |
1459 " packedEdgesArray[(3) * 4 + (3)] = sampD.y; \n" | |
1460 " \n" | |
1461 " for (_i = 0; _i < 4; _i++) { \n" | |
1462 " int _x = _i % 2; \n" | |
1463 " int _y = _i / 2; \n" | |
1464 " \n" | |
1465 " ivec3 screenPosI = screenPosIBase + ivec3(_x, _y, 0); \n" | |
1466 " \n" | |
1467 " uint packedEdgesC = packedEdgesArray[(1 + _x) * 4 + (1 + _y)]; \n" | |
1468 " \n" | |
1469 " uvec4 edges = UnpackEdge(packedEdgesC); \n" | |
1470 " vec4 edgesFlt = vec4(edges); \n" | |
1471 " \n" | |
1472 " float numberOfEdges = dot(edgesFlt, vec4(1, 1, 1, 1)); \n" | |
1473 " if (numberOfEdges < 2.0) \n" | |
1474 " continue; \n" | |
1475 " \n" | |
1476 " float fromRight = edgesFlt.r; \n" | |
1477 " float fromBelow = edgesFlt.g; \n" | |
1478 " float fromLeft = edgesFlt.b; \n" | |
1479 " float fromAbove = edgesFlt.a; \n" | |
1480 " \n" | |
1481 " vec4 xFroms = vec4(fromBelow, fromAbove, fromRight, fromLeft); \n" | |
1482 " \n" | |
1483 " float blurCoeff = 0.0; \n" | |
1484 " \n" | |
1485 " // These are additional blurs that complement the main line-based \n" | |
1486 " // blurring; Unlike line-based, these do not necessarily preserve \n" | |
1487 " // the total amount of screen colour as they will take \n" | |
1488 " // neighbouring pixel colours and apply them to the one currently \n" | |
1489 " // processed. \n" | |
1490 " \n" | |
1491 " // 1.) L-like shape. \n" | |
1492 " // For this shape, the total amount of screen colour will be \n" | |
1493 " // preserved when this is a part of a (zigzag) diagonal line as the\n" | |
1494 " // corners from the other side will do the same and take some of \n" | |
1495 " // the current pixel's colour in return. \n" | |
1496 " // However, in the case when this is an actual corner, the pixel's \n" | |
1497 " // colour will be partially overwritten by it's 2 neighbours. \n" | |
1498 " // if( numberOfEdges > 1.0 ) \n" | |
1499 " { \n" | |
1500 " // with value of 0.15, the pixel will retain approx 77% of its \n" | |
1501 " // colour and the remaining 23% will come from its 2 neighbours \n" | |
1502 " // (which are likely to be blurred too in the opposite direction)\n" | |
1503 " blurCoeff = 0.08; \n" | |
1504 " \n" | |
1505 " // Only do blending if it's L shape - if we're between two \n" | |
1506 " // parallel edges, don't do anything \n" | |
1507 " blurCoeff *= (1.0 - fromBelow * fromAbove) * \n" | |
1508 " (1.0 - fromRight * fromLeft); \n" | |
1509 " } \n" | |
1510 " \n" | |
1511 " // 2.) U-like shape (surrounded with edges from 3 sides) \n" | |
1512 " if (numberOfEdges > 2.0) { \n" | |
1513 " // with value of 0.13, the pixel will retain approx 72% of its \n" | |
1514 " // colour and the remaining 28% will be picked from its 3 \n" | |
1515 " // neighbours (which are unlikely to be blurred too but could be)\n" | |
1516 " blurCoeff = 0.11; \n" | |
1517 " } \n" | |
1518 " \n" | |
1519 " // 3.) Completely surrounded with edges from all 4 sides \n" | |
1520 " if (numberOfEdges > 3.0) { \n" | |
1521 " // with value of 0.07, the pixel will retain 78% of its colour \n" | |
1522 " // and the remaining 22% will come from its 4 neighbours (which \n" | |
1523 " // are unlikely to be blurred) \n" | |
1524 " blurCoeff = 0.05; \n" | |
1525 " } \n" | |
1526 " \n" | |
1527 " if (blurCoeff == 0.0) { \n" | |
1528 " // this avoids Z search below as well but that's ok because a Z \n" | |
1529 " // shape will also always have some blurCoeff \n" | |
1530 " continue; \n" | |
1531 " } \n" | |
1532 " \n" | |
1533 " vec4 blurMap = xFroms * blurCoeff; \n" | |
1534 " \n" | |
1535 " vec4 pixelC = texelFetch(g_screenTexture, screenPosI.xy, 0); \n" | |
1536 " \n" | |
1537 " const float centerWeight = 1.0; \n" | |
1538 " float fromBelowWeight = blurMap.x; \n" | |
1539 " float fromAboveWeight = blurMap.y; \n" | |
1540 " float fromRightWeight = blurMap.z; \n" | |
1541 " float fromLeftWeight = blurMap.w; \n" | |
1542 " \n" | |
1543 " // this would be the proper math for blending if we were handling \n" | |
1544 " // lines (Zs) and mini kernel smoothing here, but since we're doing\n" | |
1545 " // lines separately, no need to complicate, just tweak the settings\n" | |
1546 " // float fromBelowWeight = (1.0 / (1.0 - blurMap.x)) - 1.0; \n" | |
1547 " // float fromAboveWeight = (1.0 / (1.0 - blurMap.y)) - 1.0; \n" | |
1548 " // float fromRightWeight = (1.0 / (1.0 - blurMap.z)) - 1.0; \n" | |
1549 " // float fromLeftWeight = (1.0 / (1.0 - blurMap.w)) - 1.0; \n" | |
1550 " \n" | |
1551 " float fourWeightSum = dot(blurMap, vec4(1, 1, 1, 1)); \n" | |
1552 " float allWeightSum = centerWeight + fourWeightSum; \n" | |
1553 " \n" | |
1554 " vec4 color = vec4(0, 0, 0, 0); \n" | |
1555 " if (fromLeftWeight > 0.0) { \n" | |
1556 " vec3 pixelL = texelFetchOffset(g_screenTexture, screenPosI.xy, 0,\n" | |
1557 " ivec2(-1, 0)).rgb; \n" | |
1558 " color.rgb += fromLeftWeight * pixelL; \n" | |
1559 " } \n" | |
1560 " if (fromAboveWeight > 0.0) { \n" | |
1561 " vec3 pixelT = texelFetchOffset(g_screenTexture, screenPosI.xy, 0,\n" | |
1562 " ivec2(0, -1)).rgb; \n" | |
1563 " color.rgb += fromAboveWeight * pixelT; \n" | |
1564 " } \n" | |
1565 " if (fromRightWeight > 0.0) { \n" | |
1566 " vec3 pixelR = texelFetchOffset(g_screenTexture, screenPosI.xy, 0,\n" | |
1567 " ivec2(1, 0)).rgb; \n" | |
1568 " color.rgb += fromRightWeight * pixelR; \n" | |
1569 " } \n" | |
1570 " if (fromBelowWeight > 0.0) { \n" | |
1571 " vec3 pixelB = texelFetchOffset(g_screenTexture, screenPosI.xy, 0,\n" | |
1572 " ivec2(0, 1)).rgb; \n" | |
1573 " color.rgb += fromBelowWeight * pixelB; \n" | |
1574 " } \n" | |
1575 " \n" | |
1576 " color /= fourWeightSum + 0.0001; \n" | |
1577 " color.a = 1.0 - centerWeight / allWeightSum; \n" | |
1578 " \n" | |
1579 " color.rgb = mix(pixelC.rgb, color.rgb, color.a).rgb; \n" | |
1580 "#ifdef IN_GAMMA_CORRECT_MODE \n" | |
1581 " color.rgb = D3DX_FLOAT3_to_SRGB(color.rgb); \n" | |
1582 "#endif \n" | |
1583 " \n" | |
1584 "#ifdef DEBUG_OUTPUT_AAINFO \n" | |
1585 " imageStore(g_resultTextureSlot2, screenPosI.xy, \n" | |
1586 " PackBlurAAInfo(screenPosI.xy, uint(numberOfEdges))); \n" | |
1587 "#endif \n" | |
1588 " imageStore(g_resultTextureFlt4Slot1, screenPosI.xy, \n" | |
1589 " vec4(color.rgb, pixelC.a)); \n" | |
1590 " \n" | |
1591 " if (numberOfEdges == 2.0) { \n" | |
1592 " uint packedEdgesL = packedEdgesArray[(0 + _x) * 4 + (1 + _y)]; \n" | |
1593 " uint packedEdgesT = packedEdgesArray[(1 + _x) * 4 + (0 + _y)]; \n" | |
1594 " uint packedEdgesR = packedEdgesArray[(2 + _x) * 4 + (1 + _y)]; \n" | |
1595 " uint packedEdgesB = packedEdgesArray[(1 + _x) * 4 + (2 + _y)]; \n" | |
1596 " \n" | |
1597 " bool isHorizontalA = ((packedEdgesC) == (0x01u | 0x02u)) && \n" | |
1598 " ((packedEdgesR & (0x01u | 0x08u)) == (0x08u)); \n" | |
1599 " bool isHorizontalB = ((packedEdgesC) == (0x01u | 0x08u)) && \n" | |
1600 " ((packedEdgesR & (0x01u | 0x02u)) == (0x02u)); \n" | |
1601 " \n" | |
1602 " bool isHCandidate = isHorizontalA || isHorizontalB; \n" | |
1603 " \n" | |
1604 " bool isVerticalA = ((packedEdgesC) == (0x08u | 0x01u)) && \n" | |
1605 " ((packedEdgesT & (0x08u | 0x04u)) == (0x04u)); \n" | |
1606 " bool isVerticalB = ((packedEdgesC) == (0x08u | 0x04u)) && \n" | |
1607 " ((packedEdgesT & (0x08u | 0x01u)) == (0x01u)); \n" | |
1608 " bool isVCandidate = isVerticalA || isVerticalB; \n" | |
1609 " \n" | |
1610 " bool isCandidate = isHCandidate || isVCandidate; \n" | |
1611 " \n" | |
1612 " if (!isCandidate) \n" | |
1613 " continue; \n" | |
1614 " \n" | |
1615 " bool horizontal = isHCandidate; \n" | |
1616 " \n" | |
1617 " // what if both are candidates? do additional pruning (still not \n" | |
1618 " // 100% but gets rid of worst case errors) \n" | |
1619 " if (isHCandidate && isVCandidate) \n" | |
1620 " horizontal = \n" | |
1621 " (isHorizontalA && ((packedEdgesL & 0x02u) == 0x02u)) || \n" | |
1622 " (isHorizontalB && ((packedEdgesL & 0x08u) == 0x08u)); \n" | |
1623 " \n" | |
1624 " ivec2 offsetC; \n" | |
1625 " uint packedEdgesM1P0; \n" | |
1626 " uint packedEdgesP1P0; \n" | |
1627 " if (horizontal) { \n" | |
1628 " packedEdgesM1P0 = packedEdgesL; \n" | |
1629 " packedEdgesP1P0 = packedEdgesR; \n" | |
1630 " offsetC = ivec2(2, 0); \n" | |
1631 " } else { \n" | |
1632 " packedEdgesM1P0 = packedEdgesB; \n" | |
1633 " packedEdgesP1P0 = packedEdgesT; \n" | |
1634 " offsetC = ivec2(0, -2); \n" | |
1635 " } \n" | |
1636 " \n" | |
1637 " uvec4 edgesM1P0 = UnpackEdge(packedEdgesM1P0); \n" | |
1638 " uvec4 edgesP1P0 = UnpackEdge(packedEdgesP1P0); \n" | |
1639 " uvec4 edgesP2P0 = UnpackEdge(uint(texelFetch( \n" | |
1640 " g_src0TextureFlt, screenPosI.xy + offsetC, 0).r * 255.5)); \n" | |
1641 " \n" | |
1642 " uvec4 arg0; \n" | |
1643 " uvec4 arg1; \n" | |
1644 " uvec4 arg2; \n" | |
1645 " uvec4 arg3; \n" | |
1646 " bool arg4; \n" | |
1647 " \n" | |
1648 " if (horizontal) { \n" | |
1649 " arg0 = uvec4(edges); \n" | |
1650 " arg1 = edgesM1P0; \n" | |
1651 " arg2 = edgesP1P0; \n" | |
1652 " arg3 = edgesP2P0; \n" | |
1653 " arg4 = true; \n" | |
1654 " } else { \n" | |
1655 " // Reuse the same code for vertical (used for horizontal above)\n" | |
1656 " // but rotate input data 90º counter-clockwise, so that: \n" | |
1657 " // left becomes bottom \n" | |
1658 " // top becomes left \n" | |
1659 " // right becomes top \n" | |
1660 " // bottom becomes right \n" | |
1661 " \n" | |
1662 " // we also have to rotate edges, thus .argb \n" | |
1663 " arg0 = uvec4(edges.argb); \n" | |
1664 " arg1 = edgesM1P0.argb; \n" | |
1665 " arg2 = edgesP1P0.argb; \n" | |
1666 " arg3 = edgesP2P0.argb; \n" | |
1667 " arg4 = false; \n" | |
1668 " } \n" | |
1669 " \n" | |
1670 " { \n" | |
1671 " ivec2 screenPos = screenPosI.xy; \n" | |
1672 " uvec4 _edges = arg0; \n" | |
1673 " uvec4 _edgesM1P0 = arg1; \n" | |
1674 " uvec4 _edgesP1P0 = arg2; \n" | |
1675 " uvec4 _edgesP2P0 = arg3; \n" | |
1676 " bool horizontal = arg4; \n" | |
1677 " // Inverted Z case: \n" | |
1678 " // __ \n" | |
1679 " // X| \n" | |
1680 " // ¯¯ \n" | |
1681 " bool isInvertedZ = false; \n" | |
1682 " bool isNormalZ = false; \n" | |
1683 " { \n" | |
1684 "#ifndef SETTINGS_ALLOW_SHORT_Zs \n" | |
1685 " // (1u-_edges.a) constraint can be removed; it was added for \n" | |
1686 " // some rare cases \n" | |
1687 " uint isZShape = _edges.r * _edges.g * _edgesM1P0.g * \n" | |
1688 " _edgesP1P0.a *_edgesP2P0.a * (1u - _edges.b) * \n" | |
1689 " (1u - _edgesP1P0.r) * (1u - _edges.a) * \n" | |
1690 " (1u - _edgesP1P0.g); \n" | |
1691 "#else \n" | |
1692 " uint isZShape = _edges.r * _edges.g * _edgesP1P0.a * \n" | |
1693 " (1u - _edges.b) * (1u - _edgesP1P0.r) * (1u - _edges.a) *\n" | |
1694 " (1u - _edgesP1P0.g); \n" | |
1695 " isZShape *= (_edgesM1P0.g + _edgesP2P0.a); \n" | |
1696 " // and at least one of these need to be there\n" | |
1697 "#endif \n" | |
1698 " if (isZShape > 0u) { \n" | |
1699 " isInvertedZ = true; \n" | |
1700 " } \n" | |
1701 " } \n" | |
1702 " \n" | |
1703 " // Normal Z case: \n" | |
1704 " // __ \n" | |
1705 " // X| \n" | |
1706 " // ¯¯ \n" | |
1707 " { \n" | |
1708 "#ifndef SETTINGS_ALLOW_SHORT_Zs \n" | |
1709 " uint isZShape = _edges.r * _edges.a * _edgesM1P0.a * \n" | |
1710 " _edgesP1P0.g * _edgesP2P0.g * (1u - _edges.b) * \n" | |
1711 " (1u - _edgesP1P0.r) * (1u - _edges.g) * \n" | |
1712 " (1u - _edgesP1P0.a); \n" | |
1713 "#else \n" | |
1714 " uint isZShape = _edges.r * _edges.a * _edgesP1P0.g * \n" | |
1715 " (1u - _edges.b) * (1u - _edgesP1P0.r) * (1u - _edges.g) *\n" | |
1716 " (1u - _edgesP1P0.a); \n" | |
1717 " isZShape *= \n" | |
1718 " (_edgesM1P0.a + _edgesP2P0.g); \n" | |
1719 " // and at least one of these need to be there\n" | |
1720 "#endif \n" | |
1721 " \n" | |
1722 " if (isZShape > 0u) { \n" | |
1723 " isNormalZ = true; \n" | |
1724 " } \n" | |
1725 " } \n" | |
1726 " \n" | |
1727 " bool isZ = isInvertedZ || isNormalZ; \n" | |
1728 " if (isZ) { \n" | |
1729 " forFollowUpCoords[forFollowUpCount++] = \n" | |
1730 " ivec4(screenPosI.xy, horizontal, isInvertedZ); \n" | |
1731 " } \n" | |
1732 " } \n" | |
1733 " } \n" | |
1734 " } \n" | |
1735 " \n" | |
1736 " // This code below is the only potential bug with this algorithm : \n" | |
1737 " // it HAS to be executed after the simple shapes above. It used to be\n" | |
1738 " // executed as separate compute shader (by storing the packed \n" | |
1739 " // 'forFollowUpCoords' in an append buffer and consuming it later) \n" | |
1740 " // but the whole thing (append/consume buffers, using CS) appears to \n" | |
1741 " // be too inefficient on most hardware. \n" | |
1742 " // However, it seems to execute fairly efficiently here and without \n" | |
1743 " // any issues, although there is no 100% guarantee that this code \n" | |
1744 " // below will execute across all pixels (it has a c_maxLineLength \n" | |
1745 " // wide kernel) after other shaders processing same pixels have done \n" | |
1746 " // solving simple shapes. It appears to work regardless, across all \n" | |
1747 " // hardware; pixels with 1-edge or two opposing edges are ignored by \n" | |
1748 " // simple shapes anyway and other shapes stop the long line \n" | |
1749 " // algorithm from executing the only danger appears to be simple \n" | |
1750 " // shape L's colliding with Z shapes from neighbouring pixels but I \n" | |
1751 " // couldn't reproduce any problems on any hardware. \n" | |
1752 " for (uint _i = 0u; _i < forFollowUpCount; _i++) { \n" | |
1753 " ivec4 data = forFollowUpCoords[_i]; \n" | |
1754 " ProcessDetectedZ(data.xy, bool(data.z), bool(data.w)); \n" | |
1755 " } \n" | |
1756 "} \n" | |
1757 "#endif // BLUR_EDGES \n" | |
1758 " \n" | |
1759 "#ifdef DISPLAY_EDGES \n" | |
1760 "layout(location = 0) out vec4 color; \n" | |
1761 "layout(location = 1) out vec4 hasEdges; \n" | |
1762 "void DisplayEdges() { \n" | |
1763 " ivec2 screenPosI = ivec2(gl_FragCoord.xy); \n" | |
1764 " \n" | |
1765 " uint packedEdges, shapeType; \n" | |
1766 " UnpackBlurAAInfo(texelFetch(g_src0TextureFlt, screenPosI, 0).r, \n" | |
1767 " packedEdges, shapeType); \n" | |
1768 " \n" | |
1769 " vec4 edges = vec4(UnpackEdge(packedEdges)); \n" | |
1770 " if (any(greaterThan(edges.xyzw, vec4(0)))) { \n" | |
1771 "#ifdef IN_BGR_MODE \n" | |
1772 " color = c_edgeDebugColours[shapeType].bgra; \n" | |
1773 "#else \n" | |
1774 " color = c_edgeDebugColours[shapeType]; \n" | |
1775 "#endif \n" | |
1776 " hasEdges = vec4(1.0); \n" | |
1777 " } else { \n" | |
1778 " color = vec4(0); \n" | |
1779 " hasEdges = vec4(0.0); \n" | |
1780 " } \n" | |
1781 "} \n" | |
1782 "#endif // DISPLAY_EDGES \n" | |
1783 " \n" | |
1784 "void main() { \n" | |
1785 "#ifdef DETECT_EDGES1 \n" | |
1786 " DetectEdges1(); \n" | |
1787 "#endif \n" | |
1788 "#if defined DETECT_EDGES2 \n" | |
1789 " DetectEdges2(); \n" | |
1790 "#endif \n" | |
1791 "#if defined COMBINE_EDGES \n" | |
1792 " CombineEdges(); \n" | |
1793 "#endif \n" | |
1794 "#if defined BLUR_EDGES \n" | |
1795 " BlurEdges(); \n" | |
1796 "#endif \n" | |
1797 "#if defined DISPLAY_EDGES \n" | |
1798 " DisplayEdges(); \n" | |
1799 "#endif \n" | |
1800 "} \n"; | |
1801 | |
// GLSL fragment shader source for a 1:1 texel copy.
//
// For each fragment, the shader converts gl_FragCoord.xy to integer
// coordinates, fetches that texel from |inTexture| at LOD 0 with texelFetch()
// (no filtering), and then writes it to one of two destinations chosen at
// shader-compile time:
//   - OUT_FBO defined:     the texel is written to the fragment output
//                          |outColor| (location 0).
//   - OUT_FBO not defined: the texel is written with imageStore() to the
//                          write-only rgba8 image |outTexture| at the same
//                          coordinates.
// Both |outColor| and |outTexture| are declared in either variant.
//
// When GL_ES is defined, |outTexture| is declared with an explicit
// "binding = 0"; otherwise the declaration carries no binding qualifier.
// NOTE(review): presumably the non-ES path assigns the image unit from the
// host side via the uniform location -- confirm against the code that links
// this shader.
//
// The string contains no "#version" line and does not define OUT_FBO itself.
// NOTE(review): presumably the code that compiles it prepends a header with
// the version and the OUT_FBO define -- confirm at the call site.
1802 const char* ApplyFramebufferAttachmentCMAAINTELResourceManager::copy_frag_str_ = | |
1803 "precision highp float; \n" | |
1804 "layout(binding = 0) uniform highp sampler2D inTexture; \n" | |
1805 "layout(location = 0) out vec4 outColor; \n" | |
1806 "#ifdef GL_ES \n" | |
1807 "layout(binding = 0, rgba8) restrict writeonly uniform highp \n" | |
1808 " image2D outTexture; \n" | |
1809 "#else \n" | |
1810 "layout(rgba8) restrict writeonly uniform highp image2D outTexture; \n" | |
1811 "#endif \n" | |
1812 " \n" | |
1813 "void main() { \n" | |
1814 " ivec2 screenPosI = ivec2( gl_FragCoord.xy ); \n" | |
1815 " vec4 pixel = texelFetch(inTexture, screenPosI, 0); \n" | |
1816 "#ifdef OUT_FBO \n" | |
1817 " outColor = pixel; \n" | |
1818 "#else \n" | |
1819 " imageStore(outTexture, screenPosI, pixel); \n" | |
1820 "#endif \n" | |
1821 "} \n"; | |
1822 | |
1823 } // namespace gpu | |
OLD | NEW |