diff options
Diffstat (limited to 'servers/rendering/renderer_rd/shaders')
77 files changed, 4181 insertions, 1662 deletions
diff --git a/servers/rendering/renderer_rd/shaders/SCsub b/servers/rendering/renderer_rd/shaders/SCsub index fc513d3fb9..d352743908 100644 --- a/servers/rendering/renderer_rd/shaders/SCsub +++ b/servers/rendering/renderer_rd/shaders/SCsub @@ -10,8 +10,11 @@ if "RD_GLSL" in env["BUILDERS"]: glsl_files = [str(f) for f in Glob("*.glsl") if str(f) not in gl_include_files] # make sure we recompile shaders if include files change - env.Depends([f + ".gen.h" for f in glsl_files], gl_include_files) + env.Depends([f + ".gen.h" for f in glsl_files], gl_include_files + ["#glsl_builders.py"]) # compile shaders for glsl_file in glsl_files: env.RD_GLSL(glsl_file) + +SConscript("effects/SCsub") +SConscript("environment/SCsub") diff --git a/servers/rendering/renderer_rd/shaders/blit.glsl b/servers/rendering/renderer_rd/shaders/blit.glsl index 967da1e6e4..14f190a49f 100644 --- a/servers/rendering/renderer_rd/shaders/blit.glsl +++ b/servers/rendering/renderer_rd/shaders/blit.glsl @@ -4,7 +4,8 @@ #VERSION_DEFINES -layout(push_constant, binding = 0, std140) uniform Pos { +layout(push_constant, std140) uniform Pos { + vec4 src_rect; vec4 dst_rect; vec2 eye_center; @@ -22,8 +23,8 @@ layout(location = 0) out vec2 uv; void main() { vec2 base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0)); - uv = base_arr[gl_VertexIndex]; - vec2 vtx = data.dst_rect.xy + uv * data.dst_rect.zw; + uv = data.src_rect.xy + base_arr[gl_VertexIndex] * data.src_rect.zw; + vec2 vtx = data.dst_rect.xy + base_arr[gl_VertexIndex] * data.dst_rect.zw; gl_Position = vec4(vtx * 2.0 - 1.0, 0.0, 1.0); } @@ -33,7 +34,8 @@ void main() { #VERSION_DEFINES -layout(push_constant, binding = 0, std140) uniform Pos { +layout(push_constant, std140) uniform Pos { + vec4 src_rect; vec4 dst_rect; vec2 eye_center; diff --git a/servers/rendering/renderer_rd/shaders/blur_raster_inc.glsl b/servers/rendering/renderer_rd/shaders/blur_raster_inc.glsl deleted file mode 100644 index 52bf2886b5..0000000000 --- a/servers/rendering/renderer_rd/shaders/blur_raster_inc.glsl +++ /dev/null @@ -1,21 +0,0 @@ -#define FLAG_HORIZONTAL (1 << 0) -#define FLAG_USE_ORTHOGONAL_PROJECTION (1 << 1) -#define FLAG_GLOW_FIRST_PASS (1 << 2) - -layout(push_constant, binding = 1, std430) uniform Blur { - vec2 pixel_size; - uint flags; - uint pad; - - // Glow. - float glow_strength; - float glow_bloom; - float glow_hdr_threshold; - float glow_hdr_scale; - - float glow_exposure; - float glow_white; - float glow_luminance_cap; - float glow_auto_exposure_grey; -} -blur; diff --git a/servers/rendering/renderer_rd/shaders/canvas.glsl b/servers/rendering/renderer_rd/shaders/canvas.glsl index 2911e8b731..f8e9020f9f 100644 --- a/servers/rendering/renderer_rd/shaders/canvas.glsl +++ b/servers/rendering/renderer_rd/shaders/canvas.glsl @@ -82,7 +82,7 @@ void main() { #endif - mat4 world_matrix = mat4(vec4(draw_data.world_x, 0.0, 0.0), vec4(draw_data.world_y, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(draw_data.world_ofs, 0.0, 1.0)); + mat4 model_matrix = mat4(vec4(draw_data.world_x, 0.0, 0.0), vec4(draw_data.world_y, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(draw_data.world_ofs, 0.0, 1.0)); #define FLAGS_INSTANCING_MASK 0x7F #define FLAGS_INSTANCING_HAS_COLORS (1 << 7) @@ -91,7 +91,6 @@ void main() { uint instancing = draw_data.flags & FLAGS_INSTANCING_MASK; #ifdef USE_ATTRIBUTES - if (instancing > 1) { // trails @@ -128,37 +127,37 @@ void main() { vertex = new_vertex; color *= pcolor; - } else #endif // USE_ATTRIBUTES + { + if (instancing == 1) { + uint stride = 2; + { + if (bool(draw_data.flags & FLAGS_INSTANCING_HAS_COLORS)) { + stride += 1; + } + if (bool(draw_data.flags & FLAGS_INSTANCING_HAS_CUSTOM_DATA)) { + stride += 1; + } + } + + uint offset = stride * gl_InstanceIndex; + + mat4 matrix = mat4(transforms.data[offset + 0], transforms.data[offset + 1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); + offset += 2; - if (instancing == 1) { - uint stride = 2; - { if (bool(draw_data.flags & FLAGS_INSTANCING_HAS_COLORS)) { - stride += 1; + color *= transforms.data[offset]; + offset += 1; } + if (bool(draw_data.flags & FLAGS_INSTANCING_HAS_CUSTOM_DATA)) { - stride += 1; + instance_custom = transforms.data[offset]; } - } - - uint offset = stride * gl_InstanceIndex; - mat4 matrix = mat4(transforms.data[offset + 0], transforms.data[offset + 1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); - offset += 2; - - if (bool(draw_data.flags & FLAGS_INSTANCING_HAS_COLORS)) { - color *= transforms.data[offset]; - offset += 1; + matrix = transpose(matrix); + model_matrix = model_matrix * matrix; } - - if (bool(draw_data.flags & FLAGS_INSTANCING_HAS_CUSTOM_DATA)) { - instance_custom = transforms.data[offset]; - } - - matrix = transpose(matrix); - world_matrix = world_matrix * matrix; } #if !defined(USE_ATTRIBUTES) && !defined(USE_PRIMITIVE) @@ -180,7 +179,7 @@ void main() { #endif #if !defined(SKIP_TRANSFORM_USED) - vertex = (world_matrix * vec4(vertex, 0.0, 1.0)).xy; + vertex = (model_matrix * vec4(vertex, 0.0, 1.0)).xy; #endif color_interp = color; @@ -462,10 +461,6 @@ float msdf_median(float r, float g, float b, float a) { return min(max(min(r, g), min(max(r, g), b)), a); } -vec2 msdf_map(vec2 value, vec2 in_min, vec2 in_max, vec2 out_min, vec2 out_max) { - return out_min + (out_max - out_min) * (value - in_min) / (in_max - in_min); -} - void main() { vec4 color = color_interp; vec2 uv = uv_interp; diff --git a/servers/rendering/renderer_rd/shaders/canvas_occlusion.glsl b/servers/rendering/renderer_rd/shaders/canvas_occlusion.glsl index 9f89f4b3b7..930cf792cb 100644 --- a/servers/rendering/renderer_rd/shaders/canvas_occlusion.glsl +++ b/servers/rendering/renderer_rd/shaders/canvas_occlusion.glsl @@ -6,7 +6,7 @@ layout(location = 0) in highp vec3 vertex; -layout(push_constant, binding = 0, std430) uniform Constants { +layout(push_constant, std430) uniform Constants { mat4 projection; mat2x4 modelview; vec2 direction; @@ -34,7 +34,7 @@ void main() { #VERSION_DEFINES -layout(push_constant, binding = 0, std430) uniform Constants { +layout(push_constant, std430) uniform Constants { mat4 projection; mat2x4 modelview; vec2 direction; diff --git a/servers/rendering/renderer_rd/shaders/canvas_sdf.glsl b/servers/rendering/renderer_rd/shaders/canvas_sdf.glsl index 2bdfbabfcf..0fafc7d486 100644 --- a/servers/rendering/renderer_rd/shaders/canvas_sdf.glsl +++ b/servers/rendering/renderer_rd/shaders/canvas_sdf.glsl @@ -12,7 +12,7 @@ layout(r16_snorm, set = 0, binding = 2) uniform restrict writeonly image2D dst_s layout(rg16i, set = 0, binding = 3) uniform restrict readonly iimage2D src_process; layout(rg16i, set = 0, binding = 4) uniform restrict writeonly iimage2D dst_process; -layout(push_constant, binding = 0, std430) uniform Params { +layout(push_constant, std430) uniform Params { ivec2 size; int stride; int shift; diff --git a/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl b/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl index 0cff505cae..2ea6965c09 100644 --- a/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl @@ -41,7 +41,7 @@ // Push Constant -layout(push_constant, binding = 0, std430) uniform DrawData { +layout(push_constant, std430) uniform DrawData { vec2 world_x; vec2 world_y; vec2 world_ofs; @@ -138,10 +138,10 @@ layout(set = 0, binding = 7) uniform texture2D sdf_texture; layout(set = 0, binding = 8) uniform sampler material_samplers[12]; -layout(set = 0, binding = 9, std430) restrict readonly buffer GlobalVariableData { +layout(set = 0, binding = 9, std430) restrict readonly buffer GlobalShaderUniformData { vec4 data[]; } -global_variables; +global_shader_uniforms; /* SET1: Is reserved for the material */ diff --git a/servers/rendering/renderer_rd/shaders/cluster_debug.glsl b/servers/rendering/renderer_rd/shaders/cluster_debug.glsl index 40da2c6e5c..0034de8c91 100644 --- a/servers/rendering/renderer_rd/shaders/cluster_debug.glsl +++ b/servers/rendering/renderer_rd/shaders/cluster_debug.glsl @@ -40,7 +40,7 @@ const vec3 usage_gradient[33] = vec3[]( // 1 (none) + 32 vec3(0.83, 0.22, 0.27), vec3(0.83, 0.22, 0.32), vec3(1.00, 0.63, 0.70)); -layout(push_constant, binding = 0, std430) uniform Params { +layout(push_constant, std430) uniform Params { uvec2 screen_size; uvec2 cluster_screen_size; diff --git a/servers/rendering/renderer_rd/shaders/cluster_render.glsl b/servers/rendering/renderer_rd/shaders/cluster_render.glsl index 6d95722a57..2fe230f0bf 100644 --- a/servers/rendering/renderer_rd/shaders/cluster_render.glsl +++ b/servers/rendering/renderer_rd/shaders/cluster_render.glsl @@ -9,7 +9,7 @@ layout(location = 0) in vec3 vertex_attrib; layout(location = 0) out float depth_interp; layout(location = 1) out flat uint element_index; -layout(push_constant, binding = 0, std430) uniform Params { +layout(push_constant, std430) uniform Params { uint base_index; uint pad0; uint pad1; diff --git a/servers/rendering/renderer_rd/shaders/cluster_store.glsl b/servers/rendering/renderer_rd/shaders/cluster_store.glsl index b0606efa94..64a145f3c6 100644 --- a/servers/rendering/renderer_rd/shaders/cluster_store.glsl +++ b/servers/rendering/renderer_rd/shaders/cluster_store.glsl @@ -6,7 +6,7 @@ layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; -layout(push_constant, binding = 0, std430) uniform Params { +layout(push_constant, std430) uniform Params { uint cluster_render_data_size; // how much data for a single cluster takes uint max_render_element_count_div_32; //divided by 32 uvec2 cluster_screen_size; diff --git a/servers/rendering/renderer_rd/shaders/effects/SCsub b/servers/rendering/renderer_rd/shaders/effects/SCsub new file mode 100644 index 0000000000..741da8fe69 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/SCsub @@ -0,0 +1,17 @@ +#!/usr/bin/env python + +Import("env") + +if "RD_GLSL" in env["BUILDERS"]: + # find all include files + gl_include_files = [str(f) for f in Glob("*_inc.glsl")] + + # find all shader code(all glsl files excluding our include files) + glsl_files = [str(f) for f in Glob("*.glsl") if str(f) not in gl_include_files] + + # make sure we recompile shaders if include files change + env.Depends([f + ".gen.h" for f in glsl_files], gl_include_files + ["#glsl_builders.py"]) + + # compile shaders + for glsl_file in glsl_files: + env.RD_GLSL(glsl_file) diff --git a/servers/rendering/renderer_rd/shaders/blur_raster.glsl b/servers/rendering/renderer_rd/shaders/effects/blur_raster.glsl index f8b4e3f610..96f5c3e9f2 100644 --- a/servers/rendering/renderer_rd/shaders/blur_raster.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/blur_raster.glsl @@ -53,7 +53,9 @@ void main() { #ifdef MODE_GAUSSIAN_BLUR - //Simpler blur uses SIGMA2 for the gaussian kernel for a stronger effect + // Simpler blur uses SIGMA2 for the gaussian kernel for a stronger effect + + // note, for blur blur.luminance_multiplier is irrelavant, we would be multiplying and then dividing by this amount. if (bool(blur.flags & FLAG_HORIZONTAL)) { vec2 pix_size = blur.pixel_size; @@ -94,6 +96,7 @@ void main() { if (bool(blur.flags & FLAG_HORIZONTAL)) { vec2 pix_size = blur.pixel_size; pix_size *= 0.5; //reading from larger buffer, so use more samples + vec4 color = texture(source_color, uv_interp + vec2(0.0, 0.0) * pix_size) * 0.174938; GLOW_ADD(vec2(1.0, 0.0), 0.165569); GLOW_ADD(vec2(2.0, 0.0), 0.140367); @@ -101,7 +104,10 @@ void main() { GLOW_ADD(vec2(-1.0, 0.0), 0.165569); GLOW_ADD(vec2(-2.0, 0.0), 0.140367); GLOW_ADD(vec2(-3.0, 0.0), 0.106595); + + // only do this in the horizontal pass, if we also do this in the vertical pass we're doubling up. color *= blur.glow_strength; + frag_color = color; } else { vec2 pix_size = blur.pixel_size; @@ -110,13 +116,17 @@ void main() { GLOW_ADD(vec2(0.0, 2.0), 0.122581); GLOW_ADD(vec2(0.0, -1.0), 0.233062); GLOW_ADD(vec2(0.0, -2.0), 0.122581); - color *= blur.glow_strength; + frag_color = color; } #undef GLOW_ADD if (bool(blur.flags & FLAG_GLOW_FIRST_PASS)) { + // In the first pass bring back to correct color range else we're applying the wrong threshold + // in subsequent passes we can use it as is as we'd just be undoing it right after. + frag_color *= blur.luminance_multiplier; + #ifdef GLOW_USE_AUTO_EXPOSURE frag_color /= texelFetch(source_auto_exposure, ivec2(0, 0), 0).r / blur.glow_auto_exposure_grey; @@ -126,10 +136,10 @@ void main() { float luminance = max(frag_color.r, max(frag_color.g, frag_color.b)); float feedback = max(smoothstep(blur.glow_hdr_threshold, blur.glow_hdr_threshold + blur.glow_hdr_scale, luminance), blur.glow_bloom); - frag_color = min(frag_color * feedback, vec4(blur.glow_luminance_cap)); + frag_color = min(frag_color * feedback, vec4(blur.glow_luminance_cap)) / blur.luminance_multiplier; } -#endif +#endif // MODE_GAUSSIAN_GLOW #ifdef MODE_COPY vec4 color = textureLod(source_color, uv_interp, 0.0); diff --git a/servers/rendering/renderer_rd/shaders/effects/blur_raster_inc.glsl b/servers/rendering/renderer_rd/shaders/effects/blur_raster_inc.glsl new file mode 100644 index 0000000000..730504571a --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/blur_raster_inc.glsl @@ -0,0 +1,26 @@ +#define FLAG_HORIZONTAL (1 << 0) +#define FLAG_USE_ORTHOGONAL_PROJECTION (1 << 1) +#define FLAG_GLOW_FIRST_PASS (1 << 2) + +layout(push_constant, std430) uniform Blur { + vec2 pixel_size; // 08 - 08 + uint flags; // 04 - 12 + uint pad; // 04 - 16 + + // Glow. + float glow_strength; // 04 - 20 + float glow_bloom; // 04 - 24 + float glow_hdr_threshold; // 04 - 28 + float glow_hdr_scale; // 04 - 32 + + float glow_exposure; // 04 - 36 + float glow_white; // 04 - 40 + float glow_luminance_cap; // 04 - 44 + float glow_auto_exposure_grey; // 04 - 48 + + float luminance_multiplier; // 04 - 52 + float res1; // 04 - 56 + float res2; // 04 - 60 + float res3; // 04 - 64 +} +blur; diff --git a/servers/rendering/renderer_rd/shaders/bokeh_dof.glsl b/servers/rendering/renderer_rd/shaders/effects/bokeh_dof.glsl index 0438671dd2..0438671dd2 100644 --- a/servers/rendering/renderer_rd/shaders/bokeh_dof.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/bokeh_dof.glsl diff --git a/servers/rendering/renderer_rd/shaders/bokeh_dof_inc.glsl b/servers/rendering/renderer_rd/shaders/effects/bokeh_dof_inc.glsl index fadea1631c..b90a527554 100644 --- a/servers/rendering/renderer_rd/shaders/bokeh_dof_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/bokeh_dof_inc.glsl @@ -1,4 +1,4 @@ -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, std430) uniform Params { ivec2 size; float z_far; float z_near; diff --git a/servers/rendering/renderer_rd/shaders/bokeh_dof_raster.glsl b/servers/rendering/renderer_rd/shaders/effects/bokeh_dof_raster.glsl index a3b3938ee9..a3b3938ee9 100644 --- a/servers/rendering/renderer_rd/shaders/bokeh_dof_raster.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/bokeh_dof_raster.glsl diff --git a/servers/rendering/renderer_rd/shaders/copy.glsl b/servers/rendering/renderer_rd/shaders/effects/copy.glsl index 4110a95ddb..3a4ef86ef0 100644 --- a/servers/rendering/renderer_rd/shaders/copy.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/copy.glsl @@ -17,7 +17,7 @@ layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; #define FLAG_HIGH_QUALITY_GLOW (1 << 8) #define FLAG_ALPHA_TO_ONE (1 << 9) -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, std430) uniform Params { ivec4 section; ivec2 target; uint flags; @@ -61,7 +61,7 @@ layout(rgba8, set = 3, binding = 0) uniform restrict writeonly image2D dest_buff layout(rgba32f, set = 3, binding = 0) uniform restrict writeonly image2D dest_buffer; #endif -#ifdef MODE_GAUSSIAN_GLOW +#ifdef MODE_GAUSSIAN_BLUR shared vec4 local_cache[256]; shared vec4 temp_cache[128]; #endif @@ -70,7 +70,7 @@ void main() { // Pixel being shaded ivec2 pos = ivec2(gl_GlobalInvocationID.xy); -#ifndef MODE_GAUSSIAN_GLOW // Glow needs the extra threads +#ifndef MODE_GAUSSIAN_BLUR // Gaussian blur needs the extra threads if (any(greaterThanEqual(pos, params.section.zw))) { //too large, do nothing return; } @@ -84,41 +84,19 @@ void main() { color += texelFetch(source_color, base_pos + ivec2(1, 0), 0); color += texelFetch(source_color, base_pos + ivec2(1, 1), 0); color /= 4.0; + color = mix(color, vec4(100.0, 100.0, 100.0, 1.0), isinf(color)); + color = mix(color, vec4(100.0, 100.0, 100.0, 1.0), isnan(color)); imageStore(dest_buffer, pos + params.target, color); #endif #ifdef MODE_GAUSSIAN_BLUR - //Simpler blur uses SIGMA2 for the gaussian kernel for a stronger effect - - if (bool(params.flags & FLAG_HORIZONTAL)) { - ivec2 base_pos = (pos + params.section.xy) << 1; - vec4 color = texelFetch(source_color, base_pos + ivec2(0, 0), 0) * 0.214607; - color += texelFetch(source_color, base_pos + ivec2(1, 0), 0) * 0.189879; - color += texelFetch(source_color, base_pos + ivec2(2, 0), 0) * 0.131514; - color += texelFetch(source_color, base_pos + ivec2(3, 0), 0) * 0.071303; - color += texelFetch(source_color, base_pos + ivec2(-1, 0), 0) * 0.189879; - color += texelFetch(source_color, base_pos + ivec2(-2, 0), 0) * 0.131514; - color += texelFetch(source_color, base_pos + ivec2(-3, 0), 0) * 0.071303; - imageStore(dest_buffer, pos + params.target, color); - } else { - ivec2 base_pos = (pos + params.section.xy); - vec4 color = texelFetch(source_color, base_pos + ivec2(0, 0), 0) * 0.38774; - color += texelFetch(source_color, base_pos + ivec2(0, 1), 0) * 0.24477; - color += texelFetch(source_color, base_pos + ivec2(0, 2), 0) * 0.06136; - color += texelFetch(source_color, base_pos + ivec2(0, -1), 0) * 0.24477; - color += texelFetch(source_color, base_pos + ivec2(0, -2), 0) * 0.06136; - imageStore(dest_buffer, pos + params.target, color); - } -#endif - -#ifdef MODE_GAUSSIAN_GLOW - // First pass copy texture into 16x16 local memory for every 8x8 thread block vec2 quad_center_uv = clamp(vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 3.5) / params.section.zw, vec2(0.5 / params.section.zw), vec2(1.0 - 1.5 / params.section.zw)); uint dest_index = gl_LocalInvocationID.x * 2 + gl_LocalInvocationID.y * 2 * 16; +#ifdef MODE_GLOW if (bool(params.flags & FLAG_HIGH_QUALITY_GLOW)) { vec2 quad_offset_uv = clamp((vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 3.0)) / params.section.zw, vec2(0.5 / params.section.zw), vec2(1.0 - 1.5 / params.section.zw)); @@ -126,35 +104,49 @@ void main() { local_cache[dest_index + 1] = (textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.z, 0.0), 0) + textureLod(source_color, quad_offset_uv + vec2(1.0 / params.section.z, 0.0), 0)) * 0.5; local_cache[dest_index + 16] = (textureLod(source_color, quad_center_uv + vec2(0.0, 1.0 / params.section.w), 0) + textureLod(source_color, quad_offset_uv + vec2(0.0, 1.0 / params.section.w), 0)) * 0.5; local_cache[dest_index + 16 + 1] = (textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.zw), 0) + textureLod(source_color, quad_offset_uv + vec2(1.0 / params.section.zw), 0)) * 0.5; - } else { + } else +#endif + { local_cache[dest_index] = textureLod(source_color, quad_center_uv, 0); local_cache[dest_index + 1] = textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.z, 0.0), 0); local_cache[dest_index + 16] = textureLod(source_color, quad_center_uv + vec2(0.0, 1.0 / params.section.w), 0); local_cache[dest_index + 16 + 1] = textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.zw), 0); } - +#ifdef MODE_GLOW + if (bool(params.flags & FLAG_GLOW_FIRST_PASS)) { + // Tonemap initial samples to reduce weight of fireflies: https://graphicrants.blogspot.com/2013/12/tone-mapping.html + local_cache[dest_index] /= 1.0 + dot(local_cache[dest_index].rgb, vec3(0.299, 0.587, 0.114)); + local_cache[dest_index + 1] /= 1.0 + dot(local_cache[dest_index + 1].rgb, vec3(0.299, 0.587, 0.114)); + local_cache[dest_index + 16] /= 1.0 + dot(local_cache[dest_index + 16].rgb, vec3(0.299, 0.587, 0.114)); + local_cache[dest_index + 16 + 1] /= 1.0 + dot(local_cache[dest_index + 16 + 1].rgb, vec3(0.299, 0.587, 0.114)); + } + const float kernel[4] = { 0.174938, 0.165569, 0.140367, 0.106595 }; +#else + // Simpler blur uses SIGMA2 for the gaussian kernel for a stronger effect. + const float kernel[4] = { 0.214607, 0.189879, 0.131514, 0.071303 }; +#endif memoryBarrierShared(); barrier(); // Horizontal pass. Needs to copy into 8x16 chunk of local memory so vertical pass has full resolution uint read_index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * 32 + 4; vec4 color_top = vec4(0.0); - color_top += local_cache[read_index] * 0.174938; - color_top += local_cache[read_index + 1] * 0.165569; - color_top += local_cache[read_index + 2] * 0.140367; - color_top += local_cache[read_index + 3] * 0.106595; - color_top += local_cache[read_index - 1] * 0.165569; - color_top += local_cache[read_index - 2] * 0.140367; - color_top += local_cache[read_index - 3] * 0.106595; + color_top += local_cache[read_index] * kernel[0]; + color_top += local_cache[read_index + 1] * kernel[1]; + color_top += local_cache[read_index + 2] * kernel[2]; + color_top += local_cache[read_index + 3] * kernel[3]; + color_top += local_cache[read_index - 1] * kernel[1]; + color_top += local_cache[read_index - 2] * kernel[2]; + color_top += local_cache[read_index - 3] * kernel[3]; vec4 color_bottom = vec4(0.0); - color_bottom += local_cache[read_index + 16] * 0.174938; - color_bottom += local_cache[read_index + 1 + 16] * 0.165569; - color_bottom += local_cache[read_index + 2 + 16] * 0.140367; - color_bottom += local_cache[read_index + 3 + 16] * 0.106595; - color_bottom += local_cache[read_index - 1 + 16] * 0.165569; - color_bottom += local_cache[read_index - 2 + 16] * 0.140367; - color_bottom += local_cache[read_index - 3 + 16] * 0.106595; + color_bottom += local_cache[read_index + 16] * kernel[0]; + color_bottom += local_cache[read_index + 1 + 16] * kernel[1]; + color_bottom += local_cache[read_index + 2 + 16] * kernel[2]; + color_bottom += local_cache[read_index + 3 + 16] * kernel[3]; + color_bottom += local_cache[read_index - 1 + 16] * kernel[1]; + color_bottom += local_cache[read_index - 2 + 16] * kernel[2]; + color_bottom += local_cache[read_index - 3 + 16] * kernel[3]; // rotate samples to take advantage of cache coherency uint write_index = gl_LocalInvocationID.y * 2 + gl_LocalInvocationID.x * 16; @@ -165,17 +157,28 @@ void main() { memoryBarrierShared(); barrier(); + // If destination outside of texture, can stop doing work now + if (any(greaterThanEqual(pos, params.section.zw))) { + return; + } + // Vertical pass uint index = gl_LocalInvocationID.y + gl_LocalInvocationID.x * 16 + 4; vec4 color = vec4(0.0); - color += temp_cache[index] * 0.174938; - color += temp_cache[index + 1] * 0.165569; - color += temp_cache[index + 2] * 0.140367; - color += temp_cache[index + 3] * 0.106595; - color += temp_cache[index - 1] * 0.165569; - color += temp_cache[index - 2] * 0.140367; - color += temp_cache[index - 3] * 0.106595; + color += temp_cache[index] * kernel[0]; + color += temp_cache[index + 1] * kernel[1]; + color += temp_cache[index + 2] * kernel[2]; + color += temp_cache[index + 3] * kernel[3]; + color += temp_cache[index - 1] * kernel[1]; + color += temp_cache[index - 2] * kernel[2]; + color += temp_cache[index - 3] * kernel[3]; + +#ifdef MODE_GLOW + if (bool(params.flags & FLAG_GLOW_FIRST_PASS)) { + // Undo tonemap to restore range: https://graphicrants.blogspot.com/2013/12/tone-mapping.html + color /= 1.0 - dot(color.rgb, vec3(0.299, 0.587, 0.114)); + } color *= params.glow_strength; @@ -191,7 +194,7 @@ void main() { color = min(color * feedback, vec4(params.glow_luminance_cap)); } - +#endif imageStore(dest_buffer, pos + params.target, color); #endif @@ -256,7 +259,9 @@ void main() { const float PI = 3.14159265359; vec2 uv = vec2(pos) / vec2(params.section.zw); - uv.y = 1.0 - uv.y; + if (bool(params.flags & FLAG_FLIP_Y)) { + uv.y = 1.0 - uv.y; + } float phi = uv.x * 2.0 * PI; float theta = uv.y * PI; diff --git a/servers/rendering/renderer_rd/shaders/copy_to_fb.glsl b/servers/rendering/renderer_rd/shaders/effects/copy_to_fb.glsl index 8c68e2dc2f..1c17eabb56 100644 --- a/servers/rendering/renderer_rd/shaders/copy_to_fb.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/copy_to_fb.glsl @@ -4,9 +4,22 @@ #VERSION_DEFINES +#ifdef MULTIVIEW +#ifdef has_VK_KHR_multiview +#extension GL_EXT_multiview : enable +#define ViewIndex gl_ViewIndex +#else // has_VK_KHR_multiview +#define ViewIndex 0 +#endif // has_VK_KHR_multiview +#endif //MULTIVIEW + +#ifdef MULTIVIEW +layout(location = 0) out vec3 uv_interp; +#else layout(location = 0) out vec2 uv_interp; +#endif -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, std430) uniform Params { vec4 section; vec2 pixel_size; bool flip_y; @@ -19,9 +32,11 @@ params; void main() { vec2 base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0)); - uv_interp = base_arr[gl_VertexIndex]; - - vec2 vpos = uv_interp; + uv_interp.xy = base_arr[gl_VertexIndex]; +#ifdef MULTIVIEW + uv_interp.z = ViewIndex; +#endif + vec2 vpos = uv_interp.xy; if (params.use_section) { vpos = params.section.xy + vpos * params.section.zw; } @@ -39,7 +54,16 @@ void main() { #VERSION_DEFINES -layout(push_constant, binding = 1, std430) uniform Params { +#ifdef MULTIVIEW +#ifdef has_VK_KHR_multiview +#extension GL_EXT_multiview : enable +#define ViewIndex gl_ViewIndex +#else // has_VK_KHR_multiview +#define ViewIndex 0 +#endif // has_VK_KHR_multiview +#endif //MULTIVIEW + +layout(push_constant, std430) uniform Params { vec4 section; vec2 pixel_size; bool flip_y; @@ -52,12 +76,25 @@ layout(push_constant, binding = 1, std430) uniform Params { } params; +#ifdef MULTIVIEW +layout(location = 0) in vec3 uv_interp; +#else layout(location = 0) in vec2 uv_interp; +#endif +#ifdef MULTIVIEW +layout(set = 0, binding = 0) uniform sampler2DArray source_color; +#ifdef MODE_TWO_SOURCES +layout(set = 1, binding = 0) uniform sampler2DArray source_depth; +layout(location = 1) out float depth; +#endif /* MODE_TWO_SOURCES */ +#else /* MULTIVIEW */ layout(set = 0, binding = 0) uniform sampler2D source_color; #ifdef MODE_TWO_SOURCES layout(set = 1, binding = 0) uniform sampler2D source_color2; -#endif +#endif /* MODE_TWO_SOURCES */ +#endif /* MULTIVIEW */ + layout(location = 0) out vec4 frag_color; vec3 linear_to_srgb(vec3 color) { @@ -68,9 +105,14 @@ vec3 linear_to_srgb(vec3 color) { } void main() { +#ifdef MULTIVIEW + vec3 uv = uv_interp; +#else vec2 uv = uv_interp; +#endif #ifdef MODE_PANORAMA_TO_DP + // Note, multiview and panorama should not be mixed at this time //obtain normal from dual paraboloid uv #define M_PI 3.14159265359 @@ -97,11 +139,22 @@ void main() { //uv.y = 1.0 - uv.y; uv = 1.0 - uv; } -#endif +#endif /* MODE_PANORAMA_TO_DP */ + +#ifdef MULTIVIEW + vec4 color = textureLod(source_color, uv, 0.0); +#ifdef MODE_TWO_SOURCES + // In multiview our 2nd input will be our depth map + depth = textureLod(source_depth, uv, 0.0).r; +#endif /* MODE_TWO_SOURCES */ + +#else /* MULTIVIEW */ vec4 color = textureLod(source_color, uv, 0.0); #ifdef MODE_TWO_SOURCES color += textureLod(source_color2, uv, 0.0); -#endif +#endif /* MODE_TWO_SOURCES */ +#endif /* MULTIVIEW */ + if (params.force_luminance) { color.rgb = vec3(max(max(color.r, color.g), color.b)); } @@ -111,5 +164,6 @@ void main() { if (params.srgb) { color.rgb = linear_to_srgb(color.rgb); } + frag_color = color; } diff --git a/servers/rendering/renderer_rd/shaders/cube_to_dp.glsl b/servers/rendering/renderer_rd/shaders/effects/cube_to_dp.glsl index 69b895ed29..e77d0de719 100644 --- a/servers/rendering/renderer_rd/shaders/cube_to_dp.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/cube_to_dp.glsl @@ -4,7 +4,7 @@ #VERSION_DEFINES -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, std430) uniform Params { float z_far; float z_near; vec2 texel_size; @@ -31,7 +31,7 @@ layout(location = 0) in vec2 uv_interp; layout(set = 0, binding = 0) uniform samplerCube source_cube; -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, std430) uniform Params { float z_far; float z_near; vec2 texel_size; diff --git a/servers/rendering/renderer_rd/shaders/cubemap_downsampler.glsl b/servers/rendering/renderer_rd/shaders/effects/cubemap_downsampler.glsl index 63f0ce690e..63f0ce690e 100644 --- a/servers/rendering/renderer_rd/shaders/cubemap_downsampler.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/cubemap_downsampler.glsl diff --git a/servers/rendering/renderer_rd/shaders/cubemap_downsampler_inc.glsl b/servers/rendering/renderer_rd/shaders/effects/cubemap_downsampler_inc.glsl index b329e67293..641e0906f5 100644 --- a/servers/rendering/renderer_rd/shaders/cubemap_downsampler_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/cubemap_downsampler_inc.glsl @@ -1,4 +1,4 @@ -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, std430) uniform Params { uint face_size; uint face_id; // only used in raster shader } diff --git a/servers/rendering/renderer_rd/shaders/cubemap_downsampler_raster.glsl b/servers/rendering/renderer_rd/shaders/effects/cubemap_downsampler_raster.glsl index 0828ffd921..0828ffd921 100644 --- a/servers/rendering/renderer_rd/shaders/cubemap_downsampler_raster.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/cubemap_downsampler_raster.glsl diff --git a/servers/rendering/renderer_rd/shaders/cubemap_filter.glsl b/servers/rendering/renderer_rd/shaders/effects/cubemap_filter.glsl index 2a774b0eb4..2a774b0eb4 100644 --- a/servers/rendering/renderer_rd/shaders/cubemap_filter.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/cubemap_filter.glsl diff --git a/servers/rendering/renderer_rd/shaders/cubemap_filter_raster.glsl b/servers/rendering/renderer_rd/shaders/effects/cubemap_filter_raster.glsl index 324d306218..0990dc7c2f 100644 --- a/servers/rendering/renderer_rd/shaders/cubemap_filter_raster.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/cubemap_filter_raster.glsl @@ -25,7 +25,7 @@ #VERSION_DEFINES -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, std430) uniform Params { int mip_level; uint face_id; } @@ -47,7 +47,7 @@ void main() { #VERSION_DEFINES -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, std430) uniform Params { int mip_level; uint face_id; } diff --git a/servers/rendering/renderer_rd/shaders/cubemap_roughness.glsl b/servers/rendering/renderer_rd/shaders/effects/cubemap_roughness.glsl index 28f4dc59ec..1d46f59408 100644 --- a/servers/rendering/renderer_rd/shaders/cubemap_roughness.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/cubemap_roughness.glsl @@ -21,24 +21,38 @@ void main() { vec2 uv = ((vec2(id.xy) * 2.0 + 1.0) / (params.face_size) - 1.0); vec3 N = texelCoordToVec(uv, id.z); - //vec4 color = color_interp; - if (params.use_direct_write) { imageStore(dest_cubemap, ivec3(id), vec4(texture(source_cube, N).rgb, 1.0)); } else { vec4 sum = vec4(0.0, 0.0, 0.0, 0.0); + float solid_angle_texel = 4.0 * M_PI / (6.0 * params.face_size * params.face_size); + float roughness2 = params.roughness * params.roughness; + float roughness4 = roughness2 * roughness2; + vec3 UpVector = abs(N.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(1.0, 0.0, 0.0); + mat3 T; + T[0] = normalize(cross(UpVector, N)); + T[1] = cross(N, T[0]); + T[2] = N; + for (uint sampleNum = 0u; sampleNum < params.sample_count; sampleNum++) { vec2 xi = Hammersley(sampleNum, params.sample_count); - vec3 H = ImportanceSampleGGX(xi, params.roughness, N); - vec3 V = N; - vec3 L = (2.0 * dot(V, H) * H - V); + vec3 H = T * ImportanceSampleGGX(xi, roughness4); + float NdotH = dot(N, H); + vec3 L = (2.0 * NdotH * H - N); float ndotl = clamp(dot(N, L), 0.0, 1.0); if (ndotl > 0.0) { - sum.rgb += textureLod(source_cube, L, 0.0).rgb * ndotl; + float D = DistributionGGX(NdotH, roughness4); + float pdf = D * NdotH / (4.0 * NdotH) + 0.0001; + + float solid_angle_sample = 1.0 / (float(params.sample_count) * pdf + 0.0001); + + float mipLevel = params.roughness == 0.0 ? 0.0 : 0.5 * log2(solid_angle_sample / solid_angle_texel); + + sum.rgb += textureLod(source_cube, L, mipLevel).rgb * ndotl; sum.a += ndotl; } } diff --git a/servers/rendering/renderer_rd/shaders/cubemap_roughness_inc.glsl b/servers/rendering/renderer_rd/shaders/effects/cubemap_roughness_inc.glsl index be12be5dec..1bee428a6f 100644 --- a/servers/rendering/renderer_rd/shaders/cubemap_roughness_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/cubemap_roughness_inc.glsl @@ -1,6 +1,6 @@ #define M_PI 3.14159265359 -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, std430) uniform Params { uint face_id; uint sample_count; float roughness; @@ -47,12 +47,10 @@ vec3 texelCoordToVec(vec2 uv, uint faceID) { return normalize(result); } -vec3 ImportanceSampleGGX(vec2 Xi, float Roughness, vec3 N) { - float a = Roughness * Roughness; // DISNEY'S ROUGHNESS [see Burley'12 siggraph] - +vec3 ImportanceSampleGGX(vec2 xi, float roughness4) { // Compute distribution direction - float Phi = 2.0 * M_PI * Xi.x; - float CosTheta = sqrt((1.0 - Xi.y) / (1.0 + (a * a - 1.0) * Xi.y)); + float Phi = 2.0 * M_PI * xi.x; + float CosTheta = sqrt((1.0 - xi.y) / (1.0 + (roughness4 - 1.0) * xi.y)); float SinTheta = sqrt(1.0 - CosTheta * CosTheta); // Convert to spherical direction @@ -61,12 +59,15 @@ vec3 ImportanceSampleGGX(vec2 Xi, float Roughness, vec3 N) { H.y = SinTheta * sin(Phi); H.z = CosTheta; - vec3 UpVector = abs(N.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(1.0, 0.0, 0.0); - vec3 TangentX = normalize(cross(UpVector, N)); - vec3 TangentY = cross(N, TangentX); + return H; +} + +float DistributionGGX(float NdotH, float roughness4) { + float NdotH2 = NdotH * NdotH; + float denom = (NdotH2 * (roughness4 - 1.0) + 1.0); + denom = M_PI * denom * denom; - // Tangent to world space - return TangentX * H.x + TangentY * H.y + N * H.z; + return roughness4 / denom; } // https://graphicrants.blogspot.com.au/2013/08/specular-brdf-reference.html diff --git a/servers/rendering/renderer_rd/shaders/cubemap_roughness_raster.glsl b/servers/rendering/renderer_rd/shaders/effects/cubemap_roughness_raster.glsl index 2570308816..c29accd8a7 100644 --- a/servers/rendering/renderer_rd/shaders/cubemap_roughness_raster.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/cubemap_roughness_raster.glsl @@ -42,17 +42,33 @@ void main() { } else { vec4 sum = vec4(0.0, 0.0, 0.0, 0.0); + float solid_angle_texel = 4.0 * M_PI / (6.0 * params.face_size * params.face_size); + float roughness2 = params.roughness * params.roughness; + float roughness4 = roughness2 * roughness2; + vec3 UpVector = abs(N.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(1.0, 0.0, 0.0); + mat3 T; + T[0] = normalize(cross(UpVector, N)); + T[1] = cross(N, T[0]); + T[2] = N; + for (uint sampleNum = 0u; sampleNum < params.sample_count; sampleNum++) { vec2 xi = Hammersley(sampleNum, params.sample_count); - vec3 H = ImportanceSampleGGX(xi, params.roughness, N); - vec3 V = N; - vec3 L = (2.0 * dot(V, H) * H - V); + vec3 H = T * ImportanceSampleGGX(xi, roughness4); + float NdotH = dot(N, H); + vec3 L = (2.0 * NdotH * H - N); float ndotl = clamp(dot(N, L), 0.0, 1.0); if (ndotl > 0.0) { - sum.rgb += textureLod(source_cube, L, 0.0).rgb * ndotl; + float D = DistributionGGX(NdotH, roughness4); + float pdf = D * NdotH / (4.0 * NdotH) + 0.0001; + + float solid_angle_sample = 1.0 / (float(params.sample_count) * pdf + 0.0001); + + float mipLevel = params.roughness == 0.0 ? 0.0 : 0.5 * log2(solid_angle_sample / solid_angle_texel); + + sum.rgb += textureLod(source_cube, L, mipLevel).rgb * ndotl; sum.a += ndotl; } } diff --git a/servers/rendering/renderer_rd/shaders/resolve.glsl b/servers/rendering/renderer_rd/shaders/effects/resolve.glsl index fecf812a8c..0e086331c0 100644 --- a/servers/rendering/renderer_rd/shaders/resolve.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/resolve.glsl @@ -25,7 +25,7 @@ layout(rg8ui, set = 3, binding = 0) uniform restrict writeonly uimage2D dest_vox #endif -layout(push_constant, binding = 16, std430) uniform Params { +layout(push_constant, std430) uniform Params { ivec2 screen_size; int sample_count; uint pad; diff --git a/servers/rendering/renderer_rd/shaders/screen_space_reflection.glsl b/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection.glsl index 78e0a85341..246ef81cb2 100644 --- a/servers/rendering/renderer_rd/shaders/screen_space_reflection.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection.glsl @@ -15,7 +15,7 @@ layout(r8, set = 1, binding = 1) uniform restrict writeonly image2D blur_radius_ layout(rgba8, set = 2, binding = 0) uniform restrict readonly image2D source_normal_roughness; layout(set = 3, binding = 0) uniform sampler2D source_metallic; -layout(push_constant, binding = 2, std430) uniform Params { +layout(push_constant, std430) uniform Params { vec4 proj_info; ivec2 screen_size; @@ -32,12 +32,17 @@ layout(push_constant, binding = 2, std430) uniform Params { bool use_half_res; uint metallic_mask; - mat4 projection; + uint view_index; + uint pad1; + uint pad2; + uint pad3; } params; +#include "screen_space_reflection_inc.glsl" + vec2 view_to_screen(vec3 view_pos, out float w) { - vec4 projected = params.projection * vec4(view_pos, 1.0); + vec4 projected = scene_data.projection[params.view_index] * vec4(view_pos, 1.0); projected.xyz /= projected.w; projected.xy = projected.xy * 0.5 + 0.5; w = projected.w; @@ -46,24 +51,16 @@ vec2 view_to_screen(vec3 view_pos, out float w) { #define M_PI 3.14159265359 -vec3 reconstructCSPosition(vec2 S, float z) { - if (params.orthogonal) { - return vec3((S.xy * params.proj_info.xy + params.proj_info.zw), z); - } else { - return vec3((S.xy * params.proj_info.xy + params.proj_info.zw) * z, z); - } -} - void main() { // Pixel being shaded ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); - if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing + if (any(greaterThanEqual(ssC.xy, params.screen_size))) { //too large, do nothing return; } vec2 pixel_size = 1.0 / vec2(params.screen_size); - vec2 uv = vec2(ssC) * pixel_size; + vec2 uv = vec2(ssC.xy) * pixel_size; uv += pixel_size * 0.5; @@ -77,7 +74,12 @@ void main() { normal = normalize(normal); normal.y = -normal.y; //because this code reads flipped - vec3 view_dir = normalize(vertex); + vec3 view_dir; + if (sc_multiview) { + view_dir = normalize(vertex + scene_data.eye_offset[params.view_index].xyz); + } else { + view_dir = normalize(vertex); + } vec3 ray_dir = normalize(reflect(view_dir, normal)); if (dot(ray_dir, normal) < 0.001) { @@ -154,6 +156,11 @@ void main() { // convert to linear depth depth = imageLoad(source_depth, ivec2(pos - 0.5)).r; + if (sc_multiview) { + depth = depth * 2.0 - 1.0; + depth = 2.0 * params.camera_z_near * params.camera_z_far / (params.camera_z_far + params.camera_z_near - depth * (params.camera_z_far - params.camera_z_near)); + depth = -depth; + } z_from = z_to; z_to = z / w; @@ -175,23 +182,23 @@ void main() { if (found) { float margin_blend = 1.0; - vec2 margin = vec2((params.screen_size.x + params.screen_size.y) * 0.5 * 0.05); // make a uniform margin - if (any(bvec4(lessThan(pos, -margin), greaterThan(pos, params.screen_size + margin)))) { - // clip outside screen + margin + vec2 margin = vec2((params.screen_size.x + params.screen_size.y) * 0.05); // make a uniform margin + if (any(bvec4(lessThan(pos, vec2(0.0, 0.0)), greaterThan(pos, params.screen_size)))) { + // clip at the screen edges imageStore(ssr_image, ssC, vec4(0.0)); return; } { - //blend fading out towards external margin - vec2 margin_grad = mix(pos - params.screen_size, -pos, lessThan(pos, vec2(0.0))); - margin_blend = 1.0 - smoothstep(0.0, margin.x, max(margin_grad.x, margin_grad.y)); + //blend fading out towards inner margin + // 0.5 = midpoint of reflection + vec2 margin_grad = mix(params.screen_size - pos, pos, lessThan(pos, params.screen_size * 0.5)); + margin_blend = smoothstep(0.0, margin.x * margin.y, margin_grad.x * margin_grad.y); //margin_blend = 1.0; } vec2 final_pos; - float grad; - grad = steps_taken / float(params.num_steps); + float grad = (steps_taken + 1.0) / float(params.num_steps); float initial_fade = params.curve_fade_in == 0.0 ? 1.0 : pow(clamp(grad, 0.0, 1.0), params.curve_fade_in); float fade = pow(clamp(1.0 - grad, 0.0, 1.0), params.distance_fade) * initial_fade; final_pos = pos; @@ -224,13 +231,15 @@ void main() { } } + // Isn't this going to be overwritten after our endif? final_color = imageLoad(source_diffuse, ivec2((final_pos - 0.5) * pixel_size)); imageStore(blur_radius_image, ssC, vec4(blur_radius / 255.0)); //stored in r8 -#endif +#endif // MODE_ROUGH final_color = vec4(imageLoad(source_diffuse, ivec2(final_pos - 0.5)).rgb, fade * margin_blend); + //change blend by metallic vec4 metallic_mask = unpackUnorm4x8(params.metallic_mask); final_color.a *= dot(metallic_mask, texelFetch(source_metallic, ssC << 1, 0)); diff --git a/servers/rendering/renderer_rd/shaders/screen_space_reflection_filter.glsl b/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_filter.glsl index 62d1cffb0a..a63d60e0b2 100644 --- a/servers/rendering/renderer_rd/shaders/screen_space_reflection_filter.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_filter.glsl @@ -16,13 +16,13 @@ layout(r8, set = 2, binding = 1) uniform restrict writeonly image2D dest_radius; #endif layout(r32f, set = 3, binding = 0) uniform restrict readonly image2D source_depth; -layout(push_constant, binding = 2, std430) uniform Params { +layout(push_constant, std430) uniform Params { vec4 proj_info; bool orthogonal; float edge_tolerance; int increment; - uint pad; + uint view_index; ivec2 screen_size; bool vertical; @@ -30,6 +30,8 @@ layout(push_constant, binding = 2, std430) uniform Params { } params; +#include "screen_space_reflection_inc.glsl" + #define GAUSS_TABLE_SIZE 15 const float gauss_table[GAUSS_TABLE_SIZE + 1] = float[]( @@ -64,14 +66,6 @@ float gauss_weight(float p_val) { #define M_PI 3.14159265359 -vec3 reconstructCSPosition(vec2 S, float z) { - if (params.orthogonal) { - return vec3((S.xy * params.proj_info.xy + params.proj_info.zw), z); - } else { - return vec3((S.xy * params.proj_info.xy + params.proj_info.zw) * z, z); - } -} - void do_filter(inout vec4 accum, inout float accum_radius, inout float divisor, ivec2 texcoord, ivec2 increment, vec3 p_pos, vec3 normal, float p_limit_radius) { for (int i = 1; i < params.steps; i++) { float d = float(i * params.increment); @@ -110,7 +104,7 @@ void main() { // Pixel being shaded ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); - if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing + if (any(greaterThanEqual(ssC.xy, params.screen_size))) { //too large, do nothing return; } @@ -130,13 +124,13 @@ void main() { ivec2 direction = ivec2(params.increment, 0); #endif float depth = imageLoad(source_depth, ssC).r; - vec3 pos = reconstructCSPosition(vec2(ssC) + 0.5, depth); + vec3 pos = reconstructCSPosition(vec2(ssC.xy) + 0.5, depth); vec3 normal = imageLoad(source_normal, ssC).xyz * 2.0 - 1.0; normal = normalize(normal); normal.y = -normal.y; - do_filter(accum, accum_radius, divisor, ssC, direction, pos, normal, radius); - do_filter(accum, accum_radius, divisor, ssC, -direction, pos, normal, radius); + do_filter(accum, accum_radius, divisor, ssC.xy, direction, pos, normal, radius); + do_filter(accum, accum_radius, divisor, ssC.xy, -direction, pos, normal, radius); if (divisor > 0.0) { accum /= divisor; diff --git a/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_inc.glsl b/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_inc.glsl new file mode 100644 index 0000000000..26405ab040 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_inc.glsl @@ -0,0 +1,28 @@ +layout(constant_id = 0) const bool sc_multiview = false; + +layout(set = 4, binding = 0, std140) uniform SceneData { + mat4x4 projection[2]; + mat4x4 inv_projection[2]; + vec4 eye_offset[2]; +} +scene_data; + +vec3 reconstructCSPosition(vec2 screen_pos, float z) { + if (sc_multiview) { + vec4 pos; + pos.xy = (2.0 * vec2(screen_pos) / vec2(params.screen_size)) - 1.0; + pos.z = z * 2.0 - 1.0; + pos.w = 1.0; + + pos = scene_data.inv_projection[params.view_index] * pos; + pos.xyz /= pos.w; + + return pos.xyz; + } else { + if (params.orthogonal) { + return vec3((screen_pos.xy * params.proj_info.xy + params.proj_info.zw), z); + } else { + return vec3((screen_pos.xy * params.proj_info.xy + params.proj_info.zw) * z, z); + } + } +} diff --git a/servers/rendering/renderer_rd/shaders/screen_space_reflection_scale.glsl b/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_scale.glsl index 2328effe7b..a7da0812df 100644 --- a/servers/rendering/renderer_rd/shaders/screen_space_reflection_scale.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_scale.glsl @@ -6,6 +6,11 @@ layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; +/* Specialization Constants (Toggles) */ + +layout(constant_id = 0) const bool sc_multiview = false; + +/* inputs */ layout(set = 0, binding = 0) uniform sampler2D source_ssr; layout(set = 1, binding = 0) uniform sampler2D source_depth; layout(set = 1, binding = 1) uniform sampler2D source_normal; @@ -13,7 +18,7 @@ layout(rgba16f, set = 2, binding = 0) uniform restrict writeonly image2D dest_ss layout(r32f, set = 3, binding = 0) uniform restrict writeonly image2D dest_depth; layout(rgba8, set = 3, binding = 1) uniform restrict writeonly image2D dest_normal; -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, std430) uniform Params { ivec2 screen_size; float camera_z_near; float camera_z_far; @@ -28,7 +33,7 @@ void main() { // Pixel being shaded ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); - if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing + if (any(greaterThanEqual(ssC.xy, params.screen_size))) { //too large, do nothing return; } //do not filter, SSR will generate arctifacts if this is done @@ -57,13 +62,19 @@ void main() { normal.xyz += nr.xyz * 2.0 - 1.0; normal.w += nr.w; - d = d * 2.0 - 1.0; - if (params.orthogonal) { - d = ((d + (params.camera_z_far + params.camera_z_near) / (params.camera_z_far - params.camera_z_near)) * (params.camera_z_far - params.camera_z_near)) / 2.0; + if (sc_multiview) { + // we're doing a full unproject so we need the value as is. + depth += d; } else { - d = 2.0 * params.camera_z_near * params.camera_z_far / (params.camera_z_far + params.camera_z_near - d * (params.camera_z_far - params.camera_z_near)); + // unproject our Z value so we can use it directly. + d = d * 2.0 - 1.0; + if (params.orthogonal) { + d = ((d + (params.camera_z_far + params.camera_z_near) / (params.camera_z_far - params.camera_z_near)) * (params.camera_z_far - params.camera_z_near)) / 2.0; + } else { + d = 2.0 * params.camera_z_near * params.camera_z_far / (params.camera_z_far + params.camera_z_near - d * (params.camera_z_far - params.camera_z_near)); + } + depth += -d; } - depth += -d; } color /= 4.0; @@ -71,17 +82,22 @@ void main() { normal.xyz = normalize(normal.xyz / 4.0) * 0.5 + 0.5; normal.w /= 4.0; } else { - color = texelFetch(source_ssr, ssC << 1, 0); - depth = texelFetch(source_depth, ssC << 1, 0).r; - normal = texelFetch(source_normal, ssC << 1, 0); - - depth = depth * 2.0 - 1.0; - if (params.orthogonal) { - depth = ((depth + (params.camera_z_far + params.camera_z_near) / (params.camera_z_far - params.camera_z_near)) * (params.camera_z_far - params.camera_z_near)) / 2.0; - } else { - depth = 2.0 * params.camera_z_near * params.camera_z_far / (params.camera_z_far + params.camera_z_near - depth * (params.camera_z_far - params.camera_z_near)); + ivec2 ofs = ssC << 1; + + color = texelFetch(source_ssr, ofs, 0); + depth = texelFetch(source_depth, ofs, 0).r; + normal = texelFetch(source_normal, ofs, 0); + + if (!sc_multiview) { + // unproject our Z value so we can use it directly. + depth = depth * 2.0 - 1.0; + if (params.orthogonal) { + depth = ((depth + (params.camera_z_far + params.camera_z_near) / (params.camera_z_far - params.camera_z_near)) * (params.camera_z_far - params.camera_z_near)) / 2.0; + } else { + depth = 2.0 * params.camera_z_near * params.camera_z_far / (params.camera_z_far + params.camera_z_near - depth * (params.camera_z_far - params.camera_z_near)); + } + depth = -depth; } - depth = -depth; } imageStore(dest_ssr, ssC, color); diff --git a/servers/rendering/renderer_rd/shaders/effects/specular_merge.glsl b/servers/rendering/renderer_rd/shaders/effects/specular_merge.glsl new file mode 100644 index 0000000000..c62144fdaf --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/specular_merge.glsl @@ -0,0 +1,112 @@ +#[vertex] + +#version 450 + +#VERSION_DEFINES + +#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview) +#extension GL_EXT_multiview : enable +#endif + +#ifdef USE_MULTIVIEW +#ifdef has_VK_KHR_multiview +#define ViewIndex gl_ViewIndex +#else // has_VK_KHR_multiview +// !BAS! This needs to become an input once we implement our fallback! +#define ViewIndex 0 +#endif // has_VK_KHR_multiview +#else // USE_MULTIVIEW +// Set to zero, not supported in non stereo +#define ViewIndex 0 +#endif //USE_MULTIVIEW + +#ifdef USE_MULTIVIEW +layout(location = 0) out vec3 uv_interp; +#else // USE_MULTIVIEW +layout(location = 0) out vec2 uv_interp; +#endif //USE_MULTIVIEW + +void main() { + vec2 base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0)); + +#ifdef USE_MULTIVIEW + uv_interp = vec3(base_arr[gl_VertexIndex], ViewIndex); + + gl_Position = vec4(uv_interp.xy * 2.0 - 1.0, 0.0, 1.0); +#else + uv_interp = base_arr[gl_VertexIndex]; + + gl_Position = vec4(uv_interp * 2.0 - 1.0, 0.0, 1.0); +#endif +} + +#[fragment] + +#version 450 + +#VERSION_DEFINES + +#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview) +#extension GL_EXT_multiview : enable +#endif + +#ifdef USE_MULTIVIEW +#ifdef has_VK_KHR_multiview +#define ViewIndex gl_ViewIndex +#else // has_VK_KHR_multiview +// !BAS! This needs to become an input once we implement our fallback! +#define ViewIndex 0 +#endif // has_VK_KHR_multiview +#else // USE_MULTIVIEW +// Set to zero, not supported in non stereo +#define ViewIndex 0 +#endif //USE_MULTIVIEW + +#ifdef USE_MULTIVIEW +layout(location = 0) in vec3 uv_interp; +#else // USE_MULTIVIEW +layout(location = 0) in vec2 uv_interp; +#endif //USE_MULTIVIEW + +#ifdef USE_MULTIVIEW +layout(set = 0, binding = 0) uniform sampler2DArray specular; +#else // USE_MULTIVIEW +layout(set = 0, binding = 0) uniform sampler2D specular; +#endif //USE_MULTIVIEW + +#ifdef MODE_SSR + +#ifdef USE_MULTIVIEW +layout(set = 1, binding = 0) uniform sampler2DArray ssr; +#else // USE_MULTIVIEW +layout(set = 1, binding = 0) uniform sampler2D ssr; +#endif //USE_MULTIVIEW + +#endif + +#ifdef MODE_MERGE + +#ifdef USE_MULTIVIEW +layout(set = 2, binding = 0) uniform sampler2DArray diffuse; +#else // USE_MULTIVIEW +layout(set = 2, binding = 0) uniform sampler2D diffuse; +#endif //USE_MULTIVIEW + +#endif + +layout(location = 0) out vec4 frag_color; + +void main() { + frag_color.rgb = texture(specular, uv_interp).rgb; + frag_color.a = 0.0; +#ifdef MODE_SSR + + vec4 ssr_color = texture(ssr, uv_interp); + frag_color.rgb = mix(frag_color.rgb, ssr_color.rgb, ssr_color.a); +#endif + +#ifdef MODE_MERGE + frag_color += texture(diffuse, uv_interp); +#endif + //added using additive blend +} diff --git a/servers/rendering/renderer_rd/shaders/ssao_downsample.glsl b/servers/rendering/renderer_rd/shaders/effects/ss_effects_downsample.glsl index ee0db6a6f0..134aae5ce7 100644 --- a/servers/rendering/renderer_rd/shaders/ssao_downsample.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/ss_effects_downsample.glsl @@ -25,7 +25,7 @@ layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, std430) uniform Params { vec2 pixel_size; float z_far; float z_near; @@ -42,6 +42,9 @@ layout(r16f, set = 1, binding = 0) uniform restrict writeonly image2DArray dest_ layout(r16f, set = 2, binding = 0) uniform restrict writeonly image2DArray dest_image1; layout(r16f, set = 2, binding = 1) uniform restrict writeonly image2DArray dest_image2; layout(r16f, set = 2, binding = 2) uniform restrict writeonly image2DArray dest_image3; +#ifdef GENERATE_FULL_MIPS +layout(r16f, set = 2, binding = 3) uniform restrict writeonly image2DArray dest_image4; +#endif #endif vec4 screen_space_to_view_space_depth(vec4 p_depth) { @@ -150,7 +153,27 @@ void prepare_depths_and_mips(vec4 p_samples, uvec2 p_output_coord, uvec2 p_gtid) float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11)); imageStore(dest_image3, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg)); +#ifndef GENERATE_FULL_MIPS + } +#else + depth_buffer[depth_array_index][buffer_coord.x][buffer_coord.y] = avg; } + still_alive = p_gtid.x % 16 == depth_array_offset.x && depth_array_offset.y % 16 == depth_array_offset.y; + + p_output_coord /= 2; + groupMemoryBarrier(); + barrier(); + + if (still_alive) { + float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 0]; + float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 8]; + float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 8][buffer_coord.y + 0]; + float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 8][buffer_coord.y + 8]; + + float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11)); + imageStore(dest_image4, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg)); + } +#endif } #else #ifndef USE_HALF_BUFFERS diff --git a/servers/rendering/renderer_rd/shaders/ssao.glsl b/servers/rendering/renderer_rd/shaders/effects/ssao.glsl index 6e945edfcd..2a87e273bc 100644 --- a/servers/rendering/renderer_rd/shaders/ssao.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/ssao.glsl @@ -23,8 +23,6 @@ #VERSION_DEFINES -#define SSAO_ADAPTIVE_TAP_BASE_COUNT 5 - #define INTELSSAO_MAIN_DISK_SAMPLE_COUNT (32) const vec4 sample_pattern[INTELSSAO_MAIN_DISK_SAMPLE_COUNT] = { vec4(0.78488064, 0.56661671, 1.500000, -0.126083), vec4(0.26022232, -0.29575172, 1.500000, -1.064030), vec4(0.10459357, 0.08372527, 1.110000, -2.730563), vec4(-0.68286800, 0.04963045, 1.090000, -0.498827), @@ -62,7 +60,6 @@ const int num_taps[5] = { 3, 5, 12, 0, 0 }; #define SSAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET (1) #define SSAO_MAX_TAPS 32 -#define SSAO_MAX_REF_TAPS 512 #define SSAO_ADAPTIVE_TAP_BASE_COUNT 5 #define SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT (SSAO_MAX_TAPS - SSAO_ADAPTIVE_TAP_BASE_COUNT) #define SSAO_DEPTH_MIP_LEVELS 4 @@ -88,7 +85,7 @@ counter; layout(rg8, set = 2, binding = 0) uniform restrict writeonly image2D dest_image; // This push_constant is full - 128 bytes - if you need to add more data, consider adding to the uniform buffer instead -layout(push_constant, binding = 3, std430) uniform Params { +layout(push_constant, std430) uniform Params { ivec2 screen_size; int pass; int quality; diff --git a/servers/rendering/renderer_rd/shaders/ssao_blur.glsl b/servers/rendering/renderer_rd/shaders/effects/ssao_blur.glsl index d9cd2b4e85..f42734c46d 100644 --- a/servers/rendering/renderer_rd/shaders/ssao_blur.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/ssao_blur.glsl @@ -29,7 +29,7 @@ layout(set = 0, binding = 0) uniform sampler2D source_ssao; layout(rg8, set = 1, binding = 0) uniform restrict writeonly image2D dest_image; -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, std430) uniform Params { float edge_sharpness; float pad; vec2 half_screen_pixel_size; @@ -128,19 +128,19 @@ void main() { #ifdef MODE_NON_SMART - vec2 halfPixel = params.half_screen_pixel_size * 0.5f; + vec2 half_pixel = params.half_screen_pixel_size * 0.5; vec2 uv = (vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size; - vec2 centre = textureLod(source_ssao, vec2(uv), 0.0).xy; + vec2 center = textureLod(source_ssao, vec2(uv), 0.0).xy; vec4 vals; - vals.x = textureLod(source_ssao, vec2(uv + vec2(-halfPixel.x * 3, -halfPixel.y)), 0.0).x; - vals.y = textureLod(source_ssao, vec2(uv + vec2(+halfPixel.x, -halfPixel.y * 3)), 0.0).x; - vals.z = textureLod(source_ssao, vec2(uv + vec2(-halfPixel.x, +halfPixel.y * 3)), 0.0).x; - vals.w = textureLod(source_ssao, vec2(uv + vec2(+halfPixel.x * 3, +halfPixel.y)), 0.0).x; + vals.x = textureLod(source_ssao, vec2(uv + vec2(-half_pixel.x * 3, -half_pixel.y)), 0.0).x; + vals.y = textureLod(source_ssao, vec2(uv + vec2(+half_pixel.x, -half_pixel.y * 3)), 0.0).x; + vals.z = textureLod(source_ssao, vec2(uv + vec2(-half_pixel.x, +half_pixel.y * 3)), 0.0).x; + vals.w = textureLod(source_ssao, vec2(uv + vec2(+half_pixel.x * 3, +half_pixel.y)), 0.0).x; - vec2 sampled = vec2(dot(vals, vec4(0.2)) + centre.x * 0.2, centre.y); + vec2 sampled = vec2(dot(vals, vec4(0.2)) + center.x * 0.2, center.y); #else #ifdef MODE_SMART diff --git a/servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl b/servers/rendering/renderer_rd/shaders/effects/ssao_importance_map.glsl index 687fe1e6e2..04f98964e8 100644 --- a/servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/ssao_importance_map.glsl @@ -26,7 +26,7 @@ layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; #ifdef GENERATE_MAP -layout(set = 0, binding = 0) uniform sampler2DArray source_ssao; +layout(set = 0, binding = 0) uniform sampler2DArray source_texture; #else layout(set = 0, binding = 0) uniform sampler2D source_importance; #endif @@ -39,7 +39,7 @@ layout(set = 2, binding = 0, std430) buffer Counter { counter; #endif -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, std430) uniform Params { vec2 half_screen_pixel_size; float intensity; float power; @@ -56,11 +56,10 @@ void main() { vec2 base_uv = (vec2(base_position) + vec2(0.5f, 0.5f)) * params.half_screen_pixel_size; - float avg = 0.0; float minV = 1.0; float maxV = 0.0; for (int i = 0; i < 4; i++) { - vec4 vals = textureGather(source_ssao, vec3(base_uv, i)); + vec4 vals = textureGather(source_texture, vec3(base_uv, i)); // apply the same modifications that would have been applied in the main shader vals = params.intensity * vals; @@ -69,8 +68,6 @@ void main() { vals = pow(clamp(vals, 0.0, 1.0), vec4(params.power)); - avg += dot(vec4(vals.x, vals.y, vals.z, vals.w), vec4(1.0 / 16.0, 1.0 / 16.0, 1.0 / 16.0, 1.0 / 16.0)); - maxV = max(maxV, max(max(vals.x, vals.y), max(vals.z, vals.w))); minV = min(minV, min(min(vals.x, vals.y), min(vals.z, vals.w))); } diff --git a/servers/rendering/renderer_rd/shaders/ssao_interleave.glsl b/servers/rendering/renderer_rd/shaders/effects/ssao_interleave.glsl index 0907423d5d..f6a9a92fac 100644 --- a/servers/rendering/renderer_rd/shaders/ssao_interleave.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/ssao_interleave.glsl @@ -27,7 +27,7 @@ layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; layout(rgba8, set = 0, binding = 0) uniform restrict writeonly image2D dest_image; layout(set = 1, binding = 0) uniform sampler2DArray source_texture; -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, std430) uniform Params { float inv_sharpness; uint size_modifier; vec2 pixel_size; diff --git a/servers/rendering/renderer_rd/shaders/effects/ssil.glsl b/servers/rendering/renderer_rd/shaders/effects/ssil.glsl new file mode 100644 index 0000000000..513791dfbf --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ssil.glsl @@ -0,0 +1,444 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016, Intel Corporation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2016-09-07: filip.strugar@intel.com: first commit +// 2020-12-05: clayjohn: convert to Vulkan and Godot +// 2021-05-27: clayjohn: convert SSAO to SSIL +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[compute] + +#version 450 + +#VERSION_DEFINES + +#define SSIL_MAIN_DISK_SAMPLE_COUNT (32) +const vec4 sample_pattern[SSIL_MAIN_DISK_SAMPLE_COUNT] = { + vec4(0.78488064, 0.56661671, 1.500000, -0.126083), vec4(0.26022232, -0.29575172, 1.500000, -1.064030), vec4(0.10459357, 0.08372527, 1.110000, -2.730563), vec4(-0.68286800, 0.04963045, 1.090000, -0.498827), + vec4(-0.13570161, -0.64190155, 1.250000, -0.532765), vec4(-0.26193795, -0.08205118, 0.670000, -1.783245), vec4(-0.61177456, 0.66664219, 0.710000, -0.044234), vec4(0.43675563, 0.25119025, 0.610000, -1.167283), + vec4(0.07884444, 0.86618668, 0.640000, -0.459002), vec4(-0.12790935, -0.29869005, 0.600000, -1.729424), vec4(-0.04031125, 0.02413622, 0.600000, -4.792042), vec4(0.16201244, -0.52851415, 0.790000, -1.067055), + vec4(-0.70991218, 0.47301072, 0.640000, -0.335236), vec4(0.03277707, -0.22349690, 0.600000, -1.982384), vec4(0.68921727, 0.36800742, 0.630000, -0.266718), vec4(0.29251814, 0.37775412, 0.610000, -1.422520), + vec4(-0.12224089, 0.96582592, 0.600000, -0.426142), vec4(0.11071457, -0.16131058, 0.600000, -2.165947), vec4(0.46562141, -0.59747696, 0.600000, -0.189760), vec4(-0.51548797, 0.11804193, 0.600000, -1.246800), + vec4(0.89141309, -0.42090443, 0.600000, 0.028192), vec4(-0.32402530, -0.01591529, 0.600000, -1.543018), vec4(0.60771245, 0.41635221, 0.600000, -0.605411), vec4(0.02379565, -0.08239821, 0.600000, -3.809046), + vec4(0.48951152, -0.23657045, 0.600000, -1.189011), vec4(-0.17611565, -0.81696892, 0.600000, -0.513724), vec4(-0.33930185, -0.20732205, 0.600000, -1.698047), vec4(-0.91974425, 0.05403209, 0.600000, 0.062246), + vec4(-0.15064627, -0.14949332, 0.600000, -1.896062), vec4(0.53180975, -0.35210401, 0.600000, -0.758838), vec4(0.41487166, 0.81442589, 0.600000, -0.505648), vec4(-0.24106961, -0.32721516, 0.600000, -1.665244) +}; + +// these values can be changed (up to SSIL_MAX_TAPS) with no changes required elsewhere; values for 4th and 5th preset are ignored but array needed to avoid compilation errors +// the actual number of texture samples is two times this value (each "tap" has two symmetrical depth texture samples) +const int num_taps[5] = { 3, 5, 12, 0, 0 }; + +#define SSIL_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET (99) // to disable simply set to 99 or similar +#define SSIL_TILT_SAMPLES_AMOUNT (0.4) +// +#define SSIL_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET (1) // to disable simply set to 99 or similar +#define SSIL_HALOING_REDUCTION_AMOUNT (0.8) // values from 0.0 - 1.0, 1.0 means max weighting (will cause artifacts, 0.8 is more reasonable) +// +#define SSIL_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET (2) +#define SSIL_DEPTH_MIPS_GLOBAL_OFFSET (-4.3) // best noise/quality/performance tradeoff, found empirically +// +// !!warning!! the edge handling is hard-coded to 'disabled' on quality level 0, and enabled above, on the C++ side; while toggling it here will work for +// testing purposes, it will not yield performance gains (or correct results) +#define SSIL_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (1) +// +#define SSIL_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET (1) + +#define SSIL_MAX_TAPS 32 +#define SSIL_ADAPTIVE_TAP_BASE_COUNT 5 +#define SSIL_ADAPTIVE_TAP_FLEXIBLE_COUNT (SSIL_MAX_TAPS - SSIL_ADAPTIVE_TAP_BASE_COUNT) +#define SSIL_DEPTH_MIP_LEVELS 4 + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(set = 0, binding = 0) uniform sampler2DArray source_depth_mipmaps; +layout(rgba8, set = 0, binding = 1) uniform restrict readonly image2D source_normal; +layout(set = 0, binding = 2) uniform Constants { //get into a lower set + vec4 rotation_matrices[20]; +} +constants; + +#ifdef ADAPTIVE +layout(rgba16, set = 1, binding = 0) uniform restrict readonly image2DArray source_ssil; +layout(set = 1, binding = 1) uniform sampler2D source_importance; +layout(set = 1, binding = 2, std430) buffer Counter { + uint sum; +} +counter; +#endif + +layout(rgba16, set = 2, binding = 0) uniform restrict writeonly image2D dest_image; +layout(r8, set = 2, binding = 1) uniform image2D edges_weights_image; + +layout(set = 3, binding = 0) uniform sampler2D last_frame; +layout(set = 3, binding = 1) uniform ProjectionConstants { + mat4 reprojection; +} +projection_constants; + +layout(push_constant, std430) uniform Params { + ivec2 screen_size; + int pass; + int quality; + + vec2 half_screen_pixel_size; + vec2 half_screen_pixel_size_x025; + + vec2 NDC_to_view_mul; + vec2 NDC_to_view_add; + + vec2 pad2; + float z_near; + float z_far; + + float radius; + float intensity; + int size_multiplier; + int pad; + + float fade_out_mul; + float fade_out_add; + float normal_rejection_amount; + float inv_radius_near_limit; + + bool is_orthogonal; + float neg_inv_radius; + float load_counter_avg_div; + float adaptive_sample_limit; + + ivec2 pass_coord_offset; + vec2 pass_uv_offset; +} +params; + +float pack_edges(vec4 p_edgesLRTB) { + p_edgesLRTB = round(clamp(p_edgesLRTB, 0.0, 1.0) * 3.05); + return dot(p_edgesLRTB, vec4(64.0 / 255.0, 16.0 / 255.0, 4.0 / 255.0, 1.0 / 255.0)); +} + +vec3 NDC_to_view_space(vec2 p_pos, float p_viewspace_depth) { + if (params.is_orthogonal) { + return vec3((params.NDC_to_view_mul * p_pos.xy + params.NDC_to_view_add), p_viewspace_depth); + } else { + return vec3((params.NDC_to_view_mul * p_pos.xy + params.NDC_to_view_add) * p_viewspace_depth, p_viewspace_depth); + } +} + +// calculate effect radius and fit our screen sampling pattern inside it +void calculate_radius_parameters(const float p_pix_center_length, const vec2 p_pixel_size_at_center, out float r_lookup_radius, out float r_radius, out float r_fallof_sq) { + r_radius = params.radius; + + // when too close, on-screen sampling disk will grow beyond screen size; limit this to avoid closeup temporal artifacts + const float too_close_limit = clamp(p_pix_center_length * params.inv_radius_near_limit, 0.0, 1.0) * 0.8 + 0.2; + + r_radius *= too_close_limit; + + // 0.85 is to reduce the radius to allow for more samples on a slope to still stay within influence + r_lookup_radius = (0.85 * r_radius) / p_pixel_size_at_center.x; + + // used to calculate falloff (both for AO samples and per-sample weights) + r_fallof_sq = -1.0 / (r_radius * r_radius); +} + +vec4 calculate_edges(const float p_center_z, const float p_left_z, const float p_right_z, const float p_top_z, const float p_bottom_z) { + // slope-sensitive depth-based edge detection + vec4 edgesLRTB = vec4(p_left_z, p_right_z, p_top_z, p_bottom_z) - p_center_z; + vec4 edgesLRTB_slope_adjusted = edgesLRTB + edgesLRTB.yxwz; + edgesLRTB = min(abs(edgesLRTB), abs(edgesLRTB_slope_adjusted)); + return clamp((1.3 - edgesLRTB / (p_center_z * 0.040)), 0.0, 1.0); +} + +vec3 decode_normal(vec3 p_encoded_normal) { + vec3 normal = p_encoded_normal * 2.0 - 1.0; + return normal; +} + +vec3 load_normal(ivec2 p_pos) { + vec3 encoded_normal = imageLoad(source_normal, p_pos).xyz; + encoded_normal.z = 1.0 - encoded_normal.z; + return decode_normal(encoded_normal); +} + +vec3 load_normal(ivec2 p_pos, ivec2 p_offset) { + vec3 encoded_normal = imageLoad(source_normal, p_pos + p_offset).xyz; + encoded_normal.z = 1.0 - encoded_normal.z; + return decode_normal(encoded_normal); +} + +// all vectors in viewspace +float calculate_pixel_obscurance(vec3 p_pixel_normal, vec3 p_hit_delta, float p_fallof_sq) { + float length_sq = dot(p_hit_delta, p_hit_delta); + float NdotD = dot(p_pixel_normal, p_hit_delta) / sqrt(length_sq); + + float falloff_mult = max(0.0, length_sq * p_fallof_sq + 1.0); + + return max(0, NdotD - 0.05) * falloff_mult; +} + +void SSIL_tap_inner(const int p_quality_level, inout vec3 r_color_sum, inout float r_obscurance_sum, inout float r_weight_sum, const vec2 p_sampling_uv, const float p_mip_level, const vec3 p_pix_center_pos, vec3 p_pixel_normal, const float p_fallof_sq, const float p_weight_mod) { + // get depth at sample + float viewspace_sample_z = textureLod(source_depth_mipmaps, vec3(p_sampling_uv, params.pass), p_mip_level).x; + vec3 sample_normal = load_normal(ivec2(p_sampling_uv * vec2(params.screen_size))); + + // convert to viewspace + vec3 hit_pos = NDC_to_view_space(p_sampling_uv.xy, viewspace_sample_z); + vec3 hit_delta = hit_pos - p_pix_center_pos; + + float obscurance = calculate_pixel_obscurance(p_pixel_normal, hit_delta, p_fallof_sq); + float weight = 1.0; + + if (p_quality_level >= SSIL_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET) { + float reduct = max(0, -hit_delta.z); + reduct = clamp(reduct * params.neg_inv_radius + 2.0, 0.0, 1.0); + weight = SSIL_HALOING_REDUCTION_AMOUNT * reduct + (1.0 - SSIL_HALOING_REDUCTION_AMOUNT); + } + + // Translate sampling_uv to last screen's coordinates + const vec4 sample_pos = projection_constants.reprojection * vec4(p_sampling_uv * 2.0 - 1.0, (viewspace_sample_z - params.z_near) / (params.z_far - params.z_near) * 2.0 - 1.0, 1.0); + vec2 reprojected_sampling_uv = (sample_pos.xy / sample_pos.w) * 0.5 + 0.5; + + weight *= p_weight_mod; + + r_obscurance_sum += obscurance * weight; + + vec3 sample_color = textureLod(last_frame, reprojected_sampling_uv, 5.0).rgb; + // Reduce impact of fireflies by tonemapping before averaging: http://graphicrants.blogspot.com/2013/12/tone-mapping.html + sample_color /= (1.0 + dot(sample_color, vec3(0.299, 0.587, 0.114))); + r_color_sum += sample_color * obscurance * weight * mix(1.0, smoothstep(0.0, 0.1, -dot(sample_normal, normalize(hit_delta))), params.normal_rejection_amount); + r_weight_sum += weight; +} + +void SSILTap(const int p_quality_level, inout vec3 r_color_sum, inout float r_obscurance_sum, inout float r_weight_sum, const int p_tap_index, const mat2 p_rot_scale, const vec3 p_pix_center_pos, vec3 p_pixel_normal, const vec2 p_normalized_screen_pos, const float p_mip_offset, const float p_fallof_sq, float p_weight_mod, vec2 p_norm_xy, float p_norm_xy_length) { + vec2 sample_offset; + float sample_pow_2_len; + + // patterns + { + vec4 new_sample = sample_pattern[p_tap_index]; + sample_offset = new_sample.xy * p_rot_scale; + sample_pow_2_len = new_sample.w; // precalculated, same as: sample_pow_2_len = log2( length( new_sample.xy ) ); + p_weight_mod *= new_sample.z; + } + + // snap to pixel center (more correct obscurance math, avoids artifacts) + sample_offset = round(sample_offset); + + // calculate MIP based on the sample distance from the centre, similar to as described + // in http://graphics.cs.williams.edu/papers/SAOHPG12/. + float mip_level = (p_quality_level < SSIL_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? (0) : (sample_pow_2_len + p_mip_offset); + + vec2 sampling_uv = sample_offset * params.half_screen_pixel_size + p_normalized_screen_pos; + + SSIL_tap_inner(p_quality_level, r_color_sum, r_obscurance_sum, r_weight_sum, sampling_uv, mip_level, p_pix_center_pos, p_pixel_normal, p_fallof_sq, p_weight_mod); + + // for the second tap, just use the mirrored offset + vec2 sample_offset_mirrored_uv = -sample_offset; + + // tilt the second set of samples so that the disk is effectively rotated by the normal + // effective at removing one set of artifacts, but too expensive for lower quality settings + if (p_quality_level >= SSIL_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET) { + float dot_norm = dot(sample_offset_mirrored_uv, p_norm_xy); + sample_offset_mirrored_uv -= dot_norm * p_norm_xy_length * p_norm_xy; + sample_offset_mirrored_uv = round(sample_offset_mirrored_uv); + } + + // snap to pixel center (more correct obscurance math, avoids artifacts) + vec2 sampling_mirrored_uv = sample_offset_mirrored_uv * params.half_screen_pixel_size + p_normalized_screen_pos; + + SSIL_tap_inner(p_quality_level, r_color_sum, r_obscurance_sum, r_weight_sum, sampling_mirrored_uv, mip_level, p_pix_center_pos, p_pixel_normal, p_fallof_sq, p_weight_mod); +} + +void generate_SSIL(out vec3 r_color, out vec4 r_edges, out float r_obscurance, out float r_weight, const vec2 p_pos, int p_quality_level, bool p_adaptive_base) { + vec2 pos_rounded = trunc(p_pos); + uvec2 upos = uvec2(pos_rounded); + + const int number_of_taps = (p_adaptive_base) ? (SSIL_ADAPTIVE_TAP_BASE_COUNT) : (num_taps[p_quality_level]); + float pix_z, pix_left_z, pix_top_z, pix_right_z, pix_bottom_z; + + vec4 valuesUL = textureGather(source_depth_mipmaps, vec3(pos_rounded * params.half_screen_pixel_size, params.pass)); + vec4 valuesBR = textureGather(source_depth_mipmaps, vec3((pos_rounded + vec2(1.0)) * params.half_screen_pixel_size, params.pass)); + + // get this pixel's viewspace depth + pix_z = valuesUL.y; + + // get left right top bottom neighbouring pixels for edge detection (gets compiled out on quality_level == 0) + pix_left_z = valuesUL.x; + pix_top_z = valuesUL.z; + pix_right_z = valuesBR.z; + pix_bottom_z = valuesBR.x; + + vec2 normalized_screen_pos = pos_rounded * params.half_screen_pixel_size + params.half_screen_pixel_size_x025; + vec3 pix_center_pos = NDC_to_view_space(normalized_screen_pos, pix_z); + + // Load this pixel's viewspace normal + uvec2 full_res_coord = upos * 2 * params.size_multiplier + params.pass_coord_offset.xy; + vec3 pixel_normal = load_normal(ivec2(full_res_coord)); + + const vec2 pixel_size_at_center = NDC_to_view_space(normalized_screen_pos.xy + params.half_screen_pixel_size, pix_center_pos.z).xy - pix_center_pos.xy; + + float pixel_lookup_radius; + float fallof_sq; + + // calculate effect radius and fit our screen sampling pattern inside it + float viewspace_radius; + calculate_radius_parameters(length(pix_center_pos), pixel_size_at_center, pixel_lookup_radius, viewspace_radius, fallof_sq); + + // calculate samples rotation/scaling + mat2 rot_scale_matrix; + uint pseudo_random_index; + + { + vec4 rotation_scale; + // reduce effect radius near the screen edges slightly; ideally, one would render a larger depth buffer (5% on each side) instead + if (!p_adaptive_base && (p_quality_level >= SSIL_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET)) { + float near_screen_border = min(min(normalized_screen_pos.x, 1.0 - normalized_screen_pos.x), min(normalized_screen_pos.y, 1.0 - normalized_screen_pos.y)); + near_screen_border = clamp(10.0 * near_screen_border + 0.6, 0.0, 1.0); + pixel_lookup_radius *= near_screen_border; + } + + // load & update pseudo-random rotation matrix + pseudo_random_index = uint(pos_rounded.y * 2 + pos_rounded.x) % 5; + rotation_scale = constants.rotation_matrices[params.pass * 5 + pseudo_random_index]; + rot_scale_matrix = mat2(rotation_scale.x * pixel_lookup_radius, rotation_scale.y * pixel_lookup_radius, rotation_scale.z * pixel_lookup_radius, rotation_scale.w * pixel_lookup_radius); + } + + // the main obscurance & sample weight storage + vec3 color_sum = vec3(0.0); + float obscurance_sum = 0.0; + float weight_sum = 0.0; + + // edge mask for between this and left/right/top/bottom neighbour pixels - not used in quality level 0 so initialize to "no edge" (1 is no edge, 0 is edge) + vec4 edgesLRTB = vec4(1.0, 1.0, 1.0, 1.0); + + // Move center pixel slightly towards camera to avoid imprecision artifacts due to using of 16bit depth buffer; a lot smaller offsets needed when using 32bit floats + pix_center_pos *= 0.9992; + + if (!p_adaptive_base && (p_quality_level >= SSIL_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) { + edgesLRTB = calculate_edges(pix_z, pix_left_z, pix_right_z, pix_top_z, pix_bottom_z); + } + + const float global_mip_offset = SSIL_DEPTH_MIPS_GLOBAL_OFFSET; + float mip_offset = (p_quality_level < SSIL_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? (0) : (log2(pixel_lookup_radius) + global_mip_offset); + + // Used to tilt the second set of samples so that the disk is effectively rotated by the normal + // effective at removing one set of artifacts, but too expensive for lower quality settings + vec2 norm_xy = vec2(pixel_normal.x, pixel_normal.y); + float norm_xy_length = length(norm_xy); + norm_xy /= vec2(norm_xy_length, -norm_xy_length); + norm_xy_length *= SSIL_TILT_SAMPLES_AMOUNT; + + // standard, non-adaptive approach + if ((p_quality_level != 3) || p_adaptive_base) { + for (int i = 0; i < number_of_taps; i++) { + SSILTap(p_quality_level, color_sum, obscurance_sum, weight_sum, i, rot_scale_matrix, pix_center_pos, pixel_normal, normalized_screen_pos, mip_offset, fallof_sq, 1.0, norm_xy, norm_xy_length); + } + } +#ifdef ADAPTIVE + else { + // add new ones if needed + vec2 full_res_uv = normalized_screen_pos + params.pass_uv_offset.xy; + float importance = textureLod(source_importance, full_res_uv, 0.0).x; + + //Need to store obscurance from base pass + // load existing base values + vec4 base_values = imageLoad(source_ssil, ivec3(upos, params.pass)); + weight_sum += imageLoad(edges_weights_image, ivec2(upos)).r * float(SSIL_ADAPTIVE_TAP_BASE_COUNT * 4.0); + color_sum += (base_values.rgb) * weight_sum; + obscurance_sum += (base_values.a) * weight_sum; + + // increase importance around edges + float edge_count = dot(1.0 - edgesLRTB, vec4(1.0, 1.0, 1.0, 1.0)); + + float avg_total_importance = float(counter.sum) * params.load_counter_avg_div; + + float importance_limiter = clamp(params.adaptive_sample_limit / avg_total_importance, 0.0, 1.0); + importance *= importance_limiter; + + float additional_sample_count = SSIL_ADAPTIVE_TAP_FLEXIBLE_COUNT * importance; + + const float blend_range = 3.0; + const float blend_range_inv = 1.0 / blend_range; + + additional_sample_count += 0.5; + uint additional_samples = uint(additional_sample_count); + uint additional_samples_to = min(SSIL_MAX_TAPS, additional_samples + SSIL_ADAPTIVE_TAP_BASE_COUNT); + + for (uint i = SSIL_ADAPTIVE_TAP_BASE_COUNT; i < additional_samples_to; i++) { + additional_sample_count -= 1.0f; + float weight_mod = clamp(additional_sample_count * blend_range_inv, 0.0, 1.0); + SSILTap(p_quality_level, color_sum, obscurance_sum, weight_sum, int(i), rot_scale_matrix, pix_center_pos, pixel_normal, normalized_screen_pos, mip_offset, fallof_sq, weight_mod, norm_xy, norm_xy_length); + } + } +#endif + + // Early out for adaptive base + if (p_adaptive_base) { + vec3 color = color_sum / weight_sum; + + r_color = color; + r_edges = vec4(0.0); + r_obscurance = obscurance_sum / weight_sum; + r_weight = weight_sum; + return; + } + + // Calculate weighted average + vec3 color = color_sum / weight_sum; + color /= 1.0 - dot(color, vec3(0.299, 0.587, 0.114)); + + // Calculate fadeout (1 close, gradient, 0 far) + float fade_out = clamp(pix_center_pos.z * params.fade_out_mul + params.fade_out_add, 0.0, 1.0); + + // Reduce the SSIL if we're on the edge to remove artifacts on edges (we don't care for the lower quality one) + if (!p_adaptive_base && (p_quality_level >= SSIL_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) { + // when there's more than 2 opposite edges, start fading out the occlusion to reduce aliasing artifacts + float edge_fadeout_factor = clamp((1.0 - edgesLRTB.x - edgesLRTB.y) * 0.35, 0.0, 1.0) + clamp((1.0 - edgesLRTB.z - edgesLRTB.w) * 0.35, 0.0, 1.0); + + fade_out *= clamp(1.0 - edge_fadeout_factor, 0.0, 1.0); + } + + color = params.intensity * color; + + color *= fade_out; + + // outputs! + r_color = color; + r_edges = edgesLRTB; // These are used to prevent blurring across edges, 1 means no edge, 0 means edge, 0.5 means half way there, etc. + r_obscurance = clamp((obscurance_sum / weight_sum) * params.intensity, 0.0, 1.0); + r_weight = weight_sum; +} + +void main() { + vec3 out_color; + float out_obscurance; + float out_weight; + vec4 out_edges; + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing + return; + } + + vec2 uv = vec2(gl_GlobalInvocationID) + vec2(0.5); +#ifdef SSIL_BASE + generate_SSIL(out_color, out_edges, out_obscurance, out_weight, uv, params.quality, true); + + imageStore(dest_image, ssC, vec4(out_color, out_obscurance)); + imageStore(edges_weights_image, ssC, vec4(out_weight / (float(SSIL_ADAPTIVE_TAP_BASE_COUNT) * 4.0))); +#else + generate_SSIL(out_color, out_edges, out_obscurance, out_weight, uv, params.quality, false); // pass in quality levels + + imageStore(dest_image, ssC, vec4(out_color, out_obscurance)); + imageStore(edges_weights_image, ssC, vec4(pack_edges(out_edges))); +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/effects/ssil_blur.glsl b/servers/rendering/renderer_rd/shaders/effects/ssil_blur.glsl new file mode 100644 index 0000000000..47c56571f6 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ssil_blur.glsl @@ -0,0 +1,144 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016, Intel Corporation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2016-09-07: filip.strugar@intel.com: first commit +// 2020-12-05: clayjohn: convert to Vulkan and Godot +// 2021-05-27: clayjohn: convert SSAO to SSIL +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[compute] + +#version 450 + +#VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(set = 0, binding = 0) uniform sampler2D source_ssil; + +layout(rgba16, set = 1, binding = 0) uniform restrict writeonly image2D dest_image; + +layout(r8, set = 2, binding = 0) uniform restrict readonly image2D source_edges; + +layout(push_constant, std430) uniform Params { + float edge_sharpness; + float pad; + vec2 half_screen_pixel_size; +} +params; + +vec4 unpack_edges(float p_packed_val) { + uint packed_val = uint(p_packed_val * 255.5); + vec4 edgesLRTB; + edgesLRTB.x = float((packed_val >> 6) & 0x03) / 3.0; + edgesLRTB.y = float((packed_val >> 4) & 0x03) / 3.0; + edgesLRTB.z = float((packed_val >> 2) & 0x03) / 3.0; + edgesLRTB.w = float((packed_val >> 0) & 0x03) / 3.0; + + return clamp(edgesLRTB + params.edge_sharpness, 0.0, 1.0); +} + +void add_sample(vec4 p_ssil_value, float p_edge_value, inout vec4 r_sum, inout float r_sum_weight) { + float weight = p_edge_value; + + r_sum += (weight * p_ssil_value); + r_sum_weight += weight; +} + +#ifdef MODE_WIDE +vec4 sample_blurred_wide(ivec2 p_pos, vec2 p_coord) { + vec4 ssil_value = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(0, 0)); + vec4 ssil_valueL = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(-2, 0)); + vec4 ssil_valueT = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(0, -2)); + vec4 ssil_valueR = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(2, 0)); + vec4 ssil_valueB = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(0, 2)); + + vec4 edgesLRTB = unpack_edges(imageLoad(source_edges, p_pos).r); + edgesLRTB.x *= unpack_edges(imageLoad(source_edges, p_pos + ivec2(-2, 0)).r).y; + edgesLRTB.z *= unpack_edges(imageLoad(source_edges, p_pos + ivec2(0, -2)).r).w; + edgesLRTB.y *= unpack_edges(imageLoad(source_edges, p_pos + ivec2(2, 0)).r).x; + edgesLRTB.w *= unpack_edges(imageLoad(source_edges, p_pos + ivec2(0, 2)).r).z; + + float sum_weight = 0.8; + vec4 sum = ssil_value * sum_weight; + + add_sample(ssil_valueL, edgesLRTB.x, sum, sum_weight); + add_sample(ssil_valueR, edgesLRTB.y, sum, sum_weight); + add_sample(ssil_valueT, edgesLRTB.z, sum, sum_weight); + add_sample(ssil_valueB, edgesLRTB.w, sum, sum_weight); + + vec4 ssil_avg = sum / sum_weight; + + ssil_value = ssil_avg; + + return ssil_value; +} +#endif + +#ifdef MODE_SMART +vec4 sample_blurred(ivec2 p_pos, vec2 p_coord) { + vec4 vC = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(0, 0)); + vec4 vL = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(-1, 0)); + vec4 vT = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(0, -1)); + vec4 vR = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(1, 0)); + vec4 vB = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(0, 1)); + + float packed_edges = imageLoad(source_edges, p_pos).r; + vec4 edgesLRTB = unpack_edges(packed_edges); + + float sum_weight = 0.5; + vec4 sum = vC * sum_weight; + + add_sample(vL, edgesLRTB.x, sum, sum_weight); + add_sample(vR, edgesLRTB.y, sum, sum_weight); + add_sample(vT, edgesLRTB.z, sum, sum_weight); + add_sample(vB, edgesLRTB.w, sum, sum_weight); + + vec4 ssil_avg = sum / sum_weight; + + vec4 ssil_value = ssil_avg; + + return ssil_value; +} +#endif + +void main() { + // Pixel being shaded + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + +#ifdef MODE_NON_SMART + + vec2 half_pixel = params.half_screen_pixel_size * 0.5; + + vec2 uv = (vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size; + + vec4 centre = textureLod(source_ssil, uv, 0.0); + + vec4 value = textureLod(source_ssil, vec2(uv + vec2(-half_pixel.x * 3, -half_pixel.y)), 0.0) * 0.2; + value += textureLod(source_ssil, vec2(uv + vec2(+half_pixel.x, -half_pixel.y * 3)), 0.0) * 0.2; + value += textureLod(source_ssil, vec2(uv + vec2(-half_pixel.x, +half_pixel.y * 3)), 0.0) * 0.2; + value += textureLod(source_ssil, vec2(uv + vec2(+half_pixel.x * 3, +half_pixel.y)), 0.0) * 0.2; + + vec4 sampled = value + centre * 0.2; + +#else +#ifdef MODE_SMART + vec4 sampled = sample_blurred(ssC, (vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size); +#else // MODE_WIDE + vec4 sampled = sample_blurred_wide(ssC, (vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size); +#endif +#endif // MODE_NON_SMART + imageStore(dest_image, ssC, sampled); +} diff --git a/servers/rendering/renderer_rd/shaders/effects/ssil_importance_map.glsl b/servers/rendering/renderer_rd/shaders/effects/ssil_importance_map.glsl new file mode 100644 index 0000000000..6b6b02739d --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ssil_importance_map.glsl @@ -0,0 +1,125 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016, Intel Corporation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2016-09-07: filip.strugar@intel.com: first commit +// 2020-12-05: clayjohn: convert to Vulkan and Godot +// 2021-05-27: clayjohn: convert SSAO to SSIL +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[compute] + +#version 450 + +#VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +#ifdef GENERATE_MAP +layout(set = 0, binding = 0) uniform sampler2DArray source_texture; +#else +layout(set = 0, binding = 0) uniform sampler2D source_importance; +#endif +layout(r8, set = 1, binding = 0) uniform restrict writeonly image2D dest_image; + +#ifdef PROCESS_MAPB +layout(set = 2, binding = 0, std430) buffer Counter { + uint sum; +} +counter; +#endif + +layout(push_constant, std430) uniform Params { + vec2 half_screen_pixel_size; + float intensity; + float pad; +} +params; + +void main() { + // Pixel being shaded + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + +#ifdef GENERATE_MAP + // importance map stuff + uvec2 base_position = ssC * 2; + + float avg = 0.0; + float minV = 1.0; + float maxV = 0.0; + for (int i = 0; i < 4; i++) { + vec3 value_a = texelFetch(source_texture, ivec3(base_position, i), 0).rgb * params.intensity; + vec3 value_b = texelFetch(source_texture, ivec3(base_position, i) + ivec3(0, 1, 0), 0).rgb * params.intensity; + vec3 value_c = texelFetch(source_texture, ivec3(base_position, i) + ivec3(1, 0, 0), 0).rgb * params.intensity; + vec3 value_d = texelFetch(source_texture, ivec3(base_position, i) + ivec3(1, 1, 0), 0).rgb * params.intensity; + + // Calculate luminance (black and white value) + float a = dot(value_a, vec3(0.2125, 0.7154, 0.0721)); + float b = dot(value_b, vec3(0.2125, 0.7154, 0.0721)); + float c = dot(value_c, vec3(0.2125, 0.7154, 0.0721)); + float d = dot(value_d, vec3(0.2125, 0.7154, 0.0721)); + + maxV = max(maxV, max(max(a, b), max(c, d))); + minV = min(minV, min(min(a, b), min(c, d))); + } + + float min_max_diff = maxV - minV; + + imageStore(dest_image, ssC, vec4(pow(clamp(min_max_diff * 2.0, 0.0, 1.0), 0.6))); +#endif + +#ifdef PROCESS_MAPA + vec2 uv = (vec2(ssC) + 0.5) * params.half_screen_pixel_size * 2.0; + + float centre = textureLod(source_importance, uv, 0.0).x; + + vec2 half_pixel = params.half_screen_pixel_size; + + vec4 vals; + vals.x = textureLod(source_importance, uv + vec2(-half_pixel.x * 3, -half_pixel.y), 0.0).x; + vals.y = textureLod(source_importance, uv + vec2(+half_pixel.x, -half_pixel.y * 3), 0.0).x; + vals.z = textureLod(source_importance, uv + vec2(+half_pixel.x * 3, +half_pixel.y), 0.0).x; + vals.w = textureLod(source_importance, uv + vec2(-half_pixel.x, +half_pixel.y * 3), 0.0).x; + + float avg = dot(vals, vec4(0.25, 0.25, 0.25, 0.25)); + + imageStore(dest_image, ssC, vec4(avg)); +#endif + +#ifdef PROCESS_MAPB + vec2 uv = (vec2(ssC) + 0.5f) * params.half_screen_pixel_size * 2.0; + + float centre = textureLod(source_importance, uv, 0.0).x; + + vec2 half_pixel = params.half_screen_pixel_size; + + vec4 vals; + vals.x = textureLod(source_importance, uv + vec2(-half_pixel.x, -half_pixel.y * 3), 0.0).x; + vals.y = textureLod(source_importance, uv + vec2(+half_pixel.x * 3, -half_pixel.y), 0.0).x; + vals.z = textureLod(source_importance, uv + vec2(+half_pixel.x, +half_pixel.y * 3), 0.0).x; + vals.w = textureLod(source_importance, uv + vec2(-half_pixel.x * 3, +half_pixel.y), 0.0).x; + + float avg = dot(vals, vec4(0.25, 0.25, 0.25, 0.25)); + + imageStore(dest_image, ssC, vec4(avg)); + + // sum the average; to avoid overflowing we assume max AO resolution is not bigger than 16384x16384; so quarter res (used here) will be 4096x4096, which leaves us with 8 bits per pixel + uint sum = uint(clamp(avg, 0.0, 1.0) * 255.0 + 0.5); + + // save every 9th to avoid InterlockedAdd congestion - since we're blurring, this is good enough; compensated by multiplying load_counter_avg_div by 9 + if (((ssC.x % 3) + (ssC.y % 3)) == 0) { + atomicAdd(counter.sum, sum); + } +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/effects/ssil_interleave.glsl b/servers/rendering/renderer_rd/shaders/effects/ssil_interleave.glsl new file mode 100644 index 0000000000..9e86ac0cf0 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ssil_interleave.glsl @@ -0,0 +1,122 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016, Intel Corporation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2016-09-07: filip.strugar@intel.com: first commit +// 2020-12-05: clayjohn: convert to Vulkan and Godot +// 2021-05-27: clayjohn: convert SSAO to SSIL +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[compute] + +#version 450 + +#VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(rgba16, set = 0, binding = 0) uniform restrict writeonly image2D dest_image; +layout(set = 1, binding = 0) uniform sampler2DArray source_texture; +layout(r8, set = 2, binding = 0) uniform restrict readonly image2DArray source_edges; + +layout(push_constant, std430) uniform Params { + float inv_sharpness; + uint size_modifier; + vec2 pixel_size; +} +params; + +vec4 unpack_edges(float p_packed_val) { + uint packed_val = uint(p_packed_val * 255.5); + vec4 edgesLRTB; + edgesLRTB.x = float((packed_val >> 6) & 0x03) / 3.0; + edgesLRTB.y = float((packed_val >> 4) & 0x03) / 3.0; + edgesLRTB.z = float((packed_val >> 2) & 0x03) / 3.0; + edgesLRTB.w = float((packed_val >> 0) & 0x03) / 3.0; + + return clamp(edgesLRTB + params.inv_sharpness, 0.0, 1.0); +} + +void main() { + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(ssC, ivec2(1.0 / params.pixel_size)))) { //too large, do nothing + return; + } + +#ifdef MODE_SMART + uvec2 pix_pos = uvec2(gl_GlobalInvocationID.xy); + vec2 uv = (gl_GlobalInvocationID.xy + vec2(0.5)) * params.pixel_size; + + // calculate index in the four deinterleaved source array texture + int mx = int(pix_pos.x % 2); + int my = int(pix_pos.y % 2); + int index_center = mx + my * 2; // center index + int index_horizontal = (1 - mx) + my * 2; // neighbouring, horizontal + int index_vertical = mx + (1 - my) * 2; // neighbouring, vertical + int index_diagonal = (1 - mx) + (1 - my) * 2; // diagonal + + vec4 color = texelFetch(source_texture, ivec3(pix_pos / uvec2(params.size_modifier), index_center), 0); + + vec4 edgesLRTB = unpack_edges(imageLoad(source_edges, ivec3(pix_pos / uvec2(params.size_modifier), index_center)).r); + + // convert index shifts to sampling offsets + float fmx = float(mx); + float fmy = float(my); + + // in case of an edge, push sampling offsets away from the edge (towards pixel center) + float fmxe = (edgesLRTB.y - edgesLRTB.x); + float fmye = (edgesLRTB.w - edgesLRTB.z); + + // calculate final sampling offsets and sample using bilinear filter + vec2 uv_horizontal = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(fmx + fmxe - 0.5, 0.5 - fmy)) * params.pixel_size; + vec4 color_horizontal = textureLod(source_texture, vec3(uv_horizontal, index_horizontal), 0.0); + vec2 uv_vertical = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(0.5 - fmx, fmy - 0.5 + fmye)) * params.pixel_size; + vec4 color_vertical = textureLod(source_texture, vec3(uv_vertical, index_vertical), 0.0); + vec2 uv_diagonal = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(fmx - 0.5 + fmxe, fmy - 0.5 + fmye)) * params.pixel_size; + vec4 color_diagonal = textureLod(source_texture, vec3(uv_diagonal, index_diagonal), 0.0); + + // reduce weight for samples near edge - if the edge is on both sides, weight goes to 0 + vec4 blendWeights; + blendWeights.x = 1.0; + blendWeights.y = (edgesLRTB.x + edgesLRTB.y) * 0.5; + blendWeights.z = (edgesLRTB.z + edgesLRTB.w) * 0.5; + blendWeights.w = (blendWeights.y + blendWeights.z) * 0.5; + + // calculate weighted average + float blendWeightsSum = dot(blendWeights, vec4(1.0, 1.0, 1.0, 1.0)); + color += color_horizontal * blendWeights.y; + color += color_vertical * blendWeights.z; + color += color_diagonal * blendWeights.w; + color /= blendWeightsSum; + + imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), color); +#else // !MODE_SMART + + vec2 uv = (gl_GlobalInvocationID.xy + vec2(0.5)) * params.pixel_size; +#ifdef MODE_HALF + vec4 a = textureLod(source_texture, vec3(uv, 0), 0.0); + vec4 d = textureLod(source_texture, vec3(uv, 3), 0.0); + vec4 avg = (a + d) * 0.5; + +#else + vec4 a = textureLod(source_texture, vec3(uv, 0), 0.0); + vec4 b = textureLod(source_texture, vec3(uv, 1), 0.0); + vec4 c = textureLod(source_texture, vec3(uv, 2), 0.0); + vec4 d = textureLod(source_texture, vec3(uv, 3), 0.0); + vec4 avg = (a + b + c + d) * 0.25; + +#endif + imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), avg); +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/tonemap.glsl b/servers/rendering/renderer_rd/shaders/effects/tonemap.glsl index 4411587116..62a7b0e7d7 100644 --- a/servers/rendering/renderer_rd/shaders/tonemap.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/tonemap.glsl @@ -44,7 +44,12 @@ layout(set = 0, binding = 0) uniform sampler2D source_color; #endif layout(set = 1, binding = 0) uniform sampler2D source_auto_exposure; +#ifdef MULTIVIEW +layout(set = 2, binding = 0) uniform sampler2DArray source_glow; +#else layout(set = 2, binding = 0) uniform sampler2D source_glow; +#endif +layout(set = 2, binding = 1) uniform sampler2D glow_map; #ifdef USE_1D_LUT layout(set = 3, binding = 0) uniform sampler2D source_color_correction; @@ -52,7 +57,7 @@ layout(set = 3, binding = 0) uniform sampler2D source_color_correction; layout(set = 3, binding = 0) uniform sampler3D source_color_correction; #endif -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, std430) uniform Params { vec3 bcs; bool use_bcs; @@ -63,7 +68,7 @@ layout(push_constant, binding = 1, std430) uniform Params { uvec2 glow_texture_size; float glow_intensity; - uint pad3; + float glow_map_strength; uint glow_mode; float glow_levels[7]; @@ -117,6 +122,36 @@ float h1(float a) { return 1.0f + w3(a) / (w2(a) + w3(a)); } +#ifdef MULTIVIEW +vec4 texture2D_bicubic(sampler2DArray tex, vec2 uv, int p_lod) { + float lod = float(p_lod); + vec2 tex_size = vec2(params.glow_texture_size >> p_lod); + vec2 pixel_size = vec2(1.0f) / tex_size; + + uv = uv * tex_size + vec2(0.5f); + + vec2 iuv = floor(uv); + vec2 fuv = fract(uv); + + float g0x = g0(fuv.x); + float g1x = g1(fuv.x); + float h0x = h0(fuv.x); + float h1x = h1(fuv.x); + float h0y = h0(fuv.y); + float h1y = h1(fuv.y); + + vec3 p0 = vec3((vec2(iuv.x + h0x, iuv.y + h0y) - vec2(0.5f)) * pixel_size, ViewIndex); + vec3 p1 = vec3((vec2(iuv.x + h1x, iuv.y + h0y) - vec2(0.5f)) * pixel_size, ViewIndex); + vec3 p2 = vec3((vec2(iuv.x + h0x, iuv.y + h1y) - vec2(0.5f)) * pixel_size, ViewIndex); + vec3 p3 = vec3((vec2(iuv.x + h1x, iuv.y + h1y) - vec2(0.5f)) * pixel_size, ViewIndex); + + return (g0(fuv.y) * (g0x * textureLod(tex, p0, lod) + g1x * textureLod(tex, p1, lod))) + + (g1(fuv.y) * (g0x * textureLod(tex, p2, lod) + g1x * textureLod(tex, p3, lod))); +} + +#define GLOW_TEXTURE_SAMPLE(m_tex, m_uv, m_lod) texture2D_bicubic(m_tex, m_uv, m_lod) +#else // MULTIVIEW + vec4 texture2D_bicubic(sampler2D tex, vec2 uv, int p_lod) { float lod = float(p_lod); vec2 tex_size = vec2(params.glow_texture_size >> p_lod); @@ -140,16 +175,21 @@ vec4 texture2D_bicubic(sampler2D tex, vec2 uv, int p_lod) { vec2 p3 = (vec2(iuv.x + h1x, iuv.y + h1y) - vec2(0.5f)) * pixel_size; return (g0(fuv.y) * (g0x * textureLod(tex, p0, lod) + g1x * textureLod(tex, p1, lod))) + - (g1(fuv.y) * (g0x * textureLod(tex, p2, lod) + g1x * textureLod(tex, p3, lod))); + (g1(fuv.y) * (g0x * textureLod(tex, p2, lod) + g1x * textureLod(tex, p3, lod))); } #define GLOW_TEXTURE_SAMPLE(m_tex, m_uv, m_lod) texture2D_bicubic(m_tex, m_uv, m_lod) +#endif // !MULTIVIEW -#else +#else // USE_GLOW_FILTER_BICUBIC +#ifdef MULTIVIEW +#define GLOW_TEXTURE_SAMPLE(m_tex, m_uv, m_lod) textureLod(m_tex, vec3(m_uv, ViewIndex), float(m_lod)) +#else // MULTIVIEW #define GLOW_TEXTURE_SAMPLE(m_tex, m_uv, m_lod) textureLod(m_tex, m_uv, float(m_lod)) +#endif // !MULTIVIEW -#endif +#endif // !USE_GLOW_FILTER_BICUBIC vec3 tonemap_filmic(vec3 color, float white) { // exposure bias: input scale (color *= bias, white *= bias) to make the brightness consistent with other tonemappers @@ -169,16 +209,33 @@ vec3 tonemap_filmic(vec3 color, float white) { return color_tonemapped / white_tonemapped; } +// Adapted from https://github.com/TheRealMJP/BakingLab/blob/master/BakingLab/ACES.hlsl +// (MIT License). vec3 tonemap_aces(vec3 color, float white) { - const float exposure_bias = 0.85f; - const float A = 2.51f * exposure_bias * exposure_bias; - const float B = 0.03f * exposure_bias; - const float C = 2.43f * exposure_bias * exposure_bias; - const float D = 0.59f * exposure_bias; - const float E = 0.14f; - - vec3 color_tonemapped = (color * (A * color + B)) / (color * (C * color + D) + E); - float white_tonemapped = (white * (A * white + B)) / (white * (C * white + D) + E); + const float exposure_bias = 1.8f; + const float A = 0.0245786f; + const float B = 0.000090537f; + const float C = 0.983729f; + const float D = 0.432951f; + const float E = 0.238081f; + + // Exposure bias baked into transform to save shader instructions. Equivalent to `color *= exposure_bias` + const mat3 rgb_to_rrt = mat3( + vec3(0.59719f * exposure_bias, 0.35458f * exposure_bias, 0.04823f * exposure_bias), + vec3(0.07600f * exposure_bias, 0.90834f * exposure_bias, 0.01566f * exposure_bias), + vec3(0.02840f * exposure_bias, 0.13383f * exposure_bias, 0.83777f * exposure_bias)); + + const mat3 odt_to_rgb = mat3( + vec3(1.60475f, -0.53108f, -0.07367f), + vec3(-0.10208f, 1.10813f, -0.00605f), + vec3(-0.00327f, -0.07276f, 1.07602f)); + + color *= rgb_to_rrt; + vec3 color_tonemapped = (color * (color + A) - B) / (color * (C * color + D) + E); + color_tonemapped *= odt_to_rgb; + + white *= exposure_bias; + float white_tonemapped = (white * (white + A) - B) / (white * (C * white + D) + E); return color_tonemapped / white_tonemapped; } @@ -200,19 +257,24 @@ vec3 linear_to_srgb(vec3 color) { #define TONEMAPPER_ACES 3 vec3 apply_tonemapping(vec3 color, float white) { // inputs are LINEAR, always outputs clamped [0;1] color - + // Ensure color values passed to tonemappers are positive. + // They can be negative in the case of negative lights, which leads to undesired behavior. if (params.tonemapper == TONEMAPPER_LINEAR) { return color; } else if (params.tonemapper == TONEMAPPER_REINHARD) { - return tonemap_reinhard(color, white); + return tonemap_reinhard(max(vec3(0.0f), color), white); } else if (params.tonemapper == TONEMAPPER_FILMIC) { - return tonemap_filmic(color, white); + return tonemap_filmic(max(vec3(0.0f), color), white); } else { // TONEMAPPER_ACES - return tonemap_aces(color, white); + return tonemap_aces(max(vec3(0.0f), color), white); } } +#ifdef MULTIVIEW +vec3 gather_glow(sampler2DArray tex, vec2 uv) { // sample all selected glow levels, view is added to uv later +#else vec3 gather_glow(sampler2D tex, vec2 uv) { // sample all selected glow levels +#endif // defined(MULTIVIEW) vec3 glow = vec3(0.0f); if (params.glow_levels[0] > 0.0001) { @@ -323,14 +385,14 @@ vec3 do_fxaa(vec3 color, float exposure, vec2 uv_interp) { dir.y = ((lumaNW + lumaSW) - (lumaNE + lumaSE)); float dirReduce = max((lumaNW + lumaNE + lumaSW + lumaSE) * - (0.25 * FXAA_REDUCE_MUL), + (0.25 * FXAA_REDUCE_MUL), FXAA_REDUCE_MIN); float rcpDirMin = 1.0 / (min(abs(dir.x), abs(dir.y)) + dirReduce); dir = min(vec2(FXAA_SPAN_MAX, FXAA_SPAN_MAX), max(vec2(-FXAA_SPAN_MAX, -FXAA_SPAN_MAX), dir * rcpDirMin)) * - params.pixel_size; + params.pixel_size; #ifdef MULTIVIEW vec3 rgbA = 0.5 * exposure * (textureLod(source_color, vec3(uv_interp + dir * (1.0 / 3.0 - 0.5), ViewIndex), 0.0).xyz + textureLod(source_color, vec3(uv_interp + dir * (2.0 / 3.0 - 0.5), ViewIndex), 0.0).xyz) * params.luminance_multiplier; @@ -364,12 +426,13 @@ vec3 screen_space_dither(vec2 frag_coord) { void main() { #ifdef SUBPASS // SUBPASS and MULTIVIEW can be combined but in that case we're already reading from the correct layer - vec3 color = subpassLoad(input_color).rgb * params.luminance_multiplier; + vec4 color = subpassLoad(input_color); #elif defined(MULTIVIEW) - vec3 color = textureLod(source_color, vec3(uv_interp, ViewIndex), 0.0f).rgb * params.luminance_multiplier; + vec4 color = textureLod(source_color, vec3(uv_interp, ViewIndex), 0.0f); #else - vec3 color = textureLod(source_color, uv_interp, 0.0f).rgb * params.luminance_multiplier; + vec4 color = textureLod(source_color, uv_interp, 0.0f); #endif + color.rgb *= params.luminance_multiplier; // Exposure @@ -381,55 +444,59 @@ void main() { } #endif - color *= exposure; + color.rgb *= exposure; // Early Tonemap & SRGB Conversion #ifndef SUBPASS + if (params.use_fxaa) { + // FXAA must be performed before glow to preserve the "bleed" effect of glow. + color.rgb = do_fxaa(color.rgb, exposure, uv_interp); + } + if (params.use_glow && params.glow_mode == GLOW_MODE_MIX) { vec3 glow = gather_glow(source_glow, uv_interp) * params.luminance_multiplier; + if (params.glow_map_strength > 0.001) { + glow = mix(glow, texture(glow_map, uv_interp).rgb * glow, params.glow_map_strength); + } color.rgb = mix(color.rgb, glow, params.glow_intensity); } - - if (params.use_fxaa) { - color = do_fxaa(color, exposure, uv_interp); - } #endif if (params.use_debanding) { // For best results, debanding should be done before tonemapping. // Otherwise, we're adding noise to an already-quantized image. - color += screen_space_dither(gl_FragCoord.xy); + color.rgb += screen_space_dither(gl_FragCoord.xy); } - // Ensure color values passed to tonemappers are positive. - // They can be negative in the case of negative lights, which leads to undesired behavior. - color = apply_tonemapping(max(vec3(0.0), color), params.white); + color.rgb = apply_tonemapping(color.rgb, params.white); - color = linear_to_srgb(color); // regular linear -> SRGB conversion + color.rgb = linear_to_srgb(color.rgb); // regular linear -> SRGB conversion #ifndef SUBPASS // Glow - if (params.use_glow && params.glow_mode != GLOW_MODE_MIX) { vec3 glow = gather_glow(source_glow, uv_interp) * params.glow_intensity * params.luminance_multiplier; + if (params.glow_map_strength > 0.001) { + glow = mix(glow, texture(glow_map, uv_interp).rgb * glow, params.glow_map_strength); + } // high dynamic range -> SRGB glow = apply_tonemapping(glow, params.white); glow = linear_to_srgb(glow); - color = apply_glow(color, glow); + color.rgb = apply_glow(color.rgb, glow); } #endif // Additional effects if (params.use_bcs) { - color = apply_bcs(color, params.bcs); + color.rgb = apply_bcs(color.rgb, params.bcs); } if (params.use_color_correction) { - color = apply_color_correction(color); + color.rgb = apply_color_correction(color.rgb); } - frag_color = vec4(color, 1.0f); + frag_color = color; } diff --git a/servers/rendering/renderer_rd/shaders/effects/vrs.glsl b/servers/rendering/renderer_rd/shaders/effects/vrs.glsl new file mode 100644 index 0000000000..5ef83c0b44 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/vrs.glsl @@ -0,0 +1,72 @@ +#[vertex] + +#version 450 + +#VERSION_DEFINES + +#ifdef MULTIVIEW +#ifdef has_VK_KHR_multiview +#extension GL_EXT_multiview : enable +#define ViewIndex gl_ViewIndex +#else // has_VK_KHR_multiview +#define ViewIndex 0 +#endif // has_VK_KHR_multiview +#endif //MULTIVIEW + +#ifdef MULTIVIEW +layout(location = 0) out vec3 uv_interp; +#else +layout(location = 0) out vec2 uv_interp; +#endif + +void main() { + vec2 base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0)); + uv_interp.xy = base_arr[gl_VertexIndex]; +#ifdef MULTIVIEW + uv_interp.z = ViewIndex; +#endif + + gl_Position = vec4(uv_interp.xy * 2.0 - 1.0, 0.0, 1.0); +} + +#[fragment] + +#version 450 + +#VERSION_DEFINES + +#ifdef MULTIVIEW +#ifdef has_VK_KHR_multiview +#extension GL_EXT_multiview : enable +#define ViewIndex gl_ViewIndex +#else // has_VK_KHR_multiview +#define ViewIndex 0 +#endif // has_VK_KHR_multiview +#endif //MULTIVIEW + +#ifdef MULTIVIEW +layout(location = 0) in vec3 uv_interp; +layout(set = 0, binding = 0) uniform sampler2DArray source_color; +#else /* MULTIVIEW */ +layout(location = 0) in vec2 uv_interp; +layout(set = 0, binding = 0) uniform sampler2D source_color; +#endif /* MULTIVIEW */ + +layout(location = 0) out uint frag_color; + +void main() { +#ifdef MULTIVIEW + vec3 uv = uv_interp; +#else + vec2 uv = uv_interp; +#endif + +#ifdef MULTIVIEW + vec4 color = textureLod(source_color, uv, 0.0); +#else /* MULTIVIEW */ + vec4 color = textureLod(source_color, uv, 0.0); +#endif /* MULTIVIEW */ + + // See if we can change the sampler to one that returns int... + frag_color = uint(color.r * 256.0); +} diff --git a/servers/rendering/renderer_rd/shaders/environment/SCsub b/servers/rendering/renderer_rd/shaders/environment/SCsub new file mode 100644 index 0000000000..741da8fe69 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/environment/SCsub @@ -0,0 +1,17 @@ +#!/usr/bin/env python + +Import("env") + +if "RD_GLSL" in env["BUILDERS"]: + # find all include files + gl_include_files = [str(f) for f in Glob("*_inc.glsl")] + + # find all shader code(all glsl files excluding our include files) + glsl_files = [str(f) for f in Glob("*.glsl") if str(f) not in gl_include_files] + + # make sure we recompile shaders if include files change + env.Depends([f + ".gen.h" for f in glsl_files], gl_include_files + ["#glsl_builders.py"]) + + # compile shaders + for glsl_file in glsl_files: + env.RD_GLSL(glsl_file) diff --git a/servers/rendering/renderer_rd/shaders/gi.glsl b/servers/rendering/renderer_rd/shaders/environment/gi.glsl index 60c881881d..6ea8cb1377 100644 --- a/servers/rendering/renderer_rd/shaders/gi.glsl +++ b/servers/rendering/renderer_rd/shaders/environment/gi.glsl @@ -8,6 +8,12 @@ layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; #define M_PI 3.141592 +/* Specialization Constants (Toggles) */ + +layout(constant_id = 0) const bool sc_half_res = false; +layout(constant_id = 1) const bool sc_use_full_projection_matrix = false; +layout(constant_id = 2) const bool sc_use_vrs = false; + #define SDFGI_MAX_CASCADES 8 //set 0 for SDFGI and render buffers @@ -68,19 +74,15 @@ sdfgi; #define MAX_VOXEL_GI_INSTANCES 8 struct VoxelGIData { - mat4 xform; - vec3 bounds; - float dynamic_range; + mat4 xform; // 64 - 64 - float bias; - float normal_bias; - bool blend_ambient; - uint texture_slot; + vec3 bounds; // 12 - 76 + float dynamic_range; // 4 - 80 - uint pad0; - uint pad1; - uint pad2; - uint mipmaps; + float bias; // 4 - 84 + float normal_bias; // 4 - 88 + bool blend_ambient; // 4 - 92 + uint mipmaps; // 4 - 96 }; layout(set = 0, binding = 16, std140) uniform VoxelGIs { @@ -90,21 +92,31 @@ voxel_gi_instances; layout(set = 0, binding = 17) uniform texture3D voxel_gi_textures[MAX_VOXEL_GI_INSTANCES]; -layout(push_constant, binding = 0, std430) uniform Params { +layout(set = 0, binding = 18, std140) uniform SceneData { + mat4x4 inv_projection[2]; + mat4x4 cam_transform; + vec4 eye_offset[2]; + ivec2 screen_size; - float z_near; - float z_far; + float pad1; + float pad2; +} +scene_data; - vec4 proj_info; +layout(r8ui, set = 0, binding = 19) uniform restrict readonly uimage2D vrs_buffer; - vec3 ao_color; +layout(push_constant, std430) uniform Params { uint max_voxel_gi_instances; - bool high_quality_vct; bool orthogonal; - uint pad[2]; + uint view_index; + + vec4 proj_info; - mat3x4 cam_rotation; + float z_near; + float z_far; + float pad2; + float pad3; } params; @@ -136,23 +148,34 @@ vec4 blend_color(vec4 src, vec4 dst) { } vec3 reconstruct_position(ivec2 screen_pos) { - vec3 pos; - pos.z = texelFetch(sampler2D(depth_buffer, linear_sampler), screen_pos, 0).r; + if (sc_use_full_projection_matrix) { + vec4 pos; + pos.xy = (2.0 * vec2(screen_pos) / vec2(scene_data.screen_size)) - 1.0; + pos.z = texelFetch(sampler2D(depth_buffer, linear_sampler), screen_pos, 0).r * 2.0 - 1.0; + pos.w = 1.0; + + pos = scene_data.inv_projection[params.view_index] * pos; - pos.z = pos.z * 2.0 - 1.0; - if (params.orthogonal) { - pos.z = ((pos.z + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; + return pos.xyz / pos.w; } else { - pos.z = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - pos.z * (params.z_far - params.z_near)); - } - pos.z = -pos.z; + vec3 pos; + pos.z = texelFetch(sampler2D(depth_buffer, linear_sampler), screen_pos, 0).r; + + pos.z = pos.z * 2.0 - 1.0; + if (params.orthogonal) { + pos.z = ((pos.z + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; + } else { + pos.z = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - pos.z * (params.z_far - params.z_near)); + } + pos.z = -pos.z; - pos.xy = vec2(screen_pos) * params.proj_info.xy + params.proj_info.zw; - if (!params.orthogonal) { - pos.xy *= pos.z; - } + pos.xy = vec2(screen_pos) * params.proj_info.xy + params.proj_info.zw; + if (!params.orthogonal) { + pos.xy *= pos.z; + } - return pos; + return pos; + } } void sdfvoxel_gi_process(uint cascade, vec3 cascade_pos, vec3 cam_pos, vec3 cam_normal, vec3 cam_specular_normal, float roughness, out vec3 diffuse_light, out vec3 specular_light) { @@ -572,7 +595,6 @@ void voxel_gi_compute(uint index, vec3 position, vec3 normal, vec3 ref_vec, mat3 vec4 fetch_normal_and_roughness(ivec2 pos) { vec4 normal_roughness = texelFetch(sampler2D(normal_roughness_buffer, linear_sampler), pos, 0); - normal_roughness.xyz = normalize(normal_roughness.xyz * 2.0 - 1.0); return normal_roughness; } @@ -585,9 +607,10 @@ void process_gi(ivec2 pos, vec3 vertex, inout vec4 ambient_light, inout vec4 ref if (normal.length() > 0.5) { //valid normal, can do GI float roughness = normal_roughness.w; - vertex = mat3(params.cam_rotation) * vertex; - normal = normalize(mat3(params.cam_rotation) * normal); - vec3 reflection = normalize(reflect(normalize(vertex), normal)); + vec3 view = -normalize(mat3(scene_data.cam_transform) * (vertex - scene_data.eye_offset[gl_GlobalInvocationID.z].xyz)); + vertex = mat3(scene_data.cam_transform) * vertex; + normal = normalize(mat3(scene_data.cam_transform) * normal); + vec3 reflection = normalize(reflect(-view, normal)); #ifdef USE_SDFGI sdfgi_process(vertex, normal, reflection, roughness, ambient_light, reflection_light); @@ -632,10 +655,36 @@ void process_gi(ivec2 pos, vec3 vertex, inout vec4 ambient_light, inout vec4 ref void main() { ivec2 pos = ivec2(gl_GlobalInvocationID.xy); -#ifdef MODE_HALF_RES - pos <<= 1; -#endif - if (any(greaterThanEqual(pos, params.screen_size))) { //too large, do nothing + uint vrs_x, vrs_y; + if (sc_use_vrs) { + ivec2 vrs_pos; + + // Currently we use a 16x16 texel, possibly some day make this configurable. + if (sc_half_res) { + vrs_pos = pos >> 3; + } else { + vrs_pos = pos >> 4; + } + + uint vrs_texel = imageLoad(vrs_buffer, vrs_pos).r; + // note, valid values for vrs_x and vrs_y are 1, 2 and 4. + vrs_x = 1 << ((vrs_texel >> 2) & 3); + vrs_y = 1 << (vrs_texel & 3); + + if (mod(pos.x, vrs_x) != 0) { + return; + } + + if (mod(pos.y, vrs_y) != 0) { + return; + } + } + + if (sc_half_res) { + pos <<= 1; + } + + if (any(greaterThanEqual(pos, scene_data.screen_size))) { //too large, do nothing return; } @@ -647,10 +696,69 @@ void main() { process_gi(pos, vertex, ambient_light, reflection_light); -#ifdef MODE_HALF_RES - pos >>= 1; -#endif + if (sc_half_res) { + pos >>= 1; + } imageStore(ambient_buffer, pos, ambient_light); imageStore(reflection_buffer, pos, reflection_light); + + if (sc_use_vrs) { + if (vrs_x > 1) { + imageStore(ambient_buffer, pos + ivec2(1, 0), ambient_light); + imageStore(reflection_buffer, pos + ivec2(1, 0), reflection_light); + } + + if (vrs_x > 2) { + imageStore(ambient_buffer, pos + ivec2(2, 0), ambient_light); + imageStore(reflection_buffer, pos + ivec2(2, 0), reflection_light); + + imageStore(ambient_buffer, pos + ivec2(3, 0), ambient_light); + imageStore(reflection_buffer, pos + ivec2(3, 0), reflection_light); + } + + if (vrs_y > 1) { + imageStore(ambient_buffer, pos + ivec2(0, 1), ambient_light); + imageStore(reflection_buffer, pos + ivec2(0, 1), reflection_light); + } + + if (vrs_y > 1 && vrs_x > 1) { + imageStore(ambient_buffer, pos + ivec2(1, 1), ambient_light); + imageStore(reflection_buffer, pos + ivec2(1, 1), reflection_light); + } + + if (vrs_y > 1 && vrs_x > 2) { + imageStore(ambient_buffer, pos + ivec2(2, 1), ambient_light); + imageStore(reflection_buffer, pos + ivec2(2, 1), reflection_light); + + imageStore(ambient_buffer, pos + ivec2(3, 1), ambient_light); + imageStore(reflection_buffer, pos + ivec2(3, 1), reflection_light); + } + + if (vrs_y > 2) { + imageStore(ambient_buffer, pos + ivec2(0, 2), ambient_light); + imageStore(reflection_buffer, pos + ivec2(0, 2), reflection_light); + imageStore(ambient_buffer, pos + ivec2(0, 3), ambient_light); + imageStore(reflection_buffer, pos + ivec2(0, 3), reflection_light); + } + + if (vrs_y > 2 && vrs_x > 1) { + imageStore(ambient_buffer, pos + ivec2(1, 2), ambient_light); + imageStore(reflection_buffer, pos + ivec2(1, 2), reflection_light); + imageStore(ambient_buffer, pos + ivec2(1, 3), ambient_light); + imageStore(reflection_buffer, pos + ivec2(1, 3), reflection_light); + } + + if (vrs_y > 2 && vrs_x > 2) { + imageStore(ambient_buffer, pos + ivec2(2, 2), ambient_light); + imageStore(reflection_buffer, pos + ivec2(2, 2), reflection_light); + imageStore(ambient_buffer, pos + ivec2(2, 3), ambient_light); + imageStore(reflection_buffer, pos + ivec2(2, 3), reflection_light); + + imageStore(ambient_buffer, pos + ivec2(3, 2), ambient_light); + imageStore(reflection_buffer, pos + ivec2(3, 2), reflection_light); + imageStore(ambient_buffer, pos + ivec2(3, 3), ambient_light); + imageStore(reflection_buffer, pos + ivec2(3, 3), reflection_light); + } + } } diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl b/servers/rendering/renderer_rd/shaders/environment/sdfgi_debug.glsl index 8b58796962..9640d30e78 100644 --- a/servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl +++ b/servers/rendering/renderer_rd/shaders/environment/sdfgi_debug.glsl @@ -32,18 +32,19 @@ layout(rgba16f, set = 0, binding = 10) uniform restrict writeonly image2D screen layout(set = 0, binding = 11) uniform texture2DArray lightprobe_texture; -layout(push_constant, binding = 0, std430) uniform Params { +layout(push_constant, std430) uniform Params { vec3 grid_size; uint max_cascades; ivec2 screen_size; - bool use_occlusion; float y_mult; - vec3 cam_extent; - int probe_axis_size; + float z_near; - mat4 cam_transform; + mat3x4 inv_projection; + // We pack these more tightly than mat3 and vec3, which will require some reconstruction trickery. + float cam_basis[3][3]; + float cam_origin[3]; } params; @@ -79,12 +80,21 @@ void main() { vec3 ray_pos; vec3 ray_dir; { - ray_pos = params.cam_transform[3].xyz; + ray_pos = vec3(params.cam_origin[0], params.cam_origin[1], params.cam_origin[2]); - ray_dir.xy = params.cam_extent.xy * ((vec2(screen_pos) / vec2(params.screen_size)) * 2.0 - 1.0); - ray_dir.z = params.cam_extent.z; + ray_dir.xy = ((vec2(screen_pos) / vec2(params.screen_size)) * 2.0 - 1.0); + ray_dir.z = params.z_near; - ray_dir = normalize(mat3(params.cam_transform) * ray_dir); + ray_dir = (vec4(ray_dir, 1.0) * mat4(params.inv_projection)).xyz; + + mat3 cam_basis; + { + vec3 c0 = vec3(params.cam_basis[0][0], params.cam_basis[0][1], params.cam_basis[0][2]); + vec3 c1 = vec3(params.cam_basis[1][0], params.cam_basis[1][1], params.cam_basis[1][2]); + vec3 c2 = vec3(params.cam_basis[2][0], params.cam_basis[2][1], params.cam_basis[2][2]); + cam_basis = mat3(c0, c1, c2); + } + ray_dir = normalize(cam_basis * ray_dir); } ray_pos.y *= params.y_mult; diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_debug_probes.glsl b/servers/rendering/renderer_rd/shaders/environment/sdfgi_debug_probes.glsl index 4290d5b869..75b1ad2130 100644 --- a/servers/rendering/renderer_rd/shaders/sdfgi_debug_probes.glsl +++ b/servers/rendering/renderer_rd/shaders/environment/sdfgi_debug_probes.glsl @@ -2,13 +2,28 @@ #version 450 +#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview) +#extension GL_EXT_multiview : enable +#endif + +#ifdef USE_MULTIVIEW +#ifdef has_VK_KHR_multiview +#define ViewIndex gl_ViewIndex +#else // has_VK_KHR_multiview +// !BAS! This needs to become an input once we implement our fallback! +#define ViewIndex 0 +#endif // has_VK_KHR_multiview +#else // USE_MULTIVIEW +// Set to zero, not supported in non stereo +#define ViewIndex 0 +#endif //USE_MULTIVIEW + #VERSION_DEFINES #define MAX_CASCADES 8 +#define MAX_VIEWS 2 -layout(push_constant, binding = 0, std430) uniform Params { - mat4 projection; - +layout(push_constant, std430) uniform Params { uint band_power; uint sections_in_band; uint band_mask; @@ -68,6 +83,11 @@ cascades; layout(set = 0, binding = 4) uniform texture3D occlusion_texture; layout(set = 0, binding = 3) uniform sampler linear_sampler; +layout(set = 0, binding = 5, std140) uniform SceneData { + mat4 projection[MAX_VIEWS]; +} +scene_data; + void main() { #ifdef MODE_PROBES probe_index = gl_InstanceIndex; @@ -85,7 +105,7 @@ void main() { vertex += (cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size) / vec3(1.0, params.y_mult, 1.0); - gl_Position = params.projection * vec4(vertex, 1.0); + gl_Position = scene_data.projection[ViewIndex] * vec4(vertex, 1.0); #endif #ifdef MODE_VISIBILITY @@ -144,7 +164,7 @@ void main() { visibility = dot(texelFetch(sampler3D(occlusion_texture, linear_sampler), tex_pos, 0), layer_axis[occlusion_layer]); - gl_Position = params.projection * vec4(vertex, 1.0); + gl_Position = scene_data.projection[ViewIndex] * vec4(vertex, 1.0); #endif } @@ -153,16 +173,32 @@ void main() { #version 450 +#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview) +#extension GL_EXT_multiview : enable +#endif + +#ifdef USE_MULTIVIEW +#ifdef has_VK_KHR_multiview +#define ViewIndex gl_ViewIndex +#else // has_VK_KHR_multiview +// !BAS! This needs to become an input once we implement our fallback! +#define ViewIndex 0 +#endif // has_VK_KHR_multiview +#else // USE_MULTIVIEW +// Set to zero, not supported in non stereo +#define ViewIndex 0 +#endif //USE_MULTIVIEW + #VERSION_DEFINES +#define MAX_VIEWS 2 + layout(location = 0) out vec4 frag_color; layout(set = 0, binding = 2) uniform texture2DArray lightprobe_texture; layout(set = 0, binding = 3) uniform sampler linear_sampler; -layout(push_constant, binding = 0, std430) uniform Params { - mat4 projection; - +layout(push_constant, std430) uniform Params { uint band_power; uint sections_in_band; uint band_mask; diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl b/servers/rendering/renderer_rd/shaders/environment/sdfgi_direct_light.glsl index d6e5c6a92e..b95fad650e 100644 --- a/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl +++ b/servers/rendering/renderer_rd/shaders/environment/sdfgi_direct_light.glsl @@ -70,8 +70,6 @@ struct Light { float cos_spot_angle; float inv_spot_attenuation; float radius; - - vec4 shadow_color; }; layout(set = 0, binding = 9, std140) buffer restrict readonly Lights { @@ -82,7 +80,7 @@ lights; layout(set = 0, binding = 10) uniform texture2DArray lightprobe_texture; layout(set = 0, binding = 11) uniform texture3D occlusion_texture; -layout(push_constant, binding = 0, std430) uniform Params { +layout(push_constant, std430) uniform Params { vec3 grid_size; uint max_cascades; diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl b/servers/rendering/renderer_rd/shaders/environment/sdfgi_integrate.glsl index eedd28959c..9c03297f5c 100644 --- a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl +++ b/servers/rendering/renderer_rd/shaders/environment/sdfgi_integrate.glsl @@ -52,7 +52,7 @@ layout(set = 1, binding = 1) uniform sampler linear_sampler_mipmaps; #define SKY_MODE_COLOR 1 #define SKY_MODE_SKY 2 -layout(push_constant, binding = 0, std430) uniform Params { +layout(push_constant, std430) uniform Params { vec3 grid_size; uint max_cascades; diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_preprocess.glsl b/servers/rendering/renderer_rd/shaders/environment/sdfgi_preprocess.glsl index 4d9fa85a74..bce98f4054 100644 --- a/servers/rendering/renderer_rd/shaders/sdfgi_preprocess.glsl +++ b/servers/rendering/renderer_rd/shaders/environment/sdfgi_preprocess.glsl @@ -102,7 +102,7 @@ dispatch_data; struct ProcessVoxel { uint position; // xyz 7 bit packed, extra 11 bits for neighbors. - uint albedo; //rgb bits 0-15 albedo, bits 16-21 are normal bits (set if geometry exists toward that side), extra 11 bits for neibhbours + uint albedo; //rgb bits 0-15 albedo, bits 16-21 are normal bits (set if geometry exists toward that side), extra 11 bits for neighbours uint light; //rgbe8985 encoded total saved light, extra 2 bits for neighbours uint light_aniso; //55555 light anisotropy, extra 2 bits for neighbours //total neighbours: 26 @@ -135,7 +135,7 @@ dispatch_data; struct ProcessVoxel { uint position; // xyz 7 bit packed, extra 11 bits for neighbors. - uint albedo; //rgb bits 0-15 albedo, bits 16-21 are normal bits (set if geometry exists toward that side), extra 11 bits for neibhbours + uint albedo; //rgb bits 0-15 albedo, bits 16-21 are normal bits (set if geometry exists toward that side), extra 11 bits for neighbours uint light; //rgbe8985 encoded total saved light, extra 2 bits for neighbours uint light_aniso; //55555 light anisotropy, extra 2 bits for neighbours //total neighbours: 26 @@ -155,7 +155,7 @@ layout(r16ui, set = 0, binding = 2) uniform restrict readonly uimage3D src_occlu #endif -layout(push_constant, binding = 0, std430) uniform Params { +layout(push_constant, std430) uniform Params { ivec3 scroll; int grid_size; diff --git a/servers/rendering/renderer_rd/shaders/sky.glsl b/servers/rendering/renderer_rd/shaders/environment/sky.glsl index d07a454ade..e825020a4e 100644 --- a/servers/rendering/renderer_rd/shaders/sky.glsl +++ b/servers/rendering/renderer_rd/shaders/environment/sky.glsl @@ -12,7 +12,7 @@ layout(location = 0) out vec2 uv_interp; -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, std430) uniform Params { mat3 orientation; vec4 projections[MAX_VIEWS]; vec4 position_multiplier; @@ -52,7 +52,7 @@ void main() { layout(location = 0) in vec2 uv_interp; -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, std430) uniform Params { mat3 orientation; vec4 projections[MAX_VIEWS]; vec4 position_multiplier; @@ -77,10 +77,10 @@ params; layout(set = 0, binding = 0) uniform sampler material_samplers[12]; -layout(set = 0, binding = 1, std430) restrict readonly buffer GlobalVariableData { +layout(set = 0, binding = 1, std430) restrict readonly buffer GlobalShaderUniformData { vec4 data[]; } -global_variables; +global_shader_uniforms; layout(set = 0, binding = 2, std140) uniform SceneData { bool volumetric_fog_enabled; @@ -180,12 +180,11 @@ void main() { cube_normal.x = (cube_normal.z * (-uv_interp.x - params.projections[ViewIndex].x)) / params.projections[ViewIndex].y; cube_normal.y = -(cube_normal.z * (-uv_interp.y - params.projections[ViewIndex].z)) / params.projections[ViewIndex].w; cube_normal = mat3(params.orientation) * cube_normal; - cube_normal.z = -cube_normal.z; cube_normal = normalize(cube_normal); vec2 uv = uv_interp * 0.5 + 0.5; - vec2 panorama_coords = vec2(atan(cube_normal.x, cube_normal.z), acos(cube_normal.y)); + vec2 panorama_coords = vec2(atan(cube_normal.x, -cube_normal.z), acos(cube_normal.y)); if (panorama_coords.x < 0.0) { panorama_coords.x += M_PI * 2.0; @@ -200,13 +199,11 @@ void main() { vec4 custom_fog = vec4(0.0); #ifdef USE_CUBEMAP_PASS - vec3 inverted_cube_normal = cube_normal; - inverted_cube_normal.z *= -1.0; #ifdef USES_HALF_RES_COLOR - half_res_color = texture(samplerCube(half_res, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), inverted_cube_normal) * params.luminance_multiplier; + half_res_color = texture(samplerCube(half_res, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), cube_normal) * params.luminance_multiplier; #endif #ifdef USES_QUARTER_RES_COLOR - quarter_res_color = texture(samplerCube(quarter_res, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), inverted_cube_normal) * params.luminance_multiplier; + quarter_res_color = texture(samplerCube(quarter_res, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), cube_normal) * params.luminance_multiplier; #endif #else #ifdef USES_HALF_RES_COLOR diff --git a/servers/rendering/renderer_rd/shaders/environment/volumetric_fog.glsl b/servers/rendering/renderer_rd/shaders/environment/volumetric_fog.glsl new file mode 100644 index 0000000000..4658afd02d --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/environment/volumetric_fog.glsl @@ -0,0 +1,309 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; + +#define SAMPLER_NEAREST_CLAMP 0 +#define SAMPLER_LINEAR_CLAMP 1 +#define SAMPLER_NEAREST_WITH_MIPMAPS_CLAMP 2 +#define SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP 3 +#define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_CLAMP 4 +#define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_CLAMP 5 +#define SAMPLER_NEAREST_REPEAT 6 +#define SAMPLER_LINEAR_REPEAT 7 +#define SAMPLER_NEAREST_WITH_MIPMAPS_REPEAT 8 +#define SAMPLER_LINEAR_WITH_MIPMAPS_REPEAT 9 +#define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_REPEAT 10 +#define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_REPEAT 11 + +#define DENSITY_SCALE 1024.0 + +#include "../cluster_data_inc.glsl" +#include "../light_data_inc.glsl" + +#define M_PI 3.14159265359 + +layout(set = 0, binding = 1) uniform sampler material_samplers[12]; + +layout(set = 0, binding = 2, std430) restrict readonly buffer GlobalShaderUniformData { + vec4 data[]; +} +global_shader_uniforms; + +layout(push_constant, std430) uniform Params { + vec3 position; + float pad; + + vec3 extents; + float pad2; + + ivec3 corner; + uint shape; + + mat4 transform; +} +params; + +#ifdef MOLTENVK_USED +layout(set = 1, binding = 1) volatile buffer emissive_only_map_buffer { + uint emissive_only_map[]; +}; +#else +layout(r32ui, set = 1, binding = 1) uniform volatile uimage3D emissive_only_map; +#endif + +layout(set = 1, binding = 2, std140) uniform SceneParams { + vec2 fog_frustum_size_begin; + vec2 fog_frustum_size_end; + + float fog_frustum_end; + float z_near; // + float z_far; // + float time; + + ivec3 fog_volume_size; + uint directional_light_count; // + + bool use_temporal_reprojection; + uint temporal_frame; + float detail_spread; + float temporal_blend; + + mat4 to_prev_view; + mat4 transform; +} +scene_params; + +#ifdef MOLTENVK_USED +layout(set = 1, binding = 3) volatile buffer density_only_map_buffer { + uint density_only_map[]; +}; +layout(set = 1, binding = 4) volatile buffer light_only_map_buffer { + uint light_only_map[]; +}; +#else +layout(r32ui, set = 1, binding = 3) uniform volatile uimage3D density_only_map; +layout(r32ui, set = 1, binding = 4) uniform volatile uimage3D light_only_map; +#endif + +#ifdef MATERIAL_UNIFORMS_USED +layout(set = 2, binding = 0, std140) uniform MaterialUniforms{ +#MATERIAL_UNIFORMS +} material; +#endif + +#GLOBALS + +float get_depth_at_pos(float cell_depth_size, int z) { + float d = float(z) * cell_depth_size + cell_depth_size * 0.5; //center of voxels + d = pow(d, scene_params.detail_spread); + return scene_params.fog_frustum_end * d; +} + +#define TEMPORAL_FRAMES 16 + +const vec3 halton_map[TEMPORAL_FRAMES] = vec3[]( + vec3(0.5, 0.33333333, 0.2), + vec3(0.25, 0.66666667, 0.4), + vec3(0.75, 0.11111111, 0.6), + vec3(0.125, 0.44444444, 0.8), + vec3(0.625, 0.77777778, 0.04), + vec3(0.375, 0.22222222, 0.24), + vec3(0.875, 0.55555556, 0.44), + vec3(0.0625, 0.88888889, 0.64), + vec3(0.5625, 0.03703704, 0.84), + vec3(0.3125, 0.37037037, 0.08), + vec3(0.8125, 0.7037037, 0.28), + vec3(0.1875, 0.14814815, 0.48), + vec3(0.6875, 0.48148148, 0.68), + vec3(0.4375, 0.81481481, 0.88), + vec3(0.9375, 0.25925926, 0.12), + vec3(0.03125, 0.59259259, 0.32)); + +void main() { + vec3 fog_cell_size = 1.0 / vec3(scene_params.fog_volume_size); + + ivec3 pos = ivec3(gl_GlobalInvocationID.xyz) + params.corner; + if (any(greaterThanEqual(pos, scene_params.fog_volume_size))) { + return; //do not compute + } +#ifdef MOLTENVK_USED + uint lpos = pos.z * scene_params.fog_volume_size.x * scene_params.fog_volume_size.y + pos.y * scene_params.fog_volume_size.x + pos.x; +#endif + + vec3 posf = vec3(pos); + + vec3 fog_unit_pos = posf * fog_cell_size + fog_cell_size * 0.5; //center of voxels + fog_unit_pos.z = pow(fog_unit_pos.z, scene_params.detail_spread); + + vec3 view_pos; + view_pos.xy = (fog_unit_pos.xy * 2.0 - 1.0) * mix(scene_params.fog_frustum_size_begin, scene_params.fog_frustum_size_end, vec2(fog_unit_pos.z)); + view_pos.z = -scene_params.fog_frustum_end * fog_unit_pos.z; + view_pos.y = -view_pos.y; + + if (scene_params.use_temporal_reprojection) { + vec3 prev_view = (scene_params.to_prev_view * vec4(view_pos, 1.0)).xyz; + //undo transform into prev view + prev_view.y = -prev_view.y; + //z back to unit size + prev_view.z /= -scene_params.fog_frustum_end; + //xy back to unit size + prev_view.xy /= mix(scene_params.fog_frustum_size_begin, scene_params.fog_frustum_size_end, vec2(prev_view.z)); + prev_view.xy = prev_view.xy * 0.5 + 0.5; + //z back to unspread value + prev_view.z = pow(prev_view.z, 1.0 / scene_params.detail_spread); + + if (all(greaterThan(prev_view, vec3(0.0))) && all(lessThan(prev_view, vec3(1.0)))) { + //reprojectinon fits + // Since we can reproject, now we must jitter the current view pos. + // This is done here because cells that can't reproject should not jitter. + + fog_unit_pos = posf * fog_cell_size + fog_cell_size * halton_map[scene_params.temporal_frame]; //center of voxels, offset by halton table + fog_unit_pos.z = pow(fog_unit_pos.z, scene_params.detail_spread); + + view_pos.xy = (fog_unit_pos.xy * 2.0 - 1.0) * mix(scene_params.fog_frustum_size_begin, scene_params.fog_frustum_size_end, vec2(fog_unit_pos.z)); + view_pos.z = -scene_params.fog_frustum_end * fog_unit_pos.z; + view_pos.y = -view_pos.y; + } + } + + float density = 0.0; + vec3 emission = vec3(0.0); + vec3 albedo = vec3(0.0); + + float cell_depth_size = abs(view_pos.z - get_depth_at_pos(fog_cell_size.z, pos.z + 1)); + + vec4 world = scene_params.transform * vec4(view_pos, 1.0); + world.xyz /= world.w; + + vec3 uvw = fog_unit_pos; + + vec4 local_pos = params.transform * world; + local_pos.xyz /= local_pos.w; + + float sdf = -1.0; + if (params.shape == 0) { + // Ellipsoid + // https://www.shadertoy.com/view/tdS3DG + float k0 = length(local_pos.xyz / params.extents); + float k1 = length(local_pos.xyz / (params.extents * params.extents)); + sdf = k0 * (k0 - 1.0) / k1; + } else if (params.shape == 1) { + // Cone + // https://iquilezles.org/www/articles/distfunctions/distfunctions.htm + + // Compute the cone angle automatically to fit within the volume's extents. + float inv_height = 1.0 / max(0.001, params.extents.y); + float radius = 1.0 / max(0.001, (min(params.extents.x, params.extents.z) * 0.5)); + float hypotenuse = sqrt(radius * radius + inv_height * inv_height); + float rsin = radius / hypotenuse; + float rcos = inv_height / hypotenuse; + vec2 c = vec2(rsin, rcos); + + float q = length(local_pos.xz); + sdf = max(dot(c, vec2(q, local_pos.y - params.extents.y)), -params.extents.y - local_pos.y); + } else if (params.shape == 2) { + // Cylinder + // https://iquilezles.org/www/articles/distfunctions/distfunctions.htm + vec2 d = abs(vec2(length(local_pos.xz), local_pos.y)) - vec2(min(params.extents.x, params.extents.z), params.extents.y); + sdf = min(max(d.x, d.y), 0.0) + length(max(d, 0.0)); + } else if (params.shape == 3) { + // Box + // https://iquilezles.org/www/articles/distfunctions/distfunctions.htm + vec3 q = abs(local_pos.xyz) - params.extents; + sdf = length(max(q, 0.0)) + min(max(q.x, max(q.y, q.z)), 0.0); + } + + float cull_mask = 1.0; //used to cull cells that do not contribute + if (params.shape <= 3) { +#ifndef SDF_USED + cull_mask = 1.0 - smoothstep(-0.1, 0.0, sdf); +#endif + uvw = clamp((local_pos.xyz + params.extents) / (2.0 * params.extents), 0.0, 1.0); + } + + if (cull_mask > 0.0) { + { +#CODE : FOG + } + +#ifdef DENSITY_USED + density *= cull_mask; + if (abs(density) > 0.001) { + int final_density = int(density * DENSITY_SCALE); +#ifdef MOLTENVK_USED + atomicAdd(density_only_map[lpos], uint(final_density)); +#else + imageAtomicAdd(density_only_map, pos, uint(final_density)); +#endif + +#ifdef EMISSION_USED + { + emission *= clamp(density, 0.0, 1.0); + emission = clamp(emission, vec3(0.0), vec3(4.0)); + // Scale to fit into R11G11B10 with a range of 0-4 + uvec3 emission_u = uvec3(emission.r * 511.0, emission.g * 511.0, emission.b * 255.0); + // R and G have 11 bits each and B has 10. Then pack them into a 32 bit uint + uint final_emission = emission_u.r << 21 | emission_u.g << 10 | emission_u.b; +#ifdef MOLTENVK_USED + uint prev_emission = atomicAdd(emissive_only_map[lpos], final_emission); +#else + uint prev_emission = imageAtomicAdd(emissive_only_map, pos, final_emission); +#endif + + // Adding can lead to colors overflowing, so validate + uvec3 prev_emission_u = uvec3(prev_emission >> 21, (prev_emission << 11) >> 21, prev_emission % 1024); + uint add_emission = final_emission + prev_emission; + uvec3 add_emission_u = uvec3(add_emission >> 21, (add_emission << 11) >> 21, add_emission % 1024); + + bvec3 overflowing = lessThan(add_emission_u, prev_emission_u + emission_u); + + if (any(overflowing)) { + uvec3 overflow_factor = mix(uvec3(0), uvec3(2047 << 21, 2047 << 10, 1023), overflowing); + uint force_max = overflow_factor.r | overflow_factor.g | overflow_factor.b; +#ifdef MOLTENVK_USED + atomicOr(emissive_only_map[lpos], force_max); +#else + imageAtomicOr(emissive_only_map, pos, force_max); +#endif + } + } +#endif +#ifdef ALBEDO_USED + { + vec3 scattering = albedo * clamp(density, 0.0, 1.0); + scattering = clamp(scattering, vec3(0.0), vec3(1.0)); + uvec3 scattering_u = uvec3(scattering.r * 2047.0, scattering.g * 2047.0, scattering.b * 1023.0); + // R and G have 11 bits each and B has 10. Then pack them into a 32 bit uint + uint final_scattering = scattering_u.r << 21 | scattering_u.g << 10 | scattering_u.b; +#ifdef MOLTENVK_USED + uint prev_scattering = atomicAdd(light_only_map[lpos], final_scattering); +#else + uint prev_scattering = imageAtomicAdd(light_only_map, pos, final_scattering); +#endif + + // Adding can lead to colors overflowing, so validate + uvec3 prev_scattering_u = uvec3(prev_scattering >> 21, (prev_scattering << 11) >> 21, prev_scattering % 1024); + uint add_scattering = final_scattering + prev_scattering; + uvec3 add_scattering_u = uvec3(add_scattering >> 21, (add_scattering << 11) >> 21, add_scattering % 1024); + + bvec3 overflowing = lessThan(add_scattering_u, prev_scattering_u + scattering_u); + + if (any(overflowing)) { + uvec3 overflow_factor = mix(uvec3(0), uvec3(2047 << 21, 2047 << 10, 1023), overflowing); + uint force_max = overflow_factor.r | overflow_factor.g | overflow_factor.b; +#ifdef MOLTENVK_USED + atomicOr(light_only_map[lpos], force_max); +#else + imageAtomicOr(light_only_map, pos, force_max); +#endif + } + } +#endif // ALBEDO_USED + } +#endif // DENSITY_USED + } +} diff --git a/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl b/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl new file mode 100644 index 0000000000..6f79b9e771 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl @@ -0,0 +1,782 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +/* Do not use subgroups here, seems there is not much advantage and causes glitches +#if defined(has_GL_KHR_shader_subgroup_ballot) && defined(has_GL_KHR_shader_subgroup_arithmetic) +#extension GL_KHR_shader_subgroup_ballot: enable +#extension GL_KHR_shader_subgroup_arithmetic: enable + +#define USE_SUBGROUPS +#endif +*/ + +#ifdef MODE_DENSITY +layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; +#else +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; +#endif + +#include "../cluster_data_inc.glsl" +#include "../light_data_inc.glsl" + +#define M_PI 3.14159265359 + +#define DENSITY_SCALE 1024.0 + +layout(set = 0, binding = 1) uniform texture2D shadow_atlas; +layout(set = 0, binding = 2) uniform texture2D directional_shadow_atlas; + +layout(set = 0, binding = 3, std430) restrict readonly buffer OmniLights { + LightData data[]; +} +omni_lights; + +layout(set = 0, binding = 4, std430) restrict readonly buffer SpotLights { + LightData data[]; +} +spot_lights; + +layout(set = 0, binding = 5, std140) uniform DirectionalLights { + DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS]; +} +directional_lights; + +layout(set = 0, binding = 6, std430) buffer restrict readonly ClusterBuffer { + uint data[]; +} +cluster_buffer; + +layout(set = 0, binding = 7) uniform sampler linear_sampler; + +#ifdef MODE_DENSITY +layout(rgba16f, set = 0, binding = 8) uniform restrict writeonly image3D density_map; +#endif + +#ifdef MODE_FOG +layout(rgba16f, set = 0, binding = 8) uniform restrict readonly image3D density_map; +layout(rgba16f, set = 0, binding = 9) uniform restrict writeonly image3D fog_map; +#endif + +#ifdef MODE_COPY +layout(rgba16f, set = 0, binding = 8) uniform restrict readonly image3D source_map; +layout(rgba16f, set = 0, binding = 9) uniform restrict writeonly image3D dest_map; +#endif + +#ifdef MODE_FILTER +layout(rgba16f, set = 0, binding = 8) uniform restrict readonly image3D source_map; +layout(rgba16f, set = 0, binding = 9) uniform restrict writeonly image3D dest_map; +#endif + +layout(set = 0, binding = 10) uniform sampler shadow_sampler; + +#define MAX_VOXEL_GI_INSTANCES 8 + +struct VoxelGIData { + mat4 xform; // 64 - 64 + + vec3 bounds; // 12 - 76 + float dynamic_range; // 4 - 80 + + float bias; // 4 - 84 + float normal_bias; // 4 - 88 + bool blend_ambient; // 4 - 92 + uint mipmaps; // 4 - 96 +}; + +layout(set = 0, binding = 11, std140) uniform VoxelGIs { + VoxelGIData data[MAX_VOXEL_GI_INSTANCES]; +} +voxel_gi_instances; + +layout(set = 0, binding = 12) uniform texture3D voxel_gi_textures[MAX_VOXEL_GI_INSTANCES]; + +layout(set = 0, binding = 13) uniform sampler linear_sampler_with_mipmaps; + +#ifdef ENABLE_SDFGI + +// SDFGI Integration on set 1 +#define SDFGI_MAX_CASCADES 8 + +struct SDFVoxelGICascadeData { + vec3 position; + float to_probe; + ivec3 probe_world_offset; + float to_cell; // 1/bounds * grid_size +}; + +layout(set = 1, binding = 0, std140) uniform SDFGI { + vec3 grid_size; + uint max_cascades; + + bool use_occlusion; + int probe_axis_size; + float probe_to_uvw; + float normal_bias; + + vec3 lightprobe_tex_pixel_size; + float energy; + + vec3 lightprobe_uv_offset; + float y_mult; + + vec3 occlusion_clamp; + uint pad3; + + vec3 occlusion_renormalize; + uint pad4; + + vec3 cascade_probe_size; + uint pad5; + + SDFVoxelGICascadeData cascades[SDFGI_MAX_CASCADES]; +} +sdfgi; + +layout(set = 1, binding = 1) uniform texture2DArray sdfgi_ambient_texture; + +layout(set = 1, binding = 2) uniform texture3D sdfgi_occlusion_texture; + +#endif //SDFGI + +layout(set = 0, binding = 14, std140) uniform Params { + vec2 fog_frustum_size_begin; + vec2 fog_frustum_size_end; + + float fog_frustum_end; + float ambient_inject; + float z_far; + int filter_axis; + + vec3 ambient_color; + float sky_contribution; + + ivec3 fog_volume_size; + uint directional_light_count; + + vec3 base_emission; + float base_density; + + vec3 base_scattering; + float phase_g; + + float detail_spread; + float gi_inject; + uint max_voxel_gi_instances; + uint cluster_type_size; + + vec2 screen_size; + uint cluster_shift; + uint cluster_width; + + uint max_cluster_element_count_div_32; + bool use_temporal_reprojection; + uint temporal_frame; + float temporal_blend; + + mat3x4 cam_rotation; + mat4 to_prev_view; + + mat3 radiance_inverse_xform; +} +params; +#ifndef MODE_COPY +layout(set = 0, binding = 15) uniform texture3D prev_density_texture; + +#ifdef MOLTENVK_USED +layout(set = 0, binding = 16) buffer density_only_map_buffer { + uint density_only_map[]; +}; +layout(set = 0, binding = 17) buffer light_only_map_buffer { + uint light_only_map[]; +}; +layout(set = 0, binding = 18) buffer emissive_only_map_buffer { + uint emissive_only_map[]; +}; +#else +layout(r32ui, set = 0, binding = 16) uniform uimage3D density_only_map; +layout(r32ui, set = 0, binding = 17) uniform uimage3D light_only_map; +layout(r32ui, set = 0, binding = 18) uniform uimage3D emissive_only_map; +#endif + +#ifdef USE_RADIANCE_CUBEMAP_ARRAY +layout(set = 0, binding = 19) uniform textureCubeArray sky_texture; +#else +layout(set = 0, binding = 19) uniform textureCube sky_texture; +#endif +#endif // MODE_COPY + +float get_depth_at_pos(float cell_depth_size, int z) { + float d = float(z) * cell_depth_size + cell_depth_size * 0.5; //center of voxels + d = pow(d, params.detail_spread); + return params.fog_frustum_end * d; +} + +vec3 hash3f(uvec3 x) { + x = ((x >> 16) ^ x) * 0x45d9f3b; + x = ((x >> 16) ^ x) * 0x45d9f3b; + x = (x >> 16) ^ x; + return vec3(x & 0xFFFFF) / vec3(float(0xFFFFF)); +} + +float get_omni_attenuation(float dist, float inv_range, float decay) { + float nd = dist * inv_range; + nd *= nd; + nd *= nd; // nd^4 + nd = max(1.0 - nd, 0.0); + nd *= nd; // nd^2 + return nd * pow(max(dist, 0.0001), -decay); +} + +void cluster_get_item_range(uint p_offset, out uint item_min, out uint item_max, out uint item_from, out uint item_to) { + uint item_min_max = cluster_buffer.data[p_offset]; + item_min = item_min_max & 0xFFFF; + item_max = item_min_max >> 16; + + item_from = item_min >> 5; + item_to = (item_max == 0) ? 0 : ((item_max - 1) >> 5) + 1; //side effect of how it is stored, as item_max 0 means no elements +} + +uint cluster_get_range_clip_mask(uint i, uint z_min, uint z_max) { + int local_min = clamp(int(z_min) - int(i) * 32, 0, 31); + int mask_width = min(int(z_max) - int(z_min), 32 - local_min); + return bitfieldInsert(uint(0), uint(0xFFFFFFFF), local_min, mask_width); +} + +float henyey_greenstein(float cos_theta, float g) { + const float k = 0.0795774715459; // 1 / (4 * PI) + return k * (1.0 - g * g) / (pow(1.0 + g * g - 2.0 * g * cos_theta, 1.5)); +} + +#define TEMPORAL_FRAMES 16 + +const vec3 halton_map[TEMPORAL_FRAMES] = vec3[]( + vec3(0.5, 0.33333333, 0.2), + vec3(0.25, 0.66666667, 0.4), + vec3(0.75, 0.11111111, 0.6), + vec3(0.125, 0.44444444, 0.8), + vec3(0.625, 0.77777778, 0.04), + vec3(0.375, 0.22222222, 0.24), + vec3(0.875, 0.55555556, 0.44), + vec3(0.0625, 0.88888889, 0.64), + vec3(0.5625, 0.03703704, 0.84), + vec3(0.3125, 0.37037037, 0.08), + vec3(0.8125, 0.7037037, 0.28), + vec3(0.1875, 0.14814815, 0.48), + vec3(0.6875, 0.48148148, 0.68), + vec3(0.4375, 0.81481481, 0.88), + vec3(0.9375, 0.25925926, 0.12), + vec3(0.03125, 0.59259259, 0.32)); + +void main() { + vec3 fog_cell_size = 1.0 / vec3(params.fog_volume_size); + +#ifdef MODE_DENSITY + + ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + if (any(greaterThanEqual(pos, params.fog_volume_size))) { + return; //do not compute + } +#ifdef MOLTENVK_USED + uint lpos = pos.z * params.fog_volume_size.x * params.fog_volume_size.y + pos.y * params.fog_volume_size.x + pos.x; +#endif + + vec3 posf = vec3(pos); + + //posf += mix(vec3(0.0),vec3(1.0),0.3) * hash3f(uvec3(pos)) * 2.0 - 1.0; + + vec3 fog_unit_pos = posf * fog_cell_size + fog_cell_size * 0.5; //center of voxels + + uvec2 screen_pos = uvec2(fog_unit_pos.xy * params.screen_size); + uvec2 cluster_pos = screen_pos >> params.cluster_shift; + uint cluster_offset = (params.cluster_width * cluster_pos.y + cluster_pos.x) * (params.max_cluster_element_count_div_32 + 32); + //positions in screen are too spread apart, no hopes for optimizing with subgroups + + fog_unit_pos.z = pow(fog_unit_pos.z, params.detail_spread); + + vec3 view_pos; + view_pos.xy = (fog_unit_pos.xy * 2.0 - 1.0) * mix(params.fog_frustum_size_begin, params.fog_frustum_size_end, vec2(fog_unit_pos.z)); + view_pos.z = -params.fog_frustum_end * fog_unit_pos.z; + view_pos.y = -view_pos.y; + + vec4 reprojected_density = vec4(0.0); + float reproject_amount = 0.0; + + if (params.use_temporal_reprojection) { + vec3 prev_view = (params.to_prev_view * vec4(view_pos, 1.0)).xyz; + //undo transform into prev view + prev_view.y = -prev_view.y; + //z back to unit size + prev_view.z /= -params.fog_frustum_end; + //xy back to unit size + prev_view.xy /= mix(params.fog_frustum_size_begin, params.fog_frustum_size_end, vec2(prev_view.z)); + prev_view.xy = prev_view.xy * 0.5 + 0.5; + //z back to unspread value + prev_view.z = pow(prev_view.z, 1.0 / params.detail_spread); + + if (all(greaterThan(prev_view, vec3(0.0))) && all(lessThan(prev_view, vec3(1.0)))) { + //reprojectinon fits + + reprojected_density = textureLod(sampler3D(prev_density_texture, linear_sampler), prev_view, 0.0); + reproject_amount = params.temporal_blend; + + // Since we can reproject, now we must jitter the current view pos. + // This is done here because cells that can't reproject should not jitter. + + fog_unit_pos = posf * fog_cell_size + fog_cell_size * halton_map[params.temporal_frame]; //center of voxels, offset by halton table + + screen_pos = uvec2(fog_unit_pos.xy * params.screen_size); + cluster_pos = screen_pos >> params.cluster_shift; + cluster_offset = (params.cluster_width * cluster_pos.y + cluster_pos.x) * (params.max_cluster_element_count_div_32 + 32); + //positions in screen are too spread apart, no hopes for optimizing with subgroups + + fog_unit_pos.z = pow(fog_unit_pos.z, params.detail_spread); + + view_pos.xy = (fog_unit_pos.xy * 2.0 - 1.0) * mix(params.fog_frustum_size_begin, params.fog_frustum_size_end, vec2(fog_unit_pos.z)); + view_pos.z = -params.fog_frustum_end * fog_unit_pos.z; + view_pos.y = -view_pos.y; + } + } + + uint cluster_z = uint(clamp((abs(view_pos.z) / params.z_far) * 32.0, 0.0, 31.0)); + + vec3 total_light = vec3(0.0); + + float total_density = params.base_density; +#ifdef MOLTENVK_USED + uint local_density = density_only_map[lpos]; +#else + uint local_density = imageLoad(density_only_map, pos).x; +#endif + + total_density += float(int(local_density)) / DENSITY_SCALE; + total_density = max(0.0, total_density); + +#ifdef MOLTENVK_USED + uint scattering_u = light_only_map[lpos]; +#else + uint scattering_u = imageLoad(light_only_map, pos).x; +#endif + vec3 scattering = vec3(scattering_u >> 21, (scattering_u << 11) >> 21, scattering_u % 1024) / vec3(2047.0, 2047.0, 1023.0); + scattering += params.base_scattering * params.base_density; + +#ifdef MOLTENVK_USED + uint emission_u = emissive_only_map[lpos]; +#else + uint emission_u = imageLoad(emissive_only_map, pos).x; +#endif + vec3 emission = vec3(emission_u >> 21, (emission_u << 11) >> 21, emission_u % 1024) / vec3(511.0, 511.0, 255.0); + emission += params.base_emission * params.base_density; + + float cell_depth_size = abs(view_pos.z - get_depth_at_pos(fog_cell_size.z, pos.z + 1)); + //compute directional lights + + if (total_density > 0.001) { + for (uint i = 0; i < params.directional_light_count; i++) { + vec3 shadow_attenuation = vec3(1.0); + + if (directional_lights.data[i].shadow_opacity > 0.001) { + float depth_z = -view_pos.z; + + vec4 pssm_coord; + vec3 light_dir = directional_lights.data[i].direction; + vec4 v = vec4(view_pos, 1.0); + float z_range; + + if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { + pssm_coord = (directional_lights.data[i].shadow_matrix1 * v); + pssm_coord /= pssm_coord.w; + z_range = directional_lights.data[i].shadow_z_range.x; + + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { + pssm_coord = (directional_lights.data[i].shadow_matrix2 * v); + pssm_coord /= pssm_coord.w; + z_range = directional_lights.data[i].shadow_z_range.y; + + } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { + pssm_coord = (directional_lights.data[i].shadow_matrix3 * v); + pssm_coord /= pssm_coord.w; + z_range = directional_lights.data[i].shadow_z_range.z; + + } else { + pssm_coord = (directional_lights.data[i].shadow_matrix4 * v); + pssm_coord /= pssm_coord.w; + z_range = directional_lights.data[i].shadow_z_range.w; + } + + float depth = texture(sampler2D(directional_shadow_atlas, linear_sampler), pssm_coord.xy).r; + float shadow = exp(min(0.0, (depth - pssm_coord.z)) * z_range * directional_lights.data[i].shadow_volumetric_fog_fade); + + shadow = mix(shadow, 1.0, smoothstep(directional_lights.data[i].fade_from, directional_lights.data[i].fade_to, view_pos.z)); //done with negative values for performance + + shadow_attenuation = mix(vec3(0.0), vec3(1.0), shadow); + } + + total_light += shadow_attenuation * directional_lights.data[i].color * directional_lights.data[i].energy * henyey_greenstein(dot(normalize(view_pos), normalize(directional_lights.data[i].direction)), params.phase_g); + } + + // Compute light from sky + if (params.ambient_inject > 0.0) { + vec3 isotropic = vec3(0.0); + vec3 anisotropic = vec3(0.0); + if (params.sky_contribution > 0.0) { + float mip_bias = 2.0 + total_density * (MAX_SKY_LOD - 2.0); // Not physically based, but looks nice + vec3 scatter_direction = (params.radiance_inverse_xform * normalize(view_pos)) * sign(params.phase_g); +#ifdef USE_RADIANCE_CUBEMAP_ARRAY + isotropic = texture(samplerCubeArray(sky_texture, linear_sampler_with_mipmaps), vec4(0.0, 1.0, 0.0, mip_bias)).rgb; + anisotropic = texture(samplerCubeArray(sky_texture, linear_sampler_with_mipmaps), vec4(scatter_direction, mip_bias)).rgb; +#else + isotropic = textureLod(samplerCube(sky_texture, linear_sampler_with_mipmaps), vec3(0.0, 1.0, 0.0), mip_bias).rgb; + anisotropic = textureLod(samplerCube(sky_texture, linear_sampler_with_mipmaps), vec3(scatter_direction), mip_bias).rgb; +#endif //USE_RADIANCE_CUBEMAP_ARRAY + } + + total_light += mix(params.ambient_color, mix(isotropic, anisotropic, abs(params.phase_g)), params.sky_contribution) * params.ambient_inject; + } + + //compute lights from cluster + + { //omni lights + + uint cluster_omni_offset = cluster_offset; + + uint item_min; + uint item_max; + uint item_from; + uint item_to; + + cluster_get_item_range(cluster_omni_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); + +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif + + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_omni_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif + + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + uint light_index = 32 * i + bit; + + //if (!bool(omni_omni_lights.data[light_index].mask & draw_call.layer_mask)) { + // continue; //not masked + //} + + vec3 light_pos = omni_lights.data[light_index].position; + float d = distance(omni_lights.data[light_index].position, view_pos); + float shadow_attenuation = 1.0; + + if (d * omni_lights.data[light_index].inv_radius < 1.0) { + float attenuation = get_omni_attenuation(d, omni_lights.data[light_index].inv_radius, omni_lights.data[light_index].attenuation); + + vec3 light = omni_lights.data[light_index].color; + + if (omni_lights.data[light_index].shadow_opacity > 0.001) { + //has shadow + vec4 uv_rect = omni_lights.data[light_index].atlas_rect; + vec2 flip_offset = omni_lights.data[light_index].direction.xy; + + vec3 local_vert = (omni_lights.data[light_index].shadow_matrix * vec4(view_pos, 1.0)).xyz; + + float shadow_len = length(local_vert); //need to remember shadow len from here + vec3 shadow_sample = normalize(local_vert); + + if (shadow_sample.z >= 0.0) { + uv_rect.xy += flip_offset; + } + + shadow_sample.z = 1.0 + abs(shadow_sample.z); + vec3 pos = vec3(shadow_sample.xy / shadow_sample.z, shadow_len - omni_lights.data[light_index].shadow_bias); + pos.z *= omni_lights.data[light_index].inv_radius; + + pos.xy = pos.xy * 0.5 + 0.5; + pos.xy = uv_rect.xy + pos.xy * uv_rect.zw; + + float depth = texture(sampler2D(shadow_atlas, linear_sampler), pos.xy).r; + + shadow_attenuation = exp(min(0.0, (depth - pos.z)) / omni_lights.data[light_index].inv_radius * omni_lights.data[light_index].shadow_volumetric_fog_fade); + } + total_light += light * attenuation * shadow_attenuation * henyey_greenstein(dot(normalize(light_pos - view_pos), normalize(view_pos)), params.phase_g); + } + } + } + } + + { //spot lights + + uint cluster_spot_offset = cluster_offset + params.cluster_type_size; + + uint item_min; + uint item_max; + uint item_from; + uint item_to; + + cluster_get_item_range(cluster_spot_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); + +#ifdef USE_SUBGROUPS + item_from = subgroupBroadcastFirst(subgroupMin(item_from)); + item_to = subgroupBroadcastFirst(subgroupMax(item_to)); +#endif + + for (uint i = item_from; i < item_to; i++) { + uint mask = cluster_buffer.data[cluster_spot_offset + i]; + mask &= cluster_get_range_clip_mask(i, item_min, item_max); +#ifdef USE_SUBGROUPS + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); +#else + uint merged_mask = mask; +#endif + + while (merged_mask != 0) { + uint bit = findMSB(merged_mask); + merged_mask &= ~(1 << bit); +#ifdef USE_SUBGROUPS + if (((1 << bit) & mask) == 0) { //do not process if not originally here + continue; + } +#endif + + //if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) { + // continue; //not masked + //} + + uint light_index = 32 * i + bit; + + vec3 light_pos = spot_lights.data[light_index].position; + vec3 light_rel_vec = spot_lights.data[light_index].position - view_pos; + float d = length(light_rel_vec); + float shadow_attenuation = 1.0; + + if (d * spot_lights.data[light_index].inv_radius < 1.0) { + float attenuation = get_omni_attenuation(d, spot_lights.data[light_index].inv_radius, spot_lights.data[light_index].attenuation); + + vec3 spot_dir = spot_lights.data[light_index].direction; + float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_lights.data[light_index].cone_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_lights.data[light_index].cone_angle)); + attenuation *= 1.0 - pow(spot_rim, spot_lights.data[light_index].cone_attenuation); + + vec3 light = spot_lights.data[light_index].color; + + if (spot_lights.data[light_index].shadow_opacity > 0.001) { + //has shadow + vec4 uv_rect = spot_lights.data[light_index].atlas_rect; + vec2 flip_offset = spot_lights.data[light_index].direction.xy; + + vec3 local_vert = (spot_lights.data[light_index].shadow_matrix * vec4(view_pos, 1.0)).xyz; + + float shadow_len = length(local_vert); //need to remember shadow len from here + vec3 shadow_sample = normalize(local_vert); + + if (shadow_sample.z >= 0.0) { + uv_rect.xy += flip_offset; + } + + shadow_sample.z = 1.0 + abs(shadow_sample.z); + vec3 pos = vec3(shadow_sample.xy / shadow_sample.z, shadow_len - spot_lights.data[light_index].shadow_bias); + pos.z *= spot_lights.data[light_index].inv_radius; + + pos.xy = pos.xy * 0.5 + 0.5; + pos.xy = uv_rect.xy + pos.xy * uv_rect.zw; + + float depth = texture(sampler2D(shadow_atlas, linear_sampler), pos.xy).r; + + shadow_attenuation = exp(min(0.0, (depth - pos.z)) / spot_lights.data[light_index].inv_radius * spot_lights.data[light_index].shadow_volumetric_fog_fade); + } + total_light += light * attenuation * shadow_attenuation * henyey_greenstein(dot(normalize(light_rel_vec), normalize(view_pos)), params.phase_g); + } + } + } + } + + vec3 world_pos = mat3(params.cam_rotation) * view_pos; + + for (uint i = 0; i < params.max_voxel_gi_instances; i++) { + vec3 position = (voxel_gi_instances.data[i].xform * vec4(world_pos, 1.0)).xyz; + + //this causes corrupted pixels, i have no idea why.. + if (all(bvec2(all(greaterThanEqual(position, vec3(0.0))), all(lessThan(position, voxel_gi_instances.data[i].bounds))))) { + position /= voxel_gi_instances.data[i].bounds; + + vec4 light = vec4(0.0); + for (uint j = 0; j < voxel_gi_instances.data[i].mipmaps; j++) { + vec4 slight = textureLod(sampler3D(voxel_gi_textures[i], linear_sampler_with_mipmaps), position, float(j)); + float a = (1.0 - light.a); + light += a * slight; + } + + light.rgb *= voxel_gi_instances.data[i].dynamic_range * params.gi_inject; + + total_light += light.rgb; + } + } + + //sdfgi +#ifdef ENABLE_SDFGI + + { + float blend = -1.0; + vec3 ambient_total = vec3(0.0); + + for (uint i = 0; i < sdfgi.max_cascades; i++) { + vec3 cascade_pos = (world_pos - sdfgi.cascades[i].position) * sdfgi.cascades[i].to_probe; + + if (any(lessThan(cascade_pos, vec3(0.0))) || any(greaterThanEqual(cascade_pos, sdfgi.cascade_probe_size))) { + continue; //skip cascade + } + + vec3 base_pos = floor(cascade_pos); + ivec3 probe_base_pos = ivec3(base_pos); + + vec4 ambient_accum = vec4(0.0); + + ivec3 tex_pos = ivec3(probe_base_pos.xy, int(i)); + tex_pos.x += probe_base_pos.z * sdfgi.probe_axis_size; + + for (uint j = 0; j < 8; j++) { + ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); + ivec3 probe_posi = probe_base_pos; + probe_posi += offset; + + // Compute weight + + vec3 probe_pos = vec3(probe_posi); + vec3 probe_to_pos = cascade_pos - probe_pos; + + vec3 trilinear = vec3(1.0) - abs(probe_to_pos); + float weight = trilinear.x * trilinear.y * trilinear.z; + + // Compute lightprobe occlusion + + if (sdfgi.use_occlusion) { + ivec3 occ_indexv = abs((sdfgi.cascades[i].probe_world_offset + probe_posi) & ivec3(1, 1, 1)) * ivec3(1, 2, 4); + vec4 occ_mask = mix(vec4(0.0), vec4(1.0), equal(ivec4(occ_indexv.x | occ_indexv.y), ivec4(0, 1, 2, 3))); + + vec3 occ_pos = clamp(cascade_pos, probe_pos - sdfgi.occlusion_clamp, probe_pos + sdfgi.occlusion_clamp) * sdfgi.probe_to_uvw; + occ_pos.z += float(i); + if (occ_indexv.z != 0) { //z bit is on, means index is >=4, so make it switch to the other half of textures + occ_pos.x += 1.0; + } + + occ_pos *= sdfgi.occlusion_renormalize; + float occlusion = dot(textureLod(sampler3D(sdfgi_occlusion_texture, linear_sampler), occ_pos, 0.0), occ_mask); + + weight *= max(occlusion, 0.01); + } + + // Compute ambient texture position + + ivec3 uvw = tex_pos; + uvw.xy += offset.xy; + uvw.x += offset.z * sdfgi.probe_axis_size; + + vec3 ambient = texelFetch(sampler2DArray(sdfgi_ambient_texture, linear_sampler), uvw, 0).rgb; + + ambient_accum.rgb += ambient * weight; + ambient_accum.a += weight; + } + + if (ambient_accum.a > 0) { + ambient_accum.rgb /= ambient_accum.a; + } + ambient_total = ambient_accum.rgb; + break; + } + + total_light += ambient_total * params.gi_inject; + } + +#endif + } + + vec4 final_density = vec4(total_light * scattering + emission, total_density); + + final_density = mix(final_density, reprojected_density, reproject_amount); + + imageStore(density_map, pos, final_density); +#ifdef MOLTENVK_USED + density_only_map[lpos] = 0; + light_only_map[lpos] = 0; + emissive_only_map[lpos] = 0; +#else + imageStore(density_only_map, pos, uvec4(0)); + imageStore(light_only_map, pos, uvec4(0)); + imageStore(emissive_only_map, pos, uvec4(0)); +#endif +#endif + +#ifdef MODE_FOG + + ivec3 pos = ivec3(gl_GlobalInvocationID.xy, 0); + + if (any(greaterThanEqual(pos, params.fog_volume_size))) { + return; //do not compute + } + + vec4 fog_accum = vec4(0.0, 0.0, 0.0, 1.0); + float prev_z = 0.0; + + for (int i = 0; i < params.fog_volume_size.z; i++) { + //compute fog position + ivec3 fog_pos = pos + ivec3(0, 0, i); + //get fog value + vec4 fog = imageLoad(density_map, fog_pos); + + //get depth at cell pos + float z = get_depth_at_pos(fog_cell_size.z, i); + //get distance from previous pos + float d = abs(prev_z - z); + //compute transmittance using beer's law + float transmittance = exp(-d * fog.a); + + fog_accum.rgb += ((fog.rgb - fog.rgb * transmittance) / max(fog.a, 0.00001)) * fog_accum.a; + fog_accum.a *= transmittance; + + prev_z = z; + + imageStore(fog_map, fog_pos, vec4(fog_accum.rgb, 1.0 - fog_accum.a)); + } + +#endif + +#ifdef MODE_FILTER + + ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + + const float gauss[7] = float[](0.071303, 0.131514, 0.189879, 0.214607, 0.189879, 0.131514, 0.071303); + + const ivec3 filter_dir[3] = ivec3[](ivec3(1, 0, 0), ivec3(0, 1, 0), ivec3(0, 0, 1)); + ivec3 offset = filter_dir[params.filter_axis]; + + vec4 accum = vec4(0.0); + for (int i = -3; i <= 3; i++) { + accum += imageLoad(source_map, clamp(pos + offset * i, ivec3(0), params.fog_volume_size - ivec3(1))) * gauss[i + 3]; + } + + imageStore(dest_map, pos, accum); + +#endif +#ifdef MODE_COPY + ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + if (any(greaterThanEqual(pos, params.fog_volume_size))) { + return; //do not compute + } + + imageStore(dest_map, pos, imageLoad(source_map, pos)); + +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/voxel_gi.glsl b/servers/rendering/renderer_rd/shaders/environment/voxel_gi.glsl index 779f04ed35..577c6d0cd0 100644 --- a/servers/rendering/renderer_rd/shaders/voxel_gi.glsl +++ b/servers/rendering/renderer_rd/shaders/environment/voxel_gi.glsl @@ -13,7 +13,6 @@ layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; #ifndef MODE_DYNAMIC #define NO_CHILDREN 0xFFFFFFFF -#define GREY_VEC vec3(0.33333, 0.33333, 0.33333) struct CellChildren { uint children[8]; @@ -75,7 +74,7 @@ layout(set = 0, binding = 5) uniform texture3D color_texture; #ifndef MODE_DYNAMIC -layout(push_constant, binding = 0, std430) uniform Params { +layout(push_constant, std430) uniform Params { ivec3 limits; uint stack_size; @@ -109,7 +108,7 @@ layout(rgba8, set = 0, binding = 5) uniform restrict writeonly image3D color_tex #ifdef MODE_DYNAMIC -layout(push_constant, binding = 0, std430) uniform Params { +layout(push_constant, std430) uniform Params { ivec3 limits; uint light_count; //when not lighting ivec3 x_dir; diff --git a/servers/rendering/renderer_rd/shaders/voxel_gi_debug.glsl b/servers/rendering/renderer_rd/shaders/environment/voxel_gi_debug.glsl index 281c496df3..fd7a2bf8ad 100644 --- a/servers/rendering/renderer_rd/shaders/voxel_gi_debug.glsl +++ b/servers/rendering/renderer_rd/shaders/environment/voxel_gi_debug.glsl @@ -20,7 +20,7 @@ layout(set = 0, binding = 2) uniform texture3D color_tex; layout(set = 0, binding = 3) uniform sampler tex_sampler; -layout(push_constant, binding = 0, std430) uniform Params { +layout(push_constant, std430) uniform Params { mat4 projection; uint cell_offset; float dynamic_range; @@ -90,66 +90,10 @@ void main() { #endif #ifdef MODE_DEBUG_LIGHT - -#ifdef USE_ANISOTROPY - -#define POS_X 0 -#define POS_Y 1 -#define POS_Z 2 -#define NEG_X 3 -#define NEG_Y 4 -#define NEG_Z 5 - - const uint triangle_aniso[12] = uint[]( - NEG_X, - NEG_Z, - NEG_Y, - NEG_Z, - NEG_X, - NEG_Y, - POS_Z, - POS_X, - POS_X, - POS_Y, - POS_Y, - POS_Z); - - color_interp.xyz = texelFetch(sampler3D(color_tex, tex_sampler), ivec3(posu), int(params.level)).xyz * params.dynamic_range; - vec3 aniso_pos = texelFetch(sampler3D(aniso_pos_tex, tex_sampler), ivec3(posu), int(params.level)).xyz; - vec3 aniso_neg = texelFetch(sampler3D(aniso_neg_tex, tex_sampler), ivec3(posu), int(params.level)).xyz; - uint side = triangle_aniso[gl_VertexIndex / 3]; - - float strength = 0.0; - switch (side) { - case POS_X: - strength = aniso_pos.x; - break; - case POS_Y: - strength = aniso_pos.y; - break; - case POS_Z: - strength = aniso_pos.z; - break; - case NEG_X: - strength = aniso_neg.x; - break; - case NEG_Y: - strength = aniso_neg.y; - break; - case NEG_Z: - strength = aniso_neg.z; - break; - } - - color_interp.xyz *= strength; - -#else color_interp = texelFetch(sampler3D(color_tex, tex_sampler), ivec3(posu), int(params.level)); color_interp.xyz *params.dynamic_range; - #endif -#endif float scale = (1 << params.level); gl_Position = params.projection * vec4((vec3(posu) + vertex) * scale, 1.0); diff --git a/servers/rendering/renderer_rd/shaders/voxel_gi_sdf.glsl b/servers/rendering/renderer_rd/shaders/environment/voxel_gi_sdf.glsl index e20b3f680d..47a611a543 100644 --- a/servers/rendering/renderer_rd/shaders/voxel_gi_sdf.glsl +++ b/servers/rendering/renderer_rd/shaders/environment/voxel_gi_sdf.glsl @@ -6,10 +6,9 @@ layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; -#define MAX_DISTANCE 100000 +#define MAX_DISTANCE 100000.0 #define NO_CHILDREN 0xFFFFFFFF -#define GREY_VEC vec3(0.33333, 0.33333, 0.33333) struct CellChildren { uint children[8]; @@ -34,7 +33,7 @@ cell_data; layout(r8ui, set = 0, binding = 3) uniform restrict writeonly uimage3D sdf_tex; -layout(push_constant, binding = 0, std430) uniform Params { +layout(push_constant, std430) uniform Params { uint offset; uint end; uint pad0; @@ -44,7 +43,7 @@ params; void main() { vec3 pos = vec3(gl_GlobalInvocationID); - float closest_dist = 100000.0; + float closest_dist = MAX_DISTANCE; for (uint i = params.offset; i < params.end; i++) { vec3 posu = vec3(uvec3(cell_data.data[i].position & 0x7FF, (cell_data.data[i].position >> 11) & 0x3FF, cell_data.data[i].position >> 21)); @@ -67,7 +66,7 @@ void main() { } #if 0 -layout(push_constant, binding = 0, std430) uniform Params { +layout(push_constant, std430) uniform Params { ivec3 limits; uint stack_size; } diff --git a/servers/rendering/renderer_rd/shaders/fsr_upscale.glsl b/servers/rendering/renderer_rd/shaders/fsr_upscale.glsl new file mode 100644 index 0000000000..c8eb78a2f0 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/fsr_upscale.glsl @@ -0,0 +1,173 @@ +/*************************************************************************/ +/* fsr_upscale.glsl */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#[compute] + +#version 450 + +#VERSION_DEFINES + +#define A_GPU +#define A_GLSL + +#ifdef MODE_FSR_UPSCALE_NORMAL + +#define A_HALF + +#endif + +#include "thirdparty/amd-fsr/ffx_a.h" + +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +layout(rgba16f, set = 1, binding = 0) uniform restrict writeonly image2D fsr_image; +layout(set = 0, binding = 0) uniform sampler2D source_image; + +#define FSR_UPSCALE_PASS_TYPE_EASU 0 +#define FSR_UPSCALE_PASS_TYPE_RCAS 1 + +layout(push_constant, std430) uniform Params { + float resolution_width; + float resolution_height; + float upscaled_width; + float upscaled_height; + float sharpness; + int pass; +} +params; + +AU4 Const0, Const1, Const2, Const3; + +#ifdef MODE_FSR_UPSCALE_FALLBACK + +#define FSR_EASU_F +AF4 FsrEasuRF(AF2 p) { + AF4 res = textureGather(source_image, p, 0); + return res; +} +AF4 FsrEasuGF(AF2 p) { + AF4 res = textureGather(source_image, p, 1); + return res; +} +AF4 FsrEasuBF(AF2 p) { + AF4 res = textureGather(source_image, p, 2); + return res; +} + +#define FSR_RCAS_F +AF4 FsrRcasLoadF(ASU2 p) { + return AF4(texelFetch(source_image, ASU2(p), 0)); +} +void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {} + +#else + +#define FSR_EASU_H +AH4 FsrEasuRH(AF2 p) { + AH4 res = AH4(textureGather(source_image, p, 0)); + return res; +} +AH4 FsrEasuGH(AF2 p) { + AH4 res = AH4(textureGather(source_image, p, 1)); + return res; +} +AH4 FsrEasuBH(AF2 p) { + AH4 res = AH4(textureGather(source_image, p, 2)); + return res; +} + +#define FSR_RCAS_H +AH4 FsrRcasLoadH(ASW2 p) { + return AH4(texelFetch(source_image, ASU2(p), 0)); +} +void FsrRcasInputH(inout AH1 r, inout AH1 g, inout AH1 b) {} + +#endif + +#include "thirdparty/amd-fsr/ffx_fsr1.h" + +void fsr_easu_pass(AU2 pos) { +#ifdef MODE_FSR_UPSCALE_NORMAL + + AH3 Gamma2Color = AH3(0, 0, 0); + FsrEasuH(Gamma2Color, pos, Const0, Const1, Const2, Const3); + imageStore(fsr_image, ASU2(pos), AH4(Gamma2Color, 1)); + +#else + + AF3 Gamma2Color = AF3(0, 0, 0); + FsrEasuF(Gamma2Color, pos, Const0, Const1, Const2, Const3); + imageStore(fsr_image, ASU2(pos), AF4(Gamma2Color, 1)); + +#endif +} + +void fsr_rcas_pass(AU2 pos) { +#ifdef MODE_FSR_UPSCALE_NORMAL + + AH3 Gamma2Color = AH3(0, 0, 0); + FsrRcasH(Gamma2Color.r, Gamma2Color.g, Gamma2Color.b, pos, Const0); + imageStore(fsr_image, ASU2(pos), AH4(Gamma2Color, 1)); + +#else + + AF3 Gamma2Color = AF3(0, 0, 0); + FsrRcasF(Gamma2Color.r, Gamma2Color.g, Gamma2Color.b, pos, Const0); + imageStore(fsr_image, ASU2(pos), AF4(Gamma2Color, 1)); + +#endif +} + +void fsr_pass(AU2 pos) { + if (params.pass == FSR_UPSCALE_PASS_TYPE_EASU) { + fsr_easu_pass(pos); + } else if (params.pass == FSR_UPSCALE_PASS_TYPE_RCAS) { + fsr_rcas_pass(pos); + } +} + +void main() { + // Clang does not like unused functions. If ffx_a.h is included in the binary, clang will throw a fit and not compile so we must configure FSR in this shader + if (params.pass == FSR_UPSCALE_PASS_TYPE_EASU) { + FsrEasuCon(Const0, Const1, Const2, Const3, params.resolution_width, params.resolution_height, params.resolution_width, params.resolution_height, params.upscaled_width, params.upscaled_height); + } else if (params.pass == FSR_UPSCALE_PASS_TYPE_RCAS) { + FsrRcasCon(Const0, params.sharpness); + } + + AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); + + fsr_pass(gxy); + gxy.x += 8u; + fsr_pass(gxy); + gxy.y += 8u; + fsr_pass(gxy); + gxy.x -= 8u; + fsr_pass(gxy); +} diff --git a/servers/rendering/renderer_rd/shaders/giprobe_write.glsl b/servers/rendering/renderer_rd/shaders/giprobe_write.glsl index 25d87ca45d..6c73864bf6 100644 --- a/servers/rendering/renderer_rd/shaders/giprobe_write.glsl +++ b/servers/rendering/renderer_rd/shaders/giprobe_write.glsl @@ -7,7 +7,6 @@ layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; #define NO_CHILDREN 0xFFFFFFFF -#define GREY_VEC vec3(0.33333, 0.33333, 0.33333) struct CellChildren { uint children[8]; @@ -59,7 +58,7 @@ lights; #endif -layout(push_constant, binding = 0, std430) uniform Params { +layout(push_constant, std430) uniform Params { ivec3 limits; uint stack_size; diff --git a/servers/rendering/renderer_rd/shaders/light_data_inc.glsl b/servers/rendering/renderer_rd/shaders/light_data_inc.glsl index fdc7729338..799f7087b6 100644 --- a/servers/rendering/renderer_rd/shaders/light_data_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/light_data_inc.glsl @@ -1,6 +1,6 @@ #define LIGHT_BAKE_DISABLED 0 -#define LIGHT_BAKE_DYNAMIC 1 -#define LIGHT_BAKE_STATIC 2 +#define LIGHT_BAKE_STATIC 1 +#define LIGHT_BAKE_DYNAMIC 2 struct LightData { //this structure needs to be as packed as possible highp vec3 position; @@ -15,7 +15,7 @@ struct LightData { //this structure needs to be as packed as possible mediump float cone_attenuation; mediump float cone_angle; mediump float specular_amount; - bool shadow_enabled; + mediump float shadow_opacity; highp vec4 atlas_rect; // rect in the shadow atlas highp mat4 shadow_matrix; @@ -60,7 +60,7 @@ struct DirectionalLightData { highp float softshadow_angle; highp float soft_shadow_scale; bool blend_splits; - bool shadow_enabled; + mediump float shadow_opacity; highp float fade_from; highp float fade_to; uvec2 pad; @@ -76,10 +76,6 @@ struct DirectionalLightData { highp mat4 shadow_matrix2; highp mat4 shadow_matrix3; highp mat4 shadow_matrix4; - mediump vec4 shadow_color1; - mediump vec4 shadow_color2; - mediump vec4 shadow_color3; - mediump vec4 shadow_color4; highp vec2 uv_scale1; highp vec2 uv_scale2; highp vec2 uv_scale3; diff --git a/servers/rendering/renderer_rd/shaders/luminance_reduce.glsl b/servers/rendering/renderer_rd/shaders/luminance_reduce.glsl index 466442b67a..0ee4cf6e31 100644 --- a/servers/rendering/renderer_rd/shaders/luminance_reduce.glsl +++ b/servers/rendering/renderer_rd/shaders/luminance_reduce.glsl @@ -28,7 +28,7 @@ layout(r32f, set = 1, binding = 0) uniform restrict writeonly image2D dest_lumin layout(set = 2, binding = 0) uniform sampler2D prev_luminance; #endif -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, std430) uniform Params { ivec2 source_size; float max_luminance; float min_luminance; diff --git a/servers/rendering/renderer_rd/shaders/luminance_reduce_raster_inc.glsl b/servers/rendering/renderer_rd/shaders/luminance_reduce_raster_inc.glsl index 3cde9923fa..b8860f6518 100644 --- a/servers/rendering/renderer_rd/shaders/luminance_reduce_raster_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/luminance_reduce_raster_inc.glsl @@ -1,5 +1,5 @@ -layout(push_constant, binding = 1, std430) uniform PushConstant { +layout(push_constant, std430) uniform PushConstant { ivec2 source_size; ivec2 dest_size; diff --git a/servers/rendering/renderer_rd/shaders/particles.glsl b/servers/rendering/renderer_rd/shaders/particles.glsl index 9f8410fd8a..fb5759bc17 100644 --- a/servers/rendering/renderer_rd/shaders/particles.glsl +++ b/servers/rendering/renderer_rd/shaders/particles.glsl @@ -25,10 +25,10 @@ layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; layout(set = 0, binding = 1) uniform sampler material_samplers[12]; -layout(set = 0, binding = 2, std430) restrict readonly buffer GlobalVariableData { +layout(set = 0, binding = 2, std430) restrict readonly buffer GlobalShaderUniformData { vec4 data[]; } -global_variables; +global_shader_uniforms; /* Set 1: FRAME AND PARTICLE DATA */ @@ -112,6 +112,24 @@ struct ParticleData { uint flags; vec4 color; vec4 custom; +#ifdef USERDATA1_USED + vec4 userdata1; +#endif +#ifdef USERDATA2_USED + vec4 userdata2; +#endif +#ifdef USERDATA3_USED + vec4 userdata3; +#endif +#ifdef USERDATA4_USED + vec4 userdata4; +#endif +#ifdef USERDATA5_USED + vec4 userdata5; +#endif +#ifdef USERDATA6_USED + vec4 userdata6; +#endif }; layout(set = 1, binding = 1, std430) restrict buffer Particles { @@ -168,7 +186,7 @@ layout(set = 3, binding = 0, std140) uniform MaterialUniforms{ } material; #endif -layout(push_constant, binding = 0, std430) uniform Params { +layout(push_constant, std430) uniform Params { float lifetime; bool clear; uint total_particles; @@ -210,6 +228,14 @@ bool emit_subparticle(mat4 p_xform, vec3 p_velocity, vec4 p_color, vec4 p_custom return true; } +vec3 safe_normalize(vec3 direction) { + const float EPSILON = 0.001; + if (length(direction) < EPSILON) { + return vec3(0.0); + } + return normalize(direction); +} + #GLOBALS void main() { @@ -413,7 +439,7 @@ void main() { switch (FRAME.attractors[i].type) { case ATTRACTOR_TYPE_SPHERE: { - dir = normalize(rel_vec); + dir = safe_normalize(rel_vec); float d = length(local_pos) / FRAME.attractors[i].extents.x; if (d > 1.0) { continue; @@ -421,7 +447,7 @@ void main() { amount = max(0.0, 1.0 - d); } break; case ATTRACTOR_TYPE_BOX: { - dir = normalize(rel_vec); + dir = safe_normalize(rel_vec); vec3 abs_pos = abs(local_pos / FRAME.attractors[i].extents); float d = max(abs_pos.x, max(abs_pos.y, abs_pos.z)); @@ -432,18 +458,18 @@ void main() { } break; case ATTRACTOR_TYPE_VECTOR_FIELD: { - vec3 uvw_pos = (local_pos / FRAME.attractors[i].extents) * 2.0 - 1.0; + vec3 uvw_pos = (local_pos / FRAME.attractors[i].extents + 1.0) * 0.5; if (any(lessThan(uvw_pos, vec3(0.0))) || any(greaterThan(uvw_pos, vec3(1.0)))) { continue; } - vec3 s = texture(sampler3D(sdf_vec_textures[FRAME.attractors[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos).xyz; - dir = mat3(FRAME.attractors[i].transform) * normalize(s); //revert direction + vec3 s = texture(sampler3D(sdf_vec_textures[FRAME.attractors[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos).xyz * 2.0 - 1.0; + dir = mat3(FRAME.attractors[i].transform) * safe_normalize(s); //revert direction amount = length(s); } break; } amount = pow(amount, FRAME.attractors[i].attenuation); - dir = normalize(mix(dir, FRAME.attractors[i].transform[2].xyz, FRAME.attractors[i].directionality)); + dir = safe_normalize(mix(dir, FRAME.attractors[i].transform[2].xyz, FRAME.attractors[i].directionality)); attractor_force -= amount * dir * FRAME.attractors[i].strength; } @@ -567,11 +593,11 @@ void main() { depth = particle_size - s; const float EPSILON = 0.001; normal = mat3(FRAME.colliders[i].transform) * - normalize( - vec3( - texture(sampler3D(sdf_vec_textures[FRAME.colliders[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos + vec3(EPSILON, 0.0, 0.0)).r - texture(sampler3D(sdf_vec_textures[FRAME.colliders[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos - vec3(EPSILON, 0.0, 0.0)).r, - texture(sampler3D(sdf_vec_textures[FRAME.colliders[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos + vec3(0.0, EPSILON, 0.0)).r - texture(sampler3D(sdf_vec_textures[FRAME.colliders[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos - vec3(0.0, EPSILON, 0.0)).r, - texture(sampler3D(sdf_vec_textures[FRAME.colliders[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos + vec3(0.0, 0.0, EPSILON)).r - texture(sampler3D(sdf_vec_textures[FRAME.colliders[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos - vec3(0.0, 0.0, EPSILON)).r)); + normalize( + vec3( + texture(sampler3D(sdf_vec_textures[FRAME.colliders[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos + vec3(EPSILON, 0.0, 0.0)).r - texture(sampler3D(sdf_vec_textures[FRAME.colliders[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos - vec3(EPSILON, 0.0, 0.0)).r, + texture(sampler3D(sdf_vec_textures[FRAME.colliders[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos + vec3(0.0, EPSILON, 0.0)).r - texture(sampler3D(sdf_vec_textures[FRAME.colliders[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos - vec3(0.0, EPSILON, 0.0)).r, + texture(sampler3D(sdf_vec_textures[FRAME.colliders[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos + vec3(0.0, 0.0, EPSILON)).r - texture(sampler3D(sdf_vec_textures[FRAME.colliders[i].texture_index], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw_pos - vec3(0.0, 0.0, EPSILON)).r)); } } break; diff --git a/servers/rendering/renderer_rd/shaders/particles_copy.glsl b/servers/rendering/renderer_rd/shaders/particles_copy.glsl index e88e68b511..afbd5a9caa 100644 --- a/servers/rendering/renderer_rd/shaders/particles_copy.glsl +++ b/servers/rendering/renderer_rd/shaders/particles_copy.glsl @@ -16,6 +16,9 @@ struct ParticleData { uint flags; vec4 color; vec4 custom; +#ifdef USERDATA_COUNT + vec4 userdata[USERDATA_COUNT]; +#endif }; layout(set = 0, binding = 1, std430) restrict readonly buffer Particles { @@ -42,7 +45,7 @@ layout(set = 2, binding = 0, std430) restrict readonly buffer TrailBindPoses { } trail_bind_poses; -layout(push_constant, binding = 0, std430) uniform Params { +layout(push_constant, std430) uniform Params { vec3 sort_direction; uint total_particles; @@ -57,7 +60,9 @@ layout(push_constant, binding = 0, std430) uniform Params { bool order_by_lifetime; uint lifetime_split; bool lifetime_reverse; - uint pad; + bool copy_mode_2d; + + mat4 inv_emission_transform; } params; @@ -196,30 +201,33 @@ void main() { txform = txform * trail_bind_poses.data[part_ofs]; } + if (params.copy_mode_2d) { + // In global mode, bring 2D particles to local coordinates + // as they will be drawn with the node position as origin. + txform = params.inv_emission_transform * txform; + } + txform = transpose(txform); } else { txform = mat4(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); //zero scale, becomes invisible } -#ifdef MODE_2D - - uint write_offset = gl_GlobalInvocationID.x * (2 + 1 + 1); //xform + color + custom + if (params.copy_mode_2d) { + uint write_offset = gl_GlobalInvocationID.x * (2 + 1 + 1); //xform + color + custom - instances.data[write_offset + 0] = txform[0]; - instances.data[write_offset + 1] = txform[1]; - instances.data[write_offset + 2] = particles.data[particle].color; - instances.data[write_offset + 3] = particles.data[particle].custom; - -#else - - uint write_offset = gl_GlobalInvocationID.x * (3 + 1 + 1); //xform + color + custom + instances.data[write_offset + 0] = txform[0]; + instances.data[write_offset + 1] = txform[1]; + instances.data[write_offset + 2] = particles.data[particle].color; + instances.data[write_offset + 3] = particles.data[particle].custom; + } else { + uint write_offset = gl_GlobalInvocationID.x * (3 + 1 + 1); //xform + color + custom - instances.data[write_offset + 0] = txform[0]; - instances.data[write_offset + 1] = txform[1]; - instances.data[write_offset + 2] = txform[2]; - instances.data[write_offset + 3] = particles.data[particle].color; - instances.data[write_offset + 4] = particles.data[particle].custom; -#endif //MODE_2D + instances.data[write_offset + 0] = txform[0]; + instances.data[write_offset + 1] = txform[1]; + instances.data[write_offset + 2] = txform[2]; + instances.data[write_offset + 3] = particles.data[particle].color; + instances.data[write_offset + 4] = particles.data[particle].custom; + } #endif } diff --git a/servers/rendering/renderer_rd/shaders/roughness_limiter.glsl b/servers/rendering/renderer_rd/shaders/roughness_limiter.glsl index 7b964675ca..59027df8e9 100644 --- a/servers/rendering/renderer_rd/shaders/roughness_limiter.glsl +++ b/servers/rendering/renderer_rd/shaders/roughness_limiter.glsl @@ -9,7 +9,7 @@ layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; layout(set = 0, binding = 0) uniform sampler2D source_normal; layout(r8, set = 1, binding = 0) uniform restrict writeonly image2D dest_roughness; -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, std430) uniform Params { ivec2 screen_size; float curve; uint pad; diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_aa_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_aa_inc.glsl index 99714b4504..97c913d489 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_aa_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_aa_inc.glsl @@ -2,7 +2,7 @@ float hash_2d(vec2 p) { return fract(1.0e4 * sin(17.0 * p.x + 0.1 * p.y) * - (0.1 + abs(sin(13.0 * p.y + p.x)))); + (0.1 + abs(sin(13.0 * p.y + p.x)))); } float hash_3d(vec3 p) { @@ -29,8 +29,7 @@ float compute_alpha_hash_threshold(vec3 pos, float hash_scale) { vec3 cases = vec3(a_interp * a_interp / (2.0 * min_lerp * (1.0 - min_lerp)), (a_interp - 0.5 * min_lerp) / (1.0 - min_lerp), - 1.0 - ((1.0 - a_interp) * (1.0 - a_interp) / - (2.0 * min_lerp * (1.0 - min_lerp)))); + 1.0 - ((1.0 - a_interp) * (1.0 - a_interp) / (2.0 * min_lerp * (1.0 - min_lerp)))); float alpha_hash_threshold = (lerp_factor < (1.0 - min_lerp)) ? ((lerp_factor < min_lerp) ? cases.x : cases.y) : cases.z; diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl index edbe1031b7..e9515c7670 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl @@ -6,6 +6,8 @@ #include "scene_forward_clustered_inc.glsl" +#define SHADER_IS_SRGB false + /* INPUT ATTRIBS */ layout(location = 0) in vec3 vertex_attrib; @@ -81,6 +83,11 @@ layout(location = 5) out vec3 tangent_interp; layout(location = 6) out vec3 binormal_interp; #endif +#ifdef MOTION_VECTORS +layout(location = 7) out vec4 screen_position; +layout(location = 8) out vec4 prev_screen_position; +#endif + #ifdef MATERIAL_UNIFORMS_USED layout(set = MATERIAL_UNIFORM_SET, binding = 0, std140) uniform MaterialUniforms{ @@ -89,38 +96,43 @@ layout(set = MATERIAL_UNIFORM_SET, binding = 0, std140) uniform MaterialUniforms } material; #endif +float global_time; + #ifdef MODE_DUAL_PARABOLOID -layout(location = 8) out float dp_clip; +layout(location = 9) out float dp_clip; #endif -layout(location = 9) out flat uint instance_index; +layout(location = 10) out flat uint instance_index_interp; + +#ifdef USE_MULTIVIEW +#ifdef has_VK_KHR_multiview +#define ViewIndex gl_ViewIndex +#else // has_VK_KHR_multiview +// !BAS! This needs to become an input once we implement our fallback! +#define ViewIndex 0 +#endif // has_VK_KHR_multiview +#else // USE_MULTIVIEW +// Set to zero, not supported in non stereo +#define ViewIndex 0 +#endif //USE_MULTIVIEW invariant gl_Position; #GLOBALS -void main() { +void vertex_shader(in uint instance_index, in bool is_multimesh, in SceneData scene_data, in mat4 model_matrix, out vec4 screen_pos) { vec4 instance_custom = vec4(0.0); #if defined(COLOR_USED) color_interp = color_attrib; #endif - instance_index = draw_call.instance_index; - - bool is_multimesh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH); - if (!is_multimesh) { - instance_index += gl_InstanceIndex; - } - - mat4 world_matrix = instances.data[instance_index].transform; - - mat3 world_normal_matrix; + mat3 model_normal_matrix; if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_NON_UNIFORM_SCALE)) { - world_normal_matrix = transpose(inverse(mat3(world_matrix))); + model_normal_matrix = transpose(inverse(mat3(model_matrix))); } else { - world_normal_matrix = mat3(world_matrix); + model_normal_matrix = mat3(model_matrix); } if (is_multimesh) { @@ -213,8 +225,8 @@ void main() { #endif //transpose matrix = transpose(matrix); - world_matrix = world_matrix * matrix; - world_normal_matrix = world_normal_matrix * mat3(matrix); + model_matrix = model_matrix * matrix; + model_normal_matrix = model_normal_matrix * mat3(matrix); } vec3 vertex = vertex_attrib; @@ -240,27 +252,35 @@ void main() { vec4 position; #endif +#ifdef USE_MULTIVIEW + mat4 projection_matrix = scene_data.projection_matrix_view[ViewIndex]; + mat4 inv_projection_matrix = scene_data.inv_projection_matrix_view[ViewIndex]; +#else mat4 projection_matrix = scene_data.projection_matrix; + mat4 inv_projection_matrix = scene_data.inv_projection_matrix; +#endif //USE_MULTIVIEW //using world coordinates #if !defined(SKIP_TRANSFORM_USED) && defined(VERTEX_WORLD_COORDS_USED) - vertex = (world_matrix * vec4(vertex, 1.0)).xyz; + vertex = (model_matrix * vec4(vertex, 1.0)).xyz; - normal = world_normal_matrix * normal; +#ifdef NORMAL_USED + normal = model_normal_matrix * normal; +#endif #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) - tangent = world_normal_matrix * tangent; - binormal = world_normal_matrix * binormal; + tangent = model_normal_matrix * tangent; + binormal = model_normal_matrix * binormal; #endif #endif float roughness = 1.0; - mat4 modelview = scene_data.inv_camera_matrix * world_matrix; - mat3 modelview_normal = mat3(scene_data.inv_camera_matrix) * world_normal_matrix; + mat4 modelview = scene_data.view_matrix * model_matrix; + mat3 modelview_normal = mat3(scene_data.view_matrix) * model_normal_matrix; { #CODE : VERTEX @@ -285,17 +305,23 @@ void main() { //using world coordinates #if !defined(SKIP_TRANSFORM_USED) && defined(VERTEX_WORLD_COORDS_USED) - vertex = (scene_data.inv_camera_matrix * vec4(vertex, 1.0)).xyz; - normal = mat3(scene_data.inverse_normal_matrix) * normal; + vertex = (scene_data.view_matrix * vec4(vertex, 1.0)).xyz; +#ifdef NORMAL_USED + normal = (scene_data.view_matrix * vec4(normal, 0.0)).xyz; +#endif #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) - - binormal = mat3(scene_data.camera_inverse_binormal_matrix) * binormal; - tangent = mat3(scene_data.camera_inverse_tangent_matrix) * tangent; + binormal = (scene_data.view_matrix * vec4(binormal, 0.0)).xyz; + tangent = (scene_data.view_matrix * vec4(tangent, 0.0)).xyz; #endif #endif vertex_interp = vertex; + +#ifdef MOTION_VECTORS + screen_pos = projection_matrix * vec4(vertex_interp, 1.0); +#endif + #ifdef NORMAL_USED normal_interp = normal; #endif @@ -350,12 +376,37 @@ void main() { #endif } +void main() { + uint instance_index = draw_call.instance_index; + + bool is_multimesh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH); + if (!is_multimesh) { + instance_index += gl_InstanceIndex; + } + + instance_index_interp = instance_index; + + mat4 model_matrix = instances.data[instance_index].transform; +#if defined(MOTION_VECTORS) + global_time = scene_data_block.prev_data.time; + vertex_shader(instance_index, is_multimesh, scene_data_block.prev_data, instances.data[instance_index].prev_transform, prev_screen_position); + global_time = scene_data_block.data.time; + vertex_shader(instance_index, is_multimesh, scene_data_block.data, model_matrix, screen_position); +#else + global_time = scene_data_block.data.time; + vec4 screen_position; + vertex_shader(instance_index, is_multimesh, scene_data_block.data, model_matrix, screen_position); +#endif +} + #[fragment] #version 450 #VERSION_DEFINES +#define SHADER_IS_SRGB false + /* Specialization Constants (Toggles) */ layout(constant_id = 0) const bool sc_use_forward_gi = false; @@ -404,18 +455,43 @@ layout(location = 5) in vec3 tangent_interp; layout(location = 6) in vec3 binormal_interp; #endif +#ifdef MOTION_VECTORS +layout(location = 7) in vec4 screen_position; +layout(location = 8) in vec4 prev_screen_position; +#endif + #ifdef MODE_DUAL_PARABOLOID -layout(location = 8) in float dp_clip; +layout(location = 9) in float dp_clip; #endif -layout(location = 9) in flat uint instance_index; +layout(location = 10) in flat uint instance_index_interp; + +#ifdef USE_MULTIVIEW +#ifdef has_VK_KHR_multiview +#define ViewIndex gl_ViewIndex +#else // has_VK_KHR_multiview +// !BAS! This needs to become an input once we implement our fallback! +#define ViewIndex 0 +#endif // has_VK_KHR_multiview +#else // USE_MULTIVIEW +// Set to zero, not supported in non stereo +#define ViewIndex 0 +#endif //USE_MULTIVIEW //defines to keep compatibility with vertex -#define world_matrix instances.data[instance_index].transform +#define model_matrix instances.data[draw_call.instance_index].transform +#ifdef USE_MULTIVIEW +#define projection_matrix scene_data.projection_matrix_view[ViewIndex] +#define inv_projection_matrix scene_data.inv_projection_matrix_view[ViewIndex] +#else #define projection_matrix scene_data.projection_matrix +#define inv_projection_matrix scene_data.inv_projection_matrix +#endif + +#define global_time scene_data_block.data.time #if defined(ENABLE_SSS) && defined(ENABLE_TRANSMITTANCE) //both required for transmittance to be enabled @@ -454,23 +530,27 @@ layout(location = 1) out uvec2 voxel_gi_buffer; #endif //MODE_RENDER_NORMAL #else // RENDER DEPTH -#ifdef MODE_MULTIPLE_RENDER_TARGETS +#ifdef MODE_SEPARATE_SPECULAR layout(location = 0) out vec4 diffuse_buffer; //diffuse (rgb) and roughness layout(location = 1) out vec4 specular_buffer; //specular and SSS (subsurface scatter) #else layout(location = 0) out vec4 frag_color; -#endif // MODE_MULTIPLE_RENDER_TARGETS +#endif // MODE_SEPARATE_SPECULAR #endif // RENDER DEPTH +#ifdef MOTION_VECTORS +layout(location = 2) out vec2 motion_vector; +#endif + #include "scene_forward_aa_inc.glsl" #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) -/* Make a default specular mode SPECULAR_SCHLICK_GGX. */ -#if !defined(SPECULAR_DISABLED) && !defined(SPECULAR_SCHLICK_GGX) && !defined(SPECULAR_BLINN) && !defined(SPECULAR_PHONG) && !defined(SPECULAR_TOON) +// Default to SPECULAR_SCHLICK_GGX. +#if !defined(SPECULAR_DISABLED) && !defined(SPECULAR_SCHLICK_GGX) && !defined(SPECULAR_TOON) #define SPECULAR_SCHLICK_GGX #endif @@ -483,24 +563,24 @@ layout(location = 0) out vec4 frag_color; #ifndef MODE_RENDER_DEPTH vec4 volumetric_fog_process(vec2 screen_uv, float z) { - vec3 fog_pos = vec3(screen_uv, z * scene_data.volumetric_fog_inv_length); + vec3 fog_pos = vec3(screen_uv, z * scene_data_block.data.volumetric_fog_inv_length); if (fog_pos.z < 0.0) { return vec4(0.0); } else if (fog_pos.z < 1.0) { - fog_pos.z = pow(fog_pos.z, scene_data.volumetric_fog_detail_spread); + fog_pos.z = pow(fog_pos.z, scene_data_block.data.volumetric_fog_detail_spread); } return texture(sampler3D(volumetric_fog_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), fog_pos); } vec4 fog_process(vec3 vertex) { - vec3 fog_color = scene_data.fog_light_color; + vec3 fog_color = scene_data_block.data.fog_light_color; - if (scene_data.fog_aerial_perspective > 0.0) { + if (scene_data_block.data.fog_aerial_perspective > 0.0) { vec3 sky_fog_color = vec3(0.0); - vec3 cube_view = scene_data.radiance_inverse_xform * vertex; + vec3 cube_view = scene_data_block.data.radiance_inverse_xform * vertex; // mip_level always reads from the second mipmap and higher so the fog is always slightly blurred - float mip_level = mix(1.0 / MAX_ROUGHNESS_LOD, 1.0, 1.0 - (abs(vertex.z) - scene_data.z_near) / (scene_data.z_far - scene_data.z_near)); + float mip_level = mix(1.0 / MAX_ROUGHNESS_LOD, 1.0, 1.0 - (abs(vertex.z) - scene_data_block.data.z_near) / (scene_data_block.data.z_far - scene_data_block.data.z_near)); #ifdef USE_RADIANCE_CUBEMAP_ARRAY float lod, blend; blend = modf(mip_level * MAX_ROUGHNESS_LOD, lod); @@ -509,29 +589,29 @@ vec4 fog_process(vec3 vertex) { #else sky_fog_color = textureLod(samplerCube(radiance_cubemap, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), cube_view, mip_level * MAX_ROUGHNESS_LOD).rgb; #endif //USE_RADIANCE_CUBEMAP_ARRAY - fog_color = mix(fog_color, sky_fog_color, scene_data.fog_aerial_perspective); + fog_color = mix(fog_color, sky_fog_color, scene_data_block.data.fog_aerial_perspective); } - if (scene_data.fog_sun_scatter > 0.001) { + if (scene_data_block.data.fog_sun_scatter > 0.001) { vec4 sun_scatter = vec4(0.0); float sun_total = 0.0; vec3 view = normalize(vertex); - for (uint i = 0; i < scene_data.directional_light_count; i++) { + for (uint i = 0; i < scene_data_block.data.directional_light_count; i++) { vec3 light_color = directional_lights.data[i].color * directional_lights.data[i].energy; float light_amount = pow(max(dot(view, directional_lights.data[i].direction), 0.0), 8.0); - fog_color += light_color * light_amount * scene_data.fog_sun_scatter; + fog_color += light_color * light_amount * scene_data_block.data.fog_sun_scatter; } } - float fog_amount = 1.0 - exp(min(0.0, vertex.z * scene_data.fog_density)); + float fog_amount = 1.0 - exp(min(0.0, -length(vertex) * scene_data_block.data.fog_density)); - if (abs(scene_data.fog_height_density) > 0.001) { - float y = (scene_data.camera_matrix * vec4(vertex, 1.0)).y; + if (abs(scene_data_block.data.fog_height_density) >= 0.0001) { + float y = (scene_data_block.data.inv_view_matrix * vec4(vertex, 1.0)).y; - float y_dist = scene_data.fog_height - y; + float y_dist = y - scene_data_block.data.fog_height; - float vfog_amount = clamp(exp(y_dist * scene_data.fog_height_density), 0.0, 1.0); + float vfog_amount = 1.0 - exp(min(0.0, y_dist * scene_data_block.data.fog_height_density)); fog_amount = max(vfog_amount, fog_amount); } @@ -543,7 +623,6 @@ void cluster_get_item_range(uint p_offset, out uint item_min, out uint item_max, uint item_min_max = cluster_buffer.data[p_offset]; item_min = item_min_max & 0xFFFF; item_max = item_min_max >> 16; - ; item_from = item_min >> 5; item_to = (item_max == 0) ? 0 : ((item_max - 1) >> 5) + 1; //side effect of how it is stored, as item_max 0 means no elements @@ -557,16 +636,16 @@ uint cluster_get_range_clip_mask(uint i, uint z_min, uint z_max) { #endif //!MODE_RENDER DEPTH -void main() { -#ifdef MODE_DUAL_PARABOLOID - - if (dp_clip > 0.0) - discard; -#endif +void fragment_shader(in SceneData scene_data) { + uint instance_index = instance_index_interp; - //lay out everything, whathever is unused is optimized away anyway + //lay out everything, whatever is unused is optimized away anyway vec3 vertex = vertex_interp; +#ifdef USE_MULTIVIEW + vec3 view = -normalize(vertex_interp - scene_data.eye_offset[ViewIndex].xyz); +#else vec3 view = -normalize(vertex_interp); +#endif vec3 albedo = vec3(1.0); vec3 backlight = vec3(0.0); vec4 transmittance_color = vec4(0.0, 0.0, 0.0, 1.0); @@ -579,7 +658,7 @@ void main() { float rim = 0.0; float rim_tint = 0.0; float clearcoat = 0.0; - float clearcoat_gloss = 0.0; + float clearcoat_roughness = 0.0; float anisotropy = 0.0; vec2 anisotropy_flow = vec2(1.0, 0.0); vec4 fog = vec4(0.0); @@ -593,7 +672,7 @@ void main() { float ao = 1.0; float ao_light_affect = 0.0; - float alpha = 1.0; + float alpha = float(instances.data[instance_index].flags >> INSTANCE_FLAGS_FADE_SHIFT) / float(255.0); #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) vec3 binormal = normalize(binormal_interp); @@ -633,7 +712,7 @@ void main() { float normal_map_depth = 1.0; - vec2 screen_uv = gl_FragCoord.xy * scene_data.screen_pixel_size + scene_data.screen_pixel_size * 0.5; //account for center + vec2 screen_uv = gl_FragCoord.xy * scene_data.screen_pixel_size; float sss_strength = 0.0; @@ -687,7 +766,7 @@ void main() { #endif // ALPHA_ANTIALIASING_EDGE_USED #ifdef USE_OPAQUE_PREPASS - if (alpha < opaque_prepass_threshold) { + if (alpha < scene_data.opaque_prepass_threshold) { discard; } #endif // USE_OPAQUE_PREPASS @@ -868,9 +947,9 @@ void main() { if (decals.data[decal_index].emission_rect != vec4(0.0)) { //emission is additive, so its independent from albedo if (sc_decal_use_mipmaps) { - emission += textureGrad(sampler2D(decal_atlas_srgb, decal_sampler), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, ddx * decals.data[decal_index].emission_rect.zw, ddy * decals.data[decal_index].emission_rect.zw).xyz * decals.data[decal_index].emission_energy * fade; + emission += textureGrad(sampler2D(decal_atlas_srgb, decal_sampler), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, ddx * decals.data[decal_index].emission_rect.zw, ddy * decals.data[decal_index].emission_rect.zw).xyz * decals.data[decal_index].modulate.rgb * decals.data[decal_index].emission_energy * fade; } else { - emission += textureLod(sampler2D(decal_atlas_srgb, decal_sampler), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, 0.0).xyz * decals.data[decal_index].emission_energy * fade; + emission += textureLod(sampler2D(decal_atlas_srgb, decal_sampler), uv_local.xz * decals.data[decal_index].emission_rect.zw + decals.data[decal_index].emission_rect.xy, 0.0).xyz * decals.data[decal_index].modulate.rgb * decals.data[decal_index].emission_energy * fade; } } } @@ -902,7 +981,20 @@ void main() { #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) if (scene_data.use_reflection_cubemap) { +#ifdef LIGHT_ANISOTROPY_USED + // https://google.github.io/filament/Filament.html#lighting/imagebasedlights/anisotropy + vec3 anisotropic_direction = anisotropy >= 0.0 ? binormal : tangent; + vec3 anisotropic_tangent = cross(anisotropic_direction, view); + vec3 anisotropic_normal = cross(anisotropic_tangent, anisotropic_direction); + vec3 bent_normal = normalize(mix(normal, anisotropic_normal, abs(anisotropy) * clamp(5.0 * roughness, 0.0, 1.0))); + vec3 ref_vec = reflect(-view, bent_normal); + ref_vec = mix(ref_vec, bent_normal, roughness * roughness); +#else vec3 ref_vec = reflect(-view, normal); + ref_vec = mix(ref_vec, normal, roughness * roughness); +#endif + + float horizon = min(1.0 + dot(ref_vec, normal), 1.0); ref_vec = scene_data.radiance_inverse_xform * ref_vec; #ifdef USE_RADIANCE_CUBEMAP_ARRAY @@ -915,7 +1007,6 @@ void main() { specular_light = textureLod(samplerCube(radiance_cubemap, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), ref_vec, roughness * MAX_ROUGHNESS_LOD).rgb; #endif //USE_RADIANCE_CUBEMAP_ARRAY - float horizon = min(1.0 + dot(ref_vec, normal), 1.0); specular_light *= horizon * horizon; specular_light *= scene_data.ambient_light_color_energy.a; } @@ -944,6 +1035,37 @@ void main() { #if defined(CUSTOM_IRRADIANCE_USED) ambient_light = mix(ambient_light, custom_irradiance.rgb, custom_irradiance.a); #endif + +#ifdef LIGHT_CLEARCOAT_USED + + if (scene_data.use_reflection_cubemap) { + vec3 n = normalize(normal_interp); // We want to use geometric normal, not normal_map + float NoV = max(dot(n, view), 0.0001); + vec3 ref_vec = reflect(-view, n); + // The clear coat layer assumes an IOR of 1.5 (4% reflectance) + float Fc = clearcoat * (0.04 + 0.96 * SchlickFresnel(NoV)); + float attenuation = 1.0 - Fc; + ambient_light *= attenuation; + specular_light *= attenuation; + + ref_vec = mix(ref_vec, n, clearcoat_roughness * clearcoat_roughness); + float horizon = min(1.0 + dot(ref_vec, normal), 1.0); + ref_vec = scene_data.radiance_inverse_xform * ref_vec; + float roughness_lod = mix(0.001, 0.1, clearcoat_roughness) * MAX_ROUGHNESS_LOD; +#ifdef USE_RADIANCE_CUBEMAP_ARRAY + + float lod, blend; + blend = modf(roughness_lod, lod); + vec3 clearcoat_light = texture(samplerCubeArray(radiance_cubemap, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), vec4(ref_vec, lod)).rgb; + clearcoat_light = mix(clearcoat_light, texture(samplerCubeArray(radiance_cubemap, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), vec4(ref_vec, lod + 1)).rgb, blend); + +#else + vec3 clearcoat_light = textureLod(samplerCube(radiance_cubemap, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), ref_vec, roughness_lod).rgb; + +#endif //USE_RADIANCE_CUBEMAP_ARRAY + specular_light += clearcoat_light * horizon * horizon * Fc * scene_data.ambient_light_color_energy.a; + } +#endif #endif //!defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) //radiance @@ -957,22 +1079,22 @@ void main() { if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE)) { //has lightmap capture uint index = instances.data[instance_index].gi_offset; - vec3 wnormal = mat3(scene_data.camera_matrix) * normal; + vec3 wnormal = mat3(scene_data.inv_view_matrix) * normal; const float c1 = 0.429043; const float c2 = 0.511664; const float c3 = 0.743125; const float c4 = 0.886227; const float c5 = 0.247708; ambient_light += (c1 * lightmap_captures.data[index].sh[8].rgb * (wnormal.x * wnormal.x - wnormal.y * wnormal.y) + - c3 * lightmap_captures.data[index].sh[6].rgb * wnormal.z * wnormal.z + - c4 * lightmap_captures.data[index].sh[0].rgb - - c5 * lightmap_captures.data[index].sh[6].rgb + - 2.0 * c1 * lightmap_captures.data[index].sh[4].rgb * wnormal.x * wnormal.y + - 2.0 * c1 * lightmap_captures.data[index].sh[7].rgb * wnormal.x * wnormal.z + - 2.0 * c1 * lightmap_captures.data[index].sh[5].rgb * wnormal.y * wnormal.z + - 2.0 * c2 * lightmap_captures.data[index].sh[3].rgb * wnormal.x + - 2.0 * c2 * lightmap_captures.data[index].sh[1].rgb * wnormal.y + - 2.0 * c2 * lightmap_captures.data[index].sh[2].rgb * wnormal.z); + c3 * lightmap_captures.data[index].sh[6].rgb * wnormal.z * wnormal.z + + c4 * lightmap_captures.data[index].sh[0].rgb - + c5 * lightmap_captures.data[index].sh[6].rgb + + 2.0 * c1 * lightmap_captures.data[index].sh[4].rgb * wnormal.x * wnormal.y + + 2.0 * c1 * lightmap_captures.data[index].sh[7].rgb * wnormal.x * wnormal.z + + 2.0 * c1 * lightmap_captures.data[index].sh[5].rgb * wnormal.y * wnormal.z + + 2.0 * c2 * lightmap_captures.data[index].sh[3].rgb * wnormal.x + + 2.0 * c2 * lightmap_captures.data[index].sh[1].rgb * wnormal.y + + 2.0 * c2 * lightmap_captures.data[index].sh[2].rgb * wnormal.z); } else if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_LIGHTMAP)) { // has actual lightmap bool uses_sh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SH_LIGHTMAP); @@ -1011,9 +1133,9 @@ void main() { if (sc_use_forward_gi && bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SDFGI)) { //has lightmap capture //make vertex orientation the world one, but still align to camera - vec3 cam_pos = mat3(scene_data.camera_matrix) * vertex; - vec3 cam_normal = mat3(scene_data.camera_matrix) * normal; - vec3 cam_reflection = mat3(scene_data.camera_matrix) * reflect(-view, normal); + vec3 cam_pos = mat3(scene_data.inv_view_matrix) * vertex; + vec3 cam_normal = mat3(scene_data.inv_view_matrix) * normal; + vec3 cam_reflection = mat3(scene_data.inv_view_matrix) * reflect(-view, normal); //apply y-mult cam_pos.y *= sdfgi.y_mult; @@ -1083,7 +1205,8 @@ void main() { if (sc_use_forward_gi && bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_VOXEL_GI)) { // process voxel_gi_instances uint index1 = instances.data[instance_index].gi_offset & 0xFFFF; - vec3 ref_vec = normalize(reflect(normalize(vertex), normal)); + vec3 ref_vec = normalize(reflect(-view, normal)); + ref_vec = mix(ref_vec, normal, roughness * roughness); //find arbitrary tangent and bitangent, then build a matrix vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0); vec3 tangent = normalize(cross(v0, normal)); @@ -1119,12 +1242,20 @@ void main() { if (scene_data.gi_upscale_for_msaa) { vec2 base_coord = screen_uv; vec2 closest_coord = base_coord; +#ifdef USE_MULTIVIEW + float closest_ang = dot(normal, textureLod(sampler2DArray(normal_roughness_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), vec3(base_coord, ViewIndex), 0.0).xyz * 2.0 - 1.0); +#else // USE_MULTIVIEW float closest_ang = dot(normal, textureLod(sampler2D(normal_roughness_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), base_coord, 0.0).xyz * 2.0 - 1.0); +#endif // USE_MULTIVIEW for (int i = 0; i < 4; i++) { const vec2 neighbours[4] = vec2[](vec2(-1, 0), vec2(1, 0), vec2(0, -1), vec2(0, 1)); vec2 neighbour_coord = base_coord + neighbours[i] * scene_data.screen_pixel_size; +#ifdef USE_MULTIVIEW + float neighbour_ang = dot(normal, textureLod(sampler2DArray(normal_roughness_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), vec3(neighbour_coord, ViewIndex), 0.0).xyz * 2.0 - 1.0); +#else // USE_MULTIVIEW float neighbour_ang = dot(normal, textureLod(sampler2D(normal_roughness_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), neighbour_coord, 0.0).xyz * 2.0 - 1.0); +#endif // USE_MULTIVIEW if (neighbour_ang > closest_ang) { closest_ang = neighbour_ang; closest_coord = neighbour_coord; @@ -1137,15 +1268,20 @@ void main() { coord = screen_uv; } +#ifdef USE_MULTIVIEW + vec4 buffer_ambient = textureLod(sampler2DArray(ambient_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), vec3(coord, ViewIndex), 0.0); + vec4 buffer_reflection = textureLod(sampler2DArray(reflection_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), vec3(coord, ViewIndex), 0.0); +#else // USE_MULTIVIEW vec4 buffer_ambient = textureLod(sampler2D(ambient_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), coord, 0.0); vec4 buffer_reflection = textureLod(sampler2D(reflection_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), coord, 0.0); +#endif // USE_MULTIVIEW ambient_light = mix(ambient_light, buffer_ambient.rgb, buffer_ambient.a); specular_light = mix(specular_light, buffer_reflection.rgb, buffer_reflection.a); } #endif // !USE_LIGHTMAP - if (scene_data.ssao_enabled) { + if (bool(scene_data.ss_effects_flags & SCREEN_SPACE_EFFECTS_FLAGS_USE_SSAO)) { float ssao = texture(sampler2D(ao_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), screen_uv).r; ao = min(ao, ssao); ao_light_affect = mix(ao_light_affect, max(ao_light_affect, scene_data.ssao_light_affect), scene_data.ssao_ao_affect); @@ -1170,6 +1306,18 @@ void main() { item_to = subgroupBroadcastFirst(subgroupMax(item_to)); #endif +#ifdef LIGHT_ANISOTROPY_USED + // https://google.github.io/filament/Filament.html#lighting/imagebasedlights/anisotropy + vec3 anisotropic_direction = anisotropy >= 0.0 ? binormal : tangent; + vec3 anisotropic_tangent = cross(anisotropic_direction, view); + vec3 anisotropic_normal = cross(anisotropic_tangent, anisotropic_direction); + vec3 bent_normal = normalize(mix(normal, anisotropic_normal, abs(anisotropy) * clamp(5.0 * roughness, 0.0, 1.0))); +#else + vec3 bent_normal = normal; +#endif + vec3 ref_vec = normalize(reflect(-view, bent_normal)); + ref_vec = mix(ref_vec, bent_normal, roughness * roughness); + for (uint i = item_from; i < item_to; i++) { uint mask = cluster_buffer.data[cluster_reflection_offset + i]; mask &= cluster_get_range_clip_mask(i, item_min, item_max); @@ -1193,7 +1341,7 @@ void main() { continue; //not masked } - reflection_process(reflection_index, vertex, normal, roughness, ambient_light, specular_light, ambient_accum, reflection_accum); + reflection_process(reflection_index, vertex, ref_vec, normal, roughness, ambient_light, specular_light, ambient_accum, reflection_accum); } } @@ -1215,6 +1363,12 @@ void main() { // convert ao to direct light ao ao = mix(1.0, ao, ao_light_affect); + if (bool(scene_data.ss_effects_flags & SCREEN_SPACE_EFFECTS_FLAGS_USE_SSIL)) { + vec4 ssil = textureLod(sampler2D(ssil_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), screen_uv, 0.0); + ambient_light *= 1.0 - ssil.a; + ambient_light += ssil.rgb * albedo.rgb; + } + //this saves some VGPRs vec3 f0 = F0(metallic, specular, albedo); @@ -1235,7 +1389,7 @@ void main() { float a004 = min(r.x * r.x, exp2(-9.28 * ndotv)) * r.x + r.y; vec2 env = vec2(-1.04, 1.04) * a004 + r.zw; - specular_light *= env.x * f0 + env.y; + specular_light *= env.x * f0 + env.y * clamp(50.0 * f0.g, 0.0, 1.0); #endif } @@ -1249,9 +1403,10 @@ void main() { // LIGHTING #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) - { //directional light + { // Directional light. - // Do shadow and lighting in two passes to reduce register pressure + // Do shadow and lighting in two passes to reduce register pressure. +#ifndef SHADOWS_DISABLED uint shadow0 = 0; uint shadow1 = 0; @@ -1270,21 +1425,21 @@ void main() { float shadow = 1.0; - //version with soft shadows, more expensive - if (directional_lights.data[i].shadow_enabled) { - if (sc_use_directional_soft_shadows && directional_lights.data[i].softshadow_angle > 0) { - float depth_z = -vertex.z; - - vec3 shadow_color = vec3(0.0); - vec3 light_dir = directional_lights.data[i].direction; + if (directional_lights.data[i].shadow_opacity > 0.001) { + float depth_z = -vertex.z; + vec3 light_dir = directional_lights.data[i].direction; + vec3 base_normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(light_dir, -normalize(normal_interp)))); -#define BIAS_FUNC(m_var, m_idx) \ - m_var.xyz += light_dir * directional_lights.data[i].shadow_bias[m_idx]; \ - vec3 normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(light_dir, -normalize(normal_interp)))) * directional_lights.data[i].shadow_normal_bias[m_idx]; \ - normal_bias -= light_dir * dot(light_dir, normal_bias); \ +#define BIAS_FUNC(m_var, m_idx) \ + m_var.xyz += light_dir * directional_lights.data[i].shadow_bias[m_idx]; \ + vec3 normal_bias = base_normal_bias * directional_lights.data[i].shadow_normal_bias[m_idx]; \ + normal_bias -= light_dir * dot(light_dir, normal_bias); \ m_var.xyz += normal_bias; - uint blend_index = 0; + //version with soft shadows, more expensive + if (sc_use_directional_soft_shadows && directional_lights.data[i].softshadow_angle > 0) { + uint blend_count = 0; + const uint blend_max = directional_lights.data[i].blend_splits ? 2 : 1; if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { vec4 v = vec4(vertex, 1.0); @@ -1299,10 +1454,10 @@ void main() { float test_radius = (range_pos - range_begin) * directional_lights.data[i].softshadow_angle; vec2 tex_scale = directional_lights.data[i].uv_scale1 * test_radius; shadow = sample_directional_soft_shadow(directional_shadow_atlas, pssm_coord.xyz, tex_scale * directional_lights.data[i].soft_shadow_scale); - blend_index++; + blend_count++; } - if (blend_index < 2 && depth_z < directional_lights.data[i].shadow_split_offsets.y) { + if (blend_count < blend_max && depth_z < directional_lights.data[i].shadow_split_offsets.y) { vec4 v = vec4(vertex, 1.0); BIAS_FUNC(v, 1) @@ -1316,7 +1471,7 @@ void main() { vec2 tex_scale = directional_lights.data[i].uv_scale2 * test_radius; float s = sample_directional_soft_shadow(directional_shadow_atlas, pssm_coord.xyz, tex_scale * directional_lights.data[i].soft_shadow_scale); - if (blend_index == 0) { + if (blend_count == 0) { shadow = s; } else { //blend @@ -1324,10 +1479,10 @@ void main() { shadow = mix(shadow, s, blend); } - blend_index++; + blend_count++; } - if (blend_index < 2 && depth_z < directional_lights.data[i].shadow_split_offsets.z) { + if (blend_count < blend_max && depth_z < directional_lights.data[i].shadow_split_offsets.z) { vec4 v = vec4(vertex, 1.0); BIAS_FUNC(v, 2) @@ -1341,7 +1496,7 @@ void main() { vec2 tex_scale = directional_lights.data[i].uv_scale3 * test_radius; float s = sample_directional_soft_shadow(directional_shadow_atlas, pssm_coord.xyz, tex_scale * directional_lights.data[i].soft_shadow_scale); - if (blend_index == 0) { + if (blend_count == 0) { shadow = s; } else { //blend @@ -1349,10 +1504,10 @@ void main() { shadow = mix(shadow, s, blend); } - blend_index++; + blend_count++; } - if (blend_index < 2) { + if (blend_count < blend_max) { vec4 v = vec4(vertex, 1.0); BIAS_FUNC(v, 3) @@ -1366,7 +1521,7 @@ void main() { vec2 tex_scale = directional_lights.data[i].uv_scale4 * test_radius; float s = sample_directional_soft_shadow(directional_shadow_atlas, pssm_coord.xyz, tex_scale * directional_lights.data[i].soft_shadow_scale); - if (blend_index == 0) { + if (blend_count == 0) { shadow = s; } else { //blend @@ -1375,20 +1530,10 @@ void main() { } } -#undef BIAS_FUNC } else { //no soft shadows - float depth_z = -vertex.z; - vec4 pssm_coord; - vec3 light_dir = directional_lights.data[i].direction; - vec3 base_normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(light_dir, -normalize(normal_interp)))); - -#define BIAS_FUNC(m_var, m_idx) \ - m_var.xyz += light_dir * directional_lights.data[i].shadow_bias[m_idx]; \ - vec3 normal_bias = base_normal_bias * directional_lights.data[i].shadow_normal_bias[m_idx]; \ - normal_bias -= light_dir * dot(light_dir, normal_bias); \ - m_var.xyz += normal_bias; + float blur_factor; if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { vec4 v = vec4(vertex, 1.0); @@ -1396,63 +1541,77 @@ void main() { BIAS_FUNC(v, 0) pssm_coord = (directional_lights.data[i].shadow_matrix1 * v); + blur_factor = 1.0; } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { vec4 v = vec4(vertex, 1.0); BIAS_FUNC(v, 1) pssm_coord = (directional_lights.data[i].shadow_matrix2 * v); + // Adjust shadow blur with reference to the first split to reduce discrepancy between shadow splits. + blur_factor = directional_lights.data[i].shadow_split_offsets.x / directional_lights.data[i].shadow_split_offsets.y; } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { vec4 v = vec4(vertex, 1.0); BIAS_FUNC(v, 2) pssm_coord = (directional_lights.data[i].shadow_matrix3 * v); - + // Adjust shadow blur with reference to the first split to reduce discrepancy between shadow splits. + blur_factor = directional_lights.data[i].shadow_split_offsets.x / directional_lights.data[i].shadow_split_offsets.z; } else { vec4 v = vec4(vertex, 1.0); BIAS_FUNC(v, 3) pssm_coord = (directional_lights.data[i].shadow_matrix4 * v); + // Adjust shadow blur with reference to the first split to reduce discrepancy between shadow splits. + blur_factor = directional_lights.data[i].shadow_split_offsets.x / directional_lights.data[i].shadow_split_offsets.w; } pssm_coord /= pssm_coord.w; - shadow = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); + shadow = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale * blur_factor, pssm_coord); if (directional_lights.data[i].blend_splits) { float pssm_blend; + float blur_factor2; if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { vec4 v = vec4(vertex, 1.0); BIAS_FUNC(v, 1) pssm_coord = (directional_lights.data[i].shadow_matrix2 * v); pssm_blend = smoothstep(0.0, directional_lights.data[i].shadow_split_offsets.x, depth_z); + // Adjust shadow blur with reference to the first split to reduce discrepancy between shadow splits. + blur_factor2 = directional_lights.data[i].shadow_split_offsets.x / directional_lights.data[i].shadow_split_offsets.y; } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { vec4 v = vec4(vertex, 1.0); BIAS_FUNC(v, 2) pssm_coord = (directional_lights.data[i].shadow_matrix3 * v); pssm_blend = smoothstep(directional_lights.data[i].shadow_split_offsets.x, directional_lights.data[i].shadow_split_offsets.y, depth_z); + // Adjust shadow blur with reference to the first split to reduce discrepancy between shadow splits. + blur_factor2 = directional_lights.data[i].shadow_split_offsets.x / directional_lights.data[i].shadow_split_offsets.z; } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { vec4 v = vec4(vertex, 1.0); BIAS_FUNC(v, 3) pssm_coord = (directional_lights.data[i].shadow_matrix4 * v); pssm_blend = smoothstep(directional_lights.data[i].shadow_split_offsets.y, directional_lights.data[i].shadow_split_offsets.z, depth_z); + // Adjust shadow blur with reference to the first split to reduce discrepancy between shadow splits. + blur_factor2 = directional_lights.data[i].shadow_split_offsets.x / directional_lights.data[i].shadow_split_offsets.w; } else { pssm_blend = 0.0; //if no blend, same coord will be used (divide by z will result in same value, and already cached) + blur_factor2 = 1.0; } pssm_coord /= pssm_coord.w; - float shadow2 = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); + float shadow2 = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale * blur_factor2, pssm_coord); shadow = mix(shadow, shadow2, pssm_blend); } + } - shadow = mix(shadow, 1.0, smoothstep(directional_lights.data[i].fade_from, directional_lights.data[i].fade_to, vertex.z)); //done with negative values for performance + shadow = mix(shadow, 1.0, smoothstep(directional_lights.data[i].fade_from, directional_lights.data[i].fade_to, vertex.z)); //done with negative values for performance #undef BIAS_FUNC - } } // shadows if (i < 4) { @@ -1461,6 +1620,7 @@ void main() { shadow1 |= uint(clamp(shadow * 255.0, 0.0, 255.0)) << ((i - 4) * 8); } } +#endif // SHADOWS_DISABLED for (uint i = 0; i < 8; i++) { if (i >= scene_data.directional_light_count) { @@ -1474,7 +1634,7 @@ void main() { #ifdef LIGHT_TRANSMITTANCE_USED float transmittance_z = transmittance_depth; - if (directional_lights.data[i].shadow_enabled) { + if (directional_lights.data[i].shadow_opacity > 0.001) { float depth_z = -vertex.z; if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { @@ -1523,18 +1683,21 @@ void main() { #endif float shadow = 1.0; - +#ifndef SHADOWS_DISABLED if (i < 4) { shadow = float(shadow0 >> (i * 8) & 0xFF) / 255.0; } else { shadow = float(shadow1 >> ((i - 4) * 8) & 0xFF) / 255.0; } + shadow = shadow * directional_lights.data[i].shadow_opacity + 1.0 - directional_lights.data[i].shadow_opacity; +#endif + blur_shadow(shadow); float size_A = sc_use_light_soft_shadows ? directional_lights.data[i].size : 0.0; - light_compute(normal, directional_lights.data[i].direction, normalize(view), size_A, directional_lights.data[i].color * directional_lights.data[i].energy, shadow, f0, orms, 1.0, + light_compute(normal, directional_lights.data[i].direction, normalize(view), size_A, directional_lights.data[i].color * directional_lights.data[i].energy, shadow, f0, orms, 1.0, albedo, alpha, #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -1545,16 +1708,14 @@ void main() { transmittance_z, #endif #ifdef LIGHT_RIM_USED - rim, rim_tint, albedo, + rim, rim_tint, #endif #ifdef LIGHT_CLEARCOAT_USED - clearcoat, clearcoat_gloss, + clearcoat, clearcoat_roughness, normalize(normal_interp), #endif #ifdef LIGHT_ANISOTROPY_USED - binormal, tangent, anisotropy, -#endif -#ifdef USE_SHADOW_TO_OPACITY - alpha, + binormal, + tangent, anisotropy, #endif diffuse_light, specular_light); @@ -1608,7 +1769,7 @@ void main() { shadow = blur_shadow(shadow); - light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow, + light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow, albedo, alpha, #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -1620,17 +1781,13 @@ void main() { #ifdef LIGHT_RIM_USED rim, rim_tint, - albedo, #endif #ifdef LIGHT_CLEARCOAT_USED - clearcoat, clearcoat_gloss, + clearcoat, clearcoat_roughness, normalize(normal_interp), #endif #ifdef LIGHT_ANISOTROPY_USED tangent, binormal, anisotropy, #endif -#ifdef USE_SHADOW_TO_OPACITY - alpha, -#endif diffuse_light, specular_light); } } @@ -1684,7 +1841,7 @@ void main() { shadow = blur_shadow(shadow); - light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow, + light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow, albedo, alpha, #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -1696,16 +1853,13 @@ void main() { #ifdef LIGHT_RIM_USED rim, rim_tint, - albedo, #endif #ifdef LIGHT_CLEARCOAT_USED - clearcoat, clearcoat_gloss, + clearcoat, clearcoat_roughness, normalize(normal_interp), #endif #ifdef LIGHT_ANISOTROPY_USED - tangent, binormal, anisotropy, -#endif -#ifdef USE_SHADOW_TO_OPACITY - alpha, + tangent, + binormal, anisotropy, #endif diffuse_light, specular_light); } @@ -1723,7 +1877,7 @@ void main() { #ifdef USE_OPAQUE_PREPASS - if (alpha < opaque_prepass_threshold) { + if (alpha < scene_data.opaque_prepass_threshold) { discard; } @@ -1757,7 +1911,7 @@ void main() { vec3(0, -1, 0), vec3(0, 0, -1)); - vec3 cam_normal = mat3(scene_data.camera_matrix) * normalize(normal_interp); + vec3 cam_normal = mat3(scene_data.inv_view_matrix) * normalize(normal_interp); float closest_dist = -1e20; @@ -1769,7 +1923,11 @@ void main() { } } +#ifdef MOLTENVK_USED + imageStore(geom_facing_grid, grid_pos, uvec4(imageLoad(geom_facing_grid, grid_pos).r | facing_bits)); //store facing bits +#else imageAtomicOr(geom_facing_grid, grid_pos, facing_bits); //store facing bits +#endif if (length(emission) > 0.001) { float lumas[6]; @@ -1880,7 +2038,7 @@ void main() { //restore fog fog = vec4(unpackHalf2x16(fog_rg), unpackHalf2x16(fog_ba)); -#ifdef MODE_MULTIPLE_RENDER_TARGETS +#ifdef MODE_SEPARATE_SPECULAR #ifdef MODE_UNSHADED diffuse_buffer = vec4(albedo.rgb, 0.0); @@ -1898,20 +2056,38 @@ void main() { diffuse_buffer.rgb = mix(diffuse_buffer.rgb, fog.rgb, fog.a); specular_buffer.rgb = mix(specular_buffer.rgb, vec3(0.0), fog.a); -#else //MODE_MULTIPLE_RENDER_TARGETS +#else //MODE_SEPARATE_SPECULAR #ifdef MODE_UNSHADED frag_color = vec4(albedo, alpha); #else frag_color = vec4(emission + ambient_light + diffuse_light + specular_light, alpha); - //frag_color = vec4(1.0); +//frag_color = vec4(1.0); #endif //USE_NO_SHADING // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky. frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); - ; -#endif //MODE_MULTIPLE_RENDER_TARGETS +#endif //MODE_SEPARATE_SPECULAR #endif //MODE_RENDER_DEPTH +#ifdef MOTION_VECTORS + vec2 position_clip = (screen_position.xy / screen_position.w) - scene_data.taa_jitter; + vec2 prev_position_clip = (prev_screen_position.xy / prev_screen_position.w) - scene_data_block.prev_data.taa_jitter; + + vec2 position_uv = position_clip * vec2(0.5, 0.5); + vec2 prev_position_uv = prev_position_clip * vec2(0.5, 0.5); + + motion_vector = position_uv - prev_position_uv; +#endif +} + +void main() { +#ifdef MODE_DUAL_PARABOLOID + + if (dp_clip > 0.0) + discard; +#endif + + fragment_shader(scene_data_block.data); } diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_clustered_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_clustered_inc.glsl index b53bf6a6d4..f0717294ef 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_clustered_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_clustered_inc.glsl @@ -2,6 +2,7 @@ #define ROUGHNESS_MAX_LOD 5 #define MAX_VOXEL_GI_INSTANCES 8 +#define MAX_VIEWS 2 #if defined(has_GL_KHR_shader_subgroup_ballot) && defined(has_GL_KHR_shader_subgroup_arithmetic) @@ -12,16 +13,20 @@ #endif +#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview) +#extension GL_EXT_multiview : enable +#endif + #include "cluster_data_inc.glsl" #include "decal_data_inc.glsl" -#if !defined(MODE_RENDER_DEPTH) || defined(MODE_RENDER_MATERIAL) || defined(MODE_RENDER_SDF) || defined(MODE_RENDER_NORMAL_ROUGHNESS) || defined(MODE_RENDER_VOXEL_GI) || defined(TANGENT_USED) || defined(NORMAL_MAP_USED) +#if !defined(MODE_RENDER_DEPTH) || defined(MODE_RENDER_MATERIAL) || defined(MODE_RENDER_SDF) || defined(MODE_RENDER_NORMAL_ROUGHNESS) || defined(MODE_RENDER_VOXEL_GI) || defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) #ifndef NORMAL_USED #define NORMAL_USED #endif #endif -layout(push_constant, binding = 0, std430) uniform DrawCall { +layout(push_constant, std430) uniform DrawCall { uint instance_index; uint uv_offset; uint pad0; @@ -68,9 +73,13 @@ layout(set = 0, binding = 4) uniform sampler light_projector_sampler; #define INSTANCE_FLAGS_MULTIMESH_HAS_COLOR (1 << 14) #define INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA (1 << 15) #define INSTANCE_FLAGS_PARTICLE_TRAIL_SHIFT 16 +#define INSTANCE_FLAGS_FADE_SHIFT 24 //3 bits of stride #define INSTANCE_FLAGS_PARTICLE_TRAIL_MASK 0xFF +#define SCREEN_SPACE_EFFECTS_FLAGS_USE_SSAO 1 +#define SCREEN_SPACE_EFFECTS_FLAGS_USE_SSIL 2 + layout(set = 0, binding = 5, std430) restrict readonly buffer OmniLights { LightData data[]; } @@ -120,10 +129,10 @@ layout(set = 0, binding = 13, std430) restrict readonly buffer Decals { } decals; -layout(set = 0, binding = 14, std430) restrict readonly buffer GlobalVariableData { +layout(set = 0, binding = 14, std430) restrict readonly buffer GlobalShaderUniformData { vec4 data[]; } -global_variables; +global_shader_uniforms; struct SDFVoxelGICascadeData { vec3 position; @@ -162,12 +171,16 @@ sdfgi; /* Set 1: Render Pass (changes per render pass) */ -layout(set = 1, binding = 0, std140) uniform SceneData { +struct SceneData { mat4 projection_matrix; mat4 inv_projection_matrix; + mat4 inv_view_matrix; + mat4 view_matrix; - mat4 camera_matrix; - mat4 inv_camera_matrix; + // only used for multiview + mat4 projection_matrix_view[MAX_VIEWS]; + mat4 inv_projection_matrix_view[MAX_VIEWS]; + vec4 eye_offset[MAX_VIEWS]; vec2 viewport_size; vec2 screen_pixel_size; @@ -200,16 +213,15 @@ layout(set = 1, binding = 0, std140) uniform SceneData { float z_far; float z_near; - bool ssao_enabled; + uint ss_effects_flags; float ssao_light_affect; float ssao_ao_affect; bool roughness_limiter_enabled; float roughness_limiter_amount; float roughness_limiter_limit; - uvec2 roughness_limiter_pad; - - vec4 ao_color; + float opaque_prepass_threshold; + uint roughness_limiter_pad; mat4 sdf_to_bounds; @@ -238,11 +250,19 @@ layout(set = 1, binding = 0, std140) uniform SceneData { float reflection_multiplier; // one normally, zero when rendering reflections bool pancake_shadows; + vec2 taa_jitter; + uvec2 pad2; +}; + +layout(set = 1, binding = 0, std140) uniform SceneDataBlock { + SceneData data; + SceneData prev_data; } -scene_data; +scene_data_block; struct InstanceData { mat4 transform; + mat4 prev_transform; uint flags; uint instance_uniforms_ofs; //base offset in global buffer for instance variables uint gi_offset; //GI information when using lightmapping (VCT or lightmap index) @@ -297,27 +317,29 @@ layout(r32ui, set = 1, binding = 12) uniform restrict uimage3D geom_facing_grid; layout(set = 1, binding = 9) uniform texture2D depth_buffer; layout(set = 1, binding = 10) uniform texture2D color_buffer; +#ifdef USE_MULTIVIEW +layout(set = 1, binding = 11) uniform texture2DArray normal_roughness_buffer; +layout(set = 1, binding = 13) uniform texture2DArray ambient_buffer; +layout(set = 1, binding = 14) uniform texture2DArray reflection_buffer; +#else // USE_MULTIVIEW layout(set = 1, binding = 11) uniform texture2D normal_roughness_buffer; -layout(set = 1, binding = 12) uniform texture2D ao_buffer; layout(set = 1, binding = 13) uniform texture2D ambient_buffer; layout(set = 1, binding = 14) uniform texture2D reflection_buffer; +#endif +layout(set = 1, binding = 12) uniform texture2D ao_buffer; layout(set = 1, binding = 15) uniform texture2DArray sdfgi_lightprobe_texture; layout(set = 1, binding = 16) uniform texture3D sdfgi_occlusion_cascades; struct VoxelGIData { - mat4 xform; - vec3 bounds; - float dynamic_range; + mat4 xform; // 64 - 64 - float bias; - float normal_bias; - bool blend_ambient; - uint texture_slot; + vec3 bounds; // 12 - 76 + float dynamic_range; // 4 - 80 - float anisotropy_strength; - float ambient_occlusion; - float ambient_occlusion_size; - uint mipmaps; + float bias; // 4 - 84 + float normal_bias; // 4 - 88 + bool blend_ambient; // 4 - 92 + uint mipmaps; // 4 - 96 }; layout(set = 1, binding = 17, std140) uniform VoxelGIs { @@ -327,6 +349,8 @@ voxel_gi_instances; layout(set = 1, binding = 18) uniform texture3D volumetric_fog_texture; +layout(set = 1, binding = 19) uniform texture2D ssil_buffer; + #endif /* Set 2 Skeleton & Instancing (can change per item) */ diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_lights_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_lights_inc.glsl index f3db4abe3b..7299bb0576 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_lights_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_lights_inc.glsl @@ -1,55 +1,29 @@ // Functions related to lighting -// This returns the G_GGX function divided by 2 cos_theta_m, where in practice cos_theta_m is either N.L or N.V. -// We're dividing this factor off because the overall term we'll end up looks like -// (see, for example, the first unnumbered equation in B. Burley, "Physically Based Shading at Disney", SIGGRAPH 2012): -// -// F(L.V) D(N.H) G(N.L) G(N.V) / (4 N.L N.V) -// -// We're basically regouping this as -// -// F(L.V) D(N.H) [G(N.L)/(2 N.L)] [G(N.V) / (2 N.V)] -// -// and thus, this function implements the [G(N.m)/(2 N.m)] part with m = L or V. -// -// The contents of the D and G (G1) functions (GGX) are taken from -// E. Heitz, "Understanding the Masking-Shadowing Function in Microfacet-Based BRDFs", J. Comp. Graph. Tech. 3 (2) (2014). -// Eqns 71-72 and 85-86 (see also Eqns 43 and 80). - -float G_GGX_2cos(float cos_theta_m, float alpha) { - // Schlick's approximation - // C. Schlick, "An Inexpensive BRDF Model for Physically-based Rendering", Computer Graphics Forum. 13 (3): 233 (1994) - // Eq. (19), although see Heitz (2014) the about the problems with his derivation. - // It nevertheless approximates GGX well with k = alpha/2. - float k = 0.5 * alpha; - return 0.5 / (cos_theta_m * (1.0 - k) + k); - - // float cos2 = cos_theta_m * cos_theta_m; - // float sin2 = (1.0 - cos2); - // return 1.0 / (cos_theta_m + sqrt(cos2 + alpha * alpha * sin2)); -} - float D_GGX(float cos_theta_m, float alpha) { - float alpha2 = alpha * alpha; - float d = 1.0 + (alpha2 - 1.0) * cos_theta_m * cos_theta_m; - return alpha2 / (M_PI * d * d); + float a = cos_theta_m * alpha; + float k = alpha / (1.0 - cos_theta_m * cos_theta_m + a * a); + return k * k * (1.0 / M_PI); } -float G_GGX_anisotropic_2cos(float cos_theta_m, float alpha_x, float alpha_y, float cos_phi, float sin_phi) { - float cos2 = cos_theta_m * cos_theta_m; - float sin2 = (1.0 - cos2); - float s_x = alpha_x * cos_phi; - float s_y = alpha_y * sin_phi; - return 1.0 / max(cos_theta_m + sqrt(cos2 + (s_x * s_x + s_y * s_y) * sin2), 0.001); +// From Earl Hammon, Jr. "PBR Diffuse Lighting for GGX+Smith Microsurfaces" https://www.gdcvault.com/play/1024478/PBR-Diffuse-Lighting-for-GGX +float V_GGX(float NdotL, float NdotV, float alpha) { + return 0.5 / mix(2.0 * NdotL * NdotV, NdotL + NdotV, alpha); } float D_GGX_anisotropic(float cos_theta_m, float alpha_x, float alpha_y, float cos_phi, float sin_phi) { - float cos2 = cos_theta_m * cos_theta_m; - float sin2 = (1.0 - cos2); - float r_x = cos_phi / alpha_x; - float r_y = sin_phi / alpha_y; - float d = cos2 + sin2 * (r_x * r_x + r_y * r_y); - return 1.0 / max(M_PI * alpha_x * alpha_y * d * d, 0.001); + float alpha2 = alpha_x * alpha_y; + highp vec3 v = vec3(alpha_y * cos_phi, alpha_x * sin_phi, alpha2 * cos_theta_m); + highp float v2 = dot(v, v); + float w2 = alpha2 / v2; + float D = alpha2 * w2 * w2 * (1.0 / M_PI); + return D; +} + +float V_GGX_anisotropic(float alpha_x, float alpha_y, float TdotV, float TdotL, float BdotV, float BdotL, float NdotV, float NdotL) { + float Lambda_V = NdotL * length(vec3(alpha_x * TdotV, alpha_y * BdotV, NdotV)); + float Lambda_L = NdotV * length(vec3(alpha_x * TdotL, alpha_y * BdotL, NdotL)); + return 0.5 / (Lambda_V + Lambda_L); } float SchlickFresnel(float u) { @@ -58,14 +32,6 @@ float SchlickFresnel(float u) { return m2 * m2 * m; // pow(m,5) } -float GTR1(float NdotH, float a) { - if (a >= 1.0) - return 1.0 / M_PI; - float a2 = a * a; - float t = 1.0 + (a2 - 1.0) * NdotH * NdotH; - return (a2 - 1.0) / (M_PI * log(a2) * t); -} - vec3 F0(float metallic, float specular, vec3 albedo) { float dielectric = 0.16 * specular * specular; // use albedo * metallic as colored specular reflectance at 0 angle for metallic materials; @@ -73,7 +39,7 @@ vec3 F0(float metallic, float specular, vec3 albedo) { return mix(vec3(dielectric), albedo, vec3(metallic)); } -void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float attenuation, vec3 f0, uint orms, float specular_amount, +void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float attenuation, vec3 f0, uint orms, float specular_amount, vec3 albedo, inout float alpha, #ifdef LIGHT_BACKLIGHT_USED vec3 backlight, #endif @@ -84,19 +50,21 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float atte float transmittance_z, #endif #ifdef LIGHT_RIM_USED - float rim, float rim_tint, vec3 rim_color, + float rim, float rim_tint, #endif #ifdef LIGHT_CLEARCOAT_USED - float clearcoat, float clearcoat_gloss, + float clearcoat, float clearcoat_roughness, vec3 vertex_normal, #endif #ifdef LIGHT_ANISOTROPY_USED vec3 B, vec3 T, float anisotropy, #endif -#ifdef USE_SHADOW_TO_OPACITY - inout float alpha, -#endif inout vec3 diffuse_light, inout vec3 specular_light) { + vec4 orms_unpacked = unpackUnorm4x8(orms); + + float roughness = orms_unpacked.y; + float metallic = orms_unpacked.z; + #if defined(LIGHT_CODE_USED) // light is written by the light shader @@ -111,13 +79,13 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float atte float NdotL = min(A + dot(N, L), 1.0); float cNdotL = max(NdotL, 0.0); // clamped NdotL float NdotV = dot(N, V); - float cNdotV = max(NdotV, 0.0); + float cNdotV = max(NdotV, 1e-4); -#if defined(DIFFUSE_BURLEY) || defined(SPECULAR_BLINN) || defined(SPECULAR_SCHLICK_GGX) || defined(LIGHT_CLEARCOAT_USED) +#if defined(DIFFUSE_BURLEY) || defined(SPECULAR_SCHLICK_GGX) || defined(LIGHT_CLEARCOAT_USED) vec3 H = normalize(V + L); #endif -#if defined(SPECULAR_BLINN) || defined(SPECULAR_SCHLICK_GGX) || defined(LIGHT_CLEARCOAT_USED) +#if defined(SPECULAR_SCHLICK_GGX) float cNdotH = clamp(A + dot(N, H), 0.0, 1.0); #endif @@ -125,9 +93,7 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float atte float cLdotH = clamp(A + dot(L, H), 0.0, 1.0); #endif - float metallic = unpackUnorm4x8(orms).z; if (metallic < 1.0) { - float roughness = unpackUnorm4x8(orms).y; float diffuse_brdf_NL; // BRDF times N.L for calculating diffuse radiance #if defined(DIFFUSE_LAMBERT_WRAP) @@ -168,7 +134,7 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float atte #if defined(LIGHT_RIM_USED) float rim_light = pow(max(0.0, 1.0 - cNdotV), max(0.0, (1.0 - roughness) * 16.0)); - diffuse_light += rim_light * rim * mix(vec3(1.0), rim_color, rim_tint) * light_color; + diffuse_light += rim_light * rim * mix(vec3(1.0), albedo, rim_tint) * light_color; #endif #ifdef LIGHT_TRANSMITTANCE_USED @@ -179,11 +145,11 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float atte float d = scale * abs(transmittance_z); float dd = -d * d; vec3 profile = vec3(0.233, 0.455, 0.649) * exp(dd / 0.0064) + - vec3(0.1, 0.336, 0.344) * exp(dd / 0.0484) + - vec3(0.118, 0.198, 0.0) * exp(dd / 0.187) + - vec3(0.113, 0.007, 0.007) * exp(dd / 0.567) + - vec3(0.358, 0.004, 0.0) * exp(dd / 1.99) + - vec3(0.078, 0.0, 0.0) * exp(dd / 7.41); + vec3(0.1, 0.336, 0.344) * exp(dd / 0.0484) + + vec3(0.118, 0.198, 0.0) * exp(dd / 0.187) + + vec3(0.113, 0.007, 0.007) * exp(dd / 0.567) + + vec3(0.358, 0.004, 0.0) * exp(dd / 1.99) + + vec3(0.078, 0.0, 0.0) * exp(dd / 7.41); diffuse_light += profile * transmittance_color.a * light_color * clamp(transmittance_boost - NdotL, 0.0, 1.0) * (1.0 / M_PI); #else @@ -199,31 +165,11 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float atte #endif //LIGHT_TRANSMITTANCE_USED } - float roughness = unpackUnorm4x8(orms).y; if (roughness > 0.0) { // FIXME: roughness == 0 should not disable specular light entirely // D -#if defined(SPECULAR_BLINN) - - //normalized blinn - float shininess = exp2(15.0 * (1.0 - roughness) + 1.0) * 0.25; - float blinn = pow(cNdotH, shininess); - blinn *= (shininess + 2.0) * (1.0 / (8.0 * M_PI)); - - specular_light += light_color * attenuation * specular_amount * blinn * f0 * unpackUnorm4x8(orms).w; - -#elif defined(SPECULAR_PHONG) - - vec3 R = normalize(-reflect(L, N)); - float cRdotV = clamp(A + dot(R, V), 0.0, 1.0); - float shininess = exp2(15.0 * (1.0 - roughness) + 1.0) * 0.25; - float phong = pow(cRdotV, shininess); - phong *= (shininess + 1.0) * (1.0 / (8.0 * M_PI)); - - specular_light += light_color * attenuation * specular_amount * phong * f0 * unpackUnorm4x8(orms).w; - -#elif defined(SPECULAR_TOON) +#if defined(SPECULAR_TOON) vec3 R = normalize(-reflect(L, N)); float RdotV = dot(R, V); @@ -237,26 +183,26 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float atte #elif defined(SPECULAR_SCHLICK_GGX) // shlick+ggx as default - + float alpha_ggx = roughness * roughness; #if defined(LIGHT_ANISOTROPY_USED) - float alpha_ggx = roughness * roughness; float aspect = sqrt(1.0 - anisotropy * 0.9); float ax = alpha_ggx / aspect; float ay = alpha_ggx * aspect; float XdotH = dot(T, H); float YdotH = dot(B, H); float D = D_GGX_anisotropic(cNdotH, ax, ay, XdotH, YdotH); - float G = G_GGX_anisotropic_2cos(cNdotL, ax, ay, XdotH, YdotH) * G_GGX_anisotropic_2cos(cNdotV, ax, ay, XdotH, YdotH); - -#else - float alpha_ggx = roughness * roughness; + float G = V_GGX_anisotropic(ax, ay, dot(T, V), dot(T, L), dot(B, V), dot(B, L), cNdotV, cNdotL); +#else // LIGHT_ANISOTROPY_USED float D = D_GGX(cNdotH, alpha_ggx); - float G = G_GGX_2cos(cNdotL, alpha_ggx) * G_GGX_2cos(cNdotV, alpha_ggx); -#endif - // F + float G = V_GGX(cNdotL, cNdotV, alpha_ggx); +#endif // LIGHT_ANISOTROPY_USED + // F float cLdotH5 = SchlickFresnel(cLdotH); - vec3 F = mix(vec3(cLdotH5), vec3(1.0), f0); + // Calculate Fresnel using specular occlusion term from Filament: + // https://google.github.io/filament/Filament.html#lighting/occlusion/specularocclusion + float f90 = clamp(dot(f0, vec3(50.0 * 0.33)), 0.0, 1.0); + vec3 F = f0 + (f90 - f0) * cLdotH5; vec3 specular_brdf_NL = cNdotL * D * F * G; @@ -264,18 +210,23 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float atte #endif #if defined(LIGHT_CLEARCOAT_USED) + // Clearcoat ignores normal_map, use vertex normal instead + float ccNdotL = max(min(A + dot(vertex_normal, L), 1.0), 0.0); + float ccNdotH = clamp(A + dot(vertex_normal, H), 0.0, 1.0); + float ccNdotV = max(dot(vertex_normal, V), 1e-4); #if !defined(SPECULAR_SCHLICK_GGX) float cLdotH5 = SchlickFresnel(cLdotH); #endif - float Dr = GTR1(cNdotH, mix(.1, .001, clearcoat_gloss)); + float Dr = D_GGX(ccNdotH, mix(0.001, 0.1, clearcoat_roughness)); + float Gr = 0.25 / (cLdotH * cLdotH); float Fr = mix(.04, 1.0, cLdotH5); - float Gr = G_GGX_2cos(cNdotL, .25) * G_GGX_2cos(cNdotV, .25); - - float clearcoat_specular_brdf_NL = 0.25 * clearcoat * Gr * Fr * Dr * cNdotL; + float clearcoat_specular_brdf_NL = clearcoat * Gr * Fr * Dr * cNdotL; specular_light += clearcoat_specular_brdf_NL * light_color * attenuation * specular_amount; -#endif + // TODO: Clearcoat adds light to the scene right now (it is non-energy conserving), both diffuse and specular need to be scaled by (1.0 - FR) + // but to do so we need to rearrange this entire function +#endif // LIGHT_CLEARCOAT_USED } #ifdef USE_SHADOW_TO_OPACITY @@ -285,7 +236,7 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, float atte #endif //defined(LIGHT_CODE_USED) } -#ifndef USE_NO_SHADOWS +#ifndef SHADOWS_DISABLED // Interleaved Gradient Noise // https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare @@ -299,7 +250,7 @@ float sample_directional_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, ve float depth = coord.z; //if only one sample is taken, take it from the center - if (sc_directional_soft_shadow_samples == 1) { + if (sc_directional_soft_shadow_samples == 0) { return textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos, depth, 1.0)); } @@ -314,7 +265,7 @@ float sample_directional_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, ve float avg = 0.0; for (uint i = 0; i < sc_directional_soft_shadow_samples; i++) { - avg += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos + shadow_pixel_size * (disk_rotation * scene_data.directional_soft_shadow_kernel[i].xy), depth, 1.0)); + avg += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos + shadow_pixel_size * (disk_rotation * scene_data_block.data.directional_soft_shadow_kernel[i].xy), depth, 1.0)); } return avg * (1.0 / float(sc_directional_soft_shadow_samples)); @@ -325,7 +276,7 @@ float sample_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, vec3 coord) { float depth = coord.z; //if only one sample is taken, take it from the center - if (sc_soft_shadow_samples == 1) { + if (sc_soft_shadow_samples == 0) { return textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos, depth, 1.0)); } @@ -340,7 +291,7 @@ float sample_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, vec3 coord) { float avg = 0.0; for (uint i = 0; i < sc_soft_shadow_samples; i++) { - avg += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos + shadow_pixel_size * (disk_rotation * scene_data.soft_shadow_kernel[i].xy), depth, 1.0)); + avg += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos + shadow_pixel_size * (disk_rotation * scene_data_block.data.soft_shadow_kernel[i].xy), depth, 1.0)); } return avg * (1.0 / float(sc_soft_shadow_samples)); @@ -348,7 +299,7 @@ float sample_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, vec3 coord) { float sample_omni_pcf_shadow(texture2D shadow, float blur_scale, vec2 coord, vec4 uv_rect, vec2 flip_offset, float depth) { //if only one sample is taken, take it from the center - if (sc_soft_shadow_samples == 1) { + if (sc_soft_shadow_samples == 0) { vec2 pos = coord * 0.5 + 0.5; pos = uv_rect.xy + pos * uv_rect.zw; return textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos, depth, 1.0)); @@ -363,10 +314,10 @@ float sample_omni_pcf_shadow(texture2D shadow, float blur_scale, vec2 coord, vec } float avg = 0.0; - vec2 offset_scale = blur_scale * 2.0 * scene_data.shadow_atlas_pixel_size / uv_rect.zw; + vec2 offset_scale = blur_scale * 2.0 * scene_data_block.data.shadow_atlas_pixel_size / uv_rect.zw; for (uint i = 0; i < sc_soft_shadow_samples; i++) { - vec2 offset = offset_scale * (disk_rotation * scene_data.soft_shadow_kernel[i].xy); + vec2 offset = offset_scale * (disk_rotation * scene_data_block.data.soft_shadow_kernel[i].xy); vec2 sample_coord = coord + offset; float sample_coord_length_sqaured = dot(sample_coord, sample_coord); @@ -403,7 +354,7 @@ float sample_directional_soft_shadow(texture2D shadow, vec3 pssm_coord, vec2 tex } for (uint i = 0; i < sc_directional_penumbra_shadow_samples; i++) { - vec2 suv = pssm_coord.xy + (disk_rotation * scene_data.directional_penumbra_shadow_kernel[i].xy) * tex_scale; + vec2 suv = pssm_coord.xy + (disk_rotation * scene_data_block.data.directional_penumbra_shadow_kernel[i].xy) * tex_scale; float d = textureLod(sampler2D(shadow, material_samplers[SAMPLER_LINEAR_CLAMP]), suv, 0.0).r; if (d < pssm_coord.z) { blocker_average += d; @@ -419,7 +370,7 @@ float sample_directional_soft_shadow(texture2D shadow, vec3 pssm_coord, vec2 tex float s = 0.0; for (uint i = 0; i < sc_directional_penumbra_shadow_samples; i++) { - vec2 suv = pssm_coord.xy + (disk_rotation * scene_data.directional_penumbra_shadow_kernel[i].xy) * tex_scale; + vec2 suv = pssm_coord.xy + (disk_rotation * scene_data_block.data.directional_penumbra_shadow_kernel[i].xy) * tex_scale; s += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(suv, pssm_coord.z, 1.0)); } @@ -431,7 +382,7 @@ float sample_directional_soft_shadow(texture2D shadow, vec3 pssm_coord, vec2 tex } } -#endif //USE_NO_SHADOWS +#endif // SHADOWS_DISABLED float get_omni_attenuation(float distance, float inv_range, float decay) { float nd = distance * inv_range; @@ -443,10 +394,10 @@ float get_omni_attenuation(float distance, float inv_range, float decay) { } float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) { -#ifndef USE_NO_SHADOWS - if (omni_lights.data[idx].shadow_enabled) { +#ifndef SHADOWS_DISABLED + if (omni_lights.data[idx].shadow_opacity > 0.001) { // there is a shadowmap - vec2 texel_size = scene_data.shadow_atlas_pixel_size; + vec2 texel_size = scene_data_block.data.shadow_atlas_pixel_size; vec4 base_uv_rect = omni_lights.data[idx].atlas_rect; base_uv_rect.xy += texel_size; base_uv_rect.zw -= texel_size * 2.0; @@ -490,7 +441,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) { bitangent *= omni_lights.data[idx].soft_shadow_size * omni_lights.data[idx].soft_shadow_scale; for (uint i = 0; i < sc_penumbra_shadow_samples; i++) { - vec2 disk = disk_rotation * scene_data.penumbra_shadow_kernel[i].xy; + vec2 disk = disk_rotation * scene_data_block.data.penumbra_shadow_kernel[i].xy; vec3 pos = local_vert + tangent * disk.x + bitangent * disk.y; @@ -526,7 +477,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) { shadow = 0.0; for (uint i = 0; i < sc_penumbra_shadow_samples; i++) { - vec2 disk = disk_rotation * scene_data.penumbra_shadow_kernel[i].xy; + vec2 disk = disk_rotation * scene_data_block.data.penumbra_shadow_kernel[i].xy; vec3 pos = local_vert + tangent * disk.x + bitangent * disk.y; pos = normalize(pos); @@ -547,6 +498,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) { } shadow /= float(sc_penumbra_shadow_samples); + shadow = mix(1.0, shadow, omni_lights.data[idx].shadow_opacity); } else { //no blockers found, so no shadow @@ -565,7 +517,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) { vec2 pos = shadow_sample.xy / shadow_sample.z; float depth = shadow_len - omni_lights.data[idx].shadow_bias; depth *= omni_lights.data[idx].inv_radius; - shadow = sample_omni_pcf_shadow(shadow_atlas, omni_lights.data[idx].soft_shadow_scale / shadow_sample.z, pos, uv_rect, flip_offset, depth); + shadow = mix(1.0, sample_omni_pcf_shadow(shadow_atlas, omni_lights.data[idx].soft_shadow_scale / shadow_sample.z, pos, uv_rect, flip_offset, depth), omni_lights.data[idx].shadow_opacity); } return shadow; @@ -575,7 +527,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) { return 1.0; } -void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, vec3 f0, uint orms, float shadow, +void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, vec3 f0, uint orms, float shadow, vec3 albedo, inout float alpha, #ifdef LIGHT_BACKLIGHT_USED vec3 backlight, #endif @@ -585,17 +537,14 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v float transmittance_boost, #endif #ifdef LIGHT_RIM_USED - float rim, float rim_tint, vec3 rim_color, + float rim, float rim_tint, #endif #ifdef LIGHT_CLEARCOAT_USED - float clearcoat, float clearcoat_gloss, + float clearcoat, float clearcoat_roughness, vec3 vertex_normal, #endif #ifdef LIGHT_ANISOTROPY_USED vec3 binormal, vec3 tangent, float anisotropy, #endif -#ifdef USE_SHADOW_TO_OPACITY - inout float alpha, -#endif inout vec3 diffuse_light, inout vec3 specular_light) { vec3 light_rel_vec = omni_lights.data[idx].position - vertex; float light_length = length(light_rel_vec); @@ -634,7 +583,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v splane.xy = splane.xy * 0.5 + 0.5; splane.z = shadow_len * omni_lights.data[idx].inv_radius; splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; - // splane.xy = clamp(splane.xy,clamp_rect.xy + scene_data.shadow_atlas_pixel_size,clamp_rect.xy + clamp_rect.zw - scene_data.shadow_atlas_pixel_size ); + // splane.xy = clamp(splane.xy,clamp_rect.xy + scene_data_block.data.shadow_atlas_pixel_size,clamp_rect.xy + clamp_rect.zw - scene_data_block.data.shadow_atlas_pixel_size ); splane.w = 1.0; //needed? i think it should be 1 already float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r; @@ -701,7 +650,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v light_attenuation *= shadow; - light_compute(normal, normalize(light_rel_vec), eye_vec, size_A, color, light_attenuation, f0, orms, omni_lights.data[idx].specular_amount, + light_compute(normal, normalize(light_rel_vec), eye_vec, size_A, color, light_attenuation, f0, orms, omni_lights.data[idx].specular_amount, albedo, alpha, #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -712,24 +661,21 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v transmittance_z, #endif #ifdef LIGHT_RIM_USED - rim * omni_attenuation, rim_tint, rim_color, + rim * omni_attenuation, rim_tint, #endif #ifdef LIGHT_CLEARCOAT_USED - clearcoat, clearcoat_gloss, + clearcoat, clearcoat_roughness, vertex_normal, #endif #ifdef LIGHT_ANISOTROPY_USED binormal, tangent, anisotropy, #endif -#ifdef USE_SHADOW_TO_OPACITY - alpha, -#endif diffuse_light, specular_light); } float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) { -#ifndef USE_NO_SHADOWS - if (spot_lights.data[idx].shadow_enabled) { +#ifndef SHADOWS_DISABLED + if (spot_lights.data[idx].shadow_opacity > 0.001) { vec3 light_rel_vec = spot_lights.data[idx].position - vertex; float light_length = length(light_rel_vec); vec3 spot_dir = spot_lights.data[idx].direction; @@ -767,7 +713,7 @@ float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) { float uv_size = spot_lights.data[idx].soft_shadow_size * z_norm * spot_lights.data[idx].soft_shadow_scale; vec2 clamp_max = spot_lights.data[idx].atlas_rect.xy + spot_lights.data[idx].atlas_rect.zw; for (uint i = 0; i < sc_penumbra_shadow_samples; i++) { - vec2 suv = shadow_uv + (disk_rotation * scene_data.penumbra_shadow_kernel[i].xy) * uv_size; + vec2 suv = shadow_uv + (disk_rotation * scene_data_block.data.penumbra_shadow_kernel[i].xy) * uv_size; suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max); float d = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), suv, 0.0).r; if (d < splane.z) { @@ -784,12 +730,13 @@ float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) { shadow = 0.0; for (uint i = 0; i < sc_penumbra_shadow_samples; i++) { - vec2 suv = shadow_uv + (disk_rotation * scene_data.penumbra_shadow_kernel[i].xy) * uv_size; + vec2 suv = shadow_uv + (disk_rotation * scene_data_block.data.penumbra_shadow_kernel[i].xy) * uv_size; suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max); shadow += textureProj(sampler2DShadow(shadow_atlas, shadow_sampler), vec4(suv, splane.z, 1.0)); } shadow /= float(sc_penumbra_shadow_samples); + shadow = mix(1.0, shadow, spot_lights.data[idx].shadow_opacity); } else { //no blockers found, so no shadow @@ -798,13 +745,13 @@ float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) { } else { //hard shadow vec3 shadow_uv = vec3(splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy, splane.z); - shadow = sample_pcf_shadow(shadow_atlas, spot_lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, shadow_uv); + shadow = mix(1.0, sample_pcf_shadow(shadow_atlas, spot_lights.data[idx].soft_shadow_scale * scene_data_block.data.shadow_atlas_pixel_size, shadow_uv), spot_lights.data[idx].shadow_opacity); } return shadow; } -#endif //USE_NO_SHADOWS +#endif // SHADOWS_DISABLED return 1.0; } @@ -821,7 +768,7 @@ vec2 normal_to_panorama(vec3 n) { return panorama_coords; } -void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, vec3 f0, uint orms, float shadow, +void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 vertex_ddx, vec3 vertex_ddy, vec3 f0, uint orms, float shadow, vec3 albedo, inout float alpha, #ifdef LIGHT_BACKLIGHT_USED vec3 backlight, #endif @@ -831,17 +778,14 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v float transmittance_boost, #endif #ifdef LIGHT_RIM_USED - float rim, float rim_tint, vec3 rim_color, + float rim, float rim_tint, #endif #ifdef LIGHT_CLEARCOAT_USED - float clearcoat, float clearcoat_gloss, + float clearcoat, float clearcoat_roughness, vec3 vertex_normal, #endif #ifdef LIGHT_ANISOTROPY_USED vec3 binormal, vec3 tangent, float anisotropy, #endif -#ifdef USE_SHADOW_TO_OPACITY - inout float alpha, -#endif inout vec3 diffuse_light, inout vec3 specular_light) { vec3 light_rel_vec = spot_lights.data[idx].position - vertex; @@ -887,17 +831,17 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v vec4 splane = (spot_lights.data[idx].shadow_matrix * vec4(vertex, 1.0)); splane /= splane.w; - vec2 proj_uv = normal_to_panorama(splane.xyz) * spot_lights.data[idx].projector_rect.zw; + vec2 proj_uv = splane.xy * spot_lights.data[idx].projector_rect.zw; if (sc_projector_use_mipmaps) { //ensure we have proper mipmaps vec4 splane_ddx = (spot_lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddx, 1.0)); splane_ddx /= splane_ddx.w; - vec2 proj_uv_ddx = normal_to_panorama(splane_ddx.xyz) * spot_lights.data[idx].projector_rect.zw - proj_uv; + vec2 proj_uv_ddx = splane_ddx.xy * spot_lights.data[idx].projector_rect.zw - proj_uv; vec4 splane_ddy = (spot_lights.data[idx].shadow_matrix * vec4(vertex + vertex_ddy, 1.0)); splane_ddy /= splane_ddy.w; - vec2 proj_uv_ddy = normal_to_panorama(splane_ddy.xyz) * spot_lights.data[idx].projector_rect.zw - proj_uv; + vec2 proj_uv_ddy = splane_ddy.xy * spot_lights.data[idx].projector_rect.zw - proj_uv; vec4 proj = textureGrad(sampler2D(decal_atlas_srgb, light_projector_sampler), proj_uv + spot_lights.data[idx].projector_rect.xy, proj_uv_ddx, proj_uv_ddy); color *= proj.rgb * proj.a; @@ -908,7 +852,7 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v } light_attenuation *= shadow; - light_compute(normal, normalize(light_rel_vec), eye_vec, size_A, color, light_attenuation, f0, orms, spot_lights.data[idx].specular_amount, + light_compute(normal, normalize(light_rel_vec), eye_vec, size_A, color, light_attenuation, f0, orms, spot_lights.data[idx].specular_amount, albedo, alpha, #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -919,21 +863,18 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v transmittance_z, #endif #ifdef LIGHT_RIM_USED - rim * spot_attenuation, rim_tint, rim_color, + rim * spot_attenuation, rim_tint, #endif #ifdef LIGHT_CLEARCOAT_USED - clearcoat, clearcoat_gloss, + clearcoat, clearcoat_roughness, vertex_normal, #endif #ifdef LIGHT_ANISOTROPY_USED binormal, tangent, anisotropy, #endif -#ifdef USE_SHADOW_TO_OPACITY - alpha, -#endif diffuse_light, specular_light); } -void reflection_process(uint ref_index, vec3 vertex, vec3 normal, float roughness, vec3 ambient_light, vec3 specular_light, inout vec4 ambient_accum, inout vec4 reflection_accum) { +void reflection_process(uint ref_index, vec3 vertex, vec3 ref_vec, vec3 normal, float roughness, vec3 ambient_light, vec3 specular_light, inout vec4 ambient_accum, inout vec4 reflection_accum) { vec3 box_extents = reflections.data[ref_index].box_extents; vec3 local_pos = (reflections.data[ref_index].local_matrix * vec4(vertex, 1.0)).xyz; @@ -941,8 +882,6 @@ void reflection_process(uint ref_index, vec3 vertex, vec3 normal, float roughnes return; } - vec3 ref_vec = normalize(reflect(vertex, normal)); - vec3 inner_pos = abs(local_pos / box_extents); float blend = max(inner_pos.x, max(inner_pos.y, inner_pos.z)); //make blend more rounded diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_mobile.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_mobile.glsl index 518b0a6c7f..6548793bee 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_mobile.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_mobile.glsl @@ -7,6 +7,8 @@ /* Include our forward mobile UBOs definitions etc. */ #include "scene_forward_mobile_inc.glsl" +#define SHADER_IS_SRGB false + /* INPUT ATTRIBS */ layout(location = 0) in vec3 vertex_attrib; @@ -112,6 +114,8 @@ invariant gl_Position; #GLOBALS +#define scene_data scene_data_block.data + void main() { vec4 instance_custom = vec4(0.0); #if defined(COLOR_USED) @@ -120,13 +124,13 @@ void main() { bool is_multimesh = bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH); - mat4 world_matrix = draw_call.transform; + mat4 model_matrix = draw_call.transform; - mat3 world_normal_matrix; + mat3 model_normal_matrix; if (bool(draw_call.flags & INSTANCE_FLAGS_NON_UNIFORM_SCALE)) { - world_normal_matrix = transpose(inverse(mat3(world_matrix))); + model_normal_matrix = transpose(inverse(mat3(model_matrix))); } else { - world_normal_matrix = mat3(world_matrix); + model_normal_matrix = mat3(model_matrix); } if (is_multimesh) { @@ -219,8 +223,8 @@ void main() { #endif //transpose matrix = transpose(matrix); - world_matrix = world_matrix * matrix; - world_normal_matrix = world_normal_matrix * mat3(matrix); + model_matrix = model_matrix * matrix; + model_normal_matrix = model_normal_matrix * mat3(matrix); } vec3 vertex = vertex_attrib; @@ -257,22 +261,24 @@ void main() { //using world coordinates #if !defined(SKIP_TRANSFORM_USED) && defined(VERTEX_WORLD_COORDS_USED) - vertex = (world_matrix * vec4(vertex, 1.0)).xyz; + vertex = (model_matrix * vec4(vertex, 1.0)).xyz; - normal = world_normal_matrix * normal; +#ifdef NORMAL_USED + normal = model_normal_matrix * normal; +#endif #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) - tangent = world_normal_matrix * tangent; - binormal = world_normal_matrix * binormal; + tangent = model_normal_matrix * tangent; + binormal = model_normal_matrix * binormal; #endif #endif float roughness = 1.0; - mat4 modelview = scene_data.inv_camera_matrix * world_matrix; - mat3 modelview_normal = mat3(scene_data.inv_camera_matrix) * world_normal_matrix; + mat4 modelview = scene_data.view_matrix * model_matrix; + mat3 modelview_normal = mat3(scene_data.view_matrix) * model_normal_matrix; { #CODE : VERTEX @@ -299,13 +305,14 @@ void main() { //using world coordinates #if !defined(SKIP_TRANSFORM_USED) && defined(VERTEX_WORLD_COORDS_USED) - vertex = (scene_data.inv_camera_matrix * vec4(vertex, 1.0)).xyz; - normal = mat3(scene_data.inverse_normal_matrix) * normal; + vertex = (scene_data.view_matrix * vec4(vertex, 1.0)).xyz; +#ifdef NORMAL_USED + normal = (scene_data.view_matrix * vec4(normal, 0.0)).xyz; +#endif #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) - - binormal = mat3(scene_data.camera_inverse_binormal_matrix) * binormal; - tangent = mat3(scene_data.camera_inverse_tangent_matrix) * tangent; + binormal = (scene_data.view_matrix * vec4(binormal, 0.0)).xyz; + tangent = (scene_data.view_matrix * vec4(tangent, 0.0)).xyz; #endif #endif @@ -370,6 +377,8 @@ void main() { #VERSION_DEFINES +#define SHADER_IS_SRGB false + /* Specialization Constants */ #if !defined(MODE_RENDER_DEPTH) @@ -451,7 +460,7 @@ layout(location = 8) highp in float dp_clip; //defines to keep compatibility with vertex -#define world_matrix draw_call.transform +#define model_matrix draw_call.transform #ifdef USE_MULTIVIEW #define projection_matrix scene_data.projection_matrix_view[ViewIndex] #else @@ -504,8 +513,8 @@ layout(location = 0) out mediump vec4 frag_color; #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) -/* Make a default specular mode SPECULAR_SCHLICK_GGX. */ -#if !defined(SPECULAR_DISABLED) && !defined(SPECULAR_SCHLICK_GGX) && !defined(SPECULAR_BLINN) && !defined(SPECULAR_PHONG) && !defined(SPECULAR_TOON) +// Default to SPECULAR_SCHLICK_GGX. +#if !defined(SPECULAR_DISABLED) && !defined(SPECULAR_SCHLICK_GGX) && !defined(SPECULAR_TOON) #define SPECULAR_SCHLICK_GGX #endif @@ -520,13 +529,13 @@ layout(location = 0) out mediump vec4 frag_color; */ vec4 fog_process(vec3 vertex) { - vec3 fog_color = scene_data.fog_light_color; + vec3 fog_color = scene_data_block.data.fog_light_color; - if (scene_data.fog_aerial_perspective > 0.0) { + if (scene_data_block.data.fog_aerial_perspective > 0.0) { vec3 sky_fog_color = vec3(0.0); - vec3 cube_view = scene_data.radiance_inverse_xform * vertex; + vec3 cube_view = scene_data_block.data.radiance_inverse_xform * vertex; // mip_level always reads from the second mipmap and higher so the fog is always slightly blurred - float mip_level = mix(1.0 / MAX_ROUGHNESS_LOD, 1.0, 1.0 - (abs(vertex.z) - scene_data.z_near) / (scene_data.z_far - scene_data.z_near)); + float mip_level = mix(1.0 / MAX_ROUGHNESS_LOD, 1.0, 1.0 - (abs(vertex.z) - scene_data_block.data.z_near) / (scene_data_block.data.z_far - scene_data_block.data.z_near)); #ifdef USE_RADIANCE_CUBEMAP_ARRAY float lod, blend; blend = modf(mip_level * MAX_ROUGHNESS_LOD, lod); @@ -535,29 +544,29 @@ vec4 fog_process(vec3 vertex) { #else sky_fog_color = textureLod(samplerCube(radiance_cubemap, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), cube_view, mip_level * MAX_ROUGHNESS_LOD).rgb; #endif //USE_RADIANCE_CUBEMAP_ARRAY - fog_color = mix(fog_color, sky_fog_color, scene_data.fog_aerial_perspective); + fog_color = mix(fog_color, sky_fog_color, scene_data_block.data.fog_aerial_perspective); } - if (scene_data.fog_sun_scatter > 0.001) { + if (scene_data_block.data.fog_sun_scatter > 0.001) { vec4 sun_scatter = vec4(0.0); float sun_total = 0.0; vec3 view = normalize(vertex); - for (uint i = 0; i < scene_data.directional_light_count; i++) { + for (uint i = 0; i < scene_data_block.data.directional_light_count; i++) { vec3 light_color = directional_lights.data[i].color * directional_lights.data[i].energy; float light_amount = pow(max(dot(view, directional_lights.data[i].direction), 0.0), 8.0); - fog_color += light_color * light_amount * scene_data.fog_sun_scatter; + fog_color += light_color * light_amount * scene_data_block.data.fog_sun_scatter; } } - float fog_amount = 1.0 - exp(min(0.0, vertex.z * scene_data.fog_density)); + float fog_amount = 1.0 - exp(min(0.0, -length(vertex) * scene_data_block.data.fog_density)); - if (abs(scene_data.fog_height_density) > 0.001) { - float y = (scene_data.camera_matrix * vec4(vertex, 1.0)).y; + if (abs(scene_data_block.data.fog_height_density) >= 0.0001) { + float y = (scene_data_block.data.inv_view_matrix * vec4(vertex, 1.0)).y; - float y_dist = scene_data.fog_height - y; + float y_dist = y - scene_data_block.data.fog_height; - float vfog_amount = clamp(exp(y_dist * scene_data.fog_height_density), 0.0, 1.0); + float vfog_amount = 1.0 - exp(min(0.0, y_dist * scene_data_block.data.fog_height_density)); fog_amount = max(vfog_amount, fog_amount); } @@ -567,6 +576,8 @@ vec4 fog_process(vec3 vertex) { #endif //!MODE_RENDER DEPTH +#define scene_data scene_data_block.data + void main() { #ifdef MODE_DUAL_PARABOLOID @@ -574,9 +585,13 @@ void main() { discard; #endif - //lay out everything, whathever is unused is optimized away anyway + //lay out everything, whatever is unused is optimized away anyway vec3 vertex = vertex_interp; +#ifdef USE_MULTIVIEW + vec3 view = -normalize(vertex_interp - scene_data.eye_offset[ViewIndex].xyz); +#else vec3 view = -normalize(vertex_interp); +#endif vec3 albedo = vec3(1.0); vec3 backlight = vec3(0.0); vec4 transmittance_color = vec4(0.0); @@ -589,7 +604,7 @@ void main() { float rim = 0.0; float rim_tint = 0.0; float clearcoat = 0.0; - float clearcoat_gloss = 0.0; + float clearcoat_roughness = 0.0; float anisotropy = 0.0; vec2 anisotropy_flow = vec2(1.0, 0.0); vec4 fog = vec4(0.0); @@ -643,7 +658,7 @@ void main() { float normal_map_depth = 1.0; - vec2 screen_uv = gl_FragCoord.xy * scene_data.screen_pixel_size + scene_data.screen_pixel_size * 0.5; //account for center + vec2 screen_uv = gl_FragCoord.xy * scene_data.screen_pixel_size; float sss_strength = 0.0; @@ -701,7 +716,7 @@ void main() { #endif // ALPHA_ANTIALIASING_EDGE_USED #ifdef USE_OPAQUE_PREPASS - if (alpha < opaque_prepass_threshold) { + if (alpha < scene_data.opaque_prepass_threshold) { discard; } #endif // USE_OPAQUE_PREPASS @@ -867,7 +882,19 @@ void main() { #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) if (scene_data.use_reflection_cubemap) { +#ifdef LIGHT_ANISOTROPY_USED + // https://google.github.io/filament/Filament.html#lighting/imagebasedlights/anisotropy + vec3 anisotropic_direction = anisotropy >= 0.0 ? binormal : tangent; + vec3 anisotropic_tangent = cross(anisotropic_direction, view); + vec3 anisotropic_normal = cross(anisotropic_tangent, anisotropic_direction); + vec3 bent_normal = normalize(mix(normal, anisotropic_normal, abs(anisotropy) * clamp(5.0 * roughness, 0.0, 1.0))); + vec3 ref_vec = reflect(-view, bent_normal); + ref_vec = mix(ref_vec, bent_normal, roughness * roughness); +#else vec3 ref_vec = reflect(-view, normal); + ref_vec = mix(ref_vec, normal, roughness * roughness); +#endif + float horizon = min(1.0 + dot(ref_vec, normal), 1.0); ref_vec = scene_data.radiance_inverse_xform * ref_vec; #ifdef USE_RADIANCE_CUBEMAP_ARRAY @@ -880,7 +907,6 @@ void main() { specular_light = textureLod(samplerCube(radiance_cubemap, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), ref_vec, roughness * MAX_ROUGHNESS_LOD).rgb; #endif //USE_RADIANCE_CUBEMAP_ARRAY - float horizon = min(1.0 + dot(ref_vec, normal), 1.0); specular_light *= horizon * horizon; specular_light *= scene_data.ambient_light_color_energy.a; } @@ -908,9 +934,38 @@ void main() { #endif // !USE_LIGHTMAP #if defined(CUSTOM_IRRADIANCE_USED) - ambient_light = mix(specular_light, custom_irradiance.rgb, custom_irradiance.a); + ambient_light = mix(ambient_light, custom_irradiance.rgb, custom_irradiance.a); #endif // CUSTOM_IRRADIANCE_USED +#ifdef LIGHT_CLEARCOAT_USED + + if (scene_data.use_reflection_cubemap) { + vec3 n = normalize(normal_interp); // We want to use geometric normal, not normal_map + float NoV = max(dot(n, view), 0.0001); + vec3 ref_vec = reflect(-view, n); + ref_vec = mix(ref_vec, n, clearcoat_roughness * clearcoat_roughness); + // The clear coat layer assumes an IOR of 1.5 (4% reflectance) + float Fc = clearcoat * (0.04 + 0.96 * SchlickFresnel(NoV)); + float attenuation = 1.0 - Fc; + ambient_light *= attenuation; + specular_light *= attenuation; + + float horizon = min(1.0 + dot(ref_vec, normal), 1.0); + ref_vec = scene_data.radiance_inverse_xform * ref_vec; + float roughness_lod = mix(0.001, 0.1, clearcoat_roughness) * MAX_ROUGHNESS_LOD; +#ifdef USE_RADIANCE_CUBEMAP_ARRAY + + float lod, blend; + blend = modf(roughness_lod, lod); + vec3 clearcoat_light = texture(samplerCubeArray(radiance_cubemap, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), vec4(ref_vec, lod)).rgb; + clearcoat_light = mix(clearcoat_light, texture(samplerCubeArray(radiance_cubemap, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), vec4(ref_vec, lod + 1)).rgb, blend); +#else + vec3 clearcoat_light = textureLod(samplerCube(radiance_cubemap, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), ref_vec, roughness_lod).rgb; + +#endif //USE_RADIANCE_CUBEMAP_ARRAY + specular_light += clearcoat_light * horizon * horizon * Fc * scene_data.ambient_light_color_energy.a; + } +#endif #endif //!defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) //radiance @@ -923,22 +978,22 @@ void main() { if (bool(draw_call.flags & INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE)) { //has lightmap capture uint index = draw_call.gi_offset; - vec3 wnormal = mat3(scene_data.camera_matrix) * normal; + vec3 wnormal = mat3(scene_data.inv_view_matrix) * normal; const float c1 = 0.429043; const float c2 = 0.511664; const float c3 = 0.743125; const float c4 = 0.886227; const float c5 = 0.247708; ambient_light += (c1 * lightmap_captures.data[index].sh[8].rgb * (wnormal.x * wnormal.x - wnormal.y * wnormal.y) + - c3 * lightmap_captures.data[index].sh[6].rgb * wnormal.z * wnormal.z + - c4 * lightmap_captures.data[index].sh[0].rgb - - c5 * lightmap_captures.data[index].sh[6].rgb + - 2.0 * c1 * lightmap_captures.data[index].sh[4].rgb * wnormal.x * wnormal.y + - 2.0 * c1 * lightmap_captures.data[index].sh[7].rgb * wnormal.x * wnormal.z + - 2.0 * c1 * lightmap_captures.data[index].sh[5].rgb * wnormal.y * wnormal.z + - 2.0 * c2 * lightmap_captures.data[index].sh[3].rgb * wnormal.x + - 2.0 * c2 * lightmap_captures.data[index].sh[1].rgb * wnormal.y + - 2.0 * c2 * lightmap_captures.data[index].sh[2].rgb * wnormal.z); + c3 * lightmap_captures.data[index].sh[6].rgb * wnormal.z * wnormal.z + + c4 * lightmap_captures.data[index].sh[0].rgb - + c5 * lightmap_captures.data[index].sh[6].rgb + + 2.0 * c1 * lightmap_captures.data[index].sh[4].rgb * wnormal.x * wnormal.y + + 2.0 * c1 * lightmap_captures.data[index].sh[7].rgb * wnormal.x * wnormal.z + + 2.0 * c1 * lightmap_captures.data[index].sh[5].rgb * wnormal.y * wnormal.z + + 2.0 * c2 * lightmap_captures.data[index].sh[3].rgb * wnormal.x + + 2.0 * c2 * lightmap_captures.data[index].sh[1].rgb * wnormal.y + + 2.0 * c2 * lightmap_captures.data[index].sh[2].rgb * wnormal.z); } else if (bool(draw_call.flags & INSTANCE_FLAGS_USE_LIGHTMAP)) { // has actual lightmap bool uses_sh = bool(draw_call.flags & INSTANCE_FLAGS_USE_SH_LIGHTMAP); @@ -984,6 +1039,19 @@ void main() { vec4 ambient_accum = vec4(0.0, 0.0, 0.0, 0.0); uint reflection_indices = draw_call.reflection_probes.x; + +#ifdef LIGHT_ANISOTROPY_USED + // https://google.github.io/filament/Filament.html#lighting/imagebasedlights/anisotropy + vec3 anisotropic_direction = anisotropy >= 0.0 ? binormal : tangent; + vec3 anisotropic_tangent = cross(anisotropic_direction, view); + vec3 anisotropic_normal = cross(anisotropic_tangent, anisotropic_direction); + vec3 bent_normal = normalize(mix(normal, anisotropic_normal, abs(anisotropy) * clamp(5.0 * roughness, 0.0, 1.0))); +#else + vec3 bent_normal = normal; +#endif + vec3 ref_vec = normalize(reflect(-view, bent_normal)); + ref_vec = mix(ref_vec, bent_normal, roughness * roughness); + for (uint i = 0; i < 8; i++) { uint reflection_index = reflection_indices & 0xFF; if (i == 4) { @@ -996,12 +1064,18 @@ void main() { break; } - reflection_process(reflection_index, vertex, normal, roughness, ambient_light, specular_light, ambient_accum, reflection_accum); + reflection_process(reflection_index, vertex, ref_vec, bent_normal, roughness, ambient_light, specular_light, ambient_accum, reflection_accum); } if (reflection_accum.a > 0.0) { specular_light = reflection_accum.rgb / reflection_accum.a; } + +#if !defined(USE_LIGHTMAP) + if (ambient_accum.a > 0.0) { + ambient_light = ambient_accum.rgb / ambient_accum.a; + } +#endif } //Reflection probes // finalize ambient light here @@ -1046,7 +1120,7 @@ void main() { #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) if (!sc_disable_directional_lights) { //directional light - +#ifndef SHADOWS_DISABLED // Do shadow and lighting in two passes to reduce register pressure uint shadow0 = 0; uint shadow1 = 0; @@ -1068,11 +1142,10 @@ void main() { #ifdef USE_SOFT_SHADOWS //version with soft shadows, more expensive - if (directional_lights.data[i].shadow_enabled) { + if (directional_lights.data[i].shadow_opacity > 0.001) { float depth_z = -vertex.z; vec4 pssm_coord; - vec3 shadow_color = vec3(0.0); vec3 light_dir = directional_lights.data[i].direction; #define BIAS_FUNC(m_var, m_idx) \ @@ -1098,9 +1171,6 @@ void main() { } else { shadow = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); } - - shadow_color = directional_lights.data[i].shadow_color1.rgb; - } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { vec4 v = vec4(vertex, 1.0); @@ -1118,8 +1188,6 @@ void main() { } else { shadow = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); } - - shadow_color = directional_lights.data[i].shadow_color2.rgb; } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { vec4 v = vec4(vertex, 1.0); @@ -1137,9 +1205,6 @@ void main() { } else { shadow = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); } - - shadow_color = directional_lights.data[i].shadow_color3.rgb; - } else { vec4 v = vec4(vertex, 1.0); @@ -1157,12 +1222,9 @@ void main() { } else { shadow = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); } - - shadow_color = directional_lights.data[i].shadow_color4.rgb; } if (directional_lights.data[i].blend_splits) { - vec3 shadow_color_blend = vec3(0.0); float pssm_blend; float shadow2; @@ -1183,7 +1245,6 @@ void main() { } pssm_blend = smoothstep(0.0, directional_lights.data[i].shadow_split_offsets.x, depth_z); - shadow_color_blend = directional_lights.data[i].shadow_color2.rgb; } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { vec4 v = vec4(vertex, 1.0); BIAS_FUNC(v, 2) @@ -1201,8 +1262,6 @@ void main() { } pssm_blend = smoothstep(directional_lights.data[i].shadow_split_offsets.x, directional_lights.data[i].shadow_split_offsets.y, depth_z); - - shadow_color_blend = directional_lights.data[i].shadow_color3.rgb; } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { vec4 v = vec4(vertex, 1.0); BIAS_FUNC(v, 3) @@ -1219,7 +1278,6 @@ void main() { } pssm_blend = smoothstep(directional_lights.data[i].shadow_split_offsets.y, directional_lights.data[i].shadow_split_offsets.z, depth_z); - shadow_color_blend = directional_lights.data[i].shadow_color4.rgb; } else { pssm_blend = 0.0; //if no blend, same coord will be used (divide by z will result in same value, and already cached) } @@ -1227,7 +1285,6 @@ void main() { pssm_blend = sqrt(pssm_blend); shadow = mix(shadow, shadow2, pssm_blend); - shadow_color = mix(shadow_color, shadow_color_blend, pssm_blend); } shadow = mix(shadow, 1.0, smoothstep(directional_lights.data[i].fade_from, directional_lights.data[i].fade_to, vertex.z)); //done with negative values for performance @@ -1237,10 +1294,11 @@ void main() { #else // Soft shadow disabled version - if (directional_lights.data[i].shadow_enabled) { + if (directional_lights.data[i].shadow_opacity > 0.001) { float depth_z = -vertex.z; vec4 pssm_coord; + float blur_factor; vec3 light_dir = directional_lights.data[i].direction; vec3 base_normal_bias = normalize(normal_interp) * (1.0 - max(0.0, dot(light_dir, -normalize(normal_interp)))); @@ -1256,56 +1314,71 @@ void main() { BIAS_FUNC(v, 0) pssm_coord = (directional_lights.data[i].shadow_matrix1 * v); + blur_factor = 1.0; } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { vec4 v = vec4(vertex, 1.0); BIAS_FUNC(v, 1) pssm_coord = (directional_lights.data[i].shadow_matrix2 * v); + // Adjust shadow blur with reference to the first split to reduce discrepancy between shadow splits. + blur_factor = directional_lights.data[i].shadow_split_offsets.x / directional_lights.data[i].shadow_split_offsets.y; + ; } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { vec4 v = vec4(vertex, 1.0); BIAS_FUNC(v, 2) pssm_coord = (directional_lights.data[i].shadow_matrix3 * v); - + // Adjust shadow blur with reference to the first split to reduce discrepancy between shadow splits. + blur_factor = directional_lights.data[i].shadow_split_offsets.x / directional_lights.data[i].shadow_split_offsets.z; } else { vec4 v = vec4(vertex, 1.0); BIAS_FUNC(v, 3) pssm_coord = (directional_lights.data[i].shadow_matrix4 * v); + // Adjust shadow blur with reference to the first split to reduce discrepancy between shadow splits. + blur_factor = directional_lights.data[i].shadow_split_offsets.x / directional_lights.data[i].shadow_split_offsets.w; } pssm_coord /= pssm_coord.w; - shadow = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); + shadow = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale * blur_factor, pssm_coord); if (directional_lights.data[i].blend_splits) { float pssm_blend; + float blur_factor2; if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { vec4 v = vec4(vertex, 1.0); BIAS_FUNC(v, 1) pssm_coord = (directional_lights.data[i].shadow_matrix2 * v); pssm_blend = smoothstep(0.0, directional_lights.data[i].shadow_split_offsets.x, depth_z); + // Adjust shadow blur with reference to the first split to reduce discrepancy between shadow splits. + blur_factor2 = directional_lights.data[i].shadow_split_offsets.x / directional_lights.data[i].shadow_split_offsets.y; } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { vec4 v = vec4(vertex, 1.0); BIAS_FUNC(v, 2) pssm_coord = (directional_lights.data[i].shadow_matrix3 * v); pssm_blend = smoothstep(directional_lights.data[i].shadow_split_offsets.x, directional_lights.data[i].shadow_split_offsets.y, depth_z); + // Adjust shadow blur with reference to the first split to reduce discrepancy between shadow splits. + blur_factor2 = directional_lights.data[i].shadow_split_offsets.x / directional_lights.data[i].shadow_split_offsets.z; } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { vec4 v = vec4(vertex, 1.0); BIAS_FUNC(v, 3) pssm_coord = (directional_lights.data[i].shadow_matrix4 * v); pssm_blend = smoothstep(directional_lights.data[i].shadow_split_offsets.y, directional_lights.data[i].shadow_split_offsets.z, depth_z); + // Adjust shadow blur with reference to the first split to reduce discrepancy between shadow splits. + blur_factor2 = directional_lights.data[i].shadow_split_offsets.x / directional_lights.data[i].shadow_split_offsets.w; } else { pssm_blend = 0.0; //if no blend, same coord will be used (divide by z will result in same value, and already cached) + blur_factor2 = 1.0; } pssm_coord /= pssm_coord.w; - float shadow2 = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale, pssm_coord); + float shadow2 = sample_directional_pcf_shadow(directional_shadow_atlas, scene_data.directional_shadow_pixel_size * directional_lights.data[i].soft_shadow_scale * blur_factor2, pssm_coord); shadow = mix(shadow, shadow2, pssm_blend); } @@ -1322,6 +1395,8 @@ void main() { } } +#endif // SHADOWS_DISABLED + for (uint i = 0; i < 8; i++) { if (i >= scene_data.directional_light_count) { break; @@ -1334,16 +1409,16 @@ void main() { // We're not doing light transmittence float shadow = 1.0; - +#ifndef SHADOWS_DISABLED if (i < 4) { shadow = float(shadow0 >> (i * 8) & 0xFF) / 255.0; } else { shadow = float(shadow1 >> ((i - 4) * 8) & 0xFF) / 255.0; } - +#endif blur_shadow(shadow); - light_compute(normal, directional_lights.data[i].direction, normalize(view), 0.0, directional_lights.data[i].color * directional_lights.data[i].energy, shadow, f0, orms, 1.0, + light_compute(normal, directional_lights.data[i].direction, normalize(view), 0.0, directional_lights.data[i].color * directional_lights.data[i].energy, shadow, f0, orms, 1.0, albedo, alpha, #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -1356,10 +1431,10 @@ void main() { #endif */ #ifdef LIGHT_RIM_USED - rim, rim_tint, albedo, + rim, rim_tint, #endif #ifdef LIGHT_CLEARCOAT_USED - clearcoat, clearcoat_gloss, + clearcoat, clearcoat_roughness, normalize(normal_interp), #endif #ifdef LIGHT_ANISOTROPY_USED binormal, tangent, anisotropy, @@ -1367,9 +1442,6 @@ void main() { #ifdef USE_SOFT_SHADOW directional_lights.data[i].size, #endif -#ifdef USE_SHADOW_TO_OPACITY - alpha, -#endif diffuse_light, specular_light); } @@ -1393,7 +1465,7 @@ void main() { shadow = blur_shadow(shadow); - light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow, + light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow, albedo, alpha, #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -1407,16 +1479,13 @@ void main() { #ifdef LIGHT_RIM_USED rim, rim_tint, - albedo, #endif #ifdef LIGHT_CLEARCOAT_USED - clearcoat, clearcoat_gloss, + clearcoat, clearcoat_roughness, normalize(normal_interp), #endif #ifdef LIGHT_ANISOTROPY_USED - tangent, binormal, anisotropy, -#endif -#ifdef USE_SHADOW_TO_OPACITY - alpha, + tangent, + binormal, anisotropy, #endif diffuse_light, specular_light); } @@ -1441,7 +1510,7 @@ void main() { shadow = blur_shadow(shadow); - light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow, + light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, shadow, albedo, alpha, #ifdef LIGHT_BACKLIGHT_USED backlight, #endif @@ -1455,16 +1524,13 @@ void main() { #ifdef LIGHT_RIM_USED rim, rim_tint, - albedo, #endif #ifdef LIGHT_CLEARCOAT_USED - clearcoat, clearcoat_gloss, + clearcoat, clearcoat_roughness, normalize(normal_interp), #endif #ifdef LIGHT_ANISOTROPY_USED - tangent, binormal, anisotropy, -#endif -#ifdef USE_SHADOW_TO_OPACITY - alpha, + tangent, + binormal, anisotropy, #endif diffuse_light, specular_light); } @@ -1481,7 +1547,7 @@ void main() { #ifdef USE_OPAQUE_PREPASS - if (alpha < opaque_prepass_threshold) { + if (alpha < scene_data.opaque_prepass_threshold) { discard; } diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_mobile_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_mobile_inc.glsl index dd8879acb4..98ad674ce0 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_mobile_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_mobile_inc.glsl @@ -7,7 +7,7 @@ #include "decal_data_inc.glsl" -#if !defined(MODE_RENDER_DEPTH) || defined(MODE_RENDER_MATERIAL) || defined(TANGENT_USED) || defined(NORMAL_MAP_USED) +#if !defined(MODE_RENDER_DEPTH) || defined(MODE_RENDER_MATERIAL) || defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) #ifndef NORMAL_USED #define NORMAL_USED #endif @@ -15,7 +15,7 @@ /* don't exceed 128 bytes!! */ /* put instance data into our push content, not a array */ -layout(push_constant, binding = 0, std430) uniform DrawCall { +layout(push_constant, std430) uniform DrawCall { highp mat4 transform; // 64 - 64 uint flags; // 04 - 68 uint instance_uniforms_ofs; //base offset in global buffer for instance variables // 04 - 72 @@ -118,22 +118,23 @@ layout(set = 0, binding = 13, std430) restrict readonly buffer Decals { } decals; -layout(set = 0, binding = 14, std430) restrict readonly buffer GlobalVariableData { +layout(set = 0, binding = 14, std430) restrict readonly buffer GlobalShaderUniformData { highp vec4 data[]; } -global_variables; +global_shader_uniforms; /* Set 1: Render Pass (changes per render pass) */ -layout(set = 1, binding = 0, std140) uniform SceneData { +struct SceneData { highp mat4 projection_matrix; highp mat4 inv_projection_matrix; - highp mat4 camera_matrix; - highp mat4 inv_camera_matrix; + highp mat4 inv_view_matrix; + highp mat4 view_matrix; // only used for multiview highp mat4 projection_matrix_view[MAX_VIEWS]; highp mat4 inv_projection_matrix_view[MAX_VIEWS]; + highp vec4 eye_offset[MAX_VIEWS]; highp vec2 viewport_size; highp vec2 screen_pixel_size; @@ -168,9 +169,8 @@ layout(set = 1, binding = 0, std140) uniform SceneData { mediump float roughness_limiter_amount; mediump float roughness_limiter_limit; - uvec2 roughness_limiter_pad; - - mediump vec4 ao_color; + mediump float opaque_prepass_threshold; + uint roughness_limiter_pad; bool fog_enabled; highp float fog_density; @@ -190,8 +190,12 @@ layout(set = 1, binding = 0, std140) uniform SceneData { uint pad1; uint pad2; uint pad3; +}; + +layout(set = 1, binding = 0, std140) uniform SceneDataBlock { + SceneData data; } -scene_data; +scene_data_block; #ifdef USE_RADIANCE_CUBEMAP_ARRAY diff --git a/servers/rendering/renderer_rd/shaders/skeleton.glsl b/servers/rendering/renderer_rd/shaders/skeleton.glsl index b831005256..a893a66c94 100644 --- a/servers/rendering/renderer_rd/shaders/skeleton.glsl +++ b/servers/rendering/renderer_rd/shaders/skeleton.glsl @@ -36,7 +36,7 @@ layout(set = 2, binding = 0, std430) buffer restrict readonly SkeletonData { } bone_transforms; -layout(push_constant, binding = 0, std430) uniform Params { +layout(push_constant, std430) uniform Params { bool has_normal; bool has_tangent; bool has_skeleton; @@ -160,7 +160,7 @@ void main() { } if (params.has_tangent) { - blend_tangent += decode_abgr_2_10_10_10(src_blend_shapes.data[base_offset]).rgb; + blend_tangent += decode_abgr_2_10_10_10(src_blend_shapes.data[base_offset]).rgb * w; } blend_total += w; @@ -174,8 +174,8 @@ void main() { } vertex += blend_vertex; - normal += normalize(normal + blend_normal); - tangent.rgb += normalize(tangent.rgb + blend_tangent); + normal = normalize(normal + blend_normal); + tangent.rgb = normalize(tangent.rgb + blend_tangent); } if (params.has_skeleton) { diff --git a/servers/rendering/renderer_rd/shaders/sort.glsl b/servers/rendering/renderer_rd/shaders/sort.glsl index 307e60dc21..48cf69012a 100644 --- a/servers/rendering/renderer_rd/shaders/sort.glsl +++ b/servers/rendering/renderer_rd/shaders/sort.glsl @@ -47,7 +47,7 @@ layout(set = 1, binding = 0, std430) restrict buffer SortBuffer { } sort_buffer; -layout(push_constant, binding = 0, std430) uniform Params { +layout(push_constant, std430) uniform Params { uint total_elements; uint pad[3]; ivec4 job_params; diff --git a/servers/rendering/renderer_rd/shaders/specular_merge.glsl b/servers/rendering/renderer_rd/shaders/specular_merge.glsl deleted file mode 100644 index 3579c35cce..0000000000 --- a/servers/rendering/renderer_rd/shaders/specular_merge.glsl +++ /dev/null @@ -1,53 +0,0 @@ -#[vertex] - -#version 450 - -#VERSION_DEFINES - -layout(location = 0) out vec2 uv_interp; - -void main() { - vec2 base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0)); - uv_interp = base_arr[gl_VertexIndex]; - - gl_Position = vec4(uv_interp * 2.0 - 1.0, 0.0, 1.0); -} - -#[fragment] - -#version 450 - -#VERSION_DEFINES - -layout(location = 0) in vec2 uv_interp; - -layout(set = 0, binding = 0) uniform sampler2D specular; - -#ifdef MODE_SSR - -layout(set = 1, binding = 0) uniform sampler2D ssr; - -#endif - -#ifdef MODE_MERGE - -layout(set = 2, binding = 0) uniform sampler2D diffuse; - -#endif - -layout(location = 0) out vec4 frag_color; - -void main() { - frag_color.rgb = texture(specular, uv_interp).rgb; - frag_color.a = 0.0; -#ifdef MODE_SSR - - vec4 ssr_color = texture(ssr, uv_interp); - frag_color.rgb = mix(frag_color.rgb, ssr_color.rgb, ssr_color.a); -#endif - -#ifdef MODE_MERGE - frag_color += texture(diffuse, uv_interp); -#endif - //added using additive blend -} diff --git a/servers/rendering/renderer_rd/shaders/subsurface_scattering.glsl b/servers/rendering/renderer_rd/shaders/subsurface_scattering.glsl index 9367b641c2..fb35d3cde6 100644 --- a/servers/rendering/renderer_rd/shaders/subsurface_scattering.glsl +++ b/servers/rendering/renderer_rd/shaders/subsurface_scattering.glsl @@ -87,7 +87,7 @@ const vec4 skin_kernel[kernel_size] = vec4[]( #endif //USE_11_SAMPLES -layout(push_constant, binding = 1, std430) uniform Params { +layout(push_constant, std430) uniform Params { ivec2 screen_size; float camera_z_far; float camera_z_near; diff --git a/servers/rendering/renderer_rd/shaders/taa_resolve.glsl b/servers/rendering/renderer_rd/shaders/taa_resolve.glsl new file mode 100644 index 0000000000..b0a0839836 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/taa_resolve.glsl @@ -0,0 +1,394 @@ +/////////////////////////////////////////////////////////////////////////////////// +// Copyright(c) 2016-2022 Panos Karabelas +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is furnished +// to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2022-05-06: Panos Karabelas: first commit +// 2020-12-05: Joan Fons: convert to Vulkan and Godot +/////////////////////////////////////////////////////////////////////////////////// + +#[compute] + +#version 450 + +#VERSION_DEFINES + +// Based on Spartan Engine's TAA implementation (without TAA upscale). +// <https://github.com/PanosK92/SpartanEngine/blob/a8338d0609b85dc32f3732a5c27fb4463816a3b9/Data/shaders/temporal_antialiasing.hlsl> + +#define USE_SUBGROUPS + +#define GROUP_SIZE 8 +#define FLT_MIN 0.00000001 +#define FLT_MAX 32767.0 +#define RPC_9 0.11111111111 +#define RPC_16 0.0625 + +#ifdef USE_SUBGROUPS +layout(local_size_x = GROUP_SIZE, local_size_y = GROUP_SIZE, local_size_z = 1) in; +#endif + +layout(rgba16f, set = 0, binding = 0) uniform restrict readonly image2D color_buffer; +layout(set = 0, binding = 1) uniform sampler2D depth_buffer; +layout(rg16f, set = 0, binding = 2) uniform restrict readonly image2D velocity_buffer; +layout(rg16f, set = 0, binding = 3) uniform restrict readonly image2D last_velocity_buffer; +layout(set = 0, binding = 4) uniform sampler2D history_buffer; +layout(rgba16f, set = 0, binding = 5) uniform restrict writeonly image2D output_buffer; + +layout(push_constant, std430) uniform Params { + vec2 resolution; + float disocclusion_threshold; // 0.1 / max(params.resolution.x, params.resolution.y + float disocclusion_scale; +} +params; + +const ivec2 kOffsets3x3[9] = { + ivec2(-1, -1), + ivec2(0, -1), + ivec2(1, -1), + ivec2(-1, 0), + ivec2(0, 0), + ivec2(1, 0), + ivec2(-1, 1), + ivec2(0, 1), + ivec2(1, 1), +}; + +/*------------------------------------------------------------------------------ + THREAD GROUP SHARED MEMORY (LDS) +------------------------------------------------------------------------------*/ + +const int kBorderSize = 1; +const int kGroupSize = GROUP_SIZE; +const int kTileDimension = kGroupSize + kBorderSize * 2; +const int kTileDimension2 = kTileDimension * kTileDimension; + +vec3 reinhard(vec3 hdr) { + return hdr / (hdr + 1.0); +} +vec3 reinhard_inverse(vec3 sdr) { + return sdr / (1.0 - sdr); +} + +float get_depth(ivec2 thread_id) { + return texelFetch(depth_buffer, thread_id, 0).r; +} + +#ifdef USE_SUBGROUPS +shared vec3 tile_color[kTileDimension][kTileDimension]; +shared float tile_depth[kTileDimension][kTileDimension]; + +vec3 load_color(uvec2 group_thread_id) { + group_thread_id += kBorderSize; + return tile_color[group_thread_id.x][group_thread_id.y]; +} + +void store_color(uvec2 group_thread_id, vec3 color) { + tile_color[group_thread_id.x][group_thread_id.y] = color; +} + +float load_depth(uvec2 group_thread_id) { + group_thread_id += kBorderSize; + return tile_depth[group_thread_id.x][group_thread_id.y]; +} + +void store_depth(uvec2 group_thread_id, float depth) { + tile_depth[group_thread_id.x][group_thread_id.y] = depth; +} + +void store_color_depth(uvec2 group_thread_id, ivec2 thread_id) { + // out of bounds clamp + thread_id = clamp(thread_id, ivec2(0, 0), ivec2(params.resolution) - ivec2(1, 1)); + + store_color(group_thread_id, imageLoad(color_buffer, thread_id).rgb); + store_depth(group_thread_id, get_depth(thread_id)); +} + +void populate_group_shared_memory(uvec2 group_id, uint group_index) { + // Populate group shared memory + ivec2 group_top_left = ivec2(group_id) * kGroupSize - kBorderSize; + if (group_index < (kTileDimension2 >> 2)) { + ivec2 group_thread_id_1 = ivec2(group_index % kTileDimension, group_index / kTileDimension); + ivec2 group_thread_id_2 = ivec2((group_index + (kTileDimension2 >> 2)) % kTileDimension, (group_index + (kTileDimension2 >> 2)) / kTileDimension); + ivec2 group_thread_id_3 = ivec2((group_index + (kTileDimension2 >> 1)) % kTileDimension, (group_index + (kTileDimension2 >> 1)) / kTileDimension); + ivec2 group_thread_id_4 = ivec2((group_index + kTileDimension2 * 3 / 4) % kTileDimension, (group_index + kTileDimension2 * 3 / 4) / kTileDimension); + + store_color_depth(group_thread_id_1, group_top_left + group_thread_id_1); + store_color_depth(group_thread_id_2, group_top_left + group_thread_id_2); + store_color_depth(group_thread_id_3, group_top_left + group_thread_id_3); + store_color_depth(group_thread_id_4, group_top_left + group_thread_id_4); + } + + // Wait for group threads to load store data. + groupMemoryBarrier(); + barrier(); +} +#else +vec3 load_color(uvec2 screen_pos) { + return imageLoad(color_buffer, ivec2(screen_pos)).rgb; +} + +float load_depth(uvec2 screen_pos) { + return get_depth(ivec2(screen_pos)); +} +#endif + +/*------------------------------------------------------------------------------ + VELOCITY +------------------------------------------------------------------------------*/ + +void depth_test_min(uvec2 pos, inout float min_depth, inout uvec2 min_pos) { + float depth = load_depth(pos); + + if (depth < min_depth) { + min_depth = depth; + min_pos = pos; + } +} + +// Returns velocity with closest depth (3x3 neighborhood) +void get_closest_pixel_velocity_3x3(in uvec2 group_pos, uvec2 group_top_left, out vec2 velocity) { + float min_depth = 1.0; + uvec2 min_pos = group_pos; + + depth_test_min(group_pos + kOffsets3x3[0], min_depth, min_pos); + depth_test_min(group_pos + kOffsets3x3[1], min_depth, min_pos); + depth_test_min(group_pos + kOffsets3x3[2], min_depth, min_pos); + depth_test_min(group_pos + kOffsets3x3[3], min_depth, min_pos); + depth_test_min(group_pos + kOffsets3x3[4], min_depth, min_pos); + depth_test_min(group_pos + kOffsets3x3[5], min_depth, min_pos); + depth_test_min(group_pos + kOffsets3x3[6], min_depth, min_pos); + depth_test_min(group_pos + kOffsets3x3[7], min_depth, min_pos); + depth_test_min(group_pos + kOffsets3x3[8], min_depth, min_pos); + + // Velocity out + velocity = imageLoad(velocity_buffer, ivec2(group_top_left + min_pos)).xy; +} + +/*------------------------------------------------------------------------------ + HISTORY SAMPLING +------------------------------------------------------------------------------*/ + +vec3 sample_catmull_rom_9(sampler2D stex, vec2 uv, vec2 resolution) { + // Source: https://gist.github.com/TheRealMJP/c83b8c0f46b63f3a88a5986f4fa982b1 + // License: https://gist.github.com/TheRealMJP/bc503b0b87b643d3505d41eab8b332ae + + // We're going to sample a 4x4 grid of texels surrounding the target UV coordinate. We'll do this by rounding + // down the sample location to get the exact center of our "starting" texel. The starting texel will be at + // location [1, 1] in the grid, where [0, 0] is the top left corner. + vec2 sample_pos = uv * resolution; + vec2 texPos1 = floor(sample_pos - 0.5f) + 0.5f; + + // Compute the fractional offset from our starting texel to our original sample location, which we'll + // feed into the Catmull-Rom spline function to get our filter weights. + vec2 f = sample_pos - texPos1; + + // Compute the Catmull-Rom weights using the fractional offset that we calculated earlier. + // These equations are pre-expanded based on our knowledge of where the texels will be located, + // which lets us avoid having to evaluate a piece-wise function. + vec2 w0 = f * (-0.5f + f * (1.0f - 0.5f * f)); + vec2 w1 = 1.0f + f * f * (-2.5f + 1.5f * f); + vec2 w2 = f * (0.5f + f * (2.0f - 1.5f * f)); + vec2 w3 = f * f * (-0.5f + 0.5f * f); + + // Work out weighting factors and sampling offsets that will let us use bilinear filtering to + // simultaneously evaluate the middle 2 samples from the 4x4 grid. + vec2 w12 = w1 + w2; + vec2 offset12 = w2 / (w1 + w2); + + // Compute the final UV coordinates we'll use for sampling the texture + vec2 texPos0 = texPos1 - 1.0f; + vec2 texPos3 = texPos1 + 2.0f; + vec2 texPos12 = texPos1 + offset12; + + texPos0 /= resolution; + texPos3 /= resolution; + texPos12 /= resolution; + + vec3 result = vec3(0.0f, 0.0f, 0.0f); + + result += textureLod(stex, vec2(texPos0.x, texPos0.y), 0.0).xyz * w0.x * w0.y; + result += textureLod(stex, vec2(texPos12.x, texPos0.y), 0.0).xyz * w12.x * w0.y; + result += textureLod(stex, vec2(texPos3.x, texPos0.y), 0.0).xyz * w3.x * w0.y; + + result += textureLod(stex, vec2(texPos0.x, texPos12.y), 0.0).xyz * w0.x * w12.y; + result += textureLod(stex, vec2(texPos12.x, texPos12.y), 0.0).xyz * w12.x * w12.y; + result += textureLod(stex, vec2(texPos3.x, texPos12.y), 0.0).xyz * w3.x * w12.y; + + result += textureLod(stex, vec2(texPos0.x, texPos3.y), 0.0).xyz * w0.x * w3.y; + result += textureLod(stex, vec2(texPos12.x, texPos3.y), 0.0).xyz * w12.x * w3.y; + result += textureLod(stex, vec2(texPos3.x, texPos3.y), 0.0).xyz * w3.x * w3.y; + + return max(result, 0.0f); +} + +/*------------------------------------------------------------------------------ + HISTORY CLIPPING +------------------------------------------------------------------------------*/ + +// Based on "Temporal Reprojection Anti-Aliasing" - https://github.com/playdeadgames/temporal +vec3 clip_aabb(vec3 aabb_min, vec3 aabb_max, vec3 p, vec3 q) { + vec3 r = q - p; + vec3 rmax = (aabb_max - p.xyz); + vec3 rmin = (aabb_min - p.xyz); + + if (r.x > rmax.x + FLT_MIN) + r *= (rmax.x / r.x); + if (r.y > rmax.y + FLT_MIN) + r *= (rmax.y / r.y); + if (r.z > rmax.z + FLT_MIN) + r *= (rmax.z / r.z); + + if (r.x < rmin.x - FLT_MIN) + r *= (rmin.x / r.x); + if (r.y < rmin.y - FLT_MIN) + r *= (rmin.y / r.y); + if (r.z < rmin.z - FLT_MIN) + r *= (rmin.z / r.z); + + return p + r; +} + +// Clip history to the neighbourhood of the current sample +vec3 clip_history_3x3(uvec2 group_pos, vec3 color_history, vec2 velocity_closest) { + // Sample a 3x3 neighbourhood + vec3 s1 = load_color(group_pos + kOffsets3x3[0]); + vec3 s2 = load_color(group_pos + kOffsets3x3[1]); + vec3 s3 = load_color(group_pos + kOffsets3x3[2]); + vec3 s4 = load_color(group_pos + kOffsets3x3[3]); + vec3 s5 = load_color(group_pos + kOffsets3x3[4]); + vec3 s6 = load_color(group_pos + kOffsets3x3[5]); + vec3 s7 = load_color(group_pos + kOffsets3x3[6]); + vec3 s8 = load_color(group_pos + kOffsets3x3[7]); + vec3 s9 = load_color(group_pos + kOffsets3x3[8]); + + // Compute min and max (with an adaptive box size, which greatly reduces ghosting) + vec3 color_avg = (s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9) * RPC_9; + vec3 color_avg2 = ((s1 * s1) + (s2 * s2) + (s3 * s3) + (s4 * s4) + (s5 * s5) + (s6 * s6) + (s7 * s7) + (s8 * s8) + (s9 * s9)) * RPC_9; + float box_size = mix(0.0f, 2.5f, smoothstep(0.02f, 0.0f, length(velocity_closest))); + vec3 dev = sqrt(abs(color_avg2 - (color_avg * color_avg))) * box_size; + vec3 color_min = color_avg - dev; + vec3 color_max = color_avg + dev; + + // Variance clipping + vec3 color = clip_aabb(color_min, color_max, clamp(color_avg, color_min, color_max), color_history); + + // Clamp to prevent NaNs + color = clamp(color, FLT_MIN, FLT_MAX); + + return color; +} + +/*------------------------------------------------------------------------------ + TAA +------------------------------------------------------------------------------*/ + +const vec3 lumCoeff = vec3(0.299f, 0.587f, 0.114f); + +float luminance(vec3 color) { + return max(dot(color, lumCoeff), 0.0001f); +} + +float get_factor_disocclusion(vec2 uv_reprojected, vec2 velocity) { + vec2 velocity_previous = imageLoad(last_velocity_buffer, ivec2(uv_reprojected * params.resolution)).xy; + vec2 velocity_texels = velocity * params.resolution; + vec2 prev_velocity_texels = velocity_previous * params.resolution; + float disocclusion = length(prev_velocity_texels - velocity_texels) - params.disocclusion_threshold; + return clamp(disocclusion * params.disocclusion_scale, 0.0, 1.0); +} + +vec3 temporal_antialiasing(uvec2 pos_group_top_left, uvec2 pos_group, uvec2 pos_screen, vec2 uv, sampler2D tex_history) { + // Get the velocity of the current pixel + vec2 velocity = imageLoad(velocity_buffer, ivec2(pos_screen)).xy; + + // Get reprojected uv + vec2 uv_reprojected = uv - velocity; + + // Get input color + vec3 color_input = load_color(pos_group); + + // Get history color (catmull-rom reduces a lot of the blurring that you get under motion) + vec3 color_history = sample_catmull_rom_9(tex_history, uv_reprojected, params.resolution).rgb; + + // Clip history to the neighbourhood of the current sample (fixes a lot of the ghosting). + vec2 velocity_closest = vec2(0.0); // This is best done by using the velocity with the closest depth. + get_closest_pixel_velocity_3x3(pos_group, pos_group_top_left, velocity_closest); + color_history = clip_history_3x3(pos_group, color_history, velocity_closest); + + // Compute blend factor + float blend_factor = RPC_16; // We want to be able to accumulate as many jitter samples as we generated, that is, 16. + { + // If re-projected UV is out of screen, converge to current color immediatel + float factor_screen = any(lessThan(uv_reprojected, vec2(0.0))) || any(greaterThan(uv_reprojected, vec2(1.0))) ? 1.0 : 0.0; + + // Increase blend factor when there is disocclusion (fixes a lot of the remaining ghosting). + float factor_disocclusion = get_factor_disocclusion(uv_reprojected, velocity); + + // Add to the blend factor + blend_factor = clamp(blend_factor + factor_screen + factor_disocclusion, 0.0, 1.0); + } + + // Resolve + vec3 color_resolved = vec3(0.0); + { + // Tonemap + color_history = reinhard(color_history); + color_input = reinhard(color_input); + + // Reduce flickering + float lum_color = luminance(color_input); + float lum_history = luminance(color_history); + float diff = abs(lum_color - lum_history) / max(lum_color, max(lum_history, 1.001)); + diff = 1.0 - diff; + diff = diff * diff; + blend_factor = mix(0.0, blend_factor, diff); + + // Lerp/blend + color_resolved = mix(color_history, color_input, blend_factor); + + // Inverse tonemap + color_resolved = reinhard_inverse(color_resolved); + } + + return color_resolved; +} + +void main() { +#ifdef USE_SUBGROUPS + populate_group_shared_memory(gl_WorkGroupID.xy, gl_LocalInvocationIndex); +#endif + + // Out of bounds check + if (any(greaterThanEqual(vec2(gl_GlobalInvocationID.xy), params.resolution))) { + return; + } + +#ifdef USE_SUBGROUPS + const uvec2 pos_group = gl_LocalInvocationID.xy; + const uvec2 pos_group_top_left = gl_WorkGroupID.xy * kGroupSize - kBorderSize; +#else + const uvec2 pos_group = gl_GlobalInvocationID.xy; + const uvec2 pos_group_top_left = uvec2(0, 0); +#endif + const uvec2 pos_screen = gl_GlobalInvocationID.xy; + const vec2 uv = (gl_GlobalInvocationID.xy + 0.5f) / params.resolution; + + vec3 result = temporal_antialiasing(pos_group_top_left, pos_group, pos_screen, uv, history_buffer); + imageStore(output_buffer, ivec2(gl_GlobalInvocationID.xy), vec4(result, 1.0)); +} diff --git a/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl b/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl deleted file mode 100644 index f2010222e5..0000000000 --- a/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl +++ /dev/null @@ -1,703 +0,0 @@ -#[compute] - -#version 450 - -#VERSION_DEFINES - -/* Do not use subgroups here, seems there is not much advantage and causes glitches -#if defined(has_GL_KHR_shader_subgroup_ballot) && defined(has_GL_KHR_shader_subgroup_arithmetic) -#extension GL_KHR_shader_subgroup_ballot: enable -#extension GL_KHR_shader_subgroup_arithmetic: enable - -#define USE_SUBGROUPS -#endif -*/ - -#if defined(MODE_FOG) || defined(MODE_FILTER) - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -#endif - -#if defined(MODE_DENSITY) - -layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; - -#endif - -#include "cluster_data_inc.glsl" -#include "light_data_inc.glsl" - -#define M_PI 3.14159265359 - -layout(set = 0, binding = 1) uniform texture2D shadow_atlas; -layout(set = 0, binding = 2) uniform texture2D directional_shadow_atlas; - -layout(set = 0, binding = 3, std430) restrict readonly buffer OmniLights { - LightData data[]; -} -omni_lights; - -layout(set = 0, binding = 4, std430) restrict readonly buffer SpotLights { - LightData data[]; -} -spot_lights; - -layout(set = 0, binding = 5, std140) uniform DirectionalLights { - DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS]; -} -directional_lights; - -layout(set = 0, binding = 6, std430) buffer restrict readonly ClusterBuffer { - uint data[]; -} -cluster_buffer; - -layout(set = 0, binding = 7) uniform sampler linear_sampler; - -#ifdef MODE_DENSITY -layout(rgba16f, set = 0, binding = 8) uniform restrict writeonly image3D density_map; -layout(rgba16f, set = 0, binding = 9) uniform restrict readonly image3D fog_map; //unused -#endif - -#ifdef MODE_FOG -layout(rgba16f, set = 0, binding = 8) uniform restrict readonly image3D density_map; -layout(rgba16f, set = 0, binding = 9) uniform restrict writeonly image3D fog_map; -#endif - -#ifdef MODE_FILTER -layout(rgba16f, set = 0, binding = 8) uniform restrict readonly image3D source_map; -layout(rgba16f, set = 0, binding = 9) uniform restrict writeonly image3D dest_map; -#endif - -layout(set = 0, binding = 10) uniform sampler shadow_sampler; - -#define MAX_VOXEL_GI_INSTANCES 8 - -struct VoxelGIData { - mat4 xform; - vec3 bounds; - float dynamic_range; - - float bias; - float normal_bias; - bool blend_ambient; - uint texture_slot; - - float anisotropy_strength; - float ambient_occlusion; - float ambient_occlusion_size; - uint mipmaps; -}; - -layout(set = 0, binding = 11, std140) uniform VoxelGIs { - VoxelGIData data[MAX_VOXEL_GI_INSTANCES]; -} -voxel_gi_instances; - -layout(set = 0, binding = 12) uniform texture3D voxel_gi_textures[MAX_VOXEL_GI_INSTANCES]; - -layout(set = 0, binding = 13) uniform sampler linear_sampler_with_mipmaps; - -#ifdef ENABLE_SDFGI - -// SDFGI Integration on set 1 -#define SDFGI_MAX_CASCADES 8 - -struct SDFVoxelGICascadeData { - vec3 position; - float to_probe; - ivec3 probe_world_offset; - float to_cell; // 1/bounds * grid_size -}; - -layout(set = 1, binding = 0, std140) uniform SDFGI { - vec3 grid_size; - uint max_cascades; - - bool use_occlusion; - int probe_axis_size; - float probe_to_uvw; - float normal_bias; - - vec3 lightprobe_tex_pixel_size; - float energy; - - vec3 lightprobe_uv_offset; - float y_mult; - - vec3 occlusion_clamp; - uint pad3; - - vec3 occlusion_renormalize; - uint pad4; - - vec3 cascade_probe_size; - uint pad5; - - SDFVoxelGICascadeData cascades[SDFGI_MAX_CASCADES]; -} -sdfgi; - -layout(set = 1, binding = 1) uniform texture2DArray sdfgi_ambient_texture; - -layout(set = 1, binding = 2) uniform texture3D sdfgi_occlusion_texture; - -#endif //SDFGI - -layout(set = 0, binding = 14, std140) uniform Params { - vec2 fog_frustum_size_begin; - vec2 fog_frustum_size_end; - - float fog_frustum_end; - float z_near; - float z_far; - int filter_axis; - - ivec3 fog_volume_size; - uint directional_light_count; - - vec3 light_color; - float base_density; - - float detail_spread; - float gi_inject; - uint max_voxel_gi_instances; - uint cluster_type_size; - - vec2 screen_size; - uint cluster_shift; - uint cluster_width; - - uint max_cluster_element_count_div_32; - bool use_temporal_reprojection; - uint temporal_frame; - float temporal_blend; - - mat3x4 cam_rotation; - mat4 to_prev_view; -} -params; - -layout(set = 0, binding = 15) uniform texture3D prev_density_texture; - -float get_depth_at_pos(float cell_depth_size, int z) { - float d = float(z) * cell_depth_size + cell_depth_size * 0.5; //center of voxels - d = pow(d, params.detail_spread); - return params.fog_frustum_end * d; -} - -vec3 hash3f(uvec3 x) { - x = ((x >> 16) ^ x) * 0x45d9f3b; - x = ((x >> 16) ^ x) * 0x45d9f3b; - x = (x >> 16) ^ x; - return vec3(x & 0xFFFFF) / vec3(float(0xFFFFF)); -} - -float get_omni_attenuation(float distance, float inv_range, float decay) { - float nd = distance * inv_range; - nd *= nd; - nd *= nd; // nd^4 - nd = max(1.0 - nd, 0.0); - nd *= nd; // nd^2 - return nd * pow(max(distance, 0.0001), -decay); -} - -void cluster_get_item_range(uint p_offset, out uint item_min, out uint item_max, out uint item_from, out uint item_to) { - uint item_min_max = cluster_buffer.data[p_offset]; - item_min = item_min_max & 0xFFFF; - item_max = item_min_max >> 16; - ; - - item_from = item_min >> 5; - item_to = (item_max == 0) ? 0 : ((item_max - 1) >> 5) + 1; //side effect of how it is stored, as item_max 0 means no elements -} - -uint cluster_get_range_clip_mask(uint i, uint z_min, uint z_max) { - int local_min = clamp(int(z_min) - int(i) * 32, 0, 31); - int mask_width = min(int(z_max) - int(z_min), 32 - local_min); - return bitfieldInsert(uint(0), uint(0xFFFFFFFF), local_min, mask_width); -} - -#define TEMPORAL_FRAMES 16 - -const vec3 halton_map[TEMPORAL_FRAMES] = vec3[]( - vec3(0.5, 0.33333333, 0.2), - vec3(0.25, 0.66666667, 0.4), - vec3(0.75, 0.11111111, 0.6), - vec3(0.125, 0.44444444, 0.8), - vec3(0.625, 0.77777778, 0.04), - vec3(0.375, 0.22222222, 0.24), - vec3(0.875, 0.55555556, 0.44), - vec3(0.0625, 0.88888889, 0.64), - vec3(0.5625, 0.03703704, 0.84), - vec3(0.3125, 0.37037037, 0.08), - vec3(0.8125, 0.7037037, 0.28), - vec3(0.1875, 0.14814815, 0.48), - vec3(0.6875, 0.48148148, 0.68), - vec3(0.4375, 0.81481481, 0.88), - vec3(0.9375, 0.25925926, 0.12), - vec3(0.03125, 0.59259259, 0.32)); - -void main() { - vec3 fog_cell_size = 1.0 / vec3(params.fog_volume_size); - -#ifdef MODE_DENSITY - - ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); - if (any(greaterThanEqual(pos, params.fog_volume_size))) { - return; //do not compute - } - - vec3 posf = vec3(pos); - - //posf += mix(vec3(0.0),vec3(1.0),0.3) * hash3f(uvec3(pos)) * 2.0 - 1.0; - - vec3 fog_unit_pos = posf * fog_cell_size + fog_cell_size * 0.5; //center of voxels - - uvec2 screen_pos = uvec2(fog_unit_pos.xy * params.screen_size); - uvec2 cluster_pos = screen_pos >> params.cluster_shift; - uint cluster_offset = (params.cluster_width * cluster_pos.y + cluster_pos.x) * (params.max_cluster_element_count_div_32 + 32); - //positions in screen are too spread apart, no hopes for optimizing with subgroups - - fog_unit_pos.z = pow(fog_unit_pos.z, params.detail_spread); - - vec3 view_pos; - view_pos.xy = (fog_unit_pos.xy * 2.0 - 1.0) * mix(params.fog_frustum_size_begin, params.fog_frustum_size_end, vec2(fog_unit_pos.z)); - view_pos.z = -params.fog_frustum_end * fog_unit_pos.z; - view_pos.y = -view_pos.y; - - vec4 reprojected_density = vec4(0.0); - float reproject_amount = 0.0; - - if (params.use_temporal_reprojection) { - vec3 prev_view = (params.to_prev_view * vec4(view_pos, 1.0)).xyz; - //undo transform into prev view - prev_view.y = -prev_view.y; - //z back to unit size - prev_view.z /= -params.fog_frustum_end; - //xy back to unit size - prev_view.xy /= mix(params.fog_frustum_size_begin, params.fog_frustum_size_end, vec2(prev_view.z)); - prev_view.xy = prev_view.xy * 0.5 + 0.5; - //z back to unspread value - prev_view.z = pow(prev_view.z, 1.0 / params.detail_spread); - - if (all(greaterThan(prev_view, vec3(0.0))) && all(lessThan(prev_view, vec3(1.0)))) { - //reprojectinon fits - - reprojected_density = textureLod(sampler3D(prev_density_texture, linear_sampler), prev_view, 0.0); - reproject_amount = params.temporal_blend; - - // Since we can reproject, now we must jitter the current view pos. - // This is done here because cells that can't reproject should not jitter. - - fog_unit_pos = posf * fog_cell_size + fog_cell_size * halton_map[params.temporal_frame]; //center of voxels, offset by halton table - - screen_pos = uvec2(fog_unit_pos.xy * params.screen_size); - cluster_pos = screen_pos >> params.cluster_shift; - cluster_offset = (params.cluster_width * cluster_pos.y + cluster_pos.x) * (params.max_cluster_element_count_div_32 + 32); - //positions in screen are too spread apart, no hopes for optimizing with subgroups - - fog_unit_pos.z = pow(fog_unit_pos.z, params.detail_spread); - - view_pos.xy = (fog_unit_pos.xy * 2.0 - 1.0) * mix(params.fog_frustum_size_begin, params.fog_frustum_size_end, vec2(fog_unit_pos.z)); - view_pos.z = -params.fog_frustum_end * fog_unit_pos.z; - view_pos.y = -view_pos.y; - } - } - - uint cluster_z = uint(clamp((abs(view_pos.z) / params.z_far) * 32.0, 0.0, 31.0)); - - vec3 total_light = params.light_color; - - float total_density = params.base_density; - float cell_depth_size = abs(view_pos.z - get_depth_at_pos(fog_cell_size.z, pos.z + 1)); - //compute directional lights - - for (uint i = 0; i < params.directional_light_count; i++) { - vec3 shadow_attenuation = vec3(1.0); - - if (directional_lights.data[i].shadow_enabled) { - float depth_z = -view_pos.z; - - vec4 pssm_coord; - vec3 shadow_color = directional_lights.data[i].shadow_color1.rgb; - vec3 light_dir = directional_lights.data[i].direction; - vec4 v = vec4(view_pos, 1.0); - float z_range; - - if (depth_z < directional_lights.data[i].shadow_split_offsets.x) { - pssm_coord = (directional_lights.data[i].shadow_matrix1 * v); - pssm_coord /= pssm_coord.w; - z_range = directional_lights.data[i].shadow_z_range.x; - - } else if (depth_z < directional_lights.data[i].shadow_split_offsets.y) { - pssm_coord = (directional_lights.data[i].shadow_matrix2 * v); - pssm_coord /= pssm_coord.w; - z_range = directional_lights.data[i].shadow_z_range.y; - - } else if (depth_z < directional_lights.data[i].shadow_split_offsets.z) { - pssm_coord = (directional_lights.data[i].shadow_matrix3 * v); - pssm_coord /= pssm_coord.w; - z_range = directional_lights.data[i].shadow_z_range.z; - - } else { - pssm_coord = (directional_lights.data[i].shadow_matrix4 * v); - pssm_coord /= pssm_coord.w; - z_range = directional_lights.data[i].shadow_z_range.w; - } - - float depth = texture(sampler2D(directional_shadow_atlas, linear_sampler), pssm_coord.xy).r; - float shadow = exp(min(0.0, (depth - pssm_coord.z)) * z_range * directional_lights.data[i].shadow_volumetric_fog_fade); - - /* - //float shadow = textureProj(sampler2DShadow(directional_shadow_atlas,shadow_sampler),pssm_coord); - float shadow = 0.0; - for(float xi=-1;xi<=1;xi++) { - for(float yi=-1;yi<=1;yi++) { - vec2 ofs = vec2(xi,yi) * 1.5 * params.directional_shadow_pixel_size; - shadow += textureProj(sampler2DShadow(directional_shadow_atlas,shadow_sampler),pssm_coord + vec4(ofs,0.0,0.0)); - } - - } - - shadow /= 3.0 * 3.0; - -*/ - shadow = mix(shadow, 1.0, smoothstep(directional_lights.data[i].fade_from, directional_lights.data[i].fade_to, view_pos.z)); //done with negative values for performance - - shadow_attenuation = mix(shadow_color, vec3(1.0), shadow); - } - - total_light += shadow_attenuation * directional_lights.data[i].color * directional_lights.data[i].energy / M_PI; - } - - //compute lights from cluster - - { //omni lights - - uint cluster_omni_offset = cluster_offset; - - uint item_min; - uint item_max; - uint item_from; - uint item_to; - - cluster_get_item_range(cluster_omni_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); - -#ifdef USE_SUBGROUPS - item_from = subgroupBroadcastFirst(subgroupMin(item_from)); - item_to = subgroupBroadcastFirst(subgroupMax(item_to)); -#endif - - for (uint i = item_from; i < item_to; i++) { - uint mask = cluster_buffer.data[cluster_omni_offset + i]; - mask &= cluster_get_range_clip_mask(i, item_min, item_max); -#ifdef USE_SUBGROUPS - uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); -#else - uint merged_mask = mask; -#endif - - while (merged_mask != 0) { - uint bit = findMSB(merged_mask); - merged_mask &= ~(1 << bit); -#ifdef USE_SUBGROUPS - if (((1 << bit) & mask) == 0) { //do not process if not originally here - continue; - } -#endif - uint light_index = 32 * i + bit; - - //if (!bool(omni_omni_lights.data[light_index].mask & draw_call.layer_mask)) { - // continue; //not masked - //} - - vec3 light_pos = omni_lights.data[light_index].position; - float d = distance(omni_lights.data[light_index].position, view_pos); - float shadow_attenuation = 1.0; - - if (d * omni_lights.data[light_index].inv_radius < 1.0) { - float attenuation = get_omni_attenuation(d, omni_lights.data[light_index].inv_radius, omni_lights.data[light_index].attenuation); - - vec3 light = omni_lights.data[light_index].color / M_PI; - - if (omni_lights.data[light_index].shadow_enabled) { - //has shadow - vec4 v = vec4(view_pos, 1.0); - - vec4 splane = (omni_lights.data[light_index].shadow_matrix * v); - float shadow_len = length(splane.xyz); //need to remember shadow len from here - - splane.xyz = normalize(splane.xyz); - vec4 clamp_rect = omni_lights.data[light_index].atlas_rect; - - if (splane.z >= 0.0) { - splane.z += 1.0; - - clamp_rect.y += clamp_rect.w; - - } else { - splane.z = 1.0 - splane.z; - } - - splane.xy /= splane.z; - - splane.xy = splane.xy * 0.5 + 0.5; - splane.z = shadow_len * omni_lights.data[light_index].inv_radius; - splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; - splane.w = 1.0; //needed? i think it should be 1 already - - float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; - - shadow_attenuation = exp(min(0.0, (depth - splane.z)) / omni_lights.data[light_index].inv_radius * omni_lights.data[light_index].shadow_volumetric_fog_fade); - } - total_light += light * attenuation * shadow_attenuation; - } - } - } - } - - { //spot lights - - uint cluster_spot_offset = cluster_offset + params.cluster_type_size; - - uint item_min; - uint item_max; - uint item_from; - uint item_to; - - cluster_get_item_range(cluster_spot_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); - -#ifdef USE_SUBGROUPS - item_from = subgroupBroadcastFirst(subgroupMin(item_from)); - item_to = subgroupBroadcastFirst(subgroupMax(item_to)); -#endif - - for (uint i = item_from; i < item_to; i++) { - uint mask = cluster_buffer.data[cluster_spot_offset + i]; - mask &= cluster_get_range_clip_mask(i, item_min, item_max); -#ifdef USE_SUBGROUPS - uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); -#else - uint merged_mask = mask; -#endif - - while (merged_mask != 0) { - uint bit = findMSB(merged_mask); - merged_mask &= ~(1 << bit); -#ifdef USE_SUBGROUPS - if (((1 << bit) & mask) == 0) { //do not process if not originally here - continue; - } -#endif - - //if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) { - // continue; //not masked - //} - - uint light_index = 32 * i + bit; - - vec3 light_pos = spot_lights.data[light_index].position; - vec3 light_rel_vec = spot_lights.data[light_index].position - view_pos; - float d = length(light_rel_vec); - float shadow_attenuation = 1.0; - - if (d * spot_lights.data[light_index].inv_radius < 1.0) { - float attenuation = get_omni_attenuation(d, spot_lights.data[light_index].inv_radius, spot_lights.data[light_index].attenuation); - - vec3 spot_dir = spot_lights.data[light_index].direction; - float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_lights.data[light_index].cone_angle); - float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_lights.data[light_index].cone_angle)); - attenuation *= 1.0 - pow(spot_rim, spot_lights.data[light_index].cone_attenuation); - - vec3 light = spot_lights.data[light_index].color / M_PI; - - if (spot_lights.data[light_index].shadow_enabled) { - //has shadow - vec4 v = vec4(view_pos, 1.0); - - vec4 splane = (spot_lights.data[light_index].shadow_matrix * v); - splane /= splane.w; - - float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; - - shadow_attenuation = exp(min(0.0, (depth - splane.z)) / spot_lights.data[light_index].inv_radius * spot_lights.data[light_index].shadow_volumetric_fog_fade); - } - - total_light += light * attenuation * shadow_attenuation; - } - } - } - } - - vec3 world_pos = mat3(params.cam_rotation) * view_pos; - - for (uint i = 0; i < params.max_voxel_gi_instances; i++) { - vec3 position = (voxel_gi_instances.data[i].xform * vec4(world_pos, 1.0)).xyz; - - //this causes corrupted pixels, i have no idea why.. - if (all(bvec2(all(greaterThanEqual(position, vec3(0.0))), all(lessThan(position, voxel_gi_instances.data[i].bounds))))) { - position /= voxel_gi_instances.data[i].bounds; - - vec4 light = vec4(0.0); - for (uint j = 0; j < voxel_gi_instances.data[i].mipmaps; j++) { - vec4 slight = textureLod(sampler3D(voxel_gi_textures[i], linear_sampler_with_mipmaps), position, float(j)); - float a = (1.0 - light.a); - light += a * slight; - } - - light.rgb *= voxel_gi_instances.data[i].dynamic_range * params.gi_inject; - - total_light += light.rgb; - } - } - - //sdfgi -#ifdef ENABLE_SDFGI - - { - float blend = -1.0; - vec3 ambient_total = vec3(0.0); - - for (uint i = 0; i < sdfgi.max_cascades; i++) { - vec3 cascade_pos = (world_pos - sdfgi.cascades[i].position) * sdfgi.cascades[i].to_probe; - - if (any(lessThan(cascade_pos, vec3(0.0))) || any(greaterThanEqual(cascade_pos, sdfgi.cascade_probe_size))) { - continue; //skip cascade - } - - vec3 base_pos = floor(cascade_pos); - ivec3 probe_base_pos = ivec3(base_pos); - - vec4 ambient_accum = vec4(0.0); - - ivec3 tex_pos = ivec3(probe_base_pos.xy, int(i)); - tex_pos.x += probe_base_pos.z * sdfgi.probe_axis_size; - - for (uint j = 0; j < 8; j++) { - ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); - ivec3 probe_posi = probe_base_pos; - probe_posi += offset; - - // Compute weight - - vec3 probe_pos = vec3(probe_posi); - vec3 probe_to_pos = cascade_pos - probe_pos; - - vec3 trilinear = vec3(1.0) - abs(probe_to_pos); - float weight = trilinear.x * trilinear.y * trilinear.z; - - // Compute lightprobe occlusion - - if (sdfgi.use_occlusion) { - ivec3 occ_indexv = abs((sdfgi.cascades[i].probe_world_offset + probe_posi) & ivec3(1, 1, 1)) * ivec3(1, 2, 4); - vec4 occ_mask = mix(vec4(0.0), vec4(1.0), equal(ivec4(occ_indexv.x | occ_indexv.y), ivec4(0, 1, 2, 3))); - - vec3 occ_pos = clamp(cascade_pos, probe_pos - sdfgi.occlusion_clamp, probe_pos + sdfgi.occlusion_clamp) * sdfgi.probe_to_uvw; - occ_pos.z += float(i); - if (occ_indexv.z != 0) { //z bit is on, means index is >=4, so make it switch to the other half of textures - occ_pos.x += 1.0; - } - - occ_pos *= sdfgi.occlusion_renormalize; - float occlusion = dot(textureLod(sampler3D(sdfgi_occlusion_texture, linear_sampler), occ_pos, 0.0), occ_mask); - - weight *= max(occlusion, 0.01); - } - - // Compute ambient texture position - - ivec3 uvw = tex_pos; - uvw.xy += offset.xy; - uvw.x += offset.z * sdfgi.probe_axis_size; - - vec3 ambient = texelFetch(sampler2DArray(sdfgi_ambient_texture, linear_sampler), uvw, 0).rgb; - - ambient_accum.rgb += ambient * weight; - ambient_accum.a += weight; - } - - if (ambient_accum.a > 0) { - ambient_accum.rgb /= ambient_accum.a; - } - ambient_total = ambient_accum.rgb; - break; - } - - total_light += ambient_total * params.gi_inject; - } - -#endif - - vec4 final_density = vec4(total_light, total_density); - - final_density = mix(final_density, reprojected_density, reproject_amount); - - imageStore(density_map, pos, final_density); -#endif - -#ifdef MODE_FOG - - ivec3 pos = ivec3(gl_GlobalInvocationID.xy, 0); - - if (any(greaterThanEqual(pos, params.fog_volume_size))) { - return; //do not compute - } - - vec4 fog_accum = vec4(0.0); - float prev_z = 0.0; - - float t = 1.0; - - for (int i = 0; i < params.fog_volume_size.z; i++) { - //compute fog position - ivec3 fog_pos = pos + ivec3(0, 0, i); - //get fog value - vec4 fog = imageLoad(density_map, fog_pos); - - //get depth at cell pos - float z = get_depth_at_pos(fog_cell_size.z, i); - //get distance from previous pos - float d = abs(prev_z - z); - //compute exinction based on beer's - float extinction = t * exp(-d * fog.a); - //compute alpha based on different of extinctions - float alpha = t - extinction; - //update extinction - t = extinction; - - fog_accum += vec4(fog.rgb * alpha, alpha); - prev_z = z; - - vec4 fog_value; - - if (fog_accum.a > 0.0) { - fog_value = vec4(fog_accum.rgb / fog_accum.a, 1.0 - t); - } else { - fog_value = vec4(0.0); - } - - imageStore(fog_map, fog_pos, fog_value); - } - -#endif - -#ifdef MODE_FILTER - - ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); - - const float gauss[7] = float[](0.071303, 0.131514, 0.189879, 0.214607, 0.189879, 0.131514, 0.071303); - - const ivec3 filter_dir[3] = ivec3[](ivec3(1, 0, 0), ivec3(0, 1, 0), ivec3(0, 0, 1)); - ivec3 offset = filter_dir[params.filter_axis]; - - vec4 accum = vec4(0.0); - for (int i = -3; i <= 3; i++) { - accum += imageLoad(source_map, clamp(pos + offset * i, ivec3(0), params.fog_volume_size - ivec3(1))) * gauss[i + 3]; - } - - imageStore(dest_map, pos, accum); - -#endif -} |