diff options
Diffstat (limited to 'servers/rendering/renderer_rd/shaders')
11 files changed, 411 insertions, 393 deletions
diff --git a/servers/rendering/renderer_rd/shaders/SCsub b/servers/rendering/renderer_rd/shaders/SCsub index 1b0197c1c1..c192574ff2 100644 --- a/servers/rendering/renderer_rd/shaders/SCsub +++ b/servers/rendering/renderer_rd/shaders/SCsub @@ -39,7 +39,6 @@ if "RD_GLSL" in env["BUILDERS"]: env.RD_GLSL("sdfgi_debug.glsl") env.RD_GLSL("sdfgi_debug_probes.glsl") env.RD_GLSL("volumetric_fog.glsl") - env.RD_GLSL("shadow_reduce.glsl") env.RD_GLSL("particles.glsl") env.RD_GLSL("particles_copy.glsl") env.RD_GLSL("sort.glsl") diff --git a/servers/rendering/renderer_rd/shaders/gi.glsl b/servers/rendering/renderer_rd/shaders/gi.glsl index 35522103df..92a5682572 100644 --- a/servers/rendering/renderer_rd/shaders/gi.glsl +++ b/servers/rendering/renderer_rd/shaders/gi.glsl @@ -363,57 +363,62 @@ void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, o ray_pos += (ray_dir * 1.0 / max(abs_ray_dir.x, max(abs_ray_dir.y, abs_ray_dir.z)) + cam_normal * 1.4) * bias / sdfgi.cascades[cascade].to_cell; } float softness = 0.2 + min(1.0, roughness * 5.0) * 4.0; //approximation to roughness so it does not seem like a hard fade - while (length(ray_pos) < max_distance) { - for (uint i = 0; i < sdfgi.max_cascades; i++) { - if (i >= cascade && length(ray_pos) < radius_sizes[i]) { - cascade = max(i, cascade); //never go down - - vec3 pos = ray_pos - sdfgi.cascades[i].position; - pos *= sdfgi.cascades[i].to_cell * pos_to_uvw; - - float distance = texture(sampler3D(sdf_cascades[i], linear_sampler), pos).r * 255.0 - 1.1; - - vec4 hit_light = vec4(0.0); - if (distance < softness) { - hit_light.rgb = texture(sampler3D(light_cascades[i], linear_sampler), pos).rgb; - hit_light.rgb *= 0.5; //approximation given value read is actually meant for anisotropy - hit_light.a = clamp(1.0 - (distance / softness), 0.0, 1.0); - hit_light.rgb *= hit_light.a; - } + uint i = 0; + bool found = false; + while (true) { + if (length(ray_pos) >= max_distance || light_accum.a > 0.99) { + break; + } + if (!found && i >= cascade && length(ray_pos) < radius_sizes[i]) { + uint next_i = min(i + 1, sdfgi.max_cascades - 1); + cascade = max(i, cascade); //never go down - distance /= sdfgi.cascades[i].to_cell; + vec3 pos = ray_pos - sdfgi.cascades[i].position; + pos *= sdfgi.cascades[i].to_cell * pos_to_uvw; - if (i < (sdfgi.max_cascades - 1)) { - pos = ray_pos - sdfgi.cascades[i + 1].position; - pos *= sdfgi.cascades[i + 1].to_cell * pos_to_uvw; + float fdistance = textureLod(sampler3D(sdf_cascades[i], linear_sampler), pos, 0.0).r * 255.0 - 1.1; - float distance2 = texture(sampler3D(sdf_cascades[i + 1], linear_sampler), pos).r * 255.0 - 1.1; + vec4 hit_light = vec4(0.0); + if (fdistance < softness) { + hit_light.rgb = textureLod(sampler3D(light_cascades[i], linear_sampler), pos, 0.0).rgb; + hit_light.rgb *= 0.5; //approximation given value read is actually meant for anisotropy + hit_light.a = clamp(1.0 - (fdistance / softness), 0.0, 1.0); + hit_light.rgb *= hit_light.a; + } - vec4 hit_light2 = vec4(0.0); - if (distance2 < softness) { - hit_light2.rgb = texture(sampler3D(light_cascades[i + 1], linear_sampler), pos).rgb; - hit_light2.rgb *= 0.5; //approximation given value read is actually meant for anisotropy - hit_light2.a = clamp(1.0 - (distance2 / softness), 0.0, 1.0); - hit_light2.rgb *= hit_light2.a; - } + fdistance /= sdfgi.cascades[i].to_cell; - float prev_radius = i == 0 ? 0.0 : radius_sizes[i - 1]; - float blend = clamp((length(ray_pos) - prev_radius) / (radius_sizes[i] - prev_radius), 0.0, 1.0); + if (i < (sdfgi.max_cascades - 1)) { + pos = ray_pos - sdfgi.cascades[next_i].position; + pos *= sdfgi.cascades[next_i].to_cell * pos_to_uvw; - distance2 /= sdfgi.cascades[i + 1].to_cell; + float fdistance2 = textureLod(sampler3D(sdf_cascades[next_i], linear_sampler), pos, 0.0).r * 255.0 - 1.1; - hit_light = mix(hit_light, hit_light2, blend); - distance = mix(distance, distance2, blend); + vec4 hit_light2 = vec4(0.0); + if (fdistance2 < softness) { + hit_light2.rgb = textureLod(sampler3D(light_cascades[next_i], linear_sampler), pos, 0.0).rgb; + hit_light2.rgb *= 0.5; //approximation given value read is actually meant for anisotropy + hit_light2.a = clamp(1.0 - (fdistance2 / softness), 0.0, 1.0); + hit_light2.rgb *= hit_light2.a; } - light_accum += hit_light; - ray_pos += ray_dir * distance; - break; + float prev_radius = i == 0 ? 0.0 : radius_sizes[max(0, i - 1)]; + float blend = clamp((length(ray_pos) - prev_radius) / (radius_sizes[i] - prev_radius), 0.0, 1.0); + + fdistance2 /= sdfgi.cascades[next_i].to_cell; + + hit_light = mix(hit_light, hit_light2, blend); + fdistance = mix(fdistance, fdistance2, blend); } - } - if (light_accum.a > 0.99) { - break; + light_accum += hit_light; + ray_pos += ray_dir * fdistance; + found = true; + } + i++; + if (i == sdfgi.max_cascades) { + i = 0; + found = false; } } diff --git a/servers/rendering/renderer_rd/shaders/giprobe.glsl b/servers/rendering/renderer_rd/shaders/giprobe.glsl index 4f4753d147..b931461b31 100644 --- a/servers/rendering/renderer_rd/shaders/giprobe.glsl +++ b/servers/rendering/renderer_rd/shaders/giprobe.glsl @@ -51,10 +51,10 @@ struct Light { float attenuation; vec3 color; - float spot_angle_radians; + float cos_spot_angle; vec3 position; - float spot_attenuation; + float inv_spot_attenuation; vec3 direction; bool has_shadow; @@ -233,13 +233,15 @@ bool compute_light_vector(uint light, vec3 pos, out float attenuation, out vec3 if (lights.data[light].type == LIGHT_TYPE_SPOT) { vec3 rel = normalize(pos - light_pos); - float angle = acos(dot(rel, lights.data[light].direction)); - if (angle > lights.data[light].spot_angle_radians) { + float cos_spot_angle = lights.data[light].cos_spot_angle; + float cos_angle = dot(rel, lights.data[light].direction); + if (cos_angle < cos_spot_angle) { return false; } - float d = clamp(angle / lights.data[light].spot_angle_radians, 0, 1); - attenuation *= pow(1.0 - d, lights.data[light].spot_attenuation); + float scos = max(cos_angle, cos_spot_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cos_spot_angle)); + attenuation *= 1.0 - pow(spot_rim, lights.data[light].inv_spot_attenuation); } } diff --git a/servers/rendering/renderer_rd/shaders/giprobe_write.glsl b/servers/rendering/renderer_rd/shaders/giprobe_write.glsl index 9c794f1bcc..56b3b7ccb4 100644 --- a/servers/rendering/renderer_rd/shaders/giprobe_write.glsl +++ b/servers/rendering/renderer_rd/shaders/giprobe_write.glsl @@ -43,10 +43,10 @@ struct Light { float attenuation; vec3 color; - float spot_angle_radians; + float cos_spot_angle; vec3 position; - float spot_attenuation; + float inv_spot_attenuation; vec3 direction; bool has_shadow; @@ -146,13 +146,15 @@ bool compute_light_vector(uint light, uint cell, vec3 pos, out float attenuation if (lights.data[light].type == LIGHT_TYPE_SPOT) { vec3 rel = normalize(pos - light_pos); - float angle = acos(dot(rel, lights.data[light].direction)); - if (angle > lights.data[light].spot_angle_radians) { + float cos_spot_angle = lights.data[light].cos_spot_angle; + float cos_angle = dot(rel, lights.data[light].direction); + if (cos_angle < cos_spot_angle) { return false; } - float d = clamp(angle / lights.data[light].spot_angle_radians, 0, 1); - attenuation *= pow(1.0 - d, lights.data[light].spot_attenuation); + float scos = max(cos_angle, cos_spot_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cos_spot_angle)); + attenuation *= 1.0 - pow(spot_rim, lights.data[light].inv_spot_attenuation); } } diff --git a/servers/rendering/renderer_rd/shaders/scene_forward.glsl b/servers/rendering/renderer_rd/shaders/scene_forward.glsl index ea203c8abe..1cea9bf8db 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward.glsl @@ -89,12 +89,6 @@ MATERIAL_UNIFORMS } material; #endif -/* clang-format off */ - -VERTEX_SHADER_GLOBALS - -/* clang-format on */ - invariant gl_Position; #ifdef MODE_DUAL_PARABOLOID @@ -103,28 +97,43 @@ layout(location = 8) out float dp_clip; #endif +layout(location = 9) out flat uint instance_index; + +/* clang-format off */ + +VERTEX_SHADER_GLOBALS + +/* clang-format on */ + void main() { vec4 instance_custom = vec4(0.0); #if defined(COLOR_USED) color_interp = color_attrib; #endif - mat4 world_matrix = draw_call.transform; + instance_index = draw_call.instance_index; + + bool is_multimesh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH); + if (!is_multimesh) { + instance_index += gl_InstanceIndex; + } + + mat4 world_matrix = instances.data[instance_index].transform; mat3 world_normal_matrix; - if (bool(draw_call.flags & INSTANCE_FLAGS_NON_UNIFORM_SCALE)) { + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_NON_UNIFORM_SCALE)) { world_normal_matrix = inverse(mat3(world_matrix)); } else { world_normal_matrix = mat3(world_matrix); } - if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH)) { + if (is_multimesh) { //multimesh, instances are for it - uint offset = (draw_call.flags >> INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT) & INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK; + uint offset = (instances.data[instance_index].flags >> INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT) & INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK; offset *= gl_InstanceIndex; mat4 matrix; - if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_FORMAT_2D)) { + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_FORMAT_2D)) { matrix = mat4(transforms.data[offset + 0], transforms.data[offset + 1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)); offset += 2; } else { @@ -132,14 +141,14 @@ void main() { offset += 3; } - if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_HAS_COLOR)) { + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_HAS_COLOR)) { #ifdef COLOR_USED color_interp *= transforms.data[offset]; #endif offset += 1; } - if (bool(draw_call.flags & INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA)) { + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA)) { instance_custom = transforms.data[offset]; } @@ -161,7 +170,7 @@ void main() { #endif #if 0 - if (bool(draw_call.flags & INSTANCE_FLAGS_SKELETON)) { + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_SKELETON)) { //multimesh, instances are for it uvec2 bones_01 = uvec2(bone_attrib.x & 0xFFFF, bone_attrib.x >> 16) * 3; @@ -194,7 +203,7 @@ void main() { uv2_interp = uv2_attrib; #endif -#ifdef USE_OVERRIDE_POSITION +#ifdef OVERRIDE_POSITION vec4 position; #endif @@ -289,7 +298,7 @@ VERTEX_SHADER_CODE #endif //MODE_RENDER_DEPTH -#ifdef USE_OVERRIDE_POSITION +#ifdef OVERRIDE_POSITION gl_Position = position; #else gl_Position = projection_matrix * vec4(vertex_interp, 1.0); @@ -304,7 +313,8 @@ VERTEX_SHADER_CODE #endif #ifdef MODE_RENDER_MATERIAL if (scene_data.material_uv2_mode) { - gl_Position.xy = (uv2_attrib.xy + draw_call.lightmap_uv_scale.xy) * 2.0 - 1.0; + vec2 uv_offset = unpackHalf2x16(draw_call.uv_offset); + gl_Position.xy = (uv2_attrib.xy + uv_offset) * 2.0 - 1.0; gl_Position.z = 0.00001; gl_Position.w = 1.0; } @@ -350,9 +360,11 @@ layout(location = 8) in float dp_clip; #endif +layout(location = 9) in flat uint instance_index; + //defines to keep compatibility with vertex -#define world_matrix draw_call.transform +#define world_matrix instances.data[instance_index].transform #define projection_matrix scene_data.projection_matrix #if defined(ENABLE_SSS) && defined(ENABLE_TRANSMITTANCE) @@ -787,13 +799,11 @@ LIGHT_SHADER_CODE #ifndef USE_NO_SHADOWS -// Produces cheap white noise, optimized for window-space -// Comes from: https://www.shadertoy.com/view/4djSRW -// Copyright: Dave Hoskins, MIT License +// Interleaved Gradient Noise +// http://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare float quick_hash(vec2 pos) { - vec3 p3 = fract(vec3(pos.xyx) * .1031); - p3 += dot(p3, p3.yzx + 33.33); - return fract((p3.x + p3.y) * p3.z); + const vec3 magic = vec3(0.06711056f, 0.00583715f, 52.9829189f); + return fract(magic.z * fract(dot(pos, magic.xy))); } float sample_directional_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, vec4 coord) { @@ -1770,7 +1780,7 @@ vec4 fog_process(vec3 vertex) { } } - float fog_amount = 1.0 - exp(vertex.z * scene_data.fog_density); + float fog_amount = 1.0 - exp(min(0.0, vertex.z * scene_data.fog_density)); if (abs(scene_data.fog_height_density) > 0.001) { float y = (scene_data.camera_matrix * vec4(vertex, 1.0)).y; @@ -2083,7 +2093,7 @@ FRAGMENT_SHADER_CODE #endif uint decal_index = 32 * i + bit; - if (!bool(decals.data[decal_index].mask & draw_call.layer_mask)) { + if (!bool(decals.data[decal_index].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -2210,8 +2220,8 @@ FRAGMENT_SHADER_CODE #ifdef USE_LIGHTMAP //lightmap - if (bool(draw_call.flags & INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE)) { //has lightmap capture - uint index = draw_call.gi_offset; + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_LIGHTMAP_CAPTURE)) { //has lightmap capture + uint index = instances.data[instance_index].gi_offset; vec3 wnormal = mat3(scene_data.camera_matrix) * normal; const float c1 = 0.429043; @@ -2230,12 +2240,12 @@ FRAGMENT_SHADER_CODE 2.0 * c2 * lightmap_captures.data[index].sh[1].rgb * wnormal.y + 2.0 * c2 * lightmap_captures.data[index].sh[2].rgb * wnormal.z); - } else if (bool(draw_call.flags & INSTANCE_FLAGS_USE_LIGHTMAP)) { // has actual lightmap - bool uses_sh = bool(draw_call.flags & INSTANCE_FLAGS_USE_SH_LIGHTMAP); - uint ofs = draw_call.gi_offset & 0xFFFF; + } else if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_LIGHTMAP)) { // has actual lightmap + bool uses_sh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SH_LIGHTMAP); + uint ofs = instances.data[instance_index].gi_offset & 0xFFFF; vec3 uvw; - uvw.xy = uv2 * draw_call.lightmap_uv_scale.zw + draw_call.lightmap_uv_scale.xy; - uvw.z = float((draw_call.gi_offset >> 16) & 0xFFFF); + uvw.xy = uv2 * instances.data[instance_index].lightmap_uv_scale.zw + instances.data[instance_index].lightmap_uv_scale.xy; + uvw.z = float((instances.data[instance_index].gi_offset >> 16) & 0xFFFF); if (uses_sh) { uvw.z *= 4.0; //SH textures use 4 times more data @@ -2244,7 +2254,7 @@ FRAGMENT_SHADER_CODE vec3 lm_light_l1_0 = textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw + vec3(0.0, 0.0, 2.0), 0.0).rgb; vec3 lm_light_l1p1 = textureLod(sampler2DArray(lightmap_textures[ofs], material_samplers[SAMPLER_LINEAR_CLAMP]), uvw + vec3(0.0, 0.0, 3.0), 0.0).rgb; - uint idx = draw_call.gi_offset >> 20; + uint idx = instances.data[instance_index].gi_offset >> 20; vec3 n = normalize(lightmaps.data[idx].normal_xform * normal); ambient_light += lm_light_l0 * 0.282095f; @@ -2264,7 +2274,7 @@ FRAGMENT_SHADER_CODE } #elif defined(USE_FORWARD_GI) - if (bool(draw_call.flags & INSTANCE_FLAGS_USE_SDFGI)) { //has lightmap capture + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SDFGI)) { //has lightmap capture //make vertex orientation the world one, but still align to camera vec3 cam_pos = mat3(scene_data.camera_matrix) * vertex; @@ -2336,9 +2346,9 @@ FRAGMENT_SHADER_CODE } } - if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes - uint index1 = draw_call.gi_offset & 0xFFFF; + uint index1 = instances.data[instance_index].gi_offset & 0xFFFF; vec3 ref_vec = normalize(reflect(normalize(vertex), normal)); //find arbitrary tangent and bitangent, then build a matrix vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0); @@ -2350,7 +2360,7 @@ FRAGMENT_SHADER_CODE vec4 spec_accum = vec4(0.0); gi_probe_compute(index1, vertex, normal, ref_vec, normal_mat, roughness * roughness, ambient_light, specular_light, spec_accum, amb_accum); - uint index2 = draw_call.gi_offset >> 16; + uint index2 = instances.data[instance_index].gi_offset >> 16; if (index2 != 0xFFFF) { gi_probe_compute(index2, vertex, normal, ref_vec, normal_mat, roughness * roughness, ambient_light, specular_light, spec_accum, amb_accum); @@ -2369,7 +2379,7 @@ FRAGMENT_SHADER_CODE } #elif !defined(LOW_END_MODE) - if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GI_BUFFERS)) { //use GI buffers + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GI_BUFFERS)) { //use GI buffers vec2 coord; @@ -2448,7 +2458,7 @@ FRAGMENT_SHADER_CODE #endif uint reflection_index = 32 * i + bit; - if (!bool(reflections.data[reflection_index].mask & draw_call.layer_mask)) { + if (!bool(reflections.data[reflection_index].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -2519,7 +2529,7 @@ FRAGMENT_SHADER_CODE break; } - if (!bool(directional_lights.data[i].mask & draw_call.layer_mask)) { + if (!bool(directional_lights.data[i].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -2838,7 +2848,7 @@ FRAGMENT_SHADER_CODE break; } - if (!bool(directional_lights.data[i].mask & draw_call.layer_mask)) { + if (!bool(directional_lights.data[i].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -2968,7 +2978,7 @@ FRAGMENT_SHADER_CODE #endif uint light_index = 32 * i + bit; - if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) { + if (!bool(omni_lights.data[light_index].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -3041,7 +3051,7 @@ FRAGMENT_SHADER_CODE uint light_index = 32 * i + bit; - if (!bool(spot_lights.data[light_index].mask & draw_call.layer_mask)) { + if (!bool(spot_lights.data[light_index].mask & instances.data[instance_index].layer_mask)) { continue; //not masked } @@ -3214,9 +3224,9 @@ FRAGMENT_SHADER_CODE normal_roughness_output_buffer = vec4(normal * 0.5 + 0.5, roughness); #ifdef MODE_RENDER_GIPROBE - if (bool(draw_call.flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes - uint index1 = draw_call.gi_offset & 0xFFFF; - uint index2 = draw_call.gi_offset >> 16; + if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_GIPROBE)) { // process giprobes + uint index1 = instances.data[instance_index].gi_offset & 0xFFFF; + uint index2 = instances.data[instance_index].gi_offset >> 16; giprobe_buffer.x = index1 & 0xFF; giprobe_buffer.y = index2 & 0xFF; } else { @@ -3275,6 +3285,7 @@ FRAGMENT_SHADER_CODE // Draw "fixed" fog before volumetric fog to ensure volumetric fog can appear in front of the sky. frag_color.rgb = mix(frag_color.rgb, fog.rgb, fog.a); + ; #endif //MODE_MULTIPLE_RENDER_TARGETS diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl index e9b79e1560..d78890fa9e 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_inc.glsl @@ -21,12 +21,10 @@ #endif layout(push_constant, binding = 0, std430) uniform DrawCall { - mat4 transform; - uint flags; - uint instance_uniforms_ofs; //base offset in global buffer for instance variables - uint gi_offset; //GI information when using lightmapping (VCT or lightmap index) - uint layer_mask; - vec4 lightmap_uv_scale; + uint instance_index; + uint uv_offset; + uint pad0; + uint pad1; } draw_call; @@ -45,96 +43,13 @@ draw_call; #define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_REPEAT 10 #define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_REPEAT 11 -layout(set = 0, binding = 1) uniform sampler material_samplers[12]; - -layout(set = 0, binding = 2) uniform sampler shadow_sampler; - #define SDFGI_MAX_CASCADES 8 -layout(set = 0, binding = 3, std140) uniform SceneData { - mat4 projection_matrix; - mat4 inv_projection_matrix; - - mat4 camera_matrix; - mat4 inv_camera_matrix; - - vec2 viewport_size; - vec2 screen_pixel_size; - - uint cluster_shift; - uint cluster_width; - uint cluster_type_size; - uint max_cluster_element_count_div_32; - - //use vec4s because std140 doesnt play nice with vec2s, z and w are wasted - vec4 directional_penumbra_shadow_kernel[32]; - vec4 directional_soft_shadow_kernel[32]; - vec4 penumbra_shadow_kernel[32]; - vec4 soft_shadow_kernel[32]; - - uint directional_penumbra_shadow_samples; - uint directional_soft_shadow_samples; - uint penumbra_shadow_samples; - uint soft_shadow_samples; - - vec4 ambient_light_color_energy; +/* Set 1: Base Pass (never changes) */ - float ambient_color_sky_mix; - bool use_ambient_light; - bool use_ambient_cubemap; - bool use_reflection_cubemap; - - mat3 radiance_inverse_xform; - - vec2 shadow_atlas_pixel_size; - vec2 directional_shadow_pixel_size; - - uint directional_light_count; - float dual_paraboloid_side; - float z_far; - float z_near; - - bool ssao_enabled; - float ssao_light_affect; - float ssao_ao_affect; - bool roughness_limiter_enabled; - - float roughness_limiter_amount; - float roughness_limiter_limit; - uvec2 roughness_limiter_pad; - - vec4 ao_color; - - mat4 sdf_to_bounds; - - ivec3 sdf_offset; - bool material_uv2_mode; - - ivec3 sdf_size; - bool gi_upscale_for_msaa; - - bool volumetric_fog_enabled; - float volumetric_fog_inv_length; - float volumetric_fog_detail_spread; - uint volumetric_fog_pad; - - bool fog_enabled; - float fog_density; - float fog_height; - float fog_height_density; - - vec3 fog_light_color; - float fog_sun_scatter; - - float fog_aerial_perspective; - - float time; - float reflection_multiplier; // one normally, zero when rendering reflections - - bool pancake_shadows; -} +layout(set = 0, binding = 1) uniform sampler material_samplers[12]; -scene_data; +layout(set = 0, binding = 2) uniform sampler shadow_sampler; #define INSTANCE_FLAGS_USE_GI_BUFFERS (1 << 6) #define INSTANCE_FLAGS_USE_SDFGI (1 << 7) @@ -153,22 +68,22 @@ scene_data; #define INSTANCE_FLAGS_SKELETON (1 << 19) #define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 20) -layout(set = 0, binding = 4, std430) restrict readonly buffer OmniLights { +layout(set = 0, binding = 3, std430) restrict readonly buffer OmniLights { LightData data[]; } omni_lights; -layout(set = 0, binding = 5, std430) restrict readonly buffer SpotLights { +layout(set = 0, binding = 4, std430) restrict readonly buffer SpotLights { LightData data[]; } spot_lights; -layout(set = 0, binding = 6) buffer restrict readonly ReflectionProbeData { +layout(set = 0, binding = 5) buffer restrict readonly ReflectionProbeData { ReflectionData data[]; } reflections; -layout(set = 0, binding = 7, std140) uniform DirectionalLights { +layout(set = 0, binding = 6, std140) uniform DirectionalLights { DirectionalLightData data[MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS]; } directional_lights; @@ -180,7 +95,7 @@ struct Lightmap { mat3 normal_xform; }; -layout(set = 0, binding = 8, std140) restrict readonly buffer Lightmaps { +layout(set = 0, binding = 7, std140) restrict readonly buffer Lightmaps { Lightmap data[]; } lightmaps; @@ -189,20 +104,20 @@ struct LightmapCapture { vec4 sh[9]; }; -layout(set = 0, binding = 9, std140) restrict readonly buffer LightmapCaptures { +layout(set = 0, binding = 8, std140) restrict readonly buffer LightmapCaptures { LightmapCapture data[]; } lightmap_captures; -layout(set = 0, binding = 10) uniform texture2D decal_atlas; -layout(set = 0, binding = 11) uniform texture2D decal_atlas_srgb; +layout(set = 0, binding = 9) uniform texture2D decal_atlas; +layout(set = 0, binding = 10) uniform texture2D decal_atlas_srgb; -layout(set = 0, binding = 12, std430) restrict readonly buffer Decals { +layout(set = 0, binding = 11, std430) restrict readonly buffer Decals { DecalData data[]; } decals; -layout(set = 0, binding = 13, std430) restrict readonly buffer GlobalVariableData { +layout(set = 0, binding = 12, std430) restrict readonly buffer GlobalVariableData { vec4 data[]; } global_variables; @@ -216,7 +131,7 @@ struct SDFGIProbeCascadeData { float to_cell; // 1/bounds * grid_size }; -layout(set = 0, binding = 14, std140) uniform SDFGI { +layout(set = 0, binding = 13, std140) uniform SDFGI { vec3 grid_size; uint max_cascades; @@ -246,47 +161,140 @@ sdfgi; #endif //LOW_END_MODE -// decal atlas +/* Set 2: Render Pass (changes per render pass) */ -/* Set 1, Radiance */ +layout(set = 1, binding = 0, std140) uniform SceneData { + mat4 projection_matrix; + mat4 inv_projection_matrix; + + mat4 camera_matrix; + mat4 inv_camera_matrix; + + vec2 viewport_size; + vec2 screen_pixel_size; + + uint cluster_shift; + uint cluster_width; + uint cluster_type_size; + uint max_cluster_element_count_div_32; + + //use vec4s because std140 doesnt play nice with vec2s, z and w are wasted + vec4 directional_penumbra_shadow_kernel[32]; + vec4 directional_soft_shadow_kernel[32]; + vec4 penumbra_shadow_kernel[32]; + vec4 soft_shadow_kernel[32]; + + uint directional_penumbra_shadow_samples; + uint directional_soft_shadow_samples; + uint penumbra_shadow_samples; + uint soft_shadow_samples; + + vec4 ambient_light_color_energy; + + float ambient_color_sky_mix; + bool use_ambient_light; + bool use_ambient_cubemap; + bool use_reflection_cubemap; + + mat3 radiance_inverse_xform; + + vec2 shadow_atlas_pixel_size; + vec2 directional_shadow_pixel_size; + + uint directional_light_count; + float dual_paraboloid_side; + float z_far; + float z_near; + + bool ssao_enabled; + float ssao_light_affect; + float ssao_ao_affect; + bool roughness_limiter_enabled; + + float roughness_limiter_amount; + float roughness_limiter_limit; + uvec2 roughness_limiter_pad; + + vec4 ao_color; + + mat4 sdf_to_bounds; + + ivec3 sdf_offset; + bool material_uv2_mode; + + ivec3 sdf_size; + bool gi_upscale_for_msaa; + + bool volumetric_fog_enabled; + float volumetric_fog_inv_length; + float volumetric_fog_detail_spread; + uint volumetric_fog_pad; + + bool fog_enabled; + float fog_density; + float fog_height; + float fog_height_density; + + vec3 fog_light_color; + float fog_sun_scatter; + + float fog_aerial_perspective; + + float time; + float reflection_multiplier; // one normally, zero when rendering reflections + + bool pancake_shadows; +} + +scene_data; + +struct InstanceData { + mat4 transform; + uint flags; + uint instance_uniforms_ofs; //base offset in global buffer for instance variables + uint gi_offset; //GI information when using lightmapping (VCT or lightmap index) + uint layer_mask; + vec4 lightmap_uv_scale; +}; + +layout(set = 1, binding = 1, std430) buffer restrict readonly InstanceDataBuffer { + InstanceData data[]; +} +instances; #ifdef USE_RADIANCE_CUBEMAP_ARRAY -layout(set = 1, binding = 0) uniform textureCubeArray radiance_cubemap; +layout(set = 1, binding = 2) uniform textureCubeArray radiance_cubemap; #else -layout(set = 1, binding = 0) uniform textureCube radiance_cubemap; +layout(set = 1, binding = 2) uniform textureCube radiance_cubemap; #endif -/* Set 2, Reflection and Shadow Atlases (view dependent) */ - -layout(set = 1, binding = 1) uniform textureCubeArray reflection_atlas; +layout(set = 1, binding = 3) uniform textureCubeArray reflection_atlas; -layout(set = 1, binding = 2) uniform texture2D shadow_atlas; +layout(set = 1, binding = 4) uniform texture2D shadow_atlas; -layout(set = 1, binding = 3) uniform texture2D directional_shadow_atlas; +layout(set = 1, binding = 5) uniform texture2D directional_shadow_atlas; -layout(set = 1, binding = 4) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES]; +layout(set = 1, binding = 6) uniform texture2DArray lightmap_textures[MAX_LIGHTMAP_TEXTURES]; -#ifndef LOW_END_MODE -layout(set = 1, binding = 5) uniform texture3D gi_probe_textures[MAX_GI_PROBES]; +#ifndef LOW_END_MOD +layout(set = 1, binding = 7) uniform texture3D gi_probe_textures[MAX_GI_PROBES]; #endif -layout(set = 1, binding = 6, std430) buffer restrict readonly ClusterBuffer { +layout(set = 1, binding = 8, std430) buffer restrict readonly ClusterBuffer { uint data[]; } cluster_buffer; -/* Set 3, Render Buffers */ - #ifdef MODE_RENDER_SDF -layout(r16ui, set = 1, binding = 7) uniform restrict writeonly uimage3D albedo_volume_grid; -layout(r32ui, set = 1, binding = 8) uniform restrict writeonly uimage3D emission_grid; -layout(r32ui, set = 1, binding = 9) uniform restrict writeonly uimage3D emission_aniso_grid; -layout(r32ui, set = 1, binding = 10) uniform restrict uimage3D geom_facing_grid; +layout(r16ui, set = 1, binding = 9) uniform restrict writeonly uimage3D albedo_volume_grid; +layout(r32ui, set = 1, binding = 10) uniform restrict writeonly uimage3D emission_grid; +layout(r32ui, set = 1, binding = 11) uniform restrict writeonly uimage3D emission_aniso_grid; +layout(r32ui, set = 1, binding = 12) uniform restrict uimage3D geom_facing_grid; //still need to be present for shaders that use it, so remap them to something #define depth_buffer shadow_atlas @@ -295,17 +303,17 @@ layout(r32ui, set = 1, binding = 10) uniform restrict uimage3D geom_facing_grid; #else -layout(set = 1, binding = 7) uniform texture2D depth_buffer; -layout(set = 1, binding = 8) uniform texture2D color_buffer; +layout(set = 1, binding = 9) uniform texture2D depth_buffer; +layout(set = 1, binding = 10) uniform texture2D color_buffer; #ifndef LOW_END_MODE -layout(set = 1, binding = 9) uniform texture2D normal_roughness_buffer; -layout(set = 1, binding = 10) uniform texture2D ao_buffer; -layout(set = 1, binding = 11) uniform texture2D ambient_buffer; -layout(set = 1, binding = 12) uniform texture2D reflection_buffer; -layout(set = 1, binding = 13) uniform texture2DArray sdfgi_lightprobe_texture; -layout(set = 1, binding = 14) uniform texture3D sdfgi_occlusion_cascades; +layout(set = 1, binding = 11) uniform texture2D normal_roughness_buffer; +layout(set = 1, binding = 12) uniform texture2D ao_buffer; +layout(set = 1, binding = 13) uniform texture2D ambient_buffer; +layout(set = 1, binding = 14) uniform texture2D reflection_buffer; +layout(set = 1, binding = 15) uniform texture2DArray sdfgi_lightprobe_texture; +layout(set = 1, binding = 16) uniform texture3D sdfgi_occlusion_cascades; struct GIProbeData { mat4 xform; @@ -323,22 +331,22 @@ struct GIProbeData { uint mipmaps; }; -layout(set = 1, binding = 15, std140) uniform GIProbes { +layout(set = 1, binding = 17, std140) uniform GIProbes { GIProbeData data[MAX_GI_PROBES]; } gi_probes; -layout(set = 1, binding = 16) uniform texture3D volumetric_fog_texture; +layout(set = 1, binding = 18) uniform texture3D volumetric_fog_texture; #endif // LOW_END_MODE #endif -/* Set 4 Skeleton & Instancing (Multimesh) */ +/* Set 2 Skeleton & Instancing (can change per item) */ layout(set = 2, binding = 0, std430) restrict readonly buffer Transforms { vec4 data[]; } transforms; -/* Set 5 User Material */ +/* Set 3 User Material */ diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl index bcdfe8cc85..dc7238abed 100644 --- a/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl +++ b/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl @@ -67,8 +67,8 @@ struct Light { float attenuation; uint type; - float spot_angle; - float spot_attenuation; + float cos_spot_angle; + float inv_spot_attenuation; float radius; vec4 shadow_color; @@ -80,6 +80,7 @@ layout(set = 0, binding = 9, std140) buffer restrict readonly Lights { lights; layout(set = 0, binding = 10) uniform texture2DArray lightprobe_texture; +layout(set = 0, binding = 11) uniform texture3D occlusion_texture; layout(push_constant, binding = 0, std430) uniform Params { vec3 grid_size; @@ -91,9 +92,9 @@ layout(push_constant, binding = 0, std430) uniform Params { uint process_increment; int probe_axis_size; - bool multibounce; + float bounce_feedback; float y_mult; - uint pad; + bool use_occlusion; } params; @@ -159,7 +160,8 @@ void main() { // Add indirect light first, in order to save computation resources #ifdef MODE_PROCESS_DYNAMIC - if (params.multibounce) { + if (params.bounce_feedback > 0.001) { + vec3 feedback = (params.bounce_feedback < 1.0) ? (albedo * params.bounce_feedback) : mix(albedo, vec3(1.0), params.bounce_feedback - 1.0); vec3 pos = (vec3(positioni) + vec3(0.5)) * float(params.probe_axis_size - 1) / params.grid_size; ivec3 probe_base_pos = ivec3(pos); @@ -172,7 +174,7 @@ void main() { vec3 base_tex_posf = vec3(tex_pos); vec2 tex_pixel_size = 1.0 / vec2(ivec2((OCT_SIZE + 2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE + 2) * params.probe_axis_size)); - vec3 probe_uv_offset = (ivec3(OCT_SIZE + 2, OCT_SIZE + 2, (OCT_SIZE + 2) * params.probe_axis_size)) * tex_pixel_size.xyx; + vec3 probe_uv_offset = vec3(ivec3(OCT_SIZE + 2, OCT_SIZE + 2, (OCT_SIZE + 2) * params.probe_axis_size)) * tex_pixel_size.xyx; for (uint j = 0; j < 8; j++) { ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); @@ -192,18 +194,35 @@ void main() { for (uint k = 0; k < 6; k++) { if (bool(valid_aniso & (1 << k))) { vec3 n = aniso_dir[k]; - float weight = trilinear.x * trilinear.y * trilinear.z * max(0.005, dot(n, probe_dir)); - - vec3 tex_posf = base_tex_posf + vec3(octahedron_encode(n) * float(OCT_SIZE), 0.0); - tex_posf.xy *= tex_pixel_size; - - vec3 pos_uvw = tex_posf; - pos_uvw.xy += vec2(offset.xy) * probe_uv_offset.xy; - pos_uvw.x += float(offset.z) * probe_uv_offset.z; - vec3 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0).rgb; - - light_accum[k] += indirect_light * weight; - weight_accum[k] += weight; + float weight = trilinear.x * trilinear.y * trilinear.z * max(0, dot(n, probe_dir)); + + if (weight > 0.0 && params.use_occlusion) { + ivec3 occ_indexv = abs((cascades.data[params.cascade].probe_world_offset + probe_posi) & ivec3(1, 1, 1)) * ivec3(1, 2, 4); + vec4 occ_mask = mix(vec4(0.0), vec4(1.0), equal(ivec4(occ_indexv.x | occ_indexv.y), ivec4(0, 1, 2, 3))); + + vec3 occ_pos = (vec3(positioni) + aniso_dir[k] + vec3(0.5)) / params.grid_size; + occ_pos.z += float(params.cascade); + if (occ_indexv.z != 0) { //z bit is on, means index is >=4, so make it switch to the other half of textures + occ_pos.x += 1.0; + } + occ_pos *= vec3(0.5, 1.0, 1.0 / float(params.max_cascades)); //renormalize + float occlusion = dot(textureLod(sampler3D(occlusion_texture, linear_sampler), occ_pos, 0.0), occ_mask); + + weight *= occlusion; + } + + if (weight > 0.0) { + vec3 tex_posf = base_tex_posf + vec3(octahedron_encode(n) * float(OCT_SIZE), 0.0); + tex_posf.xy *= tex_pixel_size; + + vec3 pos_uvw = tex_posf; + pos_uvw.xy += vec2(offset.xy) * probe_uv_offset.xy; + pos_uvw.x += float(offset.z) * probe_uv_offset.z; + vec3 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0).rgb; + + light_accum[k] += indirect_light * weight; + weight_accum[k] += weight; + } } } } @@ -211,7 +230,7 @@ void main() { for (uint k = 0; k < 6; k++) { if (weight_accum[k] > 0.0) { light_accum[k] /= weight_accum[k]; - light_accum[k] *= albedo; + light_accum[k] *= feedback; } } } @@ -266,13 +285,16 @@ void main() { rel_vec.y /= params.y_mult; attenuation = get_omni_attenuation(light_distance, 1.0 / lights.data[i].radius, lights.data[i].attenuation); - float angle = acos(dot(normalize(rel_vec), -lights.data[i].direction)); - if (angle > lights.data[i].spot_angle) { - attenuation = 0.0; - } else { - float d = clamp(angle / lights.data[i].spot_angle, 0, 1); - attenuation *= pow(1.0 - d, lights.data[i].spot_attenuation); + float cos_spot_angle = lights.data[i].cos_spot_angle; + float cos_angle = dot(-direction, lights.data[i].direction); + + if (cos_angle < cos_spot_angle) { + continue; } + + float scos = max(cos_angle, cos_spot_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cos_spot_angle)); + attenuation *= 1.0 - pow(spot_rim, lights.data[i].inv_spot_attenuation); } break; } diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl index e4f6f4b7ea..007e4c113a 100644 --- a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl +++ b/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl @@ -244,20 +244,26 @@ void main() { vec4 light; if (hit) { - const float EPSILON = 0.001; - vec3 hit_normal = normalize(vec3( - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(EPSILON, 0.0, 0.0)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(EPSILON, 0.0, 0.0)).r, - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(0.0, EPSILON, 0.0)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(0.0, EPSILON, 0.0)).r, - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(0.0, 0.0, EPSILON)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(0.0, 0.0, EPSILON)).r)); - - vec3 hit_light = texture(sampler3D(light_cascades[hit_cascade], linear_sampler), uvw).rgb; - vec4 aniso0 = texture(sampler3D(aniso0_cascades[hit_cascade], linear_sampler), uvw); - vec3 hit_aniso0 = aniso0.rgb; - vec3 hit_aniso1 = vec3(aniso0.a, texture(sampler3D(aniso1_cascades[hit_cascade], linear_sampler), uvw).rg); - - //one liner magic - light.rgb = hit_light * (dot(max(vec3(0.0), (hit_normal * hit_aniso0)), vec3(1.0)) + dot(max(vec3(0.0), (-hit_normal * hit_aniso1)), vec3(1.0))); - light.a = 1.0; + //avoid reading different texture from different threads + for (uint j = params.cascade; j < params.max_cascades; j++) { + if (j == hit_cascade) { + const float EPSILON = 0.001; + vec3 hit_normal = normalize(vec3( + texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(EPSILON, 0.0, 0.0)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(EPSILON, 0.0, 0.0)).r, + texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(0.0, EPSILON, 0.0)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(0.0, EPSILON, 0.0)).r, + texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(0.0, 0.0, EPSILON)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(0.0, 0.0, EPSILON)).r)); + + vec3 hit_light = texture(sampler3D(light_cascades[hit_cascade], linear_sampler), uvw).rgb; + vec4 aniso0 = texture(sampler3D(aniso0_cascades[hit_cascade], linear_sampler), uvw); + vec3 hit_aniso0 = aniso0.rgb; + vec3 hit_aniso1 = vec3(aniso0.a, texture(sampler3D(aniso1_cascades[hit_cascade], linear_sampler), uvw).rg); + + //one liner magic + light.rgb = hit_light * (dot(max(vec3(0.0), (hit_normal * hit_aniso0)), vec3(1.0)) + dot(max(vec3(0.0), (-hit_normal * hit_aniso1)), vec3(1.0))); + light.a = 1.0; + } + } + } else if (params.sky_mode == SKY_MODE_SKY) { #ifdef USE_CUBEMAP_ARRAY light.rgb = textureLod(samplerCubeArray(sky_irradiance, linear_sampler_mipmaps), vec4(ray_dir, 0.0), 2.0).rgb; //use second mipmap because we dont usually throw a lot of rays, so this compensates diff --git a/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl b/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl deleted file mode 100644 index 29443ae7db..0000000000 --- a/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl +++ /dev/null @@ -1,105 +0,0 @@ -#[compute] - -#version 450 - -VERSION_DEFINES - -#define BLOCK_SIZE 8 - -layout(local_size_x = BLOCK_SIZE, local_size_y = BLOCK_SIZE, local_size_z = 1) in; - -#ifdef MODE_REDUCE - -shared float tmp_data[BLOCK_SIZE * BLOCK_SIZE]; -const uint swizzle_table[BLOCK_SIZE] = uint[](0, 4, 2, 6, 1, 5, 3, 7); -const uint unswizzle_table[BLOCK_SIZE] = uint[](0, 0, 0, 1, 0, 2, 1, 3); - -#endif - -layout(r32f, set = 0, binding = 0) uniform restrict readonly image2D source_depth; -layout(r32f, set = 0, binding = 1) uniform restrict writeonly image2D dst_depth; - -layout(push_constant, binding = 1, std430) uniform Params { - ivec2 source_size; - ivec2 source_offset; - uint min_size; - uint gaussian_kernel_version; - ivec2 filter_dir; -} -params; - -void main() { -#ifdef MODE_REDUCE - - uvec2 pos = gl_LocalInvocationID.xy; - - ivec2 image_offset = params.source_offset; - ivec2 image_pos = image_offset + ivec2(gl_GlobalInvocationID.xy); - uint dst_t = swizzle_table[pos.y] * BLOCK_SIZE + swizzle_table[pos.x]; - tmp_data[dst_t] = imageLoad(source_depth, min(image_pos, params.source_size - ivec2(1))).r; - ivec2 image_size = params.source_size; - - uint t = pos.y * BLOCK_SIZE + pos.x; - - //neighbours - uint size = BLOCK_SIZE; - - do { - groupMemoryBarrier(); - barrier(); - - size >>= 1; - image_size >>= 1; - image_offset >>= 1; - - if (all(lessThan(pos, uvec2(size)))) { - uint nx = t + size; - uint ny = t + (BLOCK_SIZE * size); - uint nxy = ny + size; - - tmp_data[t] += tmp_data[nx]; - tmp_data[t] += tmp_data[ny]; - tmp_data[t] += tmp_data[nxy]; - tmp_data[t] /= 4.0; - } - - } while (size > params.min_size); - - if (all(lessThan(pos, uvec2(size)))) { - image_pos = ivec2(unswizzle_table[size + pos.x], unswizzle_table[size + pos.y]); - image_pos += image_offset + ivec2(gl_WorkGroupID.xy) * int(size); - - image_size = max(ivec2(1), image_size); //in case image size became 0 - - if (all(lessThan(image_pos, uvec2(image_size)))) { - imageStore(dst_depth, image_pos, vec4(tmp_data[t])); - } - } -#endif - -#ifdef MODE_FILTER - - ivec2 image_pos = params.source_offset + ivec2(gl_GlobalInvocationID.xy); - if (any(greaterThanEqual(image_pos, params.source_size))) { - return; - } - - ivec2 clamp_min = ivec2(params.source_offset); - ivec2 clamp_max = ivec2(params.source_size) - 1; - - //gaussian kernel, size 9, sigma 4 - const int kernel_size = 9; - const float gaussian_kernel[kernel_size * 3] = float[]( - 0.000229, 0.005977, 0.060598, 0.241732, 0.382928, 0.241732, 0.060598, 0.005977, 0.000229, - 0.028532, 0.067234, 0.124009, 0.179044, 0.20236, 0.179044, 0.124009, 0.067234, 0.028532, - 0.081812, 0.101701, 0.118804, 0.130417, 0.134535, 0.130417, 0.118804, 0.101701, 0.081812); - float accum = 0.0; - for (int i = 0; i < kernel_size; i++) { - ivec2 ofs = clamp(image_pos + params.filter_dir * (i - kernel_size / 2), clamp_min, clamp_max); - accum += imageLoad(source_depth, ofs).r * gaussian_kernel[params.gaussian_kernel_version + i]; - } - - imageStore(dst_depth, image_pos, vec4(accum)); - -#endif -} diff --git a/servers/rendering/renderer_rd/shaders/skeleton.glsl b/servers/rendering/renderer_rd/shaders/skeleton.glsl index b19f5a9ad3..680d1045cd 100644 --- a/servers/rendering/renderer_rd/shaders/skeleton.glsl +++ b/servers/rendering/renderer_rd/shaders/skeleton.glsl @@ -100,7 +100,7 @@ void main() { for (uint i = 0; i < params.blend_shape_count; i++) { float w = blend_shape_weights.data[i]; - if (w > 0.0001) { + if (abs(w) > 0.0001) { uint base_offset = (params.vertex_count * i + index) * params.vertex_stride; blend_vertex += uintBitsToFloat(uvec3(src_blend_shapes.data[base_offset + 0], src_blend_shapes.data[base_offset + 1], src_blend_shapes.data[base_offset + 2])) * w; diff --git a/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl b/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl index aa32809a06..e7ba8feb80 100644 --- a/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl +++ b/servers/rendering/renderer_rd/shaders/volumetric_fog.glsl @@ -168,13 +168,18 @@ layout(set = 0, binding = 14, std140) uniform Params { uint cluster_shift; uint cluster_width; - uvec3 cluster_pad; uint max_cluster_element_count_div_32; + bool use_temporal_reprojection; + uint temporal_frame; + float temporal_blend; mat3x4 cam_rotation; + mat4 to_prev_view; } params; +layout(set = 0, binding = 15) uniform texture3D prev_density_texture; + float get_depth_at_pos(float cell_depth_size, int z) { float d = float(z) * cell_depth_size + cell_depth_size * 0.5; //center of voxels d = pow(d, params.detail_spread); @@ -213,6 +218,26 @@ uint cluster_get_range_clip_mask(uint i, uint z_min, uint z_max) { return bitfieldInsert(uint(0), uint(0xFFFFFFFF), local_min, mask_width); } +#define TEMPORAL_FRAMES 16 + +const vec3 halton_map[TEMPORAL_FRAMES] = vec3[]( + vec3(0.5, 0.33333333, 0.2), + vec3(0.25, 0.66666667, 0.4), + vec3(0.75, 0.11111111, 0.6), + vec3(0.125, 0.44444444, 0.8), + vec3(0.625, 0.77777778, 0.04), + vec3(0.375, 0.22222222, 0.24), + vec3(0.875, 0.55555556, 0.44), + vec3(0.0625, 0.88888889, 0.64), + vec3(0.5625, 0.03703704, 0.84), + vec3(0.3125, 0.37037037, 0.08), + vec3(0.8125, 0.7037037, 0.28), + vec3(0.1875, 0.14814815, 0.48), + vec3(0.6875, 0.48148148, 0.68), + vec3(0.4375, 0.81481481, 0.88), + vec3(0.9375, 0.25925926, 0.12), + vec3(0.03125, 0.59259259, 0.32)); + void main() { vec3 fog_cell_size = 1.0 / vec3(params.fog_volume_size); @@ -241,6 +266,45 @@ void main() { view_pos.z = -params.fog_frustum_end * fog_unit_pos.z; view_pos.y = -view_pos.y; + vec4 reprojected_density = vec4(0.0); + float reproject_amount = 0.0; + + if (params.use_temporal_reprojection) { + vec3 prev_view = (params.to_prev_view * vec4(view_pos, 1.0)).xyz; + //undo transform into prev view + prev_view.y = -prev_view.y; + //z back to unit size + prev_view.z /= -params.fog_frustum_end; + //xy back to unit size + prev_view.xy /= mix(params.fog_frustum_size_begin, params.fog_frustum_size_end, vec2(prev_view.z)); + prev_view.xy = prev_view.xy * 0.5 + 0.5; + //z back to unspread value + prev_view.z = pow(prev_view.z, 1.0 / params.detail_spread); + + if (all(greaterThan(prev_view, vec3(0.0))) && all(lessThan(prev_view, vec3(1.0)))) { + //reprojectinon fits + + reprojected_density = textureLod(sampler3D(prev_density_texture, linear_sampler), prev_view, 0.0); + reproject_amount = params.temporal_blend; + + // Since we can reproject, now we must jitter the current view pos. + // This is done here because cells that can't reproject should not jitter. + + fog_unit_pos = posf * fog_cell_size + fog_cell_size * halton_map[params.temporal_frame]; //center of voxels, offset by halton table + + screen_pos = uvec2(fog_unit_pos.xy * params.screen_size); + cluster_pos = screen_pos >> params.cluster_shift; + cluster_offset = (params.cluster_width * cluster_pos.y + cluster_pos.x) * (params.max_cluster_element_count_div_32 + 32); + //positions in screen are too spread apart, no hopes for optimizing with subgroups + + fog_unit_pos.z = pow(fog_unit_pos.z, params.detail_spread); + + view_pos.xy = (fog_unit_pos.xy * 2.0 - 1.0) * mix(params.fog_frustum_size_begin, params.fog_frustum_size_end, vec2(fog_unit_pos.z)); + view_pos.z = -params.fog_frustum_end * fog_unit_pos.z; + view_pos.y = -view_pos.y; + } + } + uint cluster_z = uint(clamp((abs(view_pos.z) / params.z_far) * 32.0, 0.0, 31.0)); vec3 total_light = params.light_color; @@ -433,31 +497,31 @@ void main() { uint light_index = 32 * i + bit; - vec3 light_pos = omni_lights.data[light_index].position; - vec3 light_rel_vec = omni_lights.data[light_index].position - view_pos; + vec3 light_pos = spot_lights.data[light_index].position; + vec3 light_rel_vec = spot_lights.data[light_index].position - view_pos; float d = length(light_rel_vec); float shadow_attenuation = 1.0; - if (d * omni_lights.data[light_index].inv_radius < 1.0) { - float attenuation = get_omni_attenuation(d, omni_lights.data[light_index].inv_radius, omni_lights.data[light_index].attenuation); + if (d * spot_lights.data[light_index].inv_radius < 1.0) { + float attenuation = get_omni_attenuation(d, spot_lights.data[light_index].inv_radius, spot_lights.data[light_index].attenuation); - vec3 spot_dir = omni_lights.data[light_index].direction; - float scos = max(dot(-normalize(light_rel_vec), spot_dir), omni_lights.data[light_index].cone_angle); - float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - omni_lights.data[light_index].cone_angle)); - attenuation *= 1.0 - pow(spot_rim, omni_lights.data[light_index].cone_attenuation); + vec3 spot_dir = spot_lights.data[light_index].direction; + float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_lights.data[light_index].cone_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_lights.data[light_index].cone_angle)); + attenuation *= 1.0 - pow(spot_rim, spot_lights.data[light_index].cone_attenuation); - vec3 light = omni_lights.data[light_index].color / M_PI; + vec3 light = spot_lights.data[light_index].color / M_PI; - if (omni_lights.data[light_index].shadow_enabled) { + if (spot_lights.data[light_index].shadow_enabled) { //has shadow vec4 v = vec4(view_pos, 1.0); - vec4 splane = (omni_lights.data[light_index].shadow_matrix * v); + vec4 splane = (spot_lights.data[light_index].shadow_matrix * v); splane /= splane.w; float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; - shadow_attenuation = exp(min(0.0, (depth - splane.z)) / omni_lights.data[light_index].inv_radius * omni_lights.data[light_index].shadow_volumetric_fog_fade); + shadow_attenuation = exp(min(0.0, (depth - splane.z)) / spot_lights.data[light_index].inv_radius * spot_lights.data[light_index].shadow_volumetric_fog_fade); } total_light += light * attenuation * shadow_attenuation; @@ -565,7 +629,11 @@ void main() { #endif - imageStore(density_map, pos, vec4(total_light, total_density)); + vec4 final_density = vec4(total_light, total_density); + + final_density = mix(final_density, reprojected_density, reproject_amount); + + imageStore(density_map, pos, final_density); #endif #ifdef MODE_FOG |