diff options
author | reduz <reduzio@gmail.com> | 2021-02-06 11:51:56 -0300 |
---|---|---|
committer | reduz <reduzio@gmail.com> | 2021-02-06 15:08:21 -0300 |
commit | 8faf23b52b0147eea6903e84973893802d97d339 (patch) | |
tree | c672077cff56f43a5a318e89be7c2e956de4aa6e /servers/rendering/renderer_rd/shaders | |
parent | fa2f7693bb9efc44755b1b693b22ec2788910a0d (diff) |
Simplify Volumetric Fog
-Always use temporal reproject, it just loos way better than any other filter.
-By always using termporal reproject, the shadowmap reduction can be done away with, massively improving performance.
-Disadvantage of temporal reproject is update latency so..
-Made sure a gaussian filter runs in XY after fog, this allows to keep stability and lower latency.
Diffstat (limited to 'servers/rendering/renderer_rd/shaders')
-rw-r--r-- | servers/rendering/renderer_rd/shaders/SCsub | 1 | ||||
-rw-r--r-- | servers/rendering/renderer_rd/shaders/shadow_reduce.glsl | 163 |
2 files changed, 0 insertions, 164 deletions
diff --git a/servers/rendering/renderer_rd/shaders/SCsub b/servers/rendering/renderer_rd/shaders/SCsub index 1b0197c1c1..c192574ff2 100644 --- a/servers/rendering/renderer_rd/shaders/SCsub +++ b/servers/rendering/renderer_rd/shaders/SCsub @@ -39,7 +39,6 @@ if "RD_GLSL" in env["BUILDERS"]: env.RD_GLSL("sdfgi_debug.glsl") env.RD_GLSL("sdfgi_debug_probes.glsl") env.RD_GLSL("volumetric_fog.glsl") - env.RD_GLSL("shadow_reduce.glsl") env.RD_GLSL("particles.glsl") env.RD_GLSL("particles_copy.glsl") env.RD_GLSL("sort.glsl") diff --git a/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl b/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl deleted file mode 100644 index a29b24e560..0000000000 --- a/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl +++ /dev/null @@ -1,163 +0,0 @@ -#[compute] - -#version 450 - -VERSION_DEFINES - -#define BLOCK_SIZE 8 - -#ifdef MODE_REDUCE_SUBGROUP - -#extension GL_KHR_shader_subgroup_ballot : enable -#extension GL_KHR_shader_subgroup_arithmetic : enable - -//nvidia friendly, max 32 -layout(local_size_x = 8, local_size_y = 4, local_size_z = 1) in; - -#else - -layout(local_size_x = BLOCK_SIZE, local_size_y = BLOCK_SIZE, local_size_z = 1) in; - -#endif - -#ifdef MODE_REDUCE - -shared float tmp_data[BLOCK_SIZE * BLOCK_SIZE]; -const uint swizzle_table[BLOCK_SIZE] = uint[](0, 4, 2, 6, 1, 5, 3, 7); -const uint unswizzle_table[BLOCK_SIZE] = uint[](0, 0, 0, 1, 0, 2, 1, 3); - -#endif - -#if defined(MODE_REDUCE) || defined(MODE_REDUCE_SUBGROUP) -layout(set = 0, binding = 0) uniform sampler2D source_depth; -#else -layout(r16, set = 0, binding = 0) uniform restrict readonly image2D source_depth; -#endif -layout(r16, set = 1, binding = 0) uniform restrict writeonly image2D dst_depth; - -layout(push_constant, binding = 1, std430) uniform Params { - ivec2 source_size; - ivec2 source_offset; - uint min_size; - uint gaussian_kernel_version; - ivec2 filter_dir; -} -params; - -void main() { -#ifdef MODE_REDUCE_SUBGROUP - - uvec2 local_pos = gl_LocalInvocationID.xy; - ivec2 image_offset = params.source_offset; - ivec2 image_pos = image_offset + ivec2(gl_GlobalInvocationID.xy * ivec2(1, 2)); - - float depth = texelFetch(source_depth, min(image_pos, params.source_size - ivec2(1)), 0).r; - depth += texelFetch(source_depth, min(image_pos + ivec2(0, 1), params.source_size - ivec2(1)), 0).r; - depth *= 0.5; - -#ifdef MODE_REDUCE_8 - //fast version, reduce all - float depth_average = subgroupAdd(depth) / 32.0; - if (local_pos == uvec2(0)) { - imageStore(dst_depth, image_pos / 8, vec4(depth_average)); - } -#else - //bit slower version, reduce by regions - uint group_size = (8 / params.min_size); - uvec2 group_id = local_pos / (8 / params.min_size); - - uvec4 mask; - float depth_average = 0; - - while (true) { - uvec2 first = subgroupBroadcastFirst(group_id); - mask = subgroupBallot(first == group_id); - if (first == group_id) { - depth_average = subgroupAdd(depth); - break; - } - } - - depth_average /= float(group_size * group_size); - - if (local_pos == group_id) { - imageStore(dst_depth, image_pos / int(group_size), vec4(depth_average)); - } -#endif - -#endif - -#ifdef MODE_REDUCE - - uvec2 pos = gl_LocalInvocationID.xy; - - ivec2 image_offset = params.source_offset; - ivec2 image_pos = image_offset + ivec2(gl_GlobalInvocationID.xy); - uint dst_t = swizzle_table[pos.y] * BLOCK_SIZE + swizzle_table[pos.x]; - tmp_data[dst_t] = texelFetch(source_depth, min(image_pos, params.source_size - ivec2(1)), 0).r; - ivec2 image_size = params.source_size; - - uint t = pos.y * BLOCK_SIZE + pos.x; - - //neighbours - uint size = BLOCK_SIZE; - - do { - groupMemoryBarrier(); - barrier(); - - size >>= 1; - image_size >>= 1; - image_offset >>= 1; - - if (all(lessThan(pos, uvec2(size)))) { - uint nx = t + size; - uint ny = t + (BLOCK_SIZE * size); - uint nxy = ny + size; - - tmp_data[t] += tmp_data[nx]; - tmp_data[t] += tmp_data[ny]; - tmp_data[t] += tmp_data[nxy]; - tmp_data[t] /= 4.0; - } - - } while (size > params.min_size); - - if (all(lessThan(pos, uvec2(size)))) { - image_pos = ivec2(unswizzle_table[size + pos.x], unswizzle_table[size + pos.y]); - image_pos += image_offset + ivec2(gl_WorkGroupID.xy) * int(size); - - image_size = max(ivec2(1), image_size); //in case image size became 0 - - if (all(lessThan(image_pos, uvec2(image_size)))) { - imageStore(dst_depth, image_pos, vec4(tmp_data[t])); - } - } -#endif - -#ifdef MODE_FILTER - - ivec2 image_pos = params.source_offset + ivec2(gl_GlobalInvocationID.xy); - if (any(greaterThanEqual(image_pos, params.source_size))) { - return; - } - - ivec2 clamp_min = ivec2(params.source_offset); - ivec2 clamp_max = ivec2(params.source_size) - 1; - - //gaussian kernel, size 9, sigma 4 - const int kernel_size = 9; - const float gaussian_kernel[kernel_size * 3] = float[]( - 0.000229, 0.005977, 0.060598, 0.241732, 0.382928, 0.241732, 0.060598, 0.005977, 0.000229, - 0.028532, 0.067234, 0.124009, 0.179044, 0.20236, 0.179044, 0.124009, 0.067234, 0.028532, - 0.081812, 0.101701, 0.118804, 0.130417, 0.134535, 0.130417, 0.118804, 0.101701, 0.081812); - float accum = 0.0; - for (int i = 0; i < kernel_size; i++) { - ivec2 ofs = clamp(image_pos + params.filter_dir * (i - kernel_size / 2), clamp_min, clamp_max); - accum += imageLoad(source_depth, ofs).r * gaussian_kernel[params.gaussian_kernel_version + i]; - } - - imageStore(dst_depth, image_pos, vec4(accum)); - -#endif -} |