From eefcb5ed678901fafb16a73f24aa85fbd03b69b7 Mon Sep 17 00:00:00 2001 From: Bastiaan Olij Date: Tue, 28 Jun 2022 19:10:36 +1000 Subject: Move screen space effects into a separate class --- drivers/gles3/rasterizer_scene_gles3.h | 1 - .../rendering/renderer_rd/effects/copy_effects.cpp | 106 ++ .../rendering/renderer_rd/effects/copy_effects.h | 30 + .../rendering/renderer_rd/effects/ss_effects.cpp | 1715 ++++++++++++++++++++ servers/rendering/renderer_rd/effects/ss_effects.h | 508 ++++++ servers/rendering/renderer_rd/effects_rd.cpp | 1241 +------------- servers/rendering/renderer_rd/effects_rd.h | 384 ----- servers/rendering/renderer_rd/environment/gi.h | 2 +- .../forward_clustered/render_forward_clustered.cpp | 26 +- .../forward_clustered/render_forward_clustered.h | 2 + .../renderer_rd/renderer_scene_render_rd.cpp | 361 +--- .../renderer_rd/renderer_scene_render_rd.h | 44 +- .../shaders/effects/screen_space_reflection.glsl | 254 +++ .../effects/screen_space_reflection_filter.glsl | 148 ++ .../effects/screen_space_reflection_inc.glsl | 28 + .../effects/screen_space_reflection_scale.glsl | 106 ++ .../shaders/effects/specular_merge.glsl | 112 ++ .../shaders/effects/ss_effects_downsample.glsl | 229 +++ .../renderer_rd/shaders/effects/ssao.glsl | 483 ++++++ .../renderer_rd/shaders/effects/ssao_blur.glsl | 154 ++ .../shaders/effects/ssao_importance_map.glsl | 123 ++ .../shaders/effects/ssao_interleave.glsl | 119 ++ .../renderer_rd/shaders/effects/ssil.glsl | 444 +++++ .../renderer_rd/shaders/effects/ssil_blur.glsl | 144 ++ .../shaders/effects/ssil_importance_map.glsl | 125 ++ .../shaders/effects/ssil_interleave.glsl | 122 ++ .../shaders/screen_space_reflection.glsl | 244 --- .../shaders/screen_space_reflection_filter.glsl | 154 -- .../shaders/screen_space_reflection_scale.glsl | 90 - .../renderer_rd/shaders/specular_merge.glsl | 53 - .../renderer_rd/shaders/ss_effects_downsample.glsl | 229 --- servers/rendering/renderer_rd/shaders/ssao.glsl | 483 ------ 
.../rendering/renderer_rd/shaders/ssao_blur.glsl | 154 -- .../renderer_rd/shaders/ssao_importance_map.glsl | 123 -- .../renderer_rd/shaders/ssao_interleave.glsl | 119 -- servers/rendering/renderer_rd/shaders/ssil.glsl | 444 ----- .../rendering/renderer_rd/shaders/ssil_blur.glsl | 144 -- .../renderer_rd/shaders/ssil_importance_map.glsl | 125 -- .../renderer_rd/shaders/ssil_interleave.glsl | 122 -- 39 files changed, 5021 insertions(+), 4474 deletions(-) create mode 100644 servers/rendering/renderer_rd/effects/ss_effects.cpp create mode 100644 servers/rendering/renderer_rd/effects/ss_effects.h create mode 100644 servers/rendering/renderer_rd/shaders/effects/screen_space_reflection.glsl create mode 100644 servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_filter.glsl create mode 100644 servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_inc.glsl create mode 100644 servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_scale.glsl create mode 100644 servers/rendering/renderer_rd/shaders/effects/specular_merge.glsl create mode 100644 servers/rendering/renderer_rd/shaders/effects/ss_effects_downsample.glsl create mode 100644 servers/rendering/renderer_rd/shaders/effects/ssao.glsl create mode 100644 servers/rendering/renderer_rd/shaders/effects/ssao_blur.glsl create mode 100644 servers/rendering/renderer_rd/shaders/effects/ssao_importance_map.glsl create mode 100644 servers/rendering/renderer_rd/shaders/effects/ssao_interleave.glsl create mode 100644 servers/rendering/renderer_rd/shaders/effects/ssil.glsl create mode 100644 servers/rendering/renderer_rd/shaders/effects/ssil_blur.glsl create mode 100644 servers/rendering/renderer_rd/shaders/effects/ssil_importance_map.glsl create mode 100644 servers/rendering/renderer_rd/shaders/effects/ssil_interleave.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/screen_space_reflection.glsl delete mode 100644 
servers/rendering/renderer_rd/shaders/screen_space_reflection_filter.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/screen_space_reflection_scale.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/specular_merge.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/ss_effects_downsample.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/ssao.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/ssao_blur.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/ssao_interleave.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/ssil.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/ssil_blur.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/ssil_importance_map.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/ssil_interleave.glsl diff --git a/drivers/gles3/rasterizer_scene_gles3.h b/drivers/gles3/rasterizer_scene_gles3.h index 4222743cec..af47e2eab0 100644 --- a/drivers/gles3/rasterizer_scene_gles3.h +++ b/drivers/gles3/rasterizer_scene_gles3.h @@ -651,7 +651,6 @@ protected: RS::EnvironmentSSAOQuality ssao_quality = RS::ENV_SSAO_QUALITY_MEDIUM; bool ssao_half_size = false; - bool ssao_using_half_size = false; float ssao_adaptive_target = 0.5; int ssao_blur_passes = 2; float ssao_fadeout_from = 50.0; diff --git a/servers/rendering/renderer_rd/effects/copy_effects.cpp b/servers/rendering/renderer_rd/effects/copy_effects.cpp index cbf7046887..5507483cee 100644 --- a/servers/rendering/renderer_rd/effects/copy_effects.cpp +++ b/servers/rendering/renderer_rd/effects/copy_effects.cpp @@ -249,6 +249,56 @@ CopyEffects::CopyEffects(bool p_prefer_raster_effects) { roughness.raster_pipeline.clear(); } } + + { + Vector specular_modes; + specular_modes.push_back("\n#define MODE_MERGE\n"); // SPECULAR_MERGE_ADD + specular_modes.push_back("\n#define MODE_MERGE\n#define MODE_SSR\n"); // 
SPECULAR_MERGE_SSR + specular_modes.push_back("\n"); // SPECULAR_MERGE_ADDITIVE_ADD + specular_modes.push_back("\n#define MODE_SSR\n"); // SPECULAR_MERGE_ADDITIVE_SSR + + specular_modes.push_back("\n#define USE_MULTIVIEW\n#define MODE_MERGE\n"); // SPECULAR_MERGE_ADD_MULTIVIEW + specular_modes.push_back("\n#define USE_MULTIVIEW\n#define MODE_MERGE\n#define MODE_SSR\n"); // SPECULAR_MERGE_SSR_MULTIVIEW + specular_modes.push_back("\n#define USE_MULTIVIEW\n"); // SPECULAR_MERGE_ADDITIVE_ADD_MULTIVIEW + specular_modes.push_back("\n#define USE_MULTIVIEW\n#define MODE_SSR\n"); // SPECULAR_MERGE_ADDITIVE_SSR_MULTIVIEW + + specular_merge.shader.initialize(specular_modes); + + if (!RendererCompositorRD::singleton->is_xr_enabled()) { + specular_merge.shader.set_variant_enabled(SPECULAR_MERGE_ADD_MULTIVIEW, false); + specular_merge.shader.set_variant_enabled(SPECULAR_MERGE_SSR_MULTIVIEW, false); + specular_merge.shader.set_variant_enabled(SPECULAR_MERGE_ADDITIVE_ADD_MULTIVIEW, false); + specular_merge.shader.set_variant_enabled(SPECULAR_MERGE_ADDITIVE_SSR_MULTIVIEW, false); + } + + specular_merge.shader_version = specular_merge.shader.version_create(); + + //use additive + + RD::PipelineColorBlendState::Attachment ba; + ba.enable_blend = true; + ba.src_color_blend_factor = RD::BLEND_FACTOR_ONE; + ba.dst_color_blend_factor = RD::BLEND_FACTOR_ONE; + ba.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; + ba.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE; + ba.color_blend_op = RD::BLEND_OP_ADD; + ba.alpha_blend_op = RD::BLEND_OP_ADD; + + RD::PipelineColorBlendState blend_additive; + blend_additive.attachments.push_back(ba); + + for (int i = 0; i < SPECULAR_MERGE_MAX; i++) { + if (specular_merge.shader.is_variant_enabled(i)) { + RD::PipelineColorBlendState blend_state; + if (i == SPECULAR_MERGE_ADDITIVE_ADD || i == SPECULAR_MERGE_ADDITIVE_SSR || i == SPECULAR_MERGE_ADDITIVE_ADD_MULTIVIEW || i == SPECULAR_MERGE_ADDITIVE_SSR_MULTIVIEW) { + blend_state = blend_additive; + } else { + 
blend_state = RD::PipelineColorBlendState::create_disabled(); + } + specular_merge.pipelines[i].setup(specular_merge.shader.version_get_shader(specular_merge.shader_version, i), RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), blend_state, 0); + } + } + } } CopyEffects::~CopyEffects() { @@ -264,6 +314,8 @@ CopyEffects::~CopyEffects() { roughness.compute_shader.version_free(roughness.shader_version); } + specular_merge.shader.version_free(specular_merge.shader_version); + RD::get_singleton()->free(filter.coefficient_buffer); if (RD::get_singleton()->uniform_set_is_valid(filter.image_uniform_set)) { @@ -1083,3 +1135,57 @@ void CopyEffects::cubemap_roughness_raster(RID p_source_rd_texture, RID p_dest_f RD::get_singleton()->draw_list_draw(draw_list, true); RD::get_singleton()->draw_list_end(); } + +void CopyEffects::merge_specular(RID p_dest_framebuffer, RID p_specular, RID p_base, RID p_reflection, uint32_t p_view_count) { + UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton(); + ERR_FAIL_NULL(uniform_set_cache); + MaterialStorage *material_storage = MaterialStorage::get_singleton(); + ERR_FAIL_NULL(material_storage); + + RID default_sampler = material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + + RD::get_singleton()->draw_command_begin_label("Merge specular"); + + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, Vector()); + + int mode; + if (p_reflection.is_valid()) { + if (p_base.is_valid()) { + mode = SPECULAR_MERGE_SSR; + } else { + mode = SPECULAR_MERGE_ADDITIVE_SSR; + } + } else { + if (p_base.is_valid()) { + mode = SPECULAR_MERGE_ADD; + } else { + mode = SPECULAR_MERGE_ADDITIVE_ADD; + } + } + + if (p_view_count > 1) { + mode += SPECULAR_MERGE_ADD_MULTIVIEW; + } 
+ + RID shader = specular_merge.shader.version_get_shader(specular_merge.shader_version, mode); + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, specular_merge.pipelines[mode].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_framebuffer))); + + if (p_base.is_valid()) { + RD::Uniform u_base(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_base })); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 2, u_base), 2); + } + + RD::Uniform u_specular(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_specular })); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 0, u_specular), 0); + + if (p_reflection.is_valid()) { + RD::Uniform u_reflection(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_reflection })); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, uniform_set_cache->get_cache(shader, 1, u_reflection), 1); + } + + RD::get_singleton()->draw_list_bind_index_array(draw_list, material_storage->get_quad_index_array()); + RD::get_singleton()->draw_list_draw(draw_list, true); + RD::get_singleton()->draw_list_end(); + + RD::get_singleton()->draw_command_end_label(); +} diff --git a/servers/rendering/renderer_rd/effects/copy_effects.h b/servers/rendering/renderer_rd/effects/copy_effects.h index 882b446964..0066f2be31 100644 --- a/servers/rendering/renderer_rd/effects/copy_effects.h +++ b/servers/rendering/renderer_rd/effects/copy_effects.h @@ -42,6 +42,7 @@ #include "servers/rendering/renderer_rd/shaders/effects/cubemap_filter_raster.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/effects/cubemap_roughness.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/effects/cubemap_roughness_raster.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/specular_merge.glsl.gen.h" #include "servers/rendering/renderer_scene_render.h" 
#include "servers/rendering_server.h" @@ -274,6 +275,33 @@ private: PipelineCacheRD raster_pipeline; } roughness; + // Merge specular + + enum SpecularMergeMode { + SPECULAR_MERGE_ADD, + SPECULAR_MERGE_SSR, + SPECULAR_MERGE_ADDITIVE_ADD, + SPECULAR_MERGE_ADDITIVE_SSR, + + SPECULAR_MERGE_ADD_MULTIVIEW, + SPECULAR_MERGE_SSR_MULTIVIEW, + SPECULAR_MERGE_ADDITIVE_ADD_MULTIVIEW, + SPECULAR_MERGE_ADDITIVE_SSR_MULTIVIEW, + + SPECULAR_MERGE_MAX + }; + + /* Specular merge must be done using raster, rather than compute + * because it must continue the existing color buffer + */ + + struct SpecularMerge { + SpecularMergeShaderRD shader; + RID shader_version; + PipelineCacheRD pipelines[SPECULAR_MERGE_MAX]; + + } specular_merge; + static CopyEffects *singleton; public: @@ -309,6 +337,8 @@ public: void cubemap_roughness(RID p_source_rd_texture, RID p_dest_texture, uint32_t p_face_id, uint32_t p_sample_count, float p_roughness, float p_size); void cubemap_roughness_raster(RID p_source_rd_texture, RID p_dest_framebuffer, uint32_t p_face_id, uint32_t p_sample_count, float p_roughness, float p_size); + + void merge_specular(RID p_dest_framebuffer, RID p_specular, RID p_base, RID p_reflection, uint32_t p_view_count); }; } // namespace RendererRD diff --git a/servers/rendering/renderer_rd/effects/ss_effects.cpp b/servers/rendering/renderer_rd/effects/ss_effects.cpp new file mode 100644 index 0000000000..49d66023d8 --- /dev/null +++ b/servers/rendering/renderer_rd/effects/ss_effects.cpp @@ -0,0 +1,1715 @@ +/*************************************************************************/ +/* ss_effects.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/*************************************************************************/ + +#include "ss_effects.h" + +#include "servers/rendering/renderer_rd/renderer_compositor_rd.h" +#include "servers/rendering/renderer_rd/storage_rd/material_storage.h" +#include "servers/rendering/renderer_rd/uniform_set_cache_rd.h" + +using namespace RendererRD; + +SSEffects *SSEffects::singleton = nullptr; + +static _FORCE_INLINE_ void store_camera(const CameraMatrix &p_mtx, float *p_array) { + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + p_array[i * 4 + j] = p_mtx.matrix[i][j]; + } + } +} + +SSEffects::SSEffects() { + singleton = this; + + { + // Initialize depth buffer for screen space effects + Vector downsampler_modes; + downsampler_modes.push_back("\n"); + downsampler_modes.push_back("\n#define USE_HALF_SIZE\n"); + downsampler_modes.push_back("\n#define GENERATE_MIPS\n"); + downsampler_modes.push_back("\n#define GENERATE_MIPS\n#define USE_HALF_SIZE\n"); + downsampler_modes.push_back("\n#define USE_HALF_BUFFERS\n"); + downsampler_modes.push_back("\n#define USE_HALF_BUFFERS\n#define USE_HALF_SIZE\n"); + downsampler_modes.push_back("\n#define GENERATE_MIPS\n#define GENERATE_FULL_MIPS"); + + ss_effects.downsample_shader.initialize(downsampler_modes); + + ss_effects.downsample_shader_version = ss_effects.downsample_shader.version_create(); + + for (int i = 0; i < SS_EFFECTS_MAX; i++) { + ss_effects.pipelines[i] = RD::get_singleton()->compute_pipeline_create(ss_effects.downsample_shader.version_get_shader(ss_effects.downsample_shader_version, i)); + } + + ss_effects.gather_constants_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(SSEffectsGatherConstants)); + SSEffectsGatherConstants gather_constants; + + const int sub_pass_count = 5; + for (int pass = 0; pass < 4; pass++) { + for (int subPass = 0; subPass < sub_pass_count; subPass++) { + int a = pass; + int b = subPass; + + int spmap[5]{ 0, 1, 4, 3, 2 }; + b = spmap[subPass]; + + float ca, sa; + float 
angle0 = (float(a) + float(b) / float(sub_pass_count)) * Math_PI * 0.5f; + + ca = Math::cos(angle0); + sa = Math::sin(angle0); + + float scale = 1.0f + (a - 1.5f + (b - (sub_pass_count - 1.0f) * 0.5f) / float(sub_pass_count)) * 0.07f; + + gather_constants.rotation_matrices[pass * 20 + subPass * 4 + 0] = scale * ca; + gather_constants.rotation_matrices[pass * 20 + subPass * 4 + 1] = scale * -sa; + gather_constants.rotation_matrices[pass * 20 + subPass * 4 + 2] = -scale * sa; + gather_constants.rotation_matrices[pass * 20 + subPass * 4 + 3] = -scale * ca; + } + } + + RD::get_singleton()->buffer_update(ss_effects.gather_constants_buffer, 0, sizeof(SSEffectsGatherConstants), &gather_constants); + } + + // Initialize Screen Space Indirect Lighting (SSIL) + + { + Vector ssil_modes; + ssil_modes.push_back("\n"); + ssil_modes.push_back("\n#define SSIL_BASE\n"); + ssil_modes.push_back("\n#define ADAPTIVE\n"); + + ssil.gather_shader.initialize(ssil_modes); + + ssil.gather_shader_version = ssil.gather_shader.version_create(); + + for (int i = SSIL_GATHER; i <= SSIL_GATHER_ADAPTIVE; i++) { + ssil.pipelines[i] = RD::get_singleton()->compute_pipeline_create(ssil.gather_shader.version_get_shader(ssil.gather_shader_version, i)); + } + ssil.projection_uniform_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(SSILProjectionUniforms)); + } + + { + Vector ssil_modes; + ssil_modes.push_back("\n#define GENERATE_MAP\n"); + ssil_modes.push_back("\n#define PROCESS_MAPA\n"); + ssil_modes.push_back("\n#define PROCESS_MAPB\n"); + + ssil.importance_map_shader.initialize(ssil_modes); + + ssil.importance_map_shader_version = ssil.importance_map_shader.version_create(); + + for (int i = SSIL_GENERATE_IMPORTANCE_MAP; i <= SSIL_PROCESS_IMPORTANCE_MAPB; i++) { + ssil.pipelines[i] = RD::get_singleton()->compute_pipeline_create(ssil.importance_map_shader.version_get_shader(ssil.importance_map_shader_version, i - SSIL_GENERATE_IMPORTANCE_MAP)); + } + ssil.importance_map_load_counter = 
RD::get_singleton()->storage_buffer_create(sizeof(uint32_t)); + int zero[1] = { 0 }; + RD::get_singleton()->buffer_update(ssil.importance_map_load_counter, 0, sizeof(uint32_t), &zero); + RD::get_singleton()->set_resource_name(ssil.importance_map_load_counter, "Importance Map Load Counter"); + + Vector uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 0; + u.append_id(ssil.importance_map_load_counter); + uniforms.push_back(u); + } + ssil.counter_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssil.importance_map_shader.version_get_shader(ssil.importance_map_shader_version, 2), 2); + RD::get_singleton()->set_resource_name(ssil.counter_uniform_set, "Load Counter Uniform Set"); + } + + { + Vector ssil_modes; + ssil_modes.push_back("\n#define MODE_NON_SMART\n"); + ssil_modes.push_back("\n#define MODE_SMART\n"); + ssil_modes.push_back("\n#define MODE_WIDE\n"); + + ssil.blur_shader.initialize(ssil_modes); + + ssil.blur_shader_version = ssil.blur_shader.version_create(); + for (int i = SSIL_BLUR_PASS; i <= SSIL_BLUR_PASS_WIDE; i++) { + ssil.pipelines[i] = RD::get_singleton()->compute_pipeline_create(ssil.blur_shader.version_get_shader(ssil.blur_shader_version, i - SSIL_BLUR_PASS)); + } + } + + { + Vector ssil_modes; + ssil_modes.push_back("\n#define MODE_NON_SMART\n"); + ssil_modes.push_back("\n#define MODE_SMART\n"); + ssil_modes.push_back("\n#define MODE_HALF\n"); + + ssil.interleave_shader.initialize(ssil_modes); + + ssil.interleave_shader_version = ssil.interleave_shader.version_create(); + for (int i = SSIL_INTERLEAVE; i <= SSIL_INTERLEAVE_HALF; i++) { + ssil.pipelines[i] = RD::get_singleton()->compute_pipeline_create(ssil.interleave_shader.version_get_shader(ssil.interleave_shader_version, i - SSIL_INTERLEAVE)); + } + } + + { + // Initialize Screen Space Ambient Occlusion (SSAO) + + RD::SamplerState sampler; + sampler.mag_filter = RD::SAMPLER_FILTER_NEAREST; + sampler.min_filter = 
RD::SAMPLER_FILTER_NEAREST; + sampler.mip_filter = RD::SAMPLER_FILTER_NEAREST; + sampler.repeat_u = RD::SAMPLER_REPEAT_MODE_MIRRORED_REPEAT; + sampler.repeat_v = RD::SAMPLER_REPEAT_MODE_MIRRORED_REPEAT; + sampler.repeat_w = RD::SAMPLER_REPEAT_MODE_MIRRORED_REPEAT; + sampler.max_lod = 4; + + uint32_t pipeline = 0; + { + Vector ssao_modes; + + ssao_modes.push_back("\n"); + ssao_modes.push_back("\n#define SSAO_BASE\n"); + ssao_modes.push_back("\n#define ADAPTIVE\n"); + + ssao.gather_shader.initialize(ssao_modes); + + ssao.gather_shader_version = ssao.gather_shader.version_create(); + + for (int i = 0; i <= SSAO_GATHER_ADAPTIVE; i++) { + ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.gather_shader.version_get_shader(ssao.gather_shader_version, i)); + pipeline++; + } + } + + { + Vector ssao_modes; + ssao_modes.push_back("\n#define GENERATE_MAP\n"); + ssao_modes.push_back("\n#define PROCESS_MAPA\n"); + ssao_modes.push_back("\n#define PROCESS_MAPB\n"); + + ssao.importance_map_shader.initialize(ssao_modes); + + ssao.importance_map_shader_version = ssao.importance_map_shader.version_create(); + + for (int i = SSAO_GENERATE_IMPORTANCE_MAP; i <= SSAO_PROCESS_IMPORTANCE_MAPB; i++) { + ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.importance_map_shader.version_get_shader(ssao.importance_map_shader_version, i - SSAO_GENERATE_IMPORTANCE_MAP)); + + pipeline++; + } + + ssao.importance_map_load_counter = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t)); + int zero[1] = { 0 }; + RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero); + RD::get_singleton()->set_resource_name(ssao.importance_map_load_counter, "Importance Map Load Counter"); + + Vector uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 0; + u.append_id(ssao.importance_map_load_counter); + uniforms.push_back(u); + } + ssao.counter_uniform_set = 
RD::get_singleton()->uniform_set_create(uniforms, ssao.importance_map_shader.version_get_shader(ssao.importance_map_shader_version, 2), 2); + RD::get_singleton()->set_resource_name(ssao.counter_uniform_set, "Load Counter Uniform Set"); + } + + { + Vector ssao_modes; + ssao_modes.push_back("\n#define MODE_NON_SMART\n"); + ssao_modes.push_back("\n#define MODE_SMART\n"); + ssao_modes.push_back("\n#define MODE_WIDE\n"); + + ssao.blur_shader.initialize(ssao_modes); + + ssao.blur_shader_version = ssao.blur_shader.version_create(); + + for (int i = SSAO_BLUR_PASS; i <= SSAO_BLUR_PASS_WIDE; i++) { + ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.blur_shader.version_get_shader(ssao.blur_shader_version, i - SSAO_BLUR_PASS)); + + pipeline++; + } + } + + { + Vector ssao_modes; + ssao_modes.push_back("\n#define MODE_NON_SMART\n"); + ssao_modes.push_back("\n#define MODE_SMART\n"); + ssao_modes.push_back("\n#define MODE_HALF\n"); + + ssao.interleave_shader.initialize(ssao_modes); + + ssao.interleave_shader_version = ssao.interleave_shader.version_create(); + for (int i = SSAO_INTERLEAVE; i <= SSAO_INTERLEAVE_HALF; i++) { + ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.interleave_shader.version_get_shader(ssao.interleave_shader_version, i - SSAO_INTERLEAVE)); + RD::get_singleton()->set_resource_name(ssao.pipelines[pipeline], "Interleave Pipeline " + itos(i)); + pipeline++; + } + } + + ERR_FAIL_COND(pipeline != SSAO_MAX); + + ss_effects.mirror_sampler = RD::get_singleton()->sampler_create(sampler); + } + + { + // Screen Space Reflections + + Vector specialization_constants; + + { + RD::PipelineSpecializationConstant sc; + sc.type = RD::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_BOOL; + sc.constant_id = 0; // SSR_USE_FULL_PROJECTION_MATRIX + sc.bool_value = false; + specialization_constants.push_back(sc); + } + + { + Vector ssr_scale_modes; + ssr_scale_modes.push_back("\n"); + + ssr_scale.shader.initialize(ssr_scale_modes); + 
ssr_scale.shader_version = ssr_scale.shader.version_create(); + + for (int v = 0; v < SSR_VARIATIONS; v++) { + specialization_constants.ptrw()[0].bool_value = (v & SSR_MULTIVIEW) ? true : false; + ssr_scale.pipelines[v] = RD::get_singleton()->compute_pipeline_create(ssr_scale.shader.version_get_shader(ssr_scale.shader_version, 0), specialization_constants); + } + } + + { + Vector ssr_modes; + ssr_modes.push_back("\n"); // SCREEN_SPACE_REFLECTION_NORMAL + ssr_modes.push_back("\n#define MODE_ROUGH\n"); // SCREEN_SPACE_REFLECTION_ROUGH + + ssr.shader.initialize(ssr_modes); + ssr.shader_version = ssr.shader.version_create(); + + for (int v = 0; v < SSR_VARIATIONS; v++) { + specialization_constants.ptrw()[0].bool_value = (v & SSR_MULTIVIEW) ? true : false; + for (int i = 0; i < SCREEN_SPACE_REFLECTION_MAX; i++) { + ssr.pipelines[v][i] = RD::get_singleton()->compute_pipeline_create(ssr.shader.version_get_shader(ssr.shader_version, i), specialization_constants); + } + } + } + + { + Vector ssr_filter_modes; + ssr_filter_modes.push_back("\n"); // SCREEN_SPACE_REFLECTION_FILTER_HORIZONTAL + ssr_filter_modes.push_back("\n#define VERTICAL_PASS\n"); // SCREEN_SPACE_REFLECTION_FILTER_VERTICAL + + ssr_filter.shader.initialize(ssr_filter_modes); + ssr_filter.shader_version = ssr_filter.shader.version_create(); + + for (int v = 0; v < SSR_VARIATIONS; v++) { + specialization_constants.ptrw()[0].bool_value = (v & SSR_MULTIVIEW) ? 
true : false; + for (int i = 0; i < SCREEN_SPACE_REFLECTION_FILTER_MAX; i++) { + ssr_filter.pipelines[v][i] = RD::get_singleton()->compute_pipeline_create(ssr_filter.shader.version_get_shader(ssr_filter.shader_version, i), specialization_constants); + } + } + } + } +} + +SSEffects::~SSEffects() { + { + // Cleanup SS Reflections + ssr.shader.version_free(ssr.shader_version); + ssr_filter.shader.version_free(ssr_filter.shader_version); + ssr_scale.shader.version_free(ssr_scale.shader_version); + + if (ssr.ubo.is_valid()) { + RD::get_singleton()->free(ssr.ubo); + } + } + + { + // Cleanup SS downsampler + ss_effects.downsample_shader.version_free(ss_effects.downsample_shader_version); + + RD::get_singleton()->free(ss_effects.mirror_sampler); + RD::get_singleton()->free(ss_effects.gather_constants_buffer); + } + + { + // Cleanup SSIL + ssil.blur_shader.version_free(ssil.blur_shader_version); + ssil.gather_shader.version_free(ssil.gather_shader_version); + ssil.interleave_shader.version_free(ssil.interleave_shader_version); + ssil.importance_map_shader.version_free(ssil.importance_map_shader_version); + + RD::get_singleton()->free(ssil.importance_map_load_counter); + RD::get_singleton()->free(ssil.projection_uniform_buffer); + } + + { + // Cleanup SSAO + ssao.blur_shader.version_free(ssao.blur_shader_version); + ssao.gather_shader.version_free(ssao.gather_shader_version); + ssao.interleave_shader.version_free(ssao.interleave_shader_version); + ssao.importance_map_shader.version_free(ssao.importance_map_shader_version); + + RD::get_singleton()->free(ssao.importance_map_load_counter); + } + + singleton = nullptr; +} + +/* SS Downsampler */ + +void SSEffects::downsample_depth(RID p_depth_buffer, const Vector &p_depth_mipmaps, RS::EnvironmentSSAOQuality p_ssao_quality, RS::EnvironmentSSILQuality p_ssil_quality, bool p_invalidate_uniform_set, bool p_ssao_half_size, bool p_ssil_half_size, Size2i p_full_screen_size, const CameraMatrix &p_projection) { + UniformSetCacheRD 
*uniform_set_cache = UniformSetCacheRD::get_singleton(); + ERR_FAIL_NULL(uniform_set_cache); + MaterialStorage *material_storage = MaterialStorage::get_singleton(); + ERR_FAIL_NULL(material_storage); + + // Downsample and deinterleave the depth buffer for SSAO and SSIL + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + int downsample_mode = SS_EFFECTS_DOWNSAMPLE; + bool use_mips = p_ssao_quality > RS::ENV_SSAO_QUALITY_MEDIUM || p_ssil_quality > RS::ENV_SSIL_QUALITY_MEDIUM; + + if (p_ssao_quality == RS::ENV_SSAO_QUALITY_VERY_LOW && p_ssil_quality == RS::ENV_SSIL_QUALITY_VERY_LOW) { + downsample_mode = SS_EFFECTS_DOWNSAMPLE_HALF; + } else if (use_mips) { + downsample_mode = SS_EFFECTS_DOWNSAMPLE_MIPMAP; + } + + bool use_half_size = false; + bool use_full_mips = false; + + if (p_ssao_half_size && p_ssil_half_size) { + downsample_mode++; + use_half_size = true; + } else if (p_ssao_half_size != p_ssil_half_size) { + if (use_mips) { + downsample_mode = SS_EFFECTS_DOWNSAMPLE_FULL_MIPS; + use_full_mips = true; + } else { + // Only need the first two mipmaps, but the cost to generate the next two is trivial + // TODO investigate the benefit of a shader version to generate only 2 mips + downsample_mode = SS_EFFECTS_DOWNSAMPLE_MIPMAP; + use_mips = true; + } + } + + int depth_index = use_half_size ? 
1 : 0; + + RD::get_singleton()->draw_command_begin_label("Downsample Depth"); + if (p_invalidate_uniform_set || use_full_mips != ss_effects.used_full_mips_last_frame || use_half_size != ss_effects.used_half_size_last_frame || use_mips != ss_effects.used_mips_last_frame) { + Vector uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 0; + u.append_id(p_depth_mipmaps[depth_index + 1]); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 1; + u.append_id(p_depth_mipmaps[depth_index + 2]); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 2; + u.append_id(p_depth_mipmaps[depth_index + 3]); + uniforms.push_back(u); + } + if (use_full_mips) { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 3; + u.append_id(p_depth_mipmaps[4]); + uniforms.push_back(u); + } + ss_effects.downsample_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ss_effects.downsample_shader.version_get_shader(ss_effects.downsample_shader_version, use_full_mips ? 
6 : 2), 2); + } + + float depth_linearize_mul = -p_projection.matrix[3][2]; + float depth_linearize_add = p_projection.matrix[2][2]; + if (depth_linearize_mul * depth_linearize_add < 0) { + depth_linearize_add = -depth_linearize_add; + } + + ss_effects.downsample_push_constant.orthogonal = p_projection.is_orthogonal(); + ss_effects.downsample_push_constant.z_near = depth_linearize_mul; + ss_effects.downsample_push_constant.z_far = depth_linearize_add; + if (ss_effects.downsample_push_constant.orthogonal) { + ss_effects.downsample_push_constant.z_near = p_projection.get_z_near(); + ss_effects.downsample_push_constant.z_far = p_projection.get_z_far(); + } + ss_effects.downsample_push_constant.pixel_size[0] = 1.0 / p_full_screen_size.x; + ss_effects.downsample_push_constant.pixel_size[1] = 1.0 / p_full_screen_size.y; + ss_effects.downsample_push_constant.radius_sq = 1.0; + + RID shader = ss_effects.downsample_shader.version_get_shader(ss_effects.downsample_shader_version, downsample_mode); + RID default_sampler = material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + + RD::Uniform u_depth_buffer(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_depth_buffer })); + RD::Uniform u_depth_mipmaps(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_depth_mipmaps[depth_index + 0] })); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ss_effects.pipelines[downsample_mode]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_depth_buffer), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_depth_mipmaps), 1); + if (use_mips) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, ss_effects.downsample_uniform_set, 2); + } + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ss_effects.downsample_push_constant, 
sizeof(SSEffectsDownsamplePushConstant)); + + Size2i size(MAX(1, p_full_screen_size.x >> (use_half_size ? 2 : 1)), MAX(1, p_full_screen_size.y >> (use_half_size ? 2 : 1))); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, size.x, size.y, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + RD::get_singleton()->draw_command_end_label(); + + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_COMPUTE); + + ss_effects.used_full_mips_last_frame = use_full_mips; + ss_effects.used_half_size_last_frame = use_half_size; +} + +/* SSIL */ + +void SSEffects::gather_ssil(RD::ComputeListID p_compute_list, const Vector p_ssil_slices, const Vector p_edges_slices, const SSILSettings &p_settings, bool p_adaptive_base_pass, RID p_gather_uniform_set, RID p_importance_map_uniform_set, RID p_projection_uniform_set) { + UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton(); + ERR_FAIL_NULL(uniform_set_cache); + + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, p_gather_uniform_set, 0); + if ((p_settings.quality == RS::ENV_SSIL_QUALITY_ULTRA) && !p_adaptive_base_pass) { + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, p_importance_map_uniform_set, 1); + } + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, p_projection_uniform_set, 3); + + RID shader = ssil.gather_shader.version_get_shader(ssil.gather_shader_version, 0); + + for (int i = 0; i < 4; i++) { + if ((p_settings.quality == RS::ENV_SSIL_QUALITY_VERY_LOW) && ((i == 1) || (i == 2))) { + continue; + } + + RD::Uniform u_ssil_slice(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssil_slices[i] })); + RD::Uniform u_edges_slice(RD::UNIFORM_TYPE_IMAGE, 1, Vector({ p_edges_slices[i] })); + + ssil.gather_push_constant.pass_coord_offset[0] = i % 2; + ssil.gather_push_constant.pass_coord_offset[1] = i / 2; + ssil.gather_push_constant.pass_uv_offset[0] = ((i % 2) - 0.0) / p_settings.full_screen_size.x; + 
ssil.gather_push_constant.pass_uv_offset[1] = ((i / 2) - 0.0) / p_settings.full_screen_size.y; + ssil.gather_push_constant.pass = i; + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, uniform_set_cache->get_cache(shader, 2, u_ssil_slice, u_edges_slice), 2); + RD::get_singleton()->compute_list_set_push_constant(p_compute_list, &ssil.gather_push_constant, sizeof(SSILGatherPushConstant)); + + Size2i size = Size2i(p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1), p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1)); + + RD::get_singleton()->compute_list_dispatch_threads(p_compute_list, size.x, size.y, 1); + } + RD::get_singleton()->compute_list_add_barrier(p_compute_list); +} + +void SSEffects::ssil_allocate_buffers(SSILRenderBuffers &p_ssil_buffers, const SSILSettings &p_settings, RID p_linear_depth) { + if (p_ssil_buffers.half_size != p_settings.half_size) { + ssil_free(p_ssil_buffers); + } + + if (p_settings.half_size) { + p_ssil_buffers.buffer_width = (p_settings.full_screen_size.x + 3) / 4; + p_ssil_buffers.buffer_height = (p_settings.full_screen_size.y + 3) / 4; + p_ssil_buffers.half_buffer_width = (p_settings.full_screen_size.x + 7) / 8; + p_ssil_buffers.half_buffer_height = (p_settings.full_screen_size.y + 7) / 8; + } else { + p_ssil_buffers.buffer_width = (p_settings.full_screen_size.x + 1) / 2; + p_ssil_buffers.buffer_height = (p_settings.full_screen_size.y + 1) / 2; + p_ssil_buffers.half_buffer_width = (p_settings.full_screen_size.x + 3) / 4; + p_ssil_buffers.half_buffer_height = (p_settings.full_screen_size.y + 3) / 4; + } + + if (p_ssil_buffers.ssil_final.is_null()) { + { + p_ssil_buffers.depth_texture_view = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), p_linear_depth, 0, p_settings.half_size ? 
1 : 0, 4, RD::TEXTURE_SLICE_2D_ARRAY); + } + { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + tf.width = p_settings.full_screen_size.x; + tf.height = p_settings.full_screen_size.y; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; + p_ssil_buffers.ssil_final = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(p_ssil_buffers.ssil_final, "SSIL texture"); + RD::get_singleton()->texture_clear(p_ssil_buffers.ssil_final, Color(0, 0, 0, 0), 0, 1, 0, 1); + if (p_ssil_buffers.last_frame.is_null()) { + tf.mipmaps = 6; + p_ssil_buffers.last_frame = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(p_ssil_buffers.last_frame, "Last Frame Radiance"); + RD::get_singleton()->texture_clear(p_ssil_buffers.last_frame, Color(0, 0, 0, 0), 0, tf.mipmaps, 0, 1); + for (uint32_t i = 0; i < 6; i++) { + RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), p_ssil_buffers.last_frame, 0, i); + p_ssil_buffers.last_frame_slices.push_back(slice); + RD::get_singleton()->set_resource_name(slice, "Last Frame Radiance Mip " + itos(i) + " "); + } + } + } + { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; + tf.width = p_ssil_buffers.buffer_width; + tf.height = p_ssil_buffers.buffer_height; + tf.array_layers = 4; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + p_ssil_buffers.deinterleaved = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(p_ssil_buffers.deinterleaved, "SSIL deinterleaved buffer"); + for (uint32_t i = 0; i < 4; i++) { + RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), p_ssil_buffers.deinterleaved, i, 0); + p_ssil_buffers.deinterleaved_slices.push_back(slice); + 
RD::get_singleton()->set_resource_name(slice, "SSIL deinterleaved buffer array " + itos(i) + " "); + } + } + + { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; + tf.width = p_ssil_buffers.buffer_width; + tf.height = p_ssil_buffers.buffer_height; + tf.array_layers = 4; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + p_ssil_buffers.pong = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(p_ssil_buffers.pong, "SSIL deinterleaved pong buffer"); + for (uint32_t i = 0; i < 4; i++) { + RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), p_ssil_buffers.pong, i, 0); + p_ssil_buffers.pong_slices.push_back(slice); + RD::get_singleton()->set_resource_name(slice, "SSIL deinterleaved buffer pong array " + itos(i) + " "); + } + } + + { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8_UNORM; + tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; + tf.width = p_ssil_buffers.buffer_width; + tf.height = p_ssil_buffers.buffer_height; + tf.array_layers = 4; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + p_ssil_buffers.edges = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(p_ssil_buffers.edges, "SSIL edges buffer"); + for (uint32_t i = 0; i < 4; i++) { + RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), p_ssil_buffers.edges, i, 0); + p_ssil_buffers.edges_slices.push_back(slice); + RD::get_singleton()->set_resource_name(slice, "SSIL edges buffer slice " + itos(i) + " "); + } + } + + { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8_UNORM; + tf.width = p_ssil_buffers.half_buffer_width; + tf.height = p_ssil_buffers.half_buffer_height; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + p_ssil_buffers.importance_map[0] = 
RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(p_ssil_buffers.importance_map[0], "SSIL Importance Map"); + p_ssil_buffers.importance_map[1] = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(p_ssil_buffers.importance_map[1], "SSIL Importance Map Pong"); + } + p_ssil_buffers.half_size = p_settings.half_size; + } +} + +void SSEffects::screen_space_indirect_lighting(SSILRenderBuffers &p_ssil_buffers, RID p_normal_buffer, const CameraMatrix &p_projection, const CameraMatrix &p_last_projection, const SSILSettings &p_settings) { + UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton(); + ERR_FAIL_NULL(uniform_set_cache); + MaterialStorage *material_storage = MaterialStorage::get_singleton(); + ERR_FAIL_NULL(material_storage); + + RD::get_singleton()->draw_command_begin_label("Process Screen Space Indirect Lighting"); + //Store projection info before starting the compute list + SSILProjectionUniforms projection_uniforms; + store_camera(p_last_projection, projection_uniforms.inv_last_frame_projection_matrix); + + RD::get_singleton()->buffer_update(ssil.projection_uniform_buffer, 0, sizeof(SSILProjectionUniforms), &projection_uniforms); + + memset(&ssil.gather_push_constant, 0, sizeof(SSILGatherPushConstant)); + + RID shader = ssil.gather_shader.version_get_shader(ssil.gather_shader_version, 0); + RID default_sampler = material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + RID default_mipmap_sampler = material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + { + RD::get_singleton()->draw_command_begin_label("Gather Samples"); + ssil.gather_push_constant.screen_size[0] = p_settings.full_screen_size.x; + 
ssil.gather_push_constant.screen_size[1] = p_settings.full_screen_size.y; + + ssil.gather_push_constant.half_screen_pixel_size[0] = 1.0 / p_ssil_buffers.buffer_width; + ssil.gather_push_constant.half_screen_pixel_size[1] = 1.0 / p_ssil_buffers.buffer_height; + float tan_half_fov_x = 1.0 / p_projection.matrix[0][0]; + float tan_half_fov_y = 1.0 / p_projection.matrix[1][1]; + ssil.gather_push_constant.NDC_to_view_mul[0] = tan_half_fov_x * 2.0; + ssil.gather_push_constant.NDC_to_view_mul[1] = tan_half_fov_y * -2.0; + ssil.gather_push_constant.NDC_to_view_add[0] = tan_half_fov_x * -1.0; + ssil.gather_push_constant.NDC_to_view_add[1] = tan_half_fov_y; + ssil.gather_push_constant.z_near = p_projection.get_z_near(); + ssil.gather_push_constant.z_far = p_projection.get_z_far(); + ssil.gather_push_constant.is_orthogonal = p_projection.is_orthogonal(); + + ssil.gather_push_constant.half_screen_pixel_size_x025[0] = ssil.gather_push_constant.half_screen_pixel_size[0] * 0.25; + ssil.gather_push_constant.half_screen_pixel_size_x025[1] = ssil.gather_push_constant.half_screen_pixel_size[1] * 0.25; + + ssil.gather_push_constant.radius = p_settings.radius; + float radius_near_limit = (p_settings.radius * 1.2f); + if (p_settings.quality <= RS::ENV_SSIL_QUALITY_LOW) { + radius_near_limit *= 1.50f; + + if (p_settings.quality == RS::ENV_SSIL_QUALITY_VERY_LOW) { + ssil.gather_push_constant.radius *= 0.8f; + } + } + radius_near_limit /= tan_half_fov_y; + ssil.gather_push_constant.intensity = p_settings.intensity * Math_PI; + ssil.gather_push_constant.fade_out_mul = -1.0 / (p_settings.fadeout_to - p_settings.fadeout_from); + ssil.gather_push_constant.fade_out_add = p_settings.fadeout_from / (p_settings.fadeout_to - p_settings.fadeout_from) + 1.0; + ssil.gather_push_constant.inv_radius_near_limit = 1.0f / radius_near_limit; + ssil.gather_push_constant.neg_inv_radius = -1.0 / ssil.gather_push_constant.radius; + ssil.gather_push_constant.normal_rejection_amount = p_settings.normal_rejection; 
+ + ssil.gather_push_constant.load_counter_avg_div = 9.0 / float((p_ssil_buffers.half_buffer_width) * (p_ssil_buffers.half_buffer_height) * 255); + ssil.gather_push_constant.adaptive_sample_limit = p_settings.adaptive_target; + + ssil.gather_push_constant.quality = MAX(0, p_settings.quality - 1); + ssil.gather_push_constant.size_multiplier = p_settings.half_size ? 2 : 1; + + if (p_ssil_buffers.projection_uniform_set.is_null()) { + Vector uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; + u.binding = 0; + u.append_id(default_mipmap_sampler); + u.append_id(p_ssil_buffers.last_frame); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 1; + u.append_id(ssil.projection_uniform_buffer); + uniforms.push_back(u); + } + p_ssil_buffers.projection_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssil.gather_shader.version_get_shader(ssil.gather_shader_version, 0), 3); + } + + if (p_ssil_buffers.gather_uniform_set.is_null()) { + Vector uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; + u.binding = 0; + u.append_id(default_sampler); + u.append_id(p_ssil_buffers.depth_texture_view); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 1; + u.append_id(p_normal_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 2; + u.append_id(ss_effects.gather_constants_buffer); + uniforms.push_back(u); + } + p_ssil_buffers.gather_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssil.gather_shader.version_get_shader(ssil.gather_shader_version, 0), 0); + } + + if (p_ssil_buffers.importance_map_uniform_set.is_null()) { + Vector uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 0; + u.append_id(p_ssil_buffers.pong); + uniforms.push_back(u); + } + { + RD::Uniform 
u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; + u.binding = 1; + u.append_id(default_sampler); + u.append_id(p_ssil_buffers.importance_map[0]); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 2; + u.append_id(ssil.importance_map_load_counter); + uniforms.push_back(u); + } + p_ssil_buffers.importance_map_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssil.gather_shader.version_get_shader(ssil.gather_shader_version, 2), 1); + } + + if (p_settings.quality == RS::ENV_SSIL_QUALITY_ULTRA) { + RD::get_singleton()->draw_command_begin_label("Generate Importance Map"); + ssil.importance_map_push_constant.half_screen_pixel_size[0] = 1.0 / p_ssil_buffers.buffer_width; + ssil.importance_map_push_constant.half_screen_pixel_size[1] = 1.0 / p_ssil_buffers.buffer_height; + ssil.importance_map_push_constant.intensity = p_settings.intensity * Math_PI; + //base pass + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssil.pipelines[SSIL_GATHER_BASE]); + gather_ssil(compute_list, p_ssil_buffers.pong_slices, p_ssil_buffers.edges_slices, p_settings, true, p_ssil_buffers.gather_uniform_set, p_ssil_buffers.importance_map_uniform_set, p_ssil_buffers.projection_uniform_set); + + //generate importance map + RD::Uniform u_ssil_pong_with_sampler(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_ssil_buffers.pong })); + RD::Uniform u_importance_map(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssil_buffers.importance_map[0] })); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssil.pipelines[SSIL_GENERATE_IMPORTANCE_MAP]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_ssil_pong_with_sampler), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_importance_map), 1); + 
RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssil.importance_map_push_constant, sizeof(SSILImportanceMapPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_ssil_buffers.half_buffer_width, p_ssil_buffers.half_buffer_height, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + + // process Importance Map A + RD::Uniform u_importance_map_with_sampler(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_ssil_buffers.importance_map[0] })); + RD::Uniform u_importance_map_pong(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssil_buffers.importance_map[1] })); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssil.pipelines[SSIL_PROCESS_IMPORTANCE_MAPA]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_importance_map_with_sampler), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_importance_map_pong), 1); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssil.importance_map_push_constant, sizeof(SSILImportanceMapPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_ssil_buffers.half_buffer_width, p_ssil_buffers.half_buffer_height, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + + // process Importance Map B + RD::Uniform u_importance_map_pong_with_sampler(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_ssil_buffers.importance_map[1] })); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssil.pipelines[SSIL_PROCESS_IMPORTANCE_MAPB]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_importance_map_pong_with_sampler), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_importance_map), 1); + 
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, ssil.counter_uniform_set, 2); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssil.importance_map_push_constant, sizeof(SSILImportanceMapPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_ssil_buffers.half_buffer_width, p_ssil_buffers.half_buffer_height, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + + RD::get_singleton()->draw_command_end_label(); // Importance Map + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssil.pipelines[SSIL_GATHER_ADAPTIVE]); + } else { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssil.pipelines[SSIL_GATHER]); + } + + gather_ssil(compute_list, p_ssil_buffers.deinterleaved_slices, p_ssil_buffers.edges_slices, p_settings, false, p_ssil_buffers.gather_uniform_set, p_ssil_buffers.importance_map_uniform_set, p_ssil_buffers.projection_uniform_set); + RD::get_singleton()->draw_command_end_label(); //Gather + } + + { + RD::get_singleton()->draw_command_begin_label("Edge Aware Blur"); + ssil.blur_push_constant.edge_sharpness = 1.0 - p_settings.sharpness; + ssil.blur_push_constant.half_screen_pixel_size[0] = 1.0 / p_ssil_buffers.buffer_width; + ssil.blur_push_constant.half_screen_pixel_size[1] = 1.0 / p_ssil_buffers.buffer_height; + + int blur_passes = p_settings.quality > RS::ENV_SSIL_QUALITY_VERY_LOW ? 
p_settings.blur_passes : 1; + + shader = ssil.blur_shader.version_get_shader(ssil.blur_shader_version, 0); + + for (int pass = 0; pass < blur_passes; pass++) { + int blur_pipeline = SSIL_BLUR_PASS; + if (p_settings.quality > RS::ENV_SSIL_QUALITY_VERY_LOW) { + blur_pipeline = SSIL_BLUR_PASS_SMART; + if (pass < blur_passes - 2) { + blur_pipeline = SSIL_BLUR_PASS_WIDE; + } + } + + for (int i = 0; i < 4; i++) { + if ((p_settings.quality == RS::ENV_SSIL_QUALITY_VERY_LOW) && ((i == 1) || (i == 2))) { + continue; + } + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssil.pipelines[blur_pipeline]); + if (pass % 2 == 0) { + if (p_settings.quality == RS::ENV_SSIL_QUALITY_VERY_LOW) { + RD::Uniform u_ssil_slice(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_ssil_buffers.deinterleaved_slices[i] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_ssil_slice), 0); + } else { + RD::Uniform u_ssil_slice(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ ss_effects.mirror_sampler, p_ssil_buffers.deinterleaved_slices[i] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_ssil_slice), 0); + } + + RD::Uniform u_ssil_pong_slice(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssil_buffers.pong_slices[i] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_ssil_pong_slice), 1); + } else { + if (p_settings.quality == RS::ENV_SSIL_QUALITY_VERY_LOW) { + RD::Uniform u_ssil_pong_slice(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_ssil_buffers.pong_slices[i] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_ssil_pong_slice), 0); + } else { + RD::Uniform u_ssil_pong_slice(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ ss_effects.mirror_sampler, p_ssil_buffers.pong_slices[i] })); + 
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_ssil_pong_slice), 0); + } + + RD::Uniform u_ssil_slice(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssil_buffers.deinterleaved_slices[i] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_ssil_slice), 1); + } + + RD::Uniform u_edges_slice(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssil_buffers.edges_slices[i] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 2, u_edges_slice), 2); + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssil.blur_push_constant, sizeof(SSILBlurPushConstant)); + + int x_groups = (p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1)); + int y_groups = (p_settings.full_screen_size.y >> (p_settings.half_size ? 2 : 1)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, x_groups, y_groups, 1); + if (p_settings.quality > RS::ENV_SSIL_QUALITY_VERY_LOW) { + RD::get_singleton()->compute_list_add_barrier(compute_list); + } + } + } + + RD::get_singleton()->draw_command_end_label(); // Blur + } + + { + RD::get_singleton()->draw_command_begin_label("Interleave Buffers"); + ssil.interleave_push_constant.inv_sharpness = 1.0 - p_settings.sharpness; + ssil.interleave_push_constant.pixel_size[0] = 1.0 / p_settings.full_screen_size.x; + ssil.interleave_push_constant.pixel_size[1] = 1.0 / p_settings.full_screen_size.y; + ssil.interleave_push_constant.size_modifier = uint32_t(p_settings.half_size ? 
4 : 2); + + int interleave_pipeline = SSIL_INTERLEAVE_HALF; + if (p_settings.quality == RS::ENV_SSIL_QUALITY_LOW) { + interleave_pipeline = SSIL_INTERLEAVE; + } else if (p_settings.quality >= RS::ENV_SSIL_QUALITY_MEDIUM) { + interleave_pipeline = SSIL_INTERLEAVE_SMART; + } + + shader = ssil.interleave_shader.version_get_shader(ssil.interleave_shader_version, 0); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssil.pipelines[interleave_pipeline]); + + RD::Uniform u_destination(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssil_buffers.ssil_final })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_destination), 0); + + if (p_settings.quality > RS::ENV_SSIL_QUALITY_VERY_LOW && p_settings.blur_passes % 2 == 0) { + RD::Uniform u_ssil(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_ssil_buffers.deinterleaved })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_ssil), 1); + } else { + RD::Uniform u_ssil_pong(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_ssil_buffers.pong })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_ssil_pong), 1); + } + + RD::Uniform u_edges(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssil_buffers.edges })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 2, u_edges), 2); + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssil.interleave_push_constant, sizeof(SSILInterleavePushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.full_screen_size.x, p_settings.full_screen_size.y, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + RD::get_singleton()->draw_command_end_label(); // Interleave + } + + RD::get_singleton()->draw_command_end_label(); // SSIL + + 
RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); + + int zero[1] = { 0 }; + RD::get_singleton()->buffer_update(ssil.importance_map_load_counter, 0, sizeof(uint32_t), &zero, 0); //no barrier +} + +void SSEffects::ssil_free(SSILRenderBuffers &p_ssil_buffers) { + if (p_ssil_buffers.ssil_final.is_valid()) { + RD::get_singleton()->free(p_ssil_buffers.ssil_final); + RD::get_singleton()->free(p_ssil_buffers.deinterleaved); + RD::get_singleton()->free(p_ssil_buffers.pong); + RD::get_singleton()->free(p_ssil_buffers.edges); + RD::get_singleton()->free(p_ssil_buffers.importance_map[0]); + RD::get_singleton()->free(p_ssil_buffers.importance_map[1]); + RD::get_singleton()->free(p_ssil_buffers.last_frame); + + p_ssil_buffers.ssil_final = RID(); + p_ssil_buffers.deinterleaved = RID(); + p_ssil_buffers.pong = RID(); + p_ssil_buffers.edges = RID(); + p_ssil_buffers.deinterleaved_slices.clear(); + p_ssil_buffers.pong_slices.clear(); + p_ssil_buffers.edges_slices.clear(); + p_ssil_buffers.importance_map[0] = RID(); + p_ssil_buffers.importance_map[1] = RID(); + p_ssil_buffers.last_frame = RID(); + p_ssil_buffers.last_frame_slices.clear(); + + p_ssil_buffers.gather_uniform_set = RID(); + p_ssil_buffers.importance_map_uniform_set = RID(); + p_ssil_buffers.projection_uniform_set = RID(); + } +} + +/* SSAO */ + +void SSEffects::gather_ssao(RD::ComputeListID p_compute_list, const Vector p_ao_slices, const SSAOSettings &p_settings, bool p_adaptive_base_pass, RID p_gather_uniform_set, RID p_importance_map_uniform_set) { + UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton(); + ERR_FAIL_NULL(uniform_set_cache); + + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, p_gather_uniform_set, 0); + if ((p_settings.quality == RS::ENV_SSAO_QUALITY_ULTRA) && !p_adaptive_base_pass) { + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, p_importance_map_uniform_set, 0); + } + + RID shader = 
ssao.gather_shader.version_get_shader(ssao.gather_shader_version, 1); // + + for (int i = 0; i < 4; i++) { + if ((p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) && ((i == 1) || (i == 2))) { + continue; + } + + RD::Uniform u_ao_slice(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ao_slices[i] })); + + ssao.gather_push_constant.pass_coord_offset[0] = i % 2; + ssao.gather_push_constant.pass_coord_offset[1] = i / 2; + ssao.gather_push_constant.pass_uv_offset[0] = ((i % 2) - 0.0) / p_settings.full_screen_size.x; + ssao.gather_push_constant.pass_uv_offset[1] = ((i / 2) - 0.0) / p_settings.full_screen_size.y; + ssao.gather_push_constant.pass = i; + RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, uniform_set_cache->get_cache(shader, 2, u_ao_slice), 2); + RD::get_singleton()->compute_list_set_push_constant(p_compute_list, &ssao.gather_push_constant, sizeof(SSAOGatherPushConstant)); + + Size2i size = Size2i(p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1), p_settings.full_screen_size.y >> (p_settings.half_size ? 
2 : 1)); + + RD::get_singleton()->compute_list_dispatch_threads(p_compute_list, size.x, size.y, 1); + } + RD::get_singleton()->compute_list_add_barrier(p_compute_list); +} + +void SSEffects::ssao_allocate_buffers(SSAORenderBuffers &p_ssao_buffers, const SSAOSettings &p_settings, RID p_linear_depth) { + if (p_ssao_buffers.half_size != p_settings.half_size) { + ssao_free(p_ssao_buffers); + } + + if (p_settings.half_size) { + p_ssao_buffers.buffer_width = (p_settings.full_screen_size.x + 3) / 4; + p_ssao_buffers.buffer_height = (p_settings.full_screen_size.y + 3) / 4; + p_ssao_buffers.half_buffer_width = (p_settings.full_screen_size.x + 7) / 8; + p_ssao_buffers.half_buffer_height = (p_settings.full_screen_size.y + 7) / 8; + } else { + p_ssao_buffers.buffer_width = (p_settings.full_screen_size.x + 1) / 2; + p_ssao_buffers.buffer_height = (p_settings.full_screen_size.y + 1) / 2; + p_ssao_buffers.half_buffer_width = (p_settings.full_screen_size.x + 3) / 4; + p_ssao_buffers.half_buffer_height = (p_settings.full_screen_size.y + 3) / 4; + } + + if (p_ssao_buffers.ao_deinterleaved.is_null()) { + { + p_ssao_buffers.depth_texture_view = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), p_linear_depth, 0, p_settings.half_size ? 
1 : 0, 4, RD::TEXTURE_SLICE_2D_ARRAY); + } + { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8G8_UNORM; + tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; + tf.width = p_ssao_buffers.buffer_width; + tf.height = p_ssao_buffers.buffer_height; + tf.array_layers = 4; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + p_ssao_buffers.ao_deinterleaved = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(p_ssao_buffers.ao_deinterleaved, "SSAO De-interleaved Array"); + for (uint32_t i = 0; i < 4; i++) { + RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), p_ssao_buffers.ao_deinterleaved, i, 0); + p_ssao_buffers.ao_deinterleaved_slices.push_back(slice); + RD::get_singleton()->set_resource_name(slice, "SSAO De-interleaved Array Layer " + itos(i) + " "); + } + } + + { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8G8_UNORM; + tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; + tf.width = p_ssao_buffers.buffer_width; + tf.height = p_ssao_buffers.buffer_height; + tf.array_layers = 4; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + p_ssao_buffers.ao_pong = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(p_ssao_buffers.ao_pong, "SSAO De-interleaved Array Pong"); + for (uint32_t i = 0; i < 4; i++) { + RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), p_ssao_buffers.ao_pong, i, 0); + p_ssao_buffers.ao_pong_slices.push_back(slice); + RD::get_singleton()->set_resource_name(slice, "SSAO De-interleaved Array Layer " + itos(i) + " Pong"); + } + } + + { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8_UNORM; + tf.width = p_ssao_buffers.buffer_width; + tf.height = p_ssao_buffers.buffer_height; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + p_ssao_buffers.importance_map[0] = 
RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(p_ssao_buffers.importance_map[0], "SSAO Importance Map"); + p_ssao_buffers.importance_map[1] = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(p_ssao_buffers.importance_map[1], "SSAO Importance Map Pong"); + } + { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8_UNORM; + tf.width = p_settings.full_screen_size.x; + tf.height = p_settings.full_screen_size.y; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + p_ssao_buffers.ao_final = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(p_ssao_buffers.ao_final, "SSAO Final"); + } + p_ssao_buffers.half_size = p_settings.half_size; + } +} + +void SSEffects::generate_ssao(SSAORenderBuffers &p_ssao_buffers, RID p_normal_buffer, const CameraMatrix &p_projection, const SSAOSettings &p_settings) { + UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton(); + ERR_FAIL_NULL(uniform_set_cache); + MaterialStorage *material_storage = MaterialStorage::get_singleton(); + ERR_FAIL_NULL(material_storage); + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + memset(&ssao.gather_push_constant, 0, sizeof(SSAOGatherPushConstant)); + /* FIRST PASS */ + + RID shader = ssao.gather_shader.version_get_shader(ssao.gather_shader_version, 0); + RID default_sampler = material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + + RD::get_singleton()->draw_command_begin_label("Process Screen Space Ambient Occlusion"); + /* SECOND PASS */ + // Sample SSAO + { + RD::get_singleton()->draw_command_begin_label("Gather Samples"); + ssao.gather_push_constant.screen_size[0] = p_settings.full_screen_size.x; + ssao.gather_push_constant.screen_size[1] = p_settings.full_screen_size.y; + + 
ssao.gather_push_constant.half_screen_pixel_size[0] = 1.0 / p_ssao_buffers.buffer_width; + ssao.gather_push_constant.half_screen_pixel_size[1] = 1.0 / p_ssao_buffers.buffer_height; + float tan_half_fov_x = 1.0 / p_projection.matrix[0][0]; + float tan_half_fov_y = 1.0 / p_projection.matrix[1][1]; + ssao.gather_push_constant.NDC_to_view_mul[0] = tan_half_fov_x * 2.0; + ssao.gather_push_constant.NDC_to_view_mul[1] = tan_half_fov_y * -2.0; + ssao.gather_push_constant.NDC_to_view_add[0] = tan_half_fov_x * -1.0; + ssao.gather_push_constant.NDC_to_view_add[1] = tan_half_fov_y; + ssao.gather_push_constant.is_orthogonal = p_projection.is_orthogonal(); + + ssao.gather_push_constant.half_screen_pixel_size_x025[0] = ssao.gather_push_constant.half_screen_pixel_size[0] * 0.25; + ssao.gather_push_constant.half_screen_pixel_size_x025[1] = ssao.gather_push_constant.half_screen_pixel_size[1] * 0.25; + + ssao.gather_push_constant.radius = p_settings.radius; + float radius_near_limit = (p_settings.radius * 1.2f); + if (p_settings.quality <= RS::ENV_SSAO_QUALITY_LOW) { + radius_near_limit *= 1.50f; + + if (p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) { + ssao.gather_push_constant.radius *= 0.8f; + } + } + radius_near_limit /= tan_half_fov_y; + ssao.gather_push_constant.intensity = p_settings.intensity; + ssao.gather_push_constant.shadow_power = p_settings.power; + ssao.gather_push_constant.shadow_clamp = 0.98; + ssao.gather_push_constant.fade_out_mul = -1.0 / (p_settings.fadeout_to - p_settings.fadeout_from); + ssao.gather_push_constant.fade_out_add = p_settings.fadeout_from / (p_settings.fadeout_to - p_settings.fadeout_from) + 1.0; + ssao.gather_push_constant.horizon_angle_threshold = p_settings.horizon; + ssao.gather_push_constant.inv_radius_near_limit = 1.0f / radius_near_limit; + ssao.gather_push_constant.neg_inv_radius = -1.0 / ssao.gather_push_constant.radius; + + ssao.gather_push_constant.load_counter_avg_div = 9.0 / float((p_ssao_buffers.half_buffer_width) * 
(p_ssao_buffers.half_buffer_height) * 255); + ssao.gather_push_constant.adaptive_sample_limit = p_settings.adaptive_target; + + ssao.gather_push_constant.detail_intensity = p_settings.detail; + ssao.gather_push_constant.quality = MAX(0, p_settings.quality - 1); + ssao.gather_push_constant.size_multiplier = p_settings.half_size ? 2 : 1; + + if (p_ssao_buffers.gather_uniform_set.is_null()) { + Vector uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; + u.binding = 0; + u.append_id(default_sampler); + u.append_id(p_ssao_buffers.depth_texture_view); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 1; + u.append_id(p_normal_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 2; + u.append_id(ss_effects.gather_constants_buffer); + uniforms.push_back(u); + } + p_ssao_buffers.gather_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shader, 0); + RD::get_singleton()->set_resource_name(p_ssao_buffers.gather_uniform_set, "SSAO Gather Uniform Set"); + } + + if (p_ssao_buffers.importance_map_uniform_set.is_null()) { + Vector uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 0; + u.append_id(p_ssao_buffers.ao_pong); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; + u.binding = 1; + u.append_id(default_sampler); + u.append_id(p_ssao_buffers.importance_map[0]); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 2; + u.append_id(ssao.importance_map_load_counter); + uniforms.push_back(u); + } + p_ssao_buffers.importance_map_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssao.gather_shader.version_get_shader(ssao.gather_shader_version, 2), 1); + RD::get_singleton()->set_resource_name(p_ssao_buffers.importance_map_uniform_set, 
"SSAO Importance Map Uniform Set"); + } + + if (p_settings.quality == RS::ENV_SSAO_QUALITY_ULTRA) { + RD::get_singleton()->draw_command_begin_label("Generate Importance Map"); + ssao.importance_map_push_constant.half_screen_pixel_size[0] = 1.0 / p_ssao_buffers.buffer_width; + ssao.importance_map_push_constant.half_screen_pixel_size[1] = 1.0 / p_ssao_buffers.buffer_height; + ssao.importance_map_push_constant.intensity = p_settings.intensity; + ssao.importance_map_push_constant.power = p_settings.power; + + //base pass + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER_BASE]); + gather_ssao(compute_list, p_ssao_buffers.ao_pong_slices, p_settings, true, p_ssao_buffers.gather_uniform_set, RID()); + + //generate importance map + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GENERATE_IMPORTANCE_MAP]); + + RD::Uniform u_ao_pong_with_sampler(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_ssao_buffers.ao_pong })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_ao_pong_with_sampler), 0); + + RD::Uniform u_importance_map(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssao_buffers.importance_map[0] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_importance_map), 1); + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_ssao_buffers.half_buffer_width, p_ssao_buffers.half_buffer_height, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + + //process importance map A + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_PROCESS_IMPORTANCE_MAPA]); + + RD::Uniform u_importance_map_with_sampler(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, 
Vector({ default_sampler, p_ssao_buffers.importance_map[0] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_importance_map_with_sampler), 0); + + RD::Uniform u_importance_map_pong(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssao_buffers.importance_map[1] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_importance_map_pong), 1); + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_ssao_buffers.half_buffer_width, p_ssao_buffers.half_buffer_height, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + + //process Importance Map B + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_PROCESS_IMPORTANCE_MAPB]); + + RD::Uniform u_importance_map_pong_with_sampler(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_ssao_buffers.importance_map[1] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_importance_map_pong_with_sampler), 0); + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_importance_map), 1); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, ssao.counter_uniform_set, 2); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_ssao_buffers.half_buffer_width, p_ssao_buffers.half_buffer_height, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER_ADAPTIVE]); + RD::get_singleton()->draw_command_end_label(); // 
Importance Map + } else { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER]); + } + + gather_ssao(compute_list, p_ssao_buffers.ao_deinterleaved_slices, p_settings, false, p_ssao_buffers.gather_uniform_set, p_ssao_buffers.importance_map_uniform_set); + RD::get_singleton()->draw_command_end_label(); // Gather SSAO + } + + // /* THIRD PASS */ + // // Blur + // + { + RD::get_singleton()->draw_command_begin_label("Edge Aware Blur"); + ssao.blur_push_constant.edge_sharpness = 1.0 - p_settings.sharpness; + ssao.blur_push_constant.half_screen_pixel_size[0] = 1.0 / p_ssao_buffers.buffer_width; + ssao.blur_push_constant.half_screen_pixel_size[1] = 1.0 / p_ssao_buffers.buffer_height; + + int blur_passes = p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW ? p_settings.blur_passes : 1; + + shader = ssao.blur_shader.version_get_shader(ssao.blur_shader_version, 0); + + for (int pass = 0; pass < blur_passes; pass++) { + int blur_pipeline = SSAO_BLUR_PASS; + if (p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW) { + blur_pipeline = SSAO_BLUR_PASS_SMART; + if (pass < blur_passes - 2) { + blur_pipeline = SSAO_BLUR_PASS_WIDE; + } else { + blur_pipeline = SSAO_BLUR_PASS_SMART; + } + } + + for (int i = 0; i < 4; i++) { + if ((p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) && ((i == 1) || (i == 2))) { + continue; + } + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[blur_pipeline]); + if (pass % 2 == 0) { + if (p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) { + RD::Uniform u_ao_slices_with_sampler(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_ssao_buffers.ao_deinterleaved_slices[i] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_ao_slices_with_sampler), 0); + } else { + RD::Uniform u_ao_slices_with_sampler(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ ss_effects.mirror_sampler, 
p_ssao_buffers.ao_deinterleaved_slices[i] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_ao_slices_with_sampler), 0); + } + + RD::Uniform u_ao_pong_slices(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssao_buffers.ao_pong_slices[i] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_ao_pong_slices), 1); + } else { + if (p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) { + RD::Uniform u_ao_pong_slices_with_sampler(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_ssao_buffers.ao_pong_slices[i] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_ao_pong_slices_with_sampler), 0); + } else { + RD::Uniform u_ao_pong_slices_with_sampler(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ ss_effects.mirror_sampler, p_ssao_buffers.ao_pong_slices[i] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_ao_pong_slices_with_sampler), 0); + } + + RD::Uniform u_ao_slices(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssao_buffers.ao_deinterleaved_slices[i] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_ao_slices), 1); + } + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.blur_push_constant, sizeof(SSAOBlurPushConstant)); + + Size2i size(p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1), p_settings.full_screen_size.y >> (p_settings.half_size ? 
2 : 1)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, size.x, size.y, 1); + } + + if (p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW) { + RD::get_singleton()->compute_list_add_barrier(compute_list); + } + } + RD::get_singleton()->draw_command_end_label(); // Blur + } + + /* FOURTH PASS */ + // Interleave buffers + // back to full size + { + RD::get_singleton()->draw_command_begin_label("Interleave Buffers"); + ssao.interleave_push_constant.inv_sharpness = 1.0 - p_settings.sharpness; + ssao.interleave_push_constant.pixel_size[0] = 1.0 / p_settings.full_screen_size.x; + ssao.interleave_push_constant.pixel_size[1] = 1.0 / p_settings.full_screen_size.y; + ssao.interleave_push_constant.size_modifier = uint32_t(p_settings.half_size ? 4 : 2); + + shader = ssao.interleave_shader.version_get_shader(ssao.interleave_shader_version, 0); + + int interleave_pipeline = SSAO_INTERLEAVE_HALF; + if (p_settings.quality == RS::ENV_SSAO_QUALITY_LOW) { + interleave_pipeline = SSAO_INTERLEAVE; + } else if (p_settings.quality >= RS::ENV_SSAO_QUALITY_MEDIUM) { + interleave_pipeline = SSAO_INTERLEAVE_SMART; + } + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[interleave_pipeline]); + + RD::Uniform u_upscale_buffer(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssao_buffers.ao_final })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_upscale_buffer), 0); + + if (p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW && p_settings.blur_passes % 2 == 0) { + RD::Uniform u_ao(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_ssao_buffers.ao_deinterleaved })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_ao), 1); + } else { + RD::Uniform u_ao(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_ssao_buffers.ao_pong })); + 
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_ao), 1); + } + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.interleave_push_constant, sizeof(SSAOInterleavePushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.full_screen_size.x, p_settings.full_screen_size.y, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + RD::get_singleton()->draw_command_end_label(); // Interleave + } + RD::get_singleton()->draw_command_end_label(); //SSAO + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); //wait for upcoming transfer + + int zero[1] = { 0 }; + RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero, 0); //no barrier +} +/* Frees ao_deinterleaved, ao_pong, ao_final and both importance maps, but only when ao_final is valid; it then resets those RIDs, clears both slice vectors and resets the two cached uniform set RIDs. The slice textures and uniform sets are not freed explicitly here. NOTE(review): presumably they are released together with the textures they depend on - confirm. */ +void SSEffects::ssao_free(SSAORenderBuffers &p_ssao_buffers) { + if (p_ssao_buffers.ao_final.is_valid()) { + RD::get_singleton()->free(p_ssao_buffers.ao_deinterleaved); + RD::get_singleton()->free(p_ssao_buffers.ao_pong); + RD::get_singleton()->free(p_ssao_buffers.ao_final); + + RD::get_singleton()->free(p_ssao_buffers.importance_map[0]); + RD::get_singleton()->free(p_ssao_buffers.importance_map[1]); + + p_ssao_buffers.ao_deinterleaved = RID(); + p_ssao_buffers.ao_pong = RID(); + p_ssao_buffers.ao_final = RID(); + p_ssao_buffers.importance_map[0] = RID(); + p_ssao_buffers.importance_map[1] = RID(); + p_ssao_buffers.ao_deinterleaved_slices.clear(); + p_ssao_buffers.ao_pong_slices.clear(); + + p_ssao_buffers.gather_uniform_set = RID(); + p_ssao_buffers.importance_map_uniform_set = RID(); + } +} + +/* Screen Space Reflection */ +/* Creates whichever SSR textures are still missing on p_ssr_buffers, all sized p_screen_size: depth_scaled and normal_scaled, the two blur_radius textures (only when p_roughness_quality is not ENV_SSR_ROUGHNESS_QUALITY_DISABLED), and intermediate plus output in p_color_format. output becomes a 2D array with p_view_count layers when p_view_count > 1, and one shared slice per view is stored in output_slices. */ +void SSEffects::ssr_allocate_buffers(SSRRenderBuffers &p_ssr_buffers, const RenderingDevice::DataFormat p_color_format, RenderingServer::EnvironmentSSRRoughnessQuality p_roughness_quality, const Size2i &p_screen_size, const uint32_t p_view_count) { + // As we are processing one view at a time, we can reuse buffers, only our output
needs to have layers for each view. + + if (p_ssr_buffers.depth_scaled.is_null()) { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R32_SFLOAT; + tf.width = p_screen_size.x; + tf.height = p_screen_size.y; + tf.texture_type = RD::TEXTURE_TYPE_2D; + tf.array_layers = 1; + tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT; + + p_ssr_buffers.depth_scaled = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(p_ssr_buffers.depth_scaled, "SSR Depth Scaled"); + + tf.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + + p_ssr_buffers.normal_scaled = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(p_ssr_buffers.normal_scaled, "SSR Normal Scaled"); + } + + if (p_roughness_quality != RS::ENV_SSR_ROUGHNESS_QUALITY_DISABLED && !p_ssr_buffers.blur_radius[0].is_valid()) { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8_UNORM; + tf.width = p_screen_size.x; + tf.height = p_screen_size.y; + tf.texture_type = RD::TEXTURE_TYPE_2D; + tf.array_layers = 1; + tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT; + + p_ssr_buffers.blur_radius[0] = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(p_ssr_buffers.blur_radius[0], "SSR Blur Radius 0"); + p_ssr_buffers.blur_radius[1] = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(p_ssr_buffers.blur_radius[1], "SSR Blur Radius 1"); + } + + if (p_ssr_buffers.intermediate.is_null()) { + RD::TextureFormat tf; + tf.format = p_color_format; + tf.width = p_screen_size.x; + tf.height = p_screen_size.y; + tf.texture_type = RD::TEXTURE_TYPE_2D; + tf.array_layers = 1; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + + p_ssr_buffers.intermediate = RD::get_singleton()->texture_create(tf, RD::TextureView()); + 
RD::get_singleton()->set_resource_name(p_ssr_buffers.intermediate, "SSR Intermediate"); + + if (p_view_count > 1) { + tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; + tf.array_layers = p_view_count; + } else { + tf.texture_type = RD::TEXTURE_TYPE_2D; + tf.array_layers = 1; + } + + p_ssr_buffers.output = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(p_ssr_buffers.output, "SSR Output"); + + for (uint32_t v = 0; v < p_view_count; v++) { + p_ssr_buffers.output_slices[v] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), p_ssr_buffers.output, v, 0); + } + } +} +/* Records screen space reflections for p_view_count views into one compute list. It first uploads each view's projection, inverse projection and eye offset to ssr.ubo (created on first use); then, per view, it dispatches the scale pass, the main SSR pass and, unless p_roughness_quality is ENV_SSR_ROUGHNESS_QUALITY_DISABLED, a horizontal followed by a vertical filter pass. Every dispatch covers p_screen_size. Returns early through ERR_FAIL_NULL when either singleton is missing. */ +void SSEffects::screen_space_reflection(SSRRenderBuffers &p_ssr_buffers, const RID *p_diffuse_slices, const RID *p_normal_roughness_slices, RenderingServer::EnvironmentSSRRoughnessQuality p_roughness_quality, const RID *p_metallic_slices, const Color &p_metallic_mask, const RID *p_depth_slices, const Size2i &p_screen_size, int p_max_steps, float p_fade_in, float p_fade_out, float p_tolerance, const uint32_t p_view_count, const CameraMatrix *p_projections, const Vector3 *p_eye_offsets) { + UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton(); + ERR_FAIL_NULL(uniform_set_cache); + MaterialStorage *material_storage = MaterialStorage::get_singleton(); + ERR_FAIL_NULL(material_storage); + + RID default_sampler = material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + + { + // Store some scene data in a UBO, in the near future we will use a UBO shared with other shaders + ScreenSpaceReflectionSceneData scene_data; + + if (ssr.ubo.is_null()) { + ssr.ubo = RD::get_singleton()->uniform_buffer_create(sizeof(ScreenSpaceReflectionSceneData)); + } + + for (uint32_t v = 0; v < p_view_count; v++) { + store_camera(p_projections[v], scene_data.projection[v]); + store_camera(p_projections[v].inverse(), scene_data.inv_projection[v]); + scene_data.eye_offset[v][0] =
p_eye_offsets[v].x; + scene_data.eye_offset[v][1] = p_eye_offsets[v].y; + scene_data.eye_offset[v][2] = p_eye_offsets[v].z; + scene_data.eye_offset[v][3] = 0.0; + } + + RD::get_singleton()->buffer_update(ssr.ubo, 0, sizeof(ScreenSpaceReflectionSceneData), &scene_data, RD::BARRIER_MASK_COMPUTE); + } + + uint32_t pipeline_specialization = 0; + if (p_view_count > 1) { + pipeline_specialization |= SSR_MULTIVIEW; + } + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + for (uint32_t v = 0; v < p_view_count; v++) { + RD::get_singleton()->draw_command_begin_label(String("SSR View ") + itos(v)); + + { //scale color and depth to half + RD::get_singleton()->draw_command_begin_label("SSR Scale"); + + ScreenSpaceReflectionScalePushConstant push_constant; + push_constant.view_index = v; + push_constant.camera_z_far = p_projections[v].get_z_far(); + push_constant.camera_z_near = p_projections[v].get_z_near(); + push_constant.orthogonal = p_projections[v].is_orthogonal(); + push_constant.filter = false; //enabling causes artifacts + push_constant.screen_size[0] = p_screen_size.x; + push_constant.screen_size[1] = p_screen_size.y; + + RID shader = ssr_scale.shader.version_get_shader(ssr_scale.shader_version, 0); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssr_scale.pipelines[pipeline_specialization]); + + RD::Uniform u_diffuse(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_diffuse_slices[v] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_diffuse), 0); + + RD::Uniform u_depth(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_depth_slices[v] })); + RD::Uniform u_normal_roughness(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 1, Vector({ default_sampler, p_normal_roughness_slices[v] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_depth, u_normal_roughness),
1); + + RD::Uniform u_output_blur(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssr_buffers.output_slices[v] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 2, u_output_blur), 2); + + RD::Uniform u_scale_depth(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssr_buffers.depth_scaled })); + RD::Uniform u_scale_normal(RD::UNIFORM_TYPE_IMAGE, 1, Vector({ p_ssr_buffers.normal_scaled })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 3, u_scale_depth, u_scale_normal), 3); + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ScreenSpaceReflectionScalePushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); + + RD::get_singleton()->compute_list_add_barrier(compute_list); + + RD::get_singleton()->draw_command_end_label(); + } + + { + RD::get_singleton()->draw_command_begin_label("SSR main"); + + ScreenSpaceReflectionPushConstant push_constant; + push_constant.view_index = v; + push_constant.camera_z_far = p_projections[v].get_z_far(); + push_constant.camera_z_near = p_projections[v].get_z_near(); + push_constant.orthogonal = p_projections[v].is_orthogonal(); + push_constant.screen_size[0] = p_screen_size.x; + push_constant.screen_size[1] = p_screen_size.y; + push_constant.curve_fade_in = p_fade_in; + push_constant.distance_fade = p_fade_out; + push_constant.num_steps = p_max_steps; + push_constant.depth_tolerance = p_tolerance; + push_constant.use_half_res = true; + push_constant.proj_info[0] = -2.0f / (p_screen_size.width * p_projections[v].matrix[0][0]); + push_constant.proj_info[1] = -2.0f / (p_screen_size.height * p_projections[v].matrix[1][1]); + push_constant.proj_info[2] = (1.0f - p_projections[v].matrix[0][2]) / p_projections[v].matrix[0][0]; + push_constant.proj_info[3] = (1.0f + p_projections[v].matrix[1][2]) / p_projections[v].matrix[1][1]; + 
push_constant.metallic_mask[0] = CLAMP(p_metallic_mask.r * 255.0, 0, 255); + push_constant.metallic_mask[1] = CLAMP(p_metallic_mask.g * 255.0, 0, 255); + push_constant.metallic_mask[2] = CLAMP(p_metallic_mask.b * 255.0, 0, 255); + push_constant.metallic_mask[3] = CLAMP(p_metallic_mask.a * 255.0, 0, 255); + + ScreenSpaceReflectionMode mode = (p_roughness_quality != RS::ENV_SSR_ROUGHNESS_QUALITY_DISABLED) ? SCREEN_SPACE_REFLECTION_ROUGH : SCREEN_SPACE_REFLECTION_NORMAL; + RID shader = ssr.shader.version_get_shader(ssr.shader_version, mode); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssr.pipelines[pipeline_specialization][mode]); + + RD::Uniform u_scene_data(RD::UNIFORM_TYPE_UNIFORM_BUFFER, 0, ssr.ubo); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 4, u_scene_data), 4); + + RD::Uniform u_output_blur(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssr_buffers.output_slices[v] })); + RD::Uniform u_scale_depth(RD::UNIFORM_TYPE_IMAGE, 1, Vector({ p_ssr_buffers.depth_scaled })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_output_blur, u_scale_depth), 0); + + if (p_roughness_quality != RS::ENV_SSR_ROUGHNESS_QUALITY_DISABLED) { + RD::Uniform u_intermediate(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssr_buffers.intermediate })); + RD::Uniform u_blur_radius(RD::UNIFORM_TYPE_IMAGE, 1, Vector({ p_ssr_buffers.blur_radius[0] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_intermediate, u_blur_radius), 1); + } else { + RD::Uniform u_intermediate(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssr_buffers.intermediate })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_intermediate), 1); + } + + RD::Uniform u_scale_normal(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssr_buffers.normal_scaled })); + 
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 2, u_scale_normal), 2); + + RD::Uniform u_metallic(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_metallic_slices[v] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 3, u_metallic), 3); + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ScreenSpaceReflectionPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); + + RD::get_singleton()->draw_command_end_label(); + } + + if (p_roughness_quality != RS::ENV_SSR_ROUGHNESS_QUALITY_DISABLED) { + RD::get_singleton()->draw_command_begin_label("SSR filter"); + //blur + + RD::get_singleton()->compute_list_add_barrier(compute_list); + + ScreenSpaceReflectionFilterPushConstant push_constant; + push_constant.view_index = v; + push_constant.orthogonal = p_projections[v].is_orthogonal(); + push_constant.edge_tolerance = Math::sin(Math::deg2rad(15.0)); + push_constant.proj_info[0] = -2.0f / (p_screen_size.width * p_projections[v].matrix[0][0]); + push_constant.proj_info[1] = -2.0f / (p_screen_size.height * p_projections[v].matrix[1][1]); + push_constant.proj_info[2] = (1.0f - p_projections[v].matrix[0][2]) / p_projections[v].matrix[0][0]; + push_constant.proj_info[3] = (1.0f + p_projections[v].matrix[1][2]) / p_projections[v].matrix[1][1]; + push_constant.vertical = 0; + if (p_roughness_quality == RS::ENV_SSR_ROUGHNESS_QUALITY_LOW) { + push_constant.steps = p_max_steps / 3; + push_constant.increment = 3; + } else if (p_roughness_quality == RS::ENV_SSR_ROUGHNESS_QUALITY_MEDIUM) { + push_constant.steps = p_max_steps / 2; + push_constant.increment = 2; + } else { + push_constant.steps = p_max_steps; + push_constant.increment = 1; + } + + push_constant.screen_size[0] = p_screen_size.width; + push_constant.screen_size[1] = 
p_screen_size.height; + + // Horizontal pass + + SSRReflectionMode mode = SCREEN_SPACE_REFLECTION_FILTER_HORIZONTAL; + + RID shader = ssr_filter.shader.version_get_shader(ssr_filter.shader_version, mode); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssr_filter.pipelines[pipeline_specialization][mode]); + + RD::Uniform u_scene_data(RD::UNIFORM_TYPE_UNIFORM_BUFFER, 0, ssr.ubo); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 4, u_scene_data), 4); + + RD::Uniform u_intermediate(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssr_buffers.intermediate })); + RD::Uniform u_blur_radius(RD::UNIFORM_TYPE_IMAGE, 1, Vector({ p_ssr_buffers.blur_radius[0] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_intermediate, u_blur_radius), 0); + + RD::Uniform u_scale_normal(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssr_buffers.normal_scaled })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_scale_normal), 1); + + RD::Uniform u_output_blur(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssr_buffers.output_slices[v] })); + RD::Uniform u_blur_radius2(RD::UNIFORM_TYPE_IMAGE, 1, Vector({ p_ssr_buffers.blur_radius[1] })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 2, u_output_blur, u_blur_radius2), 2); + + RD::Uniform u_scale_depth(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_ssr_buffers.depth_scaled })); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 3, u_scale_depth), 3); + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ScreenSpaceReflectionFilterPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); + RD::get_singleton()->compute_list_add_barrier(compute_list); + + // Vertical 
pass + + mode = SCREEN_SPACE_REFLECTION_FILTER_VERTICAL; + shader = ssr_filter.shader.version_get_shader(ssr_filter.shader_version, mode); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssr_filter.pipelines[pipeline_specialization][mode]); + + push_constant.vertical = 1; + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_output_blur, u_blur_radius2), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_scale_normal), 1); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 2, u_intermediate), 2); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 3, u_scale_depth), 3); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 4, u_scene_data), 4); + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ScreenSpaceReflectionFilterPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); + + if (v != p_view_count - 1) { + RD::get_singleton()->compute_list_add_barrier(compute_list); + } + + RD::get_singleton()->draw_command_end_label(); + } + + RD::get_singleton()->draw_command_end_label(); + } + + RD::get_singleton()->compute_list_end(); +} +/* Resets every entry of output_slices, then frees and resets output, intermediate, both blur_radius textures (guarded by blur_radius[0] only) and depth_scaled together with normal_scaled (guarded by depth_scaled only). The output slices are only reset, never freed explicitly here. */ +void SSEffects::ssr_free(SSRRenderBuffers &p_ssr_buffers) { + for (uint32_t v = 0; v < RendererSceneRender::MAX_RENDER_VIEWS; v++) { + p_ssr_buffers.output_slices[v] = RID(); + } + + if (p_ssr_buffers.output.is_valid()) { + RD::get_singleton()->free(p_ssr_buffers.output); + p_ssr_buffers.output = RID(); + } + + if (p_ssr_buffers.intermediate.is_valid()) { + RD::get_singleton()->free(p_ssr_buffers.intermediate); + p_ssr_buffers.intermediate = RID(); + } + + if (p_ssr_buffers.blur_radius[0].is_valid()) { +
RD::get_singleton()->free(p_ssr_buffers.blur_radius[0]); + RD::get_singleton()->free(p_ssr_buffers.blur_radius[1]); + p_ssr_buffers.blur_radius[0] = RID(); + p_ssr_buffers.blur_radius[1] = RID(); + } + + if (p_ssr_buffers.depth_scaled.is_valid()) { + RD::get_singleton()->free(p_ssr_buffers.depth_scaled); + p_ssr_buffers.depth_scaled = RID(); + RD::get_singleton()->free(p_ssr_buffers.normal_scaled); + p_ssr_buffers.normal_scaled = RID(); + } +} diff --git a/servers/rendering/renderer_rd/effects/ss_effects.h b/servers/rendering/renderer_rd/effects/ss_effects.h new file mode 100644 index 0000000000..38b127aba6 --- /dev/null +++ b/servers/rendering/renderer_rd/effects/ss_effects.h @@ -0,0 +1,508 @@ +/*************************************************************************/ +/* ss_effects.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. 
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef SS_EFFECTS_RD_H +#define SS_EFFECTS_RD_H + +#include "servers/rendering/renderer_rd/pipeline_cache_rd.h" +#include "servers/rendering/renderer_rd/shaders/effects/screen_space_reflection.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_filter.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_scale.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ss_effects_downsample.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ssao.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ssao_blur.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ssao_importance_map.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ssao_interleave.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ssil.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ssil_blur.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ssil_importance_map.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ssil_interleave.glsl.gen.h" +#include "servers/rendering/renderer_scene_render.h" +#include "servers/rendering_server.h" + +namespace RendererRD { + +class SSEffects { +private: + static SSEffects *singleton; + +public: + static SSEffects *get_singleton() { return singleton; } 
+ + SSEffects(); + ~SSEffects(); + + /* SS Downsampler */ + + void downsample_depth(RID p_depth_buffer, const Vector &p_depth_mipmaps, RS::EnvironmentSSAOQuality p_ssao_quality, RS::EnvironmentSSILQuality p_ssil_quality, bool p_invalidate_uniform_set, bool p_ssao_half_size, bool p_ssil_half_size, Size2i p_full_screen_size, const CameraMatrix &p_projection); + + /* SSIL */ + + struct SSILRenderBuffers { + bool half_size = false; + int buffer_width; + int buffer_height; + int half_buffer_width; + int half_buffer_height; + + RID ssil_final; + RID deinterleaved; + Vector deinterleaved_slices; + RID pong; + Vector pong_slices; + RID edges; + Vector edges_slices; + RID importance_map[2]; + RID depth_texture_view; + + RID last_frame; + Vector last_frame_slices; + + RID gather_uniform_set; + RID importance_map_uniform_set; + RID projection_uniform_set; + }; + + struct SSILSettings { + float radius = 1.0; + float intensity = 2.0; + float sharpness = 0.98; + float normal_rejection = 1.0; + + RS::EnvironmentSSILQuality quality = RS::ENV_SSIL_QUALITY_MEDIUM; + bool half_size = true; + float adaptive_target = 0.5; + int blur_passes = 4; + float fadeout_from = 50.0; + float fadeout_to = 300.0; + + Size2i full_screen_size = Size2i(); + }; + + void ssil_allocate_buffers(SSILRenderBuffers &p_ssil_buffers, const SSILSettings &p_settings, RID p_linear_depth); + void screen_space_indirect_lighting(SSILRenderBuffers &p_ssil_buffers, RID p_normal_buffer, const CameraMatrix &p_projection, const CameraMatrix &p_last_projection, const SSILSettings &p_settings); + void ssil_free(SSILRenderBuffers &p_ssil_buffers); + + /* SSAO */ + + struct SSAORenderBuffers { + bool half_size = false; + int buffer_width; + int buffer_height; + int half_buffer_width; + int half_buffer_height; + + RID ao_deinterleaved; + Vector ao_deinterleaved_slices; + RID ao_pong; + Vector ao_pong_slices; + RID ao_final; + RID importance_map[2]; + RID depth_texture_view; + + RID gather_uniform_set; + RID 
importance_map_uniform_set; + }; + + struct SSAOSettings { + float radius = 1.0; + float intensity = 2.0; + float power = 1.5; + float detail = 0.5; + float horizon = 0.06; + float sharpness = 0.98; + + RS::EnvironmentSSAOQuality quality = RS::ENV_SSAO_QUALITY_MEDIUM; + bool half_size = false; + float adaptive_target = 0.5; + int blur_passes = 2; + float fadeout_from = 50.0; + float fadeout_to = 300.0; + + Size2i full_screen_size = Size2i(); + }; + + void ssao_allocate_buffers(SSAORenderBuffers &p_ssao_buffers, const SSAOSettings &p_settings, RID p_linear_depth); + void generate_ssao(SSAORenderBuffers &p_ssao_buffers, RID p_normal_buffer, const CameraMatrix &p_projection, const SSAOSettings &p_settings); + void ssao_free(SSAORenderBuffers &p_ssao_buffers); + + /* Screen Space Reflection */ + + struct SSRRenderBuffers { + RID normal_scaled; + RID depth_scaled; + RID blur_radius[2]; + RID intermediate; + RID output; + RID output_slices[RendererSceneRender::MAX_RENDER_VIEWS]; + }; + + void ssr_allocate_buffers(SSRRenderBuffers &p_ssr_buffers, const RenderingDevice::DataFormat p_color_format, RenderingServer::EnvironmentSSRRoughnessQuality p_roughness_quality, const Size2i &p_screen_size, const uint32_t p_view_count); + void screen_space_reflection(SSRRenderBuffers &p_ssr_buffers, const RID *p_diffuse_slices, const RID *p_normal_roughness_slices, RS::EnvironmentSSRRoughnessQuality p_roughness_quality, const RID *p_metallic_slices, const Color &p_metallic_mask, const RID *p_depth_slices, const Size2i &p_screen_size, int p_max_steps, float p_fade_in, float p_fade_out, float p_tolerance, const uint32_t p_view_count, const CameraMatrix *p_projections, const Vector3 *p_eye_offsets); + void ssr_free(SSRRenderBuffers &p_ssr_buffers); + +private: + /* SS Downsampler */ + + struct SSEffectsDownsamplePushConstant { + float pixel_size[2]; + float z_far; + float z_near; + uint32_t orthogonal; + float radius_sq; + uint32_t pad[2]; + }; + + enum SSEffectsMode { + 
SS_EFFECTS_DOWNSAMPLE, + SS_EFFECTS_DOWNSAMPLE_HALF_RES, + SS_EFFECTS_DOWNSAMPLE_MIPMAP, + SS_EFFECTS_DOWNSAMPLE_MIPMAP_HALF_RES, + SS_EFFECTS_DOWNSAMPLE_HALF, + SS_EFFECTS_DOWNSAMPLE_HALF_RES_HALF, + SS_EFFECTS_DOWNSAMPLE_FULL_MIPS, + SS_EFFECTS_MAX + }; + + struct SSEffectsGatherConstants { + float rotation_matrices[80]; //5 vec4s * 4 + }; + + struct SSEffectsShader { + SSEffectsDownsamplePushConstant downsample_push_constant; + SsEffectsDownsampleShaderRD downsample_shader; + RID downsample_shader_version; + RID downsample_uniform_set; + bool used_half_size_last_frame = false; + bool used_mips_last_frame = false; + bool used_full_mips_last_frame = false; + + RID gather_constants_buffer; + + RID mirror_sampler; + + RID pipelines[SS_EFFECTS_MAX]; + } ss_effects; + + /* SSIL */ + + enum SSILMode { + SSIL_GATHER, + SSIL_GATHER_BASE, + SSIL_GATHER_ADAPTIVE, + SSIL_GENERATE_IMPORTANCE_MAP, + SSIL_PROCESS_IMPORTANCE_MAPA, + SSIL_PROCESS_IMPORTANCE_MAPB, + SSIL_BLUR_PASS, + SSIL_BLUR_PASS_SMART, + SSIL_BLUR_PASS_WIDE, + SSIL_INTERLEAVE, + SSIL_INTERLEAVE_SMART, + SSIL_INTERLEAVE_HALF, + SSIL_MAX + }; + + struct SSILGatherPushConstant { + int32_t screen_size[2]; + int pass; + int quality; + + float half_screen_pixel_size[2]; + float half_screen_pixel_size_x025[2]; + + float NDC_to_view_mul[2]; + float NDC_to_view_add[2]; + + float pad2[2]; + float z_near; + float z_far; + + float radius; + float intensity; + int size_multiplier; + int pad; + + float fade_out_mul; + float fade_out_add; + float normal_rejection_amount; + float inv_radius_near_limit; + + uint32_t is_orthogonal; + float neg_inv_radius; + float load_counter_avg_div; + float adaptive_sample_limit; + + int32_t pass_coord_offset[2]; + float pass_uv_offset[2]; + }; + + struct SSILImportanceMapPushConstant { + float half_screen_pixel_size[2]; + float intensity; + float pad; + }; + + struct SSILBlurPushConstant { + float edge_sharpness; + float pad; + float half_screen_pixel_size[2]; + }; + + struct 
SSILInterleavePushConstant { + float inv_sharpness; + uint32_t size_modifier; + float pixel_size[2]; + }; + + struct SSILProjectionUniforms { + float inv_last_frame_projection_matrix[16]; + }; + + struct SSIL { + SSILGatherPushConstant gather_push_constant; + SsilShaderRD gather_shader; + RID gather_shader_version; + RID projection_uniform_buffer; + + SSILImportanceMapPushConstant importance_map_push_constant; + SsilImportanceMapShaderRD importance_map_shader; + RID importance_map_shader_version; + RID importance_map_load_counter; + RID counter_uniform_set; + + SSILBlurPushConstant blur_push_constant; + SsilBlurShaderRD blur_shader; + RID blur_shader_version; + + SSILInterleavePushConstant interleave_push_constant; + SsilInterleaveShaderRD interleave_shader; + RID interleave_shader_version; + + RID pipelines[SSIL_MAX]; + } ssil; + + void gather_ssil(RD::ComputeListID p_compute_list, const Vector p_ssil_slices, const Vector p_edges_slices, const SSILSettings &p_settings, bool p_adaptive_base_pass, RID p_gather_uniform_set, RID p_importance_map_uniform_set, RID p_projection_uniform_set); + + /* SSAO */ + + enum SSAOMode { + SSAO_GATHER, + SSAO_GATHER_BASE, + SSAO_GATHER_ADAPTIVE, + SSAO_GENERATE_IMPORTANCE_MAP, + SSAO_PROCESS_IMPORTANCE_MAPA, + SSAO_PROCESS_IMPORTANCE_MAPB, + SSAO_BLUR_PASS, + SSAO_BLUR_PASS_SMART, + SSAO_BLUR_PASS_WIDE, + SSAO_INTERLEAVE, + SSAO_INTERLEAVE_SMART, + SSAO_INTERLEAVE_HALF, + SSAO_MAX + }; + + struct SSAOGatherPushConstant { + int32_t screen_size[2]; + int pass; + int quality; + + float half_screen_pixel_size[2]; + int size_multiplier; + float detail_intensity; + + float NDC_to_view_mul[2]; + float NDC_to_view_add[2]; + + float pad[2]; + float half_screen_pixel_size_x025[2]; + + float radius; + float intensity; + float shadow_power; + float shadow_clamp; + + float fade_out_mul; + float fade_out_add; + float horizon_angle_threshold; + float inv_radius_near_limit; + + uint32_t is_orthogonal; + float neg_inv_radius; + float 
load_counter_avg_div; + float adaptive_sample_limit; + + int32_t pass_coord_offset[2]; + float pass_uv_offset[2]; + }; + + struct SSAOImportanceMapPushConstant { + float half_screen_pixel_size[2]; + float intensity; + float power; + }; + + struct SSAOBlurPushConstant { + float edge_sharpness; + float pad; + float half_screen_pixel_size[2]; + }; + + struct SSAOInterleavePushConstant { + float inv_sharpness; + uint32_t size_modifier; + float pixel_size[2]; + }; + + struct SSAO { + SSAOGatherPushConstant gather_push_constant; + SsaoShaderRD gather_shader; + RID gather_shader_version; + + SSAOImportanceMapPushConstant importance_map_push_constant; + SsaoImportanceMapShaderRD importance_map_shader; + RID importance_map_shader_version; + RID importance_map_load_counter; + RID counter_uniform_set; + + SSAOBlurPushConstant blur_push_constant; + SsaoBlurShaderRD blur_shader; + RID blur_shader_version; + + SSAOInterleavePushConstant interleave_push_constant; + SsaoInterleaveShaderRD interleave_shader; + RID interleave_shader_version; + + RID pipelines[SSAO_MAX]; + } ssao; + + void gather_ssao(RD::ComputeListID p_compute_list, const Vector p_ao_slices, const SSAOSettings &p_settings, bool p_adaptive_base_pass, RID p_gather_uniform_set, RID p_importance_map_uniform_set); + + /* Screen Space Reflection */ + + enum SSRShaderSpecializations { + SSR_MULTIVIEW = 1 << 0, + SSR_VARIATIONS = 2, + }; + + struct ScreenSpaceReflectionSceneData { + float projection[2][16]; + float inv_projection[2][16]; + float eye_offset[2][4]; + }; + + // SSR Scale + + struct ScreenSpaceReflectionScalePushConstant { + int32_t screen_size[2]; + float camera_z_near; + float camera_z_far; + + uint32_t orthogonal; + uint32_t filter; + uint32_t view_index; + uint32_t pad1; + }; + + struct ScreenSpaceReflectionScale { + ScreenSpaceReflectionScaleShaderRD shader; + RID shader_version; + RID pipelines[SSR_VARIATIONS]; + } ssr_scale; + + // SSR main + + enum ScreenSpaceReflectionMode { + 
SCREEN_SPACE_REFLECTION_NORMAL, + SCREEN_SPACE_REFLECTION_ROUGH, + SCREEN_SPACE_REFLECTION_MAX, + }; + + struct ScreenSpaceReflectionPushConstant { + float proj_info[4]; // 16 - 16 + + int32_t screen_size[2]; // 8 - 24 + float camera_z_near; // 4 - 28 + float camera_z_far; // 4 - 32 + + int32_t num_steps; // 4 - 36 + float depth_tolerance; // 4 - 40 + float distance_fade; // 4 - 44 + float curve_fade_in; // 4 - 48 + + uint32_t orthogonal; // 4 - 52 + float filter_mipmap_levels; // 4 - 56 + uint32_t use_half_res; // 4 - 60 + uint8_t metallic_mask[4]; // 4 - 64 + + uint32_t view_index; // 4 - 68 + uint32_t pad[3]; // 12 - 80 + + // float projection[16]; // this is in our ScreenSpaceReflectionSceneData now + }; + + struct ScreenSpaceReflection { + ScreenSpaceReflectionShaderRD shader; + RID shader_version; + RID pipelines[SSR_VARIATIONS][SCREEN_SPACE_REFLECTION_MAX]; + + RID ubo; + } ssr; + + // SSR Filter + + struct ScreenSpaceReflectionFilterPushConstant { + float proj_info[4]; // 16 - 16 + + uint32_t orthogonal; // 4 - 20 + float edge_tolerance; // 4 - 24 + int32_t increment; // 4 - 28 + uint32_t view_index; // 4 - 32 + + int32_t screen_size[2]; // 8 - 40 + uint32_t vertical; // 4 - 44 + uint32_t steps; // 4 - 48 + }; + + enum SSRReflectionMode { + SCREEN_SPACE_REFLECTION_FILTER_HORIZONTAL, + SCREEN_SPACE_REFLECTION_FILTER_VERTICAL, + SCREEN_SPACE_REFLECTION_FILTER_MAX, + }; + + struct ScreenSpaceReflectionFilter { + ScreenSpaceReflectionFilterShaderRD shader; + RID shader_version; + RID pipelines[SSR_VARIATIONS][SCREEN_SPACE_REFLECTION_FILTER_MAX]; + } ssr_filter; +}; + +} // namespace RendererRD + +#endif // !SS_EFFECTS_RD_H diff --git a/servers/rendering/renderer_rd/effects_rd.cpp b/servers/rendering/renderer_rd/effects_rd.cpp index f731a0007a..57dead51f4 100644 --- a/servers/rendering/renderer_rd/effects_rd.cpp +++ b/servers/rendering/renderer_rd/effects_rd.cpp @@ -41,14 +41,6 @@ bool EffectsRD::get_prefer_raster_effects() { return prefer_raster_effects; } 
-static _FORCE_INLINE_ void store_camera(const CameraMatrix &p_mtx, float *p_array) { - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - p_array[i * 4 + j] = p_mtx.matrix[i][j]; - } - } -} - RID EffectsRD::_get_uniform_set_from_image(RID p_image) { if (image_to_uniform_set_cache.has(p_image)) { RID uniform_set = image_to_uniform_set_cache[p_image]; @@ -86,7 +78,7 @@ RID EffectsRD::_get_uniform_set_from_texture(RID p_texture, bool p_use_mipmaps) u.append_id(p_texture); uniforms.push_back(u); // anything with the same configuration (one texture in binding 0 for set 0), is good - RID uniform_set = RD::get_singleton()->uniform_set_create(uniforms, specular_merge.shader.version_get_shader(specular_merge.shader_version, 0), 0); + RID uniform_set = RD::get_singleton()->uniform_set_create(uniforms, luminance_reduce_raster.shader.version_get_shader(luminance_reduce_raster.shader_version, 0), 0); texture_to_uniform_set_cache[p_texture] = uniform_set; @@ -116,105 +108,6 @@ RID EffectsRD::_get_compute_uniform_set_from_texture(RID p_texture, bool p_use_m return uniform_set; } -RID EffectsRD::_get_compute_uniform_set_from_texture_and_sampler(RID p_texture, RID p_sampler) { - TextureSamplerPair tsp; - tsp.texture = p_texture; - tsp.sampler = p_sampler; - - if (texture_sampler_to_compute_uniform_set_cache.has(tsp)) { - RID uniform_set = texture_sampler_to_compute_uniform_set_cache[tsp]; - if (RD::get_singleton()->uniform_set_is_valid(uniform_set)) { - return uniform_set; - } - } - - Vector uniforms; - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; - u.binding = 0; - u.append_id(p_sampler); - u.append_id(p_texture); - uniforms.push_back(u); - //any thing with the same configuration (one texture in binding 0 for set 0), is good - RID uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssao.blur_shader.version_get_shader(ssao.blur_shader_version, 0), 0); - - texture_sampler_to_compute_uniform_set_cache[tsp] = uniform_set; - - 
return uniform_set; -} - -RID EffectsRD::_get_compute_uniform_set_from_texture_pair(RID p_texture1, RID p_texture2, bool p_use_mipmaps) { - TexturePair tp; - tp.texture1 = p_texture1; - tp.texture2 = p_texture2; - - if (texture_pair_to_compute_uniform_set_cache.has(tp)) { - RID uniform_set = texture_pair_to_compute_uniform_set_cache[tp]; - if (RD::get_singleton()->uniform_set_is_valid(uniform_set)) { - return uniform_set; - } - } - - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; - u.binding = 0; - u.append_id(p_use_mipmaps ? default_mipmap_sampler : default_sampler); - u.append_id(p_texture1); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; - u.binding = 1; - u.append_id(p_use_mipmaps ? default_mipmap_sampler : default_sampler); - u.append_id(p_texture2); - uniforms.push_back(u); - } - //any thing with the same configuration (one texture in binding 0 for set 0), is good - RID uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssr_scale.shader.version_get_shader(ssr_scale.shader_version, 0), 1); - - texture_pair_to_compute_uniform_set_cache[tp] = uniform_set; - - return uniform_set; -} - -RID EffectsRD::_get_compute_uniform_set_from_image_pair(RID p_texture1, RID p_texture2) { - TexturePair tp; - tp.texture1 = p_texture1; - tp.texture2 = p_texture2; - - if (image_pair_to_compute_uniform_set_cache.has(tp)) { - RID uniform_set = image_pair_to_compute_uniform_set_cache[tp]; - if (RD::get_singleton()->uniform_set_is_valid(uniform_set)) { - return uniform_set; - } - } - - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 0; - u.append_id(p_texture1); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 1; - u.append_id(p_texture2); - uniforms.push_back(u); - } - //any thing with the same configuration (one texture in binding 0 for set 0), is good - RID 
uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssr_scale.shader.version_get_shader(ssr_scale.shader_version, 0), 3); - - image_pair_to_compute_uniform_set_cache[tp] = uniform_set; - - return uniform_set; -} - void EffectsRD::fsr_upscale(RID p_source_rd_texture, RID p_secondary_texture, RID p_destination_texture, const Size2i &p_internal_size, const Size2i &p_size, float p_fsr_upscale_sharpness) { memset(&FSR_upscale.push_constant, 0, sizeof(FSRUpscalePushConstant)); @@ -281,125 +174,6 @@ void EffectsRD::taa_resolve(RID p_frame, RID p_temp, RID p_depth, RID p_velocity RD::get_singleton()->compute_list_end(); } -void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, RenderingServer::EnvironmentSSRRoughnessQuality p_roughness_quality, RID p_blur_radius, RID p_blur_radius2, RID p_metallic, const Color &p_metallic_mask, RID p_depth, RID p_scale_depth, RID p_scale_normal, RID p_output, RID p_output_blur, const Size2i &p_screen_size, int p_max_steps, float p_fade_in, float p_fade_out, float p_tolerance, const CameraMatrix &p_camera) { - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - - { //scale color and depth to half - ssr_scale.push_constant.camera_z_far = p_camera.get_z_far(); - ssr_scale.push_constant.camera_z_near = p_camera.get_z_near(); - ssr_scale.push_constant.orthogonal = p_camera.is_orthogonal(); - ssr_scale.push_constant.filter = false; //enabling causes arctifacts - ssr_scale.push_constant.screen_size[0] = p_screen_size.x; - ssr_scale.push_constant.screen_size[1] = p_screen_size.y; - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssr_scale.pipeline); - - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_diffuse), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture_pair(p_depth, p_normal_roughness), 1); - 
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_output_blur), 2); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_scale_depth, p_scale_normal), 3); - - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_scale.push_constant, sizeof(ScreenSpaceReflectionScalePushConstant)); - - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); - - RD::get_singleton()->compute_list_add_barrier(compute_list); - } - - { - ssr.push_constant.camera_z_far = p_camera.get_z_far(); - ssr.push_constant.camera_z_near = p_camera.get_z_near(); - ssr.push_constant.orthogonal = p_camera.is_orthogonal(); - ssr.push_constant.screen_size[0] = p_screen_size.x; - ssr.push_constant.screen_size[1] = p_screen_size.y; - ssr.push_constant.curve_fade_in = p_fade_in; - ssr.push_constant.distance_fade = p_fade_out; - ssr.push_constant.num_steps = p_max_steps; - ssr.push_constant.depth_tolerance = p_tolerance; - ssr.push_constant.use_half_res = true; - ssr.push_constant.proj_info[0] = -2.0f / (p_screen_size.width * p_camera.matrix[0][0]); - ssr.push_constant.proj_info[1] = -2.0f / (p_screen_size.height * p_camera.matrix[1][1]); - ssr.push_constant.proj_info[2] = (1.0f - p_camera.matrix[0][2]) / p_camera.matrix[0][0]; - ssr.push_constant.proj_info[3] = (1.0f + p_camera.matrix[1][2]) / p_camera.matrix[1][1]; - ssr.push_constant.metallic_mask[0] = CLAMP(p_metallic_mask.r * 255.0, 0, 255); - ssr.push_constant.metallic_mask[1] = CLAMP(p_metallic_mask.g * 255.0, 0, 255); - ssr.push_constant.metallic_mask[2] = CLAMP(p_metallic_mask.b * 255.0, 0, 255); - ssr.push_constant.metallic_mask[3] = CLAMP(p_metallic_mask.a * 255.0, 0, 255); - store_camera(p_camera, ssr.push_constant.projection); - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssr.pipelines[(p_roughness_quality != 
RS::ENV_SSR_ROUGHNESS_QUALITY_DISABLED) ? SCREEN_SPACE_REFLECTION_ROUGH : SCREEN_SPACE_REFLECTION_NORMAL]); - - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr.push_constant, sizeof(ScreenSpaceReflectionPushConstant)); - - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_output_blur, p_scale_depth), 0); - - if (p_roughness_quality != RS::ENV_SSR_ROUGHNESS_QUALITY_DISABLED) { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_output, p_blur_radius), 1); - } else { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_output), 1); - } - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_metallic), 3); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_scale_normal), 2); - - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); - } - - if (p_roughness_quality != RS::ENV_SSR_ROUGHNESS_QUALITY_DISABLED) { - //blur - - RD::get_singleton()->compute_list_add_barrier(compute_list); - - ssr_filter.push_constant.orthogonal = p_camera.is_orthogonal(); - ssr_filter.push_constant.edge_tolerance = Math::sin(Math::deg2rad(15.0)); - ssr_filter.push_constant.proj_info[0] = -2.0f / (p_screen_size.width * p_camera.matrix[0][0]); - ssr_filter.push_constant.proj_info[1] = -2.0f / (p_screen_size.height * p_camera.matrix[1][1]); - ssr_filter.push_constant.proj_info[2] = (1.0f - p_camera.matrix[0][2]) / p_camera.matrix[0][0]; - ssr_filter.push_constant.proj_info[3] = (1.0f + p_camera.matrix[1][2]) / p_camera.matrix[1][1]; - ssr_filter.push_constant.vertical = 0; - if (p_roughness_quality == RS::ENV_SSR_ROUGHNESS_QUALITY_LOW) { - ssr_filter.push_constant.steps = p_max_steps / 3; - ssr_filter.push_constant.increment = 3; - } else if (p_roughness_quality == 
RS::ENV_SSR_ROUGHNESS_QUALITY_MEDIUM) { - ssr_filter.push_constant.steps = p_max_steps / 2; - ssr_filter.push_constant.increment = 2; - } else { - ssr_filter.push_constant.steps = p_max_steps; - ssr_filter.push_constant.increment = 1; - } - - ssr_filter.push_constant.screen_size[0] = p_screen_size.width; - ssr_filter.push_constant.screen_size[1] = p_screen_size.height; - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssr_filter.pipelines[SCREEN_SPACE_REFLECTION_FILTER_HORIZONTAL]); - - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_output, p_blur_radius), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_scale_normal), 1); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_output_blur, p_blur_radius2), 2); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_scale_depth), 3); - - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_filter.push_constant, sizeof(ScreenSpaceReflectionFilterPushConstant)); - - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); - - RD::get_singleton()->compute_list_add_barrier(compute_list); - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssr_filter.pipelines[SCREEN_SPACE_REFLECTION_FILTER_VERTICAL]); - - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_output_blur, p_blur_radius2), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_scale_normal), 1); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_output), 2); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_scale_depth), 3); - - 
ssr_filter.push_constant.vertical = 1; - - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssr_filter.push_constant, sizeof(ScreenSpaceReflectionFilterPushConstant)); - - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.width, p_screen_size.height, 1); - } - - RD::get_singleton()->compute_list_end(); -} - void EffectsRD::sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_depth, const CameraMatrix &p_camera, const Size2i &p_screen_size, float p_scale, float p_depth_scale, RenderingServer::SubSurfaceScatteringQuality p_quality) { RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); @@ -443,36 +217,6 @@ void EffectsRD::sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_dept } } -void EffectsRD::merge_specular(RID p_dest_framebuffer, RID p_specular, RID p_base, RID p_reflection) { - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, Vector()); - - if (p_reflection.is_valid()) { - if (p_base.is_valid()) { - RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, specular_merge.pipelines[SPECULAR_MERGE_SSR].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_framebuffer))); - RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_base), 2); - } else { - RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, specular_merge.pipelines[SPECULAR_MERGE_ADDITIVE_SSR].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_framebuffer))); - } - - RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_specular), 0); - RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_reflection), 1); - - } else { - if (p_base.is_valid()) { - 
RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, specular_merge.pipelines[SPECULAR_MERGE_ADD].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_framebuffer))); - RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_base), 2); - } else { - RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, specular_merge.pipelines[SPECULAR_MERGE_ADDITIVE_ADD].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_dest_framebuffer))); - } - - RD::get_singleton()->draw_list_bind_uniform_set(draw_list, _get_uniform_set_from_texture(p_specular), 0); - } - - RD::get_singleton()->draw_list_bind_index_array(draw_list, index_array); - RD::get_singleton()->draw_list_draw(draw_list, true); - RD::get_singleton()->draw_list_end(); -} - void EffectsRD::luminance_reduction(RID p_source_texture, const Size2i p_source_size, const Vector p_reduce, RID p_prev_luminance, float p_min_luminance, float p_max_luminance, float p_adjust, bool p_set) { ERR_FAIL_COND_MSG(prefer_raster_effects, "Can't use compute version of luminance reduction with the mobile renderer."); @@ -546,674 +290,6 @@ void EffectsRD::luminance_reduction_raster(RID p_source_texture, const Size2i p_ } } -void EffectsRD::downsample_depth(RID p_depth_buffer, const Vector &p_depth_mipmaps, RS::EnvironmentSSAOQuality p_ssao_quality, RS::EnvironmentSSILQuality p_ssil_quality, bool p_invalidate_uniform_set, bool p_ssao_half_size, bool p_ssil_half_size, Size2i p_full_screen_size, const CameraMatrix &p_projection) { - // Downsample and deinterleave the depth buffer for SSAO and SSIL - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - - int downsample_pipeline = SS_EFFECTS_DOWNSAMPLE; - bool use_mips = p_ssao_quality > RS::ENV_SSAO_QUALITY_MEDIUM || p_ssil_quality > RS::ENV_SSIL_QUALITY_MEDIUM; - - if (p_ssao_quality == RS::ENV_SSAO_QUALITY_VERY_LOW && p_ssil_quality == 
RS::ENV_SSIL_QUALITY_VERY_LOW) { - downsample_pipeline = SS_EFFECTS_DOWNSAMPLE_HALF; - } else if (use_mips) { - downsample_pipeline = SS_EFFECTS_DOWNSAMPLE_MIPMAP; - } - - bool use_half_size = false; - bool use_full_mips = false; - - if (p_ssao_half_size && p_ssil_half_size) { - downsample_pipeline++; - use_half_size = true; - } else if (p_ssao_half_size != p_ssil_half_size) { - if (use_mips) { - downsample_pipeline = SS_EFFECTS_DOWNSAMPLE_FULL_MIPS; - use_full_mips = true; - } else { - // Only need the first two mipmaps, but the cost to generate the next two is trivial - // TODO investigate the benefit of a shader version to generate only 2 mips - downsample_pipeline = SS_EFFECTS_DOWNSAMPLE_MIPMAP; - use_mips = true; - } - } - - int depth_index = use_half_size ? 1 : 0; - - RD::get_singleton()->draw_command_begin_label("Downsample Depth"); - if (p_invalidate_uniform_set || use_full_mips != ss_effects.used_full_mips_last_frame || use_half_size != ss_effects.used_half_size_last_frame || use_mips != ss_effects.used_mips_last_frame) { - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 0; - u.append_id(p_depth_mipmaps[depth_index + 1]); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 1; - u.append_id(p_depth_mipmaps[depth_index + 2]); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 2; - u.append_id(p_depth_mipmaps[depth_index + 3]); - uniforms.push_back(u); - } - if (use_full_mips) { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 3; - u.append_id(p_depth_mipmaps[4]); - uniforms.push_back(u); - } - ss_effects.downsample_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ss_effects.downsample_shader.version_get_shader(ss_effects.downsample_shader_version, use_full_mips ? 
6 : 2), 2); - } - - float depth_linearize_mul = -p_projection.matrix[3][2]; - float depth_linearize_add = p_projection.matrix[2][2]; - if (depth_linearize_mul * depth_linearize_add < 0) { - depth_linearize_add = -depth_linearize_add; - } - - ss_effects.downsample_push_constant.orthogonal = p_projection.is_orthogonal(); - ss_effects.downsample_push_constant.z_near = depth_linearize_mul; - ss_effects.downsample_push_constant.z_far = depth_linearize_add; - if (ss_effects.downsample_push_constant.orthogonal) { - ss_effects.downsample_push_constant.z_near = p_projection.get_z_near(); - ss_effects.downsample_push_constant.z_far = p_projection.get_z_far(); - } - ss_effects.downsample_push_constant.pixel_size[0] = 1.0 / p_full_screen_size.x; - ss_effects.downsample_push_constant.pixel_size[1] = 1.0 / p_full_screen_size.y; - ss_effects.downsample_push_constant.radius_sq = 1.0; - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ss_effects.pipelines[downsample_pipeline]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_depth_buffer), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_depth_mipmaps[depth_index + 0]), 1); - if (use_mips) { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, ss_effects.downsample_uniform_set, 2); - } - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ss_effects.downsample_push_constant, sizeof(SSEffectsDownsamplePushConstant)); - - Size2i size(MAX(1, p_full_screen_size.x >> (use_half_size ? 2 : 1)), MAX(1, p_full_screen_size.y >> (use_half_size ? 
2 : 1))); - - RD::get_singleton()->compute_list_dispatch_threads(compute_list, size.x, size.y, 1); - RD::get_singleton()->compute_list_add_barrier(compute_list); - RD::get_singleton()->draw_command_end_label(); - - RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_COMPUTE); - - ss_effects.used_full_mips_last_frame = use_full_mips; - ss_effects.used_half_size_last_frame = use_half_size; -} - -void EffectsRD::gather_ssao(RD::ComputeListID p_compute_list, const Vector p_ao_slices, const SSAOSettings &p_settings, bool p_adaptive_base_pass, RID p_gather_uniform_set, RID p_importance_map_uniform_set) { - RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, p_gather_uniform_set, 0); - if ((p_settings.quality == RS::ENV_SSAO_QUALITY_ULTRA) && !p_adaptive_base_pass) { - RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, p_importance_map_uniform_set, 1); - } - - for (int i = 0; i < 4; i++) { - if ((p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) && ((i == 1) || (i == 2))) { - continue; - } - - ssao.gather_push_constant.pass_coord_offset[0] = i % 2; - ssao.gather_push_constant.pass_coord_offset[1] = i / 2; - ssao.gather_push_constant.pass_uv_offset[0] = ((i % 2) - 0.0) / p_settings.full_screen_size.x; - ssao.gather_push_constant.pass_uv_offset[1] = ((i / 2) - 0.0) / p_settings.full_screen_size.y; - ssao.gather_push_constant.pass = i; - RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_uniform_set_from_image(p_ao_slices[i]), 2); - RD::get_singleton()->compute_list_set_push_constant(p_compute_list, &ssao.gather_push_constant, sizeof(SSAOGatherPushConstant)); - - Size2i size = Size2i(p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1), p_settings.full_screen_size.y >> (p_settings.half_size ? 
2 : 1)); - - RD::get_singleton()->compute_list_dispatch_threads(p_compute_list, size.x, size.y, 1); - } - RD::get_singleton()->compute_list_add_barrier(p_compute_list); -} - -void EffectsRD::generate_ssao(RID p_normal_buffer, RID p_depth_mipmaps_texture, RID p_ao, const Vector p_ao_slices, RID p_ao_pong, const Vector p_ao_pong_slices, RID p_upscale_buffer, RID p_importance_map, RID p_importance_map_pong, const CameraMatrix &p_projection, const SSAOSettings &p_settings, bool p_invalidate_uniform_sets, RID &r_gather_uniform_set, RID &r_importance_map_uniform_set) { - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - memset(&ssao.gather_push_constant, 0, sizeof(SSAOGatherPushConstant)); - /* FIRST PASS */ - - RD::get_singleton()->draw_command_begin_label("Process Screen Space Ambient Occlusion"); - /* SECOND PASS */ - // Sample SSAO - { - RD::get_singleton()->draw_command_begin_label("Gather Samples"); - ssao.gather_push_constant.screen_size[0] = p_settings.full_screen_size.x; - ssao.gather_push_constant.screen_size[1] = p_settings.full_screen_size.y; - - ssao.gather_push_constant.half_screen_pixel_size[0] = 1.0 / p_settings.half_screen_size.x; - ssao.gather_push_constant.half_screen_pixel_size[1] = 1.0 / p_settings.half_screen_size.y; - float tan_half_fov_x = 1.0 / p_projection.matrix[0][0]; - float tan_half_fov_y = 1.0 / p_projection.matrix[1][1]; - ssao.gather_push_constant.NDC_to_view_mul[0] = tan_half_fov_x * 2.0; - ssao.gather_push_constant.NDC_to_view_mul[1] = tan_half_fov_y * -2.0; - ssao.gather_push_constant.NDC_to_view_add[0] = tan_half_fov_x * -1.0; - ssao.gather_push_constant.NDC_to_view_add[1] = tan_half_fov_y; - ssao.gather_push_constant.is_orthogonal = p_projection.is_orthogonal(); - - ssao.gather_push_constant.half_screen_pixel_size_x025[0] = ssao.gather_push_constant.half_screen_pixel_size[0] * 0.25; - ssao.gather_push_constant.half_screen_pixel_size_x025[1] = ssao.gather_push_constant.half_screen_pixel_size[1] * 0.25; - - 
ssao.gather_push_constant.radius = p_settings.radius; - float radius_near_limit = (p_settings.radius * 1.2f); - if (p_settings.quality <= RS::ENV_SSAO_QUALITY_LOW) { - radius_near_limit *= 1.50f; - - if (p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) { - ssao.gather_push_constant.radius *= 0.8f; - } - } - radius_near_limit /= tan_half_fov_y; - ssao.gather_push_constant.intensity = p_settings.intensity; - ssao.gather_push_constant.shadow_power = p_settings.power; - ssao.gather_push_constant.shadow_clamp = 0.98; - ssao.gather_push_constant.fade_out_mul = -1.0 / (p_settings.fadeout_to - p_settings.fadeout_from); - ssao.gather_push_constant.fade_out_add = p_settings.fadeout_from / (p_settings.fadeout_to - p_settings.fadeout_from) + 1.0; - ssao.gather_push_constant.horizon_angle_threshold = p_settings.horizon; - ssao.gather_push_constant.inv_radius_near_limit = 1.0f / radius_near_limit; - ssao.gather_push_constant.neg_inv_radius = -1.0 / ssao.gather_push_constant.radius; - - ssao.gather_push_constant.load_counter_avg_div = 9.0 / float((p_settings.quarter_screen_size.x) * (p_settings.quarter_screen_size.y) * 255); - ssao.gather_push_constant.adaptive_sample_limit = p_settings.adaptive_target; - - ssao.gather_push_constant.detail_intensity = p_settings.detail; - ssao.gather_push_constant.quality = MAX(0, p_settings.quality - 1); - ssao.gather_push_constant.size_multiplier = p_settings.half_size ? 
2 : 1; - - if (p_invalidate_uniform_sets) { - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; - u.binding = 0; - u.append_id(default_sampler); - u.append_id(p_depth_mipmaps_texture); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 1; - u.append_id(p_normal_buffer); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; - u.binding = 2; - u.append_id(ss_effects.gather_constants_buffer); - uniforms.push_back(u); - } - r_gather_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssao.gather_shader.version_get_shader(ssao.gather_shader_version, 0), 0); - } - - if (p_invalidate_uniform_sets) { - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 0; - u.append_id(p_ao_pong); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; - u.binding = 1; - u.append_id(default_sampler); - u.append_id(p_importance_map); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.binding = 2; - u.append_id(ssao.importance_map_load_counter); - uniforms.push_back(u); - } - r_importance_map_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssao.gather_shader.version_get_shader(ssao.gather_shader_version, 2), 1); - } - - if (p_settings.quality == RS::ENV_SSAO_QUALITY_ULTRA) { - RD::get_singleton()->draw_command_begin_label("Generate Importance Map"); - ssao.importance_map_push_constant.half_screen_pixel_size[0] = 1.0 / p_settings.half_screen_size.x; - ssao.importance_map_push_constant.half_screen_pixel_size[1] = 1.0 / p_settings.half_screen_size.y; - ssao.importance_map_push_constant.intensity = p_settings.intensity; - ssao.importance_map_push_constant.power = p_settings.power; - //base pass - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, 
ssao.pipelines[SSAO_GATHER_BASE]); - gather_ssao(compute_list, p_ao_pong_slices, p_settings, true, r_gather_uniform_set, RID()); - //generate importance map - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GENERATE_IMPORTANCE_MAP]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao_pong), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map), 1); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1); - RD::get_singleton()->compute_list_add_barrier(compute_list); - //process importance map A - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_PROCESS_IMPORTANCE_MAPA]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_importance_map), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map_pong), 1); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1); - RD::get_singleton()->compute_list_add_barrier(compute_list); - //process Importance Map B - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_PROCESS_IMPORTANCE_MAPB]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_importance_map_pong), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, 
_get_uniform_set_from_image(p_importance_map), 1); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, ssao.counter_uniform_set, 2); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.importance_map_push_constant, sizeof(SSAOImportanceMapPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1); - RD::get_singleton()->compute_list_add_barrier(compute_list); - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER_ADAPTIVE]); - RD::get_singleton()->draw_command_end_label(); // Importance Map - } else { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[SSAO_GATHER]); - } - - gather_ssao(compute_list, p_ao_slices, p_settings, false, r_gather_uniform_set, r_importance_map_uniform_set); - RD::get_singleton()->draw_command_end_label(); // Gather SSAO - } - - // /* THIRD PASS */ - // // Blur - // - { - RD::get_singleton()->draw_command_begin_label("Edge Aware Blur"); - ssao.blur_push_constant.edge_sharpness = 1.0 - p_settings.sharpness; - ssao.blur_push_constant.half_screen_pixel_size[0] = 1.0 / p_settings.half_screen_size.x; - ssao.blur_push_constant.half_screen_pixel_size[1] = 1.0 / p_settings.half_screen_size.y; - - int blur_passes = p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW ? 
p_settings.blur_passes : 1; - - for (int pass = 0; pass < blur_passes; pass++) { - int blur_pipeline = SSAO_BLUR_PASS; - if (p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW) { - blur_pipeline = SSAO_BLUR_PASS_SMART; - if (pass < blur_passes - 2) { - blur_pipeline = SSAO_BLUR_PASS_WIDE; - } else { - blur_pipeline = SSAO_BLUR_PASS_SMART; - } - } - - for (int i = 0; i < 4; i++) { - if ((p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) && ((i == 1) || (i == 2))) { - continue; - } - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[blur_pipeline]); - if (pass % 2 == 0) { - if (p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao_slices[i]), 0); - } else { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture_and_sampler(p_ao_slices[i], ss_effects.mirror_sampler), 0); - } - - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_ao_pong_slices[i]), 1); - } else { - if (p_settings.quality == RS::ENV_SSAO_QUALITY_VERY_LOW) { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao_pong_slices[i]), 0); - } else { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture_and_sampler(p_ao_pong_slices[i], ss_effects.mirror_sampler), 0); - } - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_ao_slices[i]), 1); - } - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.blur_push_constant, sizeof(SSAOBlurPushConstant)); - - Size2i size(p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1), p_settings.full_screen_size.y >> (p_settings.half_size ? 
2 : 1)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, size.x, size.y, 1); - } - - if (p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW) { - RD::get_singleton()->compute_list_add_barrier(compute_list); - } - } - RD::get_singleton()->draw_command_end_label(); // Blur - } - - /* FOURTH PASS */ - // Interleave buffers - // back to full size - { - RD::get_singleton()->draw_command_begin_label("Interleave Buffers"); - ssao.interleave_push_constant.inv_sharpness = 1.0 - p_settings.sharpness; - ssao.interleave_push_constant.pixel_size[0] = 1.0 / p_settings.full_screen_size.x; - ssao.interleave_push_constant.pixel_size[1] = 1.0 / p_settings.full_screen_size.y; - ssao.interleave_push_constant.size_modifier = uint32_t(p_settings.half_size ? 4 : 2); - - int interleave_pipeline = SSAO_INTERLEAVE_HALF; - if (p_settings.quality == RS::ENV_SSAO_QUALITY_LOW) { - interleave_pipeline = SSAO_INTERLEAVE; - } else if (p_settings.quality >= RS::ENV_SSAO_QUALITY_MEDIUM) { - interleave_pipeline = SSAO_INTERLEAVE_SMART; - } - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssao.pipelines[interleave_pipeline]); - - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_upscale_buffer), 0); - if (p_settings.quality > RS::ENV_SSAO_QUALITY_VERY_LOW && p_settings.blur_passes % 2 == 0) { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao), 1); - } else { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ao_pong), 1); - } - - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssao.interleave_push_constant, sizeof(SSAOInterleavePushConstant)); - - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.full_screen_size.x, p_settings.full_screen_size.y, 1); - RD::get_singleton()->compute_list_add_barrier(compute_list); - 
RD::get_singleton()->draw_command_end_label(); // Interleave - } - RD::get_singleton()->draw_command_end_label(); //SSAO - RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); //wait for upcoming transfer - - int zero[1] = { 0 }; - RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero, 0); //no barrier -} - -void EffectsRD::gather_ssil(RD::ComputeListID p_compute_list, const Vector p_ssil_slices, const Vector p_edges_slices, const SSILSettings &p_settings, bool p_adaptive_base_pass, RID p_gather_uniform_set, RID p_importance_map_uniform_set, RID p_projection_uniform_set) { - RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, p_gather_uniform_set, 0); - if ((p_settings.quality == RS::ENV_SSIL_QUALITY_ULTRA) && !p_adaptive_base_pass) { - RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, p_importance_map_uniform_set, 1); - } - RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, p_projection_uniform_set, 3); - - for (int i = 0; i < 4; i++) { - if ((p_settings.quality == RS::ENV_SSIL_QUALITY_VERY_LOW) && ((i == 1) || (i == 2))) { - continue; - } - - ssil.gather_push_constant.pass_coord_offset[0] = i % 2; - ssil.gather_push_constant.pass_coord_offset[1] = i / 2; - ssil.gather_push_constant.pass_uv_offset[0] = ((i % 2) - 0.0) / p_settings.full_screen_size.x; - ssil.gather_push_constant.pass_uv_offset[1] = ((i / 2) - 0.0) / p_settings.full_screen_size.y; - ssil.gather_push_constant.pass = i; - RD::get_singleton()->compute_list_bind_uniform_set(p_compute_list, _get_compute_uniform_set_from_image_pair(p_ssil_slices[i], p_edges_slices[i]), 2); - RD::get_singleton()->compute_list_set_push_constant(p_compute_list, &ssil.gather_push_constant, sizeof(SSILGatherPushConstant)); - - Size2i size = Size2i(p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1), p_settings.full_screen_size.y >> (p_settings.half_size ? 
2 : 1)); - - RD::get_singleton()->compute_list_dispatch_threads(p_compute_list, size.x, size.y, 1); - } - RD::get_singleton()->compute_list_add_barrier(p_compute_list); -} - -void EffectsRD::screen_space_indirect_lighting(RID p_diffuse, RID p_destination, RID p_normal_buffer, RID p_depth_mipmaps_texture, RID p_ssil, const Vector p_ssil_slices, RID p_ssil_pong, const Vector p_ssil_pong_slices, RID p_importance_map, RID p_importance_map_pong, RID p_edges, const Vector p_edges_slices, const CameraMatrix &p_projection, const CameraMatrix &p_last_projection, const SSILSettings &p_settings, bool p_invalidate_uniform_sets, RID &r_gather_uniform_set, RID &r_importance_map_uniform_set, RID &r_projection_uniform_set) { - RD::get_singleton()->draw_command_begin_label("Process Screen Space Indirect Lighting"); - //Store projection info before starting the compute list - SSILProjectionUniforms projection_uniforms; - store_camera(p_last_projection, projection_uniforms.inv_last_frame_projection_matrix); - - RD::get_singleton()->buffer_update(ssil.projection_uniform_buffer, 0, sizeof(SSILProjectionUniforms), &projection_uniforms); - - memset(&ssil.gather_push_constant, 0, sizeof(SSILGatherPushConstant)); - - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - { - RD::get_singleton()->draw_command_begin_label("Gather Samples"); - ssil.gather_push_constant.screen_size[0] = p_settings.full_screen_size.x; - ssil.gather_push_constant.screen_size[1] = p_settings.full_screen_size.y; - - ssil.gather_push_constant.half_screen_pixel_size[0] = 1.0 / p_settings.half_screen_size.x; - ssil.gather_push_constant.half_screen_pixel_size[1] = 1.0 / p_settings.half_screen_size.y; - float tan_half_fov_x = 1.0 / p_projection.matrix[0][0]; - float tan_half_fov_y = 1.0 / p_projection.matrix[1][1]; - ssil.gather_push_constant.NDC_to_view_mul[0] = tan_half_fov_x * 2.0; - ssil.gather_push_constant.NDC_to_view_mul[1] = tan_half_fov_y * -2.0; - 
ssil.gather_push_constant.NDC_to_view_add[0] = tan_half_fov_x * -1.0; - ssil.gather_push_constant.NDC_to_view_add[1] = tan_half_fov_y; - ssil.gather_push_constant.z_near = p_projection.get_z_near(); - ssil.gather_push_constant.z_far = p_projection.get_z_far(); - ssil.gather_push_constant.is_orthogonal = p_projection.is_orthogonal(); - - ssil.gather_push_constant.half_screen_pixel_size_x025[0] = ssil.gather_push_constant.half_screen_pixel_size[0] * 0.25; - ssil.gather_push_constant.half_screen_pixel_size_x025[1] = ssil.gather_push_constant.half_screen_pixel_size[1] * 0.25; - - ssil.gather_push_constant.radius = p_settings.radius; - float radius_near_limit = (p_settings.radius * 1.2f); - if (p_settings.quality <= RS::ENV_SSIL_QUALITY_LOW) { - radius_near_limit *= 1.50f; - - if (p_settings.quality == RS::ENV_SSIL_QUALITY_VERY_LOW) { - ssil.gather_push_constant.radius *= 0.8f; - } - } - radius_near_limit /= tan_half_fov_y; - ssil.gather_push_constant.intensity = p_settings.intensity * Math_PI; - ssil.gather_push_constant.fade_out_mul = -1.0 / (p_settings.fadeout_to - p_settings.fadeout_from); - ssil.gather_push_constant.fade_out_add = p_settings.fadeout_from / (p_settings.fadeout_to - p_settings.fadeout_from) + 1.0; - ssil.gather_push_constant.inv_radius_near_limit = 1.0f / radius_near_limit; - ssil.gather_push_constant.neg_inv_radius = -1.0 / ssil.gather_push_constant.radius; - ssil.gather_push_constant.normal_rejection_amount = p_settings.normal_rejection; - - ssil.gather_push_constant.load_counter_avg_div = 9.0 / float((p_settings.quarter_screen_size.x) * (p_settings.quarter_screen_size.y) * 255); - ssil.gather_push_constant.adaptive_sample_limit = p_settings.adaptive_target; - - ssil.gather_push_constant.quality = MAX(0, p_settings.quality - 1); - ssil.gather_push_constant.size_multiplier = p_settings.half_size ? 
2 : 1; - - if (p_invalidate_uniform_sets) { - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; - u.binding = 0; - u.append_id(default_mipmap_sampler); - u.append_id(p_diffuse); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; - u.binding = 1; - u.append_id(ssil.projection_uniform_buffer); - uniforms.push_back(u); - } - r_projection_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssil.gather_shader.version_get_shader(ssil.gather_shader_version, 0), 3); - } - - if (p_invalidate_uniform_sets) { - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; - u.binding = 0; - u.append_id(default_sampler); - u.append_id(p_depth_mipmaps_texture); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 1; - u.append_id(p_normal_buffer); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; - u.binding = 2; - u.append_id(ss_effects.gather_constants_buffer); - uniforms.push_back(u); - } - r_gather_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssil.gather_shader.version_get_shader(ssil.gather_shader_version, 0), 0); - } - - if (p_invalidate_uniform_sets) { - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 0; - u.append_id(p_ssil_pong); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; - u.binding = 1; - u.append_id(default_sampler); - u.append_id(p_importance_map); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.binding = 2; - u.append_id(ssil.importance_map_load_counter); - uniforms.push_back(u); - } - r_importance_map_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssil.gather_shader.version_get_shader(ssil.gather_shader_version, 2), 
1); - } - - if (p_settings.quality == RS::ENV_SSIL_QUALITY_ULTRA) { - RD::get_singleton()->draw_command_begin_label("Generate Importance Map"); - ssil.importance_map_push_constant.half_screen_pixel_size[0] = 1.0 / p_settings.half_screen_size.x; - ssil.importance_map_push_constant.half_screen_pixel_size[1] = 1.0 / p_settings.half_screen_size.y; - ssil.importance_map_push_constant.intensity = p_settings.intensity * Math_PI; - //base pass - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssil.pipelines[SSIL_GATHER_BASE]); - gather_ssil(compute_list, p_ssil_pong_slices, p_edges_slices, p_settings, true, r_gather_uniform_set, r_importance_map_uniform_set, r_projection_uniform_set); - //generate importance map - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssil.pipelines[SSIL_GENERATE_IMPORTANCE_MAP]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ssil_pong), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map), 1); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssil.importance_map_push_constant, sizeof(SSILImportanceMapPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1); - RD::get_singleton()->compute_list_add_barrier(compute_list); - // process Importance Map A - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssil.pipelines[SSIL_PROCESS_IMPORTANCE_MAPA]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_importance_map), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map_pong), 1); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssil.importance_map_push_constant, sizeof(SSILImportanceMapPushConstant)); - 
RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1); - RD::get_singleton()->compute_list_add_barrier(compute_list); - // process Importance Map B - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssil.pipelines[SSIL_PROCESS_IMPORTANCE_MAPB]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_importance_map_pong), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_importance_map), 1); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, ssil.counter_uniform_set, 2); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssil.importance_map_push_constant, sizeof(SSILImportanceMapPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.quarter_screen_size.x, p_settings.quarter_screen_size.y, 1); - RD::get_singleton()->compute_list_add_barrier(compute_list); - - RD::get_singleton()->draw_command_end_label(); // Importance Map - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssil.pipelines[SSIL_GATHER_ADAPTIVE]); - } else { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssil.pipelines[SSIL_GATHER]); - } - - gather_ssil(compute_list, p_ssil_slices, p_edges_slices, p_settings, false, r_gather_uniform_set, r_importance_map_uniform_set, r_projection_uniform_set); - RD::get_singleton()->draw_command_end_label(); //Gather - } - - { - RD::get_singleton()->draw_command_begin_label("Edge Aware Blur"); - ssil.blur_push_constant.edge_sharpness = 1.0 - p_settings.sharpness; - ssil.blur_push_constant.half_screen_pixel_size[0] = 1.0 / p_settings.half_screen_size.x; - ssil.blur_push_constant.half_screen_pixel_size[1] = 1.0 / p_settings.half_screen_size.y; - - int blur_passes = p_settings.quality > RS::ENV_SSIL_QUALITY_VERY_LOW ? 
p_settings.blur_passes : 1; - - for (int pass = 0; pass < blur_passes; pass++) { - int blur_pipeline = SSIL_BLUR_PASS; - if (p_settings.quality > RS::ENV_SSIL_QUALITY_VERY_LOW) { - blur_pipeline = SSIL_BLUR_PASS_SMART; - if (pass < blur_passes - 2) { - blur_pipeline = SSIL_BLUR_PASS_WIDE; - } - } - - for (int i = 0; i < 4; i++) { - if ((p_settings.quality == RS::ENV_SSIL_QUALITY_VERY_LOW) && ((i == 1) || (i == 2))) { - continue; - } - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssil.pipelines[blur_pipeline]); - if (pass % 2 == 0) { - if (p_settings.quality == RS::ENV_SSIL_QUALITY_VERY_LOW) { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ssil_slices[i]), 0); - } else { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture_and_sampler(p_ssil_slices[i], ss_effects.mirror_sampler), 0); - } - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_ssil_pong_slices[i]), 1); - } else { - if (p_settings.quality == RS::ENV_SSIL_QUALITY_VERY_LOW) { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ssil_pong_slices[i]), 0); - } else { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture_and_sampler(p_ssil_pong_slices[i], ss_effects.mirror_sampler), 0); - } - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_ssil_slices[i]), 1); - } - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_edges_slices[i]), 2); - - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssil.blur_push_constant, sizeof(SSILBlurPushConstant)); - - int x_groups = (p_settings.full_screen_size.x >> (p_settings.half_size ? 2 : 1)); - int y_groups = (p_settings.full_screen_size.y >> (p_settings.half_size ? 
2 : 1)); - - RD::get_singleton()->compute_list_dispatch_threads(compute_list, x_groups, y_groups, 1); - if (p_settings.quality > RS::ENV_SSIL_QUALITY_VERY_LOW) { - RD::get_singleton()->compute_list_add_barrier(compute_list); - } - } - } - - RD::get_singleton()->draw_command_end_label(); // Blur - } - - { - RD::get_singleton()->draw_command_begin_label("Interleave Buffers"); - ssil.interleave_push_constant.inv_sharpness = 1.0 - p_settings.sharpness; - ssil.interleave_push_constant.pixel_size[0] = 1.0 / p_settings.full_screen_size.x; - ssil.interleave_push_constant.pixel_size[1] = 1.0 / p_settings.full_screen_size.y; - ssil.interleave_push_constant.size_modifier = uint32_t(p_settings.half_size ? 4 : 2); - - int interleave_pipeline = SSIL_INTERLEAVE_HALF; - if (p_settings.quality == RS::ENV_SSIL_QUALITY_LOW) { - interleave_pipeline = SSIL_INTERLEAVE; - } else if (p_settings.quality >= RS::ENV_SSIL_QUALITY_MEDIUM) { - interleave_pipeline = SSIL_INTERLEAVE_SMART; - } - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, ssil.pipelines[interleave_pipeline]); - - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_destination), 0); - - if (p_settings.quality > RS::ENV_SSIL_QUALITY_VERY_LOW && p_settings.blur_passes % 2 == 0) { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ssil), 1); - } else { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_ssil_pong), 1); - } - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_edges), 2); - - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ssil.interleave_push_constant, sizeof(SSILInterleavePushConstant)); - - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_settings.full_screen_size.x, p_settings.full_screen_size.y, 1); - 
RD::get_singleton()->compute_list_add_barrier(compute_list); - RD::get_singleton()->draw_command_end_label(); // Interleave - } - - RD::get_singleton()->draw_command_end_label(); // SSIL - - RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); - - int zero[1] = { 0 }; - RD::get_singleton()->buffer_update(ssil.importance_map_load_counter, 0, sizeof(uint32_t), &zero, 0); //no barrier -} - void EffectsRD::roughness_limit(RID p_source_normal, RID p_roughness, const Size2i &p_size, float p_curve) { roughness_limiter.push_constant.screen_size[0] = p_size.x; roughness_limiter.push_constant.screen_size[1] = p_size.y; @@ -1357,154 +433,6 @@ EffectsRD::EffectsRD(bool p_prefer_raster_effects) { } } - if (!prefer_raster_effects) { - { - // Initialize depth buffer for screen space effects - Vector downsampler_modes; - downsampler_modes.push_back("\n"); - downsampler_modes.push_back("\n#define USE_HALF_SIZE\n"); - downsampler_modes.push_back("\n#define GENERATE_MIPS\n"); - downsampler_modes.push_back("\n#define GENERATE_MIPS\n#define USE_HALF_SIZE\n"); - downsampler_modes.push_back("\n#define USE_HALF_BUFFERS\n"); - downsampler_modes.push_back("\n#define USE_HALF_BUFFERS\n#define USE_HALF_SIZE\n"); - downsampler_modes.push_back("\n#define GENERATE_MIPS\n#define GENERATE_FULL_MIPS"); - - ss_effects.downsample_shader.initialize(downsampler_modes); - - ss_effects.downsample_shader_version = ss_effects.downsample_shader.version_create(); - - for (int i = 0; i < SS_EFFECTS_MAX; i++) { - ss_effects.pipelines[i] = RD::get_singleton()->compute_pipeline_create(ss_effects.downsample_shader.version_get_shader(ss_effects.downsample_shader_version, i)); - } - - ss_effects.gather_constants_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(SSEffectsGatherConstants)); - SSEffectsGatherConstants gather_constants; - - const int sub_pass_count = 5; - for (int pass = 0; pass < 4; pass++) { - for (int subPass = 0; subPass < sub_pass_count; subPass++) { - int a = pass; - int b 
= subPass; - - int spmap[5]{ 0, 1, 4, 3, 2 }; - b = spmap[subPass]; - - float ca, sa; - float angle0 = (float(a) + float(b) / float(sub_pass_count)) * Math_PI * 0.5f; - - ca = Math::cos(angle0); - sa = Math::sin(angle0); - - float scale = 1.0f + (a - 1.5f + (b - (sub_pass_count - 1.0f) * 0.5f) / float(sub_pass_count)) * 0.07f; - - gather_constants.rotation_matrices[pass * 20 + subPass * 4 + 0] = scale * ca; - gather_constants.rotation_matrices[pass * 20 + subPass * 4 + 1] = scale * -sa; - gather_constants.rotation_matrices[pass * 20 + subPass * 4 + 2] = -scale * sa; - gather_constants.rotation_matrices[pass * 20 + subPass * 4 + 3] = -scale * ca; - } - } - - RD::get_singleton()->buffer_update(ss_effects.gather_constants_buffer, 0, sizeof(SSEffectsGatherConstants), &gather_constants); - } - - { - // Initialize ssao - - RD::SamplerState sampler; - sampler.mag_filter = RD::SAMPLER_FILTER_NEAREST; - sampler.min_filter = RD::SAMPLER_FILTER_NEAREST; - sampler.mip_filter = RD::SAMPLER_FILTER_NEAREST; - sampler.repeat_u = RD::SAMPLER_REPEAT_MODE_MIRRORED_REPEAT; - sampler.repeat_v = RD::SAMPLER_REPEAT_MODE_MIRRORED_REPEAT; - sampler.repeat_w = RD::SAMPLER_REPEAT_MODE_MIRRORED_REPEAT; - sampler.max_lod = 4; - - ss_effects.mirror_sampler = RD::get_singleton()->sampler_create(sampler); - - uint32_t pipeline = 0; - { - Vector ssao_modes; - - ssao_modes.push_back("\n"); - ssao_modes.push_back("\n#define SSAO_BASE\n"); - ssao_modes.push_back("\n#define ADAPTIVE\n"); - - ssao.gather_shader.initialize(ssao_modes); - - ssao.gather_shader_version = ssao.gather_shader.version_create(); - - for (int i = 0; i <= SSAO_GATHER_ADAPTIVE; i++) { - ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.gather_shader.version_get_shader(ssao.gather_shader_version, i)); - pipeline++; - } - } - { - Vector ssao_modes; - ssao_modes.push_back("\n#define GENERATE_MAP\n"); - ssao_modes.push_back("\n#define PROCESS_MAPA\n"); - ssao_modes.push_back("\n#define PROCESS_MAPB\n"); - - 
ssao.importance_map_shader.initialize(ssao_modes); - - ssao.importance_map_shader_version = ssao.importance_map_shader.version_create(); - - for (int i = SSAO_GENERATE_IMPORTANCE_MAP; i <= SSAO_PROCESS_IMPORTANCE_MAPB; i++) { - ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.importance_map_shader.version_get_shader(ssao.importance_map_shader_version, i - SSAO_GENERATE_IMPORTANCE_MAP)); - - pipeline++; - } - ssao.importance_map_load_counter = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t)); - int zero[1] = { 0 }; - RD::get_singleton()->buffer_update(ssao.importance_map_load_counter, 0, sizeof(uint32_t), &zero); - RD::get_singleton()->set_resource_name(ssao.importance_map_load_counter, "Importance Map Load Counter"); - - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.binding = 0; - u.append_id(ssao.importance_map_load_counter); - uniforms.push_back(u); - } - ssao.counter_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssao.importance_map_shader.version_get_shader(ssao.importance_map_shader_version, 2), 2); - RD::get_singleton()->set_resource_name(ssao.counter_uniform_set, "Load Counter Uniform Set"); - } - { - Vector ssao_modes; - ssao_modes.push_back("\n#define MODE_NON_SMART\n"); - ssao_modes.push_back("\n#define MODE_SMART\n"); - ssao_modes.push_back("\n#define MODE_WIDE\n"); - - ssao.blur_shader.initialize(ssao_modes); - - ssao.blur_shader_version = ssao.blur_shader.version_create(); - - for (int i = SSAO_BLUR_PASS; i <= SSAO_BLUR_PASS_WIDE; i++) { - ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.blur_shader.version_get_shader(ssao.blur_shader_version, i - SSAO_BLUR_PASS)); - - pipeline++; - } - } - { - Vector ssao_modes; - ssao_modes.push_back("\n#define MODE_NON_SMART\n"); - ssao_modes.push_back("\n#define MODE_SMART\n"); - ssao_modes.push_back("\n#define MODE_HALF\n"); - - ssao.interleave_shader.initialize(ssao_modes); - - 
ssao.interleave_shader_version = ssao.interleave_shader.version_create(); - for (int i = SSAO_INTERLEAVE; i <= SSAO_INTERLEAVE_HALF; i++) { - ssao.pipelines[pipeline] = RD::get_singleton()->compute_pipeline_create(ssao.interleave_shader.version_get_shader(ssao.interleave_shader_version, i - SSAO_INTERLEAVE)); - RD::get_singleton()->set_resource_name(ssao.pipelines[pipeline], "Interleave Pipeline " + itos(i)); - pipeline++; - } - } - - ERR_FAIL_COND(pipeline != SSAO_MAX); - } - } - if (!prefer_raster_effects) { // Initialize roughness limiter Vector shader_modes; @@ -1517,82 +445,7 @@ EffectsRD::EffectsRD(bool p_prefer_raster_effects) { roughness_limiter.pipeline = RD::get_singleton()->compute_pipeline_create(roughness_limiter.shader.version_get_shader(roughness_limiter.shader_version, 0)); } - if (!prefer_raster_effects) { - Vector specular_modes; - specular_modes.push_back("\n#define MODE_MERGE\n"); - specular_modes.push_back("\n#define MODE_MERGE\n#define MODE_SSR\n"); - specular_modes.push_back("\n"); - specular_modes.push_back("\n#define MODE_SSR\n"); - - specular_merge.shader.initialize(specular_modes); - - specular_merge.shader_version = specular_merge.shader.version_create(); - - //use additive - - RD::PipelineColorBlendState::Attachment ba; - ba.enable_blend = true; - ba.src_color_blend_factor = RD::BLEND_FACTOR_ONE; - ba.dst_color_blend_factor = RD::BLEND_FACTOR_ONE; - ba.src_alpha_blend_factor = RD::BLEND_FACTOR_ONE; - ba.dst_alpha_blend_factor = RD::BLEND_FACTOR_ONE; - ba.color_blend_op = RD::BLEND_OP_ADD; - ba.alpha_blend_op = RD::BLEND_OP_ADD; - - RD::PipelineColorBlendState blend_additive; - blend_additive.attachments.push_back(ba); - - for (int i = 0; i < SPECULAR_MERGE_MAX; i++) { - RD::PipelineColorBlendState blend_state; - if (i == SPECULAR_MERGE_ADDITIVE_ADD || i == SPECULAR_MERGE_ADDITIVE_SSR) { - blend_state = blend_additive; - } else { - blend_state = RD::PipelineColorBlendState::create_disabled(); - } - 
specular_merge.pipelines[i].setup(specular_merge.shader.version_get_shader(specular_merge.shader_version, i), RD::RENDER_PRIMITIVE_TRIANGLES, RD::PipelineRasterizationState(), RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), blend_state, 0); - } - } - if (!prefer_raster_effects) { - { - Vector ssr_modes; - ssr_modes.push_back("\n"); - ssr_modes.push_back("\n#define MODE_ROUGH\n"); - - ssr.shader.initialize(ssr_modes); - - ssr.shader_version = ssr.shader.version_create(); - - for (int i = 0; i < SCREEN_SPACE_REFLECTION_MAX; i++) { - ssr.pipelines[i] = RD::get_singleton()->compute_pipeline_create(ssr.shader.version_get_shader(ssr.shader_version, i)); - } - } - - { - Vector ssr_filter_modes; - ssr_filter_modes.push_back("\n"); - ssr_filter_modes.push_back("\n#define VERTICAL_PASS\n"); - - ssr_filter.shader.initialize(ssr_filter_modes); - - ssr_filter.shader_version = ssr_filter.shader.version_create(); - - for (int i = 0; i < SCREEN_SPACE_REFLECTION_FILTER_MAX; i++) { - ssr_filter.pipelines[i] = RD::get_singleton()->compute_pipeline_create(ssr_filter.shader.version_get_shader(ssr_filter.shader_version, i)); - } - } - - { - Vector ssr_scale_modes; - ssr_scale_modes.push_back("\n"); - - ssr_scale.shader.initialize(ssr_scale_modes); - - ssr_scale.shader_version = ssr_scale.shader.version_create(); - - ssr_scale.pipeline = RD::get_singleton()->compute_pipeline_create(ssr_scale.shader.version_get_shader(ssr_scale.shader_version, 0)); - } - { Vector sss_modes; sss_modes.push_back("\n#define USE_11_SAMPLES\n"); @@ -1607,79 +460,6 @@ EffectsRD::EffectsRD(bool p_prefer_raster_effects) { sss.pipelines[i] = RD::get_singleton()->compute_pipeline_create(sss.shader.version_get_shader(sss.shader_version, i)); } } - - { - Vector ssil_modes; - ssil_modes.push_back("\n"); - ssil_modes.push_back("\n#define SSIL_BASE\n"); - ssil_modes.push_back("\n#define ADAPTIVE\n"); - - ssil.gather_shader.initialize(ssil_modes); - - ssil.gather_shader_version = 
ssil.gather_shader.version_create(); - - for (int i = SSIL_GATHER; i <= SSIL_GATHER_ADAPTIVE; i++) { - ssil.pipelines[i] = RD::get_singleton()->compute_pipeline_create(ssil.gather_shader.version_get_shader(ssil.gather_shader_version, i)); - } - ssil.projection_uniform_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(SSILProjectionUniforms)); - } - - { - Vector ssil_modes; - ssil_modes.push_back("\n#define GENERATE_MAP\n"); - ssil_modes.push_back("\n#define PROCESS_MAPA\n"); - ssil_modes.push_back("\n#define PROCESS_MAPB\n"); - - ssil.importance_map_shader.initialize(ssil_modes); - - ssil.importance_map_shader_version = ssil.importance_map_shader.version_create(); - - for (int i = SSIL_GENERATE_IMPORTANCE_MAP; i <= SSIL_PROCESS_IMPORTANCE_MAPB; i++) { - ssil.pipelines[i] = RD::get_singleton()->compute_pipeline_create(ssil.importance_map_shader.version_get_shader(ssil.importance_map_shader_version, i - SSIL_GENERATE_IMPORTANCE_MAP)); - } - ssil.importance_map_load_counter = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t)); - int zero[1] = { 0 }; - RD::get_singleton()->buffer_update(ssil.importance_map_load_counter, 0, sizeof(uint32_t), &zero); - RD::get_singleton()->set_resource_name(ssil.importance_map_load_counter, "Importance Map Load Counter"); - - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.binding = 0; - u.append_id(ssil.importance_map_load_counter); - uniforms.push_back(u); - } - ssil.counter_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, ssil.importance_map_shader.version_get_shader(ssil.importance_map_shader_version, 2), 2); - RD::get_singleton()->set_resource_name(ssil.counter_uniform_set, "Load Counter Uniform Set"); - } - { - Vector ssil_modes; - ssil_modes.push_back("\n#define MODE_NON_SMART\n"); - ssil_modes.push_back("\n#define MODE_SMART\n"); - ssil_modes.push_back("\n#define MODE_WIDE\n"); - - ssil.blur_shader.initialize(ssil_modes); - - ssil.blur_shader_version 
= ssil.blur_shader.version_create(); - for (int i = SSIL_BLUR_PASS; i <= SSIL_BLUR_PASS_WIDE; i++) { - ssil.pipelines[i] = RD::get_singleton()->compute_pipeline_create(ssil.blur_shader.version_get_shader(ssil.blur_shader_version, i - SSIL_BLUR_PASS)); - } - } - - { - Vector ssil_modes; - ssil_modes.push_back("\n#define MODE_NON_SMART\n"); - ssil_modes.push_back("\n#define MODE_SMART\n"); - ssil_modes.push_back("\n#define MODE_HALF\n"); - - ssil.interleave_shader.initialize(ssil_modes); - - ssil.interleave_shader_version = ssil.interleave_shader.version_create(); - for (int i = SSIL_INTERLEAVE; i <= SSIL_INTERLEAVE_HALF; i++) { - ssil.pipelines[i] = RD::get_singleton()->compute_pipeline_create(ssil.interleave_shader.version_get_shader(ssil.interleave_shader_version, i - SSIL_INTERLEAVE)); - } - } } { @@ -1751,27 +531,8 @@ EffectsRD::~EffectsRD() { luminance_reduce.shader.version_free(luminance_reduce.shader_version); } if (!prefer_raster_effects) { - specular_merge.shader.version_free(specular_merge.shader_version); - ss_effects.downsample_shader.version_free(ss_effects.downsample_shader_version); - ssao.blur_shader.version_free(ssao.blur_shader_version); - ssao.gather_shader.version_free(ssao.gather_shader_version); - ssao.interleave_shader.version_free(ssao.interleave_shader_version); - ssao.importance_map_shader.version_free(ssao.importance_map_shader_version); - ssil.blur_shader.version_free(ssil.blur_shader_version); - ssil.gather_shader.version_free(ssil.gather_shader_version); - ssil.interleave_shader.version_free(ssil.interleave_shader_version); - ssil.importance_map_shader.version_free(ssil.importance_map_shader_version); roughness_limiter.shader.version_free(roughness_limiter.shader_version); - ssr.shader.version_free(ssr.shader_version); - ssr_filter.shader.version_free(ssr_filter.shader_version); - ssr_scale.shader.version_free(ssr_scale.shader_version); sss.shader.version_free(sss.shader_version); - - 
RD::get_singleton()->free(ss_effects.mirror_sampler); - RD::get_singleton()->free(ss_effects.gather_constants_buffer); - RD::get_singleton()->free(ssao.importance_map_load_counter); - RD::get_singleton()->free(ssil.importance_map_load_counter); - RD::get_singleton()->free(ssil.projection_uniform_buffer); } sort.shader.version_free(sort.shader_version); } diff --git a/servers/rendering/renderer_rd/effects_rd.h b/servers/rendering/renderer_rd/effects_rd.h index 76627a8d7d..f8b5ecb920 100644 --- a/servers/rendering/renderer_rd/effects_rd.h +++ b/servers/rendering/renderer_rd/effects_rd.h @@ -37,20 +37,7 @@ #include "servers/rendering/renderer_rd/shaders/luminance_reduce.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/luminance_reduce_raster.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/roughness_limiter.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/screen_space_reflection.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/screen_space_reflection_filter.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/screen_space_reflection_scale.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/sort.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/specular_merge.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/ss_effects_downsample.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/ssao.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/ssao_blur.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/ssao_interleave.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/ssil.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/ssil_blur.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/ssil_importance_map.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/ssil_interleave.glsl.gen.h" #include 
"servers/rendering/renderer_rd/shaders/subsurface_scattering.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/taa_resolve.glsl.gen.h" #include "servers/rendering/renderer_scene_render.h" @@ -142,231 +129,6 @@ private: PipelineCacheRD pipelines[LUMINANCE_REDUCE_FRAGMENT_MAX]; } luminance_reduce_raster; - struct SSEffectsDownsamplePushConstant { - float pixel_size[2]; - float z_far; - float z_near; - uint32_t orthogonal; - float radius_sq; - uint32_t pad[2]; - }; - - enum SSEffectsMode { - SS_EFFECTS_DOWNSAMPLE, - SS_EFFECTS_DOWNSAMPLE_HALF_RES, - SS_EFFECTS_DOWNSAMPLE_MIPMAP, - SS_EFFECTS_DOWNSAMPLE_MIPMAP_HALF_RES, - SS_EFFECTS_DOWNSAMPLE_HALF, - SS_EFFECTS_DOWNSAMPLE_HALF_RES_HALF, - SS_EFFECTS_DOWNSAMPLE_FULL_MIPS, - SS_EFFECTS_MAX - }; - - struct SSEffectsGatherConstants { - float rotation_matrices[80]; //5 vec4s * 4 - }; - - struct SSEffects { - SSEffectsDownsamplePushConstant downsample_push_constant; - SsEffectsDownsampleShaderRD downsample_shader; - RID downsample_shader_version; - RID downsample_uniform_set; - bool used_half_size_last_frame = false; - bool used_mips_last_frame = false; - bool used_full_mips_last_frame = false; - - RID gather_constants_buffer; - - RID mirror_sampler; - - RID pipelines[SS_EFFECTS_MAX]; - } ss_effects; - - enum SSAOMode { - SSAO_GATHER, - SSAO_GATHER_BASE, - SSAO_GATHER_ADAPTIVE, - SSAO_GENERATE_IMPORTANCE_MAP, - SSAO_PROCESS_IMPORTANCE_MAPA, - SSAO_PROCESS_IMPORTANCE_MAPB, - SSAO_BLUR_PASS, - SSAO_BLUR_PASS_SMART, - SSAO_BLUR_PASS_WIDE, - SSAO_INTERLEAVE, - SSAO_INTERLEAVE_SMART, - SSAO_INTERLEAVE_HALF, - SSAO_MAX - }; - - struct SSAOGatherPushConstant { - int32_t screen_size[2]; - int pass; - int quality; - - float half_screen_pixel_size[2]; - int size_multiplier; - float detail_intensity; - - float NDC_to_view_mul[2]; - float NDC_to_view_add[2]; - - float pad[2]; - float half_screen_pixel_size_x025[2]; - - float radius; - float intensity; - float shadow_power; - float shadow_clamp; - - float fade_out_mul; - float 
fade_out_add; - float horizon_angle_threshold; - float inv_radius_near_limit; - - uint32_t is_orthogonal; - float neg_inv_radius; - float load_counter_avg_div; - float adaptive_sample_limit; - - int32_t pass_coord_offset[2]; - float pass_uv_offset[2]; - }; - - struct SSAOImportanceMapPushConstant { - float half_screen_pixel_size[2]; - float intensity; - float power; - }; - - struct SSAOBlurPushConstant { - float edge_sharpness; - float pad; - float half_screen_pixel_size[2]; - }; - - struct SSAOInterleavePushConstant { - float inv_sharpness; - uint32_t size_modifier; - float pixel_size[2]; - }; - - struct SSAO { - SSAOGatherPushConstant gather_push_constant; - SsaoShaderRD gather_shader; - RID gather_shader_version; - - SSAOImportanceMapPushConstant importance_map_push_constant; - SsaoImportanceMapShaderRD importance_map_shader; - RID importance_map_shader_version; - RID importance_map_load_counter; - RID counter_uniform_set; - - SSAOBlurPushConstant blur_push_constant; - SsaoBlurShaderRD blur_shader; - RID blur_shader_version; - - SSAOInterleavePushConstant interleave_push_constant; - SsaoInterleaveShaderRD interleave_shader; - RID interleave_shader_version; - - RID pipelines[SSAO_MAX]; - } ssao; - - enum SSILMode { - SSIL_GATHER, - SSIL_GATHER_BASE, - SSIL_GATHER_ADAPTIVE, - SSIL_GENERATE_IMPORTANCE_MAP, - SSIL_PROCESS_IMPORTANCE_MAPA, - SSIL_PROCESS_IMPORTANCE_MAPB, - SSIL_BLUR_PASS, - SSIL_BLUR_PASS_SMART, - SSIL_BLUR_PASS_WIDE, - SSIL_INTERLEAVE, - SSIL_INTERLEAVE_SMART, - SSIL_INTERLEAVE_HALF, - SSIL_MAX - }; - - struct SSILGatherPushConstant { - int32_t screen_size[2]; - int pass; - int quality; - - float half_screen_pixel_size[2]; - float half_screen_pixel_size_x025[2]; - - float NDC_to_view_mul[2]; - float NDC_to_view_add[2]; - - float pad2[2]; - float z_near; - float z_far; - - float radius; - float intensity; - int size_multiplier; - int pad; - - float fade_out_mul; - float fade_out_add; - float normal_rejection_amount; - float inv_radius_near_limit; - - 
uint32_t is_orthogonal; - float neg_inv_radius; - float load_counter_avg_div; - float adaptive_sample_limit; - - int32_t pass_coord_offset[2]; - float pass_uv_offset[2]; - }; - - struct SSILImportanceMapPushConstant { - float half_screen_pixel_size[2]; - float intensity; - float pad; - }; - - struct SSILBlurPushConstant { - float edge_sharpness; - float pad; - float half_screen_pixel_size[2]; - }; - - struct SSILInterleavePushConstant { - float inv_sharpness; - uint32_t size_modifier; - float pixel_size[2]; - }; - - struct SSILProjectionUniforms { - float inv_last_frame_projection_matrix[16]; - }; - - struct SSIL { - SSILGatherPushConstant gather_push_constant; - SsilShaderRD gather_shader; - RID gather_shader_version; - RID projection_uniform_buffer; - - SSILImportanceMapPushConstant importance_map_push_constant; - SsilImportanceMapShaderRD importance_map_shader; - RID importance_map_shader_version; - RID importance_map_load_counter; - RID counter_uniform_set; - - SSILBlurPushConstant blur_push_constant; - SsilBlurShaderRD blur_shader; - RID blur_shader_version; - - SSILInterleavePushConstant interleave_push_constant; - SsilInterleaveShaderRD interleave_shader; - RID interleave_shader_version; - - RID pipelines[SSIL_MAX]; - } ssil; - struct RoughnessLimiterPushConstant { int32_t screen_size[2]; float curve; @@ -381,101 +143,6 @@ private: } roughness_limiter; - enum SpecularMergeMode { - SPECULAR_MERGE_ADD, - SPECULAR_MERGE_SSR, - SPECULAR_MERGE_ADDITIVE_ADD, - SPECULAR_MERGE_ADDITIVE_SSR, - SPECULAR_MERGE_MAX - }; - - /* Specular merge must be done using raster, rather than compute - * because it must continue the existing color buffer - */ - - struct SpecularMerge { - SpecularMergeShaderRD shader; - RID shader_version; - PipelineCacheRD pipelines[SPECULAR_MERGE_MAX]; - - } specular_merge; - - enum ScreenSpaceReflectionMode { - SCREEN_SPACE_REFLECTION_NORMAL, - SCREEN_SPACE_REFLECTION_ROUGH, - SCREEN_SPACE_REFLECTION_MAX, - }; - - struct 
ScreenSpaceReflectionPushConstant { - float proj_info[4]; - - int32_t screen_size[2]; - float camera_z_near; - float camera_z_far; - - int32_t num_steps; - float depth_tolerance; - float distance_fade; - float curve_fade_in; - - uint32_t orthogonal; - float filter_mipmap_levels; - uint32_t use_half_res; - uint8_t metallic_mask[4]; - - float projection[16]; - }; - - struct ScreenSpaceReflection { - ScreenSpaceReflectionPushConstant push_constant; - ScreenSpaceReflectionShaderRD shader; - RID shader_version; - RID pipelines[SCREEN_SPACE_REFLECTION_MAX]; - - } ssr; - - struct ScreenSpaceReflectionFilterPushConstant { - float proj_info[4]; - - uint32_t orthogonal; - float edge_tolerance; - int32_t increment; - uint32_t pad; - - int32_t screen_size[2]; - uint32_t vertical; - uint32_t steps; - }; - enum { - SCREEN_SPACE_REFLECTION_FILTER_HORIZONTAL, - SCREEN_SPACE_REFLECTION_FILTER_VERTICAL, - SCREEN_SPACE_REFLECTION_FILTER_MAX, - }; - - struct ScreenSpaceReflectionFilter { - ScreenSpaceReflectionFilterPushConstant push_constant; - ScreenSpaceReflectionFilterShaderRD shader; - RID shader_version; - RID pipelines[SCREEN_SPACE_REFLECTION_FILTER_MAX]; - } ssr_filter; - - struct ScreenSpaceReflectionScalePushConstant { - int32_t screen_size[2]; - float camera_z_near; - float camera_z_far; - - uint32_t orthogonal; - uint32_t filter; - uint32_t pad[2]; - }; - - struct ScreenSpaceReflectionScale { - ScreenSpaceReflectionScalePushConstant push_constant; - ScreenSpaceReflectionScaleShaderRD shader; - RID shader_version; - RID pipeline; - } ssr_scale; - struct SubSurfaceScatteringPushConstant { int32_t screen_size[2]; float camera_z_far; @@ -559,9 +226,6 @@ private: RID _get_uniform_set_from_image(RID p_texture); RID _get_uniform_set_from_texture(RID p_texture, bool p_use_mipmaps = false); RID _get_compute_uniform_set_from_texture(RID p_texture, bool p_use_mipmaps = false); - RID _get_compute_uniform_set_from_texture_and_sampler(RID p_texture, RID p_sampler); - RID 
_get_compute_uniform_set_from_texture_pair(RID p_texture, RID p_texture2, bool p_use_mipmaps = false); - RID _get_compute_uniform_set_from_image_pair(RID p_texture, RID p_texture2); public: bool get_prefer_raster_effects(); @@ -572,56 +236,8 @@ public: void luminance_reduction(RID p_source_texture, const Size2i p_source_size, const Vector p_reduce, RID p_prev_luminance, float p_min_luminance, float p_max_luminance, float p_adjust, bool p_set = false); void luminance_reduction_raster(RID p_source_texture, const Size2i p_source_size, const Vector p_reduce, Vector p_fb, RID p_prev_luminance, float p_min_luminance, float p_max_luminance, float p_adjust, bool p_set = false); - struct SSAOSettings { - float radius = 1.0; - float intensity = 2.0; - float power = 1.5; - float detail = 0.5; - float horizon = 0.06; - float sharpness = 0.98; - - RS::EnvironmentSSAOQuality quality = RS::ENV_SSAO_QUALITY_MEDIUM; - bool half_size = false; - float adaptive_target = 0.5; - int blur_passes = 2; - float fadeout_from = 50.0; - float fadeout_to = 300.0; - - Size2i full_screen_size = Size2i(); - Size2i half_screen_size = Size2i(); - Size2i quarter_screen_size = Size2i(); - }; - - struct SSILSettings { - float radius = 1.0; - float intensity = 2.0; - float sharpness = 0.98; - float normal_rejection = 1.0; - - RS::EnvironmentSSILQuality quality = RS::ENV_SSIL_QUALITY_MEDIUM; - bool half_size = true; - float adaptive_target = 0.5; - int blur_passes = 4; - float fadeout_from = 50.0; - float fadeout_to = 300.0; - - Size2i full_screen_size = Size2i(); - Size2i half_screen_size = Size2i(); - Size2i quarter_screen_size = Size2i(); - }; - - void downsample_depth(RID p_depth_buffer, const Vector &p_depth_mipmaps, RS::EnvironmentSSAOQuality p_ssao_quality, RS::EnvironmentSSILQuality p_ssil_quality, bool p_invalidate_uniform_set, bool p_ssao_half_size, bool p_ssil_half_size, Size2i p_full_screen_size, const CameraMatrix &p_projection); - - void gather_ssao(RD::ComputeListID p_compute_list, const 
Vector p_ao_slices, const SSAOSettings &p_settings, bool p_adaptive_base_pass, RID p_gather_uniform_set, RID p_importance_map_uniform_set); - void generate_ssao(RID p_normal_buffer, RID p_depth_mipmaps_texture, RID p_ao, const Vector p_ao_slices, RID p_ao_pong, const Vector p_ao_pong_slices, RID p_upscale_buffer, RID p_importance_map, RID p_importance_map_pong, const CameraMatrix &p_projection, const SSAOSettings &p_settings, bool p_invalidate_uniform_sets, RID &r_gather_uniform_set, RID &r_importance_map_uniform_set); - - void gather_ssil(RD::ComputeListID p_compute_list, const Vector p_ssil_slices, const Vector p_edges_slices, const SSILSettings &p_settings, bool p_adaptive_base_pass, RID p_gather_uniform_set, RID p_importance_map_uniform_set, RID p_projection_uniform_set); - void screen_space_indirect_lighting(RID p_diffuse, RID p_destination, RID p_normal_buffer, RID p_depth_mipmaps_texture, RID p_ao, const Vector p_ao_slices, RID p_ao_pong, const Vector p_ao_pong_slices, RID p_importance_map, RID p_importance_map_pong, RID p_edges, const Vector p_edges_slices, const CameraMatrix &p_projection, const CameraMatrix &p_last_projection, const SSILSettings &p_settings, bool p_invalidate_uniform_sets, RID &r_gather_uniform_set, RID &r_importance_map_uniform_set, RID &r_projection_uniform_set); - void roughness_limit(RID p_source_normal, RID p_roughness, const Size2i &p_size, float p_curve); - void screen_space_reflection(RID p_diffuse, RID p_normal_roughness, RS::EnvironmentSSRRoughnessQuality p_roughness_quality, RID p_blur_radius, RID p_blur_radius2, RID p_metallic, const Color &p_metallic_mask, RID p_depth, RID p_scale_depth, RID p_scale_normal, RID p_output, RID p_output_blur, const Size2i &p_screen_size, int p_max_steps, float p_fade_in, float p_fade_out, float p_tolerance, const CameraMatrix &p_camera); - void merge_specular(RID p_dest_framebuffer, RID p_specular, RID p_base, RID p_reflection); void sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID 
p_depth, const CameraMatrix &p_camera, const Size2i &p_screen_size, float p_scale, float p_depth_scale, RS::SubSurfaceScatteringQuality p_quality); void sort_buffer(RID p_uniform_set, int p_size); diff --git a/servers/rendering/renderer_rd/environment/gi.h b/servers/rendering/renderer_rd/environment/gi.h index d950ff9e86..b841c427d1 100644 --- a/servers/rendering/renderer_rd/environment/gi.h +++ b/servers/rendering/renderer_rd/environment/gi.h @@ -755,7 +755,7 @@ public: SHADER_SPECIALIZATION_HALF_RES = 1 << 0, SHADER_SPECIALIZATION_USE_FULL_PROJECTION_MATRIX = 1 << 1, SHADER_SPECIALIZATION_USE_VRS = 1 << 2, - SHADER_SPECIALIZATION_VARIATIONS = 0x07, + SHADER_SPECIALIZATION_VARIATIONS = 8, }; RID default_voxel_gi_buffer; diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp index 85652a041d..d6613a60cc 100644 --- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp @@ -66,6 +66,13 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_specular() } specular = RD::get_singleton()->texture_create(tf, RD::TextureView()); + if (view_count == 1) { + specular_views[0] = specular; + } else { + for (uint32_t v = 0; v < view_count; v++) { + specular_views[v] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), specular, v, 0); + } + } if (msaa == RS::VIEWPORT_MSAA_DISABLED) { { @@ -80,6 +87,14 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_specular() tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; specular_msaa = RD::get_singleton()->texture_create(tf, RD::TextureView()); + if (view_count == 1) { + specular_msaa_views[0] = specular_msaa; + } else { + for (uint32_t v = 0; v < view_count; v++) { + specular_msaa_views[v] = 
RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), specular_msaa, v, 0); + } + } + { Vector fb; fb.push_back(specular_msaa); @@ -175,6 +190,8 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::clear() { for (uint32_t v = 0; v < RendererSceneRender::MAX_RENDER_VIEWS; v++) { color_views[v] = RID(); depth_views[v] = RID(); + specular_views[v] = RID(); + specular_msaa_views[v] = RID(); color_msaa_views[v] = RID(); depth_msaa_views[v] = RID(); normal_roughness_views[v] = RID(); @@ -1749,9 +1766,10 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co for (uint32_t v = 0; v < render_buffer->view_count; v++) { RD::get_singleton()->texture_resolve_multisample(render_buffer->color_msaa_views[v], render_buffer->color_views[v]); } - // TODO mame this do multiview if (using_separate_specular) { - RD::get_singleton()->texture_resolve_multisample(render_buffer->specular_msaa, render_buffer->specular); + for (uint32_t v = 0; v < render_buffer->view_count; v++) { + RD::get_singleton()->texture_resolve_multisample(render_buffer->specular_msaa_views[v], render_buffer->specular_views[v]); + } } } @@ -1772,12 +1790,12 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co if (using_ssr) { RENDER_TIMESTAMP("Screen-Space Reflections"); RD::get_singleton()->draw_command_begin_label("Process Screen-Space Reflections"); - _process_ssr(p_render_data->render_buffers, color_only_framebuffer, render_buffer->normal_roughness_buffer, render_buffer->specular, render_buffer->specular, Color(0, 0, 0, 1), p_render_data->environment, p_render_data->cam_projection, render_buffer->msaa == RS::VIEWPORT_MSAA_DISABLED); + _process_ssr(p_render_data->render_buffers, color_only_framebuffer, render_buffer->normal_roughness_views, render_buffer->specular, render_buffer->specular_views, Color(0, 0, 0, 1), p_render_data->environment, p_render_data->view_projection, p_render_data->view_eye_offset, render_buffer->msaa 
== RS::VIEWPORT_MSAA_DISABLED); RD::get_singleton()->draw_command_end_label(); } else { //just mix specular back RENDER_TIMESTAMP("Merge Specular"); - RendererCompositorRD::singleton->get_effects()->merge_specular(color_only_framebuffer, render_buffer->specular, render_buffer->msaa == RS::VIEWPORT_MSAA_DISABLED ? RID() : render_buffer->color, RID()); + copy_effects->merge_specular(color_only_framebuffer, render_buffer->specular, render_buffer->msaa == RS::VIEWPORT_MSAA_DISABLED ? RID() : render_buffer->color, RID(), p_render_data->view_count); } } diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h index ff712a20a1..6429def4f9 100644 --- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h +++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h @@ -117,6 +117,8 @@ class RenderForwardClustered : public RendererSceneRenderRD { uint32_t view_count = 1; RID color_views[RendererSceneRender::MAX_RENDER_VIEWS]; // we should rewrite this so we get access to the existing views in our renderer, something we can address when we reorg this RID depth_views[RendererSceneRender::MAX_RENDER_VIEWS]; // we should rewrite this so we get access to the existing views in our renderer, something we can address when we reorg this + RID specular_views[RendererSceneRender::MAX_RENDER_VIEWS]; + RID specular_msaa_views[RendererSceneRender::MAX_RENDER_VIEWS]; RID color_msaa_views[RendererSceneRender::MAX_RENDER_VIEWS]; RID depth_msaa_views[RendererSceneRender::MAX_RENDER_VIEWS]; RID normal_roughness_views[RendererSceneRender::MAX_RENDER_VIEWS]; diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp index a2a0538e04..c83f066b52 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp +++ 
b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp @@ -1891,60 +1891,9 @@ void RendererSceneRenderRD::_free_render_buffer_data(RenderBuffers *rb) { rb->ss_effects.linear_depth_slices.clear(); } - if (rb->ss_effects.ssao.ao_final.is_valid()) { - RD::get_singleton()->free(rb->ss_effects.ssao.ao_deinterleaved); - RD::get_singleton()->free(rb->ss_effects.ssao.ao_pong); - RD::get_singleton()->free(rb->ss_effects.ssao.ao_final); - - RD::get_singleton()->free(rb->ss_effects.ssao.importance_map[0]); - RD::get_singleton()->free(rb->ss_effects.ssao.importance_map[1]); - - rb->ss_effects.ssao.ao_deinterleaved = RID(); - rb->ss_effects.ssao.ao_pong = RID(); - rb->ss_effects.ssao.ao_final = RID(); - rb->ss_effects.ssao.importance_map[0] = RID(); - rb->ss_effects.ssao.importance_map[1] = RID(); - - rb->ss_effects.ssao.ao_deinterleaved_slices.clear(); - rb->ss_effects.ssao.ao_pong_slices.clear(); - } - - if (rb->ss_effects.ssil.ssil_final.is_valid()) { - RD::get_singleton()->free(rb->ss_effects.ssil.ssil_final); - RD::get_singleton()->free(rb->ss_effects.ssil.deinterleaved); - RD::get_singleton()->free(rb->ss_effects.ssil.pong); - RD::get_singleton()->free(rb->ss_effects.ssil.edges); - RD::get_singleton()->free(rb->ss_effects.ssil.importance_map[0]); - RD::get_singleton()->free(rb->ss_effects.ssil.importance_map[1]); - - rb->ss_effects.ssil.ssil_final = RID(); - rb->ss_effects.ssil.deinterleaved = RID(); - rb->ss_effects.ssil.pong = RID(); - rb->ss_effects.ssil.edges = RID(); - rb->ss_effects.ssil.deinterleaved_slices.clear(); - rb->ss_effects.ssil.pong_slices.clear(); - rb->ss_effects.ssil.edges_slices.clear(); - rb->ss_effects.ssil.importance_map[0] = RID(); - rb->ss_effects.ssil.importance_map[1] = RID(); - - RD::get_singleton()->free(rb->ss_effects.last_frame); - rb->ss_effects.last_frame = RID(); - rb->ss_effects.last_frame_slices.clear(); - } - - if (rb->ssr.blur_radius[0].is_valid()) { - RD::get_singleton()->free(rb->ssr.blur_radius[0]); - 
RD::get_singleton()->free(rb->ssr.blur_radius[1]); - rb->ssr.blur_radius[0] = RID(); - rb->ssr.blur_radius[1] = RID(); - } - - if (rb->ssr.depth_scaled.is_valid()) { - RD::get_singleton()->free(rb->ssr.depth_scaled); - rb->ssr.depth_scaled = RID(); - RD::get_singleton()->free(rb->ssr.normal_scaled); - rb->ssr.normal_scaled = RID(); - } + ss_effects->ssao_free(rb->ss_effects.ssao); + ss_effects->ssil_free(rb->ss_effects.ssil); + ss_effects->ssr_free(rb->ssr); if (rb->taa.history.is_valid()) { RD::get_singleton()->free(rb->taa.history); @@ -1982,7 +1931,9 @@ void RendererSceneRenderRD::_process_sss(RID p_render_buffers, const CameraMatri RendererCompositorRD::singleton->get_effects()->sub_surface_scattering(rb->internal_texture, rb->sss_texture, rb->depth_texture, p_camera, Size2i(rb->internal_width, rb->internal_height), sss_scale, sss_depth_scale, sss_quality); } -void RendererSceneRenderRD::_process_ssr(RID p_render_buffers, RID p_dest_framebuffer, RID p_normal_buffer, RID p_specular_buffer, RID p_metallic, const Color &p_metallic_mask, RID p_environment, const CameraMatrix &p_projection, bool p_use_additive) { +void RendererSceneRenderRD::_process_ssr(RID p_render_buffers, RID p_dest_framebuffer, const RID *p_normal_slices, RID p_specular_buffer, const RID *p_metallic_slices, const Color &p_metallic_mask, RID p_environment, const CameraMatrix *p_projections, const Vector3 *p_eye_offsets, bool p_use_additive) { + ERR_FAIL_NULL(ss_effects); + RenderBuffers *rb = render_buffers_owner.get_or_null(p_render_buffers); ERR_FAIL_COND(!rb); @@ -1990,7 +1941,7 @@ void RendererSceneRenderRD::_process_ssr(RID p_render_buffers, RID p_dest_frameb if (!can_use_effects) { //just copy - RendererCompositorRD::singleton->get_effects()->merge_specular(p_dest_framebuffer, p_specular_buffer, p_use_additive ? RID() : rb->internal_texture, RID()); + copy_effects->merge_specular(p_dest_framebuffer, p_specular_buffer, p_use_additive ? 
RID() : rb->internal_texture, RID(), rb->view_count); return; } @@ -1999,42 +1950,23 @@ void RendererSceneRenderRD::_process_ssr(RID p_render_buffers, RID p_dest_frameb ERR_FAIL_COND(!env->ssr_enabled); - if (rb->ssr.depth_scaled.is_null()) { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R32_SFLOAT; - tf.width = rb->internal_width / 2; - tf.height = rb->internal_height / 2; - tf.texture_type = RD::TEXTURE_TYPE_2D; - tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT; - - rb->ssr.depth_scaled = RD::get_singleton()->texture_create(tf, RD::TextureView()); - - tf.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; - - rb->ssr.normal_scaled = RD::get_singleton()->texture_create(tf, RD::TextureView()); + Size2i half_size = Size2i(rb->internal_width / 2, rb->internal_height / 2); + if (rb->ssr.output.is_null()) { + ss_effects->ssr_allocate_buffers(rb->ssr, _render_buffers_get_color_format(), ssr_roughness_quality, half_size, rb->view_count); } - - if (ssr_roughness_quality != RS::ENV_SSR_ROUGHNESS_QUALITY_DISABLED && !rb->ssr.blur_radius[0].is_valid()) { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R8_UNORM; - tf.width = rb->internal_width / 2; - tf.height = rb->internal_height / 2; - tf.texture_type = RD::TEXTURE_TYPE_2D; - tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT; - - rb->ssr.blur_radius[0] = RD::get_singleton()->texture_create(tf, RD::TextureView()); - rb->ssr.blur_radius[1] = RD::get_singleton()->texture_create(tf, RD::TextureView()); - } - - if (rb->blur[0].texture.is_null()) { - _allocate_blur_textures(rb); + RID texture_slices[RendererSceneRender::MAX_RENDER_VIEWS]; + RID depth_slices[RendererSceneRender::MAX_RENDER_VIEWS]; + for (uint32_t v = 0; v < rb->view_count; v++) { + texture_slices[v] = rb->views[v].view_texture; + depth_slices[v] = rb->views[v].view_depth; } - - RendererCompositorRD::singleton->get_effects()->screen_space_reflection(rb->internal_texture, p_normal_buffer, ssr_roughness_quality, rb->ssr.blur_radius[0], 
rb->ssr.blur_radius[1], p_metallic, p_metallic_mask, rb->depth_texture, rb->ssr.depth_scaled, rb->ssr.normal_scaled, rb->blur[0].layers[0].mipmaps[1].texture, rb->blur[1].layers[0].mipmaps[0].texture, Size2i(rb->internal_width / 2, rb->internal_height / 2), env->ssr_max_steps, env->ssr_fade_in, env->ssr_fade_out, env->ssr_depth_tolerance, p_projection); - RendererCompositorRD::singleton->get_effects()->merge_specular(p_dest_framebuffer, p_specular_buffer, p_use_additive ? RID() : rb->internal_texture, rb->blur[0].layers[0].mipmaps[1].texture); + ss_effects->screen_space_reflection(rb->ssr, texture_slices, p_normal_slices, ssr_roughness_quality, p_metallic_slices, p_metallic_mask, depth_slices, half_size, env->ssr_max_steps, env->ssr_fade_in, env->ssr_fade_out, env->ssr_depth_tolerance, rb->view_count, p_projections, p_eye_offsets); + copy_effects->merge_specular(p_dest_framebuffer, p_specular_buffer, p_use_additive ? RID() : rb->internal_texture, rb->ssr.output, rb->view_count); } void RendererSceneRenderRD::_process_ssao(RID p_render_buffers, RID p_environment, RID p_normal_buffer, const CameraMatrix &p_projection) { + ERR_FAIL_NULL(ss_effects); + RenderBuffers *rb = render_buffers_owner.get_or_null(p_render_buffers); ERR_FAIL_COND(!rb); @@ -2043,102 +1975,7 @@ void RendererSceneRenderRD::_process_ssao(RID p_render_buffers, RID p_environmen RENDER_TIMESTAMP("Process SSAO"); - if (rb->ss_effects.ssao.ao_final.is_valid() && ssao_using_half_size != ssao_half_size) { - RD::get_singleton()->free(rb->ss_effects.ssao.ao_deinterleaved); - RD::get_singleton()->free(rb->ss_effects.ssao.ao_pong); - RD::get_singleton()->free(rb->ss_effects.ssao.ao_final); - - RD::get_singleton()->free(rb->ss_effects.ssao.importance_map[0]); - RD::get_singleton()->free(rb->ss_effects.ssao.importance_map[1]); - - rb->ss_effects.ssao.ao_deinterleaved = RID(); - rb->ss_effects.ssao.ao_pong = RID(); - rb->ss_effects.ssao.ao_final = RID(); - rb->ss_effects.ssao.importance_map[0] = RID(); - 
rb->ss_effects.ssao.importance_map[1] = RID(); - rb->ss_effects.ssao.ao_deinterleaved_slices.clear(); - rb->ss_effects.ssao.ao_pong_slices.clear(); - } - - int buffer_width; - int buffer_height; - int half_width; - int half_height; - if (ssao_half_size) { - buffer_width = (rb->internal_width + 3) / 4; - buffer_height = (rb->internal_height + 3) / 4; - half_width = (rb->internal_width + 7) / 8; - half_height = (rb->internal_height + 7) / 8; - } else { - buffer_width = (rb->internal_width + 1) / 2; - buffer_height = (rb->internal_height + 1) / 2; - half_width = (rb->internal_width + 3) / 4; - half_height = (rb->internal_height + 3) / 4; - } - bool uniform_sets_are_invalid = false; - if (rb->ss_effects.ssao.ao_deinterleaved.is_null()) { - { - rb->ss_effects.ssao.depth_texture_view = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->ss_effects.linear_depth, 0, ssao_half_size ? 1 : 0, 4, RD::TEXTURE_SLICE_2D_ARRAY); - } - { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R8G8_UNORM; - tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; - tf.width = buffer_width; - tf.height = buffer_height; - tf.array_layers = 4; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - rb->ss_effects.ssao.ao_deinterleaved = RD::get_singleton()->texture_create(tf, RD::TextureView()); - RD::get_singleton()->set_resource_name(rb->ss_effects.ssao.ao_deinterleaved, "SSAO De-interleaved Array"); - for (uint32_t i = 0; i < 4; i++) { - RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->ss_effects.ssao.ao_deinterleaved, i, 0); - rb->ss_effects.ssao.ao_deinterleaved_slices.push_back(slice); - RD::get_singleton()->set_resource_name(slice, "SSAO De-interleaved Array Layer " + itos(i) + " "); - } - } - - { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R8G8_UNORM; - tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; - tf.width = buffer_width; - tf.height = buffer_height; - tf.array_layers = 4; - tf.usage_bits 
= RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - rb->ss_effects.ssao.ao_pong = RD::get_singleton()->texture_create(tf, RD::TextureView()); - RD::get_singleton()->set_resource_name(rb->ss_effects.ssao.ao_pong, "SSAO De-interleaved Array Pong"); - for (uint32_t i = 0; i < 4; i++) { - RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->ss_effects.ssao.ao_pong, i, 0); - rb->ss_effects.ssao.ao_pong_slices.push_back(slice); - RD::get_singleton()->set_resource_name(slice, "SSAO De-interleaved Array Layer " + itos(i) + " Pong"); - } - } - - { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R8_UNORM; - tf.width = half_width; - tf.height = half_height; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - rb->ss_effects.ssao.importance_map[0] = RD::get_singleton()->texture_create(tf, RD::TextureView()); - RD::get_singleton()->set_resource_name(rb->ss_effects.ssao.importance_map[0], "SSAO Importance Map"); - rb->ss_effects.ssao.importance_map[1] = RD::get_singleton()->texture_create(tf, RD::TextureView()); - RD::get_singleton()->set_resource_name(rb->ss_effects.ssao.importance_map[1], "SSAO Importance Map Pong"); - } - { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R8_UNORM; - tf.width = rb->internal_width; - tf.height = rb->internal_height; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - rb->ss_effects.ssao.ao_final = RD::get_singleton()->texture_create(tf, RD::TextureView()); - RD::get_singleton()->set_resource_name(rb->ss_effects.ssao.ao_final, "SSAO Final"); - } - ssao_using_half_size = ssao_half_size; - uniform_sets_are_invalid = true; - } - - EffectsRD::SSAOSettings settings; + RendererRD::SSEffects::SSAOSettings settings; settings.radius = env->ssao_radius; settings.intensity = env->ssao_intensity; settings.power = env->ssao_power; @@ -2153,13 +1990,14 @@ void RendererSceneRenderRD::_process_ssao(RID p_render_buffers, RID p_environmen 
settings.fadeout_from = ssao_fadeout_from; settings.fadeout_to = ssao_fadeout_to; settings.full_screen_size = Size2i(rb->internal_width, rb->internal_height); - settings.half_screen_size = Size2i(buffer_width, buffer_height); - settings.quarter_screen_size = Size2i(half_width, half_height); - RendererCompositorRD::singleton->get_effects()->generate_ssao(p_normal_buffer, rb->ss_effects.ssao.depth_texture_view, rb->ss_effects.ssao.ao_deinterleaved, rb->ss_effects.ssao.ao_deinterleaved_slices, rb->ss_effects.ssao.ao_pong, rb->ss_effects.ssao.ao_pong_slices, rb->ss_effects.ssao.ao_final, rb->ss_effects.ssao.importance_map[0], rb->ss_effects.ssao.importance_map[1], p_projection, settings, uniform_sets_are_invalid, rb->ss_effects.ssao.gather_uniform_set, rb->ss_effects.ssao.importance_map_uniform_set); + ss_effects->ssao_allocate_buffers(rb->ss_effects.ssao, settings, rb->ss_effects.linear_depth); + ss_effects->generate_ssao(rb->ss_effects.ssao, p_normal_buffer, p_projection, settings); } void RendererSceneRenderRD::_process_ssil(RID p_render_buffers, RID p_environment, RID p_normal_buffer, const CameraMatrix &p_projection, const Transform3D &p_transform) { + ERR_FAIL_NULL(ss_effects); + RenderBuffers *rb = render_buffers_owner.get_or_null(p_render_buffers); ERR_FAIL_COND(!rb); @@ -2168,133 +2006,7 @@ void RendererSceneRenderRD::_process_ssil(RID p_render_buffers, RID p_environmen RENDER_TIMESTAMP("Process SSIL"); - if (rb->ss_effects.ssil.ssil_final.is_valid() && ssil_using_half_size != ssil_half_size) { - RD::get_singleton()->free(rb->ss_effects.ssil.ssil_final); - RD::get_singleton()->free(rb->ss_effects.ssil.deinterleaved); - RD::get_singleton()->free(rb->ss_effects.ssil.pong); - RD::get_singleton()->free(rb->ss_effects.ssil.edges); - RD::get_singleton()->free(rb->ss_effects.ssil.importance_map[0]); - RD::get_singleton()->free(rb->ss_effects.ssil.importance_map[1]); - - rb->ss_effects.ssil.ssil_final = RID(); - rb->ss_effects.ssil.deinterleaved = RID(); - 
rb->ss_effects.ssil.pong = RID(); - rb->ss_effects.ssil.edges = RID(); - rb->ss_effects.ssil.deinterleaved_slices.clear(); - rb->ss_effects.ssil.pong_slices.clear(); - rb->ss_effects.ssil.edges_slices.clear(); - rb->ss_effects.ssil.importance_map[0] = RID(); - rb->ss_effects.ssil.importance_map[1] = RID(); - } - - int buffer_width; - int buffer_height; - int half_width; - int half_height; - if (ssil_half_size) { - buffer_width = (rb->width + 3) / 4; - buffer_height = (rb->height + 3) / 4; - half_width = (rb->width + 7) / 8; - half_height = (rb->height + 7) / 8; - } else { - buffer_width = (rb->width + 1) / 2; - buffer_height = (rb->height + 1) / 2; - half_width = (rb->width + 3) / 4; - half_height = (rb->height + 3) / 4; - } - bool uniform_sets_are_invalid = false; - if (rb->ss_effects.ssil.ssil_final.is_null()) { - { - rb->ss_effects.ssil.depth_texture_view = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->ss_effects.linear_depth, 0, ssil_half_size ? 1 : 0, 4, RD::TEXTURE_SLICE_2D_ARRAY); - } - { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; - tf.width = rb->width; - tf.height = rb->height; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; - rb->ss_effects.ssil.ssil_final = RD::get_singleton()->texture_create(tf, RD::TextureView()); - RD::get_singleton()->set_resource_name(rb->ss_effects.ssil.ssil_final, "SSIL texture"); - RD::get_singleton()->texture_clear(rb->ss_effects.ssil.ssil_final, Color(0, 0, 0, 0), 0, 1, 0, 1); - if (rb->ss_effects.last_frame.is_null()) { - tf.mipmaps = 6; - rb->ss_effects.last_frame = RD::get_singleton()->texture_create(tf, RD::TextureView()); - RD::get_singleton()->set_resource_name(rb->ss_effects.last_frame, "Last Frame Radiance"); - RD::get_singleton()->texture_clear(rb->ss_effects.last_frame, Color(0, 0, 0, 0), 0, tf.mipmaps, 0, 1); - for (uint32_t i = 0; i < 6; i++) { - RID slice = 
RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->ss_effects.last_frame, 0, i); - rb->ss_effects.last_frame_slices.push_back(slice); - RD::get_singleton()->set_resource_name(slice, "Last Frame Radiance Mip " + itos(i) + " "); - } - } - } - { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; - tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; - tf.width = buffer_width; - tf.height = buffer_height; - tf.array_layers = 4; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - rb->ss_effects.ssil.deinterleaved = RD::get_singleton()->texture_create(tf, RD::TextureView()); - RD::get_singleton()->set_resource_name(rb->ss_effects.ssil.deinterleaved, "SSIL deinterleaved buffer"); - for (uint32_t i = 0; i < 4; i++) { - RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->ss_effects.ssil.deinterleaved, i, 0); - rb->ss_effects.ssil.deinterleaved_slices.push_back(slice); - RD::get_singleton()->set_resource_name(slice, "SSIL deinterleaved buffer array " + itos(i) + " "); - } - } - - { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; - tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; - tf.width = buffer_width; - tf.height = buffer_height; - tf.array_layers = 4; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - rb->ss_effects.ssil.pong = RD::get_singleton()->texture_create(tf, RD::TextureView()); - RD::get_singleton()->set_resource_name(rb->ss_effects.ssil.pong, "SSIL deinterleaved pong buffer"); - for (uint32_t i = 0; i < 4; i++) { - RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->ss_effects.ssil.pong, i, 0); - rb->ss_effects.ssil.pong_slices.push_back(slice); - RD::get_singleton()->set_resource_name(slice, "SSIL deinterleaved buffer pong array " + itos(i) + " "); - } - } - - { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R8_UNORM; - tf.texture_type = 
RD::TEXTURE_TYPE_2D_ARRAY; - tf.width = buffer_width; - tf.height = buffer_height; - tf.array_layers = 4; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - rb->ss_effects.ssil.edges = RD::get_singleton()->texture_create(tf, RD::TextureView()); - RD::get_singleton()->set_resource_name(rb->ss_effects.ssil.edges, "SSIL edges buffer"); - for (uint32_t i = 0; i < 4; i++) { - RID slice = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->ss_effects.ssil.edges, i, 0); - rb->ss_effects.ssil.edges_slices.push_back(slice); - RD::get_singleton()->set_resource_name(slice, "SSIL edges buffer slice " + itos(i) + " "); - } - } - - { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R8_UNORM; - tf.width = half_width; - tf.height = half_height; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - rb->ss_effects.ssil.importance_map[0] = RD::get_singleton()->texture_create(tf, RD::TextureView()); - RD::get_singleton()->set_resource_name(rb->ss_effects.ssil.importance_map[0], "SSIL Importance Map"); - rb->ss_effects.ssil.importance_map[1] = RD::get_singleton()->texture_create(tf, RD::TextureView()); - RD::get_singleton()->set_resource_name(rb->ss_effects.ssil.importance_map[1], "SSIL Importance Map Pong"); - } - uniform_sets_are_invalid = true; - ssil_using_half_size = ssil_half_size; - } - - EffectsRD::SSILSettings settings; + RendererRD::SSEffects::SSILSettings settings; settings.radius = env->ssil_radius; settings.intensity = env->ssil_intensity; settings.sharpness = env->ssil_sharpness; @@ -2307,8 +2019,6 @@ void RendererSceneRenderRD::_process_ssil(RID p_render_buffers, RID p_environmen settings.fadeout_from = ssil_fadeout_from; settings.fadeout_to = ssil_fadeout_to; settings.full_screen_size = Size2i(rb->width, rb->height); - settings.half_screen_size = Size2i(buffer_width, buffer_height); - settings.quarter_screen_size = Size2i(half_width, half_height); CameraMatrix correction; 
correction.set_depth_correction(true); @@ -2317,7 +2027,8 @@ void RendererSceneRenderRD::_process_ssil(RID p_render_buffers, RID p_environmen transform.set_origin(Vector3(0.0, 0.0, 0.0)); CameraMatrix last_frame_projection = rb->ss_effects.last_frame_projection * CameraMatrix(rb->ss_effects.last_frame_transform.affine_inverse()) * CameraMatrix(transform) * projection.inverse(); - RendererCompositorRD::singleton->get_effects()->screen_space_indirect_lighting(rb->ss_effects.last_frame, rb->ss_effects.ssil.ssil_final, p_normal_buffer, rb->ss_effects.ssil.depth_texture_view, rb->ss_effects.ssil.deinterleaved, rb->ss_effects.ssil.deinterleaved_slices, rb->ss_effects.ssil.pong, rb->ss_effects.ssil.pong_slices, rb->ss_effects.ssil.importance_map[0], rb->ss_effects.ssil.importance_map[1], rb->ss_effects.ssil.edges, rb->ss_effects.ssil.edges_slices, p_projection, last_frame_projection, settings, uniform_sets_are_invalid, rb->ss_effects.ssil.gather_uniform_set, rb->ss_effects.ssil.importance_map_uniform_set, rb->ss_effects.ssil.projection_uniform_set); + ss_effects->ssil_allocate_buffers(rb->ss_effects.ssil, settings, rb->ss_effects.linear_depth); + ss_effects->screen_space_indirect_lighting(rb->ss_effects.ssil, p_normal_buffer, p_projection, last_frame_projection, settings); rb->ss_effects.last_frame_projection = projection; rb->ss_effects.last_frame_transform = transform; } @@ -2326,15 +2037,15 @@ void RendererSceneRenderRD::_copy_framebuffer_to_ssil(RID p_render_buffers) { RenderBuffers *rb = render_buffers_owner.get_or_null(p_render_buffers); ERR_FAIL_COND(!rb); - if (rb->ss_effects.last_frame.is_valid()) { - copy_effects->copy_to_rect(rb->texture, rb->ss_effects.last_frame, Rect2i(0, 0, rb->width, rb->height)); + if (rb->ss_effects.ssil.last_frame.is_valid()) { + copy_effects->copy_to_rect(rb->texture, rb->ss_effects.ssil.last_frame, Rect2i(0, 0, rb->width, rb->height)); int width = rb->width; int height = rb->height; - for (int i = 0; i < 
rb->ss_effects.last_frame_slices.size() - 1; i++) { + for (int i = 0; i < rb->ss_effects.ssil.last_frame_slices.size() - 1; i++) { width = MAX(1, width >> 1); height = MAX(1, height >> 1); - copy_effects->make_mipmap(rb->ss_effects.last_frame_slices[i], rb->ss_effects.last_frame_slices[i + 1], Size2i(width, height)); + copy_effects->make_mipmap(rb->ss_effects.ssil.last_frame_slices[i], rb->ss_effects.ssil.last_frame_slices[i + 1], Size2i(width, height)); } } } @@ -5031,7 +4742,7 @@ void RendererSceneRenderRD::_pre_opaque_render(RenderDataRD *p_render_data, bool RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); //use a later barrier } - if (p_render_data->render_buffers.is_valid()) { + if (p_render_data->render_buffers.is_valid() && ss_effects) { if (p_use_ssao || p_use_ssil) { RenderBuffers *rb = render_buffers_owner.get_or_null(p_render_data->render_buffers); ERR_FAIL_COND(!rb); @@ -5056,7 +4767,7 @@ void RendererSceneRenderRD::_pre_opaque_render(RenderDataRD *p_render_data, bool invalidate_uniform_set = true; } - RendererCompositorRD::singleton->get_effects()->downsample_depth(rb->depth_texture, rb->ss_effects.linear_depth_slices, ssao_quality, ssil_quality, invalidate_uniform_set, ssao_half_size, ssil_half_size, Size2i(rb->width, rb->height), p_render_data->cam_projection); + ss_effects->downsample_depth(rb->depth_texture, rb->ss_effects.linear_depth_slices, ssao_quality, ssil_quality, invalidate_uniform_set, ssao_half_size, ssil_half_size, Size2i(rb->width, rb->height), p_render_data->cam_projection); } if (p_use_ssao) { @@ -6000,6 +5711,9 @@ void fog() { copy_effects = memnew(RendererRD::CopyEffects(!can_use_storage)); tone_mapper = memnew(RendererRD::ToneMapper); vrs = memnew(RendererRD::VRS); + if (can_use_storage) { + ss_effects = memnew(RendererRD::SSEffects); + } } RendererSceneRenderRD::~RendererSceneRenderRD() { @@ -6017,6 +5731,9 @@ RendererSceneRenderRD::~RendererSceneRenderRD() { if (vrs) { memdelete(vrs); } + if (ss_effects) { + 
memdelete(ss_effects); + } for (const KeyValue &E : shadow_cubemaps) { RD::get_singleton()->free(E.value.cubemap); diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.h b/servers/rendering/renderer_rd/renderer_scene_render_rd.h index d11bbd183e..16cf99314a 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.h +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.h @@ -37,6 +37,7 @@ #include "servers/rendering/renderer_rd/cluster_builder_rd.h" #include "servers/rendering/renderer_rd/effects/bokeh_dof.h" #include "servers/rendering/renderer_rd/effects/copy_effects.h" +#include "servers/rendering/renderer_rd/effects/ss_effects.h" #include "servers/rendering/renderer_rd/effects/tone_mapper.h" #include "servers/rendering/renderer_rd/effects/vrs.h" #include "servers/rendering/renderer_rd/environment/gi.h" @@ -141,7 +142,7 @@ protected: virtual RID _render_buffers_get_velocity_texture(RID p_render_buffers) = 0; void _process_ssao(RID p_render_buffers, RID p_environment, RID p_normal_buffer, const CameraMatrix &p_projection); - void _process_ssr(RID p_render_buffers, RID p_dest_framebuffer, RID p_normal_buffer, RID p_specular_buffer, RID p_metallic, const Color &p_metallic_mask, RID p_environment, const CameraMatrix &p_projection, bool p_use_additive); + void _process_ssr(RID p_render_buffers, RID p_dest_framebuffer, const RID *p_normal_buffer_slices, RID p_specular_buffer, const RID *p_metallic_slices, const Color &p_metallic_mask, RID p_environment, const CameraMatrix *p_projections, const Vector3 *p_eye_offsets, bool p_use_additive); void _process_sss(RID p_render_buffers, const CameraMatrix &p_camera); void _process_ssil(RID p_render_buffers, RID p_environment, RID p_normal_buffer, const CameraMatrix &p_projection, const Transform3D &p_transform); void _copy_framebuffer_to_ssil(RID p_render_buffers); @@ -163,6 +164,7 @@ protected: PagedArrayPool cull_argument_pool; PagedArray cull_argument; //need this to exist + 
RendererRD::SSEffects *ss_effects = nullptr; RendererRD::GI gi; RendererSceneSkyRD sky; @@ -418,7 +420,6 @@ private: RS::EnvironmentSSAOQuality ssao_quality = RS::ENV_SSAO_QUALITY_MEDIUM; bool ssao_half_size = false; - bool ssao_using_half_size = false; float ssao_adaptive_target = 0.5; int ssao_blur_passes = 2; float ssao_fadeout_from = 50.0; @@ -561,47 +562,14 @@ private: RID downsample_uniform_set; - RID last_frame; - Vector last_frame_slices; - CameraMatrix last_frame_projection; Transform3D last_frame_transform; - struct SSAO { - RID ao_deinterleaved; - Vector ao_deinterleaved_slices; - RID ao_pong; - Vector ao_pong_slices; - RID ao_final; - RID importance_map[2]; - RID depth_texture_view; - - RID gather_uniform_set; - RID importance_map_uniform_set; - } ssao; - - struct SSIL { - RID ssil_final; - RID deinterleaved; - Vector deinterleaved_slices; - RID pong; - Vector pong_slices; - RID edges; - Vector edges_slices; - RID importance_map[2]; - RID depth_texture_view; - - RID gather_uniform_set; - RID importance_map_uniform_set; - RID projection_uniform_set; - } ssil; + RendererRD::SSEffects::SSAORenderBuffers ssao; + RendererRD::SSEffects::SSILRenderBuffers ssil; } ss_effects; - struct SSR { - RID normal_scaled; - RID depth_scaled; - RID blur_radius[2]; - } ssr; + RendererRD::SSEffects::SSRRenderBuffers ssr; struct TAA { RID history; diff --git a/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection.glsl b/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection.glsl new file mode 100644 index 0000000000..d85ab3af2e --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection.glsl @@ -0,0 +1,254 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(rgba16f, set = 0, binding = 0) uniform restrict readonly image2D source_diffuse; +layout(r32f, set = 0, binding = 1) uniform restrict readonly image2D source_depth; +layout(rgba16f, set 
= 1, binding = 0) uniform restrict writeonly image2D ssr_image; +#ifdef MODE_ROUGH +layout(r8, set = 1, binding = 1) uniform restrict writeonly image2D blur_radius_image; +#endif +layout(rgba8, set = 2, binding = 0) uniform restrict readonly image2D source_normal_roughness; +layout(set = 3, binding = 0) uniform sampler2D source_metallic; + +layout(push_constant, std430) uniform Params { + vec4 proj_info; + + ivec2 screen_size; + float camera_z_near; + float camera_z_far; + + int num_steps; + float depth_tolerance; + float distance_fade; + float curve_fade_in; + + bool orthogonal; + float filter_mipmap_levels; + bool use_half_res; + uint metallic_mask; + + uint view_index; + uint pad1; + uint pad2; + uint pad3; +} +params; + +#include "screen_space_reflection_inc.glsl" + +vec2 view_to_screen(vec3 view_pos, out float w) { + vec4 projected = scene_data.projection[params.view_index] * vec4(view_pos, 1.0); + projected.xyz /= projected.w; + projected.xy = projected.xy * 0.5 + 0.5; + w = projected.w; + return projected.xy; +} + +#define M_PI 3.14159265359 + +void main() { + // Pixel being shaded + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + + if (any(greaterThanEqual(ssC.xy, params.screen_size))) { //too large, do nothing + return; + } + + vec2 pixel_size = 1.0 / vec2(params.screen_size); + vec2 uv = vec2(ssC.xy) * pixel_size; + + uv += pixel_size * 0.5; + + float base_depth = imageLoad(source_depth, ssC).r; + + // World space point being shaded + vec3 vertex = reconstructCSPosition(uv * vec2(params.screen_size), base_depth); + + vec4 normal_roughness = imageLoad(source_normal_roughness, ssC); + vec3 normal = normal_roughness.xyz * 2.0 - 1.0; + normal = normalize(normal); + normal.y = -normal.y; //because this code reads flipped + + vec3 view_dir; + if (sc_multiview) { + view_dir = normalize(vertex + scene_data.eye_offset[params.view_index].xyz); + } else { + view_dir = normalize(vertex); + } + vec3 ray_dir = normalize(reflect(view_dir, normal)); + + if (dot(ray_dir, 
normal) < 0.001) { + imageStore(ssr_image, ssC, vec4(0.0)); + return; + } + //ray_dir = normalize(view_dir - normal * dot(normal,view_dir) * 2.0); + //ray_dir = normalize(vec3(1.0, 1.0, -1.0)); + + //////////////// + + // make ray length and clip it against the near plane (don't want to trace beyond visible) + float ray_len = (vertex.z + ray_dir.z * params.camera_z_far) > -params.camera_z_near ? (-params.camera_z_near - vertex.z) / ray_dir.z : params.camera_z_far; + vec3 ray_end = vertex + ray_dir * ray_len; + + float w_begin; + vec2 vp_line_begin = view_to_screen(vertex, w_begin); + float w_end; + vec2 vp_line_end = view_to_screen(ray_end, w_end); + vec2 vp_line_dir = vp_line_end - vp_line_begin; + + // we need to interpolate w along the ray, to generate perspective correct reflections + w_begin = 1.0 / w_begin; + w_end = 1.0 / w_end; + + float z_begin = vertex.z * w_begin; + float z_end = ray_end.z * w_end; + + vec2 line_begin = vp_line_begin / pixel_size; + vec2 line_dir = vp_line_dir / pixel_size; + float z_dir = z_end - z_begin; + float w_dir = w_end - w_begin; + + // clip the line to the viewport edges + + float scale_max_x = min(1.0, 0.99 * (1.0 - vp_line_begin.x) / max(1e-5, vp_line_dir.x)); + float scale_max_y = min(1.0, 0.99 * (1.0 - vp_line_begin.y) / max(1e-5, vp_line_dir.y)); + float scale_min_x = min(1.0, 0.99 * vp_line_begin.x / max(1e-5, -vp_line_dir.x)); + float scale_min_y = min(1.0, 0.99 * vp_line_begin.y / max(1e-5, -vp_line_dir.y)); + float line_clip = min(scale_max_x, scale_max_y) * min(scale_min_x, scale_min_y); + line_dir *= line_clip; + z_dir *= line_clip; + w_dir *= line_clip; + + // clip z and w advance to line advance + vec2 line_advance = normalize(line_dir); // down to pixel + float step_size = length(line_advance) / length(line_dir); + float z_advance = z_dir * step_size; // adapt z advance to line advance + float w_advance = w_dir * step_size; // adapt w advance to line advance + + // make line advance faster if direction is closer 
to pixel edges (this avoids sampling the same pixel twice) + float advance_angle_adj = 1.0 / max(abs(line_advance.x), abs(line_advance.y)); + line_advance *= advance_angle_adj; // adapt z advance to line advance + z_advance *= advance_angle_adj; + w_advance *= advance_angle_adj; + + vec2 pos = line_begin; + float z = z_begin; + float w = w_begin; + float z_from = z / w; + float z_to = z_from; + float depth; + vec2 prev_pos = pos; + + bool found = false; + + float steps_taken = 0.0; + + for (int i = 0; i < params.num_steps; i++) { + pos += line_advance; + z += z_advance; + w += w_advance; + + // convert to linear depth + + depth = imageLoad(source_depth, ivec2(pos - 0.5)).r; + if (sc_multiview) { + depth = depth * 2.0 - 1.0; + depth = 2.0 * params.camera_z_near * params.camera_z_far / (params.camera_z_far + params.camera_z_near - depth * (params.camera_z_far - params.camera_z_near)); + depth = -depth; + } + + z_from = z_to; + z_to = z / w; + + if (depth > z_to) { + // if depth was surpassed + if (depth <= max(z_to, z_from) + params.depth_tolerance && -depth < params.camera_z_far) { + // check the depth tolerance and far clip + // check that normal is valid + found = true; + } + break; + } + + steps_taken += 1.0; + prev_pos = pos; + } + + if (found) { + float margin_blend = 1.0; + + vec2 margin = vec2((params.screen_size.x + params.screen_size.y) * 0.5 * 0.05); // make a uniform margin + if (any(bvec4(lessThan(pos, -margin), greaterThan(pos, params.screen_size + margin)))) { + // clip outside screen + margin + imageStore(ssr_image, ssC, vec4(0.0)); + return; + } + + { + //blend fading out towards external margin + vec2 margin_grad = mix(pos - params.screen_size, -pos, lessThan(pos, vec2(0.0))); + margin_blend = 1.0 - smoothstep(0.0, margin.x, max(margin_grad.x, margin_grad.y)); + //margin_blend = 1.0; + } + + vec2 final_pos; + float grad = (steps_taken + 1.0) / float(params.num_steps); + float initial_fade = params.curve_fade_in == 0.0 ? 
1.0 : pow(clamp(grad, 0.0, 1.0), params.curve_fade_in); + float fade = pow(clamp(1.0 - grad, 0.0, 1.0), params.distance_fade) * initial_fade; + final_pos = pos; + + vec4 final_color; + +#ifdef MODE_ROUGH + + // if roughness is enabled, do screen space cone tracing + float blur_radius = 0.0; + float roughness = normal_roughness.w; + + if (roughness > 0.001) { + float cone_angle = min(roughness, 0.999) * M_PI * 0.5; + float cone_len = length(final_pos - line_begin); + float op_len = 2.0 * tan(cone_angle) * cone_len; // opposite side of iso triangle + { + // fit to sphere inside cone (sphere ends at end of cone), something like this: + // ___ + // \O/ + // V + // + // as it avoids bleeding from beyond the reflection as much as possible. As a plus + // it also makes the rough reflection more elongated. + float a = op_len; + float h = cone_len; + float a2 = a * a; + float fh2 = 4.0f * h * h; + blur_radius = (a * (sqrt(a2 + fh2) - a)) / (4.0f * h); + } + } + + // Isn't this going to be overwritten after our endif? 
+ final_color = imageLoad(source_diffuse, ivec2((final_pos - 0.5) * pixel_size)); + + imageStore(blur_radius_image, ssC, vec4(blur_radius / 255.0)); //stored in r8 + +#endif // MODE_ROUGH + + final_color = vec4(imageLoad(source_diffuse, ivec2(final_pos - 0.5)).rgb, fade * margin_blend); + + //change blend by metallic + vec4 metallic_mask = unpackUnorm4x8(params.metallic_mask); + final_color.a *= dot(metallic_mask, texelFetch(source_metallic, ssC << 1, 0)); + + imageStore(ssr_image, ssC, final_color); + + } else { +#ifdef MODE_ROUGH + imageStore(blur_radius_image, ssC, vec4(0.0)); +#endif + imageStore(ssr_image, ssC, vec4(0.0)); + } +} diff --git a/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_filter.glsl b/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_filter.glsl new file mode 100644 index 0000000000..a63d60e0b2 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_filter.glsl @@ -0,0 +1,148 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(rgba16f, set = 0, binding = 0) uniform restrict readonly image2D source_ssr; +layout(r8, set = 0, binding = 1) uniform restrict readonly image2D source_radius; +layout(rgba8, set = 1, binding = 0) uniform restrict readonly image2D source_normal; + +layout(rgba16f, set = 2, binding = 0) uniform restrict writeonly image2D dest_ssr; +#ifndef VERTICAL_PASS +layout(r8, set = 2, binding = 1) uniform restrict writeonly image2D dest_radius; +#endif +layout(r32f, set = 3, binding = 0) uniform restrict readonly image2D source_depth; + +layout(push_constant, std430) uniform Params { + vec4 proj_info; + + bool orthogonal; + float edge_tolerance; + int increment; + uint view_index; + + ivec2 screen_size; + bool vertical; + uint steps; +} +params; + +#include "screen_space_reflection_inc.glsl" + +#define GAUSS_TABLE_SIZE 15 + +const float gauss_table[GAUSS_TABLE_SIZE + 1] = float[]( 
+ 0.1847392078702266, + 0.16595854345772326, + 0.12031364177766891, + 0.07038755277896766, + 0.03322925565155569, + 0.012657819729901945, + 0.0038903040680094217, + 0.0009646503390864025, + 0.00019297087402915717, + 0.000031139936308099136, + 0.000004053309048174758, + 4.255228059965837e-7, + 3.602517634249573e-8, + 2.4592560765896795e-9, + 1.3534945386863618e-10, + 0.0 //one more for interpolation +); + +float gauss_weight(float p_val) { + float idxf; + float c = modf(max(0.0, p_val * float(GAUSS_TABLE_SIZE)), idxf); + int idx = int(idxf); + if (idx >= GAUSS_TABLE_SIZE) { // the next table entry is also read below, so idx must stop before the last one + return 0.0; + } + + return mix(gauss_table[idx], gauss_table[idx + 1], c); +} + +#define M_PI 3.14159265359 + +void do_filter(inout vec4 accum, inout float accum_radius, inout float divisor, ivec2 texcoord, ivec2 increment, vec3 p_pos, vec3 normal, float p_limit_radius) { + for (int i = 1; i < params.steps; i++) { + float d = float(i * params.increment); + ivec2 tc = texcoord + increment * i; + float depth = imageLoad(source_depth, tc).r; + vec3 view_pos = reconstructCSPosition(vec2(tc) + 0.5, depth); + vec3 view_normal = normalize(imageLoad(source_normal, tc).rgb * 2.0 - 1.0); + view_normal.y = -view_normal.y; + + float r = imageLoad(source_radius, tc).r; + float radius = round(r * 255.0); + + float angle_n = 1.0 - abs(dot(normal, view_normal)); + if (angle_n > params.edge_tolerance) { + break; + } + + float angle = abs(dot(normal, normalize(view_pos - p_pos))); + + if (angle > params.edge_tolerance) { + break; + } + + if (d < radius) { + float w = gauss_weight(d / radius); + accum += imageLoad(source_ssr, tc) * w; +#ifndef VERTICAL_PASS + accum_radius += r * w; +#endif + divisor += w; + } + } +} + +void main() { + // Pixel being shaded + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + + if (any(greaterThanEqual(ssC.xy, params.screen_size))) { //too large, do nothing + return; + } + + float base_contrib = gauss_table[0]; + + vec4 accum = imageLoad(source_ssr, ssC); + + float accum_radius = 
imageLoad(source_radius, ssC).r; + float radius = accum_radius * 255.0; + + float divisor = gauss_table[0]; + accum *= divisor; + accum_radius *= divisor; +#ifdef VERTICAL_PASS + ivec2 direction = ivec2(0, params.increment); +#else + ivec2 direction = ivec2(params.increment, 0); +#endif + float depth = imageLoad(source_depth, ssC).r; + vec3 pos = reconstructCSPosition(vec2(ssC.xy) + 0.5, depth); + vec3 normal = imageLoad(source_normal, ssC).xyz * 2.0 - 1.0; + normal = normalize(normal); + normal.y = -normal.y; + + do_filter(accum, accum_radius, divisor, ssC.xy, direction, pos, normal, radius); + do_filter(accum, accum_radius, divisor, ssC.xy, -direction, pos, normal, radius); + + if (divisor > 0.0) { + accum /= divisor; + accum_radius /= divisor; + } else { + accum = vec4(0.0); + accum_radius = 0.0; + } + + imageStore(dest_ssr, ssC, accum); + +#ifndef VERTICAL_PASS + imageStore(dest_radius, ssC, vec4(accum_radius)); +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_inc.glsl b/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_inc.glsl new file mode 100644 index 0000000000..26405ab040 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_inc.glsl @@ -0,0 +1,28 @@ +layout(constant_id = 0) const bool sc_multiview = false; + +layout(set = 4, binding = 0, std140) uniform SceneData { + mat4x4 projection[2]; + mat4x4 inv_projection[2]; + vec4 eye_offset[2]; +} +scene_data; + +vec3 reconstructCSPosition(vec2 screen_pos, float z) { + if (sc_multiview) { + vec4 pos; + pos.xy = (2.0 * vec2(screen_pos) / vec2(params.screen_size)) - 1.0; + pos.z = z * 2.0 - 1.0; + pos.w = 1.0; + + pos = scene_data.inv_projection[params.view_index] * pos; + pos.xyz /= pos.w; + + return pos.xyz; + } else { + if (params.orthogonal) { + return vec3((screen_pos.xy * params.proj_info.xy + params.proj_info.zw), z); + } else { + return vec3((screen_pos.xy * params.proj_info.xy + params.proj_info.zw) * z, 
z); + } + } +} diff --git a/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_scale.glsl b/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_scale.glsl new file mode 100644 index 0000000000..a7da0812df --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_scale.glsl @@ -0,0 +1,106 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +/* Specialization Constants (Toggles) */ + +layout(constant_id = 0) const bool sc_multiview = false; + +/* inputs */ +layout(set = 0, binding = 0) uniform sampler2D source_ssr; +layout(set = 1, binding = 0) uniform sampler2D source_depth; +layout(set = 1, binding = 1) uniform sampler2D source_normal; +layout(rgba16f, set = 2, binding = 0) uniform restrict writeonly image2D dest_ssr; +layout(r32f, set = 3, binding = 0) uniform restrict writeonly image2D dest_depth; +layout(rgba8, set = 3, binding = 1) uniform restrict writeonly image2D dest_normal; + +layout(push_constant, std430) uniform Params { + ivec2 screen_size; + float camera_z_near; + float camera_z_far; + + bool orthogonal; + bool filtered; + uint pad[2]; +} +params; + +void main() { + // Pixel being shaded + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + + if (any(greaterThanEqual(ssC.xy, params.screen_size))) { //too large, do nothing + return; + } + //do not filter, SSR will generate artifacts if this is done + + float divisor = 0.0; + vec4 color; + float depth; + vec4 normal; + + if (params.filtered) { + color = vec4(0.0); + depth = 0.0; + normal = vec4(0.0); + + for (int i = 0; i < 4; i++) { + ivec2 ofs = ssC << 1; + if (bool(i & 1)) { + ofs.x += 1; + } + if (bool(i & 2)) { + ofs.y += 1; + } + color += texelFetch(source_ssr, ofs, 0); + float d = texelFetch(source_depth, ofs, 0).r; + vec4 nr = texelFetch(source_normal, ofs, 0); + normal.xyz += nr.xyz * 2.0 - 1.0; + normal.w += nr.w; + + if (sc_multiview) { + // we're doing a full 
unproject so we need the value as is. + depth += d; + } else { + // unproject our Z value so we can use it directly. + d = d * 2.0 - 1.0; + if (params.orthogonal) { + d = ((d + (params.camera_z_far + params.camera_z_near) / (params.camera_z_far - params.camera_z_near)) * (params.camera_z_far - params.camera_z_near)) / 2.0; + } else { + d = 2.0 * params.camera_z_near * params.camera_z_far / (params.camera_z_far + params.camera_z_near - d * (params.camera_z_far - params.camera_z_near)); + } + depth += -d; + } + } + + color /= 4.0; + depth /= 4.0; + normal.xyz = normalize(normal.xyz / 4.0) * 0.5 + 0.5; + normal.w /= 4.0; + } else { + ivec2 ofs = ssC << 1; + + color = texelFetch(source_ssr, ofs, 0); + depth = texelFetch(source_depth, ofs, 0).r; + normal = texelFetch(source_normal, ofs, 0); + + if (!sc_multiview) { + // unproject our Z value so we can use it directly. + depth = depth * 2.0 - 1.0; + if (params.orthogonal) { + depth = ((depth + (params.camera_z_far + params.camera_z_near) / (params.camera_z_far - params.camera_z_near)) * (params.camera_z_far - params.camera_z_near)) / 2.0; + } else { + depth = 2.0 * params.camera_z_near * params.camera_z_far / (params.camera_z_far + params.camera_z_near - depth * (params.camera_z_far - params.camera_z_near)); + } + depth = -depth; + } + } + + imageStore(dest_ssr, ssC, color); + imageStore(dest_depth, ssC, vec4(depth)); + imageStore(dest_normal, ssC, normal); +} diff --git a/servers/rendering/renderer_rd/shaders/effects/specular_merge.glsl b/servers/rendering/renderer_rd/shaders/effects/specular_merge.glsl new file mode 100644 index 0000000000..c62144fdaf --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/specular_merge.glsl @@ -0,0 +1,112 @@ +#[vertex] + +#version 450 + +#VERSION_DEFINES + +#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview) +#extension GL_EXT_multiview : enable +#endif + +#ifdef USE_MULTIVIEW +#ifdef has_VK_KHR_multiview +#define ViewIndex gl_ViewIndex +#else // 
has_VK_KHR_multiview +// !BAS! This needs to become an input once we implement our fallback! +#define ViewIndex 0 +#endif // has_VK_KHR_multiview +#else // USE_MULTIVIEW +// Set to zero, not supported in non stereo +#define ViewIndex 0 +#endif //USE_MULTIVIEW + +#ifdef USE_MULTIVIEW +layout(location = 0) out vec3 uv_interp; +#else // USE_MULTIVIEW +layout(location = 0) out vec2 uv_interp; +#endif //USE_MULTIVIEW + +void main() { + vec2 base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0)); + +#ifdef USE_MULTIVIEW + uv_interp = vec3(base_arr[gl_VertexIndex], ViewIndex); + + gl_Position = vec4(uv_interp.xy * 2.0 - 1.0, 0.0, 1.0); +#else + uv_interp = base_arr[gl_VertexIndex]; + + gl_Position = vec4(uv_interp * 2.0 - 1.0, 0.0, 1.0); +#endif +} + +#[fragment] + +#version 450 + +#VERSION_DEFINES + +#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview) +#extension GL_EXT_multiview : enable +#endif + +#ifdef USE_MULTIVIEW +#ifdef has_VK_KHR_multiview +#define ViewIndex gl_ViewIndex +#else // has_VK_KHR_multiview +// !BAS! This needs to become an input once we implement our fallback! 
+#define ViewIndex 0 +#endif // has_VK_KHR_multiview +#else // USE_MULTIVIEW +// Set to zero, not supported in non stereo +#define ViewIndex 0 +#endif //USE_MULTIVIEW + +#ifdef USE_MULTIVIEW +layout(location = 0) in vec3 uv_interp; +#else // USE_MULTIVIEW +layout(location = 0) in vec2 uv_interp; +#endif //USE_MULTIVIEW + +#ifdef USE_MULTIVIEW +layout(set = 0, binding = 0) uniform sampler2DArray specular; +#else // USE_MULTIVIEW +layout(set = 0, binding = 0) uniform sampler2D specular; +#endif //USE_MULTIVIEW + +#ifdef MODE_SSR + +#ifdef USE_MULTIVIEW +layout(set = 1, binding = 0) uniform sampler2DArray ssr; +#else // USE_MULTIVIEW +layout(set = 1, binding = 0) uniform sampler2D ssr; +#endif //USE_MULTIVIEW + +#endif + +#ifdef MODE_MERGE + +#ifdef USE_MULTIVIEW +layout(set = 2, binding = 0) uniform sampler2DArray diffuse; +#else // USE_MULTIVIEW +layout(set = 2, binding = 0) uniform sampler2D diffuse; +#endif //USE_MULTIVIEW + +#endif + +layout(location = 0) out vec4 frag_color; + +void main() { + frag_color.rgb = texture(specular, uv_interp).rgb; + frag_color.a = 0.0; +#ifdef MODE_SSR + + vec4 ssr_color = texture(ssr, uv_interp); + frag_color.rgb = mix(frag_color.rgb, ssr_color.rgb, ssr_color.a); +#endif + +#ifdef MODE_MERGE + frag_color += texture(diffuse, uv_interp); +#endif + //added using additive blend +} diff --git a/servers/rendering/renderer_rd/shaders/effects/ss_effects_downsample.glsl b/servers/rendering/renderer_rd/shaders/effects/ss_effects_downsample.glsl new file mode 100644 index 0000000000..134aae5ce7 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ss_effects_downsample.glsl @@ -0,0 +1,229 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016, Intel Corporation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software 
without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2016-09-07: filip.strugar@intel.com: first commit +// 2020-12-05: clayjohn: convert to Vulkan and Godot +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[compute] + +#version 450 + +#VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(push_constant, std430) uniform Params { + vec2 pixel_size; + float z_far; + float z_near; + bool orthogonal; + float radius_sq; + uvec2 pad; +} +params; + +layout(set = 0, binding = 0) uniform sampler2D source_depth; + +layout(r16f, set = 1, binding = 0) uniform restrict writeonly image2DArray dest_image0; //rename +#ifdef GENERATE_MIPS +layout(r16f, set = 2, binding = 0) uniform restrict writeonly image2DArray dest_image1; +layout(r16f, set = 2, binding = 1) uniform restrict writeonly image2DArray dest_image2; +layout(r16f, set = 2, binding = 2) uniform restrict writeonly image2DArray dest_image3; 
+#ifdef GENERATE_FULL_MIPS +layout(r16f, set = 2, binding = 3) uniform restrict writeonly image2DArray dest_image4; +#endif +#endif + +vec4 screen_space_to_view_space_depth(vec4 p_depth) { + if (params.orthogonal) { + vec4 depth = p_depth * 2.0 - 1.0; + return ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; + } + + float depth_linearize_mul = params.z_near; + float depth_linearize_add = params.z_far; + + // Optimised version of "-cameraClipNear / (cameraClipFar - projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar" + + // Set your depth_linearize_mul and depth_linearize_add to: + // depth_linearize_mul = ( cameraClipFar * cameraClipNear) / ( cameraClipFar - cameraClipNear ); + // depth_linearize_add = cameraClipFar / ( cameraClipFar - cameraClipNear ); + + return depth_linearize_mul / (depth_linearize_add - p_depth); +} + +float screen_space_to_view_space_depth(float p_depth) { + if (params.orthogonal) { + float depth = p_depth * 2.0 - 1.0; + return ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / (2.0 * params.z_far); // NOTE(review): the vec4 overload divides by 2.0 only; the extra params.z_far factor here looks unintended - confirm + } + + float depth_linearize_mul = params.z_near; + float depth_linearize_add = params.z_far; + + return depth_linearize_mul / (depth_linearize_add - p_depth); +} + +#ifdef GENERATE_MIPS + +shared float depth_buffer[4][8][8]; + +float mip_smart_average(vec4 p_depths) { + float closest = min(min(p_depths.x, p_depths.y), min(p_depths.z, p_depths.w)); + float fallof_sq = -1.0f / params.radius_sq; + vec4 dists = p_depths - closest.xxxx; + vec4 weights = clamp(dists * dists * fallof_sq + 1.0, 0.0, 1.0); + return dot(weights, p_depths) / dot(weights, vec4(1.0, 1.0, 1.0, 1.0)); +} + +void prepare_depths_and_mips(vec4 p_samples, uvec2 p_output_coord, uvec2 p_gtid) { + p_samples = screen_space_to_view_space_depth(p_samples); + + depth_buffer[0][p_gtid.x][p_gtid.y] = p_samples.w; + depth_buffer[1][p_gtid.x][p_gtid.y] = 
p_samples.z; + depth_buffer[2][p_gtid.x][p_gtid.y] = p_samples.x; + depth_buffer[3][p_gtid.x][p_gtid.y] = p_samples.y; + + imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 0), vec4(p_samples.w)); + imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 1), vec4(p_samples.z)); + imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 2), vec4(p_samples.x)); + imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 3), vec4(p_samples.y)); + + uint depth_array_index = 2 * (p_gtid.y % 2) + (p_gtid.x % 2); + uvec2 depth_array_offset = ivec2(p_gtid.x % 2, p_gtid.y % 2); + ivec2 buffer_coord = ivec2(p_gtid) - ivec2(depth_array_offset); + + p_output_coord /= 2; + groupMemoryBarrier(); + barrier(); + + // if (still_alive) <-- all threads alive here + { + float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 0]; + float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 1]; + float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 1][buffer_coord.y + 0]; + float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 1][buffer_coord.y + 1]; + + float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11)); + imageStore(dest_image1, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg)); + depth_buffer[depth_array_index][buffer_coord.x][buffer_coord.y] = avg; + } + + bool still_alive = p_gtid.x % 4 == depth_array_offset.x && p_gtid.y % 4 == depth_array_offset.y; + + p_output_coord /= 2; + groupMemoryBarrier(); + barrier(); + + if (still_alive) { + float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 0]; + float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 2]; + float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 2][buffer_coord.y + 0]; + float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 2][buffer_coord.y + 2]; + + 
float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11)); + imageStore(dest_image2, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg)); + depth_buffer[depth_array_index][buffer_coord.x][buffer_coord.y] = avg; + } + + still_alive = p_gtid.x % 8 == depth_array_offset.x && p_gtid.y % 8 == depth_array_offset.y; // test the thread id on both axes; comparing the offset with itself kept every row alive and read past depth_buffer + + p_output_coord /= 2; + groupMemoryBarrier(); + barrier(); + + if (still_alive) { + float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 0]; + float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 4]; + float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 4][buffer_coord.y + 0]; + float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 4][buffer_coord.y + 4]; + + float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11)); + imageStore(dest_image3, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg)); +#ifndef GENERATE_FULL_MIPS + } +#else + depth_buffer[depth_array_index][buffer_coord.x][buffer_coord.y] = avg; + } + still_alive = p_gtid.x % 16 == depth_array_offset.x && p_gtid.y % 16 == depth_array_offset.y; // NOTE(review): with the 8x8 workgroup and depth_buffer[4][8][8] the 8-texel offsets read below still fall outside the shared array - confirm GENERATE_FULL_MIPS usage + + p_output_coord /= 2; + groupMemoryBarrier(); + barrier(); + + if (still_alive) { + float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 0]; + float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 8]; + float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 8][buffer_coord.y + 0]; + float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 8][buffer_coord.y + 8]; + + float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11)); + imageStore(dest_image4, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg)); + } +#endif +} +#else +#ifndef USE_HALF_BUFFERS +void prepare_depths(vec4 p_samples, uvec2 p_tid) { + p_samples = 
screen_space_to_view_space_depth(p_samples); + + imageStore(dest_image0, ivec3(p_tid, 0), vec4(p_samples.w)); + imageStore(dest_image0, ivec3(p_tid, 1), vec4(p_samples.z)); + imageStore(dest_image0, ivec3(p_tid, 2), vec4(p_samples.x)); + imageStore(dest_image0, ivec3(p_tid, 3), vec4(p_samples.y)); +} +#endif +#endif + +void main() { +#ifdef USE_HALF_BUFFERS +#ifdef USE_HALF_SIZE + float sample_00 = texelFetch(source_depth, ivec2(4 * gl_GlobalInvocationID.x + 0, 4 * gl_GlobalInvocationID.y + 0), 0).x; + float sample_11 = texelFetch(source_depth, ivec2(4 * gl_GlobalInvocationID.x + 2, 4 * gl_GlobalInvocationID.y + 2), 0).x; +#else + float sample_00 = texelFetch(source_depth, ivec2(2 * gl_GlobalInvocationID.x + 0, 2 * gl_GlobalInvocationID.y + 0), 0).x; + float sample_11 = texelFetch(source_depth, ivec2(2 * gl_GlobalInvocationID.x + 1, 2 * gl_GlobalInvocationID.y + 1), 0).x; +#endif + sample_00 = screen_space_to_view_space_depth(sample_00); + sample_11 = screen_space_to_view_space_depth(sample_11); + + imageStore(dest_image0, ivec3(gl_GlobalInvocationID.xy, 0), vec4(sample_00)); + imageStore(dest_image0, ivec3(gl_GlobalInvocationID.xy, 3), vec4(sample_11)); +#else //!USE_HALF_BUFFERS +#ifdef USE_HALF_SIZE + ivec2 depth_buffer_coord = 4 * ivec2(gl_GlobalInvocationID.xy); + ivec2 output_coord = ivec2(gl_GlobalInvocationID); + + vec2 uv = (vec2(depth_buffer_coord) + 0.5f) * params.pixel_size; + vec4 samples; + samples.x = textureLodOffset(source_depth, uv, 0, ivec2(0, 2)).x; + samples.y = textureLodOffset(source_depth, uv, 0, ivec2(2, 2)).x; + samples.z = textureLodOffset(source_depth, uv, 0, ivec2(2, 0)).x; + samples.w = textureLodOffset(source_depth, uv, 0, ivec2(0, 0)).x; +#else + ivec2 depth_buffer_coord = 2 * ivec2(gl_GlobalInvocationID.xy); + ivec2 output_coord = ivec2(gl_GlobalInvocationID); + + vec2 uv = (vec2(depth_buffer_coord) + 0.5f) * params.pixel_size; + vec4 samples = textureGather(source_depth, uv); +#endif +#ifdef GENERATE_MIPS + 
prepare_depths_and_mips(samples, output_coord, gl_LocalInvocationID.xy); +#else + prepare_depths(samples, gl_GlobalInvocationID.xy); +#endif +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/effects/ssao.glsl b/servers/rendering/renderer_rd/shaders/effects/ssao.glsl new file mode 100644 index 0000000000..2a87e273bc --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ssao.glsl @@ -0,0 +1,483 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016, Intel Corporation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2016-09-07: filip.strugar@intel.com: first commit +// 2020-12-05: clayjohn: convert to Vulkan and Godot +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[compute] + +#version 450 + +#VERSION_DEFINES + +#define INTELSSAO_MAIN_DISK_SAMPLE_COUNT (32) +const vec4 sample_pattern[INTELSSAO_MAIN_DISK_SAMPLE_COUNT] = { + vec4(0.78488064, 0.56661671, 1.500000, -0.126083), vec4(0.26022232, -0.29575172, 1.500000, -1.064030), vec4(0.10459357, 0.08372527, 1.110000, -2.730563), vec4(-0.68286800, 0.04963045, 1.090000, -0.498827), + vec4(-0.13570161, -0.64190155, 1.250000, -0.532765), vec4(-0.26193795, -0.08205118, 0.670000, -1.783245), vec4(-0.61177456, 0.66664219, 0.710000, -0.044234), vec4(0.43675563, 0.25119025, 0.610000, -1.167283), + vec4(0.07884444, 0.86618668, 0.640000, -0.459002), vec4(-0.12790935, -0.29869005, 0.600000, -1.729424), vec4(-0.04031125, 0.02413622, 0.600000, -4.792042), vec4(0.16201244, -0.52851415, 0.790000, -1.067055), + vec4(-0.70991218, 0.47301072, 0.640000, -0.335236), vec4(0.03277707, -0.22349690, 0.600000, -1.982384), vec4(0.68921727, 0.36800742, 0.630000, -0.266718), vec4(0.29251814, 0.37775412, 0.610000, -1.422520), + vec4(-0.12224089, 0.96582592, 0.600000, -0.426142), vec4(0.11071457, -0.16131058, 0.600000, -2.165947), vec4(0.46562141, -0.59747696, 0.600000, -0.189760), vec4(-0.51548797, 0.11804193, 0.600000, -1.246800), + vec4(0.89141309, -0.42090443, 0.600000, 0.028192), vec4(-0.32402530, -0.01591529, 0.600000, -1.543018), vec4(0.60771245, 0.41635221, 0.600000, -0.605411), vec4(0.02379565, -0.08239821, 0.600000, -3.809046), + vec4(0.48951152, -0.23657045, 0.600000, -1.189011), vec4(-0.17611565, -0.81696892, 0.600000, -0.513724), vec4(-0.33930185, -0.20732205, 0.600000, -1.698047), vec4(-0.91974425, 0.05403209, 0.600000, 
0.062246), + vec4(-0.15064627, -0.14949332, 0.600000, -1.896062), vec4(0.53180975, -0.35210401, 0.600000, -0.758838), vec4(0.41487166, 0.81442589, 0.600000, -0.505648), vec4(-0.24106961, -0.32721516, 0.600000, -1.665244) +}; + +// these values can be changed (up to SSAO_MAX_TAPS) with no changes required elsewhere; values for 4th and 5th preset are ignored but array needed to avoid compilation errors +// the actual number of texture samples is two times this value (each "tap" has two symmetrical depth texture samples) +const int num_taps[5] = { 3, 5, 12, 0, 0 }; + +#define SSAO_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET (99) // to disable simply set to 99 or similar +#define SSAO_TILT_SAMPLES_AMOUNT (0.4) +// +#define SSAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET (1) // to disable simply set to 99 or similar +#define SSAO_HALOING_REDUCTION_AMOUNT (0.6) // values from 0.0 - 1.0, 1.0 means max weighting (will cause artifacts, 0.8 is more reasonable) +// +#define SSAO_NORMAL_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (2) // to disable simply set to 99 or similar +#define SSAO_NORMAL_BASED_EDGES_DOT_THRESHOLD (0.5) // use 0-0.1 for super-sharp normal-based edges +// +#define SSAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET (1) // whether to use detail; to disable simply set to 99 or similar +// +#define SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET (2) // !!warning!! the MIP generation on the C++ side will be enabled on quality preset 2 regardless of this value, so if changing here, change the C++ side too +#define SSAO_DEPTH_MIPS_GLOBAL_OFFSET (-4.3) // best noise/quality/performance tradeoff, found empirically +// +// !!warning!! 
the edge handling is hard-coded to 'disabled' on quality level 0, and enabled above, on the C++ side; while toggling it here will work for +// testing purposes, it will not yield performance gains (or correct results) +#define SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (1) +// +#define SSAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET (1) + +#define SSAO_MAX_TAPS 32 +#define SSAO_ADAPTIVE_TAP_BASE_COUNT 5 +#define SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT (SSAO_MAX_TAPS - SSAO_ADAPTIVE_TAP_BASE_COUNT) +#define SSAO_DEPTH_MIP_LEVELS 4 + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(set = 0, binding = 0) uniform sampler2DArray source_depth_mipmaps; +layout(rgba8, set = 0, binding = 1) uniform restrict readonly image2D source_normal; +layout(set = 0, binding = 2) uniform Constants { //get into a lower set + vec4 rotation_matrices[20]; +} +constants; + +#ifdef ADAPTIVE +layout(rg8, set = 1, binding = 0) uniform restrict readonly image2DArray source_ssao; +layout(set = 1, binding = 1) uniform sampler2D source_importance; +layout(set = 1, binding = 2, std430) buffer Counter { + uint sum; +} +counter; +#endif + +layout(rg8, set = 2, binding = 0) uniform restrict writeonly image2D dest_image; + +// This push_constant is full - 128 bytes - if you need to add more data, consider adding to the uniform buffer instead +layout(push_constant, std430) uniform Params { + ivec2 screen_size; + int pass; + int quality; + + vec2 half_screen_pixel_size; + int size_multiplier; + float detail_intensity; + + vec2 NDC_to_view_mul; + vec2 NDC_to_view_add; + + vec2 pad2; + vec2 half_screen_pixel_size_x025; + + float radius; + float intensity; + float shadow_power; + float shadow_clamp; + + float fade_out_mul; + float fade_out_add; + float horizon_angle_threshold; + float inv_radius_near_limit; + + bool is_orthogonal; + float neg_inv_radius; + float load_counter_avg_div; + float adaptive_sample_limit; + + ivec2 pass_coord_offset; + vec2 pass_uv_offset; +} 
+params; + +// packing/unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions! +float pack_edges(vec4 p_edgesLRTB) { + p_edgesLRTB = round(clamp(p_edgesLRTB, 0.0, 1.0) * 3.05); + return dot(p_edgesLRTB, vec4(64.0 / 255.0, 16.0 / 255.0, 4.0 / 255.0, 1.0 / 255.0)); +} + +vec3 NDC_to_view_space(vec2 p_pos, float p_viewspace_depth) { + if (params.is_orthogonal) { + return vec3((params.NDC_to_view_mul * p_pos.xy + params.NDC_to_view_add), p_viewspace_depth); + } else { + return vec3((params.NDC_to_view_mul * p_pos.xy + params.NDC_to_view_add) * p_viewspace_depth, p_viewspace_depth); + } +} + +// calculate effect radius and fit our screen sampling pattern inside it +void calculate_radius_parameters(const float p_pix_center_length, const vec2 p_pixel_size_at_center, out float r_lookup_radius, out float r_radius, out float r_fallof_sq) { + r_radius = params.radius; + + // when too close, on-screen sampling disk will grow beyond screen size; limit this to avoid closeup temporal artifacts + const float too_close_limit = clamp(p_pix_center_length * params.inv_radius_near_limit, 0.0, 1.0) * 0.8 + 0.2; + + r_radius *= too_close_limit; + + // 0.85 is to reduce the radius to allow for more samples on a slope to still stay within influence + r_lookup_radius = (0.85 * r_radius) / p_pixel_size_at_center.x; + + // used to calculate falloff (both for AO samples and per-sample weights) + r_fallof_sq = -1.0 / (r_radius * r_radius); +} + +vec4 calculate_edges(const float p_center_z, const float p_left_z, const float p_right_z, const float p_top_z, const float p_bottom_z) { + // slope-sensitive depth-based edge detection + vec4 edgesLRTB = vec4(p_left_z, p_right_z, p_top_z, p_bottom_z) - p_center_z; + vec4 edgesLRTB_slope_adjusted = edgesLRTB + edgesLRTB.yxwz; + edgesLRTB = min(abs(edgesLRTB), abs(edgesLRTB_slope_adjusted)); + return clamp((1.3 - edgesLRTB / (p_center_z * 0.040)), 0.0, 1.0); +} + +vec3 decode_normal(vec3 p_encoded_normal) { 
+	return p_encoded_normal * 2.0 - 1.0;
+}
+
+// Loads the normal stored at `p_pos + p_offset` in `source_normal` and decodes it.
+vec3 load_normal(ivec2 p_pos, ivec2 p_offset) {
+	vec3 stored_normal = imageLoad(source_normal, p_pos + p_offset).xyz;
+	// The stored Z component is inverted before decoding.
+	stored_normal.z = 1.0 - stored_normal.z;
+	return decode_normal(stored_normal);
+}
+
+// Loads and decodes the normal stored at `p_pos`.
+vec3 load_normal(ivec2 p_pos) {
+	return load_normal(p_pos, ivec2(0));
+}
+
+// Obscurance contribution of a single hit; all vectors are in view space.
+float calculate_pixel_obscurance(vec3 p_pixel_normal, vec3 p_hit_delta, float p_fallof_sq) {
+	float dist_sq = dot(p_hit_delta, p_hit_delta);
+	// Projection of the normalized hit direction onto the pixel normal.
+	float normal_dot_dir = dot(p_pixel_normal, p_hit_delta) / sqrt(dist_sq);
+
+	// Fades the contribution out as the squared distance grows.
+	float distance_fade = max(0.0, dist_sq * p_fallof_sq + 1.0);
+	float above_horizon = max(0.0, normal_dot_dir - params.horizon_angle_threshold);
+
+	return above_horizon * distance_fade;
+}
+
+// Samples the depth at `p_sampling_uv` and accumulates its weighted obscurance into the running sums.
+void SSAO_tap_inner(const int p_quality_level, inout float r_obscurance_sum, inout float r_weight_sum, const vec2 p_sampling_uv, const float p_mip_level, const vec3 p_pix_center_pos, vec3 p_pixel_normal, const float p_fallof_sq, const float p_weight_mod) {
+	// Get the depth at the sample and convert it to a view space delta from the shaded pixel.
+	float sample_depth = textureLod(source_depth_mipmaps, vec3(p_sampling_uv, params.pass), p_mip_level).x;
+	vec3 sample_delta = NDC_to_view_space(p_sampling_uv.xy, sample_depth).xyz - p_pix_center_pos;
+
+	// Haloing reduction only applies from its quality preset upwards.
+	float haloing_factor = 1.0;
+	if (p_quality_level >= SSAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET) {
+		float reduct = clamp(max(0.0, -sample_delta.z) * params.neg_inv_radius + 2.0, 0.0, 1.0);
+		haloing_factor = SSAO_HALOING_REDUCTION_AMOUNT * reduct + (1.0 - SSAO_HALOING_REDUCTION_AMOUNT);
+	}
+	float sample_weight = haloing_factor * p_weight_mod;
+
+	r_obscurance_sum += calculate_pixel_obscurance(p_pixel_normal, sample_delta, p_fallof_sq) * sample_weight;
+	r_weight_sum += sample_weight;
+}
+
+// Takes one tap of the sampling pattern plus its mirrored counterpart.
+void SSAOTap(const int p_quality_level, inout float r_obscurance_sum, inout float r_weight_sum, const int
p_tap_index, const mat2 p_rot_scale, const vec3 p_pix_center_pos, vec3 p_pixel_normal, const vec2 p_normalized_screen_pos, const float p_mip_offset, const float p_fallof_sq, float p_weight_mod, vec2 p_norm_xy, float p_norm_xy_length) {
+	vec2 sample_offset;
+	float sample_pow_2_len;
+
+	// patterns
+	// As read below: xy is the tap offset, z scales the tap weight and w is the precalculated log2 of the offset length.
+	{
+		vec4 new_sample = sample_pattern[p_tap_index];
+		sample_offset = new_sample.xy * p_rot_scale;
+		sample_pow_2_len = new_sample.w; // precalculated, same as: sample_pow_2_len = log2( length( new_sample.xy ) );
+		p_weight_mod *= new_sample.z;
+	}
+
+	// snap to pixel center (more correct obscurance math, avoids artifacts)
+	sample_offset = round(sample_offset);
+
+	// calculate MIP based on the sample distance from the centre, similar to as described
+	// in http://graphics.cs.williams.edu/papers/SAOHPG12/.
+	// Below the depth mips quality preset, mip 0 is always used.
+	float mip_level = (p_quality_level < SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? (0) : (sample_pow_2_len + p_mip_offset);
+
+	vec2 sampling_uv = sample_offset * params.half_screen_pixel_size + p_normalized_screen_pos;
+
+	SSAO_tap_inner(p_quality_level, r_obscurance_sum, r_weight_sum, sampling_uv, mip_level, p_pix_center_pos, p_pixel_normal, p_fallof_sq, p_weight_mod);
+
+	// for the second tap, just use the mirrored offset
+	vec2 sample_offset_mirrored_uv = -sample_offset;
+
+	// tilt the second set of samples so that the disk is effectively rotated by the normal
+	// effective at removing one set of artifacts, but too expensive for lower quality settings
+	if (p_quality_level >= SSAO_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET) {
+		float dot_norm = dot(sample_offset_mirrored_uv, p_norm_xy);
+		sample_offset_mirrored_uv -= dot_norm * p_norm_xy_length * p_norm_xy;
+		sample_offset_mirrored_uv = round(sample_offset_mirrored_uv);
+	}
+
+	// snap to pixel center (more correct obscurance math, avoids artifacts)
+	vec2 sampling_mirrored_uv = sample_offset_mirrored_uv * params.half_screen_pixel_size + p_normalized_screen_pos;
+
+	SSAO_tap_inner(p_quality_level, r_obscurance_sum, r_weight_sum, sampling_mirrored_uv, mip_level, p_pix_center_pos, p_pixel_normal, p_fallof_sq, p_weight_mod);
+}
+
+// Computes the SSAO result for the pixel at p_pos.
+// r_shadow_term: final occlusion term (0 fully occluded, 1 fully lit), or the raw weighted obscurance when p_adaptive_base is set.
+// r_edges: left/right/top/bottom edge mask (1 is no edge, 0 is edge); all zero when p_adaptive_base is set.
+// r_weight: sum of the sample weights.
+// p_quality_level: gates the optional steps below through the *_ENABLE_AT_QUALITY_PRESET defines.
+// p_adaptive_base: base pass of the adaptive mode; takes SSAO_ADAPTIVE_TAP_BASE_COUNT taps, skips edge handling and returns early.
+void generate_SSAO_shadows_internal(out float r_shadow_term, out vec4 r_edges, out float r_weight, const vec2 p_pos, int p_quality_level, bool p_adaptive_base) {
+	vec2 pos_rounded = trunc(p_pos);
+	uvec2 upos = uvec2(pos_rounded);
+
+	const int number_of_taps = (p_adaptive_base) ? (SSAO_ADAPTIVE_TAP_BASE_COUNT) : (num_taps[p_quality_level]);
+	float pix_z, pix_left_z, pix_top_z, pix_right_z, pix_bottom_z;
+
+	// Two gathers fetch the depth of this pixel and of its four direct neighbours.
+	vec4 valuesUL = textureGather(source_depth_mipmaps, vec3(pos_rounded * params.half_screen_pixel_size, params.pass));
+	vec4 valuesBR = textureGather(source_depth_mipmaps, vec3((pos_rounded + vec2(1.0)) * params.half_screen_pixel_size, params.pass));
+
+	// get this pixel's viewspace depth
+	pix_z = valuesUL.y;
+
+	// get left right top bottom neighbouring pixels for edge detection (gets compiled out on quality_level == 0)
+	pix_left_z = valuesUL.x;
+	pix_top_z = valuesUL.z;
+	pix_right_z = valuesBR.z;
+	pix_bottom_z = valuesBR.x;
+
+	vec2 normalized_screen_pos = pos_rounded * params.half_screen_pixel_size + params.half_screen_pixel_size_x025;
+	vec3 pix_center_pos = NDC_to_view_space(normalized_screen_pos, pix_z);
+
+	// Load this pixel's viewspace normal
+	uvec2 full_res_coord = upos * 2 * params.size_multiplier + params.pass_coord_offset.xy;
+	vec3 pixel_normal = load_normal(ivec2(full_res_coord));
+
+	// View space size of one pixel step at this pixel's depth.
+	const vec2 pixel_size_at_center = NDC_to_view_space(normalized_screen_pos.xy + params.half_screen_pixel_size, pix_center_pos.z).xy - pix_center_pos.xy;
+
+	float pixel_lookup_radius;
+	float fallof_sq;
+
+	// calculate effect radius and fit our screen sampling pattern inside it
+	float viewspace_radius;
+	calculate_radius_parameters(length(pix_center_pos), pixel_size_at_center, pixel_lookup_radius, viewspace_radius, fallof_sq);
+
+	// calculate samples rotation/scaling
+	mat2 rot_scale_matrix;
+	uint pseudo_random_index;
+
+	{
+		vec4 rotation_scale;
+		// reduce effect radius near the screen edges slightly; ideally, one would render a larger depth buffer (5% on each side) instead
+		if (!p_adaptive_base && (p_quality_level >= SSAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET)) {
+			float near_screen_border = min(min(normalized_screen_pos.x, 1.0 - normalized_screen_pos.x), min(normalized_screen_pos.y, 1.0 - normalized_screen_pos.y));
+			near_screen_border = clamp(10.0 * near_screen_border + 0.6, 0.0, 1.0);
+			pixel_lookup_radius *= near_screen_border;
+		}
+
+		// load & update pseudo-random rotation matrix
+		// One of 5 matrices per pass is picked from the pixel position, then scaled by the lookup radius.
+		pseudo_random_index = uint(pos_rounded.y * 2 + pos_rounded.x) % 5;
+		rotation_scale = constants.rotation_matrices[params.pass * 5 + pseudo_random_index];
+		rot_scale_matrix = mat2(rotation_scale.x * pixel_lookup_radius, rotation_scale.y * pixel_lookup_radius, rotation_scale.z * pixel_lookup_radius, rotation_scale.w * pixel_lookup_radius);
+	}
+
+	// the main obscurance & sample weight storage
+	float obscurance_sum = 0.0;
+	float weight_sum = 0.0;
+
+	// edge mask for between this and left/right/top/bottom neighbour pixels - not used in quality level 0 so initialize to "no edge" (1 is no edge, 0 is edge)
+	vec4 edgesLRTB = vec4(1.0, 1.0, 1.0, 1.0);
+
+	// Move the center pixel slightly towards the camera to avoid imprecision artifacts caused by the 16-bit depth buffer; much smaller offsets are needed when using 32-bit floats
+	pix_center_pos *= 0.9992;
+
+	if (!p_adaptive_base && (p_quality_level >= SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) {
+		edgesLRTB = calculate_edges(pix_z, pix_left_z, pix_right_z, pix_top_z, pix_bottom_z);
+	}
+
+	// adds a more high definition sharp effect, which gets blurred out (reuses left/right/top/bottom samples that we used for edge detection)
+	if (!p_adaptive_base && (p_quality_level >= SSAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET)) {
+		// disable in case of quality level 4 (reference)
+		if (p_quality_level != 4) {
+			// approximate the neighbouring pixels' positions (actually just deltas, i.e. "position - pix_center_pos")
+			vec3 normalized_viewspace_dir = vec3(pix_center_pos.xy / pix_center_pos.zz, 1.0);
+			vec3 pixel_left_delta = vec3(-pixel_size_at_center.x, 0.0, 0.0) + normalized_viewspace_dir * (pix_left_z - pix_center_pos.z);
+			vec3 pixel_right_delta = vec3(+pixel_size_at_center.x, 0.0, 0.0) + normalized_viewspace_dir * (pix_right_z - pix_center_pos.z);
+			vec3 pixel_top_delta = vec3(0.0, -pixel_size_at_center.y, 0.0) + normalized_viewspace_dir * (pix_top_z - pix_center_pos.z);
+			vec3 pixel_bottom_delta = vec3(0.0, +pixel_size_at_center.y, 0.0) + normalized_viewspace_dir * (pix_bottom_z - pix_center_pos.z);
+
+			const float range_reduction = 4.0f; // this is to avoid various artifacts
+			const float modified_fallof_sq = range_reduction * fallof_sq;
+
+			vec4 additional_obscurance;
+			additional_obscurance.x = calculate_pixel_obscurance(pixel_normal, pixel_left_delta, modified_fallof_sq);
+			additional_obscurance.y = calculate_pixel_obscurance(pixel_normal, pixel_right_delta, modified_fallof_sq);
+			additional_obscurance.z = calculate_pixel_obscurance(pixel_normal, pixel_top_delta, modified_fallof_sq);
+			additional_obscurance.w = calculate_pixel_obscurance(pixel_normal, pixel_bottom_delta, modified_fallof_sq);
+
+			// Only obscurance_sum grows here; weight_sum is left untouched by the detail term.
+			obscurance_sum += params.detail_intensity * dot(additional_obscurance, edgesLRTB);
+		}
+	}
+
+	// Sharp normals also create edges - but this adds to the cost as well
+	if (!p_adaptive_base && (p_quality_level >= SSAO_NORMAL_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) {
+		vec3 neighbour_normal_left = load_normal(ivec2(full_res_coord), ivec2(-2, 0));
+		vec3 neighbour_normal_right = load_normal(ivec2(full_res_coord), ivec2(2, 0));
+		vec3 neighbour_normal_top = load_normal(ivec2(full_res_coord), ivec2(0, -2));
+		vec3 neighbour_normal_bottom = load_normal(ivec2(full_res_coord), ivec2(0, 2));
+
+		const float dot_threshold = SSAO_NORMAL_BASED_EDGES_DOT_THRESHOLD;
+
+		vec4 normal_edgesLRTB;
+		normal_edgesLRTB.x = clamp((dot(pixel_normal, neighbour_normal_left) + dot_threshold), 0.0, 1.0);
+		normal_edgesLRTB.y = clamp((dot(pixel_normal, neighbour_normal_right) + dot_threshold), 0.0, 1.0);
+		normal_edgesLRTB.z = clamp((dot(pixel_normal, neighbour_normal_top) + dot_threshold), 0.0, 1.0);
+		normal_edgesLRTB.w = clamp((dot(pixel_normal, neighbour_normal_bottom) + dot_threshold), 0.0, 1.0);
+
+		edgesLRTB *= normal_edgesLRTB;
+	}
+
+	const float global_mip_offset = SSAO_DEPTH_MIPS_GLOBAL_OFFSET;
+	float mip_offset = (p_quality_level < SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? (0) : (log2(pixel_lookup_radius) + global_mip_offset);
+
+	// Used to tilt the second set of samples so that the disk is effectively rotated by the normal
+	// effective at removing one set of artifacts, but too expensive for lower quality settings
+	// NOTE(review): norm_xy_length is 0 when pixel_normal.xy is exactly zero, which makes the division below a division by zero.
+	vec2 norm_xy = vec2(pixel_normal.x, pixel_normal.y);
+	float norm_xy_length = length(norm_xy);
+	norm_xy /= vec2(norm_xy_length, -norm_xy_length);
+	norm_xy_length *= SSAO_TILT_SAMPLES_AMOUNT;
+
+	// standard, non-adaptive approach
+	// NOTE(review): without ADAPTIVE defined, quality level 3 (non-base) takes no taps at all and weight_sum stays 0;
+	// presumably that combination is never compiled - confirm against the shader variants.
+	if ((p_quality_level != 3) || p_adaptive_base) {
+		for (int i = 0; i < number_of_taps; i++) {
+			SSAOTap(p_quality_level, obscurance_sum, weight_sum, i, rot_scale_matrix, pix_center_pos, pixel_normal, normalized_screen_pos, mip_offset, fallof_sq, 1.0, norm_xy, norm_xy_length);
+		}
+	}
+#ifdef ADAPTIVE
+	else {
+		// add new ones if needed
+		vec2 full_res_uv = normalized_screen_pos + params.pass_uv_offset.xy;
+		float importance = textureLod(source_importance, full_res_uv, 0.0).x;
+
+		// this is to normalize SSAO_DETAIL_AO_AMOUNT across all pixels regardless of importance
+		obscurance_sum *= (SSAO_ADAPTIVE_TAP_BASE_COUNT / float(SSAO_MAX_TAPS)) + (importance * SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT / float(SSAO_MAX_TAPS));
+
+		// load existing base values
+		// x is the base pass obscurance, y its weight divided by (SSAO_ADAPTIVE_TAP_BASE_COUNT * 4.0), undone here.
+		vec2 base_values = imageLoad(source_ssao, ivec3(upos, params.pass)).xy;
+		weight_sum += base_values.y * float(SSAO_ADAPTIVE_TAP_BASE_COUNT * 4.0);
+		obscurance_sum += (base_values.x) * weight_sum;
+
+		// increase importance around edges
+		// NOTE(review): edge_count is computed but never read in the code below.
+		float edge_count = dot(1.0 - edgesLRTB, vec4(1.0, 1.0, 1.0, 1.0));
+
+		float avg_total_importance = float(counter.sum) * params.load_counter_avg_div;
+
+		// Scale the importance down when the average importance exceeds the adaptive sample limit.
+		float importance_limiter = clamp(params.adaptive_sample_limit / avg_total_importance, 0.0, 1.0);
+		importance *= importance_limiter;
+
+		float additional_sample_count = SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT * importance;
+
+		const float blend_range = 3.0;
+		const float blend_range_inv = 1.0 / blend_range;
+
+		additional_sample_count += 0.5;
+		uint additional_samples = uint(additional_sample_count);
+		uint additional_samples_to = min(SSAO_MAX_TAPS, additional_samples + SSAO_ADAPTIVE_TAP_BASE_COUNT);
+
+		// Continue the pattern after the base taps; the weight of the last few taps is faded out over blend_range taps.
+		for (uint i = SSAO_ADAPTIVE_TAP_BASE_COUNT; i < additional_samples_to; i++) {
+			additional_sample_count -= 1.0f;
+			float weight_mod = clamp(additional_sample_count * blend_range_inv, 0.0, 1.0);
+			SSAOTap(p_quality_level, obscurance_sum, weight_sum, int(i), rot_scale_matrix, pix_center_pos, pixel_normal, normalized_screen_pos, mip_offset, fallof_sq, weight_mod, norm_xy, norm_xy_length);
+		}
+	}
+#endif
+
+	// early out for adaptive base - just output weight (used for the next pass)
+	if (p_adaptive_base) {
+		float obscurance = obscurance_sum / weight_sum;
+
+		r_shadow_term = obscurance;
+		r_edges = vec4(0.0);
+		r_weight = weight_sum;
+		return;
+	}
+
+	// calculate weighted average
+	float obscurance = obscurance_sum / weight_sum;
+
+	// calculate fadeout (1 close, gradient, 0 far)
+	float fade_out = clamp(pix_center_pos.z * params.fade_out_mul + params.fade_out_add, 0.0, 1.0);
+
+	// Reduce the SSAO shadowing if we're on the edge to remove artifacts on edges (we don't care for the lower quality one)
+	// p_adaptive_base is always false here because of the early out above.
+	if (!p_adaptive_base && (p_quality_level >= SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) {
+		// when there's more than 2 opposite edges, start fading out the occlusion to reduce aliasing artifacts
+		float edge_fadeout_factor = clamp((1.0 - edgesLRTB.x - edgesLRTB.y) * 0.35, 0.0, 1.0) + clamp((1.0 - edgesLRTB.z - edgesLRTB.w) * 0.35, 0.0, 1.0);
+
+		fade_out *= clamp(1.0 - edge_fadeout_factor, 0.0, 1.0);
+	}
+
+	// strength
+	obscurance = params.intensity * obscurance;
+
+	// clamp
+	obscurance = min(obscurance, params.shadow_clamp);
+
+	// fadeout
+	obscurance *= fade_out;
+
+	// conceptually switch to occlusion with the meaning being visibility (grows with visibility, occlusion == 1 implies full visibility),
+	// to be in line with what is more commonly used.
+	float occlusion = 1.0 - obscurance;
+
+	// modify the gradient
+	// note: this cannot be moved to a later pass because of loss of precision after storing in the render target
+	occlusion = pow(clamp(occlusion, 0.0, 1.0), params.shadow_power);
+
+	// outputs!
+	r_shadow_term = occlusion; // Our final 'occlusion' term (0 means fully occluded, 1 means fully lit)
+	r_edges = edgesLRTB; // These are used to prevent blurring across edges, 1 means no edge, 0 means edge, 0.5 means half way there, etc.
+	r_weight = weight_sum;
+}
+
+// Runs the SSAO generation for one pixel and stores the result in `dest_image`.
+// The first channel holds the shadow term. The second channel holds the normalized weight
+// when SSAO_BASE is defined and the packed edges otherwise.
+void main() {
+	ivec2 ssC = ivec2(gl_GlobalInvocationID.xy);
+	if (any(greaterThanEqual(ssC, params.screen_size))) { // Outside of the screen, nothing to do.
+		return;
+	}
+
+	float shadow_term;
+	float weight;
+	vec4 edges;
+	vec2 pixel_center = vec2(gl_GlobalInvocationID.xy) + vec2(0.5);
+
+	float second_channel;
+#ifdef SSAO_BASE
+	generate_SSAO_shadows_internal(shadow_term, edges, weight, pixel_center, params.quality, true);
+	second_channel = weight / (float(SSAO_ADAPTIVE_TAP_BASE_COUNT) * 4.0);
+#else
+	generate_SSAO_shadows_internal(shadow_term, edges, weight, pixel_center, params.quality, false); // pass in quality levels
+	// Quality level 0 always reports "no edge".
+	if (params.quality == 0) {
+		edges = vec4(1.0);
+	}
+	second_channel = pack_edges(edges);
+#endif
+
+	imageStore(dest_image, ssC, vec4(shadow_term, second_channel, 0.0, 0.0));
+}
diff --git a/servers/rendering/renderer_rd/shaders/effects/ssao_blur.glsl b/servers/rendering/renderer_rd/shaders/effects/ssao_blur.glsl
new file mode 100644
index 0000000000..f42734c46d
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ssao_blur.glsl
@@ -0,0 +1,154 @@
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2016, Intel Corporation
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of
+// the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// File changes (yyyy-mm-dd)
+// 2016-09-07: filip.strugar@intel.com: first commit
+// 2020-12-05: clayjohn: convert to Vulkan and Godot
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+layout(set = 0, binding = 0) uniform sampler2D source_ssao;
+
+layout(rg8, set = 1, binding = 0) uniform restrict writeonly image2D dest_image;
+
+layout(push_constant, std430) uniform Params {
+	float edge_sharpness;
+	float pad;
+	vec2 half_screen_pixel_size;
+}
+params;
+
+// Unpacks the four 2-bit edge values (left, right, top, bottom) from a normalized 8-bit value
+// and biases them by `params.edge_sharpness`.
+vec4 unpack_edges(float p_packed_val) {
+	uint bits = uint(p_packed_val * 255.5);
+	// The left edge sits in the two most significant bits, the bottom edge in the two least significant ones.
+	uvec4 edge_bits = (uvec4(bits) >> uvec4(6u, 4u, 2u, 0u)) & uvec4(3u);
+	vec4 edgesLRTB = vec4(edge_bits) / 3.0;
+
+	return clamp(edgesLRTB + params.edge_sharpness, 0.0, 1.0);
+}
+
+// Adds one neighbour to the running weighted sum, using its edge value as the weight.
+void add_sample(float p_ssao_value, float p_edge_value, inout float r_sum, inout float r_sum_weight) {
+	r_sum += (p_edge_value * p_ssao_value);
+	r_sum_weight += p_edge_value;
+}
+
+#ifdef MODE_WIDE
+// Edge-aware blur that uses the neighbours two texels away.
+// Returns the blurred SSAO value in x and the unmodified packed edges of the centre texel in y.
+vec2 sample_blurred_wide(vec2 p_coord) {
+	vec2 center = textureLodOffset(source_ssao, p_coord, 0.0, ivec2(0, 0)).xy;
+	vec2 left = textureLodOffset(source_ssao, p_coord, 0.0, ivec2(-2, 0)).xy;
+	vec2 top = textureLodOffset(source_ssao, p_coord, 0.0, ivec2(0, -2)).xy;
+	vec2 right = textureLodOffset(source_ssao, p_coord, 0.0, ivec2(2, 0)).xy;
+	vec2 bottom = textureLodOffset(source_ssao, p_coord, 0.0, ivec2(0, 2)).xy;
+
+	// Combine each edge of the centre texel with the facing edge of the matching neighbour.
+	vec4 edgesLRTB = unpack_edges(center.y);
+	edgesLRTB.x *= unpack_edges(left.y).y;
+	edgesLRTB.y *= unpack_edges(right.y).x;
+	edgesLRTB.z *= unpack_edges(top.y).w;
+	edgesLRTB.w *= unpack_edges(bottom.y).z;
+
+	float weight_total = 0.8f;
+	float weighted_sum = center.x * weight_total;
+
+	add_sample(left.x, edgesLRTB.x, weighted_sum, weight_total);
+	add_sample(right.x, edgesLRTB.y, weighted_sum, weight_total);
+	add_sample(top.x, edgesLRTB.z, weighted_sum, weight_total);
+	add_sample(bottom.x, edgesLRTB.w, weighted_sum, weight_total);
+
+	return vec2(weighted_sum / weight_total, center.y);
+}
+#endif
+
+#ifdef MODE_SMART
+// Edge-aware blur that uses the direct neighbours.
+// Returns the blurred SSAO value in x and the unmodified packed edges of the centre texel in y.
+vec2 sample_blurred(vec3 p_pos, vec2 p_coord) {
+	float packed_edges = texelFetch(source_ssao, ivec2(p_pos.xy), 0).y;
+	vec4 edgesLRTB = unpack_edges(packed_edges);
+
+	// Two gathers fetch the centre value and its four direct neighbours.
+	vec2 gather_offset = params.half_screen_pixel_size * 0.5;
+	vec4 upper_left = textureGather(source_ssao, p_coord - gather_offset);
+	vec4 bottom_right = textureGather(source_ssao, p_coord + gather_offset);
+
+	float weight_total = 0.5;
+	float weighted_sum = upper_left.y * weight_total;
+
+	add_sample(upper_left.x, edgesLRTB.x, weighted_sum, weight_total); // left
+	add_sample(bottom_right.z, edgesLRTB.y, weighted_sum, weight_total); // right
+	add_sample(upper_left.z, edgesLRTB.z, weighted_sum, weight_total); // top
+	add_sample(bottom_right.x, edgesLRTB.w, weighted_sum, weight_total); // bottom
+
+	return vec2(weighted_sum / weight_total, packed_edges);
+}
+#endif
+
+// Blurs one pixel with the mode selected at compile time and stores the result in `dest_image`.
+void main() {
+	// Pixel being shaded
+	ivec2 ssC = ivec2(gl_GlobalInvocationID.xy);
+	vec2 uv = (vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size;
+
+#if defined(MODE_NON_SMART)
+	// Plain blur: the centre and four diagonal-ish taps, each weighted 0.2. The edges are passed through.
+	vec2 half_pixel = params.half_screen_pixel_size * 0.5;
+
+	vec2 center = textureLod(source_ssao, uv, 0.0).xy;
+
+	vec4 vals;
+	vals.x = textureLod(source_ssao, uv + vec2(-half_pixel.x * 3.0, -half_pixel.y), 0.0).x;
+	vals.y = textureLod(source_ssao, uv + vec2(+half_pixel.x, -half_pixel.y * 3.0), 0.0).x;
+	vals.z = textureLod(source_ssao, uv + vec2(-half_pixel.x, +half_pixel.y * 3.0), 0.0).x;
+	vals.w = textureLod(source_ssao, uv + vec2(+half_pixel.x * 3.0, +half_pixel.y), 0.0).x;
+
+	vec2 sampled = vec2(dot(vals, vec4(0.2)) + center.x * 0.2, center.y);
+#elif defined(MODE_SMART)
+	vec2 sampled = sample_blurred(vec3(gl_GlobalInvocationID), uv);
+#else // MODE_WIDE
+	vec2 sampled = sample_blurred_wide(uv);
+#endif
+
+	imageStore(dest_image, ssC, vec4(sampled, 0.0, 0.0));
+}
diff --git a/servers/rendering/renderer_rd/shaders/effects/ssao_importance_map.glsl b/servers/rendering/renderer_rd/shaders/effects/ssao_importance_map.glsl
new file mode 100644
index 0000000000..04f98964e8
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ssao_importance_map.glsl
@@ -0,0 +1,123 @@
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2016, Intel Corporation
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+// 
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of
+// the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// File changes (yyyy-mm-dd)
+// 2016-09-07: filip.strugar@intel.com: first commit
+// 2020-12-05: clayjohn: convert to Vulkan and Godot
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+#ifdef GENERATE_MAP
+layout(set = 0, binding = 0) uniform sampler2DArray source_texture;
+#else
+layout(set = 0, binding = 0) uniform sampler2D source_importance;
+#endif
+layout(r8, set = 1, binding = 0) uniform restrict writeonly image2D dest_image;
+
+#ifdef PROCESS_MAPB
+layout(set = 2, binding = 0, std430) buffer Counter {
+	uint sum;
+}
+counter;
+#endif
+
+layout(push_constant, std430) uniform Params {
+	vec2 half_screen_pixel_size;
+	float intensity;
+	float power;
+}
+params;
+
+// Importance map passes, selected at compile time:
+// - GENERATE_MAP: writes the min/max difference of the SSAO term over the four source layers.
+// - PROCESS_MAPA: first blur pass over the importance map.
+// - PROCESS_MAPB: second blur pass; also accumulates the blurred importance into `counter.sum`.
+void main() {
+	// Pixel being shaded
+	ivec2 ssC = ivec2(gl_GlobalInvocationID.xy);
+
+	// Work groups are 8x8, so the dispatch can cover invocations that lie outside of the destination.
+	// Their stores would have no visible effect, but in PROCESS_MAPB they would still add to the
+	// counter and bias the average importance, so skip them entirely.
+	if (any(greaterThanEqual(ssC, imageSize(dest_image)))) {
+		return;
+	}
+
+#ifdef GENERATE_MAP
+	// importance map stuff
+	uvec2 base_position = ssC * 2;
+
+	vec2 base_uv = (vec2(base_position) + vec2(0.5f, 0.5f)) * params.half_screen_pixel_size;
+
+	float minV = 1.0;
+	float maxV = 0.0;
+	for (int i = 0; i < 4; i++) {
+		vec4 vals = textureGather(source_texture, vec3(base_uv, i));
+
+		// apply the same modifications that would have been applied in the main shader
+		vals = params.intensity * vals;
+
+		vals = 1.0 - vals;
+
+		vals = pow(clamp(vals, 0.0, 1.0), vec4(params.power));
+
+		maxV = max(maxV, max(max(vals.x, vals.y), max(vals.z, vals.w)));
+		minV = min(minV, min(min(vals.x, vals.y), min(vals.z, vals.w)));
+	}
+
+	float min_max_diff = maxV - minV;
+
+	imageStore(dest_image, ssC, vec4(pow(clamp(min_max_diff * 2.0, 0.0, 1.0), 0.8)));
+#endif
+
+#ifdef PROCESS_MAPA
+	vec2 uv = (vec2(ssC) + 0.5f) * params.half_screen_pixel_size * 2.0;
+
+	vec2 half_pixel = params.half_screen_pixel_size;
+
+	// Four taps in a rotated pattern around the centre; the centre itself is not sampled.
+	vec4 vals;
+	vals.x = textureLod(source_importance, uv + vec2(-half_pixel.x * 3.0, -half_pixel.y), 0.0).x;
+	vals.y = textureLod(source_importance, uv + vec2(+half_pixel.x, -half_pixel.y * 3.0), 0.0).x;
+	vals.z = textureLod(source_importance, uv + vec2(+half_pixel.x * 3.0, +half_pixel.y), 0.0).x;
+	vals.w = textureLod(source_importance, uv + vec2(-half_pixel.x, +half_pixel.y * 3.0), 0.0).x;
+
+	float avg = dot(vals, vec4(0.25, 0.25, 0.25, 0.25));
+
+	imageStore(dest_image, ssC, vec4(avg));
+#endif
+
+#ifdef PROCESS_MAPB
+	vec2 uv = (vec2(ssC) + 0.5f) * params.half_screen_pixel_size * 2.0;
+
+	vec2 half_pixel = params.half_screen_pixel_size;
+
+	// Same as PROCESS_MAPA, with the tap pattern mirrored.
+	vec4 vals;
+	vals.x = textureLod(source_importance, uv + vec2(-half_pixel.x, -half_pixel.y * 3.0), 0.0).x;
+	vals.y = textureLod(source_importance, uv + vec2(+half_pixel.x * 3.0, -half_pixel.y), 0.0).x;
+	vals.z = textureLod(source_importance, uv + vec2(+half_pixel.x, +half_pixel.y * 3.0), 0.0).x;
+	vals.w = textureLod(source_importance, uv + vec2(-half_pixel.x * 3.0, +half_pixel.y), 0.0).x;
+
+	float avg = dot(vals, vec4(0.25, 0.25, 0.25, 0.25));
+
+	imageStore(dest_image, ssC, vec4(avg));
+
+	// sum the average; to avoid overflowing we assume max AO resolution is not bigger than 16384x16384; so quarter res (used here) will be 4096x4096, which leaves us with 8 bits per pixel
+	uint sum = uint(clamp(avg, 0.0, 1.0) * 255.0 + 0.5);
+
+	// save every 9th to avoid InterlockedAdd congestion - since we're blurring, this is good enough; compensated by multiplying load_counter_avg_div by 9
+	if (((ssC.x % 3) + (ssC.y % 3)) == 0) {
+		atomicAdd(counter.sum, sum);
+	}
+#endif
+}
diff --git a/servers/rendering/renderer_rd/shaders/effects/ssao_interleave.glsl b/servers/rendering/renderer_rd/shaders/effects/ssao_interleave.glsl
new file mode 100644
index 0000000000..f6a9a92fac
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ssao_interleave.glsl
@@ -0,0 +1,119 @@
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2016, Intel Corporation
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of
+// the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// File changes (yyyy-mm-dd)
+// 2016-09-07: filip.strugar@intel.com: first commit
+// 2020-12-05: clayjohn: convert to Vulkan and Godot
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+layout(rgba8, set = 0, binding = 0) uniform restrict writeonly image2D dest_image;
+layout(set = 1, binding = 0) uniform sampler2DArray source_texture;
+
+layout(push_constant, std430) uniform Params {
+	float inv_sharpness;
+	uint size_modifier;
+	vec2 pixel_size;
+}
+params;
+
+// Unpacks the four 2-bit edge values (left, right, top, bottom) from a normalized 8-bit value
+// and biases them by `params.inv_sharpness`.
+vec4 unpack_edges(float p_packed_val) {
+	uint packed_val = uint(p_packed_val * 255.5);
+	vec4 edgesLRTB;
+	edgesLRTB.x = float((packed_val >> 6) & 0x03) / 3.0;
+	edgesLRTB.y = float((packed_val >> 4) & 0x03) / 3.0;
+	edgesLRTB.z = float((packed_val >> 2) & 0x03) / 3.0;
+	edgesLRTB.w = float((packed_val >> 0) & 0x03) / 3.0;
+
+	return clamp(edgesLRTB + params.inv_sharpness, 0.0, 1.0);
+}
+
+// Recombines the four deinterleaved SSAO layers of `source_texture` into `dest_image`.
+// MODE_SMART blends the neighbouring layers with edge-aware weights, MODE_HALF averages
+// layers 0 and 3, and the default averages all four layers.
+void main() {
+	ivec2 ssC = ivec2(gl_GlobalInvocationID.xy);
+	// The output size is recovered from its reciprocal, which is not exact in floating point.
+	// Round before converting so that a result slightly below the real size (e.g. 1919.9999)
+	// does not truncate to one pixel less and skip the last row or column.
+	if (any(greaterThanEqual(ssC, ivec2(round(1.0 / params.pixel_size))))) { //too large, do nothing
+		return;
+	}
+
+	// Centre of the pixel being shaded, in pixels.
+	vec2 pixel_center = vec2(ssC) + vec2(0.5);
+
+#ifdef MODE_SMART
+	float ao;
+	uvec2 pix_pos = uvec2(gl_GlobalInvocationID.xy);
+
+	// calculate index in the four deinterleaved source array texture
+	int mx = int(pix_pos.x % 2);
+	int my = int(pix_pos.y % 2);
+	int index_center = mx + my * 2; // center index
+	int index_horizontal = (1 - mx) + my * 2; // neighbouring, horizontal
+	int index_vertical = mx + (1 - my) * 2; // neighbouring, vertical
+	int index_diagonal = (1 - mx) + (1 - my) * 2; // diagonal
+
+	vec2 center_val = texelFetch(source_texture, ivec3(pix_pos / uvec2(params.size_modifier), index_center), 0).xy;
+
+	ao = center_val.x;
+
+	vec4 edgesLRTB = unpack_edges(center_val.y);
+
+	// convert index shifts to sampling offsets
+	float fmx = float(mx);
+	float fmy = float(my);
+
+	// in case of an edge, push sampling offsets away from the edge (towards pixel center)
+	float fmxe = (edgesLRTB.y - edgesLRTB.x);
+	float fmye = (edgesLRTB.w - edgesLRTB.z);
+
+	// calculate final sampling offsets and sample using bilinear filter
+	vec2 uv_horizontal = (pixel_center + vec2(fmx + fmxe - 0.5, 0.5 - fmy)) * params.pixel_size;
+	float ao_horizontal = textureLod(source_texture, vec3(uv_horizontal, index_horizontal), 0.0).x;
+	vec2 uv_vertical = (pixel_center + vec2(0.5 - fmx, fmy - 0.5 + fmye)) * params.pixel_size;
+	float ao_vertical = textureLod(source_texture, vec3(uv_vertical, index_vertical), 0.0).x;
+	vec2 uv_diagonal = (pixel_center + vec2(fmx - 0.5 + fmxe, fmy - 0.5 + fmye)) * params.pixel_size;
+	float ao_diagonal = textureLod(source_texture, vec3(uv_diagonal, index_diagonal), 0.0).x;
+
+	// reduce weight for samples near edge - if the edge is on both sides, weight goes to 0
+	vec4 blendWeights;
+	blendWeights.x = 1.0;
+	blendWeights.y = (edgesLRTB.x + edgesLRTB.y) * 0.5;
+	blendWeights.z = (edgesLRTB.z + edgesLRTB.w) * 0.5;
+	blendWeights.w = (blendWeights.y + blendWeights.z) * 0.5;
+
+	// calculate weighted average
+	float blendWeightsSum = dot(blendWeights, vec4(1.0, 1.0, 1.0, 1.0));
+	ao = dot(vec4(ao, ao_horizontal, ao_vertical, ao_diagonal), blendWeights) / blendWeightsSum;
+
+	imageStore(dest_image, ssC, vec4(ao));
+#else // !MODE_SMART
+
+	vec2 uv = pixel_center * params.pixel_size;
+#ifdef MODE_HALF
+	float a = textureLod(source_texture, vec3(uv, 0), 0.0).x;
+	float d = textureLod(source_texture, vec3(uv, 3), 0.0).x;
+	float avg = (a + d) * 0.5;
+
+#else
+	float a = textureLod(source_texture, vec3(uv, 0), 0.0).x;
+	float b = textureLod(source_texture, vec3(uv, 1), 0.0).x;
+	float c = textureLod(source_texture, vec3(uv, 2), 0.0).x;
+	float d = textureLod(source_texture, vec3(uv, 3), 0.0).x;
+	float avg = (a + b + c + d) * 0.25;
+
+#endif
+	imageStore(dest_image, ssC, vec4(avg));
+#endif
+}
diff --git a/servers/rendering/renderer_rd/shaders/effects/ssil.glsl b/servers/rendering/renderer_rd/shaders/effects/ssil.glsl
new file mode 100644
index 0000000000..513791dfbf
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ssil.glsl
@@ -0,0 +1,444 @@
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2016, Intel Corporation
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of
+// the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// File changes (yyyy-mm-dd)
+// 2016-09-07: filip.strugar@intel.com: first commit
+// 2020-12-05: clayjohn: convert to Vulkan and Godot
+// 2021-05-27: clayjohn: convert SSAO to SSIL
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+#define SSIL_MAIN_DISK_SAMPLE_COUNT (32) // number of entries in sample_pattern
+const vec4 sample_pattern[SSIL_MAIN_DISK_SAMPLE_COUNT] = { // per tap: xy = offset (transformed by the per-pixel rotation/scale matrix), z = weight multiplier, w = precomputed log2 distance term used for MIP selection
+	vec4(0.78488064, 0.56661671, 1.500000, -0.126083), vec4(0.26022232, -0.29575172, 1.500000, -1.064030), vec4(0.10459357, 0.08372527, 1.110000, -2.730563), vec4(-0.68286800, 0.04963045, 1.090000, -0.498827),
+	vec4(-0.13570161, -0.64190155, 1.250000, -0.532765), vec4(-0.26193795, -0.08205118, 0.670000, -1.783245), vec4(-0.61177456, 0.66664219, 0.710000, -0.044234), vec4(0.43675563, 0.25119025, 0.610000, -1.167283),
+	vec4(0.07884444, 0.86618668, 0.640000, -0.459002), vec4(-0.12790935, -0.29869005, 0.600000, -1.729424), vec4(-0.04031125, 0.02413622, 0.600000, -4.792042), vec4(0.16201244, -0.52851415, 0.790000, -1.067055),
+	vec4(-0.70991218, 0.47301072, 0.640000, -0.335236), vec4(0.03277707, -0.22349690, 0.600000, -1.982384), vec4(0.68921727, 0.36800742, 0.630000, -0.266718), vec4(0.29251814, 0.37775412, 0.610000, -1.422520),
+	vec4(-0.12224089, 0.96582592, 0.600000, -0.426142), vec4(0.11071457, -0.16131058, 0.600000, -2.165947), vec4(0.46562141, -0.59747696, 0.600000, -0.189760), vec4(-0.51548797, 0.11804193, 0.600000, -1.246800),
+	vec4(0.89141309, -0.42090443, 0.600000, 0.028192), vec4(-0.32402530, -0.01591529, 0.600000, -1.543018), vec4(0.60771245, 0.41635221, 0.600000, -0.605411), vec4(0.02379565, -0.08239821, 0.600000, -3.809046),
+	vec4(0.48951152, -0.23657045, 0.600000, -1.189011), vec4(-0.17611565, -0.81696892, 0.600000, -0.513724), vec4(-0.33930185, -0.20732205, 0.600000, -1.698047), vec4(-0.91974425, 0.05403209, 0.600000, 0.062246),
+	vec4(-0.15064627, -0.14949332, 0.600000, -1.896062), vec4(0.53180975, -0.35210401, 0.600000, -0.758838), vec4(0.41487166, 0.81442589, 0.600000, -0.505648), vec4(-0.24106961, -0.32721516, 0.600000, -1.665244)
+};
+
+// these values can be changed (up to SSIL_MAX_TAPS) with no changes required elsewhere; the values for the 4th and 5th presets are ignored, but the array entries are needed to avoid compilation errors
+// the actual number of texture samples is two times this value (each "tap" has two symmetrical depth texture samples)
+const int num_taps[5] = { 3, 5, 12, 0, 0 }; // indexed by quality level
+
+#define SSIL_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET (99) // to disable simply set to 99 or similar
+#define SSIL_TILT_SAMPLES_AMOUNT (0.4) // scales the length of the normal's xy part used for tilting
+//
+#define SSIL_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET (1) // to disable simply set to 99 or similar
+#define SSIL_HALOING_REDUCTION_AMOUNT (0.8) // values from 0.0 - 1.0, 1.0 means max weighting (will cause artifacts, 0.8 is more reasonable)
+//
+#define SSIL_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET (2) // below this quality level, depth MIP 0 is always sampled
+#define SSIL_DEPTH_MIPS_GLOBAL_OFFSET (-4.3) // best noise/quality/performance tradeoff, found empirically
+//
+// !!warning!!
the edge handling is hard-coded to 'disabled' on quality level 0, and enabled above, on the C++ side; while toggling it here will work for
+// testing purposes, it will not yield performance gains (or correct results)
+#define SSIL_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (1)
+//
+#define SSIL_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET (1)
+
+#define SSIL_MAX_TAPS 32 // upper bound on the tap index reached by the adaptive pass
+#define SSIL_ADAPTIVE_TAP_BASE_COUNT 5 // taps taken when p_adaptive_base is true
+#define SSIL_ADAPTIVE_TAP_FLEXIBLE_COUNT (SSIL_MAX_TAPS - SSIL_ADAPTIVE_TAP_BASE_COUNT) // maximum extra taps, scaled by importance
+#define SSIL_DEPTH_MIP_LEVELS 4 // NOTE(review): not referenced elsewhere in this shader
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+layout(set = 0, binding = 0) uniform sampler2DArray source_depth_mipmaps; // viewspace depth; array layer is selected with params.pass
+layout(rgba8, set = 0, binding = 1) uniform restrict readonly image2D source_normal; // encoded normals, decoded by load_normal()
+layout(set = 0, binding = 2) uniform Constants { //get into a lower set
+	vec4 rotation_matrices[20]; // indexed by params.pass * 5 + pseudo-random index; each vec4 is expanded into a mat2
+}
+constants;
+
+#ifdef ADAPTIVE
+layout(rgba16, set = 1, binding = 0) uniform restrict readonly image2DArray source_ssil; // base pass results, one layer per pass
+layout(set = 1, binding = 1) uniform sampler2D source_importance; // importance value, scales the number of extra taps
+layout(set = 1, binding = 2, std430) buffer Counter {
+	uint sum; // multiplied by params.load_counter_avg_div to obtain the average importance
+}
+counter;
+#endif
+
+layout(rgba16, set = 2, binding = 0) uniform restrict writeonly image2D dest_image; // rgb = color, a = obscurance
+layout(r8, set = 2, binding = 1) uniform image2D edges_weights_image; // packed edges, or the normalized weight when SSIL_BASE is defined
+
+layout(set = 3, binding = 0) uniform sampler2D last_frame; // color source, sampled at reprojected UVs
+layout(set = 3, binding = 1) uniform ProjectionConstants {
+	mat4 reprojection; // applied to the sample position to find its UV in last_frame
+}
+projection_constants;
+
+layout(push_constant, std430) uniform Params {
+	ivec2 screen_size; // bounds check limit in main(); also scales UVs to normal-buffer texels
+	int pass; // array layer / rotation set used by this dispatch
+	int quality; // forwarded to generate_SSIL as p_quality_level
+
+	vec2 half_screen_pixel_size;
+	vec2 half_screen_pixel_size_x025;
+
+	vec2 NDC_to_view_mul;
+	vec2 NDC_to_view_add;
+
+	vec2 pad2; // not read in this shader
+	float z_near;
+	float z_far;
+
+	float radius;
+	float intensity;
+	int size_multiplier;
+	int pad; // not read in this shader
+
+	float fade_out_mul;
+	float fade_out_add;
+	float normal_rejection_amount;
+	float inv_radius_near_limit;
+
+	bool is_orthogonal;
+	float neg_inv_radius;
+	float load_counter_avg_div;
+	float adaptive_sample_limit;
+
+	ivec2 pass_coord_offset;
+	vec2 pass_uv_offset;
+}
+params;
+
+float pack_edges(vec4 p_edgesLRTB) { // packs four edge values (L, R, T, B) into one value, 2 bits each
+	p_edgesLRTB = round(clamp(p_edgesLRTB, 0.0, 1.0) * 3.05); // quantize each value to 0..3
+	return dot(p_edgesLRTB, vec4(64.0 / 255.0, 16.0 / 255.0, 4.0 / 255.0, 1.0 / 255.0));
+}
+
+vec3 NDC_to_view_space(vec2 p_pos, float p_viewspace_depth) { // converts a screen position plus viewspace depth to a viewspace position
+	if (params.is_orthogonal) {
+		return vec3((params.NDC_to_view_mul * p_pos.xy + params.NDC_to_view_add), p_viewspace_depth); // orthogonal: xy is not scaled by depth
+	} else {
+		return vec3((params.NDC_to_view_mul * p_pos.xy + params.NDC_to_view_add) * p_viewspace_depth, p_viewspace_depth);
+	}
+}
+
+// calculate effect radius and fit our screen sampling pattern inside it
+void calculate_radius_parameters(const float p_pix_center_length, const vec2 p_pixel_size_at_center, out float r_lookup_radius, out float r_radius, out float r_fallof_sq) {
+	r_radius = params.radius;
+
+	// when too close, on-screen sampling disk will grow beyond screen size; limit this to avoid closeup temporal artifacts
+	const float too_close_limit = clamp(p_pix_center_length * params.inv_radius_near_limit, 0.0, 1.0) * 0.8 + 0.2;
+
+	r_radius *= too_close_limit;
+
+	// 0.85 is to reduce the radius to allow for more samples on a slope to still stay within influence
+	r_lookup_radius = (0.85 * r_radius) / p_pixel_size_at_center.x;
+
+	// used to calculate falloff (both for AO samples and per-sample weights)
+	r_fallof_sq = -1.0 / (r_radius * r_radius);
+}
+
+vec4 calculate_edges(const float p_center_z, const float p_left_z, const float p_right_z, const float p_top_z, const float p_bottom_z) { // returns L, R, T, B edge values: 1 = no edge, 0 = edge
+	// slope-sensitive depth-based edge detection
+	vec4 edgesLRTB = vec4(p_left_z, p_right_z, p_top_z, p_bottom_z) - p_center_z;
+	vec4 edgesLRTB_slope_adjusted = edgesLRTB + edgesLRTB.yxwz; // add the opposite side's difference
+	edgesLRTB = min(abs(edgesLRTB), abs(edgesLRTB_slope_adjusted));
+	return clamp((1.3 - edgesLRTB / (p_center_z * 0.040)), 0.0, 1.0);
+}
+
+vec3 decode_normal(vec3 p_encoded_normal) { // maps each encoded component from [0, 1] to [-1, 1]
+	vec3 normal = p_encoded_normal * 2.0 - 1.0;
+	return normal;
+}
+
+vec3 load_normal(ivec2 p_pos) { // loads and decodes the normal at texel p_pos
+	vec3 encoded_normal = imageLoad(source_normal, p_pos).xyz;
+	encoded_normal.z = 1.0 - encoded_normal.z; // flip the encoded z before decoding
+	return decode_normal(encoded_normal);
+}
+
+vec3 load_normal(ivec2 p_pos, ivec2 p_offset) { // same as above, at texel p_pos + p_offset
+	vec3 encoded_normal = imageLoad(source_normal, p_pos + p_offset).xyz;
+	encoded_normal.z = 1.0 - encoded_normal.z; // flip the encoded z before decoding
+	return decode_normal(encoded_normal);
+}
+
+// all vectors in viewspace
+float calculate_pixel_obscurance(vec3 p_pixel_normal, vec3 p_hit_delta, float p_fallof_sq) {
+	float length_sq = dot(p_hit_delta, p_hit_delta);
+	float NdotD = dot(p_pixel_normal, p_hit_delta) / sqrt(length_sq); // pixel normal dotted with the normalized hit direction
+
+	float falloff_mult = max(0.0, length_sq * p_fallof_sq + 1.0); // p_fallof_sq is -1 / radius^2, so this reaches 0 at the radius
+
+	return max(0, NdotD - 0.05) * falloff_mult; // 0.05 bias removes near-tangent contributions
+}
+
+void SSIL_tap_inner(const int p_quality_level, inout vec3 r_color_sum, inout float r_obscurance_sum, inout float r_weight_sum, const vec2 p_sampling_uv, const float p_mip_level, const vec3 p_pix_center_pos, vec3 p_pixel_normal, const float p_fallof_sq, const float p_weight_mod) { // accumulates one sample into the running color / obscurance / weight sums
+	// get depth at sample
+	float viewspace_sample_z = textureLod(source_depth_mipmaps, vec3(p_sampling_uv, params.pass), p_mip_level).x;
+	vec3 sample_normal = load_normal(ivec2(p_sampling_uv * vec2(params.screen_size)));
+
+	// convert to viewspace
+	vec3 hit_pos = NDC_to_view_space(p_sampling_uv.xy, viewspace_sample_z);
+	vec3 hit_delta = hit_pos - p_pix_center_pos;
+
+	float obscurance = calculate_pixel_obscurance(p_pixel_normal, hit_delta, p_fallof_sq);
+	float weight = 1.0;
+
+	if (p_quality_level >= SSIL_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET) {
+		float reduct = max(0, -hit_delta.z);
+		reduct = clamp(reduct * params.neg_inv_radius + 2.0, 0.0, 1.0);
+		weight = SSIL_HALOING_REDUCTION_AMOUNT * reduct + (1.0 - SSIL_HALOING_REDUCTION_AMOUNT);
+	}
+
+	// Translate sampling_uv to last screen's coordinates
+	const vec4 sample_pos = projection_constants.reprojection * vec4(p_sampling_uv * 2.0 - 1.0, (viewspace_sample_z - params.z_near) / (params.z_far - params.z_near) * 2.0 - 1.0, 1.0);
+	vec2 reprojected_sampling_uv = (sample_pos.xy / sample_pos.w) * 0.5 + 0.5; // perspective divide, then map [-1, 1] to [0, 1]
+
+	weight *= p_weight_mod;
+
+	r_obscurance_sum += obscurance * weight;
+
+	vec3 sample_color = textureLod(last_frame, reprojected_sampling_uv, 5.0).rgb; // always reads MIP level 5 of last_frame
+	// Reduce impact of fireflies by tonemapping before averaging: http://graphicrants.blogspot.com/2013/12/tone-mapping.html
+	sample_color /= (1.0 + dot(sample_color, vec3(0.299, 0.587, 0.114)));
+	r_color_sum += sample_color * obscurance * weight * mix(1.0, smoothstep(0.0, 0.1, -dot(sample_normal, normalize(hit_delta))), params.normal_rejection_amount); // the mix() term fades samples by their normal's alignment with the hit direction
+	r_weight_sum += weight;
+}
+
+void SSILTap(const int p_quality_level, inout vec3 r_color_sum, inout float r_obscurance_sum, inout float r_weight_sum, const int p_tap_index, const mat2 p_rot_scale, const vec3 p_pix_center_pos, vec3 p_pixel_normal, const vec2 p_normalized_screen_pos, const float p_mip_offset, const float p_fallof_sq, float p_weight_mod, vec2 p_norm_xy, float p_norm_xy_length) { // takes one tap: a sample at the pattern offset plus its mirrored counterpart
+	vec2 sample_offset;
+	float sample_pow_2_len;
+
+	// patterns
+	{
+		vec4 new_sample = sample_pattern[p_tap_index];
+		sample_offset = new_sample.xy * p_rot_scale;
+		sample_pow_2_len = new_sample.w; // precalculated, same as: sample_pow_2_len = log2( length( new_sample.xy ) );
+		p_weight_mod *= new_sample.z;
+	}
+
+	// snap to pixel center (more correct obscurance math, avoids artifacts)
+	sample_offset = round(sample_offset);
+
+	// calculate MIP based on the sample distance from the centre, similar to what is described
+	// in http://graphics.cs.williams.edu/papers/SAOHPG12/.
+	float mip_level = (p_quality_level < SSIL_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? (0) : (sample_pow_2_len + p_mip_offset);
+
+	vec2 sampling_uv = sample_offset * params.half_screen_pixel_size + p_normalized_screen_pos;
+
+	SSIL_tap_inner(p_quality_level, r_color_sum, r_obscurance_sum, r_weight_sum, sampling_uv, mip_level, p_pix_center_pos, p_pixel_normal, p_fallof_sq, p_weight_mod);
+
+	// for the second tap, just use the mirrored offset
+	vec2 sample_offset_mirrored_uv = -sample_offset;
+
+	// tilt the second set of samples so that the disk is effectively rotated by the normal
+	// effective at removing one set of artifacts, but too expensive for lower quality settings
+	if (p_quality_level >= SSIL_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET) {
+		float dot_norm = dot(sample_offset_mirrored_uv, p_norm_xy);
+		sample_offset_mirrored_uv -= dot_norm * p_norm_xy_length * p_norm_xy;
+		sample_offset_mirrored_uv = round(sample_offset_mirrored_uv);
+	}
+
+	// convert the mirrored pixel offset to a UV (it is already pixel-snapped: it is the negated rounded offset, re-rounded if tilted)
+	vec2 sampling_mirrored_uv = sample_offset_mirrored_uv * params.half_screen_pixel_size + p_normalized_screen_pos;
+
+	SSIL_tap_inner(p_quality_level, r_color_sum, r_obscurance_sum, r_weight_sum, sampling_mirrored_uv, mip_level, p_pix_center_pos, p_pixel_normal, p_fallof_sq, p_weight_mod);
+}
+
+void generate_SSIL(out vec3 r_color, out vec4 r_edges, out float r_obscurance, out float r_weight, const vec2 p_pos, int p_quality_level, bool p_adaptive_base) { // computes color, edges, obscurance and weight for the pixel at p_pos
+	vec2 pos_rounded = trunc(p_pos);
+	uvec2 upos = uvec2(pos_rounded);
+
+	const int number_of_taps = (p_adaptive_base) ? (SSIL_ADAPTIVE_TAP_BASE_COUNT) : (num_taps[p_quality_level]);
+	float pix_z, pix_left_z, pix_top_z, pix_right_z, pix_bottom_z;
+
+	vec4 valuesUL = textureGather(source_depth_mipmaps, vec3(pos_rounded * params.half_screen_pixel_size, params.pass));
+	vec4 valuesBR = textureGather(source_depth_mipmaps, vec3((pos_rounded + vec2(1.0)) * params.half_screen_pixel_size, params.pass));
+
+	// get this pixel's viewspace depth
+	pix_z = valuesUL.y;
+
+	// get left right top bottom neighbouring pixels for edge detection (gets compiled out on quality_level == 0)
+	pix_left_z = valuesUL.x;
+	pix_top_z = valuesUL.z;
+	pix_right_z = valuesBR.z;
+	pix_bottom_z = valuesBR.x;
+
+	vec2 normalized_screen_pos = pos_rounded * params.half_screen_pixel_size + params.half_screen_pixel_size_x025;
+	vec3 pix_center_pos = NDC_to_view_space(normalized_screen_pos, pix_z);
+
+	// Load this pixel's viewspace normal
+	uvec2 full_res_coord = upos * 2 * params.size_multiplier + params.pass_coord_offset.xy;
+	vec3 pixel_normal = load_normal(ivec2(full_res_coord));
+
+	const vec2 pixel_size_at_center = NDC_to_view_space(normalized_screen_pos.xy + params.half_screen_pixel_size, pix_center_pos.z).xy - pix_center_pos.xy;
+
+	float pixel_lookup_radius;
+	float fallof_sq;
+
+	// calculate effect radius and fit our screen sampling pattern inside it
+	float viewspace_radius; // NOTE(review): written by calculate_radius_parameters but not read afterwards
+	calculate_radius_parameters(length(pix_center_pos), pixel_size_at_center, pixel_lookup_radius, viewspace_radius, fallof_sq);
+
+	// calculate samples rotation/scaling
+	mat2 rot_scale_matrix;
+	uint pseudo_random_index;
+
+	{
+		vec4 rotation_scale;
+		// reduce effect radius near the screen edges slightly; ideally, one would render a larger depth buffer (5% on each side) instead
+		if (!p_adaptive_base && (p_quality_level >= SSIL_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET)) {
+			float near_screen_border = min(min(normalized_screen_pos.x, 1.0 - normalized_screen_pos.x), min(normalized_screen_pos.y, 1.0 - normalized_screen_pos.y));
+			near_screen_border = clamp(10.0 * near_screen_border + 0.6, 0.0, 1.0);
+			pixel_lookup_radius *= near_screen_border;
+		}
+
+		// load & update pseudo-random rotation matrix
+		pseudo_random_index = uint(pos_rounded.y * 2 + pos_rounded.x) % 5;
+		rotation_scale = constants.rotation_matrices[params.pass * 5 + pseudo_random_index];
+		rot_scale_matrix = mat2(rotation_scale.x * pixel_lookup_radius, rotation_scale.y * pixel_lookup_radius, rotation_scale.z * pixel_lookup_radius, rotation_scale.w * pixel_lookup_radius);
+	}
+
+	// the main obscurance & sample weight storage
+	vec3 color_sum = vec3(0.0);
+	float obscurance_sum = 0.0;
+	float weight_sum = 0.0;
+
+	// edge mask for between this and left/right/top/bottom neighbour pixels - not used in quality level 0 so initialize to "no edge" (1 is no edge, 0 is edge)
+	vec4 edgesLRTB = vec4(1.0, 1.0, 1.0, 1.0);
+
+	// Move center pixel slightly towards camera to avoid imprecision artifacts due to the use of a 16-bit depth buffer; much smaller offsets are needed when using 32-bit floats
+	pix_center_pos *= 0.9992;
+
+	if (!p_adaptive_base && (p_quality_level >= SSIL_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) {
+		edgesLRTB = calculate_edges(pix_z, pix_left_z, pix_right_z, pix_top_z, pix_bottom_z);
+	}
+
+	const float global_mip_offset = SSIL_DEPTH_MIPS_GLOBAL_OFFSET;
+	float mip_offset = (p_quality_level < SSIL_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? (0) : (log2(pixel_lookup_radius) + global_mip_offset);
+
+	// Used to tilt the second set of samples so that the disk is effectively rotated by the normal
+	// effective at removing one set of artifacts, but too expensive for lower quality settings
+	vec2 norm_xy = vec2(pixel_normal.x, pixel_normal.y);
+	float norm_xy_length = length(norm_xy);
+	norm_xy /= vec2(norm_xy_length, -norm_xy_length); // normalize and negate y
+	norm_xy_length *= SSIL_TILT_SAMPLES_AMOUNT;
+
+	// standard, non-adaptive approach
+	if ((p_quality_level != 3) || p_adaptive_base) {
+		for (int i = 0; i < number_of_taps; i++) {
+			SSILTap(p_quality_level, color_sum, obscurance_sum, weight_sum, i, rot_scale_matrix, pix_center_pos, pixel_normal, normalized_screen_pos, mip_offset, fallof_sq, 1.0, norm_xy, norm_xy_length);
+		}
+	}
+#ifdef ADAPTIVE
+	else {
+		// add new ones if needed
+		vec2 full_res_uv = normalized_screen_pos + params.pass_uv_offset.xy;
+		float importance = textureLod(source_importance, full_res_uv, 0.0).x;
+
+		// Need to store obscurance from base pass
+		// load existing base values
+		vec4 base_values = imageLoad(source_ssil, ivec3(upos, params.pass));
+		weight_sum += imageLoad(edges_weights_image, ivec2(upos)).r * float(SSIL_ADAPTIVE_TAP_BASE_COUNT * 4.0); // undo the normalization applied when the base pass stored its weight
+		color_sum += (base_values.rgb) * weight_sum; // the base pass stored averages, so scale them back to sums
+		obscurance_sum += (base_values.a) * weight_sum;
+
+		// increase importance around edges
+		float edge_count = dot(1.0 - edgesLRTB, vec4(1.0, 1.0, 1.0, 1.0)); // NOTE(review): edge_count is not read afterwards
+
+		float avg_total_importance = float(counter.sum) * params.load_counter_avg_div;
+
+		float importance_limiter = clamp(params.adaptive_sample_limit / avg_total_importance, 0.0, 1.0);
+		importance *= importance_limiter;
+
+		float additional_sample_count = SSIL_ADAPTIVE_TAP_FLEXIBLE_COUNT * importance;
+
+		const float blend_range = 3.0;
+		const float blend_range_inv = 1.0 / blend_range;
+
+		additional_sample_count += 0.5; // round to nearest when truncated below
+		uint additional_samples = uint(additional_sample_count);
+		uint additional_samples_to = min(SSIL_MAX_TAPS, additional_samples + SSIL_ADAPTIVE_TAP_BASE_COUNT);
+
+		for (uint i = SSIL_ADAPTIVE_TAP_BASE_COUNT; i < additional_samples_to; i++) { // continue the pattern after the base pass taps
+			additional_sample_count -= 1.0f;
+			float weight_mod = clamp(additional_sample_count * blend_range_inv, 0.0, 1.0); // fade out the last few taps
+			SSILTap(p_quality_level, color_sum, obscurance_sum, weight_sum, int(i), rot_scale_matrix, pix_center_pos, pixel_normal, normalized_screen_pos, mip_offset, fallof_sq, weight_mod, norm_xy, norm_xy_length);
+		}
+	}
+#endif
+
+	// Early out for adaptive base
+	if (p_adaptive_base) {
+		vec3 color = color_sum / weight_sum;
+
+		r_color = color;
+		r_edges = vec4(0.0);
+		r_obscurance = obscurance_sum / weight_sum;
+		r_weight = weight_sum;
+		return;
+	}
+
+	// Calculate weighted average
+	vec3 color = color_sum / weight_sum;
+	color /= 1.0 - dot(color, vec3(0.299, 0.587, 0.114)); // inverse of the per-sample tonemap applied in SSIL_tap_inner
+
+	// Calculate fadeout (1 close, gradient, 0 far)
+	float fade_out = clamp(pix_center_pos.z * params.fade_out_mul + params.fade_out_add, 0.0, 1.0);
+
+	// Reduce the SSIL if we're on the edge to remove artifacts on edges (we don't care for the lower quality one)
+	if (!p_adaptive_base && (p_quality_level >= SSIL_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) {
+		// when there's more than 2 opposite edges, start fading out the occlusion to reduce aliasing artifacts
+		float edge_fadeout_factor = clamp((1.0 - edgesLRTB.x - edgesLRTB.y) * 0.35, 0.0, 1.0) + clamp((1.0 - edgesLRTB.z - edgesLRTB.w) * 0.35, 0.0, 1.0);
+
+		fade_out *= clamp(1.0 - edge_fadeout_factor, 0.0, 1.0);
+	}
+
+	color = params.intensity * color;
+
+	color *= fade_out;
+
+	// outputs!
+	r_color = color;
+	r_edges = edgesLRTB; // These are used to prevent blurring across edges, 1 means no edge, 0 means edge, 0.5 means half way there, etc.
+	r_obscurance = clamp((obscurance_sum / weight_sum) * params.intensity, 0.0, 1.0);
+	r_weight = weight_sum;
+}
+
+void main() {
+	vec3 out_color;
+	float out_obscurance;
+	float out_weight;
+	vec4 out_edges;
+	ivec2 ssC = ivec2(gl_GlobalInvocationID.xy);
+	if (any(greaterThanEqual(ssC, params.screen_size))) { // outside params.screen_size, do nothing
+		return;
+	}
+
+	vec2 uv = vec2(gl_GlobalInvocationID) + vec2(0.5); // pixel center; generate_SSIL truncates it again
+#ifdef SSIL_BASE
+	generate_SSIL(out_color, out_edges, out_obscurance, out_weight, uv, params.quality, true); // adaptive base pass
+
+	imageStore(dest_image, ssC, vec4(out_color, out_obscurance));
+	imageStore(edges_weights_image, ssC, vec4(out_weight / (float(SSIL_ADAPTIVE_TAP_BASE_COUNT) * 4.0))); // store the weight normalized by BASE_COUNT * 4
+#else
+	generate_SSIL(out_color, out_edges, out_obscurance, out_weight, uv, params.quality, false); // pass in quality levels
+
+	imageStore(dest_image, ssC, vec4(out_color, out_obscurance));
+	imageStore(edges_weights_image, ssC, vec4(pack_edges(out_edges)));
+#endif
+}
diff --git a/servers/rendering/renderer_rd/shaders/effects/ssil_blur.glsl b/servers/rendering/renderer_rd/shaders/effects/ssil_blur.glsl
new file mode 100644
index 0000000000..47c56571f6
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ssil_blur.glsl
@@ -0,0 +1,144 @@
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2016, Intel Corporation
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of
+// the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// File changes (yyyy-mm-dd)
+// 2016-09-07: filip.strugar@intel.com: first commit
+// 2020-12-05: clayjohn: convert to Vulkan and Godot
+// 2021-05-27: clayjohn: convert SSAO to SSIL
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+layout(set = 0, binding = 0) uniform sampler2D source_ssil; // input, read with textureLod / textureLodOffset
+
+layout(rgba16, set = 1, binding = 0) uniform restrict writeonly image2D dest_image; // blurred output
+
+layout(r8, set = 2, binding = 0) uniform restrict readonly image2D source_edges; // packed edge values, decoded with unpack_edges()
+
+layout(push_constant, std430) uniform Params {
+	float edge_sharpness; // added to every unpacked edge value before clamping
+	float pad; // not read in this shader
+	vec2 half_screen_pixel_size; // multiplies pixel coordinates to get sampling UVs
+}
+params;
+
+vec4 unpack_edges(float p_packed_val) { // unpacks four 2-bit edge values (L, R, T, B) from one packed value
+	uint packed_val = uint(p_packed_val * 255.5); // scale to an integer so the 2-bit fields can be extracted
+	vec4 edgesLRTB;
+	edgesLRTB.x = float((packed_val >> 6) & 0x03) / 3.0; // bits 7-6 -> left
+	edgesLRTB.y = float((packed_val >> 4) & 0x03) / 3.0; // bits 5-4 -> right
+	edgesLRTB.z = float((packed_val >> 2) & 0x03) / 3.0; // bits 3-2 -> top
+	edgesLRTB.w = float((packed_val >> 0) & 0x03) / 3.0; // bits 1-0 -> bottom
+
+	return clamp(edgesLRTB + params.edge_sharpness, 0.0, 1.0);
+}
+
+void add_sample(vec4 p_ssil_value, float p_edge_value, inout vec4 r_sum, inout float r_sum_weight) { // accumulates p_ssil_value into r_sum, weighted by p_edge_value
+	float weight = p_edge_value;
+
+	r_sum += (weight * p_ssil_value);
+	r_sum_weight += weight;
+}
+
+#ifdef MODE_WIDE
+vec4 sample_blurred_wide(ivec2 p_pos, vec2 p_coord) { // edge-weighted cross blur with taps two texels from the center
+	vec4 ssil_value = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(0, 0));
+	vec4 ssil_valueL = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(-2, 0));
+	vec4 ssil_valueT = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(0, -2));
+	vec4 ssil_valueR = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(2, 0));
+	vec4 ssil_valueB = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(0, 2));
+
+	vec4 edgesLRTB = unpack_edges(imageLoad(source_edges, p_pos).r);
+	edgesLRTB.x *= unpack_edges(imageLoad(source_edges, p_pos + ivec2(-2, 0)).r).y; // left weight also scaled by the left tap's right-edge value
+	edgesLRTB.z *= unpack_edges(imageLoad(source_edges, p_pos + ivec2(0, -2)).r).w; // top weight also scaled by the top tap's bottom-edge value
+	edgesLRTB.y *= unpack_edges(imageLoad(source_edges, p_pos + ivec2(2, 0)).r).x; // right weight also scaled by the right tap's left-edge value
+	edgesLRTB.w *= unpack_edges(imageLoad(source_edges, p_pos + ivec2(0, 2)).r).z; // bottom weight also scaled by the bottom tap's top-edge value
+
+	float sum_weight = 0.8; // weight of the center sample
+	vec4 sum = ssil_value * sum_weight;
+
+	add_sample(ssil_valueL, edgesLRTB.x, sum, sum_weight);
+	add_sample(ssil_valueR, edgesLRTB.y, sum, sum_weight);
+	add_sample(ssil_valueT, edgesLRTB.z, sum, sum_weight);
+	add_sample(ssil_valueB, edgesLRTB.w, sum, sum_weight);
+
+	vec4 ssil_avg = sum / sum_weight;
+
+	ssil_value = ssil_avg;
+
+	return ssil_value;
+}
+#endif
+
+#ifdef MODE_SMART
+vec4 sample_blurred(ivec2 p_pos, vec2 p_coord) { // edge-weighted cross blur with the four adjacent texels
+	vec4 vC = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(0, 0));
+	vec4 vL = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(-1, 0));
+	vec4 vT = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(0, -1));
+	vec4 vR = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(1, 0));
+	vec4 vB = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(0, 1));
+
+	float packed_edges = imageLoad(source_edges, p_pos).r;
+	vec4 edgesLRTB = unpack_edges(packed_edges);
+
+	float sum_weight = 0.5; // weight of the center sample
+	vec4 sum = vC * sum_weight;
+
+	add_sample(vL, edgesLRTB.x, sum, sum_weight);
+	add_sample(vR, edgesLRTB.y, sum, sum_weight);
+	add_sample(vT, edgesLRTB.z, sum, sum_weight);
+	add_sample(vB, edgesLRTB.w, sum, sum_weight);
+
+	vec4 ssil_avg = sum / sum_weight;
+
+	vec4 ssil_value = ssil_avg;
+
+	return ssil_value;
+}
+#endif
+
+void main() {
+	// Pixel being shaded
+	ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); // NOTE(review): no bounds check before the imageStore below - confirm out-of-range invocations are handled by the caller
+
+#ifdef MODE_NON_SMART
+
+	vec2 half_pixel = params.half_screen_pixel_size * 0.5;
+
+	vec2 uv = (vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size;
+
+	vec4 centre = textureLod(source_ssil, uv, 0.0);
+
+	vec4 value = textureLod(source_ssil, vec2(uv + vec2(-half_pixel.x * 3, -half_pixel.y)), 0.0) * 0.2; // four offset taps, 0.2 weight each
+	value += textureLod(source_ssil, vec2(uv + vec2(+half_pixel.x, -half_pixel.y * 3)), 0.0) * 0.2;
+	value += textureLod(source_ssil, vec2(uv + vec2(-half_pixel.x, +half_pixel.y * 3)), 0.0) * 0.2;
+	value += textureLod(source_ssil, vec2(uv + vec2(+half_pixel.x * 3, +half_pixel.y)), 0.0) * 0.2;
+
+	vec4 sampled = value + centre * 0.2; // plus the center tap at the same weight
+
+#else
+#ifdef MODE_SMART
+	vec4 sampled = sample_blurred(ssC, (vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size);
+#else // MODE_WIDE
+	vec4 sampled = sample_blurred_wide(ssC, (vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size);
+#endif
+#endif // MODE_NON_SMART
+	imageStore(dest_image, ssC, sampled);
+}
diff --git a/servers/rendering/renderer_rd/shaders/effects/ssil_importance_map.glsl b/servers/rendering/renderer_rd/shaders/effects/ssil_importance_map.glsl
new file mode 100644
index 0000000000..6b6b02739d
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ssil_importance_map.glsl
@@ -0,0 +1,125 @@
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2016, Intel Corporation
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+// documentation files (the "Software"), to deal in the Software without
restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of
+// the Software.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// File changes (yyyy-mm-dd)
+// 2016-09-07: filip.strugar@intel.com: first commit
+// 2020-12-05: clayjohn: convert to Vulkan and Godot
+// 2021-05-27: clayjohn: convert SSAO to SSIL
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+#ifdef GENERATE_MAP
+layout(set = 0, binding = 0) uniform sampler2DArray source_texture; // four-layer input; layers 0-3 are read in main()
+#else
+layout(set = 0, binding = 0) uniform sampler2D source_importance; // importance map being blurred by PROCESS_MAPA / PROCESS_MAPB
+#endif
+layout(r8, set = 1, binding = 0) uniform restrict writeonly image2D dest_image; // single-channel output
+
+#ifdef PROCESS_MAPB
+layout(set = 2, binding = 0, std430) buffer Counter {
+	uint sum; // accumulated with atomicAdd at the end of main()
+}
+counter;
+#endif
+
+layout(push_constant, std430) uniform Params {
+	vec2 half_screen_pixel_size; // used to build UVs and tap offsets in the PROCESS_MAP* variants
+	float intensity; // scales the source color before luminance is computed (GENERATE_MAP)
+	float pad; // not read in this shader
+}
+params;
+
+void main() {
+	// Pixel being shaded
+	ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); // NOTE(review): no bounds check before the imageStore / atomicAdd below - confirm out-of-range invocations are handled by the caller
+
+#ifdef GENERATE_MAP
+	// importance map stuff
+	uvec2 base_position = ssC * 2; // each output texel covers a 2x2 block of source texels
+
+	float avg = 0.0; // NOTE(review): avg is not read in this GENERATE_MAP branch
+	float minV = 1.0;
+	float maxV = 0.0;
+	for (int i = 0; i < 4; i++) { // loop over the four array layers
+		vec3 value_a = texelFetch(source_texture, ivec3(base_position, i), 0).rgb * params.intensity;
+		vec3 value_b = texelFetch(source_texture, ivec3(base_position, i) + ivec3(0, 1, 0), 0).rgb * params.intensity;
+		vec3 value_c = texelFetch(source_texture, ivec3(base_position, i) + ivec3(1, 0, 0), 0).rgb * params.intensity;
+		vec3 value_d = texelFetch(source_texture, ivec3(base_position, i) + ivec3(1, 1, 0), 0).rgb * params.intensity;
+
+		// Calculate luminance (black and white value)
+		float a = dot(value_a, vec3(0.2125, 0.7154, 0.0721));
+		float b = dot(value_b, vec3(0.2125, 0.7154, 0.0721));
+		float c = dot(value_c, vec3(0.2125, 0.7154, 0.0721));
+		float d = dot(value_d, vec3(0.2125, 0.7154, 0.0721));
+
+		maxV = max(maxV, max(max(a, b), max(c, d)));
+		minV = min(minV, min(min(a, b), min(c, d)));
+	}
+
+	float min_max_diff = maxV - minV; // luminance range across the 2x2 block and all four layers
+
+	imageStore(dest_image, ssC, vec4(pow(clamp(min_max_diff * 2.0, 0.0, 1.0), 0.6))); // importance = doubled, clamped range raised to the power 0.6
+#endif
+
+#ifdef PROCESS_MAPA
+	vec2 uv = (vec2(ssC) + 0.5) * params.half_screen_pixel_size * 2.0;
+
+	float centre = textureLod(source_importance, uv, 0.0).x; // NOTE(review): centre is not read afterwards
+
+	vec2 half_pixel = params.half_screen_pixel_size;
+
+	vec4 vals;
+	vals.x = textureLod(source_importance, uv + vec2(-half_pixel.x * 3, -half_pixel.y), 0.0).x;
+	vals.y = textureLod(source_importance, uv + vec2(+half_pixel.x, -half_pixel.y * 3), 0.0).x;
+	vals.z = textureLod(source_importance, uv + vec2(+half_pixel.x * 3, +half_pixel.y), 0.0).x;
+	vals.w = textureLod(source_importance, uv + vec2(-half_pixel.x, +half_pixel.y * 3), 0.0).x;
+
+	float avg = dot(vals, vec4(0.25, 0.25, 0.25, 0.25)); // unweighted mean of the four taps
+
+	imageStore(dest_image, ssC, vec4(avg));
+#endif
+
+#ifdef PROCESS_MAPB
+	vec2 uv = (vec2(ssC) + 0.5f) * params.half_screen_pixel_size * 2.0;
+
+	float centre = textureLod(source_importance, uv, 0.0).x; // NOTE(review): centre is not read afterwards
+
+	vec2 half_pixel = params.half_screen_pixel_size;
+
+	vec4 vals; // same tap pattern as PROCESS_MAPA with the x and y offsets swapped
+	vals.x = textureLod(source_importance, uv + vec2(-half_pixel.x, -half_pixel.y * 3), 0.0).x;
+	vals.y = textureLod(source_importance, uv + vec2(+half_pixel.x * 3, -half_pixel.y), 0.0).x;
+	vals.z = textureLod(source_importance, uv + vec2(+half_pixel.x, +half_pixel.y * 3), 0.0).x;
+	vals.w = textureLod(source_importance, uv + vec2(-half_pixel.x * 3, +half_pixel.y), 0.0).x;
+
+	float avg = dot(vals, vec4(0.25, 0.25, 0.25, 0.25)); // unweighted mean of the four taps
+
+	imageStore(dest_image, ssC, vec4(avg));
+
+	// sum the average; to avoid overflowing we assume max AO resolution is not bigger than 16384x16384; so quarter res (used here) will be 4096x4096, which leaves us with 8 bits per pixel
+	uint sum = uint(clamp(avg, 0.0, 1.0) * 255.0 + 0.5); // quantize to 0..255 with rounding
+
+	// save every 9th to avoid atomicAdd (InterlockedAdd) congestion - since we're blurring, this is good enough; compensated by multiplying load_counter_avg_div by 9
+	if (((ssC.x % 3) + (ssC.y % 3)) == 0) {
+		atomicAdd(counter.sum, sum);
+	}
+#endif
+}
diff --git a/servers/rendering/renderer_rd/shaders/effects/ssil_interleave.glsl b/servers/rendering/renderer_rd/shaders/effects/ssil_interleave.glsl
new file mode 100644
index 0000000000..9e86ac0cf0
--- /dev/null
+++ b/servers/rendering/renderer_rd/shaders/effects/ssil_interleave.glsl
@@ -0,0 +1,122 @@
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2016, Intel Corporation
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+// documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to the following conditions:
+// The above copyright notice and this
permission notice shall be included in all copies or substantial portions of +// the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2016-09-07: filip.strugar@intel.com: first commit +// 2020-12-05: clayjohn: convert to Vulkan and Godot +// 2021-05-27: clayjohn: convert SSAO to SSIL +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[compute] + +#version 450 + +#VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +layout(rgba16, set = 0, binding = 0) uniform restrict writeonly image2D dest_image; +layout(set = 1, binding = 0) uniform sampler2DArray source_texture; +layout(r8, set = 2, binding = 0) uniform restrict readonly image2DArray source_edges; + +layout(push_constant, std430) uniform Params { + float inv_sharpness; + uint size_modifier; + vec2 pixel_size; +} +params; + +vec4 unpack_edges(float p_packed_val) { + uint packed_val = uint(p_packed_val * 255.5); + vec4 edgesLRTB; + edgesLRTB.x = float((packed_val >> 6) & 0x03) / 3.0; + edgesLRTB.y = float((packed_val >> 4) & 0x03) / 3.0; + edgesLRTB.z = float((packed_val >> 2) & 0x03) / 3.0; + edgesLRTB.w = float((packed_val >> 0) & 0x03) / 3.0; + + return clamp(edgesLRTB + params.inv_sharpness, 0.0, 1.0); +} + +void main() { + ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(ssC, ivec2(1.0 / 
params.pixel_size)))) { //too large, do nothing + return; + } + +#ifdef MODE_SMART + uvec2 pix_pos = uvec2(gl_GlobalInvocationID.xy); + vec2 uv = (gl_GlobalInvocationID.xy + vec2(0.5)) * params.pixel_size; + + // calculate index in the four deinterleaved source array texture + int mx = int(pix_pos.x % 2); + int my = int(pix_pos.y % 2); + int index_center = mx + my * 2; // center index + int index_horizontal = (1 - mx) + my * 2; // neighbouring, horizontal + int index_vertical = mx + (1 - my) * 2; // neighbouring, vertical + int index_diagonal = (1 - mx) + (1 - my) * 2; // diagonal + + vec4 color = texelFetch(source_texture, ivec3(pix_pos / uvec2(params.size_modifier), index_center), 0); + + vec4 edgesLRTB = unpack_edges(imageLoad(source_edges, ivec3(pix_pos / uvec2(params.size_modifier), index_center)).r); + + // convert index shifts to sampling offsets + float fmx = float(mx); + float fmy = float(my); + + // in case of an edge, push sampling offsets away from the edge (towards pixel center) + float fmxe = (edgesLRTB.y - edgesLRTB.x); + float fmye = (edgesLRTB.w - edgesLRTB.z); + + // calculate final sampling offsets and sample using bilinear filter + vec2 uv_horizontal = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(fmx + fmxe - 0.5, 0.5 - fmy)) * params.pixel_size; + vec4 color_horizontal = textureLod(source_texture, vec3(uv_horizontal, index_horizontal), 0.0); + vec2 uv_vertical = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(0.5 - fmx, fmy - 0.5 + fmye)) * params.pixel_size; + vec4 color_vertical = textureLod(source_texture, vec3(uv_vertical, index_vertical), 0.0); + vec2 uv_diagonal = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(fmx - 0.5 + fmxe, fmy - 0.5 + fmye)) * params.pixel_size; + vec4 color_diagonal = textureLod(source_texture, vec3(uv_diagonal, index_diagonal), 0.0); + + // reduce weight for samples near edge - if the edge is on both sides, weight goes to 0 + vec4 blendWeights; + blendWeights.x = 1.0; + blendWeights.y = (edgesLRTB.x + edgesLRTB.y) * 
0.5; + blendWeights.z = (edgesLRTB.z + edgesLRTB.w) * 0.5; + blendWeights.w = (blendWeights.y + blendWeights.z) * 0.5; + + // calculate weighted average + float blendWeightsSum = dot(blendWeights, vec4(1.0, 1.0, 1.0, 1.0)); + color += color_horizontal * blendWeights.y; + color += color_vertical * blendWeights.z; + color += color_diagonal * blendWeights.w; + color /= blendWeightsSum; + + imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), color); +#else // !MODE_SMART + + vec2 uv = (gl_GlobalInvocationID.xy + vec2(0.5)) * params.pixel_size; +#ifdef MODE_HALF + vec4 a = textureLod(source_texture, vec3(uv, 0), 0.0); + vec4 d = textureLod(source_texture, vec3(uv, 3), 0.0); + vec4 avg = (a + d) * 0.5; + +#else + vec4 a = textureLod(source_texture, vec3(uv, 0), 0.0); + vec4 b = textureLod(source_texture, vec3(uv, 1), 0.0); + vec4 c = textureLod(source_texture, vec3(uv, 2), 0.0); + vec4 d = textureLod(source_texture, vec3(uv, 3), 0.0); + vec4 avg = (a + b + c + d) * 0.25; + +#endif + imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), avg); +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/screen_space_reflection.glsl b/servers/rendering/renderer_rd/shaders/screen_space_reflection.glsl deleted file mode 100644 index a416891ff2..0000000000 --- a/servers/rendering/renderer_rd/shaders/screen_space_reflection.glsl +++ /dev/null @@ -1,244 +0,0 @@ -#[compute] - -#version 450 - -#VERSION_DEFINES - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -layout(rgba16f, set = 0, binding = 0) uniform restrict readonly image2D source_diffuse; -layout(r32f, set = 0, binding = 1) uniform restrict readonly image2D source_depth; -layout(rgba16f, set = 1, binding = 0) uniform restrict writeonly image2D ssr_image; -#ifdef MODE_ROUGH -layout(r8, set = 1, binding = 1) uniform restrict writeonly image2D blur_radius_image; -#endif -layout(rgba8, set = 2, binding = 0) uniform restrict readonly image2D source_normal_roughness; -layout(set = 3, binding = 0) 
uniform sampler2D source_metallic; - -layout(push_constant, std430) uniform Params { - vec4 proj_info; - - ivec2 screen_size; - float camera_z_near; - float camera_z_far; - - int num_steps; - float depth_tolerance; - float distance_fade; - float curve_fade_in; - - bool orthogonal; - float filter_mipmap_levels; - bool use_half_res; - uint metallic_mask; - - mat4 projection; -} -params; - -vec2 view_to_screen(vec3 view_pos, out float w) { - vec4 projected = params.projection * vec4(view_pos, 1.0); - projected.xyz /= projected.w; - projected.xy = projected.xy * 0.5 + 0.5; - w = projected.w; - return projected.xy; -} - -#define M_PI 3.14159265359 - -vec3 reconstructCSPosition(vec2 S, float z) { - if (params.orthogonal) { - return vec3((S.xy * params.proj_info.xy + params.proj_info.zw), z); - } else { - return vec3((S.xy * params.proj_info.xy + params.proj_info.zw) * z, z); - } -} - -void main() { - // Pixel being shaded - ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); - - if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing - return; - } - - vec2 pixel_size = 1.0 / vec2(params.screen_size); - vec2 uv = vec2(ssC) * pixel_size; - - uv += pixel_size * 0.5; - - float base_depth = imageLoad(source_depth, ssC).r; - - // World space point being shaded - vec3 vertex = reconstructCSPosition(uv * vec2(params.screen_size), base_depth); - - vec4 normal_roughness = imageLoad(source_normal_roughness, ssC); - vec3 normal = normal_roughness.xyz * 2.0 - 1.0; - normal = normalize(normal); - normal.y = -normal.y; //because this code reads flipped - - vec3 view_dir = normalize(vertex); - vec3 ray_dir = normalize(reflect(view_dir, normal)); - - if (dot(ray_dir, normal) < 0.001) { - imageStore(ssr_image, ssC, vec4(0.0)); - return; - } - //ray_dir = normalize(view_dir - normal * dot(normal,view_dir) * 2.0); - //ray_dir = normalize(vec3(1.0, 1.0, -1.0)); - - //////////////// - - // make ray length and clip it against the near plane (don't want to trace beyond visible) - 
float ray_len = (vertex.z + ray_dir.z * params.camera_z_far) > -params.camera_z_near ? (-params.camera_z_near - vertex.z) / ray_dir.z : params.camera_z_far; - vec3 ray_end = vertex + ray_dir * ray_len; - - float w_begin; - vec2 vp_line_begin = view_to_screen(vertex, w_begin); - float w_end; - vec2 vp_line_end = view_to_screen(ray_end, w_end); - vec2 vp_line_dir = vp_line_end - vp_line_begin; - - // we need to interpolate w along the ray, to generate perspective correct reflections - w_begin = 1.0 / w_begin; - w_end = 1.0 / w_end; - - float z_begin = vertex.z * w_begin; - float z_end = ray_end.z * w_end; - - vec2 line_begin = vp_line_begin / pixel_size; - vec2 line_dir = vp_line_dir / pixel_size; - float z_dir = z_end - z_begin; - float w_dir = w_end - w_begin; - - // clip the line to the viewport edges - - float scale_max_x = min(1.0, 0.99 * (1.0 - vp_line_begin.x) / max(1e-5, vp_line_dir.x)); - float scale_max_y = min(1.0, 0.99 * (1.0 - vp_line_begin.y) / max(1e-5, vp_line_dir.y)); - float scale_min_x = min(1.0, 0.99 * vp_line_begin.x / max(1e-5, -vp_line_dir.x)); - float scale_min_y = min(1.0, 0.99 * vp_line_begin.y / max(1e-5, -vp_line_dir.y)); - float line_clip = min(scale_max_x, scale_max_y) * min(scale_min_x, scale_min_y); - line_dir *= line_clip; - z_dir *= line_clip; - w_dir *= line_clip; - - // clip z and w advance to line advance - vec2 line_advance = normalize(line_dir); // down to pixel - float step_size = length(line_advance) / length(line_dir); - float z_advance = z_dir * step_size; // adapt z advance to line advance - float w_advance = w_dir * step_size; // adapt w advance to line advance - - // make line advance faster if direction is closer to pixel edges (this avoids sampling the same pixel twice) - float advance_angle_adj = 1.0 / max(abs(line_advance.x), abs(line_advance.y)); - line_advance *= advance_angle_adj; // adapt z advance to line advance - z_advance *= advance_angle_adj; - w_advance *= advance_angle_adj; - - vec2 pos = line_begin; - 
float z = z_begin; - float w = w_begin; - float z_from = z / w; - float z_to = z_from; - float depth; - vec2 prev_pos = pos; - - bool found = false; - - float steps_taken = 0.0; - - for (int i = 0; i < params.num_steps; i++) { - pos += line_advance; - z += z_advance; - w += w_advance; - - // convert to linear depth - - depth = imageLoad(source_depth, ivec2(pos - 0.5)).r; - - z_from = z_to; - z_to = z / w; - - if (depth > z_to) { - // if depth was surpassed - if (depth <= max(z_to, z_from) + params.depth_tolerance && -depth < params.camera_z_far) { - // check the depth tolerance and far clip - // check that normal is valid - found = true; - } - break; - } - - steps_taken += 1.0; - prev_pos = pos; - } - - if (found) { - float margin_blend = 1.0; - - vec2 margin = vec2((params.screen_size.x + params.screen_size.y) * 0.5 * 0.05); // make a uniform margin - if (any(bvec4(lessThan(pos, -margin), greaterThan(pos, params.screen_size + margin)))) { - // clip outside screen + margin - imageStore(ssr_image, ssC, vec4(0.0)); - return; - } - - { - //blend fading out towards external margin - vec2 margin_grad = mix(pos - params.screen_size, -pos, lessThan(pos, vec2(0.0))); - margin_blend = 1.0 - smoothstep(0.0, margin.x, max(margin_grad.x, margin_grad.y)); - //margin_blend = 1.0; - } - - vec2 final_pos; - float grad = (steps_taken + 1.0) / float(params.num_steps); - float initial_fade = params.curve_fade_in == 0.0 ? 
1.0 : pow(clamp(grad, 0.0, 1.0), params.curve_fade_in); - float fade = pow(clamp(1.0 - grad, 0.0, 1.0), params.distance_fade) * initial_fade; - final_pos = pos; - - vec4 final_color; - -#ifdef MODE_ROUGH - - // if roughness is enabled, do screen space cone tracing - float blur_radius = 0.0; - float roughness = normal_roughness.w; - - if (roughness > 0.001) { - float cone_angle = min(roughness, 0.999) * M_PI * 0.5; - float cone_len = length(final_pos - line_begin); - float op_len = 2.0 * tan(cone_angle) * cone_len; // opposite side of iso triangle - { - // fit to sphere inside cone (sphere ends at end of cone), something like this: - // ___ - // \O/ - // V - // - // as it avoids bleeding from beyond the reflection as much as possible. As a plus - // it also makes the rough reflection more elongated. - float a = op_len; - float h = cone_len; - float a2 = a * a; - float fh2 = 4.0f * h * h; - blur_radius = (a * (sqrt(a2 + fh2) - a)) / (4.0f * h); - } - } - final_color = imageLoad(source_diffuse, ivec2((final_pos - 0.5) * pixel_size)); - - imageStore(blur_radius_image, ssC, vec4(blur_radius / 255.0)); //stored in r8 - -#endif - - final_color = vec4(imageLoad(source_diffuse, ivec2(final_pos - 0.5)).rgb, fade * margin_blend); - //change blend by metallic - vec4 metallic_mask = unpackUnorm4x8(params.metallic_mask); - final_color.a *= dot(metallic_mask, texelFetch(source_metallic, ssC << 1, 0)); - - imageStore(ssr_image, ssC, final_color); - - } else { -#ifdef MODE_ROUGH - imageStore(blur_radius_image, ssC, vec4(0.0)); -#endif - imageStore(ssr_image, ssC, vec4(0.0)); - } -} diff --git a/servers/rendering/renderer_rd/shaders/screen_space_reflection_filter.glsl b/servers/rendering/renderer_rd/shaders/screen_space_reflection_filter.glsl deleted file mode 100644 index 20e1712496..0000000000 --- a/servers/rendering/renderer_rd/shaders/screen_space_reflection_filter.glsl +++ /dev/null @@ -1,154 +0,0 @@ -#[compute] - -#version 450 - -#VERSION_DEFINES - -layout(local_size_x = 8, 
local_size_y = 8, local_size_z = 1) in; - -layout(rgba16f, set = 0, binding = 0) uniform restrict readonly image2D source_ssr; -layout(r8, set = 0, binding = 1) uniform restrict readonly image2D source_radius; -layout(rgba8, set = 1, binding = 0) uniform restrict readonly image2D source_normal; - -layout(rgba16f, set = 2, binding = 0) uniform restrict writeonly image2D dest_ssr; -#ifndef VERTICAL_PASS -layout(r8, set = 2, binding = 1) uniform restrict writeonly image2D dest_radius; -#endif -layout(r32f, set = 3, binding = 0) uniform restrict readonly image2D source_depth; - -layout(push_constant, std430) uniform Params { - vec4 proj_info; - - bool orthogonal; - float edge_tolerance; - int increment; - uint pad; - - ivec2 screen_size; - bool vertical; - uint steps; -} -params; - -#define GAUSS_TABLE_SIZE 15 - -const float gauss_table[GAUSS_TABLE_SIZE + 1] = float[]( - 0.1847392078702266, - 0.16595854345772326, - 0.12031364177766891, - 0.07038755277896766, - 0.03322925565155569, - 0.012657819729901945, - 0.0038903040680094217, - 0.0009646503390864025, - 0.00019297087402915717, - 0.000031139936308099136, - 0.000004053309048174758, - 4.255228059965837e-7, - 3.602517634249573e-8, - 2.4592560765896795e-9, - 1.3534945386863618e-10, - 0.0 //one more for interpolation -); - -float gauss_weight(float p_val) { - float idxf; - float c = modf(max(0.0, p_val * float(GAUSS_TABLE_SIZE)), idxf); - int idx = int(idxf); - if (idx >= GAUSS_TABLE_SIZE + 1) { - return 0.0; - } - - return mix(gauss_table[idx], gauss_table[idx + 1], c); -} - -#define M_PI 3.14159265359 - -vec3 reconstructCSPosition(vec2 S, float z) { - if (params.orthogonal) { - return vec3((S.xy * params.proj_info.xy + params.proj_info.zw), z); - } else { - return vec3((S.xy * params.proj_info.xy + params.proj_info.zw) * z, z); - } -} - -void do_filter(inout vec4 accum, inout float accum_radius, inout float divisor, ivec2 texcoord, ivec2 increment, vec3 p_pos, vec3 normal, float p_limit_radius) { - for (int i = 1; i < 
params.steps; i++) { - float d = float(i * params.increment); - ivec2 tc = texcoord + increment * i; - float depth = imageLoad(source_depth, tc).r; - vec3 view_pos = reconstructCSPosition(vec2(tc) + 0.5, depth); - vec3 view_normal = normalize(imageLoad(source_normal, tc).rgb * 2.0 - 1.0); - view_normal.y = -view_normal.y; - - float r = imageLoad(source_radius, tc).r; - float radius = round(r * 255.0); - - float angle_n = 1.0 - abs(dot(normal, view_normal)); - if (angle_n > params.edge_tolerance) { - break; - } - - float angle = abs(dot(normal, normalize(view_pos - p_pos))); - - if (angle > params.edge_tolerance) { - break; - } - - if (d < radius) { - float w = gauss_weight(d / radius); - accum += imageLoad(source_ssr, tc) * w; -#ifndef VERTICAL_PASS - accum_radius += r * w; -#endif - divisor += w; - } - } -} - -void main() { - // Pixel being shaded - ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); - - if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing - return; - } - - float base_contrib = gauss_table[0]; - - vec4 accum = imageLoad(source_ssr, ssC); - - float accum_radius = imageLoad(source_radius, ssC).r; - float radius = accum_radius * 255.0; - - float divisor = gauss_table[0]; - accum *= divisor; - accum_radius *= divisor; -#ifdef VERTICAL_PASS - ivec2 direction = ivec2(0, params.increment); -#else - ivec2 direction = ivec2(params.increment, 0); -#endif - float depth = imageLoad(source_depth, ssC).r; - vec3 pos = reconstructCSPosition(vec2(ssC) + 0.5, depth); - vec3 normal = imageLoad(source_normal, ssC).xyz * 2.0 - 1.0; - normal = normalize(normal); - normal.y = -normal.y; - - do_filter(accum, accum_radius, divisor, ssC, direction, pos, normal, radius); - do_filter(accum, accum_radius, divisor, ssC, -direction, pos, normal, radius); - - if (divisor > 0.0) { - accum /= divisor; - accum_radius /= divisor; - } else { - accum = vec4(0.0); - accum_radius = 0.0; - } - - imageStore(dest_ssr, ssC, accum); - -#ifndef VERTICAL_PASS - 
imageStore(dest_radius, ssC, vec4(accum_radius)); -#endif -} diff --git a/servers/rendering/renderer_rd/shaders/screen_space_reflection_scale.glsl b/servers/rendering/renderer_rd/shaders/screen_space_reflection_scale.glsl deleted file mode 100644 index 3f537e273a..0000000000 --- a/servers/rendering/renderer_rd/shaders/screen_space_reflection_scale.glsl +++ /dev/null @@ -1,90 +0,0 @@ -#[compute] - -#version 450 - -#VERSION_DEFINES - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -layout(set = 0, binding = 0) uniform sampler2D source_ssr; -layout(set = 1, binding = 0) uniform sampler2D source_depth; -layout(set = 1, binding = 1) uniform sampler2D source_normal; -layout(rgba16f, set = 2, binding = 0) uniform restrict writeonly image2D dest_ssr; -layout(r32f, set = 3, binding = 0) uniform restrict writeonly image2D dest_depth; -layout(rgba8, set = 3, binding = 1) uniform restrict writeonly image2D dest_normal; - -layout(push_constant, std430) uniform Params { - ivec2 screen_size; - float camera_z_near; - float camera_z_far; - - bool orthogonal; - bool filtered; - uint pad[2]; -} -params; - -void main() { - // Pixel being shaded - ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); - - if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing - return; - } - //do not filter, SSR will generate arctifacts if this is done - - float divisor = 0.0; - vec4 color; - float depth; - vec4 normal; - - if (params.filtered) { - color = vec4(0.0); - depth = 0.0; - normal = vec4(0.0); - - for (int i = 0; i < 4; i++) { - ivec2 ofs = ssC << 1; - if (bool(i & 1)) { - ofs.x += 1; - } - if (bool(i & 2)) { - ofs.y += 1; - } - color += texelFetch(source_ssr, ofs, 0); - float d = texelFetch(source_depth, ofs, 0).r; - vec4 nr = texelFetch(source_normal, ofs, 0); - normal.xyz += nr.xyz * 2.0 - 1.0; - normal.w += nr.w; - - d = d * 2.0 - 1.0; - if (params.orthogonal) { - d = ((d + (params.camera_z_far + params.camera_z_near) / (params.camera_z_far - 
params.camera_z_near)) * (params.camera_z_far - params.camera_z_near)) / 2.0; - } else { - d = 2.0 * params.camera_z_near * params.camera_z_far / (params.camera_z_far + params.camera_z_near - d * (params.camera_z_far - params.camera_z_near)); - } - depth += -d; - } - - color /= 4.0; - depth /= 4.0; - normal.xyz = normalize(normal.xyz / 4.0) * 0.5 + 0.5; - normal.w /= 4.0; - } else { - color = texelFetch(source_ssr, ssC << 1, 0); - depth = texelFetch(source_depth, ssC << 1, 0).r; - normal = texelFetch(source_normal, ssC << 1, 0); - - depth = depth * 2.0 - 1.0; - if (params.orthogonal) { - depth = ((depth + (params.camera_z_far + params.camera_z_near) / (params.camera_z_far - params.camera_z_near)) * (params.camera_z_far - params.camera_z_near)) / 2.0; - } else { - depth = 2.0 * params.camera_z_near * params.camera_z_far / (params.camera_z_far + params.camera_z_near - depth * (params.camera_z_far - params.camera_z_near)); - } - depth = -depth; - } - - imageStore(dest_ssr, ssC, color); - imageStore(dest_depth, ssC, vec4(depth)); - imageStore(dest_normal, ssC, normal); -} diff --git a/servers/rendering/renderer_rd/shaders/specular_merge.glsl b/servers/rendering/renderer_rd/shaders/specular_merge.glsl deleted file mode 100644 index 3579c35cce..0000000000 --- a/servers/rendering/renderer_rd/shaders/specular_merge.glsl +++ /dev/null @@ -1,53 +0,0 @@ -#[vertex] - -#version 450 - -#VERSION_DEFINES - -layout(location = 0) out vec2 uv_interp; - -void main() { - vec2 base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0)); - uv_interp = base_arr[gl_VertexIndex]; - - gl_Position = vec4(uv_interp * 2.0 - 1.0, 0.0, 1.0); -} - -#[fragment] - -#version 450 - -#VERSION_DEFINES - -layout(location = 0) in vec2 uv_interp; - -layout(set = 0, binding = 0) uniform sampler2D specular; - -#ifdef MODE_SSR - -layout(set = 1, binding = 0) uniform sampler2D ssr; - -#endif - -#ifdef MODE_MERGE - -layout(set = 2, binding = 0) uniform sampler2D diffuse; - -#endif - 
-layout(location = 0) out vec4 frag_color; - -void main() { - frag_color.rgb = texture(specular, uv_interp).rgb; - frag_color.a = 0.0; -#ifdef MODE_SSR - - vec4 ssr_color = texture(ssr, uv_interp); - frag_color.rgb = mix(frag_color.rgb, ssr_color.rgb, ssr_color.a); -#endif - -#ifdef MODE_MERGE - frag_color += texture(diffuse, uv_interp); -#endif - //added using additive blend -} diff --git a/servers/rendering/renderer_rd/shaders/ss_effects_downsample.glsl b/servers/rendering/renderer_rd/shaders/ss_effects_downsample.glsl deleted file mode 100644 index 134aae5ce7..0000000000 --- a/servers/rendering/renderer_rd/shaders/ss_effects_downsample.glsl +++ /dev/null @@ -1,229 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2016, Intel Corporation -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -// documentation files (the "Software"), to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to -// permit persons to whom the Software is furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of -// the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// File changes (yyyy-mm-dd) -// 2016-09-07: filip.strugar@intel.com: first commit -// 2020-12-05: clayjohn: convert to Vulkan and Godot -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[compute] - -#version 450 - -#VERSION_DEFINES - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -layout(push_constant, std430) uniform Params { - vec2 pixel_size; - float z_far; - float z_near; - bool orthogonal; - float radius_sq; - uvec2 pad; -} -params; - -layout(set = 0, binding = 0) uniform sampler2D source_depth; - -layout(r16f, set = 1, binding = 0) uniform restrict writeonly image2DArray dest_image0; //rename -#ifdef GENERATE_MIPS -layout(r16f, set = 2, binding = 0) uniform restrict writeonly image2DArray dest_image1; -layout(r16f, set = 2, binding = 1) uniform restrict writeonly image2DArray dest_image2; -layout(r16f, set = 2, binding = 2) uniform restrict writeonly image2DArray dest_image3; -#ifdef GENERATE_FULL_MIPS -layout(r16f, set = 2, binding = 3) uniform restrict writeonly image2DArray dest_image4; -#endif -#endif - -vec4 screen_space_to_view_space_depth(vec4 p_depth) { - if (params.orthogonal) { - vec4 depth = p_depth * 2.0 - 1.0; - return ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; - } - - float depth_linearize_mul = params.z_near; - float depth_linearize_add = params.z_far; - - // Optimised version of "-cameraClipNear / (cameraClipFar - projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar" - - // Set your depth_linearize_mul and depth_linearize_add to: - // depth_linearize_mul = ( cameraClipFar * cameraClipNear) / ( cameraClipFar - cameraClipNear ); - // depth_linearize_add = cameraClipFar / ( cameraClipFar - cameraClipNear ); - - return depth_linearize_mul / 
(depth_linearize_add - p_depth); -} - -float screen_space_to_view_space_depth(float p_depth) { - if (params.orthogonal) { - float depth = p_depth * 2.0 - 1.0; - return ((depth + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / (2.0 * params.z_far); - } - - float depth_linearize_mul = params.z_near; - float depth_linearize_add = params.z_far; - - return depth_linearize_mul / (depth_linearize_add - p_depth); -} - -#ifdef GENERATE_MIPS - -shared float depth_buffer[4][8][8]; - -float mip_smart_average(vec4 p_depths) { - float closest = min(min(p_depths.x, p_depths.y), min(p_depths.z, p_depths.w)); - float fallof_sq = -1.0f / params.radius_sq; - vec4 dists = p_depths - closest.xxxx; - vec4 weights = clamp(dists * dists * fallof_sq + 1.0, 0.0, 1.0); - return dot(weights, p_depths) / dot(weights, vec4(1.0, 1.0, 1.0, 1.0)); -} - -void prepare_depths_and_mips(vec4 p_samples, uvec2 p_output_coord, uvec2 p_gtid) { - p_samples = screen_space_to_view_space_depth(p_samples); - - depth_buffer[0][p_gtid.x][p_gtid.y] = p_samples.w; - depth_buffer[1][p_gtid.x][p_gtid.y] = p_samples.z; - depth_buffer[2][p_gtid.x][p_gtid.y] = p_samples.x; - depth_buffer[3][p_gtid.x][p_gtid.y] = p_samples.y; - - imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 0), vec4(p_samples.w)); - imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 1), vec4(p_samples.z)); - imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 2), vec4(p_samples.x)); - imageStore(dest_image0, ivec3(p_output_coord.x, p_output_coord.y, 3), vec4(p_samples.y)); - - uint depth_array_index = 2 * (p_gtid.y % 2) + (p_gtid.x % 2); - uvec2 depth_array_offset = ivec2(p_gtid.x % 2, p_gtid.y % 2); - ivec2 buffer_coord = ivec2(p_gtid) - ivec2(depth_array_offset); - - p_output_coord /= 2; - groupMemoryBarrier(); - barrier(); - - // if (still_alive) <-- all threads alive here - { - float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 
0][buffer_coord.y + 0]; - float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 1]; - float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 1][buffer_coord.y + 0]; - float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 1][buffer_coord.y + 1]; - - float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11)); - imageStore(dest_image1, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg)); - depth_buffer[depth_array_index][buffer_coord.x][buffer_coord.y] = avg; - } - - bool still_alive = p_gtid.x % 4 == depth_array_offset.x && p_gtid.y % 4 == depth_array_offset.y; - - p_output_coord /= 2; - groupMemoryBarrier(); - barrier(); - - if (still_alive) { - float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 0]; - float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 2]; - float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 2][buffer_coord.y + 0]; - float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 2][buffer_coord.y + 2]; - - float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11)); - imageStore(dest_image2, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg)); - depth_buffer[depth_array_index][buffer_coord.x][buffer_coord.y] = avg; - } - - still_alive = p_gtid.x % 8 == depth_array_offset.x && depth_array_offset.y % 8 == depth_array_offset.y; - - p_output_coord /= 2; - groupMemoryBarrier(); - barrier(); - - if (still_alive) { - float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 0]; - float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 4]; - float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 4][buffer_coord.y + 0]; - float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 4][buffer_coord.y + 4]; - - float avg = mip_smart_average(vec4(sample_00, 
sample_01, sample_10, sample_11)); - imageStore(dest_image3, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg)); -#ifndef GENERATE_FULL_MIPS - } -#else - depth_buffer[depth_array_index][buffer_coord.x][buffer_coord.y] = avg; - } - still_alive = p_gtid.x % 16 == depth_array_offset.x && depth_array_offset.y % 16 == depth_array_offset.y; - - p_output_coord /= 2; - groupMemoryBarrier(); - barrier(); - - if (still_alive) { - float sample_00 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 0]; - float sample_01 = depth_buffer[depth_array_index][buffer_coord.x + 0][buffer_coord.y + 8]; - float sample_10 = depth_buffer[depth_array_index][buffer_coord.x + 8][buffer_coord.y + 0]; - float sample_11 = depth_buffer[depth_array_index][buffer_coord.x + 8][buffer_coord.y + 8]; - - float avg = mip_smart_average(vec4(sample_00, sample_01, sample_10, sample_11)); - imageStore(dest_image4, ivec3(p_output_coord.x, p_output_coord.y, depth_array_index), vec4(avg)); - } -#endif -} -#else -#ifndef USE_HALF_BUFFERS -void prepare_depths(vec4 p_samples, uvec2 p_tid) { - p_samples = screen_space_to_view_space_depth(p_samples); - - imageStore(dest_image0, ivec3(p_tid, 0), vec4(p_samples.w)); - imageStore(dest_image0, ivec3(p_tid, 1), vec4(p_samples.z)); - imageStore(dest_image0, ivec3(p_tid, 2), vec4(p_samples.x)); - imageStore(dest_image0, ivec3(p_tid, 3), vec4(p_samples.y)); -} -#endif -#endif - -void main() { -#ifdef USE_HALF_BUFFERS -#ifdef USE_HALF_SIZE - float sample_00 = texelFetch(source_depth, ivec2(4 * gl_GlobalInvocationID.x + 0, 4 * gl_GlobalInvocationID.y + 0), 0).x; - float sample_11 = texelFetch(source_depth, ivec2(4 * gl_GlobalInvocationID.x + 2, 4 * gl_GlobalInvocationID.y + 2), 0).x; -#else - float sample_00 = texelFetch(source_depth, ivec2(2 * gl_GlobalInvocationID.x + 0, 2 * gl_GlobalInvocationID.y + 0), 0).x; - float sample_11 = texelFetch(source_depth, ivec2(2 * gl_GlobalInvocationID.x + 1, 2 * gl_GlobalInvocationID.y + 1), 0).x; 
-#endif - sample_00 = screen_space_to_view_space_depth(sample_00); - sample_11 = screen_space_to_view_space_depth(sample_11); - - imageStore(dest_image0, ivec3(gl_GlobalInvocationID.xy, 0), vec4(sample_00)); - imageStore(dest_image0, ivec3(gl_GlobalInvocationID.xy, 3), vec4(sample_11)); -#else //!USE_HALF_BUFFERS -#ifdef USE_HALF_SIZE - ivec2 depth_buffer_coord = 4 * ivec2(gl_GlobalInvocationID.xy); - ivec2 output_coord = ivec2(gl_GlobalInvocationID); - - vec2 uv = (vec2(depth_buffer_coord) + 0.5f) * params.pixel_size; - vec4 samples; - samples.x = textureLodOffset(source_depth, uv, 0, ivec2(0, 2)).x; - samples.y = textureLodOffset(source_depth, uv, 0, ivec2(2, 2)).x; - samples.z = textureLodOffset(source_depth, uv, 0, ivec2(2, 0)).x; - samples.w = textureLodOffset(source_depth, uv, 0, ivec2(0, 0)).x; -#else - ivec2 depth_buffer_coord = 2 * ivec2(gl_GlobalInvocationID.xy); - ivec2 output_coord = ivec2(gl_GlobalInvocationID); - - vec2 uv = (vec2(depth_buffer_coord) + 0.5f) * params.pixel_size; - vec4 samples = textureGather(source_depth, uv); -#endif -#ifdef GENERATE_MIPS - prepare_depths_and_mips(samples, output_coord, gl_LocalInvocationID.xy); -#else - prepare_depths(samples, gl_GlobalInvocationID.xy); -#endif -#endif -} diff --git a/servers/rendering/renderer_rd/shaders/ssao.glsl b/servers/rendering/renderer_rd/shaders/ssao.glsl deleted file mode 100644 index 2a87e273bc..0000000000 --- a/servers/rendering/renderer_rd/shaders/ssao.glsl +++ /dev/null @@ -1,483 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2016, Intel Corporation -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -// documentation files (the "Software"), to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 
and to -// permit persons to whom the Software is furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of -// the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// File changes (yyyy-mm-dd) -// 2016-09-07: filip.strugar@intel.com: first commit -// 2020-12-05: clayjohn: convert to Vulkan and Godot -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[compute] - -#version 450 - -#VERSION_DEFINES - -#define INTELSSAO_MAIN_DISK_SAMPLE_COUNT (32) -const vec4 sample_pattern[INTELSSAO_MAIN_DISK_SAMPLE_COUNT] = { - vec4(0.78488064, 0.56661671, 1.500000, -0.126083), vec4(0.26022232, -0.29575172, 1.500000, -1.064030), vec4(0.10459357, 0.08372527, 1.110000, -2.730563), vec4(-0.68286800, 0.04963045, 1.090000, -0.498827), - vec4(-0.13570161, -0.64190155, 1.250000, -0.532765), vec4(-0.26193795, -0.08205118, 0.670000, -1.783245), vec4(-0.61177456, 0.66664219, 0.710000, -0.044234), vec4(0.43675563, 0.25119025, 0.610000, -1.167283), - vec4(0.07884444, 0.86618668, 0.640000, -0.459002), vec4(-0.12790935, -0.29869005, 0.600000, -1.729424), vec4(-0.04031125, 0.02413622, 0.600000, -4.792042), vec4(0.16201244, -0.52851415, 0.790000, -1.067055), - vec4(-0.70991218, 0.47301072, 0.640000, -0.335236), vec4(0.03277707, -0.22349690, 0.600000, 
-1.982384), vec4(0.68921727, 0.36800742, 0.630000, -0.266718), vec4(0.29251814, 0.37775412, 0.610000, -1.422520), - vec4(-0.12224089, 0.96582592, 0.600000, -0.426142), vec4(0.11071457, -0.16131058, 0.600000, -2.165947), vec4(0.46562141, -0.59747696, 0.600000, -0.189760), vec4(-0.51548797, 0.11804193, 0.600000, -1.246800), - vec4(0.89141309, -0.42090443, 0.600000, 0.028192), vec4(-0.32402530, -0.01591529, 0.600000, -1.543018), vec4(0.60771245, 0.41635221, 0.600000, -0.605411), vec4(0.02379565, -0.08239821, 0.600000, -3.809046), - vec4(0.48951152, -0.23657045, 0.600000, -1.189011), vec4(-0.17611565, -0.81696892, 0.600000, -0.513724), vec4(-0.33930185, -0.20732205, 0.600000, -1.698047), vec4(-0.91974425, 0.05403209, 0.600000, 0.062246), - vec4(-0.15064627, -0.14949332, 0.600000, -1.896062), vec4(0.53180975, -0.35210401, 0.600000, -0.758838), vec4(0.41487166, 0.81442589, 0.600000, -0.505648), vec4(-0.24106961, -0.32721516, 0.600000, -1.665244) -}; - -// these values can be changed (up to SSAO_MAX_TAPS) with no changes required elsewhere; values for 4th and 5th preset are ignored but array needed to avoid compilation errors -// the actual number of texture samples is two times this value (each "tap" has two symmetrical depth texture samples) -const int num_taps[5] = { 3, 5, 12, 0, 0 }; - -#define SSAO_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET (99) // to disable simply set to 99 or similar -#define SSAO_TILT_SAMPLES_AMOUNT (0.4) -// -#define SSAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET (1) // to disable simply set to 99 or similar -#define SSAO_HALOING_REDUCTION_AMOUNT (0.6) // values from 0.0 - 1.0, 1.0 means max weighting (will cause artifacts, 0.8 is more reasonable) -// -#define SSAO_NORMAL_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (2) // to disable simply set to 99 or similar -#define SSAO_NORMAL_BASED_EDGES_DOT_THRESHOLD (0.5) // use 0-0.1 for super-sharp normal-based edges -// -#define SSAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET (1) // whether to use detail; to disable 
simply set to 99 or similar -// -#define SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET (2) // !!warning!! the MIP generation on the C++ side will be enabled on quality preset 2 regardless of this value, so if changing here, change the C++ side too -#define SSAO_DEPTH_MIPS_GLOBAL_OFFSET (-4.3) // best noise/quality/performance tradeoff, found empirically -// -// !!warning!! the edge handling is hard-coded to 'disabled' on quality level 0, and enabled above, on the C++ side; while toggling it here will work for -// testing purposes, it will not yield performance gains (or correct results) -#define SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (1) -// -#define SSAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET (1) - -#define SSAO_MAX_TAPS 32 -#define SSAO_ADAPTIVE_TAP_BASE_COUNT 5 -#define SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT (SSAO_MAX_TAPS - SSAO_ADAPTIVE_TAP_BASE_COUNT) -#define SSAO_DEPTH_MIP_LEVELS 4 - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -layout(set = 0, binding = 0) uniform sampler2DArray source_depth_mipmaps; -layout(rgba8, set = 0, binding = 1) uniform restrict readonly image2D source_normal; -layout(set = 0, binding = 2) uniform Constants { //get into a lower set - vec4 rotation_matrices[20]; -} -constants; - -#ifdef ADAPTIVE -layout(rg8, set = 1, binding = 0) uniform restrict readonly image2DArray source_ssao; -layout(set = 1, binding = 1) uniform sampler2D source_importance; -layout(set = 1, binding = 2, std430) buffer Counter { - uint sum; -} -counter; -#endif - -layout(rg8, set = 2, binding = 0) uniform restrict writeonly image2D dest_image; - -// This push_constant is full - 128 bytes - if you need to add more data, consider adding to the uniform buffer instead -layout(push_constant, std430) uniform Params { - ivec2 screen_size; - int pass; - int quality; - - vec2 half_screen_pixel_size; - int size_multiplier; - float detail_intensity; - - vec2 NDC_to_view_mul; - vec2 NDC_to_view_add; - - vec2 pad2; - vec2 
half_screen_pixel_size_x025; - - float radius; - float intensity; - float shadow_power; - float shadow_clamp; - - float fade_out_mul; - float fade_out_add; - float horizon_angle_threshold; - float inv_radius_near_limit; - - bool is_orthogonal; - float neg_inv_radius; - float load_counter_avg_div; - float adaptive_sample_limit; - - ivec2 pass_coord_offset; - vec2 pass_uv_offset; -} -params; - -// packing/unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions! -float pack_edges(vec4 p_edgesLRTB) { - p_edgesLRTB = round(clamp(p_edgesLRTB, 0.0, 1.0) * 3.05); - return dot(p_edgesLRTB, vec4(64.0 / 255.0, 16.0 / 255.0, 4.0 / 255.0, 1.0 / 255.0)); -} - -vec3 NDC_to_view_space(vec2 p_pos, float p_viewspace_depth) { - if (params.is_orthogonal) { - return vec3((params.NDC_to_view_mul * p_pos.xy + params.NDC_to_view_add), p_viewspace_depth); - } else { - return vec3((params.NDC_to_view_mul * p_pos.xy + params.NDC_to_view_add) * p_viewspace_depth, p_viewspace_depth); - } -} - -// calculate effect radius and fit our screen sampling pattern inside it -void calculate_radius_parameters(const float p_pix_center_length, const vec2 p_pixel_size_at_center, out float r_lookup_radius, out float r_radius, out float r_fallof_sq) { - r_radius = params.radius; - - // when too close, on-screen sampling disk will grow beyond screen size; limit this to avoid closeup temporal artifacts - const float too_close_limit = clamp(p_pix_center_length * params.inv_radius_near_limit, 0.0, 1.0) * 0.8 + 0.2; - - r_radius *= too_close_limit; - - // 0.85 is to reduce the radius to allow for more samples on a slope to still stay within influence - r_lookup_radius = (0.85 * r_radius) / p_pixel_size_at_center.x; - - // used to calculate falloff (both for AO samples and per-sample weights) - r_fallof_sq = -1.0 / (r_radius * r_radius); -} - -vec4 calculate_edges(const float p_center_z, const float p_left_z, const float p_right_z, const float p_top_z, const float 
p_bottom_z) { - // slope-sensitive depth-based edge detection - vec4 edgesLRTB = vec4(p_left_z, p_right_z, p_top_z, p_bottom_z) - p_center_z; - vec4 edgesLRTB_slope_adjusted = edgesLRTB + edgesLRTB.yxwz; - edgesLRTB = min(abs(edgesLRTB), abs(edgesLRTB_slope_adjusted)); - return clamp((1.3 - edgesLRTB / (p_center_z * 0.040)), 0.0, 1.0); -} - -vec3 decode_normal(vec3 p_encoded_normal) { - vec3 normal = p_encoded_normal * 2.0 - 1.0; - return normal; -} - -vec3 load_normal(ivec2 p_pos) { - vec3 encoded_normal = imageLoad(source_normal, p_pos).xyz; - encoded_normal.z = 1.0 - encoded_normal.z; - return decode_normal(encoded_normal); -} - -vec3 load_normal(ivec2 p_pos, ivec2 p_offset) { - vec3 encoded_normal = imageLoad(source_normal, p_pos + p_offset).xyz; - encoded_normal.z = 1.0 - encoded_normal.z; - return decode_normal(encoded_normal); -} - -// all vectors in viewspace -float calculate_pixel_obscurance(vec3 p_pixel_normal, vec3 p_hit_delta, float p_fallof_sq) { - float length_sq = dot(p_hit_delta, p_hit_delta); - float NdotD = dot(p_pixel_normal, p_hit_delta) / sqrt(length_sq); - - float falloff_mult = max(0.0, length_sq * p_fallof_sq + 1.0); - - return max(0, NdotD - params.horizon_angle_threshold) * falloff_mult; -} - -void SSAO_tap_inner(const int p_quality_level, inout float r_obscurance_sum, inout float r_weight_sum, const vec2 p_sampling_uv, const float p_mip_level, const vec3 p_pix_center_pos, vec3 p_pixel_normal, const float p_fallof_sq, const float p_weight_mod) { - // get depth at sample - float viewspace_sample_z = textureLod(source_depth_mipmaps, vec3(p_sampling_uv, params.pass), p_mip_level).x; - - // convert to viewspace - vec3 hit_pos = NDC_to_view_space(p_sampling_uv.xy, viewspace_sample_z).xyz; - vec3 hit_delta = hit_pos - p_pix_center_pos; - - float obscurance = calculate_pixel_obscurance(p_pixel_normal, hit_delta, p_fallof_sq); - float weight = 1.0; - - if (p_quality_level >= SSAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET) { - float reduct = 
max(0, -hit_delta.z); - reduct = clamp(reduct * params.neg_inv_radius + 2.0, 0.0, 1.0); - weight = SSAO_HALOING_REDUCTION_AMOUNT * reduct + (1.0 - SSAO_HALOING_REDUCTION_AMOUNT); - } - weight *= p_weight_mod; - r_obscurance_sum += obscurance * weight; - r_weight_sum += weight; -} - -void SSAOTap(const int p_quality_level, inout float r_obscurance_sum, inout float r_weight_sum, const int p_tap_index, const mat2 p_rot_scale, const vec3 p_pix_center_pos, vec3 p_pixel_normal, const vec2 p_normalized_screen_pos, const float p_mip_offset, const float p_fallof_sq, float p_weight_mod, vec2 p_norm_xy, float p_norm_xy_length) { - vec2 sample_offset; - float sample_pow_2_len; - - // patterns - { - vec4 new_sample = sample_pattern[p_tap_index]; - sample_offset = new_sample.xy * p_rot_scale; - sample_pow_2_len = new_sample.w; // precalculated, same as: sample_pow_2_len = log2( length( new_sample.xy ) ); - p_weight_mod *= new_sample.z; - } - - // snap to pixel center (more correct obscurance math, avoids artifacts) - sample_offset = round(sample_offset); - - // calculate MIP based on the sample distance from the centre, similar to as described - // in http://graphics.cs.williams.edu/papers/SAOHPG12/. - float mip_level = (p_quality_level < SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? 
(0) : (sample_pow_2_len + p_mip_offset); - - vec2 sampling_uv = sample_offset * params.half_screen_pixel_size + p_normalized_screen_pos; - - SSAO_tap_inner(p_quality_level, r_obscurance_sum, r_weight_sum, sampling_uv, mip_level, p_pix_center_pos, p_pixel_normal, p_fallof_sq, p_weight_mod); - - // for the second tap, just use the mirrored offset - vec2 sample_offset_mirrored_uv = -sample_offset; - - // tilt the second set of samples so that the disk is effectively rotated by the normal - // effective at removing one set of artifacts, but too expensive for lower quality settings - if (p_quality_level >= SSAO_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET) { - float dot_norm = dot(sample_offset_mirrored_uv, p_norm_xy); - sample_offset_mirrored_uv -= dot_norm * p_norm_xy_length * p_norm_xy; - sample_offset_mirrored_uv = round(sample_offset_mirrored_uv); - } - - // snap to pixel center (more correct obscurance math, avoids artifacts) - vec2 sampling_mirrored_uv = sample_offset_mirrored_uv * params.half_screen_pixel_size + p_normalized_screen_pos; - - SSAO_tap_inner(p_quality_level, r_obscurance_sum, r_weight_sum, sampling_mirrored_uv, mip_level, p_pix_center_pos, p_pixel_normal, p_fallof_sq, p_weight_mod); -} - -void generate_SSAO_shadows_internal(out float r_shadow_term, out vec4 r_edges, out float r_weight, const vec2 p_pos, int p_quality_level, bool p_adaptive_base) { - vec2 pos_rounded = trunc(p_pos); - uvec2 upos = uvec2(pos_rounded); - - const int number_of_taps = (p_adaptive_base) ? 
(SSAO_ADAPTIVE_TAP_BASE_COUNT) : (num_taps[p_quality_level]); - float pix_z, pix_left_z, pix_top_z, pix_right_z, pix_bottom_z; - - vec4 valuesUL = textureGather(source_depth_mipmaps, vec3(pos_rounded * params.half_screen_pixel_size, params.pass)); - vec4 valuesBR = textureGather(source_depth_mipmaps, vec3((pos_rounded + vec2(1.0)) * params.half_screen_pixel_size, params.pass)); - - // get this pixel's viewspace depth - pix_z = valuesUL.y; - - // get left right top bottom neighbouring pixels for edge detection (gets compiled out on quality_level == 0) - pix_left_z = valuesUL.x; - pix_top_z = valuesUL.z; - pix_right_z = valuesBR.z; - pix_bottom_z = valuesBR.x; - - vec2 normalized_screen_pos = pos_rounded * params.half_screen_pixel_size + params.half_screen_pixel_size_x025; - vec3 pix_center_pos = NDC_to_view_space(normalized_screen_pos, pix_z); - - // Load this pixel's viewspace normal - uvec2 full_res_coord = upos * 2 * params.size_multiplier + params.pass_coord_offset.xy; - vec3 pixel_normal = load_normal(ivec2(full_res_coord)); - - const vec2 pixel_size_at_center = NDC_to_view_space(normalized_screen_pos.xy + params.half_screen_pixel_size, pix_center_pos.z).xy - pix_center_pos.xy; - - float pixel_lookup_radius; - float fallof_sq; - - // calculate effect radius and fit our screen sampling pattern inside it - float viewspace_radius; - calculate_radius_parameters(length(pix_center_pos), pixel_size_at_center, pixel_lookup_radius, viewspace_radius, fallof_sq); - - // calculate samples rotation/scaling - mat2 rot_scale_matrix; - uint pseudo_random_index; - - { - vec4 rotation_scale; - // reduce effect radius near the screen edges slightly; ideally, one would render a larger depth buffer (5% on each side) instead - if (!p_adaptive_base && (p_quality_level >= SSAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET)) { - float near_screen_border = min(min(normalized_screen_pos.x, 1.0 - normalized_screen_pos.x), min(normalized_screen_pos.y, 1.0 - 
normalized_screen_pos.y)); - near_screen_border = clamp(10.0 * near_screen_border + 0.6, 0.0, 1.0); - pixel_lookup_radius *= near_screen_border; - } - - // load & update pseudo-random rotation matrix - pseudo_random_index = uint(pos_rounded.y * 2 + pos_rounded.x) % 5; - rotation_scale = constants.rotation_matrices[params.pass * 5 + pseudo_random_index]; - rot_scale_matrix = mat2(rotation_scale.x * pixel_lookup_radius, rotation_scale.y * pixel_lookup_radius, rotation_scale.z * pixel_lookup_radius, rotation_scale.w * pixel_lookup_radius); - } - - // the main obscurance & sample weight storage - float obscurance_sum = 0.0; - float weight_sum = 0.0; - - // edge mask for between this and left/right/top/bottom neighbour pixels - not used in quality level 0 so initialize to "no edge" (1 is no edge, 0 is edge) - vec4 edgesLRTB = vec4(1.0, 1.0, 1.0, 1.0); - - // Move center pixel slightly towards camera to avoid imprecision artifacts due to using of 16bit depth buffer; a lot smaller offsets needed when using 32bit floats - pix_center_pos *= 0.9992; - - if (!p_adaptive_base && (p_quality_level >= SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) { - edgesLRTB = calculate_edges(pix_z, pix_left_z, pix_right_z, pix_top_z, pix_bottom_z); - } - - // adds a more high definition sharp effect, which gets blurred out (reuses left/right/top/bottom samples that we used for edge detection) - if (!p_adaptive_base && (p_quality_level >= SSAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET)) { - // disable in case of quality level 4 (reference) - if (p_quality_level != 4) { - //approximate neighbouring pixels positions (actually just deltas or "positions - pix_center_pos" ) - vec3 normalized_viewspace_dir = vec3(pix_center_pos.xy / pix_center_pos.zz, 1.0); - vec3 pixel_left_delta = vec3(-pixel_size_at_center.x, 0.0, 0.0) + normalized_viewspace_dir * (pix_left_z - pix_center_pos.z); - vec3 pixel_right_delta = vec3(+pixel_size_at_center.x, 0.0, 0.0) + normalized_viewspace_dir * (pix_right_z - 
pix_center_pos.z); - vec3 pixel_top_delta = vec3(0.0, -pixel_size_at_center.y, 0.0) + normalized_viewspace_dir * (pix_top_z - pix_center_pos.z); - vec3 pixel_bottom_delta = vec3(0.0, +pixel_size_at_center.y, 0.0) + normalized_viewspace_dir * (pix_bottom_z - pix_center_pos.z); - - const float range_reduction = 4.0f; // this is to avoid various artifacts - const float modified_fallof_sq = range_reduction * fallof_sq; - - vec4 additional_obscurance; - additional_obscurance.x = calculate_pixel_obscurance(pixel_normal, pixel_left_delta, modified_fallof_sq); - additional_obscurance.y = calculate_pixel_obscurance(pixel_normal, pixel_right_delta, modified_fallof_sq); - additional_obscurance.z = calculate_pixel_obscurance(pixel_normal, pixel_top_delta, modified_fallof_sq); - additional_obscurance.w = calculate_pixel_obscurance(pixel_normal, pixel_bottom_delta, modified_fallof_sq); - - obscurance_sum += params.detail_intensity * dot(additional_obscurance, edgesLRTB); - } - } - - // Sharp normals also create edges - but this adds to the cost as well - if (!p_adaptive_base && (p_quality_level >= SSAO_NORMAL_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) { - vec3 neighbour_normal_left = load_normal(ivec2(full_res_coord), ivec2(-2, 0)); - vec3 neighbour_normal_right = load_normal(ivec2(full_res_coord), ivec2(2, 0)); - vec3 neighbour_normal_top = load_normal(ivec2(full_res_coord), ivec2(0, -2)); - vec3 neighbour_normal_bottom = load_normal(ivec2(full_res_coord), ivec2(0, 2)); - - const float dot_threshold = SSAO_NORMAL_BASED_EDGES_DOT_THRESHOLD; - - vec4 normal_edgesLRTB; - normal_edgesLRTB.x = clamp((dot(pixel_normal, neighbour_normal_left) + dot_threshold), 0.0, 1.0); - normal_edgesLRTB.y = clamp((dot(pixel_normal, neighbour_normal_right) + dot_threshold), 0.0, 1.0); - normal_edgesLRTB.z = clamp((dot(pixel_normal, neighbour_normal_top) + dot_threshold), 0.0, 1.0); - normal_edgesLRTB.w = clamp((dot(pixel_normal, neighbour_normal_bottom) + dot_threshold), 0.0, 1.0); - - edgesLRTB *= 
normal_edgesLRTB; - } - - const float global_mip_offset = SSAO_DEPTH_MIPS_GLOBAL_OFFSET; - float mip_offset = (p_quality_level < SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? (0) : (log2(pixel_lookup_radius) + global_mip_offset); - - // Used to tilt the second set of samples so that the disk is effectively rotated by the normal - // effective at removing one set of artifacts, but too expensive for lower quality settings - vec2 norm_xy = vec2(pixel_normal.x, pixel_normal.y); - float norm_xy_length = length(norm_xy); - norm_xy /= vec2(norm_xy_length, -norm_xy_length); - norm_xy_length *= SSAO_TILT_SAMPLES_AMOUNT; - - // standard, non-adaptive approach - if ((p_quality_level != 3) || p_adaptive_base) { - for (int i = 0; i < number_of_taps; i++) { - SSAOTap(p_quality_level, obscurance_sum, weight_sum, i, rot_scale_matrix, pix_center_pos, pixel_normal, normalized_screen_pos, mip_offset, fallof_sq, 1.0, norm_xy, norm_xy_length); - } - } -#ifdef ADAPTIVE - else { - // add new ones if needed - vec2 full_res_uv = normalized_screen_pos + params.pass_uv_offset.xy; - float importance = textureLod(source_importance, full_res_uv, 0.0).x; - - // this is to normalize SSAO_DETAIL_AO_AMOUNT across all pixel regardless of importance - obscurance_sum *= (SSAO_ADAPTIVE_TAP_BASE_COUNT / float(SSAO_MAX_TAPS)) + (importance * SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT / float(SSAO_MAX_TAPS)); - - // load existing base values - vec2 base_values = imageLoad(source_ssao, ivec3(upos, params.pass)).xy; - weight_sum += base_values.y * float(SSAO_ADAPTIVE_TAP_BASE_COUNT * 4.0); - obscurance_sum += (base_values.x) * weight_sum; - - // increase importance around edges - float edge_count = dot(1.0 - edgesLRTB, vec4(1.0, 1.0, 1.0, 1.0)); - - float avg_total_importance = float(counter.sum) * params.load_counter_avg_div; - - float importance_limiter = clamp(params.adaptive_sample_limit / avg_total_importance, 0.0, 1.0); - importance *= importance_limiter; - - float additional_sample_count = 
SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT * importance; - - const float blend_range = 3.0; - const float blend_range_inv = 1.0 / blend_range; - - additional_sample_count += 0.5; - uint additional_samples = uint(additional_sample_count); - uint additional_samples_to = min(SSAO_MAX_TAPS, additional_samples + SSAO_ADAPTIVE_TAP_BASE_COUNT); - - for (uint i = SSAO_ADAPTIVE_TAP_BASE_COUNT; i < additional_samples_to; i++) { - additional_sample_count -= 1.0f; - float weight_mod = clamp(additional_sample_count * blend_range_inv, 0.0, 1.0); - SSAOTap(p_quality_level, obscurance_sum, weight_sum, int(i), rot_scale_matrix, pix_center_pos, pixel_normal, normalized_screen_pos, mip_offset, fallof_sq, weight_mod, norm_xy, norm_xy_length); - } - } -#endif - - // early out for adaptive base - just output weight (used for the next pass) - if (p_adaptive_base) { - float obscurance = obscurance_sum / weight_sum; - - r_shadow_term = obscurance; - r_edges = vec4(0.0); - r_weight = weight_sum; - return; - } - - // calculate weighted average - float obscurance = obscurance_sum / weight_sum; - - // calculate fadeout (1 close, gradient, 0 far) - float fade_out = clamp(pix_center_pos.z * params.fade_out_mul + params.fade_out_add, 0.0, 1.0); - - // Reduce the SSAO shadowing if we're on the edge to remove artifacts on edges (we don't care for the lower quality one) - if (!p_adaptive_base && (p_quality_level >= SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) { - // when there's more than 2 opposite edges, start fading out the occlusion to reduce aliasing artifacts - float edge_fadeout_factor = clamp((1.0 - edgesLRTB.x - edgesLRTB.y) * 0.35, 0.0, 1.0) + clamp((1.0 - edgesLRTB.z - edgesLRTB.w) * 0.35, 0.0, 1.0); - - fade_out *= clamp(1.0 - edge_fadeout_factor, 0.0, 1.0); - } - - // strength - obscurance = params.intensity * obscurance; - - // clamp - obscurance = min(obscurance, params.shadow_clamp); - - // fadeout - obscurance *= fade_out; - - // conceptually switch to occlusion with the meaning being 
visibility (grows with visibility, occlusion == 1 implies full visibility), - // to be in line with what is more commonly used. - float occlusion = 1.0 - obscurance; - - // modify the gradient - // note: this cannot be moved to a later pass because of loss of precision after storing in the render target - occlusion = pow(clamp(occlusion, 0.0, 1.0), params.shadow_power); - - // outputs! - r_shadow_term = occlusion; // Our final 'occlusion' term (0 means fully occluded, 1 means fully lit) - r_edges = edgesLRTB; // These are used to prevent blurring across edges, 1 means no edge, 0 means edge, 0.5 means half way there, etc. - r_weight = weight_sum; -} - -void main() { - float out_shadow_term; - float out_weight; - vec4 out_edges; - ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); - if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing - return; - } - - vec2 uv = vec2(gl_GlobalInvocationID) + vec2(0.5); -#ifdef SSAO_BASE - generate_SSAO_shadows_internal(out_shadow_term, out_edges, out_weight, uv, params.quality, true); - - imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), vec4(out_shadow_term, out_weight / (float(SSAO_ADAPTIVE_TAP_BASE_COUNT) * 4.0), 0.0, 0.0)); -#else - generate_SSAO_shadows_internal(out_shadow_term, out_edges, out_weight, uv, params.quality, false); // pass in quality levels - if (params.quality == 0) { - out_edges = vec4(1.0); - } - - imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), vec4(out_shadow_term, pack_edges(out_edges), 0.0, 0.0)); -#endif -} diff --git a/servers/rendering/renderer_rd/shaders/ssao_blur.glsl b/servers/rendering/renderer_rd/shaders/ssao_blur.glsl deleted file mode 100644 index f42734c46d..0000000000 --- a/servers/rendering/renderer_rd/shaders/ssao_blur.glsl +++ /dev/null @@ -1,154 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2016, Intel Corporation -// Permission is hereby granted, free of 
charge, to any person obtaining a copy of this software and associated -// documentation files (the "Software"), to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to -// permit persons to whom the Software is furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of -// the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// File changes (yyyy-mm-dd) -// 2016-09-07: filip.strugar@intel.com: first commit -// 2020-12-05: clayjohn: convert to Vulkan and Godot -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[compute] - -#version 450 - -#VERSION_DEFINES - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -layout(set = 0, binding = 0) uniform sampler2D source_ssao; - -layout(rg8, set = 1, binding = 0) uniform restrict writeonly image2D dest_image; - -layout(push_constant, std430) uniform Params { - float edge_sharpness; - float pad; - vec2 half_screen_pixel_size; -} -params; - -vec4 unpack_edges(float p_packed_val) { - uint packed_val = uint(p_packed_val * 255.5); - vec4 edgesLRTB; - edgesLRTB.x = float((packed_val >> 6) & 0x03) / 3.0; - edgesLRTB.y = float((packed_val >> 4) & 0x03) / 3.0; - edgesLRTB.z = float((packed_val >> 2) & 0x03) / 3.0; - edgesLRTB.w = float((packed_val >> 0) & 0x03) / 3.0; - - return clamp(edgesLRTB + params.edge_sharpness, 0.0, 1.0); -} - -void add_sample(float p_ssao_value, float p_edge_value, inout float r_sum, inout float r_sum_weight) { - float weight = p_edge_value; - - r_sum += (weight * p_ssao_value); - r_sum_weight += weight; -} - -#ifdef MODE_WIDE -vec2 sample_blurred_wide(vec2 p_coord) { - vec2 vC = textureLodOffset(source_ssao, vec2(p_coord), 0.0, ivec2(0, 0)).xy; - vec2 vL = textureLodOffset(source_ssao, vec2(p_coord), 0.0, ivec2(-2, 0)).xy; - vec2 vT = textureLodOffset(source_ssao, vec2(p_coord), 0.0, ivec2(0, -2)).xy; - vec2 vR = textureLodOffset(source_ssao, vec2(p_coord), 0.0, ivec2(2, 0)).xy; - vec2 vB = textureLodOffset(source_ssao, vec2(p_coord), 0.0, ivec2(0, 2)).xy; - - float packed_edges = vC.y; - vec4 edgesLRTB = unpack_edges(packed_edges); - edgesLRTB.x *= unpack_edges(vL.y).y; - edgesLRTB.z *= unpack_edges(vT.y).w; - 
edgesLRTB.y *= unpack_edges(vR.y).x; - edgesLRTB.w *= unpack_edges(vB.y).z; - - float ssao_value = vC.x; - float ssao_valueL = vL.x; - float ssao_valueT = vT.x; - float ssao_valueR = vR.x; - float ssao_valueB = vB.x; - - float sum_weight = 0.8f; - float sum = ssao_value * sum_weight; - - add_sample(ssao_valueL, edgesLRTB.x, sum, sum_weight); - add_sample(ssao_valueR, edgesLRTB.y, sum, sum_weight); - add_sample(ssao_valueT, edgesLRTB.z, sum, sum_weight); - add_sample(ssao_valueB, edgesLRTB.w, sum, sum_weight); - - float ssao_avg = sum / sum_weight; - - ssao_value = ssao_avg; - - return vec2(ssao_value, packed_edges); -} -#endif - -#ifdef MODE_SMART -vec2 sample_blurred(vec3 p_pos, vec2 p_coord) { - float packed_edges = texelFetch(source_ssao, ivec2(p_pos.xy), 0).y; - vec4 edgesLRTB = unpack_edges(packed_edges); - - vec4 valuesUL = textureGather(source_ssao, vec2(p_coord - params.half_screen_pixel_size * 0.5)); - vec4 valuesBR = textureGather(source_ssao, vec2(p_coord + params.half_screen_pixel_size * 0.5)); - - float ssao_value = valuesUL.y; - float ssao_valueL = valuesUL.x; - float ssao_valueT = valuesUL.z; - float ssao_valueR = valuesBR.z; - float ssao_valueB = valuesBR.x; - - float sum_weight = 0.5; - float sum = ssao_value * sum_weight; - - add_sample(ssao_valueL, edgesLRTB.x, sum, sum_weight); - add_sample(ssao_valueR, edgesLRTB.y, sum, sum_weight); - - add_sample(ssao_valueT, edgesLRTB.z, sum, sum_weight); - add_sample(ssao_valueB, edgesLRTB.w, sum, sum_weight); - - float ssao_avg = sum / sum_weight; - - ssao_value = ssao_avg; - - return vec2(ssao_value, packed_edges); -} -#endif - -void main() { - // Pixel being shaded - ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); - -#ifdef MODE_NON_SMART - - vec2 half_pixel = params.half_screen_pixel_size * 0.5; - - vec2 uv = (vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size; - - vec2 center = textureLod(source_ssao, vec2(uv), 0.0).xy; - - vec4 vals; - vals.x = textureLod(source_ssao, vec2(uv 
+ vec2(-half_pixel.x * 3, -half_pixel.y)), 0.0).x; - vals.y = textureLod(source_ssao, vec2(uv + vec2(+half_pixel.x, -half_pixel.y * 3)), 0.0).x; - vals.z = textureLod(source_ssao, vec2(uv + vec2(-half_pixel.x, +half_pixel.y * 3)), 0.0).x; - vals.w = textureLod(source_ssao, vec2(uv + vec2(+half_pixel.x * 3, +half_pixel.y)), 0.0).x; - - vec2 sampled = vec2(dot(vals, vec4(0.2)) + center.x * 0.2, center.y); - -#else -#ifdef MODE_SMART - vec2 sampled = sample_blurred(vec3(gl_GlobalInvocationID), (vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size); -#else // MODE_WIDE - vec2 sampled = sample_blurred_wide((vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size); -#endif - -#endif - imageStore(dest_image, ivec2(ssC), vec4(sampled, 0.0, 0.0)); -} diff --git a/servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl b/servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl deleted file mode 100644 index 04f98964e8..0000000000 --- a/servers/rendering/renderer_rd/shaders/ssao_importance_map.glsl +++ /dev/null @@ -1,123 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2016, Intel Corporation -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -// documentation files (the "Software"), to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to -// permit persons to whom the Software is furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of -// the Software. 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// File changes (yyyy-mm-dd) -// 2016-09-07: filip.strugar@intel.com: first commit -// 2020-12-05: clayjohn: convert to Vulkan and Godot -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[compute] - -#version 450 - -#VERSION_DEFINES - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -#ifdef GENERATE_MAP -layout(set = 0, binding = 0) uniform sampler2DArray source_texture; -#else -layout(set = 0, binding = 0) uniform sampler2D source_importance; -#endif -layout(r8, set = 1, binding = 0) uniform restrict writeonly image2D dest_image; - -#ifdef PROCESS_MAPB -layout(set = 2, binding = 0, std430) buffer Counter { - uint sum; -} -counter; -#endif - -layout(push_constant, std430) uniform Params { - vec2 half_screen_pixel_size; - float intensity; - float power; -} -params; - -void main() { - // Pixel being shaded - ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); - -#ifdef GENERATE_MAP - // importance map stuff - uvec2 base_position = ssC * 2; - - vec2 base_uv = (vec2(base_position) + vec2(0.5f, 0.5f)) * params.half_screen_pixel_size; - - float minV = 1.0; - float maxV = 0.0; - for (int i = 0; i < 4; i++) { - vec4 vals = textureGather(source_texture, vec3(base_uv, i)); - - // apply the same modifications that would have been applied in the main shader - vals = params.intensity * vals; - - vals 
= 1 - vals; - - vals = pow(clamp(vals, 0.0, 1.0), vec4(params.power)); - - maxV = max(maxV, max(max(vals.x, vals.y), max(vals.z, vals.w))); - minV = min(minV, min(min(vals.x, vals.y), min(vals.z, vals.w))); - } - - float min_max_diff = maxV - minV; - - imageStore(dest_image, ssC, vec4(pow(clamp(min_max_diff * 2.0, 0.0, 1.0), 0.8))); -#endif - -#ifdef PROCESS_MAPA - vec2 uv = (vec2(ssC) + 0.5f) * params.half_screen_pixel_size * 2.0; - - float centre = textureLod(source_importance, uv, 0.0).x; - - vec2 half_pixel = params.half_screen_pixel_size; - - vec4 vals; - vals.x = textureLod(source_importance, uv + vec2(-half_pixel.x * 3, -half_pixel.y), 0.0).x; - vals.y = textureLod(source_importance, uv + vec2(+half_pixel.x, -half_pixel.y * 3), 0.0).x; - vals.z = textureLod(source_importance, uv + vec2(+half_pixel.x * 3, +half_pixel.y), 0.0).x; - vals.w = textureLod(source_importance, uv + vec2(-half_pixel.x, +half_pixel.y * 3), 0.0).x; - - float avg = dot(vals, vec4(0.25, 0.25, 0.25, 0.25)); - - imageStore(dest_image, ssC, vec4(avg)); -#endif - -#ifdef PROCESS_MAPB - vec2 uv = (vec2(ssC) + 0.5f) * params.half_screen_pixel_size * 2.0; - - float centre = textureLod(source_importance, uv, 0.0).x; - - vec2 half_pixel = params.half_screen_pixel_size; - - vec4 vals; - vals.x = textureLod(source_importance, uv + vec2(-half_pixel.x, -half_pixel.y * 3), 0.0).x; - vals.y = textureLod(source_importance, uv + vec2(+half_pixel.x * 3, -half_pixel.y), 0.0).x; - vals.z = textureLod(source_importance, uv + vec2(+half_pixel.x, +half_pixel.y * 3), 0.0).x; - vals.w = textureLod(source_importance, uv + vec2(-half_pixel.x * 3, +half_pixel.y), 0.0).x; - - float avg = dot(vals, vec4(0.25, 0.25, 0.25, 0.25)); - - imageStore(dest_image, ssC, vec4(avg)); - - // sum the average; to avoid overflowing we assume max AO resolution is not bigger than 16384x16384; so quarter res (used here) will be 4096x4096, which leaves us with 8 bits per pixel - uint sum = uint(clamp(avg, 0.0, 1.0) * 255.0 + 0.5); - - // 
save every 9th to avoid InterlockedAdd congestion - since we're blurring, this is good enough; compensated by multiplying load_counter_avg_div by 9 - if (((ssC.x % 3) + (ssC.y % 3)) == 0) { - atomicAdd(counter.sum, sum); - } -#endif -} diff --git a/servers/rendering/renderer_rd/shaders/ssao_interleave.glsl b/servers/rendering/renderer_rd/shaders/ssao_interleave.glsl deleted file mode 100644 index f6a9a92fac..0000000000 --- a/servers/rendering/renderer_rd/shaders/ssao_interleave.glsl +++ /dev/null @@ -1,119 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2016, Intel Corporation -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -// documentation files (the "Software"), to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to -// permit persons to whom the Software is furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of -// the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// File changes (yyyy-mm-dd) -// 2016-09-07: filip.strugar@intel.com: first commit -// 2020-12-05: clayjohn: convert to Vulkan and Godot -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[compute] - -#version 450 - -#VERSION_DEFINES - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -layout(rgba8, set = 0, binding = 0) uniform restrict writeonly image2D dest_image; -layout(set = 1, binding = 0) uniform sampler2DArray source_texture; - -layout(push_constant, std430) uniform Params { - float inv_sharpness; - uint size_modifier; - vec2 pixel_size; -} -params; - -vec4 unpack_edges(float p_packed_val) { - uint packed_val = uint(p_packed_val * 255.5); - vec4 edgesLRTB; - edgesLRTB.x = float((packed_val >> 6) & 0x03) / 3.0; - edgesLRTB.y = float((packed_val >> 4) & 0x03) / 3.0; - edgesLRTB.z = float((packed_val >> 2) & 0x03) / 3.0; - edgesLRTB.w = float((packed_val >> 0) & 0x03) / 3.0; - - return clamp(edgesLRTB + params.inv_sharpness, 0.0, 1.0); -} - -void main() { - ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); - if (any(greaterThanEqual(ssC, ivec2(1.0 / params.pixel_size)))) { //too large, do nothing - return; - } - -#ifdef MODE_SMART - float ao; - uvec2 pix_pos = uvec2(gl_GlobalInvocationID.xy); - vec2 uv = (gl_GlobalInvocationID.xy + vec2(0.5)) * params.pixel_size; - - // calculate index in the four deinterleaved source array texture - int mx = int(pix_pos.x % 2); - int my = int(pix_pos.y % 2); - int index_center = mx + my * 2; // center index - int index_horizontal = (1 - mx) + my * 2; // neighbouring, horizontal - int index_vertical = mx + (1 - my) * 2; // neighbouring, vertical - int index_diagonal = (1 - mx) + (1 - my) * 2; // diagonal - - vec2 center_val = texelFetch(source_texture, ivec3(pix_pos / uvec2(params.size_modifier), index_center), 0).xy; - - 
ao = center_val.x; - - vec4 edgesLRTB = unpack_edges(center_val.y); - - // convert index shifts to sampling offsets - float fmx = float(mx); - float fmy = float(my); - - // in case of an edge, push sampling offsets away from the edge (towards pixel center) - float fmxe = (edgesLRTB.y - edgesLRTB.x); - float fmye = (edgesLRTB.w - edgesLRTB.z); - - // calculate final sampling offsets and sample using bilinear filter - vec2 uv_horizontal = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(fmx + fmxe - 0.5, 0.5 - fmy)) * params.pixel_size; - float ao_horizontal = textureLod(source_texture, vec3(uv_horizontal, index_horizontal), 0.0).x; - vec2 uv_vertical = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(0.5 - fmx, fmy - 0.5 + fmye)) * params.pixel_size; - float ao_vertical = textureLod(source_texture, vec3(uv_vertical, index_vertical), 0.0).x; - vec2 uv_diagonal = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(fmx - 0.5 + fmxe, fmy - 0.5 + fmye)) * params.pixel_size; - float ao_diagonal = textureLod(source_texture, vec3(uv_diagonal, index_diagonal), 0.0).x; - - // reduce weight for samples near edge - if the edge is on both sides, weight goes to 0 - vec4 blendWeights; - blendWeights.x = 1.0; - blendWeights.y = (edgesLRTB.x + edgesLRTB.y) * 0.5; - blendWeights.z = (edgesLRTB.z + edgesLRTB.w) * 0.5; - blendWeights.w = (blendWeights.y + blendWeights.z) * 0.5; - - // calculate weighted average - float blendWeightsSum = dot(blendWeights, vec4(1.0, 1.0, 1.0, 1.0)); - ao = dot(vec4(ao, ao_horizontal, ao_vertical, ao_diagonal), blendWeights) / blendWeightsSum; - - imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), vec4(ao)); -#else // !MODE_SMART - - vec2 uv = (gl_GlobalInvocationID.xy + vec2(0.5)) * params.pixel_size; -#ifdef MODE_HALF - float a = textureLod(source_texture, vec3(uv, 0), 0.0).x; - float d = textureLod(source_texture, vec3(uv, 3), 0.0).x; - float avg = (a + d) * 0.5; - -#else - float a = textureLod(source_texture, vec3(uv, 0), 0.0).x; - float b = 
textureLod(source_texture, vec3(uv, 1), 0.0).x; - float c = textureLod(source_texture, vec3(uv, 2), 0.0).x; - float d = textureLod(source_texture, vec3(uv, 3), 0.0).x; - float avg = (a + b + c + d) * 0.25; - -#endif - imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), vec4(avg)); -#endif -} diff --git a/servers/rendering/renderer_rd/shaders/ssil.glsl b/servers/rendering/renderer_rd/shaders/ssil.glsl deleted file mode 100644 index 513791dfbf..0000000000 --- a/servers/rendering/renderer_rd/shaders/ssil.glsl +++ /dev/null @@ -1,444 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2016, Intel Corporation -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -// documentation files (the "Software"), to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to -// permit persons to whom the Software is furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of -// the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// File changes (yyyy-mm-dd) -// 2016-09-07: filip.strugar@intel.com: first commit -// 2020-12-05: clayjohn: convert to Vulkan and Godot -// 2021-05-27: clayjohn: convert SSAO to SSIL -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[compute] - -#version 450 - -#VERSION_DEFINES - -#define SSIL_MAIN_DISK_SAMPLE_COUNT (32) -const vec4 sample_pattern[SSIL_MAIN_DISK_SAMPLE_COUNT] = { - vec4(0.78488064, 0.56661671, 1.500000, -0.126083), vec4(0.26022232, -0.29575172, 1.500000, -1.064030), vec4(0.10459357, 0.08372527, 1.110000, -2.730563), vec4(-0.68286800, 0.04963045, 1.090000, -0.498827), - vec4(-0.13570161, -0.64190155, 1.250000, -0.532765), vec4(-0.26193795, -0.08205118, 0.670000, -1.783245), vec4(-0.61177456, 0.66664219, 0.710000, -0.044234), vec4(0.43675563, 0.25119025, 0.610000, -1.167283), - vec4(0.07884444, 0.86618668, 0.640000, -0.459002), vec4(-0.12790935, -0.29869005, 0.600000, -1.729424), vec4(-0.04031125, 0.02413622, 0.600000, -4.792042), vec4(0.16201244, -0.52851415, 0.790000, -1.067055), - vec4(-0.70991218, 0.47301072, 0.640000, -0.335236), vec4(0.03277707, -0.22349690, 0.600000, -1.982384), vec4(0.68921727, 0.36800742, 0.630000, -0.266718), vec4(0.29251814, 0.37775412, 0.610000, -1.422520), - vec4(-0.12224089, 0.96582592, 0.600000, -0.426142), vec4(0.11071457, -0.16131058, 0.600000, -2.165947), vec4(0.46562141, -0.59747696, 0.600000, -0.189760), vec4(-0.51548797, 0.11804193, 0.600000, -1.246800), - vec4(0.89141309, -0.42090443, 0.600000, 0.028192), vec4(-0.32402530, -0.01591529, 0.600000, -1.543018), vec4(0.60771245, 0.41635221, 0.600000, -0.605411), vec4(0.02379565, -0.08239821, 0.600000, -3.809046), - vec4(0.48951152, -0.23657045, 0.600000, -1.189011), vec4(-0.17611565, -0.81696892, 0.600000, -0.513724), vec4(-0.33930185, -0.20732205, 0.600000, -1.698047), 
vec4(-0.91974425, 0.05403209, 0.600000, 0.062246), - vec4(-0.15064627, -0.14949332, 0.600000, -1.896062), vec4(0.53180975, -0.35210401, 0.600000, -0.758838), vec4(0.41487166, 0.81442589, 0.600000, -0.505648), vec4(-0.24106961, -0.32721516, 0.600000, -1.665244) -}; - -// these values can be changed (up to SSIL_MAX_TAPS) with no changes required elsewhere; values for 4th and 5th preset are ignored but array needed to avoid compilation errors -// the actual number of texture samples is two times this value (each "tap" has two symmetrical depth texture samples) -const int num_taps[5] = { 3, 5, 12, 0, 0 }; - -#define SSIL_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET (99) // to disable simply set to 99 or similar -#define SSIL_TILT_SAMPLES_AMOUNT (0.4) -// -#define SSIL_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET (1) // to disable simply set to 99 or similar -#define SSIL_HALOING_REDUCTION_AMOUNT (0.8) // values from 0.0 - 1.0, 1.0 means max weighting (will cause artifacts, 0.8 is more reasonable) -// -#define SSIL_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET (2) -#define SSIL_DEPTH_MIPS_GLOBAL_OFFSET (-4.3) // best noise/quality/performance tradeoff, found empirically -// -// !!warning!! 
the edge handling is hard-coded to 'disabled' on quality level 0, and enabled above, on the C++ side; while toggling it here will work for -// testing purposes, it will not yield performance gains (or correct results) -#define SSIL_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (1) -// -#define SSIL_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET (1) - -#define SSIL_MAX_TAPS 32 -#define SSIL_ADAPTIVE_TAP_BASE_COUNT 5 -#define SSIL_ADAPTIVE_TAP_FLEXIBLE_COUNT (SSIL_MAX_TAPS - SSIL_ADAPTIVE_TAP_BASE_COUNT) -#define SSIL_DEPTH_MIP_LEVELS 4 - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -layout(set = 0, binding = 0) uniform sampler2DArray source_depth_mipmaps; -layout(rgba8, set = 0, binding = 1) uniform restrict readonly image2D source_normal; -layout(set = 0, binding = 2) uniform Constants { //get into a lower set - vec4 rotation_matrices[20]; -} -constants; - -#ifdef ADAPTIVE -layout(rgba16, set = 1, binding = 0) uniform restrict readonly image2DArray source_ssil; -layout(set = 1, binding = 1) uniform sampler2D source_importance; -layout(set = 1, binding = 2, std430) buffer Counter { - uint sum; -} -counter; -#endif - -layout(rgba16, set = 2, binding = 0) uniform restrict writeonly image2D dest_image; -layout(r8, set = 2, binding = 1) uniform image2D edges_weights_image; - -layout(set = 3, binding = 0) uniform sampler2D last_frame; -layout(set = 3, binding = 1) uniform ProjectionConstants { - mat4 reprojection; -} -projection_constants; - -layout(push_constant, std430) uniform Params { - ivec2 screen_size; - int pass; - int quality; - - vec2 half_screen_pixel_size; - vec2 half_screen_pixel_size_x025; - - vec2 NDC_to_view_mul; - vec2 NDC_to_view_add; - - vec2 pad2; - float z_near; - float z_far; - - float radius; - float intensity; - int size_multiplier; - int pad; - - float fade_out_mul; - float fade_out_add; - float normal_rejection_amount; - float inv_radius_near_limit; - - bool is_orthogonal; - float neg_inv_radius; - float 
load_counter_avg_div; - float adaptive_sample_limit; - - ivec2 pass_coord_offset; - vec2 pass_uv_offset; -} -params; - -float pack_edges(vec4 p_edgesLRTB) { - p_edgesLRTB = round(clamp(p_edgesLRTB, 0.0, 1.0) * 3.05); - return dot(p_edgesLRTB, vec4(64.0 / 255.0, 16.0 / 255.0, 4.0 / 255.0, 1.0 / 255.0)); -} - -vec3 NDC_to_view_space(vec2 p_pos, float p_viewspace_depth) { - if (params.is_orthogonal) { - return vec3((params.NDC_to_view_mul * p_pos.xy + params.NDC_to_view_add), p_viewspace_depth); - } else { - return vec3((params.NDC_to_view_mul * p_pos.xy + params.NDC_to_view_add) * p_viewspace_depth, p_viewspace_depth); - } -} - -// calculate effect radius and fit our screen sampling pattern inside it -void calculate_radius_parameters(const float p_pix_center_length, const vec2 p_pixel_size_at_center, out float r_lookup_radius, out float r_radius, out float r_fallof_sq) { - r_radius = params.radius; - - // when too close, on-screen sampling disk will grow beyond screen size; limit this to avoid closeup temporal artifacts - const float too_close_limit = clamp(p_pix_center_length * params.inv_radius_near_limit, 0.0, 1.0) * 0.8 + 0.2; - - r_radius *= too_close_limit; - - // 0.85 is to reduce the radius to allow for more samples on a slope to still stay within influence - r_lookup_radius = (0.85 * r_radius) / p_pixel_size_at_center.x; - - // used to calculate falloff (both for AO samples and per-sample weights) - r_fallof_sq = -1.0 / (r_radius * r_radius); -} - -vec4 calculate_edges(const float p_center_z, const float p_left_z, const float p_right_z, const float p_top_z, const float p_bottom_z) { - // slope-sensitive depth-based edge detection - vec4 edgesLRTB = vec4(p_left_z, p_right_z, p_top_z, p_bottom_z) - p_center_z; - vec4 edgesLRTB_slope_adjusted = edgesLRTB + edgesLRTB.yxwz; - edgesLRTB = min(abs(edgesLRTB), abs(edgesLRTB_slope_adjusted)); - return clamp((1.3 - edgesLRTB / (p_center_z * 0.040)), 0.0, 1.0); -} - -vec3 decode_normal(vec3 p_encoded_normal) { - vec3 
normal = p_encoded_normal * 2.0 - 1.0; - return normal; -} - -vec3 load_normal(ivec2 p_pos) { - vec3 encoded_normal = imageLoad(source_normal, p_pos).xyz; - encoded_normal.z = 1.0 - encoded_normal.z; - return decode_normal(encoded_normal); -} - -vec3 load_normal(ivec2 p_pos, ivec2 p_offset) { - vec3 encoded_normal = imageLoad(source_normal, p_pos + p_offset).xyz; - encoded_normal.z = 1.0 - encoded_normal.z; - return decode_normal(encoded_normal); -} - -// all vectors in viewspace -float calculate_pixel_obscurance(vec3 p_pixel_normal, vec3 p_hit_delta, float p_fallof_sq) { - float length_sq = dot(p_hit_delta, p_hit_delta); - float NdotD = dot(p_pixel_normal, p_hit_delta) / sqrt(length_sq); - - float falloff_mult = max(0.0, length_sq * p_fallof_sq + 1.0); - - return max(0, NdotD - 0.05) * falloff_mult; -} - -void SSIL_tap_inner(const int p_quality_level, inout vec3 r_color_sum, inout float r_obscurance_sum, inout float r_weight_sum, const vec2 p_sampling_uv, const float p_mip_level, const vec3 p_pix_center_pos, vec3 p_pixel_normal, const float p_fallof_sq, const float p_weight_mod) { - // get depth at sample - float viewspace_sample_z = textureLod(source_depth_mipmaps, vec3(p_sampling_uv, params.pass), p_mip_level).x; - vec3 sample_normal = load_normal(ivec2(p_sampling_uv * vec2(params.screen_size))); - - // convert to viewspace - vec3 hit_pos = NDC_to_view_space(p_sampling_uv.xy, viewspace_sample_z); - vec3 hit_delta = hit_pos - p_pix_center_pos; - - float obscurance = calculate_pixel_obscurance(p_pixel_normal, hit_delta, p_fallof_sq); - float weight = 1.0; - - if (p_quality_level >= SSIL_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET) { - float reduct = max(0, -hit_delta.z); - reduct = clamp(reduct * params.neg_inv_radius + 2.0, 0.0, 1.0); - weight = SSIL_HALOING_REDUCTION_AMOUNT * reduct + (1.0 - SSIL_HALOING_REDUCTION_AMOUNT); - } - - // Translate sampling_uv to last screen's coordinates - const vec4 sample_pos = projection_constants.reprojection * vec4(p_sampling_uv 
* 2.0 - 1.0, (viewspace_sample_z - params.z_near) / (params.z_far - params.z_near) * 2.0 - 1.0, 1.0); - vec2 reprojected_sampling_uv = (sample_pos.xy / sample_pos.w) * 0.5 + 0.5; - - weight *= p_weight_mod; - - r_obscurance_sum += obscurance * weight; - - vec3 sample_color = textureLod(last_frame, reprojected_sampling_uv, 5.0).rgb; - // Reduce impact of fireflies by tonemapping before averaging: http://graphicrants.blogspot.com/2013/12/tone-mapping.html - sample_color /= (1.0 + dot(sample_color, vec3(0.299, 0.587, 0.114))); - r_color_sum += sample_color * obscurance * weight * mix(1.0, smoothstep(0.0, 0.1, -dot(sample_normal, normalize(hit_delta))), params.normal_rejection_amount); - r_weight_sum += weight; -} - -void SSILTap(const int p_quality_level, inout vec3 r_color_sum, inout float r_obscurance_sum, inout float r_weight_sum, const int p_tap_index, const mat2 p_rot_scale, const vec3 p_pix_center_pos, vec3 p_pixel_normal, const vec2 p_normalized_screen_pos, const float p_mip_offset, const float p_fallof_sq, float p_weight_mod, vec2 p_norm_xy, float p_norm_xy_length) { - vec2 sample_offset; - float sample_pow_2_len; - - // patterns - { - vec4 new_sample = sample_pattern[p_tap_index]; - sample_offset = new_sample.xy * p_rot_scale; - sample_pow_2_len = new_sample.w; // precalculated, same as: sample_pow_2_len = log2( length( new_sample.xy ) ); - p_weight_mod *= new_sample.z; - } - - // snap to pixel center (more correct obscurance math, avoids artifacts) - sample_offset = round(sample_offset); - - // calculate MIP based on the sample distance from the centre, similar to as described - // in http://graphics.cs.williams.edu/papers/SAOHPG12/. - float mip_level = (p_quality_level < SSIL_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? 
(0) : (sample_pow_2_len + p_mip_offset); - - vec2 sampling_uv = sample_offset * params.half_screen_pixel_size + p_normalized_screen_pos; - - SSIL_tap_inner(p_quality_level, r_color_sum, r_obscurance_sum, r_weight_sum, sampling_uv, mip_level, p_pix_center_pos, p_pixel_normal, p_fallof_sq, p_weight_mod); - - // for the second tap, just use the mirrored offset - vec2 sample_offset_mirrored_uv = -sample_offset; - - // tilt the second set of samples so that the disk is effectively rotated by the normal - // effective at removing one set of artifacts, but too expensive for lower quality settings - if (p_quality_level >= SSIL_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET) { - float dot_norm = dot(sample_offset_mirrored_uv, p_norm_xy); - sample_offset_mirrored_uv -= dot_norm * p_norm_xy_length * p_norm_xy; - sample_offset_mirrored_uv = round(sample_offset_mirrored_uv); - } - - // snap to pixel center (more correct obscurance math, avoids artifacts) - vec2 sampling_mirrored_uv = sample_offset_mirrored_uv * params.half_screen_pixel_size + p_normalized_screen_pos; - - SSIL_tap_inner(p_quality_level, r_color_sum, r_obscurance_sum, r_weight_sum, sampling_mirrored_uv, mip_level, p_pix_center_pos, p_pixel_normal, p_fallof_sq, p_weight_mod); -} - -void generate_SSIL(out vec3 r_color, out vec4 r_edges, out float r_obscurance, out float r_weight, const vec2 p_pos, int p_quality_level, bool p_adaptive_base) { - vec2 pos_rounded = trunc(p_pos); - uvec2 upos = uvec2(pos_rounded); - - const int number_of_taps = (p_adaptive_base) ? 
(SSIL_ADAPTIVE_TAP_BASE_COUNT) : (num_taps[p_quality_level]); - float pix_z, pix_left_z, pix_top_z, pix_right_z, pix_bottom_z; - - vec4 valuesUL = textureGather(source_depth_mipmaps, vec3(pos_rounded * params.half_screen_pixel_size, params.pass)); - vec4 valuesBR = textureGather(source_depth_mipmaps, vec3((pos_rounded + vec2(1.0)) * params.half_screen_pixel_size, params.pass)); - - // get this pixel's viewspace depth - pix_z = valuesUL.y; - - // get left right top bottom neighbouring pixels for edge detection (gets compiled out on quality_level == 0) - pix_left_z = valuesUL.x; - pix_top_z = valuesUL.z; - pix_right_z = valuesBR.z; - pix_bottom_z = valuesBR.x; - - vec2 normalized_screen_pos = pos_rounded * params.half_screen_pixel_size + params.half_screen_pixel_size_x025; - vec3 pix_center_pos = NDC_to_view_space(normalized_screen_pos, pix_z); - - // Load this pixel's viewspace normal - uvec2 full_res_coord = upos * 2 * params.size_multiplier + params.pass_coord_offset.xy; - vec3 pixel_normal = load_normal(ivec2(full_res_coord)); - - const vec2 pixel_size_at_center = NDC_to_view_space(normalized_screen_pos.xy + params.half_screen_pixel_size, pix_center_pos.z).xy - pix_center_pos.xy; - - float pixel_lookup_radius; - float fallof_sq; - - // calculate effect radius and fit our screen sampling pattern inside it - float viewspace_radius; - calculate_radius_parameters(length(pix_center_pos), pixel_size_at_center, pixel_lookup_radius, viewspace_radius, fallof_sq); - - // calculate samples rotation/scaling - mat2 rot_scale_matrix; - uint pseudo_random_index; - - { - vec4 rotation_scale; - // reduce effect radius near the screen edges slightly; ideally, one would render a larger depth buffer (5% on each side) instead - if (!p_adaptive_base && (p_quality_level >= SSIL_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET)) { - float near_screen_border = min(min(normalized_screen_pos.x, 1.0 - normalized_screen_pos.x), min(normalized_screen_pos.y, 1.0 - 
normalized_screen_pos.y)); - near_screen_border = clamp(10.0 * near_screen_border + 0.6, 0.0, 1.0); - pixel_lookup_radius *= near_screen_border; - } - - // load & update pseudo-random rotation matrix - pseudo_random_index = uint(pos_rounded.y * 2 + pos_rounded.x) % 5; - rotation_scale = constants.rotation_matrices[params.pass * 5 + pseudo_random_index]; - rot_scale_matrix = mat2(rotation_scale.x * pixel_lookup_radius, rotation_scale.y * pixel_lookup_radius, rotation_scale.z * pixel_lookup_radius, rotation_scale.w * pixel_lookup_radius); - } - - // the main obscurance & sample weight storage - vec3 color_sum = vec3(0.0); - float obscurance_sum = 0.0; - float weight_sum = 0.0; - - // edge mask for between this and left/right/top/bottom neighbour pixels - not used in quality level 0 so initialize to "no edge" (1 is no edge, 0 is edge) - vec4 edgesLRTB = vec4(1.0, 1.0, 1.0, 1.0); - - // Move center pixel slightly towards camera to avoid imprecision artifacts due to using of 16bit depth buffer; a lot smaller offsets needed when using 32bit floats - pix_center_pos *= 0.9992; - - if (!p_adaptive_base && (p_quality_level >= SSIL_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) { - edgesLRTB = calculate_edges(pix_z, pix_left_z, pix_right_z, pix_top_z, pix_bottom_z); - } - - const float global_mip_offset = SSIL_DEPTH_MIPS_GLOBAL_OFFSET; - float mip_offset = (p_quality_level < SSIL_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? 
(0) : (log2(pixel_lookup_radius) + global_mip_offset); - - // Used to tilt the second set of samples so that the disk is effectively rotated by the normal - // effective at removing one set of artifacts, but too expensive for lower quality settings - vec2 norm_xy = vec2(pixel_normal.x, pixel_normal.y); - float norm_xy_length = length(norm_xy); - norm_xy /= vec2(norm_xy_length, -norm_xy_length); - norm_xy_length *= SSIL_TILT_SAMPLES_AMOUNT; - - // standard, non-adaptive approach - if ((p_quality_level != 3) || p_adaptive_base) { - for (int i = 0; i < number_of_taps; i++) { - SSILTap(p_quality_level, color_sum, obscurance_sum, weight_sum, i, rot_scale_matrix, pix_center_pos, pixel_normal, normalized_screen_pos, mip_offset, fallof_sq, 1.0, norm_xy, norm_xy_length); - } - } -#ifdef ADAPTIVE - else { - // add new ones if needed - vec2 full_res_uv = normalized_screen_pos + params.pass_uv_offset.xy; - float importance = textureLod(source_importance, full_res_uv, 0.0).x; - - //Need to store obscurance from base pass - // load existing base values - vec4 base_values = imageLoad(source_ssil, ivec3(upos, params.pass)); - weight_sum += imageLoad(edges_weights_image, ivec2(upos)).r * float(SSIL_ADAPTIVE_TAP_BASE_COUNT * 4.0); - color_sum += (base_values.rgb) * weight_sum; - obscurance_sum += (base_values.a) * weight_sum; - - // increase importance around edges - float edge_count = dot(1.0 - edgesLRTB, vec4(1.0, 1.0, 1.0, 1.0)); - - float avg_total_importance = float(counter.sum) * params.load_counter_avg_div; - - float importance_limiter = clamp(params.adaptive_sample_limit / avg_total_importance, 0.0, 1.0); - importance *= importance_limiter; - - float additional_sample_count = SSIL_ADAPTIVE_TAP_FLEXIBLE_COUNT * importance; - - const float blend_range = 3.0; - const float blend_range_inv = 1.0 / blend_range; - - additional_sample_count += 0.5; - uint additional_samples = uint(additional_sample_count); - uint additional_samples_to = min(SSIL_MAX_TAPS, additional_samples + 
SSIL_ADAPTIVE_TAP_BASE_COUNT); - - for (uint i = SSIL_ADAPTIVE_TAP_BASE_COUNT; i < additional_samples_to; i++) { - additional_sample_count -= 1.0f; - float weight_mod = clamp(additional_sample_count * blend_range_inv, 0.0, 1.0); - SSILTap(p_quality_level, color_sum, obscurance_sum, weight_sum, int(i), rot_scale_matrix, pix_center_pos, pixel_normal, normalized_screen_pos, mip_offset, fallof_sq, weight_mod, norm_xy, norm_xy_length); - } - } -#endif - - // Early out for adaptive base - if (p_adaptive_base) { - vec3 color = color_sum / weight_sum; - - r_color = color; - r_edges = vec4(0.0); - r_obscurance = obscurance_sum / weight_sum; - r_weight = weight_sum; - return; - } - - // Calculate weighted average - vec3 color = color_sum / weight_sum; - color /= 1.0 - dot(color, vec3(0.299, 0.587, 0.114)); - - // Calculate fadeout (1 close, gradient, 0 far) - float fade_out = clamp(pix_center_pos.z * params.fade_out_mul + params.fade_out_add, 0.0, 1.0); - - // Reduce the SSIL if we're on the edge to remove artifacts on edges (we don't care for the lower quality one) - if (!p_adaptive_base && (p_quality_level >= SSIL_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) { - // when there's more than 2 opposite edges, start fading out the occlusion to reduce aliasing artifacts - float edge_fadeout_factor = clamp((1.0 - edgesLRTB.x - edgesLRTB.y) * 0.35, 0.0, 1.0) + clamp((1.0 - edgesLRTB.z - edgesLRTB.w) * 0.35, 0.0, 1.0); - - fade_out *= clamp(1.0 - edge_fadeout_factor, 0.0, 1.0); - } - - color = params.intensity * color; - - color *= fade_out; - - // outputs! - r_color = color; - r_edges = edgesLRTB; // These are used to prevent blurring across edges, 1 means no edge, 0 means edge, 0.5 means half way there, etc. 
- r_obscurance = clamp((obscurance_sum / weight_sum) * params.intensity, 0.0, 1.0); - r_weight = weight_sum; -} - -void main() { - vec3 out_color; - float out_obscurance; - float out_weight; - vec4 out_edges; - ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); - if (any(greaterThanEqual(ssC, params.screen_size))) { //too large, do nothing - return; - } - - vec2 uv = vec2(gl_GlobalInvocationID) + vec2(0.5); -#ifdef SSIL_BASE - generate_SSIL(out_color, out_edges, out_obscurance, out_weight, uv, params.quality, true); - - imageStore(dest_image, ssC, vec4(out_color, out_obscurance)); - imageStore(edges_weights_image, ssC, vec4(out_weight / (float(SSIL_ADAPTIVE_TAP_BASE_COUNT) * 4.0))); -#else - generate_SSIL(out_color, out_edges, out_obscurance, out_weight, uv, params.quality, false); // pass in quality levels - - imageStore(dest_image, ssC, vec4(out_color, out_obscurance)); - imageStore(edges_weights_image, ssC, vec4(pack_edges(out_edges))); -#endif -} diff --git a/servers/rendering/renderer_rd/shaders/ssil_blur.glsl b/servers/rendering/renderer_rd/shaders/ssil_blur.glsl deleted file mode 100644 index 47c56571f6..0000000000 --- a/servers/rendering/renderer_rd/shaders/ssil_blur.glsl +++ /dev/null @@ -1,144 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2016, Intel Corporation -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -// documentation files (the "Software"), to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to -// permit persons to whom the Software is furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of -// the Software. 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// File changes (yyyy-mm-dd) -// 2016-09-07: filip.strugar@intel.com: first commit -// 2020-12-05: clayjohn: convert to Vulkan and Godot -// 2021-05-27: clayjohn: convert SSAO to SSIL -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[compute] - -#version 450 - -#VERSION_DEFINES - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -layout(set = 0, binding = 0) uniform sampler2D source_ssil; - -layout(rgba16, set = 1, binding = 0) uniform restrict writeonly image2D dest_image; - -layout(r8, set = 2, binding = 0) uniform restrict readonly image2D source_edges; - -layout(push_constant, std430) uniform Params { - float edge_sharpness; - float pad; - vec2 half_screen_pixel_size; -} -params; - -vec4 unpack_edges(float p_packed_val) { - uint packed_val = uint(p_packed_val * 255.5); - vec4 edgesLRTB; - edgesLRTB.x = float((packed_val >> 6) & 0x03) / 3.0; - edgesLRTB.y = float((packed_val >> 4) & 0x03) / 3.0; - edgesLRTB.z = float((packed_val >> 2) & 0x03) / 3.0; - edgesLRTB.w = float((packed_val >> 0) & 0x03) / 3.0; - - return clamp(edgesLRTB + params.edge_sharpness, 0.0, 1.0); -} - -void add_sample(vec4 p_ssil_value, float p_edge_value, inout vec4 r_sum, inout float r_sum_weight) { - float weight = p_edge_value; - - r_sum += (weight * p_ssil_value); - r_sum_weight += weight; -} - -#ifdef 
MODE_WIDE -vec4 sample_blurred_wide(ivec2 p_pos, vec2 p_coord) { - vec4 ssil_value = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(0, 0)); - vec4 ssil_valueL = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(-2, 0)); - vec4 ssil_valueT = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(0, -2)); - vec4 ssil_valueR = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(2, 0)); - vec4 ssil_valueB = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(0, 2)); - - vec4 edgesLRTB = unpack_edges(imageLoad(source_edges, p_pos).r); - edgesLRTB.x *= unpack_edges(imageLoad(source_edges, p_pos + ivec2(-2, 0)).r).y; - edgesLRTB.z *= unpack_edges(imageLoad(source_edges, p_pos + ivec2(0, -2)).r).w; - edgesLRTB.y *= unpack_edges(imageLoad(source_edges, p_pos + ivec2(2, 0)).r).x; - edgesLRTB.w *= unpack_edges(imageLoad(source_edges, p_pos + ivec2(0, 2)).r).z; - - float sum_weight = 0.8; - vec4 sum = ssil_value * sum_weight; - - add_sample(ssil_valueL, edgesLRTB.x, sum, sum_weight); - add_sample(ssil_valueR, edgesLRTB.y, sum, sum_weight); - add_sample(ssil_valueT, edgesLRTB.z, sum, sum_weight); - add_sample(ssil_valueB, edgesLRTB.w, sum, sum_weight); - - vec4 ssil_avg = sum / sum_weight; - - ssil_value = ssil_avg; - - return ssil_value; -} -#endif - -#ifdef MODE_SMART -vec4 sample_blurred(ivec2 p_pos, vec2 p_coord) { - vec4 vC = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(0, 0)); - vec4 vL = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(-1, 0)); - vec4 vT = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(0, -1)); - vec4 vR = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(1, 0)); - vec4 vB = textureLodOffset(source_ssil, vec2(p_coord), 0.0, ivec2(0, 1)); - - float packed_edges = imageLoad(source_edges, p_pos).r; - vec4 edgesLRTB = unpack_edges(packed_edges); - - float sum_weight = 0.5; - vec4 sum = vC * sum_weight; - - add_sample(vL, edgesLRTB.x, sum, sum_weight); - add_sample(vR, edgesLRTB.y, sum, sum_weight); - 
add_sample(vT, edgesLRTB.z, sum, sum_weight); - add_sample(vB, edgesLRTB.w, sum, sum_weight); - - vec4 ssil_avg = sum / sum_weight; - - vec4 ssil_value = ssil_avg; - - return ssil_value; -} -#endif - -void main() { - // Pixel being shaded - ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); - -#ifdef MODE_NON_SMART - - vec2 half_pixel = params.half_screen_pixel_size * 0.5; - - vec2 uv = (vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size; - - vec4 centre = textureLod(source_ssil, uv, 0.0); - - vec4 value = textureLod(source_ssil, vec2(uv + vec2(-half_pixel.x * 3, -half_pixel.y)), 0.0) * 0.2; - value += textureLod(source_ssil, vec2(uv + vec2(+half_pixel.x, -half_pixel.y * 3)), 0.0) * 0.2; - value += textureLod(source_ssil, vec2(uv + vec2(-half_pixel.x, +half_pixel.y * 3)), 0.0) * 0.2; - value += textureLod(source_ssil, vec2(uv + vec2(+half_pixel.x * 3, +half_pixel.y)), 0.0) * 0.2; - - vec4 sampled = value + centre * 0.2; - -#else -#ifdef MODE_SMART - vec4 sampled = sample_blurred(ssC, (vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size); -#else // MODE_WIDE - vec4 sampled = sample_blurred_wide(ssC, (vec2(gl_GlobalInvocationID.xy) + vec2(0.5, 0.5)) * params.half_screen_pixel_size); -#endif -#endif // MODE_NON_SMART - imageStore(dest_image, ssC, sampled); -} diff --git a/servers/rendering/renderer_rd/shaders/ssil_importance_map.glsl b/servers/rendering/renderer_rd/shaders/ssil_importance_map.glsl deleted file mode 100644 index 6b6b02739d..0000000000 --- a/servers/rendering/renderer_rd/shaders/ssil_importance_map.glsl +++ /dev/null @@ -1,125 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2016, Intel Corporation -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -// documentation files (the "Software"), to deal in the Software without restriction, including 
without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to -// permit persons to whom the Software is furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of -// the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// File changes (yyyy-mm-dd) -// 2016-09-07: filip.strugar@intel.com: first commit -// 2020-12-05: clayjohn: convert to Vulkan and Godot -// 2021-05-27: clayjohn: convert SSAO to SSIL -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[compute] - -#version 450 - -#VERSION_DEFINES - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -#ifdef GENERATE_MAP -layout(set = 0, binding = 0) uniform sampler2DArray source_texture; -#else -layout(set = 0, binding = 0) uniform sampler2D source_importance; -#endif -layout(r8, set = 1, binding = 0) uniform restrict writeonly image2D dest_image; - -#ifdef PROCESS_MAPB -layout(set = 2, binding = 0, std430) buffer Counter { - uint sum; -} -counter; -#endif - -layout(push_constant, std430) uniform Params { - vec2 half_screen_pixel_size; - float intensity; - float pad; -} -params; - -void main() { - // Pixel being shaded - ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); - -#ifdef 
GENERATE_MAP - // importance map stuff - uvec2 base_position = ssC * 2; - - float avg = 0.0; - float minV = 1.0; - float maxV = 0.0; - for (int i = 0; i < 4; i++) { - vec3 value_a = texelFetch(source_texture, ivec3(base_position, i), 0).rgb * params.intensity; - vec3 value_b = texelFetch(source_texture, ivec3(base_position, i) + ivec3(0, 1, 0), 0).rgb * params.intensity; - vec3 value_c = texelFetch(source_texture, ivec3(base_position, i) + ivec3(1, 0, 0), 0).rgb * params.intensity; - vec3 value_d = texelFetch(source_texture, ivec3(base_position, i) + ivec3(1, 1, 0), 0).rgb * params.intensity; - - // Calculate luminance (black and white value) - float a = dot(value_a, vec3(0.2125, 0.7154, 0.0721)); - float b = dot(value_b, vec3(0.2125, 0.7154, 0.0721)); - float c = dot(value_c, vec3(0.2125, 0.7154, 0.0721)); - float d = dot(value_d, vec3(0.2125, 0.7154, 0.0721)); - - maxV = max(maxV, max(max(a, b), max(c, d))); - minV = min(minV, min(min(a, b), min(c, d))); - } - - float min_max_diff = maxV - minV; - - imageStore(dest_image, ssC, vec4(pow(clamp(min_max_diff * 2.0, 0.0, 1.0), 0.6))); -#endif - -#ifdef PROCESS_MAPA - vec2 uv = (vec2(ssC) + 0.5) * params.half_screen_pixel_size * 2.0; - - float centre = textureLod(source_importance, uv, 0.0).x; - - vec2 half_pixel = params.half_screen_pixel_size; - - vec4 vals; - vals.x = textureLod(source_importance, uv + vec2(-half_pixel.x * 3, -half_pixel.y), 0.0).x; - vals.y = textureLod(source_importance, uv + vec2(+half_pixel.x, -half_pixel.y * 3), 0.0).x; - vals.z = textureLod(source_importance, uv + vec2(+half_pixel.x * 3, +half_pixel.y), 0.0).x; - vals.w = textureLod(source_importance, uv + vec2(-half_pixel.x, +half_pixel.y * 3), 0.0).x; - - float avg = dot(vals, vec4(0.25, 0.25, 0.25, 0.25)); - - imageStore(dest_image, ssC, vec4(avg)); -#endif - -#ifdef PROCESS_MAPB - vec2 uv = (vec2(ssC) + 0.5f) * params.half_screen_pixel_size * 2.0; - - float centre = textureLod(source_importance, uv, 0.0).x; - - vec2 half_pixel = 
params.half_screen_pixel_size; - - vec4 vals; - vals.x = textureLod(source_importance, uv + vec2(-half_pixel.x, -half_pixel.y * 3), 0.0).x; - vals.y = textureLod(source_importance, uv + vec2(+half_pixel.x * 3, -half_pixel.y), 0.0).x; - vals.z = textureLod(source_importance, uv + vec2(+half_pixel.x, +half_pixel.y * 3), 0.0).x; - vals.w = textureLod(source_importance, uv + vec2(-half_pixel.x * 3, +half_pixel.y), 0.0).x; - - float avg = dot(vals, vec4(0.25, 0.25, 0.25, 0.25)); - - imageStore(dest_image, ssC, vec4(avg)); - - // sum the average; to avoid overflowing we assume max AO resolution is not bigger than 16384x16384; so quarter res (used here) will be 4096x4096, which leaves us with 8 bits per pixel - uint sum = uint(clamp(avg, 0.0, 1.0) * 255.0 + 0.5); - - // save every 9th to avoid InterlockedAdd congestion - since we're blurring, this is good enough; compensated by multiplying load_counter_avg_div by 9 - if (((ssC.x % 3) + (ssC.y % 3)) == 0) { - atomicAdd(counter.sum, sum); - } -#endif -} diff --git a/servers/rendering/renderer_rd/shaders/ssil_interleave.glsl b/servers/rendering/renderer_rd/shaders/ssil_interleave.glsl deleted file mode 100644 index 9e86ac0cf0..0000000000 --- a/servers/rendering/renderer_rd/shaders/ssil_interleave.glsl +++ /dev/null @@ -1,122 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2016, Intel Corporation -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -// documentation files (the "Software"), to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to -// permit persons to whom the Software is furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in all copies or 
substantial portions of -// the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// File changes (yyyy-mm-dd) -// 2016-09-07: filip.strugar@intel.com: first commit -// 2020-12-05: clayjohn: convert to Vulkan and Godot -// 2021-05-27: clayjohn: convert SSAO to SSIL -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[compute] - -#version 450 - -#VERSION_DEFINES - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -layout(rgba16, set = 0, binding = 0) uniform restrict writeonly image2D dest_image; -layout(set = 1, binding = 0) uniform sampler2DArray source_texture; -layout(r8, set = 2, binding = 0) uniform restrict readonly image2DArray source_edges; - -layout(push_constant, std430) uniform Params { - float inv_sharpness; - uint size_modifier; - vec2 pixel_size; -} -params; - -vec4 unpack_edges(float p_packed_val) { - uint packed_val = uint(p_packed_val * 255.5); - vec4 edgesLRTB; - edgesLRTB.x = float((packed_val >> 6) & 0x03) / 3.0; - edgesLRTB.y = float((packed_val >> 4) & 0x03) / 3.0; - edgesLRTB.z = float((packed_val >> 2) & 0x03) / 3.0; - edgesLRTB.w = float((packed_val >> 0) & 0x03) / 3.0; - - return clamp(edgesLRTB + params.inv_sharpness, 0.0, 1.0); -} - -void main() { - ivec2 ssC = ivec2(gl_GlobalInvocationID.xy); - if (any(greaterThanEqual(ssC, ivec2(1.0 / params.pixel_size)))) { //too large, do nothing - return; - 
} - -#ifdef MODE_SMART - uvec2 pix_pos = uvec2(gl_GlobalInvocationID.xy); - vec2 uv = (gl_GlobalInvocationID.xy + vec2(0.5)) * params.pixel_size; - - // calculate index in the four deinterleaved source array texture - int mx = int(pix_pos.x % 2); - int my = int(pix_pos.y % 2); - int index_center = mx + my * 2; // center index - int index_horizontal = (1 - mx) + my * 2; // neighbouring, horizontal - int index_vertical = mx + (1 - my) * 2; // neighbouring, vertical - int index_diagonal = (1 - mx) + (1 - my) * 2; // diagonal - - vec4 color = texelFetch(source_texture, ivec3(pix_pos / uvec2(params.size_modifier), index_center), 0); - - vec4 edgesLRTB = unpack_edges(imageLoad(source_edges, ivec3(pix_pos / uvec2(params.size_modifier), index_center)).r); - - // convert index shifts to sampling offsets - float fmx = float(mx); - float fmy = float(my); - - // in case of an edge, push sampling offsets away from the edge (towards pixel center) - float fmxe = (edgesLRTB.y - edgesLRTB.x); - float fmye = (edgesLRTB.w - edgesLRTB.z); - - // calculate final sampling offsets and sample using bilinear filter - vec2 uv_horizontal = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(fmx + fmxe - 0.5, 0.5 - fmy)) * params.pixel_size; - vec4 color_horizontal = textureLod(source_texture, vec3(uv_horizontal, index_horizontal), 0.0); - vec2 uv_vertical = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(0.5 - fmx, fmy - 0.5 + fmye)) * params.pixel_size; - vec4 color_vertical = textureLod(source_texture, vec3(uv_vertical, index_vertical), 0.0); - vec2 uv_diagonal = (gl_GlobalInvocationID.xy + vec2(0.5) + vec2(fmx - 0.5 + fmxe, fmy - 0.5 + fmye)) * params.pixel_size; - vec4 color_diagonal = textureLod(source_texture, vec3(uv_diagonal, index_diagonal), 0.0); - - // reduce weight for samples near edge - if the edge is on both sides, weight goes to 0 - vec4 blendWeights; - blendWeights.x = 1.0; - blendWeights.y = (edgesLRTB.x + edgesLRTB.y) * 0.5; - blendWeights.z = (edgesLRTB.z + edgesLRTB.w) * 0.5; - 
blendWeights.w = (blendWeights.y + blendWeights.z) * 0.5; - - // calculate weighted average - float blendWeightsSum = dot(blendWeights, vec4(1.0, 1.0, 1.0, 1.0)); - color += color_horizontal * blendWeights.y; - color += color_vertical * blendWeights.z; - color += color_diagonal * blendWeights.w; - color /= blendWeightsSum; - - imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), color); -#else // !MODE_SMART - - vec2 uv = (gl_GlobalInvocationID.xy + vec2(0.5)) * params.pixel_size; -#ifdef MODE_HALF - vec4 a = textureLod(source_texture, vec3(uv, 0), 0.0); - vec4 d = textureLod(source_texture, vec3(uv, 3), 0.0); - vec4 avg = (a + d) * 0.5; - -#else - vec4 a = textureLod(source_texture, vec3(uv, 0), 0.0); - vec4 b = textureLod(source_texture, vec3(uv, 1), 0.0); - vec4 c = textureLod(source_texture, vec3(uv, 2), 0.0); - vec4 d = textureLod(source_texture, vec3(uv, 3), 0.0); - vec4 avg = (a + b + c + d) * 0.25; - -#endif - imageStore(dest_image, ivec2(gl_GlobalInvocationID.xy), avg); -#endif -} -- cgit v1.2.3