From 997810e417cb1f003bacf784f7a07140f7a1b583 Mon Sep 17 00:00:00 2001 From: Bastiaan Olij Date: Fri, 20 May 2022 12:52:19 +1000 Subject: Split GI effects and fix stereoscopic rendering of GI effects --- servers/rendering/renderer_rd/SCsub | 1 + servers/rendering/renderer_rd/effects/resolve.cpp | 130 + servers/rendering/renderer_rd/effects/resolve.h | 74 + servers/rendering/renderer_rd/effects_rd.cpp | 56 - servers/rendering/renderer_rd/effects_rd.h | 24 - servers/rendering/renderer_rd/environment/SCsub | 5 + servers/rendering/renderer_rd/environment/gi.cpp | 3958 ++++++++++++++++++++ servers/rendering/renderer_rd/environment/gi.h | 800 ++++ .../forward_clustered/render_forward_clustered.cpp | 204 +- .../forward_clustered/render_forward_clustered.h | 15 +- .../scene_shader_forward_clustered.cpp | 12 +- .../scene_shader_forward_clustered.h | 4 + .../forward_mobile/render_forward_mobile.cpp | 3 +- .../rendering/renderer_rd/renderer_compositor_rd.h | 14 +- .../rendering/renderer_rd/renderer_scene_gi_rd.cpp | 3416 ----------------- .../rendering/renderer_rd/renderer_scene_gi_rd.h | 665 ---- .../renderer_rd/renderer_scene_render_rd.cpp | 69 +- .../renderer_rd/renderer_scene_render_rd.h | 21 +- .../rendering/renderer_rd/renderer_storage_rd.cpp | 344 +- .../rendering/renderer_rd/renderer_storage_rd.h | 86 +- servers/rendering/renderer_rd/shaders/SCsub | 1 + .../renderer_rd/shaders/effects/resolve.glsl | 236 ++ .../renderer_rd/shaders/environment/SCsub | 17 + .../renderer_rd/shaders/environment/gi.glsl | 672 ++++ .../shaders/environment/sdfgi_debug.glsl | 178 + .../shaders/environment/sdfgi_debug_probes.glsl | 267 ++ .../shaders/environment/sdfgi_direct_light.glsl | 506 +++ .../shaders/environment/sdfgi_integrate.glsl | 612 +++ .../shaders/environment/sdfgi_preprocess.glsl | 1056 ++++++ .../renderer_rd/shaders/environment/voxel_gi.glsl | 616 +++ .../shaders/environment/voxel_gi_debug.glsl | 168 + .../shaders/environment/voxel_gi_sdf.glsl | 180 + 
servers/rendering/renderer_rd/shaders/gi.glsl | 650 ---- servers/rendering/renderer_rd/shaders/resolve.glsl | 236 -- .../shaders/scene_forward_clustered.glsl | 13 + .../shaders/scene_forward_clustered_inc.glsl | 8 +- .../rendering/renderer_rd/shaders/sdfgi_debug.glsl | 174 - .../renderer_rd/shaders/sdfgi_debug_probes.glsl | 231 -- .../renderer_rd/shaders/sdfgi_direct_light.glsl | 506 --- .../renderer_rd/shaders/sdfgi_integrate.glsl | 612 --- .../renderer_rd/shaders/sdfgi_preprocess.glsl | 1056 ------ .../rendering/renderer_rd/shaders/voxel_gi.glsl | 616 --- .../renderer_rd/shaders/voxel_gi_debug.glsl | 168 - .../renderer_rd/shaders/voxel_gi_sdf.glsl | 180 - 44 files changed, 9761 insertions(+), 9099 deletions(-) create mode 100644 servers/rendering/renderer_rd/effects/resolve.cpp create mode 100644 servers/rendering/renderer_rd/effects/resolve.h create mode 100644 servers/rendering/renderer_rd/environment/SCsub create mode 100644 servers/rendering/renderer_rd/environment/gi.cpp create mode 100644 servers/rendering/renderer_rd/environment/gi.h delete mode 100644 servers/rendering/renderer_rd/renderer_scene_gi_rd.cpp delete mode 100644 servers/rendering/renderer_rd/renderer_scene_gi_rd.h create mode 100644 servers/rendering/renderer_rd/shaders/effects/resolve.glsl create mode 100644 servers/rendering/renderer_rd/shaders/environment/SCsub create mode 100644 servers/rendering/renderer_rd/shaders/environment/gi.glsl create mode 100644 servers/rendering/renderer_rd/shaders/environment/sdfgi_debug.glsl create mode 100644 servers/rendering/renderer_rd/shaders/environment/sdfgi_debug_probes.glsl create mode 100644 servers/rendering/renderer_rd/shaders/environment/sdfgi_direct_light.glsl create mode 100644 servers/rendering/renderer_rd/shaders/environment/sdfgi_integrate.glsl create mode 100644 servers/rendering/renderer_rd/shaders/environment/sdfgi_preprocess.glsl create mode 100644 servers/rendering/renderer_rd/shaders/environment/voxel_gi.glsl create mode 100644 
servers/rendering/renderer_rd/shaders/environment/voxel_gi_debug.glsl create mode 100644 servers/rendering/renderer_rd/shaders/environment/voxel_gi_sdf.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/gi.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/resolve.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/sdfgi_debug_probes.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/sdfgi_preprocess.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/voxel_gi.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/voxel_gi_debug.glsl delete mode 100644 servers/rendering/renderer_rd/shaders/voxel_gi_sdf.glsl (limited to 'servers/rendering/renderer_rd') diff --git a/servers/rendering/renderer_rd/SCsub b/servers/rendering/renderer_rd/SCsub index 774a6b7951..10b83dca11 100644 --- a/servers/rendering/renderer_rd/SCsub +++ b/servers/rendering/renderer_rd/SCsub @@ -5,6 +5,7 @@ Import("env") env.add_source_files(env.servers_sources, "*.cpp") SConscript("effects/SCsub") +SConscript("environment/SCsub") SConscript("forward_clustered/SCsub") SConscript("forward_mobile/SCsub") SConscript("shaders/SCsub") diff --git a/servers/rendering/renderer_rd/effects/resolve.cpp b/servers/rendering/renderer_rd/effects/resolve.cpp new file mode 100644 index 0000000000..6c49a2ebce --- /dev/null +++ b/servers/rendering/renderer_rd/effects/resolve.cpp @@ -0,0 +1,130 @@ +/*************************************************************************/ +/* resolve.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright 
(c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/*************************************************************************/ + +#include "resolve.h" +#include "servers/rendering/renderer_rd/renderer_compositor_rd.h" +#include "servers/rendering/renderer_rd/storage_rd/material_storage.h" +#include "servers/rendering/renderer_rd/uniform_set_cache_rd.h" + +using namespace RendererRD; + +Resolve::Resolve() { + Vector resolve_modes; + resolve_modes.push_back("\n#define MODE_RESOLVE_GI\n"); + resolve_modes.push_back("\n#define MODE_RESOLVE_GI\n#define VOXEL_GI_RESOLVE\n"); + resolve_modes.push_back("\n#define MODE_RESOLVE_DEPTH\n"); + + resolve.shader.initialize(resolve_modes); + + resolve.shader_version = resolve.shader.version_create(); + + for (int i = 0; i < RESOLVE_MODE_MAX; i++) { + resolve.pipelines[i] = RD::get_singleton()->compute_pipeline_create(resolve.shader.version_get_shader(resolve.shader_version, i)); + } +} + +Resolve::~Resolve() { + resolve.shader.version_free(resolve.shader_version); +} + +void Resolve::resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_voxel_gi, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_voxel_gi, Vector2i p_screen_size, int p_samples, uint32_t p_barrier) { + UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton(); + ERR_FAIL_NULL(uniform_set_cache); + MaterialStorage *material_storage = MaterialStorage::get_singleton(); + ERR_FAIL_NULL(material_storage); + + ResolvePushConstant push_constant; + push_constant.screen_size[0] = p_screen_size.x; + push_constant.screen_size[1] = p_screen_size.y; + push_constant.samples = p_samples; + + // setup our uniforms + RID default_sampler = material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + + RD::Uniform u_source_depth(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_source_depth })); + RD::Uniform u_source_normal_roughness(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 1, Vector({ default_sampler, 
p_source_normal_roughness })); + RD::Uniform u_dest_depth(RD::UNIFORM_TYPE_IMAGE, 0, Vector({ p_dest_depth })); + RD::Uniform u_dest_normal_roughness(RD::UNIFORM_TYPE_IMAGE, 1, Vector({ p_dest_normal_roughness })); + + ResolveMode mode = p_source_voxel_gi.is_valid() ? RESOLVE_MODE_GI_VOXEL_GI : RESOLVE_MODE_GI; + RID shader = resolve.shader.version_get_shader(resolve.shader_version, mode); + ERR_FAIL_COND(shader.is_null()); + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, resolve.pipelines[mode]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_source_depth, u_source_normal_roughness), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_dest_depth, u_dest_normal_roughness), 1); + if (p_source_voxel_gi.is_valid()) { + RD::Uniform u_source_voxel_gi(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_source_voxel_gi })); + RD::Uniform u_dest_voxel_gi(RD::UNIFORM_TYPE_IMAGE, 0, p_dest_voxel_gi); + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 2, u_source_voxel_gi), 2); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 3, u_dest_voxel_gi), 3); + } + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ResolvePushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.x, p_screen_size.y, 1); + + RD::get_singleton()->compute_list_end(p_barrier); +} + +void Resolve::resolve_depth(RID p_source_depth, RID p_dest_depth, Vector2i p_screen_size, int p_samples, uint32_t p_barrier) { + UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton(); + ERR_FAIL_NULL(uniform_set_cache); + MaterialStorage *material_storage = 
MaterialStorage::get_singleton(); + ERR_FAIL_NULL(material_storage); + + ResolvePushConstant push_constant; + push_constant.screen_size[0] = p_screen_size.x; + push_constant.screen_size[1] = p_screen_size.y; + push_constant.samples = p_samples; + + // setup our uniforms + RID default_sampler = material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); + + RD::Uniform u_source_depth(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, Vector({ default_sampler, p_source_depth })); + RD::Uniform u_dest_depth(RD::UNIFORM_TYPE_IMAGE, 0, p_dest_depth); + + ResolveMode mode = RESOLVE_MODE_DEPTH; + RID shader = resolve.shader.version_get_shader(resolve.shader_version, mode); + ERR_FAIL_COND(shader.is_null()); + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, resolve.pipelines[mode]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_source_depth), 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_dest_depth), 1); + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ResolvePushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.x, p_screen_size.y, 1); + + RD::get_singleton()->compute_list_end(p_barrier); +} diff --git a/servers/rendering/renderer_rd/effects/resolve.h b/servers/rendering/renderer_rd/effects/resolve.h new file mode 100644 index 0000000000..d4b24a610f --- /dev/null +++ b/servers/rendering/renderer_rd/effects/resolve.h @@ -0,0 +1,74 @@ +/*************************************************************************/ +/* resolve.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ 
+/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
 */ +/*************************************************************************/ + +#ifndef RESOLVE_RD_H +#define RESOLVE_RD_H + +#include "servers/rendering/renderer_rd/pipeline_cache_rd.h" +#include "servers/rendering/renderer_rd/shaders/effects/resolve.glsl.gen.h" +#include "servers/rendering/renderer_scene_render.h" + +#include "servers/rendering_server.h" + +namespace RendererRD { + +class Resolve { +private: + struct ResolvePushConstant { + int32_t screen_size[2]; + int32_t samples; + uint32_t pad; + }; + + enum ResolveMode { + RESOLVE_MODE_GI, + RESOLVE_MODE_GI_VOXEL_GI, + RESOLVE_MODE_DEPTH, + RESOLVE_MODE_MAX + }; + + struct ResolveShader { + ResolvePushConstant push_constant; + ResolveShaderRD shader; + RID shader_version; + RID pipelines[RESOLVE_MODE_MAX]; // One compute pipeline per ResolveMode, indexed by the enum value. + } resolve; + +public: + Resolve(); + ~Resolve(); + + void resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_voxel_gi, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_voxel_gi, Vector2i p_screen_size, int p_samples, uint32_t p_barrier = RD::BARRIER_MASK_ALL); + void resolve_depth(RID p_source_depth, RID p_dest_depth, Vector2i p_screen_size, int p_samples, uint32_t p_barrier = RD::BARRIER_MASK_ALL); +}; + +} // namespace RendererRD + +#endif // !RESOLVE_RD_H diff --git a/servers/rendering/renderer_rd/effects_rd.cpp b/servers/rendering/renderer_rd/effects_rd.cpp index bf97c6fbe9..4c542fa2e2 100644 --- a/servers/rendering/renderer_rd/effects_rd.cpp +++ b/servers/rendering/renderer_rd/effects_rd.cpp @@ -1396,46 +1396,6 @@ void EffectsRD::cubemap_filter_raster(RID p_source_cubemap, RID p_dest_framebuff RD::get_singleton()->draw_list_end(); } -void EffectsRD::resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_voxel_gi, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_voxel_gi, Vector2i p_screen_size, int p_samples, uint32_t p_barrier) { - ResolvePushConstant push_constant; - push_constant.screen_size[0] = p_screen_size.x; 
- push_constant.screen_size[1] = p_screen_size.y; - push_constant.samples = p_samples; - - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, resolve.pipelines[p_source_voxel_gi.is_valid() ? RESOLVE_MODE_GI_VOXEL_GI : RESOLVE_MODE_GI]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture_pair(p_source_depth, p_source_normal_roughness), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_image_pair(p_dest_depth, p_dest_normal_roughness), 1); - if (p_source_voxel_gi.is_valid()) { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_voxel_gi), 2); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_voxel_gi), 3); - } - - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ResolvePushConstant)); - - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.x, p_screen_size.y, 1); - - RD::get_singleton()->compute_list_end(p_barrier); -} - -void EffectsRD::resolve_depth(RID p_source_depth, RID p_dest_depth, Vector2i p_screen_size, int p_samples, uint32_t p_barrier) { - ResolvePushConstant push_constant; - push_constant.screen_size[0] = p_screen_size.x; - push_constant.screen_size[1] = p_screen_size.y; - push_constant.samples = p_samples; - - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, resolve.pipelines[RESOLVE_MODE_DEPTH]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_compute_uniform_set_from_texture(p_source_depth), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, _get_uniform_set_from_image(p_dest_depth), 1); - - 
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ResolvePushConstant)); - - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_screen_size.x, p_screen_size.y, 1); - - RD::get_singleton()->compute_list_end(p_barrier); -} - void EffectsRD::sort_buffer(RID p_uniform_set, int p_size) { Sort::PushConstant push_constant; push_constant.total_elements = p_size; @@ -2009,21 +1969,6 @@ EffectsRD::EffectsRD(bool p_prefer_raster_effects) { ssil.pipelines[i] = RD::get_singleton()->compute_pipeline_create(ssil.interleave_shader.version_get_shader(ssil.interleave_shader_version, i - SSIL_INTERLEAVE)); } } - - { - Vector resolve_modes; - resolve_modes.push_back("\n#define MODE_RESOLVE_GI\n"); - resolve_modes.push_back("\n#define MODE_RESOLVE_GI\n#define VOXEL_GI_RESOLVE\n"); - resolve_modes.push_back("\n#define MODE_RESOLVE_DEPTH\n"); - - resolve.shader.initialize(resolve_modes); - - resolve.shader_version = resolve.shader.version_create(); - - for (int i = 0; i < RESOLVE_MODE_MAX; i++) { - resolve.pipelines[i] = RD::get_singleton()->compute_pipeline_create(resolve.shader.version_get_shader(resolve.shader_version, i)); - } - } } { @@ -2110,7 +2055,6 @@ EffectsRD::~EffectsRD() { filter.compute_shader.version_free(filter.shader_version); } if (!prefer_raster_effects) { - resolve.shader.version_free(resolve.shader_version); specular_merge.shader.version_free(specular_merge.shader_version); ss_effects.downsample_shader.version_free(ss_effects.downsample_shader_version); ssao.blur_shader.version_free(ssao.blur_shader_version); diff --git a/servers/rendering/renderer_rd/effects_rd.h b/servers/rendering/renderer_rd/effects_rd.h index 787873642e..af4ed5a6ae 100644 --- a/servers/rendering/renderer_rd/effects_rd.h +++ b/servers/rendering/renderer_rd/effects_rd.h @@ -43,7 +43,6 @@ #include "servers/rendering/renderer_rd/shaders/fsr_upscale.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/luminance_reduce.glsl.gen.h" #include 
"servers/rendering/renderer_rd/shaders/luminance_reduce_raster.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/resolve.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/roughness_limiter.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/screen_space_reflection.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/screen_space_reflection_filter.glsl.gen.h" @@ -579,26 +578,6 @@ private: RID pipelines[3]; //3 quality levels } sss; - struct ResolvePushConstant { - int32_t screen_size[2]; - int32_t samples; - uint32_t pad; - }; - - enum ResolveMode { - RESOLVE_MODE_GI, - RESOLVE_MODE_GI_VOXEL_GI, - RESOLVE_MODE_DEPTH, - RESOLVE_MODE_MAX - }; - - struct Resolve { - ResolvePushConstant push_constant; - ResolveShaderRD shader; - RID shader_version; - RID pipelines[RESOLVE_MODE_MAX]; //3 quality levels - } resolve; - enum SortMode { SORT_MODE_BLOCK, SORT_MODE_STEP, @@ -733,9 +712,6 @@ public: void merge_specular(RID p_dest_framebuffer, RID p_specular, RID p_base, RID p_reflection); void sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_depth, const CameraMatrix &p_camera, const Size2i &p_screen_size, float p_scale, float p_depth_scale, RS::SubSurfaceScatteringQuality p_quality); - void resolve_gi(RID p_source_depth, RID p_source_normal_roughness, RID p_source_voxel_gi, RID p_dest_depth, RID p_dest_normal_roughness, RID p_dest_voxel_gi, Vector2i p_screen_size, int p_samples, uint32_t p_barrier = RD::BARRIER_MASK_ALL); - void resolve_depth(RID p_source_depth, RID p_dest_depth, Vector2i p_screen_size, int p_samples, uint32_t p_barrier = RD::BARRIER_MASK_ALL); - void sort_buffer(RID p_uniform_set, int p_size); EffectsRD(bool p_prefer_raster_effects); diff --git a/servers/rendering/renderer_rd/environment/SCsub b/servers/rendering/renderer_rd/environment/SCsub new file mode 100644 index 0000000000..86681f9c74 --- /dev/null +++ b/servers/rendering/renderer_rd/environment/SCsub @@ -0,0 +1,5 @@ +#!/usr/bin/env python + +Import("env") 
+ +env.add_source_files(env.servers_sources, "*.cpp") diff --git a/servers/rendering/renderer_rd/environment/gi.cpp b/servers/rendering/renderer_rd/environment/gi.cpp new file mode 100644 index 0000000000..f3be4a7085 --- /dev/null +++ b/servers/rendering/renderer_rd/environment/gi.cpp @@ -0,0 +1,3958 @@ +/*************************************************************************/ +/* gi.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/*************************************************************************/ + +#include "gi.h" + +#include "core/config/project_settings.h" +#include "servers/rendering/renderer_rd/renderer_compositor_rd.h" +#include "servers/rendering/renderer_rd/renderer_scene_render_rd.h" +#include "servers/rendering/renderer_rd/renderer_storage_rd.h" +#include "servers/rendering/renderer_rd/storage_rd/material_storage.h" +#include "servers/rendering/renderer_rd/storage_rd/texture_storage.h" +#include "servers/rendering/rendering_server_default.h" + +using namespace RendererRD; + +const Vector3i GI::SDFGI::Cascade::DIRTY_ALL = Vector3i(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF); + +GI *GI::singleton = nullptr; + +//////////////////////////////////////////////////////////////////////////////// +// VOXEL GI STORAGE + +RID GI::voxel_gi_allocate() { + return voxel_gi_owner.allocate_rid(); +} + +void GI::voxel_gi_free(RID p_voxel_gi) { + voxel_gi_allocate_data(p_voxel_gi, Transform3D(), AABB(), Vector3i(), Vector(), Vector(), Vector(), Vector()); //deallocate + VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); + voxel_gi->dependency.deleted_notify(p_voxel_gi); + voxel_gi_owner.free(p_voxel_gi); +} + +void GI::voxel_gi_initialize(RID p_voxel_gi) { + voxel_gi_owner.initialize_rid(p_voxel_gi, VoxelGI()); +} + +void GI::voxel_gi_allocate_data(RID p_voxel_gi, const Transform3D &p_to_cell_xform, const AABB &p_aabb, const Vector3i &p_octree_size, const Vector &p_octree_cells, const Vector &p_data_cells, const Vector &p_distance_field, const Vector &p_level_counts) { + VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); + ERR_FAIL_COND(!voxel_gi); + + if (voxel_gi->octree_buffer.is_valid()) { + RD::get_singleton()->free(voxel_gi->octree_buffer); + RD::get_singleton()->free(voxel_gi->data_buffer); + if (voxel_gi->sdf_texture.is_valid()) { + RD::get_singleton()->free(voxel_gi->sdf_texture); + } + + voxel_gi->sdf_texture = RID(); + voxel_gi->octree_buffer = RID(); + 
voxel_gi->data_buffer = RID(); + voxel_gi->octree_buffer_size = 0; + voxel_gi->data_buffer_size = 0; + voxel_gi->cell_count = 0; + } + + voxel_gi->to_cell_xform = p_to_cell_xform; + voxel_gi->bounds = p_aabb; + voxel_gi->octree_size = p_octree_size; + voxel_gi->level_counts = p_level_counts; + + if (p_octree_cells.size()) { + ERR_FAIL_COND(p_octree_cells.size() % 32 != 0); //cells size must be a multiple of 32 + + uint32_t cell_count = p_octree_cells.size() / 32; + + ERR_FAIL_COND(p_data_cells.size() != (int)cell_count * 16); //see that data size matches + + voxel_gi->cell_count = cell_count; + voxel_gi->octree_buffer = RD::get_singleton()->storage_buffer_create(p_octree_cells.size(), p_octree_cells); + voxel_gi->octree_buffer_size = p_octree_cells.size(); + voxel_gi->data_buffer = RD::get_singleton()->storage_buffer_create(p_data_cells.size(), p_data_cells); + voxel_gi->data_buffer_size = p_data_cells.size(); + + if (p_distance_field.size()) { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8_UNORM; + tf.width = voxel_gi->octree_size.x; + tf.height = voxel_gi->octree_size.y; + tf.depth = voxel_gi->octree_size.z; + tf.texture_type = RD::TEXTURE_TYPE_3D; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + Vector> s; + s.push_back(p_distance_field); + voxel_gi->sdf_texture = RD::get_singleton()->texture_create(tf, RD::TextureView(), s); + } +#if 0 + { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8_UNORM; + tf.width = voxel_gi->octree_size.x; + tf.height = voxel_gi->octree_size.y; + tf.depth = voxel_gi->octree_size.z; + tf.type = RD::TEXTURE_TYPE_3D; + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; + tf.shareable_formats.push_back(RD::DATA_FORMAT_R8_UNORM); + tf.shareable_formats.push_back(RD::DATA_FORMAT_R8_UINT); + voxel_gi->sdf_texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); + } + RID 
shared_tex; + { + RD::TextureView tv; + tv.format_override = RD::DATA_FORMAT_R8_UINT; + shared_tex = RD::get_singleton()->texture_create_shared(tv, voxel_gi->sdf_texture); + } + //update SDF texture + Vector uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 1; + u.append_id(voxel_gi->octree_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 2; + u.append_id(voxel_gi->data_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 3; + u.append_id(shared_tex); + uniforms.push_back(u); + } + + RID uniform_set = RD::get_singleton()->uniform_set_create(uniforms, voxel_gi_sdf_shader_version_shader, 0); + + { + uint32_t push_constant[4] = { 0, 0, 0, 0 }; + + for (int i = 0; i < voxel_gi->level_counts.size() - 1; i++) { + push_constant[0] += voxel_gi->level_counts[i]; + } + push_constant[1] = push_constant[0] + voxel_gi->level_counts[voxel_gi->level_counts.size() - 1]; + + print_line("offset: " + itos(push_constant[0])); + print_line("size: " + itos(push_constant[1])); + //create SDF + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, voxel_gi_sdf_shader_pipeline); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, push_constant, sizeof(uint32_t) * 4); + RD::get_singleton()->compute_list_dispatch(compute_list, voxel_gi->octree_size.x / 4, voxel_gi->octree_size.y / 4, voxel_gi->octree_size.z / 4); + RD::get_singleton()->compute_list_end(); + } + + RD::get_singleton()->free(uniform_set); + RD::get_singleton()->free(shared_tex); + } +#endif + } + + voxel_gi->version++; + voxel_gi->data_version++; + + voxel_gi->dependency.changed_notify(RendererStorage::DEPENDENCY_CHANGED_AABB); +} + +AABB 
GI::voxel_gi_get_bounds(RID p_voxel_gi) const {
+	// VoxelGI accessors. Each one resolves the RID through voxel_gi_owner and
+	// bails out through ERR_FAIL_COND(_V) with a neutral value when it does not resolve.
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND_V(!voxel_gi, AABB());
+
+	return voxel_gi->bounds;
+}
+
+Vector3i GI::voxel_gi_get_octree_size(RID p_voxel_gi) const {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND_V(!voxel_gi, Vector3i());
+	return voxel_gi->octree_size;
+}
+
+// Reads the octree buffer back from the rendering device; empty when no buffer is allocated.
+Vector<uint8_t> GI::voxel_gi_get_octree_cells(RID p_voxel_gi) const {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND_V(!voxel_gi, Vector<uint8_t>());
+
+	if (voxel_gi->octree_buffer.is_valid()) {
+		return RD::get_singleton()->buffer_get_data(voxel_gi->octree_buffer);
+	}
+	return Vector<uint8_t>();
+}
+
+// Reads the data buffer back from the rendering device; empty when no buffer is allocated.
+Vector<uint8_t> GI::voxel_gi_get_data_cells(RID p_voxel_gi) const {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND_V(!voxel_gi, Vector<uint8_t>());
+
+	if (voxel_gi->data_buffer.is_valid()) {
+		return RD::get_singleton()->buffer_get_data(voxel_gi->data_buffer);
+	}
+	return Vector<uint8_t>();
+}
+
+// Reads layer 0 of the SDF texture back from the rendering device; empty when no texture is allocated.
+Vector<uint8_t> GI::voxel_gi_get_distance_field(RID p_voxel_gi) const {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND_V(!voxel_gi, Vector<uint8_t>());
+
+	// Guard on the resource that is actually read (this used to test data_buffer).
+	if (voxel_gi->sdf_texture.is_valid()) {
+		return RD::get_singleton()->texture_get_data(voxel_gi->sdf_texture, 0);
+	}
+	return Vector<uint8_t>();
+}
+
+Vector<int> GI::voxel_gi_get_level_counts(RID p_voxel_gi) const {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND_V(!voxel_gi, Vector<int>());
+
+	return voxel_gi->level_counts;
+}
+
+Transform3D GI::voxel_gi_get_to_cell_xform(RID p_voxel_gi) const {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND_V(!voxel_gi, Transform3D());
+
+	return voxel_gi->to_cell_xform;
+}
+
+// Setters that bump `version` (dynamic range, propagation, two bounces) do so in
+// addition to storing the value; the remaining setters only store it.
+void GI::voxel_gi_set_dynamic_range(RID p_voxel_gi, float p_range) {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND(!voxel_gi);
+
+	voxel_gi->dynamic_range = p_range;
+	voxel_gi->version++;
+}
+
+float GI::voxel_gi_get_dynamic_range(RID p_voxel_gi) const {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND_V(!voxel_gi, 0);
+
+	return voxel_gi->dynamic_range;
+}
+
+void GI::voxel_gi_set_propagation(RID p_voxel_gi, float p_range) {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND(!voxel_gi);
+
+	voxel_gi->propagation = p_range;
+	voxel_gi->version++;
+}
+
+float GI::voxel_gi_get_propagation(RID p_voxel_gi) const {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND_V(!voxel_gi, 0);
+	return voxel_gi->propagation;
+}
+
+void GI::voxel_gi_set_energy(RID p_voxel_gi, float p_energy) {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND(!voxel_gi);
+
+	voxel_gi->energy = p_energy;
+}
+
+float GI::voxel_gi_get_energy(RID p_voxel_gi) const {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND_V(!voxel_gi, 0);
+	return voxel_gi->energy;
+}
+
+void GI::voxel_gi_set_bias(RID p_voxel_gi, float p_bias) {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND(!voxel_gi);
+
+	voxel_gi->bias = p_bias;
+}
+
+float GI::voxel_gi_get_bias(RID p_voxel_gi) const {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND_V(!voxel_gi, 0);
+	return voxel_gi->bias;
+}
+
+void GI::voxel_gi_set_normal_bias(RID p_voxel_gi, float p_normal_bias) {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND(!voxel_gi);
+
+	voxel_gi->normal_bias = p_normal_bias;
+}
+
+float GI::voxel_gi_get_normal_bias(RID p_voxel_gi) const {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND_V(!voxel_gi, 0);
+	return voxel_gi->normal_bias;
+}
+
+void GI::voxel_gi_set_anisotropy_strength(RID p_voxel_gi, float p_strength) {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND(!voxel_gi);
+
+	voxel_gi->anisotropy_strength = p_strength;
+}
+
+float GI::voxel_gi_get_anisotropy_strength(RID p_voxel_gi) const {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND_V(!voxel_gi, 0);
+	return voxel_gi->anisotropy_strength;
+}
+
+void GI::voxel_gi_set_interior(RID p_voxel_gi, bool p_enable) {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND(!voxel_gi);
+
+	voxel_gi->interior = p_enable;
+}
+
+void GI::voxel_gi_set_use_two_bounces(RID p_voxel_gi, bool p_enable) {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND(!voxel_gi);
+
+	voxel_gi->use_two_bounces = p_enable;
+	voxel_gi->version++;
+}
+
+bool GI::voxel_gi_is_using_two_bounces(RID p_voxel_gi) const {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND_V(!voxel_gi, false);
+	return voxel_gi->use_two_bounces;
+}
+
+bool GI::voxel_gi_is_interior(RID p_voxel_gi) const {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND_V(!voxel_gi, false);
+	return voxel_gi->interior;
+}
+
+uint32_t GI::voxel_gi_get_version(RID p_voxel_gi) const {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND_V(!voxel_gi, 0);
+	return voxel_gi->version;
+}
+
+uint32_t GI::voxel_gi_get_data_version(RID p_voxel_gi) {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND_V(!voxel_gi, 0);
+	return voxel_gi->data_version;
+}
+
+RID GI::voxel_gi_get_octree_buffer(RID p_voxel_gi) const {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND_V(!voxel_gi, RID());
+	return voxel_gi->octree_buffer;
+}
+
+RID GI::voxel_gi_get_data_buffer(RID p_voxel_gi) const {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND_V(!voxel_gi, RID());
+	return voxel_gi->data_buffer;
+}
+
+RID GI::voxel_gi_get_sdf_texture(RID p_voxel_gi) {
+	VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi);
+	ERR_FAIL_COND_V(!voxel_gi, RID());
+
+	return voxel_gi->sdf_texture;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// SDFGI
+
+// Allocates every GPU resource used by this SDFGI instance (scratch render
+// textures, per-cascade textures and buffers, probe textures) and builds the
+// uniform sets for the preprocess, direct light and integrate shaders.
+// Settings are copied from p_env; cascades are initially positioned around
+// p_world_position and flagged DIRTY_ALL.
+void GI::SDFGI::create(RendererSceneEnvironmentRD *p_env, const Vector3 &p_world_position, uint32_t p_requested_history_size, GI *p_gi) {
+	RendererRD::TextureStorage *texture_storage = RendererRD::TextureStorage::get_singleton();
+	RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton();
+
+	storage = p_gi->storage;
+	gi = p_gi;
+	num_cascades = p_env->sdfgi_cascades;
+	min_cell_size = p_env->sdfgi_min_cell_size;
+	uses_occlusion = p_env->sdfgi_use_occlusion;
+	y_scale_mode = p_env->sdfgi_y_scale;
+	static const float y_scale[3] = { 2.0, 1.5, 1.0 };
+	y_mult = y_scale[y_scale_mode];
+	cascades.resize(num_cascades);
+	probe_axis_count = SDFGI::PROBE_DIVISOR + 1;
+	solid_cell_ratio = gi->sdfgi_solid_cell_ratio;
+	solid_cell_count = uint32_t(float(cascade_size * cascade_size * cascade_size) * solid_cell_ratio);
+
+	float base_cell_size = min_cell_size;
+
+	RD::TextureFormat tf_sdf;
+	tf_sdf.format = RD::DATA_FORMAT_R8_UNORM;
+	tf_sdf.width = cascade_size; // Always 64x64
+	tf_sdf.height = cascade_size;
+	tf_sdf.depth = cascade_size;
+	tf_sdf.texture_type = RD::TEXTURE_TYPE_3D;
+	tf_sdf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT;
+
+	{
+		RD::TextureFormat tf_render = tf_sdf;
+		tf_render.format = RD::DATA_FORMAT_R16_UINT;
+		render_albedo = RD::get_singleton()->texture_create(tf_render, RD::TextureView());
+		tf_render.format = RD::DATA_FORMAT_R32_UINT;
+		render_emission = RD::get_singleton()->texture_create(tf_render, RD::TextureView());
+		render_emission_aniso = RD::get_singleton()->texture_create(tf_render, RD::TextureView());
+
+		tf_render.format = RD::DATA_FORMAT_R8_UNORM; //at least its easy to visualize
+
+		for (int i = 0; i < 8; i++) {
+			render_occlusion[i] = RD::get_singleton()->texture_create(tf_render, RD::TextureView());
+		}
+
+		tf_render.format = RD::DATA_FORMAT_R32_UINT;
+		render_geom_facing = RD::get_singleton()->texture_create(tf_render, RD::TextureView());
+
+		tf_render.format = RD::DATA_FORMAT_R8G8B8A8_UINT;
+		render_sdf[0] = RD::get_singleton()->texture_create(tf_render, RD::TextureView());
+		render_sdf[1] = RD::get_singleton()->texture_create(tf_render, RD::TextureView());
+
+		tf_render.width /= 2;
+		tf_render.height /= 2;
+		tf_render.depth /= 2;
+
+		render_sdf_half[0] = RD::get_singleton()->texture_create(tf_render, RD::TextureView());
+		render_sdf_half[1] = RD::get_singleton()->texture_create(tf_render, RD::TextureView());
+	}
+
+	RD::TextureFormat tf_occlusion = tf_sdf;
+	tf_occlusion.format = RD::DATA_FORMAT_R16_UINT;
+	tf_occlusion.shareable_formats.push_back(RD::DATA_FORMAT_R16_UINT);
+	tf_occlusion.shareable_formats.push_back(RD::DATA_FORMAT_R4G4B4A4_UNORM_PACK16);
+	tf_occlusion.depth *= cascades.size(); //use depth for occlusion slices
+	tf_occlusion.width *= 2; //use width for the other half
+
+	RD::TextureFormat tf_light = tf_sdf;
+	tf_light.format = RD::DATA_FORMAT_R32_UINT;
+	tf_light.shareable_formats.push_back(RD::DATA_FORMAT_R32_UINT);
+	tf_light.shareable_formats.push_back(RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32);
+
+	RD::TextureFormat tf_aniso0 = tf_sdf;
+	tf_aniso0.format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
+	RD::TextureFormat tf_aniso1 = tf_sdf;
+	tf_aniso1.format = RD::DATA_FORMAT_R8G8_UNORM;
+
+	int passes = nearest_shift(cascade_size) - 1;
+
+	//store lightprobe SH
+	RD::TextureFormat tf_probes;
+	tf_probes.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
+	tf_probes.width = probe_axis_count * probe_axis_count;
+	tf_probes.height = probe_axis_count * SDFGI::SH_SIZE;
+	tf_probes.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT;
+	tf_probes.texture_type = RD::TEXTURE_TYPE_2D_ARRAY;
+
+	history_size = p_requested_history_size;
+
+	RD::TextureFormat tf_probe_history = tf_probes;
+	tf_probe_history.format = RD::DATA_FORMAT_R16G16B16A16_SINT; //signed integer because SH are signed
+	tf_probe_history.array_layers = history_size;
+
+	RD::TextureFormat tf_probe_average = tf_probes;
+	tf_probe_average.format = RD::DATA_FORMAT_R32G32B32A32_SINT; //signed integer because SH are signed
+	tf_probe_average.texture_type = RD::TEXTURE_TYPE_2D;
+
+	lightprobe_history_scroll = RD::get_singleton()->texture_create(tf_probe_history, RD::TextureView());
+	lightprobe_average_scroll = RD::get_singleton()->texture_create(tf_probe_average, RD::TextureView());
+
+	{
+		//octahedral lightprobes
+		RD::TextureFormat tf_octprobes = tf_probes;
+		tf_octprobes.array_layers = cascades.size() * 2;
+		tf_octprobes.format = RD::DATA_FORMAT_R32_UINT; //pack well with RGBE
+		tf_octprobes.width = probe_axis_count * probe_axis_count * (SDFGI::LIGHTPROBE_OCT_SIZE + 2);
+		tf_octprobes.height = probe_axis_count * (SDFGI::LIGHTPROBE_OCT_SIZE + 2);
+		tf_octprobes.shareable_formats.push_back(RD::DATA_FORMAT_R32_UINT);
+		tf_octprobes.shareable_formats.push_back(RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32);
+		//lightprobe texture is an octahedral texture
+
+		lightprobe_data = RD::get_singleton()->texture_create(tf_octprobes, RD::TextureView());
+		RD::TextureView tv;
+		tv.format_override = RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32;
+		lightprobe_texture = RD::get_singleton()->texture_create_shared(tv, lightprobe_data);
+
+		//texture handling ambient data, to integrate with volumetric fog
+		RD::TextureFormat tf_ambient = tf_probes;
+		tf_ambient.array_layers = cascades.size();
+		tf_ambient.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; //one layer per cascade, half float
+		tf_ambient.width = probe_axis_count * probe_axis_count;
+		tf_ambient.height = probe_axis_count;
+		tf_ambient.texture_type = RD::TEXTURE_TYPE_2D_ARRAY;
+		ambient_texture = RD::get_singleton()->texture_create(tf_ambient, RD::TextureView());
+	}
+
+	cascades_ubo = RD::get_singleton()->uniform_buffer_create(sizeof(SDFGI::Cascade::UBO) * SDFGI::MAX_CASCADES);
+
+	occlusion_data = RD::get_singleton()->texture_create(tf_occlusion, RD::TextureView());
+	{
+		RD::TextureView tv;
+		tv.format_override = RD::DATA_FORMAT_R4G4B4A4_UNORM_PACK16;
+		occlusion_texture = RD::get_singleton()->texture_create_shared(tv, occlusion_data);
+	}
+
+	for (uint32_t i = 0; i < cascades.size(); i++) {
+		SDFGI::Cascade &cascade = cascades[i];
+
+		/* 3D Textures */
+
+		cascade.sdf_tex = RD::get_singleton()->texture_create(tf_sdf, RD::TextureView());
+
+		cascade.light_data = RD::get_singleton()->texture_create(tf_light, RD::TextureView());
+
+		cascade.light_aniso_0_tex = RD::get_singleton()->texture_create(tf_aniso0, RD::TextureView());
+		cascade.light_aniso_1_tex = RD::get_singleton()->texture_create(tf_aniso1, RD::TextureView());
+
+		{
+			RD::TextureView tv;
+			tv.format_override = RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32;
+			cascade.light_tex = RD::get_singleton()->texture_create_shared(tv, cascade.light_data);
+
+			RD::get_singleton()->texture_clear(cascade.light_tex, Color(0, 0, 0, 0), 0, 1, 0, 1);
+			RD::get_singleton()->texture_clear(cascade.light_aniso_0_tex, Color(0, 0, 0, 0), 0, 1, 0, 1);
+			RD::get_singleton()->texture_clear(cascade.light_aniso_1_tex, Color(0, 0, 0, 0), 0, 1, 0, 1);
+		}
+
+		cascade.cell_size = base_cell_size;
+		Vector3 world_position = p_world_position;
+		world_position.y *= y_mult;
+		int32_t probe_cells = cascade_size / SDFGI::PROBE_DIVISOR;
+		Vector3 probe_size = Vector3(1, 1, 1) * cascade.cell_size * probe_cells;
+		Vector3i probe_pos = Vector3i((world_position / probe_size + Vector3(0.5, 0.5, 0.5)).floor());
+		cascade.position = probe_pos * probe_cells;
+
+		cascade.dirty_regions = SDFGI::Cascade::DIRTY_ALL;
+
+		// Each cascade doubles the cell size of the previous one.
+		base_cell_size *= 2.0;
+
+		/* Probe History */
+
+		cascade.lightprobe_history_tex = RD::get_singleton()->texture_create(tf_probe_history, RD::TextureView());
+		RD::get_singleton()->texture_clear(cascade.lightprobe_history_tex, Color(0, 0, 0, 0), 0, 1, 0, tf_probe_history.array_layers); //needs to be cleared for average to work
+
+		cascade.lightprobe_average_tex = RD::get_singleton()->texture_create(tf_probe_average, RD::TextureView());
+		RD::get_singleton()->texture_clear(cascade.lightprobe_average_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); //needs to be cleared for average to work
+
+		/* Buffers */
+
+		cascade.solid_cell_buffer = RD::get_singleton()->storage_buffer_create(sizeof(SDFGI::Cascade::SolidCell) * solid_cell_count);
+		cascade.solid_cell_dispatch_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * 4, Vector<uint8_t>(), RD::STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT);
+		cascade.lights_buffer = RD::get_singleton()->storage_buffer_create(sizeof(SDFGIShader::Light) * MAX(SDFGI::MAX_STATIC_LIGHTS, SDFGI::MAX_DYNAMIC_LIGHTS));
+		{
+			Vector<RD::Uniform> uniforms;
+			{
+				RD::Uniform u;
+				u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+				u.binding = 1;
+				u.append_id(render_sdf[(passes & 1) ? 1 : 0]); //if passes are even, we read from buffer 0, else we read from buffer 1
+				uniforms.push_back(u);
+			}
+			{
+				RD::Uniform u;
+				u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+				u.binding = 2;
+				u.append_id(render_albedo);
+				uniforms.push_back(u);
+			}
+			{
+				RD::Uniform u;
+				u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+				u.binding = 3;
+				for (int j = 0; j < 8; j++) {
+					u.append_id(render_occlusion[j]);
+				}
+				uniforms.push_back(u);
+			}
+			{
+				RD::Uniform u;
+				u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+				u.binding = 4;
+				u.append_id(render_emission);
+				uniforms.push_back(u);
+			}
+			{
+				RD::Uniform u;
+				u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+				u.binding = 5;
+				u.append_id(render_emission_aniso);
+				uniforms.push_back(u);
+			}
+			{
+				RD::Uniform u;
+				u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+				u.binding = 6;
+				u.append_id(render_geom_facing);
+				uniforms.push_back(u);
+			}
+
+			{
+				RD::Uniform u;
+				u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+				u.binding = 7;
+				u.append_id(cascade.sdf_tex);
+				uniforms.push_back(u);
+			}
+			{
+				RD::Uniform u;
+				u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+				u.binding = 8;
+				u.append_id(occlusion_data);
+				uniforms.push_back(u);
+			}
+			{
+				RD::Uniform u;
+				u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+				u.binding = 10;
+				u.append_id(cascade.solid_cell_dispatch_buffer);
+				uniforms.push_back(u);
+			}
+			{
+				RD::Uniform u;
+				u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+				u.binding = 11;
+				u.append_id(cascade.solid_cell_buffer);
+				uniforms.push_back(u);
+			}
+
+			cascade.sdf_store_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_STORE), 0);
+		}
+
+		{
+			Vector<RD::Uniform> uniforms;
+			{
+				RD::Uniform u;
+				u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+				u.binding = 1;
+				u.append_id(render_albedo);
+				uniforms.push_back(u);
+			}
+			{
+				RD::Uniform u;
+				u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+				u.binding = 2;
+				u.append_id(render_geom_facing);
+				uniforms.push_back(u);
+			}
+			{
+				RD::Uniform u;
+				u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+				u.binding = 3;
+				u.append_id(render_emission);
+				uniforms.push_back(u);
+			}
+			{
+				RD::Uniform u;
+				u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+				u.binding = 4;
+				u.append_id(render_emission_aniso);
+				uniforms.push_back(u);
+			}
+			{
+				RD::Uniform u;
+				u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+				u.binding = 5;
+				u.append_id(cascade.solid_cell_dispatch_buffer);
+				uniforms.push_back(u);
+			}
+			{
+				RD::Uniform u;
+				u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+				u.binding = 6;
+				u.append_id(cascade.solid_cell_buffer);
+				uniforms.push_back(u);
+			}
+
+			cascade.scroll_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_SCROLL), 0);
+		}
+		{
+			Vector<RD::Uniform> uniforms;
+			{
+				RD::Uniform u;
+				u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+				u.binding = 1;
+				for (int j = 0; j < 8; j++) {
+					u.append_id(render_occlusion[j]);
+				}
+				uniforms.push_back(u);
+			}
+			{
+				RD::Uniform u;
+				u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+				u.binding = 2;
+				u.append_id(occlusion_data);
+				uniforms.push_back(u);
+			}
+
+			cascade.scroll_occlusion_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_SCROLL_OCCLUSION), 0);
+		}
+	}
+
+	//direct light
+	for (uint32_t i = 0; i < cascades.size(); i++) {
+		SDFGI::Cascade &cascade = cascades[i];
+
+		Vector<RD::Uniform> uniforms;
+		{
+			RD::Uniform u;
+			u.binding = 1;
+			u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
+			// The array is always MAX_CASCADES long; unused slots get the default 3D white texture.
+			for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) {
+				if (j < cascades.size()) {
+					u.append_id(cascades[j].sdf_tex);
+				} else {
+					u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE));
+				}
+			}
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.binding = 2;
+			u.uniform_type = RD::UNIFORM_TYPE_SAMPLER;
+			u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED));
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.binding = 3;
+			u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+			u.append_id(cascade.solid_cell_dispatch_buffer);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.binding = 4;
+			u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+			u.append_id(cascade.solid_cell_buffer);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.binding = 5;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.append_id(cascade.light_data);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.binding = 6;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.append_id(cascade.light_aniso_0_tex);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.binding = 7;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.append_id(cascade.light_aniso_1_tex);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.binding = 8;
+			u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER;
+			u.append_id(cascades_ubo);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.binding = 9;
+			u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+			u.append_id(cascade.lights_buffer);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.binding = 10;
+			u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
+			u.append_id(lightprobe_texture);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.binding = 11;
+			u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
+			u.append_id(occlusion_texture);
+			uniforms.push_back(u);
+		}
+
+		cascade.sdf_direct_light_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.direct_light.version_get_shader(gi->sdfgi_shader.direct_light_shader, 0), 0);
+	}
+
+	//preprocess initialize uniform set
+	{
+		Vector<RD::Uniform> uniforms;
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 1;
+			u.append_id(render_albedo);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 2;
+			u.append_id(render_sdf[0]);
+			uniforms.push_back(u);
+		}
+
+		sdf_initialize_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE), 0);
+	}
+
+	{
+		Vector<RD::Uniform> uniforms;
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 1;
+			u.append_id(render_albedo);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 2;
+			u.append_id(render_sdf_half[0]);
+			uniforms.push_back(u);
+		}
+
+		sdf_initialize_half_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE_HALF), 0);
+	}
+
+	//jump flood uniform set
+	{
+		Vector<RD::Uniform> uniforms;
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 1;
+			u.append_id(render_sdf[0]);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 2;
+			u.append_id(render_sdf[1]);
+			uniforms.push_back(u);
+		}
+
+		jump_flood_uniform_set[0] = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_JUMP_FLOOD), 0);
+		// Second set is the same pair with the two bindings swapped (ping-pong).
+		RID aux0 = uniforms.write[0].get_id(0);
+		RID aux1 = uniforms.write[1].get_id(0);
+		uniforms.write[0].set_id(0, aux1);
+		uniforms.write[1].set_id(0, aux0);
+		jump_flood_uniform_set[1] = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_JUMP_FLOOD), 0);
+	}
+	//jump flood half uniform set
+	{
+		Vector<RD::Uniform> uniforms;
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 1;
+			u.append_id(render_sdf_half[0]);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 2;
+			u.append_id(render_sdf_half[1]);
+			uniforms.push_back(u);
+		}
+
+		jump_flood_half_uniform_set[0] = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_JUMP_FLOOD), 0);
+		// Second set is the same pair with the two bindings swapped (ping-pong).
+		RID aux0 = uniforms.write[0].get_id(0);
+		RID aux1 = uniforms.write[1].get_id(0);
+		uniforms.write[0].set_id(0, aux1);
+		uniforms.write[1].set_id(0, aux0);
+		jump_flood_half_uniform_set[1] = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_JUMP_FLOOD), 0);
+	}
+
+	//upscale half size sdf
+	{
+		Vector<RD::Uniform> uniforms;
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 1;
+			u.append_id(render_albedo);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 2;
+			u.append_id(render_sdf_half[(passes & 1) ? 0 : 1]); //reverse pass order because half size
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 3;
+			u.append_id(render_sdf[(passes & 1) ? 0 : 1]); //reverse pass order because it needs an extra JFA pass
+			uniforms.push_back(u);
+		}
+
+		upscale_jfa_uniform_set_index = (passes & 1) ? 0 : 1;
+		sdf_upscale_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_JUMP_FLOOD_UPSCALE), 0);
+	}
+
+	//occlusion uniform set
+	{
+		Vector<RD::Uniform> uniforms;
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 1;
+			u.append_id(render_albedo);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 2;
+			for (int i = 0; i < 8; i++) {
+				u.append_id(render_occlusion[i]);
+			}
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 3;
+			u.append_id(render_geom_facing);
+			uniforms.push_back(u);
+		}
+
+		occlusion_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_OCCLUSION), 0);
+	}
+
+	for (uint32_t i = 0; i < cascades.size(); i++) {
+		//integrate uniform
+
+		Vector<RD::Uniform> uniforms;
+
+		{
+			RD::Uniform u;
+			u.binding = 1;
+			u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
+			for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) {
+				if (j < cascades.size()) {
+					u.append_id(cascades[j].sdf_tex);
+				} else {
+					u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE));
+				}
+			}
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.binding = 2;
+			u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
+			for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) {
+				if (j < cascades.size()) {
+					u.append_id(cascades[j].light_tex);
+				} else {
+					u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE));
+				}
+			}
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.binding = 3;
+			u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
+			for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) {
+				if (j < cascades.size()) {
+					u.append_id(cascades[j].light_aniso_0_tex);
+				} else {
+					u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE));
+				}
+			}
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.binding = 4;
+			u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
+			for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) {
+				if (j < cascades.size()) {
+					u.append_id(cascades[j].light_aniso_1_tex);
+				} else {
+					u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE));
+				}
+			}
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_SAMPLER;
+			u.binding = 6;
+			u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED));
+			uniforms.push_back(u);
+		}
+
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER;
+			u.binding = 7;
+			u.append_id(cascades_ubo);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 8;
+			u.append_id(lightprobe_data);
+			uniforms.push_back(u);
+		}
+
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 9;
+			u.append_id(cascades[i].lightprobe_history_tex);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 10;
+			u.append_id(cascades[i].lightprobe_average_tex);
+			uniforms.push_back(u);
+		}
+
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 11;
+			u.append_id(lightprobe_history_scroll);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 12;
+			u.append_id(lightprobe_average_scroll);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 13;
+			RID parent_average;
+			if (cascades.size() == 1) {
+				// If there is only one SDFGI cascade, we can't use the previous cascade for blending.
+				parent_average = cascades[i].lightprobe_average_tex;
+			} else if (i < cascades.size() - 1) {
+				parent_average = cascades[i + 1].lightprobe_average_tex;
+			} else {
+				parent_average = cascades[i - 1].lightprobe_average_tex; //to use something, but it won't be used
+			}
+			u.append_id(parent_average);
+			uniforms.push_back(u);
+		}
+		{
+			RD::Uniform u;
+			u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+			u.binding = 14;
+			u.append_id(ambient_texture);
+			uniforms.push_back(u);
+		}
+
+		cascades[i].integrate_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.integrate.version_get_shader(gi->sdfgi_shader.integrate_shader, 0), 0);
+	}
+
+	bounce_feedback = p_env->sdfgi_bounce_feedback;
+	energy = p_env->sdfgi_energy;
+	normal_bias = p_env->sdfgi_normal_bias;
+	probe_bias = p_env->sdfgi_probe_bias;
+	reads_sky = p_env->sdfgi_read_sky_light;
+}
+
+// Releases the GPU resources allocated by create(), plus the debug uniform
+// sets and debug UBO when they exist.
+void GI::SDFGI::erase() {
+	for (uint32_t i = 0; i < cascades.size(); i++) {
+		const SDFGI::Cascade &c = cascades[i];
+		RD::get_singleton()->free(c.light_data);
+		RD::get_singleton()->free(c.light_aniso_0_tex);
+		RD::get_singleton()->free(c.light_aniso_1_tex);
+		RD::get_singleton()->free(c.sdf_tex);
+		RD::get_singleton()->free(c.solid_cell_dispatch_buffer);
+		RD::get_singleton()->free(c.solid_cell_buffer);
+		RD::get_singleton()->free(c.lightprobe_history_tex);
+		RD::get_singleton()->free(c.lightprobe_average_tex);
+		RD::get_singleton()->free(c.lights_buffer);
+	}
+
+	RD::get_singleton()->free(render_albedo);
+	RD::get_singleton()->free(render_emission);
+	RD::get_singleton()->free(render_emission_aniso);
+
+	RD::get_singleton()->free(render_sdf[0]);
+	RD::get_singleton()->free(render_sdf[1]);
+
+	RD::get_singleton()->free(render_sdf_half[0]);
+	RD::get_singleton()->free(render_sdf_half[1]);
+
+	for (int i = 0; i < 8; i++) {
+		RD::get_singleton()->free(render_occlusion[i]);
+	}
+
+	RD::get_singleton()->free(render_geom_facing);
+
+	RD::get_singleton()->free(lightprobe_data);
+	RD::get_singleton()->free(lightprobe_history_scroll);
+	// create() allocates this texture alongside lightprobe_history_scroll; it was never released.
+	RD::get_singleton()->free(lightprobe_average_scroll);
+	RD::get_singleton()->free(occlusion_data);
+	RD::get_singleton()->free(ambient_texture);
+
+	RD::get_singleton()->free(cascades_ubo);
+
+	for (uint32_t v = 0; v < RendererSceneRender::MAX_RENDER_VIEWS; v++) {
+		if (RD::get_singleton()->uniform_set_is_valid(debug_uniform_set[v])) {
+			RD::get_singleton()->free(debug_uniform_set[v]);
+		}
+		debug_uniform_set[v] = RID();
+	}
+
+	if (RD::get_singleton()->uniform_set_is_valid(debug_probes_uniform_set)) {
+		RD::get_singleton()->free(debug_probes_uniform_set);
+	}
+	debug_probes_uniform_set = RID();
+
+	if (debug_probes_scene_data_ubo.is_valid()) {
+		RD::get_singleton()->free(debug_probes_scene_data_ubo);
+		debug_probes_scene_data_ubo = RID();
+	}
+}
+
+// Refreshes the per-frame settings from p_env and scrolls each cascade so it
+// follows p_world_position, recording the scrolled amount in dirty_regions
+// (or DIRTY_ALL when a full redraw is required).
+void GI::SDFGI::update(RendererSceneEnvironmentRD *p_env, const Vector3 &p_world_position) {
+	bounce_feedback = p_env->sdfgi_bounce_feedback;
+	energy = p_env->sdfgi_energy;
+	normal_bias = p_env->sdfgi_normal_bias;
+	probe_bias = p_env->sdfgi_probe_bias;
+	reads_sky = p_env->sdfgi_read_sky_light;
+
+	int32_t drag_margin = (cascade_size / SDFGI::PROBE_DIVISOR) / 2;
+
+	for (uint32_t i = 0; i < cascades.size(); i++) {
+		SDFGI::Cascade &cascade = cascades[i];
+		cascade.dirty_regions = Vector3i();
+
+		// The half-probe offset is forced to zero. The value that used to be
+		// computed here was overwritten before it was ever read.
+		Vector3 probe_half_size = Vector3(0, 0, 0);
+
+		Vector3 world_position = p_world_position;
+		world_position.y *= y_mult;
+		Vector3i pos_in_cascade = Vector3i((world_position + probe_half_size) / cascade.cell_size);
+
+		for (int j = 0; j < 3; j++) {
+			// Move the cascade in steps of 2 * drag_margin until the position is back within the margin.
+			if (pos_in_cascade[j] < cascade.position[j]) {
+				while (pos_in_cascade[j] < (cascade.position[j] - drag_margin)) {
+					cascade.position[j] -= drag_margin * 2;
+					cascade.dirty_regions[j] += drag_margin * 2;
+				}
+			} else if (pos_in_cascade[j] > cascade.position[j]) {
+				while (pos_in_cascade[j] > (cascade.position[j] + drag_margin)) {
+					cascade.position[j] += drag_margin * 2;
+					cascade.dirty_regions[j] -= drag_margin * 2;
+				}
+			}
+
+			if (cascade.dirty_regions[j] == 0) {
+				continue; // not dirty
+			} else if (uint32_t(ABS(cascade.dirty_regions[j])) >= cascade_size) {
+				//moved too much, just redraw everything (make all dirty)
+				cascade.dirty_regions = SDFGI::Cascade::DIRTY_ALL;
+				break;
+			}
+		}
+
+		if (cascade.dirty_regions != Vector3i() && cascade.dirty_regions != SDFGI::Cascade::DIRTY_ALL) {
+			//see how much the total dirty volume represents from the total volume
+			uint32_t total_volume = cascade_size * cascade_size * cascade_size;
+			uint32_t safe_volume = 1;
+			for (int j = 0; j < 3; j++) {
+				safe_volume *= cascade_size - ABS(cascade.dirty_regions[j]);
+			}
+			uint32_t dirty_volume = total_volume - safe_volume;
+			if (dirty_volume > (safe_volume / 2)) {
+				//more than half the volume is dirty, make all dirty so its only rendered once
+				cascade.dirty_regions = SDFGI::Cascade::DIRTY_ALL;
+			}
+		}
+	}
+}
+
+// Dispatches the dynamic direct-light pass for every cascade. Unless a cascade
+// has all its dynamic lights dirty (or the setting asks for a 1-frame update),
+// the work is spread over several frames via process_offset/process_increment.
+void GI::SDFGI::update_light() {
+	RD::get_singleton()->draw_command_begin_label("SDFGI Update dynamic Light");
+
+	/* Update dynamic light */
+
+	RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
+	RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.direct_light_pipeline[SDFGIShader::DIRECT_LIGHT_MODE_DYNAMIC]);
+
+	SDFGIShader::DirectLightPushConstant push_constant;
+
+	push_constant.grid_size[0] = cascade_size;
+	push_constant.grid_size[1] = cascade_size;
+	push_constant.grid_size[2] = cascade_size;
+	push_constant.max_cascades = cascades.size();
+	push_constant.probe_axis_size = probe_axis_count;
+	push_constant.bounce_feedback = bounce_feedback;
+	push_constant.y_mult = y_mult;
+	push_constant.use_occlusion = uses_occlusion;
+
+	for (uint32_t i = 0; i < cascades.size(); i++) {
+		SDFGI::Cascade &cascade = cascades[i];
+		push_constant.light_count = cascade_dynamic_light_count[i];
+		push_constant.cascade = i;
+
+		if (cascade.all_dynamic_lights_dirty || gi->sdfgi_frames_to_update_light == RS::ENV_SDFGI_UPDATE_LIGHT_IN_1_FRAME) {
+			push_constant.process_offset = 0;
+			push_constant.process_increment = 1;
+		} else {
+			static const uint32_t frames_to_update_table[RS::ENV_SDFGI_UPDATE_LIGHT_MAX] = {
+				1, 2, 4, 8, 16
+			};
+
+			uint32_t frames_to_update = frames_to_update_table[gi->sdfgi_frames_to_update_light];
+
+			push_constant.process_offset = RSG::rasterizer->get_frame_number() % frames_to_update;
+			push_constant.process_increment = frames_to_update;
+		}
+		cascade.all_dynamic_lights_dirty = false;
+
+		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascade.sdf_direct_light_uniform_set, 0);
+		RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::DirectLightPushConstant));
+		RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cascade.solid_cell_dispatch_buffer, 0);
+	}
+	RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_COMPUTE);
+	RD::get_singleton()->draw_command_end_label();
+}
+
+// Records the probe integration dispatches for every cascade. The compute list
+// opened here is intentionally not ended in this function (see the note at the
+// bottom). p_env may be null; sky input and the ambient texture store are then disabled.
+void GI::SDFGI::update_probes(RendererSceneEnvironmentRD *p_env, RendererSceneSkyRD::Sky *p_sky) {
+	RD::get_singleton()->draw_command_begin_label("SDFGI Update Probes");
+
+	SDFGIShader::IntegratePushConstant push_constant;
+	push_constant.grid_size[1] = cascade_size;
+	push_constant.grid_size[2] = cascade_size;
+	push_constant.grid_size[0] = cascade_size;
+	push_constant.max_cascades = cascades.size();
+	push_constant.probe_axis_size = probe_axis_count;
+	push_constant.history_index = render_pass % history_size;
+	push_constant.history_size = history_size;
+	static const uint32_t ray_count[RS::ENV_SDFGI_RAY_COUNT_MAX] = { 4, 8, 16, 32, 64, 96, 128 };
+	push_constant.ray_count = ray_count[gi->sdfgi_ray_count];
+	push_constant.ray_bias = probe_bias;
+	push_constant.image_size[0] = probe_axis_count * probe_axis_count;
+	push_constant.image_size[1] = probe_axis_count;
+	// p_env is null-checked further down, so it must not be dereferenced unguarded here.
+	push_constant.store_ambient_texture = p_env && p_env->volumetric_fog_enabled;
+
+	RID sky_uniform_set = gi->sdfgi_shader.integrate_default_sky_uniform_set;
+	push_constant.sky_mode = SDFGIShader::IntegratePushConstant::SKY_MODE_DISABLED;
+	push_constant.y_mult = y_mult;
+
+	if (reads_sky && p_env) {
+		push_constant.sky_energy = p_env->bg_energy;
+
+		if (p_env->background == RS::ENV_BG_CLEAR_COLOR) {
+			push_constant.sky_mode = SDFGIShader::IntegratePushConstant::SKY_MODE_COLOR;
+			Color c = storage->get_default_clear_color().srgb_to_linear();
+			push_constant.sky_color[0] = c.r;
+			push_constant.sky_color[1] = c.g;
+			push_constant.sky_color[2] = c.b;
+		} else if (p_env->background == RS::ENV_BG_COLOR) {
+			push_constant.sky_mode = SDFGIShader::IntegratePushConstant::SKY_MODE_COLOR;
+			Color c = p_env->bg_color;
+			push_constant.sky_color[0] = c.r;
+			push_constant.sky_color[1] = c.g;
+			push_constant.sky_color[2] = c.b;
+
+		} else if (p_env->background == RS::ENV_BG_SKY) {
+			if (p_sky && p_sky->radiance.is_valid()) {
+				// Build the sky uniform set on first use, or again once it is no longer valid.
+				if (integrate_sky_uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(integrate_sky_uniform_set)) {
+					Vector<RD::Uniform> uniforms;
+
+					{
+						RD::Uniform u;
+						u.uniform_type = RD::UNIFORM_TYPE_TEXTURE;
+						u.binding = 0;
+						u.append_id(p_sky->radiance);
+						uniforms.push_back(u);
+					}
+
+					{
+						RD::Uniform u;
+						u.uniform_type = RD::UNIFORM_TYPE_SAMPLER;
+						u.binding = 1;
+						u.append_id(RendererRD::MaterialStorage::get_singleton()->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED));
+						uniforms.push_back(u);
+					}
+
+					integrate_sky_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.integrate.version_get_shader(gi->sdfgi_shader.integrate_shader, 0), 1);
+				}
+				sky_uniform_set = integrate_sky_uniform_set;
+				push_constant.sky_mode = SDFGIShader::IntegratePushConstant::SKY_MODE_SKY;
+			}
+		}
+	}
+
+	render_pass++;
+
+	RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(true);
+	RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.integrate_pipeline[SDFGIShader::INTEGRATE_MODE_PROCESS]);
+
+	int32_t probe_divisor = cascade_size / SDFGI::PROBE_DIVISOR;
+	for (uint32_t i = 0; i < cascades.size(); i++) {
+		push_constant.cascade = i;
+		push_constant.world_offset[0] = cascades[i].position.x / probe_divisor;
+		push_constant.world_offset[1] = cascades[i].position.y / probe_divisor;
+		push_constant.world_offset[2] = cascades[i].position.z / probe_divisor;
+
+		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascades[i].integrate_uniform_set, 0);
+		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sky_uniform_set, 1);
+
+		RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::IntegratePushConstant));
+		RD::get_singleton()->compute_list_dispatch_threads(compute_list, probe_axis_count * probe_axis_count, probe_axis_count, 1);
+	}
+
+	//end later after raster to avoid barriering on layout changes
+	//RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER);
+
+	RD::get_singleton()->draw_command_end_label();
+}
+
+void GI::SDFGI::store_probes() {
+	RD::get_singleton()->barrier(RD::BARRIER_MASK_COMPUTE, RD::BARRIER_MASK_COMPUTE);
+	RD::get_singleton()->draw_command_begin_label("SDFGI Store Probes");
+
+	SDFGIShader::IntegratePushConstant push_constant;
+	push_constant.grid_size[1] = cascade_size;
+	push_constant.grid_size[2] = cascade_size;
+	push_constant.grid_size[0] = cascade_size;
+	push_constant.max_cascades = cascades.size();
+	push_constant.probe_axis_size = probe_axis_count;
+	push_constant.history_index = render_pass % history_size;
+	push_constant.history_size = history_size;
+	static const uint32_t ray_count[RS::ENV_SDFGI_RAY_COUNT_MAX] = { 4, 8, 16, 32, 64,
96, 128 }; + push_constant.ray_count = ray_count[gi->sdfgi_ray_count]; + push_constant.ray_bias = probe_bias; + push_constant.image_size[0] = probe_axis_count * probe_axis_count; + push_constant.image_size[1] = probe_axis_count; + push_constant.store_ambient_texture = false; + + push_constant.sky_mode = 0; + push_constant.y_mult = y_mult; + + // Then store values into the lightprobe texture. Separating these steps has a small performance hit, but it allows for multiple bounces + RENDER_TIMESTAMP("Average SDFGI Probes"); + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.integrate_pipeline[SDFGIShader::INTEGRATE_MODE_STORE]); + + //convert to octahedral to store + push_constant.image_size[0] *= SDFGI::LIGHTPROBE_OCT_SIZE; + push_constant.image_size[1] *= SDFGI::LIGHTPROBE_OCT_SIZE; + + for (uint32_t i = 0; i < cascades.size(); i++) { + push_constant.cascade = i; + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascades[i].integrate_uniform_set, 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, gi->sdfgi_shader.integrate_default_sky_uniform_set, 1); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::IntegratePushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, probe_axis_count * probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, 1); + } + + RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_COMPUTE); + + RD::get_singleton()->draw_command_end_label(); +} + +int GI::SDFGI::get_pending_region_data(int p_region, Vector3i &r_local_offset, Vector3i &r_local_size, AABB &r_bounds) const { + int dirty_count = 0; + for (uint32_t i = 0; i < cascades.size(); i++) { + const SDFGI::Cascade &c = cascades[i]; + + if (c.dirty_regions == SDFGI::Cascade::DIRTY_ALL) { + if (dirty_count == p_region) { + 
r_local_offset = Vector3i(); + r_local_size = Vector3i(1, 1, 1) * cascade_size; + + r_bounds.position = Vector3((Vector3i(1, 1, 1) * -int32_t(cascade_size >> 1) + c.position)) * c.cell_size * Vector3(1, 1.0 / y_mult, 1); + r_bounds.size = Vector3(r_local_size) * c.cell_size * Vector3(1, 1.0 / y_mult, 1); + return i; + } + dirty_count++; + } else { + for (int j = 0; j < 3; j++) { + if (c.dirty_regions[j] != 0) { + if (dirty_count == p_region) { + Vector3i from = Vector3i(0, 0, 0); + Vector3i to = Vector3i(1, 1, 1) * cascade_size; + + if (c.dirty_regions[j] > 0) { + //fill from the beginning + to[j] = c.dirty_regions[j]; + } else { + //fill from the end + from[j] = to[j] + c.dirty_regions[j]; + } + + for (int k = 0; k < j; k++) { + // "chip" away previous regions to avoid re-voxelizing the same thing + if (c.dirty_regions[k] > 0) { + from[k] += c.dirty_regions[k]; + } else if (c.dirty_regions[k] < 0) { + to[k] += c.dirty_regions[k]; + } + } + + r_local_offset = from; + r_local_size = to - from; + + r_bounds.position = Vector3(from + Vector3i(1, 1, 1) * -int32_t(cascade_size >> 1) + c.position) * c.cell_size * Vector3(1, 1.0 / y_mult, 1); + r_bounds.size = Vector3(r_local_size) * c.cell_size * Vector3(1, 1.0 / y_mult, 1); + + return i; + } + + dirty_count++; + } + } + } + } + return -1; +} + +void GI::SDFGI::update_cascades() { + //update cascades + SDFGI::Cascade::UBO cascade_data[SDFGI::MAX_CASCADES]; + int32_t probe_divisor = cascade_size / SDFGI::PROBE_DIVISOR; + + for (uint32_t i = 0; i < cascades.size(); i++) { + Vector3 pos = Vector3((Vector3i(1, 1, 1) * -int32_t(cascade_size >> 1) + cascades[i].position)) * cascades[i].cell_size; + + cascade_data[i].offset[0] = pos.x; + cascade_data[i].offset[1] = pos.y; + cascade_data[i].offset[2] = pos.z; + cascade_data[i].to_cell = 1.0 / cascades[i].cell_size; + cascade_data[i].probe_offset[0] = cascades[i].position.x / probe_divisor; + cascade_data[i].probe_offset[1] = cascades[i].position.y / probe_divisor; + 
cascade_data[i].probe_offset[2] = cascades[i].position.z / probe_divisor; + cascade_data[i].pad = 0; + } + + RD::get_singleton()->buffer_update(cascades_ubo, 0, sizeof(SDFGI::Cascade::UBO) * SDFGI::MAX_CASCADES, cascade_data, RD::BARRIER_MASK_COMPUTE); +} + +void GI::SDFGI::debug_draw(uint32_t p_view_count, const CameraMatrix *p_projections, const Transform3D &p_transform, int p_width, int p_height, RID p_render_target, RID p_texture, const Vector &p_texture_views) { + RendererRD::TextureStorage *texture_storage = RendererRD::TextureStorage::get_singleton(); + RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); + RendererRD::CopyEffects *copy_effects = RendererRD::CopyEffects::get_singleton(); + + for (uint32_t v = 0; v < p_view_count; v++) { + if (!debug_uniform_set[v].is_valid() || !RD::get_singleton()->uniform_set_is_valid(debug_uniform_set[v])) { + Vector uniforms; + { + RD::Uniform u; + u.binding = 1; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t i = 0; i < SDFGI::MAX_CASCADES; i++) { + if (i < cascades.size()) { + u.append_id(cascades[i].sdf_tex); + } else { + u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 2; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t i = 0; i < SDFGI::MAX_CASCADES; i++) { + if (i < cascades.size()) { + u.append_id(cascades[i].light_tex); + } else { + u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 3; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t i = 0; i < SDFGI::MAX_CASCADES; i++) { + if (i < cascades.size()) { + u.append_id(cascades[i].light_aniso_0_tex); + } else { + u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform 
u; + u.binding = 4; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t i = 0; i < SDFGI::MAX_CASCADES; i++) { + if (i < cascades.size()) { + u.append_id(cascades[i].light_aniso_1_tex); + } else { + u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 5; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.append_id(occlusion_texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 8; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 9; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.append_id(cascades_ubo); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 10; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.append_id(p_texture_views[v]); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 11; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.append_id(lightprobe_texture); + uniforms.push_back(u); + } + debug_uniform_set[v] = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.debug_shader_version, 0); + } + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.debug_pipeline); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, debug_uniform_set[v], 0); + + SDFGIShader::DebugPushConstant push_constant; + push_constant.grid_size[0] = cascade_size; + push_constant.grid_size[1] = cascade_size; + push_constant.grid_size[2] = cascade_size; + push_constant.max_cascades = cascades.size(); + push_constant.screen_size[0] = p_width; + push_constant.screen_size[1] = p_height; + push_constant.probe_axis_size = probe_axis_count; + push_constant.use_occlusion = uses_occlusion; + 
push_constant.y_mult = y_mult; + + push_constant.z_near = -p_projections[v].get_z_near(); + + push_constant.cam_transform[0] = p_transform.basis.rows[0][0]; + push_constant.cam_transform[1] = p_transform.basis.rows[1][0]; + push_constant.cam_transform[2] = p_transform.basis.rows[2][0]; + push_constant.cam_transform[3] = 0; + push_constant.cam_transform[4] = p_transform.basis.rows[0][1]; + push_constant.cam_transform[5] = p_transform.basis.rows[1][1]; + push_constant.cam_transform[6] = p_transform.basis.rows[2][1]; + push_constant.cam_transform[7] = 0; + push_constant.cam_transform[8] = p_transform.basis.rows[0][2]; + push_constant.cam_transform[9] = p_transform.basis.rows[1][2]; + push_constant.cam_transform[10] = p_transform.basis.rows[2][2]; + push_constant.cam_transform[11] = 0; + push_constant.cam_transform[12] = p_transform.origin.x; + push_constant.cam_transform[13] = p_transform.origin.y; + push_constant.cam_transform[14] = p_transform.origin.z; + push_constant.cam_transform[15] = 1; + + // need to properly unproject for asymmetric projection matrices in stereo.. 
+ CameraMatrix inv_projection = p_projections[v].inverse(); + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + push_constant.inv_projection[i * 4 + j] = inv_projection.matrix[i][j]; + } + } + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::DebugPushConstant)); + + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_width, p_height, 1); + RD::get_singleton()->compute_list_end(); + } + + Size2 rtsize = texture_storage->render_target_get_size(p_render_target); + copy_effects->copy_to_fb_rect(p_texture, texture_storage->render_target_get_rd_framebuffer(p_render_target), Rect2(Vector2(), rtsize), true, false, false, false, RID(), p_view_count > 1); +} + +void GI::SDFGI::debug_probes(RID p_framebuffer, const uint32_t p_view_count, const CameraMatrix *p_camera_with_transforms, bool p_will_continue_color, bool p_will_continue_depth) { + RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); + + // setup scene data + { + SDFGIShader::DebugProbesSceneData scene_data; + + if (debug_probes_scene_data_ubo.is_null()) { + debug_probes_scene_data_ubo = RD::get_singleton()->uniform_buffer_create(sizeof(SDFGIShader::DebugProbesSceneData)); + } + + for (uint32_t v = 0; v < p_view_count; v++) { + RendererStorageRD::store_camera(p_camera_with_transforms[v], scene_data.projection[v]); + } + + RD::get_singleton()->buffer_update(debug_probes_scene_data_ubo, 0, sizeof(SDFGIShader::DebugProbesSceneData), &scene_data, RD::BARRIER_MASK_RASTER); + } + + // setup push constant + SDFGIShader::DebugProbesPushConstant push_constant; + + //gen spheres from strips + uint32_t band_points = 16; + push_constant.band_power = 4; + push_constant.sections_in_band = ((band_points / 2) - 1); + push_constant.band_mask = band_points - 2; + push_constant.section_arc = Math_TAU / float(push_constant.sections_in_band); + push_constant.y_mult = y_mult; + + uint32_t total_points = 
push_constant.sections_in_band * band_points; + uint32_t total_probes = probe_axis_count * probe_axis_count * probe_axis_count; + + push_constant.grid_size[0] = cascade_size; + push_constant.grid_size[1] = cascade_size; + push_constant.grid_size[2] = cascade_size; + push_constant.cascade = 0; + + push_constant.probe_axis_size = probe_axis_count; + + if (!debug_probes_uniform_set.is_valid() || !RD::get_singleton()->uniform_set_is_valid(debug_probes_uniform_set)) { + Vector uniforms; + { + RD::Uniform u; + u.binding = 1; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.append_id(cascades_ubo); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 2; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.append_id(lightprobe_texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 3; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 4; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.append_id(occlusion_texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 5; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.append_id(debug_probes_scene_data_ubo); + uniforms.push_back(u); + } + + debug_probes_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.debug_probes.version_get_shader(gi->sdfgi_shader.debug_probes_shader, 0), 0); + } + + SDFGIShader::ProbeDebugMode mode = p_view_count > 1 ? SDFGIShader::PROBE_DEBUG_PROBES_MULTIVIEW : SDFGIShader::PROBE_DEBUG_PROBES; + + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_framebuffer, RD::INITIAL_ACTION_CONTINUE, p_will_continue_color ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, p_will_continue_depth ? 
RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ); + RD::get_singleton()->draw_command_begin_label("Debug SDFGI"); + + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, gi->sdfgi_shader.debug_probes_pipeline[mode].get_render_pipeline(RD::INVALID_FORMAT_ID, RD::get_singleton()->framebuffer_get_format(p_framebuffer))); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, debug_probes_uniform_set, 0); + RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(SDFGIShader::DebugProbesPushConstant)); + RD::get_singleton()->draw_list_draw(draw_list, false, total_probes, total_points); + + if (gi->sdfgi_debug_probe_dir != Vector3()) { + uint32_t cascade = 0; + Vector3 offset = Vector3((Vector3i(1, 1, 1) * -int32_t(cascade_size >> 1) + cascades[cascade].position)) * cascades[cascade].cell_size * Vector3(1.0, 1.0 / y_mult, 1.0); + Vector3 probe_size = cascades[cascade].cell_size * (cascade_size / SDFGI::PROBE_DIVISOR) * Vector3(1.0, 1.0 / y_mult, 1.0); + Vector3 ray_from = gi->sdfgi_debug_probe_pos; + Vector3 ray_to = gi->sdfgi_debug_probe_pos + gi->sdfgi_debug_probe_dir * cascades[cascade].cell_size * Math::sqrt(3.0) * cascade_size; + float sphere_radius = 0.2; + float closest_dist = 1e20; + gi->sdfgi_debug_probe_enabled = false; + + Vector3i probe_from = cascades[cascade].position / (cascade_size / SDFGI::PROBE_DIVISOR); + for (int i = 0; i < (SDFGI::PROBE_DIVISOR + 1); i++) { + for (int j = 0; j < (SDFGI::PROBE_DIVISOR + 1); j++) { + for (int k = 0; k < (SDFGI::PROBE_DIVISOR + 1); k++) { + Vector3 pos = offset + probe_size * Vector3(i, j, k); + Vector3 res; + if (Geometry3D::segment_intersects_sphere(ray_from, ray_to, pos, sphere_radius, &res)) { + float d = ray_from.distance_to(res); + if (d < closest_dist) { + closest_dist = d; + gi->sdfgi_debug_probe_enabled = true; + gi->sdfgi_debug_probe_index = probe_from + Vector3i(i, j, k); + } + } + } + } + } + + gi->sdfgi_debug_probe_dir = Vector3(); + } + + if 
(gi->sdfgi_debug_probe_enabled) { + uint32_t cascade = 0; + uint32_t probe_cells = (cascade_size / SDFGI::PROBE_DIVISOR); + Vector3i probe_from = cascades[cascade].position / probe_cells; + Vector3i ofs = gi->sdfgi_debug_probe_index - probe_from; + if (ofs.x < 0 || ofs.y < 0 || ofs.z < 0) { + return; + } + if (ofs.x > SDFGI::PROBE_DIVISOR || ofs.y > SDFGI::PROBE_DIVISOR || ofs.z > SDFGI::PROBE_DIVISOR) { + return; + } + + uint32_t mult = (SDFGI::PROBE_DIVISOR + 1); + uint32_t index = ofs.z * mult * mult + ofs.y * mult + ofs.x; + + push_constant.probe_debug_index = index; + + uint32_t cell_count = probe_cells * 2 * probe_cells * 2 * probe_cells * 2; + + RD::get_singleton()->draw_list_bind_render_pipeline(draw_list, gi->sdfgi_shader.debug_probes_pipeline[p_view_count > 1 ? SDFGIShader::PROBE_DEBUG_VISIBILITY_MULTIVIEW : SDFGIShader::PROBE_DEBUG_VISIBILITY].get_render_pipeline(RD::INVALID_FORMAT_ID, RD::get_singleton()->framebuffer_get_format(p_framebuffer))); + RD::get_singleton()->draw_list_bind_uniform_set(draw_list, debug_probes_uniform_set, 0); + RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(SDFGIShader::DebugProbesPushConstant)); + RD::get_singleton()->draw_list_draw(draw_list, false, cell_count, total_points); + } + + RD::get_singleton()->draw_command_end_label(); + RD::get_singleton()->draw_list_end(); +} + +void GI::SDFGI::pre_process_gi(const Transform3D &p_transform, RenderDataRD *p_render_data, RendererSceneRenderRD *p_scene_render) { + /* Update general SDFGI Buffer */ + + SDFGIData sdfgi_data; + + sdfgi_data.grid_size[0] = cascade_size; + sdfgi_data.grid_size[1] = cascade_size; + sdfgi_data.grid_size[2] = cascade_size; + + sdfgi_data.max_cascades = cascades.size(); + sdfgi_data.probe_axis_size = probe_axis_count; + sdfgi_data.cascade_probe_size[0] = sdfgi_data.probe_axis_size - 1; //float version for performance + sdfgi_data.cascade_probe_size[1] = sdfgi_data.probe_axis_size - 1; + sdfgi_data.cascade_probe_size[2] = 
sdfgi_data.probe_axis_size - 1; + + float csize = cascade_size; + sdfgi_data.probe_to_uvw = 1.0 / float(sdfgi_data.cascade_probe_size[0]); + sdfgi_data.use_occlusion = uses_occlusion; + //sdfgi_data.energy = energy; + + sdfgi_data.y_mult = y_mult; + + float cascade_voxel_size = (csize / sdfgi_data.cascade_probe_size[0]); + float occlusion_clamp = (cascade_voxel_size - 0.5) / cascade_voxel_size; + sdfgi_data.occlusion_clamp[0] = occlusion_clamp; + sdfgi_data.occlusion_clamp[1] = occlusion_clamp; + sdfgi_data.occlusion_clamp[2] = occlusion_clamp; + sdfgi_data.normal_bias = (normal_bias / csize) * sdfgi_data.cascade_probe_size[0]; + + //vec2 tex_pixel_size = 1.0 / vec2(ivec2( (OCT_SIZE+2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE+2) * params.probe_axis_size ) ); + //vec3 probe_uv_offset = (ivec3(OCT_SIZE+2,OCT_SIZE+2,(OCT_SIZE+2) * params.probe_axis_size)) * tex_pixel_size.xyx; + + uint32_t oct_size = SDFGI::LIGHTPROBE_OCT_SIZE; + + sdfgi_data.lightprobe_tex_pixel_size[0] = 1.0 / ((oct_size + 2) * sdfgi_data.probe_axis_size * sdfgi_data.probe_axis_size); + sdfgi_data.lightprobe_tex_pixel_size[1] = 1.0 / ((oct_size + 2) * sdfgi_data.probe_axis_size); + sdfgi_data.lightprobe_tex_pixel_size[2] = 1.0; + + sdfgi_data.energy = energy; + + sdfgi_data.lightprobe_uv_offset[0] = float(oct_size + 2) * sdfgi_data.lightprobe_tex_pixel_size[0]; + sdfgi_data.lightprobe_uv_offset[1] = float(oct_size + 2) * sdfgi_data.lightprobe_tex_pixel_size[1]; + sdfgi_data.lightprobe_uv_offset[2] = float((oct_size + 2) * sdfgi_data.probe_axis_size) * sdfgi_data.lightprobe_tex_pixel_size[0]; + + sdfgi_data.occlusion_renormalize[0] = 0.5; + sdfgi_data.occlusion_renormalize[1] = 1.0; + sdfgi_data.occlusion_renormalize[2] = 1.0 / float(sdfgi_data.max_cascades); + + int32_t probe_divisor = cascade_size / SDFGI::PROBE_DIVISOR; + + for (uint32_t i = 0; i < sdfgi_data.max_cascades; i++) { + SDFGIData::ProbeCascadeData &c = sdfgi_data.cascades[i]; + Vector3 pos = Vector3((Vector3i(1, 1, 
1) * -int32_t(cascade_size >> 1) + cascades[i].position)) * cascades[i].cell_size; + Vector3 cam_origin = p_transform.origin; + cam_origin.y *= y_mult; + pos -= cam_origin; //make pos local to camera, to reduce numerical error + c.position[0] = pos.x; + c.position[1] = pos.y; + c.position[2] = pos.z; + c.to_probe = 1.0 / (float(cascade_size) * cascades[i].cell_size / float(probe_axis_count - 1)); + + Vector3i probe_ofs = cascades[i].position / probe_divisor; + c.probe_world_offset[0] = probe_ofs.x; + c.probe_world_offset[1] = probe_ofs.y; + c.probe_world_offset[2] = probe_ofs.z; + + c.to_cell = 1.0 / cascades[i].cell_size; + } + + RD::get_singleton()->buffer_update(gi->sdfgi_ubo, 0, sizeof(SDFGIData), &sdfgi_data, RD::BARRIER_MASK_COMPUTE); + + /* Update dynamic lights in SDFGI cascades */ + + for (uint32_t i = 0; i < cascades.size(); i++) { + SDFGI::Cascade &cascade = cascades[i]; + + SDFGIShader::Light lights[SDFGI::MAX_DYNAMIC_LIGHTS]; + uint32_t idx = 0; + for (uint32_t j = 0; j < (uint32_t)p_scene_render->render_state.sdfgi_update_data->directional_lights->size(); j++) { + if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { + break; + } + + RendererSceneRenderRD::LightInstance *li = p_scene_render->light_instance_owner.get_or_null(p_scene_render->render_state.sdfgi_update_data->directional_lights->get(j)); + ERR_CONTINUE(!li); + + if (RSG::light_storage->light_directional_get_sky_mode(li->light) == RS::LIGHT_DIRECTIONAL_SKY_MODE_SKY_ONLY) { + continue; + } + + Vector3 dir = -li->transform.basis.get_column(Vector3::AXIS_Z); + dir.y *= y_mult; + dir.normalize(); + lights[idx].direction[0] = dir.x; + lights[idx].direction[1] = dir.y; + lights[idx].direction[2] = dir.z; + Color color = RSG::light_storage->light_get_color(li->light); + color = color.srgb_to_linear(); + lights[idx].color[0] = color.r; + lights[idx].color[1] = color.g; + lights[idx].color[2] = color.b; + lights[idx].type = RS::LIGHT_DIRECTIONAL; + lights[idx].energy = 
RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY) * RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_INDIRECT_ENERGY); + lights[idx].has_shadow = RSG::light_storage->light_has_shadow(li->light); + + idx++; + } + + AABB cascade_aabb; + cascade_aabb.position = Vector3((Vector3i(1, 1, 1) * -int32_t(cascade_size >> 1) + cascade.position)) * cascade.cell_size; + cascade_aabb.size = Vector3(1, 1, 1) * cascade_size * cascade.cell_size; + + for (uint32_t j = 0; j < p_scene_render->render_state.sdfgi_update_data->positional_light_count; j++) { + if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { + break; + } + + RendererSceneRenderRD::LightInstance *li = p_scene_render->light_instance_owner.get_or_null(p_scene_render->render_state.sdfgi_update_data->positional_light_instances[j]); + ERR_CONTINUE(!li); + + uint32_t max_sdfgi_cascade = RSG::light_storage->light_get_max_sdfgi_cascade(li->light); + if (i > max_sdfgi_cascade) { + continue; + } + + if (!cascade_aabb.intersects(li->aabb)) { + continue; + } + + Vector3 dir = -li->transform.basis.get_column(Vector3::AXIS_Z); + //faster to not do this here + //dir.y *= y_mult; + //dir.normalize(); + lights[idx].direction[0] = dir.x; + lights[idx].direction[1] = dir.y; + lights[idx].direction[2] = dir.z; + Vector3 pos = li->transform.origin; + pos.y *= y_mult; + lights[idx].position[0] = pos.x; + lights[idx].position[1] = pos.y; + lights[idx].position[2] = pos.z; + Color color = RSG::light_storage->light_get_color(li->light); + color = color.srgb_to_linear(); + lights[idx].color[0] = color.r; + lights[idx].color[1] = color.g; + lights[idx].color[2] = color.b; + lights[idx].type = RSG::light_storage->light_get_type(li->light); + lights[idx].energy = RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY) * RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_INDIRECT_ENERGY); + lights[idx].has_shadow = RSG::light_storage->light_has_shadow(li->light); + lights[idx].attenuation = 
RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_ATTENUATION); + lights[idx].radius = RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_RANGE); + lights[idx].cos_spot_angle = Math::cos(Math::deg2rad(RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ANGLE))); + lights[idx].inv_spot_attenuation = 1.0f / RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ATTENUATION); + + idx++; + } + + if (idx > 0) { + RD::get_singleton()->buffer_update(cascade.lights_buffer, 0, idx * sizeof(SDFGIShader::Light), lights, RD::BARRIER_MASK_COMPUTE); + } + + cascade_dynamic_light_count[i] = idx; + } +} + +void GI::SDFGI::render_region(RID p_render_buffers, int p_region, const PagedArray &p_instances, RendererSceneRenderRD *p_scene_render) { + //print_line("rendering region " + itos(p_region)); + RendererSceneRenderRD::RenderBuffers *rb = p_scene_render->render_buffers_owner.get_or_null(p_render_buffers); + ERR_FAIL_COND(!rb); // we wouldn't be here if this failed but... 
+ AABB bounds; + Vector3i from; + Vector3i size; + + int cascade_prev = get_pending_region_data(p_region - 1, from, size, bounds); + int cascade_next = get_pending_region_data(p_region + 1, from, size, bounds); + int cascade = get_pending_region_data(p_region, from, size, bounds); + ERR_FAIL_COND(cascade < 0); + + if (cascade_prev != cascade) { + //initialize render + RD::get_singleton()->texture_clear(render_albedo, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(render_emission, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(render_emission_aniso, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(render_geom_facing, Color(0, 0, 0, 0), 0, 1, 0, 1); + } + + //print_line("rendering cascade " + itos(p_region) + " objects: " + itos(p_cull_count) + " bounds: " + bounds + " from: " + from + " size: " + size + " cell size: " + rtos(cascades[cascade].cell_size)); + p_scene_render->_render_sdfgi(p_render_buffers, from, size, bounds, p_instances, render_albedo, render_emission, render_emission_aniso, render_geom_facing); + + if (cascade_next != cascade) { + RD::get_singleton()->draw_command_begin_label("SDFGI Pre-Process Cascade"); + + RENDER_TIMESTAMP("> SDFGI Update SDF"); + //done rendering! 
must update SDF + //clear dispatch indirect data + + SDFGIShader::PreprocessPushConstant push_constant; + memset(&push_constant, 0, sizeof(SDFGIShader::PreprocessPushConstant)); + + RENDER_TIMESTAMP("SDFGI Scroll SDF"); + + //scroll + if (cascades[cascade].dirty_regions != SDFGI::Cascade::DIRTY_ALL) { + //for scroll + Vector3i dirty = cascades[cascade].dirty_regions; + push_constant.scroll[0] = dirty.x; + push_constant.scroll[1] = dirty.y; + push_constant.scroll[2] = dirty.z; + } else { + //for no scroll + push_constant.scroll[0] = 0; + push_constant.scroll[1] = 0; + push_constant.scroll[2] = 0; + } + + cascades[cascade].all_dynamic_lights_dirty = true; + + push_constant.grid_size = cascade_size; + push_constant.cascade = cascade; + + if (cascades[cascade].dirty_regions != SDFGI::Cascade::DIRTY_ALL) { + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + //must pre scroll existing data because not all is dirty + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_SCROLL]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascades[cascade].scroll_uniform_set, 0); + + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cascades[cascade].solid_cell_dispatch_buffer, 0); + // no barrier do all together + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_SCROLL_OCCLUSION]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascades[cascade].scroll_occlusion_uniform_set, 0); + + Vector3i dirty = cascades[cascade].dirty_regions; + Vector3i groups; + groups.x = cascade_size - ABS(dirty.x); + groups.y = cascade_size - ABS(dirty.y); + groups.z = cascade_size - ABS(dirty.z); + + 
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, groups.x, groups.y, groups.z); + + //no barrier, continue together + + { + //scroll probes and their history also + + SDFGIShader::IntegratePushConstant ipush_constant; + ipush_constant.grid_size[1] = cascade_size; + ipush_constant.grid_size[2] = cascade_size; + ipush_constant.grid_size[0] = cascade_size; + ipush_constant.max_cascades = cascades.size(); + ipush_constant.probe_axis_size = probe_axis_count; + ipush_constant.history_index = 0; + ipush_constant.history_size = history_size; + ipush_constant.ray_count = 0; + ipush_constant.ray_bias = 0; + ipush_constant.sky_mode = 0; + ipush_constant.sky_energy = 0; + ipush_constant.sky_color[0] = 0; + ipush_constant.sky_color[1] = 0; + ipush_constant.sky_color[2] = 0; + ipush_constant.y_mult = y_mult; + ipush_constant.store_ambient_texture = false; + + ipush_constant.image_size[0] = probe_axis_count * probe_axis_count; + ipush_constant.image_size[1] = probe_axis_count; + + int32_t probe_divisor = cascade_size / SDFGI::PROBE_DIVISOR; + ipush_constant.cascade = cascade; + ipush_constant.world_offset[0] = cascades[cascade].position.x / probe_divisor; + ipush_constant.world_offset[1] = cascades[cascade].position.y / probe_divisor; + ipush_constant.world_offset[2] = cascades[cascade].position.z / probe_divisor; + + ipush_constant.scroll[0] = dirty.x / probe_divisor; + ipush_constant.scroll[1] = dirty.y / probe_divisor; + ipush_constant.scroll[2] = dirty.z / probe_divisor; + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.integrate_pipeline[SDFGIShader::INTEGRATE_MODE_SCROLL]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascades[cascade].integrate_uniform_set, 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, 
gi->sdfgi_shader.integrate_default_sky_uniform_set, 1); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ipush_constant, sizeof(SDFGIShader::IntegratePushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, probe_axis_count * probe_axis_count, probe_axis_count, 1); + + RD::get_singleton()->compute_list_add_barrier(compute_list); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.integrate_pipeline[SDFGIShader::INTEGRATE_MODE_SCROLL_STORE]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascades[cascade].integrate_uniform_set, 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, gi->sdfgi_shader.integrate_default_sky_uniform_set, 1); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ipush_constant, sizeof(SDFGIShader::IntegratePushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, probe_axis_count * probe_axis_count, probe_axis_count, 1); + + RD::get_singleton()->compute_list_add_barrier(compute_list); + + if (bounce_feedback > 0.0) { + //multibounce requires this to be stored so direct light can read from it + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.integrate_pipeline[SDFGIShader::INTEGRATE_MODE_STORE]); + + //convert to octahedral to store + ipush_constant.image_size[0] *= SDFGI::LIGHTPROBE_OCT_SIZE; + ipush_constant.image_size[1] *= SDFGI::LIGHTPROBE_OCT_SIZE; + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascades[cascade].integrate_uniform_set, 0); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, gi->sdfgi_shader.integrate_default_sky_uniform_set, 1); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &ipush_constant, sizeof(SDFGIShader::IntegratePushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, probe_axis_count * probe_axis_count * 
SDFGI::LIGHTPROBE_OCT_SIZE, probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, 1); + } + } + + //ok finally barrier + RD::get_singleton()->compute_list_end(); + } + + //clear dispatch indirect data + uint32_t dispatch_indirct_data[4] = { 0, 0, 0, 0 }; + RD::get_singleton()->buffer_update(cascades[cascade].solid_cell_dispatch_buffer, 0, sizeof(uint32_t) * 4, dispatch_indirct_data); + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + bool half_size = true; //much faster, very little difference + static const int optimized_jf_group_size = 8; + + if (half_size) { + push_constant.grid_size >>= 1; + + uint32_t cascade_half_size = cascade_size >> 1; + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE_HALF]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdf_initialize_half_uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size); + RD::get_singleton()->compute_list_add_barrier(compute_list); + + //must start with regular jumpflood + + push_constant.half_size = true; + { + RENDER_TIMESTAMP("SDFGI Jump Flood (Half-Size)"); + + uint32_t s = cascade_half_size; + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_JUMP_FLOOD]); + + int jf_us = 0; + //start with regular jump flood for very coarse reads, as this is impossible to optimize + while (s > 1) { + s /= 2; + push_constant.step_size = s; + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, jump_flood_half_uniform_set[jf_us], 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); + 
RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size); + RD::get_singleton()->compute_list_add_barrier(compute_list); + jf_us = jf_us == 0 ? 1 : 0; + + if (cascade_half_size / (s / 2) >= optimized_jf_group_size) { + break; + } + } + + RENDER_TIMESTAMP("SDFGI Jump Flood Optimized (Half-Size)"); + + //continue with optimized jump flood for smaller reads + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_JUMP_FLOOD_OPTIMIZED]); + while (s > 1) { + s /= 2; + push_constant.step_size = s; + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, jump_flood_half_uniform_set[jf_us], 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size); + RD::get_singleton()->compute_list_add_barrier(compute_list); + jf_us = jf_us == 0 ? 
1 : 0; + } + } + + // restore grid size for last passes + push_constant.grid_size = cascade_size; + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_JUMP_FLOOD_UPSCALE]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdf_upscale_uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_size, cascade_size, cascade_size); + RD::get_singleton()->compute_list_add_barrier(compute_list); + + //run one pass of fullsize jumpflood to fix up half size arctifacts + + push_constant.half_size = false; + push_constant.step_size = 1; + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_JUMP_FLOOD_OPTIMIZED]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, jump_flood_uniform_set[upscale_jfa_uniform_set_index], 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_size, cascade_size, cascade_size); + RD::get_singleton()->compute_list_add_barrier(compute_list); + + } else { + //full size jumpflood + RENDER_TIMESTAMP("SDFGI Jump Flood (Full-Size)"); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdf_initialize_uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_size, cascade_size, cascade_size); + + 
RD::get_singleton()->compute_list_add_barrier(compute_list); + + push_constant.half_size = false; + { + uint32_t s = cascade_size; + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_JUMP_FLOOD]); + + int jf_us = 0; + //start with regular jump flood for very coarse reads, as this is impossible to optimize + while (s > 1) { + s /= 2; + push_constant.step_size = s; + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, jump_flood_uniform_set[jf_us], 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_size, cascade_size, cascade_size); + RD::get_singleton()->compute_list_add_barrier(compute_list); + jf_us = jf_us == 0 ? 1 : 0; + + if (cascade_size / (s / 2) >= optimized_jf_group_size) { + break; + } + } + + RENDER_TIMESTAMP("SDFGI Jump Flood Optimized (Full-Size)"); + + //continue with optimized jump flood for smaller reads + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_JUMP_FLOOD_OPTIMIZED]); + while (s > 1) { + s /= 2; + push_constant.step_size = s; + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, jump_flood_uniform_set[jf_us], 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_size, cascade_size, cascade_size); + RD::get_singleton()->compute_list_add_barrier(compute_list); + jf_us = jf_us == 0 ? 
1 : 0; + } + } + } + + RENDER_TIMESTAMP("SDFGI Occlusion"); + + // occlusion + { + uint32_t probe_size = cascade_size / SDFGI::PROBE_DIVISOR; + Vector3i probe_global_pos = cascades[cascade].position / probe_size; + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_OCCLUSION]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, occlusion_uniform_set, 0); + for (int i = 0; i < 8; i++) { + //dispatch all at once for performance + Vector3i offset(i & 1, (i >> 1) & 1, (i >> 2) & 1); + + if ((probe_global_pos.x & 1) != 0) { + offset.x = (offset.x + 1) & 1; + } + if ((probe_global_pos.y & 1) != 0) { + offset.y = (offset.y + 1) & 1; + } + if ((probe_global_pos.z & 1) != 0) { + offset.z = (offset.z + 1) & 1; + } + push_constant.probe_offset[0] = offset.x; + push_constant.probe_offset[1] = offset.y; + push_constant.probe_offset[2] = offset.z; + push_constant.occlusion_index = i; + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); + + Vector3i groups = Vector3i(probe_size + 1, probe_size + 1, probe_size + 1) - offset; //if offset, it's one less probe per axis to compute + RD::get_singleton()->compute_list_dispatch(compute_list, groups.x, groups.y, groups.z); + } + RD::get_singleton()->compute_list_add_barrier(compute_list); + } + + RENDER_TIMESTAMP("SDFGI Store"); + + // store + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_STORE]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascades[cascade].sdf_store_uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_size, cascade_size, cascade_size); + + 
RD::get_singleton()->compute_list_end(); + + //clear these textures, as they will have previous garbage on next draw + RD::get_singleton()->texture_clear(cascades[cascade].light_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(cascades[cascade].light_aniso_0_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); + RD::get_singleton()->texture_clear(cascades[cascade].light_aniso_1_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); + +#if 0 + Vector data = RD::get_singleton()->texture_get_data(cascades[cascade].sdf, 0); + Ref img; + img.instantiate(); + for (uint32_t i = 0; i < cascade_size; i++) { + Vector subarr = data.slice(128 * 128 * i, 128 * 128 * (i + 1)); + img->create(cascade_size, cascade_size, false, Image::FORMAT_L8, subarr); + img->save_png("res://cascade_sdf_" + itos(cascade) + "_" + itos(i) + ".png"); + } + + //finalize render and update sdf +#endif + +#if 0 + Vector data = RD::get_singleton()->texture_get_data(render_albedo, 0); + Ref img; + img.instantiate(); + for (uint32_t i = 0; i < cascade_size; i++) { + Vector subarr = data.slice(128 * 128 * i * 2, 128 * 128 * (i + 1) * 2); + img->createcascade_size, cascade_size, false, Image::FORMAT_RGB565, subarr); + img->convert(Image::FORMAT_RGBA8); + img->save_png("res://cascade_" + itos(cascade) + "_" + itos(i) + ".png"); + } + + //finalize render and update sdf +#endif + + RENDER_TIMESTAMP("< SDFGI Update SDF"); + RD::get_singleton()->draw_command_end_label(); + } +} + +void GI::SDFGI::render_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray *p_positional_light_cull_result, RendererSceneRenderRD *p_scene_render) { + RendererSceneRenderRD::RenderBuffers *rb = p_scene_render->render_buffers_owner.get_or_null(p_render_buffers); + ERR_FAIL_COND(!rb); // we wouldn't be here if this failed but... 
+ + RD::get_singleton()->draw_command_begin_label("SDFGI Render Static Lights"); + + update_cascades(); + + SDFGIShader::Light lights[SDFGI::MAX_STATIC_LIGHTS]; + uint32_t light_count[SDFGI::MAX_STATIC_LIGHTS]; + + for (uint32_t i = 0; i < p_cascade_count; i++) { + ERR_CONTINUE(p_cascade_indices[i] >= cascades.size()); + + SDFGI::Cascade &cc = cascades[p_cascade_indices[i]]; + + { //fill light buffer + + AABB cascade_aabb; + cascade_aabb.position = Vector3((Vector3i(1, 1, 1) * -int32_t(cascade_size >> 1) + cc.position)) * cc.cell_size; + cascade_aabb.size = Vector3(1, 1, 1) * cascade_size * cc.cell_size; + + int idx = 0; + + for (uint32_t j = 0; j < (uint32_t)p_positional_light_cull_result[i].size(); j++) { + if (idx == SDFGI::MAX_STATIC_LIGHTS) { + break; + } + + RendererSceneRenderRD::LightInstance *li = p_scene_render->light_instance_owner.get_or_null(p_positional_light_cull_result[i][j]); + ERR_CONTINUE(!li); + + uint32_t max_sdfgi_cascade = RSG::light_storage->light_get_max_sdfgi_cascade(li->light); + if (p_cascade_indices[i] > max_sdfgi_cascade) { + continue; + } + + if (!cascade_aabb.intersects(li->aabb)) { + continue; + } + + lights[idx].type = RSG::light_storage->light_get_type(li->light); + + Vector3 dir = -li->transform.basis.get_column(Vector3::AXIS_Z); + if (lights[idx].type == RS::LIGHT_DIRECTIONAL) { + dir.y *= y_mult; //only makes sense for directional + dir.normalize(); + } + lights[idx].direction[0] = dir.x; + lights[idx].direction[1] = dir.y; + lights[idx].direction[2] = dir.z; + Vector3 pos = li->transform.origin; + pos.y *= y_mult; + lights[idx].position[0] = pos.x; + lights[idx].position[1] = pos.y; + lights[idx].position[2] = pos.z; + Color color = RSG::light_storage->light_get_color(li->light); + color = color.srgb_to_linear(); + lights[idx].color[0] = color.r; + lights[idx].color[1] = color.g; + lights[idx].color[2] = color.b; + lights[idx].energy = RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY) * 
RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_INDIRECT_ENERGY); + lights[idx].has_shadow = RSG::light_storage->light_has_shadow(li->light); + lights[idx].attenuation = RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_ATTENUATION); + lights[idx].radius = RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_RANGE); + lights[idx].cos_spot_angle = Math::cos(Math::deg2rad(RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ANGLE))); + lights[idx].inv_spot_attenuation = 1.0f / RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ATTENUATION); + + idx++; + } + + if (idx > 0) { + RD::get_singleton()->buffer_update(cc.lights_buffer, 0, idx * sizeof(SDFGIShader::Light), lights); + } + + light_count[i] = idx; + } + } + + /* Static Lights */ + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.direct_light_pipeline[SDFGIShader::DIRECT_LIGHT_MODE_STATIC]); + + SDFGIShader::DirectLightPushConstant dl_push_constant; + + dl_push_constant.grid_size[0] = cascade_size; + dl_push_constant.grid_size[1] = cascade_size; + dl_push_constant.grid_size[2] = cascade_size; + dl_push_constant.max_cascades = cascades.size(); + dl_push_constant.probe_axis_size = probe_axis_count; + dl_push_constant.bounce_feedback = 0.0; // this is static light, do not multibounce yet + dl_push_constant.y_mult = y_mult; + dl_push_constant.use_occlusion = uses_occlusion; + + //all must be processed + dl_push_constant.process_offset = 0; + dl_push_constant.process_increment = 1; + + for (uint32_t i = 0; i < p_cascade_count; i++) { + ERR_CONTINUE(p_cascade_indices[i] >= cascades.size()); + + SDFGI::Cascade &cc = cascades[p_cascade_indices[i]]; + + dl_push_constant.light_count = light_count[i]; + dl_push_constant.cascade = p_cascade_indices[i]; + + if (dl_push_constant.light_count > 0) { + 
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cc.sdf_direct_light_uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &dl_push_constant, sizeof(SDFGIShader::DirectLightPushConstant)); + RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cc.solid_cell_dispatch_buffer, 0); + } + } + + RD::get_singleton()->compute_list_end(); + + RD::get_singleton()->draw_command_end_label(); +} + +//////////////////////////////////////////////////////////////////////////////// +// VoxelGIInstance + +void GI::VoxelGIInstance::update(bool p_update_light_instances, const Vector &p_light_instances, const PagedArray &p_dynamic_objects, RendererSceneRenderRD *p_scene_render) { + RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); + + uint32_t data_version = gi->voxel_gi_get_data_version(probe); + + // (RE)CREATE IF NEEDED + + if (last_probe_data_version != data_version) { + //need to re-create everything + if (texture.is_valid()) { + RD::get_singleton()->free(texture); + RD::get_singleton()->free(write_buffer); + mipmaps.clear(); + } + + for (int i = 0; i < dynamic_maps.size(); i++) { + RD::get_singleton()->free(dynamic_maps[i].texture); + RD::get_singleton()->free(dynamic_maps[i].depth); + } + + dynamic_maps.clear(); + + Vector3i octree_size = gi->voxel_gi_get_octree_size(probe); + + if (octree_size != Vector3i()) { + //can create a 3D texture + Vector levels = gi->voxel_gi_get_level_counts(probe); + + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + tf.width = octree_size.x; + tf.height = octree_size.y; + tf.depth = octree_size.z; + tf.texture_type = RD::TEXTURE_TYPE_3D; + tf.mipmaps = levels.size(); + + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; + + texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + RD::get_singleton()->texture_clear(texture, Color(0, 0, 0, 0), 0, 
levels.size(), 0, 1); + + { + int total_elements = 0; + for (int i = 0; i < levels.size(); i++) { + total_elements += levels[i]; + } + + write_buffer = RD::get_singleton()->storage_buffer_create(total_elements * 16); + } + + for (int i = 0; i < levels.size(); i++) { + VoxelGIInstance::Mipmap mipmap; + mipmap.texture = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), texture, 0, i, 1, RD::TEXTURE_SLICE_3D); + mipmap.level = levels.size() - i - 1; + mipmap.cell_offset = 0; + for (uint32_t j = 0; j < mipmap.level; j++) { + mipmap.cell_offset += levels[j]; + } + mipmap.cell_count = levels[mipmap.level]; + + Vector uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 1; + u.append_id(gi->voxel_gi_get_octree_buffer(probe)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 2; + u.append_id(gi->voxel_gi_get_data_buffer(probe)); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 4; + u.append_id(write_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 9; + u.append_id(gi->voxel_gi_get_sdf_texture(probe)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 10; + u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + + { + Vector copy_uniforms = uniforms; + if (i == 0) { + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 3; + u.append_id(gi->voxel_gi_lights_uniform); + copy_uniforms.push_back(u); + } + + mipmap.uniform_set = RD::get_singleton()->uniform_set_create(copy_uniforms, gi->voxel_gi_lighting_shader_version_shaders[VOXEL_GI_SHADER_VERSION_COMPUTE_LIGHT], 0); + + copy_uniforms = uniforms; 
//restore + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 5; + u.append_id(texture); + copy_uniforms.push_back(u); + } + mipmap.second_bounce_uniform_set = RD::get_singleton()->uniform_set_create(copy_uniforms, gi->voxel_gi_lighting_shader_version_shaders[VOXEL_GI_SHADER_VERSION_COMPUTE_SECOND_BOUNCE], 0); + } else { + mipmap.uniform_set = RD::get_singleton()->uniform_set_create(copy_uniforms, gi->voxel_gi_lighting_shader_version_shaders[VOXEL_GI_SHADER_VERSION_COMPUTE_MIPMAP], 0); + } + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 5; + u.append_id(mipmap.texture); + uniforms.push_back(u); + } + + mipmap.write_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->voxel_gi_lighting_shader_version_shaders[VOXEL_GI_SHADER_VERSION_WRITE_TEXTURE], 0); + + mipmaps.push_back(mipmap); + } + + { + uint32_t dynamic_map_size = MAX(MAX(octree_size.x, octree_size.y), octree_size.z); + uint32_t oversample = nearest_power_of_2_templated(4); + int mipmap_index = 0; + + while (mipmap_index < mipmaps.size()) { + VoxelGIInstance::DynamicMap dmap; + + if (oversample > 0) { + dmap.size = dynamic_map_size * (1 << oversample); + dmap.mipmap = -1; + oversample--; + } else { + dmap.size = dynamic_map_size >> mipmap_index; + dmap.mipmap = mipmap_index; + mipmap_index++; + } + + RD::TextureFormat dtf; + dtf.width = dmap.size; + dtf.height = dmap.size; + dtf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + dtf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT; + + if (dynamic_maps.size() == 0) { + dtf.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + } + dmap.texture = RD::get_singleton()->texture_create(dtf, RD::TextureView()); + + if (dynamic_maps.size() == 0) { + // Render depth for first one. + // Use 16-bit depth when supported to improve performance. + dtf.format = RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_D16_UNORM, RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ? 
RD::DATA_FORMAT_D16_UNORM : RD::DATA_FORMAT_X8_D24_UNORM_PACK32; + dtf.usage_bits = RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + dmap.fb_depth = RD::get_singleton()->texture_create(dtf, RD::TextureView()); + } + + //just use depth as-is + dtf.format = RD::DATA_FORMAT_R32_SFLOAT; + dtf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + + dmap.depth = RD::get_singleton()->texture_create(dtf, RD::TextureView()); + + if (dynamic_maps.size() == 0) { + dtf.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; + dtf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + dmap.albedo = RD::get_singleton()->texture_create(dtf, RD::TextureView()); + dmap.normal = RD::get_singleton()->texture_create(dtf, RD::TextureView()); + dmap.orm = RD::get_singleton()->texture_create(dtf, RD::TextureView()); + + Vector fb; + fb.push_back(dmap.albedo); + fb.push_back(dmap.normal); + fb.push_back(dmap.orm); + fb.push_back(dmap.texture); //emission + fb.push_back(dmap.depth); + fb.push_back(dmap.fb_depth); + + dmap.fb = RD::get_singleton()->framebuffer_create(fb); + + { + Vector uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 3; + u.append_id(gi->voxel_gi_lights_uniform); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 5; + u.append_id(dmap.albedo); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 6; + u.append_id(dmap.normal); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 7; + u.append_id(dmap.orm); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 8; + u.append_id(dmap.fb_depth); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 9; + u.append_id(gi->voxel_gi_get_sdf_texture(probe)); + 
uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 10; + u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 11; + u.append_id(dmap.texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 12; + u.append_id(dmap.depth); + uniforms.push_back(u); + } + + dmap.uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->voxel_gi_lighting_shader_version_shaders[VOXEL_GI_SHADER_VERSION_DYNAMIC_OBJECT_LIGHTING], 0); + } + } else { + bool plot = dmap.mipmap >= 0; + bool write = dmap.mipmap < (mipmaps.size() - 1); + + Vector uniforms; + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 5; + u.append_id(dynamic_maps[dynamic_maps.size() - 1].texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 6; + u.append_id(dynamic_maps[dynamic_maps.size() - 1].depth); + uniforms.push_back(u); + } + + if (write) { + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 7; + u.append_id(dmap.texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 8; + u.append_id(dmap.depth); + uniforms.push_back(u); + } + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 9; + u.append_id(gi->voxel_gi_get_sdf_texture(probe)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 10; + u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + + if (plot) { + { + RD::Uniform u; + u.uniform_type = 
RD::UNIFORM_TYPE_IMAGE; + u.binding = 11; + u.append_id(mipmaps[dmap.mipmap].texture); + uniforms.push_back(u); + } + } + + dmap.uniform_set = RD::get_singleton()->uniform_set_create( + uniforms, + gi->voxel_gi_lighting_shader_version_shaders[(write && plot) ? VOXEL_GI_SHADER_VERSION_DYNAMIC_SHRINK_WRITE_PLOT : (write ? VOXEL_GI_SHADER_VERSION_DYNAMIC_SHRINK_WRITE : VOXEL_GI_SHADER_VERSION_DYNAMIC_SHRINK_PLOT)], + 0); + } + + dynamic_maps.push_back(dmap); + } + } + } + + last_probe_data_version = data_version; + p_update_light_instances = true; //just in case + + p_scene_render->_base_uniforms_changed(); + } + + // UDPDATE TIME + + if (has_dynamic_object_data) { + //if it has dynamic object data, it needs to be cleared + RD::get_singleton()->texture_clear(texture, Color(0, 0, 0, 0), 0, mipmaps.size(), 0, 1); + } + + uint32_t light_count = 0; + + if (p_update_light_instances || p_dynamic_objects.size() > 0) { + light_count = MIN(gi->voxel_gi_max_lights, (uint32_t)p_light_instances.size()); + + { + Transform3D to_cell = gi->voxel_gi_get_to_cell_xform(probe); + Transform3D to_probe_xform = (transform * to_cell.affine_inverse()).affine_inverse(); + //update lights + + for (uint32_t i = 0; i < light_count; i++) { + VoxelGILight &l = gi->voxel_gi_lights[i]; + RID light_instance = p_light_instances[i]; + RID light = p_scene_render->light_instance_get_base_light(light_instance); + + l.type = RSG::light_storage->light_get_type(light); + if (l.type == RS::LIGHT_DIRECTIONAL && RSG::light_storage->light_directional_get_sky_mode(light) == RS::LIGHT_DIRECTIONAL_SKY_MODE_SKY_ONLY) { + light_count--; + continue; + } + + l.attenuation = RSG::light_storage->light_get_param(light, RS::LIGHT_PARAM_ATTENUATION); + l.energy = RSG::light_storage->light_get_param(light, RS::LIGHT_PARAM_ENERGY) * RSG::light_storage->light_get_param(light, RS::LIGHT_PARAM_INDIRECT_ENERGY); + l.radius = to_cell.basis.xform(Vector3(RSG::light_storage->light_get_param(light, RS::LIGHT_PARAM_RANGE), 0, 
0)).length(); + Color color = RSG::light_storage->light_get_color(light).srgb_to_linear(); + l.color[0] = color.r; + l.color[1] = color.g; + l.color[2] = color.b; + + l.cos_spot_angle = Math::cos(Math::deg2rad(RSG::light_storage->light_get_param(light, RS::LIGHT_PARAM_SPOT_ANGLE))); + l.inv_spot_attenuation = 1.0f / RSG::light_storage->light_get_param(light, RS::LIGHT_PARAM_SPOT_ATTENUATION); + + Transform3D xform = p_scene_render->light_instance_get_base_transform(light_instance); + + Vector3 pos = to_probe_xform.xform(xform.origin); + Vector3 dir = to_probe_xform.basis.xform(-xform.basis.get_column(2)).normalized(); + + l.position[0] = pos.x; + l.position[1] = pos.y; + l.position[2] = pos.z; + + l.direction[0] = dir.x; + l.direction[1] = dir.y; + l.direction[2] = dir.z; + + l.has_shadow = RSG::light_storage->light_has_shadow(light); + } + + RD::get_singleton()->buffer_update(gi->voxel_gi_lights_uniform, 0, sizeof(VoxelGILight) * light_count, gi->voxel_gi_lights); + } + } + + if (has_dynamic_object_data || p_update_light_instances || p_dynamic_objects.size()) { + // PROCESS MIPMAPS + if (mipmaps.size()) { + //can update mipmaps + + Vector3i probe_size = gi->voxel_gi_get_octree_size(probe); + + VoxelGIPushConstant push_constant; + + push_constant.limits[0] = probe_size.x; + push_constant.limits[1] = probe_size.y; + push_constant.limits[2] = probe_size.z; + push_constant.stack_size = mipmaps.size(); + push_constant.emission_scale = 1.0; + push_constant.propagation = gi->voxel_gi_get_propagation(probe); + push_constant.dynamic_range = gi->voxel_gi_get_dynamic_range(probe); + push_constant.light_count = light_count; + push_constant.aniso_strength = 0; + + /* print_line("probe update to version " + itos(last_probe_version)); + print_line("propagation " + rtos(push_constant.propagation)); + print_line("dynrange " + rtos(push_constant.dynamic_range)); + */ + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + + int passes; + if 
(p_update_light_instances) { + passes = gi->voxel_gi_is_using_two_bounces(probe) ? 2 : 1; + } else { + passes = 1; //only re-blitting is necessary + } + int wg_size = 64; + int64_t wg_limit_x = (int64_t)RD::get_singleton()->limit_get(RD::LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_X); + + for (int pass = 0; pass < passes; pass++) { + if (p_update_light_instances) { + for (int i = 0; i < mipmaps.size(); i++) { + if (i == 0) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->voxel_gi_lighting_shader_version_pipelines[pass == 0 ? VOXEL_GI_SHADER_VERSION_COMPUTE_LIGHT : VOXEL_GI_SHADER_VERSION_COMPUTE_SECOND_BOUNCE]); + } else if (i == 1) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->voxel_gi_lighting_shader_version_pipelines[VOXEL_GI_SHADER_VERSION_COMPUTE_MIPMAP]); + } + + if (pass == 1 || i > 0) { + RD::get_singleton()->compute_list_add_barrier(compute_list); //wait til previous step is done + } + if (pass == 0 || i > 0) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, mipmaps[i].uniform_set, 0); + } else { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, mipmaps[i].second_bounce_uniform_set, 0); + } + + push_constant.cell_offset = mipmaps[i].cell_offset; + push_constant.cell_count = mipmaps[i].cell_count; + + int64_t wg_todo = (mipmaps[i].cell_count - 1) / wg_size + 1; + while (wg_todo) { + int64_t wg_count = MIN(wg_todo, wg_limit_x); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(VoxelGIPushConstant)); + RD::get_singleton()->compute_list_dispatch(compute_list, wg_count, 1, 1); + wg_todo -= wg_count; + push_constant.cell_offset += wg_count * wg_size; + } + } + + RD::get_singleton()->compute_list_add_barrier(compute_list); //wait til previous step is done + } + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->voxel_gi_lighting_shader_version_pipelines[VOXEL_GI_SHADER_VERSION_WRITE_TEXTURE]); + + for (int i = 0; 
i < mipmaps.size(); i++) { + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, mipmaps[i].write_uniform_set, 0); + + push_constant.cell_offset = mipmaps[i].cell_offset; + push_constant.cell_count = mipmaps[i].cell_count; + + int64_t wg_todo = (mipmaps[i].cell_count - 1) / wg_size + 1; + while (wg_todo) { + int64_t wg_count = MIN(wg_todo, wg_limit_x); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(VoxelGIPushConstant)); + RD::get_singleton()->compute_list_dispatch(compute_list, wg_count, 1, 1); + wg_todo -= wg_count; + push_constant.cell_offset += wg_count * wg_size; + } + } + } + + RD::get_singleton()->compute_list_end(); + } + } + + has_dynamic_object_data = false; //clear until dynamic object data is used again + + if (p_dynamic_objects.size() && dynamic_maps.size()) { + Vector3i octree_size = gi->voxel_gi_get_octree_size(probe); + int multiplier = dynamic_maps[0].size / MAX(MAX(octree_size.x, octree_size.y), octree_size.z); + + Transform3D oversample_scale; + oversample_scale.basis.scale(Vector3(multiplier, multiplier, multiplier)); + + Transform3D to_cell = oversample_scale * gi->voxel_gi_get_to_cell_xform(probe); + Transform3D to_world_xform = transform * to_cell.affine_inverse(); + Transform3D to_probe_xform = to_world_xform.affine_inverse(); + + AABB probe_aabb(Vector3(), octree_size); + + //this could probably be better parallelized in compute.. 
+ for (int i = 0; i < (int)p_dynamic_objects.size(); i++) { + RendererSceneRender::GeometryInstance *instance = p_dynamic_objects[i]; + + //transform aabb to voxel_gi + AABB aabb = (to_probe_xform * p_scene_render->geometry_instance_get_transform(instance)).xform(p_scene_render->geometry_instance_get_aabb(instance)); + + //this needs to wrap to grid resolution to avoid jitter + //also extend margin a bit just in case + Vector3i begin = aabb.position - Vector3i(1, 1, 1); + Vector3i end = aabb.position + aabb.size + Vector3i(1, 1, 1); + + for (int j = 0; j < 3; j++) { + if ((end[j] - begin[j]) & 1) { + end[j]++; //for half extents split, it needs to be even + } + begin[j] = MAX(begin[j], 0); + end[j] = MIN(end[j], octree_size[j] * multiplier); + } + + //aabb = aabb.intersection(probe_aabb); //intersect + aabb.position = begin; + aabb.size = end - begin; + + //print_line("aabb: " + aabb); + + for (int j = 0; j < 6; j++) { + //if (j != 0 && j != 3) { + // continue; + //} + static const Vector3 render_z[6] = { + Vector3(1, 0, 0), + Vector3(0, 1, 0), + Vector3(0, 0, 1), + Vector3(-1, 0, 0), + Vector3(0, -1, 0), + Vector3(0, 0, -1), + }; + static const Vector3 render_up[6] = { + Vector3(0, 1, 0), + Vector3(0, 0, 1), + Vector3(0, 1, 0), + Vector3(0, 1, 0), + Vector3(0, 0, 1), + Vector3(0, 1, 0), + }; + + Vector3 render_dir = render_z[j]; + Vector3 up_dir = render_up[j]; + + Vector3 center = aabb.get_center(); + Transform3D xform; + xform.set_look_at(center - aabb.size * 0.5 * render_dir, center, up_dir); + + Vector3 x_dir = xform.basis.get_column(0).abs(); + int x_axis = int(Vector3(0, 1, 2).dot(x_dir)); + Vector3 y_dir = xform.basis.get_column(1).abs(); + int y_axis = int(Vector3(0, 1, 2).dot(y_dir)); + Vector3 z_dir = -xform.basis.get_column(2); + int z_axis = int(Vector3(0, 1, 2).dot(z_dir.abs())); + + Rect2i rect(aabb.position[x_axis], aabb.position[y_axis], aabb.size[x_axis], aabb.size[y_axis]); + bool x_flip = bool(Vector3(1, 1, 1).dot(xform.basis.get_column(0)) < 
0); + bool y_flip = bool(Vector3(1, 1, 1).dot(xform.basis.get_column(1)) < 0); + bool z_flip = bool(Vector3(1, 1, 1).dot(xform.basis.get_column(2)) > 0); + + CameraMatrix cm; + cm.set_orthogonal(-rect.size.width / 2, rect.size.width / 2, -rect.size.height / 2, rect.size.height / 2, 0.0001, aabb.size[z_axis]); + + if (p_scene_render->cull_argument.size() == 0) { + p_scene_render->cull_argument.push_back(nullptr); + } + p_scene_render->cull_argument[0] = instance; + + p_scene_render->_render_material(to_world_xform * xform, cm, true, p_scene_render->cull_argument, dynamic_maps[0].fb, Rect2i(Vector2i(), rect.size)); + + VoxelGIDynamicPushConstant push_constant; + memset(&push_constant, 0, sizeof(VoxelGIDynamicPushConstant)); + push_constant.limits[0] = octree_size.x; + push_constant.limits[1] = octree_size.y; + push_constant.limits[2] = octree_size.z; + push_constant.light_count = p_light_instances.size(); + push_constant.x_dir[0] = x_dir[0]; + push_constant.x_dir[1] = x_dir[1]; + push_constant.x_dir[2] = x_dir[2]; + push_constant.y_dir[0] = y_dir[0]; + push_constant.y_dir[1] = y_dir[1]; + push_constant.y_dir[2] = y_dir[2]; + push_constant.z_dir[0] = z_dir[0]; + push_constant.z_dir[1] = z_dir[1]; + push_constant.z_dir[2] = z_dir[2]; + push_constant.z_base = xform.origin[z_axis]; + push_constant.z_sign = (z_flip ? 
-1.0 : 1.0); + push_constant.pos_multiplier = float(1.0) / multiplier; + push_constant.dynamic_range = gi->voxel_gi_get_dynamic_range(probe); + push_constant.flip_x = x_flip; + push_constant.flip_y = y_flip; + push_constant.rect_pos[0] = rect.position[0]; + push_constant.rect_pos[1] = rect.position[1]; + push_constant.rect_size[0] = rect.size[0]; + push_constant.rect_size[1] = rect.size[1]; + push_constant.prev_rect_ofs[0] = 0; + push_constant.prev_rect_ofs[1] = 0; + push_constant.prev_rect_size[0] = 0; + push_constant.prev_rect_size[1] = 0; + push_constant.on_mipmap = false; + push_constant.propagation = gi->voxel_gi_get_propagation(probe); + push_constant.pad[0] = 0; + push_constant.pad[1] = 0; + push_constant.pad[2] = 0; + + //process lighting + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->voxel_gi_lighting_shader_version_pipelines[VOXEL_GI_SHADER_VERSION_DYNAMIC_OBJECT_LIGHTING]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, dynamic_maps[0].uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(VoxelGIDynamicPushConstant)); + RD::get_singleton()->compute_list_dispatch(compute_list, (rect.size.x - 1) / 8 + 1, (rect.size.y - 1) / 8 + 1, 1); + //print_line("rect: " + itos(i) + ": " + rect); + + for (int k = 1; k < dynamic_maps.size(); k++) { + // enlarge the rect if needed so all pixels fit when downscaled, + // this ensures downsampling is smooth and optimal because no pixels are left behind + + //x + if (rect.position.x & 1) { + rect.size.x++; + push_constant.prev_rect_ofs[0] = 1; //this is used to ensure reading is also optimal + } else { + push_constant.prev_rect_ofs[0] = 0; + } + if (rect.size.x & 1) { + rect.size.x++; + } + + rect.position.x >>= 1; + rect.size.x = MAX(1, rect.size.x >> 1); + + //y + if (rect.position.y & 1) { + rect.size.y++; + push_constant.prev_rect_ofs[1] = 1; 
+ } else { + push_constant.prev_rect_ofs[1] = 0; + } + if (rect.size.y & 1) { + rect.size.y++; + } + + rect.position.y >>= 1; + rect.size.y = MAX(1, rect.size.y >> 1); + + //shrink limits to ensure plot does not go outside map + if (dynamic_maps[k].mipmap > 0) { + for (int l = 0; l < 3; l++) { + push_constant.limits[l] = MAX(1, push_constant.limits[l] >> 1); + } + } + + //print_line("rect: " + itos(i) + ": " + rect); + push_constant.rect_pos[0] = rect.position[0]; + push_constant.rect_pos[1] = rect.position[1]; + push_constant.prev_rect_size[0] = push_constant.rect_size[0]; + push_constant.prev_rect_size[1] = push_constant.rect_size[1]; + push_constant.rect_size[0] = rect.size[0]; + push_constant.rect_size[1] = rect.size[1]; + push_constant.on_mipmap = dynamic_maps[k].mipmap > 0; + + RD::get_singleton()->compute_list_add_barrier(compute_list); + + if (dynamic_maps[k].mipmap < 0) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->voxel_gi_lighting_shader_version_pipelines[VOXEL_GI_SHADER_VERSION_DYNAMIC_SHRINK_WRITE]); + } else if (k < dynamic_maps.size() - 1) { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->voxel_gi_lighting_shader_version_pipelines[VOXEL_GI_SHADER_VERSION_DYNAMIC_SHRINK_WRITE_PLOT]); + } else { + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->voxel_gi_lighting_shader_version_pipelines[VOXEL_GI_SHADER_VERSION_DYNAMIC_SHRINK_PLOT]); + } + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, dynamic_maps[k].uniform_set, 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(VoxelGIDynamicPushConstant)); + RD::get_singleton()->compute_list_dispatch(compute_list, (rect.size.x - 1) / 8 + 1, (rect.size.y - 1) / 8 + 1, 1); + } + + RD::get_singleton()->compute_list_end(); + } + } + + has_dynamic_object_data = true; //clear until dynamic object data is used again + } + + last_probe_version = gi->voxel_gi_get_version(probe); 
+} + +void GI::VoxelGIInstance::debug(RD::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform, bool p_lighting, bool p_emission, float p_alpha) { + RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); + + if (mipmaps.size() == 0) { + return; + } + + CameraMatrix cam_transform = (p_camera_with_transform * CameraMatrix(transform)) * CameraMatrix(gi->voxel_gi_get_to_cell_xform(probe).affine_inverse()); + + int level = 0; + Vector3i octree_size = gi->voxel_gi_get_octree_size(probe); + + VoxelGIDebugPushConstant push_constant; + push_constant.alpha = p_alpha; + push_constant.dynamic_range = gi->voxel_gi_get_dynamic_range(probe); + push_constant.cell_offset = mipmaps[level].cell_offset; + push_constant.level = level; + + push_constant.bounds[0] = octree_size.x >> level; + push_constant.bounds[1] = octree_size.y >> level; + push_constant.bounds[2] = octree_size.z >> level; + push_constant.pad = 0; + + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + push_constant.projection[i * 4 + j] = cam_transform.matrix[i][j]; + } + } + + if (gi->voxel_gi_debug_uniform_set.is_valid()) { + RD::get_singleton()->free(gi->voxel_gi_debug_uniform_set); + } + Vector uniforms; + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; + u.binding = 1; + u.append_id(gi->voxel_gi_get_data_buffer(probe)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 2; + u.append_id(texture); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 3; + u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + + int cell_count; + if (!p_emission && p_lighting && has_dynamic_object_data) { + cell_count = push_constant.bounds[0] * push_constant.bounds[1] * push_constant.bounds[2]; + } else 
{ + cell_count = mipmaps[level].cell_count; + } + + gi->voxel_gi_debug_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->voxel_gi_debug_shader_version_shaders[0], 0); + + int voxel_gi_debug_pipeline = VOXEL_GI_DEBUG_COLOR; + if (p_emission) { + voxel_gi_debug_pipeline = VOXEL_GI_DEBUG_EMISSION; + } else if (p_lighting) { + voxel_gi_debug_pipeline = has_dynamic_object_data ? VOXEL_GI_DEBUG_LIGHT_FULL : VOXEL_GI_DEBUG_LIGHT; + } + RD::get_singleton()->draw_list_bind_render_pipeline( + p_draw_list, + gi->voxel_gi_debug_shader_version_pipelines[voxel_gi_debug_pipeline].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_framebuffer))); + RD::get_singleton()->draw_list_bind_uniform_set(p_draw_list, gi->voxel_gi_debug_uniform_set, 0); + RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(VoxelGIDebugPushConstant)); + RD::get_singleton()->draw_list_draw(p_draw_list, false, cell_count, 36); +} + +//////////////////////////////////////////////////////////////////////////////// +// GI + +GI::GI() { + singleton = this; + + sdfgi_ray_count = RS::EnvironmentSDFGIRayCount(CLAMP(int32_t(GLOBAL_GET("rendering/global_illumination/sdfgi/probe_ray_count")), 0, int32_t(RS::ENV_SDFGI_RAY_COUNT_MAX - 1))); + sdfgi_frames_to_converge = RS::EnvironmentSDFGIFramesToConverge(CLAMP(int32_t(GLOBAL_GET("rendering/global_illumination/sdfgi/frames_to_converge")), 0, int32_t(RS::ENV_SDFGI_CONVERGE_MAX - 1))); + sdfgi_frames_to_update_light = RS::EnvironmentSDFGIFramesToUpdateLight(CLAMP(int32_t(GLOBAL_GET("rendering/global_illumination/sdfgi/frames_to_update_lights")), 0, int32_t(RS::ENV_SDFGI_UPDATE_LIGHT_MAX - 1))); +} + +GI::~GI() { + singleton = nullptr; +} + +void GI::init(RendererStorageRD *p_storage, RendererSceneSkyRD *p_sky) { + RendererRD::TextureStorage *texture_storage = RendererRD::TextureStorage::get_singleton(); + RendererRD::MaterialStorage *material_storage = 
RendererRD::MaterialStorage::get_singleton(); + + storage = p_storage; + + /* GI */ + + { + //kinda complicated to compute the amount of slots, we try to use as many as we can + + voxel_gi_lights = memnew_arr(VoxelGILight, voxel_gi_max_lights); + voxel_gi_lights_uniform = RD::get_singleton()->uniform_buffer_create(voxel_gi_max_lights * sizeof(VoxelGILight)); + voxel_gi_quality = RS::VoxelGIQuality(CLAMP(int(GLOBAL_GET("rendering/global_illumination/voxel_gi/quality")), 0, 1)); + + String defines = "\n#define MAX_LIGHTS " + itos(voxel_gi_max_lights) + "\n"; + + Vector versions; + versions.push_back("\n#define MODE_COMPUTE_LIGHT\n"); + versions.push_back("\n#define MODE_SECOND_BOUNCE\n"); + versions.push_back("\n#define MODE_UPDATE_MIPMAPS\n"); + versions.push_back("\n#define MODE_WRITE_TEXTURE\n"); + versions.push_back("\n#define MODE_DYNAMIC\n#define MODE_DYNAMIC_LIGHTING\n"); + versions.push_back("\n#define MODE_DYNAMIC\n#define MODE_DYNAMIC_SHRINK\n#define MODE_DYNAMIC_SHRINK_WRITE\n"); + versions.push_back("\n#define MODE_DYNAMIC\n#define MODE_DYNAMIC_SHRINK\n#define MODE_DYNAMIC_SHRINK_PLOT\n"); + versions.push_back("\n#define MODE_DYNAMIC\n#define MODE_DYNAMIC_SHRINK\n#define MODE_DYNAMIC_SHRINK_PLOT\n#define MODE_DYNAMIC_SHRINK_WRITE\n"); + + voxel_gi_shader.initialize(versions, defines); + voxel_gi_lighting_shader_version = voxel_gi_shader.version_create(); + for (int i = 0; i < VOXEL_GI_SHADER_VERSION_MAX; i++) { + voxel_gi_lighting_shader_version_shaders[i] = voxel_gi_shader.version_get_shader(voxel_gi_lighting_shader_version, i); + voxel_gi_lighting_shader_version_pipelines[i] = RD::get_singleton()->compute_pipeline_create(voxel_gi_lighting_shader_version_shaders[i]); + } + } + + { + String defines; + Vector versions; + versions.push_back("\n#define MODE_DEBUG_COLOR\n"); + versions.push_back("\n#define MODE_DEBUG_LIGHT\n"); + versions.push_back("\n#define MODE_DEBUG_EMISSION\n"); + versions.push_back("\n#define MODE_DEBUG_LIGHT\n#define 
MODE_DEBUG_LIGHT_FULL\n"); + + voxel_gi_debug_shader.initialize(versions, defines); + voxel_gi_debug_shader_version = voxel_gi_debug_shader.version_create(); + for (int i = 0; i < VOXEL_GI_DEBUG_MAX; i++) { + voxel_gi_debug_shader_version_shaders[i] = voxel_gi_debug_shader.version_get_shader(voxel_gi_debug_shader_version, i); + + RD::PipelineRasterizationState rs; + rs.cull_mode = RD::POLYGON_CULL_FRONT; + RD::PipelineDepthStencilState ds; + ds.enable_depth_test = true; + ds.enable_depth_write = true; + ds.depth_compare_operator = RD::COMPARE_OP_LESS_OR_EQUAL; + + voxel_gi_debug_shader_version_pipelines[i].setup(voxel_gi_debug_shader_version_shaders[i], RD::RENDER_PRIMITIVE_TRIANGLES, rs, RD::PipelineMultisampleState(), ds, RD::PipelineColorBlendState::create_disabled(), 0); + } + } + + /* SDGFI */ + + { + Vector preprocess_modes; + preprocess_modes.push_back("\n#define MODE_SCROLL\n"); + preprocess_modes.push_back("\n#define MODE_SCROLL_OCCLUSION\n"); + preprocess_modes.push_back("\n#define MODE_INITIALIZE_JUMP_FLOOD\n"); + preprocess_modes.push_back("\n#define MODE_INITIALIZE_JUMP_FLOOD_HALF\n"); + preprocess_modes.push_back("\n#define MODE_JUMPFLOOD\n"); + preprocess_modes.push_back("\n#define MODE_JUMPFLOOD_OPTIMIZED\n"); + preprocess_modes.push_back("\n#define MODE_UPSCALE_JUMP_FLOOD\n"); + preprocess_modes.push_back("\n#define MODE_OCCLUSION\n"); + preprocess_modes.push_back("\n#define MODE_STORE\n"); + String defines = "\n#define OCCLUSION_SIZE " + itos(SDFGI::CASCADE_SIZE / SDFGI::PROBE_DIVISOR) + "\n"; + sdfgi_shader.preprocess.initialize(preprocess_modes, defines); + sdfgi_shader.preprocess_shader = sdfgi_shader.preprocess.version_create(); + for (int i = 0; i < SDFGIShader::PRE_PROCESS_MAX; i++) { + sdfgi_shader.preprocess_pipeline[i] = RD::get_singleton()->compute_pipeline_create(sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, i)); + } + } + + { + //calculate tables + String defines = "\n#define OCT_SIZE " + 
itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; + + Vector direct_light_modes; + direct_light_modes.push_back("\n#define MODE_PROCESS_STATIC\n"); + direct_light_modes.push_back("\n#define MODE_PROCESS_DYNAMIC\n"); + sdfgi_shader.direct_light.initialize(direct_light_modes, defines); + sdfgi_shader.direct_light_shader = sdfgi_shader.direct_light.version_create(); + for (int i = 0; i < SDFGIShader::DIRECT_LIGHT_MODE_MAX; i++) { + sdfgi_shader.direct_light_pipeline[i] = RD::get_singleton()->compute_pipeline_create(sdfgi_shader.direct_light.version_get_shader(sdfgi_shader.direct_light_shader, i)); + } + } + + { + //calculate tables + String defines = "\n#define OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; + defines += "\n#define SH_SIZE " + itos(SDFGI::SH_SIZE) + "\n"; + if (p_sky->sky_use_cubemap_array) { + defines += "\n#define USE_CUBEMAP_ARRAY\n"; + } + + Vector integrate_modes; + integrate_modes.push_back("\n#define MODE_PROCESS\n"); + integrate_modes.push_back("\n#define MODE_STORE\n"); + integrate_modes.push_back("\n#define MODE_SCROLL\n"); + integrate_modes.push_back("\n#define MODE_SCROLL_STORE\n"); + sdfgi_shader.integrate.initialize(integrate_modes, defines); + sdfgi_shader.integrate_shader = sdfgi_shader.integrate.version_create(); + + for (int i = 0; i < SDFGIShader::INTEGRATE_MODE_MAX; i++) { + sdfgi_shader.integrate_pipeline[i] = RD::get_singleton()->compute_pipeline_create(sdfgi_shader.integrate.version_get_shader(sdfgi_shader.integrate_shader, i)); + } + + { + Vector uniforms; + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 0; + u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_CUBEMAP_WHITE)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 1; + u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + + 
sdfgi_shader.integrate_default_sky_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.integrate.version_get_shader(sdfgi_shader.integrate_shader, 0), 1); + } + } + + //GK + { + //calculate tables + String defines = "\n#define SDFGI_OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; + Vector gi_modes; + gi_modes.push_back("\n#define USE_VOXEL_GI_INSTANCES\n"); // MODE_VOXEL_GI + gi_modes.push_back("\n#define USE_SDFGI\n"); // MODE_SDFGI + gi_modes.push_back("\n#define USE_SDFGI\n\n#define USE_VOXEL_GI_INSTANCES\n"); // MODE_COMBINED + gi_modes.push_back("\n#define MODE_HALF_RES\n#define USE_VOXEL_GI_INSTANCES\n"); // MODE_HALF_RES_VOXEL_GI + gi_modes.push_back("\n#define MODE_HALF_RES\n#define USE_SDFGI\n"); // MODE_HALF_RES_SDFGI + gi_modes.push_back("\n#define MODE_HALF_RES\n#define USE_SDFGI\n\n#define USE_VOXEL_GI_INSTANCES\n"); // MODE_HALF_RES_COMBINED + + gi_modes.push_back("\n#define USE_VOXEL_GI_INSTANCES\n#define USE_MULTIVIEW\n"); // MODE_VOXEL_GI_MULTIVIEW + gi_modes.push_back("\n#define USE_SDFGI\n#define USE_MULTIVIEW\n"); // MODE_SDFGI_MULTIVIEW + gi_modes.push_back("\n#define USE_SDFGI\n\n#define USE_VOXEL_GI_INSTANCES\n#define USE_MULTIVIEW\n"); // MODE_COMBINED_MULTIVIEW + gi_modes.push_back("\n#define MODE_HALF_RES\n#define USE_VOXEL_GI_INSTANCES\n#define USE_MULTIVIEW\n"); // MODE_HALF_RES_VOXEL_GI_MULTIVIEW + gi_modes.push_back("\n#define MODE_HALF_RES\n#define USE_SDFGI\n#define USE_MULTIVIEW\n"); // MODE_HALF_RES_SDFGI_MULTIVIEW + gi_modes.push_back("\n#define MODE_HALF_RES\n#define USE_SDFGI\n\n#define USE_VOXEL_GI_INSTANCES\n#define USE_MULTIVIEW\n"); // MODE_HALF_RES_COMBINED_MULTIVIEW + + shader.initialize(gi_modes, defines); + + if (!RendererCompositorRD::singleton->is_xr_enabled()) { + shader.set_variant_enabled(MODE_VOXEL_GI_MULTIVIEW, false); + shader.set_variant_enabled(MODE_SDFGI_MULTIVIEW, false); + shader.set_variant_enabled(MODE_COMBINED_MULTIVIEW, false); + 
shader.set_variant_enabled(MODE_HALF_RES_VOXEL_GI_MULTIVIEW, false); + shader.set_variant_enabled(MODE_HALF_RES_SDFGI_MULTIVIEW, false); + shader.set_variant_enabled(MODE_HALF_RES_COMBINED_MULTIVIEW, false); + } + + shader_version = shader.version_create(); + for (int i = 0; i < MODE_MAX; i++) { + if (shader.is_variant_enabled(i)) { + pipelines[i] = RD::get_singleton()->compute_pipeline_create(shader.version_get_shader(shader_version, i)); + } else { + pipelines[i] = RID(); + } + } + + sdfgi_ubo = RD::get_singleton()->uniform_buffer_create(sizeof(SDFGIData)); + } + { + String defines = "\n#define OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; + Vector debug_modes; + debug_modes.push_back(""); + sdfgi_shader.debug.initialize(debug_modes, defines); + sdfgi_shader.debug_shader = sdfgi_shader.debug.version_create(); + sdfgi_shader.debug_shader_version = sdfgi_shader.debug.version_get_shader(sdfgi_shader.debug_shader, 0); + sdfgi_shader.debug_pipeline = RD::get_singleton()->compute_pipeline_create(sdfgi_shader.debug_shader_version); + } + { + String defines = "\n#define OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; + + Vector versions; + versions.push_back("\n#define MODE_PROBES\n"); + versions.push_back("\n#define MODE_PROBES\n#define USE_MULTIVIEW\n"); + versions.push_back("\n#define MODE_VISIBILITY\n"); + versions.push_back("\n#define MODE_VISIBILITY\n#define USE_MULTIVIEW\n"); + + sdfgi_shader.debug_probes.initialize(versions, defines); + + // TODO disable multiview versions if turned off + + sdfgi_shader.debug_probes_shader = sdfgi_shader.debug_probes.version_create(); + + { + RD::PipelineRasterizationState rs; + rs.cull_mode = RD::POLYGON_CULL_DISABLED; + RD::PipelineDepthStencilState ds; + ds.enable_depth_test = true; + ds.enable_depth_write = true; + ds.depth_compare_operator = RD::COMPARE_OP_LESS_OR_EQUAL; + for (int i = 0; i < SDFGIShader::PROBE_DEBUG_MAX; i++) { + // TODO check if version is enabled + + RID debug_probes_shader_version = 
sdfgi_shader.debug_probes.version_get_shader(sdfgi_shader.debug_probes_shader, i); + sdfgi_shader.debug_probes_pipeline[i].setup(debug_probes_shader_version, RD::RENDER_PRIMITIVE_TRIANGLE_STRIPS, rs, RD::PipelineMultisampleState(), ds, RD::PipelineColorBlendState::create_disabled(), 0); + } + } + } + default_voxel_gi_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(VoxelGIData) * MAX_VOXEL_GI_INSTANCES); + half_resolution = GLOBAL_GET("rendering/global_illumination/gi/use_half_resolution"); +} + +void GI::free() { + RD::get_singleton()->free(default_voxel_gi_buffer); + RD::get_singleton()->free(voxel_gi_lights_uniform); + RD::get_singleton()->free(sdfgi_ubo); + + voxel_gi_debug_shader.version_free(voxel_gi_debug_shader_version); + voxel_gi_shader.version_free(voxel_gi_lighting_shader_version); + shader.version_free(shader_version); + sdfgi_shader.debug_probes.version_free(sdfgi_shader.debug_probes_shader); + sdfgi_shader.debug.version_free(sdfgi_shader.debug_shader); + sdfgi_shader.direct_light.version_free(sdfgi_shader.direct_light_shader); + sdfgi_shader.integrate.version_free(sdfgi_shader.integrate_shader); + sdfgi_shader.preprocess.version_free(sdfgi_shader.preprocess_shader); + + if (voxel_gi_lights) { + memdelete_arr(voxel_gi_lights); + } +} + +GI::SDFGI *GI::create_sdfgi(RendererSceneEnvironmentRD *p_env, const Vector3 &p_world_position, uint32_t p_requested_history_size) { + SDFGI *sdfgi = memnew(SDFGI); + + sdfgi->create(p_env, p_world_position, p_requested_history_size, this); + + return sdfgi; +} + +void GI::setup_voxel_gi_instances(RID p_render_buffers, const Transform3D &p_transform, const PagedArray &p_voxel_gi_instances, uint32_t &r_voxel_gi_instances_used, RendererSceneRenderRD *p_scene_render) { + RendererRD::TextureStorage *texture_storage = RendererRD::TextureStorage::get_singleton(); + + r_voxel_gi_instances_used = 0; + + // feels a little dirty to use our container this way but.... 
+ RendererSceneRenderRD::RenderBuffers *rb = p_scene_render->render_buffers_owner.get_or_null(p_render_buffers); + ERR_FAIL_COND(rb == nullptr); + + RID voxel_gi_buffer = p_scene_render->render_buffers_get_voxel_gi_buffer(p_render_buffers); + + VoxelGIData voxel_gi_data[MAX_VOXEL_GI_INSTANCES]; + + bool voxel_gi_instances_changed = false; + + Transform3D to_camera; + to_camera.origin = p_transform.origin; //only translation, make local + + for (int i = 0; i < MAX_VOXEL_GI_INSTANCES; i++) { + RID texture; + if (i < (int)p_voxel_gi_instances.size()) { + VoxelGIInstance *gipi = get_probe_instance(p_voxel_gi_instances[i]); + + if (gipi) { + texture = gipi->texture; + VoxelGIData &gipd = voxel_gi_data[i]; + + RID base_probe = gipi->probe; + + Transform3D to_cell = voxel_gi_get_to_cell_xform(gipi->probe) * gipi->transform.affine_inverse() * to_camera; + + gipd.xform[0] = to_cell.basis.rows[0][0]; + gipd.xform[1] = to_cell.basis.rows[1][0]; + gipd.xform[2] = to_cell.basis.rows[2][0]; + gipd.xform[3] = 0; + gipd.xform[4] = to_cell.basis.rows[0][1]; + gipd.xform[5] = to_cell.basis.rows[1][1]; + gipd.xform[6] = to_cell.basis.rows[2][1]; + gipd.xform[7] = 0; + gipd.xform[8] = to_cell.basis.rows[0][2]; + gipd.xform[9] = to_cell.basis.rows[1][2]; + gipd.xform[10] = to_cell.basis.rows[2][2]; + gipd.xform[11] = 0; + gipd.xform[12] = to_cell.origin.x; + gipd.xform[13] = to_cell.origin.y; + gipd.xform[14] = to_cell.origin.z; + gipd.xform[15] = 1; + + Vector3 bounds = voxel_gi_get_octree_size(base_probe); + + gipd.bounds[0] = bounds.x; + gipd.bounds[1] = bounds.y; + gipd.bounds[2] = bounds.z; + + gipd.dynamic_range = voxel_gi_get_dynamic_range(base_probe) * voxel_gi_get_energy(base_probe); + gipd.bias = voxel_gi_get_bias(base_probe); + gipd.normal_bias = voxel_gi_get_normal_bias(base_probe); + gipd.blend_ambient = !voxel_gi_is_interior(base_probe); + gipd.mipmaps = gipi->mipmaps.size(); + } + + r_voxel_gi_instances_used++; + } + + if (texture == RID()) { + texture = 
texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE); + } + + if (texture != rb->rbgi.voxel_gi_textures[i]) { + voxel_gi_instances_changed = true; + rb->rbgi.voxel_gi_textures[i] = texture; + } + } + + if (voxel_gi_instances_changed) { + for (uint32_t v = 0; v < RendererSceneRender::MAX_RENDER_VIEWS; v++) { + if (RD::get_singleton()->uniform_set_is_valid(rb->rbgi.uniform_set[v])) { + RD::get_singleton()->free(rb->rbgi.uniform_set[v]); + } + rb->rbgi.uniform_set[v] = RID(); + } + if (rb->volumetric_fog) { + if (RD::get_singleton()->uniform_set_is_valid(rb->volumetric_fog->fog_uniform_set)) { + RD::get_singleton()->free(rb->volumetric_fog->fog_uniform_set); + RD::get_singleton()->free(rb->volumetric_fog->process_uniform_set); + RD::get_singleton()->free(rb->volumetric_fog->process_uniform_set2); + } + rb->volumetric_fog->fog_uniform_set = RID(); + rb->volumetric_fog->process_uniform_set = RID(); + rb->volumetric_fog->process_uniform_set2 = RID(); + } + } + + if (p_voxel_gi_instances.size() > 0) { + RD::get_singleton()->draw_command_begin_label("VoxelGIs Setup"); + + RD::get_singleton()->buffer_update(voxel_gi_buffer, 0, sizeof(VoxelGIData) * MIN((uint64_t)MAX_VOXEL_GI_INSTANCES, p_voxel_gi_instances.size()), voxel_gi_data, RD::BARRIER_MASK_COMPUTE); + + RD::get_singleton()->draw_command_end_label(); + } +} + +void GI::RenderBuffersGI::free() { + for (uint32_t v = 0; v < RendererSceneRender::MAX_RENDER_VIEWS; v++) { + if (RD::get_singleton()->uniform_set_is_valid(uniform_set[v])) { + RD::get_singleton()->free(uniform_set[v]); + } + uniform_set[v] = RID(); + } + + if (scene_data_ubo.is_valid()) { + RD::get_singleton()->free(scene_data_ubo); + scene_data_ubo = RID(); + } + + if (ambient_buffer.is_valid()) { + if (view_count == 1) { + // Only one view? then these are copies of our main buffers. + ambient_view[0] = RID(); + reflection_view[0] = RID(); + } else { + // Multiple views? free our slices. 
+ for (uint32_t v = 0; v < view_count; v++) { + RD::get_singleton()->free(ambient_view[v]); + RD::get_singleton()->free(reflection_view[v]); + ambient_view[v] = RID(); + reflection_view[v] = RID(); + } + } + + // Now we can free our buffers. + RD::get_singleton()->free(ambient_buffer); + RD::get_singleton()->free(reflection_buffer); + ambient_buffer = RID(); + reflection_buffer = RID(); + view_count = 0; + } + + if (voxel_gi_buffer.is_valid()) { + RD::get_singleton()->free(voxel_gi_buffer); + voxel_gi_buffer = RID(); + } +} + +void GI::process_gi(RID p_render_buffers, RID *p_normal_roughness_views, RID p_voxel_gi_buffer, RID p_environment, uint32_t p_view_count, const CameraMatrix *p_projections, const Vector3 *p_eye_offsets, const Transform3D &p_cam_transform, const PagedArray &p_voxel_gi_instances, RendererSceneRenderRD *p_scene_render) { + RendererRD::TextureStorage *texture_storage = RendererRD::TextureStorage::get_singleton(); + RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); + + ERR_FAIL_COND_MSG(p_view_count > 2, "Maximum of 2 views supported for Processing GI."); + + RD::get_singleton()->draw_command_begin_label("GI Render"); + + RendererSceneRenderRD::RenderBuffers *rb = p_scene_render->render_buffers_owner.get_or_null(p_render_buffers); + ERR_FAIL_COND(rb == nullptr); + + if (rb->rbgi.ambient_buffer.is_null() || rb->rbgi.using_half_size_gi != half_resolution || rb->rbgi.view_count != p_view_count) { + // Free our old buffer if applicable + if (rb->rbgi.ambient_buffer.is_valid()) { + if (rb->rbgi.view_count > 1) { + for (uint32_t v = 0; v < rb->rbgi.view_count; v++) { + RD::get_singleton()->free(rb->rbgi.ambient_view[v]); + RD::get_singleton()->free(rb->rbgi.reflection_view[v]); + } + } + RD::get_singleton()->free(rb->rbgi.ambient_buffer); + RD::get_singleton()->free(rb->rbgi.reflection_buffer); + } + + print_line("Allocating GI buffers"); // TESTING REMOVE BEFORE MERGING + + // Remember the view count we're 
using + rb->rbgi.view_count = p_view_count; + + // Create textures for our ambient and reflection data + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + tf.width = rb->internal_width; + tf.height = rb->internal_height; + if (half_resolution) { + tf.width >>= 1; + tf.height >>= 1; + } + if (p_view_count > 1) { + tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; + tf.array_layers = p_view_count; + } else { + tf.texture_type = RD::TEXTURE_TYPE_2D; + tf.array_layers = 1; + } + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + rb->rbgi.ambient_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->rbgi.reflection_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->rbgi.using_half_size_gi = half_resolution; + + if (p_view_count == 1) { + // Just one view? Copy our buffers + rb->rbgi.ambient_view[0] = rb->rbgi.ambient_buffer; + rb->rbgi.reflection_view[0] = rb->rbgi.reflection_buffer; + } else { + // More then one view? Create slices for each view + for (uint32_t v = 0; v < p_view_count; v++) { + rb->rbgi.ambient_view[v] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->rbgi.ambient_buffer, v, 0); + rb->rbgi.reflection_view[v] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->rbgi.reflection_buffer, v, 0); + } + } + } + + // Setup our scene data + { + SceneData scene_data; + + if (rb->rbgi.scene_data_ubo.is_null()) { + rb->rbgi.scene_data_ubo = RD::get_singleton()->uniform_buffer_create(sizeof(SceneData)); + } + + for (uint32_t v = 0; v < p_view_count; v++) { + RendererStorageRD::store_camera(p_projections[v].inverse(), scene_data.inv_projection[v]); + scene_data.eye_offset[v][0] = p_eye_offsets[v].x; + scene_data.eye_offset[v][1] = p_eye_offsets[v].y; + scene_data.eye_offset[v][2] = p_eye_offsets[v].z; + scene_data.eye_offset[v][3] = 0.0; + } + + // Note that we will be ignoring the origin of this transform. 
+ RendererStorageRD::store_transform(p_cam_transform, scene_data.cam_transform); + + scene_data.screen_size[0] = rb->internal_width; + scene_data.screen_size[1] = rb->internal_height; + + RD::get_singleton()->buffer_update(rb->rbgi.scene_data_ubo, 0, sizeof(SceneData), &scene_data, RD::BARRIER_MASK_COMPUTE); + } + + // Now compute the contents of our buffers. + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(true); + + for (uint32_t v = 0; v < p_view_count; v++) { + // Render each eye seperately. + // We need to look into whether we can make our compute shader use Multiview but not sure that works or makes a difference.. + + // setup our push constant + + PushConstant push_constant; + + push_constant.view_index = v; + push_constant.orthogonal = p_projections[v].is_orthogonal(); + push_constant.max_voxel_gi_instances = MIN((uint64_t)MAX_VOXEL_GI_INSTANCES, p_voxel_gi_instances.size()); + push_constant.high_quality_vct = voxel_gi_quality == RS::VOXEL_GI_QUALITY_HIGH; + + push_constant.z_near = p_projections[v].get_z_near(); + push_constant.z_far = p_projections[v].get_z_far(); + + push_constant.proj_info[0] = -2.0f / (rb->internal_width * p_projections[v].matrix[0][0]); + push_constant.proj_info[1] = -2.0f / (rb->internal_height * p_projections[v].matrix[1][1]); + push_constant.proj_info[2] = (1.0f - p_projections[v].matrix[0][2]) / p_projections[v].matrix[0][0]; + push_constant.proj_info[3] = (1.0f + p_projections[v].matrix[1][2]) / p_projections[v].matrix[1][1]; + + bool use_sdfgi = rb->sdfgi != nullptr; + bool use_voxel_gi_instances = push_constant.max_voxel_gi_instances > 0; + + // setup our uniform set + if (rb->rbgi.uniform_set[v].is_null() || !RD::get_singleton()->uniform_set_is_valid(rb->rbgi.uniform_set[v])) { + Vector uniforms; + { + RD::Uniform u; + u.binding = 1; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { + if (rb->sdfgi && j < rb->sdfgi->cascades.size()) { + 
u.append_id(rb->sdfgi->cascades[j].sdf_tex); + } else { + u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 2; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { + if (rb->sdfgi && j < rb->sdfgi->cascades.size()) { + u.append_id(rb->sdfgi->cascades[j].light_tex); + } else { + u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 3; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { + if (rb->sdfgi && j < rb->sdfgi->cascades.size()) { + u.append_id(rb->sdfgi->cascades[j].light_aniso_0_tex); + } else { + u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.binding = 4; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { + if (rb->sdfgi && j < rb->sdfgi->cascades.size()) { + u.append_id(rb->sdfgi->cascades[j].light_aniso_1_tex); + } else { + u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 5; + if (rb->sdfgi) { + u.append_id(rb->sdfgi->occlusion_texture); + } else { + u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 6; + u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; + u.binding = 7; + 
u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 9; + u.append_id(rb->rbgi.ambient_view[v]); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_IMAGE; + u.binding = 10; + u.append_id(rb->rbgi.reflection_view[v]); + uniforms.push_back(u); + } + + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 11; + if (rb->sdfgi) { + u.append_id(rb->sdfgi->lightprobe_texture); + } else { + u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_2D_ARRAY_WHITE)); + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 12; + u.append_id(rb->views[v].view_depth); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 13; + u.append_id(p_normal_roughness_views[v]); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 14; + RID buffer = p_voxel_gi_buffer.is_valid() ? 
p_voxel_gi_buffer : texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_BLACK); + u.append_id(buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 15; + u.append_id(sdfgi_ubo); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 16; + u.append_id(rb->rbgi.voxel_gi_buffer); + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; + u.binding = 17; + for (int i = 0; i < MAX_VOXEL_GI_INSTANCES; i++) { + u.append_id(rb->rbgi.voxel_gi_textures[i]); + } + uniforms.push_back(u); + } + { + RD::Uniform u; + u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.binding = 18; + u.append_id(rb->rbgi.scene_data_ubo); + uniforms.push_back(u); + } + + rb->rbgi.uniform_set[v] = RD::get_singleton()->uniform_set_create(uniforms, shader.version_get_shader(shader_version, 0), 0); + } + + Mode mode; + + if (p_view_count > 1) { + if (rb->rbgi.using_half_size_gi) { + mode = (use_sdfgi && use_voxel_gi_instances) ? MODE_HALF_RES_COMBINED_MULTIVIEW : (use_sdfgi ? MODE_HALF_RES_SDFGI_MULTIVIEW : MODE_HALF_RES_VOXEL_GI_MULTIVIEW); + } else { + mode = (use_sdfgi && use_voxel_gi_instances) ? MODE_COMBINED_MULTIVIEW : (use_sdfgi ? MODE_SDFGI_MULTIVIEW : MODE_VOXEL_GI_MULTIVIEW); + } + } else { + if (rb->rbgi.using_half_size_gi) { + mode = (use_sdfgi && use_voxel_gi_instances) ? MODE_HALF_RES_COMBINED : (use_sdfgi ? MODE_HALF_RES_SDFGI : MODE_HALF_RES_VOXEL_GI); + } else { + mode = (use_sdfgi && use_voxel_gi_instances) ? MODE_COMBINED : (use_sdfgi ? 
MODE_SDFGI : MODE_VOXEL_GI); + } + } + + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, pipelines[mode]); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->rbgi.uniform_set[v], 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(PushConstant)); + + if (rb->rbgi.using_half_size_gi) { + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->internal_width >> 1, rb->internal_height >> 1, 1); + } else { + RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->internal_width, rb->internal_height, 1); + } + } + + //do barrier later to allow oeverlap + //RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); //no barriers, let other compute, raster and transfer happen at the same time + RD::get_singleton()->draw_command_end_label(); +} + +RID GI::voxel_gi_instance_create(RID p_base) { + VoxelGIInstance voxel_gi; + voxel_gi.gi = this; + voxel_gi.storage = storage; + voxel_gi.probe = p_base; + RID rid = voxel_gi_instance_owner.make_rid(voxel_gi); + return rid; +} + +void GI::voxel_gi_instance_set_transform_to_data(RID p_probe, const Transform3D &p_xform) { + VoxelGIInstance *voxel_gi = get_probe_instance(p_probe); + ERR_FAIL_COND(!voxel_gi); + + voxel_gi->transform = p_xform; +} + +bool GI::voxel_gi_needs_update(RID p_probe) const { + VoxelGIInstance *voxel_gi = get_probe_instance(p_probe); + ERR_FAIL_COND_V(!voxel_gi, false); + + return voxel_gi->last_probe_version != voxel_gi_get_version(voxel_gi->probe); +} + +void GI::voxel_gi_update(RID p_probe, bool p_update_light_instances, const Vector &p_light_instances, const PagedArray &p_dynamic_objects, RendererSceneRenderRD *p_scene_render) { + VoxelGIInstance *voxel_gi = get_probe_instance(p_probe); + ERR_FAIL_COND(!voxel_gi); + + voxel_gi->update(p_update_light_instances, p_light_instances, p_dynamic_objects, p_scene_render); +} + +void GI::debug_voxel_gi(RID p_voxel_gi, RD::DrawListID p_draw_list, 
RID p_framebuffer, const CameraMatrix &p_camera_with_transform, bool p_lighting, bool p_emission, float p_alpha) { + VoxelGIInstance *voxel_gi = voxel_gi_instance_owner.get_or_null(p_voxel_gi); + ERR_FAIL_COND(!voxel_gi); + + voxel_gi->debug(p_draw_list, p_framebuffer, p_camera_with_transform, p_lighting, p_emission, p_alpha); +} diff --git a/servers/rendering/renderer_rd/environment/gi.h b/servers/rendering/renderer_rd/environment/gi.h new file mode 100644 index 0000000000..b6ecfe42ea --- /dev/null +++ b/servers/rendering/renderer_rd/environment/gi.h @@ -0,0 +1,800 @@ +/*************************************************************************/ +/* gi.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. 
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#ifndef GI_RD_H +#define GI_RD_H + +#include "core/templates/local_vector.h" +#include "core/templates/rid_owner.h" +#include "servers/rendering/environment/renderer_gi.h" +#include "servers/rendering/renderer_compositor.h" +#include "servers/rendering/renderer_rd/renderer_scene_environment_rd.h" +#include "servers/rendering/renderer_rd/renderer_scene_sky_rd.h" +#include "servers/rendering/renderer_rd/renderer_storage_rd.h" +#include "servers/rendering/renderer_rd/shaders/environment/gi.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/environment/sdfgi_debug.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/environment/sdfgi_debug_probes.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/environment/sdfgi_direct_light.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/environment/sdfgi_integrate.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/environment/sdfgi_preprocess.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/environment/voxel_gi.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/environment/voxel_gi_debug.glsl.gen.h" +#include "servers/rendering/renderer_scene_render.h" +#include "servers/rendering/renderer_storage.h" +#include "servers/rendering/rendering_device.h" + +// Forward declare RenderDataRD and RendererSceneRenderRD so we can pass it into some of our methods, these classes are 
pretty tightly bound +struct RenderDataRD; +class RendererSceneRenderRD; + +namespace RendererRD { + +class GI : public RendererGI { +public: + /* VOXEL GI STORAGE */ + + struct VoxelGI { + RID octree_buffer; + RID data_buffer; + RID sdf_texture; + + uint32_t octree_buffer_size = 0; + uint32_t data_buffer_size = 0; + + Vector level_counts; + + int cell_count = 0; + + Transform3D to_cell_xform; + AABB bounds; + Vector3i octree_size; + + float dynamic_range = 2.0; + float energy = 1.0; + float bias = 1.4; + float normal_bias = 0.0; + float propagation = 0.7; + bool interior = false; + bool use_two_bounces = false; + + float anisotropy_strength = 0.5; + + uint32_t version = 1; + uint32_t data_version = 1; + + RendererStorage::Dependency dependency; + }; + +private: + RendererStorageRD *storage = nullptr; + static GI *singleton; + + /* VOXEL GI STORAGE */ + + mutable RID_Owner voxel_gi_owner; + + /* VOXEL_GI INSTANCE */ + + struct VoxelGILight { + uint32_t type; + float energy; + float radius; + float attenuation; + + float color[3]; + float cos_spot_angle; + + float position[3]; + float inv_spot_attenuation; + + float direction[3]; + uint32_t has_shadow; + }; + + struct VoxelGIPushConstant { + int32_t limits[3]; + uint32_t stack_size; + + float emission_scale; + float propagation; + float dynamic_range; + uint32_t light_count; + + uint32_t cell_offset; + uint32_t cell_count; + float aniso_strength; + uint32_t pad; + }; + + struct VoxelGIDynamicPushConstant { + int32_t limits[3]; + uint32_t light_count; + int32_t x_dir[3]; + float z_base; + int32_t y_dir[3]; + float z_sign; + int32_t z_dir[3]; + float pos_multiplier; + uint32_t rect_pos[2]; + uint32_t rect_size[2]; + uint32_t prev_rect_ofs[2]; + uint32_t prev_rect_size[2]; + uint32_t flip_x; + uint32_t flip_y; + float dynamic_range; + uint32_t on_mipmap; + float propagation; + float pad[3]; + }; + + VoxelGILight *voxel_gi_lights = nullptr; + uint32_t voxel_gi_max_lights = 32; + RID voxel_gi_lights_uniform; + + enum { + 
VOXEL_GI_SHADER_VERSION_COMPUTE_LIGHT, + VOXEL_GI_SHADER_VERSION_COMPUTE_SECOND_BOUNCE, + VOXEL_GI_SHADER_VERSION_COMPUTE_MIPMAP, + VOXEL_GI_SHADER_VERSION_WRITE_TEXTURE, + VOXEL_GI_SHADER_VERSION_DYNAMIC_OBJECT_LIGHTING, + VOXEL_GI_SHADER_VERSION_DYNAMIC_SHRINK_WRITE, + VOXEL_GI_SHADER_VERSION_DYNAMIC_SHRINK_PLOT, + VOXEL_GI_SHADER_VERSION_DYNAMIC_SHRINK_WRITE_PLOT, + VOXEL_GI_SHADER_VERSION_MAX + }; + + VoxelGiShaderRD voxel_gi_shader; + RID voxel_gi_lighting_shader_version; + RID voxel_gi_lighting_shader_version_shaders[VOXEL_GI_SHADER_VERSION_MAX]; + RID voxel_gi_lighting_shader_version_pipelines[VOXEL_GI_SHADER_VERSION_MAX]; + + enum { + VOXEL_GI_DEBUG_COLOR, + VOXEL_GI_DEBUG_LIGHT, + VOXEL_GI_DEBUG_EMISSION, + VOXEL_GI_DEBUG_LIGHT_FULL, + VOXEL_GI_DEBUG_MAX + }; + + struct VoxelGIDebugPushConstant { + float projection[16]; + uint32_t cell_offset; + float dynamic_range; + float alpha; + uint32_t level; + int32_t bounds[3]; + uint32_t pad; + }; + + VoxelGiDebugShaderRD voxel_gi_debug_shader; + RID voxel_gi_debug_shader_version; + RID voxel_gi_debug_shader_version_shaders[VOXEL_GI_DEBUG_MAX]; + PipelineCacheRD voxel_gi_debug_shader_version_pipelines[VOXEL_GI_DEBUG_MAX]; + RID voxel_gi_debug_uniform_set; + + /* SDFGI */ + + struct SDFGIShader { + enum SDFGIPreprocessShaderVersion { + PRE_PROCESS_SCROLL, + PRE_PROCESS_SCROLL_OCCLUSION, + PRE_PROCESS_JUMP_FLOOD_INITIALIZE, + PRE_PROCESS_JUMP_FLOOD_INITIALIZE_HALF, + PRE_PROCESS_JUMP_FLOOD, + PRE_PROCESS_JUMP_FLOOD_OPTIMIZED, + PRE_PROCESS_JUMP_FLOOD_UPSCALE, + PRE_PROCESS_OCCLUSION, + PRE_PROCESS_STORE, + PRE_PROCESS_MAX + }; + + struct PreprocessPushConstant { + int32_t scroll[3]; + int32_t grid_size; + + int32_t probe_offset[3]; + int32_t step_size; + + int32_t half_size; + uint32_t occlusion_index; + int32_t cascade; + uint32_t pad; + }; + + SdfgiPreprocessShaderRD preprocess; + RID preprocess_shader; + RID preprocess_pipeline[PRE_PROCESS_MAX]; + + struct DebugPushConstant { + float grid_size[3]; + uint32_t 
max_cascades; + + int32_t screen_size[2]; + uint32_t use_occlusion; + float y_mult; + + uint32_t probe_axis_size; + float z_near; + float reserved1; + float reserved2; + + float cam_transform[16]; + float inv_projection[16]; + }; + + SdfgiDebugShaderRD debug; + RID debug_shader; + RID debug_shader_version; + RID debug_pipeline; + + enum ProbeDebugMode { + PROBE_DEBUG_PROBES, + PROBE_DEBUG_PROBES_MULTIVIEW, + PROBE_DEBUG_VISIBILITY, + PROBE_DEBUG_VISIBILITY_MULTIVIEW, + PROBE_DEBUG_MAX + }; + + struct DebugProbesSceneData { + float projection[2][16]; + }; + + struct DebugProbesPushConstant { + uint32_t band_power; + uint32_t sections_in_band; + uint32_t band_mask; + float section_arc; + + float grid_size[3]; + uint32_t cascade; + + uint32_t pad; + float y_mult; + int32_t probe_debug_index; + int32_t probe_axis_size; + }; + + SdfgiDebugProbesShaderRD debug_probes; + RID debug_probes_shader; + RID debug_probes_shader_version; + + PipelineCacheRD debug_probes_pipeline[PROBE_DEBUG_MAX]; + + struct Light { + float color[3]; + float energy; + + float direction[3]; + uint32_t has_shadow; + + float position[3]; + float attenuation; + + uint32_t type; + float cos_spot_angle; + float inv_spot_attenuation; + float radius; + }; + + struct DirectLightPushConstant { + float grid_size[3]; + uint32_t max_cascades; + + uint32_t cascade; + uint32_t light_count; + uint32_t process_offset; + uint32_t process_increment; + + int32_t probe_axis_size; + float bounce_feedback; + float y_mult; + uint32_t use_occlusion; + }; + + enum { + DIRECT_LIGHT_MODE_STATIC, + DIRECT_LIGHT_MODE_DYNAMIC, + DIRECT_LIGHT_MODE_MAX + }; + SdfgiDirectLightShaderRD direct_light; + RID direct_light_shader; + RID direct_light_pipeline[DIRECT_LIGHT_MODE_MAX]; + + enum { + INTEGRATE_MODE_PROCESS, + INTEGRATE_MODE_STORE, + INTEGRATE_MODE_SCROLL, + INTEGRATE_MODE_SCROLL_STORE, + INTEGRATE_MODE_MAX + }; + struct IntegratePushConstant { + enum { + SKY_MODE_DISABLED, + SKY_MODE_COLOR, + SKY_MODE_SKY, + }; + + float 
grid_size[3]; + uint32_t max_cascades; + + uint32_t probe_axis_size; + uint32_t cascade; + uint32_t history_index; + uint32_t history_size; + + uint32_t ray_count; + float ray_bias; + int32_t image_size[2]; + + int32_t world_offset[3]; + uint32_t sky_mode; + + int32_t scroll[3]; + float sky_energy; + + float sky_color[3]; + float y_mult; + + uint32_t store_ambient_texture; + uint32_t pad[3]; + }; + + SdfgiIntegrateShaderRD integrate; + RID integrate_shader; + RID integrate_pipeline[INTEGRATE_MODE_MAX]; + + RID integrate_default_sky_uniform_set; + + } sdfgi_shader; + +public: + static GI *get_singleton() { return singleton; } + + /* VOXEL GI API */ + + VoxelGI *get_voxel_gi(RID p_rid) { return voxel_gi_owner.get_or_null(p_rid); }; + bool owns_voxel_gi(RID p_rid) { return voxel_gi_owner.owns(p_rid); }; + + virtual RID voxel_gi_allocate() override; + virtual void voxel_gi_free(RID p_voxel_gi) override; + virtual void voxel_gi_initialize(RID p_voxel_gi) override; + + virtual void voxel_gi_allocate_data(RID p_voxel_gi, const Transform3D &p_to_cell_xform, const AABB &p_aabb, const Vector3i &p_octree_size, const Vector &p_octree_cells, const Vector &p_data_cells, const Vector &p_distance_field, const Vector &p_level_counts) override; + + virtual AABB voxel_gi_get_bounds(RID p_voxel_gi) const override; + virtual Vector3i voxel_gi_get_octree_size(RID p_voxel_gi) const override; + virtual Vector voxel_gi_get_octree_cells(RID p_voxel_gi) const override; + virtual Vector voxel_gi_get_data_cells(RID p_voxel_gi) const override; + virtual Vector voxel_gi_get_distance_field(RID p_voxel_gi) const override; + + virtual Vector voxel_gi_get_level_counts(RID p_voxel_gi) const override; + virtual Transform3D voxel_gi_get_to_cell_xform(RID p_voxel_gi) const override; + + virtual void voxel_gi_set_dynamic_range(RID p_voxel_gi, float p_range) override; + virtual float voxel_gi_get_dynamic_range(RID p_voxel_gi) const override; + + virtual void voxel_gi_set_propagation(RID p_voxel_gi, float 
p_range) override; + virtual float voxel_gi_get_propagation(RID p_voxel_gi) const override; + + virtual void voxel_gi_set_energy(RID p_voxel_gi, float p_energy) override; + virtual float voxel_gi_get_energy(RID p_voxel_gi) const override; + + virtual void voxel_gi_set_bias(RID p_voxel_gi, float p_bias) override; + virtual float voxel_gi_get_bias(RID p_voxel_gi) const override; + + virtual void voxel_gi_set_normal_bias(RID p_voxel_gi, float p_range) override; + virtual float voxel_gi_get_normal_bias(RID p_voxel_gi) const override; + + virtual void voxel_gi_set_interior(RID p_voxel_gi, bool p_enable) override; + virtual bool voxel_gi_is_interior(RID p_voxel_gi) const override; + + virtual void voxel_gi_set_use_two_bounces(RID p_voxel_gi, bool p_enable) override; + virtual bool voxel_gi_is_using_two_bounces(RID p_voxel_gi) const override; + + virtual void voxel_gi_set_anisotropy_strength(RID p_voxel_gi, float p_strength) override; + virtual float voxel_gi_get_anisotropy_strength(RID p_voxel_gi) const override; + + virtual uint32_t voxel_gi_get_version(RID p_probe) const override; + uint32_t voxel_gi_get_data_version(RID p_probe); + + RID voxel_gi_get_octree_buffer(RID p_voxel_gi) const; + RID voxel_gi_get_data_buffer(RID p_voxel_gi) const; + + RID voxel_gi_get_sdf_texture(RID p_voxel_gi); + + /* VOXEL_GI INSTANCE */ + + //@TODO VoxelGIInstance is still directly used in the render code, we'll address this when we refactor the render code itself. 
+ + struct VoxelGIInstance { + // access to our containers + RendererStorageRD *storage = nullptr; + GI *gi = nullptr; + + RID probe; + RID texture; + RID write_buffer; + + struct Mipmap { + RID texture; + RID uniform_set; + RID second_bounce_uniform_set; + RID write_uniform_set; + uint32_t level; + uint32_t cell_offset; + uint32_t cell_count; + }; + Vector mipmaps; + + struct DynamicMap { + RID texture; //color normally, or emission on first pass + RID fb_depth; //actual depth buffer for the first pass, float depth for later passes + RID depth; //actual depth buffer for the first pass, float depth for later passes + RID normal; //normal buffer for the first pass + RID albedo; //emission buffer for the first pass + RID orm; //orm buffer for the first pass + RID fb; //used for rendering, only valid on first map + RID uniform_set; + uint32_t size; + int mipmap; // mipmap to write to, -1 if no mipmap assigned + }; + + Vector dynamic_maps; + + int slot = -1; + uint32_t last_probe_version = 0; + uint32_t last_probe_data_version = 0; + + //uint64_t last_pass = 0; + uint32_t render_index = 0; + + bool has_dynamic_object_data = false; + + Transform3D transform; + + void update(bool p_update_light_instances, const Vector &p_light_instances, const PagedArray &p_dynamic_objects, RendererSceneRenderRD *p_scene_render); + void debug(RD::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform, bool p_lighting, bool p_emission, float p_alpha); + }; + + mutable RID_Owner voxel_gi_instance_owner; + + _FORCE_INLINE_ VoxelGIInstance *get_probe_instance(RID p_probe) const { + return voxel_gi_instance_owner.get_or_null(p_probe); + }; + + _FORCE_INLINE_ RID voxel_gi_instance_get_texture(RID p_probe) { + VoxelGIInstance *voxel_gi = get_probe_instance(p_probe); + ERR_FAIL_COND_V(!voxel_gi, RID()); + return voxel_gi->texture; + }; + + RS::VoxelGIQuality voxel_gi_quality = RS::VOXEL_GI_QUALITY_LOW; + + /* SDFGI */ + + struct SDFGI { + enum { + MAX_CASCADES = 
8, + CASCADE_SIZE = 128, + PROBE_DIVISOR = 16, + ANISOTROPY_SIZE = 6, + MAX_DYNAMIC_LIGHTS = 128, + MAX_STATIC_LIGHTS = 1024, + LIGHTPROBE_OCT_SIZE = 6, + SH_SIZE = 16 + }; + + struct Cascade { + struct UBO { + float offset[3]; + float to_cell; + int32_t probe_offset[3]; + uint32_t pad; + }; + + //cascade blocks are full-size for volume (128^3), half size for albedo/emission + RID sdf_tex; + RID light_tex; + RID light_aniso_0_tex; + RID light_aniso_1_tex; + + RID light_data; + RID light_aniso_0_data; + RID light_aniso_1_data; + + struct SolidCell { // this struct is unused, but remains as reference for size + uint32_t position; + uint32_t albedo; + uint32_t static_light; + uint32_t static_light_aniso; + }; + + RID solid_cell_dispatch_buffer; //buffer for indirect compute dispatch + RID solid_cell_buffer; + + RID lightprobe_history_tex; + RID lightprobe_average_tex; + + float cell_size; + Vector3i position; + + static const Vector3i DIRTY_ALL; + Vector3i dirty_regions; //(0,0,0 is not dirty, negative is refresh from the end, DIRTY_ALL is refresh all. 
+ + RID sdf_store_uniform_set; + RID sdf_direct_light_uniform_set; + RID scroll_uniform_set; + RID scroll_occlusion_uniform_set; + RID integrate_uniform_set; + RID lights_buffer; + + bool all_dynamic_lights_dirty = true; + }; + + // access to our containers + RendererStorageRD *storage = nullptr; + GI *gi = nullptr; + + // used for rendering (voxelization) + RID render_albedo; + RID render_emission; + RID render_emission_aniso; + RID render_occlusion[8]; + RID render_geom_facing; + + RID render_sdf[2]; + RID render_sdf_half[2]; + + // used for ping pong processing in cascades + RID sdf_initialize_uniform_set; + RID sdf_initialize_half_uniform_set; + RID jump_flood_uniform_set[2]; + RID jump_flood_half_uniform_set[2]; + RID sdf_upscale_uniform_set; + int upscale_jfa_uniform_set_index; + RID occlusion_uniform_set; + + uint32_t cascade_size = 128; + + LocalVector cascades; + + RID lightprobe_texture; + RID lightprobe_data; + RID occlusion_texture; + RID occlusion_data; + RID ambient_texture; //integrates with volumetric fog + + RID lightprobe_history_scroll; //used for scrolling lightprobes + RID lightprobe_average_scroll; //used for scrolling lightprobes + + uint32_t history_size = 0; + float solid_cell_ratio = 0; + uint32_t solid_cell_count = 0; + + int num_cascades = 6; + float min_cell_size = 0; + uint32_t probe_axis_count = 0; //amount of probes per axis, this is an odd number because it encloses endpoints + + RID debug_uniform_set[RendererSceneRender::MAX_RENDER_VIEWS]; + RID debug_probes_scene_data_ubo; + RID debug_probes_uniform_set; + RID cascades_ubo; + + bool uses_occlusion = false; + float bounce_feedback = 0.5; + bool reads_sky = true; + float energy = 1.0; + float normal_bias = 1.1; + float probe_bias = 1.1; + RS::EnvironmentSDFGIYScale y_scale_mode = RS::ENV_SDFGI_Y_SCALE_75_PERCENT; + + float y_mult = 1.0; + + uint32_t render_pass = 0; + + int32_t cascade_dynamic_light_count[SDFGI::MAX_CASCADES]; //used dynamically + RID integrate_sky_uniform_set; + + 
void create(RendererSceneEnvironmentRD *p_env, const Vector3 &p_world_position, uint32_t p_requested_history_size, GI *p_gi); + void erase(); + void update(RendererSceneEnvironmentRD *p_env, const Vector3 &p_world_position); + void update_light(); + void update_probes(RendererSceneEnvironmentRD *p_env, RendererSceneSkyRD::Sky *p_sky); + void store_probes(); + int get_pending_region_data(int p_region, Vector3i &r_local_offset, Vector3i &r_local_size, AABB &r_bounds) const; + void update_cascades(); + + void debug_draw(uint32_t p_view_count, const CameraMatrix *p_projections, const Transform3D &p_transform, int p_width, int p_height, RID p_render_target, RID p_texture, const Vector &p_texture_views); + void debug_probes(RID p_framebuffer, const uint32_t p_view_count, const CameraMatrix *p_camera_with_transforms, bool p_will_continue_color, bool p_will_continue_depth); + + void pre_process_gi(const Transform3D &p_transform, RenderDataRD *p_render_data, RendererSceneRenderRD *p_scene_render); + void render_region(RID p_render_buffers, int p_region, const PagedArray &p_instances, RendererSceneRenderRD *p_scene_render); + void render_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray *p_positional_light_cull_result, RendererSceneRenderRD *p_scene_render); + }; + + RS::EnvironmentSDFGIRayCount sdfgi_ray_count = RS::ENV_SDFGI_RAY_COUNT_16; + RS::EnvironmentSDFGIFramesToConverge sdfgi_frames_to_converge = RS::ENV_SDFGI_CONVERGE_IN_30_FRAMES; + RS::EnvironmentSDFGIFramesToUpdateLight sdfgi_frames_to_update_light = RS::ENV_SDFGI_UPDATE_LIGHT_IN_4_FRAMES; + + float sdfgi_solid_cell_ratio = 0.25; + Vector3 sdfgi_debug_probe_pos; + Vector3 sdfgi_debug_probe_dir; + bool sdfgi_debug_probe_enabled = false; + Vector3i sdfgi_debug_probe_index; + + /* SDFGI UPDATE */ + + int sdfgi_get_lightprobe_octahedron_size() const { return SDFGI::LIGHTPROBE_OCT_SIZE; } + + /* GI */ + enum { + MAX_VOXEL_GI_INSTANCES = 8 + }; + + // 
Struct for use in render buffer + struct RenderBuffersGI { + RID voxel_gi_textures[MAX_VOXEL_GI_INSTANCES]; + RID voxel_gi_buffer; + + RID full_buffer; + RID full_dispatch; + RID full_mask; + + /* GI buffers */ + RID ambient_buffer; + RID reflection_buffer; + RID ambient_view[RendererSceneRender::MAX_RENDER_VIEWS]; + RID reflection_view[RendererSceneRender::MAX_RENDER_VIEWS]; + RID uniform_set[RendererSceneRender::MAX_RENDER_VIEWS]; + bool using_half_size_gi = false; + uint32_t view_count = 1; + + RID scene_data_ubo; + + void free(); + }; + + struct SDFGIData { + float grid_size[3]; + uint32_t max_cascades; + + uint32_t use_occlusion; + int32_t probe_axis_size; + float probe_to_uvw; + float normal_bias; + + float lightprobe_tex_pixel_size[3]; + float energy; + + float lightprobe_uv_offset[3]; + float y_mult; + + float occlusion_clamp[3]; + uint32_t pad3; + + float occlusion_renormalize[3]; + uint32_t pad4; + + float cascade_probe_size[3]; + uint32_t pad5; + + struct ProbeCascadeData { + float position[3]; //offset of (0,0,0) in world coordinates + float to_probe; // 1/bounds * grid_size + int32_t probe_world_offset[3]; + float to_cell; // 1/bounds * grid_size + }; + + ProbeCascadeData cascades[SDFGI::MAX_CASCADES]; + }; + + struct VoxelGIData { + float xform[16]; // 64 - 64 + + float bounds[3]; // 12 - 76 + float dynamic_range; // 4 - 80 + + float bias; // 4 - 84 + float normal_bias; // 4 - 88 + uint32_t blend_ambient; // 4 - 92 + uint32_t mipmaps; // 4 - 96 + }; + + struct SceneData { + float inv_projection[2][16]; + float cam_transform[16]; + float eye_offset[2][4]; + + int32_t screen_size[2]; + float pad1; + float pad2; + }; + + struct PushConstant { + uint32_t view_index; + uint32_t max_voxel_gi_instances; + uint32_t high_quality_vct; + uint32_t orthogonal; + + float proj_info[4]; + + float z_near; + float z_far; + float pad1; + float pad2; + }; + + RID sdfgi_ubo; + enum Mode { + MODE_VOXEL_GI, + MODE_SDFGI, + MODE_COMBINED, + MODE_HALF_RES_VOXEL_GI, + 
MODE_HALF_RES_SDFGI, + MODE_HALF_RES_COMBINED, + + MODE_VOXEL_GI_MULTIVIEW, + MODE_SDFGI_MULTIVIEW, + MODE_COMBINED_MULTIVIEW, + MODE_HALF_RES_VOXEL_GI_MULTIVIEW, + MODE_HALF_RES_SDFGI_MULTIVIEW, + MODE_HALF_RES_COMBINED_MULTIVIEW, + + MODE_MAX + }; + + RID default_voxel_gi_buffer; + + bool half_resolution = false; + GiShaderRD shader; + RID shader_version; + RID pipelines[MODE_MAX]; + + GI(); + ~GI(); + + void init(RendererStorageRD *p_storage, RendererSceneSkyRD *p_sky); + void free(); + + SDFGI *create_sdfgi(RendererSceneEnvironmentRD *p_env, const Vector3 &p_world_position, uint32_t p_requested_history_size); + + void setup_voxel_gi_instances(RID p_render_buffers, const Transform3D &p_transform, const PagedArray &p_voxel_gi_instances, uint32_t &r_voxel_gi_instances_used, RendererSceneRenderRD *p_scene_render); + void process_gi(RID p_render_buffers, RID *p_normal_roughness_views, RID p_voxel_gi_buffer, RID p_environment, uint32_t p_view_count, const CameraMatrix *p_projections, const Vector3 *p_eye_offsets, const Transform3D &p_cam_transform, const PagedArray &p_voxel_gi_instances, RendererSceneRenderRD *p_scene_render); + + RID voxel_gi_instance_create(RID p_base); + void voxel_gi_instance_set_transform_to_data(RID p_probe, const Transform3D &p_xform); + bool voxel_gi_needs_update(RID p_probe) const; + void voxel_gi_update(RID p_probe, bool p_update_light_instances, const Vector &p_light_instances, const PagedArray &p_dynamic_objects, RendererSceneRenderRD *p_scene_render); + void debug_voxel_gi(RID p_voxel_gi, RD::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform, bool p_lighting, bool p_emission, float p_alpha); +}; + +} // namespace RendererRD + +#endif /* !GI_RD_H */ diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp index d390614e53..c7048289c8 100644 --- 
a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp @@ -48,6 +48,13 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_specular() if (!specular.is_valid()) { RD::TextureFormat tf; tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + if (view_count > 1) { + tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; + tf.array_layers = view_count; + } else { + tf.texture_type = RD::TEXTURE_TYPE_2D; + tf.array_layers = 1; + } tf.width = width; tf.height = height; tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; @@ -64,7 +71,7 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_specular() Vector fb; fb.push_back(specular); - specular_only_fb = RD::get_singleton()->framebuffer_create(fb); + specular_only_fb = RD::get_singleton()->framebuffer_create(fb, RD::INVALID_ID, view_count); } } else { @@ -76,7 +83,7 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_specular() Vector fb; fb.push_back(specular_msaa); - specular_only_fb = RD::get_singleton()->framebuffer_create(fb); + specular_only_fb = RD::get_singleton()->framebuffer_create(fb, RD::INVALID_ID, view_count); } } } @@ -106,6 +113,13 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_velocity() void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_voxelgi() { if (!voxelgi_buffer.is_valid()) { RD::TextureFormat tf; + if (view_count > 1) { + tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; + tf.array_layers = view_count; + } else { + tf.texture_type = RD::TEXTURE_TYPE_2D; + tf.array_layers = 1; + } tf.format = RD::DATA_FORMAT_R8G8_UINT; tf.width = width; tf.height = height; @@ -116,6 +130,14 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_voxelgi() tf_aa.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; tf_aa.samples = texture_samples; voxelgi_buffer_msaa = 
RD::get_singleton()->texture_create(tf_aa, RD::TextureView()); + + if (view_count == 1) { + voxelgi_msaa_views[0] = voxelgi_buffer_msaa; + } else { + for (uint32_t v = 0; v < view_count; v++) { + voxelgi_msaa_views[v] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), voxelgi_buffer_msaa, v, 0); + } + } } else { tf.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; } @@ -124,6 +146,14 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_voxelgi() voxelgi_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); + if (view_count == 1) { + voxelgi_views[0] = voxelgi_buffer; + } else { + for (uint32_t v = 0; v < view_count; v++) { + voxelgi_views[v] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), voxelgi_buffer, v, 0); + } + } + Vector fb; if (msaa != RS::VIEWPORT_MSAA_DISABLED) { fb.push_back(depth_msaa); @@ -135,7 +165,7 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_voxelgi() fb.push_back(voxelgi_buffer); } - depth_normal_roughness_voxelgi_fb = RD::get_singleton()->framebuffer_create(fb); + depth_normal_roughness_voxelgi_fb = RD::get_singleton()->framebuffer_create(fb, RD::INVALID_ID, view_count); } } @@ -144,7 +174,25 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::clear() { RD::get_singleton()->free(voxelgi_buffer); voxelgi_buffer = RID(); + if (view_count == 1) { + voxelgi_views[0] = RID(); + } else { + for (uint32_t v = 0; v < view_count; v++) { + RD::get_singleton()->free(voxelgi_views[v]); + voxelgi_views[v] = RID(); + } + } + if (voxelgi_buffer_msaa.is_valid()) { + if (view_count == 1) { + voxelgi_msaa_views[0] = RID(); + } else { + for (uint32_t v = 0; v < view_count; v++) { + RD::get_singleton()->free(voxelgi_msaa_views[v]); + voxelgi_msaa_views[v] = RID(); + } + } + RD::get_singleton()->free(voxelgi_buffer_msaa); voxelgi_buffer_msaa = RID(); } @@ -153,11 +201,35 @@ void 
RenderForwardClustered::RenderBufferDataForwardClustered::clear() { } if (color_msaa.is_valid()) { + if (view_count == 1) { + color_views[0] = RID(); + color_msaa_views[0] = RID(); + } else { + for (uint32_t v = 0; v < view_count; v++) { + RD::get_singleton()->free(color_views[v]); + RD::get_singleton()->free(color_msaa_views[v]); + color_views[v] = RID(); + color_msaa_views[v] = RID(); + } + } + RD::get_singleton()->free(color_msaa); color_msaa = RID(); } if (depth_msaa.is_valid()) { + if (view_count == 1) { + depth_views[0] = RID(); + depth_msaa_views[0] = RID(); + } else { + for (uint32_t v = 0; v < view_count; v++) { + RD::get_singleton()->free(depth_views[v]); + RD::get_singleton()->free(depth_msaa_views[v]); + depth_views[v] = RID(); + depth_msaa_views[v] = RID(); + } + } + RD::get_singleton()->free(depth_msaa); depth_msaa = RID(); } @@ -178,12 +250,31 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::clear() { color_framebuffers.clear(); // Color pass framebuffers are freed automatically by their dependency relations if (normal_roughness_buffer.is_valid()) { + if (view_count == 1) { + normal_roughness_views[0] = RID(); + } else { + for (uint32_t v = 0; v < view_count; v++) { + RD::get_singleton()->free(normal_roughness_views[v]); + normal_roughness_views[v] = RID(); + } + } + RD::get_singleton()->free(normal_roughness_buffer); + normal_roughness_buffer = RID(); + if (normal_roughness_buffer_msaa.is_valid()) { + if (view_count == 1) { + normal_roughness_msaa_views[0] = RID(); + } else { + for (uint32_t v = 0; v < view_count; v++) { + RD::get_singleton()->free(normal_roughness_msaa_views[v]); + normal_roughness_msaa_views[v] = RID(); + } + } RD::get_singleton()->free(normal_roughness_buffer_msaa); normal_roughness_buffer_msaa = RID(); } - normal_roughness_buffer = RID(); + depth_normal_roughness_fb = RID(); } @@ -259,6 +350,22 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::configure(RID p_c depth_msaa = 
RD::get_singleton()->texture_create(tf, RD::TextureView()); + if (view_count == 1) { + // just reuse + color_views[0] = color; + depth_views[0] = depth; + color_msaa_views[0] = color_msaa; + depth_msaa_views[0] = depth_msaa; + } else { + // create slices + for (uint32_t v = 0; v < view_count; v++) { + color_views[v] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), color, v, 0); + depth_views[v] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), depth, v, 0); + color_msaa_views[v] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), color_msaa, v, 0); + depth_msaa_views[v] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), depth_msaa, v, 0); + } + } + { Vector fb; fb.push_back(color_msaa); @@ -308,6 +415,8 @@ RID RenderForwardClustered::RenderBufferDataForwardClustered::get_color_pass_fb( } void RenderForwardClustered::_allocate_normal_roughness_texture(RenderBufferDataForwardClustered *rb) { + ERR_FAIL_COND_MSG(rb->view_count > 2, "Only support up to two views for roughness texture"); + if (rb->normal_roughness_buffer.is_valid()) { return; } @@ -316,6 +425,13 @@ void RenderForwardClustered::_allocate_normal_roughness_texture(RenderBufferData tf.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; tf.width = rb->width; tf.height = rb->height; + if (rb->view_count > 1) { + tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; + tf.array_layers = rb->view_count; + } else { + tf.texture_type = RD::TEXTURE_TYPE_2D; + tf.array_layers = 1; + } tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; if (rb->msaa != RS::VIEWPORT_MSAA_DISABLED) { @@ -330,7 +446,7 @@ void RenderForwardClustered::_allocate_normal_roughness_texture(RenderBufferData Vector fb; fb.push_back(rb->depth); fb.push_back(rb->normal_roughness_buffer); - rb->depth_normal_roughness_fb = RD::get_singleton()->framebuffer_create(fb); + rb->depth_normal_roughness_fb = RD::get_singleton()->framebuffer_create(fb, 
RD::INVALID_ID, rb->view_count); } else { tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; tf.samples = rb->texture_samples; @@ -339,7 +455,21 @@ void RenderForwardClustered::_allocate_normal_roughness_texture(RenderBufferData Vector fb; fb.push_back(rb->depth_msaa); fb.push_back(rb->normal_roughness_buffer_msaa); - rb->depth_normal_roughness_fb = RD::get_singleton()->framebuffer_create(fb); + rb->depth_normal_roughness_fb = RD::get_singleton()->framebuffer_create(fb, RD::INVALID_ID, rb->view_count); + } + + if (rb->view_count == 1) { + rb->normal_roughness_views[0] = rb->normal_roughness_buffer; + if (rb->msaa != RS::VIEWPORT_MSAA_DISABLED) { + rb->normal_roughness_msaa_views[0] = rb->normal_roughness_buffer_msaa; + } + } else { + for (uint32_t v = 0; v < rb->view_count; v++) { + rb->normal_roughness_views[v] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->normal_roughness_buffer, v, 0); + if (rb->msaa != RS::VIEWPORT_MSAA_DISABLED) { + rb->normal_roughness_msaa_views[v] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), rb->normal_roughness_buffer_msaa, v, 0); + } + } } } @@ -503,22 +633,21 @@ void RenderForwardClustered::_render_list_template(RenderingDevice::DrawListID p pipeline_version = p_params->view_count > 1 ? 
SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_MULTIVIEW : SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS; } break; case PASS_MODE_SHADOW_DP: { - ERR_FAIL_COND_MSG(p_params->view_count > 1, "Multiview not supported for shadow DP pass"); + ERR_FAIL_COND_MSG(p_params->view_count > 1, "Multiview not supported for shadow DP pass"); pipeline_version = SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_DP; } break; case PASS_MODE_DEPTH_NORMAL_ROUGHNESS: { - ERR_FAIL_COND_MSG(p_params->view_count > 1, "Multiview not supported for depth/roughness pass"); - pipeline_version = SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS; + pipeline_version = p_params->view_count > 1 ? SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_MULTIVIEW : SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS; } break; case PASS_MODE_DEPTH_NORMAL_ROUGHNESS_VOXEL_GI: { - ERR_FAIL_COND_MSG(p_params->view_count > 1, "Multiview not supported for voxel GI pass"); - pipeline_version = SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI; + pipeline_version = p_params->view_count > 1 ? SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI_MULTIVIEW : SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI; } break; case PASS_MODE_DEPTH_MATERIAL: { ERR_FAIL_COND_MSG(p_params->view_count > 1, "Multiview not supported for material pass"); pipeline_version = SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_MATERIAL; } break; case PASS_MODE_SDF: { + // Note, SDF is prepared in world space, this shouldn't be a multiview buffer even when stereoscopic rendering is used. 
ERR_FAIL_COND_MSG(p_params->view_count > 1, "Multiview not supported for SDF pass"); pipeline_version = SceneShaderForwardClustered::PIPELINE_VERSION_DEPTH_PASS_WITH_SDF; } break; @@ -1323,9 +1452,7 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co using_voxelgi = true; } - if (p_render_data->view_count > 1) { - depth_pass_mode = PASS_MODE_DEPTH; - } else if (!p_render_data->environment.is_valid() && using_voxelgi) { + if (!p_render_data->environment.is_valid() && using_voxelgi) { depth_pass_mode = PASS_MODE_DEPTH_NORMAL_ROUGHNESS_VOXEL_GI; } else if (p_render_data->environment.is_valid() && (environment_is_ssr_enabled(p_render_data->environment) || environment_is_sdfgi_enabled(p_render_data->environment) || using_voxelgi)) { if (environment_is_sdfgi_enabled(p_render_data->environment)) { @@ -1531,9 +1658,13 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co if (needs_pre_resolve) { RD::get_singleton()->barrier(RD::BARRIER_MASK_RASTER, RD::BARRIER_MASK_COMPUTE); } - storage->get_effects()->resolve_gi(render_buffer->depth_msaa, render_buffer->normal_roughness_buffer_msaa, using_voxelgi ? render_buffer->voxelgi_buffer_msaa : RID(), render_buffer->depth, render_buffer->normal_roughness_buffer, using_voxelgi ? render_buffer->voxelgi_buffer : RID(), Vector2i(render_buffer->width, render_buffer->height), texture_multisamples[render_buffer->msaa]); + for (uint32_t v = 0; v < render_buffer->view_count; v++) { + resolve_effects->resolve_gi(render_buffer->depth_msaa_views[v], render_buffer->normal_roughness_msaa_views[v], using_voxelgi ? render_buffer->voxelgi_msaa_views[v] : RID(), render_buffer->depth_views[v], render_buffer->normal_roughness_views[v], using_voxelgi ? 
render_buffer->voxelgi_views[v] : RID(), Vector2i(render_buffer->width, render_buffer->height), texture_multisamples[render_buffer->msaa]); + } } else if (finish_depth) { - storage->get_effects()->resolve_depth(render_buffer->depth_msaa, render_buffer->depth, Vector2i(render_buffer->width, render_buffer->height), texture_multisamples[render_buffer->msaa]); + for (uint32_t v = 0; v < render_buffer->view_count; v++) { + resolve_effects->resolve_depth(render_buffer->depth_msaa_views[v], render_buffer->depth_views[v], Vector2i(render_buffer->width, render_buffer->height), texture_multisamples[render_buffer->msaa]); + } } RD::get_singleton()->draw_command_end_label(); } @@ -1541,7 +1672,8 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co continue_depth = !finish_depth; } - _pre_opaque_render(p_render_data, using_ssao, using_ssil, using_sdfgi || using_voxelgi, render_buffer ? render_buffer->normal_roughness_buffer : RID(), render_buffer ? render_buffer->voxelgi_buffer : RID()); + RID null_rids[2]; + _pre_opaque_render(p_render_data, using_ssao, using_ssil, using_sdfgi || using_voxelgi, render_buffer ? render_buffer->normal_roughness_views : null_rids, render_buffer ? render_buffer->voxelgi_buffer : RID()); RD::get_singleton()->draw_command_begin_label("Render Opaque Pass"); @@ -1604,18 +1736,17 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co } if (debug_sdfgi_probes) { - //debug voxelgis + //debug sdfgi bool will_continue_color = (can_continue_color || draw_sky || draw_sky_fog_only); bool will_continue_depth = (can_continue_depth || draw_sky || draw_sky_fog_only); CameraMatrix dc; dc.set_depth_correction(true); - CameraMatrix cm = (dc * p_render_data->cam_projection) * CameraMatrix(p_render_data->cam_transform.affine_inverse()); - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(color_only_framebuffer, RD::INITIAL_ACTION_CONTINUE, will_continue_color ? 
RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_CONTINUE, will_continue_depth ? RD::FINAL_ACTION_CONTINUE : RD::FINAL_ACTION_READ); - RD::get_singleton()->draw_command_begin_label("Debug SDFGI"); - _debug_sdfgi_probes(p_render_data->render_buffers, draw_list, color_only_framebuffer, cm); - RD::get_singleton()->draw_command_end_label(); - RD::get_singleton()->draw_list_end(); + CameraMatrix cms[RendererSceneRender::MAX_RENDER_VIEWS]; + for (uint32_t v = 0; v < p_render_data->view_count; v++) { + cms[v] = (dc * p_render_data->view_projection[v]) * CameraMatrix(p_render_data->cam_transform.affine_inverse()); + } + _debug_sdfgi_probes(p_render_data->render_buffers, color_only_framebuffer, p_render_data->view_count, cms, will_continue_color, will_continue_depth); } if (draw_sky || draw_sky_fog_only) { @@ -1635,14 +1766,20 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co } if (render_buffer && !can_continue_color && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { - RD::get_singleton()->texture_resolve_multisample(render_buffer->color_msaa, render_buffer->color); + // Handle views individually, might want to look at rewriting our resolve to do both layers in one pass.
+ for (uint32_t v = 0; v < render_buffer->view_count; v++) { + RD::get_singleton()->texture_resolve_multisample(render_buffer->color_msaa_views[v], render_buffer->color_views[v]); + } + // TODO make this do multiview if (using_separate_specular) { RD::get_singleton()->texture_resolve_multisample(render_buffer->specular_msaa, render_buffer->specular); } } if (render_buffer && !can_continue_depth && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { - storage->get_effects()->resolve_depth(render_buffer->depth_msaa, render_buffer->depth, Vector2i(render_buffer->width, render_buffer->height), texture_multisamples[render_buffer->msaa]); + for (uint32_t v = 0; v < render_buffer->view_count; v++) { + resolve_effects->resolve_depth(render_buffer->depth_msaa_views[v], render_buffer->depth_views[v], Vector2i(render_buffer->width, render_buffer->height), texture_multisamples[render_buffer->msaa]); + } } if (using_separate_specular) { @@ -1697,11 +1834,13 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co RD::get_singleton()->draw_command_begin_label("Resolve"); if (render_buffer && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { - RD::get_singleton()->texture_resolve_multisample(render_buffer->color_msaa, render_buffer->color); - if (render_buffer->use_taa) { + for (uint32_t v = 0; v < render_buffer->view_count; v++) { + RD::get_singleton()->texture_resolve_multisample(render_buffer->color_msaa_views[v], render_buffer->color_views[v]); + resolve_effects->resolve_depth(render_buffer->depth_msaa_views[v], render_buffer->depth_views[v], Vector2i(render_buffer->width, render_buffer->height), texture_multisamples[render_buffer->msaa]); + } + if (render_buffer->use_taa) { // TODO make TAA stereo capable, this will need to be handled in a separate PR RD::get_singleton()->texture_resolve_multisample(render_buffer->velocity_buffer_msaa, render_buffer->velocity_buffer); } - storage->get_effects()->resolve_depth(render_buffer->depth_msaa,
render_buffer->depth, Vector2i(render_buffer->width, render_buffer->height), texture_multisamples[render_buffer->msaa]); } RD::get_singleton()->draw_command_end_label(); @@ -3320,9 +3459,16 @@ RenderForwardClustered::RenderForwardClustered(RendererStorageRD *p_storage) : render_list_thread_threshold = GLOBAL_GET("rendering/limits/forward_renderer/threaded_render_minimum_instances"); _update_shader_quality_settings(); + + resolve_effects = memnew(RendererRD::Resolve()); } RenderForwardClustered::~RenderForwardClustered() { + if (resolve_effects != nullptr) { + memdelete(resolve_effects); + resolve_effects = nullptr; + } + directional_shadow_atlas_set_size(0); { diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h index 97f39164a4..dd3d14f0a8 100644 --- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h +++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h @@ -32,6 +32,7 @@ #define RENDERING_SERVER_SCENE_RENDER_FORWARD_CLUSTERED_H #include "core/templates/paged_allocator.h" +#include "servers/rendering/renderer_rd/effects/resolve.h" #include "servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h" #include "servers/rendering/renderer_rd/pipeline_cache_rd.h" #include "servers/rendering/renderer_rd/renderer_scene_render_rd.h" @@ -72,7 +73,6 @@ class RenderForwardClustered : public RendererSceneRenderRD { RENDER_LIST_ALPHA, //used for transparent objects RENDER_LIST_SECONDARY, //used for shadows and other objects RENDER_LIST_MAX - }; /* Scene Shader */ @@ -99,7 +99,6 @@ class RenderForwardClustered : public RendererSceneRenderRD { RID depth_msaa; RID specular_msaa; RID normal_roughness_buffer_msaa; - RID roughness_buffer_msaa; RID voxelgi_buffer_msaa; RID velocity_buffer_msaa; @@ -110,7 +109,17 @@ class RenderForwardClustered : public RendererSceneRenderRD { RID specular_only_fb; 
int width, height; HashMap color_framebuffers; + + // for multiview uint32_t view_count; + RID color_views[RendererSceneRender::MAX_RENDER_VIEWS]; // we should rewrite this so we get access to the existing views in our renderer, something we can address when we reorg this + RID depth_views[RendererSceneRender::MAX_RENDER_VIEWS]; // we should rewrite this so we get access to the existing views in our renderer, something we can address when we reorg this + RID color_msaa_views[RendererSceneRender::MAX_RENDER_VIEWS]; + RID depth_msaa_views[RendererSceneRender::MAX_RENDER_VIEWS]; + RID normal_roughness_views[RendererSceneRender::MAX_RENDER_VIEWS]; + RID normal_roughness_msaa_views[RendererSceneRender::MAX_RENDER_VIEWS]; + RID voxelgi_views[RendererSceneRender::MAX_RENDER_VIEWS]; + RID voxelgi_msaa_views[RendererSceneRender::MAX_RENDER_VIEWS]; RID render_sdfgi_uniform_set; void ensure_specular(); @@ -619,6 +628,8 @@ class RenderForwardClustered : public RendererSceneRenderRD { virtual void _update_shader_quality_settings() override; + RendererRD::Resolve *resolve_effects = nullptr; + protected: virtual void _render_scene(RenderDataRD *p_render_data, const Color &p_default_bg_color) override; diff --git a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp index cfb30ef2f3..87bcfd2e07 100644 --- a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp @@ -282,6 +282,8 @@ void SceneShaderForwardClustered::ShaderData::set_code(const String &p_code) { SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL, SHADER_VERSION_DEPTH_PASS_WITH_SDF, SHADER_VERSION_DEPTH_PASS_MULTIVIEW, + SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_MULTIVIEW, + SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI_MULTIVIEW, SHADER_VERSION_COLOR_PASS, }; @@ -349,9 
+351,9 @@ void SceneShaderForwardClustered::ShaderData::set_code(const String &p_code) { if (k == PIPELINE_VERSION_DEPTH_PASS || k == PIPELINE_VERSION_DEPTH_PASS_DP || k == PIPELINE_VERSION_DEPTH_PASS_MULTIVIEW) { //none, leave empty - } else if (k == PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS) { + } else if (k == PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS || k == PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_MULTIVIEW) { blend_state = blend_state_depth_normal_roughness; - } else if (k == PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI) { + } else if (k == PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI || k == PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI_MULTIVIEW) { blend_state = blend_state_depth_normal_roughness_giprobe; } else if (k == PIPELINE_VERSION_DEPTH_PASS_WITH_MATERIAL) { blend_state = RD::PipelineColorBlendState::create_disabled(5); //writes to normal and roughness in opaque way @@ -527,10 +529,12 @@ void SceneShaderForwardClustered::init(RendererStorageRD *p_storage, const Strin shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n"); // SHADER_VERSION_DEPTH_PASS shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n#define MODE_DUAL_PARABOLOID\n"); // SHADER_VERSION_DEPTH_PASS_DP shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_NORMAL_ROUGHNESS\n"); // SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS - shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_NORMAL_ROUGHNESS\n#define MODE_RENDER_VOXEL_GI\n"); // SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_GIPROBE + shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_NORMAL_ROUGHNESS\n#define MODE_RENDER_VOXEL_GI\n"); // SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_MATERIAL\n"); // SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL 
shader_versions.push_back("\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_SDF\n"); // SHADER_VERSION_DEPTH_PASS_WITH_SDF shader_versions.push_back("\n#define USE_MULTIVIEW\n#define MODE_RENDER_DEPTH\n"); // SHADER_VERSION_DEPTH_PASS_MULTIVIEW + shader_versions.push_back("\n#define USE_MULTIVIEW\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_NORMAL_ROUGHNESS\n"); // SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_MULTIVIEW + shader_versions.push_back("\n#define USE_MULTIVIEW\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_NORMAL_ROUGHNESS\n#define MODE_RENDER_VOXEL_GI\n"); // SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI_MULTIVIEW Vector color_pass_flags = { "\n#define MODE_SEPARATE_SPECULAR\n", // SHADER_COLOR_PASS_FLAG_SEPARATE_SPECULAR @@ -553,6 +557,8 @@ void SceneShaderForwardClustered::init(RendererStorageRD *p_storage, const Strin if (!RendererCompositorRD::singleton->is_xr_enabled()) { shader.set_variant_enabled(SHADER_VERSION_DEPTH_PASS_MULTIVIEW, false); + shader.set_variant_enabled(SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_MULTIVIEW, false); + shader.set_variant_enabled(SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI_MULTIVIEW, false); // TODO Add a way to enable/disable color pass flags } } diff --git a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h index 79ccf10090..ffa3893b6a 100644 --- a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h +++ b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h @@ -52,6 +52,8 @@ public: SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL, SHADER_VERSION_DEPTH_PASS_WITH_SDF, SHADER_VERSION_DEPTH_PASS_MULTIVIEW, + SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_MULTIVIEW, + SHADER_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI_MULTIVIEW, SHADER_VERSION_COLOR_PASS, SHADER_VERSION_MAX }; @@ 
-72,6 +74,8 @@ public: PIPELINE_VERSION_DEPTH_PASS_WITH_MATERIAL, PIPELINE_VERSION_DEPTH_PASS_WITH_SDF, PIPELINE_VERSION_DEPTH_PASS_MULTIVIEW, + PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_MULTIVIEW, + PIPELINE_VERSION_DEPTH_PASS_WITH_NORMAL_AND_ROUGHNESS_AND_VOXEL_GI_MULTIVIEW, PIPELINE_VERSION_COLOR_PASS, PIPELINE_VERSION_MAX }; diff --git a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp index eae5685dd1..25acd2e25f 100644 --- a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp +++ b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp @@ -675,7 +675,8 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color RD::get_singleton()->draw_command_end_label(); // Setup Sky resolution buffers } - _pre_opaque_render(p_render_data, false, false, false, RID(), RID()); + RID null_rids[2]; + _pre_opaque_render(p_render_data, false, false, false, null_rids, RID()); uint32_t spec_constant_base_flags = 0; diff --git a/servers/rendering/renderer_rd/renderer_compositor_rd.h b/servers/rendering/renderer_rd/renderer_compositor_rd.h index aefc189f68..12bcfc4684 100644 --- a/servers/rendering/renderer_rd/renderer_compositor_rd.h +++ b/servers/rendering/renderer_rd/renderer_compositor_rd.h @@ -98,11 +98,15 @@ protected: static uint64_t frame; public: - RendererLightStorage *get_light_storage() { return light_storage; }; - RendererMaterialStorage *get_material_storage() { return material_storage; }; - RendererMeshStorage *get_mesh_storage() { return mesh_storage; }; - RendererParticlesStorage *get_particles_storage() { return particles_storage; }; - RendererTextureStorage *get_texture_storage() { return texture_storage; }; + RendererLightStorage *get_light_storage() { return light_storage; } + RendererMaterialStorage *get_material_storage() { return material_storage; } + RendererMeshStorage *get_mesh_storage() { 
return mesh_storage; } + RendererParticlesStorage *get_particles_storage() { return particles_storage; } + RendererTextureStorage *get_texture_storage() { return texture_storage; } + RendererGI *get_gi() { + ERR_FAIL_NULL_V(scene, nullptr); + return scene->get_gi(); + } RendererStorage *get_storage() { return storage; } RendererCanvasRender *get_canvas() { return canvas; } RendererSceneRender *get_scene() { return scene; } diff --git a/servers/rendering/renderer_rd/renderer_scene_gi_rd.cpp b/servers/rendering/renderer_rd/renderer_scene_gi_rd.cpp deleted file mode 100644 index 7aede6bb48..0000000000 --- a/servers/rendering/renderer_rd/renderer_scene_gi_rd.cpp +++ /dev/null @@ -1,3416 +0,0 @@ -/*************************************************************************/ -/* renderer_scene_gi_rd.cpp */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. 
*/ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ - -#include "renderer_scene_gi_rd.h" - -#include "core/config/project_settings.h" -#include "servers/rendering/renderer_rd/renderer_scene_render_rd.h" -#include "servers/rendering/renderer_rd/storage_rd/material_storage.h" -#include "servers/rendering/renderer_rd/storage_rd/texture_storage.h" -#include "servers/rendering/rendering_server_default.h" - -const Vector3i RendererSceneGIRD::SDFGI::Cascade::DIRTY_ALL = Vector3i(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF); - -//////////////////////////////////////////////////////////////////////////////// -// SDFGI - -void RendererSceneGIRD::SDFGI::create(RendererSceneEnvironmentRD *p_env, const Vector3 &p_world_position, uint32_t p_requested_history_size, RendererSceneGIRD *p_gi) { - RendererRD::TextureStorage *texture_storage = RendererRD::TextureStorage::get_singleton(); - RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); - - storage = p_gi->storage; - gi = p_gi; - num_cascades = p_env->sdfgi_cascades; - min_cell_size = p_env->sdfgi_min_cell_size; - uses_occlusion = p_env->sdfgi_use_occlusion; - y_scale_mode = p_env->sdfgi_y_scale; - static const float y_scale[3] = { 2.0, 1.5, 1.0 }; - y_mult = y_scale[y_scale_mode]; - cascades.resize(num_cascades); - probe_axis_count = SDFGI::PROBE_DIVISOR + 1; - solid_cell_ratio = gi->sdfgi_solid_cell_ratio; - solid_cell_count = uint32_t(float(cascade_size * 
cascade_size * cascade_size) * solid_cell_ratio); - - float base_cell_size = min_cell_size; - - RD::TextureFormat tf_sdf; - tf_sdf.format = RD::DATA_FORMAT_R8_UNORM; - tf_sdf.width = cascade_size; // Always 64x64 - tf_sdf.height = cascade_size; - tf_sdf.depth = cascade_size; - tf_sdf.texture_type = RD::TEXTURE_TYPE_3D; - tf_sdf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; - - { - RD::TextureFormat tf_render = tf_sdf; - tf_render.format = RD::DATA_FORMAT_R16_UINT; - render_albedo = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); - tf_render.format = RD::DATA_FORMAT_R32_UINT; - render_emission = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); - render_emission_aniso = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); - - tf_render.format = RD::DATA_FORMAT_R8_UNORM; //at least its easy to visualize - - for (int i = 0; i < 8; i++) { - render_occlusion[i] = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); - } - - tf_render.format = RD::DATA_FORMAT_R32_UINT; - render_geom_facing = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); - - tf_render.format = RD::DATA_FORMAT_R8G8B8A8_UINT; - render_sdf[0] = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); - render_sdf[1] = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); - - tf_render.width /= 2; - tf_render.height /= 2; - tf_render.depth /= 2; - - render_sdf_half[0] = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); - render_sdf_half[1] = RD::get_singleton()->texture_create(tf_render, RD::TextureView()); - } - - RD::TextureFormat tf_occlusion = tf_sdf; - tf_occlusion.format = RD::DATA_FORMAT_R16_UINT; - tf_occlusion.shareable_formats.push_back(RD::DATA_FORMAT_R16_UINT); - tf_occlusion.shareable_formats.push_back(RD::DATA_FORMAT_R4G4B4A4_UNORM_PACK16); - tf_occlusion.depth *= 
cascades.size(); //use depth for occlusion slices - tf_occlusion.width *= 2; //use width for the other half - - RD::TextureFormat tf_light = tf_sdf; - tf_light.format = RD::DATA_FORMAT_R32_UINT; - tf_light.shareable_formats.push_back(RD::DATA_FORMAT_R32_UINT); - tf_light.shareable_formats.push_back(RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32); - - RD::TextureFormat tf_aniso0 = tf_sdf; - tf_aniso0.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; - RD::TextureFormat tf_aniso1 = tf_sdf; - tf_aniso1.format = RD::DATA_FORMAT_R8G8_UNORM; - - int passes = nearest_shift(cascade_size) - 1; - - //store lightprobe SH - RD::TextureFormat tf_probes; - tf_probes.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; - tf_probes.width = probe_axis_count * probe_axis_count; - tf_probes.height = probe_axis_count * SDFGI::SH_SIZE; - tf_probes.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; - tf_probes.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; - - history_size = p_requested_history_size; - - RD::TextureFormat tf_probe_history = tf_probes; - tf_probe_history.format = RD::DATA_FORMAT_R16G16B16A16_SINT; //signed integer because SH are signed - tf_probe_history.array_layers = history_size; - - RD::TextureFormat tf_probe_average = tf_probes; - tf_probe_average.format = RD::DATA_FORMAT_R32G32B32A32_SINT; //signed integer because SH are signed - tf_probe_average.texture_type = RD::TEXTURE_TYPE_2D; - - lightprobe_history_scroll = RD::get_singleton()->texture_create(tf_probe_history, RD::TextureView()); - lightprobe_average_scroll = RD::get_singleton()->texture_create(tf_probe_average, RD::TextureView()); - - { - //octahedral lightprobes - RD::TextureFormat tf_octprobes = tf_probes; - tf_octprobes.array_layers = cascades.size() * 2; - tf_octprobes.format = RD::DATA_FORMAT_R32_UINT; //pack well with RGBE - tf_octprobes.width = probe_axis_count * probe_axis_count * (SDFGI::LIGHTPROBE_OCT_SIZE + 2); - 
tf_octprobes.height = probe_axis_count * (SDFGI::LIGHTPROBE_OCT_SIZE + 2); - tf_octprobes.shareable_formats.push_back(RD::DATA_FORMAT_R32_UINT); - tf_octprobes.shareable_formats.push_back(RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32); - //lightprobe texture is an octahedral texture - - lightprobe_data = RD::get_singleton()->texture_create(tf_octprobes, RD::TextureView()); - RD::TextureView tv; - tv.format_override = RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32; - lightprobe_texture = RD::get_singleton()->texture_create_shared(tv, lightprobe_data); - - //texture handling ambient data, to integrate with volumetric foc - RD::TextureFormat tf_ambient = tf_probes; - tf_ambient.array_layers = cascades.size(); - tf_ambient.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; //pack well with RGBE - tf_ambient.width = probe_axis_count * probe_axis_count; - tf_ambient.height = probe_axis_count; - tf_ambient.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; - //lightprobe texture is an octahedral texture - ambient_texture = RD::get_singleton()->texture_create(tf_ambient, RD::TextureView()); - } - - cascades_ubo = RD::get_singleton()->uniform_buffer_create(sizeof(SDFGI::Cascade::UBO) * SDFGI::MAX_CASCADES); - - occlusion_data = RD::get_singleton()->texture_create(tf_occlusion, RD::TextureView()); - { - RD::TextureView tv; - tv.format_override = RD::DATA_FORMAT_R4G4B4A4_UNORM_PACK16; - occlusion_texture = RD::get_singleton()->texture_create_shared(tv, occlusion_data); - } - - for (uint32_t i = 0; i < cascades.size(); i++) { - SDFGI::Cascade &cascade = cascades[i]; - - /* 3D Textures */ - - cascade.sdf_tex = RD::get_singleton()->texture_create(tf_sdf, RD::TextureView()); - - cascade.light_data = RD::get_singleton()->texture_create(tf_light, RD::TextureView()); - - cascade.light_aniso_0_tex = RD::get_singleton()->texture_create(tf_aniso0, RD::TextureView()); - cascade.light_aniso_1_tex = RD::get_singleton()->texture_create(tf_aniso1, RD::TextureView()); - - { - RD::TextureView tv; - tv.format_override = 
RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32; - cascade.light_tex = RD::get_singleton()->texture_create_shared(tv, cascade.light_data); - - RD::get_singleton()->texture_clear(cascade.light_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); - RD::get_singleton()->texture_clear(cascade.light_aniso_0_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); - RD::get_singleton()->texture_clear(cascade.light_aniso_1_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); - } - - cascade.cell_size = base_cell_size; - Vector3 world_position = p_world_position; - world_position.y *= y_mult; - int32_t probe_cells = cascade_size / SDFGI::PROBE_DIVISOR; - Vector3 probe_size = Vector3(1, 1, 1) * cascade.cell_size * probe_cells; - Vector3i probe_pos = Vector3i((world_position / probe_size + Vector3(0.5, 0.5, 0.5)).floor()); - cascade.position = probe_pos * probe_cells; - - cascade.dirty_regions = SDFGI::Cascade::DIRTY_ALL; - - base_cell_size *= 2.0; - - /* Probe History */ - - cascade.lightprobe_history_tex = RD::get_singleton()->texture_create(tf_probe_history, RD::TextureView()); - RD::get_singleton()->texture_clear(cascade.lightprobe_history_tex, Color(0, 0, 0, 0), 0, 1, 0, tf_probe_history.array_layers); //needs to be cleared for average to work - - cascade.lightprobe_average_tex = RD::get_singleton()->texture_create(tf_probe_average, RD::TextureView()); - RD::get_singleton()->texture_clear(cascade.lightprobe_average_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); //needs to be cleared for average to work - - /* Buffers */ - - cascade.solid_cell_buffer = RD::get_singleton()->storage_buffer_create(sizeof(SDFGI::Cascade::SolidCell) * solid_cell_count); - cascade.solid_cell_dispatch_buffer = RD::get_singleton()->storage_buffer_create(sizeof(uint32_t) * 4, Vector(), RD::STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT); - cascade.lights_buffer = RD::get_singleton()->storage_buffer_create(sizeof(SDFGIShader::Light) * MAX(SDFGI::MAX_STATIC_LIGHTS, SDFGI::MAX_DYNAMIC_LIGHTS)); - { - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = 
RD::UNIFORM_TYPE_IMAGE; - u.binding = 1; - u.append_id(render_sdf[(passes & 1) ? 1 : 0]); //if passes are even, we read from buffer 0, else we read from buffer 1 - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 2; - u.append_id(render_albedo); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 3; - for (int j = 0; j < 8; j++) { - u.append_id(render_occlusion[j]); - } - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 4; - u.append_id(render_emission); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 5; - u.append_id(render_emission_aniso); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 6; - u.append_id(render_geom_facing); - uniforms.push_back(u); - } - - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 7; - u.append_id(cascade.sdf_tex); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 8; - u.append_id(occlusion_data); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.binding = 10; - u.append_id(cascade.solid_cell_dispatch_buffer); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.binding = 11; - u.append_id(cascade.solid_cell_buffer); - uniforms.push_back(u); - } - - cascade.sdf_store_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_STORE), 0); - } - - { - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 1; - u.append_id(render_albedo); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = 
RD::UNIFORM_TYPE_IMAGE; - u.binding = 2; - u.append_id(render_geom_facing); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 3; - u.append_id(render_emission); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 4; - u.append_id(render_emission_aniso); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.binding = 5; - u.append_id(cascade.solid_cell_dispatch_buffer); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.binding = 6; - u.append_id(cascade.solid_cell_buffer); - uniforms.push_back(u); - } - - cascade.scroll_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_SCROLL), 0); - } - { - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 1; - for (int j = 0; j < 8; j++) { - u.append_id(render_occlusion[j]); - } - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 2; - u.append_id(occlusion_data); - uniforms.push_back(u); - } - - cascade.scroll_occlusion_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_SCROLL_OCCLUSION), 0); - } - } - - //direct light - for (uint32_t i = 0; i < cascades.size(); i++) { - SDFGI::Cascade &cascade = cascades[i]; - - Vector uniforms; - { - RD::Uniform u; - u.binding = 1; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { - if (j < cascades.size()) { - u.append_id(cascades[j].sdf_tex); - } else { - u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); - } - } - uniforms.push_back(u); - } - { - 
RD::Uniform u; - u.binding = 2; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; - u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 3; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.append_id(cascade.solid_cell_dispatch_buffer); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 4; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.append_id(cascade.solid_cell_buffer); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 5; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.append_id(cascade.light_data); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 6; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.append_id(cascade.light_aniso_0_tex); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 7; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.append_id(cascade.light_aniso_1_tex); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 8; - u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; - u.append_id(cascades_ubo); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 9; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.append_id(cascade.lights_buffer); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 10; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.append_id(lightprobe_texture); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 11; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.append_id(occlusion_texture); - uniforms.push_back(u); - } - - cascade.sdf_direct_light_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.direct_light.version_get_shader(gi->sdfgi_shader.direct_light_shader, 0), 0); - } - - //preprocess initialize uniform set - { - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 1; - u.append_id(render_albedo); - 
uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 2; - u.append_id(render_sdf[0]); - uniforms.push_back(u); - } - - sdf_initialize_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE), 0); - } - - { - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 1; - u.append_id(render_albedo); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 2; - u.append_id(render_sdf_half[0]); - uniforms.push_back(u); - } - - sdf_initialize_half_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE_HALF), 0); - } - - //jump flood uniform set - { - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 1; - u.append_id(render_sdf[0]); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 2; - u.append_id(render_sdf[1]); - uniforms.push_back(u); - } - - jump_flood_uniform_set[0] = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_JUMP_FLOOD), 0); - RID aux0 = uniforms.write[0].get_id(0); - RID aux1 = uniforms.write[1].get_id(0); - uniforms.write[0].set_id(0, aux1); - uniforms.write[1].set_id(0, aux0); - jump_flood_uniform_set[1] = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_JUMP_FLOOD), 0); - } - //jump flood half uniform set - { - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 1; - 
u.append_id(render_sdf_half[0]); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 2; - u.append_id(render_sdf_half[1]); - uniforms.push_back(u); - } - - jump_flood_half_uniform_set[0] = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_JUMP_FLOOD), 0); - RID aux0 = uniforms.write[0].get_id(0); - RID aux1 = uniforms.write[1].get_id(0); - uniforms.write[0].set_id(0, aux1); - uniforms.write[1].set_id(0, aux0); - jump_flood_half_uniform_set[1] = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_JUMP_FLOOD), 0); - } - - //upscale half size sdf - { - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 1; - u.append_id(render_albedo); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 2; - u.append_id(render_sdf_half[(passes & 1) ? 0 : 1]); //reverse pass order because half size - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 3; - u.append_id(render_sdf[(passes & 1) ? 0 : 1]); //reverse pass order because it needs an extra JFA pass - uniforms.push_back(u); - } - - upscale_jfa_uniform_set_index = (passes & 1) ? 
0 : 1; - sdf_upscale_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_JUMP_FLOOD_UPSCALE), 0); - } - - //occlusion uniform set - { - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 1; - u.append_id(render_albedo); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 2; - for (int i = 0; i < 8; i++) { - u.append_id(render_occlusion[i]); - } - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 3; - u.append_id(render_geom_facing); - uniforms.push_back(u); - } - - occlusion_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.preprocess.version_get_shader(gi->sdfgi_shader.preprocess_shader, SDFGIShader::PRE_PROCESS_OCCLUSION), 0); - } - - for (uint32_t i = 0; i < cascades.size(); i++) { - //integrate uniform - - Vector uniforms; - - { - RD::Uniform u; - u.binding = 1; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { - if (j < cascades.size()) { - u.append_id(cascades[j].sdf_tex); - } else { - u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); - } - } - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 2; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { - if (j < cascades.size()) { - u.append_id(cascades[j].light_tex); - } else { - u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); - } - } - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 3; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { - if (j < cascades.size()) { - u.append_id(cascades[j].light_aniso_0_tex); - } else { - 
u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); - } - } - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 4; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { - if (j < cascades.size()) { - u.append_id(cascades[j].light_aniso_1_tex); - } else { - u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); - } - } - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; - u.binding = 6; - u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); - uniforms.push_back(u); - } - - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; - u.binding = 7; - u.append_id(cascades_ubo); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 8; - u.append_id(lightprobe_data); - uniforms.push_back(u); - } - - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 9; - u.append_id(cascades[i].lightprobe_history_tex); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 10; - u.append_id(cascades[i].lightprobe_average_tex); - uniforms.push_back(u); - } - - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 11; - u.append_id(lightprobe_history_scroll); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 12; - u.append_id(lightprobe_average_scroll); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 13; - RID parent_average; - if (cascades.size() == 1) { - // If there is only one SDFGI cascade, we can't use the previous cascade for blending. 
- parent_average = cascades[i].lightprobe_average_tex; - } else if (i < cascades.size() - 1) { - parent_average = cascades[i + 1].lightprobe_average_tex; - } else { - parent_average = cascades[i - 1].lightprobe_average_tex; //to use something, but it won't be used - } - u.append_id(parent_average); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 14; - u.append_id(ambient_texture); - uniforms.push_back(u); - } - - cascades[i].integrate_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.integrate.version_get_shader(gi->sdfgi_shader.integrate_shader, 0), 0); - } - - bounce_feedback = p_env->sdfgi_bounce_feedback; - energy = p_env->sdfgi_energy; - normal_bias = p_env->sdfgi_normal_bias; - probe_bias = p_env->sdfgi_probe_bias; - reads_sky = p_env->sdfgi_read_sky_light; -} - -void RendererSceneGIRD::SDFGI::erase() { - for (uint32_t i = 0; i < cascades.size(); i++) { - const SDFGI::Cascade &c = cascades[i]; - RD::get_singleton()->free(c.light_data); - RD::get_singleton()->free(c.light_aniso_0_tex); - RD::get_singleton()->free(c.light_aniso_1_tex); - RD::get_singleton()->free(c.sdf_tex); - RD::get_singleton()->free(c.solid_cell_dispatch_buffer); - RD::get_singleton()->free(c.solid_cell_buffer); - RD::get_singleton()->free(c.lightprobe_history_tex); - RD::get_singleton()->free(c.lightprobe_average_tex); - RD::get_singleton()->free(c.lights_buffer); - } - - RD::get_singleton()->free(render_albedo); - RD::get_singleton()->free(render_emission); - RD::get_singleton()->free(render_emission_aniso); - - RD::get_singleton()->free(render_sdf[0]); - RD::get_singleton()->free(render_sdf[1]); - - RD::get_singleton()->free(render_sdf_half[0]); - RD::get_singleton()->free(render_sdf_half[1]); - - for (int i = 0; i < 8; i++) { - RD::get_singleton()->free(render_occlusion[i]); - } - - RD::get_singleton()->free(render_geom_facing); - - RD::get_singleton()->free(lightprobe_data); - 
RD::get_singleton()->free(lightprobe_history_scroll); - RD::get_singleton()->free(occlusion_data); - RD::get_singleton()->free(ambient_texture); - - RD::get_singleton()->free(cascades_ubo); -} - -void RendererSceneGIRD::SDFGI::update(RendererSceneEnvironmentRD *p_env, const Vector3 &p_world_position) { - bounce_feedback = p_env->sdfgi_bounce_feedback; - energy = p_env->sdfgi_energy; - normal_bias = p_env->sdfgi_normal_bias; - probe_bias = p_env->sdfgi_probe_bias; - reads_sky = p_env->sdfgi_read_sky_light; - - int32_t drag_margin = (cascade_size / SDFGI::PROBE_DIVISOR) / 2; - - for (uint32_t i = 0; i < cascades.size(); i++) { - SDFGI::Cascade &cascade = cascades[i]; - cascade.dirty_regions = Vector3i(); - - Vector3 probe_half_size = Vector3(1, 1, 1) * cascade.cell_size * float(cascade_size / SDFGI::PROBE_DIVISOR) * 0.5; - probe_half_size = Vector3(0, 0, 0); - - Vector3 world_position = p_world_position; - world_position.y *= y_mult; - Vector3i pos_in_cascade = Vector3i((world_position + probe_half_size) / cascade.cell_size); - - for (int j = 0; j < 3; j++) { - if (pos_in_cascade[j] < cascade.position[j]) { - while (pos_in_cascade[j] < (cascade.position[j] - drag_margin)) { - cascade.position[j] -= drag_margin * 2; - cascade.dirty_regions[j] += drag_margin * 2; - } - } else if (pos_in_cascade[j] > cascade.position[j]) { - while (pos_in_cascade[j] > (cascade.position[j] + drag_margin)) { - cascade.position[j] += drag_margin * 2; - cascade.dirty_regions[j] -= drag_margin * 2; - } - } - - if (cascade.dirty_regions[j] == 0) { - continue; // not dirty - } else if (uint32_t(ABS(cascade.dirty_regions[j])) >= cascade_size) { - //moved too much, just redraw everything (make all dirty) - cascade.dirty_regions = SDFGI::Cascade::DIRTY_ALL; - break; - } - } - - if (cascade.dirty_regions != Vector3i() && cascade.dirty_regions != SDFGI::Cascade::DIRTY_ALL) { - //see how much the total dirty volume represents from the total volume - uint32_t total_volume = cascade_size * 
cascade_size * cascade_size; - uint32_t safe_volume = 1; - for (int j = 0; j < 3; j++) { - safe_volume *= cascade_size - ABS(cascade.dirty_regions[j]); - } - uint32_t dirty_volume = total_volume - safe_volume; - if (dirty_volume > (safe_volume / 2)) { - //more than half the volume is dirty, make all dirty so its only rendered once - cascade.dirty_regions = SDFGI::Cascade::DIRTY_ALL; - } - } - } -} - -void RendererSceneGIRD::SDFGI::update_light() { - RD::get_singleton()->draw_command_begin_label("SDFGI Update dynamic Light"); - - /* Update dynamic light */ - - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.direct_light_pipeline[SDFGIShader::DIRECT_LIGHT_MODE_DYNAMIC]); - - SDFGIShader::DirectLightPushConstant push_constant; - - push_constant.grid_size[0] = cascade_size; - push_constant.grid_size[1] = cascade_size; - push_constant.grid_size[2] = cascade_size; - push_constant.max_cascades = cascades.size(); - push_constant.probe_axis_size = probe_axis_count; - push_constant.bounce_feedback = bounce_feedback; - push_constant.y_mult = y_mult; - push_constant.use_occlusion = uses_occlusion; - - for (uint32_t i = 0; i < cascades.size(); i++) { - SDFGI::Cascade &cascade = cascades[i]; - push_constant.light_count = cascade_dynamic_light_count[i]; - push_constant.cascade = i; - - if (cascades[i].all_dynamic_lights_dirty || gi->sdfgi_frames_to_update_light == RS::ENV_SDFGI_UPDATE_LIGHT_IN_1_FRAME) { - push_constant.process_offset = 0; - push_constant.process_increment = 1; - } else { - static const uint32_t frames_to_update_table[RS::ENV_SDFGI_UPDATE_LIGHT_MAX] = { - 1, 2, 4, 8, 16 - }; - - uint32_t frames_to_update = frames_to_update_table[gi->sdfgi_frames_to_update_light]; - - push_constant.process_offset = RSG::rasterizer->get_frame_number() % frames_to_update; - push_constant.process_increment = frames_to_update; - } - cascades[i].all_dynamic_lights_dirty 
= false; - - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascade.sdf_direct_light_uniform_set, 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::DirectLightPushConstant)); - RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cascade.solid_cell_dispatch_buffer, 0); - } - RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_COMPUTE); - RD::get_singleton()->draw_command_end_label(); -} - -void RendererSceneGIRD::SDFGI::update_probes(RendererSceneEnvironmentRD *p_env, RendererSceneSkyRD::Sky *p_sky) { - RD::get_singleton()->draw_command_begin_label("SDFGI Update Probes"); - - SDFGIShader::IntegratePushConstant push_constant; - push_constant.grid_size[1] = cascade_size; - push_constant.grid_size[2] = cascade_size; - push_constant.grid_size[0] = cascade_size; - push_constant.max_cascades = cascades.size(); - push_constant.probe_axis_size = probe_axis_count; - push_constant.history_index = render_pass % history_size; - push_constant.history_size = history_size; - static const uint32_t ray_count[RS::ENV_SDFGI_RAY_COUNT_MAX] = { 4, 8, 16, 32, 64, 96, 128 }; - push_constant.ray_count = ray_count[gi->sdfgi_ray_count]; - push_constant.ray_bias = probe_bias; - push_constant.image_size[0] = probe_axis_count * probe_axis_count; - push_constant.image_size[1] = probe_axis_count; - push_constant.store_ambient_texture = p_env->volumetric_fog_enabled; - - RID sky_uniform_set = gi->sdfgi_shader.integrate_default_sky_uniform_set; - push_constant.sky_mode = SDFGIShader::IntegratePushConstant::SKY_MODE_DISABLED; - push_constant.y_mult = y_mult; - - if (reads_sky && p_env) { - push_constant.sky_energy = p_env->bg_energy; - - if (p_env->background == RS::ENV_BG_CLEAR_COLOR) { - push_constant.sky_mode = SDFGIShader::IntegratePushConstant::SKY_MODE_COLOR; - Color c = storage->get_default_clear_color().srgb_to_linear(); - push_constant.sky_color[0] = c.r; - push_constant.sky_color[1] = c.g; - 
push_constant.sky_color[2] = c.b; - } else if (p_env->background == RS::ENV_BG_COLOR) { - push_constant.sky_mode = SDFGIShader::IntegratePushConstant::SKY_MODE_COLOR; - Color c = p_env->bg_color; - push_constant.sky_color[0] = c.r; - push_constant.sky_color[1] = c.g; - push_constant.sky_color[2] = c.b; - - } else if (p_env->background == RS::ENV_BG_SKY) { - if (p_sky && p_sky->radiance.is_valid()) { - if (integrate_sky_uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(integrate_sky_uniform_set)) { - Vector uniforms; - - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.binding = 0; - u.append_id(p_sky->radiance); - uniforms.push_back(u); - } - - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; - u.binding = 1; - u.append_id(RendererRD::MaterialStorage::get_singleton()->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); - uniforms.push_back(u); - } - - integrate_sky_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.integrate.version_get_shader(gi->sdfgi_shader.integrate_shader, 0), 1); - } - sky_uniform_set = integrate_sky_uniform_set; - push_constant.sky_mode = SDFGIShader::IntegratePushConstant::SKY_MODE_SKY; - } - } - } - - render_pass++; - - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(true); - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.integrate_pipeline[SDFGIShader::INTEGRATE_MODE_PROCESS]); - - int32_t probe_divisor = cascade_size / SDFGI::PROBE_DIVISOR; - for (uint32_t i = 0; i < cascades.size(); i++) { - push_constant.cascade = i; - push_constant.world_offset[0] = cascades[i].position.x / probe_divisor; - push_constant.world_offset[1] = cascades[i].position.y / probe_divisor; - push_constant.world_offset[2] = cascades[i].position.z / probe_divisor; - - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, 
cascades[i].integrate_uniform_set, 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sky_uniform_set, 1); - - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::IntegratePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, probe_axis_count * probe_axis_count, probe_axis_count, 1); - } - - //end later after raster to avoid barriering on layout changes - //RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); - - RD::get_singleton()->draw_command_end_label(); -} - -void RendererSceneGIRD::SDFGI::store_probes() { - RD::get_singleton()->barrier(RD::BARRIER_MASK_COMPUTE, RD::BARRIER_MASK_COMPUTE); - RD::get_singleton()->draw_command_begin_label("SDFGI Store Probes"); - - SDFGIShader::IntegratePushConstant push_constant; - push_constant.grid_size[1] = cascade_size; - push_constant.grid_size[2] = cascade_size; - push_constant.grid_size[0] = cascade_size; - push_constant.max_cascades = cascades.size(); - push_constant.probe_axis_size = probe_axis_count; - push_constant.history_index = render_pass % history_size; - push_constant.history_size = history_size; - static const uint32_t ray_count[RS::ENV_SDFGI_RAY_COUNT_MAX] = { 4, 8, 16, 32, 64, 96, 128 }; - push_constant.ray_count = ray_count[gi->sdfgi_ray_count]; - push_constant.ray_bias = probe_bias; - push_constant.image_size[0] = probe_axis_count * probe_axis_count; - push_constant.image_size[1] = probe_axis_count; - push_constant.store_ambient_texture = false; - - push_constant.sky_mode = 0; - push_constant.y_mult = y_mult; - - // Then store values into the lightprobe texture. 
Separating these steps has a small performance hit, but it allows for multiple bounces - RENDER_TIMESTAMP("Average SDFGI Probes"); - - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.integrate_pipeline[SDFGIShader::INTEGRATE_MODE_STORE]); - - //convert to octahedral to store - push_constant.image_size[0] *= SDFGI::LIGHTPROBE_OCT_SIZE; - push_constant.image_size[1] *= SDFGI::LIGHTPROBE_OCT_SIZE; - - for (uint32_t i = 0; i < cascades.size(); i++) { - push_constant.cascade = i; - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascades[i].integrate_uniform_set, 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, gi->sdfgi_shader.integrate_default_sky_uniform_set, 1); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::IntegratePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, probe_axis_count * probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, 1); - } - - RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_COMPUTE); - - RD::get_singleton()->draw_command_end_label(); -} - -int RendererSceneGIRD::SDFGI::get_pending_region_data(int p_region, Vector3i &r_local_offset, Vector3i &r_local_size, AABB &r_bounds) const { - int dirty_count = 0; - for (uint32_t i = 0; i < cascades.size(); i++) { - const SDFGI::Cascade &c = cascades[i]; - - if (c.dirty_regions == SDFGI::Cascade::DIRTY_ALL) { - if (dirty_count == p_region) { - r_local_offset = Vector3i(); - r_local_size = Vector3i(1, 1, 1) * cascade_size; - - r_bounds.position = Vector3((Vector3i(1, 1, 1) * -int32_t(cascade_size >> 1) + c.position)) * c.cell_size * Vector3(1, 1.0 / y_mult, 1); - r_bounds.size = Vector3(r_local_size) * c.cell_size * Vector3(1, 1.0 / y_mult, 1); - return i; - } - dirty_count++; - } else { - for (int j = 0; j < 3; j++) { 
- if (c.dirty_regions[j] != 0) { - if (dirty_count == p_region) { - Vector3i from = Vector3i(0, 0, 0); - Vector3i to = Vector3i(1, 1, 1) * cascade_size; - - if (c.dirty_regions[j] > 0) { - //fill from the beginning - to[j] = c.dirty_regions[j]; - } else { - //fill from the end - from[j] = to[j] + c.dirty_regions[j]; - } - - for (int k = 0; k < j; k++) { - // "chip" away previous regions to avoid re-voxelizing the same thing - if (c.dirty_regions[k] > 0) { - from[k] += c.dirty_regions[k]; - } else if (c.dirty_regions[k] < 0) { - to[k] += c.dirty_regions[k]; - } - } - - r_local_offset = from; - r_local_size = to - from; - - r_bounds.position = Vector3(from + Vector3i(1, 1, 1) * -int32_t(cascade_size >> 1) + c.position) * c.cell_size * Vector3(1, 1.0 / y_mult, 1); - r_bounds.size = Vector3(r_local_size) * c.cell_size * Vector3(1, 1.0 / y_mult, 1); - - return i; - } - - dirty_count++; - } - } - } - } - return -1; -} - -void RendererSceneGIRD::SDFGI::update_cascades() { - //update cascades - SDFGI::Cascade::UBO cascade_data[SDFGI::MAX_CASCADES]; - int32_t probe_divisor = cascade_size / SDFGI::PROBE_DIVISOR; - - for (uint32_t i = 0; i < cascades.size(); i++) { - Vector3 pos = Vector3((Vector3i(1, 1, 1) * -int32_t(cascade_size >> 1) + cascades[i].position)) * cascades[i].cell_size; - - cascade_data[i].offset[0] = pos.x; - cascade_data[i].offset[1] = pos.y; - cascade_data[i].offset[2] = pos.z; - cascade_data[i].to_cell = 1.0 / cascades[i].cell_size; - cascade_data[i].probe_offset[0] = cascades[i].position.x / probe_divisor; - cascade_data[i].probe_offset[1] = cascades[i].position.y / probe_divisor; - cascade_data[i].probe_offset[2] = cascades[i].position.z / probe_divisor; - cascade_data[i].pad = 0; - } - - RD::get_singleton()->buffer_update(cascades_ubo, 0, sizeof(SDFGI::Cascade::UBO) * SDFGI::MAX_CASCADES, cascade_data, RD::BARRIER_MASK_COMPUTE); -} - -void RendererSceneGIRD::SDFGI::debug_draw(const CameraMatrix &p_projection, const Transform3D &p_transform, int p_width, 
int p_height, RID p_render_target, RID p_texture) { - RendererRD::TextureStorage *texture_storage = RendererRD::TextureStorage::get_singleton(); - RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); - RendererRD::CopyEffects *copy_effects = RendererRD::CopyEffects::get_singleton(); - - if (!debug_uniform_set.is_valid() || !RD::get_singleton()->uniform_set_is_valid(debug_uniform_set)) { - Vector uniforms; - { - RD::Uniform u; - u.binding = 1; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - for (uint32_t i = 0; i < SDFGI::MAX_CASCADES; i++) { - if (i < cascades.size()) { - u.append_id(cascades[i].sdf_tex); - } else { - u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); - } - } - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 2; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - for (uint32_t i = 0; i < SDFGI::MAX_CASCADES; i++) { - if (i < cascades.size()) { - u.append_id(cascades[i].light_tex); - } else { - u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); - } - } - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 3; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - for (uint32_t i = 0; i < SDFGI::MAX_CASCADES; i++) { - if (i < cascades.size()) { - u.append_id(cascades[i].light_aniso_0_tex); - } else { - u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); - } - } - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 4; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - for (uint32_t i = 0; i < SDFGI::MAX_CASCADES; i++) { - if (i < cascades.size()) { - u.append_id(cascades[i].light_aniso_1_tex); - } else { - u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); - } - } - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 5; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.append_id(occlusion_texture); - 
uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 8; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; - u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 9; - u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; - u.append_id(cascades_ubo); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 10; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.append_id(p_texture); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 11; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.append_id(lightprobe_texture); - uniforms.push_back(u); - } - debug_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.debug_shader_version, 0); - } - - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.debug_pipeline); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, debug_uniform_set, 0); - - SDFGIShader::DebugPushConstant push_constant; - push_constant.grid_size[0] = cascade_size; - push_constant.grid_size[1] = cascade_size; - push_constant.grid_size[2] = cascade_size; - push_constant.max_cascades = cascades.size(); - push_constant.screen_size[0] = p_width; - push_constant.screen_size[1] = p_height; - push_constant.probe_axis_size = probe_axis_count; - push_constant.use_occlusion = uses_occlusion; - push_constant.y_mult = y_mult; - - Vector2 vp_half = p_projection.get_viewport_half_extents(); - push_constant.cam_extent[0] = vp_half.x; - push_constant.cam_extent[1] = vp_half.y; - push_constant.cam_extent[2] = -p_projection.get_z_near(); - - push_constant.cam_transform[0] = p_transform.basis.rows[0][0]; - push_constant.cam_transform[1] = p_transform.basis.rows[1][0]; - push_constant.cam_transform[2] = p_transform.basis.rows[2][0]; - push_constant.cam_transform[3] = 0; 
- push_constant.cam_transform[4] = p_transform.basis.rows[0][1]; - push_constant.cam_transform[5] = p_transform.basis.rows[1][1]; - push_constant.cam_transform[6] = p_transform.basis.rows[2][1]; - push_constant.cam_transform[7] = 0; - push_constant.cam_transform[8] = p_transform.basis.rows[0][2]; - push_constant.cam_transform[9] = p_transform.basis.rows[1][2]; - push_constant.cam_transform[10] = p_transform.basis.rows[2][2]; - push_constant.cam_transform[11] = 0; - push_constant.cam_transform[12] = p_transform.origin.x; - push_constant.cam_transform[13] = p_transform.origin.y; - push_constant.cam_transform[14] = p_transform.origin.z; - push_constant.cam_transform[15] = 1; - - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::DebugPushConstant)); - - RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_width, p_height, 1); - RD::get_singleton()->compute_list_end(); - - Size2 rtsize = texture_storage->render_target_get_size(p_render_target); - copy_effects->copy_to_fb_rect(p_texture, texture_storage->render_target_get_rd_framebuffer(p_render_target), Rect2(Vector2(), rtsize), true); -} - -void RendererSceneGIRD::SDFGI::debug_probes(RD::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform) { - RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); - - SDFGIShader::DebugProbesPushConstant push_constant; - - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - push_constant.projection[i * 4 + j] = p_camera_with_transform.matrix[i][j]; - } - } - - //gen spheres from strips - uint32_t band_points = 16; - push_constant.band_power = 4; - push_constant.sections_in_band = ((band_points / 2) - 1); - push_constant.band_mask = band_points - 2; - push_constant.section_arc = Math_TAU / float(push_constant.sections_in_band); - push_constant.y_mult = y_mult; - - uint32_t total_points = push_constant.sections_in_band * band_points; - 
uint32_t total_probes = probe_axis_count * probe_axis_count * probe_axis_count; - - push_constant.grid_size[0] = cascade_size; - push_constant.grid_size[1] = cascade_size; - push_constant.grid_size[2] = cascade_size; - push_constant.cascade = 0; - - push_constant.probe_axis_size = probe_axis_count; - - if (!debug_probes_uniform_set.is_valid() || !RD::get_singleton()->uniform_set_is_valid(debug_probes_uniform_set)) { - Vector uniforms; - { - RD::Uniform u; - u.binding = 1; - u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; - u.append_id(cascades_ubo); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 2; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.append_id(lightprobe_texture); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 3; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; - u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 4; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.append_id(occlusion_texture); - uniforms.push_back(u); - } - - debug_probes_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->sdfgi_shader.debug_probes.version_get_shader(gi->sdfgi_shader.debug_probes_shader, 0), 0); - } - - RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, gi->sdfgi_shader.debug_probes_pipeline[SDFGIShader::PROBE_DEBUG_PROBES].get_render_pipeline(RD::INVALID_FORMAT_ID, RD::get_singleton()->framebuffer_get_format(p_framebuffer))); - RD::get_singleton()->draw_list_bind_uniform_set(p_draw_list, debug_probes_uniform_set, 0); - RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(SDFGIShader::DebugProbesPushConstant)); - RD::get_singleton()->draw_list_draw(p_draw_list, false, total_probes, total_points); - - if (gi->sdfgi_debug_probe_dir != Vector3()) { - uint32_t cascade = 0; - Vector3 offset = Vector3((Vector3i(1, 1, 1) * 
-int32_t(cascade_size >> 1) + cascades[cascade].position)) * cascades[cascade].cell_size * Vector3(1.0, 1.0 / y_mult, 1.0); - Vector3 probe_size = cascades[cascade].cell_size * (cascade_size / SDFGI::PROBE_DIVISOR) * Vector3(1.0, 1.0 / y_mult, 1.0); - Vector3 ray_from = gi->sdfgi_debug_probe_pos; - Vector3 ray_to = gi->sdfgi_debug_probe_pos + gi->sdfgi_debug_probe_dir * cascades[cascade].cell_size * Math::sqrt(3.0) * cascade_size; - float sphere_radius = 0.2; - float closest_dist = 1e20; - gi->sdfgi_debug_probe_enabled = false; - - Vector3i probe_from = cascades[cascade].position / (cascade_size / SDFGI::PROBE_DIVISOR); - for (int i = 0; i < (SDFGI::PROBE_DIVISOR + 1); i++) { - for (int j = 0; j < (SDFGI::PROBE_DIVISOR + 1); j++) { - for (int k = 0; k < (SDFGI::PROBE_DIVISOR + 1); k++) { - Vector3 pos = offset + probe_size * Vector3(i, j, k); - Vector3 res; - if (Geometry3D::segment_intersects_sphere(ray_from, ray_to, pos, sphere_radius, &res)) { - float d = ray_from.distance_to(res); - if (d < closest_dist) { - closest_dist = d; - gi->sdfgi_debug_probe_enabled = true; - gi->sdfgi_debug_probe_index = probe_from + Vector3i(i, j, k); - } - } - } - } - } - - gi->sdfgi_debug_probe_dir = Vector3(); - } - - if (gi->sdfgi_debug_probe_enabled) { - uint32_t cascade = 0; - uint32_t probe_cells = (cascade_size / SDFGI::PROBE_DIVISOR); - Vector3i probe_from = cascades[cascade].position / probe_cells; - Vector3i ofs = gi->sdfgi_debug_probe_index - probe_from; - if (ofs.x < 0 || ofs.y < 0 || ofs.z < 0) { - return; - } - if (ofs.x > SDFGI::PROBE_DIVISOR || ofs.y > SDFGI::PROBE_DIVISOR || ofs.z > SDFGI::PROBE_DIVISOR) { - return; - } - - uint32_t mult = (SDFGI::PROBE_DIVISOR + 1); - uint32_t index = ofs.z * mult * mult + ofs.y * mult + ofs.x; - - push_constant.probe_debug_index = index; - - uint32_t cell_count = probe_cells * 2 * probe_cells * 2 * probe_cells * 2; - - RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, 
gi->sdfgi_shader.debug_probes_pipeline[SDFGIShader::PROBE_DEBUG_VISIBILITY].get_render_pipeline(RD::INVALID_FORMAT_ID, RD::get_singleton()->framebuffer_get_format(p_framebuffer))); - RD::get_singleton()->draw_list_bind_uniform_set(p_draw_list, debug_probes_uniform_set, 0); - RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(SDFGIShader::DebugProbesPushConstant)); - RD::get_singleton()->draw_list_draw(p_draw_list, false, cell_count, total_points); - } -} - -void RendererSceneGIRD::SDFGI::pre_process_gi(const Transform3D &p_transform, RenderDataRD *p_render_data, RendererSceneRenderRD *p_scene_render) { - /* Update general SDFGI Buffer */ - - SDFGIData sdfgi_data; - - sdfgi_data.grid_size[0] = cascade_size; - sdfgi_data.grid_size[1] = cascade_size; - sdfgi_data.grid_size[2] = cascade_size; - - sdfgi_data.max_cascades = cascades.size(); - sdfgi_data.probe_axis_size = probe_axis_count; - sdfgi_data.cascade_probe_size[0] = sdfgi_data.probe_axis_size - 1; //float version for performance - sdfgi_data.cascade_probe_size[1] = sdfgi_data.probe_axis_size - 1; - sdfgi_data.cascade_probe_size[2] = sdfgi_data.probe_axis_size - 1; - - float csize = cascade_size; - sdfgi_data.probe_to_uvw = 1.0 / float(sdfgi_data.cascade_probe_size[0]); - sdfgi_data.use_occlusion = uses_occlusion; - //sdfgi_data.energy = energy; - - sdfgi_data.y_mult = y_mult; - - float cascade_voxel_size = (csize / sdfgi_data.cascade_probe_size[0]); - float occlusion_clamp = (cascade_voxel_size - 0.5) / cascade_voxel_size; - sdfgi_data.occlusion_clamp[0] = occlusion_clamp; - sdfgi_data.occlusion_clamp[1] = occlusion_clamp; - sdfgi_data.occlusion_clamp[2] = occlusion_clamp; - sdfgi_data.normal_bias = (normal_bias / csize) * sdfgi_data.cascade_probe_size[0]; - - //vec2 tex_pixel_size = 1.0 / vec2(ivec2( (OCT_SIZE+2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE+2) * params.probe_axis_size ) ); - //vec3 probe_uv_offset = (ivec3(OCT_SIZE+2,OCT_SIZE+2,(OCT_SIZE+2) * 
params.probe_axis_size)) * tex_pixel_size.xyx; - - uint32_t oct_size = SDFGI::LIGHTPROBE_OCT_SIZE; - - sdfgi_data.lightprobe_tex_pixel_size[0] = 1.0 / ((oct_size + 2) * sdfgi_data.probe_axis_size * sdfgi_data.probe_axis_size); - sdfgi_data.lightprobe_tex_pixel_size[1] = 1.0 / ((oct_size + 2) * sdfgi_data.probe_axis_size); - sdfgi_data.lightprobe_tex_pixel_size[2] = 1.0; - - sdfgi_data.energy = energy; - - sdfgi_data.lightprobe_uv_offset[0] = float(oct_size + 2) * sdfgi_data.lightprobe_tex_pixel_size[0]; - sdfgi_data.lightprobe_uv_offset[1] = float(oct_size + 2) * sdfgi_data.lightprobe_tex_pixel_size[1]; - sdfgi_data.lightprobe_uv_offset[2] = float((oct_size + 2) * sdfgi_data.probe_axis_size) * sdfgi_data.lightprobe_tex_pixel_size[0]; - - sdfgi_data.occlusion_renormalize[0] = 0.5; - sdfgi_data.occlusion_renormalize[1] = 1.0; - sdfgi_data.occlusion_renormalize[2] = 1.0 / float(sdfgi_data.max_cascades); - - int32_t probe_divisor = cascade_size / SDFGI::PROBE_DIVISOR; - - for (uint32_t i = 0; i < sdfgi_data.max_cascades; i++) { - SDFGIData::ProbeCascadeData &c = sdfgi_data.cascades[i]; - Vector3 pos = Vector3((Vector3i(1, 1, 1) * -int32_t(cascade_size >> 1) + cascades[i].position)) * cascades[i].cell_size; - Vector3 cam_origin = p_transform.origin; - cam_origin.y *= y_mult; - pos -= cam_origin; //make pos local to camera, to reduce numerical error - c.position[0] = pos.x; - c.position[1] = pos.y; - c.position[2] = pos.z; - c.to_probe = 1.0 / (float(cascade_size) * cascades[i].cell_size / float(probe_axis_count - 1)); - - Vector3i probe_ofs = cascades[i].position / probe_divisor; - c.probe_world_offset[0] = probe_ofs.x; - c.probe_world_offset[1] = probe_ofs.y; - c.probe_world_offset[2] = probe_ofs.z; - - c.to_cell = 1.0 / cascades[i].cell_size; - } - - RD::get_singleton()->buffer_update(gi->sdfgi_ubo, 0, sizeof(SDFGIData), &sdfgi_data, RD::BARRIER_MASK_COMPUTE); - - /* Update dynamic lights in SDFGI cascades */ - - for (uint32_t i = 0; i < cascades.size(); i++) { - 
SDFGI::Cascade &cascade = cascades[i]; - - SDFGIShader::Light lights[SDFGI::MAX_DYNAMIC_LIGHTS]; - uint32_t idx = 0; - for (uint32_t j = 0; j < (uint32_t)p_scene_render->render_state.sdfgi_update_data->directional_lights->size(); j++) { - if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { - break; - } - - RendererSceneRenderRD::LightInstance *li = p_scene_render->light_instance_owner.get_or_null(p_scene_render->render_state.sdfgi_update_data->directional_lights->get(j)); - ERR_CONTINUE(!li); - - if (RSG::light_storage->light_directional_get_sky_mode(li->light) == RS::LIGHT_DIRECTIONAL_SKY_MODE_SKY_ONLY) { - continue; - } - - Vector3 dir = -li->transform.basis.get_column(Vector3::AXIS_Z); - dir.y *= y_mult; - dir.normalize(); - lights[idx].direction[0] = dir.x; - lights[idx].direction[1] = dir.y; - lights[idx].direction[2] = dir.z; - Color color = RSG::light_storage->light_get_color(li->light); - color = color.srgb_to_linear(); - lights[idx].color[0] = color.r; - lights[idx].color[1] = color.g; - lights[idx].color[2] = color.b; - lights[idx].type = RS::LIGHT_DIRECTIONAL; - lights[idx].energy = RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY) * RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_INDIRECT_ENERGY); - lights[idx].has_shadow = RSG::light_storage->light_has_shadow(li->light); - - idx++; - } - - AABB cascade_aabb; - cascade_aabb.position = Vector3((Vector3i(1, 1, 1) * -int32_t(cascade_size >> 1) + cascade.position)) * cascade.cell_size; - cascade_aabb.size = Vector3(1, 1, 1) * cascade_size * cascade.cell_size; - - for (uint32_t j = 0; j < p_scene_render->render_state.sdfgi_update_data->positional_light_count; j++) { - if (idx == SDFGI::MAX_DYNAMIC_LIGHTS) { - break; - } - - RendererSceneRenderRD::LightInstance *li = p_scene_render->light_instance_owner.get_or_null(p_scene_render->render_state.sdfgi_update_data->positional_light_instances[j]); - ERR_CONTINUE(!li); - - uint32_t max_sdfgi_cascade = 
RSG::light_storage->light_get_max_sdfgi_cascade(li->light); - if (i > max_sdfgi_cascade) { - continue; - } - - if (!cascade_aabb.intersects(li->aabb)) { - continue; - } - - Vector3 dir = -li->transform.basis.get_column(Vector3::AXIS_Z); - //faster to not do this here - //dir.y *= y_mult; - //dir.normalize(); - lights[idx].direction[0] = dir.x; - lights[idx].direction[1] = dir.y; - lights[idx].direction[2] = dir.z; - Vector3 pos = li->transform.origin; - pos.y *= y_mult; - lights[idx].position[0] = pos.x; - lights[idx].position[1] = pos.y; - lights[idx].position[2] = pos.z; - Color color = RSG::light_storage->light_get_color(li->light); - color = color.srgb_to_linear(); - lights[idx].color[0] = color.r; - lights[idx].color[1] = color.g; - lights[idx].color[2] = color.b; - lights[idx].type = RSG::light_storage->light_get_type(li->light); - lights[idx].energy = RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY) * RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_INDIRECT_ENERGY); - lights[idx].has_shadow = RSG::light_storage->light_has_shadow(li->light); - lights[idx].attenuation = RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_ATTENUATION); - lights[idx].radius = RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_RANGE); - lights[idx].cos_spot_angle = Math::cos(Math::deg2rad(RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ANGLE))); - lights[idx].inv_spot_attenuation = 1.0f / RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ATTENUATION); - - idx++; - } - - if (idx > 0) { - RD::get_singleton()->buffer_update(cascade.lights_buffer, 0, idx * sizeof(SDFGIShader::Light), lights, RD::BARRIER_MASK_COMPUTE); - } - - cascade_dynamic_light_count[i] = idx; - } -} - -void RendererSceneGIRD::SDFGI::render_region(RID p_render_buffers, int p_region, const PagedArray &p_instances, RendererSceneRenderRD *p_scene_render) { - //print_line("rendering region " + itos(p_region)); - 
RendererSceneRenderRD::RenderBuffers *rb = p_scene_render->render_buffers_owner.get_or_null(p_render_buffers); - ERR_FAIL_COND(!rb); // we wouldn't be here if this failed but... - AABB bounds; - Vector3i from; - Vector3i size; - - int cascade_prev = get_pending_region_data(p_region - 1, from, size, bounds); - int cascade_next = get_pending_region_data(p_region + 1, from, size, bounds); - int cascade = get_pending_region_data(p_region, from, size, bounds); - ERR_FAIL_COND(cascade < 0); - - if (cascade_prev != cascade) { - //initialize render - RD::get_singleton()->texture_clear(render_albedo, Color(0, 0, 0, 0), 0, 1, 0, 1); - RD::get_singleton()->texture_clear(render_emission, Color(0, 0, 0, 0), 0, 1, 0, 1); - RD::get_singleton()->texture_clear(render_emission_aniso, Color(0, 0, 0, 0), 0, 1, 0, 1); - RD::get_singleton()->texture_clear(render_geom_facing, Color(0, 0, 0, 0), 0, 1, 0, 1); - } - - //print_line("rendering cascade " + itos(p_region) + " objects: " + itos(p_cull_count) + " bounds: " + bounds + " from: " + from + " size: " + size + " cell size: " + rtos(cascades[cascade].cell_size)); - p_scene_render->_render_sdfgi(p_render_buffers, from, size, bounds, p_instances, render_albedo, render_emission, render_emission_aniso, render_geom_facing); - - if (cascade_next != cascade) { - RD::get_singleton()->draw_command_begin_label("SDFGI Pre-Process Cascade"); - - RENDER_TIMESTAMP("> SDFGI Update SDF"); - //done rendering! 
must update SDF - //clear dispatch indirect data - - SDFGIShader::PreprocessPushConstant push_constant; - memset(&push_constant, 0, sizeof(SDFGIShader::PreprocessPushConstant)); - - RENDER_TIMESTAMP("SDFGI Scroll SDF"); - - //scroll - if (cascades[cascade].dirty_regions != SDFGI::Cascade::DIRTY_ALL) { - //for scroll - Vector3i dirty = cascades[cascade].dirty_regions; - push_constant.scroll[0] = dirty.x; - push_constant.scroll[1] = dirty.y; - push_constant.scroll[2] = dirty.z; - } else { - //for no scroll - push_constant.scroll[0] = 0; - push_constant.scroll[1] = 0; - push_constant.scroll[2] = 0; - } - - cascades[cascade].all_dynamic_lights_dirty = true; - - push_constant.grid_size = cascade_size; - push_constant.cascade = cascade; - - if (cascades[cascade].dirty_regions != SDFGI::Cascade::DIRTY_ALL) { - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - - //must pre scroll existing data because not all is dirty - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_SCROLL]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascades[cascade].scroll_uniform_set, 0); - - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cascades[cascade].solid_cell_dispatch_buffer, 0); - // no barrier do all together - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_SCROLL_OCCLUSION]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascades[cascade].scroll_occlusion_uniform_set, 0); - - Vector3i dirty = cascades[cascade].dirty_regions; - Vector3i groups; - groups.x = cascade_size - ABS(dirty.x); - groups.y = cascade_size - ABS(dirty.y); - groups.z = cascade_size - ABS(dirty.z); - - 
RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, groups.x, groups.y, groups.z); - - //no barrier, continue together - - { - //scroll probes and their history also - - SDFGIShader::IntegratePushConstant ipush_constant; - ipush_constant.grid_size[1] = cascade_size; - ipush_constant.grid_size[2] = cascade_size; - ipush_constant.grid_size[0] = cascade_size; - ipush_constant.max_cascades = cascades.size(); - ipush_constant.probe_axis_size = probe_axis_count; - ipush_constant.history_index = 0; - ipush_constant.history_size = history_size; - ipush_constant.ray_count = 0; - ipush_constant.ray_bias = 0; - ipush_constant.sky_mode = 0; - ipush_constant.sky_energy = 0; - ipush_constant.sky_color[0] = 0; - ipush_constant.sky_color[1] = 0; - ipush_constant.sky_color[2] = 0; - ipush_constant.y_mult = y_mult; - ipush_constant.store_ambient_texture = false; - - ipush_constant.image_size[0] = probe_axis_count * probe_axis_count; - ipush_constant.image_size[1] = probe_axis_count; - - int32_t probe_divisor = cascade_size / SDFGI::PROBE_DIVISOR; - ipush_constant.cascade = cascade; - ipush_constant.world_offset[0] = cascades[cascade].position.x / probe_divisor; - ipush_constant.world_offset[1] = cascades[cascade].position.y / probe_divisor; - ipush_constant.world_offset[2] = cascades[cascade].position.z / probe_divisor; - - ipush_constant.scroll[0] = dirty.x / probe_divisor; - ipush_constant.scroll[1] = dirty.y / probe_divisor; - ipush_constant.scroll[2] = dirty.z / probe_divisor; - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.integrate_pipeline[SDFGIShader::INTEGRATE_MODE_SCROLL]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascades[cascade].integrate_uniform_set, 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, 
gi->sdfgi_shader.integrate_default_sky_uniform_set, 1); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ipush_constant, sizeof(SDFGIShader::IntegratePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, probe_axis_count * probe_axis_count, probe_axis_count, 1); - - RD::get_singleton()->compute_list_add_barrier(compute_list); - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.integrate_pipeline[SDFGIShader::INTEGRATE_MODE_SCROLL_STORE]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascades[cascade].integrate_uniform_set, 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, gi->sdfgi_shader.integrate_default_sky_uniform_set, 1); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ipush_constant, sizeof(SDFGIShader::IntegratePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, probe_axis_count * probe_axis_count, probe_axis_count, 1); - - RD::get_singleton()->compute_list_add_barrier(compute_list); - - if (bounce_feedback > 0.0) { - //multibounce requires this to be stored so direct light can read from it - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.integrate_pipeline[SDFGIShader::INTEGRATE_MODE_STORE]); - - //convert to octahedral to store - ipush_constant.image_size[0] *= SDFGI::LIGHTPROBE_OCT_SIZE; - ipush_constant.image_size[1] *= SDFGI::LIGHTPROBE_OCT_SIZE; - - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascades[cascade].integrate_uniform_set, 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, gi->sdfgi_shader.integrate_default_sky_uniform_set, 1); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &ipush_constant, sizeof(SDFGIShader::IntegratePushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, probe_axis_count * probe_axis_count * 
SDFGI::LIGHTPROBE_OCT_SIZE, probe_axis_count * SDFGI::LIGHTPROBE_OCT_SIZE, 1); - } - } - - //ok finally barrier - RD::get_singleton()->compute_list_end(); - } - - //clear dispatch indirect data - uint32_t dispatch_indirct_data[4] = { 0, 0, 0, 0 }; - RD::get_singleton()->buffer_update(cascades[cascade].solid_cell_dispatch_buffer, 0, sizeof(uint32_t) * 4, dispatch_indirct_data); - - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - - bool half_size = true; //much faster, very little difference - static const int optimized_jf_group_size = 8; - - if (half_size) { - push_constant.grid_size >>= 1; - - uint32_t cascade_half_size = cascade_size >> 1; - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE_HALF]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdf_initialize_half_uniform_set, 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size); - RD::get_singleton()->compute_list_add_barrier(compute_list); - - //must start with regular jumpflood - - push_constant.half_size = true; - { - RENDER_TIMESTAMP("SDFGI Jump Flood (Half-Size)"); - - uint32_t s = cascade_half_size; - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_JUMP_FLOOD]); - - int jf_us = 0; - //start with regular jump flood for very coarse reads, as this is impossible to optimize - while (s > 1) { - s /= 2; - push_constant.step_size = s; - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, jump_flood_half_uniform_set[jf_us], 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); - 
RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size); - RD::get_singleton()->compute_list_add_barrier(compute_list); - jf_us = jf_us == 0 ? 1 : 0; - - if (cascade_half_size / (s / 2) >= optimized_jf_group_size) { - break; - } - } - - RENDER_TIMESTAMP("SDFGI Jump Flood Optimized (Half-Size)"); - - //continue with optimized jump flood for smaller reads - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_JUMP_FLOOD_OPTIMIZED]); - while (s > 1) { - s /= 2; - push_constant.step_size = s; - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, jump_flood_half_uniform_set[jf_us], 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_half_size, cascade_half_size, cascade_half_size); - RD::get_singleton()->compute_list_add_barrier(compute_list); - jf_us = jf_us == 0 ? 
1 : 0; - } - } - - // restore grid size for last passes - push_constant.grid_size = cascade_size; - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_JUMP_FLOOD_UPSCALE]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdf_upscale_uniform_set, 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_size, cascade_size, cascade_size); - RD::get_singleton()->compute_list_add_barrier(compute_list); - - //run one pass of fullsize jumpflood to fix up half size arctifacts - - push_constant.half_size = false; - push_constant.step_size = 1; - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_JUMP_FLOOD_OPTIMIZED]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, jump_flood_uniform_set[upscale_jfa_uniform_set_index], 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_size, cascade_size, cascade_size); - RD::get_singleton()->compute_list_add_barrier(compute_list); - - } else { - //full size jumpflood - RENDER_TIMESTAMP("SDFGI Jump Flood (Full-Size)"); - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_JUMP_FLOOD_INITIALIZE]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, sdf_initialize_uniform_set, 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_size, cascade_size, cascade_size); - - 
RD::get_singleton()->compute_list_add_barrier(compute_list); - - push_constant.half_size = false; - { - uint32_t s = cascade_size; - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_JUMP_FLOOD]); - - int jf_us = 0; - //start with regular jump flood for very coarse reads, as this is impossible to optimize - while (s > 1) { - s /= 2; - push_constant.step_size = s; - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, jump_flood_uniform_set[jf_us], 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_size, cascade_size, cascade_size); - RD::get_singleton()->compute_list_add_barrier(compute_list); - jf_us = jf_us == 0 ? 1 : 0; - - if (cascade_size / (s / 2) >= optimized_jf_group_size) { - break; - } - } - - RENDER_TIMESTAMP("SDFGI Jump Flood Optimized (Full-Size)"); - - //continue with optimized jump flood for smaller reads - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_JUMP_FLOOD_OPTIMIZED]); - while (s > 1) { - s /= 2; - push_constant.step_size = s; - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, jump_flood_uniform_set[jf_us], 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_size, cascade_size, cascade_size); - RD::get_singleton()->compute_list_add_barrier(compute_list); - jf_us = jf_us == 0 ? 
1 : 0; - } - } - } - - RENDER_TIMESTAMP("SDFGI Occlusion"); - - // occlusion - { - uint32_t probe_size = cascade_size / SDFGI::PROBE_DIVISOR; - Vector3i probe_global_pos = cascades[cascade].position / probe_size; - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_OCCLUSION]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, occlusion_uniform_set, 0); - for (int i = 0; i < 8; i++) { - //dispatch all at once for performance - Vector3i offset(i & 1, (i >> 1) & 1, (i >> 2) & 1); - - if ((probe_global_pos.x & 1) != 0) { - offset.x = (offset.x + 1) & 1; - } - if ((probe_global_pos.y & 1) != 0) { - offset.y = (offset.y + 1) & 1; - } - if ((probe_global_pos.z & 1) != 0) { - offset.z = (offset.z + 1) & 1; - } - push_constant.probe_offset[0] = offset.x; - push_constant.probe_offset[1] = offset.y; - push_constant.probe_offset[2] = offset.z; - push_constant.occlusion_index = i; - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); - - Vector3i groups = Vector3i(probe_size + 1, probe_size + 1, probe_size + 1) - offset; //if offset, it's one less probe per axis to compute - RD::get_singleton()->compute_list_dispatch(compute_list, groups.x, groups.y, groups.z); - } - RD::get_singleton()->compute_list_add_barrier(compute_list); - } - - RENDER_TIMESTAMP("SDFGI Store"); - - // store - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.preprocess_pipeline[SDFGIShader::PRE_PROCESS_STORE]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cascades[cascade].sdf_store_uniform_set, 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(SDFGIShader::PreprocessPushConstant)); - RD::get_singleton()->compute_list_dispatch_threads(compute_list, cascade_size, cascade_size, cascade_size); - - 
RD::get_singleton()->compute_list_end(); - - //clear these textures, as they will have previous garbage on next draw - RD::get_singleton()->texture_clear(cascades[cascade].light_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); - RD::get_singleton()->texture_clear(cascades[cascade].light_aniso_0_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); - RD::get_singleton()->texture_clear(cascades[cascade].light_aniso_1_tex, Color(0, 0, 0, 0), 0, 1, 0, 1); - -#if 0 - Vector data = RD::get_singleton()->texture_get_data(cascades[cascade].sdf, 0); - Ref img; - img.instantiate(); - for (uint32_t i = 0; i < cascade_size; i++) { - Vector subarr = data.slice(128 * 128 * i, 128 * 128 * (i + 1)); - img->create(cascade_size, cascade_size, false, Image::FORMAT_L8, subarr); - img->save_png("res://cascade_sdf_" + itos(cascade) + "_" + itos(i) + ".png"); - } - - //finalize render and update sdf -#endif - -#if 0 - Vector data = RD::get_singleton()->texture_get_data(render_albedo, 0); - Ref img; - img.instantiate(); - for (uint32_t i = 0; i < cascade_size; i++) { - Vector subarr = data.slice(128 * 128 * i * 2, 128 * 128 * (i + 1) * 2); - img->createcascade_size, cascade_size, false, Image::FORMAT_RGB565, subarr); - img->convert(Image::FORMAT_RGBA8); - img->save_png("res://cascade_" + itos(cascade) + "_" + itos(i) + ".png"); - } - - //finalize render and update sdf -#endif - - RENDER_TIMESTAMP("< SDFGI Update SDF"); - RD::get_singleton()->draw_command_end_label(); - } -} - -void RendererSceneGIRD::SDFGI::render_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray *p_positional_light_cull_result, RendererSceneRenderRD *p_scene_render) { - RendererSceneRenderRD::RenderBuffers *rb = p_scene_render->render_buffers_owner.get_or_null(p_render_buffers); - ERR_FAIL_COND(!rb); // we wouldn't be here if this failed but... 
- - RD::get_singleton()->draw_command_begin_label("SDFGI Render Static Lights"); - - update_cascades(); - - SDFGIShader::Light lights[SDFGI::MAX_STATIC_LIGHTS]; - uint32_t light_count[SDFGI::MAX_STATIC_LIGHTS]; - - for (uint32_t i = 0; i < p_cascade_count; i++) { - ERR_CONTINUE(p_cascade_indices[i] >= cascades.size()); - - SDFGI::Cascade &cc = cascades[p_cascade_indices[i]]; - - { //fill light buffer - - AABB cascade_aabb; - cascade_aabb.position = Vector3((Vector3i(1, 1, 1) * -int32_t(cascade_size >> 1) + cc.position)) * cc.cell_size; - cascade_aabb.size = Vector3(1, 1, 1) * cascade_size * cc.cell_size; - - int idx = 0; - - for (uint32_t j = 0; j < (uint32_t)p_positional_light_cull_result[i].size(); j++) { - if (idx == SDFGI::MAX_STATIC_LIGHTS) { - break; - } - - RendererSceneRenderRD::LightInstance *li = p_scene_render->light_instance_owner.get_or_null(p_positional_light_cull_result[i][j]); - ERR_CONTINUE(!li); - - uint32_t max_sdfgi_cascade = RSG::light_storage->light_get_max_sdfgi_cascade(li->light); - if (p_cascade_indices[i] > max_sdfgi_cascade) { - continue; - } - - if (!cascade_aabb.intersects(li->aabb)) { - continue; - } - - lights[idx].type = RSG::light_storage->light_get_type(li->light); - - Vector3 dir = -li->transform.basis.get_column(Vector3::AXIS_Z); - if (lights[idx].type == RS::LIGHT_DIRECTIONAL) { - dir.y *= y_mult; //only makes sense for directional - dir.normalize(); - } - lights[idx].direction[0] = dir.x; - lights[idx].direction[1] = dir.y; - lights[idx].direction[2] = dir.z; - Vector3 pos = li->transform.origin; - pos.y *= y_mult; - lights[idx].position[0] = pos.x; - lights[idx].position[1] = pos.y; - lights[idx].position[2] = pos.z; - Color color = RSG::light_storage->light_get_color(li->light); - color = color.srgb_to_linear(); - lights[idx].color[0] = color.r; - lights[idx].color[1] = color.g; - lights[idx].color[2] = color.b; - lights[idx].energy = RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_ENERGY) * 
RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_INDIRECT_ENERGY); - lights[idx].has_shadow = RSG::light_storage->light_has_shadow(li->light); - lights[idx].attenuation = RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_ATTENUATION); - lights[idx].radius = RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_RANGE); - lights[idx].cos_spot_angle = Math::cos(Math::deg2rad(RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ANGLE))); - lights[idx].inv_spot_attenuation = 1.0f / RSG::light_storage->light_get_param(li->light, RS::LIGHT_PARAM_SPOT_ATTENUATION); - - idx++; - } - - if (idx > 0) { - RD::get_singleton()->buffer_update(cc.lights_buffer, 0, idx * sizeof(SDFGIShader::Light), lights); - } - - light_count[i] = idx; - } - } - - /* Static Lights */ - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->sdfgi_shader.direct_light_pipeline[SDFGIShader::DIRECT_LIGHT_MODE_STATIC]); - - SDFGIShader::DirectLightPushConstant dl_push_constant; - - dl_push_constant.grid_size[0] = cascade_size; - dl_push_constant.grid_size[1] = cascade_size; - dl_push_constant.grid_size[2] = cascade_size; - dl_push_constant.max_cascades = cascades.size(); - dl_push_constant.probe_axis_size = probe_axis_count; - dl_push_constant.bounce_feedback = 0.0; // this is static light, do not multibounce yet - dl_push_constant.y_mult = y_mult; - dl_push_constant.use_occlusion = uses_occlusion; - - //all must be processed - dl_push_constant.process_offset = 0; - dl_push_constant.process_increment = 1; - - for (uint32_t i = 0; i < p_cascade_count; i++) { - ERR_CONTINUE(p_cascade_indices[i] >= cascades.size()); - - SDFGI::Cascade &cc = cascades[p_cascade_indices[i]]; - - dl_push_constant.light_count = light_count[i]; - dl_push_constant.cascade = p_cascade_indices[i]; - - if (dl_push_constant.light_count > 0) { - 
RD::get_singleton()->compute_list_bind_uniform_set(compute_list, cc.sdf_direct_light_uniform_set, 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &dl_push_constant, sizeof(SDFGIShader::DirectLightPushConstant)); - RD::get_singleton()->compute_list_dispatch_indirect(compute_list, cc.solid_cell_dispatch_buffer, 0); - } - } - - RD::get_singleton()->compute_list_end(); - - RD::get_singleton()->draw_command_end_label(); -} - -//////////////////////////////////////////////////////////////////////////////// -// VoxelGIInstance - -void RendererSceneGIRD::VoxelGIInstance::update(bool p_update_light_instances, const Vector &p_light_instances, const PagedArray &p_dynamic_objects, RendererSceneRenderRD *p_scene_render) { - RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); - - uint32_t data_version = storage->voxel_gi_get_data_version(probe); - - // (RE)CREATE IF NEEDED - - if (last_probe_data_version != data_version) { - //need to re-create everything - if (texture.is_valid()) { - RD::get_singleton()->free(texture); - RD::get_singleton()->free(write_buffer); - mipmaps.clear(); - } - - for (int i = 0; i < dynamic_maps.size(); i++) { - RD::get_singleton()->free(dynamic_maps[i].texture); - RD::get_singleton()->free(dynamic_maps[i].depth); - } - - dynamic_maps.clear(); - - Vector3i octree_size = storage->voxel_gi_get_octree_size(probe); - - if (octree_size != Vector3i()) { - //can create a 3D texture - Vector levels = storage->voxel_gi_get_level_counts(probe); - - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; - tf.width = octree_size.x; - tf.height = octree_size.y; - tf.depth = octree_size.z; - tf.texture_type = RD::TEXTURE_TYPE_3D; - tf.mipmaps = levels.size(); - - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; - - texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); - - 
RD::get_singleton()->texture_clear(texture, Color(0, 0, 0, 0), 0, levels.size(), 0, 1); - - { - int total_elements = 0; - for (int i = 0; i < levels.size(); i++) { - total_elements += levels[i]; - } - - write_buffer = RD::get_singleton()->storage_buffer_create(total_elements * 16); - } - - for (int i = 0; i < levels.size(); i++) { - VoxelGIInstance::Mipmap mipmap; - mipmap.texture = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), texture, 0, i, 1, RD::TEXTURE_SLICE_3D); - mipmap.level = levels.size() - i - 1; - mipmap.cell_offset = 0; - for (uint32_t j = 0; j < mipmap.level; j++) { - mipmap.cell_offset += levels[j]; - } - mipmap.cell_count = levels[mipmap.level]; - - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.binding = 1; - u.append_id(storage->voxel_gi_get_octree_buffer(probe)); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.binding = 2; - u.append_id(storage->voxel_gi_get_data_buffer(probe)); - uniforms.push_back(u); - } - - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.binding = 4; - u.append_id(write_buffer); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.binding = 9; - u.append_id(storage->voxel_gi_get_sdf_texture(probe)); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; - u.binding = 10; - u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); - uniforms.push_back(u); - } - - { - Vector copy_uniforms = uniforms; - if (i == 0) { - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; - u.binding = 3; - u.append_id(gi->voxel_gi_lights_uniform); - copy_uniforms.push_back(u); - } - - mipmap.uniform_set = RD::get_singleton()->uniform_set_create(copy_uniforms, 
gi->voxel_gi_lighting_shader_version_shaders[VOXEL_GI_SHADER_VERSION_COMPUTE_LIGHT], 0); - - copy_uniforms = uniforms; //restore - - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.binding = 5; - u.append_id(texture); - copy_uniforms.push_back(u); - } - mipmap.second_bounce_uniform_set = RD::get_singleton()->uniform_set_create(copy_uniforms, gi->voxel_gi_lighting_shader_version_shaders[VOXEL_GI_SHADER_VERSION_COMPUTE_SECOND_BOUNCE], 0); - } else { - mipmap.uniform_set = RD::get_singleton()->uniform_set_create(copy_uniforms, gi->voxel_gi_lighting_shader_version_shaders[VOXEL_GI_SHADER_VERSION_COMPUTE_MIPMAP], 0); - } - } - - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 5; - u.append_id(mipmap.texture); - uniforms.push_back(u); - } - - mipmap.write_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->voxel_gi_lighting_shader_version_shaders[VOXEL_GI_SHADER_VERSION_WRITE_TEXTURE], 0); - - mipmaps.push_back(mipmap); - } - - { - uint32_t dynamic_map_size = MAX(MAX(octree_size.x, octree_size.y), octree_size.z); - uint32_t oversample = nearest_power_of_2_templated(4); - int mipmap_index = 0; - - while (mipmap_index < mipmaps.size()) { - VoxelGIInstance::DynamicMap dmap; - - if (oversample > 0) { - dmap.size = dynamic_map_size * (1 << oversample); - dmap.mipmap = -1; - oversample--; - } else { - dmap.size = dynamic_map_size >> mipmap_index; - dmap.mipmap = mipmap_index; - mipmap_index++; - } - - RD::TextureFormat dtf; - dtf.width = dmap.size; - dtf.height = dmap.size; - dtf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; - dtf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT; - - if (dynamic_maps.size() == 0) { - dtf.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; - } - dmap.texture = RD::get_singleton()->texture_create(dtf, RD::TextureView()); - - if (dynamic_maps.size() == 0) { - // Render depth for first one. - // Use 16-bit depth when supported to improve performance. 
- dtf.format = RD::get_singleton()->texture_is_format_supported_for_usage(RD::DATA_FORMAT_D16_UNORM, RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) ? RD::DATA_FORMAT_D16_UNORM : RD::DATA_FORMAT_X8_D24_UNORM_PACK32; - dtf.usage_bits = RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; - dmap.fb_depth = RD::get_singleton()->texture_create(dtf, RD::TextureView()); - } - - //just use depth as-is - dtf.format = RD::DATA_FORMAT_R32_SFLOAT; - dtf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; - - dmap.depth = RD::get_singleton()->texture_create(dtf, RD::TextureView()); - - if (dynamic_maps.size() == 0) { - dtf.format = RD::DATA_FORMAT_R8G8B8A8_UNORM; - dtf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; - dmap.albedo = RD::get_singleton()->texture_create(dtf, RD::TextureView()); - dmap.normal = RD::get_singleton()->texture_create(dtf, RD::TextureView()); - dmap.orm = RD::get_singleton()->texture_create(dtf, RD::TextureView()); - - Vector fb; - fb.push_back(dmap.albedo); - fb.push_back(dmap.normal); - fb.push_back(dmap.orm); - fb.push_back(dmap.texture); //emission - fb.push_back(dmap.depth); - fb.push_back(dmap.fb_depth); - - dmap.fb = RD::get_singleton()->framebuffer_create(fb); - - { - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; - u.binding = 3; - u.append_id(gi->voxel_gi_lights_uniform); - uniforms.push_back(u); - } - - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 5; - u.append_id(dmap.albedo); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 6; - u.append_id(dmap.normal); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 7; - u.append_id(dmap.orm); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.binding = 8; - u.append_id(dmap.fb_depth); - 
uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.binding = 9; - u.append_id(storage->voxel_gi_get_sdf_texture(probe)); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; - u.binding = 10; - u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 11; - u.append_id(dmap.texture); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 12; - u.append_id(dmap.depth); - uniforms.push_back(u); - } - - dmap.uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->voxel_gi_lighting_shader_version_shaders[VOXEL_GI_SHADER_VERSION_DYNAMIC_OBJECT_LIGHTING], 0); - } - } else { - bool plot = dmap.mipmap >= 0; - bool write = dmap.mipmap < (mipmaps.size() - 1); - - Vector uniforms; - - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 5; - u.append_id(dynamic_maps[dynamic_maps.size() - 1].texture); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 6; - u.append_id(dynamic_maps[dynamic_maps.size() - 1].depth); - uniforms.push_back(u); - } - - if (write) { - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 7; - u.append_id(dmap.texture); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 8; - u.append_id(dmap.depth); - uniforms.push_back(u); - } - } - - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.binding = 9; - u.append_id(storage->voxel_gi_get_sdf_texture(probe)); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; - u.binding = 10; - 
u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); - uniforms.push_back(u); - } - - if (plot) { - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 11; - u.append_id(mipmaps[dmap.mipmap].texture); - uniforms.push_back(u); - } - } - - dmap.uniform_set = RD::get_singleton()->uniform_set_create( - uniforms, - gi->voxel_gi_lighting_shader_version_shaders[(write && plot) ? VOXEL_GI_SHADER_VERSION_DYNAMIC_SHRINK_WRITE_PLOT : (write ? VOXEL_GI_SHADER_VERSION_DYNAMIC_SHRINK_WRITE : VOXEL_GI_SHADER_VERSION_DYNAMIC_SHRINK_PLOT)], - 0); - } - - dynamic_maps.push_back(dmap); - } - } - } - - last_probe_data_version = data_version; - p_update_light_instances = true; //just in case - - p_scene_render->_base_uniforms_changed(); - } - - // UDPDATE TIME - - if (has_dynamic_object_data) { - //if it has dynamic object data, it needs to be cleared - RD::get_singleton()->texture_clear(texture, Color(0, 0, 0, 0), 0, mipmaps.size(), 0, 1); - } - - uint32_t light_count = 0; - - if (p_update_light_instances || p_dynamic_objects.size() > 0) { - light_count = MIN(gi->voxel_gi_max_lights, (uint32_t)p_light_instances.size()); - - { - Transform3D to_cell = storage->voxel_gi_get_to_cell_xform(probe); - Transform3D to_probe_xform = (transform * to_cell.affine_inverse()).affine_inverse(); - //update lights - - for (uint32_t i = 0; i < light_count; i++) { - VoxelGILight &l = gi->voxel_gi_lights[i]; - RID light_instance = p_light_instances[i]; - RID light = p_scene_render->light_instance_get_base_light(light_instance); - - l.type = RSG::light_storage->light_get_type(light); - if (l.type == RS::LIGHT_DIRECTIONAL && RSG::light_storage->light_directional_get_sky_mode(light) == RS::LIGHT_DIRECTIONAL_SKY_MODE_SKY_ONLY) { - light_count--; - continue; - } - - l.attenuation = RSG::light_storage->light_get_param(light, RS::LIGHT_PARAM_ATTENUATION); - l.energy = 
RSG::light_storage->light_get_param(light, RS::LIGHT_PARAM_ENERGY) * RSG::light_storage->light_get_param(light, RS::LIGHT_PARAM_INDIRECT_ENERGY); - l.radius = to_cell.basis.xform(Vector3(RSG::light_storage->light_get_param(light, RS::LIGHT_PARAM_RANGE), 0, 0)).length(); - Color color = RSG::light_storage->light_get_color(light).srgb_to_linear(); - l.color[0] = color.r; - l.color[1] = color.g; - l.color[2] = color.b; - - l.cos_spot_angle = Math::cos(Math::deg2rad(RSG::light_storage->light_get_param(light, RS::LIGHT_PARAM_SPOT_ANGLE))); - l.inv_spot_attenuation = 1.0f / RSG::light_storage->light_get_param(light, RS::LIGHT_PARAM_SPOT_ATTENUATION); - - Transform3D xform = p_scene_render->light_instance_get_base_transform(light_instance); - - Vector3 pos = to_probe_xform.xform(xform.origin); - Vector3 dir = to_probe_xform.basis.xform(-xform.basis.get_column(2)).normalized(); - - l.position[0] = pos.x; - l.position[1] = pos.y; - l.position[2] = pos.z; - - l.direction[0] = dir.x; - l.direction[1] = dir.y; - l.direction[2] = dir.z; - - l.has_shadow = RSG::light_storage->light_has_shadow(light); - } - - RD::get_singleton()->buffer_update(gi->voxel_gi_lights_uniform, 0, sizeof(VoxelGILight) * light_count, gi->voxel_gi_lights); - } - } - - if (has_dynamic_object_data || p_update_light_instances || p_dynamic_objects.size()) { - // PROCESS MIPMAPS - if (mipmaps.size()) { - //can update mipmaps - - Vector3i probe_size = storage->voxel_gi_get_octree_size(probe); - - VoxelGIPushConstant push_constant; - - push_constant.limits[0] = probe_size.x; - push_constant.limits[1] = probe_size.y; - push_constant.limits[2] = probe_size.z; - push_constant.stack_size = mipmaps.size(); - push_constant.emission_scale = 1.0; - push_constant.propagation = storage->voxel_gi_get_propagation(probe); - push_constant.dynamic_range = storage->voxel_gi_get_dynamic_range(probe); - push_constant.light_count = light_count; - push_constant.aniso_strength = 0; - - /* print_line("probe update to version " + 
itos(last_probe_version)); - print_line("propagation " + rtos(push_constant.propagation)); - print_line("dynrange " + rtos(push_constant.dynamic_range)); - */ - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - - int passes; - if (p_update_light_instances) { - passes = storage->voxel_gi_is_using_two_bounces(probe) ? 2 : 1; - } else { - passes = 1; //only re-blitting is necessary - } - int wg_size = 64; - int64_t wg_limit_x = (int64_t)RD::get_singleton()->limit_get(RD::LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_X); - - for (int pass = 0; pass < passes; pass++) { - if (p_update_light_instances) { - for (int i = 0; i < mipmaps.size(); i++) { - if (i == 0) { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->voxel_gi_lighting_shader_version_pipelines[pass == 0 ? VOXEL_GI_SHADER_VERSION_COMPUTE_LIGHT : VOXEL_GI_SHADER_VERSION_COMPUTE_SECOND_BOUNCE]); - } else if (i == 1) { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->voxel_gi_lighting_shader_version_pipelines[VOXEL_GI_SHADER_VERSION_COMPUTE_MIPMAP]); - } - - if (pass == 1 || i > 0) { - RD::get_singleton()->compute_list_add_barrier(compute_list); //wait til previous step is done - } - if (pass == 0 || i > 0) { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, mipmaps[i].uniform_set, 0); - } else { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, mipmaps[i].second_bounce_uniform_set, 0); - } - - push_constant.cell_offset = mipmaps[i].cell_offset; - push_constant.cell_count = mipmaps[i].cell_count; - - int64_t wg_todo = (mipmaps[i].cell_count - 1) / wg_size + 1; - while (wg_todo) { - int64_t wg_count = MIN(wg_todo, wg_limit_x); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(VoxelGIPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, wg_count, 1, 1); - wg_todo -= wg_count; - push_constant.cell_offset += wg_count * wg_size; - } - } - - 
RD::get_singleton()->compute_list_add_barrier(compute_list); //wait til previous step is done - } - - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->voxel_gi_lighting_shader_version_pipelines[VOXEL_GI_SHADER_VERSION_WRITE_TEXTURE]); - - for (int i = 0; i < mipmaps.size(); i++) { - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, mipmaps[i].write_uniform_set, 0); - - push_constant.cell_offset = mipmaps[i].cell_offset; - push_constant.cell_count = mipmaps[i].cell_count; - - int64_t wg_todo = (mipmaps[i].cell_count - 1) / wg_size + 1; - while (wg_todo) { - int64_t wg_count = MIN(wg_todo, wg_limit_x); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(VoxelGIPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, wg_count, 1, 1); - wg_todo -= wg_count; - push_constant.cell_offset += wg_count * wg_size; - } - } - } - - RD::get_singleton()->compute_list_end(); - } - } - - has_dynamic_object_data = false; //clear until dynamic object data is used again - - if (p_dynamic_objects.size() && dynamic_maps.size()) { - Vector3i octree_size = storage->voxel_gi_get_octree_size(probe); - int multiplier = dynamic_maps[0].size / MAX(MAX(octree_size.x, octree_size.y), octree_size.z); - - Transform3D oversample_scale; - oversample_scale.basis.scale(Vector3(multiplier, multiplier, multiplier)); - - Transform3D to_cell = oversample_scale * storage->voxel_gi_get_to_cell_xform(probe); - Transform3D to_world_xform = transform * to_cell.affine_inverse(); - Transform3D to_probe_xform = to_world_xform.affine_inverse(); - - AABB probe_aabb(Vector3(), octree_size); - - //this could probably be better parallelized in compute.. 
- for (int i = 0; i < (int)p_dynamic_objects.size(); i++) { - RendererSceneRender::GeometryInstance *instance = p_dynamic_objects[i]; - - //transform aabb to voxel_gi - AABB aabb = (to_probe_xform * p_scene_render->geometry_instance_get_transform(instance)).xform(p_scene_render->geometry_instance_get_aabb(instance)); - - //this needs to wrap to grid resolution to avoid jitter - //also extend margin a bit just in case - Vector3i begin = aabb.position - Vector3i(1, 1, 1); - Vector3i end = aabb.position + aabb.size + Vector3i(1, 1, 1); - - for (int j = 0; j < 3; j++) { - if ((end[j] - begin[j]) & 1) { - end[j]++; //for half extents split, it needs to be even - } - begin[j] = MAX(begin[j], 0); - end[j] = MIN(end[j], octree_size[j] * multiplier); - } - - //aabb = aabb.intersection(probe_aabb); //intersect - aabb.position = begin; - aabb.size = end - begin; - - //print_line("aabb: " + aabb); - - for (int j = 0; j < 6; j++) { - //if (j != 0 && j != 3) { - // continue; - //} - static const Vector3 render_z[6] = { - Vector3(1, 0, 0), - Vector3(0, 1, 0), - Vector3(0, 0, 1), - Vector3(-1, 0, 0), - Vector3(0, -1, 0), - Vector3(0, 0, -1), - }; - static const Vector3 render_up[6] = { - Vector3(0, 1, 0), - Vector3(0, 0, 1), - Vector3(0, 1, 0), - Vector3(0, 1, 0), - Vector3(0, 0, 1), - Vector3(0, 1, 0), - }; - - Vector3 render_dir = render_z[j]; - Vector3 up_dir = render_up[j]; - - Vector3 center = aabb.get_center(); - Transform3D xform; - xform.set_look_at(center - aabb.size * 0.5 * render_dir, center, up_dir); - - Vector3 x_dir = xform.basis.get_column(0).abs(); - int x_axis = int(Vector3(0, 1, 2).dot(x_dir)); - Vector3 y_dir = xform.basis.get_column(1).abs(); - int y_axis = int(Vector3(0, 1, 2).dot(y_dir)); - Vector3 z_dir = -xform.basis.get_column(2); - int z_axis = int(Vector3(0, 1, 2).dot(z_dir.abs())); - - Rect2i rect(aabb.position[x_axis], aabb.position[y_axis], aabb.size[x_axis], aabb.size[y_axis]); - bool x_flip = bool(Vector3(1, 1, 1).dot(xform.basis.get_column(0)) < 
0); - bool y_flip = bool(Vector3(1, 1, 1).dot(xform.basis.get_column(1)) < 0); - bool z_flip = bool(Vector3(1, 1, 1).dot(xform.basis.get_column(2)) > 0); - - CameraMatrix cm; - cm.set_orthogonal(-rect.size.width / 2, rect.size.width / 2, -rect.size.height / 2, rect.size.height / 2, 0.0001, aabb.size[z_axis]); - - if (p_scene_render->cull_argument.size() == 0) { - p_scene_render->cull_argument.push_back(nullptr); - } - p_scene_render->cull_argument[0] = instance; - - p_scene_render->_render_material(to_world_xform * xform, cm, true, p_scene_render->cull_argument, dynamic_maps[0].fb, Rect2i(Vector2i(), rect.size)); - - VoxelGIDynamicPushConstant push_constant; - memset(&push_constant, 0, sizeof(VoxelGIDynamicPushConstant)); - push_constant.limits[0] = octree_size.x; - push_constant.limits[1] = octree_size.y; - push_constant.limits[2] = octree_size.z; - push_constant.light_count = p_light_instances.size(); - push_constant.x_dir[0] = x_dir[0]; - push_constant.x_dir[1] = x_dir[1]; - push_constant.x_dir[2] = x_dir[2]; - push_constant.y_dir[0] = y_dir[0]; - push_constant.y_dir[1] = y_dir[1]; - push_constant.y_dir[2] = y_dir[2]; - push_constant.z_dir[0] = z_dir[0]; - push_constant.z_dir[1] = z_dir[1]; - push_constant.z_dir[2] = z_dir[2]; - push_constant.z_base = xform.origin[z_axis]; - push_constant.z_sign = (z_flip ? 
-1.0 : 1.0); - push_constant.pos_multiplier = float(1.0) / multiplier; - push_constant.dynamic_range = storage->voxel_gi_get_dynamic_range(probe); - push_constant.flip_x = x_flip; - push_constant.flip_y = y_flip; - push_constant.rect_pos[0] = rect.position[0]; - push_constant.rect_pos[1] = rect.position[1]; - push_constant.rect_size[0] = rect.size[0]; - push_constant.rect_size[1] = rect.size[1]; - push_constant.prev_rect_ofs[0] = 0; - push_constant.prev_rect_ofs[1] = 0; - push_constant.prev_rect_size[0] = 0; - push_constant.prev_rect_size[1] = 0; - push_constant.on_mipmap = false; - push_constant.propagation = storage->voxel_gi_get_propagation(probe); - push_constant.pad[0] = 0; - push_constant.pad[1] = 0; - push_constant.pad[2] = 0; - - //process lighting - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->voxel_gi_lighting_shader_version_pipelines[VOXEL_GI_SHADER_VERSION_DYNAMIC_OBJECT_LIGHTING]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, dynamic_maps[0].uniform_set, 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(VoxelGIDynamicPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, (rect.size.x - 1) / 8 + 1, (rect.size.y - 1) / 8 + 1, 1); - //print_line("rect: " + itos(i) + ": " + rect); - - for (int k = 1; k < dynamic_maps.size(); k++) { - // enlarge the rect if needed so all pixels fit when downscaled, - // this ensures downsampling is smooth and optimal because no pixels are left behind - - //x - if (rect.position.x & 1) { - rect.size.x++; - push_constant.prev_rect_ofs[0] = 1; //this is used to ensure reading is also optimal - } else { - push_constant.prev_rect_ofs[0] = 0; - } - if (rect.size.x & 1) { - rect.size.x++; - } - - rect.position.x >>= 1; - rect.size.x = MAX(1, rect.size.x >> 1); - - //y - if (rect.position.y & 1) { - rect.size.y++; - 
push_constant.prev_rect_ofs[1] = 1; - } else { - push_constant.prev_rect_ofs[1] = 0; - } - if (rect.size.y & 1) { - rect.size.y++; - } - - rect.position.y >>= 1; - rect.size.y = MAX(1, rect.size.y >> 1); - - //shrink limits to ensure plot does not go outside map - if (dynamic_maps[k].mipmap > 0) { - for (int l = 0; l < 3; l++) { - push_constant.limits[l] = MAX(1, push_constant.limits[l] >> 1); - } - } - - //print_line("rect: " + itos(i) + ": " + rect); - push_constant.rect_pos[0] = rect.position[0]; - push_constant.rect_pos[1] = rect.position[1]; - push_constant.prev_rect_size[0] = push_constant.rect_size[0]; - push_constant.prev_rect_size[1] = push_constant.rect_size[1]; - push_constant.rect_size[0] = rect.size[0]; - push_constant.rect_size[1] = rect.size[1]; - push_constant.on_mipmap = dynamic_maps[k].mipmap > 0; - - RD::get_singleton()->compute_list_add_barrier(compute_list); - - if (dynamic_maps[k].mipmap < 0) { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->voxel_gi_lighting_shader_version_pipelines[VOXEL_GI_SHADER_VERSION_DYNAMIC_SHRINK_WRITE]); - } else if (k < dynamic_maps.size() - 1) { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->voxel_gi_lighting_shader_version_pipelines[VOXEL_GI_SHADER_VERSION_DYNAMIC_SHRINK_WRITE_PLOT]); - } else { - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, gi->voxel_gi_lighting_shader_version_pipelines[VOXEL_GI_SHADER_VERSION_DYNAMIC_SHRINK_PLOT]); - } - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, dynamic_maps[k].uniform_set, 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(VoxelGIDynamicPushConstant)); - RD::get_singleton()->compute_list_dispatch(compute_list, (rect.size.x - 1) / 8 + 1, (rect.size.y - 1) / 8 + 1, 1); - } - - RD::get_singleton()->compute_list_end(); - } - } - - has_dynamic_object_data = true; //clear until dynamic object data is used again - } - - last_probe_version 
= storage->voxel_gi_get_version(probe); -} - -void RendererSceneGIRD::VoxelGIInstance::debug(RD::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform, bool p_lighting, bool p_emission, float p_alpha) { - RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); - - if (mipmaps.size() == 0) { - return; - } - - CameraMatrix cam_transform = (p_camera_with_transform * CameraMatrix(transform)) * CameraMatrix(storage->voxel_gi_get_to_cell_xform(probe).affine_inverse()); - - int level = 0; - Vector3i octree_size = storage->voxel_gi_get_octree_size(probe); - - VoxelGIDebugPushConstant push_constant; - push_constant.alpha = p_alpha; - push_constant.dynamic_range = storage->voxel_gi_get_dynamic_range(probe); - push_constant.cell_offset = mipmaps[level].cell_offset; - push_constant.level = level; - - push_constant.bounds[0] = octree_size.x >> level; - push_constant.bounds[1] = octree_size.y >> level; - push_constant.bounds[2] = octree_size.z >> level; - push_constant.pad = 0; - - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - push_constant.projection[i * 4 + j] = cam_transform.matrix[i][j]; - } - } - - if (gi->voxel_gi_debug_uniform_set.is_valid()) { - RD::get_singleton()->free(gi->voxel_gi_debug_uniform_set); - } - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.binding = 1; - u.append_id(storage->voxel_gi_get_data_buffer(probe)); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.binding = 2; - u.append_id(texture); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; - u.binding = 3; - u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); - uniforms.push_back(u); - } - - int cell_count; - if (!p_emission && p_lighting && has_dynamic_object_data) { - cell_count = 
push_constant.bounds[0] * push_constant.bounds[1] * push_constant.bounds[2]; - } else { - cell_count = mipmaps[level].cell_count; - } - - gi->voxel_gi_debug_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, gi->voxel_gi_debug_shader_version_shaders[0], 0); - - int voxel_gi_debug_pipeline = VOXEL_GI_DEBUG_COLOR; - if (p_emission) { - voxel_gi_debug_pipeline = VOXEL_GI_DEBUG_EMISSION; - } else if (p_lighting) { - voxel_gi_debug_pipeline = has_dynamic_object_data ? VOXEL_GI_DEBUG_LIGHT_FULL : VOXEL_GI_DEBUG_LIGHT; - } - RD::get_singleton()->draw_list_bind_render_pipeline( - p_draw_list, - gi->voxel_gi_debug_shader_version_pipelines[voxel_gi_debug_pipeline].get_render_pipeline(RD::INVALID_ID, RD::get_singleton()->framebuffer_get_format(p_framebuffer))); - RD::get_singleton()->draw_list_bind_uniform_set(p_draw_list, gi->voxel_gi_debug_uniform_set, 0); - RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(VoxelGIDebugPushConstant)); - RD::get_singleton()->draw_list_draw(p_draw_list, false, cell_count, 36); -} - -//////////////////////////////////////////////////////////////////////////////// -// GIRD - -RendererSceneGIRD::RendererSceneGIRD() { - sdfgi_ray_count = RS::EnvironmentSDFGIRayCount(CLAMP(int32_t(GLOBAL_GET("rendering/global_illumination/sdfgi/probe_ray_count")), 0, int32_t(RS::ENV_SDFGI_RAY_COUNT_MAX - 1))); - sdfgi_frames_to_converge = RS::EnvironmentSDFGIFramesToConverge(CLAMP(int32_t(GLOBAL_GET("rendering/global_illumination/sdfgi/frames_to_converge")), 0, int32_t(RS::ENV_SDFGI_CONVERGE_MAX - 1))); - sdfgi_frames_to_update_light = RS::EnvironmentSDFGIFramesToUpdateLight(CLAMP(int32_t(GLOBAL_GET("rendering/global_illumination/sdfgi/frames_to_update_lights")), 0, int32_t(RS::ENV_SDFGI_UPDATE_LIGHT_MAX - 1))); -} - -RendererSceneGIRD::~RendererSceneGIRD() { -} - -void RendererSceneGIRD::init(RendererStorageRD *p_storage, RendererSceneSkyRD *p_sky) { - RendererRD::TextureStorage *texture_storage = 
RendererRD::TextureStorage::get_singleton(); - RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); - - storage = p_storage; - - /* GI */ - - { - //kinda complicated to compute the amount of slots, we try to use as many as we can - - voxel_gi_lights = memnew_arr(VoxelGILight, voxel_gi_max_lights); - voxel_gi_lights_uniform = RD::get_singleton()->uniform_buffer_create(voxel_gi_max_lights * sizeof(VoxelGILight)); - voxel_gi_quality = RS::VoxelGIQuality(CLAMP(int(GLOBAL_GET("rendering/global_illumination/voxel_gi/quality")), 0, 1)); - - String defines = "\n#define MAX_LIGHTS " + itos(voxel_gi_max_lights) + "\n"; - - Vector versions; - versions.push_back("\n#define MODE_COMPUTE_LIGHT\n"); - versions.push_back("\n#define MODE_SECOND_BOUNCE\n"); - versions.push_back("\n#define MODE_UPDATE_MIPMAPS\n"); - versions.push_back("\n#define MODE_WRITE_TEXTURE\n"); - versions.push_back("\n#define MODE_DYNAMIC\n#define MODE_DYNAMIC_LIGHTING\n"); - versions.push_back("\n#define MODE_DYNAMIC\n#define MODE_DYNAMIC_SHRINK\n#define MODE_DYNAMIC_SHRINK_WRITE\n"); - versions.push_back("\n#define MODE_DYNAMIC\n#define MODE_DYNAMIC_SHRINK\n#define MODE_DYNAMIC_SHRINK_PLOT\n"); - versions.push_back("\n#define MODE_DYNAMIC\n#define MODE_DYNAMIC_SHRINK\n#define MODE_DYNAMIC_SHRINK_PLOT\n#define MODE_DYNAMIC_SHRINK_WRITE\n"); - - voxel_gi_shader.initialize(versions, defines); - voxel_gi_lighting_shader_version = voxel_gi_shader.version_create(); - for (int i = 0; i < VOXEL_GI_SHADER_VERSION_MAX; i++) { - voxel_gi_lighting_shader_version_shaders[i] = voxel_gi_shader.version_get_shader(voxel_gi_lighting_shader_version, i); - voxel_gi_lighting_shader_version_pipelines[i] = RD::get_singleton()->compute_pipeline_create(voxel_gi_lighting_shader_version_shaders[i]); - } - } - - { - String defines; - Vector versions; - versions.push_back("\n#define MODE_DEBUG_COLOR\n"); - versions.push_back("\n#define MODE_DEBUG_LIGHT\n"); - versions.push_back("\n#define 
MODE_DEBUG_EMISSION\n"); - versions.push_back("\n#define MODE_DEBUG_LIGHT\n#define MODE_DEBUG_LIGHT_FULL\n"); - - voxel_gi_debug_shader.initialize(versions, defines); - voxel_gi_debug_shader_version = voxel_gi_debug_shader.version_create(); - for (int i = 0; i < VOXEL_GI_DEBUG_MAX; i++) { - voxel_gi_debug_shader_version_shaders[i] = voxel_gi_debug_shader.version_get_shader(voxel_gi_debug_shader_version, i); - - RD::PipelineRasterizationState rs; - rs.cull_mode = RD::POLYGON_CULL_FRONT; - RD::PipelineDepthStencilState ds; - ds.enable_depth_test = true; - ds.enable_depth_write = true; - ds.depth_compare_operator = RD::COMPARE_OP_LESS_OR_EQUAL; - - voxel_gi_debug_shader_version_pipelines[i].setup(voxel_gi_debug_shader_version_shaders[i], RD::RENDER_PRIMITIVE_TRIANGLES, rs, RD::PipelineMultisampleState(), ds, RD::PipelineColorBlendState::create_disabled(), 0); - } - } - - /* SDGFI */ - - { - Vector preprocess_modes; - preprocess_modes.push_back("\n#define MODE_SCROLL\n"); - preprocess_modes.push_back("\n#define MODE_SCROLL_OCCLUSION\n"); - preprocess_modes.push_back("\n#define MODE_INITIALIZE_JUMP_FLOOD\n"); - preprocess_modes.push_back("\n#define MODE_INITIALIZE_JUMP_FLOOD_HALF\n"); - preprocess_modes.push_back("\n#define MODE_JUMPFLOOD\n"); - preprocess_modes.push_back("\n#define MODE_JUMPFLOOD_OPTIMIZED\n"); - preprocess_modes.push_back("\n#define MODE_UPSCALE_JUMP_FLOOD\n"); - preprocess_modes.push_back("\n#define MODE_OCCLUSION\n"); - preprocess_modes.push_back("\n#define MODE_STORE\n"); - String defines = "\n#define OCCLUSION_SIZE " + itos(SDFGI::CASCADE_SIZE / SDFGI::PROBE_DIVISOR) + "\n"; - sdfgi_shader.preprocess.initialize(preprocess_modes, defines); - sdfgi_shader.preprocess_shader = sdfgi_shader.preprocess.version_create(); - for (int i = 0; i < SDFGIShader::PRE_PROCESS_MAX; i++) { - sdfgi_shader.preprocess_pipeline[i] = RD::get_singleton()->compute_pipeline_create(sdfgi_shader.preprocess.version_get_shader(sdfgi_shader.preprocess_shader, i)); - } - } - - { 
- //calculate tables - String defines = "\n#define OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; - - Vector direct_light_modes; - direct_light_modes.push_back("\n#define MODE_PROCESS_STATIC\n"); - direct_light_modes.push_back("\n#define MODE_PROCESS_DYNAMIC\n"); - sdfgi_shader.direct_light.initialize(direct_light_modes, defines); - sdfgi_shader.direct_light_shader = sdfgi_shader.direct_light.version_create(); - for (int i = 0; i < SDFGIShader::DIRECT_LIGHT_MODE_MAX; i++) { - sdfgi_shader.direct_light_pipeline[i] = RD::get_singleton()->compute_pipeline_create(sdfgi_shader.direct_light.version_get_shader(sdfgi_shader.direct_light_shader, i)); - } - } - - { - //calculate tables - String defines = "\n#define OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; - defines += "\n#define SH_SIZE " + itos(SDFGI::SH_SIZE) + "\n"; - if (p_sky->sky_use_cubemap_array) { - defines += "\n#define USE_CUBEMAP_ARRAY\n"; - } - - Vector integrate_modes; - integrate_modes.push_back("\n#define MODE_PROCESS\n"); - integrate_modes.push_back("\n#define MODE_STORE\n"); - integrate_modes.push_back("\n#define MODE_SCROLL\n"); - integrate_modes.push_back("\n#define MODE_SCROLL_STORE\n"); - sdfgi_shader.integrate.initialize(integrate_modes, defines); - sdfgi_shader.integrate_shader = sdfgi_shader.integrate.version_create(); - - for (int i = 0; i < SDFGIShader::INTEGRATE_MODE_MAX; i++) { - sdfgi_shader.integrate_pipeline[i] = RD::get_singleton()->compute_pipeline_create(sdfgi_shader.integrate.version_get_shader(sdfgi_shader.integrate_shader, i)); - } - - { - Vector uniforms; - - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.binding = 0; - u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_CUBEMAP_WHITE)); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; - u.binding = 1; - u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, 
RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); - uniforms.push_back(u); - } - - sdfgi_shader.integrate_default_sky_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, sdfgi_shader.integrate.version_get_shader(sdfgi_shader.integrate_shader, 0), 1); - } - } - - //GK - { - //calculate tables - String defines = "\n#define SDFGI_OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; - Vector gi_modes; - gi_modes.push_back("\n#define USE_VOXEL_GI_INSTANCES\n"); - gi_modes.push_back("\n#define USE_SDFGI\n"); - gi_modes.push_back("\n#define USE_SDFGI\n\n#define USE_VOXEL_GI_INSTANCES\n"); - gi_modes.push_back("\n#define MODE_HALF_RES\n#define USE_VOXEL_GI_INSTANCES\n"); - gi_modes.push_back("\n#define MODE_HALF_RES\n#define USE_SDFGI\n"); - gi_modes.push_back("\n#define MODE_HALF_RES\n#define USE_SDFGI\n\n#define USE_VOXEL_GI_INSTANCES\n"); - - shader.initialize(gi_modes, defines); - shader_version = shader.version_create(); - for (int i = 0; i < MODE_MAX; i++) { - pipelines[i] = RD::get_singleton()->compute_pipeline_create(shader.version_get_shader(shader_version, i)); - } - - sdfgi_ubo = RD::get_singleton()->uniform_buffer_create(sizeof(SDFGIData)); - } - { - String defines = "\n#define OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; - Vector debug_modes; - debug_modes.push_back(""); - sdfgi_shader.debug.initialize(debug_modes, defines); - sdfgi_shader.debug_shader = sdfgi_shader.debug.version_create(); - sdfgi_shader.debug_shader_version = sdfgi_shader.debug.version_get_shader(sdfgi_shader.debug_shader, 0); - sdfgi_shader.debug_pipeline = RD::get_singleton()->compute_pipeline_create(sdfgi_shader.debug_shader_version); - } - { - String defines = "\n#define OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; - - Vector versions; - versions.push_back("\n#define MODE_PROBES\n"); - versions.push_back("\n#define MODE_VISIBILITY\n"); - - sdfgi_shader.debug_probes.initialize(versions, defines); - sdfgi_shader.debug_probes_shader = 
sdfgi_shader.debug_probes.version_create(); - - { - RD::PipelineRasterizationState rs; - rs.cull_mode = RD::POLYGON_CULL_DISABLED; - RD::PipelineDepthStencilState ds; - ds.enable_depth_test = true; - ds.enable_depth_write = true; - ds.depth_compare_operator = RD::COMPARE_OP_LESS_OR_EQUAL; - for (int i = 0; i < SDFGIShader::PROBE_DEBUG_MAX; i++) { - RID debug_probes_shader_version = sdfgi_shader.debug_probes.version_get_shader(sdfgi_shader.debug_probes_shader, i); - sdfgi_shader.debug_probes_pipeline[i].setup(debug_probes_shader_version, RD::RENDER_PRIMITIVE_TRIANGLE_STRIPS, rs, RD::PipelineMultisampleState(), ds, RD::PipelineColorBlendState::create_disabled(), 0); - } - } - } - default_voxel_gi_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(VoxelGIData) * MAX_VOXEL_GI_INSTANCES); - half_resolution = GLOBAL_GET("rendering/global_illumination/gi/use_half_resolution"); -} - -void RendererSceneGIRD::free() { - RD::get_singleton()->free(default_voxel_gi_buffer); - RD::get_singleton()->free(voxel_gi_lights_uniform); - RD::get_singleton()->free(sdfgi_ubo); - - voxel_gi_debug_shader.version_free(voxel_gi_debug_shader_version); - voxel_gi_shader.version_free(voxel_gi_lighting_shader_version); - shader.version_free(shader_version); - sdfgi_shader.debug_probes.version_free(sdfgi_shader.debug_probes_shader); - sdfgi_shader.debug.version_free(sdfgi_shader.debug_shader); - sdfgi_shader.direct_light.version_free(sdfgi_shader.direct_light_shader); - sdfgi_shader.integrate.version_free(sdfgi_shader.integrate_shader); - sdfgi_shader.preprocess.version_free(sdfgi_shader.preprocess_shader); - - if (voxel_gi_lights) { - memdelete_arr(voxel_gi_lights); - } -} - -RendererSceneGIRD::SDFGI *RendererSceneGIRD::create_sdfgi(RendererSceneEnvironmentRD *p_env, const Vector3 &p_world_position, uint32_t p_requested_history_size) { - SDFGI *sdfgi = memnew(SDFGI); - - sdfgi->create(p_env, p_world_position, p_requested_history_size, this); - - return sdfgi; -} - -void 
RendererSceneGIRD::setup_voxel_gi_instances(RID p_render_buffers, const Transform3D &p_transform, const PagedArray &p_voxel_gi_instances, uint32_t &r_voxel_gi_instances_used, RendererSceneRenderRD *p_scene_render) { - RendererRD::TextureStorage *texture_storage = RendererRD::TextureStorage::get_singleton(); - - r_voxel_gi_instances_used = 0; - - // feels a little dirty to use our container this way but.... - RendererSceneRenderRD::RenderBuffers *rb = p_scene_render->render_buffers_owner.get_or_null(p_render_buffers); - ERR_FAIL_COND(rb == nullptr); - - RID voxel_gi_buffer = p_scene_render->render_buffers_get_voxel_gi_buffer(p_render_buffers); - - VoxelGIData voxel_gi_data[MAX_VOXEL_GI_INSTANCES]; - - bool voxel_gi_instances_changed = false; - - Transform3D to_camera; - to_camera.origin = p_transform.origin; //only translation, make local - - for (int i = 0; i < MAX_VOXEL_GI_INSTANCES; i++) { - RID texture; - if (i < (int)p_voxel_gi_instances.size()) { - VoxelGIInstance *gipi = get_probe_instance(p_voxel_gi_instances[i]); - - if (gipi) { - texture = gipi->texture; - VoxelGIData &gipd = voxel_gi_data[i]; - - RID base_probe = gipi->probe; - - Transform3D to_cell = storage->voxel_gi_get_to_cell_xform(gipi->probe) * gipi->transform.affine_inverse() * to_camera; - - gipd.xform[0] = to_cell.basis.rows[0][0]; - gipd.xform[1] = to_cell.basis.rows[1][0]; - gipd.xform[2] = to_cell.basis.rows[2][0]; - gipd.xform[3] = 0; - gipd.xform[4] = to_cell.basis.rows[0][1]; - gipd.xform[5] = to_cell.basis.rows[1][1]; - gipd.xform[6] = to_cell.basis.rows[2][1]; - gipd.xform[7] = 0; - gipd.xform[8] = to_cell.basis.rows[0][2]; - gipd.xform[9] = to_cell.basis.rows[1][2]; - gipd.xform[10] = to_cell.basis.rows[2][2]; - gipd.xform[11] = 0; - gipd.xform[12] = to_cell.origin.x; - gipd.xform[13] = to_cell.origin.y; - gipd.xform[14] = to_cell.origin.z; - gipd.xform[15] = 1; - - Vector3 bounds = storage->voxel_gi_get_octree_size(base_probe); - - gipd.bounds[0] = bounds.x; - gipd.bounds[1] = 
bounds.y; - gipd.bounds[2] = bounds.z; - - gipd.dynamic_range = storage->voxel_gi_get_dynamic_range(base_probe) * storage->voxel_gi_get_energy(base_probe); - gipd.bias = storage->voxel_gi_get_bias(base_probe); - gipd.normal_bias = storage->voxel_gi_get_normal_bias(base_probe); - gipd.blend_ambient = !storage->voxel_gi_is_interior(base_probe); - gipd.mipmaps = gipi->mipmaps.size(); - } - - r_voxel_gi_instances_used++; - } - - if (texture == RID()) { - texture = texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE); - } - - if (texture != rb->gi.voxel_gi_textures[i]) { - voxel_gi_instances_changed = true; - rb->gi.voxel_gi_textures[i] = texture; - } - } - - if (voxel_gi_instances_changed) { - if (RD::get_singleton()->uniform_set_is_valid(rb->gi.uniform_set)) { - RD::get_singleton()->free(rb->gi.uniform_set); - } - rb->gi.uniform_set = RID(); - if (rb->volumetric_fog) { - if (RD::get_singleton()->uniform_set_is_valid(rb->volumetric_fog->fog_uniform_set)) { - RD::get_singleton()->free(rb->volumetric_fog->fog_uniform_set); - RD::get_singleton()->free(rb->volumetric_fog->process_uniform_set); - RD::get_singleton()->free(rb->volumetric_fog->process_uniform_set2); - } - rb->volumetric_fog->fog_uniform_set = RID(); - rb->volumetric_fog->process_uniform_set = RID(); - rb->volumetric_fog->process_uniform_set2 = RID(); - } - } - - if (p_voxel_gi_instances.size() > 0) { - RD::get_singleton()->draw_command_begin_label("VoxelGIs Setup"); - - RD::get_singleton()->buffer_update(voxel_gi_buffer, 0, sizeof(VoxelGIData) * MIN((uint64_t)MAX_VOXEL_GI_INSTANCES, p_voxel_gi_instances.size()), voxel_gi_data, RD::BARRIER_MASK_COMPUTE); - - RD::get_singleton()->draw_command_end_label(); - } -} - -void RendererSceneGIRD::process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_voxel_gi_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform3D &p_transform, const PagedArray &p_voxel_gi_instances, RendererSceneRenderRD 
*p_scene_render) { - RendererRD::TextureStorage *texture_storage = RendererRD::TextureStorage::get_singleton(); - RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); - - RD::get_singleton()->draw_command_begin_label("GI Render"); - - RendererSceneRenderRD::RenderBuffers *rb = p_scene_render->render_buffers_owner.get_or_null(p_render_buffers); - ERR_FAIL_COND(rb == nullptr); - - if (rb->ambient_buffer.is_null() || rb->gi.using_half_size_gi != half_resolution) { - if (rb->ambient_buffer.is_valid()) { - RD::get_singleton()->free(rb->ambient_buffer); - RD::get_singleton()->free(rb->reflection_buffer); - } - - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; - tf.width = rb->internal_width; - tf.height = rb->internal_height; - if (half_resolution) { - tf.width >>= 1; - tf.height >>= 1; - } - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; - rb->reflection_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); - rb->ambient_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); - rb->gi.using_half_size_gi = half_resolution; - } - - PushConstant push_constant; - - push_constant.screen_size[0] = rb->internal_width; - push_constant.screen_size[1] = rb->internal_height; - push_constant.z_near = p_projection.get_z_near(); - push_constant.z_far = p_projection.get_z_far(); - push_constant.orthogonal = p_projection.is_orthogonal(); - push_constant.proj_info[0] = -2.0f / (rb->internal_width * p_projection.matrix[0][0]); - push_constant.proj_info[1] = -2.0f / (rb->internal_height * p_projection.matrix[1][1]); - push_constant.proj_info[2] = (1.0f - p_projection.matrix[0][2]) / p_projection.matrix[0][0]; - push_constant.proj_info[3] = (1.0f + p_projection.matrix[1][2]) / p_projection.matrix[1][1]; - push_constant.max_voxel_gi_instances = MIN((uint64_t)MAX_VOXEL_GI_INSTANCES, p_voxel_gi_instances.size()); - push_constant.high_quality_vct = voxel_gi_quality == 
RS::VOXEL_GI_QUALITY_HIGH; - - bool use_sdfgi = rb->sdfgi != nullptr; - bool use_voxel_gi_instances = push_constant.max_voxel_gi_instances > 0; - - push_constant.cam_rotation[0] = p_transform.basis[0][0]; - push_constant.cam_rotation[1] = p_transform.basis[1][0]; - push_constant.cam_rotation[2] = p_transform.basis[2][0]; - push_constant.cam_rotation[3] = 0; - push_constant.cam_rotation[4] = p_transform.basis[0][1]; - push_constant.cam_rotation[5] = p_transform.basis[1][1]; - push_constant.cam_rotation[6] = p_transform.basis[2][1]; - push_constant.cam_rotation[7] = 0; - push_constant.cam_rotation[8] = p_transform.basis[0][2]; - push_constant.cam_rotation[9] = p_transform.basis[1][2]; - push_constant.cam_rotation[10] = p_transform.basis[2][2]; - push_constant.cam_rotation[11] = 0; - - if (rb->gi.uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(rb->gi.uniform_set)) { - Vector uniforms; - { - RD::Uniform u; - u.binding = 1; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { - if (rb->sdfgi && j < rb->sdfgi->cascades.size()) { - u.append_id(rb->sdfgi->cascades[j].sdf_tex); - } else { - u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); - } - } - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 2; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { - if (rb->sdfgi && j < rb->sdfgi->cascades.size()) { - u.append_id(rb->sdfgi->cascades[j].light_tex); - } else { - u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); - } - } - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 3; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { - if (rb->sdfgi && j < rb->sdfgi->cascades.size()) { - u.append_id(rb->sdfgi->cascades[j].light_aniso_0_tex); - } else { - 
u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); - } - } - uniforms.push_back(u); - } - { - RD::Uniform u; - u.binding = 4; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - for (uint32_t j = 0; j < SDFGI::MAX_CASCADES; j++) { - if (rb->sdfgi && j < rb->sdfgi->cascades.size()) { - u.append_id(rb->sdfgi->cascades[j].light_aniso_1_tex); - } else { - u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); - } - } - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.binding = 5; - if (rb->sdfgi) { - u.append_id(rb->sdfgi->occlusion_texture); - } else { - u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_3D_WHITE)); - } - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; - u.binding = 6; - u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_SAMPLER; - u.binding = 7; - u.append_id(material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR_WITH_MIPMAPS, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED)); - uniforms.push_back(u); - } - - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 9; - u.append_id(rb->ambient_buffer); - uniforms.push_back(u); - } - - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 10; - u.append_id(rb->reflection_buffer); - uniforms.push_back(u); - } - - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.binding = 11; - if (rb->sdfgi) { - u.append_id(rb->sdfgi->lightprobe_texture); - } else { - u.append_id(texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_2D_ARRAY_WHITE)); - } - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.binding = 
12; - u.append_id(rb->depth_texture); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.binding = 13; - u.append_id(p_normal_roughness_buffer); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.binding = 14; - RID buffer = p_voxel_gi_buffer.is_valid() ? p_voxel_gi_buffer : texture_storage->texture_rd_get_default(RendererRD::DEFAULT_RD_TEXTURE_BLACK); - u.append_id(buffer); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; - u.binding = 15; - u.append_id(sdfgi_ubo); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; - u.binding = 16; - u.append_id(rb->gi.voxel_gi_buffer); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; - u.binding = 17; - for (int i = 0; i < MAX_VOXEL_GI_INSTANCES; i++) { - u.append_id(rb->gi.voxel_gi_textures[i]); - } - uniforms.push_back(u); - } - - rb->gi.uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shader.version_get_shader(shader_version, 0), 0); - } - - Mode mode; - - if (rb->gi.using_half_size_gi) { - mode = (use_sdfgi && use_voxel_gi_instances) ? MODE_HALF_RES_COMBINED : (use_sdfgi ? MODE_HALF_RES_SDFGI : MODE_HALF_RES_VOXEL_GI); - } else { - mode = (use_sdfgi && use_voxel_gi_instances) ? MODE_COMBINED : (use_sdfgi ? 
MODE_SDFGI : MODE_VOXEL_GI); - } - - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(true); - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, pipelines[mode]); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, rb->gi.uniform_set, 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(PushConstant)); - - if (rb->gi.using_half_size_gi) { - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->internal_width >> 1, rb->internal_height >> 1, 1); - } else { - RD::get_singleton()->compute_list_dispatch_threads(compute_list, rb->internal_width, rb->internal_height, 1); - } - //do barrier later to allow oeverlap - //RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_NO_BARRIER); //no barriers, let other compute, raster and transfer happen at the same time - RD::get_singleton()->draw_command_end_label(); -} - -RID RendererSceneGIRD::voxel_gi_instance_create(RID p_base) { - VoxelGIInstance voxel_gi; - voxel_gi.gi = this; - voxel_gi.storage = storage; - voxel_gi.probe = p_base; - RID rid = voxel_gi_instance_owner.make_rid(voxel_gi); - return rid; -} - -void RendererSceneGIRD::voxel_gi_instance_set_transform_to_data(RID p_probe, const Transform3D &p_xform) { - VoxelGIInstance *voxel_gi = get_probe_instance(p_probe); - ERR_FAIL_COND(!voxel_gi); - - voxel_gi->transform = p_xform; -} - -bool RendererSceneGIRD::voxel_gi_needs_update(RID p_probe) const { - VoxelGIInstance *voxel_gi = get_probe_instance(p_probe); - ERR_FAIL_COND_V(!voxel_gi, false); - - return voxel_gi->last_probe_version != storage->voxel_gi_get_version(voxel_gi->probe); -} - -void RendererSceneGIRD::voxel_gi_update(RID p_probe, bool p_update_light_instances, const Vector &p_light_instances, const PagedArray &p_dynamic_objects, RendererSceneRenderRD *p_scene_render) { - VoxelGIInstance *voxel_gi = get_probe_instance(p_probe); - ERR_FAIL_COND(!voxel_gi); - - 
voxel_gi->update(p_update_light_instances, p_light_instances, p_dynamic_objects, p_scene_render); -} - -void RendererSceneGIRD::debug_voxel_gi(RID p_voxel_gi, RD::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform, bool p_lighting, bool p_emission, float p_alpha) { - VoxelGIInstance *voxel_gi = voxel_gi_instance_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND(!voxel_gi); - - voxel_gi->debug(p_draw_list, p_framebuffer, p_camera_with_transform, p_lighting, p_emission, p_alpha); -} diff --git a/servers/rendering/renderer_rd/renderer_scene_gi_rd.h b/servers/rendering/renderer_rd/renderer_scene_gi_rd.h deleted file mode 100644 index 122644498b..0000000000 --- a/servers/rendering/renderer_rd/renderer_scene_gi_rd.h +++ /dev/null @@ -1,665 +0,0 @@ -/*************************************************************************/ -/* renderer_scene_gi_rd.h */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur. */ -/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md). */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. 
*/ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ - -#ifndef RENDERING_SERVER_SCENE_GI_RD_H -#define RENDERING_SERVER_SCENE_GI_RD_H - -#include "core/templates/local_vector.h" -#include "core/templates/rid_owner.h" -#include "servers/rendering/renderer_compositor.h" -#include "servers/rendering/renderer_rd/renderer_scene_environment_rd.h" -#include "servers/rendering/renderer_rd/renderer_scene_sky_rd.h" -#include "servers/rendering/renderer_rd/renderer_storage_rd.h" -#include "servers/rendering/renderer_rd/shaders/gi.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/sdfgi_debug_probes.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/sdfgi_preprocess.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/voxel_gi.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/voxel_gi_debug.glsl.gen.h" -#include "servers/rendering/renderer_scene_render.h" -#include "servers/rendering/rendering_device.h" - -// Forward declare RenderDataRD and RendererSceneRenderRD so we can pass it into some of our methods, these classes are pretty tightly bound -struct RenderDataRD; -class RendererSceneRenderRD; - -class RendererSceneGIRD { -private: - RendererStorageRD *storage = nullptr; - - /* 
VOXEL_GI INSTANCE */ - - struct VoxelGILight { - uint32_t type; - float energy; - float radius; - float attenuation; - - float color[3]; - float cos_spot_angle; - - float position[3]; - float inv_spot_attenuation; - - float direction[3]; - uint32_t has_shadow; - }; - - struct VoxelGIPushConstant { - int32_t limits[3]; - uint32_t stack_size; - - float emission_scale; - float propagation; - float dynamic_range; - uint32_t light_count; - - uint32_t cell_offset; - uint32_t cell_count; - float aniso_strength; - uint32_t pad; - }; - - struct VoxelGIDynamicPushConstant { - int32_t limits[3]; - uint32_t light_count; - int32_t x_dir[3]; - float z_base; - int32_t y_dir[3]; - float z_sign; - int32_t z_dir[3]; - float pos_multiplier; - uint32_t rect_pos[2]; - uint32_t rect_size[2]; - uint32_t prev_rect_ofs[2]; - uint32_t prev_rect_size[2]; - uint32_t flip_x; - uint32_t flip_y; - float dynamic_range; - uint32_t on_mipmap; - float propagation; - float pad[3]; - }; - - VoxelGILight *voxel_gi_lights = nullptr; - uint32_t voxel_gi_max_lights = 32; - RID voxel_gi_lights_uniform; - - enum { - VOXEL_GI_SHADER_VERSION_COMPUTE_LIGHT, - VOXEL_GI_SHADER_VERSION_COMPUTE_SECOND_BOUNCE, - VOXEL_GI_SHADER_VERSION_COMPUTE_MIPMAP, - VOXEL_GI_SHADER_VERSION_WRITE_TEXTURE, - VOXEL_GI_SHADER_VERSION_DYNAMIC_OBJECT_LIGHTING, - VOXEL_GI_SHADER_VERSION_DYNAMIC_SHRINK_WRITE, - VOXEL_GI_SHADER_VERSION_DYNAMIC_SHRINK_PLOT, - VOXEL_GI_SHADER_VERSION_DYNAMIC_SHRINK_WRITE_PLOT, - VOXEL_GI_SHADER_VERSION_MAX - }; - - VoxelGiShaderRD voxel_gi_shader; - RID voxel_gi_lighting_shader_version; - RID voxel_gi_lighting_shader_version_shaders[VOXEL_GI_SHADER_VERSION_MAX]; - RID voxel_gi_lighting_shader_version_pipelines[VOXEL_GI_SHADER_VERSION_MAX]; - - enum { - VOXEL_GI_DEBUG_COLOR, - VOXEL_GI_DEBUG_LIGHT, - VOXEL_GI_DEBUG_EMISSION, - VOXEL_GI_DEBUG_LIGHT_FULL, - VOXEL_GI_DEBUG_MAX - }; - - struct VoxelGIDebugPushConstant { - float projection[16]; - uint32_t cell_offset; - float dynamic_range; - float alpha; - 
uint32_t level; - int32_t bounds[3]; - uint32_t pad; - }; - - VoxelGiDebugShaderRD voxel_gi_debug_shader; - RID voxel_gi_debug_shader_version; - RID voxel_gi_debug_shader_version_shaders[VOXEL_GI_DEBUG_MAX]; - PipelineCacheRD voxel_gi_debug_shader_version_pipelines[VOXEL_GI_DEBUG_MAX]; - RID voxel_gi_debug_uniform_set; - - /* SDFGI */ - - struct SDFGIShader { - enum SDFGIPreprocessShaderVersion { - PRE_PROCESS_SCROLL, - PRE_PROCESS_SCROLL_OCCLUSION, - PRE_PROCESS_JUMP_FLOOD_INITIALIZE, - PRE_PROCESS_JUMP_FLOOD_INITIALIZE_HALF, - PRE_PROCESS_JUMP_FLOOD, - PRE_PROCESS_JUMP_FLOOD_OPTIMIZED, - PRE_PROCESS_JUMP_FLOOD_UPSCALE, - PRE_PROCESS_OCCLUSION, - PRE_PROCESS_STORE, - PRE_PROCESS_MAX - }; - - struct PreprocessPushConstant { - int32_t scroll[3]; - int32_t grid_size; - - int32_t probe_offset[3]; - int32_t step_size; - - int32_t half_size; - uint32_t occlusion_index; - int32_t cascade; - uint32_t pad; - }; - - SdfgiPreprocessShaderRD preprocess; - RID preprocess_shader; - RID preprocess_pipeline[PRE_PROCESS_MAX]; - - struct DebugPushConstant { - float grid_size[3]; - uint32_t max_cascades; - - int32_t screen_size[2]; - uint32_t use_occlusion; - float y_mult; - - float cam_extent[3]; - uint32_t probe_axis_size; - - float cam_transform[16]; - }; - - SdfgiDebugShaderRD debug; - RID debug_shader; - RID debug_shader_version; - RID debug_pipeline; - - enum ProbeDebugMode { - PROBE_DEBUG_PROBES, - PROBE_DEBUG_VISIBILITY, - PROBE_DEBUG_MAX - }; - - struct DebugProbesPushConstant { - float projection[16]; - - uint32_t band_power; - uint32_t sections_in_band; - uint32_t band_mask; - float section_arc; - - float grid_size[3]; - uint32_t cascade; - - uint32_t pad; - float y_mult; - int32_t probe_debug_index; - int32_t probe_axis_size; - }; - - SdfgiDebugProbesShaderRD debug_probes; - RID debug_probes_shader; - RID debug_probes_shader_version; - - PipelineCacheRD debug_probes_pipeline[PROBE_DEBUG_MAX]; - - struct Light { - float color[3]; - float energy; - - float direction[3]; - 
uint32_t has_shadow; - - float position[3]; - float attenuation; - - uint32_t type; - float cos_spot_angle; - float inv_spot_attenuation; - float radius; - }; - - struct DirectLightPushConstant { - float grid_size[3]; - uint32_t max_cascades; - - uint32_t cascade; - uint32_t light_count; - uint32_t process_offset; - uint32_t process_increment; - - int32_t probe_axis_size; - float bounce_feedback; - float y_mult; - uint32_t use_occlusion; - }; - - enum { - DIRECT_LIGHT_MODE_STATIC, - DIRECT_LIGHT_MODE_DYNAMIC, - DIRECT_LIGHT_MODE_MAX - }; - SdfgiDirectLightShaderRD direct_light; - RID direct_light_shader; - RID direct_light_pipeline[DIRECT_LIGHT_MODE_MAX]; - - enum { - INTEGRATE_MODE_PROCESS, - INTEGRATE_MODE_STORE, - INTEGRATE_MODE_SCROLL, - INTEGRATE_MODE_SCROLL_STORE, - INTEGRATE_MODE_MAX - }; - struct IntegratePushConstant { - enum { - SKY_MODE_DISABLED, - SKY_MODE_COLOR, - SKY_MODE_SKY, - }; - - float grid_size[3]; - uint32_t max_cascades; - - uint32_t probe_axis_size; - uint32_t cascade; - uint32_t history_index; - uint32_t history_size; - - uint32_t ray_count; - float ray_bias; - int32_t image_size[2]; - - int32_t world_offset[3]; - uint32_t sky_mode; - - int32_t scroll[3]; - float sky_energy; - - float sky_color[3]; - float y_mult; - - uint32_t store_ambient_texture; - uint32_t pad[3]; - }; - - SdfgiIntegrateShaderRD integrate; - RID integrate_shader; - RID integrate_pipeline[INTEGRATE_MODE_MAX]; - - RID integrate_default_sky_uniform_set; - - } sdfgi_shader; - -public: - /* VOXEL_GI INSTANCE */ - - //@TODO VoxelGIInstance is still directly used in the render code, we'll address this when we refactor the render code itself. 
- - struct VoxelGIInstance { - // access to our containers - RendererStorageRD *storage = nullptr; - RendererSceneGIRD *gi = nullptr; - - RID probe; - RID texture; - RID write_buffer; - - struct Mipmap { - RID texture; - RID uniform_set; - RID second_bounce_uniform_set; - RID write_uniform_set; - uint32_t level; - uint32_t cell_offset; - uint32_t cell_count; - }; - Vector mipmaps; - - struct DynamicMap { - RID texture; //color normally, or emission on first pass - RID fb_depth; //actual depth buffer for the first pass, float depth for later passes - RID depth; //actual depth buffer for the first pass, float depth for later passes - RID normal; //normal buffer for the first pass - RID albedo; //emission buffer for the first pass - RID orm; //orm buffer for the first pass - RID fb; //used for rendering, only valid on first map - RID uniform_set; - uint32_t size; - int mipmap; // mipmap to write to, -1 if no mipmap assigned - }; - - Vector dynamic_maps; - - int slot = -1; - uint32_t last_probe_version = 0; - uint32_t last_probe_data_version = 0; - - //uint64_t last_pass = 0; - uint32_t render_index = 0; - - bool has_dynamic_object_data = false; - - Transform3D transform; - - void update(bool p_update_light_instances, const Vector &p_light_instances, const PagedArray &p_dynamic_objects, RendererSceneRenderRD *p_scene_render); - void debug(RD::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform, bool p_lighting, bool p_emission, float p_alpha); - }; - - mutable RID_Owner voxel_gi_instance_owner; - - _FORCE_INLINE_ VoxelGIInstance *get_probe_instance(RID p_probe) const { - return voxel_gi_instance_owner.get_or_null(p_probe); - }; - - _FORCE_INLINE_ RID voxel_gi_instance_get_texture(RID p_probe) { - VoxelGIInstance *voxel_gi = get_probe_instance(p_probe); - ERR_FAIL_COND_V(!voxel_gi, RID()); - return voxel_gi->texture; - }; - - RS::VoxelGIQuality voxel_gi_quality = RS::VOXEL_GI_QUALITY_LOW; - - /* SDFGI */ - - struct SDFGI { - enum { - 
MAX_CASCADES = 8, - CASCADE_SIZE = 128, - PROBE_DIVISOR = 16, - ANISOTROPY_SIZE = 6, - MAX_DYNAMIC_LIGHTS = 128, - MAX_STATIC_LIGHTS = 1024, - LIGHTPROBE_OCT_SIZE = 6, - SH_SIZE = 16 - }; - - struct Cascade { - struct UBO { - float offset[3]; - float to_cell; - int32_t probe_offset[3]; - uint32_t pad; - }; - - //cascade blocks are full-size for volume (128^3), half size for albedo/emission - RID sdf_tex; - RID light_tex; - RID light_aniso_0_tex; - RID light_aniso_1_tex; - - RID light_data; - RID light_aniso_0_data; - RID light_aniso_1_data; - - struct SolidCell { // this struct is unused, but remains as reference for size - uint32_t position; - uint32_t albedo; - uint32_t static_light; - uint32_t static_light_aniso; - }; - - RID solid_cell_dispatch_buffer; //buffer for indirect compute dispatch - RID solid_cell_buffer; - - RID lightprobe_history_tex; - RID lightprobe_average_tex; - - float cell_size; - Vector3i position; - - static const Vector3i DIRTY_ALL; - Vector3i dirty_regions; //(0,0,0 is not dirty, negative is refresh from the end, DIRTY_ALL is refresh all. 
- - RID sdf_store_uniform_set; - RID sdf_direct_light_uniform_set; - RID scroll_uniform_set; - RID scroll_occlusion_uniform_set; - RID integrate_uniform_set; - RID lights_buffer; - - bool all_dynamic_lights_dirty = true; - }; - - // access to our containers - RendererStorageRD *storage = nullptr; - RendererSceneGIRD *gi = nullptr; - - // used for rendering (voxelization) - RID render_albedo; - RID render_emission; - RID render_emission_aniso; - RID render_occlusion[8]; - RID render_geom_facing; - - RID render_sdf[2]; - RID render_sdf_half[2]; - - // used for ping pong processing in cascades - RID sdf_initialize_uniform_set; - RID sdf_initialize_half_uniform_set; - RID jump_flood_uniform_set[2]; - RID jump_flood_half_uniform_set[2]; - RID sdf_upscale_uniform_set; - int upscale_jfa_uniform_set_index; - RID occlusion_uniform_set; - - uint32_t cascade_size = 128; - - LocalVector cascades; - - RID lightprobe_texture; - RID lightprobe_data; - RID occlusion_texture; - RID occlusion_data; - RID ambient_texture; //integrates with volumetric fog - - RID lightprobe_history_scroll; //used for scrolling lightprobes - RID lightprobe_average_scroll; //used for scrolling lightprobes - - uint32_t history_size = 0; - float solid_cell_ratio = 0; - uint32_t solid_cell_count = 0; - - int num_cascades = 6; - float min_cell_size = 0; - uint32_t probe_axis_count = 0; //amount of probes per axis, this is an odd number because it encloses endpoints - - RID debug_uniform_set; - RID debug_probes_uniform_set; - RID cascades_ubo; - - bool uses_occlusion = false; - float bounce_feedback = 0.5; - bool reads_sky = true; - float energy = 1.0; - float normal_bias = 1.1; - float probe_bias = 1.1; - RS::EnvironmentSDFGIYScale y_scale_mode = RS::ENV_SDFGI_Y_SCALE_75_PERCENT; - - float y_mult = 1.0; - - uint32_t render_pass = 0; - - int32_t cascade_dynamic_light_count[SDFGI::MAX_CASCADES]; //used dynamically - RID integrate_sky_uniform_set; - - void create(RendererSceneEnvironmentRD *p_env, const 
Vector3 &p_world_position, uint32_t p_requested_history_size, RendererSceneGIRD *p_gi); - void erase(); - void update(RendererSceneEnvironmentRD *p_env, const Vector3 &p_world_position); - void update_light(); - void update_probes(RendererSceneEnvironmentRD *p_env, RendererSceneSkyRD::Sky *p_sky); - void store_probes(); - int get_pending_region_data(int p_region, Vector3i &r_local_offset, Vector3i &r_local_size, AABB &r_bounds) const; - void update_cascades(); - - void debug_draw(const CameraMatrix &p_projection, const Transform3D &p_transform, int p_width, int p_height, RID p_render_target, RID p_texture); - void debug_probes(RD::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform); - - void pre_process_gi(const Transform3D &p_transform, RenderDataRD *p_render_data, RendererSceneRenderRD *p_scene_render); - void render_region(RID p_render_buffers, int p_region, const PagedArray &p_instances, RendererSceneRenderRD *p_scene_render); - void render_static_lights(RID p_render_buffers, uint32_t p_cascade_count, const uint32_t *p_cascade_indices, const PagedArray *p_positional_light_cull_result, RendererSceneRenderRD *p_scene_render); - }; - - RS::EnvironmentSDFGIRayCount sdfgi_ray_count = RS::ENV_SDFGI_RAY_COUNT_16; - RS::EnvironmentSDFGIFramesToConverge sdfgi_frames_to_converge = RS::ENV_SDFGI_CONVERGE_IN_30_FRAMES; - RS::EnvironmentSDFGIFramesToUpdateLight sdfgi_frames_to_update_light = RS::ENV_SDFGI_UPDATE_LIGHT_IN_4_FRAMES; - - float sdfgi_solid_cell_ratio = 0.25; - Vector3 sdfgi_debug_probe_pos; - Vector3 sdfgi_debug_probe_dir; - bool sdfgi_debug_probe_enabled = false; - Vector3i sdfgi_debug_probe_index; - - /* SDFGI UPDATE */ - - int sdfgi_get_lightprobe_octahedron_size() const { return SDFGI::LIGHTPROBE_OCT_SIZE; } - - /* GI */ - enum { - MAX_VOXEL_GI_INSTANCES = 8 - }; - - // Struct for use in render buffer - struct RenderBuffersGI { - RID voxel_gi_textures[MAX_VOXEL_GI_INSTANCES]; - RID voxel_gi_buffer; - - RID full_buffer; - 
RID full_dispatch; - RID full_mask; - - RID uniform_set; - bool using_half_size_gi = false; - }; - - struct SDFGIData { - float grid_size[3]; - uint32_t max_cascades; - - uint32_t use_occlusion; - int32_t probe_axis_size; - float probe_to_uvw; - float normal_bias; - - float lightprobe_tex_pixel_size[3]; - float energy; - - float lightprobe_uv_offset[3]; - float y_mult; - - float occlusion_clamp[3]; - uint32_t pad3; - - float occlusion_renormalize[3]; - uint32_t pad4; - - float cascade_probe_size[3]; - uint32_t pad5; - - struct ProbeCascadeData { - float position[3]; //offset of (0,0,0) in world coordinates - float to_probe; // 1/bounds * grid_size - int32_t probe_world_offset[3]; - float to_cell; // 1/bounds * grid_size - }; - - ProbeCascadeData cascades[SDFGI::MAX_CASCADES]; - }; - - struct VoxelGIData { - float xform[16]; // 64 - 64 - - float bounds[3]; // 12 - 76 - float dynamic_range; // 4 - 80 - - float bias; // 4 - 84 - float normal_bias; // 4 - 88 - uint32_t blend_ambient; // 4 - 92 - uint32_t mipmaps; // 4 - 96 - }; - - struct PushConstant { - int32_t screen_size[2]; - float z_near; - float z_far; - - float proj_info[4]; - - uint32_t max_voxel_gi_instances; - uint32_t high_quality_vct; - uint32_t orthogonal; - uint32_t pad; - - float cam_rotation[12]; - }; - - RID sdfgi_ubo; - enum Mode { - MODE_VOXEL_GI, - MODE_SDFGI, - MODE_COMBINED, - MODE_HALF_RES_VOXEL_GI, - MODE_HALF_RES_SDFGI, - MODE_HALF_RES_COMBINED, - MODE_MAX - }; - - RID default_voxel_gi_buffer; - - bool half_resolution = false; - GiShaderRD shader; - RID shader_version; - RID pipelines[MODE_MAX]; - - RendererSceneGIRD(); - ~RendererSceneGIRD(); - - void init(RendererStorageRD *p_storage, RendererSceneSkyRD *p_sky); - void free(); - - SDFGI *create_sdfgi(RendererSceneEnvironmentRD *p_env, const Vector3 &p_world_position, uint32_t p_requested_history_size); - - void setup_voxel_gi_instances(RID p_render_buffers, const Transform3D &p_transform, const PagedArray &p_voxel_gi_instances, uint32_t 
&r_voxel_gi_instances_used, RendererSceneRenderRD *p_scene_render); - void process_gi(RID p_render_buffers, RID p_normal_roughness_buffer, RID p_voxel_gi_buffer, RID p_environment, const CameraMatrix &p_projection, const Transform3D &p_transform, const PagedArray &p_voxel_gi_instances, RendererSceneRenderRD *p_scene_render); - - RID voxel_gi_instance_create(RID p_base); - void voxel_gi_instance_set_transform_to_data(RID p_probe, const Transform3D &p_xform); - bool voxel_gi_needs_update(RID p_probe) const; - void voxel_gi_update(RID p_probe, bool p_update_light_instances, const Vector &p_light_instances, const PagedArray &p_dynamic_objects, RendererSceneRenderRD *p_scene_render); - void debug_voxel_gi(RID p_voxel_gi, RD::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform, bool p_lighting, bool p_emission, float p_alpha); -}; - -#endif /* !RENDERING_SERVER_SCENE_GI_RD_H */ diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp index a50a05d905..6f16c0972e 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp @@ -74,7 +74,7 @@ void RendererSceneRenderRD::sdfgi_update(RID p_render_buffers, RID p_environment rb->sdfgi = nullptr; } - RendererSceneGIRD::SDFGI *sdfgi = rb->sdfgi; + RendererRD::GI::SDFGI *sdfgi = rb->sdfgi; if (sdfgi == nullptr) { // re-create rb->sdfgi = gi.create_sdfgi(env, p_world_position, requested_history_size); @@ -95,9 +95,9 @@ int RendererSceneRenderRD::sdfgi_get_pending_region_count(RID p_render_buffers) int dirty_count = 0; for (uint32_t i = 0; i < rb->sdfgi->cascades.size(); i++) { - const RendererSceneGIRD::SDFGI::Cascade &c = rb->sdfgi->cascades[i]; + const RendererRD::GI::SDFGI::Cascade &c = rb->sdfgi->cascades[i]; - if (c.dirty_regions == RendererSceneGIRD::SDFGI::Cascade::DIRTY_ALL) { + if (c.dirty_regions == 
RendererRD::GI::SDFGI::Cascade::DIRTY_ALL) { dirty_count++; } else { for (int j = 0; j < 3; j++) { @@ -1533,7 +1533,7 @@ void RendererSceneRenderRD::voxel_gi_update(RID p_probe, bool p_update_light_ins gi.voxel_gi_update(p_probe, p_update_light_instances, p_light_instances, p_dynamic_objects, this); } -void RendererSceneRenderRD::_debug_sdfgi_probes(RID p_render_buffers, RD::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform) { +void RendererSceneRenderRD::_debug_sdfgi_probes(RID p_render_buffers, RID p_framebuffer, const uint32_t p_view_count, const CameraMatrix *p_camera_with_transforms, bool p_will_continue_color, bool p_will_continue_depth) { RenderBuffers *rb = render_buffers_owner.get_or_null(p_render_buffers); ERR_FAIL_COND(!rb); @@ -1541,7 +1541,7 @@ void RendererSceneRenderRD::_debug_sdfgi_probes(RID p_render_buffers, RD::DrawLi return; //nothing to debug } - rb->sdfgi->debug_probes(p_draw_list, p_framebuffer, p_camera_with_transform); + rb->sdfgi->debug_probes(p_framebuffer, p_view_count, p_camera_with_transforms, p_will_continue_color, p_will_continue_depth); } //////////////////////////////// @@ -1950,17 +1950,7 @@ void RendererSceneRenderRD::_free_render_buffer_data(RenderBuffers *rb) { rb->taa.prev_velocity = RID(); } - if (rb->ambient_buffer.is_valid()) { - RD::get_singleton()->free(rb->ambient_buffer); - RD::get_singleton()->free(rb->reflection_buffer); - rb->ambient_buffer = RID(); - rb->reflection_buffer = RID(); - } - - if (rb->gi.voxel_gi_buffer.is_valid()) { - RD::get_singleton()->free(rb->gi.voxel_gi_buffer); - rb->gi.voxel_gi_buffer = RID(); - } + rb->rbgi.free(); } void RendererSceneRenderRD::_process_sss(RID p_render_buffers, const CameraMatrix &p_camera) { @@ -2796,11 +2786,11 @@ void RendererSceneRenderRD::_render_buffers_debug_draw(RID p_render_buffers, RID copy_effects->copy_to_fb_rect(_render_buffers_get_normal_texture(p_render_buffers), 
texture_storage->render_target_get_rd_framebuffer(rb->render_target), Rect2(Vector2(), rtsize), false, false); } - if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_GI_BUFFER && rb->ambient_buffer.is_valid()) { + if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_GI_BUFFER && rb->rbgi.ambient_buffer.is_valid()) { Size2 rtsize = texture_storage->render_target_get_size(rb->render_target); - RID ambient_texture = rb->ambient_buffer; - RID reflection_texture = rb->reflection_buffer; - copy_effects->copy_to_fb_rect(ambient_texture, texture_storage->render_target_get_rd_framebuffer(rb->render_target), Rect2(Vector2(), rtsize), false, false, false, true, reflection_texture); + RID ambient_texture = rb->rbgi.ambient_buffer; + RID reflection_texture = rb->rbgi.reflection_buffer; + copy_effects->copy_to_fb_rect(ambient_texture, texture_storage->render_target_get_rd_framebuffer(rb->render_target), Rect2(Vector2(), rtsize), false, false, false, true, reflection_texture, rb->view_count > 1); } if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_OCCLUDERS) { @@ -2869,10 +2859,10 @@ RID RendererSceneRenderRD::render_buffers_get_ssil_texture(RID p_render_buffers) RID RendererSceneRenderRD::render_buffers_get_voxel_gi_buffer(RID p_render_buffers) { RenderBuffers *rb = render_buffers_owner.get_or_null(p_render_buffers); ERR_FAIL_COND_V(!rb, RID()); - if (rb->gi.voxel_gi_buffer.is_null()) { - rb->gi.voxel_gi_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(RendererSceneGIRD::VoxelGIData) * RendererSceneGIRD::MAX_VOXEL_GI_INSTANCES); + if (rb->rbgi.voxel_gi_buffer.is_null()) { + rb->rbgi.voxel_gi_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(RendererRD::GI::VoxelGIData) * RendererRD::GI::MAX_VOXEL_GI_INSTANCES); } - return rb->gi.voxel_gi_buffer; + return rb->rbgi.voxel_gi_buffer; } RID RendererSceneRenderRD::render_buffers_get_default_voxel_gi_buffer() { @@ -2882,12 +2872,13 @@ RID RendererSceneRenderRD::render_buffers_get_default_voxel_gi_buffer() { RID 
RendererSceneRenderRD::render_buffers_get_gi_ambient_texture(RID p_render_buffers) { RenderBuffers *rb = render_buffers_owner.get_or_null(p_render_buffers); ERR_FAIL_COND_V(!rb, RID()); - return rb->ambient_buffer; + + return rb->rbgi.ambient_buffer; } RID RendererSceneRenderRD::render_buffers_get_gi_reflection_texture(RID p_render_buffers) { RenderBuffers *rb = render_buffers_owner.get_or_null(p_render_buffers); ERR_FAIL_COND_V(!rb, RID()); - return rb->reflection_buffer; + return rb->rbgi.reflection_buffer; } uint32_t RendererSceneRenderRD::render_buffers_get_sdfgi_cascade_count(RID p_render_buffers) const { @@ -2925,7 +2916,7 @@ Vector3i RendererSceneRenderRD::render_buffers_get_sdfgi_cascade_probe_offset(RI ERR_FAIL_COND_V(!rb, Vector3i()); ERR_FAIL_COND_V(!rb->sdfgi, Vector3i()); ERR_FAIL_UNSIGNED_INDEX_V(p_cascade, rb->sdfgi->cascades.size(), Vector3i()); - int32_t probe_divisor = rb->sdfgi->cascade_size / RendererSceneGIRD::SDFGI::PROBE_DIVISOR; + int32_t probe_divisor = rb->sdfgi->cascade_size / RendererRD::GI::SDFGI::PROBE_DIVISOR; return rb->sdfgi->cascades[p_cascade].position / probe_divisor; } @@ -4615,8 +4606,8 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e RD::Uniform u; u.uniform_type = RD::UNIFORM_TYPE_TEXTURE; u.binding = 12; - for (int i = 0; i < RendererSceneGIRD::MAX_VOXEL_GI_INSTANCES; i++) { - u.append_id(rb->gi.voxel_gi_textures[i]); + for (int i = 0; i < RendererRD::GI::MAX_VOXEL_GI_INSTANCES; i++) { + u.append_id(rb->rbgi.voxel_gi_textures[i]); } uniforms.push_back(u); copy_uniforms.push_back(u); @@ -4930,7 +4921,7 @@ void RendererSceneRenderRD::_pre_resolve_render(RenderDataRD *p_render_data, boo } } -void RendererSceneRenderRD::_pre_opaque_render(RenderDataRD *p_render_data, bool p_use_ssao, bool p_use_ssil, bool p_use_gi, RID p_normal_roughness_buffer, RID p_voxel_gi_buffer) { +void RendererSceneRenderRD::_pre_opaque_render(RenderDataRD *p_render_data, bool p_use_ssao, bool p_use_ssil, bool p_use_gi, 
RID *p_normal_roughness_views, RID p_voxel_gi_buffer) { // Render shadows while GI is rendering, due to how barriers are handled, this should happen at the same time RendererRD::LightStorage *light_storage = RendererRD::LightStorage::get_singleton(); @@ -5005,7 +4996,7 @@ void RendererSceneRenderRD::_pre_opaque_render(RenderDataRD *p_render_data, bool //start GI if (render_gi) { - gi.process_gi(p_render_data->render_buffers, p_normal_roughness_buffer, p_voxel_gi_buffer, p_render_data->environment, p_render_data->cam_projection, p_render_data->cam_transform, *p_render_data->voxel_gi_instances, this); + gi.process_gi(p_render_data->render_buffers, p_normal_roughness_views, p_voxel_gi_buffer, p_render_data->environment, p_render_data->view_count, p_render_data->view_projection, p_render_data->view_eye_offset, p_render_data->cam_transform, *p_render_data->voxel_gi_instances, this); } //Do shadow rendering (in parallel with GI) @@ -5046,11 +5037,13 @@ void RendererSceneRenderRD::_pre_opaque_render(RenderDataRD *p_render_data, bool } if (p_use_ssao) { - _process_ssao(p_render_data->render_buffers, p_render_data->environment, p_normal_roughness_buffer, p_render_data->cam_projection); + // TODO make these proper stereo and thus use p_normal_roughness_views correctly + _process_ssao(p_render_data->render_buffers, p_render_data->environment, p_normal_roughness_views[0], p_render_data->cam_projection); } if (p_use_ssil) { - _process_ssil(p_render_data->render_buffers, p_render_data->environment, p_normal_roughness_buffer, p_render_data->cam_projection, p_render_data->cam_transform); + // TODO make these proper stereo and thus use p_normal_roughness_views correctly + _process_ssil(p_render_data->render_buffers, p_render_data->environment, p_normal_roughness_views[0], p_render_data->cam_projection, p_render_data->cam_transform); } } @@ -5195,7 +5188,7 @@ void RendererSceneRenderRD::render_scene(RID p_render_buffers, const CameraData //assign render indices to voxel_gi_instances 
if (is_dynamic_gi_supported()) { for (uint32_t i = 0; i < (uint32_t)p_voxel_gi_instances.size(); i++) { - RendererSceneGIRD::VoxelGIInstance *voxel_gi_inst = gi.voxel_gi_instance_owner.get_or_null(p_voxel_gi_instances[i]); + RendererRD::GI::VoxelGIInstance *voxel_gi_inst = gi.voxel_gi_instance_owner.get_or_null(p_voxel_gi_instances[i]); if (voxel_gi_inst) { voxel_gi_inst->render_index = i; } @@ -5249,7 +5242,13 @@ void RendererSceneRenderRD::render_scene(RID p_render_buffers, const CameraData _render_buffers_debug_draw(p_render_buffers, p_shadow_atlas, p_occluder_debug_tex); if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_SDFGI && rb != nullptr && rb->sdfgi != nullptr) { - rb->sdfgi->debug_draw(render_data.cam_projection, render_data.cam_transform, rb->width, rb->height, rb->render_target, rb->texture); + Vector view_rids; + + for (int v = 0; v < rb->views.size(); v++) { + view_rids.push_back(rb->views[v].view_texture); + } + + rb->sdfgi->debug_draw(render_data.view_count, render_data.view_projection, render_data.cam_transform, rb->width, rb->height, rb->render_target, rb->texture, view_rids); } } } @@ -5518,7 +5517,7 @@ bool RendererSceneRenderRD::free(RID p_rid) { } else if (lightmap_instance_owner.owns(p_rid)) { lightmap_instance_owner.free(p_rid); } else if (gi.voxel_gi_instance_owner.owns(p_rid)) { - RendererSceneGIRD::VoxelGIInstance *voxel_gi = gi.voxel_gi_instance_owner.get_or_null(p_rid); + RendererRD::GI::VoxelGIInstance *voxel_gi = gi.voxel_gi_instance_owner.get_or_null(p_rid); if (voxel_gi->texture.is_valid()) { RD::get_singleton()->free(voxel_gi->texture); RD::get_singleton()->free(voxel_gi->write_buffer); diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.h b/servers/rendering/renderer_rd/renderer_scene_render_rd.h index a90c165d83..c87fd6703f 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.h +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.h @@ -38,8 +38,8 @@ #include 
"servers/rendering/renderer_rd/effects/bokeh_dof.h" #include "servers/rendering/renderer_rd/effects/copy_effects.h" #include "servers/rendering/renderer_rd/effects/tone_mapper.h" +#include "servers/rendering/renderer_rd/environment/gi.h" #include "servers/rendering/renderer_rd/renderer_scene_environment_rd.h" -#include "servers/rendering/renderer_rd/renderer_scene_gi_rd.h" #include "servers/rendering/renderer_rd/renderer_scene_sky_rd.h" #include "servers/rendering/renderer_rd/renderer_storage_rd.h" #include "servers/rendering/renderer_rd/shaders/volumetric_fog.glsl.gen.h" @@ -99,7 +99,7 @@ struct RenderDataRD { class RendererSceneRenderRD : public RendererSceneRender { friend RendererSceneSkyRD; - friend RendererSceneGIRD; + friend RendererRD::GI; protected: RendererStorageRD *storage = nullptr; @@ -131,7 +131,7 @@ protected: virtual void _render_sdfgi(RID p_render_buffers, const Vector3i &p_from, const Vector3i &p_size, const AABB &p_bounds, const PagedArray &p_instances, const RID &p_albedo_texture, const RID &p_emission_texture, const RID &p_emission_aniso_texture, const RID &p_geom_facing_texture) = 0; virtual void _render_particle_collider_heightfield(RID p_fb, const Transform3D &p_cam_transform, const CameraMatrix &p_cam_projection, const PagedArray &p_instances) = 0; - void _debug_sdfgi_probes(RID p_render_buffers, RD::DrawListID p_draw_list, RID p_framebuffer, const CameraMatrix &p_camera_with_transform); + void _debug_sdfgi_probes(RID p_render_buffers, RID p_framebuffer, uint32_t p_view_count, const CameraMatrix *p_camera_with_transforms, bool p_will_continue_color, bool p_will_continue_depth); void _debug_draw_cluster(RID p_render_buffers); RenderBufferData *render_buffers_get_data(RID p_render_buffers); @@ -151,7 +151,7 @@ protected: void _post_prepass_render(RenderDataRD *p_render_data, bool p_use_gi); void _pre_resolve_render(RenderDataRD *p_render_data, bool p_use_gi); - void _pre_opaque_render(RenderDataRD *p_render_data, bool p_use_ssao, bool 
p_use_ssil, bool p_use_gi, RID p_normal_roughness_buffer, RID p_voxel_gi_buffer); + void _pre_opaque_render(RenderDataRD *p_render_data, bool p_use_ssao, bool p_use_ssil, bool p_use_gi, RID *p_normal_roughness_views, RID p_voxel_gi_buffer); void _render_buffers_copy_screen_texture(const RenderDataRD *p_render_data); void _render_buffers_copy_depth_texture(const RenderDataRD *p_render_data); @@ -163,7 +163,7 @@ protected: PagedArrayPool cull_argument_pool; PagedArray cull_argument; //need this to exist - RendererSceneGIRD gi; + RendererRD::GI gi; RendererSceneSkyRD sky; RendererSceneEnvironmentRD *get_environment(RID p_environment) { @@ -503,9 +503,9 @@ private: }; Vector views; - RendererSceneGIRD::SDFGI *sdfgi = nullptr; + RendererRD::GI::SDFGI *sdfgi = nullptr; VolumetricFog *volumetric_fog = nullptr; - RendererSceneGIRD::RenderBuffersGI gi; + RendererRD::GI::RenderBuffersGI rbgi; ClusterBuilderRD *cluster_builder = nullptr; @@ -606,9 +606,6 @@ private: RID temp; RID prev_velocity; // Last frame velocity buffer } taa; - - RID ambient_buffer; - RID reflection_buffer; }; /* GI */ @@ -997,6 +994,10 @@ public: virtual Transform3D geometry_instance_get_transform(GeometryInstance *p_instance) = 0; virtual AABB geometry_instance_get_aabb(GeometryInstance *p_instance) = 0; + /* GI */ + + RendererRD::GI *get_gi() { return &gi; } + /* SHADOW ATLAS API */ virtual RID shadow_atlas_create() override; diff --git a/servers/rendering/renderer_rd/renderer_storage_rd.cpp b/servers/rendering/renderer_rd/renderer_storage_rd.cpp index d5166c6905..8c55ff1d0a 100644 --- a/servers/rendering/renderer_rd/renderer_storage_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_storage_rd.cpp @@ -35,6 +35,7 @@ #include "core/io/resource_loader.h" #include "core/math/math_defs.h" #include "renderer_compositor_rd.h" +#include "servers/rendering/renderer_rd/environment/gi.h" #include "servers/rendering/renderer_rd/storage_rd/light_storage.h" #include 
"servers/rendering/renderer_rd/storage_rd/mesh_storage.h" #include "servers/rendering/renderer_rd/storage_rd/particles_storage.h" @@ -173,336 +174,6 @@ void RendererStorageRD::visibility_notifier_call(RID p_notifier, bool p_enter, b } } -/* VOXEL GI */ - -RID RendererStorageRD::voxel_gi_allocate() { - return voxel_gi_owner.allocate_rid(); -} -void RendererStorageRD::voxel_gi_initialize(RID p_voxel_gi) { - voxel_gi_owner.initialize_rid(p_voxel_gi, VoxelGI()); -} - -void RendererStorageRD::voxel_gi_allocate_data(RID p_voxel_gi, const Transform3D &p_to_cell_xform, const AABB &p_aabb, const Vector3i &p_octree_size, const Vector &p_octree_cells, const Vector &p_data_cells, const Vector &p_distance_field, const Vector &p_level_counts) { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND(!voxel_gi); - - if (voxel_gi->octree_buffer.is_valid()) { - RD::get_singleton()->free(voxel_gi->octree_buffer); - RD::get_singleton()->free(voxel_gi->data_buffer); - if (voxel_gi->sdf_texture.is_valid()) { - RD::get_singleton()->free(voxel_gi->sdf_texture); - } - - voxel_gi->sdf_texture = RID(); - voxel_gi->octree_buffer = RID(); - voxel_gi->data_buffer = RID(); - voxel_gi->octree_buffer_size = 0; - voxel_gi->data_buffer_size = 0; - voxel_gi->cell_count = 0; - } - - voxel_gi->to_cell_xform = p_to_cell_xform; - voxel_gi->bounds = p_aabb; - voxel_gi->octree_size = p_octree_size; - voxel_gi->level_counts = p_level_counts; - - if (p_octree_cells.size()) { - ERR_FAIL_COND(p_octree_cells.size() % 32 != 0); //cells size must be a multiple of 32 - - uint32_t cell_count = p_octree_cells.size() / 32; - - ERR_FAIL_COND(p_data_cells.size() != (int)cell_count * 16); //see that data size matches - - voxel_gi->cell_count = cell_count; - voxel_gi->octree_buffer = RD::get_singleton()->storage_buffer_create(p_octree_cells.size(), p_octree_cells); - voxel_gi->octree_buffer_size = p_octree_cells.size(); - voxel_gi->data_buffer = 
RD::get_singleton()->storage_buffer_create(p_data_cells.size(), p_data_cells); - voxel_gi->data_buffer_size = p_data_cells.size(); - - if (p_distance_field.size()) { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R8_UNORM; - tf.width = voxel_gi->octree_size.x; - tf.height = voxel_gi->octree_size.y; - tf.depth = voxel_gi->octree_size.z; - tf.texture_type = RD::TEXTURE_TYPE_3D; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; - Vector> s; - s.push_back(p_distance_field); - voxel_gi->sdf_texture = RD::get_singleton()->texture_create(tf, RD::TextureView(), s); - } -#if 0 - { - RD::TextureFormat tf; - tf.format = RD::DATA_FORMAT_R8_UNORM; - tf.width = voxel_gi->octree_size.x; - tf.height = voxel_gi->octree_size.y; - tf.depth = voxel_gi->octree_size.z; - tf.type = RD::TEXTURE_TYPE_3D; - tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; - tf.shareable_formats.push_back(RD::DATA_FORMAT_R8_UNORM); - tf.shareable_formats.push_back(RD::DATA_FORMAT_R8_UINT); - voxel_gi->sdf_texture = RD::get_singleton()->texture_create(tf, RD::TextureView()); - } - RID shared_tex; - { - RD::TextureView tv; - tv.format_override = RD::DATA_FORMAT_R8_UINT; - shared_tex = RD::get_singleton()->texture_create_shared(tv, voxel_gi->sdf_texture); - } - //update SDF texture - Vector uniforms; - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.binding = 1; - u.append_id(voxel_gi->octree_buffer); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; - u.binding = 2; - u.append_id(voxel_gi->data_buffer); - uniforms.push_back(u); - } - { - RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; - u.binding = 3; - u.append_id(shared_tex); - uniforms.push_back(u); - } - - RID uniform_set = RD::get_singleton()->uniform_set_create(uniforms, voxel_gi_sdf_shader_version_shader, 0); - - { - 
uint32_t push_constant[4] = { 0, 0, 0, 0 }; - - for (int i = 0; i < voxel_gi->level_counts.size() - 1; i++) { - push_constant[0] += voxel_gi->level_counts[i]; - } - push_constant[1] = push_constant[0] + voxel_gi->level_counts[voxel_gi->level_counts.size() - 1]; - - print_line("offset: " + itos(push_constant[0])); - print_line("size: " + itos(push_constant[1])); - //create SDF - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, voxel_gi_sdf_shader_pipeline); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set, 0); - RD::get_singleton()->compute_list_set_push_constant(compute_list, push_constant, sizeof(uint32_t) * 4); - RD::get_singleton()->compute_list_dispatch(compute_list, voxel_gi->octree_size.x / 4, voxel_gi->octree_size.y / 4, voxel_gi->octree_size.z / 4); - RD::get_singleton()->compute_list_end(); - } - - RD::get_singleton()->free(uniform_set); - RD::get_singleton()->free(shared_tex); - } -#endif - } - - voxel_gi->version++; - voxel_gi->data_version++; - - voxel_gi->dependency.changed_notify(DEPENDENCY_CHANGED_AABB); -} - -AABB RendererStorageRD::voxel_gi_get_bounds(RID p_voxel_gi) const { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND_V(!voxel_gi, AABB()); - - return voxel_gi->bounds; -} - -Vector3i RendererStorageRD::voxel_gi_get_octree_size(RID p_voxel_gi) const { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND_V(!voxel_gi, Vector3i()); - return voxel_gi->octree_size; -} - -Vector RendererStorageRD::voxel_gi_get_octree_cells(RID p_voxel_gi) const { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND_V(!voxel_gi, Vector()); - - if (voxel_gi->octree_buffer.is_valid()) { - return RD::get_singleton()->buffer_get_data(voxel_gi->octree_buffer); - } - return Vector(); -} - -Vector RendererStorageRD::voxel_gi_get_data_cells(RID p_voxel_gi) const { - 
VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND_V(!voxel_gi, Vector()); - - if (voxel_gi->data_buffer.is_valid()) { - return RD::get_singleton()->buffer_get_data(voxel_gi->data_buffer); - } - return Vector(); -} - -Vector RendererStorageRD::voxel_gi_get_distance_field(RID p_voxel_gi) const { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND_V(!voxel_gi, Vector()); - - if (voxel_gi->data_buffer.is_valid()) { - return RD::get_singleton()->texture_get_data(voxel_gi->sdf_texture, 0); - } - return Vector(); -} - -Vector RendererStorageRD::voxel_gi_get_level_counts(RID p_voxel_gi) const { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND_V(!voxel_gi, Vector()); - - return voxel_gi->level_counts; -} - -Transform3D RendererStorageRD::voxel_gi_get_to_cell_xform(RID p_voxel_gi) const { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND_V(!voxel_gi, Transform3D()); - - return voxel_gi->to_cell_xform; -} - -void RendererStorageRD::voxel_gi_set_dynamic_range(RID p_voxel_gi, float p_range) { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND(!voxel_gi); - - voxel_gi->dynamic_range = p_range; - voxel_gi->version++; -} - -float RendererStorageRD::voxel_gi_get_dynamic_range(RID p_voxel_gi) const { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND_V(!voxel_gi, 0); - - return voxel_gi->dynamic_range; -} - -void RendererStorageRD::voxel_gi_set_propagation(RID p_voxel_gi, float p_range) { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND(!voxel_gi); - - voxel_gi->propagation = p_range; - voxel_gi->version++; -} - -float RendererStorageRD::voxel_gi_get_propagation(RID p_voxel_gi) const { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND_V(!voxel_gi, 0); - return voxel_gi->propagation; -} - -void RendererStorageRD::voxel_gi_set_energy(RID p_voxel_gi, float p_energy) { - 
VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND(!voxel_gi); - - voxel_gi->energy = p_energy; -} - -float RendererStorageRD::voxel_gi_get_energy(RID p_voxel_gi) const { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND_V(!voxel_gi, 0); - return voxel_gi->energy; -} - -void RendererStorageRD::voxel_gi_set_bias(RID p_voxel_gi, float p_bias) { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND(!voxel_gi); - - voxel_gi->bias = p_bias; -} - -float RendererStorageRD::voxel_gi_get_bias(RID p_voxel_gi) const { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND_V(!voxel_gi, 0); - return voxel_gi->bias; -} - -void RendererStorageRD::voxel_gi_set_normal_bias(RID p_voxel_gi, float p_normal_bias) { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND(!voxel_gi); - - voxel_gi->normal_bias = p_normal_bias; -} - -float RendererStorageRD::voxel_gi_get_normal_bias(RID p_voxel_gi) const { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND_V(!voxel_gi, 0); - return voxel_gi->normal_bias; -} - -void RendererStorageRD::voxel_gi_set_anisotropy_strength(RID p_voxel_gi, float p_strength) { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND(!voxel_gi); - - voxel_gi->anisotropy_strength = p_strength; -} - -float RendererStorageRD::voxel_gi_get_anisotropy_strength(RID p_voxel_gi) const { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND_V(!voxel_gi, 0); - return voxel_gi->anisotropy_strength; -} - -void RendererStorageRD::voxel_gi_set_interior(RID p_voxel_gi, bool p_enable) { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND(!voxel_gi); - - voxel_gi->interior = p_enable; -} - -void RendererStorageRD::voxel_gi_set_use_two_bounces(RID p_voxel_gi, bool p_enable) { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND(!voxel_gi); - - 
voxel_gi->use_two_bounces = p_enable; - voxel_gi->version++; -} - -bool RendererStorageRD::voxel_gi_is_using_two_bounces(RID p_voxel_gi) const { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND_V(!voxel_gi, false); - return voxel_gi->use_two_bounces; -} - -bool RendererStorageRD::voxel_gi_is_interior(RID p_voxel_gi) const { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND_V(!voxel_gi, 0); - return voxel_gi->interior; -} - -uint32_t RendererStorageRD::voxel_gi_get_version(RID p_voxel_gi) { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND_V(!voxel_gi, 0); - return voxel_gi->version; -} - -uint32_t RendererStorageRD::voxel_gi_get_data_version(RID p_voxel_gi) { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND_V(!voxel_gi, 0); - return voxel_gi->data_version; -} - -RID RendererStorageRD::voxel_gi_get_octree_buffer(RID p_voxel_gi) const { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND_V(!voxel_gi, RID()); - return voxel_gi->octree_buffer; -} - -RID RendererStorageRD::voxel_gi_get_data_buffer(RID p_voxel_gi) const { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND_V(!voxel_gi, RID()); - return voxel_gi->data_buffer; -} - -RID RendererStorageRD::voxel_gi_get_sdf_texture(RID p_voxel_gi) { - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_voxel_gi); - ERR_FAIL_COND_V(!voxel_gi, RID()); - - return voxel_gi->sdf_texture; -} - /* misc */ void RendererStorageRD::base_update_dependency(RID p_base, DependencyTracker *p_instance) { @@ -521,8 +192,8 @@ void RendererStorageRD::base_update_dependency(RID p_base, DependencyTracker *p_ } else if (RendererRD::TextureStorage::get_singleton()->owns_decal(p_base)) { RendererRD::Decal *decal = RendererRD::TextureStorage::get_singleton()->get_decal(p_base); p_instance->update_dependency(&decal->dependency); - } else if (voxel_gi_owner.owns(p_base)) { - VoxelGI *gip = 
voxel_gi_owner.get_or_null(p_base); + } else if (RendererRD::GI::get_singleton()->owns_voxel_gi(p_base)) { + RendererRD::GI::VoxelGI *gip = RendererRD::GI::get_singleton()->get_voxel_gi(p_base); p_instance->update_dependency(&gip->dependency); } else if (RendererRD::LightStorage::get_singleton()->owns_lightmap(p_base)) { RendererRD::Lightmap *lm = RendererRD::LightStorage::get_singleton()->get_lightmap(p_base); @@ -558,7 +229,7 @@ RS::InstanceType RendererStorageRD::get_base_type(RID p_rid) const { if (RendererRD::TextureStorage::get_singleton()->owns_decal(p_rid)) { return RS::INSTANCE_DECAL; } - if (voxel_gi_owner.owns(p_rid)) { + if (RendererRD::GI::get_singleton()->owns_voxel_gi(p_rid)) { return RS::INSTANCE_VOXEL_GI; } if (RendererRD::LightStorage::get_singleton()->owns_light(p_rid)) { @@ -636,11 +307,8 @@ bool RendererStorageRD::free(RID p_rid) { RendererRD::LightStorage::get_singleton()->reflection_probe_free(p_rid); } else if (RendererRD::TextureStorage::get_singleton()->owns_decal(p_rid)) { RendererRD::TextureStorage::get_singleton()->decal_free(p_rid); - } else if (voxel_gi_owner.owns(p_rid)) { - voxel_gi_allocate_data(p_rid, Transform3D(), AABB(), Vector3i(), Vector(), Vector(), Vector(), Vector()); //deallocate - VoxelGI *voxel_gi = voxel_gi_owner.get_or_null(p_rid); - voxel_gi->dependency.deleted_notify(p_rid); - voxel_gi_owner.free(p_rid); + } else if (RendererRD::GI::get_singleton()->owns_voxel_gi(p_rid)) { + RendererRD::GI::get_singleton()->voxel_gi_free(p_rid); } else if (RendererRD::LightStorage::get_singleton()->owns_lightmap(p_rid)) { RendererRD::LightStorage::get_singleton()->lightmap_free(p_rid); } else if (RendererRD::LightStorage::get_singleton()->owns_light(p_rid)) { diff --git a/servers/rendering/renderer_rd/renderer_storage_rd.h b/servers/rendering/renderer_rd/renderer_storage_rd.h index 07fae45a26..d41129d678 100644 --- a/servers/rendering/renderer_rd/renderer_storage_rd.h +++ b/servers/rendering/renderer_rd/renderer_storage_rd.h @@ 
-36,7 +36,7 @@ #include "core/templates/rid_owner.h" #include "servers/rendering/renderer_compositor.h" #include "servers/rendering/renderer_rd/effects_rd.h" -#include "servers/rendering/renderer_rd/shaders/voxel_gi_sdf.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/environment/voxel_gi_sdf.glsl.gen.h" #include "servers/rendering/renderer_rd/storage_rd/material_storage.h" #include "servers/rendering/renderer_scene_render.h" #include "servers/rendering/rendering_device.h" @@ -147,42 +147,6 @@ private: mutable RID_Owner visibility_notifier_owner; - /* VOXEL GI */ - - struct VoxelGI { - RID octree_buffer; - RID data_buffer; - RID sdf_texture; - - uint32_t octree_buffer_size = 0; - uint32_t data_buffer_size = 0; - - Vector level_counts; - - int cell_count = 0; - - Transform3D to_cell_xform; - AABB bounds; - Vector3i octree_size; - - float dynamic_range = 2.0; - float energy = 1.0; - float bias = 1.4; - float normal_bias = 0.0; - float propagation = 0.7; - bool interior = false; - bool use_two_bounces = false; - - float anisotropy_strength = 0.5; - - uint32_t version = 1; - uint32_t data_version = 1; - - Dependency dependency; - }; - - mutable RID_Owner voxel_gi_owner; - /* EFFECTS */ EffectsRD *effects = nullptr; @@ -192,54 +156,6 @@ public: void base_update_dependency(RID p_base, DependencyTracker *p_instance); - /* VOXEL GI API */ - - RID voxel_gi_allocate(); - void voxel_gi_initialize(RID p_voxel_gi); - - void voxel_gi_allocate_data(RID p_voxel_gi, const Transform3D &p_to_cell_xform, const AABB &p_aabb, const Vector3i &p_octree_size, const Vector &p_octree_cells, const Vector &p_data_cells, const Vector &p_distance_field, const Vector &p_level_counts); - - AABB voxel_gi_get_bounds(RID p_voxel_gi) const; - Vector3i voxel_gi_get_octree_size(RID p_voxel_gi) const; - Vector voxel_gi_get_octree_cells(RID p_voxel_gi) const; - Vector voxel_gi_get_data_cells(RID p_voxel_gi) const; - Vector voxel_gi_get_distance_field(RID p_voxel_gi) const; - - Vector 
voxel_gi_get_level_counts(RID p_voxel_gi) const; - Transform3D voxel_gi_get_to_cell_xform(RID p_voxel_gi) const; - - void voxel_gi_set_dynamic_range(RID p_voxel_gi, float p_range); - float voxel_gi_get_dynamic_range(RID p_voxel_gi) const; - - void voxel_gi_set_propagation(RID p_voxel_gi, float p_range); - float voxel_gi_get_propagation(RID p_voxel_gi) const; - - void voxel_gi_set_energy(RID p_voxel_gi, float p_energy); - float voxel_gi_get_energy(RID p_voxel_gi) const; - - void voxel_gi_set_bias(RID p_voxel_gi, float p_bias); - float voxel_gi_get_bias(RID p_voxel_gi) const; - - void voxel_gi_set_normal_bias(RID p_voxel_gi, float p_range); - float voxel_gi_get_normal_bias(RID p_voxel_gi) const; - - void voxel_gi_set_interior(RID p_voxel_gi, bool p_enable); - bool voxel_gi_is_interior(RID p_voxel_gi) const; - - void voxel_gi_set_use_two_bounces(RID p_voxel_gi, bool p_enable); - bool voxel_gi_is_using_two_bounces(RID p_voxel_gi) const; - - void voxel_gi_set_anisotropy_strength(RID p_voxel_gi, float p_strength); - float voxel_gi_get_anisotropy_strength(RID p_voxel_gi) const; - - uint32_t voxel_gi_get_version(RID p_probe); - uint32_t voxel_gi_get_data_version(RID p_probe); - - RID voxel_gi_get_octree_buffer(RID p_voxel_gi) const; - RID voxel_gi_get_data_buffer(RID p_voxel_gi) const; - - RID voxel_gi_get_sdf_texture(RID p_voxel_gi); - /* FOG VOLUMES */ virtual RID fog_volume_allocate(); diff --git a/servers/rendering/renderer_rd/shaders/SCsub b/servers/rendering/renderer_rd/shaders/SCsub index acb843bfb6..05663226c0 100644 --- a/servers/rendering/renderer_rd/shaders/SCsub +++ b/servers/rendering/renderer_rd/shaders/SCsub @@ -17,3 +17,4 @@ if "RD_GLSL" in env["BUILDERS"]: env.RD_GLSL(glsl_file) SConscript("effects/SCsub") +SConscript("environment/SCsub") diff --git a/servers/rendering/renderer_rd/shaders/effects/resolve.glsl b/servers/rendering/renderer_rd/shaders/effects/resolve.glsl new file mode 100644 index 0000000000..0e086331c0 --- /dev/null +++ 
b/servers/rendering/renderer_rd/shaders/effects/resolve.glsl @@ -0,0 +1,236 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +#ifdef MODE_RESOLVE_DEPTH +layout(set = 0, binding = 0) uniform sampler2DMS source_depth; +layout(r32f, set = 1, binding = 0) uniform restrict writeonly image2D dest_depth; +#endif + +#ifdef MODE_RESOLVE_GI +layout(set = 0, binding = 0) uniform sampler2DMS source_depth; +layout(set = 0, binding = 1) uniform sampler2DMS source_normal_roughness; + +layout(r32f, set = 1, binding = 0) uniform restrict writeonly image2D dest_depth; +layout(rgba8, set = 1, binding = 1) uniform restrict writeonly image2D dest_normal_roughness; + +#ifdef VOXEL_GI_RESOLVE +layout(set = 2, binding = 0) uniform usampler2DMS source_voxel_gi; +layout(rg8ui, set = 3, binding = 0) uniform restrict writeonly uimage2D dest_voxel_gi; +#endif + +#endif + +layout(push_constant, std430) uniform Params { + ivec2 screen_size; + int sample_count; + uint pad; +} +params; + +void main() { + // Pixel being shaded + ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(pos, params.screen_size))) { //too large, do nothing + return; + } + +#ifdef MODE_RESOLVE_DEPTH + + float depth_avg = 0.0; + for (int i = 0; i < params.sample_count; i++) { + depth_avg += texelFetch(source_depth, pos, i).r; + } + depth_avg /= float(params.sample_count); + imageStore(dest_depth, pos, vec4(depth_avg)); + +#endif + +#ifdef MODE_RESOLVE_GI + + float best_depth = 1e20; + vec4 best_normal_roughness = vec4(0.0); +#ifdef VOXEL_GI_RESOLVE + uvec2 best_voxel_gi; +#endif + +#if 0 + + for(int i=0;i= 4) { + group1.z = texelFetch(source_depth, pos, 2).r; + group1.w = texelFetch(source_depth, pos, 3).r; + } + //8X + if (params.sample_count >= 8) { + group2.x = texelFetch(source_depth, pos, 4).r; + group2.y = texelFetch(source_depth, pos, 5).r; + group2.z = texelFetch(source_depth, pos, 6).r; + group2.w = texelFetch(source_depth, 
pos, 7).r; + } + //16X + if (params.sample_count >= 16) { + group3.x = texelFetch(source_depth, pos, 8).r; + group3.y = texelFetch(source_depth, pos, 9).r; + group3.z = texelFetch(source_depth, pos, 10).r; + group3.w = texelFetch(source_depth, pos, 11).r; + + group4.x = texelFetch(source_depth, pos, 12).r; + group4.y = texelFetch(source_depth, pos, 13).r; + group4.z = texelFetch(source_depth, pos, 14).r; + group4.w = texelFetch(source_depth, pos, 15).r; + } + + if (params.sample_count == 2) { + best_index = (pos.x & 1) ^ ((pos.y >> 1) & 1); //not much can be done here + } else if (params.sample_count == 4) { + vec4 freq = vec4(equal(group1, vec4(group1.x))); + freq += vec4(equal(group1, vec4(group1.y))); + freq += vec4(equal(group1, vec4(group1.z))); + freq += vec4(equal(group1, vec4(group1.w))); + + float min_f = freq.x; + best_index = 0; + if (freq.y < min_f) { + best_index = 1; + min_f = freq.y; + } + if (freq.z < min_f) { + best_index = 2; + min_f = freq.z; + } + if (freq.w < min_f) { + best_index = 3; + } + } else if (params.sample_count == 8) { + vec4 freq0 = vec4(equal(group1, vec4(group1.x))); + vec4 freq1 = vec4(equal(group2, vec4(group1.x))); + freq0 += vec4(equal(group1, vec4(group1.y))); + freq1 += vec4(equal(group2, vec4(group1.y))); + freq0 += vec4(equal(group1, vec4(group1.z))); + freq1 += vec4(equal(group2, vec4(group1.z))); + freq0 += vec4(equal(group1, vec4(group1.w))); + freq1 += vec4(equal(group2, vec4(group1.w))); + freq0 += vec4(equal(group1, vec4(group2.x))); + freq1 += vec4(equal(group2, vec4(group2.x))); + freq0 += vec4(equal(group1, vec4(group2.y))); + freq1 += vec4(equal(group2, vec4(group2.y))); + freq0 += vec4(equal(group1, vec4(group2.z))); + freq1 += vec4(equal(group2, vec4(group2.z))); + freq0 += vec4(equal(group1, vec4(group2.w))); + freq1 += vec4(equal(group2, vec4(group2.w))); + + float min_f0 = freq0.x; + int best_index0 = 0; + if (freq0.y < min_f0) { + best_index0 = 1; + min_f0 = freq0.y; + } + if (freq0.z < min_f0) { + 
best_index0 = 2; + min_f0 = freq0.z; + } + if (freq0.w < min_f0) { + best_index0 = 3; + min_f0 = freq0.w; + } + + float min_f1 = freq1.x; + int best_index1 = 4; + if (freq1.y < min_f1) { + best_index1 = 5; + min_f1 = freq1.y; + } + if (freq1.z < min_f1) { + best_index1 = 6; + min_f1 = freq1.z; + } + if (freq1.w < min_f1) { + best_index1 = 7; + min_f1 = freq1.w; + } + + best_index = mix(best_index0, best_index1, min_f0 < min_f1); + } + +#else + float depths[16]; + int depth_indices[16]; + int depth_amount[16]; + int depth_count = 0; + + for (int i = 0; i < params.sample_count; i++) { + float depth = texelFetch(source_depth, pos, i).r; + int depth_index = -1; + for (int j = 0; j < depth_count; j++) { + if (abs(depths[j] - depth) < 0.000001) { + depth_index = j; + break; + } + } + + if (depth_index == -1) { + depths[depth_count] = depth; + depth_indices[depth_count] = i; + depth_amount[depth_count] = 1; + depth_count += 1; + } else { + depth_amount[depth_index] += 1; + } + } + + int depth_least = 0xFFFF; + int best_index = 0; + for (int j = 0; j < depth_count; j++) { + if (depth_amount[j] < depth_least) { + best_index = depth_indices[j]; + depth_least = depth_amount[j]; + } + } +#endif + best_depth = texelFetch(source_depth, pos, best_index).r; + best_normal_roughness = texelFetch(source_normal_roughness, pos, best_index); +#ifdef VOXEL_GI_RESOLVE + best_voxel_gi = texelFetch(source_voxel_gi, pos, best_index).rg; +#endif + +#endif + + imageStore(dest_depth, pos, vec4(best_depth)); + imageStore(dest_normal_roughness, pos, vec4(best_normal_roughness)); +#ifdef VOXEL_GI_RESOLVE + imageStore(dest_voxel_gi, pos, uvec4(best_voxel_gi, 0, 0)); +#endif + +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/environment/SCsub b/servers/rendering/renderer_rd/shaders/environment/SCsub new file mode 100644 index 0000000000..fc513d3fb9 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/environment/SCsub @@ -0,0 +1,17 @@ +#!/usr/bin/env python + +Import("env") + +if 
"RD_GLSL" in env["BUILDERS"]: + # find all include files + gl_include_files = [str(f) for f in Glob("*_inc.glsl")] + + # find all shader code(all glsl files excluding our include files) + glsl_files = [str(f) for f in Glob("*.glsl") if str(f) not in gl_include_files] + + # make sure we recompile shaders if include files change + env.Depends([f + ".gen.h" for f in glsl_files], gl_include_files) + + # compile shaders + for glsl_file in glsl_files: + env.RD_GLSL(glsl_file) diff --git a/servers/rendering/renderer_rd/shaders/environment/gi.glsl b/servers/rendering/renderer_rd/shaders/environment/gi.glsl new file mode 100644 index 0000000000..f687d50a2d --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/environment/gi.glsl @@ -0,0 +1,672 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +#define M_PI 3.141592 + +#define SDFGI_MAX_CASCADES 8 + +//set 0 for SDFGI and render buffers + +layout(set = 0, binding = 1) uniform texture3D sdf_cascades[SDFGI_MAX_CASCADES]; +layout(set = 0, binding = 2) uniform texture3D light_cascades[SDFGI_MAX_CASCADES]; +layout(set = 0, binding = 3) uniform texture3D aniso0_cascades[SDFGI_MAX_CASCADES]; +layout(set = 0, binding = 4) uniform texture3D aniso1_cascades[SDFGI_MAX_CASCADES]; +layout(set = 0, binding = 5) uniform texture3D occlusion_texture; + +layout(set = 0, binding = 6) uniform sampler linear_sampler; +layout(set = 0, binding = 7) uniform sampler linear_sampler_with_mipmaps; + +struct ProbeCascadeData { + vec3 position; + float to_probe; + ivec3 probe_world_offset; + float to_cell; // 1/bounds * grid_size +}; + +layout(rgba16f, set = 0, binding = 9) uniform restrict writeonly image2D ambient_buffer; +layout(rgba16f, set = 0, binding = 10) uniform restrict writeonly image2D reflection_buffer; + +layout(set = 0, binding = 11) uniform texture2DArray lightprobe_texture; + +layout(set = 0, binding = 12) uniform texture2D depth_buffer; +layout(set = 0, binding = 
13) uniform texture2D normal_roughness_buffer; +layout(set = 0, binding = 14) uniform utexture2D voxel_gi_buffer; + +layout(set = 0, binding = 15, std140) uniform SDFGI { + vec3 grid_size; + uint max_cascades; + + bool use_occlusion; + int probe_axis_size; + float probe_to_uvw; + float normal_bias; + + vec3 lightprobe_tex_pixel_size; + float energy; + + vec3 lightprobe_uv_offset; + float y_mult; + + vec3 occlusion_clamp; + uint pad3; + + vec3 occlusion_renormalize; + uint pad4; + + vec3 cascade_probe_size; + uint pad5; + + ProbeCascadeData cascades[SDFGI_MAX_CASCADES]; +} +sdfgi; + +#define MAX_VOXEL_GI_INSTANCES 8 + +struct VoxelGIData { + mat4 xform; // 64 - 64 + + vec3 bounds; // 12 - 76 + float dynamic_range; // 4 - 80 + + float bias; // 4 - 84 + float normal_bias; // 4 - 88 + bool blend_ambient; // 4 - 92 + uint mipmaps; // 4 - 96 +}; + +layout(set = 0, binding = 16, std140) uniform VoxelGIs { + VoxelGIData data[MAX_VOXEL_GI_INSTANCES]; +} +voxel_gi_instances; + +layout(set = 0, binding = 17) uniform texture3D voxel_gi_textures[MAX_VOXEL_GI_INSTANCES]; + +layout(set = 0, binding = 18, std140) uniform SceneData { + mat4x4 inv_projection[2]; + mat4x4 cam_transform; + vec4 eye_offset[2]; + + ivec2 screen_size; + float pad1; + float pad2; +} +scene_data; + +layout(push_constant, std430) uniform Params { + uint view_index; + uint max_voxel_gi_instances; + bool high_quality_vct; + bool orthogonal; + + vec4 proj_info; + + float z_near; + float z_far; + float pad1; + float pad2; +} +params; + +vec2 octahedron_wrap(vec2 v) { + vec2 signVal; + signVal.x = v.x >= 0.0 ? 1.0 : -1.0; + signVal.y = v.y >= 0.0 ? 1.0 : -1.0; + return (1.0 - abs(v.yx)) * signVal; +} + +vec2 octahedron_encode(vec3 n) { + // https://twitter.com/Stubbesaurus/status/937994790553227264 + n /= (abs(n.x) + abs(n.y) + abs(n.z)); + n.xy = n.z >= 0.0 ? 
n.xy : octahedron_wrap(n.xy); + n.xy = n.xy * 0.5 + 0.5; + return n.xy; +} + +vec4 blend_color(vec4 src, vec4 dst) { + vec4 res; + float sa = 1.0 - src.a; + res.a = dst.a * sa + src.a; + if (res.a == 0.0) { + res.rgb = vec3(0); + } else { + res.rgb = (dst.rgb * dst.a * sa + src.rgb * src.a) / res.a; + } + return res; +} + +vec3 reconstruct_position(ivec2 screen_pos) { +#ifdef USE_MULTIVIEW + vec4 pos; + pos.xy = (2.0 * vec2(screen_pos) / vec2(scene_data.screen_size)) - 1.0; + pos.z = texelFetch(sampler2D(depth_buffer, linear_sampler), screen_pos, 0).r * 2.0 - 1.0; + pos.w = 1.0; + + pos = scene_data.inv_projection[params.view_index] * pos; + + return pos.xyz / pos.w; +#else + vec3 pos; + pos.z = texelFetch(sampler2D(depth_buffer, linear_sampler), screen_pos, 0).r; + + pos.z = pos.z * 2.0 - 1.0; + if (params.orthogonal) { + pos.z = ((pos.z + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; + } else { + pos.z = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - pos.z * (params.z_far - params.z_near)); + } + pos.z = -pos.z; + + pos.xy = vec2(screen_pos) * params.proj_info.xy + params.proj_info.zw; + if (!params.orthogonal) { + pos.xy *= pos.z; + } + + return pos; +#endif +} + +void sdfvoxel_gi_process(uint cascade, vec3 cascade_pos, vec3 cam_pos, vec3 cam_normal, vec3 cam_specular_normal, float roughness, out vec3 diffuse_light, out vec3 specular_light) { + cascade_pos += cam_normal * sdfgi.normal_bias; + + vec3 base_pos = floor(cascade_pos); + //cascade_pos += mix(vec3(0.0),vec3(0.01),lessThan(abs(cascade_pos-base_pos),vec3(0.01))) * cam_normal; + ivec3 probe_base_pos = ivec3(base_pos); + + vec4 diffuse_accum = vec4(0.0); + vec3 specular_accum; + + ivec3 tex_pos = ivec3(probe_base_pos.xy, int(cascade)); + tex_pos.x += probe_base_pos.z * sdfgi.probe_axis_size; + tex_pos.xy = tex_pos.xy * (SDFGI_OCT_SIZE + 2) + ivec2(1); + + vec3 diffuse_posf = (vec3(tex_pos) + vec3(octahedron_encode(cam_normal) 
* float(SDFGI_OCT_SIZE), 0.0)) * sdfgi.lightprobe_tex_pixel_size; + + vec3 specular_posf = (vec3(tex_pos) + vec3(octahedron_encode(cam_specular_normal) * float(SDFGI_OCT_SIZE), 0.0)) * sdfgi.lightprobe_tex_pixel_size; + + specular_accum = vec3(0.0); + + vec4 light_accum = vec4(0.0); + float weight_accum = 0.0; + + for (uint j = 0; j < 8; j++) { + ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); + ivec3 probe_posi = probe_base_pos; + probe_posi += offset; + + // Compute weight + + vec3 probe_pos = vec3(probe_posi); + vec3 probe_to_pos = cascade_pos - probe_pos; + vec3 probe_dir = normalize(-probe_to_pos); + + vec3 trilinear = vec3(1.0) - abs(probe_to_pos); + float weight = trilinear.x * trilinear.y * trilinear.z * max(0.005, dot(cam_normal, probe_dir)); + + // Compute lightprobe occlusion + + if (sdfgi.use_occlusion) { + ivec3 occ_indexv = abs((sdfgi.cascades[cascade].probe_world_offset + probe_posi) & ivec3(1, 1, 1)) * ivec3(1, 2, 4); + vec4 occ_mask = mix(vec4(0.0), vec4(1.0), equal(ivec4(occ_indexv.x | occ_indexv.y), ivec4(0, 1, 2, 3))); + + vec3 occ_pos = clamp(cascade_pos, probe_pos - sdfgi.occlusion_clamp, probe_pos + sdfgi.occlusion_clamp) * sdfgi.probe_to_uvw; + occ_pos.z += float(cascade); + if (occ_indexv.z != 0) { //z bit is on, means index is >=4, so make it switch to the other half of textures + occ_pos.x += 1.0; + } + + occ_pos *= sdfgi.occlusion_renormalize; + float occlusion = dot(textureLod(sampler3D(occlusion_texture, linear_sampler), occ_pos, 0.0), occ_mask); + + weight *= max(occlusion, 0.01); + } + + // Compute lightprobe texture position + + vec3 diffuse; + vec3 pos_uvw = diffuse_posf; + pos_uvw.xy += vec2(offset.xy) * sdfgi.lightprobe_uv_offset.xy; + pos_uvw.x += float(offset.z) * sdfgi.lightprobe_uv_offset.z; + diffuse = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0).rgb; + + diffuse_accum += vec4(diffuse * weight, weight); + + { + vec3 specular = vec3(0.0); + vec3 pos_uvw = specular_posf; + 
pos_uvw.xy += vec2(offset.xy) * sdfgi.lightprobe_uv_offset.xy; + pos_uvw.x += float(offset.z) * sdfgi.lightprobe_uv_offset.z; + if (roughness < 0.99) { + specular = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw + vec3(0, 0, float(sdfgi.max_cascades)), 0.0).rgb; + } + if (roughness > 0.2) { + specular = mix(specular, textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0).rgb, (roughness - 0.2) * 1.25); + } + + specular_accum += specular * weight; + } + } + + if (diffuse_accum.a > 0.0) { + diffuse_accum.rgb /= diffuse_accum.a; + } + + diffuse_light = diffuse_accum.rgb; + + if (diffuse_accum.a > 0.0) { + specular_accum /= diffuse_accum.a; + } + + specular_light = specular_accum; +} + +void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, out vec4 ambient_light, out vec4 reflection_light) { + //make vertex orientation the world one, but still align to camera + vertex.y *= sdfgi.y_mult; + normal.y *= sdfgi.y_mult; + reflection.y *= sdfgi.y_mult; + + //renormalize + normal = normalize(normal); + reflection = normalize(reflection); + + vec3 cam_pos = vertex; + vec3 cam_normal = normal; + + vec4 light_accum = vec4(0.0); + float weight_accum = 0.0; + + vec4 light_blend_accum = vec4(0.0); + float weight_blend_accum = 0.0; + + float blend = -1.0; + + // helper constants, compute once + + uint cascade = 0xFFFFFFFF; + vec3 cascade_pos; + vec3 cascade_normal; + + for (uint i = 0; i < sdfgi.max_cascades; i++) { + cascade_pos = (cam_pos - sdfgi.cascades[i].position) * sdfgi.cascades[i].to_probe; + + if (any(lessThan(cascade_pos, vec3(0.0))) || any(greaterThanEqual(cascade_pos, sdfgi.cascade_probe_size))) { + continue; //skip cascade + } + + cascade = i; + break; + } + + if (cascade < SDFGI_MAX_CASCADES) { + ambient_light = vec4(0, 0, 0, 1); + reflection_light = vec4(0, 0, 0, 1); + + float blend; + vec3 diffuse, specular; + sdfvoxel_gi_process(cascade, cascade_pos, cam_pos, cam_normal, reflection, roughness, 
diffuse, specular); + + { + //process blend + float blend_from = (float(sdfgi.probe_axis_size - 1) / 2.0) - 2.5; + float blend_to = blend_from + 2.0; + + vec3 inner_pos = cam_pos * sdfgi.cascades[cascade].to_probe; + + float len = length(inner_pos); + + inner_pos = abs(normalize(inner_pos)); + len *= max(inner_pos.x, max(inner_pos.y, inner_pos.z)); + + if (len >= blend_from) { + blend = smoothstep(blend_from, blend_to, len); + } else { + blend = 0.0; + } + } + + if (blend > 0.0) { + //blend + if (cascade == sdfgi.max_cascades - 1) { + ambient_light.a = 1.0 - blend; + reflection_light.a = 1.0 - blend; + + } else { + vec3 diffuse2, specular2; + cascade_pos = (cam_pos - sdfgi.cascades[cascade + 1].position) * sdfgi.cascades[cascade + 1].to_probe; + sdfvoxel_gi_process(cascade + 1, cascade_pos, cam_pos, cam_normal, reflection, roughness, diffuse2, specular2); + diffuse = mix(diffuse, diffuse2, blend); + specular = mix(specular, specular2, blend); + } + } + + ambient_light.rgb = diffuse; + + if (roughness < 0.2) { + vec3 pos_to_uvw = 1.0 / sdfgi.grid_size; + vec4 light_accum = vec4(0.0); + + float blend_size = (sdfgi.grid_size.x / float(sdfgi.probe_axis_size - 1)) * 0.5; + + float radius_sizes[SDFGI_MAX_CASCADES]; + cascade = 0xFFFF; + + float base_distance = length(cam_pos); + for (uint i = 0; i < sdfgi.max_cascades; i++) { + radius_sizes[i] = (1.0 / sdfgi.cascades[i].to_cell) * (sdfgi.grid_size.x * 0.5 - blend_size); + if (cascade == 0xFFFF && base_distance < radius_sizes[i]) { + cascade = i; + } + } + + cascade = min(cascade, sdfgi.max_cascades - 1); + + float max_distance = radius_sizes[sdfgi.max_cascades - 1]; + vec3 ray_pos = cam_pos; + vec3 ray_dir = reflection; + + { + float prev_radius = cascade > 0 ? 
radius_sizes[cascade - 1] : 0.0; + float base_blend = (base_distance - prev_radius) / (radius_sizes[cascade] - prev_radius); + float bias = (1.0 + base_blend) * 1.1; + vec3 abs_ray_dir = abs(ray_dir); + //ray_pos += ray_dir * (bias / sdfgi.cascades[cascade].to_cell); //bias to avoid self occlusion + ray_pos += (ray_dir * 1.0 / max(abs_ray_dir.x, max(abs_ray_dir.y, abs_ray_dir.z)) + cam_normal * 1.4) * bias / sdfgi.cascades[cascade].to_cell; + } + float softness = 0.2 + min(1.0, roughness * 5.0) * 4.0; //approximation to roughness so it does not seem like a hard fade + uint i = 0; + bool found = false; + while (true) { + if (length(ray_pos) >= max_distance || light_accum.a > 0.99) { + break; + } + if (!found && i >= cascade && length(ray_pos) < radius_sizes[i]) { + uint next_i = min(i + 1, sdfgi.max_cascades - 1); + cascade = max(i, cascade); //never go down + + vec3 pos = ray_pos - sdfgi.cascades[i].position; + pos *= sdfgi.cascades[i].to_cell * pos_to_uvw; + + float fdistance = textureLod(sampler3D(sdf_cascades[i], linear_sampler), pos, 0.0).r * 255.0 - 1.1; + + vec4 hit_light = vec4(0.0); + if (fdistance < softness) { + hit_light.rgb = textureLod(sampler3D(light_cascades[i], linear_sampler), pos, 0.0).rgb; + hit_light.rgb *= 0.5; //approximation given value read is actually meant for anisotropy + hit_light.a = clamp(1.0 - (fdistance / softness), 0.0, 1.0); + hit_light.rgb *= hit_light.a; + } + + fdistance /= sdfgi.cascades[i].to_cell; + + if (i < (sdfgi.max_cascades - 1)) { + pos = ray_pos - sdfgi.cascades[next_i].position; + pos *= sdfgi.cascades[next_i].to_cell * pos_to_uvw; + + float fdistance2 = textureLod(sampler3D(sdf_cascades[next_i], linear_sampler), pos, 0.0).r * 255.0 - 1.1; + + vec4 hit_light2 = vec4(0.0); + if (fdistance2 < softness) { + hit_light2.rgb = textureLod(sampler3D(light_cascades[next_i], linear_sampler), pos, 0.0).rgb; + hit_light2.rgb *= 0.5; //approximation given value read is actually meant for anisotropy + hit_light2.a = clamp(1.0 - 
(fdistance2 / softness), 0.0, 1.0); + hit_light2.rgb *= hit_light2.a; + } + + float prev_radius = i == 0 ? 0.0 : radius_sizes[max(0, i - 1)]; + float blend = clamp((length(ray_pos) - prev_radius) / (radius_sizes[i] - prev_radius), 0.0, 1.0); + + fdistance2 /= sdfgi.cascades[next_i].to_cell; + + hit_light = mix(hit_light, hit_light2, blend); + fdistance = mix(fdistance, fdistance2, blend); + } + + light_accum += hit_light; + ray_pos += ray_dir * fdistance; + found = true; + } + i++; + if (i == sdfgi.max_cascades) { + i = 0; + found = false; + } + } + + vec3 light = light_accum.rgb / max(light_accum.a, 0.00001); + float alpha = min(1.0, light_accum.a); + + float b = min(1.0, roughness * 5.0); + + float sa = 1.0 - b; + + reflection_light.a = alpha * sa + b; + if (reflection_light.a == 0) { + specular = vec3(0.0); + } else { + specular = (light * alpha * sa + specular * b) / reflection_light.a; + } + } + + reflection_light.rgb = specular; + + ambient_light.rgb *= sdfgi.energy; + reflection_light.rgb *= sdfgi.energy; + } else { + ambient_light = vec4(0); + reflection_light = vec4(0); + } +} + +//standard voxel cone trace +vec4 voxel_cone_trace(texture3D probe, vec3 cell_size, vec3 pos, vec3 direction, float tan_half_angle, float max_distance, float p_bias) { + float dist = p_bias; + vec4 color = vec4(0.0); + + while (dist < max_distance && color.a < 0.95) { + float diameter = max(1.0, 2.0 * tan_half_angle * dist); + vec3 uvw_pos = (pos + dist * direction) * cell_size; + float half_diameter = diameter * 0.5; + //check if outside, then break + if (any(greaterThan(abs(uvw_pos - 0.5), vec3(0.5f + half_diameter * cell_size)))) { + break; + } + vec4 scolor = textureLod(sampler3D(probe, linear_sampler_with_mipmaps), uvw_pos, log2(diameter)); + float a = (1.0 - color.a); + color += a * scolor; + dist += half_diameter; + } + + return color; +} + +vec4 voxel_cone_trace_45_degrees(texture3D probe, vec3 cell_size, vec3 pos, vec3 direction, float max_distance, float p_bias) { + 
float dist = p_bias; + vec4 color = vec4(0.0); + float radius = max(0.5, dist); + float lod_level = log2(radius * 2.0); + + while (dist < max_distance && color.a < 0.95) { + vec3 uvw_pos = (pos + dist * direction) * cell_size; + + //check if outside, then break + if (any(greaterThan(abs(uvw_pos - 0.5), vec3(0.5f + radius * cell_size)))) { + break; + } + vec4 scolor = textureLod(sampler3D(probe, linear_sampler_with_mipmaps), uvw_pos, lod_level); + lod_level += 1.0; + + float a = (1.0 - color.a); + scolor *= a; + color += scolor; + dist += radius; + radius = max(0.5, dist); + } + return color; +} + +void voxel_gi_compute(uint index, vec3 position, vec3 normal, vec3 ref_vec, mat3 normal_xform, float roughness, inout vec4 out_spec, inout vec4 out_diff, inout float out_blend) { + position = (voxel_gi_instances.data[index].xform * vec4(position, 1.0)).xyz; + ref_vec = normalize((voxel_gi_instances.data[index].xform * vec4(ref_vec, 0.0)).xyz); + normal = normalize((voxel_gi_instances.data[index].xform * vec4(normal, 0.0)).xyz); + + position += normal * voxel_gi_instances.data[index].normal_bias; + + //this causes corrupted pixels, i have no idea why.. 
+ if (any(bvec2(any(lessThan(position, vec3(0.0))), any(greaterThan(position, voxel_gi_instances.data[index].bounds))))) { + return; + } + + mat3 dir_xform = mat3(voxel_gi_instances.data[index].xform) * normal_xform; + + vec3 blendv = abs(position / voxel_gi_instances.data[index].bounds * 2.0 - 1.0); + float blend = clamp(1.0 - max(blendv.x, max(blendv.y, blendv.z)), 0.0, 1.0); + //float blend=1.0; + + float max_distance = length(voxel_gi_instances.data[index].bounds); + vec3 cell_size = 1.0 / voxel_gi_instances.data[index].bounds; + + //irradiance + + vec4 light = vec4(0.0); + + if (params.high_quality_vct) { + const uint cone_dir_count = 6; + vec3 cone_dirs[cone_dir_count] = vec3[]( + vec3(0.0, 0.0, 1.0), + vec3(0.866025, 0.0, 0.5), + vec3(0.267617, 0.823639, 0.5), + vec3(-0.700629, 0.509037, 0.5), + vec3(-0.700629, -0.509037, 0.5), + vec3(0.267617, -0.823639, 0.5)); + + float cone_weights[cone_dir_count] = float[](0.25, 0.15, 0.15, 0.15, 0.15, 0.15); + float cone_angle_tan = 0.577; + + for (uint i = 0; i < cone_dir_count; i++) { + vec3 dir = normalize(dir_xform * cone_dirs[i]); + light += cone_weights[i] * voxel_cone_trace(voxel_gi_textures[index], cell_size, position, dir, cone_angle_tan, max_distance, voxel_gi_instances.data[index].bias); + } + } else { + const uint cone_dir_count = 4; + vec3 cone_dirs[cone_dir_count] = vec3[]( + vec3(0.707107, 0.0, 0.707107), + vec3(0.0, 0.707107, 0.707107), + vec3(-0.707107, 0.0, 0.707107), + vec3(0.0, -0.707107, 0.707107)); + + float cone_weights[cone_dir_count] = float[](0.25, 0.25, 0.25, 0.25); + for (int i = 0; i < cone_dir_count; i++) { + vec3 dir = normalize(dir_xform * cone_dirs[i]); + light += cone_weights[i] * voxel_cone_trace_45_degrees(voxel_gi_textures[index], cell_size, position, dir, max_distance, voxel_gi_instances.data[index].bias); + } + } + + light.rgb *= voxel_gi_instances.data[index].dynamic_range; + if (!voxel_gi_instances.data[index].blend_ambient) { + light.a = 1.0; + } + + out_diff += light * blend; + 
+ //radiance + vec4 irr_light = voxel_cone_trace(voxel_gi_textures[index], cell_size, position, ref_vec, tan(roughness * 0.5 * M_PI * 0.99), max_distance, voxel_gi_instances.data[index].bias); + irr_light.rgb *= voxel_gi_instances.data[index].dynamic_range; + if (!voxel_gi_instances.data[index].blend_ambient) { + irr_light.a = 1.0; + } + + out_spec += irr_light * blend; + + out_blend += blend; +} + +vec4 fetch_normal_and_roughness(ivec2 pos) { + vec4 normal_roughness = texelFetch(sampler2D(normal_roughness_buffer, linear_sampler), pos, 0); + + normal_roughness.xyz = normalize(normal_roughness.xyz * 2.0 - 1.0); + return normal_roughness; +} + +void process_gi(ivec2 pos, vec3 vertex, inout vec4 ambient_light, inout vec4 reflection_light) { + vec4 normal_roughness = fetch_normal_and_roughness(pos); + + vec3 normal = normal_roughness.xyz; + + if (length(normal) > 0.5) { + //valid normal, can do GI (test the magnitude with length(); the vec3 .length() method is the component count and is always 3) + float roughness = normal_roughness.w; + vec3 view = -normalize(mat3(scene_data.cam_transform) * (vertex - scene_data.eye_offset[params.view_index].xyz)); + vertex = mat3(scene_data.cam_transform) * vertex; + normal = normalize(mat3(scene_data.cam_transform) * normal); + vec3 reflection = normalize(reflect(-view, normal)); + +#ifdef USE_SDFGI + sdfgi_process(vertex, normal, reflection, roughness, ambient_light, reflection_light); +#endif + +#ifdef USE_VOXEL_GI_INSTANCES + { + uvec2 voxel_gi_tex = texelFetch(usampler2D(voxel_gi_buffer, linear_sampler), pos, 0).rg; + roughness *= roughness; + //find arbitrary tangent and bitangent, then build a matrix + vec3 v0 = abs(normal.z) < 0.999 ?
vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0); + vec3 tangent = normalize(cross(v0, normal)); + vec3 bitangent = normalize(cross(tangent, normal)); + mat3 normal_mat = mat3(tangent, bitangent, normal); + + vec4 amb_accum = vec4(0.0); + vec4 spec_accum = vec4(0.0); + float blend_accum = 0.0; + + for (uint i = 0; i < params.max_voxel_gi_instances; i++) { + if (any(equal(uvec2(i), voxel_gi_tex))) { + voxel_gi_compute(i, vertex, normal, reflection, normal_mat, roughness, spec_accum, amb_accum, blend_accum); + } + } + if (blend_accum > 0.0) { + amb_accum /= blend_accum; + spec_accum /= blend_accum; + } + +#ifdef USE_SDFGI + reflection_light = blend_color(spec_accum, reflection_light); + ambient_light = blend_color(amb_accum, ambient_light); +#else + reflection_light = spec_accum; + ambient_light = amb_accum; +#endif + } +#endif + } +} + +void main() { + ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + +#ifdef MODE_HALF_RES + pos <<= 1; +#endif + if (any(greaterThanEqual(pos, scene_data.screen_size))) { //too large, do nothing + return; + } + + vec4 ambient_light = vec4(0.0); + vec4 reflection_light = vec4(0.0); + + vec3 vertex = reconstruct_position(pos); + vertex.y = -vertex.y; + + process_gi(pos, vertex, ambient_light, reflection_light); + +#ifdef MODE_HALF_RES + pos >>= 1; +#endif + + imageStore(ambient_buffer, pos, ambient_light); + imageStore(reflection_buffer, pos, reflection_light); +} diff --git a/servers/rendering/renderer_rd/shaders/environment/sdfgi_debug.glsl b/servers/rendering/renderer_rd/shaders/environment/sdfgi_debug.glsl new file mode 100644 index 0000000000..af5f7d0a58 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/environment/sdfgi_debug.glsl @@ -0,0 +1,178 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +#define MAX_CASCADES 8 + +layout(set = 0, binding = 1) uniform texture3D sdf_cascades[MAX_CASCADES]; +layout(set = 0, binding = 2) uniform texture3D 
light_cascades[MAX_CASCADES]; +layout(set = 0, binding = 3) uniform texture3D aniso0_cascades[MAX_CASCADES]; +layout(set = 0, binding = 4) uniform texture3D aniso1_cascades[MAX_CASCADES]; +layout(set = 0, binding = 5) uniform texture3D occlusion_texture; + +layout(set = 0, binding = 8) uniform sampler linear_sampler; + +struct CascadeData { + vec3 offset; //offset of (0,0,0) in world coordinates + float to_cell; // 1/bounds * grid_size + ivec3 probe_world_offset; + uint pad; +}; + +layout(set = 0, binding = 9, std140) uniform Cascades { + CascadeData data[MAX_CASCADES]; +} +cascades; + +layout(rgba16f, set = 0, binding = 10) uniform restrict writeonly image2D screen_buffer; + +layout(set = 0, binding = 11) uniform texture2DArray lightprobe_texture; + +layout(push_constant, std430) uniform Params { + vec3 grid_size; + uint max_cascades; + + ivec2 screen_size; + bool use_occlusion; + float y_mult; + + int probe_axis_size; + float z_near; + float reserved1; + float reserved2; + + mat4 cam_transform; + mat4 inv_projection; +} +params; + +vec3 linear_to_srgb(vec3 color) { + //if going to srgb, clamp from 0 to 1. + color = clamp(color, vec3(0.0), vec3(1.0)); + const vec3 a = vec3(0.055f); + return mix((vec3(1.0f) + a) * pow(color.rgb, vec3(1.0f / 2.4f)) - a, 12.92f * color.rgb, lessThan(color.rgb, vec3(0.0031308f))); +} + +vec2 octahedron_wrap(vec2 v) { + vec2 signVal; + signVal.x = v.x >= 0.0 ? 1.0 : -1.0; + signVal.y = v.y >= 0.0 ? 1.0 : -1.0; + return (1.0 - abs(v.yx)) * signVal; +} + +vec2 octahedron_encode(vec3 n) { + // https://twitter.com/Stubbesaurus/status/937994790553227264 + n /= (abs(n.x) + abs(n.y) + abs(n.z)); + n.xy = n.z >= 0.0 ? 
n.xy : octahedron_wrap(n.xy); + n.xy = n.xy * 0.5 + 0.5; + return n.xy; +} + +void main() { + // Pixel being shaded + ivec2 screen_pos = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(screen_pos, params.screen_size))) { //too large, do nothing + return; + } + + vec3 ray_pos; + vec3 ray_dir; + { + ray_pos = params.cam_transform[3].xyz; + + ray_dir.xy = ((vec2(screen_pos) / vec2(params.screen_size)) * 2.0 - 1.0); + ray_dir.z = params.z_near; + ray_dir = (params.inv_projection * vec4(ray_dir, 1.0)).xyz; + + ray_dir = normalize(mat3(params.cam_transform) * ray_dir); + } + + ray_pos.y *= params.y_mult; + ray_dir.y *= params.y_mult; + ray_dir = normalize(ray_dir); + + vec3 pos_to_uvw = 1.0 / params.grid_size; + + vec3 light = vec3(0.0); + float blend = 0.0; + +#if 1 + // No interpolation + + vec3 inv_dir = 1.0 / ray_dir; + + float rough = 0.5; + bool hit = false; + + for (uint i = 0; i < params.max_cascades; i++) { + //convert to local bounds + vec3 pos = ray_pos - cascades.data[i].offset; + pos *= cascades.data[i].to_cell; + + // Should never happen for debug, since we start mostly at the bounds center, + // but add anyway. 
+ //if (any(lessThan(pos,vec3(0.0))) || any(greaterThanEqual(pos,params.grid_size))) { + // continue; //already past bounds for this cascade, goto next + //} + + //find maximum advance distance (until reaching bounds) + vec3 t0 = -pos * inv_dir; + vec3 t1 = (params.grid_size - pos) * inv_dir; + vec3 tmax = max(t0, t1); + float max_advance = min(tmax.x, min(tmax.y, tmax.z)); + + float advance = 0.0; + vec3 uvw; + hit = false; + + while (advance < max_advance) { + //read how much to advance from SDF + uvw = (pos + ray_dir * advance) * pos_to_uvw; + + float distance = texture(sampler3D(sdf_cascades[i], linear_sampler), uvw).r * 255.0 - 1.7; + + if (distance < 0.001) { + //consider hit + hit = true; + break; + } + + advance += distance; + } + + if (!hit) { + pos += ray_dir * min(advance, max_advance); + pos /= cascades.data[i].to_cell; + pos += cascades.data[i].offset; + ray_pos = pos; + continue; + } + + //compute albedo, emission and normal at hit point + + const float EPSILON = 0.001; + vec3 hit_normal = normalize(vec3( + texture(sampler3D(sdf_cascades[i], linear_sampler), uvw + vec3(EPSILON, 0.0, 0.0)).r - texture(sampler3D(sdf_cascades[i], linear_sampler), uvw - vec3(EPSILON, 0.0, 0.0)).r, + texture(sampler3D(sdf_cascades[i], linear_sampler), uvw + vec3(0.0, EPSILON, 0.0)).r - texture(sampler3D(sdf_cascades[i], linear_sampler), uvw - vec3(0.0, EPSILON, 0.0)).r, + texture(sampler3D(sdf_cascades[i], linear_sampler), uvw + vec3(0.0, 0.0, EPSILON)).r - texture(sampler3D(sdf_cascades[i], linear_sampler), uvw - vec3(0.0, 0.0, EPSILON)).r)); + + vec3 hit_light = texture(sampler3D(light_cascades[i], linear_sampler), uvw).rgb; + vec4 aniso0 = texture(sampler3D(aniso0_cascades[i], linear_sampler), uvw); + vec3 hit_aniso0 = aniso0.rgb; + vec3 hit_aniso1 = vec3(aniso0.a, texture(sampler3D(aniso1_cascades[i], linear_sampler), uvw).rg); + + hit_light *= (dot(max(vec3(0.0), (hit_normal * hit_aniso0)), vec3(1.0)) + dot(max(vec3(0.0), (-hit_normal * hit_aniso1)), vec3(1.0))); + + 
light = hit_light; + + break; + } + +#endif + + imageStore(screen_buffer, screen_pos, vec4(linear_to_srgb(light), 1.0)); +} diff --git a/servers/rendering/renderer_rd/shaders/environment/sdfgi_debug_probes.glsl b/servers/rendering/renderer_rd/shaders/environment/sdfgi_debug_probes.glsl new file mode 100644 index 0000000000..75b1ad2130 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/environment/sdfgi_debug_probes.glsl @@ -0,0 +1,267 @@ +#[vertex] + +#version 450 + +#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview) +#extension GL_EXT_multiview : enable +#endif + +#ifdef USE_MULTIVIEW +#ifdef has_VK_KHR_multiview +#define ViewIndex gl_ViewIndex +#else // has_VK_KHR_multiview +// !BAS! This needs to become an input once we implement our fallback! +#define ViewIndex 0 +#endif // has_VK_KHR_multiview +#else // USE_MULTIVIEW +// Set to zero, not supported in non stereo +#define ViewIndex 0 +#endif //USE_MULTIVIEW + +#VERSION_DEFINES + +#define MAX_CASCADES 8 +#define MAX_VIEWS 2 + +layout(push_constant, std430) uniform Params { + uint band_power; + uint sections_in_band; + uint band_mask; + float section_arc; + + vec3 grid_size; + uint cascade; + + uint pad; + float y_mult; + uint probe_debug_index; + int probe_axis_size; +} +params; + +// https://in4k.untergrund.net/html_articles/hugi_27_-_coding_corner_polaris_sphere_tessellation_101.htm + +vec3 get_sphere_vertex(uint p_vertex_id) { + float x_angle = float(p_vertex_id & 1u) + (p_vertex_id >> params.band_power); + + float y_angle = + float((p_vertex_id & params.band_mask) >> 1) + ((p_vertex_id >> params.band_power) * params.sections_in_band); + + x_angle *= params.section_arc * 0.5f; // remember - the x rotation covers 180 degrees, not 360, hence the half arc + y_angle *= -params.section_arc; + + vec3 point = vec3(sin(x_angle) * sin(y_angle), cos(x_angle), sin(x_angle) * cos(y_angle)); + + return point; +} + +#ifdef MODE_PROBES + +layout(location = 0) out vec3 normal_interp; +layout(location = 1) out flat uint probe_index; + +#endif + +#ifdef
MODE_VISIBILITY + +layout(location = 0) out float visibility; + +#endif + +struct CascadeData { + vec3 offset; //offset of (0,0,0) in world coordinates + float to_cell; // 1/bounds * grid_size + ivec3 probe_world_offset; + uint pad; +}; + +layout(set = 0, binding = 1, std140) uniform Cascades { + CascadeData data[MAX_CASCADES]; +} +cascades; + +layout(set = 0, binding = 4) uniform texture3D occlusion_texture; +layout(set = 0, binding = 3) uniform sampler linear_sampler; + +layout(set = 0, binding = 5, std140) uniform SceneData { + mat4 projection[MAX_VIEWS]; +} +scene_data; + +void main() { +#ifdef MODE_PROBES + probe_index = gl_InstanceIndex; + + normal_interp = get_sphere_vertex(gl_VertexIndex); + + vec3 vertex = normal_interp * 0.2; + + float probe_cell_size = float(params.grid_size / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell; + + ivec3 probe_cell; + probe_cell.x = int(probe_index % params.probe_axis_size); + probe_cell.y = int(probe_index / (params.probe_axis_size * params.probe_axis_size)); + probe_cell.z = int((probe_index / params.probe_axis_size) % params.probe_axis_size); + + vertex += (cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size) / vec3(1.0, params.y_mult, 1.0); + + gl_Position = scene_data.projection[ViewIndex] * vec4(vertex, 1.0); +#endif + +#ifdef MODE_VISIBILITY + + int probe_index = int(params.probe_debug_index); + + vec3 vertex = get_sphere_vertex(gl_VertexIndex) * 0.01; + + float probe_cell_size = float(params.grid_size / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell; + + ivec3 probe_cell; + probe_cell.x = int(probe_index % params.probe_axis_size); + probe_cell.y = int((probe_index % (params.probe_axis_size * params.probe_axis_size)) / params.probe_axis_size); + probe_cell.z = int(probe_index / (params.probe_axis_size * params.probe_axis_size)); + + vertex += (cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size) / vec3(1.0, 
params.y_mult, 1.0); + + int probe_voxels = int(params.grid_size.x) / int(params.probe_axis_size - 1); + int occluder_index = int(gl_InstanceIndex); + + int diameter = probe_voxels * 2; + ivec3 occluder_pos; + occluder_pos.x = int(occluder_index % diameter); + occluder_pos.y = int(occluder_index / (diameter * diameter)); + occluder_pos.z = int((occluder_index / diameter) % diameter); + + float cell_size = 1.0 / cascades.data[params.cascade].to_cell; + + ivec3 occluder_offset = occluder_pos - ivec3(diameter / 2); + vertex += ((vec3(occluder_offset) + vec3(0.5)) * cell_size) / vec3(1.0, params.y_mult, 1.0); + + ivec3 global_cell = probe_cell + cascades.data[params.cascade].probe_world_offset; + uint occlusion_layer = 0; + if ((global_cell.x & 1) != 0) { + occlusion_layer |= 1; + } + if ((global_cell.y & 1) != 0) { + occlusion_layer |= 2; + } + if ((global_cell.z & 1) != 0) { + occlusion_layer |= 4; + } + ivec3 tex_pos = probe_cell * probe_voxels + occluder_offset; + + const vec4 layer_axis[4] = vec4[]( + vec4(1, 0, 0, 0), + vec4(0, 1, 0, 0), + vec4(0, 0, 1, 0), + vec4(0, 0, 0, 1)); + + tex_pos.z += int(params.cascade) * int(params.grid_size); + if (occlusion_layer >= 4) { + tex_pos.x += int(params.grid_size.x); + occlusion_layer &= 3; + } + + visibility = dot(texelFetch(sampler3D(occlusion_texture, linear_sampler), tex_pos, 0), layer_axis[occlusion_layer]); + + gl_Position = scene_data.projection[ViewIndex] * vec4(vertex, 1.0); + +#endif +} + +#[fragment] + +#version 450 + +#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview) +#extension GL_EXT_multiview : enable +#endif + +#ifdef USE_MULTIVIEW +#ifdef has_VK_KHR_multiview +#define ViewIndex gl_ViewIndex +#else // has_VK_KHR_multiview +// !BAS! This needs to become an input once we implement our fallback! 
+#define ViewIndex 0 +#endif // has_VK_KHR_multiview +#else // USE_MULTIVIEW +// Set to zero, not supported in non stereo +#define ViewIndex 0 +#endif //USE_MULTIVIEW + +#VERSION_DEFINES + +#define MAX_VIEWS 2 + +layout(location = 0) out vec4 frag_color; + +layout(set = 0, binding = 2) uniform texture2DArray lightprobe_texture; +layout(set = 0, binding = 3) uniform sampler linear_sampler; + +layout(push_constant, std430) uniform Params { + uint band_power; + uint sections_in_band; + uint band_mask; + float section_arc; + + vec3 grid_size; + uint cascade; + + uint pad; + float y_mult; + uint probe_debug_index; + int probe_axis_size; +} +params; + +#ifdef MODE_PROBES + +layout(location = 0) in vec3 normal_interp; +layout(location = 1) in flat uint probe_index; + +#endif + +#ifdef MODE_VISIBILITY +layout(location = 0) in float visibility; +#endif + +vec2 octahedron_wrap(vec2 v) { + vec2 signVal; + signVal.x = v.x >= 0.0 ? 1.0 : -1.0; + signVal.y = v.y >= 0.0 ? 1.0 : -1.0; + return (1.0 - abs(v.yx)) * signVal; +} + +vec2 octahedron_encode(vec3 n) { + // https://twitter.com/Stubbesaurus/status/937994790553227264 + n /= (abs(n.x) + abs(n.y) + abs(n.z)); + n.xy = n.z >= 0.0 ? 
n.xy : octahedron_wrap(n.xy); + n.xy = n.xy * 0.5 + 0.5; + return n.xy; +} + +void main() { +#ifdef MODE_PROBES + + ivec3 tex_pos; + tex_pos.x = int(probe_index) % params.probe_axis_size; //x + tex_pos.y = int(probe_index) / (params.probe_axis_size * params.probe_axis_size); + tex_pos.x += params.probe_axis_size * ((int(probe_index) / params.probe_axis_size) % params.probe_axis_size); //z + tex_pos.z = int(params.cascade); + + vec3 tex_pos_ofs = vec3(octahedron_encode(normal_interp) * float(OCT_SIZE), 0.0); + vec3 tex_posf = vec3(vec2(tex_pos.xy * (OCT_SIZE + 2) + ivec2(1)), float(tex_pos.z)) + tex_pos_ofs; + + tex_posf.xy /= vec2(ivec2(params.probe_axis_size * params.probe_axis_size * (OCT_SIZE + 2), params.probe_axis_size * (OCT_SIZE + 2))); + + vec4 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), tex_posf, 0.0); + + frag_color = indirect_light; + +#endif + +#ifdef MODE_VISIBILITY + + frag_color = vec4(vec3(1, visibility, visibility), 1.0); +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/environment/sdfgi_direct_light.glsl b/servers/rendering/renderer_rd/shaders/environment/sdfgi_direct_light.glsl new file mode 100644 index 0000000000..b95fad650e --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/environment/sdfgi_direct_light.glsl @@ -0,0 +1,506 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +#define MAX_CASCADES 8 + +layout(set = 0, binding = 1) uniform texture3D sdf_cascades[MAX_CASCADES]; +layout(set = 0, binding = 2) uniform sampler linear_sampler; + +layout(set = 0, binding = 3, std430) restrict readonly buffer DispatchData { + uint x; + uint y; + uint z; + uint total_count; +} +dispatch_data; + +struct ProcessVoxel { + uint position; // xyz 7 bit packed, extra 11 bits for neighbors. + uint albedo; // rgb bits 0-15 albedo, bits 16-21 are normal bits (set if geometry exists toward that side), extra 11 bits for neighbors. 
+ uint light; // rgbe8985 encoded total saved light, extra 2 bits for neighbors. + uint light_aniso; // 55555 light anisotropy, extra 2 bits for neighbors. + //total neighbours: 26 +}; + +#ifdef MODE_PROCESS_STATIC +layout(set = 0, binding = 4, std430) restrict buffer ProcessVoxels { +#else +layout(set = 0, binding = 4, std430) restrict buffer readonly ProcessVoxels { +#endif + ProcessVoxel data[]; +} +process_voxels; + +layout(r32ui, set = 0, binding = 5) uniform restrict uimage3D dst_light; +layout(rgba8, set = 0, binding = 6) uniform restrict image3D dst_aniso0; +layout(rg8, set = 0, binding = 7) uniform restrict image3D dst_aniso1; + +struct CascadeData { + vec3 offset; //offset of (0,0,0) in world coordinates + float to_cell; // 1/bounds * grid_size + ivec3 probe_world_offset; + uint pad; +}; + +layout(set = 0, binding = 8, std140) uniform Cascades { + CascadeData data[MAX_CASCADES]; +} +cascades; + +#define LIGHT_TYPE_DIRECTIONAL 0 +#define LIGHT_TYPE_OMNI 1 +#define LIGHT_TYPE_SPOT 2 + +struct Light { + vec3 color; + float energy; + + vec3 direction; + bool has_shadow; + + vec3 position; + float attenuation; + + uint type; + float cos_spot_angle; + float inv_spot_attenuation; + float radius; +}; + +layout(set = 0, binding = 9, std140) buffer restrict readonly Lights { + Light data[]; +} +lights; + +layout(set = 0, binding = 10) uniform texture2DArray lightprobe_texture; +layout(set = 0, binding = 11) uniform texture3D occlusion_texture; + +layout(push_constant, std430) uniform Params { + vec3 grid_size; + uint max_cascades; + + uint cascade; + uint light_count; + uint process_offset; + uint process_increment; + + int probe_axis_size; + float bounce_feedback; + float y_mult; + bool use_occlusion; +} +params; + +vec2 octahedron_wrap(vec2 v) { + vec2 signVal; + signVal.x = v.x >= 0.0 ? 1.0 : -1.0; + signVal.y = v.y >= 0.0 ? 
1.0 : -1.0; + return (1.0 - abs(v.yx)) * signVal; +} + +vec2 octahedron_encode(vec3 n) { + // https://twitter.com/Stubbesaurus/status/937994790553227264 + n /= (abs(n.x) + abs(n.y) + abs(n.z)); + n.xy = n.z >= 0.0 ? n.xy : octahedron_wrap(n.xy); + n.xy = n.xy * 0.5 + 0.5; + return n.xy; +} + +float get_omni_attenuation(float distance, float inv_range, float decay) { + float nd = distance * inv_range; + nd *= nd; + nd *= nd; // nd^4 + nd = max(1.0 - nd, 0.0); + nd *= nd; // nd^2 + return nd * pow(max(distance, 0.0001), -decay); +} + +void main() { + uint voxel_index = uint(gl_GlobalInvocationID.x); + + //used for skipping voxels every N frames + if (params.process_increment > 1) { + voxel_index *= params.process_increment; + voxel_index += params.process_offset; + } + + if (voxel_index >= dispatch_data.total_count) { + return; + } + + uint voxel_position = process_voxels.data[voxel_index].position; + + //keep for storing to texture + ivec3 positioni = ivec3((uvec3(voxel_position, voxel_position, voxel_position) >> uvec3(0, 7, 14)) & uvec3(0x7F)); + + vec3 position = vec3(positioni) + vec3(0.5); + position /= cascades.data[params.cascade].to_cell; + position += cascades.data[params.cascade].offset; + + uint voxel_albedo = process_voxels.data[voxel_index].albedo; + + vec3 albedo = vec3(uvec3(voxel_albedo >> 10, voxel_albedo >> 5, voxel_albedo) & uvec3(0x1F)) / float(0x1F); + vec3 light_accum[6] = vec3[](vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0)); + uint valid_aniso = (voxel_albedo >> 15) & 0x3F; + + const vec3 aniso_dir[6] = vec3[]( + vec3(1, 0, 0), + vec3(0, 1, 0), + vec3(0, 0, 1), + vec3(-1, 0, 0), + vec3(0, -1, 0), + vec3(0, 0, -1)); + + // Add indirect light first, in order to save computation resources +#ifdef MODE_PROCESS_DYNAMIC + if (params.bounce_feedback > 0.001) { + vec3 feedback = (params.bounce_feedback < 1.0) ? 
(albedo * params.bounce_feedback) : mix(albedo, vec3(1.0), params.bounce_feedback - 1.0); + vec3 pos = (vec3(positioni) + vec3(0.5)) * float(params.probe_axis_size - 1) / params.grid_size; + ivec3 probe_base_pos = ivec3(pos); + + float weight_accum[6] = float[](0, 0, 0, 0, 0, 0); + + ivec3 tex_pos = ivec3(probe_base_pos.xy, int(params.cascade)); + tex_pos.x += probe_base_pos.z * int(params.probe_axis_size); + + tex_pos.xy = tex_pos.xy * (OCT_SIZE + 2) + ivec2(1); + + vec3 base_tex_posf = vec3(tex_pos); + vec2 tex_pixel_size = 1.0 / vec2(ivec2((OCT_SIZE + 2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE + 2) * params.probe_axis_size)); + vec3 probe_uv_offset = vec3(ivec3(OCT_SIZE + 2, OCT_SIZE + 2, (OCT_SIZE + 2) * params.probe_axis_size)) * tex_pixel_size.xyx; + + for (uint j = 0; j < 8; j++) { + ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); + ivec3 probe_posi = probe_base_pos; + probe_posi += offset; + + // Compute weight + + vec3 probe_pos = vec3(probe_posi); + vec3 probe_to_pos = pos - probe_pos; + vec3 probe_dir = normalize(-probe_to_pos); + + // Compute lightprobe texture position + + vec3 trilinear = vec3(1.0) - abs(probe_to_pos); + + for (uint k = 0; k < 6; k++) { + if (bool(valid_aniso & (1 << k))) { + vec3 n = aniso_dir[k]; + float weight = trilinear.x * trilinear.y * trilinear.z * max(0, dot(n, probe_dir)); + + if (weight > 0.0 && params.use_occlusion) { + ivec3 occ_indexv = abs((cascades.data[params.cascade].probe_world_offset + probe_posi) & ivec3(1, 1, 1)) * ivec3(1, 2, 4); + vec4 occ_mask = mix(vec4(0.0), vec4(1.0), equal(ivec4(occ_indexv.x | occ_indexv.y), ivec4(0, 1, 2, 3))); + + vec3 occ_pos = (vec3(positioni) + aniso_dir[k] + vec3(0.5)) / params.grid_size; + occ_pos.z += float(params.cascade); + if (occ_indexv.z != 0) { //z bit is on, means index is >=4, so make it switch to the other half of textures + occ_pos.x += 1.0; + } + occ_pos *= vec3(0.5, 1.0, 1.0 / float(params.max_cascades)); //renormalize + float 
occlusion = dot(textureLod(sampler3D(occlusion_texture, linear_sampler), occ_pos, 0.0), occ_mask); + + weight *= occlusion; + } + + if (weight > 0.0) { + vec3 tex_posf = base_tex_posf + vec3(octahedron_encode(n) * float(OCT_SIZE), 0.0); + tex_posf.xy *= tex_pixel_size; + + vec3 pos_uvw = tex_posf; + pos_uvw.xy += vec2(offset.xy) * probe_uv_offset.xy; + pos_uvw.x += float(offset.z) * probe_uv_offset.z; + vec3 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0).rgb; + + light_accum[k] += indirect_light * weight; + weight_accum[k] += weight; + } + } + } + } + + for (uint k = 0; k < 6; k++) { + if (weight_accum[k] > 0.0) { + light_accum[k] /= weight_accum[k]; + light_accum[k] *= feedback; + } + } + } + +#endif + + { + uint rgbe = process_voxels.data[voxel_index].light; + + //read rgbe8985 + float r = float((rgbe & 0xff) << 1); + float g = float((rgbe >> 8) & 0x1ff); + float b = float(((rgbe >> 17) & 0xff) << 1); + float e = float((rgbe >> 25) & 0x1F); + float m = pow(2.0, e - 15.0 - 9.0); + + vec3 l = vec3(r, g, b) * m; + + uint aniso = process_voxels.data[voxel_index].light_aniso; + for (uint i = 0; i < 6; i++) { + float strength = ((aniso >> (i * 5)) & 0x1F) / float(0x1F); + light_accum[i] += l * strength; + } + } + + // Raytrace light + + vec3 pos_to_uvw = 1.0 / params.grid_size; + vec3 uvw_ofs = pos_to_uvw * 0.5; + + for (uint i = 0; i < params.light_count; i++) { + float attenuation = 1.0; + vec3 direction; + float light_distance = 1e20; + + switch (lights.data[i].type) { + case LIGHT_TYPE_DIRECTIONAL: { + direction = -lights.data[i].direction; + } break; + case LIGHT_TYPE_OMNI: { + vec3 rel_vec = lights.data[i].position - position; + direction = normalize(rel_vec); + light_distance = length(rel_vec); + rel_vec.y /= params.y_mult; + attenuation = get_omni_attenuation(light_distance, 1.0 / lights.data[i].radius, lights.data[i].attenuation); + + } break; + case LIGHT_TYPE_SPOT: { + vec3 rel_vec = lights.data[i].position - 
position; + direction = normalize(rel_vec); + light_distance = length(rel_vec); + rel_vec.y /= params.y_mult; + attenuation = get_omni_attenuation(light_distance, 1.0 / lights.data[i].radius, lights.data[i].attenuation); + + float cos_spot_angle = lights.data[i].cos_spot_angle; + float cos_angle = dot(-direction, lights.data[i].direction); + + if (cos_angle < cos_spot_angle) { + continue; + } + + float scos = max(cos_angle, cos_spot_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cos_spot_angle)); + attenuation *= 1.0 - pow(spot_rim, lights.data[i].inv_spot_attenuation); + } break; + } + + if (attenuation < 0.001) { + continue; + } + + bool hit = false; + + vec3 ray_pos = position; + vec3 ray_dir = direction; + vec3 inv_dir = 1.0 / ray_dir; + + //this is how to properly bias outgoing rays + float cell_size = 1.0 / cascades.data[params.cascade].to_cell; + ray_pos += sign(direction) * cell_size * 0.48; // go almost to the box edge but remain inside + ray_pos += ray_dir * 0.4 * cell_size; //apply a small bias from there + + for (uint j = params.cascade; j < params.max_cascades; j++) { + //convert to local bounds + vec3 pos = ray_pos - cascades.data[j].offset; + pos *= cascades.data[j].to_cell; + float local_distance = light_distance * cascades.data[j].to_cell; + + if (any(lessThan(pos, vec3(0.0))) || any(greaterThanEqual(pos, params.grid_size))) { + continue; //already past bounds for this cascade, goto next + } + + //find maximum advance distance (until reaching bounds) + vec3 t0 = -pos * inv_dir; + vec3 t1 = (params.grid_size - pos) * inv_dir; + vec3 tmax = max(t0, t1); + float max_advance = min(tmax.x, min(tmax.y, tmax.z)); + + max_advance = min(local_distance, max_advance); + + float advance = 0.0; + float occlusion = 1.0; + + while (advance < max_advance) { + //read how much to advance from SDF + vec3 uvw = (pos + ray_dir * advance) * pos_to_uvw; + + float distance = texture(sampler3D(sdf_cascades[j], linear_sampler), uvw).r * 255.0 - 1.0; + if 
(distance < 0.001) { + //consider hit + hit = true; + break; + } + + occlusion = min(occlusion, distance); + + advance += distance; + } + + if (hit) { + attenuation *= occlusion; + break; + } + + if (advance >= local_distance) { + break; //past light distance, abandon search + } + //change ray origin to collision with bounds + pos += ray_dir * max_advance; + pos /= cascades.data[j].to_cell; + pos += cascades.data[j].offset; + light_distance -= max_advance / cascades.data[j].to_cell; + ray_pos = pos; + } + + if (!hit) { + vec3 light = albedo * lights.data[i].color.rgb * lights.data[i].energy * attenuation; + + for (int j = 0; j < 6; j++) { + if (bool(valid_aniso & (1 << j))) { + light_accum[j] += max(0.0, dot(aniso_dir[j], direction)) * light; + } + } + } + } + + // Store the light in the light texture + + float lumas[6]; + vec3 light_total = vec3(0); + + for (int i = 0; i < 6; i++) { + light_total += light_accum[i]; + lumas[i] = max(light_accum[i].r, max(light_accum[i].g, light_accum[i].b)); + } + + float luma_total = max(max(light_total.r, max(light_total.g, light_total.b)), 1e-8); // floored at a tiny epsilon (below the smallest RGBE9995 step) so the lumas[i] / luma_total ratios computed later are never 0/0 for unlit voxels + + uint light_total_rgbe; + + { + //compress to RGBE9995 to save space + + const float pow2to9 = 512.0f; + const float B = 15.0f; + const float N = 9.0f; + const float LN2 = 0.6931471805599453094172321215; + + float cRed = clamp(light_total.r, 0.0, 65408.0); + float cGreen = clamp(light_total.g, 0.0, 65408.0); + float cBlue = clamp(light_total.b, 0.0, 65408.0); + + float cMax = max(cRed, max(cGreen, cBlue)); + + float expp = max(-B - 1.0f, floor(log(cMax) / LN2)) + 1.0f + B; + + float sMax = floor((cMax / pow(2.0f, expp - B - N)) + 0.5f); + + float exps = expp + 1.0f; + + if (0.0 <= sMax && sMax < pow2to9) { + exps = expp; + } + + float sRed = floor((cRed / pow(2.0f, exps - B - N)) + 0.5f); + float sGreen = floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f); + float sBlue = floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f); +#ifdef MODE_PROCESS_STATIC + //since its self-save, use RGBE8985 + 
light_total_rgbe = ((uint(sRed) & 0x1FF) >> 1) | ((uint(sGreen) & 0x1FF) << 8) | (((uint(sBlue) & 0x1FF) >> 1) << 17) | ((uint(exps) & 0x1F) << 25); + +#else + light_total_rgbe = (uint(sRed) & 0x1FF) | ((uint(sGreen) & 0x1FF) << 9) | ((uint(sBlue) & 0x1FF) << 18) | ((uint(exps) & 0x1F) << 27); +#endif + } + +#ifdef MODE_PROCESS_DYNAMIC + + vec4 aniso0; + aniso0.r = lumas[0] / luma_total; + aniso0.g = lumas[1] / luma_total; + aniso0.b = lumas[2] / luma_total; + aniso0.a = lumas[3] / luma_total; + + vec2 aniso1; + aniso1.r = lumas[4] / luma_total; + aniso1.g = lumas[5] / luma_total; + + //save to 3D textures + imageStore(dst_aniso0, positioni, aniso0); + imageStore(dst_aniso1, positioni, vec4(aniso1, 0.0, 0.0)); + imageStore(dst_light, positioni, uvec4(light_total_rgbe)); + + //also fill neighbours, so light interpolation during the indirect pass works + + //recover the neighbour list from the leftover bits + uint neighbours = (voxel_albedo >> 21) | ((voxel_position >> 21) << 11) | ((process_voxels.data[voxel_index].light >> 30) << 22) | ((process_voxels.data[voxel_index].light_aniso >> 30) << 24); + + const uint max_neighbours = 26; + const ivec3 neighbour_positions[max_neighbours] = ivec3[]( + ivec3(-1, -1, -1), + ivec3(-1, -1, 0), + ivec3(-1, -1, 1), + ivec3(-1, 0, -1), + ivec3(-1, 0, 0), + ivec3(-1, 0, 1), + ivec3(-1, 1, -1), + ivec3(-1, 1, 0), + ivec3(-1, 1, 1), + ivec3(0, -1, -1), + ivec3(0, -1, 0), + ivec3(0, -1, 1), + ivec3(0, 0, -1), + ivec3(0, 0, 1), + ivec3(0, 1, -1), + ivec3(0, 1, 0), + ivec3(0, 1, 1), + ivec3(1, -1, -1), + ivec3(1, -1, 0), + ivec3(1, -1, 1), + ivec3(1, 0, -1), + ivec3(1, 0, 0), + ivec3(1, 0, 1), + ivec3(1, 1, -1), + ivec3(1, 1, 0), + ivec3(1, 1, 1)); + + for (uint i = 0; i < max_neighbours; i++) { + if (bool(neighbours & (1 << i))) { + ivec3 neighbour_pos = positioni + neighbour_positions[i]; + imageStore(dst_light, neighbour_pos, uvec4(light_total_rgbe)); + imageStore(dst_aniso0, neighbour_pos, aniso0); + imageStore(dst_aniso1, 
neighbour_pos, vec4(aniso1, 0.0, 0.0)); + } + } + +#endif + +#ifdef MODE_PROCESS_STATIC + + //save back the anisotropic + + uint light = process_voxels.data[voxel_index].light & (3 << 30); + light |= light_total_rgbe; + process_voxels.data[voxel_index].light = light; //replace + + uint light_aniso = process_voxels.data[voxel_index].light_aniso & (3 << 30); + for (int i = 0; i < 6; i++) { + light_aniso |= min(31, uint((lumas[i] / luma_total) * 31.0)) << (i * 5); + } + + process_voxels.data[voxel_index].light_aniso = light_aniso; + +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/environment/sdfgi_integrate.glsl b/servers/rendering/renderer_rd/shaders/environment/sdfgi_integrate.glsl new file mode 100644 index 0000000000..9c03297f5c --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/environment/sdfgi_integrate.glsl @@ -0,0 +1,612 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +#define MAX_CASCADES 8 + +layout(set = 0, binding = 1) uniform texture3D sdf_cascades[MAX_CASCADES]; +layout(set = 0, binding = 2) uniform texture3D light_cascades[MAX_CASCADES]; +layout(set = 0, binding = 3) uniform texture3D aniso0_cascades[MAX_CASCADES]; +layout(set = 0, binding = 4) uniform texture3D aniso1_cascades[MAX_CASCADES]; + +layout(set = 0, binding = 6) uniform sampler linear_sampler; + +struct CascadeData { + vec3 offset; //offset of (0,0,0) in world coordinates + float to_cell; // 1/bounds * grid_size + ivec3 probe_world_offset; + uint pad; +}; + +layout(set = 0, binding = 7, std140) uniform Cascades { + CascadeData data[MAX_CASCADES]; +} +cascades; + +layout(r32ui, set = 0, binding = 8) uniform restrict uimage2DArray lightprobe_texture_data; +layout(rgba16i, set = 0, binding = 9) uniform restrict iimage2DArray lightprobe_history_texture; +layout(rgba32i, set = 0, binding = 10) uniform restrict iimage2D lightprobe_average_texture; + +//used for scrolling +layout(rgba16i, set = 0, binding 
= 11) uniform restrict iimage2DArray lightprobe_history_scroll_texture; +layout(rgba32i, set = 0, binding = 12) uniform restrict iimage2D lightprobe_average_scroll_texture; + +layout(rgba32i, set = 0, binding = 13) uniform restrict iimage2D lightprobe_average_parent_texture; + +layout(rgba16f, set = 0, binding = 14) uniform restrict writeonly image2DArray lightprobe_ambient_texture; + +#ifdef USE_CUBEMAP_ARRAY +layout(set = 1, binding = 0) uniform textureCubeArray sky_irradiance; +#else +layout(set = 1, binding = 0) uniform textureCube sky_irradiance; +#endif +layout(set = 1, binding = 1) uniform sampler linear_sampler_mipmaps; + +#define HISTORY_BITS 10 + +#define SKY_MODE_DISABLED 0 +#define SKY_MODE_COLOR 1 +#define SKY_MODE_SKY 2 + +layout(push_constant, std430) uniform Params { + vec3 grid_size; + uint max_cascades; + + uint probe_axis_size; + uint cascade; + uint history_index; + uint history_size; + + uint ray_count; + float ray_bias; + ivec2 image_size; + + ivec3 world_offset; + uint sky_mode; + + ivec3 scroll; + float sky_energy; + + vec3 sky_color; + float y_mult; + + bool store_ambient_texture; + uint pad[3]; +} +params; + +const float PI = 3.14159265f; +const float GOLDEN_ANGLE = PI * (3.0 - sqrt(5.0)); + +vec3 vogel_hemisphere(uint p_index, uint p_count, float p_offset) { + float r = sqrt(float(p_index) + 0.5f) / sqrt(float(p_count)); + float theta = float(p_index) * GOLDEN_ANGLE + p_offset; + float y = cos(r * PI * 0.5); + float l = sin(r * PI * 0.5); + return vec3(l * cos(theta), l * sin(theta), y * (float(p_index & 1) * 2.0 - 1.0)); +} + +uvec3 hash3(uvec3 x) { + x = ((x >> 16) ^ x) * 0x45d9f3b; + x = ((x >> 16) ^ x) * 0x45d9f3b; + x = (x >> 16) ^ x; + return x; +} + +float hashf3(vec3 co) { + return fract(sin(dot(co, vec3(12.9898, 78.233, 137.13451))) * 43758.5453); +} + +vec3 octahedron_encode(vec2 f) { + // https://twitter.com/Stubbesaurus/status/937994790553227264 + f = f * 2.0 - 1.0; + vec3 n = vec3(f.x, f.y, 1.0f - abs(f.x) - abs(f.y)); + 
float t = clamp(-n.z, 0.0, 1.0); + n.x += n.x >= 0 ? -t : t; + n.y += n.y >= 0 ? -t : t; + return normalize(n); +} + +uint rgbe_encode(vec3 color) { + const float pow2to9 = 512.0f; + const float B = 15.0f; + const float N = 9.0f; + const float LN2 = 0.6931471805599453094172321215; + + float cRed = clamp(color.r, 0.0, 65408.0); + float cGreen = clamp(color.g, 0.0, 65408.0); + float cBlue = clamp(color.b, 0.0, 65408.0); + + float cMax = max(cRed, max(cGreen, cBlue)); + + float expp = max(-B - 1.0f, floor(log(cMax) / LN2)) + 1.0f + B; + + float sMax = floor((cMax / pow(2.0f, expp - B - N)) + 0.5f); + + float exps = expp + 1.0f; + + if (0.0 <= sMax && sMax < pow2to9) { + exps = expp; + } + + float sRed = floor((cRed / pow(2.0f, exps - B - N)) + 0.5f); + float sGreen = floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f); + float sBlue = floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f); + return (uint(sRed) & 0x1FF) | ((uint(sGreen) & 0x1FF) << 9) | ((uint(sBlue) & 0x1FF) << 18) | ((uint(exps) & 0x1F) << 27); +} + +struct SH { +#if (SH_SIZE == 16) + float c[48]; +#else + float c[28]; +#endif +}; + +shared SH sh_accum[64]; //8x8 + +void main() { + ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(pos, params.image_size))) { //too large, do nothing + return; + } + + uint probe_index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * 8; + +#ifdef MODE_PROCESS + + float probe_cell_size = float(params.grid_size.x / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell; + + ivec3 probe_cell; + probe_cell.x = pos.x % int(params.probe_axis_size); + probe_cell.y = pos.y; + probe_cell.z = pos.x / int(params.probe_axis_size); + + vec3 probe_pos = cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size; + vec3 pos_to_uvw = 1.0 / params.grid_size; + + for (uint i = 0; i < SH_SIZE * 3; i++) { + sh_accum[probe_index].c[i] = 0.0; + } + + // quickly ensure each probe has a different "offset" for the vogel function, based on 
integer world position + uvec3 h3 = hash3(uvec3(params.world_offset + probe_cell)); + float offset = hashf3(vec3(h3 & uvec3(0xFFFFF))); + + //for a more homogeneous hemisphere, alternate based on history frames + uint ray_offset = params.history_index; + uint ray_mult = params.history_size; + uint ray_total = ray_mult * params.ray_count; + + for (uint i = 0; i < params.ray_count; i++) { + vec3 ray_dir = vogel_hemisphere(ray_offset + i * ray_mult, ray_total, offset); + ray_dir.y *= params.y_mult; + ray_dir = normalize(ray_dir); + + //needs to be visible + vec3 ray_pos = probe_pos; + vec3 inv_dir = 1.0 / ray_dir; + + bool hit = false; + uint hit_cascade; + + float bias = params.ray_bias; + vec3 abs_ray_dir = abs(ray_dir); + ray_pos += ray_dir * 1.0 / max(abs_ray_dir.x, max(abs_ray_dir.y, abs_ray_dir.z)) * bias / cascades.data[params.cascade].to_cell; + vec3 uvw; + + for (uint j = params.cascade; j < params.max_cascades; j++) { + //convert to local bounds + vec3 pos = ray_pos - cascades.data[j].offset; + pos *= cascades.data[j].to_cell; + + if (any(lessThan(pos, vec3(0.0))) || any(greaterThanEqual(pos, params.grid_size))) { + continue; //already past bounds for this cascade, goto next + } + + //find maximum advance distance (until reaching bounds) + vec3 t0 = -pos * inv_dir; + vec3 t1 = (params.grid_size - pos) * inv_dir; + vec3 tmax = max(t0, t1); + float max_advance = min(tmax.x, min(tmax.y, tmax.z)); + + float advance = 0.0; + + while (advance < max_advance) { + //read how much to advance from SDF + uvw = (pos + ray_dir * advance) * pos_to_uvw; + + float distance = texture(sampler3D(sdf_cascades[j], linear_sampler), uvw).r * 255.0 - 1.0; + if (distance < 0.05) { + //consider hit + hit = true; + break; + } + + advance += distance; + } + + if (hit) { + hit_cascade = j; + break; + } + + //change ray origin to collision with bounds + pos += ray_dir * max_advance; + pos /= cascades.data[j].to_cell; + pos += cascades.data[j].offset; + ray_pos = pos; + } + + vec4 light; + 
if (hit) { + //avoid reading different texture from different threads + for (uint j = params.cascade; j < params.max_cascades; j++) { + if (j == hit_cascade) { + const float EPSILON = 0.001; + vec3 hit_normal = normalize(vec3( + texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(EPSILON, 0.0, 0.0)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(EPSILON, 0.0, 0.0)).r, + texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(0.0, EPSILON, 0.0)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(0.0, EPSILON, 0.0)).r, + texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(0.0, 0.0, EPSILON)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(0.0, 0.0, EPSILON)).r)); + + vec3 hit_light = texture(sampler3D(light_cascades[hit_cascade], linear_sampler), uvw).rgb; + vec4 aniso0 = texture(sampler3D(aniso0_cascades[hit_cascade], linear_sampler), uvw); + vec3 hit_aniso0 = aniso0.rgb; + vec3 hit_aniso1 = vec3(aniso0.a, texture(sampler3D(aniso1_cascades[hit_cascade], linear_sampler), uvw).rg); + + //one liner magic + light.rgb = hit_light * (dot(max(vec3(0.0), (hit_normal * hit_aniso0)), vec3(1.0)) + dot(max(vec3(0.0), (-hit_normal * hit_aniso1)), vec3(1.0))); + light.a = 1.0; + } + } + + } else if (params.sky_mode == SKY_MODE_SKY) { +#ifdef USE_CUBEMAP_ARRAY + light.rgb = textureLod(samplerCubeArray(sky_irradiance, linear_sampler_mipmaps), vec4(ray_dir, 0.0), 2.0).rgb; // Use second mipmap because we don't usually throw a lot of rays, so this compensates. +#else + light.rgb = textureLod(samplerCube(sky_irradiance, linear_sampler_mipmaps), ray_dir, 2.0).rgb; // Use second mipmap because we don't usually throw a lot of rays, so this compensates. 
+#endif + light.rgb *= params.sky_energy; + light.a = 0.0; + + } else if (params.sky_mode == SKY_MODE_COLOR) { + light.rgb = params.sky_color; + light.rgb *= params.sky_energy; + light.a = 0.0; + } else { + light = vec4(0, 0, 0, 0); + } + + vec3 ray_dir2 = ray_dir * ray_dir; + +#define SH_ACCUM(m_idx, m_value) \ + { \ + vec3 l = light.rgb * (m_value); \ + sh_accum[probe_index].c[m_idx * 3 + 0] += l.r; \ + sh_accum[probe_index].c[m_idx * 3 + 1] += l.g; \ + sh_accum[probe_index].c[m_idx * 3 + 2] += l.b; \ + } + SH_ACCUM(0, 0.282095); //l0 + SH_ACCUM(1, 0.488603 * ray_dir.y); //l1n1 + SH_ACCUM(2, 0.488603 * ray_dir.z); //l1n0 + SH_ACCUM(3, 0.488603 * ray_dir.x); //l1p1 + SH_ACCUM(4, 1.092548 * ray_dir.x * ray_dir.y); //l2n2 + SH_ACCUM(5, 1.092548 * ray_dir.y * ray_dir.z); //l2n1 + SH_ACCUM(6, 0.315392 * (3.0 * ray_dir2.z - 1.0)); //l20 + SH_ACCUM(7, 1.092548 * ray_dir.x * ray_dir.z); //l2p1 + SH_ACCUM(8, 0.546274 * (ray_dir2.x - ray_dir2.y)); //l2p2 +#if (SH_SIZE == 16) + SH_ACCUM(9, 0.590043 * ray_dir.y * (3.0f * ray_dir2.x - ray_dir2.y)); + SH_ACCUM(10, 2.890611 * ray_dir.y * ray_dir.x * ray_dir.z); + SH_ACCUM(11, 0.646360 * ray_dir.y * (-1.0f + 5.0f * ray_dir2.z)); + SH_ACCUM(12, 0.373176 * (5.0f * ray_dir2.z * ray_dir.z - 3.0f * ray_dir.z)); + SH_ACCUM(13, 0.457045 * ray_dir.x * (-1.0f + 5.0f * ray_dir2.z)); + SH_ACCUM(14, 1.445305 * (ray_dir2.x - ray_dir2.y) * ray_dir.z); + SH_ACCUM(15, 0.590043 * ray_dir.x * (ray_dir2.x - 3.0f * ray_dir2.y)); + +#endif + } + + for (uint i = 0; i < SH_SIZE; i++) { + // store in history texture + ivec3 prev_pos = ivec3(pos.x, pos.y * SH_SIZE + i, int(params.history_index)); + ivec2 average_pos = prev_pos.xy; + + vec4 value = vec4(sh_accum[probe_index].c[i * 3 + 0], sh_accum[probe_index].c[i * 3 + 1], sh_accum[probe_index].c[i * 3 + 2], 1.0) * 4.0 / float(params.ray_count); + + ivec4 ivalue = clamp(ivec4(value * float(1 << HISTORY_BITS)), -32768, 32767); //clamp to 16 bits, so higher values don't break average + + ivec4 prev_value 
= imageLoad(lightprobe_history_texture, prev_pos); + ivec4 average = imageLoad(lightprobe_average_texture, average_pos); + + average -= prev_value; + average += ivalue; + + imageStore(lightprobe_history_texture, prev_pos, ivalue); + imageStore(lightprobe_average_texture, average_pos, average); + + if (params.store_ambient_texture && i == 0) { + ivec3 ambient_pos = ivec3(pos, int(params.cascade)); + vec4 ambient_light = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS); + ambient_light *= 0.88622; // SHL0 + imageStore(lightprobe_ambient_texture, ambient_pos, ambient_light); + } + } +#endif // MODE PROCESS + +#ifdef MODE_STORE + + // converting to octahedral in this step is required because + // octahedral is much faster to read from the screen than spherical harmonics, + // despite the very slight quality loss + + ivec2 sh_pos = (pos / OCT_SIZE) * ivec2(1, SH_SIZE); + ivec2 oct_pos = (pos / OCT_SIZE) * (OCT_SIZE + 2) + ivec2(1); + ivec2 local_pos = pos % OCT_SIZE; + + //compute the octahedral normal for this texel + vec3 normal = octahedron_encode(vec2(local_pos) / float(OCT_SIZE)); + + // read the spherical harmonic + + vec3 normal2 = normal * normal; + float c[SH_SIZE] = float[]( + + 0.282095, //l0 + 0.488603 * normal.y, //l1n1 + 0.488603 * normal.z, //l1n0 + 0.488603 * normal.x, //l1p1 + 1.092548 * normal.x * normal.y, //l2n2 + 1.092548 * normal.y * normal.z, //l2n1 + 0.315392 * (3.0 * normal2.z - 1.0), //l20 + 1.092548 * normal.x * normal.z, //l2p1 + 0.546274 * (normal2.x - normal2.y) //l2p2 +#if (SH_SIZE == 16) + , + 0.590043 * normal.y * (3.0f * normal2.x - normal2.y), + 2.890611 * normal.y * normal.x * normal.z, + 0.646360 * normal.y * (-1.0f + 5.0f * normal2.z), + 0.373176 * (5.0f * normal2.z * normal.z - 3.0f * normal.z), + 0.457045 * normal.x * (-1.0f + 5.0f * normal2.z), + 1.445305 * (normal2.x - normal2.y) * normal.z, + 0.590043 * normal.x * (normal2.x - 3.0f * normal2.y) + +#endif + ); + + const float l_mult[SH_SIZE] = float[]( + 
1.0, + 2.0 / 3.0, + 2.0 / 3.0, + 2.0 / 3.0, + 1.0 / 4.0, + 1.0 / 4.0, + 1.0 / 4.0, + 1.0 / 4.0, + 1.0 / 4.0 +#if (SH_SIZE == 16) + , // band l3 (the 7 coefficients after l0, l1 and l2) does not contribute to irradiance + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 +#endif + ); + + vec3 irradiance = vec3(0.0); + vec3 radiance = vec3(0.0); + + for (uint i = 0; i < SH_SIZE; i++) { + // read SH coefficient i of this probe from the average texture + ivec2 average_pos = sh_pos + ivec2(0, i); + ivec4 average = imageLoad(lightprobe_average_texture, average_pos); + + vec4 sh = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS); + + vec3 m = sh.rgb * c[i] * 4.0; + + irradiance += m * l_mult[i]; + radiance += m; + } + + //encode RGBE9995 for the final texture + + uint irradiance_rgbe = rgbe_encode(irradiance); + uint radiance_rgbe = rgbe_encode(radiance); + + //store in octahedral map + + ivec3 texture_pos = ivec3(oct_pos, int(params.cascade)); + ivec3 copy_to[4] = ivec3[](ivec3(-2, -2, -2), ivec3(-2, -2, -2), ivec3(-2, -2, -2), ivec3(-2, -2, -2)); + copy_to[0] = texture_pos + ivec3(local_pos, 0); + + if (local_pos == ivec2(0, 0)) { + copy_to[1] = texture_pos + ivec3(OCT_SIZE - 1, -1, 0); + copy_to[2] = texture_pos + ivec3(-1, OCT_SIZE - 1, 0); + copy_to[3] = texture_pos + ivec3(OCT_SIZE, OCT_SIZE, 0); + } else if (local_pos == ivec2(OCT_SIZE - 1, 0)) { + copy_to[1] = texture_pos + ivec3(0, -1, 0); + copy_to[2] = texture_pos + ivec3(OCT_SIZE, OCT_SIZE - 1, 0); + copy_to[3] = texture_pos + ivec3(-1, OCT_SIZE, 0); + } else if (local_pos == ivec2(0, OCT_SIZE - 1)) { + copy_to[1] = texture_pos + ivec3(-1, 0, 0); + copy_to[2] = texture_pos + ivec3(OCT_SIZE - 1, OCT_SIZE, 0); + copy_to[3] = texture_pos + ivec3(OCT_SIZE, -1, 0); + } else if (local_pos == ivec2(OCT_SIZE - 1, OCT_SIZE - 1)) { + copy_to[1] = texture_pos + ivec3(0, OCT_SIZE, 0); + copy_to[2] = texture_pos + ivec3(OCT_SIZE, 0, 0); + copy_to[3] = texture_pos + ivec3(-1, -1, 0); + } else if (local_pos.y == 0) { + copy_to[1] = texture_pos + ivec3(OCT_SIZE - local_pos.x - 1, 
local_pos.y - 1, 0); + } else if (local_pos.x == 0) { + copy_to[1] = texture_pos + ivec3(local_pos.x - 1, OCT_SIZE - local_pos.y - 1, 0); + } else if (local_pos.y == OCT_SIZE - 1) { + copy_to[1] = texture_pos + ivec3(OCT_SIZE - local_pos.x - 1, local_pos.y + 1, 0); + } else if (local_pos.x == OCT_SIZE - 1) { + copy_to[1] = texture_pos + ivec3(local_pos.x + 1, OCT_SIZE - local_pos.y - 1, 0); + } + + for (int i = 0; i < 4; i++) { + if (copy_to[i] == ivec3(-2, -2, -2)) { + continue; + } + imageStore(lightprobe_texture_data, copy_to[i], uvec4(irradiance_rgbe)); + imageStore(lightprobe_texture_data, copy_to[i] + ivec3(0, 0, int(params.max_cascades)), uvec4(radiance_rgbe)); + } + +#endif + +#ifdef MODE_SCROLL + + ivec3 probe_cell; + probe_cell.x = pos.x % int(params.probe_axis_size); + probe_cell.y = pos.y; + probe_cell.z = pos.x / int(params.probe_axis_size); + + ivec3 read_probe = probe_cell - params.scroll; + + if (all(greaterThanEqual(read_probe, ivec3(0))) && all(lessThan(read_probe, ivec3(params.probe_axis_size)))) { + // can scroll + ivec2 tex_pos; + tex_pos = read_probe.xy; + tex_pos.x += read_probe.z * int(params.probe_axis_size); + + //scroll + for (uint j = 0; j < params.history_size; j++) { + for (int i = 0; i < SH_SIZE; i++) { + // copy from history texture + ivec3 src_pos = ivec3(tex_pos.x, tex_pos.y * SH_SIZE + i, int(j)); + ivec3 dst_pos = ivec3(pos.x, pos.y * SH_SIZE + i, int(j)); + ivec4 value = imageLoad(lightprobe_history_texture, src_pos); + imageStore(lightprobe_history_scroll_texture, dst_pos, value); + } + } + + for (int i = 0; i < SH_SIZE; i++) { + // copy from average texture + ivec2 src_pos = ivec2(tex_pos.x, tex_pos.y * SH_SIZE + i); + ivec2 dst_pos = ivec2(pos.x, pos.y * SH_SIZE + i); + ivec4 value = imageLoad(lightprobe_average_texture, src_pos); + imageStore(lightprobe_average_scroll_texture, dst_pos, value); + } + } else if (params.cascade < params.max_cascades - 1) { + //can't scroll, must look for position in parent cascade + + //to 
global coords + float cell_to_probe = float(params.grid_size.x / float(params.probe_axis_size - 1)); + + float probe_cell_size = cell_to_probe / cascades.data[params.cascade].to_cell; + vec3 probe_pos = cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size; + + //to parent local coords + float probe_cell_size_next = cell_to_probe / cascades.data[params.cascade + 1].to_cell; + probe_pos -= cascades.data[params.cascade + 1].offset; + probe_pos /= probe_cell_size_next; + + ivec3 probe_posi = ivec3(probe_pos); + //add up all light, no need to use occlusion here, since occlusion will do its work afterwards + + vec4 average_light[SH_SIZE] = vec4[](vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0) +#if (SH_SIZE == 16) + , + vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0) +#endif + ); + float total_weight = 0.0; + + for (int i = 0; i < 8; i++) { + ivec3 offset = probe_posi + ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1)); + + vec3 trilinear = vec3(1.0) - abs(probe_pos - vec3(offset)); + float weight = trilinear.x * trilinear.y * trilinear.z; + + ivec2 tex_pos; + tex_pos = offset.xy; + tex_pos.x += offset.z * int(params.probe_axis_size); + + for (int j = 0; j < SH_SIZE; j++) { + // copy from history texture + ivec2 src_pos = ivec2(tex_pos.x, tex_pos.y * SH_SIZE + j); + ivec4 average = imageLoad(lightprobe_average_parent_texture, src_pos); + vec4 value = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS); + average_light[j] += value * weight; + } + + total_weight += weight; + } + + if (total_weight > 0.0) { + total_weight = 1.0 / total_weight; + } + //store the averaged values everywhere + + for (int i = 0; i < SH_SIZE; i++) { + ivec4 ivalue = clamp(ivec4(average_light[i] * total_weight * float(1 << HISTORY_BITS)), ivec4(-32768), ivec4(32767)); //clamp to 16 bits, so higher values don't break average + // copy from history texture + ivec3 dst_pos = ivec3(pos.x, pos.y * SH_SIZE + i, 0); + 
for (uint j = 0; j < params.history_size; j++) { + dst_pos.z = int(j); + imageStore(lightprobe_history_scroll_texture, dst_pos, ivalue); + } + + ivalue *= int(params.history_size); //average needs to have all history added up + imageStore(lightprobe_average_scroll_texture, dst_pos.xy, ivalue); + } + + } else { + //scroll at the edge of the highest cascade, just copy what is there, + //since its the closest we have anyway + + for (uint j = 0; j < params.history_size; j++) { + ivec2 tex_pos; + tex_pos = probe_cell.xy; + tex_pos.x += probe_cell.z * int(params.probe_axis_size); + + for (int i = 0; i < SH_SIZE; i++) { + // copy from history texture + ivec3 src_pos = ivec3(tex_pos.x, tex_pos.y * SH_SIZE + i, int(j)); + ivec3 dst_pos = ivec3(pos.x, pos.y * SH_SIZE + i, int(j)); + ivec4 value = imageLoad(lightprobe_history_texture, dst_pos); + imageStore(lightprobe_history_scroll_texture, dst_pos, value); + } + } + + for (int i = 0; i < SH_SIZE; i++) { + // copy from average texture + ivec2 spos = ivec2(pos.x, pos.y * SH_SIZE + i); + ivec4 average = imageLoad(lightprobe_average_texture, spos); + imageStore(lightprobe_average_scroll_texture, spos, average); + } + } + +#endif + +#ifdef MODE_SCROLL_STORE + + //do not update probe texture, as these will be updated later + + for (uint j = 0; j < params.history_size; j++) { + for (int i = 0; i < SH_SIZE; i++) { + // copy from history texture + ivec3 spos = ivec3(pos.x, pos.y * SH_SIZE + i, int(j)); + ivec4 value = imageLoad(lightprobe_history_scroll_texture, spos); + imageStore(lightprobe_history_texture, spos, value); + } + } + + for (int i = 0; i < SH_SIZE; i++) { + // copy from average texture + ivec2 spos = ivec2(pos.x, pos.y * SH_SIZE + i); + ivec4 average = imageLoad(lightprobe_average_scroll_texture, spos); + imageStore(lightprobe_average_texture, spos, average); + } + +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/environment/sdfgi_preprocess.glsl 
b/servers/rendering/renderer_rd/shaders/environment/sdfgi_preprocess.glsl new file mode 100644 index 0000000000..bce98f4054 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/environment/sdfgi_preprocess.glsl @@ -0,0 +1,1056 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#ifdef MODE_JUMPFLOOD_OPTIMIZED +#define GROUP_SIZE 8 + +layout(local_size_x = GROUP_SIZE, local_size_y = GROUP_SIZE, local_size_z = GROUP_SIZE) in; + +#elif defined(MODE_OCCLUSION) || defined(MODE_SCROLL) +//buffer layout +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +#else +//grid layout +layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; + +#endif + +#if defined(MODE_INITIALIZE_JUMP_FLOOD) || defined(MODE_INITIALIZE_JUMP_FLOOD_HALF) +layout(r16ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_color; +layout(rgba8ui, set = 0, binding = 2) uniform restrict writeonly uimage3D dst_positions; +#endif + +#ifdef MODE_UPSCALE_JUMP_FLOOD +layout(r16ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_color; +layout(rgba8ui, set = 0, binding = 2) uniform restrict readonly uimage3D src_positions_half; +layout(rgba8ui, set = 0, binding = 3) uniform restrict writeonly uimage3D dst_positions; +#endif + +#if defined(MODE_JUMPFLOOD) || defined(MODE_JUMPFLOOD_OPTIMIZED) +layout(rgba8ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_positions; +layout(rgba8ui, set = 0, binding = 2) uniform restrict writeonly uimage3D dst_positions; +#endif + +#ifdef MODE_JUMPFLOOD_OPTIMIZED + +shared uvec4 group_positions[(GROUP_SIZE + 2) * (GROUP_SIZE + 2) * (GROUP_SIZE + 2)]; //GROUP_SIZE^3 (8x8x8) cells plus a one cell margin on every side + +void group_store(ivec3 p_pos, uvec4 p_value) { + uint offset = uint(p_pos.z * (GROUP_SIZE + 2) * (GROUP_SIZE + 2) + p_pos.y * (GROUP_SIZE + 2) + p_pos.x); + group_positions[offset] = p_value; +} + +uvec4 group_load(ivec3 p_pos) { + uint offset = uint(p_pos.z * (GROUP_SIZE + 2) * (GROUP_SIZE + 2) + p_pos.y * (GROUP_SIZE + 2) + p_pos.x); +
return group_positions[offset]; +} + +#endif + +#ifdef MODE_OCCLUSION + +layout(r16ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_color; +layout(r8, set = 0, binding = 2) uniform restrict image3D dst_occlusion[8]; +layout(r32ui, set = 0, binding = 3) uniform restrict readonly uimage3D src_facing; + +const uvec2 group_size_offset[11] = uvec2[](uvec2(1, 0), uvec2(3, 1), uvec2(6, 4), uvec2(10, 10), uvec2(15, 20), uvec2(21, 35), uvec2(28, 56), uvec2(36, 84), uvec2(42, 120), uvec2(46, 162), uvec2(48, 208)); +const uint group_pos[256] = uint[](0, + 65536, 256, 1, + 131072, 65792, 512, 65537, 257, 2, + 196608, 131328, 66048, 768, 131073, 65793, 513, 65538, 258, 3, + 262144, 196864, 131584, 66304, 1024, 196609, 131329, 66049, 769, 131074, 65794, 514, 65539, 259, 4, + 327680, 262400, 197120, 131840, 66560, 1280, 262145, 196865, 131585, 66305, 1025, 196610, 131330, 66050, 770, 131075, 65795, 515, 65540, 260, 5, + 393216, 327936, 262656, 197376, 132096, 66816, 1536, 327681, 262401, 197121, 131841, 66561, 1281, 262146, 196866, 131586, 66306, 1026, 196611, 131331, 66051, 771, 131076, 65796, 516, 65541, 261, 6, + 458752, 393472, 328192, 262912, 197632, 132352, 67072, 1792, 393217, 327937, 262657, 197377, 132097, 66817, 1537, 327682, 262402, 197122, 131842, 66562, 1282, 262147, 196867, 131587, 66307, 1027, 196612, 131332, 66052, 772, 131077, 65797, 517, 65542, 262, 7, + 459008, 393728, 328448, 263168, 197888, 132608, 67328, 458753, 393473, 328193, 262913, 197633, 132353, 67073, 1793, 393218, 327938, 262658, 197378, 132098, 66818, 1538, 327683, 262403, 197123, 131843, 66563, 1283, 262148, 196868, 131588, 66308, 1028, 196613, 131333, 66053, 773, 131078, 65798, 518, 65543, 263, + 459264, 393984, 328704, 263424, 198144, 132864, 459009, 393729, 328449, 263169, 197889, 132609, 67329, 458754, 393474, 328194, 262914, 197634, 132354, 67074, 1794, 393219, 327939, 262659, 197379, 132099, 66819, 1539, 327684, 262404, 197124, 131844, 66564, 1284, 262149, 196869, 131589, 
66309, 1029, 196614, 131334, 66054, 774, 131079, 65799, 519, + 459520, 394240, 328960, 263680, 198400, 459265, 393985, 328705, 263425, 198145, 132865, 459010, 393730, 328450, 263170, 197890, 132610, 67330, 458755, 393475, 328195, 262915, 197635, 132355, 67075, 1795, 393220, 327940, 262660, 197380, 132100, 66820, 1540, 327685, 262405, 197125, 131845, 66565, 1285, 262150, 196870, 131590, 66310, 1030, 196615, 131335, 66055, 775); + +shared uint occlusion_facing[((OCCLUSION_SIZE * 2) * (OCCLUSION_SIZE * 2) * (OCCLUSION_SIZE * 2)) / 4]; + +uint get_facing(ivec3 p_pos) { + uint ofs = uint(p_pos.z * OCCLUSION_SIZE * 2 * OCCLUSION_SIZE * 2 + p_pos.y * OCCLUSION_SIZE * 2 + p_pos.x); + uint v = occlusion_facing[ofs / 4]; + return (v >> ((ofs % 4) * 8)) & 0xFF; +} + +#endif + +#ifdef MODE_STORE + +layout(rgba8ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_positions; +layout(r16ui, set = 0, binding = 2) uniform restrict readonly uimage3D src_albedo; +layout(r8, set = 0, binding = 3) uniform restrict readonly image3D src_occlusion[8]; +layout(r32ui, set = 0, binding = 4) uniform restrict readonly uimage3D src_light; +layout(r32ui, set = 0, binding = 5) uniform restrict readonly uimage3D src_light_aniso; +layout(r32ui, set = 0, binding = 6) uniform restrict readonly uimage3D src_facing; + +layout(r8, set = 0, binding = 7) uniform restrict writeonly image3D dst_sdf; +layout(r16ui, set = 0, binding = 8) uniform restrict writeonly uimage3D dst_occlusion; + +layout(set = 0, binding = 10, std430) restrict buffer DispatchData { + uint x; + uint y; + uint z; + uint total_count; +} +dispatch_data; + +struct ProcessVoxel { + uint position; // xyz 7 bit packed, extra 11 bits for neighbors. 
+ uint albedo; //bits 0-14 albedo (rgb 555), bits 15-20 are normal bits (set if geometry exists toward that side), bits 21-31 hold 11 bits for neighbours + uint light; //rgbe8985 encoded total saved light, extra 2 bits for neighbours + uint light_aniso; //6 x 5 bit light anisotropy (30 bits), extra 2 bits for neighbours + //total neighbours: 26 +}; + +layout(set = 0, binding = 11, std430) restrict buffer writeonly ProcessVoxels { + ProcessVoxel data[]; +} +dst_process_voxels; + +shared ProcessVoxel store_positions[4 * 4 * 4]; +shared uint store_position_count; +shared uint store_from_index; +#endif + +#ifdef MODE_SCROLL + +layout(r16ui, set = 0, binding = 1) uniform restrict writeonly uimage3D dst_albedo; +layout(r32ui, set = 0, binding = 2) uniform restrict writeonly uimage3D dst_facing; +layout(r32ui, set = 0, binding = 3) uniform restrict writeonly uimage3D dst_light; +layout(r32ui, set = 0, binding = 4) uniform restrict writeonly uimage3D dst_light_aniso; + +layout(set = 0, binding = 5, std430) restrict buffer readonly DispatchData { + uint x; + uint y; + uint z; + uint total_count; +} +dispatch_data; + +struct ProcessVoxel { + uint position; // xyz 7 bit packed, extra 11 bits for neighbors.
+ uint albedo; //bits 0-14 albedo (rgb 555), bits 15-20 are normal bits (set if geometry exists toward that side), bits 21-31 hold 11 bits for neighbours + uint light; //rgbe8985 encoded total saved light, extra 2 bits for neighbours + uint light_aniso; //6 x 5 bit light anisotropy (30 bits), extra 2 bits for neighbours + //total neighbours: 26 +}; + +layout(set = 0, binding = 6, std430) restrict buffer readonly ProcessVoxels { + ProcessVoxel data[]; +} +src_process_voxels; + +#endif + +#ifdef MODE_SCROLL_OCCLUSION + +layout(r8, set = 0, binding = 1) uniform restrict image3D dst_occlusion[8]; +layout(r16ui, set = 0, binding = 2) uniform restrict readonly uimage3D src_occlusion; + +#endif + +layout(push_constant, std430) uniform Params { + ivec3 scroll; + + int grid_size; + + ivec3 probe_offset; + int step_size; + + bool half_size; + uint occlusion_index; + int cascade; + uint pad; +} +params; + +void main() { +#ifdef MODE_SCROLL + + // Index of the packed voxel handled by this invocation + int index = int(gl_GlobalInvocationID.x); + if (index >= dispatch_data.total_count) { //past the end of the voxel list + return; + } + + ivec3 read_pos = (ivec3(src_process_voxels.data[index].position) >> ivec3(0, 7, 14)) & ivec3(0x7F); + ivec3 write_pos = read_pos + params.scroll; + + if (any(lessThan(write_pos, ivec3(0))) || any(greaterThanEqual(write_pos, ivec3(params.grid_size)))) { + return; // Falls outside the 3D texture, don't do anything.
+ } + + uint albedo = ((src_process_voxels.data[index].albedo & 0x7FFF) << 1) | 1; //add solid bit + imageStore(dst_albedo, write_pos, uvec4(albedo)); + + uint facing = (src_process_voxels.data[index].albedo >> 15) & 0x3F; //6 anisotropic facing bits + imageStore(dst_facing, write_pos, uvec4(facing)); + + uint light = src_process_voxels.data[index].light & 0x3fffffff; //30 bits of RGBE8985 + imageStore(dst_light, write_pos, uvec4(light)); + + uint light_aniso = src_process_voxels.data[index].light_aniso & 0x3fffffff; //30 bits of 6 anisotropic 5 bits values + imageStore(dst_light_aniso, write_pos, uvec4(light_aniso)); + +#endif + +#ifdef MODE_SCROLL_OCCLUSION + + ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + if (any(greaterThanEqual(pos, ivec3(params.grid_size) - abs(params.scroll)))) { //too large, do nothing + return; + } + + ivec3 read_pos = pos + max(ivec3(0), -params.scroll); + ivec3 write_pos = pos + max(ivec3(0), params.scroll); + + read_pos.z += params.cascade * params.grid_size; + uint occlusion = imageLoad(src_occlusion, read_pos).r; + read_pos.x += params.grid_size; + occlusion |= imageLoad(src_occlusion, read_pos).r << 16; + + const uint occlusion_shift[8] = uint[](12, 8, 4, 0, 28, 24, 20, 16); + + for (uint i = 0; i < 8; i++) { + float o = float((occlusion >> occlusion_shift[i]) & 0xF) / 15.0; + imageStore(dst_occlusion[i], write_pos, vec4(o)); + } + +#endif + +#ifdef MODE_INITIALIZE_JUMP_FLOOD + + ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + + uint c = imageLoad(src_color, pos).r; + uvec4 v; + if (bool(c & 0x1)) { + //bit set means this is solid + v.xyz = uvec3(pos); + v.w = 255; //not zero means used + } else { + v.xyz = uvec3(0); + v.w = 0; // zero means unused + } + + imageStore(dst_positions, pos, v); +#endif + +#ifdef MODE_INITIALIZE_JUMP_FLOOD_HALF + + ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + ivec3 base_pos = pos * 2; + + //since we store in half size, lets kind of randomize what we store, so + //the half size jump flood has a bit 
better chance to find something + uvec4 closest[8]; + int closest_count = 0; + + for (uint i = 0; i < 8; i++) { + ivec3 src_pos = base_pos + ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1)); + uint c = imageLoad(src_color, src_pos).r; + if (bool(c & 1)) { + uvec4 v = uvec4(uvec3(src_pos), 255); + closest[closest_count] = v; + closest_count++; + } + } + + if (closest_count == 0) { + imageStore(dst_positions, pos, uvec4(0)); + } else { + ivec3 indexv = (pos & ivec3(1, 1, 1)) * ivec3(1, 2, 4); + int index = (indexv.x | indexv.y | indexv.z) % closest_count; + imageStore(dst_positions, pos, closest[index]); + } + +#endif + +#ifdef MODE_JUMPFLOOD + + //regular jumpflood, efficient for large steps, inefficient for small steps + ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + + vec3 posf = vec3(pos); + + if (params.half_size) { + posf = posf * 2.0 + 0.5; + } + + uvec4 p = imageLoad(src_positions, pos); + + if (!params.half_size && p == uvec4(uvec3(pos), 255)) { + imageStore(dst_positions, pos, p); + return; //points to itself and valid, nothing better can be done, just pass + } + + float p_dist; + + if (p.w != 0) { + p_dist = distance(posf, vec3(p.xyz)); + } else { + p_dist = 0.0; //should not matter + } + + const uint offset_count = 26; + const ivec3 offsets[offset_count] = ivec3[]( + ivec3(-1, -1, -1), + ivec3(-1, -1, 0), + ivec3(-1, -1, 1), + ivec3(-1, 0, -1), + ivec3(-1, 0, 0), + ivec3(-1, 0, 1), + ivec3(-1, 1, -1), + ivec3(-1, 1, 0), + ivec3(-1, 1, 1), + ivec3(0, -1, -1), + ivec3(0, -1, 0), + ivec3(0, -1, 1), + ivec3(0, 0, -1), + ivec3(0, 0, 1), + ivec3(0, 1, -1), + ivec3(0, 1, 0), + ivec3(0, 1, 1), + ivec3(1, -1, -1), + ivec3(1, -1, 0), + ivec3(1, -1, 1), + ivec3(1, 0, -1), + ivec3(1, 0, 0), + ivec3(1, 0, 1), + ivec3(1, 1, -1), + ivec3(1, 1, 0), + ivec3(1, 1, 1)); + + for (uint i = 0; i < offset_count; i++) { + ivec3 ofs = pos + offsets[i] * params.step_size; + if (any(lessThan(ofs, ivec3(0))) || any(greaterThanEqual(ofs, ivec3(params.grid_size)))) { + continue; + } + 
uvec4 q = imageLoad(src_positions, ofs); + + if (q.w == 0) { + continue; //was not initialized yet, ignore + } + + float q_dist = distance(posf, vec3(q.xyz)); + if (p.w == 0 || q_dist < p_dist) { + p = q; //just replace because current is unused + p_dist = q_dist; + } + } + + imageStore(dst_positions, pos, p); +#endif + +#ifdef MODE_JUMPFLOOD_OPTIMIZED + //optimized version using shared compute memory + + ivec3 group_offset = ivec3(gl_WorkGroupID.xyz) % params.step_size; + ivec3 group_pos = group_offset + (ivec3(gl_WorkGroupID.xyz) / params.step_size) * ivec3(GROUP_SIZE * params.step_size); + + //load data into local group memory + + if (all(lessThan(ivec3(gl_LocalInvocationID.xyz), ivec3((GROUP_SIZE + 2) / 2)))) { + //use this thread for loading, this method uses less threads for this but its simpler and less divergent + ivec3 base_pos = ivec3(gl_LocalInvocationID.xyz) * 2; + for (uint i = 0; i < 8; i++) { + ivec3 load_pos = base_pos + ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1)); + ivec3 load_global_pos = group_pos + (load_pos - ivec3(1)) * params.step_size; + uvec4 q; + if (all(greaterThanEqual(load_global_pos, ivec3(0))) && all(lessThan(load_global_pos, ivec3(params.grid_size)))) { + q = imageLoad(src_positions, load_global_pos); + } else { + q = uvec4(0); //unused + } + + group_store(load_pos, q); + } + } + + //sync, done before the out of range early-out below because barrier() must be reached by every invocation of the group (uniform control flow) + groupMemoryBarrier(); + barrier(); + + ivec3 global_pos = group_pos + ivec3(gl_LocalInvocationID.xyz) * params.step_size; + + if (any(lessThan(global_pos, ivec3(0))) || any(greaterThanEqual(global_pos, ivec3(params.grid_size)))) { + return; //do nothing else, end here because outside range + } + + ivec3 local_pos = ivec3(gl_LocalInvocationID.xyz) + ivec3(1); + + const uint offset_count = 27; + const ivec3 offsets[offset_count] = ivec3[]( + ivec3(-1, -1, -1), + ivec3(-1, -1, 0), + ivec3(-1, -1, 1), + ivec3(-1, 0, -1), + ivec3(-1, 0, 0), + ivec3(-1, 0, 1), + ivec3(-1, 1, -1), + ivec3(-1, 1, 0), + ivec3(-1, 1, 1), + ivec3(0, -1, -1), + ivec3(0,
-1, 0), + ivec3(0, -1, 1), + ivec3(0, 0, -1), + ivec3(0, 0, 0), + ivec3(0, 0, 1), + ivec3(0, 1, -1), + ivec3(0, 1, 0), + ivec3(0, 1, 1), + ivec3(1, -1, -1), + ivec3(1, -1, 0), + ivec3(1, -1, 1), + ivec3(1, 0, -1), + ivec3(1, 0, 0), + ivec3(1, 0, 1), + ivec3(1, 1, -1), + ivec3(1, 1, 0), + ivec3(1, 1, 1)); + + //only makes sense if point is inside screen + uvec4 closest = uvec4(0); + float closest_dist = 0.0; + + vec3 posf = vec3(global_pos); + + if (params.half_size) { + posf = posf * 2.0 + 0.5; + } + + for (uint i = 0; i < offset_count; i++) { + uvec4 point = group_load(local_pos + offsets[i]); + + if (point.w == 0) { + continue; //was not initialized yet, ignore + } + + float dist = distance(posf, vec3(point.xyz)); + if (closest.w == 0 || dist < closest_dist) { + closest = point; + closest_dist = dist; + } + } + + imageStore(dst_positions, global_pos, closest); + +#endif + +#ifdef MODE_UPSCALE_JUMP_FLOOD + + ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + + uint c = imageLoad(src_color, pos).r; + uvec4 v; + if (bool(c & 1)) { + //bit set means this is solid + v.xyz = uvec3(pos); + v.w = 255; //not zero means used + } else { + v = imageLoad(src_positions_half, pos >> 1); + float d = length(vec3(ivec3(v.xyz) - pos)); + + ivec3 vbase = ivec3(v.xyz - (v.xyz & uvec3(1))); + + //search around if there is a better candidate from the same block + for (int i = 0; i < 8; i++) { + ivec3 bits = ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1)); + ivec3 p = vbase + bits; + + float d2 = length(vec3(p - pos)); + if (d2 < d) { //check valid distance before test so we avoid a read + uint c2 = imageLoad(src_color, p).r; + if (bool(c2 & 1)) { + v.xyz = uvec3(p); + d = d2; + } + } + } + + //could validate better position.. 
+ } + + imageStore(dst_positions, pos, v); + +#endif + +#ifdef MODE_OCCLUSION + + uint invocation_idx = uint(gl_LocalInvocationID.x); + ivec3 region = ivec3(gl_WorkGroupID); + + ivec3 region_offset = -ivec3(OCCLUSION_SIZE); + region_offset += region * OCCLUSION_SIZE * 2; + region_offset += params.probe_offset * OCCLUSION_SIZE; + + if (params.scroll != ivec3(0)) { + //validate scroll region + ivec3 region_offset_to = region_offset + ivec3(OCCLUSION_SIZE * 2); + uvec3 scroll_mask = uvec3(notEqual(params.scroll, ivec3(0))); //save which axes are scrolling + ivec3 scroll_from = mix(ivec3(0), ivec3(params.grid_size) + params.scroll, lessThan(params.scroll, ivec3(0))); + ivec3 scroll_to = mix(ivec3(params.grid_size), params.scroll, greaterThan(params.scroll, ivec3(0))); + + if ((uvec3(lessThanEqual(region_offset_to, scroll_from)) | uvec3(greaterThanEqual(region_offset, scroll_to))) * scroll_mask == scroll_mask) { //all axes that scroll are out, exit + return; //region outside scroll bounds, quit + } + } + +#define OCC_HALF_SIZE (OCCLUSION_SIZE / 2) + + ivec3 local_ofs = ivec3(uvec3(invocation_idx % OCC_HALF_SIZE, (invocation_idx % (OCC_HALF_SIZE * OCC_HALF_SIZE)) / OCC_HALF_SIZE, invocation_idx / (OCC_HALF_SIZE * OCC_HALF_SIZE))) * 4; + + /* for(int i=0;i<64;i++) { + ivec3 offset = region_offset + local_ofs + ((ivec3(i) >> ivec3(0,2,4)) & ivec3(3,3,3)); + uint facig = + if (all(greaterThanEqual(offset,ivec3(0))) && all(lessThan(offset,ivec3(params.grid_size)))) {*/ + + for (int i = 0; i < 16; i++) { //i only walks y and z, the 4 texels along x are packed into one uint below + + ivec3 offset = local_ofs + ((ivec3(i * 4) >> ivec3(0, 2, 4)) & ivec3(3, 3, 3)); + + uint facing_pack = 0; + for (int j = 0; j < 4; j++) { + ivec3 foffset = region_offset + offset + ivec3(j, 0, 0); + if (all(greaterThanEqual(foffset, ivec3(0))) && all(lessThan(foffset, ivec3(params.grid_size)))) { + uint f = imageLoad(src_facing, foffset).r; + facing_pack |= f << (j * 8); + } + } + + occlusion_facing[(offset.z * (OCCLUSION_SIZE * 2 *
OCCLUSION_SIZE * 2) + offset.y * (OCCLUSION_SIZE * 2) + offset.x) / 4] = facing_pack; + } + + //sync occlusion saved + groupMemoryBarrier(); + barrier(); + + //process occlusion + +#define OCC_STEPS (OCCLUSION_SIZE * 3 - 2) +#define OCC_HALF_STEPS (OCC_STEPS / 2) + + for (int step = 0; step < OCC_STEPS; step++) { + bool shrink = step >= OCC_HALF_STEPS; + int occ_step = shrink ? OCC_HALF_STEPS - (step - OCC_HALF_STEPS) - 1 : step; + + if (invocation_idx < group_size_offset[occ_step].x) { + uint pv = group_pos[group_size_offset[occ_step].y + invocation_idx]; + ivec3 proc_abs = (ivec3(int(pv)) >> ivec3(0, 8, 16)) & ivec3(0xFF); + + if (shrink) { + proc_abs = ivec3(OCCLUSION_SIZE) - proc_abs - ivec3(1); + } + + for (int i = 0; i < 8; i++) { + ivec3 bits = ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1)); + ivec3 proc_sign = bits * 2 - 1; + ivec3 local_offset = ivec3(OCCLUSION_SIZE) + proc_abs * proc_sign - (ivec3(1) - bits); + ivec3 offset = local_offset + region_offset; + if (all(greaterThanEqual(offset, ivec3(0))) && all(lessThan(offset, ivec3(params.grid_size)))) { + float occ; + + uint facing = get_facing(local_offset); + + if (facing != 0) { //solid + occ = 0.0; + } else if (step == 0) { +#if 0 + occ = 0.0; + if (get_facing(local_offset - ivec3(proc_sign.x,0,0))==0) { + occ+=1.0; + } + if (get_facing(local_offset - ivec3(0,proc_sign.y,0))==0) { + occ+=1.0; + } + if (get_facing(local_offset - ivec3(0,0,proc_sign.z))==0) { + occ+=1.0; + } + /* + if (get_facing(local_offset - proc_sign)==0) { + occ+=1.0; + }*/ + + occ/=3.0; +#endif + occ = 1.0; + + } else { + ivec3 read_dir = -proc_sign; + + ivec3 major_axis; + if (proc_abs.x < proc_abs.y) { + if (proc_abs.z < proc_abs.y) { + major_axis = ivec3(0, 1, 0); + } else { + major_axis = ivec3(0, 0, 1); + } + } else { + if (proc_abs.z < proc_abs.x) { + major_axis = ivec3(1, 0, 0); + } else { + major_axis = ivec3(0, 0, 1); + } + } + + float avg = 0.0; + occ = 0.0; + + ivec3 read_x = offset + ivec3(read_dir.x, 0, 0) + 
(proc_abs.x == 0 ? major_axis * read_dir : ivec3(0)); + ivec3 read_y = offset + ivec3(0, read_dir.y, 0) + (proc_abs.y == 0 ? major_axis * read_dir : ivec3(0)); + ivec3 read_z = offset + ivec3(0, 0, read_dir.z) + (proc_abs.z == 0 ? major_axis * read_dir : ivec3(0)); + + uint facing_x = get_facing(read_x - region_offset); + if (facing_x == 0) { + if (all(greaterThanEqual(read_x, ivec3(0))) && all(lessThan(read_x, ivec3(params.grid_size)))) { + occ += imageLoad(dst_occlusion[params.occlusion_index], read_x).r; + avg += 1.0; + } + } else { + if (proc_abs.x != 0) { //do not occlude from voxels in the opposite octant + avg += 1.0; + } + } + + uint facing_y = get_facing(read_y - region_offset); + if (facing_y == 0) { + if (all(greaterThanEqual(read_y, ivec3(0))) && all(lessThan(read_y, ivec3(params.grid_size)))) { + occ += imageLoad(dst_occlusion[params.occlusion_index], read_y).r; + avg += 1.0; + } + } else { + if (proc_abs.y != 0) { + avg += 1.0; + } + } + + uint facing_z = get_facing(read_z - region_offset); + if (facing_z == 0) { + if (all(greaterThanEqual(read_z, ivec3(0))) && all(lessThan(read_z, ivec3(params.grid_size)))) { + occ += imageLoad(dst_occlusion[params.occlusion_index], read_z).r; + avg += 1.0; + } + } else { + if (proc_abs.z != 0) { + avg += 1.0; + } + } + + if (avg > 0.0) { + occ /= avg; + } + } + + imageStore(dst_occlusion[params.occlusion_index], offset, vec4(occ)); + } + } + } + + groupMemoryBarrier(); + barrier(); + } +#if 1 + //bias solid voxels away + + for (int i = 0; i < 64; i++) { + ivec3 local_offset = local_ofs + ((ivec3(i) >> ivec3(0, 2, 4)) & ivec3(3, 3, 3)); + ivec3 offset = region_offset + local_offset; + + if (all(greaterThanEqual(offset, ivec3(0))) && all(lessThan(offset, ivec3(params.grid_size)))) { + uint facing = get_facing(local_offset); + + if (facing != 0) { + //only work on solids + + ivec3 proc_pos = local_offset - ivec3(OCCLUSION_SIZE); + proc_pos += mix(ivec3(0), ivec3(1), greaterThanEqual(proc_pos, ivec3(0))); + + float avg 
= 0.0; + float occ = 0.0; + + ivec3 read_dir = -sign(proc_pos); + ivec3 read_dir_x = ivec3(read_dir.x, 0, 0); + ivec3 read_dir_y = ivec3(0, read_dir.y, 0); + ivec3 read_dir_z = ivec3(0, 0, read_dir.z); + //solid +#if 0 + + uvec3 facing_pos_base = (uvec3(facing) >> uvec3(0,1,2)) & uvec3(1,1,1); + uvec3 facing_neg_base = (uvec3(facing) >> uvec3(3,4,5)) & uvec3(1,1,1); + uvec3 facing_pos= facing_pos_base &((~facing_neg_base)&uvec3(1,1,1)); + uvec3 facing_neg= facing_neg_base &((~facing_pos_base)&uvec3(1,1,1)); +#else + uvec3 facing_pos = (uvec3(facing) >> uvec3(0, 1, 2)) & uvec3(1, 1, 1); + uvec3 facing_neg = (uvec3(facing) >> uvec3(3, 4, 5)) & uvec3(1, 1, 1); +#endif + bvec3 read_valid = bvec3(mix(facing_neg, facing_pos, greaterThan(read_dir, ivec3(0)))); + + //sides + if (read_valid.x) { + ivec3 read_offset = local_offset + read_dir_x; + uint f = get_facing(read_offset); + if (f == 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; + avg += 1.0; + } + } + } + + if (read_valid.y) { + ivec3 read_offset = local_offset + read_dir_y; + uint f = get_facing(read_offset); + if (f == 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; + avg += 1.0; + } + } + } + + if (read_valid.z) { + ivec3 read_offset = local_offset + read_dir_z; + uint f = get_facing(read_offset); + if (f == 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; + avg += 1.0; + } + } + } + + //adjacents + + if (all(read_valid.yz)) { + ivec3 read_offset = local_offset + read_dir_y + read_dir_z; + uint f = 
get_facing(read_offset); + if (f == 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; + avg += 1.0; + } + } + } + + if (all(read_valid.xz)) { + ivec3 read_offset = local_offset + read_dir_x + read_dir_z; + uint f = get_facing(read_offset); + if (f == 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; + avg += 1.0; + } + } + } + + if (all(read_valid.xy)) { + ivec3 read_offset = local_offset + read_dir_x + read_dir_y; + uint f = get_facing(read_offset); + if (f == 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; + avg += 1.0; + } + } + } + + //diagonal + + if (all(read_valid)) { + ivec3 read_offset = local_offset + read_dir; + uint f = get_facing(read_offset); + if (f == 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; + avg += 1.0; + } + } + } + + if (avg > 0.0) { + occ /= avg; + } + + imageStore(dst_occlusion[params.occlusion_index], offset, vec4(occ)); + } + } + } + +#endif + +#if 1 + groupMemoryBarrier(); + barrier(); + + for (int i = 0; i < 64; i++) { + ivec3 local_offset = local_ofs + ((ivec3(i) >> ivec3(0, 2, 4)) & ivec3(3, 3, 3)); + ivec3 offset = region_offset + local_offset; + + if (all(greaterThanEqual(offset, ivec3(0))) && all(lessThan(offset, ivec3(params.grid_size)))) { + uint facing = get_facing(local_offset); + + if (facing == 0) { + ivec3 proc_pos = local_offset - 
ivec3(OCCLUSION_SIZE); + proc_pos += mix(ivec3(0), ivec3(1), greaterThanEqual(proc_pos, ivec3(0))); + + ivec3 proc_abs = abs(proc_pos); + + ivec3 read_dir = sign(proc_pos); //opposite direction + ivec3 read_dir_x = ivec3(read_dir.x, 0, 0); + ivec3 read_dir_y = ivec3(0, read_dir.y, 0); + ivec3 read_dir_z = ivec3(0, 0, read_dir.z); + //solid + uvec3 read_mask = mix(uvec3(1, 2, 4), uvec3(8, 16, 32), greaterThan(read_dir, ivec3(0))); //match positive with negative normals + uvec3 block_mask = mix(uvec3(1, 2, 4), uvec3(8, 16, 32), lessThan(read_dir, ivec3(0))); //match positive with negative normals + + block_mask = uvec3(0); + + float visible = 0.0; + float occlude_total = 0.0; + + if (proc_abs.x < OCCLUSION_SIZE) { + ivec3 read_offset = local_offset + read_dir_x; + uint x_mask = get_facing(read_offset); + if (x_mask != 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occlude_total += 1.0; + if (bool(x_mask & read_mask.x) && !bool(x_mask & block_mask.x)) { + visible += 1.0; + } + } + } + } + + if (proc_abs.y < OCCLUSION_SIZE) { + ivec3 read_offset = local_offset + read_dir_y; + uint y_mask = get_facing(read_offset); + if (y_mask != 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occlude_total += 1.0; + if (bool(y_mask & read_mask.y) && !bool(y_mask & block_mask.y)) { + visible += 1.0; + } + } + } + } + + if (proc_abs.z < OCCLUSION_SIZE) { + ivec3 read_offset = local_offset + read_dir_z; + uint z_mask = get_facing(read_offset); + if (z_mask != 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occlude_total += 1.0; + if (bool(z_mask & read_mask.z) && !bool(z_mask & block_mask.z)) { + visible += 1.0; + } + } + } + } + + //if near the cartesian plane, test in opposite direction too 
+ + read_mask = mix(uvec3(1, 2, 4), uvec3(8, 16, 32), lessThan(read_dir, ivec3(0))); //match negative with positive normals + block_mask = mix(uvec3(1, 2, 4), uvec3(8, 16, 32), greaterThan(read_dir, ivec3(0))); //match negative with positive normals + block_mask = uvec3(0); + + if (proc_abs.x == 1) { + ivec3 read_offset = local_offset - read_dir_x; + uint x_mask = get_facing(read_offset); + if (x_mask != 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occlude_total += 1.0; + if (bool(x_mask & read_mask.x) && !bool(x_mask & block_mask.x)) { + visible += 1.0; + } + } + } + } + + if (proc_abs.y == 1) { + ivec3 read_offset = local_offset - read_dir_y; + uint y_mask = get_facing(read_offset); + if (y_mask != 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occlude_total += 1.0; + if (bool(y_mask & read_mask.y) && !bool(y_mask & block_mask.y)) { + visible += 1.0; + } + } + } + } + + if (proc_abs.z == 1) { + ivec3 read_offset = local_offset - read_dir_z; + uint z_mask = get_facing(read_offset); + if (z_mask != 0) { + read_offset += region_offset; + if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { + occlude_total += 1.0; + if (bool(z_mask & read_mask.z) && !bool(z_mask & block_mask.z)) { + visible += 1.0; + } + } + } + } + + if (occlude_total > 0.0) { + float occ = imageLoad(dst_occlusion[params.occlusion_index], offset).r; + occ *= visible / occlude_total; + imageStore(dst_occlusion[params.occlusion_index], offset, vec4(occ)); + } + } + } + } + +#endif + + /* + for(int i=0;i<8;i++) { + ivec3 local_offset = local_pos + ((ivec3(i) >> ivec3(2,1,0)) & ivec3(1,1,1)) * OCCLUSION_SIZE; + ivec3 offset = local_offset - ivec3(OCCLUSION_SIZE); //looking around probe, so starts negative + offset += region * OCCLUSION_SIZE * 2; 
//offset by region + offset += params.probe_offset * OCCLUSION_SIZE; // offset by probe offset + if (all(greaterThanEqual(offset,ivec3(0))) && all(lessThan(offset,ivec3(params.grid_size)))) { + imageStore(dst_occlusion[params.occlusion_index],offset,vec4( occlusion_data[ to_linear(local_offset) ] )); + //imageStore(dst_occlusion[params.occlusion_index],offset,vec4( occlusion_solid[ to_linear(local_offset) ] )); + } + } +*/ + +#endif + +#ifdef MODE_STORE + + ivec3 local = ivec3(gl_LocalInvocationID.xyz); + ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); + // store SDF + uvec4 p = imageLoad(src_positions, pos); + + bool solid = false; + float d; + if (ivec3(p.xyz) == pos) { + //solid block + d = 0; + solid = true; + } else { + //distance block + d = 1.0 + length(vec3(p.xyz) - vec3(pos)); + } + + d /= 255.0; + + imageStore(dst_sdf, pos, vec4(d)); + + // STORE OCCLUSION + + uint occlusion = 0; + const uint occlusion_shift[8] = uint[](12, 8, 4, 0, 28, 24, 20, 16); + for (int i = 0; i < 8; i++) { + float occ = imageLoad(src_occlusion[i], pos).r; + occlusion |= uint(clamp(occ * 15.0, 0.0, 15.0)) << occlusion_shift[i]; + } + { + ivec3 occ_pos = pos; + occ_pos.z += params.cascade * params.grid_size; + imageStore(dst_occlusion, occ_pos, uvec4(occlusion & 0xFFFF)); + occ_pos.x += params.grid_size; + imageStore(dst_occlusion, occ_pos, uvec4(occlusion >> 16)); + } + + // STORE POSITIONS + + if (local == ivec3(0)) { + store_position_count = 0; //base one stores as zero, the others wait + } + + groupMemoryBarrier(); + barrier(); + + if (solid) { + uint index = atomicAdd(store_position_count, 1); + // At least do the conversion work in parallel + store_positions[index].position = uint(pos.x | (pos.y << 7) | (pos.z << 14)); + + //see around which voxels point to this one, add them to the list + uint bit_index = 0; + uint neighbour_bits = 0; + for (int i = -1; i <= 1; i++) { + for (int j = -1; j <= 1; j++) { + for (int k = -1; k <= 1; k++) { + if (i == 0 && j == 0 && k == 0) { + 
continue; + } + ivec3 npos = pos + ivec3(i, j, k); + if (all(greaterThanEqual(npos, ivec3(0))) && all(lessThan(npos, ivec3(params.grid_size)))) { + p = imageLoad(src_positions, npos); + if (ivec3(p.xyz) == pos) { + neighbour_bits |= (1 << bit_index); + } + } + bit_index++; + } + } + } + + uint rgb = imageLoad(src_albedo, pos).r; + uint facing = imageLoad(src_facing, pos).r; + + store_positions[index].albedo = rgb >> 1; //store as it comes (555) to avoid precision loss (and move away the alpha bit) + store_positions[index].albedo |= (facing & 0x3F) << 15; // store facing in bits 15-21 + + store_positions[index].albedo |= neighbour_bits << 21; //store lower 11 bits of neighbours with remaining albedo + store_positions[index].position |= (neighbour_bits >> 11) << 21; //store 11 bits more of neighbours with position + + store_positions[index].light = imageLoad(src_light, pos).r; + store_positions[index].light_aniso = imageLoad(src_light_aniso, pos).r; + //add neighbours + store_positions[index].light |= (neighbour_bits >> 22) << 30; //store 2 bits more of neighbours with light + store_positions[index].light_aniso |= (neighbour_bits >> 24) << 30; //store 2 bits more of neighbours with aniso + } + + groupMemoryBarrier(); + barrier(); + + // global increment only once per group, to reduce pressure + + if (local == ivec3(0) && store_position_count > 0) { + store_from_index = atomicAdd(dispatch_data.total_count, store_position_count); + uint group_count = (store_from_index + store_position_count - 1) / 64 + 1; + atomicMax(dispatch_data.x, group_count); + } + + groupMemoryBarrier(); + barrier(); + + uint read_index = uint(local.z * 4 * 4 + local.y * 4 + local.x); + uint write_index = store_from_index + read_index; + + if (read_index < store_position_count) { + dst_process_voxels.data[write_index] = store_positions[read_index]; + } + + if (pos == ivec3(0)) { + //this thread clears y and z + dispatch_data.y = 1; + dispatch_data.z = 1; + } +#endif +} diff --git 
a/servers/rendering/renderer_rd/shaders/environment/voxel_gi.glsl b/servers/rendering/renderer_rd/shaders/environment/voxel_gi.glsl new file mode 100644 index 0000000000..577c6d0cd0 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/environment/voxel_gi.glsl @@ -0,0 +1,616 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#ifdef MODE_DYNAMIC +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; +#else +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; +#endif + +#ifndef MODE_DYNAMIC + +#define NO_CHILDREN 0xFFFFFFFF + +struct CellChildren { + uint children[8]; +}; + +layout(set = 0, binding = 1, std430) buffer CellChildrenBuffer { + CellChildren data[]; +} +cell_children; + +struct CellData { + uint position; // xyz 10 bits + uint albedo; //rgb albedo + uint emission; //rgb normalized with e as multiplier + uint normal; //RGB normal encoded +}; + +layout(set = 0, binding = 2, std430) buffer CellDataBuffer { + CellData data[]; +} +cell_data; + +#endif // MODE DYNAMIC + +#define LIGHT_TYPE_DIRECTIONAL 0 +#define LIGHT_TYPE_OMNI 1 +#define LIGHT_TYPE_SPOT 2 + +#if defined(MODE_COMPUTE_LIGHT) || defined(MODE_DYNAMIC_LIGHTING) + +struct Light { + uint type; + float energy; + float radius; + float attenuation; + + vec3 color; + float cos_spot_angle; + + vec3 position; + float inv_spot_attenuation; + + vec3 direction; + bool has_shadow; +}; + +layout(set = 0, binding = 3, std140) uniform Lights { + Light data[MAX_LIGHTS]; +} +lights; + +#endif // MODE COMPUTE LIGHT + +#ifdef MODE_SECOND_BOUNCE + +layout(set = 0, binding = 5) uniform texture3D color_texture; + +#endif // MODE_SECOND_BOUNCE + +#ifndef MODE_DYNAMIC + +layout(push_constant, std430) uniform Params { + ivec3 limits; + uint stack_size; + + float emission_scale; + float propagation; + float dynamic_range; + + uint light_count; + uint cell_offset; + uint cell_count; + float aniso_strength; + uint pad; +} +params; + +layout(set = 0, binding = 4, std430) buffer Outputs { + vec4 
data[]; +} +outputs; + +#endif // MODE DYNAMIC + +layout(set = 0, binding = 9) uniform texture3D texture_sdf; +layout(set = 0, binding = 10) uniform sampler texture_sampler; + +#ifdef MODE_WRITE_TEXTURE + +layout(rgba8, set = 0, binding = 5) uniform restrict writeonly image3D color_tex; + +#endif + +#ifdef MODE_DYNAMIC + +layout(push_constant, std430) uniform Params { + ivec3 limits; + uint light_count; //when not lighting + ivec3 x_dir; + float z_base; + ivec3 y_dir; + float z_sign; + ivec3 z_dir; + float pos_multiplier; + ivec2 rect_pos; + ivec2 rect_size; + ivec2 prev_rect_ofs; + ivec2 prev_rect_size; + bool flip_x; + bool flip_y; + float dynamic_range; + bool on_mipmap; + float propagation; + float pad[3]; +} +params; + +#ifdef MODE_DYNAMIC_LIGHTING + +layout(rgba8, set = 0, binding = 5) uniform restrict readonly image2D source_albedo; +layout(rgba8, set = 0, binding = 6) uniform restrict readonly image2D source_normal; +layout(rgba8, set = 0, binding = 7) uniform restrict readonly image2D source_orm; +//layout (set=0,binding=8) uniform texture2D source_depth; +layout(rgba16f, set = 0, binding = 11) uniform restrict image2D emission; +layout(r32f, set = 0, binding = 12) uniform restrict image2D depth; + +#endif + +#ifdef MODE_DYNAMIC_SHRINK + +layout(rgba16f, set = 0, binding = 5) uniform restrict readonly image2D source_light; +layout(r32f, set = 0, binding = 6) uniform restrict readonly image2D source_depth; + +#ifdef MODE_DYNAMIC_SHRINK_WRITE + +layout(rgba16f, set = 0, binding = 7) uniform restrict writeonly image2D light; +layout(r32f, set = 0, binding = 8) uniform restrict writeonly image2D depth; + +#endif // MODE_DYNAMIC_SHRINK_WRITE + +#ifdef MODE_DYNAMIC_SHRINK_PLOT + +layout(rgba8, set = 0, binding = 11) uniform restrict image3D color_texture; + +#endif //MODE_DYNAMIC_SHRINK_PLOT + +#endif // MODE_DYNAMIC_SHRINK + +//layout (rgba8,set=0,binding=5) uniform restrict writeonly image3D color_tex; + +#endif // MODE DYNAMIC + +#if 
defined(MODE_COMPUTE_LIGHT) || defined(MODE_DYNAMIC_LIGHTING) + +float raymarch(float distance, float distance_adv, vec3 from, vec3 direction) { + vec3 cell_size = 1.0 / vec3(params.limits); + float occlusion = 1.0; + while (distance > 0.5) { //use this to avoid precision errors + float advance = texture(sampler3D(texture_sdf, texture_sampler), from * cell_size).r * 255.0 - 1.0; + if (advance < 0.0) { + occlusion = 0.0; + break; + } + + occlusion = min(advance, occlusion); + + advance = max(distance_adv, advance - mod(advance, distance_adv)); //should always advance in multiples of distance_adv + + from += direction * advance; + distance -= advance; + } + + return occlusion; //max(0.0,distance); +} + +float get_omni_attenuation(float distance, float inv_range, float decay) { + float nd = distance * inv_range; + nd *= nd; + nd *= nd; // nd^4 + nd = max(1.0 - nd, 0.0); + nd *= nd; // nd^2 + return nd * pow(max(distance, 0.0001), -decay); +} + +bool compute_light_vector(uint light, vec3 pos, out float attenuation, out vec3 light_pos) { + if (lights.data[light].type == LIGHT_TYPE_DIRECTIONAL) { + light_pos = pos - lights.data[light].direction * length(vec3(params.limits)); + attenuation = 1.0; + + } else { + light_pos = lights.data[light].position; + float distance = length(pos - light_pos); + if (distance >= lights.data[light].radius) { + return false; + } + + attenuation = get_omni_attenuation(distance, 1.0 / lights.data[light].radius, lights.data[light].attenuation); + + if (lights.data[light].type == LIGHT_TYPE_SPOT) { + vec3 rel = normalize(pos - light_pos); + float cos_spot_angle = lights.data[light].cos_spot_angle; + float cos_angle = dot(rel, lights.data[light].direction); + if (cos_angle < cos_spot_angle) { + return false; + } + + float scos = max(cos_angle, cos_spot_angle); + float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cos_spot_angle)); + attenuation *= 1.0 - pow(spot_rim, lights.data[light].inv_spot_attenuation); + } + } + + return true; +} + +float 
get_normal_advance(vec3 p_normal) { + vec3 normal = p_normal; + vec3 unorm = abs(normal); + + if ((unorm.x >= unorm.y) && (unorm.x >= unorm.z)) { + // x code + unorm = normal.x > 0.0 ? vec3(1.0, 0.0, 0.0) : vec3(-1.0, 0.0, 0.0); + } else if ((unorm.y > unorm.x) && (unorm.y >= unorm.z)) { + // y code + unorm = normal.y > 0.0 ? vec3(0.0, 1.0, 0.0) : vec3(0.0, -1.0, 0.0); + } else if ((unorm.z > unorm.x) && (unorm.z > unorm.y)) { + // z code + unorm = normal.z > 0.0 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 0.0, -1.0); + } else { + // oh-no we messed up code + // has to be + unorm = vec3(1.0, 0.0, 0.0); + } + + return 1.0 / dot(normal, unorm); +} + +void clip_segment(vec4 plane, vec3 begin, inout vec3 end) { + vec3 segment = begin - end; + float den = dot(plane.xyz, segment); + + //printf("den is %i\n",den); + if (den < 0.0001) { + return; + } + + float dist = (dot(plane.xyz, begin) - plane.w) / den; + + if (dist < 0.0001 || dist > 1.0001) { + return; + } + + end = begin + segment * -dist; +} + +bool compute_light_at_pos(uint index, vec3 pos, vec3 normal, inout vec3 light, inout vec3 light_dir) { + float attenuation; + vec3 light_pos; + + if (!compute_light_vector(index, pos, attenuation, light_pos)) { + return false; + } + + light_dir = normalize(pos - light_pos); + + if (attenuation < 0.01 || (length(normal) > 0.2 && dot(normal, light_dir) >= 0)) { + return false; //not facing the light, or attenuation is near zero + } + + if (lights.data[index].has_shadow) { + float distance_adv = get_normal_advance(light_dir); + + vec3 to = pos; + if (length(normal) > 0.2) { + to += normal * distance_adv * 0.51; + } else { + to -= sign(light_dir) * 0.45; //go near the edge towards the light direction to avoid self occlusion + } + + //clip + clip_segment(mix(vec4(-1.0, 0.0, 0.0, 0.0), vec4(1.0, 0.0, 0.0, float(params.limits.x - 1)), bvec4(light_dir.x < 0.0)), to, light_pos); + clip_segment(mix(vec4(0.0, -1.0, 0.0, 0.0), vec4(0.0, 1.0, 0.0, float(params.limits.y - 1)), bvec4(light_dir.y < 
0.0)), to, light_pos); + clip_segment(mix(vec4(0.0, 0.0, -1.0, 0.0), vec4(0.0, 0.0, 1.0, float(params.limits.z - 1)), bvec4(light_dir.z < 0.0)), to, light_pos); + + float distance = length(to - light_pos); + if (distance < 0.1) { + return false; // hit + } + + distance += distance_adv - mod(distance, distance_adv); //make it reach the center of the box always + light_pos = to - light_dir * distance; + + //from -= sign(light_dir)*0.45; //go near the edge towards the light direction to avoid self occlusion + + /*float dist = raymarch(distance,distance_adv,light_pos,light_dir); + + if (dist > distance_adv) { + return false; + } + + attenuation *= 1.0 - smoothstep(0.1*distance_adv,distance_adv,dist); + */ + + float occlusion = raymarch(distance, distance_adv, light_pos, light_dir); + + if (occlusion == 0.0) { + return false; + } + + attenuation *= occlusion; //1.0 - smoothstep(0.1*distance_adv,distance_adv,dist); + } + + light = lights.data[index].color * attenuation * lights.data[index].energy; + return true; +} + +#endif // MODE COMPUTE LIGHT + +void main() { +#ifndef MODE_DYNAMIC + + uint cell_index = gl_GlobalInvocationID.x; + if (cell_index >= params.cell_count) { + return; + } + cell_index += params.cell_offset; + + uvec3 posu = uvec3(cell_data.data[cell_index].position & 0x7FF, (cell_data.data[cell_index].position >> 11) & 0x3FF, cell_data.data[cell_index].position >> 21); + vec4 albedo = unpackUnorm4x8(cell_data.data[cell_index].albedo); + +#endif + + /////////////////COMPUTE LIGHT/////////////////////////////// + +#ifdef MODE_COMPUTE_LIGHT + + vec3 pos = vec3(posu) + vec3(0.5); + + vec3 emission = vec3(uvec3(cell_data.data[cell_index].emission & 0x1ff, (cell_data.data[cell_index].emission >> 9) & 0x1ff, (cell_data.data[cell_index].emission >> 18) & 0x1ff)) * pow(2.0, float(cell_data.data[cell_index].emission >> 27) - 15.0 - 9.0); + vec3 normal = unpackSnorm4x8(cell_data.data[cell_index].normal).xyz; + + vec3 accum = vec3(0.0); + + for (uint i = 0; i < 
params.light_count; i++) { + vec3 light; + vec3 light_dir; + if (!compute_light_at_pos(i, pos, normal.xyz, light, light_dir)) { + continue; + } + + light *= albedo.rgb; + + if (length(normal) > 0.2) { + accum += max(0.0, dot(normal, -light_dir)) * light; + } else { + //all directions + accum += light; + } + } + + outputs.data[cell_index] = vec4(accum + emission, 0.0); + +#endif //MODE_COMPUTE_LIGHT + + /////////////////SECOND BOUNCE/////////////////////////////// + +#ifdef MODE_SECOND_BOUNCE + vec3 pos = vec3(posu) + vec3(0.5); + ivec3 ipos = ivec3(posu); + vec4 normal = unpackSnorm4x8(cell_data.data[cell_index].normal); + + vec3 accum = outputs.data[cell_index].rgb; + + if (length(normal.xyz) > 0.2) { + vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0); + vec3 tangent = normalize(cross(v0, normal.xyz)); + vec3 bitangent = normalize(cross(tangent, normal.xyz)); + mat3 normal_mat = mat3(tangent, bitangent, normal.xyz); + +#define MAX_CONE_DIRS 6 + + vec3 cone_dirs[MAX_CONE_DIRS] = vec3[]( + vec3(0.0, 0.0, 1.0), + vec3(0.866025, 0.0, 0.5), + vec3(0.267617, 0.823639, 0.5), + vec3(-0.700629, 0.509037, 0.5), + vec3(-0.700629, -0.509037, 0.5), + vec3(0.267617, -0.823639, 0.5)); + + float cone_weights[MAX_CONE_DIRS] = float[](0.25, 0.15, 0.15, 0.15, 0.15, 0.15); + float tan_half_angle = 0.577; + + for (int i = 0; i < MAX_CONE_DIRS; i++) { + vec3 direction = normal_mat * cone_dirs[i]; + vec4 color = vec4(0.0); + { + float dist = 1.5; + float max_distance = length(vec3(params.limits)); + vec3 cell_size = 1.0 / vec3(params.limits); + + while (dist < max_distance && color.a < 0.95) { + float diameter = max(1.0, 2.0 * tan_half_angle * dist); + vec3 uvw_pos = (pos + dist * direction) * cell_size; + float half_diameter = diameter * 0.5; + //check if outside, then break + //if ( any(greaterThan(abs(uvw_pos - 0.5),vec3(0.5f + half_diameter * cell_size)) ) ) { + // break; + //} + + float log2_diameter = log2(diameter); + vec4 scolor = 
textureLod(sampler3D(color_texture, texture_sampler), uvw_pos, log2_diameter); + float a = (1.0 - color.a); + color += a * scolor; + dist += half_diameter; + } + } + color *= cone_weights[i] * vec4(albedo.rgb, 1.0) * params.dynamic_range; //restore range + accum += color.rgb; + } + } + + outputs.data[cell_index] = vec4(accum, 0.0); + +#endif // MODE_SECOND_BOUNCE + + /////////////////UPDATE MIPMAPS/////////////////////////////// + +#ifdef MODE_UPDATE_MIPMAPS + + { + vec3 light_accum = vec3(0.0); + float count = 0.0; + for (uint i = 0; i < 8; i++) { + uint child_index = cell_children.data[cell_index].children[i]; + if (child_index == NO_CHILDREN) { + continue; + } + light_accum += outputs.data[child_index].rgb; + + count += 1.0; + } + + float divisor = mix(8.0, count, params.propagation); + outputs.data[cell_index] = vec4(light_accum / divisor, 0.0); + } +#endif + + ///////////////////WRITE TEXTURE///////////////////////////// + +#ifdef MODE_WRITE_TEXTURE + { + imageStore(color_tex, ivec3(posu), vec4(outputs.data[cell_index].rgb / params.dynamic_range, albedo.a)); + } +#endif + + ///////////////////DYNAMIC LIGHTING///////////////////////////// + +#ifdef MODE_DYNAMIC + + ivec2 pos_xy = ivec2(gl_GlobalInvocationID.xy); + if (any(greaterThanEqual(pos_xy, params.rect_size))) { + return; //out of bounds + } + + ivec2 uv_xy = pos_xy; + if (params.flip_x) { + uv_xy.x = params.rect_size.x - pos_xy.x - 1; + } + if (params.flip_y) { + uv_xy.y = params.rect_size.y - pos_xy.y - 1; + } + +#ifdef MODE_DYNAMIC_LIGHTING + + { + float z = params.z_base + imageLoad(depth, uv_xy).x * params.z_sign; + + ivec3 pos = params.x_dir * (params.rect_pos.x + pos_xy.x) + params.y_dir * (params.rect_pos.y + pos_xy.y) + abs(params.z_dir) * int(z); + + vec3 normal = imageLoad(source_normal, uv_xy).xyz * 2.0 - 1.0; + normal = vec3(params.x_dir) * normal.x * mix(1.0, -1.0, params.flip_x) + vec3(params.y_dir) * normal.y * mix(1.0, -1.0, params.flip_y) - vec3(params.z_dir) * normal.z; + + vec4 albedo 
= imageLoad(source_albedo, uv_xy); + + //determine the position in space + + vec3 accum = vec3(0.0); + for (uint i = 0; i < params.light_count; i++) { + vec3 light; + vec3 light_dir; + if (!compute_light_at_pos(i, vec3(pos) * params.pos_multiplier, normal, light, light_dir)) { + continue; + } + + light *= albedo.rgb; + + accum += max(0.0, dot(normal, -light_dir)) * light; + } + + accum += imageLoad(emission, uv_xy).xyz; + + imageStore(emission, uv_xy, vec4(accum, albedo.a)); + imageStore(depth, uv_xy, vec4(z)); + } + +#endif // MODE DYNAMIC LIGHTING + +#ifdef MODE_DYNAMIC_SHRINK + + { + vec4 accum = vec4(0.0); + float accum_z = 0.0; + float count = 0.0; + + for (int i = 0; i < 4; i++) { + ivec2 ofs = pos_xy * 2 + ivec2(i & 1, i >> 1) - params.prev_rect_ofs; + if (any(lessThan(ofs, ivec2(0))) || any(greaterThanEqual(ofs, params.prev_rect_size))) { + continue; + } + if (params.flip_x) { + ofs.x = params.prev_rect_size.x - ofs.x - 1; + } + if (params.flip_y) { + ofs.y = params.prev_rect_size.y - ofs.y - 1; + } + + vec4 light = imageLoad(source_light, ofs); + if (light.a == 0.0) { //ignore empty + continue; + } + accum += light; + float z = imageLoad(source_depth, ofs).x; + accum_z += z * 0.5; //shrink half too + count += 1.0; + } + + if (params.on_mipmap) { + accum.rgb /= mix(8.0, count, params.propagation); + accum.a /= 8.0; + } else { + accum /= 4.0; + } + + if (count == 0.0) { + accum_z = 0.0; //avoid nan + } else { + accum_z /= count; + } + +#ifdef MODE_DYNAMIC_SHRINK_WRITE + + imageStore(light, uv_xy, accum); + imageStore(depth, uv_xy, vec4(accum_z)); +#endif + +#ifdef MODE_DYNAMIC_SHRINK_PLOT + + if (accum.a < 0.001) { + return; //do not blit if alpha is too low + } + + ivec3 pos = params.x_dir * (params.rect_pos.x + pos_xy.x) + params.y_dir * (params.rect_pos.y + pos_xy.y) + abs(params.z_dir) * int(accum_z); + + float z_frac = fract(accum_z); + + for (int i = 0; i < 2; i++) { + ivec3 pos3d = pos + abs(params.z_dir) * i; + if (any(lessThan(pos3d, ivec3(0))) || 
any(greaterThanEqual(pos3d, params.limits))) { + //skip if offlimits + continue; + } + vec4 color_blit = accum * (i == 0 ? 1.0 - z_frac : z_frac); + vec4 color = imageLoad(color_texture, pos3d); + color.rgb *= params.dynamic_range; + +#if 0 + color.rgb = mix(color.rgb,color_blit.rgb,color_blit.a); + color.a+=color_blit.a; +#else + + float sa = 1.0 - color_blit.a; + vec4 result; + result.a = color.a * sa + color_blit.a; + if (result.a == 0.0) { + result = vec4(0.0); + } else { + result.rgb = (color.rgb * color.a * sa + color_blit.rgb * color_blit.a) / result.a; + color = result; + } + +#endif + color.rgb /= params.dynamic_range; + imageStore(color_texture, pos3d, color); + //imageStore(color_texture,pos3d,vec4(1,1,1,1)); + } +#endif // MODE_DYNAMIC_SHRINK_PLOT + } +#endif + +#endif // MODE DYNAMIC +} diff --git a/servers/rendering/renderer_rd/shaders/environment/voxel_gi_debug.glsl b/servers/rendering/renderer_rd/shaders/environment/voxel_gi_debug.glsl new file mode 100644 index 0000000000..fd7a2bf8ad --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/environment/voxel_gi_debug.glsl @@ -0,0 +1,168 @@ +#[vertex] + +#version 450 + +#VERSION_DEFINES + +struct CellData { + uint position; // xyz 10 bits + uint albedo; //rgb albedo + uint emission; //rgb normalized with e as multiplier + uint normal; //RGB normal encoded +}; + +layout(set = 0, binding = 1, std140) buffer CellDataBuffer { + CellData data[]; +} +cell_data; + +layout(set = 0, binding = 2) uniform texture3D color_tex; + +layout(set = 0, binding = 3) uniform sampler tex_sampler; + +layout(push_constant, std430) uniform Params { + mat4 projection; + uint cell_offset; + float dynamic_range; + float alpha; + uint level; + ivec3 bounds; + uint pad; +} +params; + +layout(location = 0) out vec4 color_interp; + +void main() { + const vec3 cube_triangles[36] = vec3[]( + vec3(-1.0f, -1.0f, -1.0f), + vec3(-1.0f, -1.0f, 1.0f), + vec3(-1.0f, 1.0f, 1.0f), + vec3(1.0f, 1.0f, -1.0f), + vec3(-1.0f, -1.0f, -1.0f), + 
vec3(-1.0f, 1.0f, -1.0f), + vec3(1.0f, -1.0f, 1.0f), + vec3(-1.0f, -1.0f, -1.0f), + vec3(1.0f, -1.0f, -1.0f), + vec3(1.0f, 1.0f, -1.0f), + vec3(1.0f, -1.0f, -1.0f), + vec3(-1.0f, -1.0f, -1.0f), + vec3(-1.0f, -1.0f, -1.0f), + vec3(-1.0f, 1.0f, 1.0f), + vec3(-1.0f, 1.0f, -1.0f), + vec3(1.0f, -1.0f, 1.0f), + vec3(-1.0f, -1.0f, 1.0f), + vec3(-1.0f, -1.0f, -1.0f), + vec3(-1.0f, 1.0f, 1.0f), + vec3(-1.0f, -1.0f, 1.0f), + vec3(1.0f, -1.0f, 1.0f), + vec3(1.0f, 1.0f, 1.0f), + vec3(1.0f, -1.0f, -1.0f), + vec3(1.0f, 1.0f, -1.0f), + vec3(1.0f, -1.0f, -1.0f), + vec3(1.0f, 1.0f, 1.0f), + vec3(1.0f, -1.0f, 1.0f), + vec3(1.0f, 1.0f, 1.0f), + vec3(1.0f, 1.0f, -1.0f), + vec3(-1.0f, 1.0f, -1.0f), + vec3(1.0f, 1.0f, 1.0f), + vec3(-1.0f, 1.0f, -1.0f), + vec3(-1.0f, 1.0f, 1.0f), + vec3(1.0f, 1.0f, 1.0f), + vec3(-1.0f, 1.0f, 1.0f), + vec3(1.0f, -1.0f, 1.0f)); + + vec3 vertex = cube_triangles[gl_VertexIndex] * 0.5 + 0.5; +#ifdef MODE_DEBUG_LIGHT_FULL + uvec3 posu = uvec3(gl_InstanceIndex % params.bounds.x, (gl_InstanceIndex / params.bounds.x) % params.bounds.y, gl_InstanceIndex / (params.bounds.y * params.bounds.x)); +#else + uint cell_index = gl_InstanceIndex + params.cell_offset; + + uvec3 posu = uvec3(cell_data.data[cell_index].position & 0x7FF, (cell_data.data[cell_index].position >> 11) & 0x3FF, cell_data.data[cell_index].position >> 21); +#endif + +#ifdef MODE_DEBUG_EMISSION + color_interp.xyz = vec3(uvec3(cell_data.data[cell_index].emission & 0x1ff, (cell_data.data[cell_index].emission >> 9) & 0x1ff, (cell_data.data[cell_index].emission >> 18) & 0x1ff)) * pow(2.0, float(cell_data.data[cell_index].emission >> 27) - 15.0 - 9.0); +#endif + +#ifdef MODE_DEBUG_COLOR + color_interp.xyz = unpackUnorm4x8(cell_data.data[cell_index].albedo).xyz; +#endif + +#ifdef MODE_DEBUG_LIGHT + color_interp = texelFetch(sampler3D(color_tex, tex_sampler), ivec3(posu), int(params.level)); + color_interp.xyz *= params.dynamic_range; //restore range (the texture stores light divided by dynamic_range) +#endif + + float scale = (1 << params.level); + + gl_Position =
params.projection * vec4((vec3(posu) + vertex) * scale, 1.0); + +#ifdef MODE_DEBUG_LIGHT_FULL + if (color_interp.a == 0.0) { + gl_Position = vec4(0.0); //force clip and not draw + } +#else + color_interp.a = params.alpha; +#endif +} + +#[fragment] + +#version 450 + +#VERSION_DEFINES + +layout(location = 0) in vec4 color_interp; +layout(location = 0) out vec4 frag_color; + +void main() { + frag_color = color_interp; + +#ifdef MODE_DEBUG_LIGHT_FULL + + //there really is no alpha, so use dither + + int x = int(gl_FragCoord.x) % 4; + int y = int(gl_FragCoord.y) % 4; + int index = x + y * 4; + float limit = 0.0; + if (x < 8) { + if (index == 0) + limit = 0.0625; + if (index == 1) + limit = 0.5625; + if (index == 2) + limit = 0.1875; + if (index == 3) + limit = 0.6875; + if (index == 4) + limit = 0.8125; + if (index == 5) + limit = 0.3125; + if (index == 6) + limit = 0.9375; + if (index == 7) + limit = 0.4375; + if (index == 8) + limit = 0.25; + if (index == 9) + limit = 0.75; + if (index == 10) + limit = 0.125; + if (index == 11) + limit = 0.625; + if (index == 12) + limit = 1.0; + if (index == 13) + limit = 0.5; + if (index == 14) + limit = 0.875; + if (index == 15) + limit = 0.375; + } + if (frag_color.a < limit) { + discard; + } +#endif +} diff --git a/servers/rendering/renderer_rd/shaders/environment/voxel_gi_sdf.glsl b/servers/rendering/renderer_rd/shaders/environment/voxel_gi_sdf.glsl new file mode 100644 index 0000000000..47a611a543 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/environment/voxel_gi_sdf.glsl @@ -0,0 +1,180 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; + +#define MAX_DISTANCE 100000.0 + +#define NO_CHILDREN 0xFFFFFFFF + +struct CellChildren { + uint children[8]; +}; + +layout(set = 0, binding = 1, std430) buffer CellChildrenBuffer { + CellChildren data[]; +} +cell_children; + +struct CellData { + uint position; // xyz 10 bits + uint albedo; //rgb albedo + uint 
emission; //rgb normalized with e as multiplier + uint normal; //RGB normal encoded +}; + +layout(set = 0, binding = 2, std430) buffer CellDataBuffer { + CellData data[]; +} +cell_data; + +layout(r8ui, set = 0, binding = 3) uniform restrict writeonly uimage3D sdf_tex; + +layout(push_constant, std430) uniform Params { + uint offset; + uint end; + uint pad0; + uint pad1; +} +params; + +void main() { + vec3 pos = vec3(gl_GlobalInvocationID); + float closest_dist = MAX_DISTANCE; + + for (uint i = params.offset; i < params.end; i++) { + vec3 posu = vec3(uvec3(cell_data.data[i].position & 0x7FF, (cell_data.data[i].position >> 11) & 0x3FF, cell_data.data[i].position >> 21)); + float dist = length(pos - posu); + if (dist < closest_dist) { + closest_dist = dist; + } + } + + uint dist_8; + + if (closest_dist < 0.0001) { // same cell + dist_8 = 0; //equals to -1 + } else { + dist_8 = clamp(uint(closest_dist), 0, 254) + 1; //conservative, 0 is 1, so <1 is considered solid + } + + imageStore(sdf_tex, ivec3(gl_GlobalInvocationID), uvec4(dist_8)); + //imageStore(sdf_tex,pos,uvec4(pos*2,0)); +} + +#if 0 +layout(push_constant, std430) uniform Params { + ivec3 limits; + uint stack_size; +} +params; + +float distance_to_aabb(ivec3 pos, ivec3 aabb_pos, ivec3 aabb_size) { + vec3 delta = vec3(max(ivec3(0), max(aabb_pos - pos, pos - (aabb_pos + aabb_size - ivec3(1))))); + return length(delta); +} + +void main() { + ivec3 pos = ivec3(gl_GlobalInvocationID); + + uint stack[10] = uint[](0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + uint stack_indices[10] = uint[](0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + ivec3 stack_positions[10] = ivec3[](ivec3(0), ivec3(0), ivec3(0), ivec3(0), ivec3(0), ivec3(0), ivec3(0), ivec3(0), ivec3(0), ivec3(0)); + + const uint cell_orders[8] = uint[]( + 0x11f58d1, + 0xe2e70a, + 0xd47463, + 0xbb829c, + 0x8d11f5, + 0x70ae2e, + 0x463d47, + 0x29cbb8); + + bool cell_found = false; + bool cell_found_exact = false; + ivec3 closest_cell_pos; + float closest_distance = MAX_DISTANCE; + int 
stack_pos = 0; + + while (true) { + uint index = stack_indices[stack_pos] >> 24; + + if (index == 8) { + //go up + if (stack_pos == 0) { + break; //done going through octree + } + stack_pos--; + continue; + } + + stack_indices[stack_pos] = (stack_indices[stack_pos] & ((1 << 24) - 1)) | ((index + 1) << 24); + + uint cell_index = (stack_indices[stack_pos] >> (index * 3)) & 0x7; + uint child_cell = cell_children.data[stack[stack_pos]].children[cell_index]; + + if (child_cell == NO_CHILDREN) { + continue; + } + + ivec3 child_cell_size = params.limits >> (stack_pos + 1); + ivec3 child_cell_pos = stack_positions[stack_pos]; + + child_cell_pos += mix(ivec3(0), child_cell_size, bvec3(uvec3(index & 1, index & 2, index & 4) != uvec3(0))); + + bool is_leaf = stack_pos == (params.stack_size - 2); + + if (child_cell_pos == pos && is_leaf) { + //we may actually end up in the exact cell. + //if this happens, just abort + cell_found_exact = true; + break; + } + + if (cell_found) { + //discard by distance + float distance = distance_to_aabb(pos, child_cell_pos, child_cell_size); + if (distance >= closest_distance) { + continue; //pointless, just test next child + } else if (is_leaf) { + //closer than what we have AND end of stack, save and continue + closest_cell_pos = child_cell_pos; + closest_distance = distance; + continue; + } + } else if (is_leaf) { + //first solid cell we find, save and continue + closest_distance = distance_to_aabb(pos, child_cell_pos, child_cell_size); + closest_cell_pos = child_cell_pos; + cell_found = true; + continue; + } + + bvec3 direction = greaterThan((pos - (child_cell_pos + (child_cell_size >> 1))), ivec3(0)); + uint cell_order = 0; + cell_order |= mix(0, 1, direction.x); + cell_order |= mix(0, 2, direction.y); + cell_order |= mix(0, 4, direction.z); + + stack[stack_pos + 1] = child_cell; + stack_indices[stack_pos + 1] = cell_orders[cell_order]; //start counting + stack_positions[stack_pos + 1] = child_cell_pos; + stack_pos++; //go up stack + } + + 
uint dist_8; + + if (cell_found_exact) { + dist_8 = 0; //equals to -1 + } else { + float closest_distance = length(vec3(pos - closest_cell_pos)); + dist_8 = clamp(uint(closest_distance), 0, 254) + 1; //conservative, 0 is 1, so <1 is considered solid + } + + imageStore(sdf_tex, pos, uvec4(dist_8)); +} +#endif diff --git a/servers/rendering/renderer_rd/shaders/gi.glsl b/servers/rendering/renderer_rd/shaders/gi.glsl deleted file mode 100644 index 0c7f08813b..0000000000 --- a/servers/rendering/renderer_rd/shaders/gi.glsl +++ /dev/null @@ -1,650 +0,0 @@ -#[compute] - -#version 450 - -#VERSION_DEFINES - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -#define M_PI 3.141592 - -#define SDFGI_MAX_CASCADES 8 - -//set 0 for SDFGI and render buffers - -layout(set = 0, binding = 1) uniform texture3D sdf_cascades[SDFGI_MAX_CASCADES]; -layout(set = 0, binding = 2) uniform texture3D light_cascades[SDFGI_MAX_CASCADES]; -layout(set = 0, binding = 3) uniform texture3D aniso0_cascades[SDFGI_MAX_CASCADES]; -layout(set = 0, binding = 4) uniform texture3D aniso1_cascades[SDFGI_MAX_CASCADES]; -layout(set = 0, binding = 5) uniform texture3D occlusion_texture; - -layout(set = 0, binding = 6) uniform sampler linear_sampler; -layout(set = 0, binding = 7) uniform sampler linear_sampler_with_mipmaps; - -struct ProbeCascadeData { - vec3 position; - float to_probe; - ivec3 probe_world_offset; - float to_cell; // 1/bounds * grid_size -}; - -layout(rgba16f, set = 0, binding = 9) uniform restrict writeonly image2D ambient_buffer; -layout(rgba16f, set = 0, binding = 10) uniform restrict writeonly image2D reflection_buffer; - -layout(set = 0, binding = 11) uniform texture2DArray lightprobe_texture; - -layout(set = 0, binding = 12) uniform texture2D depth_buffer; -layout(set = 0, binding = 13) uniform texture2D normal_roughness_buffer; -layout(set = 0, binding = 14) uniform utexture2D voxel_gi_buffer; - -layout(set = 0, binding = 15, std140) uniform SDFGI { - vec3 grid_size; - uint 
max_cascades; - - bool use_occlusion; - int probe_axis_size; - float probe_to_uvw; - float normal_bias; - - vec3 lightprobe_tex_pixel_size; - float energy; - - vec3 lightprobe_uv_offset; - float y_mult; - - vec3 occlusion_clamp; - uint pad3; - - vec3 occlusion_renormalize; - uint pad4; - - vec3 cascade_probe_size; - uint pad5; - - ProbeCascadeData cascades[SDFGI_MAX_CASCADES]; -} -sdfgi; - -#define MAX_VOXEL_GI_INSTANCES 8 - -struct VoxelGIData { - mat4 xform; // 64 - 64 - - vec3 bounds; // 12 - 76 - float dynamic_range; // 4 - 80 - - float bias; // 4 - 84 - float normal_bias; // 4 - 88 - bool blend_ambient; // 4 - 92 - uint mipmaps; // 4 - 96 -}; - -layout(set = 0, binding = 16, std140) uniform VoxelGIs { - VoxelGIData data[MAX_VOXEL_GI_INSTANCES]; -} -voxel_gi_instances; - -layout(set = 0, binding = 17) uniform texture3D voxel_gi_textures[MAX_VOXEL_GI_INSTANCES]; - -layout(push_constant, std430) uniform Params { - ivec2 screen_size; - float z_near; - float z_far; - - vec4 proj_info; - - uint max_voxel_gi_instances; - bool high_quality_vct; - bool orthogonal; - uint pad; - - mat3x4 cam_rotation; -} -params; - -vec2 octahedron_wrap(vec2 v) { - vec2 signVal; - signVal.x = v.x >= 0.0 ? 1.0 : -1.0; - signVal.y = v.y >= 0.0 ? 1.0 : -1.0; - return (1.0 - abs(v.yx)) * signVal; -} - -vec2 octahedron_encode(vec3 n) { - // https://twitter.com/Stubbesaurus/status/937994790553227264 - n /= (abs(n.x) + abs(n.y) + abs(n.z)); - n.xy = n.z >= 0.0 ? 
n.xy : octahedron_wrap(n.xy); - n.xy = n.xy * 0.5 + 0.5; - return n.xy; -} - -vec4 blend_color(vec4 src, vec4 dst) { - vec4 res; - float sa = 1.0 - src.a; - res.a = dst.a * sa + src.a; - if (res.a == 0.0) { - res.rgb = vec3(0); - } else { - res.rgb = (dst.rgb * dst.a * sa + src.rgb * src.a) / res.a; - } - return res; -} - -vec3 reconstruct_position(ivec2 screen_pos) { - vec3 pos; - pos.z = texelFetch(sampler2D(depth_buffer, linear_sampler), screen_pos, 0).r; - - pos.z = pos.z * 2.0 - 1.0; - if (params.orthogonal) { - pos.z = ((pos.z + (params.z_far + params.z_near) / (params.z_far - params.z_near)) * (params.z_far - params.z_near)) / 2.0; - } else { - pos.z = 2.0 * params.z_near * params.z_far / (params.z_far + params.z_near - pos.z * (params.z_far - params.z_near)); - } - pos.z = -pos.z; - - pos.xy = vec2(screen_pos) * params.proj_info.xy + params.proj_info.zw; - if (!params.orthogonal) { - pos.xy *= pos.z; - } - - return pos; -} - -void sdfvoxel_gi_process(uint cascade, vec3 cascade_pos, vec3 cam_pos, vec3 cam_normal, vec3 cam_specular_normal, float roughness, out vec3 diffuse_light, out vec3 specular_light) { - cascade_pos += cam_normal * sdfgi.normal_bias; - - vec3 base_pos = floor(cascade_pos); - //cascade_pos += mix(vec3(0.0),vec3(0.01),lessThan(abs(cascade_pos-base_pos),vec3(0.01))) * cam_normal; - ivec3 probe_base_pos = ivec3(base_pos); - - vec4 diffuse_accum = vec4(0.0); - vec3 specular_accum; - - ivec3 tex_pos = ivec3(probe_base_pos.xy, int(cascade)); - tex_pos.x += probe_base_pos.z * sdfgi.probe_axis_size; - tex_pos.xy = tex_pos.xy * (SDFGI_OCT_SIZE + 2) + ivec2(1); - - vec3 diffuse_posf = (vec3(tex_pos) + vec3(octahedron_encode(cam_normal) * float(SDFGI_OCT_SIZE), 0.0)) * sdfgi.lightprobe_tex_pixel_size; - - vec3 specular_posf = (vec3(tex_pos) + vec3(octahedron_encode(cam_specular_normal) * float(SDFGI_OCT_SIZE), 0.0)) * sdfgi.lightprobe_tex_pixel_size; - - specular_accum = vec3(0.0); - - vec4 light_accum = vec4(0.0); - float weight_accum = 0.0; - - for 
(uint j = 0; j < 8; j++) { - ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); - ivec3 probe_posi = probe_base_pos; - probe_posi += offset; - - // Compute weight - - vec3 probe_pos = vec3(probe_posi); - vec3 probe_to_pos = cascade_pos - probe_pos; - vec3 probe_dir = normalize(-probe_to_pos); - - vec3 trilinear = vec3(1.0) - abs(probe_to_pos); - float weight = trilinear.x * trilinear.y * trilinear.z * max(0.005, dot(cam_normal, probe_dir)); - - // Compute lightprobe occlusion - - if (sdfgi.use_occlusion) { - ivec3 occ_indexv = abs((sdfgi.cascades[cascade].probe_world_offset + probe_posi) & ivec3(1, 1, 1)) * ivec3(1, 2, 4); - vec4 occ_mask = mix(vec4(0.0), vec4(1.0), equal(ivec4(occ_indexv.x | occ_indexv.y), ivec4(0, 1, 2, 3))); - - vec3 occ_pos = clamp(cascade_pos, probe_pos - sdfgi.occlusion_clamp, probe_pos + sdfgi.occlusion_clamp) * sdfgi.probe_to_uvw; - occ_pos.z += float(cascade); - if (occ_indexv.z != 0) { //z bit is on, means index is >=4, so make it switch to the other half of textures - occ_pos.x += 1.0; - } - - occ_pos *= sdfgi.occlusion_renormalize; - float occlusion = dot(textureLod(sampler3D(occlusion_texture, linear_sampler), occ_pos, 0.0), occ_mask); - - weight *= max(occlusion, 0.01); - } - - // Compute lightprobe texture position - - vec3 diffuse; - vec3 pos_uvw = diffuse_posf; - pos_uvw.xy += vec2(offset.xy) * sdfgi.lightprobe_uv_offset.xy; - pos_uvw.x += float(offset.z) * sdfgi.lightprobe_uv_offset.z; - diffuse = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0).rgb; - - diffuse_accum += vec4(diffuse * weight, weight); - - { - vec3 specular = vec3(0.0); - vec3 pos_uvw = specular_posf; - pos_uvw.xy += vec2(offset.xy) * sdfgi.lightprobe_uv_offset.xy; - pos_uvw.x += float(offset.z) * sdfgi.lightprobe_uv_offset.z; - if (roughness < 0.99) { - specular = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw + vec3(0, 0, float(sdfgi.max_cascades)), 0.0).rgb; - } - if (roughness > 0.2) { - specular = 
mix(specular, textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0).rgb, (roughness - 0.2) * 1.25); - } - - specular_accum += specular * weight; - } - } - - if (diffuse_accum.a > 0.0) { - diffuse_accum.rgb /= diffuse_accum.a; - } - - diffuse_light = diffuse_accum.rgb; - - if (diffuse_accum.a > 0.0) { - specular_accum /= diffuse_accum.a; - } - - specular_light = specular_accum; -} - -void sdfgi_process(vec3 vertex, vec3 normal, vec3 reflection, float roughness, out vec4 ambient_light, out vec4 reflection_light) { - //make vertex orientation the world one, but still align to camera - vertex.y *= sdfgi.y_mult; - normal.y *= sdfgi.y_mult; - reflection.y *= sdfgi.y_mult; - - //renormalize - normal = normalize(normal); - reflection = normalize(reflection); - - vec3 cam_pos = vertex; - vec3 cam_normal = normal; - - vec4 light_accum = vec4(0.0); - float weight_accum = 0.0; - - vec4 light_blend_accum = vec4(0.0); - float weight_blend_accum = 0.0; - - float blend = -1.0; - - // helper constants, compute once - - uint cascade = 0xFFFFFFFF; - vec3 cascade_pos; - vec3 cascade_normal; - - for (uint i = 0; i < sdfgi.max_cascades; i++) { - cascade_pos = (cam_pos - sdfgi.cascades[i].position) * sdfgi.cascades[i].to_probe; - - if (any(lessThan(cascade_pos, vec3(0.0))) || any(greaterThanEqual(cascade_pos, sdfgi.cascade_probe_size))) { - continue; //skip cascade - } - - cascade = i; - break; - } - - if (cascade < SDFGI_MAX_CASCADES) { - ambient_light = vec4(0, 0, 0, 1); - reflection_light = vec4(0, 0, 0, 1); - - float blend; - vec3 diffuse, specular; - sdfvoxel_gi_process(cascade, cascade_pos, cam_pos, cam_normal, reflection, roughness, diffuse, specular); - - { - //process blend - float blend_from = (float(sdfgi.probe_axis_size - 1) / 2.0) - 2.5; - float blend_to = blend_from + 2.0; - - vec3 inner_pos = cam_pos * sdfgi.cascades[cascade].to_probe; - - float len = length(inner_pos); - - inner_pos = abs(normalize(inner_pos)); - len *= max(inner_pos.x, 
max(inner_pos.y, inner_pos.z)); - - if (len >= blend_from) { - blend = smoothstep(blend_from, blend_to, len); - } else { - blend = 0.0; - } - } - - if (blend > 0.0) { - //blend - if (cascade == sdfgi.max_cascades - 1) { - ambient_light.a = 1.0 - blend; - reflection_light.a = 1.0 - blend; - - } else { - vec3 diffuse2, specular2; - cascade_pos = (cam_pos - sdfgi.cascades[cascade + 1].position) * sdfgi.cascades[cascade + 1].to_probe; - sdfvoxel_gi_process(cascade + 1, cascade_pos, cam_pos, cam_normal, reflection, roughness, diffuse2, specular2); - diffuse = mix(diffuse, diffuse2, blend); - specular = mix(specular, specular2, blend); - } - } - - ambient_light.rgb = diffuse; - - if (roughness < 0.2) { - vec3 pos_to_uvw = 1.0 / sdfgi.grid_size; - vec4 light_accum = vec4(0.0); - - float blend_size = (sdfgi.grid_size.x / float(sdfgi.probe_axis_size - 1)) * 0.5; - - float radius_sizes[SDFGI_MAX_CASCADES]; - cascade = 0xFFFF; - - float base_distance = length(cam_pos); - for (uint i = 0; i < sdfgi.max_cascades; i++) { - radius_sizes[i] = (1.0 / sdfgi.cascades[i].to_cell) * (sdfgi.grid_size.x * 0.5 - blend_size); - if (cascade == 0xFFFF && base_distance < radius_sizes[i]) { - cascade = i; - } - } - - cascade = min(cascade, sdfgi.max_cascades - 1); - - float max_distance = radius_sizes[sdfgi.max_cascades - 1]; - vec3 ray_pos = cam_pos; - vec3 ray_dir = reflection; - - { - float prev_radius = cascade > 0 ? 
radius_sizes[cascade - 1] : 0.0; - float base_blend = (base_distance - prev_radius) / (radius_sizes[cascade] - prev_radius); - float bias = (1.0 + base_blend) * 1.1; - vec3 abs_ray_dir = abs(ray_dir); - //ray_pos += ray_dir * (bias / sdfgi.cascades[cascade].to_cell); //bias to avoid self occlusion - ray_pos += (ray_dir * 1.0 / max(abs_ray_dir.x, max(abs_ray_dir.y, abs_ray_dir.z)) + cam_normal * 1.4) * bias / sdfgi.cascades[cascade].to_cell; - } - float softness = 0.2 + min(1.0, roughness * 5.0) * 4.0; //approximation to roughness so it does not seem like a hard fade - uint i = 0; - bool found = false; - while (true) { - if (length(ray_pos) >= max_distance || light_accum.a > 0.99) { - break; - } - if (!found && i >= cascade && length(ray_pos) < radius_sizes[i]) { - uint next_i = min(i + 1, sdfgi.max_cascades - 1); - cascade = max(i, cascade); //never go down - - vec3 pos = ray_pos - sdfgi.cascades[i].position; - pos *= sdfgi.cascades[i].to_cell * pos_to_uvw; - - float fdistance = textureLod(sampler3D(sdf_cascades[i], linear_sampler), pos, 0.0).r * 255.0 - 1.1; - - vec4 hit_light = vec4(0.0); - if (fdistance < softness) { - hit_light.rgb = textureLod(sampler3D(light_cascades[i], linear_sampler), pos, 0.0).rgb; - hit_light.rgb *= 0.5; //approximation given value read is actually meant for anisotropy - hit_light.a = clamp(1.0 - (fdistance / softness), 0.0, 1.0); - hit_light.rgb *= hit_light.a; - } - - fdistance /= sdfgi.cascades[i].to_cell; - - if (i < (sdfgi.max_cascades - 1)) { - pos = ray_pos - sdfgi.cascades[next_i].position; - pos *= sdfgi.cascades[next_i].to_cell * pos_to_uvw; - - float fdistance2 = textureLod(sampler3D(sdf_cascades[next_i], linear_sampler), pos, 0.0).r * 255.0 - 1.1; - - vec4 hit_light2 = vec4(0.0); - if (fdistance2 < softness) { - hit_light2.rgb = textureLod(sampler3D(light_cascades[next_i], linear_sampler), pos, 0.0).rgb; - hit_light2.rgb *= 0.5; //approximation given value read is actually meant for anisotropy - hit_light2.a = clamp(1.0 - 
(fdistance2 / softness), 0.0, 1.0); - hit_light2.rgb *= hit_light2.a; - } - - float prev_radius = i == 0 ? 0.0 : radius_sizes[max(0, i - 1)]; - float blend = clamp((length(ray_pos) - prev_radius) / (radius_sizes[i] - prev_radius), 0.0, 1.0); - - fdistance2 /= sdfgi.cascades[next_i].to_cell; - - hit_light = mix(hit_light, hit_light2, blend); - fdistance = mix(fdistance, fdistance2, blend); - } - - light_accum += hit_light; - ray_pos += ray_dir * fdistance; - found = true; - } - i++; - if (i == sdfgi.max_cascades) { - i = 0; - found = false; - } - } - - vec3 light = light_accum.rgb / max(light_accum.a, 0.00001); - float alpha = min(1.0, light_accum.a); - - float b = min(1.0, roughness * 5.0); - - float sa = 1.0 - b; - - reflection_light.a = alpha * sa + b; - if (reflection_light.a == 0) { - specular = vec3(0.0); - } else { - specular = (light * alpha * sa + specular * b) / reflection_light.a; - } - } - - reflection_light.rgb = specular; - - ambient_light.rgb *= sdfgi.energy; - reflection_light.rgb *= sdfgi.energy; - } else { - ambient_light = vec4(0); - reflection_light = vec4(0); - } -} - -//standard voxel cone trace -vec4 voxel_cone_trace(texture3D probe, vec3 cell_size, vec3 pos, vec3 direction, float tan_half_angle, float max_distance, float p_bias) { - float dist = p_bias; - vec4 color = vec4(0.0); - - while (dist < max_distance && color.a < 0.95) { - float diameter = max(1.0, 2.0 * tan_half_angle * dist); - vec3 uvw_pos = (pos + dist * direction) * cell_size; - float half_diameter = diameter * 0.5; - //check if outside, then break - if (any(greaterThan(abs(uvw_pos - 0.5), vec3(0.5f + half_diameter * cell_size)))) { - break; - } - vec4 scolor = textureLod(sampler3D(probe, linear_sampler_with_mipmaps), uvw_pos, log2(diameter)); - float a = (1.0 - color.a); - color += a * scolor; - dist += half_diameter; - } - - return color; -} - -vec4 voxel_cone_trace_45_degrees(texture3D probe, vec3 cell_size, vec3 pos, vec3 direction, float max_distance, float p_bias) { - 
float dist = p_bias; - vec4 color = vec4(0.0); - float radius = max(0.5, dist); - float lod_level = log2(radius * 2.0); - - while (dist < max_distance && color.a < 0.95) { - vec3 uvw_pos = (pos + dist * direction) * cell_size; - - //check if outside, then break - if (any(greaterThan(abs(uvw_pos - 0.5), vec3(0.5f + radius * cell_size)))) { - break; - } - vec4 scolor = textureLod(sampler3D(probe, linear_sampler_with_mipmaps), uvw_pos, lod_level); - lod_level += 1.0; - - float a = (1.0 - color.a); - scolor *= a; - color += scolor; - dist += radius; - radius = max(0.5, dist); - } - return color; -} - -void voxel_gi_compute(uint index, vec3 position, vec3 normal, vec3 ref_vec, mat3 normal_xform, float roughness, inout vec4 out_spec, inout vec4 out_diff, inout float out_blend) { - position = (voxel_gi_instances.data[index].xform * vec4(position, 1.0)).xyz; - ref_vec = normalize((voxel_gi_instances.data[index].xform * vec4(ref_vec, 0.0)).xyz); - normal = normalize((voxel_gi_instances.data[index].xform * vec4(normal, 0.0)).xyz); - - position += normal * voxel_gi_instances.data[index].normal_bias; - - //this causes corrupted pixels, i have no idea why.. 
- if (any(bvec2(any(lessThan(position, vec3(0.0))), any(greaterThan(position, voxel_gi_instances.data[index].bounds))))) { - return; - } - - mat3 dir_xform = mat3(voxel_gi_instances.data[index].xform) * normal_xform; - - vec3 blendv = abs(position / voxel_gi_instances.data[index].bounds * 2.0 - 1.0); - float blend = clamp(1.0 - max(blendv.x, max(blendv.y, blendv.z)), 0.0, 1.0); - //float blend=1.0; - - float max_distance = length(voxel_gi_instances.data[index].bounds); - vec3 cell_size = 1.0 / voxel_gi_instances.data[index].bounds; - - //irradiance - - vec4 light = vec4(0.0); - - if (params.high_quality_vct) { - const uint cone_dir_count = 6; - vec3 cone_dirs[cone_dir_count] = vec3[]( - vec3(0.0, 0.0, 1.0), - vec3(0.866025, 0.0, 0.5), - vec3(0.267617, 0.823639, 0.5), - vec3(-0.700629, 0.509037, 0.5), - vec3(-0.700629, -0.509037, 0.5), - vec3(0.267617, -0.823639, 0.5)); - - float cone_weights[cone_dir_count] = float[](0.25, 0.15, 0.15, 0.15, 0.15, 0.15); - float cone_angle_tan = 0.577; - - for (uint i = 0; i < cone_dir_count; i++) { - vec3 dir = normalize(dir_xform * cone_dirs[i]); - light += cone_weights[i] * voxel_cone_trace(voxel_gi_textures[index], cell_size, position, dir, cone_angle_tan, max_distance, voxel_gi_instances.data[index].bias); - } - } else { - const uint cone_dir_count = 4; - vec3 cone_dirs[cone_dir_count] = vec3[]( - vec3(0.707107, 0.0, 0.707107), - vec3(0.0, 0.707107, 0.707107), - vec3(-0.707107, 0.0, 0.707107), - vec3(0.0, -0.707107, 0.707107)); - - float cone_weights[cone_dir_count] = float[](0.25, 0.25, 0.25, 0.25); - for (int i = 0; i < cone_dir_count; i++) { - vec3 dir = normalize(dir_xform * cone_dirs[i]); - light += cone_weights[i] * voxel_cone_trace_45_degrees(voxel_gi_textures[index], cell_size, position, dir, max_distance, voxel_gi_instances.data[index].bias); - } - } - - light.rgb *= voxel_gi_instances.data[index].dynamic_range; - if (!voxel_gi_instances.data[index].blend_ambient) { - light.a = 1.0; - } - - out_diff += light * blend; - 
- //radiance - vec4 irr_light = voxel_cone_trace(voxel_gi_textures[index], cell_size, position, ref_vec, tan(roughness * 0.5 * M_PI * 0.99), max_distance, voxel_gi_instances.data[index].bias); - irr_light.rgb *= voxel_gi_instances.data[index].dynamic_range; - if (!voxel_gi_instances.data[index].blend_ambient) { - irr_light.a = 1.0; - } - - out_spec += irr_light * blend; - - out_blend += blend; -} - -vec4 fetch_normal_and_roughness(ivec2 pos) { - vec4 normal_roughness = texelFetch(sampler2D(normal_roughness_buffer, linear_sampler), pos, 0); - - normal_roughness.xyz = normalize(normal_roughness.xyz * 2.0 - 1.0); - return normal_roughness; -} - -void process_gi(ivec2 pos, vec3 vertex, inout vec4 ambient_light, inout vec4 reflection_light) { - vec4 normal_roughness = fetch_normal_and_roughness(pos); - - vec3 normal = normal_roughness.xyz; - - if (normal.length() > 0.5) { - //valid normal, can do GI - float roughness = normal_roughness.w; - vertex = mat3(params.cam_rotation) * vertex; - normal = normalize(mat3(params.cam_rotation) * normal); - vec3 reflection = normalize(reflect(normalize(vertex), normal)); - -#ifdef USE_SDFGI - sdfgi_process(vertex, normal, reflection, roughness, ambient_light, reflection_light); -#endif - -#ifdef USE_VOXEL_GI_INSTANCES - { - uvec2 voxel_gi_tex = texelFetch(usampler2D(voxel_gi_buffer, linear_sampler), pos, 0).rg; - roughness *= roughness; - //find arbitrary tangent and bitangent, then build a matrix - vec3 v0 = abs(normal.z) < 0.999 ? 
vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0); - vec3 tangent = normalize(cross(v0, normal)); - vec3 bitangent = normalize(cross(tangent, normal)); - mat3 normal_mat = mat3(tangent, bitangent, normal); - - vec4 amb_accum = vec4(0.0); - vec4 spec_accum = vec4(0.0); - float blend_accum = 0.0; - - for (uint i = 0; i < params.max_voxel_gi_instances; i++) { - if (any(equal(uvec2(i), voxel_gi_tex))) { - voxel_gi_compute(i, vertex, normal, reflection, normal_mat, roughness, spec_accum, amb_accum, blend_accum); - } - } - if (blend_accum > 0.0) { - amb_accum /= blend_accum; - spec_accum /= blend_accum; - } - -#ifdef USE_SDFGI - reflection_light = blend_color(spec_accum, reflection_light); - ambient_light = blend_color(amb_accum, ambient_light); -#else - reflection_light = spec_accum; - ambient_light = amb_accum; -#endif - } -#endif - } -} - -void main() { - ivec2 pos = ivec2(gl_GlobalInvocationID.xy); - -#ifdef MODE_HALF_RES - pos <<= 1; -#endif - if (any(greaterThanEqual(pos, params.screen_size))) { //too large, do nothing - return; - } - - vec4 ambient_light = vec4(0.0); - vec4 reflection_light = vec4(0.0); - - vec3 vertex = reconstruct_position(pos); - vertex.y = -vertex.y; - - process_gi(pos, vertex, ambient_light, reflection_light); - -#ifdef MODE_HALF_RES - pos >>= 1; -#endif - - imageStore(ambient_buffer, pos, ambient_light); - imageStore(reflection_buffer, pos, reflection_light); -} diff --git a/servers/rendering/renderer_rd/shaders/resolve.glsl b/servers/rendering/renderer_rd/shaders/resolve.glsl deleted file mode 100644 index 0e086331c0..0000000000 --- a/servers/rendering/renderer_rd/shaders/resolve.glsl +++ /dev/null @@ -1,236 +0,0 @@ -#[compute] - -#version 450 - -#VERSION_DEFINES - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -#ifdef MODE_RESOLVE_DEPTH -layout(set = 0, binding = 0) uniform sampler2DMS source_depth; -layout(r32f, set = 1, binding = 0) uniform restrict writeonly image2D dest_depth; -#endif - -#ifdef MODE_RESOLVE_GI -layout(set 
= 0, binding = 0) uniform sampler2DMS source_depth; -layout(set = 0, binding = 1) uniform sampler2DMS source_normal_roughness; - -layout(r32f, set = 1, binding = 0) uniform restrict writeonly image2D dest_depth; -layout(rgba8, set = 1, binding = 1) uniform restrict writeonly image2D dest_normal_roughness; - -#ifdef VOXEL_GI_RESOLVE -layout(set = 2, binding = 0) uniform usampler2DMS source_voxel_gi; -layout(rg8ui, set = 3, binding = 0) uniform restrict writeonly uimage2D dest_voxel_gi; -#endif - -#endif - -layout(push_constant, std430) uniform Params { - ivec2 screen_size; - int sample_count; - uint pad; -} -params; - -void main() { - // Pixel being shaded - ivec2 pos = ivec2(gl_GlobalInvocationID.xy); - if (any(greaterThanEqual(pos, params.screen_size))) { //too large, do nothing - return; - } - -#ifdef MODE_RESOLVE_DEPTH - - float depth_avg = 0.0; - for (int i = 0; i < params.sample_count; i++) { - depth_avg += texelFetch(source_depth, pos, i).r; - } - depth_avg /= float(params.sample_count); - imageStore(dest_depth, pos, vec4(depth_avg)); - -#endif - -#ifdef MODE_RESOLVE_GI - - float best_depth = 1e20; - vec4 best_normal_roughness = vec4(0.0); -#ifdef VOXEL_GI_RESOLVE - uvec2 best_voxel_gi; -#endif - -#if 0 - - for(int i=0;i<params.sample_count;i++) { - float depth = texelFetch(source_depth,pos,i).r; - if (depth < best_depth) { //use the depth closest to camera - best_depth = depth; - best_normal_roughness = texelFetch(source_normal_roughness,pos,i); - -#ifdef VOXEL_GI_RESOLVE - best_voxel_gi = texelFetch(source_voxel_gi,pos,i).rg; -#endif - } - } - -#else - -#if 1 - - vec4 group1; - vec4 group2; - vec4 group3; - vec4 group4; - int best_index = 0; - - //2X - group1.x = texelFetch(source_depth, pos, 0).r; - group1.y = texelFetch(source_depth, pos, 1).r; - - //4X - if (params.sample_count >= 4) { - group1.z = texelFetch(source_depth, pos, 2).r; - group1.w = texelFetch(source_depth, pos, 3).r; - } - //8X - if (params.sample_count >= 8) { - group2.x = texelFetch(source_depth, pos, 4).r; - group2.y = texelFetch(source_depth, pos, 5).r; - group2.z = texelFetch(source_depth, pos, 6).r; - group2.w = texelFetch(source_depth, pos, 7).r; - } - //16X - if (params.sample_count >= 16) { - group3.x = texelFetch(source_depth, pos, 8).r; - group3.y = texelFetch(source_depth, pos, 9).r; - group3.z = texelFetch(source_depth, pos, 10).r; - group3.w = texelFetch(source_depth, pos, 11).r; - - group4.x = texelFetch(source_depth, pos, 12).r; - group4.y = texelFetch(source_depth, pos, 13).r; - group4.z = texelFetch(source_depth, pos, 14).r; - group4.w
= texelFetch(source_depth, pos, 15).r; - } - - if (params.sample_count == 2) { - best_index = (pos.x & 1) ^ ((pos.y >> 1) & 1); //not much can be done here - } else if (params.sample_count == 4) { - vec4 freq = vec4(equal(group1, vec4(group1.x))); - freq += vec4(equal(group1, vec4(group1.y))); - freq += vec4(equal(group1, vec4(group1.z))); - freq += vec4(equal(group1, vec4(group1.w))); - - float min_f = freq.x; - best_index = 0; - if (freq.y < min_f) { - best_index = 1; - min_f = freq.y; - } - if (freq.z < min_f) { - best_index = 2; - min_f = freq.z; - } - if (freq.w < min_f) { - best_index = 3; - } - } else if (params.sample_count == 8) { - vec4 freq0 = vec4(equal(group1, vec4(group1.x))); - vec4 freq1 = vec4(equal(group2, vec4(group1.x))); - freq0 += vec4(equal(group1, vec4(group1.y))); - freq1 += vec4(equal(group2, vec4(group1.y))); - freq0 += vec4(equal(group1, vec4(group1.z))); - freq1 += vec4(equal(group2, vec4(group1.z))); - freq0 += vec4(equal(group1, vec4(group1.w))); - freq1 += vec4(equal(group2, vec4(group1.w))); - freq0 += vec4(equal(group1, vec4(group2.x))); - freq1 += vec4(equal(group2, vec4(group2.x))); - freq0 += vec4(equal(group1, vec4(group2.y))); - freq1 += vec4(equal(group2, vec4(group2.y))); - freq0 += vec4(equal(group1, vec4(group2.z))); - freq1 += vec4(equal(group2, vec4(group2.z))); - freq0 += vec4(equal(group1, vec4(group2.w))); - freq1 += vec4(equal(group2, vec4(group2.w))); - - float min_f0 = freq0.x; - int best_index0 = 0; - if (freq0.y < min_f0) { - best_index0 = 1; - min_f0 = freq0.y; - } - if (freq0.z < min_f0) { - best_index0 = 2; - min_f0 = freq0.z; - } - if (freq0.w < min_f0) { - best_index0 = 3; - min_f0 = freq0.w; - } - - float min_f1 = freq1.x; - int best_index1 = 4; - if (freq1.y < min_f1) { - best_index1 = 5; - min_f1 = freq1.y; - } - if (freq1.z < min_f1) { - best_index1 = 6; - min_f1 = freq1.z; - } - if (freq1.w < min_f1) { - best_index1 = 7; - min_f1 = freq1.w; - } - - best_index = mix(best_index0, best_index1, min_f0 < 
min_f1); - } - -#else - float depths[16]; - int depth_indices[16]; - int depth_amount[16]; - int depth_count = 0; - - for (int i = 0; i < params.sample_count; i++) { - float depth = texelFetch(source_depth, pos, i).r; - int depth_index = -1; - for (int j = 0; j < depth_count; j++) { - if (abs(depths[j] - depth) < 0.000001) { - depth_index = j; - break; - } - } - - if (depth_index == -1) { - depths[depth_count] = depth; - depth_indices[depth_count] = i; - depth_amount[depth_count] = 1; - depth_count += 1; - } else { - depth_amount[depth_index] += 1; - } - } - - int depth_least = 0xFFFF; - int best_index = 0; - for (int j = 0; j < depth_count; j++) { - if (depth_amount[j] < depth_least) { - best_index = depth_indices[j]; - depth_least = depth_amount[j]; - } - } -#endif - best_depth = texelFetch(source_depth, pos, best_index).r; - best_normal_roughness = texelFetch(source_normal_roughness, pos, best_index); -#ifdef VOXEL_GI_RESOLVE - best_voxel_gi = texelFetch(source_voxel_gi, pos, best_index).rg; -#endif - -#endif - - imageStore(dest_depth, pos, vec4(best_depth)); - imageStore(dest_normal_roughness, pos, vec4(best_normal_roughness)); -#ifdef VOXEL_GI_RESOLVE - imageStore(dest_voxel_gi, pos, uvec4(best_voxel_gi, 0, 0)); -#endif - -#endif -} diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl index b992e948c3..e08ccd2ecb 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl @@ -1231,12 +1231,20 @@ void fragment_shader(in SceneData scene_data) { if (scene_data.gi_upscale_for_msaa) { vec2 base_coord = screen_uv; vec2 closest_coord = base_coord; +#ifdef USE_MULTIVIEW + float closest_ang = dot(normal, textureLod(sampler2DArray(normal_roughness_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), vec3(base_coord, ViewIndex), 0.0).xyz * 2.0 - 1.0); +#else // USE_MULTIVIEW float 
closest_ang = dot(normal, textureLod(sampler2D(normal_roughness_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), base_coord, 0.0).xyz * 2.0 - 1.0); +#endif // USE_MULTIVIEW for (int i = 0; i < 4; i++) { const vec2 neighbours[4] = vec2[](vec2(-1, 0), vec2(1, 0), vec2(0, -1), vec2(0, 1)); vec2 neighbour_coord = base_coord + neighbours[i] * scene_data.screen_pixel_size; +#ifdef USE_MULTIVIEW + float neighbour_ang = dot(normal, textureLod(sampler2DArray(normal_roughness_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), vec3(neighbour_coord, ViewIndex), 0.0).xyz * 2.0 - 1.0); +#else // USE_MULTIVIEW float neighbour_ang = dot(normal, textureLod(sampler2D(normal_roughness_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), neighbour_coord, 0.0).xyz * 2.0 - 1.0); +#endif // USE_MULTIVIEW if (neighbour_ang > closest_ang) { closest_ang = neighbour_ang; closest_coord = neighbour_coord; @@ -1249,8 +1257,13 @@ void fragment_shader(in SceneData scene_data) { coord = screen_uv; } +#ifdef USE_MULTIVIEW + vec4 buffer_ambient = textureLod(sampler2DArray(ambient_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), vec3(coord, ViewIndex), 0.0); + vec4 buffer_reflection = textureLod(sampler2DArray(reflection_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), vec3(coord, ViewIndex), 0.0); +#else // USE_MULTIVIEW vec4 buffer_ambient = textureLod(sampler2D(ambient_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), coord, 0.0); vec4 buffer_reflection = textureLod(sampler2D(reflection_buffer, material_samplers[SAMPLER_LINEAR_CLAMP]), coord, 0.0); +#endif // USE_MULTIVIEW ambient_light = mix(ambient_light, buffer_ambient.rgb, buffer_ambient.a); specular_light = mix(specular_light, buffer_reflection.rgb, buffer_reflection.a); diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_clustered_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_clustered_inc.glsl index 32ea83397a..0c23de96c3 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_clustered_inc.glsl +++ 
b/servers/rendering/renderer_rd/shaders/scene_forward_clustered_inc.glsl @@ -317,10 +317,16 @@ layout(r32ui, set = 1, binding = 12) uniform restrict uimage3D geom_facing_grid; layout(set = 1, binding = 9) uniform texture2D depth_buffer; layout(set = 1, binding = 10) uniform texture2D color_buffer; +#ifdef USE_MULTIVIEW +layout(set = 1, binding = 11) uniform texture2DArray normal_roughness_buffer; +layout(set = 1, binding = 13) uniform texture2DArray ambient_buffer; +layout(set = 1, binding = 14) uniform texture2DArray reflection_buffer; +#else // USE_MULTIVIEW layout(set = 1, binding = 11) uniform texture2D normal_roughness_buffer; -layout(set = 1, binding = 12) uniform texture2D ao_buffer; layout(set = 1, binding = 13) uniform texture2D ambient_buffer; layout(set = 1, binding = 14) uniform texture2D reflection_buffer; +#endif +layout(set = 1, binding = 12) uniform texture2D ao_buffer; layout(set = 1, binding = 15) uniform texture2DArray sdfgi_lightprobe_texture; layout(set = 1, binding = 16) uniform texture3D sdfgi_occlusion_cascades; diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl deleted file mode 100644 index 802a410825..0000000000 --- a/servers/rendering/renderer_rd/shaders/sdfgi_debug.glsl +++ /dev/null @@ -1,174 +0,0 @@ -#[compute] - -#version 450 - -#VERSION_DEFINES - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -#define MAX_CASCADES 8 - -layout(set = 0, binding = 1) uniform texture3D sdf_cascades[MAX_CASCADES]; -layout(set = 0, binding = 2) uniform texture3D light_cascades[MAX_CASCADES]; -layout(set = 0, binding = 3) uniform texture3D aniso0_cascades[MAX_CASCADES]; -layout(set = 0, binding = 4) uniform texture3D aniso1_cascades[MAX_CASCADES]; -layout(set = 0, binding = 5) uniform texture3D occlusion_texture; - -layout(set = 0, binding = 8) uniform sampler linear_sampler; - -struct CascadeData { - vec3 offset; //offset of (0,0,0) in world coordinates - float 
to_cell; // 1/bounds * grid_size - ivec3 probe_world_offset; - uint pad; -}; - -layout(set = 0, binding = 9, std140) uniform Cascades { - CascadeData data[MAX_CASCADES]; -} -cascades; - -layout(rgba16f, set = 0, binding = 10) uniform restrict writeonly image2D screen_buffer; - -layout(set = 0, binding = 11) uniform texture2DArray lightprobe_texture; - -layout(push_constant, std430) uniform Params { - vec3 grid_size; - uint max_cascades; - - ivec2 screen_size; - bool use_occlusion; - float y_mult; - - vec3 cam_extent; - int probe_axis_size; - - mat4 cam_transform; -} -params; - -vec3 linear_to_srgb(vec3 color) { - //if going to srgb, clamp from 0 to 1. - color = clamp(color, vec3(0.0), vec3(1.0)); - const vec3 a = vec3(0.055f); - return mix((vec3(1.0f) + a) * pow(color.rgb, vec3(1.0f / 2.4f)) - a, 12.92f * color.rgb, lessThan(color.rgb, vec3(0.0031308f))); -} - -vec2 octahedron_wrap(vec2 v) { - vec2 signVal; - signVal.x = v.x >= 0.0 ? 1.0 : -1.0; - signVal.y = v.y >= 0.0 ? 1.0 : -1.0; - return (1.0 - abs(v.yx)) * signVal; -} - -vec2 octahedron_encode(vec3 n) { - // https://twitter.com/Stubbesaurus/status/937994790553227264 - n /= (abs(n.x) + abs(n.y) + abs(n.z)); - n.xy = n.z >= 0.0 ? 
n.xy : octahedron_wrap(n.xy); - n.xy = n.xy * 0.5 + 0.5; - return n.xy; -} - -void main() { - // Pixel being shaded - ivec2 screen_pos = ivec2(gl_GlobalInvocationID.xy); - if (any(greaterThanEqual(screen_pos, params.screen_size))) { //too large, do nothing - return; - } - - vec3 ray_pos; - vec3 ray_dir; - { - ray_pos = params.cam_transform[3].xyz; - - ray_dir.xy = params.cam_extent.xy * ((vec2(screen_pos) / vec2(params.screen_size)) * 2.0 - 1.0); - ray_dir.z = params.cam_extent.z; - - ray_dir = normalize(mat3(params.cam_transform) * ray_dir); - } - - ray_pos.y *= params.y_mult; - ray_dir.y *= params.y_mult; - ray_dir = normalize(ray_dir); - - vec3 pos_to_uvw = 1.0 / params.grid_size; - - vec3 light = vec3(0.0); - float blend = 0.0; - -#if 1 - // No interpolation - - vec3 inv_dir = 1.0 / ray_dir; - - float rough = 0.5; - bool hit = false; - - for (uint i = 0; i < params.max_cascades; i++) { - //convert to local bounds - vec3 pos = ray_pos - cascades.data[i].offset; - pos *= cascades.data[i].to_cell; - - // Should never happen for debug, since we start mostly at the bounds center, - // but add anyway. 
- //if (any(lessThan(pos,vec3(0.0))) || any(greaterThanEqual(pos,params.grid_size))) { - // continue; //already past bounds for this cascade, goto next - //} - - //find maximum advance distance (until reaching bounds) - vec3 t0 = -pos * inv_dir; - vec3 t1 = (params.grid_size - pos) * inv_dir; - vec3 tmax = max(t0, t1); - float max_advance = min(tmax.x, min(tmax.y, tmax.z)); - - float advance = 0.0; - vec3 uvw; - hit = false; - - while (advance < max_advance) { - //read how much to advance from SDF - uvw = (pos + ray_dir * advance) * pos_to_uvw; - - float distance = texture(sampler3D(sdf_cascades[i], linear_sampler), uvw).r * 255.0 - 1.7; - - if (distance < 0.001) { - //consider hit - hit = true; - break; - } - - advance += distance; - } - - if (!hit) { - pos += ray_dir * min(advance, max_advance); - pos /= cascades.data[i].to_cell; - pos += cascades.data[i].offset; - ray_pos = pos; - continue; - } - - //compute albedo, emission and normal at hit point - - const float EPSILON = 0.001; - vec3 hit_normal = normalize(vec3( - texture(sampler3D(sdf_cascades[i], linear_sampler), uvw + vec3(EPSILON, 0.0, 0.0)).r - texture(sampler3D(sdf_cascades[i], linear_sampler), uvw - vec3(EPSILON, 0.0, 0.0)).r, - texture(sampler3D(sdf_cascades[i], linear_sampler), uvw + vec3(0.0, EPSILON, 0.0)).r - texture(sampler3D(sdf_cascades[i], linear_sampler), uvw - vec3(0.0, EPSILON, 0.0)).r, - texture(sampler3D(sdf_cascades[i], linear_sampler), uvw + vec3(0.0, 0.0, EPSILON)).r - texture(sampler3D(sdf_cascades[i], linear_sampler), uvw - vec3(0.0, 0.0, EPSILON)).r)); - - vec3 hit_light = texture(sampler3D(light_cascades[i], linear_sampler), uvw).rgb; - vec4 aniso0 = texture(sampler3D(aniso0_cascades[i], linear_sampler), uvw); - vec3 hit_aniso0 = aniso0.rgb; - vec3 hit_aniso1 = vec3(aniso0.a, texture(sampler3D(aniso1_cascades[i], linear_sampler), uvw).rg); - - hit_light *= (dot(max(vec3(0.0), (hit_normal * hit_aniso0)), vec3(1.0)) + dot(max(vec3(0.0), (-hit_normal * hit_aniso1)), vec3(1.0))); - - 
light = hit_light; - - break; - } - -#endif - - imageStore(screen_buffer, screen_pos, vec4(linear_to_srgb(light), 1.0)); -} diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_debug_probes.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_debug_probes.glsl deleted file mode 100644 index e0be0bca12..0000000000 --- a/servers/rendering/renderer_rd/shaders/sdfgi_debug_probes.glsl +++ /dev/null @@ -1,231 +0,0 @@ -#[vertex] - -#version 450 - -#VERSION_DEFINES - -#define MAX_CASCADES 8 - -layout(push_constant, std430) uniform Params { - mat4 projection; - - uint band_power; - uint sections_in_band; - uint band_mask; - float section_arc; - - vec3 grid_size; - uint cascade; - - uint pad; - float y_mult; - uint probe_debug_index; - int probe_axis_size; -} -params; - -// https://in4k.untergrund.net/html_articles/hugi_27_-_coding_corner_polaris_sphere_tessellation_101.htm - -vec3 get_sphere_vertex(uint p_vertex_id) { - float x_angle = float(p_vertex_id & 1u) + (p_vertex_id >> params.band_power); - - float y_angle = - float((p_vertex_id & params.band_mask) >> 1) + ((p_vertex_id >> params.band_power) * params.sections_in_band); - - x_angle *= params.section_arc * 0.5f; // remember - 180AA x rot not 360 - y_angle *= -params.section_arc; - - vec3 point = vec3(sin(x_angle) * sin(y_angle), cos(x_angle), sin(x_angle) * cos(y_angle)); - - return point; -} - -#ifdef MODE_PROBES - -layout(location = 0) out vec3 normal_interp; -layout(location = 1) out flat uint probe_index; - -#endif - -#ifdef MODE_VISIBILITY - -layout(location = 0) out float visibility; - -#endif - -struct CascadeData { - vec3 offset; //offset of (0,0,0) in world coordinates - float to_cell; // 1/bounds * grid_size - ivec3 probe_world_offset; - uint pad; -}; - -layout(set = 0, binding = 1, std140) uniform Cascades { - CascadeData data[MAX_CASCADES]; -} -cascades; - -layout(set = 0, binding = 4) uniform texture3D occlusion_texture; -layout(set = 0, binding = 3) uniform sampler linear_sampler; - -void main() { 
-#ifdef MODE_PROBES - probe_index = gl_InstanceIndex; - - normal_interp = get_sphere_vertex(gl_VertexIndex); - - vec3 vertex = normal_interp * 0.2; - - float probe_cell_size = float(params.grid_size / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell; - - ivec3 probe_cell; - probe_cell.x = int(probe_index % params.probe_axis_size); - probe_cell.y = int(probe_index / (params.probe_axis_size * params.probe_axis_size)); - probe_cell.z = int((probe_index / params.probe_axis_size) % params.probe_axis_size); - - vertex += (cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size) / vec3(1.0, params.y_mult, 1.0); - - gl_Position = params.projection * vec4(vertex, 1.0); -#endif - -#ifdef MODE_VISIBILITY - - int probe_index = int(params.probe_debug_index); - - vec3 vertex = get_sphere_vertex(gl_VertexIndex) * 0.01; - - float probe_cell_size = float(params.grid_size / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell; - - ivec3 probe_cell; - probe_cell.x = int(probe_index % params.probe_axis_size); - probe_cell.y = int((probe_index % (params.probe_axis_size * params.probe_axis_size)) / params.probe_axis_size); - probe_cell.z = int(probe_index / (params.probe_axis_size * params.probe_axis_size)); - - vertex += (cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size) / vec3(1.0, params.y_mult, 1.0); - - int probe_voxels = int(params.grid_size.x) / int(params.probe_axis_size - 1); - int occluder_index = int(gl_InstanceIndex); - - int diameter = probe_voxels * 2; - ivec3 occluder_pos; - occluder_pos.x = int(occluder_index % diameter); - occluder_pos.y = int(occluder_index / (diameter * diameter)); - occluder_pos.z = int((occluder_index / diameter) % diameter); - - float cell_size = 1.0 / cascades.data[params.cascade].to_cell; - - ivec3 occluder_offset = occluder_pos - ivec3(diameter / 2); - vertex += ((vec3(occluder_offset) + vec3(0.5)) * cell_size) / vec3(1.0, params.y_mult, 1.0); - - ivec3 
global_cell = probe_cell + cascades.data[params.cascade].probe_world_offset; - uint occlusion_layer = 0; - if ((global_cell.x & 1) != 0) { - occlusion_layer |= 1; - } - if ((global_cell.y & 1) != 0) { - occlusion_layer |= 2; - } - if ((global_cell.z & 1) != 0) { - occlusion_layer |= 4; - } - ivec3 tex_pos = probe_cell * probe_voxels + occluder_offset; - - const vec4 layer_axis[4] = vec4[]( - vec4(1, 0, 0, 0), - vec4(0, 1, 0, 0), - vec4(0, 0, 1, 0), - vec4(0, 0, 0, 1)); - - tex_pos.z += int(params.cascade) * int(params.grid_size); - if (occlusion_layer >= 4) { - tex_pos.x += int(params.grid_size.x); - occlusion_layer &= 3; - } - - visibility = dot(texelFetch(sampler3D(occlusion_texture, linear_sampler), tex_pos, 0), layer_axis[occlusion_layer]); - - gl_Position = params.projection * vec4(vertex, 1.0); - -#endif -} - -#[fragment] - -#version 450 - -#VERSION_DEFINES - -layout(location = 0) out vec4 frag_color; - -layout(set = 0, binding = 2) uniform texture2DArray lightprobe_texture; -layout(set = 0, binding = 3) uniform sampler linear_sampler; - -layout(push_constant, std430) uniform Params { - mat4 projection; - - uint band_power; - uint sections_in_band; - uint band_mask; - float section_arc; - - vec3 grid_size; - uint cascade; - - uint pad; - float y_mult; - uint probe_debug_index; - int probe_axis_size; -} -params; - -#ifdef MODE_PROBES - -layout(location = 0) in vec3 normal_interp; -layout(location = 1) in flat uint probe_index; - -#endif - -#ifdef MODE_VISIBILITY -layout(location = 0) in float visibility; -#endif - -vec2 octahedron_wrap(vec2 v) { - vec2 signVal; - signVal.x = v.x >= 0.0 ? 1.0 : -1.0; - signVal.y = v.y >= 0.0 ? 1.0 : -1.0; - return (1.0 - abs(v.yx)) * signVal; -} - -vec2 octahedron_encode(vec3 n) { - // https://twitter.com/Stubbesaurus/status/937994790553227264 - n /= (abs(n.x) + abs(n.y) + abs(n.z)); - n.xy = n.z >= 0.0 ? 
n.xy : octahedron_wrap(n.xy); - n.xy = n.xy * 0.5 + 0.5; - return n.xy; -} - -void main() { -#ifdef MODE_PROBES - - ivec3 tex_pos; - tex_pos.x = int(probe_index) % params.probe_axis_size; //x - tex_pos.y = int(probe_index) / (params.probe_axis_size * params.probe_axis_size); - tex_pos.x += params.probe_axis_size * ((int(probe_index) / params.probe_axis_size) % params.probe_axis_size); //z - tex_pos.z = int(params.cascade); - - vec3 tex_pos_ofs = vec3(octahedron_encode(normal_interp) * float(OCT_SIZE), 0.0); - vec3 tex_posf = vec3(vec2(tex_pos.xy * (OCT_SIZE + 2) + ivec2(1)), float(tex_pos.z)) + tex_pos_ofs; - - tex_posf.xy /= vec2(ivec2(params.probe_axis_size * params.probe_axis_size * (OCT_SIZE + 2), params.probe_axis_size * (OCT_SIZE + 2))); - - vec4 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), tex_posf, 0.0); - - frag_color = indirect_light; - -#endif - -#ifdef MODE_VISIBILITY - - frag_color = vec4(vec3(1, visibility, visibility), 1.0); -#endif -} diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl deleted file mode 100644 index b95fad650e..0000000000 --- a/servers/rendering/renderer_rd/shaders/sdfgi_direct_light.glsl +++ /dev/null @@ -1,506 +0,0 @@ -#[compute] - -#version 450 - -#VERSION_DEFINES - -layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; - -#define MAX_CASCADES 8 - -layout(set = 0, binding = 1) uniform texture3D sdf_cascades[MAX_CASCADES]; -layout(set = 0, binding = 2) uniform sampler linear_sampler; - -layout(set = 0, binding = 3, std430) restrict readonly buffer DispatchData { - uint x; - uint y; - uint z; - uint total_count; -} -dispatch_data; - -struct ProcessVoxel { - uint position; // xyz 7 bit packed, extra 11 bits for neighbors. - uint albedo; // rgb bits 0-15 albedo, bits 16-21 are normal bits (set if geometry exists toward that side), extra 11 bits for neighbors. 
- uint light; // rgbe8985 encoded total saved light, extra 2 bits for neighbors. - uint light_aniso; // 55555 light anisotropy, extra 2 bits for neighbors. - //total neighbours: 26 -}; - -#ifdef MODE_PROCESS_STATIC -layout(set = 0, binding = 4, std430) restrict buffer ProcessVoxels { -#else -layout(set = 0, binding = 4, std430) restrict buffer readonly ProcessVoxels { -#endif - ProcessVoxel data[]; -} -process_voxels; - -layout(r32ui, set = 0, binding = 5) uniform restrict uimage3D dst_light; -layout(rgba8, set = 0, binding = 6) uniform restrict image3D dst_aniso0; -layout(rg8, set = 0, binding = 7) uniform restrict image3D dst_aniso1; - -struct CascadeData { - vec3 offset; //offset of (0,0,0) in world coordinates - float to_cell; // 1/bounds * grid_size - ivec3 probe_world_offset; - uint pad; -}; - -layout(set = 0, binding = 8, std140) uniform Cascades { - CascadeData data[MAX_CASCADES]; -} -cascades; - -#define LIGHT_TYPE_DIRECTIONAL 0 -#define LIGHT_TYPE_OMNI 1 -#define LIGHT_TYPE_SPOT 2 - -struct Light { - vec3 color; - float energy; - - vec3 direction; - bool has_shadow; - - vec3 position; - float attenuation; - - uint type; - float cos_spot_angle; - float inv_spot_attenuation; - float radius; -}; - -layout(set = 0, binding = 9, std140) buffer restrict readonly Lights { - Light data[]; -} -lights; - -layout(set = 0, binding = 10) uniform texture2DArray lightprobe_texture; -layout(set = 0, binding = 11) uniform texture3D occlusion_texture; - -layout(push_constant, std430) uniform Params { - vec3 grid_size; - uint max_cascades; - - uint cascade; - uint light_count; - uint process_offset; - uint process_increment; - - int probe_axis_size; - float bounce_feedback; - float y_mult; - bool use_occlusion; -} -params; - -vec2 octahedron_wrap(vec2 v) { - vec2 signVal; - signVal.x = v.x >= 0.0 ? 1.0 : -1.0; - signVal.y = v.y >= 0.0 ? 
1.0 : -1.0; - return (1.0 - abs(v.yx)) * signVal; -} - -vec2 octahedron_encode(vec3 n) { - // https://twitter.com/Stubbesaurus/status/937994790553227264 - n /= (abs(n.x) + abs(n.y) + abs(n.z)); - n.xy = n.z >= 0.0 ? n.xy : octahedron_wrap(n.xy); - n.xy = n.xy * 0.5 + 0.5; - return n.xy; -} - -float get_omni_attenuation(float distance, float inv_range, float decay) { - float nd = distance * inv_range; - nd *= nd; - nd *= nd; // nd^4 - nd = max(1.0 - nd, 0.0); - nd *= nd; // nd^2 - return nd * pow(max(distance, 0.0001), -decay); -} - -void main() { - uint voxel_index = uint(gl_GlobalInvocationID.x); - - //used for skipping voxels every N frames - if (params.process_increment > 1) { - voxel_index *= params.process_increment; - voxel_index += params.process_offset; - } - - if (voxel_index >= dispatch_data.total_count) { - return; - } - - uint voxel_position = process_voxels.data[voxel_index].position; - - //keep for storing to texture - ivec3 positioni = ivec3((uvec3(voxel_position, voxel_position, voxel_position) >> uvec3(0, 7, 14)) & uvec3(0x7F)); - - vec3 position = vec3(positioni) + vec3(0.5); - position /= cascades.data[params.cascade].to_cell; - position += cascades.data[params.cascade].offset; - - uint voxel_albedo = process_voxels.data[voxel_index].albedo; - - vec3 albedo = vec3(uvec3(voxel_albedo >> 10, voxel_albedo >> 5, voxel_albedo) & uvec3(0x1F)) / float(0x1F); - vec3 light_accum[6] = vec3[](vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0), vec3(0.0)); - uint valid_aniso = (voxel_albedo >> 15) & 0x3F; - - const vec3 aniso_dir[6] = vec3[]( - vec3(1, 0, 0), - vec3(0, 1, 0), - vec3(0, 0, 1), - vec3(-1, 0, 0), - vec3(0, -1, 0), - vec3(0, 0, -1)); - - // Add indirect light first, in order to save computation resources -#ifdef MODE_PROCESS_DYNAMIC - if (params.bounce_feedback > 0.001) { - vec3 feedback = (params.bounce_feedback < 1.0) ? 
(albedo * params.bounce_feedback) : mix(albedo, vec3(1.0), params.bounce_feedback - 1.0); - vec3 pos = (vec3(positioni) + vec3(0.5)) * float(params.probe_axis_size - 1) / params.grid_size; - ivec3 probe_base_pos = ivec3(pos); - - float weight_accum[6] = float[](0, 0, 0, 0, 0, 0); - - ivec3 tex_pos = ivec3(probe_base_pos.xy, int(params.cascade)); - tex_pos.x += probe_base_pos.z * int(params.probe_axis_size); - - tex_pos.xy = tex_pos.xy * (OCT_SIZE + 2) + ivec2(1); - - vec3 base_tex_posf = vec3(tex_pos); - vec2 tex_pixel_size = 1.0 / vec2(ivec2((OCT_SIZE + 2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE + 2) * params.probe_axis_size)); - vec3 probe_uv_offset = vec3(ivec3(OCT_SIZE + 2, OCT_SIZE + 2, (OCT_SIZE + 2) * params.probe_axis_size)) * tex_pixel_size.xyx; - - for (uint j = 0; j < 8; j++) { - ivec3 offset = (ivec3(j) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1); - ivec3 probe_posi = probe_base_pos; - probe_posi += offset; - - // Compute weight - - vec3 probe_pos = vec3(probe_posi); - vec3 probe_to_pos = pos - probe_pos; - vec3 probe_dir = normalize(-probe_to_pos); - - // Compute lightprobe texture position - - vec3 trilinear = vec3(1.0) - abs(probe_to_pos); - - for (uint k = 0; k < 6; k++) { - if (bool(valid_aniso & (1 << k))) { - vec3 n = aniso_dir[k]; - float weight = trilinear.x * trilinear.y * trilinear.z * max(0, dot(n, probe_dir)); - - if (weight > 0.0 && params.use_occlusion) { - ivec3 occ_indexv = abs((cascades.data[params.cascade].probe_world_offset + probe_posi) & ivec3(1, 1, 1)) * ivec3(1, 2, 4); - vec4 occ_mask = mix(vec4(0.0), vec4(1.0), equal(ivec4(occ_indexv.x | occ_indexv.y), ivec4(0, 1, 2, 3))); - - vec3 occ_pos = (vec3(positioni) + aniso_dir[k] + vec3(0.5)) / params.grid_size; - occ_pos.z += float(params.cascade); - if (occ_indexv.z != 0) { //z bit is on, means index is >=4, so make it switch to the other half of textures - occ_pos.x += 1.0; - } - occ_pos *= vec3(0.5, 1.0, 1.0 / float(params.max_cascades)); //renormalize - float 
occlusion = dot(textureLod(sampler3D(occlusion_texture, linear_sampler), occ_pos, 0.0), occ_mask); - - weight *= occlusion; - } - - if (weight > 0.0) { - vec3 tex_posf = base_tex_posf + vec3(octahedron_encode(n) * float(OCT_SIZE), 0.0); - tex_posf.xy *= tex_pixel_size; - - vec3 pos_uvw = tex_posf; - pos_uvw.xy += vec2(offset.xy) * probe_uv_offset.xy; - pos_uvw.x += float(offset.z) * probe_uv_offset.z; - vec3 indirect_light = textureLod(sampler2DArray(lightprobe_texture, linear_sampler), pos_uvw, 0.0).rgb; - - light_accum[k] += indirect_light * weight; - weight_accum[k] += weight; - } - } - } - } - - for (uint k = 0; k < 6; k++) { - if (weight_accum[k] > 0.0) { - light_accum[k] /= weight_accum[k]; - light_accum[k] *= feedback; - } - } - } - -#endif - - { - uint rgbe = process_voxels.data[voxel_index].light; - - //read rgbe8985 - float r = float((rgbe & 0xff) << 1); - float g = float((rgbe >> 8) & 0x1ff); - float b = float(((rgbe >> 17) & 0xff) << 1); - float e = float((rgbe >> 25) & 0x1F); - float m = pow(2.0, e - 15.0 - 9.0); - - vec3 l = vec3(r, g, b) * m; - - uint aniso = process_voxels.data[voxel_index].light_aniso; - for (uint i = 0; i < 6; i++) { - float strength = ((aniso >> (i * 5)) & 0x1F) / float(0x1F); - light_accum[i] += l * strength; - } - } - - // Raytrace light - - vec3 pos_to_uvw = 1.0 / params.grid_size; - vec3 uvw_ofs = pos_to_uvw * 0.5; - - for (uint i = 0; i < params.light_count; i++) { - float attenuation = 1.0; - vec3 direction; - float light_distance = 1e20; - - switch (lights.data[i].type) { - case LIGHT_TYPE_DIRECTIONAL: { - direction = -lights.data[i].direction; - } break; - case LIGHT_TYPE_OMNI: { - vec3 rel_vec = lights.data[i].position - position; - direction = normalize(rel_vec); - light_distance = length(rel_vec); - rel_vec.y /= params.y_mult; - attenuation = get_omni_attenuation(light_distance, 1.0 / lights.data[i].radius, lights.data[i].attenuation); - - } break; - case LIGHT_TYPE_SPOT: { - vec3 rel_vec = lights.data[i].position - 
position; - direction = normalize(rel_vec); - light_distance = length(rel_vec); - rel_vec.y /= params.y_mult; - attenuation = get_omni_attenuation(light_distance, 1.0 / lights.data[i].radius, lights.data[i].attenuation); - - float cos_spot_angle = lights.data[i].cos_spot_angle; - float cos_angle = dot(-direction, lights.data[i].direction); - - if (cos_angle < cos_spot_angle) { - continue; - } - - float scos = max(cos_angle, cos_spot_angle); - float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cos_spot_angle)); - attenuation *= 1.0 - pow(spot_rim, lights.data[i].inv_spot_attenuation); - } break; - } - - if (attenuation < 0.001) { - continue; - } - - bool hit = false; - - vec3 ray_pos = position; - vec3 ray_dir = direction; - vec3 inv_dir = 1.0 / ray_dir; - - //this is how to properly bias outgoing rays - float cell_size = 1.0 / cascades.data[params.cascade].to_cell; - ray_pos += sign(direction) * cell_size * 0.48; // go almost to the box edge but remain inside - ray_pos += ray_dir * 0.4 * cell_size; //apply a small bias from there - - for (uint j = params.cascade; j < params.max_cascades; j++) { - //convert to local bounds - vec3 pos = ray_pos - cascades.data[j].offset; - pos *= cascades.data[j].to_cell; - float local_distance = light_distance * cascades.data[j].to_cell; - - if (any(lessThan(pos, vec3(0.0))) || any(greaterThanEqual(pos, params.grid_size))) { - continue; //already past bounds for this cascade, goto next - } - - //find maximum advance distance (until reaching bounds) - vec3 t0 = -pos * inv_dir; - vec3 t1 = (params.grid_size - pos) * inv_dir; - vec3 tmax = max(t0, t1); - float max_advance = min(tmax.x, min(tmax.y, tmax.z)); - - max_advance = min(local_distance, max_advance); - - float advance = 0.0; - float occlusion = 1.0; - - while (advance < max_advance) { - //read how much to advance from SDF - vec3 uvw = (pos + ray_dir * advance) * pos_to_uvw; - - float distance = texture(sampler3D(sdf_cascades[j], linear_sampler), uvw).r * 255.0 - 1.0; - if 
(distance < 0.001) { - //consider hit - hit = true; - break; - } - - occlusion = min(occlusion, distance); - - advance += distance; - } - - if (hit) { - attenuation *= occlusion; - break; - } - - if (advance >= local_distance) { - break; //past light distance, abandon search - } - //change ray origin to collision with bounds - pos += ray_dir * max_advance; - pos /= cascades.data[j].to_cell; - pos += cascades.data[j].offset; - light_distance -= max_advance / cascades.data[j].to_cell; - ray_pos = pos; - } - - if (!hit) { - vec3 light = albedo * lights.data[i].color.rgb * lights.data[i].energy * attenuation; - - for (int j = 0; j < 6; j++) { - if (bool(valid_aniso & (1 << j))) { - light_accum[j] += max(0.0, dot(aniso_dir[j], direction)) * light; - } - } - } - } - - // Store the light in the light texture - - float lumas[6]; - vec3 light_total = vec3(0); - - for (int i = 0; i < 6; i++) { - light_total += light_accum[i]; - lumas[i] = max(light_accum[i].r, max(light_accum[i].g, light_accum[i].b)); - } - - float luma_total = max(light_total.r, max(light_total.g, light_total.b)); - - uint light_total_rgbe; - - { - //compress to RGBE9995 to save space - - const float pow2to9 = 512.0f; - const float B = 15.0f; - const float N = 9.0f; - const float LN2 = 0.6931471805599453094172321215; - - float cRed = clamp(light_total.r, 0.0, 65408.0); - float cGreen = clamp(light_total.g, 0.0, 65408.0); - float cBlue = clamp(light_total.b, 0.0, 65408.0); - - float cMax = max(cRed, max(cGreen, cBlue)); - - float expp = max(-B - 1.0f, floor(log(cMax) / LN2)) + 1.0f + B; - - float sMax = floor((cMax / pow(2.0f, expp - B - N)) + 0.5f); - - float exps = expp + 1.0f; - - if (0.0 <= sMax && sMax < pow2to9) { - exps = expp; - } - - float sRed = floor((cRed / pow(2.0f, exps - B - N)) + 0.5f); - float sGreen = floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f); - float sBlue = floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f); -#ifdef MODE_PROCESS_STATIC - //since its self-save, use RGBE8985 - 
light_total_rgbe = ((uint(sRed) & 0x1FF) >> 1) | ((uint(sGreen) & 0x1FF) << 8) | (((uint(sBlue) & 0x1FF) >> 1) << 17) | ((uint(exps) & 0x1F) << 25); - -#else - light_total_rgbe = (uint(sRed) & 0x1FF) | ((uint(sGreen) & 0x1FF) << 9) | ((uint(sBlue) & 0x1FF) << 18) | ((uint(exps) & 0x1F) << 27); -#endif - } - -#ifdef MODE_PROCESS_DYNAMIC - - vec4 aniso0; - aniso0.r = lumas[0] / luma_total; - aniso0.g = lumas[1] / luma_total; - aniso0.b = lumas[2] / luma_total; - aniso0.a = lumas[3] / luma_total; - - vec2 aniso1; - aniso1.r = lumas[4] / luma_total; - aniso1.g = lumas[5] / luma_total; - - //save to 3D textures - imageStore(dst_aniso0, positioni, aniso0); - imageStore(dst_aniso1, positioni, vec4(aniso1, 0.0, 0.0)); - imageStore(dst_light, positioni, uvec4(light_total_rgbe)); - - //also fill neighbours, so light interpolation during the indirect pass works - - //recover the neighbour list from the leftover bits - uint neighbours = (voxel_albedo >> 21) | ((voxel_position >> 21) << 11) | ((process_voxels.data[voxel_index].light >> 30) << 22) | ((process_voxels.data[voxel_index].light_aniso >> 30) << 24); - - const uint max_neighbours = 26; - const ivec3 neighbour_positions[max_neighbours] = ivec3[]( - ivec3(-1, -1, -1), - ivec3(-1, -1, 0), - ivec3(-1, -1, 1), - ivec3(-1, 0, -1), - ivec3(-1, 0, 0), - ivec3(-1, 0, 1), - ivec3(-1, 1, -1), - ivec3(-1, 1, 0), - ivec3(-1, 1, 1), - ivec3(0, -1, -1), - ivec3(0, -1, 0), - ivec3(0, -1, 1), - ivec3(0, 0, -1), - ivec3(0, 0, 1), - ivec3(0, 1, -1), - ivec3(0, 1, 0), - ivec3(0, 1, 1), - ivec3(1, -1, -1), - ivec3(1, -1, 0), - ivec3(1, -1, 1), - ivec3(1, 0, -1), - ivec3(1, 0, 0), - ivec3(1, 0, 1), - ivec3(1, 1, -1), - ivec3(1, 1, 0), - ivec3(1, 1, 1)); - - for (uint i = 0; i < max_neighbours; i++) { - if (bool(neighbours & (1 << i))) { - ivec3 neighbour_pos = positioni + neighbour_positions[i]; - imageStore(dst_light, neighbour_pos, uvec4(light_total_rgbe)); - imageStore(dst_aniso0, neighbour_pos, aniso0); - imageStore(dst_aniso1, 
neighbour_pos, vec4(aniso1, 0.0, 0.0)); - } - } - -#endif - -#ifdef MODE_PROCESS_STATIC - - //save back the anisotropic - - uint light = process_voxels.data[voxel_index].light & (3 << 30); - light |= light_total_rgbe; - process_voxels.data[voxel_index].light = light; //replace - - uint light_aniso = process_voxels.data[voxel_index].light_aniso & (3 << 30); - for (int i = 0; i < 6; i++) { - light_aniso |= min(31, uint((lumas[i] / luma_total) * 31.0)) << (i * 5); - } - - process_voxels.data[voxel_index].light_aniso = light_aniso; - -#endif -} diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl deleted file mode 100644 index 9c03297f5c..0000000000 --- a/servers/rendering/renderer_rd/shaders/sdfgi_integrate.glsl +++ /dev/null @@ -1,612 +0,0 @@ -#[compute] - -#version 450 - -#VERSION_DEFINES - -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -#define MAX_CASCADES 8 - -layout(set = 0, binding = 1) uniform texture3D sdf_cascades[MAX_CASCADES]; -layout(set = 0, binding = 2) uniform texture3D light_cascades[MAX_CASCADES]; -layout(set = 0, binding = 3) uniform texture3D aniso0_cascades[MAX_CASCADES]; -layout(set = 0, binding = 4) uniform texture3D aniso1_cascades[MAX_CASCADES]; - -layout(set = 0, binding = 6) uniform sampler linear_sampler; - -struct CascadeData { - vec3 offset; //offset of (0,0,0) in world coordinates - float to_cell; // 1/bounds * grid_size - ivec3 probe_world_offset; - uint pad; -}; - -layout(set = 0, binding = 7, std140) uniform Cascades { - CascadeData data[MAX_CASCADES]; -} -cascades; - -layout(r32ui, set = 0, binding = 8) uniform restrict uimage2DArray lightprobe_texture_data; -layout(rgba16i, set = 0, binding = 9) uniform restrict iimage2DArray lightprobe_history_texture; -layout(rgba32i, set = 0, binding = 10) uniform restrict iimage2D lightprobe_average_texture; - -//used for scrolling -layout(rgba16i, set = 0, binding = 11) uniform restrict 
iimage2DArray lightprobe_history_scroll_texture; -layout(rgba32i, set = 0, binding = 12) uniform restrict iimage2D lightprobe_average_scroll_texture; - -layout(rgba32i, set = 0, binding = 13) uniform restrict iimage2D lightprobe_average_parent_texture; - -layout(rgba16f, set = 0, binding = 14) uniform restrict writeonly image2DArray lightprobe_ambient_texture; - -#ifdef USE_CUBEMAP_ARRAY -layout(set = 1, binding = 0) uniform textureCubeArray sky_irradiance; -#else -layout(set = 1, binding = 0) uniform textureCube sky_irradiance; -#endif -layout(set = 1, binding = 1) uniform sampler linear_sampler_mipmaps; - -#define HISTORY_BITS 10 - -#define SKY_MODE_DISABLED 0 -#define SKY_MODE_COLOR 1 -#define SKY_MODE_SKY 2 - -layout(push_constant, std430) uniform Params { - vec3 grid_size; - uint max_cascades; - - uint probe_axis_size; - uint cascade; - uint history_index; - uint history_size; - - uint ray_count; - float ray_bias; - ivec2 image_size; - - ivec3 world_offset; - uint sky_mode; - - ivec3 scroll; - float sky_energy; - - vec3 sky_color; - float y_mult; - - bool store_ambient_texture; - uint pad[3]; -} -params; - -const float PI = 3.14159265f; -const float GOLDEN_ANGLE = PI * (3.0 - sqrt(5.0)); - -vec3 vogel_hemisphere(uint p_index, uint p_count, float p_offset) { - float r = sqrt(float(p_index) + 0.5f) / sqrt(float(p_count)); - float theta = float(p_index) * GOLDEN_ANGLE + p_offset; - float y = cos(r * PI * 0.5); - float l = sin(r * PI * 0.5); - return vec3(l * cos(theta), l * sin(theta), y * (float(p_index & 1) * 2.0 - 1.0)); -} - -uvec3 hash3(uvec3 x) { - x = ((x >> 16) ^ x) * 0x45d9f3b; - x = ((x >> 16) ^ x) * 0x45d9f3b; - x = (x >> 16) ^ x; - return x; -} - -float hashf3(vec3 co) { - return fract(sin(dot(co, vec3(12.9898, 78.233, 137.13451))) * 43758.5453); -} - -vec3 octahedron_encode(vec2 f) { - // https://twitter.com/Stubbesaurus/status/937994790553227264 - f = f * 2.0 - 1.0; - vec3 n = vec3(f.x, f.y, 1.0f - abs(f.x) - abs(f.y)); - float t = clamp(-n.z, 0.0, 
1.0); - n.x += n.x >= 0 ? -t : t; - n.y += n.y >= 0 ? -t : t; - return normalize(n); -} - -uint rgbe_encode(vec3 color) { - const float pow2to9 = 512.0f; - const float B = 15.0f; - const float N = 9.0f; - const float LN2 = 0.6931471805599453094172321215; - - float cRed = clamp(color.r, 0.0, 65408.0); - float cGreen = clamp(color.g, 0.0, 65408.0); - float cBlue = clamp(color.b, 0.0, 65408.0); - - float cMax = max(cRed, max(cGreen, cBlue)); - - float expp = max(-B - 1.0f, floor(log(cMax) / LN2)) + 1.0f + B; - - float sMax = floor((cMax / pow(2.0f, expp - B - N)) + 0.5f); - - float exps = expp + 1.0f; - - if (0.0 <= sMax && sMax < pow2to9) { - exps = expp; - } - - float sRed = floor((cRed / pow(2.0f, exps - B - N)) + 0.5f); - float sGreen = floor((cGreen / pow(2.0f, exps - B - N)) + 0.5f); - float sBlue = floor((cBlue / pow(2.0f, exps - B - N)) + 0.5f); - return (uint(sRed) & 0x1FF) | ((uint(sGreen) & 0x1FF) << 9) | ((uint(sBlue) & 0x1FF) << 18) | ((uint(exps) & 0x1F) << 27); -} - -struct SH { -#if (SH_SIZE == 16) - float c[48]; -#else - float c[28]; -#endif -}; - -shared SH sh_accum[64]; //8x8 - -void main() { - ivec2 pos = ivec2(gl_GlobalInvocationID.xy); - if (any(greaterThanEqual(pos, params.image_size))) { //too large, do nothing - return; - } - - uint probe_index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * 8; - -#ifdef MODE_PROCESS - - float probe_cell_size = float(params.grid_size.x / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell; - - ivec3 probe_cell; - probe_cell.x = pos.x % int(params.probe_axis_size); - probe_cell.y = pos.y; - probe_cell.z = pos.x / int(params.probe_axis_size); - - vec3 probe_pos = cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size; - vec3 pos_to_uvw = 1.0 / params.grid_size; - - for (uint i = 0; i < SH_SIZE * 3; i++) { - sh_accum[probe_index].c[i] = 0.0; - } - - // quickly ensure each probe has a different "offset" for the vogel function, based on integer world position - uvec3 h3 
= hash3(uvec3(params.world_offset + probe_cell)); - float offset = hashf3(vec3(h3 & uvec3(0xFFFFF))); - - //for a more homogeneous hemisphere, alternate based on history frames - uint ray_offset = params.history_index; - uint ray_mult = params.history_size; - uint ray_total = ray_mult * params.ray_count; - - for (uint i = 0; i < params.ray_count; i++) { - vec3 ray_dir = vogel_hemisphere(ray_offset + i * ray_mult, ray_total, offset); - ray_dir.y *= params.y_mult; - ray_dir = normalize(ray_dir); - - //needs to be visible - vec3 ray_pos = probe_pos; - vec3 inv_dir = 1.0 / ray_dir; - - bool hit = false; - uint hit_cascade; - - float bias = params.ray_bias; - vec3 abs_ray_dir = abs(ray_dir); - ray_pos += ray_dir * 1.0 / max(abs_ray_dir.x, max(abs_ray_dir.y, abs_ray_dir.z)) * bias / cascades.data[params.cascade].to_cell; - vec3 uvw; - - for (uint j = params.cascade; j < params.max_cascades; j++) { - //convert to local bounds - vec3 pos = ray_pos - cascades.data[j].offset; - pos *= cascades.data[j].to_cell; - - if (any(lessThan(pos, vec3(0.0))) || any(greaterThanEqual(pos, params.grid_size))) { - continue; //already past bounds for this cascade, goto next - } - - //find maximum advance distance (until reaching bounds) - vec3 t0 = -pos * inv_dir; - vec3 t1 = (params.grid_size - pos) * inv_dir; - vec3 tmax = max(t0, t1); - float max_advance = min(tmax.x, min(tmax.y, tmax.z)); - - float advance = 0.0; - - while (advance < max_advance) { - //read how much to advance from SDF - uvw = (pos + ray_dir * advance) * pos_to_uvw; - - float distance = texture(sampler3D(sdf_cascades[j], linear_sampler), uvw).r * 255.0 - 1.0; - if (distance < 0.05) { - //consider hit - hit = true; - break; - } - - advance += distance; - } - - if (hit) { - hit_cascade = j; - break; - } - - //change ray origin to collision with bounds - pos += ray_dir * max_advance; - pos /= cascades.data[j].to_cell; - pos += cascades.data[j].offset; - ray_pos = pos; - } - - vec4 light; - if (hit) { - //avoid reading 
different texture from different threads - for (uint j = params.cascade; j < params.max_cascades; j++) { - if (j == hit_cascade) { - const float EPSILON = 0.001; - vec3 hit_normal = normalize(vec3( - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(EPSILON, 0.0, 0.0)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(EPSILON, 0.0, 0.0)).r, - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(0.0, EPSILON, 0.0)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(0.0, EPSILON, 0.0)).r, - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw + vec3(0.0, 0.0, EPSILON)).r - texture(sampler3D(sdf_cascades[hit_cascade], linear_sampler), uvw - vec3(0.0, 0.0, EPSILON)).r)); - - vec3 hit_light = texture(sampler3D(light_cascades[hit_cascade], linear_sampler), uvw).rgb; - vec4 aniso0 = texture(sampler3D(aniso0_cascades[hit_cascade], linear_sampler), uvw); - vec3 hit_aniso0 = aniso0.rgb; - vec3 hit_aniso1 = vec3(aniso0.a, texture(sampler3D(aniso1_cascades[hit_cascade], linear_sampler), uvw).rg); - - //one liner magic - light.rgb = hit_light * (dot(max(vec3(0.0), (hit_normal * hit_aniso0)), vec3(1.0)) + dot(max(vec3(0.0), (-hit_normal * hit_aniso1)), vec3(1.0))); - light.a = 1.0; - } - } - - } else if (params.sky_mode == SKY_MODE_SKY) { -#ifdef USE_CUBEMAP_ARRAY - light.rgb = textureLod(samplerCubeArray(sky_irradiance, linear_sampler_mipmaps), vec4(ray_dir, 0.0), 2.0).rgb; // Use second mipmap because we don't usually throw a lot of rays, so this compensates. -#else - light.rgb = textureLod(samplerCube(sky_irradiance, linear_sampler_mipmaps), ray_dir, 2.0).rgb; // Use second mipmap because we don't usually throw a lot of rays, so this compensates. 
-#endif - light.rgb *= params.sky_energy; - light.a = 0.0; - - } else if (params.sky_mode == SKY_MODE_COLOR) { - light.rgb = params.sky_color; - light.rgb *= params.sky_energy; - light.a = 0.0; - } else { - light = vec4(0, 0, 0, 0); - } - - vec3 ray_dir2 = ray_dir * ray_dir; - -#define SH_ACCUM(m_idx, m_value) \ - { \ - vec3 l = light.rgb * (m_value); \ - sh_accum[probe_index].c[m_idx * 3 + 0] += l.r; \ - sh_accum[probe_index].c[m_idx * 3 + 1] += l.g; \ - sh_accum[probe_index].c[m_idx * 3 + 2] += l.b; \ - } - SH_ACCUM(0, 0.282095); //l0 - SH_ACCUM(1, 0.488603 * ray_dir.y); //l1n1 - SH_ACCUM(2, 0.488603 * ray_dir.z); //l1n0 - SH_ACCUM(3, 0.488603 * ray_dir.x); //l1p1 - SH_ACCUM(4, 1.092548 * ray_dir.x * ray_dir.y); //l2n2 - SH_ACCUM(5, 1.092548 * ray_dir.y * ray_dir.z); //l2n1 - SH_ACCUM(6, 0.315392 * (3.0 * ray_dir2.z - 1.0)); //l20 - SH_ACCUM(7, 1.092548 * ray_dir.x * ray_dir.z); //l2p1 - SH_ACCUM(8, 0.546274 * (ray_dir2.x - ray_dir2.y)); //l2p2 -#if (SH_SIZE == 16) - SH_ACCUM(9, 0.590043 * ray_dir.y * (3.0f * ray_dir2.x - ray_dir2.y)); - SH_ACCUM(10, 2.890611 * ray_dir.y * ray_dir.x * ray_dir.z); - SH_ACCUM(11, 0.646360 * ray_dir.y * (-1.0f + 5.0f * ray_dir2.z)); - SH_ACCUM(12, 0.373176 * (5.0f * ray_dir2.z * ray_dir.z - 3.0f * ray_dir.z)); - SH_ACCUM(13, 0.457045 * ray_dir.x * (-1.0f + 5.0f * ray_dir2.z)); - SH_ACCUM(14, 1.445305 * (ray_dir2.x - ray_dir2.y) * ray_dir.z); - SH_ACCUM(15, 0.590043 * ray_dir.x * (ray_dir2.x - 3.0f * ray_dir2.y)); - -#endif - } - - for (uint i = 0; i < SH_SIZE; i++) { - // store in history texture - ivec3 prev_pos = ivec3(pos.x, pos.y * SH_SIZE + i, int(params.history_index)); - ivec2 average_pos = prev_pos.xy; - - vec4 value = vec4(sh_accum[probe_index].c[i * 3 + 0], sh_accum[probe_index].c[i * 3 + 1], sh_accum[probe_index].c[i * 3 + 2], 1.0) * 4.0 / float(params.ray_count); - - ivec4 ivalue = clamp(ivec4(value * float(1 << HISTORY_BITS)), -32768, 32767); //clamp to 16 bits, so higher values don't break average - - ivec4 prev_value 
= imageLoad(lightprobe_history_texture, prev_pos); - ivec4 average = imageLoad(lightprobe_average_texture, average_pos); - - average -= prev_value; - average += ivalue; - - imageStore(lightprobe_history_texture, prev_pos, ivalue); - imageStore(lightprobe_average_texture, average_pos, average); - - if (params.store_ambient_texture && i == 0) { - ivec3 ambient_pos = ivec3(pos, int(params.cascade)); - vec4 ambient_light = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS); - ambient_light *= 0.88622; // SHL0 - imageStore(lightprobe_ambient_texture, ambient_pos, ambient_light); - } - } -#endif // MODE PROCESS - -#ifdef MODE_STORE - - // converting to octahedral in this step is required because - // octahedral is much faster to read from the screen than spherical harmonics, - // despite the very slight quality loss - - ivec2 sh_pos = (pos / OCT_SIZE) * ivec2(1, SH_SIZE); - ivec2 oct_pos = (pos / OCT_SIZE) * (OCT_SIZE + 2) + ivec2(1); - ivec2 local_pos = pos % OCT_SIZE; - - //compute the octahedral normal for this texel - vec3 normal = octahedron_encode(vec2(local_pos) / float(OCT_SIZE)); - - // read the spherical harmonic - - vec3 normal2 = normal * normal; - float c[SH_SIZE] = float[]( - - 0.282095, //l0 - 0.488603 * normal.y, //l1n1 - 0.488603 * normal.z, //l1n0 - 0.488603 * normal.x, //l1p1 - 1.092548 * normal.x * normal.y, //l2n2 - 1.092548 * normal.y * normal.z, //l2n1 - 0.315392 * (3.0 * normal2.z - 1.0), //l20 - 1.092548 * normal.x * normal.z, //l2p1 - 0.546274 * (normal2.x - normal2.y) //l2p2 -#if (SH_SIZE == 16) - , - 0.590043 * normal.y * (3.0f * normal2.x - normal2.y), - 2.890611 * normal.y * normal.x * normal.z, - 0.646360 * normal.y * (-1.0f + 5.0f * normal2.z), - 0.373176 * (5.0f * normal2.z * normal.z - 3.0f * normal.z), - 0.457045 * normal.x * (-1.0f + 5.0f * normal2.z), - 1.445305 * (normal2.x - normal2.y) * normal.z, - 0.590043 * normal.x * (normal2.x - 3.0f * normal2.y) - -#endif - ); - - const float l_mult[SH_SIZE] = float[]( - 
1.0, - 2.0 / 3.0, - 2.0 / 3.0, - 2.0 / 3.0, - 1.0 / 4.0, - 1.0 / 4.0, - 1.0 / 4.0, - 1.0 / 4.0, - 1.0 / 4.0 -#if (SH_SIZE == 16) - , // l4 does not contribute to irradiance - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0 -#endif - ); - - vec3 irradiance = vec3(0.0); - vec3 radiance = vec3(0.0); - - for (uint i = 0; i < SH_SIZE; i++) { - // store in history texture - ivec2 average_pos = sh_pos + ivec2(0, i); - ivec4 average = imageLoad(lightprobe_average_texture, average_pos); - - vec4 sh = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS); - - vec3 m = sh.rgb * c[i] * 4.0; - - irradiance += m * l_mult[i]; - radiance += m; - } - - //encode RGBE9995 for the final texture - - uint irradiance_rgbe = rgbe_encode(irradiance); - uint radiance_rgbe = rgbe_encode(radiance); - - //store in octahedral map - - ivec3 texture_pos = ivec3(oct_pos, int(params.cascade)); - ivec3 copy_to[4] = ivec3[](ivec3(-2, -2, -2), ivec3(-2, -2, -2), ivec3(-2, -2, -2), ivec3(-2, -2, -2)); - copy_to[0] = texture_pos + ivec3(local_pos, 0); - - if (local_pos == ivec2(0, 0)) { - copy_to[1] = texture_pos + ivec3(OCT_SIZE - 1, -1, 0); - copy_to[2] = texture_pos + ivec3(-1, OCT_SIZE - 1, 0); - copy_to[3] = texture_pos + ivec3(OCT_SIZE, OCT_SIZE, 0); - } else if (local_pos == ivec2(OCT_SIZE - 1, 0)) { - copy_to[1] = texture_pos + ivec3(0, -1, 0); - copy_to[2] = texture_pos + ivec3(OCT_SIZE, OCT_SIZE - 1, 0); - copy_to[3] = texture_pos + ivec3(-1, OCT_SIZE, 0); - } else if (local_pos == ivec2(0, OCT_SIZE - 1)) { - copy_to[1] = texture_pos + ivec3(-1, 0, 0); - copy_to[2] = texture_pos + ivec3(OCT_SIZE - 1, OCT_SIZE, 0); - copy_to[3] = texture_pos + ivec3(OCT_SIZE, -1, 0); - } else if (local_pos == ivec2(OCT_SIZE - 1, OCT_SIZE - 1)) { - copy_to[1] = texture_pos + ivec3(0, OCT_SIZE, 0); - copy_to[2] = texture_pos + ivec3(OCT_SIZE, 0, 0); - copy_to[3] = texture_pos + ivec3(-1, -1, 0); - } else if (local_pos.y == 0) { - copy_to[1] = texture_pos + ivec3(OCT_SIZE - local_pos.x - 1, 
local_pos.y - 1, 0); - } else if (local_pos.x == 0) { - copy_to[1] = texture_pos + ivec3(local_pos.x - 1, OCT_SIZE - local_pos.y - 1, 0); - } else if (local_pos.y == OCT_SIZE - 1) { - copy_to[1] = texture_pos + ivec3(OCT_SIZE - local_pos.x - 1, local_pos.y + 1, 0); - } else if (local_pos.x == OCT_SIZE - 1) { - copy_to[1] = texture_pos + ivec3(local_pos.x + 1, OCT_SIZE - local_pos.y - 1, 0); - } - - for (int i = 0; i < 4; i++) { - if (copy_to[i] == ivec3(-2, -2, -2)) { - continue; - } - imageStore(lightprobe_texture_data, copy_to[i], uvec4(irradiance_rgbe)); - imageStore(lightprobe_texture_data, copy_to[i] + ivec3(0, 0, int(params.max_cascades)), uvec4(radiance_rgbe)); - } - -#endif - -#ifdef MODE_SCROLL - - ivec3 probe_cell; - probe_cell.x = pos.x % int(params.probe_axis_size); - probe_cell.y = pos.y; - probe_cell.z = pos.x / int(params.probe_axis_size); - - ivec3 read_probe = probe_cell - params.scroll; - - if (all(greaterThanEqual(read_probe, ivec3(0))) && all(lessThan(read_probe, ivec3(params.probe_axis_size)))) { - // can scroll - ivec2 tex_pos; - tex_pos = read_probe.xy; - tex_pos.x += read_probe.z * int(params.probe_axis_size); - - //scroll - for (uint j = 0; j < params.history_size; j++) { - for (int i = 0; i < SH_SIZE; i++) { - // copy from history texture - ivec3 src_pos = ivec3(tex_pos.x, tex_pos.y * SH_SIZE + i, int(j)); - ivec3 dst_pos = ivec3(pos.x, pos.y * SH_SIZE + i, int(j)); - ivec4 value = imageLoad(lightprobe_history_texture, src_pos); - imageStore(lightprobe_history_scroll_texture, dst_pos, value); - } - } - - for (int i = 0; i < SH_SIZE; i++) { - // copy from average texture - ivec2 src_pos = ivec2(tex_pos.x, tex_pos.y * SH_SIZE + i); - ivec2 dst_pos = ivec2(pos.x, pos.y * SH_SIZE + i); - ivec4 value = imageLoad(lightprobe_average_texture, src_pos); - imageStore(lightprobe_average_scroll_texture, dst_pos, value); - } - } else if (params.cascade < params.max_cascades - 1) { - //can't scroll, must look for position in parent cascade - - //to 
global coords - float cell_to_probe = float(params.grid_size.x / float(params.probe_axis_size - 1)); - - float probe_cell_size = cell_to_probe / cascades.data[params.cascade].to_cell; - vec3 probe_pos = cascades.data[params.cascade].offset + vec3(probe_cell) * probe_cell_size; - - //to parent local coords - float probe_cell_size_next = cell_to_probe / cascades.data[params.cascade + 1].to_cell; - probe_pos -= cascades.data[params.cascade + 1].offset; - probe_pos /= probe_cell_size_next; - - ivec3 probe_posi = ivec3(probe_pos); - //add up all light, no need to use occlusion here, since occlusion will do its work afterwards - - vec4 average_light[SH_SIZE] = vec4[](vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0) -#if (SH_SIZE == 16) - , - vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0), vec4(0) -#endif - ); - float total_weight = 0.0; - - for (int i = 0; i < 8; i++) { - ivec3 offset = probe_posi + ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1)); - - vec3 trilinear = vec3(1.0) - abs(probe_pos - vec3(offset)); - float weight = trilinear.x * trilinear.y * trilinear.z; - - ivec2 tex_pos; - tex_pos = offset.xy; - tex_pos.x += offset.z * int(params.probe_axis_size); - - for (int j = 0; j < SH_SIZE; j++) { - // copy from history texture - ivec2 src_pos = ivec2(tex_pos.x, tex_pos.y * SH_SIZE + j); - ivec4 average = imageLoad(lightprobe_average_parent_texture, src_pos); - vec4 value = (vec4(average) / float(params.history_size)) / float(1 << HISTORY_BITS); - average_light[j] += value * weight; - } - - total_weight += weight; - } - - if (total_weight > 0.0) { - total_weight = 1.0 / total_weight; - } - //store the averaged values everywhere - - for (int i = 0; i < SH_SIZE; i++) { - ivec4 ivalue = clamp(ivec4(average_light[i] * total_weight * float(1 << HISTORY_BITS)), ivec4(-32768), ivec4(32767)); //clamp to 16 bits, so higher values don't break average - // copy from history texture - ivec3 dst_pos = ivec3(pos.x, pos.y * SH_SIZE + i, 0); - 
for (uint j = 0; j < params.history_size; j++) { - dst_pos.z = int(j); - imageStore(lightprobe_history_scroll_texture, dst_pos, ivalue); - } - - ivalue *= int(params.history_size); //average needs to have all history added up - imageStore(lightprobe_average_scroll_texture, dst_pos.xy, ivalue); - } - - } else { - //scroll at the edge of the highest cascade, just copy what is there, - //since its the closest we have anyway - - for (uint j = 0; j < params.history_size; j++) { - ivec2 tex_pos; - tex_pos = probe_cell.xy; - tex_pos.x += probe_cell.z * int(params.probe_axis_size); - - for (int i = 0; i < SH_SIZE; i++) { - // copy from history texture - ivec3 src_pos = ivec3(tex_pos.x, tex_pos.y * SH_SIZE + i, int(j)); - ivec3 dst_pos = ivec3(pos.x, pos.y * SH_SIZE + i, int(j)); - ivec4 value = imageLoad(lightprobe_history_texture, dst_pos); - imageStore(lightprobe_history_scroll_texture, dst_pos, value); - } - } - - for (int i = 0; i < SH_SIZE; i++) { - // copy from average texture - ivec2 spos = ivec2(pos.x, pos.y * SH_SIZE + i); - ivec4 average = imageLoad(lightprobe_average_texture, spos); - imageStore(lightprobe_average_scroll_texture, spos, average); - } - } - -#endif - -#ifdef MODE_SCROLL_STORE - - //do not update probe texture, as these will be updated later - - for (uint j = 0; j < params.history_size; j++) { - for (int i = 0; i < SH_SIZE; i++) { - // copy from history texture - ivec3 spos = ivec3(pos.x, pos.y * SH_SIZE + i, int(j)); - ivec4 value = imageLoad(lightprobe_history_scroll_texture, spos); - imageStore(lightprobe_history_texture, spos, value); - } - } - - for (int i = 0; i < SH_SIZE; i++) { - // copy from average texture - ivec2 spos = ivec2(pos.x, pos.y * SH_SIZE + i); - ivec4 average = imageLoad(lightprobe_average_scroll_texture, spos); - imageStore(lightprobe_average_texture, spos, average); - } - -#endif -} diff --git a/servers/rendering/renderer_rd/shaders/sdfgi_preprocess.glsl b/servers/rendering/renderer_rd/shaders/sdfgi_preprocess.glsl deleted 
file mode 100644 index bce98f4054..0000000000 --- a/servers/rendering/renderer_rd/shaders/sdfgi_preprocess.glsl +++ /dev/null @@ -1,1056 +0,0 @@ -#[compute] - -#version 450 - -#VERSION_DEFINES - -#ifdef MODE_JUMPFLOOD_OPTIMIZED -#define GROUP_SIZE 8 - -layout(local_size_x = GROUP_SIZE, local_size_y = GROUP_SIZE, local_size_z = GROUP_SIZE) in; - -#elif defined(MODE_OCCLUSION) || defined(MODE_SCROLL) -//buffer layout -layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; - -#else -//grid layout -layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; - -#endif - -#if defined(MODE_INITIALIZE_JUMP_FLOOD) || defined(MODE_INITIALIZE_JUMP_FLOOD_HALF) -layout(r16ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_color; -layout(rgba8ui, set = 0, binding = 2) uniform restrict writeonly uimage3D dst_positions; -#endif - -#ifdef MODE_UPSCALE_JUMP_FLOOD -layout(r16ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_color; -layout(rgba8ui, set = 0, binding = 2) uniform restrict readonly uimage3D src_positions_half; -layout(rgba8ui, set = 0, binding = 3) uniform restrict writeonly uimage3D dst_positions; -#endif - -#if defined(MODE_JUMPFLOOD) || defined(MODE_JUMPFLOOD_OPTIMIZED) -layout(rgba8ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_positions; -layout(rgba8ui, set = 0, binding = 2) uniform restrict writeonly uimage3D dst_positions; -#endif - -#ifdef MODE_JUMPFLOOD_OPTIMIZED - -shared uvec4 group_positions[(GROUP_SIZE + 2) * (GROUP_SIZE + 2) * (GROUP_SIZE + 2)]; //4x4x4 with margins - -void group_store(ivec3 p_pos, uvec4 p_value) { - uint offset = uint(p_pos.z * (GROUP_SIZE + 2) * (GROUP_SIZE + 2) + p_pos.y * (GROUP_SIZE + 2) + p_pos.x); - group_positions[offset] = p_value; -} - -uvec4 group_load(ivec3 p_pos) { - uint offset = uint(p_pos.z * (GROUP_SIZE + 2) * (GROUP_SIZE + 2) + p_pos.y * (GROUP_SIZE + 2) + p_pos.x); - return group_positions[offset]; -} - -#endif - -#ifdef MODE_OCCLUSION - -layout(r16ui, set = 
0, binding = 1) uniform restrict readonly uimage3D src_color; -layout(r8, set = 0, binding = 2) uniform restrict image3D dst_occlusion[8]; -layout(r32ui, set = 0, binding = 3) uniform restrict readonly uimage3D src_facing; - -const uvec2 group_size_offset[11] = uvec2[](uvec2(1, 0), uvec2(3, 1), uvec2(6, 4), uvec2(10, 10), uvec2(15, 20), uvec2(21, 35), uvec2(28, 56), uvec2(36, 84), uvec2(42, 120), uvec2(46, 162), uvec2(48, 208)); -const uint group_pos[256] = uint[](0, - 65536, 256, 1, - 131072, 65792, 512, 65537, 257, 2, - 196608, 131328, 66048, 768, 131073, 65793, 513, 65538, 258, 3, - 262144, 196864, 131584, 66304, 1024, 196609, 131329, 66049, 769, 131074, 65794, 514, 65539, 259, 4, - 327680, 262400, 197120, 131840, 66560, 1280, 262145, 196865, 131585, 66305, 1025, 196610, 131330, 66050, 770, 131075, 65795, 515, 65540, 260, 5, - 393216, 327936, 262656, 197376, 132096, 66816, 1536, 327681, 262401, 197121, 131841, 66561, 1281, 262146, 196866, 131586, 66306, 1026, 196611, 131331, 66051, 771, 131076, 65796, 516, 65541, 261, 6, - 458752, 393472, 328192, 262912, 197632, 132352, 67072, 1792, 393217, 327937, 262657, 197377, 132097, 66817, 1537, 327682, 262402, 197122, 131842, 66562, 1282, 262147, 196867, 131587, 66307, 1027, 196612, 131332, 66052, 772, 131077, 65797, 517, 65542, 262, 7, - 459008, 393728, 328448, 263168, 197888, 132608, 67328, 458753, 393473, 328193, 262913, 197633, 132353, 67073, 1793, 393218, 327938, 262658, 197378, 132098, 66818, 1538, 327683, 262403, 197123, 131843, 66563, 1283, 262148, 196868, 131588, 66308, 1028, 196613, 131333, 66053, 773, 131078, 65798, 518, 65543, 263, - 459264, 393984, 328704, 263424, 198144, 132864, 459009, 393729, 328449, 263169, 197889, 132609, 67329, 458754, 393474, 328194, 262914, 197634, 132354, 67074, 1794, 393219, 327939, 262659, 197379, 132099, 66819, 1539, 327684, 262404, 197124, 131844, 66564, 1284, 262149, 196869, 131589, 66309, 1029, 196614, 131334, 66054, 774, 131079, 65799, 519, - 459520, 394240, 328960, 263680, 
198400, 459265, 393985, 328705, 263425, 198145, 132865, 459010, 393730, 328450, 263170, 197890, 132610, 67330, 458755, 393475, 328195, 262915, 197635, 132355, 67075, 1795, 393220, 327940, 262660, 197380, 132100, 66820, 1540, 327685, 262405, 197125, 131845, 66565, 1285, 262150, 196870, 131590, 66310, 1030, 196615, 131335, 66055, 775); - -shared uint occlusion_facing[((OCCLUSION_SIZE * 2) * (OCCLUSION_SIZE * 2) * (OCCLUSION_SIZE * 2)) / 4]; - -uint get_facing(ivec3 p_pos) { - uint ofs = uint(p_pos.z * OCCLUSION_SIZE * 2 * OCCLUSION_SIZE * 2 + p_pos.y * OCCLUSION_SIZE * 2 + p_pos.x); - uint v = occlusion_facing[ofs / 4]; - return (v >> ((ofs % 4) * 8)) & 0xFF; -} - -#endif - -#ifdef MODE_STORE - -layout(rgba8ui, set = 0, binding = 1) uniform restrict readonly uimage3D src_positions; -layout(r16ui, set = 0, binding = 2) uniform restrict readonly uimage3D src_albedo; -layout(r8, set = 0, binding = 3) uniform restrict readonly image3D src_occlusion[8]; -layout(r32ui, set = 0, binding = 4) uniform restrict readonly uimage3D src_light; -layout(r32ui, set = 0, binding = 5) uniform restrict readonly uimage3D src_light_aniso; -layout(r32ui, set = 0, binding = 6) uniform restrict readonly uimage3D src_facing; - -layout(r8, set = 0, binding = 7) uniform restrict writeonly image3D dst_sdf; -layout(r16ui, set = 0, binding = 8) uniform restrict writeonly uimage3D dst_occlusion; - -layout(set = 0, binding = 10, std430) restrict buffer DispatchData { - uint x; - uint y; - uint z; - uint total_count; -} -dispatch_data; - -struct ProcessVoxel { - uint position; // xyz 7 bit packed, extra 11 bits for neighbors. 
- uint albedo; //rgb bits 0-15 albedo, bits 16-21 are normal bits (set if geometry exists toward that side), extra 11 bits for neighbours - uint light; //rgbe8985 encoded total saved light, extra 2 bits for neighbours - uint light_aniso; //55555 light anisotropy, extra 2 bits for neighbours - //total neighbours: 26 -}; - -layout(set = 0, binding = 11, std430) restrict buffer writeonly ProcessVoxels { - ProcessVoxel data[]; -} -dst_process_voxels; - -shared ProcessVoxel store_positions[4 * 4 * 4]; -shared uint store_position_count; -shared uint store_from_index; -#endif - -#ifdef MODE_SCROLL - -layout(r16ui, set = 0, binding = 1) uniform restrict writeonly uimage3D dst_albedo; -layout(r32ui, set = 0, binding = 2) uniform restrict writeonly uimage3D dst_facing; -layout(r32ui, set = 0, binding = 3) uniform restrict writeonly uimage3D dst_light; -layout(r32ui, set = 0, binding = 4) uniform restrict writeonly uimage3D dst_light_aniso; - -layout(set = 0, binding = 5, std430) restrict buffer readonly DispatchData { - uint x; - uint y; - uint z; - uint total_count; -} -dispatch_data; - -struct ProcessVoxel { - uint position; // xyz 7 bit packed, extra 11 bits for neighbors. 
- uint albedo; //rgb bits 0-15 albedo, bits 16-21 are normal bits (set if geometry exists toward that side), extra 11 bits for neighbours - uint light; //rgbe8985 encoded total saved light, extra 2 bits for neighbours - uint light_aniso; //55555 light anisotropy, extra 2 bits for neighbours - //total neighbours: 26 -}; - -layout(set = 0, binding = 6, std430) restrict buffer readonly ProcessVoxels { - ProcessVoxel data[]; -} -src_process_voxels; - -#endif - -#ifdef MODE_SCROLL_OCCLUSION - -layout(r8, set = 0, binding = 1) uniform restrict image3D dst_occlusion[8]; -layout(r16ui, set = 0, binding = 2) uniform restrict readonly uimage3D src_occlusion; - -#endif - -layout(push_constant, std430) uniform Params { - ivec3 scroll; - - int grid_size; - - ivec3 probe_offset; - int step_size; - - bool half_size; - uint occlusion_index; - int cascade; - uint pad; -} -params; - -void main() { -#ifdef MODE_SCROLL - - // Pixel being shaded - int index = int(gl_GlobalInvocationID.x); - if (index >= dispatch_data.total_count) { //too big - return; - } - - ivec3 read_pos = (ivec3(src_process_voxels.data[index].position) >> ivec3(0, 7, 14)) & ivec3(0x7F); - ivec3 write_pos = read_pos + params.scroll; - - if (any(lessThan(write_pos, ivec3(0))) || any(greaterThanEqual(write_pos, ivec3(params.grid_size)))) { - return; // Fits outside the 3D texture, don't do anything. 
- } - - uint albedo = ((src_process_voxels.data[index].albedo & 0x7FFF) << 1) | 1; //add solid bit - imageStore(dst_albedo, write_pos, uvec4(albedo)); - - uint facing = (src_process_voxels.data[index].albedo >> 15) & 0x3F; //6 anisotropic facing bits - imageStore(dst_facing, write_pos, uvec4(facing)); - - uint light = src_process_voxels.data[index].light & 0x3fffffff; //30 bits of RGBE8985 - imageStore(dst_light, write_pos, uvec4(light)); - - uint light_aniso = src_process_voxels.data[index].light_aniso & 0x3fffffff; //30 bits of 6 anisotropic 5 bits values - imageStore(dst_light_aniso, write_pos, uvec4(light_aniso)); - -#endif - -#ifdef MODE_SCROLL_OCCLUSION - - ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); - if (any(greaterThanEqual(pos, ivec3(params.grid_size) - abs(params.scroll)))) { //too large, do nothing - return; - } - - ivec3 read_pos = pos + max(ivec3(0), -params.scroll); - ivec3 write_pos = pos + max(ivec3(0), params.scroll); - - read_pos.z += params.cascade * params.grid_size; - uint occlusion = imageLoad(src_occlusion, read_pos).r; - read_pos.x += params.grid_size; - occlusion |= imageLoad(src_occlusion, read_pos).r << 16; - - const uint occlusion_shift[8] = uint[](12, 8, 4, 0, 28, 24, 20, 16); - - for (uint i = 0; i < 8; i++) { - float o = float((occlusion >> occlusion_shift[i]) & 0xF) / 15.0; - imageStore(dst_occlusion[i], write_pos, vec4(o)); - } - -#endif - -#ifdef MODE_INITIALIZE_JUMP_FLOOD - - ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); - - uint c = imageLoad(src_color, pos).r; - uvec4 v; - if (bool(c & 0x1)) { - //bit set means this is solid - v.xyz = uvec3(pos); - v.w = 255; //not zero means used - } else { - v.xyz = uvec3(0); - v.w = 0; // zero means unused - } - - imageStore(dst_positions, pos, v); -#endif - -#ifdef MODE_INITIALIZE_JUMP_FLOOD_HALF - - ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); - ivec3 base_pos = pos * 2; - - //since we store in half size, lets kind of randomize what we store, so - //the half size jump flood has a bit 
better chance to find something - uvec4 closest[8]; - int closest_count = 0; - - for (uint i = 0; i < 8; i++) { - ivec3 src_pos = base_pos + ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1)); - uint c = imageLoad(src_color, src_pos).r; - if (bool(c & 1)) { - uvec4 v = uvec4(uvec3(src_pos), 255); - closest[closest_count] = v; - closest_count++; - } - } - - if (closest_count == 0) { - imageStore(dst_positions, pos, uvec4(0)); - } else { - ivec3 indexv = (pos & ivec3(1, 1, 1)) * ivec3(1, 2, 4); - int index = (indexv.x | indexv.y | indexv.z) % closest_count; - imageStore(dst_positions, pos, closest[index]); - } - -#endif - -#ifdef MODE_JUMPFLOOD - - //regular jumpflood, efficient for large steps, inefficient for small steps - ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); - - vec3 posf = vec3(pos); - - if (params.half_size) { - posf = posf * 2.0 + 0.5; - } - - uvec4 p = imageLoad(src_positions, pos); - - if (!params.half_size && p == uvec4(uvec3(pos), 255)) { - imageStore(dst_positions, pos, p); - return; //points to itself and valid, nothing better can be done, just pass - } - - float p_dist; - - if (p.w != 0) { - p_dist = distance(posf, vec3(p.xyz)); - } else { - p_dist = 0.0; //should not matter - } - - const uint offset_count = 26; - const ivec3 offsets[offset_count] = ivec3[]( - ivec3(-1, -1, -1), - ivec3(-1, -1, 0), - ivec3(-1, -1, 1), - ivec3(-1, 0, -1), - ivec3(-1, 0, 0), - ivec3(-1, 0, 1), - ivec3(-1, 1, -1), - ivec3(-1, 1, 0), - ivec3(-1, 1, 1), - ivec3(0, -1, -1), - ivec3(0, -1, 0), - ivec3(0, -1, 1), - ivec3(0, 0, -1), - ivec3(0, 0, 1), - ivec3(0, 1, -1), - ivec3(0, 1, 0), - ivec3(0, 1, 1), - ivec3(1, -1, -1), - ivec3(1, -1, 0), - ivec3(1, -1, 1), - ivec3(1, 0, -1), - ivec3(1, 0, 0), - ivec3(1, 0, 1), - ivec3(1, 1, -1), - ivec3(1, 1, 0), - ivec3(1, 1, 1)); - - for (uint i = 0; i < offset_count; i++) { - ivec3 ofs = pos + offsets[i] * params.step_size; - if (any(lessThan(ofs, ivec3(0))) || any(greaterThanEqual(ofs, ivec3(params.grid_size)))) { - continue; - } - 
uvec4 q = imageLoad(src_positions, ofs); - - if (q.w == 0) { - continue; //was not initialized yet, ignore - } - - float q_dist = distance(posf, vec3(q.xyz)); - if (p.w == 0 || q_dist < p_dist) { - p = q; //just replace because current is unused - p_dist = q_dist; - } - } - - imageStore(dst_positions, pos, p); -#endif - -#ifdef MODE_JUMPFLOOD_OPTIMIZED - //optimized version using shared compute memory - - ivec3 group_offset = ivec3(gl_WorkGroupID.xyz) % params.step_size; - ivec3 group_pos = group_offset + (ivec3(gl_WorkGroupID.xyz) / params.step_size) * ivec3(GROUP_SIZE * params.step_size); - - //load data into local group memory - - if (all(lessThan(ivec3(gl_LocalInvocationID.xyz), ivec3((GROUP_SIZE + 2) / 2)))) { - //use this thread for loading, this method uses less threads for this but its simpler and less divergent - ivec3 base_pos = ivec3(gl_LocalInvocationID.xyz) * 2; - for (uint i = 0; i < 8; i++) { - ivec3 load_pos = base_pos + ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1)); - ivec3 load_global_pos = group_pos + (load_pos - ivec3(1)) * params.step_size; - uvec4 q; - if (all(greaterThanEqual(load_global_pos, ivec3(0))) && all(lessThan(load_global_pos, ivec3(params.grid_size)))) { - q = imageLoad(src_positions, load_global_pos); - } else { - q = uvec4(0); //unused - } - - group_store(load_pos, q); - } - } - - ivec3 global_pos = group_pos + ivec3(gl_LocalInvocationID.xyz) * params.step_size; - - if (any(lessThan(global_pos, ivec3(0))) || any(greaterThanEqual(global_pos, ivec3(params.grid_size)))) { - return; //do nothing else, end here because outside range - } - - //sync - groupMemoryBarrier(); - barrier(); - - ivec3 local_pos = ivec3(gl_LocalInvocationID.xyz) + ivec3(1); - - const uint offset_count = 27; - const ivec3 offsets[offset_count] = ivec3[]( - ivec3(-1, -1, -1), - ivec3(-1, -1, 0), - ivec3(-1, -1, 1), - ivec3(-1, 0, -1), - ivec3(-1, 0, 0), - ivec3(-1, 0, 1), - ivec3(-1, 1, -1), - ivec3(-1, 1, 0), - ivec3(-1, 1, 1), - ivec3(0, -1, -1), - ivec3(0, 
-1, 0), - ivec3(0, -1, 1), - ivec3(0, 0, -1), - ivec3(0, 0, 0), - ivec3(0, 0, 1), - ivec3(0, 1, -1), - ivec3(0, 1, 0), - ivec3(0, 1, 1), - ivec3(1, -1, -1), - ivec3(1, -1, 0), - ivec3(1, -1, 1), - ivec3(1, 0, -1), - ivec3(1, 0, 0), - ivec3(1, 0, 1), - ivec3(1, 1, -1), - ivec3(1, 1, 0), - ivec3(1, 1, 1)); - - //only makes sense if point is inside screen - uvec4 closest = uvec4(0); - float closest_dist = 0.0; - - vec3 posf = vec3(global_pos); - - if (params.half_size) { - posf = posf * 2.0 + 0.5; - } - - for (uint i = 0; i < offset_count; i++) { - uvec4 point = group_load(local_pos + offsets[i]); - - if (point.w == 0) { - continue; //was not initialized yet, ignore - } - - float dist = distance(posf, vec3(point.xyz)); - if (closest.w == 0 || dist < closest_dist) { - closest = point; - closest_dist = dist; - } - } - - imageStore(dst_positions, global_pos, closest); - -#endif - -#ifdef MODE_UPSCALE_JUMP_FLOOD - - ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); - - uint c = imageLoad(src_color, pos).r; - uvec4 v; - if (bool(c & 1)) { - //bit set means this is solid - v.xyz = uvec3(pos); - v.w = 255; //not zero means used - } else { - v = imageLoad(src_positions_half, pos >> 1); - float d = length(vec3(ivec3(v.xyz) - pos)); - - ivec3 vbase = ivec3(v.xyz - (v.xyz & uvec3(1))); - - //search around if there is a better candidate from the same block - for (int i = 0; i < 8; i++) { - ivec3 bits = ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1)); - ivec3 p = vbase + bits; - - float d2 = length(vec3(p - pos)); - if (d2 < d) { //check valid distance before test so we avoid a read - uint c2 = imageLoad(src_color, p).r; - if (bool(c2 & 1)) { - v.xyz = uvec3(p); - d = d2; - } - } - } - - //could validate better position.. 
- } - - imageStore(dst_positions, pos, v); - -#endif - -#ifdef MODE_OCCLUSION - - uint invocation_idx = uint(gl_LocalInvocationID.x); - ivec3 region = ivec3(gl_WorkGroupID); - - ivec3 region_offset = -ivec3(OCCLUSION_SIZE); - region_offset += region * OCCLUSION_SIZE * 2; - region_offset += params.probe_offset * OCCLUSION_SIZE; - - if (params.scroll != ivec3(0)) { - //validate scroll region - ivec3 region_offset_to = region_offset + ivec3(OCCLUSION_SIZE * 2); - uvec3 scroll_mask = uvec3(notEqual(params.scroll, ivec3(0))); //save which axes acre scrolling - ivec3 scroll_from = mix(ivec3(0), ivec3(params.grid_size) + params.scroll, lessThan(params.scroll, ivec3(0))); - ivec3 scroll_to = mix(ivec3(params.grid_size), params.scroll, greaterThan(params.scroll, ivec3(0))); - - if ((uvec3(lessThanEqual(region_offset_to, scroll_from)) | uvec3(greaterThanEqual(region_offset, scroll_to))) * scroll_mask == scroll_mask) { //all axes that scroll are out, exit - return; //region outside scroll bounds, quit - } - } - -#define OCC_HALF_SIZE (OCCLUSION_SIZE / 2) - - ivec3 local_ofs = ivec3(uvec3(invocation_idx % OCC_HALF_SIZE, (invocation_idx % (OCC_HALF_SIZE * OCC_HALF_SIZE)) / OCC_HALF_SIZE, invocation_idx / (OCC_HALF_SIZE * OCC_HALF_SIZE))) * 4; - - /* for(int i=0;i<64;i++) { - ivec3 offset = region_offset + local_ofs + ((ivec3(i) >> ivec3(0,2,4)) & ivec3(3,3,3)); - uint facig = - if (all(greaterThanEqual(offset,ivec3(0))) && all(lessThan(offset,ivec3(params.grid_size)))) {*/ - - for (int i = 0; i < 16; i++) { //skip x, so it can be packed - - ivec3 offset = local_ofs + ((ivec3(i * 4) >> ivec3(0, 2, 4)) & ivec3(3, 3, 3)); - - uint facing_pack = 0; - for (int j = 0; j < 4; j++) { - ivec3 foffset = region_offset + offset + ivec3(j, 0, 0); - if (all(greaterThanEqual(foffset, ivec3(0))) && all(lessThan(foffset, ivec3(params.grid_size)))) { - uint f = imageLoad(src_facing, foffset).r; - facing_pack |= f << (j * 8); - } - } - - occlusion_facing[(offset.z * (OCCLUSION_SIZE * 2 * 
OCCLUSION_SIZE * 2) + offset.y * (OCCLUSION_SIZE * 2) + offset.x) / 4] = facing_pack; - } - - //sync occlusion saved - groupMemoryBarrier(); - barrier(); - - //process occlusion - -#define OCC_STEPS (OCCLUSION_SIZE * 3 - 2) -#define OCC_HALF_STEPS (OCC_STEPS / 2) - - for (int step = 0; step < OCC_STEPS; step++) { - bool shrink = step >= OCC_HALF_STEPS; - int occ_step = shrink ? OCC_HALF_STEPS - (step - OCC_HALF_STEPS) - 1 : step; - - if (invocation_idx < group_size_offset[occ_step].x) { - uint pv = group_pos[group_size_offset[occ_step].y + invocation_idx]; - ivec3 proc_abs = (ivec3(int(pv)) >> ivec3(0, 8, 16)) & ivec3(0xFF); - - if (shrink) { - proc_abs = ivec3(OCCLUSION_SIZE) - proc_abs - ivec3(1); - } - - for (int i = 0; i < 8; i++) { - ivec3 bits = ((ivec3(i) >> ivec3(0, 1, 2)) & ivec3(1, 1, 1)); - ivec3 proc_sign = bits * 2 - 1; - ivec3 local_offset = ivec3(OCCLUSION_SIZE) + proc_abs * proc_sign - (ivec3(1) - bits); - ivec3 offset = local_offset + region_offset; - if (all(greaterThanEqual(offset, ivec3(0))) && all(lessThan(offset, ivec3(params.grid_size)))) { - float occ; - - uint facing = get_facing(local_offset); - - if (facing != 0) { //solid - occ = 0.0; - } else if (step == 0) { -#if 0 - occ = 0.0; - if (get_facing(local_offset - ivec3(proc_sign.x,0,0))==0) { - occ+=1.0; - } - if (get_facing(local_offset - ivec3(0,proc_sign.y,0))==0) { - occ+=1.0; - } - if (get_facing(local_offset - ivec3(0,0,proc_sign.z))==0) { - occ+=1.0; - } - /* - if (get_facing(local_offset - proc_sign)==0) { - occ+=1.0; - }*/ - - occ/=3.0; -#endif - occ = 1.0; - - } else { - ivec3 read_dir = -proc_sign; - - ivec3 major_axis; - if (proc_abs.x < proc_abs.y) { - if (proc_abs.z < proc_abs.y) { - major_axis = ivec3(0, 1, 0); - } else { - major_axis = ivec3(0, 0, 1); - } - } else { - if (proc_abs.z < proc_abs.x) { - major_axis = ivec3(1, 0, 0); - } else { - major_axis = ivec3(0, 0, 1); - } - } - - float avg = 0.0; - occ = 0.0; - - ivec3 read_x = offset + ivec3(read_dir.x, 0, 0) + 
(proc_abs.x == 0 ? major_axis * read_dir : ivec3(0)); - ivec3 read_y = offset + ivec3(0, read_dir.y, 0) + (proc_abs.y == 0 ? major_axis * read_dir : ivec3(0)); - ivec3 read_z = offset + ivec3(0, 0, read_dir.z) + (proc_abs.z == 0 ? major_axis * read_dir : ivec3(0)); - - uint facing_x = get_facing(read_x - region_offset); - if (facing_x == 0) { - if (all(greaterThanEqual(read_x, ivec3(0))) && all(lessThan(read_x, ivec3(params.grid_size)))) { - occ += imageLoad(dst_occlusion[params.occlusion_index], read_x).r; - avg += 1.0; - } - } else { - if (proc_abs.x != 0) { //do not occlude from voxels in the opposite octant - avg += 1.0; - } - } - - uint facing_y = get_facing(read_y - region_offset); - if (facing_y == 0) { - if (all(greaterThanEqual(read_y, ivec3(0))) && all(lessThan(read_y, ivec3(params.grid_size)))) { - occ += imageLoad(dst_occlusion[params.occlusion_index], read_y).r; - avg += 1.0; - } - } else { - if (proc_abs.y != 0) { - avg += 1.0; - } - } - - uint facing_z = get_facing(read_z - region_offset); - if (facing_z == 0) { - if (all(greaterThanEqual(read_z, ivec3(0))) && all(lessThan(read_z, ivec3(params.grid_size)))) { - occ += imageLoad(dst_occlusion[params.occlusion_index], read_z).r; - avg += 1.0; - } - } else { - if (proc_abs.z != 0) { - avg += 1.0; - } - } - - if (avg > 0.0) { - occ /= avg; - } - } - - imageStore(dst_occlusion[params.occlusion_index], offset, vec4(occ)); - } - } - } - - groupMemoryBarrier(); - barrier(); - } -#if 1 - //bias solid voxels away - - for (int i = 0; i < 64; i++) { - ivec3 local_offset = local_ofs + ((ivec3(i) >> ivec3(0, 2, 4)) & ivec3(3, 3, 3)); - ivec3 offset = region_offset + local_offset; - - if (all(greaterThanEqual(offset, ivec3(0))) && all(lessThan(offset, ivec3(params.grid_size)))) { - uint facing = get_facing(local_offset); - - if (facing != 0) { - //only work on solids - - ivec3 proc_pos = local_offset - ivec3(OCCLUSION_SIZE); - proc_pos += mix(ivec3(0), ivec3(1), greaterThanEqual(proc_pos, ivec3(0))); - - float avg 
= 0.0; - float occ = 0.0; - - ivec3 read_dir = -sign(proc_pos); - ivec3 read_dir_x = ivec3(read_dir.x, 0, 0); - ivec3 read_dir_y = ivec3(0, read_dir.y, 0); - ivec3 read_dir_z = ivec3(0, 0, read_dir.z); - //solid -#if 0 - - uvec3 facing_pos_base = (uvec3(facing) >> uvec3(0,1,2)) & uvec3(1,1,1); - uvec3 facing_neg_base = (uvec3(facing) >> uvec3(3,4,5)) & uvec3(1,1,1); - uvec3 facing_pos= facing_pos_base &((~facing_neg_base)&uvec3(1,1,1)); - uvec3 facing_neg= facing_neg_base &((~facing_pos_base)&uvec3(1,1,1)); -#else - uvec3 facing_pos = (uvec3(facing) >> uvec3(0, 1, 2)) & uvec3(1, 1, 1); - uvec3 facing_neg = (uvec3(facing) >> uvec3(3, 4, 5)) & uvec3(1, 1, 1); -#endif - bvec3 read_valid = bvec3(mix(facing_neg, facing_pos, greaterThan(read_dir, ivec3(0)))); - - //sides - if (read_valid.x) { - ivec3 read_offset = local_offset + read_dir_x; - uint f = get_facing(read_offset); - if (f == 0) { - read_offset += region_offset; - if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { - occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; - avg += 1.0; - } - } - } - - if (read_valid.y) { - ivec3 read_offset = local_offset + read_dir_y; - uint f = get_facing(read_offset); - if (f == 0) { - read_offset += region_offset; - if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { - occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; - avg += 1.0; - } - } - } - - if (read_valid.z) { - ivec3 read_offset = local_offset + read_dir_z; - uint f = get_facing(read_offset); - if (f == 0) { - read_offset += region_offset; - if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { - occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; - avg += 1.0; - } - } - } - - //adjacents - - if (all(read_valid.yz)) { - ivec3 read_offset = local_offset + read_dir_y + read_dir_z; - uint f = 
get_facing(read_offset); - if (f == 0) { - read_offset += region_offset; - if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { - occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; - avg += 1.0; - } - } - } - - if (all(read_valid.xz)) { - ivec3 read_offset = local_offset + read_dir_x + read_dir_z; - uint f = get_facing(read_offset); - if (f == 0) { - read_offset += region_offset; - if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { - occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; - avg += 1.0; - } - } - } - - if (all(read_valid.xy)) { - ivec3 read_offset = local_offset + read_dir_x + read_dir_y; - uint f = get_facing(read_offset); - if (f == 0) { - read_offset += region_offset; - if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { - occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; - avg += 1.0; - } - } - } - - //diagonal - - if (all(read_valid)) { - ivec3 read_offset = local_offset + read_dir; - uint f = get_facing(read_offset); - if (f == 0) { - read_offset += region_offset; - if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { - occ += imageLoad(dst_occlusion[params.occlusion_index], read_offset).r; - avg += 1.0; - } - } - } - - if (avg > 0.0) { - occ /= avg; - } - - imageStore(dst_occlusion[params.occlusion_index], offset, vec4(occ)); - } - } - } - -#endif - -#if 1 - groupMemoryBarrier(); - barrier(); - - for (int i = 0; i < 64; i++) { - ivec3 local_offset = local_ofs + ((ivec3(i) >> ivec3(0, 2, 4)) & ivec3(3, 3, 3)); - ivec3 offset = region_offset + local_offset; - - if (all(greaterThanEqual(offset, ivec3(0))) && all(lessThan(offset, ivec3(params.grid_size)))) { - uint facing = get_facing(local_offset); - - if (facing == 0) { - ivec3 proc_pos = local_offset - 
ivec3(OCCLUSION_SIZE); - proc_pos += mix(ivec3(0), ivec3(1), greaterThanEqual(proc_pos, ivec3(0))); - - ivec3 proc_abs = abs(proc_pos); - - ivec3 read_dir = sign(proc_pos); //opposite direction - ivec3 read_dir_x = ivec3(read_dir.x, 0, 0); - ivec3 read_dir_y = ivec3(0, read_dir.y, 0); - ivec3 read_dir_z = ivec3(0, 0, read_dir.z); - //solid - uvec3 read_mask = mix(uvec3(1, 2, 4), uvec3(8, 16, 32), greaterThan(read_dir, ivec3(0))); //match positive with negative normals - uvec3 block_mask = mix(uvec3(1, 2, 4), uvec3(8, 16, 32), lessThan(read_dir, ivec3(0))); //match positive with negative normals - - block_mask = uvec3(0); - - float visible = 0.0; - float occlude_total = 0.0; - - if (proc_abs.x < OCCLUSION_SIZE) { - ivec3 read_offset = local_offset + read_dir_x; - uint x_mask = get_facing(read_offset); - if (x_mask != 0) { - read_offset += region_offset; - if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { - occlude_total += 1.0; - if (bool(x_mask & read_mask.x) && !bool(x_mask & block_mask.x)) { - visible += 1.0; - } - } - } - } - - if (proc_abs.y < OCCLUSION_SIZE) { - ivec3 read_offset = local_offset + read_dir_y; - uint y_mask = get_facing(read_offset); - if (y_mask != 0) { - read_offset += region_offset; - if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { - occlude_total += 1.0; - if (bool(y_mask & read_mask.y) && !bool(y_mask & block_mask.y)) { - visible += 1.0; - } - } - } - } - - if (proc_abs.z < OCCLUSION_SIZE) { - ivec3 read_offset = local_offset + read_dir_z; - uint z_mask = get_facing(read_offset); - if (z_mask != 0) { - read_offset += region_offset; - if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { - occlude_total += 1.0; - if (bool(z_mask & read_mask.z) && !bool(z_mask & block_mask.z)) { - visible += 1.0; - } - } - } - } - - //if near the cartesian plane, test in opposite direction too 
- - read_mask = mix(uvec3(1, 2, 4), uvec3(8, 16, 32), lessThan(read_dir, ivec3(0))); //match negative with positive normals - block_mask = mix(uvec3(1, 2, 4), uvec3(8, 16, 32), greaterThan(read_dir, ivec3(0))); //match negative with positive normals - block_mask = uvec3(0); - - if (proc_abs.x == 1) { - ivec3 read_offset = local_offset - read_dir_x; - uint x_mask = get_facing(read_offset); - if (x_mask != 0) { - read_offset += region_offset; - if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { - occlude_total += 1.0; - if (bool(x_mask & read_mask.x) && !bool(x_mask & block_mask.x)) { - visible += 1.0; - } - } - } - } - - if (proc_abs.y == 1) { - ivec3 read_offset = local_offset - read_dir_y; - uint y_mask = get_facing(read_offset); - if (y_mask != 0) { - read_offset += region_offset; - if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { - occlude_total += 1.0; - if (bool(y_mask & read_mask.y) && !bool(y_mask & block_mask.y)) { - visible += 1.0; - } - } - } - } - - if (proc_abs.z == 1) { - ivec3 read_offset = local_offset - read_dir_z; - uint z_mask = get_facing(read_offset); - if (z_mask != 0) { - read_offset += region_offset; - if (all(greaterThanEqual(read_offset, ivec3(0))) && all(lessThan(read_offset, ivec3(params.grid_size)))) { - occlude_total += 1.0; - if (bool(z_mask & read_mask.z) && !bool(z_mask & block_mask.z)) { - visible += 1.0; - } - } - } - } - - if (occlude_total > 0.0) { - float occ = imageLoad(dst_occlusion[params.occlusion_index], offset).r; - occ *= visible / occlude_total; - imageStore(dst_occlusion[params.occlusion_index], offset, vec4(occ)); - } - } - } - } - -#endif - - /* - for(int i=0;i<8;i++) { - ivec3 local_offset = local_pos + ((ivec3(i) >> ivec3(2,1,0)) & ivec3(1,1,1)) * OCCLUSION_SIZE; - ivec3 offset = local_offset - ivec3(OCCLUSION_SIZE); //looking around probe, so starts negative - offset += region * OCCLUSION_SIZE * 2; 
//offset by region - offset += params.probe_offset * OCCLUSION_SIZE; // offset by probe offset - if (all(greaterThanEqual(offset,ivec3(0))) && all(lessThan(offset,ivec3(params.grid_size)))) { - imageStore(dst_occlusion[params.occlusion_index],offset,vec4( occlusion_data[ to_linear(local_offset) ] )); - //imageStore(dst_occlusion[params.occlusion_index],offset,vec4( occlusion_solid[ to_linear(local_offset) ] )); - } - } -*/ - -#endif - -#ifdef MODE_STORE - - ivec3 local = ivec3(gl_LocalInvocationID.xyz); - ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); - // store SDF - uvec4 p = imageLoad(src_positions, pos); - - bool solid = false; - float d; - if (ivec3(p.xyz) == pos) { - //solid block - d = 0; - solid = true; - } else { - //distance block - d = 1.0 + length(vec3(p.xyz) - vec3(pos)); - } - - d /= 255.0; - - imageStore(dst_sdf, pos, vec4(d)); - - // STORE OCCLUSION - - uint occlusion = 0; - const uint occlusion_shift[8] = uint[](12, 8, 4, 0, 28, 24, 20, 16); - for (int i = 0; i < 8; i++) { - float occ = imageLoad(src_occlusion[i], pos).r; - occlusion |= uint(clamp(occ * 15.0, 0.0, 15.0)) << occlusion_shift[i]; - } - { - ivec3 occ_pos = pos; - occ_pos.z += params.cascade * params.grid_size; - imageStore(dst_occlusion, occ_pos, uvec4(occlusion & 0xFFFF)); - occ_pos.x += params.grid_size; - imageStore(dst_occlusion, occ_pos, uvec4(occlusion >> 16)); - } - - // STORE POSITIONS - - if (local == ivec3(0)) { - store_position_count = 0; //base one stores as zero, the others wait - } - - groupMemoryBarrier(); - barrier(); - - if (solid) { - uint index = atomicAdd(store_position_count, 1); - // At least do the conversion work in parallel - store_positions[index].position = uint(pos.x | (pos.y << 7) | (pos.z << 14)); - - //see around which voxels point to this one, add them to the list - uint bit_index = 0; - uint neighbour_bits = 0; - for (int i = -1; i <= 1; i++) { - for (int j = -1; j <= 1; j++) { - for (int k = -1; k <= 1; k++) { - if (i == 0 && j == 0 && k == 0) { - 
continue; - } - ivec3 npos = pos + ivec3(i, j, k); - if (all(greaterThanEqual(npos, ivec3(0))) && all(lessThan(npos, ivec3(params.grid_size)))) { - p = imageLoad(src_positions, npos); - if (ivec3(p.xyz) == pos) { - neighbour_bits |= (1 << bit_index); - } - } - bit_index++; - } - } - } - - uint rgb = imageLoad(src_albedo, pos).r; - uint facing = imageLoad(src_facing, pos).r; - - store_positions[index].albedo = rgb >> 1; //store as it comes (555) to avoid precision loss (and move away the alpha bit) - store_positions[index].albedo |= (facing & 0x3F) << 15; // store facing in bits 15-21 - - store_positions[index].albedo |= neighbour_bits << 21; //store lower 11 bits of neighbours with remaining albedo - store_positions[index].position |= (neighbour_bits >> 11) << 21; //store 11 bits more of neighbours with position - - store_positions[index].light = imageLoad(src_light, pos).r; - store_positions[index].light_aniso = imageLoad(src_light_aniso, pos).r; - //add neighbours - store_positions[index].light |= (neighbour_bits >> 22) << 30; //store 2 bits more of neighbours with light - store_positions[index].light_aniso |= (neighbour_bits >> 24) << 30; //store 2 bits more of neighbours with aniso - } - - groupMemoryBarrier(); - barrier(); - - // global increment only once per group, to reduce pressure - - if (local == ivec3(0) && store_position_count > 0) { - store_from_index = atomicAdd(dispatch_data.total_count, store_position_count); - uint group_count = (store_from_index + store_position_count - 1) / 64 + 1; - atomicMax(dispatch_data.x, group_count); - } - - groupMemoryBarrier(); - barrier(); - - uint read_index = uint(local.z * 4 * 4 + local.y * 4 + local.x); - uint write_index = store_from_index + read_index; - - if (read_index < store_position_count) { - dst_process_voxels.data[write_index] = store_positions[read_index]; - } - - if (pos == ivec3(0)) { - //this thread clears y and z - dispatch_data.y = 1; - dispatch_data.z = 1; - } -#endif -} diff --git 
a/servers/rendering/renderer_rd/shaders/voxel_gi.glsl b/servers/rendering/renderer_rd/shaders/voxel_gi.glsl deleted file mode 100644 index 577c6d0cd0..0000000000 --- a/servers/rendering/renderer_rd/shaders/voxel_gi.glsl +++ /dev/null @@ -1,616 +0,0 @@ -#[compute] - -#version 450 - -#VERSION_DEFINES - -#ifdef MODE_DYNAMIC -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; -#else -layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; -#endif - -#ifndef MODE_DYNAMIC - -#define NO_CHILDREN 0xFFFFFFFF -/* Child links of one cell: indices into the per-cell buffers, NO_CHILDREN marks an empty slot. */ -struct CellChildren { - uint children[8]; -}; - -layout(set = 0, binding = 1, std430) buffer CellChildrenBuffer { - CellChildren data[]; -} -cell_children; -/* Packed per-cell attributes; main() below shows the exact unpacking. */ -struct CellData { - uint position; // packed coordinates: x in bits 0-10, y in bits 11-20, z in bits 21-31 - uint albedo; //rgba albedo, 8 bits per channel (read with unpackUnorm4x8) - uint emission; //rgb, 9 bits per channel, with a shared exponent in the top 5 bits - uint normal; //normal, 8-bit signed normalized components (read with unpackSnorm4x8) -}; - -layout(set = 0, binding = 2, std430) buffer CellDataBuffer { - CellData data[]; -} -cell_data; - -#endif // MODE DYNAMIC - -#define LIGHT_TYPE_DIRECTIONAL 0 -#define LIGHT_TYPE_OMNI 1 -#define LIGHT_TYPE_SPOT 2 - -#if defined(MODE_COMPUTE_LIGHT) || defined(MODE_DYNAMIC_LIGHTING) -/* One light source, as read by compute_light_vector() and compute_light_at_pos(). */ -struct Light { - uint type; - float energy; - float radius; - float attenuation; - - vec3 color; - float cos_spot_angle; - - vec3 position; - float inv_spot_attenuation; - - vec3 direction; - bool has_shadow; -}; - -layout(set = 0, binding = 3, std140) uniform Lights { - Light data[MAX_LIGHTS]; -} -lights; - -#endif // MODE COMPUTE LIGHT - -#ifdef MODE_SECOND_BOUNCE - -layout(set = 0, binding = 5) uniform texture3D color_texture; - -#endif // MODE_SECOND_BOUNCE - -#ifndef MODE_DYNAMIC -/* Push constants for the per-cell passes (everything except MODE_DYNAMIC). */ -layout(push_constant, std430) uniform Params { - ivec3 limits; - uint stack_size; - - float emission_scale; - float propagation; - float dynamic_range; - - uint light_count; - uint cell_offset; - uint cell_count; - float aniso_strength; - uint pad; -} -params; -/* Per-cell light values, indexed with the same cell index as cell_data. */ -layout(set = 0, binding = 4, std430) buffer Outputs { - vec4 data[]; -} -outputs; - -#endif 
// MODE DYNAMIC -/* Distance field sampled by raymarch(), which decodes the red channel as r * 255.0 - 1.0. */ -layout(set = 0, binding = 9) uniform texture3D texture_sdf; -layout(set = 0, binding = 10) uniform sampler texture_sampler; - -#ifdef MODE_WRITE_TEXTURE - -layout(rgba8, set = 0, binding = 5) uniform restrict writeonly image3D color_tex; - -#endif - -#ifdef MODE_DYNAMIC -/* Push constants for the MODE_DYNAMIC passes, which run one invocation per texel of a 2D rect. */ -layout(push_constant, std430) uniform Params { - ivec3 limits; - uint light_count; //when not lighting - ivec3 x_dir; - float z_base; - ivec3 y_dir; - float z_sign; - ivec3 z_dir; - float pos_multiplier; - ivec2 rect_pos; - ivec2 rect_size; - ivec2 prev_rect_ofs; - ivec2 prev_rect_size; - bool flip_x; - bool flip_y; - float dynamic_range; - bool on_mipmap; - float propagation; - float pad[3]; -} -params; - -#ifdef MODE_DYNAMIC_LIGHTING - -layout(rgba8, set = 0, binding = 5) uniform restrict readonly image2D source_albedo; -layout(rgba8, set = 0, binding = 6) uniform restrict readonly image2D source_normal; -layout(rgba8, set = 0, binding = 7) uniform restrict readonly image2D source_orm; -//layout (set=0,binding=8) uniform texture2D source_depth; -layout(rgba16f, set = 0, binding = 11) uniform restrict image2D emission; -layout(r32f, set = 0, binding = 12) uniform restrict image2D depth; - -#endif - -#ifdef MODE_DYNAMIC_SHRINK - -layout(rgba16f, set = 0, binding = 5) uniform restrict readonly image2D source_light; -layout(r32f, set = 0, binding = 6) uniform restrict readonly image2D source_depth; - -#ifdef MODE_DYNAMIC_SHRINK_WRITE - -layout(rgba16f, set = 0, binding = 7) uniform restrict writeonly image2D light; -layout(r32f, set = 0, binding = 8) uniform restrict writeonly image2D depth; - -#endif // MODE_DYNAMIC_SHRINK_WRITE - -#ifdef MODE_DYNAMIC_SHRINK_PLOT - -layout(rgba8, set = 0, binding = 11) uniform restrict image3D color_texture; - -#endif //MODE_DYNAMIC_SHRINK_PLOT - -#endif // MODE_DYNAMIC_SHRINK - -//layout (rgba8,set=0,binding=5) uniform restrict writeonly image3D color_tex; - -#endif // MODE DYNAMIC - -#if defined(MODE_COMPUTE_LIGHT) || 
defined(MODE_DYNAMIC_LIGHTING) -/* Steps from 'from' along 'direction' until 'distance' drops to 0.5 or less, sampling texture_sdf at every step. Returns 0.0 as soon as a sample decodes to a negative distance, otherwise the smallest decoded distance seen, capped at 1.0. */ -float raymarch(float distance, float distance_adv, vec3 from, vec3 direction) { - vec3 cell_size = 1.0 / vec3(params.limits); - float occlusion = 1.0; - while (distance > 0.5) { //use this to avoid precision errors - float advance = texture(sampler3D(texture_sdf, texture_sampler), from * cell_size).r * 255.0 - 1.0; - if (advance < 0.0) { - occlusion = 0.0; - break; - } - - occlusion = min(advance, occlusion); - - advance = max(distance_adv, advance - mod(advance, distance_adv)); //should always advance in multiples of distance_adv - - from += direction * advance; - distance -= advance; - } - - return occlusion; //max(0.0,distance); -} -/* Returns max(1 - (distance * inv_range)^4, 0)^2 * distance^(-decay); distance is clamped to at least 0.0001 inside the pow(). */ -float get_omni_attenuation(float distance, float inv_range, float decay) { - float nd = distance * inv_range; - nd *= nd; - nd *= nd; // nd^4 - nd = max(1.0 - nd, 0.0); - nd *= nd; // nd^2 - return nd * pow(max(distance, 0.0001), -decay); -} -/* Writes the light position and attenuation to use at 'pos'. Directional lights get a position one grid diagonal away against the light direction and attenuation 1.0. Returns false when pos is at or beyond the light radius, or outside the cone of a spot light. */ -bool compute_light_vector(uint light, vec3 pos, out float attenuation, out vec3 light_pos) { - if (lights.data[light].type == LIGHT_TYPE_DIRECTIONAL) { - light_pos = pos - lights.data[light].direction * length(vec3(params.limits)); - attenuation = 1.0; - - } else { - light_pos = lights.data[light].position; - float distance = length(pos - light_pos); - if (distance >= lights.data[light].radius) { - return false; - } - - attenuation = get_omni_attenuation(distance, 1.0 / lights.data[light].radius, lights.data[light].attenuation); - - if (lights.data[light].type == LIGHT_TYPE_SPOT) { - vec3 rel = normalize(pos - light_pos); - float cos_spot_angle = lights.data[light].cos_spot_angle; - float cos_angle = dot(rel, lights.data[light].direction); - if (cos_angle < cos_spot_angle) { - return false; - } - - float scos = max(cos_angle, cos_spot_angle); - float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cos_spot_angle)); - attenuation *= 1.0 - pow(spot_rim, lights.data[light].inv_spot_attenuation); - } - } - - return true; -} - -float get_normal_advance(vec3 
p_normal) { - vec3 normal = p_normal; - vec3 unorm = abs(normal); - /* pick the signed unit axis along which p_normal has its largest magnitude */ - if ((unorm.x >= unorm.y) && (unorm.x >= unorm.z)) { - // x is the dominant axis - unorm = normal.x > 0.0 ? vec3(1.0, 0.0, 0.0) : vec3(-1.0, 0.0, 0.0); - } else if ((unorm.y > unorm.x) && (unorm.y >= unorm.z)) { - // y is the dominant axis - unorm = normal.y > 0.0 ? vec3(0.0, 1.0, 0.0) : vec3(0.0, -1.0, 0.0); - } else if ((unorm.z > unorm.x) && (unorm.z > unorm.y)) { - // z is the dominant axis - unorm = normal.z > 0.0 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 0.0, -1.0); - } else { - // the three tests above cover every ordering of finite values, so this looks - // reachable only when a comparison fails (e.g. NaN components); fall back to +x - unorm = vec3(1.0, 0.0, 0.0); - } - /* in the three regular branches the dot product equals the magnitude of the dominant component, so the result is the step length along p_normal that advances one unit on that axis */ - return 1.0 / dot(normal, unorm); -} -/* Clips the segment begin->end against 'plane' (xyz = normal, w = offset): when the intersection parameter lies in [0.0001, 1.0001] 'end' is moved onto the plane, otherwise (including den < 0.0001) 'end' is left untouched. */ -void clip_segment(vec4 plane, vec3 begin, inout vec3 end) { - vec3 segment = begin - end; - float den = dot(plane.xyz, segment); - - //printf("den is %i\n",den); - if (den < 0.0001) { - return; - } - - float dist = (dot(plane.xyz, begin) - plane.w) / den; - - if (dist < 0.0001 || dist > 1.0001) { - return; - } - - end = begin + segment * -dist; -} -/* Computes what light 'index' contributes at 'pos'. Returns false when the light does not reach pos, when 'normal' (if longer than 0.2) faces away from it, or when a shadowed light is fully occluded by the raymarch. On success 'light' holds color * attenuation * energy and 'light_dir' the normalized direction from the light towards pos. */ -bool compute_light_at_pos(uint index, vec3 pos, vec3 normal, inout vec3 light, inout vec3 light_dir) { - float attenuation; - vec3 light_pos; - - if (!compute_light_vector(index, pos, attenuation, light_pos)) { - return false; - } - - light_dir = normalize(pos - light_pos); - - if (attenuation < 0.01 || (length(normal) > 0.2 && dot(normal, light_dir) >= 0)) { - return false; //not facing the light, or attenuation is near zero - } - - if (lights.data[index].has_shadow) { - float distance_adv = get_normal_advance(light_dir); - - vec3 to = pos; - if (length(normal) > 0.2) { - to += normal * distance_adv * 0.51; - } else { - to -= sign(light_dir) * 0.45; //go near the edge towards the light direction to avoid self occlusion - } - - //clip the light position to the grid bounds, one axis at a time - clip_segment(mix(vec4(-1.0, 0.0, 0.0, 0.0), vec4(1.0, 0.0, 0.0, float(params.limits.x - 1)), bvec4(light_dir.x < 0.0)), to, light_pos); - clip_segment(mix(vec4(0.0, -1.0, 0.0, 0.0), vec4(0.0, 1.0, 0.0, float(params.limits.y - 1)), bvec4(light_dir.y < 0.0)), to, light_pos); - 
clip_segment(mix(vec4(0.0, 0.0, -1.0, 0.0), vec4(0.0, 0.0, 1.0, float(params.limits.z - 1)), bvec4(light_dir.z < 0.0)), to, light_pos); - - float distance = length(to - light_pos); - if (distance < 0.1) { - return false; // hit - } - - distance += distance_adv - mod(distance, distance_adv); //make it reach the center of the box always - light_pos = to - light_dir * distance; - - //from -= sign(light_dir)*0.45; //go near the edge towards the light direction to avoid self occlusion - - /*float dist = raymarch(distance,distance_adv,light_pos,light_dir); - - if (dist > distance_adv) { - return false; - } - - attenuation *= 1.0 - smoothstep(0.1*distance_adv,distance_adv,dist); - */ - - float occlusion = raymarch(distance, distance_adv, light_pos, light_dir); - - if (occlusion == 0.0) { - return false; - } - - attenuation *= occlusion; //1.0 - smoothstep(0.1*distance_adv,distance_adv,dist); - } - - light = lights.data[index].color * attenuation * lights.data[index].energy; - return true; -} - -#endif // MODE COMPUTE LIGHT -/* Entry point. The MODE_* defines select which section is compiled: without MODE_DYNAMIC each invocation handles one cell (gl_GlobalInvocationID.x), with MODE_DYNAMIC each invocation handles one texel of a 2D rect (gl_GlobalInvocationID.xy). */ -void main() { -#ifndef MODE_DYNAMIC - - uint cell_index = gl_GlobalInvocationID.x; - if (cell_index >= params.cell_count) { - return; - } - cell_index += params.cell_offset; - /* unpack the cell coordinates: 11 bits x, 10 bits y, remaining 11 bits z */ - uvec3 posu = uvec3(cell_data.data[cell_index].position & 0x7FF, (cell_data.data[cell_index].position >> 11) & 0x3FF, cell_data.data[cell_index].position >> 21); - vec4 albedo = unpackUnorm4x8(cell_data.data[cell_index].albedo); - -#endif - - /////////////////COMPUTE LIGHT/////////////////////////////// - -#ifdef MODE_COMPUTE_LIGHT - - vec3 pos = vec3(posu) + vec3(0.5); - /* decode emission: 9 bits per channel, scaled by 2^(exponent - 15 - 9) with the exponent stored in the top 5 bits */ - vec3 emission = vec3(uvec3(cell_data.data[cell_index].emission & 0x1ff, (cell_data.data[cell_index].emission >> 9) & 0x1ff, (cell_data.data[cell_index].emission >> 18) & 0x1ff)) * pow(2.0, float(cell_data.data[cell_index].emission >> 27) - 15.0 - 9.0); - vec3 normal = unpackSnorm4x8(cell_data.data[cell_index].normal).xyz; - - vec3 accum = vec3(0.0); - - for (uint i = 0; i < params.light_count; i++) { - vec3 
light; - vec3 light_dir; - if (!compute_light_at_pos(i, pos, normal.xyz, light, light_dir)) { - continue; - } - - light *= albedo.rgb; - - if (length(normal) > 0.2) { - accum += max(0.0, dot(normal, -light_dir)) * light; - } else { - //all directions - accum += light; - } - } - - outputs.data[cell_index] = vec4(accum + emission, 0.0); - -#endif //MODE_COMPUTE_LIGHT - - /////////////////SECOND BOUNCE/////////////////////////////// - -#ifdef MODE_SECOND_BOUNCE - vec3 pos = vec3(posu) + vec3(0.5); - ivec3 ipos = ivec3(posu); - vec4 normal = unpackSnorm4x8(cell_data.data[cell_index].normal); - - vec3 accum = outputs.data[cell_index].rgb; - /* gather light from color_texture along MAX_CONE_DIRS cones around the normal and add it to what the cell already holds; cells whose normal is shorter than 0.2 are written back unchanged */ - if (length(normal.xyz) > 0.2) { - vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0); - vec3 tangent = normalize(cross(v0, normal.xyz)); - vec3 bitangent = normalize(cross(tangent, normal.xyz)); - mat3 normal_mat = mat3(tangent, bitangent, normal.xyz); - -#define MAX_CONE_DIRS 6 - - vec3 cone_dirs[MAX_CONE_DIRS] = vec3[]( - vec3(0.0, 0.0, 1.0), - vec3(0.866025, 0.0, 0.5), - vec3(0.267617, 0.823639, 0.5), - vec3(-0.700629, 0.509037, 0.5), - vec3(-0.700629, -0.509037, 0.5), - vec3(0.267617, -0.823639, 0.5)); - - float cone_weights[MAX_CONE_DIRS] = float[](0.25, 0.15, 0.15, 0.15, 0.15, 0.15); - float tan_half_angle = 0.577; - - for (int i = 0; i < MAX_CONE_DIRS; i++) { - vec3 direction = normal_mat * cone_dirs[i]; - vec4 color = vec4(0.0); - { - float dist = 1.5; - float max_distance = length(vec3(params.limits)); - vec3 cell_size = 1.0 / vec3(params.limits); - /* front-to-back accumulation: the cone diameter grows with distance and selects the mip level, each step advances by half the diameter */ - while (dist < max_distance && color.a < 0.95) { - float diameter = max(1.0, 2.0 * tan_half_angle * dist); - vec3 uvw_pos = (pos + dist * direction) * cell_size; - float half_diameter = diameter * 0.5; - //check if outside, then break - //if ( any(greaterThan(abs(uvw_pos - 0.5),vec3(0.5f + half_diameter * cell_size)) ) ) { - // break; - //} - - float log2_diameter = log2(diameter); - vec4 scolor = textureLod(sampler3D(color_texture, texture_sampler), 
uvw_pos, log2_diameter); - float a = (1.0 - color.a); - color += a * scolor; - dist += half_diameter; - } - } - color *= cone_weights[i] * vec4(albedo.rgb, 1.0) * params.dynamic_range; //restore range - accum += color.rgb; - } - } - - outputs.data[cell_index] = vec4(accum, 0.0); - -#endif // MODE_SECOND_BOUNCE - - /////////////////UPDATE MIPMAPS/////////////////////////////// - -#ifdef MODE_UPDATE_MIPMAPS - /* parent light = sum of the existing children divided by a divisor that blends from 8 to the number of existing children as params.propagation goes from 0 to 1 */ - { - vec3 light_accum = vec3(0.0); - float count = 0.0; - for (uint i = 0; i < 8; i++) { - uint child_index = cell_children.data[cell_index].children[i]; - if (child_index == NO_CHILDREN) { - continue; - } - light_accum += outputs.data[child_index].rgb; - - count += 1.0; - } - /* NOTE(review): divisor is 0.0 when count is 0 and propagation is 1.0 - confirm that cells without children never reach this pass */ - float divisor = mix(8.0, count, params.propagation); - outputs.data[cell_index] = vec4(light_accum / divisor, 0.0); - } -#endif - - ///////////////////WRITE TEXTURE///////////////////////////// - -#ifdef MODE_WRITE_TEXTURE - { - imageStore(color_tex, ivec3(posu), vec4(outputs.data[cell_index].rgb / params.dynamic_range, albedo.a)); - } -#endif - - ///////////////////DYNAMIC LIGHTING///////////////////////////// - -#ifdef MODE_DYNAMIC - /* one invocation per texel of the rect; uv_xy is pos_xy mirrored according to flip_x / flip_y */ - ivec2 pos_xy = ivec2(gl_GlobalInvocationID.xy); - if (any(greaterThanEqual(pos_xy, params.rect_size))) { - return; //out of bounds - } - - ivec2 uv_xy = pos_xy; - if (params.flip_x) { - uv_xy.x = params.rect_size.x - pos_xy.x - 1; - } - if (params.flip_y) { - uv_xy.y = params.rect_size.y - pos_xy.y - 1; - } - -#ifdef MODE_DYNAMIC_LIGHTING - - { - float z = params.z_base + imageLoad(depth, uv_xy).x * params.z_sign; - - ivec3 pos = params.x_dir * (params.rect_pos.x + pos_xy.x) + params.y_dir * (params.rect_pos.y + pos_xy.y) + abs(params.z_dir) * int(z); - - vec3 normal = imageLoad(source_normal, uv_xy).xyz * 2.0 - 1.0; - normal = vec3(params.x_dir) * normal.x * mix(1.0, -1.0, params.flip_x) + vec3(params.y_dir) * normal.y * mix(1.0, -1.0, params.flip_y) - vec3(params.z_dir) * normal.z; - - vec4 albedo = imageLoad(source_albedo, uv_xy); - - //determine the 
position in space - - vec3 accum = vec3(0.0); - for (uint i = 0; i < params.light_count; i++) { - vec3 light; - vec3 light_dir; - if (!compute_light_at_pos(i, vec3(pos) * params.pos_multiplier, normal, light, light_dir)) { - continue; - } - - light *= albedo.rgb; - - accum += max(0.0, dot(normal, -light_dir)) * light; - } - - accum += imageLoad(emission, uv_xy).xyz; - - imageStore(emission, uv_xy, vec4(accum, albedo.a)); - imageStore(depth, uv_xy, vec4(z)); - } - -#endif // MODE DYNAMIC LIGHTING - -#ifdef MODE_DYNAMIC_SHRINK - /* 2x2 downsample of source_light / source_depth; texels whose alpha is 0 are skipped */ - { - vec4 accum = vec4(0.0); - float accum_z = 0.0; - float count = 0.0; - - for (int i = 0; i < 4; i++) { - ivec2 ofs = pos_xy * 2 + ivec2(i & 1, i >> 1) - params.prev_rect_ofs; - if (any(lessThan(ofs, ivec2(0))) || any(greaterThanEqual(ofs, params.prev_rect_size))) { - continue; - } - if (params.flip_x) { - ofs.x = params.prev_rect_size.x - ofs.x - 1; - } - if (params.flip_y) { - ofs.y = params.prev_rect_size.y - ofs.y - 1; - } - - vec4 light = imageLoad(source_light, ofs); - if (light.a == 0.0) { //ignore empty - continue; - } - accum += light; - float z = imageLoad(source_depth, ofs).x; - accum_z += z * 0.5; //shrink half too - count += 1.0; - } - - if (params.on_mipmap) { - accum.rgb /= mix(8.0, count, params.propagation); - accum.a /= 8.0; - } else { - accum /= 4.0; - } - - if (count == 0.0) { - accum_z = 0.0; //avoid nan - } else { - accum_z /= count; - } - -#ifdef MODE_DYNAMIC_SHRINK_WRITE - - imageStore(light, uv_xy, accum); - imageStore(depth, uv_xy, vec4(accum_z)); -#endif - -#ifdef MODE_DYNAMIC_SHRINK_PLOT - - if (accum.a < 0.001) { - return; //do not blit if alpha is too low - } - - ivec3 pos = params.x_dir * (params.rect_pos.x + pos_xy.x) + params.y_dir * (params.rect_pos.y + pos_xy.y) + abs(params.z_dir) * int(accum_z); - - float z_frac = fract(accum_z); - /* split the blit between the two cells that accum_z falls between, weighted by its fractional part */ - for (int i = 0; i < 2; i++) { - ivec3 pos3d = pos + abs(params.z_dir) * i; - if (any(lessThan(pos3d, ivec3(0))) || any(greaterThanEqual(pos3d, params.limits))) { - //skip 
if offlimits - continue; - } - vec4 color_blit = accum * (i == 0 ? 1.0 - z_frac : z_frac); - vec4 color = imageLoad(color_texture, pos3d); - color.rgb *= params.dynamic_range; - -#if 0 - color.rgb = mix(color.rgb,color_blit.rgb,color_blit.a); - color.a+=color_blit.a; -#else - /* composite color_blit over the stored colour, treating rgb as non-premultiplied; when the resulting alpha is 0 the stored colour is kept as loaded */ - float sa = 1.0 - color_blit.a; - vec4 result; - result.a = color.a * sa + color_blit.a; - if (result.a == 0.0) { - result = vec4(0.0); - } else { - result.rgb = (color.rgb * color.a * sa + color_blit.rgb * color_blit.a) / result.a; - color = result; - } - -#endif - color.rgb /= params.dynamic_range; - imageStore(color_texture, pos3d, color); - //imageStore(color_texture,pos3d,vec4(1,1,1,1)); - } -#endif // MODE_DYNAMIC_SHRINK_PLOT - } -#endif - -#endif // MODE DYNAMIC -} diff --git a/servers/rendering/renderer_rd/shaders/voxel_gi_debug.glsl b/servers/rendering/renderer_rd/shaders/voxel_gi_debug.glsl deleted file mode 100644 index fd7a2bf8ad..0000000000 --- a/servers/rendering/renderer_rd/shaders/voxel_gi_debug.glsl +++ /dev/null @@ -1,168 +0,0 @@ -#[vertex] - -#version 450 - -#VERSION_DEFINES - -struct CellData { - uint position; // packed coordinates: x in bits 0-10, y in bits 11-20, z in bits 21-31 - uint albedo; //rgba albedo, 8 bits per channel (read with unpackUnorm4x8) - uint emission; //rgb, 9 bits per channel, with a shared exponent in the top 5 bits - uint normal; //RGB normal encoded -}; - -layout(set = 0, binding = 1, std140) buffer CellDataBuffer { - CellData data[]; -} -cell_data; - -layout(set = 0, binding = 2) uniform texture3D color_tex; - -layout(set = 0, binding = 3) uniform sampler tex_sampler; - -layout(push_constant, std430) uniform Params { - mat4 projection; - uint cell_offset; - float dynamic_range; - float alpha; - uint level; - ivec3 bounds; - uint pad; -} -params; - -layout(location = 0) out vec4 color_interp; -/* Vertex stage: draws one cube per instance, indexing cube_triangles with gl_VertexIndex. */ -void main() { - const vec3 cube_triangles[36] = vec3[]( - vec3(-1.0f, -1.0f, -1.0f), - vec3(-1.0f, -1.0f, 1.0f), - vec3(-1.0f, 1.0f, 1.0f), - vec3(1.0f, 1.0f, -1.0f), - vec3(-1.0f, -1.0f, -1.0f), - vec3(-1.0f, 1.0f, -1.0f), - vec3(1.0f, -1.0f, 1.0f), - vec3(-1.0f, -1.0f, -1.0f), - 
vec3(1.0f, -1.0f, -1.0f), - vec3(1.0f, 1.0f, -1.0f), - vec3(1.0f, -1.0f, -1.0f), - vec3(-1.0f, -1.0f, -1.0f), - vec3(-1.0f, -1.0f, -1.0f), - vec3(-1.0f, 1.0f, 1.0f), - vec3(-1.0f, 1.0f, -1.0f), - vec3(1.0f, -1.0f, 1.0f), - vec3(-1.0f, -1.0f, 1.0f), - vec3(-1.0f, -1.0f, -1.0f), - vec3(-1.0f, 1.0f, 1.0f), - vec3(-1.0f, -1.0f, 1.0f), - vec3(1.0f, -1.0f, 1.0f), - vec3(1.0f, 1.0f, 1.0f), - vec3(1.0f, -1.0f, -1.0f), - vec3(1.0f, 1.0f, -1.0f), - vec3(1.0f, -1.0f, -1.0f), - vec3(1.0f, 1.0f, 1.0f), - vec3(1.0f, -1.0f, 1.0f), - vec3(1.0f, 1.0f, 1.0f), - vec3(1.0f, 1.0f, -1.0f), - vec3(-1.0f, 1.0f, -1.0f), - vec3(1.0f, 1.0f, 1.0f), - vec3(-1.0f, 1.0f, -1.0f), - vec3(-1.0f, 1.0f, 1.0f), - vec3(1.0f, 1.0f, 1.0f), - vec3(-1.0f, 1.0f, 1.0f), - vec3(1.0f, -1.0f, 1.0f)); - - vec3 vertex = cube_triangles[gl_VertexIndex] * 0.5 + 0.5; //remap the cube corner from -1..1 to 0..1 -#ifdef MODE_DEBUG_LIGHT_FULL - uvec3 posu = uvec3(gl_InstanceIndex % params.bounds.x, (gl_InstanceIndex / params.bounds.x) % params.bounds.y, gl_InstanceIndex / (params.bounds.y * params.bounds.x)); //one instance per grid cell, x-major -#else - uint cell_index = gl_InstanceIndex + params.cell_offset; - - uvec3 posu = uvec3(cell_data.data[cell_index].position & 0x7FF, (cell_data.data[cell_index].position >> 11) & 0x3FF, cell_data.data[cell_index].position >> 21); //11 bits x, 10 bits y, 11 bits z -#endif - -#ifdef MODE_DEBUG_EMISSION - color_interp.xyz = vec3(uvec3(cell_data.data[cell_index].emission & 0x1ff, (cell_data.data[cell_index].emission >> 9) & 0x1ff, (cell_data.data[cell_index].emission >> 18) & 0x1ff)) * pow(2.0, float(cell_data.data[cell_index].emission >> 27) - 15.0 - 9.0); -#endif - -#ifdef MODE_DEBUG_COLOR - color_interp.xyz = unpackUnorm4x8(cell_data.data[cell_index].albedo).xyz; -#endif - -#ifdef MODE_DEBUG_LIGHT - color_interp = texelFetch(sampler3D(color_tex, tex_sampler), ivec3(posu), int(params.level)); - color_interp.xyz *= params.dynamic_range; //restore range (this used to be a bare multiplication whose result was discarded) -#endif - - float scale = (1 << params.level); - - gl_Position = params.projection * vec4((vec3(posu) + vertex) * scale, 1.0); - -#ifdef MODE_DEBUG_LIGHT_FULL - 
if (color_interp.a == 0.0) { - gl_Position = vec4(0.0); //force clip and not draw - } -#else - color_interp.a = params.alpha; -#endif -} - -#[fragment] - -#version 450 - -#VERSION_DEFINES - -layout(location = 0) in vec4 color_interp; -layout(location = 0) out vec4 frag_color; - -void main() { - frag_color = color_interp; - -#ifdef MODE_DEBUG_LIGHT_FULL - - //there really is no alpha, so use dither - - int x = int(gl_FragCoord.x) % 4; - int y = int(gl_FragCoord.y) % 4; - int index = x + y * 4; - //4x4 ordered-dither thresholds, row-major (index = x + y * 4); x and y are always 0..3 so index is always 0..15 - const float dither_limits[16] = float[]( - 0.0625, 0.5625, 0.1875, 0.6875, - 0.8125, 0.3125, 0.9375, 0.4375, - 0.25, 0.75, 0.125, 0.625, - 1.0, 0.5, 0.875, 0.375); - float limit = dither_limits[index]; - if (frag_color.a < limit) { - discard; - } -#endif -} diff --git a/servers/rendering/renderer_rd/shaders/voxel_gi_sdf.glsl b/servers/rendering/renderer_rd/shaders/voxel_gi_sdf.glsl deleted file mode 100644 index 47a611a543..0000000000 --- a/servers/rendering/renderer_rd/shaders/voxel_gi_sdf.glsl +++ /dev/null @@ -1,180 +0,0 @@ -#[compute] - -#version 450 - -#VERSION_DEFINES - -layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; - -#define MAX_DISTANCE 100000.0 - -#define NO_CHILDREN 0xFFFFFFFF - -struct CellChildren { - uint children[8]; -}; - -layout(set = 0, binding = 1, std430) buffer CellChildrenBuffer { - CellChildren data[]; -} -cell_children; - -struct CellData { - uint position; // packed coordinates: x in bits 0-10, y in bits 11-20, z in bits 21-31 - uint albedo; //rgb albedo - uint emission; //rgb normalized with e as multiplier - uint normal; //RGB normal encoded -}; - -layout(set = 0, binding = 2, std430) 
buffer CellDataBuffer { - CellData data[]; -} -cell_data; - -layout(r8ui, set = 0, binding = 3) uniform restrict writeonly uimage3D sdf_tex; - -layout(push_constant, std430) uniform Params { - uint offset; - uint end; - uint pad0; - uint pad1; -} -params; -/* Brute-force distance field: for its texel, each invocation finds the distance to the nearest cell in [params.offset, params.end) and stores it in sdf_tex as 0 when the texel is (almost) on a cell, otherwise the truncated distance clamped to 0..254, plus 1. */ -void main() { - vec3 pos = vec3(gl_GlobalInvocationID); - float closest_dist = MAX_DISTANCE; - - for (uint i = params.offset; i < params.end; i++) { - vec3 posu = vec3(uvec3(cell_data.data[i].position & 0x7FF, (cell_data.data[i].position >> 11) & 0x3FF, cell_data.data[i].position >> 21)); - float dist = length(pos - posu); - if (dist < closest_dist) { - closest_dist = dist; - } - } - - uint dist_8; - - if (closest_dist < 0.0001) { // same cell - dist_8 = 0; //reads back as -1 once the +1 bias applied below is subtracted - } else { - dist_8 = clamp(uint(closest_dist), 0, 254) + 1; //conservative, 0 is 1, so <1 is considered solid - } - - imageStore(sdf_tex, ivec3(gl_GlobalInvocationID), uvec4(dist_8)); - //imageStore(sdf_tex,pos,uvec4(pos*2,0)); -} - -#if 0 /* disabled alternative that walks the cell hierarchy through cell_children instead of testing every cell */ -layout(push_constant, std430) uniform Params { - ivec3 limits; - uint stack_size; -} -params; - -float distance_to_aabb(ivec3 pos, ivec3 aabb_pos, ivec3 aabb_size) { - vec3 delta = vec3(max(ivec3(0), max(aabb_pos - pos, pos - (aabb_pos + aabb_size - ivec3(1))))); - return length(delta); -} - -void main() { - ivec3 pos = ivec3(gl_GlobalInvocationID); - - uint stack[10] = uint[](0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - uint stack_indices[10] = uint[](0, 0, 0, 0, 0, 0, 0, 0, 0, 0); - ivec3 stack_positions[10] = ivec3[](ivec3(0), ivec3(0), ivec3(0), ivec3(0), ivec3(0), ivec3(0), ivec3(0), ivec3(0), ivec3(0), ivec3(0)); - - const uint cell_orders[8] = uint[]( - 0x11f58d1, - 0xe2e70a, - 0xd47463, - 0xbb829c, - 0x8d11f5, - 0x70ae2e, - 0x463d47, - 0x29cbb8); - - bool cell_found = false; - bool cell_found_exact = false; - ivec3 closest_cell_pos; - float closest_distance = MAX_DISTANCE; - int stack_pos = 0; - /* each stack_indices entry keeps eight 3-bit child slots in its low 24 bits (the visiting order) and, in the bits above, how many of them have been visited */ - while (true) { - uint index = stack_indices[stack_pos] >> 24; - - if (index == 8) { - //go up - if (stack_pos == 
0) { - break; //done going through octree - } - stack_pos--; - continue; - } - - stack_indices[stack_pos] = (stack_indices[stack_pos] & ((1 << 24) - 1)) | ((index + 1) << 24); - - uint cell_index = (stack_indices[stack_pos] >> (index * 3)) & 0x7; - uint child_cell = cell_children.data[stack[stack_pos]].children[cell_index]; - - if (child_cell == NO_CHILDREN) { - continue; - } - - ivec3 child_cell_size = params.limits >> (stack_pos + 1); - ivec3 child_cell_pos = stack_positions[stack_pos]; - - child_cell_pos += mix(ivec3(0), child_cell_size, bvec3(uvec3(index & 1, index & 2, index & 4) != uvec3(0))); - - bool is_leaf = stack_pos == (params.stack_size - 2); - - if (child_cell_pos == pos && is_leaf) { - //we may actually end up in the exact cell. - //if this happens, just abort - cell_found_exact = true; - break; - } - - if (cell_found) { - //discard by distance - float distance = distance_to_aabb(pos, child_cell_pos, child_cell_size); - if (distance >= closest_distance) { - continue; //pointless, just test next child - } else if (is_leaf) { - //closer than what we have AND end of stack, save and continue - closest_cell_pos = child_cell_pos; - closest_distance = distance; - continue; - } - } else if (is_leaf) { - //first solid cell we find, save and continue - closest_distance = distance_to_aabb(pos, child_cell_pos, child_cell_size); - closest_cell_pos = child_cell_pos; - cell_found = true; - continue; - } - - bvec3 direction = greaterThan((pos - (child_cell_pos + (child_cell_size >> 1))), ivec3(0)); - uint cell_order = 0; - cell_order |= mix(0, 1, direction.x); - cell_order |= mix(0, 2, direction.y); - cell_order |= mix(0, 4, direction.z); - - stack[stack_pos + 1] = child_cell; - stack_indices[stack_pos + 1] = cell_orders[cell_order]; //start counting - stack_positions[stack_pos + 1] = child_cell_pos; - stack_pos++; //descend: push the child onto the stack - } - - uint dist_8; - - if (cell_found_exact) { - dist_8 = 0; //reads back as -1 once the +1 bias applied below is subtracted - } else { - float closest_distance = length(vec3(pos - 
closest_cell_pos)); - dist_8 = clamp(uint(closest_distance), 0, 254) + 1; //conservative, 0 is 1, so <1 is considered solid - } - - imageStore(sdf_tex, pos, uvec4(dist_8)); -} -#endif -- cgit v1.2.3